chemouda committed on
Commit
58b3e09
·
verified ·
1 Parent(s): cbbe854

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +164 -0
app.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import warnings
4
+ from typing import List
5
+ from operator import itemgetter
6
+ from dotenv import load_dotenv
7
+
8
+ from langfuse.callback import CallbackHandler
9
# Langfuse tracing callback, passed to chain invocations for observability.
# Credentials are read from the environment instead of being committed to
# source control. Any key that was ever hard-coded here should be treated as
# leaked and rotated in the Langfuse project settings.
load_dotenv()  # make values from a local .env file visible before reading them
langfuse_handler = CallbackHandler(
    public_key=os.getenv("LANGFUSE_PUBLIC_KEY"),
    secret_key=os.getenv("LANGFUSE_SECRET_KEY"),
    host=os.getenv("LANGFUSE_HOST", "https://cloud.langfuse.com"),
)
14
+
15
+ import gradio as gr
16
+
17
+ from langchain_openai import OpenAIEmbeddings
18
+ from langchain_openai import ChatOpenAI
19
+
20
+ from langchain.docstore.document import Document
21
+
22
+ from langchain.prompts import PromptTemplate
23
+ from langchain.prompts import ChatPromptTemplate
24
+
25
+ from langchain.chains import RetrievalQA
26
+
27
+ from langchain_core.output_parsers import StrOutputParser, CommaSeparatedListOutputParser
28
+ from pydantic import BaseModel, Field
29
+ from langchain_core.runnables import RunnablePassthrough
30
+ from langchain_core.runnables import RunnableParallel
31
+
32
+ from langchain_community.vectorstores import FAISS
33
+ from langchain_community.document_transformers import EmbeddingsRedundantFilter
34
+
35
+ from langchain.retrievers import EnsembleRetriever
36
+ from langchain.retrievers.merger_retriever import MergerRetriever
37
+ from langchain.retrievers.document_compressors import DocumentCompressorPipeline
38
+ from langchain.retrievers import ContextualCompressionRetriever
39
+
40
+ #from langchain_cohere import CohereEmbeddings, CohereRerank
41
+
42
+ #from langchain_groq import ChatGroq
43
+
44
# Load variables from a local .env file into the process environment.
# NOTE(review): the OpenAI clients below presumably pick up OPENAI_API_KEY
# from the environment - confirm it is set in the deployment.
load_dotenv()

# Embedding model handed to the FAISS index loader.
# NOTE(review): presumably this must be the model the saved index was built
# with - confirm before changing it.
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
51
+ #embeddings_cohere = CohereEmbeddings(model="embed-multilingual-v3.0")
52
+
53
def load_vectorstore(index_name, embeddings, k=100):
    """Load a saved FAISS index and expose it as a retriever.

    Args:
        index_name: Path/name of the locally saved FAISS index.
        embeddings: Embedding object passed to ``FAISS.load_local``.
        k: Value forwarded as the ``"k"`` search keyword argument.

    Returns:
        The retriever produced by ``as_retriever`` on the loaded store.
    """
    # SECURITY: allow_dangerous_deserialization=True permits pickle loading;
    # only point this at index files that come from a trusted source.
    store = FAISS.load_local(
        index_name,
        embeddings,
        allow_dangerous_deserialization=True,
    )
    search_options = {"k": k}
    return store.as_retriever(search_kwargs=search_options)
57
+
58
# Names of the saved FAISS indexes to load at start-up.
retriever_names = ['large']

# name -> retriever built from the index of the same name.
retrievers = {}
# name -> runnable that pulls "input" out of the request dict and feeds it
# to the matching retriever.
retrievers_docs = {}

for name in retriever_names:
    retrievers[name] = load_vectorstore(f"{name}", embeddings)
    pick_input = lambda x: x["input"]
    retrievers_docs[name] = pick_input | retrievers[name]
66
+
67
+
68
def format_docs_with_id(docs: List["Document"]) -> str:
    """Format the given documents as labelled text blocks, one per document.

    Each block lists XINumber, Book Number, Raw Material Cost (RMC),
    Fragrance Formula Name, Date Evaluated, Application Product and
    Fragrance Type from the document metadata (``'Missing'`` when a key is
    absent), followed by the document's page content as the formula notes.

    Args:
        docs: Documents to format; each must expose ``metadata`` (a mapping)
            and ``page_content``.

    Returns:
        The blocks joined by blank lines, prefixed with two newlines.
    """
    # (label shown in the output, metadata key it is read from), in order.
    metadata_fields = (
        ("XINumber", "XINumber"),
        ("Book Number", "Book Number"),
        ("Raw Material Cost", "RMC"),
        ("Fragrance Formula Name", "Formula Name"),
        ("Date Evaluated", "Date Evaluated"),
        ("Application Product", "Application"),
        ("Fragrance Type", "Fragrance Type"),
    )

    blocks = []
    for doc in docs:
        lines = [
            f"{label}: {doc.metadata.get(key, 'Missing')}\n"
            for label, key in metadata_fields
        ]
        lines.append(f"Fragrance Formula Notes: {doc.page_content}\n")
        blocks.append("".join(lines))

    separator = "\n\n"
    return separator + separator.join(blocks)
91
+
92
+
93
def prompt_fn(criteria):
    """Build the system-prompt template for the formula-selection chain.

    The returned string is a prompt *template*: it contains a single
    ``{context}`` placeholder that is filled in later with the formatted
    documents.

    Args:
        criteria: Free-text additional criteria supplied by the user.

    Returns:
        The template string, with instructions separated by spaces and the
        criteria embedded literally.
    """
    # The criteria are user text that ends up inside a template string, so
    # literal braces must be doubled or the template engine would treat them
    # as variables and fail on formatting.
    safe_criteria = str(criteria).replace("{", "{{").replace("}", "}}")

    # Joined with an explicit space: plain adjacent literals ran the
    # sentences together ("brief.Return", "DescriptorsReturn", ...).
    instructions = (
        "You are fragrance formula design expert and you have to chose all formulas from context that fits most the client brief.",
        "Return formulas with all their details: XINumber, Book Number, Raw Material Cost RMC, Application Product, Fragrance Formula Name, Fragrance Formula and Descriptors.",
        "Return 15 formulas at least that fits. Reorder the returned formulas according to the matching criterias.",
        "Return only the Formulas with all their details without any additional comments.",
        f"Here are additional criterias to respect and to filter for from context: {safe_criteria}",
    )
    return " ".join(instructions) + "\n\nHere is the context: {context}"
105
+
106
# Chat model that writes the final answer; temperature=0 is passed to the
# client.
llm = ChatOpenAI(temperature=0, model="gpt-4o")

# The former `structured_llm = llm.with_structured_output(CitedAnswer_fr)`
# line was dropped: `CitedAnswer_fr` is not defined or imported anywhere in
# this file, so the module raised NameError on start-up, and the resulting
# object was never used.

# name -> runnable that extracts "input" from the request dict and passes it
# to the retriever of that name.
retrieve_docs = {
    name: (lambda x: x["input"]) | retrievers[name]
    for name in retriever_names
}
110
+
111
def legal(question, criteria='select from all context'):
    """Recommend formulas for a client brief using the "large" retriever.

    Builds a retrieval chain (retrieve documents, format them into the
    prompt context, call the chat model) and runs it with the Langfuse
    callback attached.

    Args:
        question: The client brief; passed to the chain as ``input``.
        criteria: Additional criteria inserted into the system prompt. A
            blank or ``None`` value falls back to the default text.

    Returns:
        The ``content`` attribute of the chat model's reply.

    Raises:
        ValueError: If the hard-coded retriever name has no retriever.
    """
    # An empty UI textbox arrives as "" rather than as a missing argument,
    # which would bypass the parameter default and leave the criteria blank.
    if criteria is None or not str(criteria).strip():
        criteria = 'select from all context'

    name = "large"
    if name not in retrieve_docs:
        raise ValueError(f"Invalid typologie: {name}")

    prompt = ChatPromptTemplate.from_messages([
        ("system", prompt_fn(criteria)),
        ("human", "{input}"),
    ])

    # Replace the retrieved document list under "context" with its formatted
    # text, then render the prompt and call the model.
    rag_chain_from_docs = (
        RunnablePassthrough.assign(context=(lambda x: format_docs_with_id(x["context"])))
        | prompt
        | llm
    )

    # Only the chain that is actually used is built.
    chain = RunnablePassthrough.assign(context=retrieve_docs[name]).assign(
        answer=rag_chain_from_docs
    )

    result = chain.invoke(
        {"input": question},
        config={"callbacks": [langfuse_handler]},
    )
    return result["answer"].content
140
+
141
+
142
# Gradio UI: two text inputs (brief and optional criteria), one text output,
# and a button that runs `legal` on the inputs.
# NOTE(review): the nesting under each gr.Row() was reconstructed from a
# whitespace-stripped source - confirm the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("## OnScent Fragrance Intelligent Library Search")
    gr.Markdown("### Client Brief or Description")

    with gr.Row():
        input1 = gr.Textbox(label="Brief", placeholder="Wrapped in a blanket by the fireplace")
    gr.Markdown("### Additional Criterias")
    gr.Markdown("Criterias like Application area, RMC, Notes to exclude etc")
    with gr.Row():
        input2 = gr.Textbox(label="Additional Criterias (can be left empty)", placeholder=" for hair products with RMC under 15$ and without vanilla note")

    gr.Markdown("## Recommended Formulas")
    output1 = gr.Text(label="Recommendations ranked by Relevance")

    btn = gr.Button("Submit")
    btn.click(legal, inputs=[input1, input2], outputs=[output1])

# Start the server only when run as a script, so importing this module
# (e.g. from tooling that looks up `demo`) does not launch it.
if __name__ == "__main__":
    demo.launch(debug=True)