Create app.py
app.py ADDED
@@ -0,0 +1,164 @@
import json
import os
import warnings
from typing import List
from operator import itemgetter
from dotenv import load_dotenv

from langfuse.callback import CallbackHandler
langfuse_handler = CallbackHandler(
    public_key="pk-lf-b5beeffa-b30f-4390-b850-5a1628c7bc5e",
    secret_key="sk-lf-a3e49606-e293-4d32-b451-21627a02556e",
    host="https://cloud.langfuse.com"
)

import gradio as gr

from langchain_openai import OpenAIEmbeddings
from langchain_openai import ChatOpenAI

from langchain.docstore.document import Document

from langchain.prompts import PromptTemplate
from langchain.prompts import ChatPromptTemplate

from langchain.chains import RetrievalQA

from langchain_core.output_parsers import StrOutputParser, CommaSeparatedListOutputParser
from pydantic import BaseModel, Field
from langchain_core.runnables import RunnablePassthrough
from langchain_core.runnables import RunnableParallel

from langchain_community.vectorstores import FAISS
from langchain_community.document_transformers import EmbeddingsRedundantFilter

from langchain.retrievers import EnsembleRetriever
from langchain.retrievers.merger_retriever import MergerRetriever
from langchain.retrievers.document_compressors import DocumentCompressorPipeline
from langchain.retrievers import ContextualCompressionRetriever

#from langchain_cohere import CohereEmbeddings, CohereRerank

#from langchain_groq import ChatGroq

load_dotenv()

os.getenv("COHERE_API_KEY")
#os.getenv("GROQ_API_KEY")
#os.getenv("OPENAI_API_KEY")

embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
#embeddings_cohere = CohereEmbeddings(model="embed-multilingual-v3.0")
+
|
53 |
+
def load_vectorstore(index_name, embeddings, k=100):
|
54 |
+
return FAISS.load_local(index_name, embeddings, allow_dangerous_deserialization=True).as_retriever(
|
55 |
+
search_kwargs={ "k": k}
|
56 |
+
)
|
57 |
+
|
58 |
+
retriever_names = ['large']
|
59 |
+
|
60 |
+
retrievers = {}
|
61 |
+
retrievers_docs = {}
|
62 |
+
|
63 |
+
for name in retriever_names:
|
64 |
+
retrievers[name] = load_vectorstore(f"{name}", embeddings)
|
65 |
+
retrievers_docs[name] = (lambda x: x["input"]) | retrievers[name]
|


def format_docs_with_id(docs: List[Document]) -> str:
    """
    Format the provided documents with the relevant information about each source.
    Includes XINumber, Book Number, Raw Material Cost RMC, Fragrance Formula Name and Fragrance Formula Descriptors.
    Args:
        docs (List[Document]): List of documents or articles to format.
    Returns:
        str: Formatted representation of the documents.
    """
    formatted = [
        (
            f"XINumber: {doc.metadata.get('XINumber', 'Missing')}\n"
            f"Book Number: {doc.metadata.get('Book Number', 'Missing')}\n"
            f"Raw Material Cost: {doc.metadata.get('RMC', 'Missing')}\n"
            f"Fragrance Formula Name: {doc.metadata.get('Formula Name', 'Missing')}\n"
            f"Date Evaluated: {doc.metadata.get('Date Evaluated', 'Missing')}\n"
            f"Application Product: {doc.metadata.get('Application', 'Missing')}\n"
            f"Fragrance Type: {doc.metadata.get('Fragrance Type', 'Missing')}\n"
            f"Fragrance Formula Notes: {doc.page_content}\n"
        )
        for doc in docs
    ]
    return "\n\n" + "\n\n".join(formatted)


def prompt_fn(criteria):
    # System prompt: ask the model to select and rank matching formulas from the retrieved context.
    return (
        "You are a fragrance formula design expert and you have to choose all formulas from the context that best fit the client brief. "
        "Return formulas with all their details: XINumber, Book Number, Raw Material Cost RMC, Application Product, Fragrance Formula Name, Fragrance Formula and Descriptors. "
        "Return at least 15 formulas that fit. Reorder the returned formulas according to the matching criteria. "
        "Return only the formulas with all their details, without any additional comments. "

        f"Here are additional criteria to respect and to filter for from the context: {criteria}"
        "\n\nHere is the context: "
        "{context}"
    )

llm = ChatOpenAI(temperature=0, model="gpt-4o")
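
# NOTE: CitedAnswer_fr is referenced below but never defined in this file. The minimal
# placeholder schema that follows is an assumption (field names inferred from the
# commented-out `result["answer"].articles, result["answer"].citations` usage in legal())
# and exists only so the module can load without a NameError.
class CitedAnswer_fr(BaseModel):
    answer: str = Field(description="Answer text returned to the user")
    articles: List[str] = Field(default_factory=list, description="Formulas cited in the answer")
    citations: List[str] = Field(default_factory=list, description="IDs of the cited source documents")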
structured_llm = llm.with_structured_output(CitedAnswer_fr)

retrieve_docs = {name: (lambda x: x["input"]) | retrievers[name] for name in retriever_names}

def legal(question, criteria='select from all context'):
    # Retrieve candidate formulas, format them as context, and let gpt-4o select and rank them.
    prompt = ChatPromptTemplate.from_messages([
        ("system", prompt_fn(criteria)),
        ("human", "{input}"),
    ])

    rag_chain_from_docs = (
        RunnablePassthrough.assign(context=(lambda x: format_docs_with_id(x["context"])))
        | prompt
        | llm
    )

    chains = {
        name: RunnablePassthrough.assign(context=retrieve_docs[name]).assign(answer=rag_chain_from_docs)
        for name in retriever_names
    }

    name = "large"

    if name not in chains:
        raise ValueError(f"Invalid typology: {name}")
    #try:
    #result = chains[name].invoke({"input": question})
    result = chains[name].invoke({"input": question}, config={"callbacks": [langfuse_handler]})
    return result["answer"].content  #result["answer"].articles, result["answer"].citations
    #except Exception as e:
    #    return "I don't know", "", ""


with gr.Blocks() as demo:
    gr.Markdown("## OnScent Fragrance Intelligent Library Search")
    #gr.Markdown("Developed by ScentGenie")
    gr.Markdown("### Client Brief or Description")

    with gr.Row():
        input1 = gr.Textbox(label="Brief", placeholder="Wrapped in a blanket by the fireplace")
    gr.Markdown("### Additional Criteria")
    gr.Markdown("Criteria like Application area, RMC, Notes to exclude, etc.")
    with gr.Row():
        input2 = gr.Textbox(label="Additional Criteria (can be left empty)", placeholder="for hair products with RMC under 15$ and without vanilla note")
    #with gr.Row():
    #    input3 = gr.Dropdown(["Advanced", "Fast"], label="Mode", value="Advanced")

    gr.Markdown("## Recommended Formulas")
    output1 = gr.Text(label="Recommendations ranked by Relevance")
    #output2 = gr.Text(label="Documents Sources")
    #output3 = gr.Text(label="Documents IDs")

    btn = gr.Button("Submit")
    btn.click(legal, inputs=[input1, input2], outputs=[output1])

demo.launch(debug=True)
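
Note: load_vectorstore("large", embeddings) expects a FAISS index folder named "large" (containing index.faiss and index.pkl) to sit alongside app.py; that folder is not included in this diff. A minimal, hypothetical sketch of how such an index could be built with the same embedding model (the document content, metadata, and file name below are made up for illustration):

# build_index.py (hypothetical helper, not part of this commit)
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain.docstore.document import Document

docs = [
    Document(
        page_content="top: bergamot; heart: rose; base: amber",  # made-up fragrance notes
        metadata={"XINumber": "XI-0001", "Formula Name": "Fireside", "RMC": 12.5},
    ),
]

embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
# save_local writes large/index.faiss and large/index.pkl, which FAISS.load_local reads in app.py
FAISS.from_documents(docs, embeddings).save_local("large")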