# (removed: "Spaces: Running" status banner captured from the hosting page — not code)
import json
import os
import warnings
from typing import List
from operator import itemgetter
from dotenv import load_dotenv
from langfuse.callback import CallbackHandler

# Populate the process environment from .env before reading any credentials.
# (Idempotent: the later load_dotenv() call elsewhere in this file is harmless.)
load_dotenv()

# Langfuse tracing callback used for every chain invocation.
# SECURITY FIX: the public/secret keys were hard-coded here and committed to
# source. They now come from the environment; rotate the leaked keys.
langfuse_handler = CallbackHandler(
    public_key=os.environ["LANGFUSE_PUBLIC_KEY"],
    secret_key=os.environ["LANGFUSE_SECRET_KEY"],
    host=os.getenv("LANGFUSE_HOST", "https://cloud.langfuse.com"),
)
import gradio as gr
from langchain_openai import OpenAIEmbeddings
from langchain_openai import ChatOpenAI
from langchain.docstore.document import Document
from langchain.prompts import PromptTemplate
from langchain.prompts import ChatPromptTemplate
from langchain.chains import RetrievalQA
from langchain_core.output_parsers import StrOutputParser, CommaSeparatedListOutputParser
from pydantic import BaseModel, Field
from langchain_core.runnables import RunnablePassthrough
from langchain_core.runnables import RunnableParallel
from langchain_community.vectorstores import FAISS
from langchain_community.document_transformers import EmbeddingsRedundantFilter
from langchain.retrievers import EnsembleRetriever
from langchain.retrievers.merger_retriever import MergerRetriever
from langchain.retrievers.document_compressors import DocumentCompressorPipeline
from langchain.retrievers import ContextualCompressionRetriever
#from langchain_cohere import CohereEmbeddings, CohereRerank
#from langchain_groq import ChatGroq

# Load variables from .env into the environment (OPENAI_API_KEY, etc.).
load_dotenv()
# NOTE(review): os.getenv() only *reads* a value and discards it — this line is
# a no-op kept for parity with the original; the OpenAI client reads its key
# from the environment directly.
os.getenv("COHERE_API_KEY")
#os.getenv("GROQ_API_KEY")
#os.getenv("OPENAI_API_KEY")

# Embedding model used to query the FAISS indexes; must match the model the
# indexes were built with.
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
#embeddings_cohere = CohereEmbeddings(model="embed-multilingual-v3.0")
def load_vectorstore(index_name, embeddings, k=100):
    """Load a persisted FAISS index and wrap it as a top-k retriever.

    Args:
        index_name: Directory name of the saved FAISS index.
        embeddings: Embedding model the index was built with.
        k: Number of documents the retriever returns per query.

    Returns:
        A retriever over the loaded vector store.
    """
    # allow_dangerous_deserialization: FAISS indexes are pickled on disk; only
    # load indexes produced by this project (trusted source).
    store = FAISS.load_local(index_name, embeddings, allow_dangerous_deserialization=True)
    return store.as_retriever(search_kwargs={"k": k})
# FAISS index names exposed by this app; each gets a plain retriever plus a
# runnable that extracts the "input" key from the payload before retrieving.
retriever_names = ['large']

retrievers = {idx: load_vectorstore(f"{idx}", embeddings) for idx in retriever_names}
retrievers_docs = {idx: (lambda x: x["input"]) | retrievers[idx] for idx in retriever_names}
def format_docs_with_id(docs: List[Document]) -> str:
    """Render each document as a labelled text record for the LLM context.

    Each record lists the key metadata fields (XINumber, Book Number, raw
    material cost RMC, formula name, evaluation date, application product,
    fragrance type) followed by the document's notes; absent metadata is
    rendered as "Missing".

    Args:
        docs: Documents (articles) to format.

    Returns:
        The formatted records, separated — and prefixed — by blank lines.
    """
    # (display label, metadata key) pairs, in output order.
    field_map = [
        ("XINumber", "XINumber"),
        ("Book Number", "Book Number"),
        ("Raw Material Cost", "RMC"),
        ("Fragrance Formula Name", "Formula Name"),
        ("Date Evaluated", "Date Evaluated"),
        ("Application Product", "Application"),
        ("Fragrance Type", "Fragrance Type"),
    ]
    records = []
    for doc in docs:
        lines = [f"{label}: {doc.metadata.get(key, 'Missing')}" for label, key in field_map]
        lines.append(f"Fragrance Formula Notes: {doc.page_content}")
        records.append("\n".join(lines) + "\n")
    return "\n\n" + "\n\n".join(records)
def prompt_fn(criteria):
    """Build the system-prompt text for formula selection.

    Instructs the model to choose at least 15 matching formulas from the
    retrieved context, reordered by how well they match, and embeds the
    caller's extra filtering criteria. Ends with a literal "{context}"
    placeholder that ChatPromptTemplate fills in later.

    Args:
        criteria: Free-text filtering instructions from the user.

    Returns:
        str: The assembled system-prompt template text.
    """
    parts = (
        "You are fragrance formula design expert and you have to chose all formulas from context that fits most the client brief.",
        "Return formulas with all their details: XINumber, Book Number, Raw Material Cost RMC, Application Product, Fragrance Formula Name, Fragrance Formula and Descriptors",
        "Return 15 formulas at least that fits. Reorder the returned formulas according to the matching criterias.",
        "Return only the Formulas with all their details without any additional comments.",
        f"Here are additional criterias to respect and to filter for from context: {criteria}",
        "\n\nHere is the context: ",
        "{context}",  # left un-formatted for ChatPromptTemplate substitution
    )
    return "".join(parts)
# Deterministic generation (temperature=0) for reproducible recommendations.
llm = ChatOpenAI(temperature=0, model="gpt-4o")

# BUG FIX: the original line `structured_llm = llm.with_structured_output(CitedAnswer_fr)`
# referenced CitedAnswer_fr, which is not defined anywhere in this file and
# raised NameError at import time. structured_llm was never used, so the line
# is removed.

# Per-index runnable that extracts the question text before retrieval.
# NOTE(review): duplicates retrievers_docs built above — consider reusing it.
retrieve_docs = {name: (lambda x: x["input"]) | retrievers[name] for name in retriever_names}
def legal(question, criteria='select from all context'):
    """Run the RAG chain: retrieve formulas and have the LLM rank the matches.

    Args:
        question: The client brief / free-text fragrance description.
        criteria: Extra filtering instructions (application area, RMC cap,
            notes to exclude, ...). Defaults to a no-op filter.

    Returns:
        str: The LLM's formatted formula recommendations.

    Raises:
        ValueError: If the selected retriever name is not configured.
    """
    prompt = ChatPromptTemplate.from_messages([
        ("system", prompt_fn(criteria)),
        ("human", "{input}"),
    ])
    # Format the retrieved documents into the {context} slot, then generate.
    rag_chain_from_docs = (
        RunnablePassthrough.assign(context=(lambda x: format_docs_with_id(x["context"])))
        | prompt
        | llm
    )
    chains = {
        name: RunnablePassthrough.assign(context=retrieve_docs[name]).assign(answer=rag_chain_from_docs)
        for name in retriever_names
    }
    name = "large"  # only the 'large' index is exposed for now
    if name not in chains:
        raise ValueError(f"Invalid typologie: {name}")
    result = chains[name].invoke(
        {"input": question},
        config={"callbacks": [langfuse_handler]},  # trace the run in Langfuse
    )
    return result["answer"].content
# --- Gradio UI ---------------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("## OnScent Fragrance Intelligent Library Search")
    #gr.Markdown("Developed by ScentGenie")
    gr.Markdown("### Client Brief or Description")
    with gr.Row():
        input1 = gr.Textbox(label="Brief", placeholder="Wrapped in a blanket by the fireplace")
    gr.Markdown("### Additional Criterias")
    gr.Markdown("Criterias like Application area, RMC, Notes to exclude etc")
    with gr.Row():
        # Typo fix in the user-facing label: "empy" -> "empty".
        input2 = gr.Textbox(label="Additional Criterias (can be left empty)", placeholder=" for hair products with RMC under 15$ and without vanilla note")
    gr.Markdown("## Recommended Formulas")
    output1 = gr.Text(label="Recommendations ranked by Relevance")
    btn = gr.Button("Submit")
    # Wire the submit button to the RAG entry point defined above.
    btn.click(legal, inputs=[input1, input2], outputs=[output1])

demo.launch(debug=True)