"""RAG chain setup: retrievers with re-ranking, chat window memory, and the LCEL runnable."""

import logging
from operator import itemgetter

from langchain.memory import ConversationBufferWindowMemory
from langchain.retrievers import ContextualCompressionRetriever
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import Runnable, RunnableLambda
from langchain_openai import ChatOpenAI

from rag.retrievers import RetrieversConfig

from .helpers import (
    DocumentFormatter,
    create_langsmith_client,
    get_datetime,
    get_reranker,
)
from .prompt_template import generate_prompt_template

logging.basicConfig(level=logging.ERROR)
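
# `DocumentFormatter` below comes from `.helpers` and is not defined in this
# module. A minimal sketch of what it might look like (an assumption, not the
# actual implementation): a callable that joins the retrieved documents into a
# single numbered block for the prompt.
#
#     from langchain_core.documents import Document
#
#     class DocumentFormatter:
#         def __init__(self, prefix: str):
#             self.prefix = prefix
#
#         def __call__(self, docs: list[Document]) -> str:
#             return "\n\n".join(
#                 f"{self.prefix}{i}\n{doc.page_content}"
#                 for i, doc in enumerate(docs, start=1)
#             )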


def retrievers_setup(
    retrievers_config: RetrieversConfig, reranker: bool = False
) -> tuple:
    """Set up the practitioners and documents retrievers, optionally re-ranked.

    Args:
        retrievers_config (RetrieversConfig): Provides the base retrievers.
        reranker (bool, optional): Wrap each retriever in a contextual
            compression re-ranker. Defaults to False.

    Returns:
        tuple: (practitioners_retriever, documents_retriever)
    """
    # Practitioners
    practitioners_retriever = retrievers_config.get_practitioners_retriever(k=10)

    # Tall Tree documents
    documents_retriever = retrievers_config.get_documents_retriever(k=10)

    # Re-ranking (optional): improves result quality and serves as a filter
    if reranker:
        practitioners_retriever_reranker = ContextualCompressionRetriever(
            base_compressor=get_reranker(top_n=10),
            base_retriever=practitioners_retriever,
        )
        documents_retriever_reranker = ContextualCompressionRetriever(
            base_compressor=get_reranker(top_n=8),
            base_retriever=documents_retriever,
        )
        return practitioners_retriever_reranker, documents_retriever_reranker

    return practitioners_retriever, documents_retriever


# Set the retrievers as module-level globals (the Streamlit app loads faster this way)
practitioners_retriever, documents_retriever = retrievers_setup(
    retrievers_config=RetrieversConfig(), reranker=True
)
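
# `get_reranker` is imported from `.helpers`; its implementation is not shown
# here. One plausible sketch, assuming a Cohere re-ranker as the compressor
# (the actual helper may use a different backend):
#
#     from langchain.retrievers.document_compressors import CohereRerank
#
#     def get_reranker(top_n: int = 10) -> CohereRerank:
#         """Return a compressor that keeps the top_n re-ranked documents."""
#         return CohereRerank(model="rerank-english-v3.0", top_n=top_n)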


# Set up the runnable and chat memory
def get_runnable_and_memory(
    model: str = "gpt-4o-mini", temperature: float = 0.1
) -> tuple[Runnable, ConversationBufferWindowMemory]:
    """Set up the LCEL runnable and the conversation window memory.

    Args:
        model (str, optional): OpenAI chat model name. Defaults to "gpt-4o-mini".
        temperature (float, optional): Model temperature. Defaults to 0.1.

    Returns:
        tuple[Runnable, ConversationBufferWindowMemory]: Runnable and memory.
    """
    # Set up LangSmith to trace the runnable
    create_langsmith_client()

    # LLM and prompt template
    llm = ChatOpenAI(
        model=model,
        temperature=temperature,
    )
    prompt = generate_prompt_template()

    # Conversation window memory: only the last k interactions are kept
    memory = ConversationBufferWindowMemory(
        memory_key="history",
        return_messages=True,
        k=6,
    )

    # Set up the runnable using LCEL
    setup = {
        "practitioners_db": itemgetter("user_query")
        | practitioners_retriever
        | DocumentFormatter("Practitioner #"),
        "tall_tree_db": itemgetter("user_query")
        | documents_retriever
        | DocumentFormatter("No."),
        "timestamp": lambda _: get_datetime(),
        "history": RunnableLambda(memory.load_memory_variables)
        | itemgetter("history"),
        "user_query": itemgetter("user_query"),
    }
    runnable = setup | prompt | llm | StrOutputParser()

    return runnable, memory
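

# Example usage: a minimal sketch, assuming OPENAI_API_KEY (plus any LangSmith
# and re-ranker credentials) is set in the environment. The query string is
# illustrative only.
if __name__ == "__main__":
    runnable, memory = get_runnable_and_memory()

    user_query = "Do you have practitioners who offer physiotherapy?"
    answer = runnable.invoke({"user_query": user_query})
    print(answer)

    # Save the turn so the next invocation sees it in "history"
    memory.save_context({"input": user_query}, {"output": answer})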