olegperegudov committed on
Commit
11f324c
1 Parent(s): 13b81ea
Files changed (5)
  1. .gitignore +3 -0
  2. app.py +43 -2
  3. build_model.py +23 -0
  4. constants.py +45 -0
  5. utils.py +100 -0
.gitignore ADDED
@@ -0,0 +1,3 @@
+ data
+ env
+ model
app.py CHANGED
@@ -1,4 +1,45 @@
  import streamlit as st
 
- x = st.slider('Select a value')
- st.write(x, 'squared is', x * x)
+ import utils
+ from build_model import load_model
+
+ st.title("Buzzbot")
+
+ # Initialize the retriever, the model, and the conversation state once per session
+ if "retriever" not in st.session_state:
+     st.session_state["retriever"] = utils.build_retriever()
+ if "model" not in st.session_state:
+     st.session_state["model"] = load_model()
+ if "conversation" not in st.session_state:
+     st.session_state["conversation"] = utils.Conversation()
+
+ # Initialize chat history
+ if "messages" not in st.session_state:
+     st.session_state.messages = []
+
+ # Display chat messages from history on app rerun
+ for message in st.session_state.messages:
+     with st.chat_message(message["role"]):
+         st.markdown(message["content"])
+         if message["role"] == "assistant":
+             st.caption(message["source_docs"])
+
+ # Accept user input
+ if user_input := st.chat_input("What is up?"):
+     # Add the user message to the chat history
+     st.session_state.messages.append({"role": "user", "content": user_input, "source_docs": None})
+     # Display the user message in a chat message container
+     with st.chat_message("user"):
+         st.markdown(user_input)
+
+     # Display the assistant response in a chat message container
+     with st.chat_message("assistant"):
+         with st.spinner(""):
+             answer, source_docs = utils.ask_question(
+                 user_input, st.session_state.conversation, st.session_state.model, st.session_state.retriever
+             )
+             st.write(answer)
+             # for source_doc in source_docs:
+             st.caption(source_docs)
+
+     st.session_state.messages.append({"role": "assistant", "content": answer, "source_docs": source_docs})
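A note on the flow above: the retriever, the LlamaCpp model, and the Conversation object are built once and cached in st.session_state, so Streamlit reruns do not rebuild them, and every chat-history entry stores the same three keys so the replay loop can read them unconditionally (user turns simply keep source_docs as None). The app itself is launched the usual way with streamlit run app.py. A small illustration of the two entry shapes kept in st.session_state.messages (the values here are made up):

# user turn: no source documents attached
{"role": "user", "content": "Какой у вас режим работы?", "source_docs": None}
# assistant turn: source_docs holds the file-name stems returned by utils.ask_question
{"role": "assistant", "content": "Мы работаем с 9 до 18.", "source_docs": ["faq"]}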
build_model.py ADDED
@@ -0,0 +1,23 @@
+ from langchain.callbacks.manager import CallbackManager
+ from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+ from langchain_community.llms import LlamaCpp
+
+ import constants
+
+ callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
+
+
+ def load_model():
+     return LlamaCpp(
+         model_path=constants.MODEL_SAVE_PATH,
+         temperature=constants.TEMPERATURE,
+         max_tokens=constants.MAX_TOKENS,
+         top_p=constants.TOP_P,
+         # callback_manager=callback_manager,  # would stream tokens to stdout but won't attach them to the returned value
+         verbose=False,  # verbose is required when passing a callback manager; streaming is disabled here
+         n_gpu_layers=constants.N_GPU_LAYERS,
+         n_batch=constants.N_BATCH,
+         n_ctx=constants.N_CTX,
+         repeat_penalty=constants.REPEAT_PENALTY,
+         streaming=False,
+     )
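Since load_model returns a standard LangChain LLM, it can also be exercised outside Streamlit for a quick smoke test. A minimal sketch, assuming the GGUF file has already been downloaded to model/saiga_mistral_7b.Q4_K_M.gguf (the commit defines MODEL_URL but never fetches it, and the model directory is git-ignored):

from build_model import load_model

llm = load_model()  # loads the quantized Saiga model via llama.cpp
# raw completion for a prompt in the Saiga chat format defined in constants.py
print(llm.invoke("<s>user\nПривет!</s><s>bot\n"))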
constants.py ADDED
@@ -0,0 +1,45 @@
+ import os
+
+ import torch
+
+ # model path
+ MODEL_NAME = "saiga_mistral_7b.Q4_K_M.gguf"
+ MODEL_URL = f"https://huggingface.co/TheBloke/saiga_mistral_7b-GGUF/blob/main/{MODEL_NAME}"
+
+ # FOR PRODUCTION
+ CWD = os.path.dirname(os.path.realpath(__file__))
+ DATA_PATH = os.path.join(CWD, "data")
+ DOCS_PATH = os.path.join(DATA_PATH, "docs")
+ MODEL_PATH = os.path.join(CWD, "model")
+ MODEL_SAVE_PATH = os.path.join(MODEL_PATH, MODEL_NAME)
+
+ # RAG params
+ N_GPU_LAYERS = (
+     -1 if torch.cuda.is_available() else 0
+ )  # number of layers to offload to the GPU; -1 offloads all layers, 0 keeps everything on the CPU
+ N_BATCH = 1024  # should be between 1 and n_ctx; consider the amount of VRAM on your GPU
+
+ TEMPERATURE = 0.1  # sampling temperature; 0.1 is a good value for most cases
+ MAX_TOKENS = 1024  # maximum number of tokens to generate
+ TOP_P = 2  # nucleus-sampling threshold; values >= 1 effectively disable it
+ N_CTX = 2048  # context length, up to a maximum of 32k
+ CHUNK_SIZE = 750  # maximum number of characters per chunk when splitting documents
+ CHUNK_OVERLAP = 200  # overlap between consecutive chunks
+ SEARCH_TYPE = "mmr"
+ LAST_MESSAGES = 3  # number of most recent messages from the conversation history to include in the prompt
+ REPEAT_PENALTY = 1.1  # penalty for repeating tokens in the output
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"  # device for the embedding model
+
+ EMBED_MODEL_NAME = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
+ VECTOR_STORE_PATH = os.path.join(DATA_PATH, "chroma_db")
+
+ # retriever config
+ SEARCH_KWARGS = {"k": 3, "score_threshold": 0.6}
+
+ DEFAULT_MESSAGE_TEMPLATE = "<s>{role}\n{content}</s>"
+ DEFAULT_RESPONSE_TEMPLATE = "<s>bot\n"
+ DEFAULT_SYSTEM_PROMPT = "Ты ассистент помощник, который отвечает на вопросы используя предоставленный контекст. \
+ В качестве контекста используются тексты из различных источников. \
+ Постарайся ответить на вопрос максимально точно. \
+ Для ответа используй только информацию из контекста и вопроса. Ничего не выдумывай. \
+ Если не можешь ответить на вопрос, напиши - 'Не хватает данных для ответа.' "
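The three template constants encode the Saiga chat format that utils.Conversation assembles later: every turn is wrapped as <s>{role}\n{content}</s> and the final prompt ends with the open <s>bot\n tag so the model continues as the bot. The system prompt is in Russian and roughly translates to: "You are an assistant that answers questions using the provided context. The context consists of texts from various sources. Try to answer the question as precisely as possible, use only information from the context and the question, and do not make anything up. If you cannot answer, write 'Не хватает данных для ответа.' (not enough data to answer)." A quick illustration of how a single turn is rendered:

import constants

turn = constants.DEFAULT_MESSAGE_TEMPLATE.format(role="user", content="Привет!")
assert turn == "<s>user\nПривет!</s>"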
utils.py ADDED
@@ -0,0 +1,100 @@
+ import os
+
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain_community.document_loaders import DirectoryLoader
+ from langchain_community.embeddings import HuggingFaceEmbeddings
+ from langchain_community.vectorstores import Chroma
+
+ import constants
+
+
+ class Conversation:
+     def __init__(
+         self,
+         message_template=constants.DEFAULT_MESSAGE_TEMPLATE,
+         system_prompt=constants.DEFAULT_SYSTEM_PROMPT,
+         response_template=constants.DEFAULT_RESPONSE_TEMPLATE,
+     ):
+         self.message_template = message_template
+         self.response_template = response_template
+         self.messages = [{"role": "system", "content": system_prompt}]
+
+     def add_user_message(self, message):
+         self.messages.append({"role": "user", "content": message})
+
+     def add_bot_message(self, message):
+         self.messages.append({"role": "bot", "content": message})
+
+     def get_conversation_history(self):
+         final_text = ""
+         # the system message first, then the last few messages (the system message itself is not duplicated)
+         context_and_last_few_messages = [self.messages[0]] + self.messages[1:][-constants.LAST_MESSAGES :]
+         for message in context_and_last_few_messages:
+             message_text = self.message_template.format(**message)
+             final_text += message_text
+         return final_text.strip()
+
+
+ def source_documents(relevant_docs):
+     source_docs = set()
+     for doc in relevant_docs:
+         fname = doc.metadata["source"]
+         fname_base = os.path.splitext(os.path.basename(fname))[0]
+         source_docs.add(fname_base)
+     return list(source_docs)
+
+
+ def load_raw_documents():
+     return DirectoryLoader(constants.DOCS_PATH, glob="*.txt").load()
+
+
+ def build_nodes(raw_documents):
+     return RecursiveCharacterTextSplitter(
+         chunk_size=constants.CHUNK_SIZE,
+         chunk_overlap=constants.CHUNK_OVERLAP,
+         length_function=len,
+         is_separator_regex=False,
+     ).split_documents(raw_documents)
+
+
+ def build_embeddings():
+     return HuggingFaceEmbeddings(model_name=constants.EMBED_MODEL_NAME, model_kwargs={"device": constants.DEVICE})
+
+
+ def build_db(nodes, embeddings):
+     return Chroma.from_documents(nodes, embeddings)
+
+
+ def build_retriever():
+     raw_documents = load_raw_documents()
+     nodes = build_nodes(raw_documents)
+     embeddings = build_embeddings()
+     db = build_db(nodes, embeddings)
+     return db.as_retriever(search_kwargs=constants.SEARCH_KWARGS, search_type=constants.SEARCH_TYPE)
+
+
+ def fetch_relevant_nodes(question, retriever):
+     relevant_docs = retriever.get_relevant_documents(question)
+     context = [doc.page_content for doc in relevant_docs]
+     source_docs = source_documents(relevant_docs)
+     context = list(set(context))  # remove duplicated strings from the context
+     return context, source_docs
+
+
+ def ask_question(question, conversation, model, retriever):
+
+     context, source_docs = fetch_relevant_nodes(question, retriever)
+
+     # add the user message to the conversation history
+     conversation.add_user_message(question)
+     conversation_history = conversation.get_conversation_history()
+     prompt = f"{conversation_history}\n\
+ {context}\n\
+ {constants.DEFAULT_RESPONSE_TEMPLATE}"
+
+
+     answer = model.invoke(prompt)
+     # add the bot answer to the conversation history
+     conversation.add_bot_message(answer)
+
+     return answer, source_docs
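Taken together, utils.py implements the whole RAG path: load the .txt files from data/docs, split them into overlapping chunks, embed them with the multilingual MiniLM model, index them in an in-memory Chroma store, retrieve with MMR, and have ask_question stitch the trimmed conversation history, the retrieved context, and the open bot tag into one prompt for the LLM. A minimal end-to-end sketch of the same call path without the Streamlit UI, assuming data/docs and the model file are in place (the question string is only an example):

import utils
from build_model import load_model

retriever = utils.build_retriever()   # loads, splits, embeds and indexes data/docs
model = load_model()                  # llama.cpp-backed Saiga model
conversation = utils.Conversation()   # starts with the system prompt only

answer, sources = utils.ask_question("О чём эти документы?", conversation, model, retriever)
print(answer)   # model answer as a plain string
print(sources)  # file-name stems of the retrieved source documents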