Update app.py
app.py
CHANGED
@@ -1,13 +1,5 @@
 import spaces
 import subprocess
-
-subprocess.run(
-    'pip install flash-attn --no-build-isolation',
-    env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"},
-    shell=True
-)
-
-
 import os
 import torch
 from dotenv import load_dotenv
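
The startup-time flash-attn install is removed in this hunk. A hypothetical alternative, not part of this commit, would be to install the package only when it is missing and to keep the existing environment when setting the skip-build flag:

# Hypothetical sketch, not part of the commit: install flash-attn only when absent.
import importlib.util
import os
import subprocess

if importlib.util.find_spec("flash_attn") is None:
    subprocess.run(
        "pip install flash-attn --no-build-isolation",
        # Merge with os.environ so pip still sees PATH etc.; a bare env= would replace it.
        env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
        shell=True,
        check=False,
    )
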
@@ -20,17 +12,14 @@ from qdrant_client import QdrantClient, models
 from langchain_openai import ChatOpenAI
 import gradio as gr
 import logging
-from typing import List, Tuple
+from typing import List, Tuple, Generator
 from dataclasses import dataclass
 from datetime import datetime
-from transformers import AutoTokenizer, AutoModelForCausalLM
-from langchain_huggingface.llms import HuggingFacePipeline
-import re
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 from langchain_huggingface.llms import HuggingFacePipeline
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline,BitsAndBytesConfig,TextIteratorStreamer
 from langchain_cerebras import ChatCerebras
-
-
+from queue import Queue
+from threading import Thread
 
 # Configure logging
 logging.basicConfig(level=logging.INFO)
@@ -51,7 +40,6 @@ class ChatHistory:
         self.messages.append(Message(role=role, content=content, timestamp=timestamp))
 
     def get_formatted_history(self, max_messages: int = 10) -> str:
-        """Returns the most recent conversation history formatted as a string"""
        recent_messages = self.messages[-max_messages:] if len(self.messages) > max_messages else self.messages
        formatted_history = "\n".join([
            f"{msg.role}: {msg.content}" for msg in recent_messages
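
For reference, a minimal usage sketch of the ChatHistory helper touched above (the sample messages are invented for illustration):

history = ChatHistory()
history.add_message("user", "How do I submit a leave request?")
history.add_message("assistant", "Open the self-service module and choose Leave Request.")
print(history.get_formatted_history(max_messages=10))
# user: How do I submit a leave request?
# assistant: Open the self-service module and choose Leave Request.
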
@@ -61,10 +49,9 @@ class ChatHistory:
     def clear(self):
         self.messages = []
 
-# Load environment variables
+# Load environment variables and setup (same as before)
 load_dotenv()
 
-# HuggingFace API Token
 HF_TOKEN = os.getenv("HF_TOKEN")
 C_apikey = os.getenv("C_apikey")
 OPENAPI_KEY = os.getenv("OPENAPI_KEY")
@@ -73,10 +60,9 @@ if not HF_TOKEN:
     logger.error("HF_TOKEN is not set in the environment variables.")
     exit(1)
 
-# HuggingFace Embeddings
 embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
 
-# Qdrant
+# Qdrant setup (same as before)
 try:
     client = QdrantClient(
         url=os.getenv("QDRANT_URL"),
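
The all-MiniLM-L6-v2 model produces 384-dimensional embeddings, which is why the collection created in the next hunk uses VectorParams(size=384). A quick sanity check, sketched against the same embeddings object:

vector = embeddings.embed_query("probe sentence")
assert len(vector) == 384, f"unexpected embedding size: {len(vector)}"
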
@@ -84,125 +70,61 @@ try:
         prefer_grpc=False
     )
 except Exception as e:
-    logger.error("Failed to connect to Qdrant.
+    logger.error("Failed to connect to Qdrant.")
     exit(1)
 
-# Define collection name
 collection_name = "mawared"
 
-# Try to create collection
 try:
     client.create_collection(
         collection_name=collection_name,
         vectors_config=models.VectorParams(
-            size=384,
+            size=384,
            distance=models.Distance.COSINE
        )
    )
-    logger.info(f"Created new collection: {collection_name}")
 except Exception as e:
-    if "already exists" in str(e):
-        logger.info(f"Collection {collection_name} already exists, continuing...")
-    else:
+    if "already exists" not in str(e):
         logger.error(f"Error creating collection: {e}")
         exit(1)
 
-# Create Qdrant vector store
 db = Qdrant(
     client=client,
     collection_name=collection_name,
     embeddings=embeddings,
 )
 
-# Create retriever
 retriever = db.as_retriever(
     search_type="similarity",
     search_kwargs={"k": 5}
 )
 
-# retriever = db.as_retriever(
-#     search_type="mmr",
-#     search_kwargs={"k": 5, "fetch_k": 10, "lambda_mult": 0.5}
-# )
-
-
-
-
-# retriever = db.as_retriever(
-#     search_type="similarity_score_threshold",
-#     search_kwargs={"k": 5, "score_threshold": 0.8}
-# )
-
-
-
-# Load model directly
-
-
-
-# Set up the LLM
-# llm = ChatOpenAI(
-#     base_url="https://api-inference.huggingface.co/v1/",
-#     temperature=0,
-#     api_key=HF_TOKEN,
-#     model="mistralai/Mistral-Nemo-Instruct-2407",
-#     max_tokens=None,
-#     timeout=None
-
-# )
-
-
-#llm = ChatOpenAI(
-#base_url="https://openrouter.ai/api/v1",
-#temperature=0.01,
-#api_key=OPENAPI_KEY,
-#model="google/gemini-2.0-flash-exp:free",
-#max_tokens=None,
-#timeout=None,
-#max_retries=3,
-
-#)
-
-
 llm = ChatCerebras(
-
-
+    model="llama-3.3-70b",
+    api_key=C_apikey,
+    streaming=True  # Enable streaming
 )
 
-
-
-
-
-
-
-# Create prompt template with chat history
 template = """
-You are
+You are a Friendly assistant specializing in the Mawared HR System.
+Your role is to provide precise and contextually relevant answers based on the retrieved context and chat history.
+Your top priority is user experience and satisfaction, only answer questions based on Mawared HR system and ignore everything else.
 
 Key Responsibilities:
 
 Use the given chat history and retrieved context to craft accurate and detailed responses.
 If necessary, ask specific and targeted clarifying questions to gather more information.
 Present step-by-step instructions in a clear, numbered format when applicable.
-
-
-Strictly use the information from the provided context and chat history. Avoid making up or fabricating any details.
-Do not reference the retrieval process, sources, pages, or documents in your responses.
-Maintain a conversational flow by asking relevant follow-up questions to engage the user and enhance the interaction.
-Inputs for Your Response:
+If you think you will not be able to provide a clear answer based on the user question, ask a clarifying question and ask for more details.
 
 Previous Conversation: {chat_history}
 Retrieved Context: {context}
 Current Question: {question}
-Answer:
-Your answers must be expressive, detailed, and fully address the user's needs without deviating from the provided information.
+Answer:
 """
 
 prompt = ChatPromptTemplate.from_template(template)
 
-# Create the RAG chain with chat history
-
-
-
 def create_rag_chain(chat_history: str):
     chain = (
         {
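
With streaming=True on ChatCerebras, the model can be consumed incrementally. A minimal sketch, assuming C_apikey is a valid Cerebras key; the prompt text is only illustrative:

# Stream tokens from the chat model as they arrive.
for chunk in llm.stream("Summarize the leave approval workflow in Mawared HR."):
    print(chunk.content, end="", flush=True)
print()
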
@@ -216,38 +138,62 @@ def create_rag_chain(chat_history: str):
     )
     return chain
 
-# Initialize chat history
 chat_history = ChatHistory()
 
-
+def process_stream(stream_queue: Queue, history: List[dict]) -> Generator[List[dict], None, None]:
+    """Process the streaming response and update the chat interface"""
+    current_response = ""
+
+    while True:
+        chunk = stream_queue.get()
+        if chunk is None:  # Signal that streaming is complete
+            break
+
+        current_response += chunk
+        new_history = history.copy()
+        new_history[-1]["content"] = current_response
+        yield new_history
+
 @spaces.GPU()
-def ask_question_gradio(question, history):
+def ask_question_gradio(question: str, history: List[dict]) -> Generator[tuple, None, None]:
     try:
-        # Add user question to chat history
         chat_history.add_message("user", question)
-
-        # Get formatted history
         formatted_history = chat_history.get_formatted_history()
-
-        # Create chain with current chat history
         rag_chain = create_rag_chain(formatted_history)
 
-        #
+        # Update history with user message
+        history.append({"role": "user", "content": question})
+        history.append({"role": "assistant", "content": ""})
+
+        # Create a queue for streaming responses
+        stream_queue = Queue()
+
+        # Function to process the stream in a separate thread
+        def stream_processor():
+            try:
+                for chunk in rag_chain.stream(question):
+                    stream_queue.put(chunk)
+                stream_queue.put(None)  # Signal completion
+            except Exception as e:
+                logger.error(f"Streaming error: {e}")
+                stream_queue.put(None)
+
+        # Start streaming in a separate thread
+        Thread(target=stream_processor).start()
+
+        # Yield updates to the chat interface
         response = ""
-        for
-            response
+        for updated_history in process_stream(stream_queue, history):
+            response = updated_history[-1]["content"]
+            yield "", updated_history
 
-        # Add
+        # Add final response to chat history
         chat_history.add_message("assistant", response)
 
-        # Update Gradio chat history
-        history.append({"role": "user", "content": question})
-        history.append({"role": "assistant", "content": response})
-
-        return "", history
     except Exception as e:
         logger.error(f"Error during question processing: {e}")
-
+        history.append({"role": "assistant", "content": "An error occurred. Please try again later."})
+        yield "", history
 
 def clear_chat():
     chat_history.clear()
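
The new handler pushes chunks from a worker thread into a Queue and drains them in process_stream; the thread keeps chain errors off the UI generator at the cost of extra moving parts. Because rag_chain.stream() is itself a lazy generator yielding string chunks (the committed code concatenates them directly), a simpler variant could stream without the thread and queue. A hedged sketch of that alternative, not what the commit does:

def ask_question_simple(question: str, history: list):
    """Stream a reply by iterating the chain directly (illustrative only)."""
    rag_chain = create_rag_chain(chat_history.get_formatted_history())
    history = history + [
        {"role": "user", "content": question},
        {"role": "assistant", "content": ""},
    ]
    partial = ""
    for chunk in rag_chain.stream(question):
        partial += chunk
        history[-1]["content"] = partial
        yield "", history
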
@@ -255,17 +201,15 @@ def clear_chat():
 
 # Gradio Interface
 with gr.Blocks(theme='Yntec/HaleyCH_Theme_Orange_Green') as iface:
-    gr.Image("Image.jpg"
+    gr.Image("Image.jpg", width=750, height=300, show_label=False, show_download_button=False)
     gr.Markdown("# Mawared HR Assistant 2.5.1")
     gr.Markdown('### Instructions')
-    gr.Markdown("Ask a question about MawaredHR and get a detailed answer
-
-
+    gr.Markdown("Ask a question about MawaredHR and get a detailed answer, if you get an error try again with same prompt, it's an API issue and we are working on it 😀")
 
     chatbot = gr.Chatbot(
         height=750,
         show_label=False,
-
+        bubble_full_width=False,
     )
 
     with gr.Row():
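
The handlers append OpenAI-style {"role": ..., "content": ...} dicts to the chat history. Depending on the Gradio version pinned for the Space, the Chatbot component may need to be told to expect that format; a hedged sketch (the type parameter exists in recent Gradio releases, verify against the installed version):

chatbot = gr.Chatbot(
    height=750,
    show_label=False,
    type="messages",  # accept {"role": ..., "content": ...} dicts
)
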
@@ -287,6 +231,5 @@ with gr.Blocks(theme='Yntec/HaleyCH_Theme_Orange_Green') as iface:
         outputs=[chatbot, question_input]
     )
 
-# Launch the Gradio App
 if __name__ == "__main__":
     iface.launch()