ariansyahdedy commited on
Commit
be9a762
·
1 Parent(s): fef76d0
app/handlers/message_handler.py CHANGED
@@ -1,4 +1,4 @@
1
- from typing import Dict, List, Optional
2
 
3
  from app.services.cache import MessageCache
4
  from app.services.chat_manager import ChatManager
@@ -26,7 +26,7 @@ class MessageHandler:
26
  self.media_handler = media_handler
27
  self.logger = logger
28
 
29
- async def handle(self, raw_message: dict, whatsapp_token: str, whatsapp_url:str,gemini_api:str) -> dict:
30
  try:
31
  # Parse message
32
  message = MessageParser.parse(raw_message)
@@ -46,6 +46,7 @@ class MessageHandler:
46
  message.sender_id,
47
  message.content,
48
  self.chat_manager.get_chat_history(message.sender_id),
 
49
  whatsapp_token=whatsapp_token,
50
  whatsapp_url=whatsapp_url,
51
  **media_paths
 
1
+ from typing import Dict, List, Optional, Any
2
 
3
  from app.services.cache import MessageCache
4
  from app.services.chat_manager import ChatManager
 
26
  self.media_handler = media_handler
27
  self.logger = logger
28
 
29
+ async def handle(self, raw_message: dict, whatsapp_token: str, whatsapp_url:str,gemini_api:str, rag_system:Any = None) -> dict:
30
  try:
31
  # Parse message
32
  message = MessageParser.parse(raw_message)
 
46
  message.sender_id,
47
  message.content,
48
  self.chat_manager.get_chat_history(message.sender_id),
49
+ rag_system=rag_system,
50
  whatsapp_token=whatsapp_token,
51
  whatsapp_url=whatsapp_url,
52
  **media_paths
app/handlers/webhook_handler.py CHANGED
@@ -1,6 +1,6 @@
1
  # webhook_handler.py
2
  from dataclasses import dataclass
3
- from typing import List, Dict
4
  import time
5
  import logging
6
  from fastapi import Request, status
@@ -18,7 +18,7 @@ class WebhookHandler:
18
  self.message_handler = message_handler
19
  self.logger = logging.getLogger(__name__)
20
 
21
- async def process_webhook(self, payload: dict, whatsapp_token: str, whatsapp_url:str,gemini_api:str) -> WebhookResponse:
22
  request_id = f"req_{int(time.time()*1000)}"
23
  results = []
24
 
@@ -40,6 +40,7 @@ class WebhookHandler:
40
  whatsapp_token=whatsapp_token,
41
  whatsapp_url=whatsapp_url,
42
  gemini_api=gemini_api,
 
43
  )
44
  results.append(response)
45
 
 
1
  # webhook_handler.py
2
  from dataclasses import dataclass
3
+ from typing import List, Dict, Any
4
  import time
5
  import logging
6
  from fastapi import Request, status
 
18
  self.message_handler = message_handler
19
  self.logger = logging.getLogger(__name__)
20
 
21
+ async def process_webhook(self, payload: dict, whatsapp_token: str, whatsapp_url:str,gemini_api:str, rag_system:Any = None) -> WebhookResponse:
22
  request_id = f"req_{int(time.time()*1000)}"
23
  results = []
24
 
 
40
  whatsapp_token=whatsapp_token,
41
  whatsapp_url=whatsapp_url,
42
  gemini_api=gemini_api,
43
+ rag_system=rag_system,
44
  )
45
  results.append(response)
46
 
app/main.py CHANGED
@@ -99,6 +99,7 @@ async def webhook(request: Request):
99
  try:
100
  payload = await request.json()
101
 
 
102
  # validated_payload = WebhookPayload(**payload) # Validate payload
103
  # logger.info(f"Validated Payload: {validated_payload}")
104
 
@@ -117,6 +118,7 @@ async def webhook(request: Request):
117
  whatsapp_token=ACCESS_TOKEN,
118
  whatsapp_url=WHATSAPP_API_URL,
119
  gemini_api=GEMINI_API,
 
120
  )
121
  return JSONResponse(
122
  content=response.__dict__,
 
99
  try:
100
  payload = await request.json()
101
 
102
+ rag_system = request.app.state.rag_system
103
  # validated_payload = WebhookPayload(**payload) # Validate payload
104
  # logger.info(f"Validated Payload: {validated_payload}")
105
 
 
118
  whatsapp_token=ACCESS_TOKEN,
119
  whatsapp_url=WHATSAPP_API_URL,
120
  gemini_api=GEMINI_API,
121
+ rag_system=rag_system,
122
  )
123
  return JSONResponse(
124
  content=response.__dict__,
app/search/rag_pipeline.py CHANGED
@@ -17,6 +17,17 @@ from app.utils.token_counter import TokenCounter
17
  logging.basicConfig(level=logging.INFO)
18
  logger = logging.getLogger(__name__)
19
 
 
 
 
 
 
 
 
 
 
 
 
20
  # rag.py
21
  class RAGSystem:
22
  def __init__(self, embedding_model):
 
17
  logging.basicConfig(level=logging.INFO)
18
  logger = logging.getLogger(__name__)
19
 
20
+
21
+ from keybert import KeyBERT
22
+ import asyncio
23
+
24
+ async def extract_keywords_async(doc, threshold=0.4):
25
+ kw_model = KeyBERT()
26
+ loop = asyncio.get_event_loop()
27
+ keywords = await loop.run_in_executor(None, kw_model.extract_keywords, doc, threshold)
28
+ keywords = [key for key, _ in keywords]
29
+ return keywords
30
+
31
  # rag.py
32
  class RAGSystem:
33
  def __init__(self, embedding_model):
app/services/message.py CHANGED
@@ -100,6 +100,7 @@ async def process_message_with_llm(
100
  sender_id: str,
101
  content: str,
102
  history: List[Dict[str, str]],
 
103
  whatsapp_token: str,
104
  whatsapp_url:str,
105
  image_file_path: Optional[str] = None,
@@ -113,6 +114,7 @@ async def process_message_with_llm(
113
  sender=sender_id,
114
  content=content,
115
  history=history,
 
116
  image_file_path=image_file_path,
117
  doc_path=doc_path,
118
  video_file_path=video_file_path
@@ -130,6 +132,7 @@ async def generate_response_from_gemini(
130
  sender: str,
131
  content: str,
132
  history: List[Dict[str, str]],
 
133
  image_file_path: Optional[str] = None,
134
  doc_path: Optional[str] = None,
135
  video_file_path: Optional[str] = None,
@@ -143,6 +146,20 @@ async def generate_response_from_gemini(
143
  # Start chat with history
144
  chat = model.start_chat(history=history)
145
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
  # Process image
147
  if image_file_path:
148
  logger.info(f"Processing image at {image_file_path}")
@@ -334,6 +351,7 @@ async def generate_response_from_chatgpt(sender: str, content: str, timestamp: s
334
  print("Error generating reply:", e)
335
  return "Sorry, I couldn't generate a response at this time."
336
 
 
337
  # async def generate_response_from_chatgpt(
338
  # sender: str,
339
  # content: str,
 
100
  sender_id: str,
101
  content: str,
102
  history: List[Dict[str, str]],
103
+ rag_system: Any,
104
  whatsapp_token: str,
105
  whatsapp_url:str,
106
  image_file_path: Optional[str] = None,
 
114
  sender=sender_id,
115
  content=content,
116
  history=history,
117
+ rag_system=rag_system,
118
  image_file_path=image_file_path,
119
  doc_path=doc_path,
120
  video_file_path=video_file_path
 
132
  sender: str,
133
  content: str,
134
  history: List[Dict[str, str]],
135
+ rag_system: Any = None,
136
  image_file_path: Optional[str] = None,
137
  doc_path: Optional[str] = None,
138
  video_file_path: Optional[str] = None,
 
146
  # Start chat with history
147
  chat = model.start_chat(history=history)
148
 
149
+ if rag_system:
150
+ keywords = await rag_system.extract_keywords_async(content)
151
+ # Implement RAG: Retrieve relevant documents
152
+ retrieved_docs = await rag_system.adv_query(content, keywords=keywords, top_k=1)
153
+ if retrieved_docs:
154
+ logger.info(f"Retrieved {len(retrieved_docs)} documents for context.")
155
+ # Format the retrieved documents as a context string
156
+ context = "\n\n".join([f"Content: {doc['text']}" for doc in retrieved_docs])
157
+ # Option 1: Append to history as a system message
158
+ history.append({"role": "system", "content": f"Relevant documents:\n{context}"})
159
+
160
+ # Reinitialize chat with updated history
161
+ chat = model.start_chat(history=history)
162
+
163
  # Process image
164
  if image_file_path:
165
  logger.info(f"Processing image at {image_file_path}")
 
351
  print("Error generating reply:", e)
352
  return "Sorry, I couldn't generate a response at this time."
353
 
354
+
355
  # async def generate_response_from_chatgpt(
356
  # sender: str,
357
  # content: str,
requirements.txt CHANGED
Binary files a/requirements.txt and b/requirements.txt differ