ariansyahdedy commited on
Commit
be9a762
·
1 Parent(s): fef76d0
app/handlers/message_handler.py CHANGED
@@ -1,4 +1,4 @@
1
- from typing import Dict, List, Optional
2
 
3
  from app.services.cache import MessageCache
4
  from app.services.chat_manager import ChatManager
@@ -26,7 +26,7 @@ class MessageHandler:
26
  self.media_handler = media_handler
27
  self.logger = logger
28
 
29
- async def handle(self, raw_message: dict, whatsapp_token: str, whatsapp_url:str,gemini_api:str) -> dict:
30
  try:
31
  # Parse message
32
  message = MessageParser.parse(raw_message)
@@ -46,6 +46,7 @@ class MessageHandler:
46
  message.sender_id,
47
  message.content,
48
  self.chat_manager.get_chat_history(message.sender_id),
 
49
  whatsapp_token=whatsapp_token,
50
  whatsapp_url=whatsapp_url,
51
  **media_paths
 
1
+ from typing import Dict, List, Optional, Any
2
 
3
  from app.services.cache import MessageCache
4
  from app.services.chat_manager import ChatManager
 
26
  self.media_handler = media_handler
27
  self.logger = logger
28
 
29
+ async def handle(self, raw_message: dict, whatsapp_token: str, whatsapp_url:str,gemini_api:str, rag_system:Any = None) -> dict:
30
  try:
31
  # Parse message
32
  message = MessageParser.parse(raw_message)
 
46
  message.sender_id,
47
  message.content,
48
  self.chat_manager.get_chat_history(message.sender_id),
49
+ rag_system=rag_system,
50
  whatsapp_token=whatsapp_token,
51
  whatsapp_url=whatsapp_url,
52
  **media_paths
app/handlers/webhook_handler.py CHANGED
@@ -1,6 +1,6 @@
1
  # webhook_handler.py
2
  from dataclasses import dataclass
3
- from typing import List, Dict
4
  import time
5
  import logging
6
  from fastapi import Request, status
@@ -18,7 +18,7 @@ class WebhookHandler:
18
  self.message_handler = message_handler
19
  self.logger = logging.getLogger(__name__)
20
 
21
- async def process_webhook(self, payload: dict, whatsapp_token: str, whatsapp_url:str,gemini_api:str) -> WebhookResponse:
22
  request_id = f"req_{int(time.time()*1000)}"
23
  results = []
24
 
@@ -40,6 +40,7 @@ class WebhookHandler:
40
  whatsapp_token=whatsapp_token,
41
  whatsapp_url=whatsapp_url,
42
  gemini_api=gemini_api,
 
43
  )
44
  results.append(response)
45
 
 
1
  # webhook_handler.py
2
  from dataclasses import dataclass
3
+ from typing import List, Dict, Any
4
  import time
5
  import logging
6
  from fastapi import Request, status
 
18
  self.message_handler = message_handler
19
  self.logger = logging.getLogger(__name__)
20
 
21
+ async def process_webhook(self, payload: dict, whatsapp_token: str, whatsapp_url:str,gemini_api:str, rag_system:Any = None) -> WebhookResponse:
22
  request_id = f"req_{int(time.time()*1000)}"
23
  results = []
24
 
 
40
  whatsapp_token=whatsapp_token,
41
  whatsapp_url=whatsapp_url,
42
  gemini_api=gemini_api,
43
+ rag_system=rag_system,
44
  )
45
  results.append(response)
46
 
app/main.py CHANGED
@@ -99,6 +99,7 @@ async def webhook(request: Request):
99
  try:
100
  payload = await request.json()
101
 
 
102
  # validated_payload = WebhookPayload(**payload) # Validate payload
103
  # logger.info(f"Validated Payload: {validated_payload}")
104
 
@@ -117,6 +118,7 @@ async def webhook(request: Request):
117
  whatsapp_token=ACCESS_TOKEN,
118
  whatsapp_url=WHATSAPP_API_URL,
119
  gemini_api=GEMINI_API,
 
120
  )
121
  return JSONResponse(
122
  content=response.__dict__,
 
99
  try:
100
  payload = await request.json()
101
 
102
+ rag_system = request.app.state.rag_system
103
  # validated_payload = WebhookPayload(**payload) # Validate payload
104
  # logger.info(f"Validated Payload: {validated_payload}")
105
 
 
118
  whatsapp_token=ACCESS_TOKEN,
119
  whatsapp_url=WHATSAPP_API_URL,
120
  gemini_api=GEMINI_API,
121
+ rag_system=rag_system,
122
  )
123
  return JSONResponse(
124
  content=response.__dict__,
app/search/rag_pipeline.py CHANGED
@@ -17,6 +17,17 @@ from app.utils.token_counter import TokenCounter
17
  logging.basicConfig(level=logging.INFO)
18
  logger = logging.getLogger(__name__)
19
 
 
 
 
 
 
 
 
 
 
 
 
20
  # rag.py
21
  class RAGSystem:
22
  def __init__(self, embedding_model):
 
17
  logging.basicConfig(level=logging.INFO)
18
  logger = logging.getLogger(__name__)
19
 
20
+
21
+ from keybert import KeyBERT
22
+ import asyncio
23
+
24
+ async def extract_keywords_async(doc, threshold=0.4):
25
+ kw_model = KeyBERT()
26
+ loop = asyncio.get_event_loop()
27
+ keywords = await loop.run_in_executor(None, kw_model.extract_keywords, doc, threshold)
28
+ keywords = [key for key, _ in keywords]
29
+ return keywords
30
+
31
  # rag.py
32
  class RAGSystem:
33
  def __init__(self, embedding_model):
app/services/message.py CHANGED
@@ -100,6 +100,7 @@ async def process_message_with_llm(
100
  sender_id: str,
101
  content: str,
102
  history: List[Dict[str, str]],
 
103
  whatsapp_token: str,
104
  whatsapp_url:str,
105
  image_file_path: Optional[str] = None,
@@ -113,6 +114,7 @@ async def process_message_with_llm(
113
  sender=sender_id,
114
  content=content,
115
  history=history,
 
116
  image_file_path=image_file_path,
117
  doc_path=doc_path,
118
  video_file_path=video_file_path
@@ -130,6 +132,7 @@ async def generate_response_from_gemini(
130
  sender: str,
131
  content: str,
132
  history: List[Dict[str, str]],
 
133
  image_file_path: Optional[str] = None,
134
  doc_path: Optional[str] = None,
135
  video_file_path: Optional[str] = None,
@@ -143,6 +146,20 @@ async def generate_response_from_gemini(
143
  # Start chat with history
144
  chat = model.start_chat(history=history)
145
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
  # Process image
147
  if image_file_path:
148
  logger.info(f"Processing image at {image_file_path}")
@@ -334,6 +351,7 @@ async def generate_response_from_chatgpt(sender: str, content: str, timestamp: s
334
  print("Error generating reply:", e)
335
  return "Sorry, I couldn't generate a response at this time."
336
 
 
337
  # async def generate_response_from_chatgpt(
338
  # sender: str,
339
  # content: str,
 
100
  sender_id: str,
101
  content: str,
102
  history: List[Dict[str, str]],
103
+ rag_system: Any,
104
  whatsapp_token: str,
105
  whatsapp_url:str,
106
  image_file_path: Optional[str] = None,
 
114
  sender=sender_id,
115
  content=content,
116
  history=history,
117
+ rag_system=rag_system,
118
  image_file_path=image_file_path,
119
  doc_path=doc_path,
120
  video_file_path=video_file_path
 
132
  sender: str,
133
  content: str,
134
  history: List[Dict[str, str]],
135
+ rag_system: Any = None,
136
  image_file_path: Optional[str] = None,
137
  doc_path: Optional[str] = None,
138
  video_file_path: Optional[str] = None,
 
146
  # Start chat with history
147
  chat = model.start_chat(history=history)
148
 
149
+ if rag_system:
150
+ keywords = await rag_system.extract_keywords_async(content)
151
+ # Implement RAG: Retrieve relevant documents
152
+ retrieved_docs = await rag_system.adv_query(content, keywords=keywords, top_k=1)
153
+ if retrieved_docs:
154
+ logger.info(f"Retrieved {len(retrieved_docs)} documents for context.")
155
+ # Format the retrieved documents as a context string
156
+ context = "\n\n".join([f"Content: {doc['text']}" for doc in retrieved_docs])
157
+ # Option 1: Append to history as a system message
158
+ history.append({"role": "system", "content": f"Relevant documents:\n{context}"})
159
+
160
+ # Reinitialize chat with updated history
161
+ chat = model.start_chat(history=history)
162
+
163
  # Process image
164
  if image_file_path:
165
  logger.info(f"Processing image at {image_file_path}")
 
351
  print("Error generating reply:", e)
352
  return "Sorry, I couldn't generate a response at this time."
353
 
354
+
355
  # async def generate_response_from_chatgpt(
356
  # sender: str,
357
  # content: str,
requirements.txt CHANGED
Binary files a/requirements.txt and b/requirements.txt differ