mitulagr2 commited on
Commit
4929aba
·
1 Parent(s): 794ae55

Add websockets

Browse files
Files changed (5) hide show
  1. Dockerfile +3 -20
  2. app/main.py +39 -6
  3. app/rag.py +5 -6
  4. requirements.txt +1 -0
  5. start_service.sh +0 -16
Dockerfile CHANGED
@@ -1,34 +1,17 @@
1
- #
2
  FROM python:3.11
3
 
4
- #
5
  WORKDIR /code
6
 
7
- #
8
  COPY ./requirements.txt /code/requirements.txt
9
 
10
- #
11
- RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
12
-
13
- #
14
- COPY ./start_service.sh /code/start_service.sh
15
-
16
- #
17
  COPY ./app /code/app
18
 
19
- RUN useradd -m docker && echo "docker:docker" | chpasswd && adduser docker sudo
20
 
21
- # RUN curl -fsSL https://ollama.com/install.sh | sh
22
 
23
  USER docker
24
 
25
- # RUN nohup ollama serve & sleep 5
26
-
27
- #
28
- # RUN chmod +x /code/start_service.sh
29
-
30
- #
31
  EXPOSE 7860
32
 
33
- # # Run .sh file
34
- ENTRYPOINT ["sh", "/code/start_service.sh"]
 
 
1
  FROM python:3.11
2
 
 
3
  WORKDIR /code
4
 
 
5
  COPY ./requirements.txt /code/requirements.txt
6
 
 
 
 
 
 
 
 
7
  COPY ./app /code/app
8
 
9
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
10
 
11
+ RUN useradd -m docker && echo "docker:docker" | chpasswd && adduser docker sudo
12
 
13
  USER docker
14
 
 
 
 
 
 
 
15
  EXPOSE 7860
16
 
17
+ CMD ["fastapi", "run", "/code/app/main.py", "--port", "7860"]
 
app/main.py CHANGED
@@ -1,10 +1,7 @@
1
  import os
2
  import shutil
3
- import tempfile
4
- from tempfile import NamedTemporaryFile
5
- from pathlib import Path
6
-
7
- from fastapi import FastAPI, UploadFile
8
  from fastapi.middleware import Middleware
9
  from fastapi.middleware.cors import CORSMiddleware
10
  from fastapi.responses import StreamingResponse
@@ -24,11 +21,47 @@ app = FastAPI(middleware=middleware)
24
  files_dir = os.path.expanduser("~/wtp_be_files/")
25
  session_assistant = ChatPDF()
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  @app.get("/query")
28
  async def process_input(text: str):
29
  if text and len(text.strip()) > 0:
30
  text = text.strip()
31
- print("PRINTING STREAM")
32
  return StreamingResponse(session_assistant.ask(text), media_type='text/event-stream')
33
 
34
 
 
1
  import os
2
  import shutil
3
+ from typing import List
4
+ from fastapi import FastAPI, UploadFile, WebSocket, WebSocketDisconnect
 
 
 
5
  from fastapi.middleware import Middleware
6
  from fastapi.middleware.cors import CORSMiddleware
7
  from fastapi.responses import StreamingResponse
 
21
  files_dir = os.path.expanduser("~/wtp_be_files/")
22
  session_assistant = ChatPDF()
23
 
24
+ class ConnectionManager:
25
+ def __init__(self):
26
+ self.active_connections: List[WebSocket] = []
27
+
28
+ async def connect(self, websocket: WebSocket):
29
+ await websocket.accept()
30
+ self.active_connections.append(websocket)
31
+
32
+ def disconnect(self, websocket: WebSocket):
33
+ self.active_connections.remove(websocket)
34
+
35
+ async def send_personal_message(self, message: str, websocket: WebSocket):
36
+ await websocket.send_text(message)
37
+
38
+ async def broadcast(self, message: str):
39
+ for connection in self.active_connections:
40
+ await connection.send_text(message)
41
+
42
+ manager = ConnectionManager()
43
+
44
+ @app.websocket("/ws/{client_id}")
45
+ async def websocket_endpoint(websocket: WebSocket, client_id: int):
46
+ await manager.connect(websocket)
47
+ now = datetime.now()
48
+ current_time = now.strftime("%H:%M")
49
+ try:
50
+ while True:
51
+ data = await websocket.receive_text()
52
+ # await manager.send_personal_message(f"You wrote: {data}", websocket)
53
+ message = {"time":current_time,"clientId":client_id,"message":data}
54
+ await manager.broadcast(json.dumps(message))
55
+
56
+ except WebSocketDisconnect:
57
+ manager.disconnect(websocket)
58
+ message = {"time":current_time,"clientId":client_id,"message":"Offline"}
59
+ await manager.broadcast(json.dumps(message))
60
+
61
  @app.get("/query")
62
  async def process_input(text: str):
63
  if text and len(text.strip()) > 0:
64
  text = text.strip()
 
65
  return StreamingResponse(session_assistant.ask(text), media_type='text/event-stream')
66
 
67
 
app/rag.py CHANGED
@@ -22,8 +22,6 @@ logging.basicConfig(level=logging.INFO)
22
  logger = logging.getLogger(__name__)
23
 
24
  class ChatPDF:
25
- query_engine = None
26
-
27
  def __init__(self):
28
  self.text_parser = SentenceSplitter(chunk_size=512, chunk_overlap=24)
29
 
@@ -45,7 +43,7 @@ class ChatPDF:
45
  model_url="https://huggingface.co/Qwen/Qwen1.5-1.8B-Chat-GGUF/resolve/main/qwen1_5-1_8b-chat-q4_k_m.gguf",
46
  temperature=0.1,
47
  max_new_tokens=256,
48
- context_window=3900,
49
  # generate_kwargs={},
50
  # model_kwargs={"n_gpu_layers": -1},
51
  # messages_to_prompt=self.messages_to_prompt,
@@ -108,10 +106,11 @@ class ChatPDF:
108
  logger.info("retrieving the response to the query")
109
  streaming_response = self.query_engine.query(query)
110
  print(streaming_response)
111
- # return streaming_response.response_gen
112
- for text in streaming_response.response_gen:
 
113
  print(text)
114
  yield text
115
 
116
  def clear(self):
117
- self.query_engine = None
 
22
  logger = logging.getLogger(__name__)
23
 
24
  class ChatPDF:
 
 
25
  def __init__(self):
26
  self.text_parser = SentenceSplitter(chunk_size=512, chunk_overlap=24)
27
 
 
43
  model_url="https://huggingface.co/Qwen/Qwen1.5-1.8B-Chat-GGUF/resolve/main/qwen1_5-1_8b-chat-q4_k_m.gguf",
44
  temperature=0.1,
45
  max_new_tokens=256,
46
+ context_window=3900, #32k
47
  # generate_kwargs={},
48
  # model_kwargs={"n_gpu_layers": -1},
49
  # messages_to_prompt=self.messages_to_prompt,
 
106
  logger.info("retrieving the response to the query")
107
  streaming_response = self.query_engine.query(query)
108
  print(streaming_response)
109
+ print("PRINTING STREAM")
110
+ generator = streaming_response.response_gen
111
+ for text in generator:
112
  print(text)
113
  yield text
114
 
115
  def clear(self):
116
+ pass
requirements.txt CHANGED
@@ -5,4 +5,5 @@ qdrant-client
5
  python-dotenv
6
  llama-index-llms-llama-cpp
7
  llama-index-embeddings-fastembed
 
8
  fastembed==0.2.7
 
5
  python-dotenv
6
  llama-index-llms-llama-cpp
7
  llama-index-embeddings-fastembed
8
+ websockets
9
  fastembed==0.2.7
start_service.sh DELETED
@@ -1,16 +0,0 @@
1
- #!/bin/sh
2
-
3
- # # Start Ollama in the background
4
- # ollama serve &
5
-
6
- # # Wait for Ollama to start
7
- # sleep 5
8
-
9
- # #
10
- # ollama pull mxbai-embed-large
11
-
12
- # # Pull and run <YOUR_MODEL_NAME>
13
- # ollama pull qwen:1.8b
14
-
15
- #
16
- fastapi run /code/app/main.py --port 7860