Files changed (1) hide show
  1. app.py +95 -56
app.py CHANGED
@@ -1,63 +1,102 @@
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
3
-
4
- """
5
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
- """
7
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
8
-
9
-
10
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat reply for ``message``, yielding the text accumulated so far.

    Args:
        message: The user's newest message.
        history: Earlier ``(user, assistant)`` turns; empty entries are skipped.
        system_message: Content of the leading system message.
        max_tokens: Forwarded to ``client.chat_completion`` as ``max_tokens``.
        temperature: Forwarded as ``temperature``.
        top_p: Forwarded as ``top_p``.

    Yields:
        The partial response string after each streamed chunk.
    """
    messages = [{"role": "system", "content": system_message}]

    for turn in history:
        user_text, assistant_text = turn[0], turn[1]
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})

    messages.append({"role": "user", "content": message})

    response = ""

    # The loop variable is deliberately not called ``message`` so the
    # function argument is not rebound while streaming.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        # A streamed delta may carry no text (content is None); concatenating
        # None to a str would raise TypeError, so treat it as empty.
        response += token or ""
        yield response
41
-
42
- """
43
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
44
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  demo = gr.ChatInterface(
46
- respond,
47
- additional_inputs=[
48
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
49
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
50
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
51
- gr.Slider(
52
- minimum=0.1,
53
- maximum=1.0,
54
- value=0.95,
55
- step=0.05,
56
- label="Top-p (nucleus sampling)",
57
- ),
58
  ],
59
  )
60
 
61
-
62
  if __name__ == "__main__":
63
  demo.launch()
 
1
  import gradio as gr
2
+ import os
3
+ from upstash_vector import Index
4
+ from openai import OpenAI
5
+ import dotenv
6
+
7
+ # Load environment variables
8
+ dotenv.load_dotenv()
9
+
10
+ # Services (keep the same as in your original code)
11
class ConfigService:
    """Reads the settings the app needs from the process environment."""

    @staticmethod
    def load_config():
        """Return a dict holding the OpenAI and Upstash Vector settings.

        Every key is looked up with ``os.getenv``, so a variable that is not
        set maps to ``None``.
        """
        variable_names = (
            "OPENAI_API_KEY",
            "UPSTASH_VECTOR_REST_URL",
            "UPSTASH_VECTOR_REST_TOKEN",
        )
        return {name: os.getenv(name) for name in variable_names}
19
+
20
class ClientService:
    """Builds the external API clients from a loaded configuration."""

    @staticmethod
    def initialize_clients(config):
        """Create the vector index client and the OpenAI client.

        Args:
            config: Mapping providing "UPSTASH_VECTOR_REST_URL",
                "UPSTASH_VECTOR_REST_TOKEN" and "OPENAI_API_KEY".

        Returns:
            A dict with the ``Index`` instance under "index" and the
            ``OpenAI`` instance under "openai".
        """
        vector_index = Index(
            url=config["UPSTASH_VECTOR_REST_URL"],
            token=config["UPSTASH_VECTOR_REST_TOKEN"],
        )
        openai_client = OpenAI(api_key=config["OPENAI_API_KEY"])
        return {"index": vector_index, "openai": openai_client}
27
+
28
class EmbeddingService:
    """Turns text into an embedding vector via the supplied client."""

    @staticmethod
    def get_embedding(content, client):
        """Return the embedding of ``content``.

        Calls ``client.embeddings.create`` with the
        "text-embedding-3-large" model and returns the ``embedding`` of the
        first item in the response's ``data``.
        """
        embeddings_api = client.embeddings
        result = embeddings_api.create(
            input=content,
            model="text-embedding-3-large",
        )
        first_item = result.data[0]
        return first_item.embedding
36
+
37
class VectorSearchService:
    """Thin wrapper around the vector index's similarity query."""

    @staticmethod
    def search(index, vector, top_k=2):
        """Query ``index`` for the ``top_k`` nearest matches to ``vector``.

        Metadata is always requested so callers can read it from the hits.
        Returns whatever ``index.query`` returns.
        """
        query_options = {
            "vector": vector,
            "top_k": top_k,
            "include_metadata": True,
        }
        return index.query(**query_options)
41
+
42
class PromptService:
    """Helpers for loading the system prompt and building user prompts."""

    @staticmethod
    def load_system_prompt(file_path):
        """Read the system prompt from ``file_path``.

        The file is decoded as UTF-8 explicitly, so the result does not
        depend on the platform's default encoding.

        Returns:
            The file's text with leading and trailing whitespace removed.

        Raises:
            OSError: If the file cannot be opened.
            UnicodeDecodeError: If the file is not valid UTF-8.
        """
        with open(file_path, "r", encoding="utf-8") as file:
            return file.read().strip()

    @staticmethod
    def create_chat_prompt(question, context):
        """Combine the user's question and retrieved context into one prompt."""
        return f"Question: {question}\n\nContext: {context}"
51
+
52
class ChatService:
    """Retrieval-augmented chat backed by an OpenAI client and a vector index.

    The instance keeps a single running message history, starting with the
    system prompt, and sends it in full with every request.
    """

    # A search hit is used as context only when its score exceeds this value.
    MIN_CONTEXT_SCORE = 0.7

    def __init__(self, clients, system_prompt):
        """Store the clients and seed the history with the system prompt.

        Args:
            clients: Mapping with the OpenAI client under "openai" and the
                vector index under "index".
            system_prompt: Text of the initial system message.
        """
        self.clients = clients
        self.messages = [{"role": "system", "content": system_prompt}]

    def ask_question(self, question):
        """Answer ``question`` using retrieved context and the chat history.

        The question is embedded, the index is searched, sufficiently
        similar hits are joined into a context string, and the resulting
        prompt is sent together with the stored history.

        Returns:
            The content of the first completion choice.

        Raises:
            Whatever the embedding, search or completion calls raise. The
            stored history is only extended after the completion succeeds,
            so a failed request leaves it unchanged.
        """
        openai_client = self.clients["openai"]
        question_embedding = EmbeddingService.get_embedding(question, openai_client)
        search_results = VectorSearchService.search(
            self.clients["index"], question_embedding
        )

        context = "\n".join(self._relevant_passages(search_results))
        final_prompt = PromptService.create_chat_prompt(question, context)
        user_message = {"role": "user", "content": final_prompt}

        # Send history + new turn without mutating self.messages yet: if the
        # request fails, no unanswered user turn is left behind to be resent
        # on every later call.
        chat_completion = openai_client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[*self.messages, user_message],
        )
        response_text = chat_completion.choices[0].message.content

        self.messages.append(user_message)
        self.messages.append({"role": "assistant", "content": response_text})
        return response_text

    def _relevant_passages(self, search_results):
        """Return the "content" metadata of hits scoring above the threshold."""
        passages = []
        for hit in search_results:
            if not hit.score > self.MIN_CONTEXT_SCORE:
                continue
            # Hits may come back without metadata or without a "content"
            # entry; skip those instead of raising.
            content = (hit.metadata or {}).get("content")
            if content is not None:
                passages.append(content)
        return passages
77
+
78
# Initialize services once at import time: settings from the environment
# feed the API clients, which back the single module-level chat service.
config = ConfigService.load_config()
clients = ClientService.initialize_clients(config)
system_prompt = PromptService.load_system_prompt("prompts/sys.md")
chat_service = ChatService(clients=clients, system_prompt=system_prompt)
83
+
84
# Gradio interface
def chatbot(message, history):
    """Answer one chat message for the Gradio ChatInterface.

    ``history`` is accepted because it is part of the callback signature but
    is not used here; the conversation is tracked inside ``chat_service``.

    NOTE(review): ``chat_service`` is a single module-level object, so every
    caller of this function shares the same message history.
    """
    return chat_service.ask_question(message)


# Sample questions offered in the chat UI.
_EXAMPLE_QUESTIONS = [
    "What is Vector Database Cloud?",
    "How does one-click deployment work?",
    "What vector databases are supported?",
]

demo = gr.ChatInterface(
    fn=chatbot,
    title="Vector Database Cloud Chatbot",
    description="Ask questions about Vector Database Cloud",
    theme="default",
    examples=_EXAMPLE_QUESTIONS,
)


if __name__ == "__main__":
    demo.launch()