SattamALMU commited on
Commit
219ce27
1 Parent(s): 76bc54c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +82 -0
app.py CHANGED
@@ -1,5 +1,87 @@
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
  """
5
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 
1
import gradio as gr
from huggingface_hub import InferenceClient
import os
import openai
import pandas as pd
import faiss
import pickle
from sentence_transformers import SentenceTransformer

# Sentence-embedding model used to vectorize user queries for FAISS lookup.
# trust_remote_code=True runs custom code from the model repository — only
# acceptable because 'nomic-ai/nomic-embed-text-v1.5' is a known publisher.
embedding_model = SentenceTransformer('nomic-ai/nomic-embed-text-v1.5', trust_remote_code=True)

# API key comes from the environment; if unset this is None and any
# OpenAI call below will fail at request time.
openai.api_key = os.getenv("OPENAI_API_KEY")

# Pre-built FAISS index over document and chunk embeddings.
db_index = faiss.read_index("db_index.faiss")

# Article table; rows are addressed by integer position (df.iloc) in search().
df = pd.read_csv('cleaned_data.csv')

# Chunk metadata aligned with the chunk entries of db_index.
# NOTE(review): pickle.load executes arbitrary code on load — safe only if
# metadata_info.pkl is produced by a trusted pipeline; confirm provenance.
with open('metadata_info.pkl', 'rb') as file:
    metadata_info = pickle.load(file)
17
# Boundary between whole-document embeddings (ids < _DOC_COUNT) and chunk
# embeddings (ids >= _DOC_COUNT) in db_index; must match how the index was
# built. Previously an unexplained magic number repeated inline.
_DOC_COUNT = 3327


def search(query):
    """Return the top-10 nearest documents/chunks in the FAISS index for *query*.

    Parameters
    ----------
    query : str
        Raw user query; embedded as-is (no cleaning is applied).

    Returns
    -------
    list[dict]
        One dict per hit. ``type`` is ``'metadata'`` for a whole-article hit
        (includes ``full_text``) or ``'content'`` for a chunk hit (includes
        ``content``); both carry ``title``, ``author``, ``publish_date`` and
        ``source`` (the article url) taken from ``df``.
    """
    query_embedding = embedding_model.encode(query).reshape(1, -1).astype('float32')
    # FAISS returns (distances, ids); the distances were never used.
    _, ids = db_index.search(query_embedding, k=10)

    results = []
    for idx in ids[0]:
        if idx < _DOC_COUNT:
            # Whole-document hit: the index id is the df row position.
            row = df.iloc[idx]
            hit = {
                'type': 'metadata',
                'title': row['title'],
                'author': row['author'],
                'publish_date': row['publish_date'],
                'full_text': row['full_text'],
                'source': row['url'],
            }
        else:
            # Chunk hit: map the offset id to its chunk metadata, then to
            # the parent document row.
            metadata = metadata_info[idx - _DOC_COUNT]
            row = df.iloc[metadata['index']]
            hit = {
                'type': 'content',
                'title': row['title'],
                'author': row['author'],
                'publish_date': row['publish_date'],
                'content': metadata['chunk'],
                'source': row['url'],
            }
        results.append(hit)

    return results
48
+
49
+
50
def generate_answer(query):
    """Answer a user query with GPT, grounded in retrieved news context.

    Calls ``search(query)`` to fetch the top matching articles/chunks,
    embeds them in the prompt, and asks the chat model for a Markdown
    answer that cites each result's source url.

    Parameters
    ----------
    query : str
        The user's question.

    Returns
    -------
    str
        The model's generated answer, stripped of surrounding whitespace.
    """
    # Retrieve context once instead of interpolating the call into the prompt.
    context = search(query)

    # Fixed prompt-string defects from the original: "responsd" typo,
    # leftover "each hotel highlighted" from another project (this app is
    # news/politics), mangled "(/n )" escape, stray quote after "context:".
    prompt = f"""
    Based on the following query from a user, please generate a detailed answer based on the context
    focusing on which is the best based on the query. You should respond as if you are a news and politics expert agent and are conversing with the
    user in a nice cordial way. If the query question is not in the context say I don't know, and always provide the url as the source of the information.
    Remove the special characters and (\\n), make the output clean and concise.
    ###########
    query:
    "{query}"
    ########
    context:
    "{context}"
    #####
    Return in Markdown format with each result's source highlighted.
    """

    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]

    # NOTE(review): openai.ChatCompletion is the pre-1.0 SDK interface —
    # pin openai<1.0 or migrate to OpenAI().chat.completions.create.
    response = openai.ChatCompletion.create(
        model="gpt-4o-mini",
        messages=messages,
        max_tokens=1500,
        n=1,
        stop=None,
        # Low temperature keeps the answer grounded in the retrieved
        # context (less creativity / hallucination).
        temperature=0.2,
    )

    # Extract the generated answer text from the first choice.
    return response.choices[0].message['content'].strip()
85
 
86
  """
87
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference