SattamALMU commited on
Commit
219ce27
1 Parent(s): 76bc54c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +82 -0
app.py CHANGED
@@ -1,5 +1,87 @@
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
  """
5
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 
1
import gradio as gr
from huggingface_hub import InferenceClient
import os
import openai
import pandas as pd
import faiss
import pickle
from sentence_transformers import SentenceTransformer

# Sentence-embedding model used to vectorize user queries for FAISS lookup.
# trust_remote_code=True runs custom code from the model repository — only
# acceptable because 'nomic-ai/nomic-embed-text-v1.5' is a known publisher.
embedding_model = SentenceTransformer('nomic-ai/nomic-embed-text-v1.5', trust_remote_code=True)

# API key comes from the environment; if unset this is None and any
# OpenAI call below will fail at request time.
openai.api_key = os.getenv("OPENAI_API_KEY")

# Pre-built FAISS index over document and chunk embeddings.
db_index = faiss.read_index("db_index.faiss")

# Article table; rows are addressed by integer position (df.iloc) in search().
df = pd.read_csv('cleaned_data.csv')

# Chunk metadata aligned with the chunk entries of db_index.
# NOTE(review): pickle.load executes arbitrary code on load — safe only if
# metadata_info.pkl is produced by a trusted pipeline; confirm provenance.
with open('metadata_info.pkl', 'rb') as file:
    metadata_info = pickle.load(file)
17
# Boundary between whole-document embeddings (ids < _DOC_COUNT) and chunk
# embeddings (ids >= _DOC_COUNT) in db_index; must match how the index was
# built. Previously an unexplained magic number repeated inline.
_DOC_COUNT = 3327


def search(query):
    """Return the top-10 nearest documents/chunks in the FAISS index for *query*.

    Parameters
    ----------
    query : str
        Raw user query; embedded as-is (no cleaning is applied).

    Returns
    -------
    list[dict]
        One dict per hit. ``type`` is ``'metadata'`` for a whole-article hit
        (includes ``full_text``) or ``'content'`` for a chunk hit (includes
        ``content``); both carry ``title``, ``author``, ``publish_date`` and
        ``source`` (the article url) taken from ``df``.
    """
    query_embedding = embedding_model.encode(query).reshape(1, -1).astype('float32')
    # FAISS returns (distances, ids); the distances were never used.
    _, ids = db_index.search(query_embedding, k=10)

    results = []
    for idx in ids[0]:
        if idx < _DOC_COUNT:
            # Whole-document hit: the index id is the df row position.
            row = df.iloc[idx]
            hit = {
                'type': 'metadata',
                'title': row['title'],
                'author': row['author'],
                'publish_date': row['publish_date'],
                'full_text': row['full_text'],
                'source': row['url'],
            }
        else:
            # Chunk hit: map the offset id to its chunk metadata, then to
            # the parent document row.
            metadata = metadata_info[idx - _DOC_COUNT]
            row = df.iloc[metadata['index']]
            hit = {
                'type': 'content',
                'title': row['title'],
                'author': row['author'],
                'publish_date': row['publish_date'],
                'content': metadata['chunk'],
                'source': row['url'],
            }
        results.append(hit)

    return results
48
+
49
+
50
def generate_answer(query):
    """Answer a user query with GPT, grounded in retrieved news context.

    Calls ``search(query)`` to fetch the top matching articles/chunks,
    embeds them in the prompt, and asks the chat model for a Markdown
    answer that cites each result's source url.

    Parameters
    ----------
    query : str
        The user's question.

    Returns
    -------
    str
        The model's generated answer, stripped of surrounding whitespace.
    """
    # Retrieve context once instead of interpolating the call into the prompt.
    context = search(query)

    # Fixed prompt-string defects from the original: "responsd" typo,
    # leftover "each hotel highlighted" from another project (this app is
    # news/politics), mangled "(/n )" escape, stray quote after "context:".
    prompt = f"""
    Based on the following query from a user, please generate a detailed answer based on the context
    focusing on which is the best based on the query. You should respond as if you are a news and politics expert agent and are conversing with the
    user in a nice cordial way. If the query question is not in the context say I don't know, and always provide the url as the source of the information.
    Remove the special characters and (\\n), make the output clean and concise.
    ###########
    query:
    "{query}"
    ########
    context:
    "{context}"
    #####
    Return in Markdown format with each result's source highlighted.
    """

    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]

    # NOTE(review): openai.ChatCompletion is the pre-1.0 SDK interface —
    # pin openai<1.0 or migrate to OpenAI().chat.completions.create.
    response = openai.ChatCompletion.create(
        model="gpt-4o-mini",
        messages=messages,
        max_tokens=1500,
        n=1,
        stop=None,
        # Low temperature keeps the answer grounded in the retrieved
        # context (less creativity / hallucination).
        temperature=0.2,
    )

    # Extract the generated answer text from the first choice.
    return response.choices[0].message['content'].strip()
85
 
86
  """
87
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference