Spaces:

ccm
/

chat-with-idetc

Runtime error

App Files Files Community

ccm committed on Aug 7, 2024

Commit

41ef5eb

verified ·

1 Parent(s): 0400fe2

Update main.py

Browse files

Files changed (1) hide show

main.py +37 -22

main.py CHANGED Viewed

@@ -1,4 +1,3 @@
-import json  # to work with JSON
 import threading  # to allow streaming response
 import time  # to pace the delivery of the message
@@ -6,46 +5,50 @@ import datasets  # for loading RAG database
 import faiss  # to create a search index
 import gradio  # for the interface
 import numpy  # to work with vectors
-import pandas  # to work with pandas
 import sentence_transformers  # to load an embedding model
 import spaces  # for GPU
 import transformers  # to load an LLM
-# Constants
 GREETING = (
     "Howdy! I'm an AI agent that uses [retrieval-augmented generation](https://en.wikipedia.org/wiki/Retrieval-augmented_generation) "
     "to answer questions about research published at [ASME IDETC](https://asmedigitalcollection.asme.org/IDETC-CIE) within the last 10 years or so. "
     "I always try to cite my sources, but sometimes things get a little weird. "
     "What can I tell you about today?"
 )
 EXAMPLE_QUERIES = [
     "What's the difference between a markov chain and a hidden markov model?",
     "What can you tell me about analytical target cascading?",
     "What is known about different modes for human-AI teaming?",
 ]
 EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
 LLM_MODEL_NAME = "Qwen/Qwen2-7B-Instruct"
 # Load the dataset and convert to pandas
 data = datasets.load_dataset("ccm/rag-idetc")["train"].to_pandas()
 # Load the model for later use in embeddings
-model = sentence_transformers.SentenceTransformer(EMBEDDING_MODEL_NAME)
 # Create an LLM pipeline that we can send queries to
 tokenizer = transformers.AutoTokenizer.from_pretrained(LLM_MODEL_NAME)
 streamer = transformers.TextIteratorStreamer(
     tokenizer, skip_prompt=True, skip_special_tokens=True
 )
-chatmodel = transformers.AutoModelForCausalLM.from_pretrained(
     LLM_MODEL_NAME, torch_dtype="auto", device_map="auto"
 )
 # Create a FAISS index for fast similarity search (inner product over L2-normalized vectors)
-metric = faiss.METRIC_INNER_PRODUCT
-vectors = numpy.stack(data["embedding"].tolist(), axis=0).astype('float32')
 index = faiss.IndexFlatL2(len(data["embedding"][0]))
-index.metric_type = metric
 faiss.normalize_L2(vectors)
 index.train(vectors)
 index.add(vectors)
@@ -60,7 +63,7 @@ def preprocess(query: str, k: int) -> tuple[str, str]:
     Returns:
         tuple[str, str]: A tuple containing the prompt and references
     """
-    encoded_query = numpy.expand_dims(model.encode(query), axis=0)
     faiss.normalize_L2(encoded_query)
     D, I = index.search(encoded_query, k)
     top_five = data.loc[I[0]]
@@ -68,16 +71,16 @@ def preprocess(query: str, k: int) -> tuple[str, str]:
     print(top_five["text"].values)
     prompt = (
-        "You are an AI assistant who delights in helping people learn about research from the IDETC Conference."
         "Your main task is to provide an ANSWER to the USER_QUERY based on the RESEARCH_EXCERPTS."
         "Your ANSWER should be concise.\n\n"
-        "RESEARCH_EXCERPTS:\n{{ABSTRACTS_GO_HERE}}\n\n"
         "USER_GUERY:\n{{QUERY_GOES_HERE}}\n\n"
         "ANSWER:\n"
     )
     references = {}
-    research_abstracts = ""
     for i in range(k):
         title = top_five["title"].values[i]
@@ -86,21 +89,32 @@ def preprocess(query: str, k: int) -> tuple[str, str]:
         path = top_five["path"].values[i]
         text = top_five["text"].values[i]
-        research_abstracts += str(i + i) + ". This excerpt is from: '" + title + "':\n" + text + "\n"
         header = "[" + title.title() + "](" + url + ")\n"
         if header not in references.keys():
             references[header] = []
         references[header].append(text)
-    prompt = prompt.replace("{{ABSTRACTS_GO_HERE}}", research_abstracts)
     prompt = prompt.replace("{{QUERY_GOES_HERE}}", query)
     print(references)
-    return prompt, "\n\n### References\n\n"+"\n".join([str(i+1)+". " + ref + "\n  - ".join(["", *["\"..." + x + "...\"" for x in references[ref]]]) for i, ref in enumerate(references.keys())])
 def postprocess(response: str, bypass_from_preprocessing: str) -> str:
     """
     Applies a postprocessing step to the LLM's response before the user receives it
@@ -142,7 +156,7 @@ def reply(message: str, history: list[str]) -> str:
     model_inputs = tokenizer([text], return_tensors="pt").to("cuda:0")
     generate_kwargs = dict(model_inputs, streamer=streamer, max_new_tokens=512)
-    t = threading.Thread(target=chatmodel.generate, kwargs=generate_kwargs)
     t.start()
     partial_message = ""
@@ -160,7 +174,10 @@ gradio.ChatInterface(
     reply,
     examples=EXAMPLE_QUERIES,
     chatbot=gradio.Chatbot(
-        avatar_images=[None, "https://event.asme.org/Events/media/library/images/IDETC-CIE/IDETC-Logo-Announcements.png?ext=.png"],
         show_label=False,
         show_share_button=False,
         show_copy_button=False,
@@ -172,5 +189,3 @@ gradio.ChatInterface(
     undo_btn=None,
     clear_btn=None,
 ).launch(debug=True)

 import threading  # to allow streaming response
 import time  # to pace the delivery of the message
 import faiss  # to create a search index
 import gradio  # for the interface
 import numpy  # to work with vectors
 import sentence_transformers  # to load an embedding model
 import spaces  # for GPU
 import transformers  # to load an LLM
+# The greeting supplied by the agent when it starts
 GREETING = (
     "Howdy! I'm an AI agent that uses [retrieval-augmented generation](https://en.wikipedia.org/wiki/Retrieval-augmented_generation) "
     "to answer questions about research published at [ASME IDETC](https://asmedigitalcollection.asme.org/IDETC-CIE) within the last 10 years or so. "
     "I always try to cite my sources, but sometimes things get a little weird. "
     "What can I tell you about today?"
 )
+# Example queries supplied in the interface
 EXAMPLE_QUERIES = [
     "What's the difference between a markov chain and a hidden markov model?",
     "What can you tell me about analytical target cascading?",
     "What is known about different modes for human-AI teaming?",
 ]
+# The embedding model used
 EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
+# The conversational model used
 LLM_MODEL_NAME = "Qwen/Qwen2-7B-Instruct"
 # Load the dataset and convert to pandas
 data = datasets.load_dataset("ccm/rag-idetc")["train"].to_pandas()
 # Load the model for later use in embeddings
+embedding_model = sentence_transformers.SentenceTransformer(EMBEDDING_MODEL_NAME)
 # Create an LLM pipeline that we can send queries to
 tokenizer = transformers.AutoTokenizer.from_pretrained(LLM_MODEL_NAME)
 streamer = transformers.TextIteratorStreamer(
     tokenizer, skip_prompt=True, skip_special_tokens=True
 )
+chat_model = transformers.AutoModelForCausalLM.from_pretrained(
     LLM_MODEL_NAME, torch_dtype="auto", device_map="auto"
 )
 # Create a FAISS index for fast similarity search (inner product over L2-normalized vectors)
+vectors = numpy.stack(data["embedding"].tolist(), axis=0).astype("float32")
 index = faiss.IndexFlatL2(len(data["embedding"][0]))
+index.metric_type = faiss.METRIC_INNER_PRODUCT
 faiss.normalize_L2(vectors)
 index.train(vectors)
 index.add(vectors)
     Returns:
         tuple[str, str]: A tuple containing the prompt and references
     """
+    encoded_query = numpy.expand_dims(embedding_model.encode(query), axis=0)
     faiss.normalize_L2(encoded_query)
     D, I = index.search(encoded_query, k)
     top_five = data.loc[I[0]]
     print(top_five["text"].values)
     prompt = (
+        "You are an AI assistant who delights in helping people learn about research from the IDETC Conference."
         "Your main task is to provide an ANSWER to the USER_QUERY based on the RESEARCH_EXCERPTS."
         "Your ANSWER should be concise.\n\n"
+        "RESEARCH_EXCERPTS:\n{{EXCERPTS_GO_HERE}}\n\n"
         "USER_GUERY:\n{{QUERY_GOES_HERE}}\n\n"
         "ANSWER:\n"
     )
     references = {}
+    research_excerpts = ""
     for i in range(k):
         title = top_five["title"].values[i]
         path = top_five["path"].values[i]
         text = top_five["text"].values[i]
+        research_excerpts += (
+            str(i + i) + ". This excerpt is from: '" + title + "':\n" + text + "\n"
+        )
         header = "[" + title.title() + "](" + url + ")\n"
         if header not in references.keys():
             references[header] = []
         references[header].append(text)
+    prompt = prompt.replace("{{EXCERPTS_GO_HERE}}", research_excerpts)
     prompt = prompt.replace("{{QUERY_GOES_HERE}}", query)
     print(references)
+    return prompt, "\n\n### References\n\n" + "\n".join(
+        [
+            str(i + 1)
+            + ". "
+            + ref
+            + "\n  - ".join(["", *['"...' + x + '..."' for x in references[ref]]])
+            for i, ref in enumerate(references.keys())
+        ]
+    )
 def postprocess(response: str, bypass_from_preprocessing: str) -> str:
     """
     Applies a postprocessing step to the LLM's response before the user receives it
     model_inputs = tokenizer([text], return_tensors="pt").to("cuda:0")
     generate_kwargs = dict(model_inputs, streamer=streamer, max_new_tokens=512)
+    t = threading.Thread(target=chat_model.generate, kwargs=generate_kwargs)
     t.start()
     partial_message = ""
     reply,
     examples=EXAMPLE_QUERIES,
     chatbot=gradio.Chatbot(
+        avatar_images=(
+            None,
+            "https://event.asme.org/Events/media/library/images/IDETC-CIE/IDETC-Logo-Announcements.png?ext=.png",
+        ),
         show_label=False,
         show_share_button=False,
         show_copy_button=False,
     undo_btn=None,
     clear_btn=None,
 ).launch(debug=True)