kartheikiyer committed c0025fa
1 Parent(s): 2f5de34

more df index fixes

Files changed:
- .ipynb_checkpoints/app_gradio-checkpoint.py (+14 −1)
- app_gradio.py (+14 −1)
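The headline change is `full_df.index = full_df.index + 1` after the `pd.concat(..., ignore_index=True)` in `deep_research`: downstream code such as `evaluate_overall_consensus(query, [formatted_df['abstract'][i+1] for i in range(len(formatted_df))])` reads rows with 1-based keys, so the fresh 0-based index that `ignore_index=True` produces would raise a `KeyError` on the last row. A minimal sketch of the failure mode and the fix (toy data; `atom_qn_dfs` and the `abstract` column are from the app, the rest is illustrative):

import pandas as pd

# Two per-question result frames, as deep_research collects in atom_qn_dfs.
df_a = pd.DataFrame({'abstract': ['paper 1', 'paper 2']})
df_b = pd.DataFrame({'abstract': ['paper 3']})

# ignore_index=True gives a fresh 0-based RangeIndex: 0, 1, 2.
full_df = pd.concat([df_a, df_b], ignore_index=True)

# The consumer indexes rows as i+1 (1-based), so key 3 would be missing:
# [full_df['abstract'][i + 1] for i in range(len(full_df))]  # KeyError: 3

# The commit's fix: shift to a 1-based index so labels 1..N all exist.
full_df.index = full_df.index + 1
print([full_df['abstract'][i + 1] for i in range(len(full_df))])
# ['paper 1', 'paper 2', 'paper 3']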
.ipynb_checkpoints/app_gradio-checkpoint.py
CHANGED
@@ -533,12 +533,16 @@ def compileinfo(query, atom_qns, atom_qn_ans, atom_qn_strs):
 def deep_research(question, top_k, ec):
 
     full_answer = '## ' + question
+    yield None, None
 
     gen_client = openai_llm(temperature=0,model_name='gpt-4o-mini', openai_api_key = openai_key)
     messages = [("system",df_atomic_prompt,),("human", question),]
     rscope_text = gen_client.invoke(messages).content
 
     full_answer = full_answer +' \n'+ rscope_text
+    rag_answer = {}
+    rag_answer['answer'] = full_answer
+    yield None, rag_answer
 
     rscope_messages = [("system","""In the given text, what are the main atomic questions being asked? Please answer as a concise list.""",),("human", rscope_text),]
     rscope_qns = gen_client.invoke(rscope_messages).content
@@ -563,11 +567,15 @@ def deep_research(question, top_k, ec):
         atom_qn_strs.append(linkstr)
         full_answer = full_answer +' \n### '+atom_qns[i]
         full_answer = full_answer +' \n'+smallans
+        rag_answer = {}
+        rag_answer['answer'] = full_answer
+        yield None, rag_answer
 
     finalans, finallinks = compileinfo(question, atom_qns, atom_qn_ans, atom_qn_strs)
     full_answer = full_answer +' \n'+'### Summary:\n'+finalans
 
     full_df = pd.concat(atom_qn_dfs, ignore_index=True)
+    full_df.index = full_df.index + 1
 
     rag_answer = {}
     rag_answer['answer'] = full_answer
@@ -602,26 +610,30 @@ def run_pathfinder(query, top_k, extra_keywords, toggles, prompt_type, rag_type,
     ec.rerank = True
 
     if prompt_type == "Deep Research (BETA)":
-        gr.Info("Starting deep research
+        gr.Info("Starting deep research - this takes a few mins, so grab a drink or stretch your legs.")
         formatted_df, rag_answer = deep_research(query, top_k = top_k, ec=ec)
         yield formatted_df, rag_answer['answer'], None, None, None
 
     else:
         # progress(0.2, desc=search_text_list[np.random.choice(len(search_text_list))])
+        gr.Info(search_text_list[np.random.choice(len(search_text_list))])
         rs, small_df = ec.retrieve(query, top_k = top_k, return_scores=True)
         formatted_df = ec.return_formatted_df(rs, small_df)
         yield formatted_df, None, None, None, None
 
         # progress(0.4, desc=gen_text_list[np.random.choice(len(gen_text_list))])
+        gr.Info(gen_text_list[np.random.choice(len(gen_text_list))])
         rag_answer = run_rag_qa(query, formatted_df, prompt_type)
         yield formatted_df, rag_answer['answer'], None, None, None
 
         # progress(0.6, desc="Generating consensus")
+        gr.Info("Generating consensus")
         consensus_answer = evaluate_overall_consensus(query, [formatted_df['abstract'][i+1] for i in range(len(formatted_df))])
         consensus = '## Consensus \n'+consensus_answer.consensus + '\n\n'+consensus_answer.explanation + '\n\n > Relevance of retrieved papers to answer: %.1f' %consensus_answer.relevance_score
         yield formatted_df, rag_answer['answer'], consensus, None, None
 
         # progress(0.8, desc="Analyzing question type")
+        gr.Info("Analyzing question type")
         question_type_gen = guess_question_type(query)
         if '<categorization>' in question_type_gen:
             question_type_gen = question_type_gen.split('<categorization>')[1]
@@ -632,6 +644,7 @@ def run_pathfinder(query, top_k, extra_keywords, toggles, prompt_type, rag_type,
         yield formatted_df, rag_answer['answer'], consensus, qn_type, None
 
         # progress(1.0, desc="Visualizing embeddings")
+        gr.Info("Visualizing embeddings")
         fig = make_embedding_plot(formatted_df, top_k, consensus_answer)
 
         yield formatted_df, rag_answer['answer'], consensus, qn_type, fig
app_gradio.py
CHANGED
@@ -533,12 +533,16 @@ def compileinfo(query, atom_qns, atom_qn_ans, atom_qn_strs):
 def deep_research(question, top_k, ec):
 
     full_answer = '## ' + question
+    yield None, None
 
     gen_client = openai_llm(temperature=0,model_name='gpt-4o-mini', openai_api_key = openai_key)
     messages = [("system",df_atomic_prompt,),("human", question),]
     rscope_text = gen_client.invoke(messages).content
 
     full_answer = full_answer +' \n'+ rscope_text
+    rag_answer = {}
+    rag_answer['answer'] = full_answer
+    yield None, rag_answer
 
     rscope_messages = [("system","""In the given text, what are the main atomic questions being asked? Please answer as a concise list.""",),("human", rscope_text),]
     rscope_qns = gen_client.invoke(rscope_messages).content
@@ -563,11 +567,15 @@ def deep_research(question, top_k, ec):
         atom_qn_strs.append(linkstr)
         full_answer = full_answer +' \n### '+atom_qns[i]
         full_answer = full_answer +' \n'+smallans
+        rag_answer = {}
+        rag_answer['answer'] = full_answer
+        yield None, rag_answer
 
     finalans, finallinks = compileinfo(question, atom_qns, atom_qn_ans, atom_qn_strs)
     full_answer = full_answer +' \n'+'### Summary:\n'+finalans
 
     full_df = pd.concat(atom_qn_dfs, ignore_index=True)
+    full_df.index = full_df.index + 1
 
     rag_answer = {}
     rag_answer['answer'] = full_answer
@@ -602,26 +610,30 @@ def run_pathfinder(query, top_k, extra_keywords, toggles, prompt_type, rag_type,
     ec.rerank = True
 
     if prompt_type == "Deep Research (BETA)":
-        gr.Info("Starting deep research
+        gr.Info("Starting deep research - this takes a few mins, so grab a drink or stretch your legs.")
         formatted_df, rag_answer = deep_research(query, top_k = top_k, ec=ec)
         yield formatted_df, rag_answer['answer'], None, None, None
 
     else:
         # progress(0.2, desc=search_text_list[np.random.choice(len(search_text_list))])
+        gr.Info(search_text_list[np.random.choice(len(search_text_list))])
         rs, small_df = ec.retrieve(query, top_k = top_k, return_scores=True)
         formatted_df = ec.return_formatted_df(rs, small_df)
         yield formatted_df, None, None, None, None
 
         # progress(0.4, desc=gen_text_list[np.random.choice(len(gen_text_list))])
+        gr.Info(gen_text_list[np.random.choice(len(gen_text_list))])
         rag_answer = run_rag_qa(query, formatted_df, prompt_type)
         yield formatted_df, rag_answer['answer'], None, None, None
 
         # progress(0.6, desc="Generating consensus")
+        gr.Info("Generating consensus")
         consensus_answer = evaluate_overall_consensus(query, [formatted_df['abstract'][i+1] for i in range(len(formatted_df))])
         consensus = '## Consensus \n'+consensus_answer.consensus + '\n\n'+consensus_answer.explanation + '\n\n > Relevance of retrieved papers to answer: %.1f' %consensus_answer.relevance_score
         yield formatted_df, rag_answer['answer'], consensus, None, None
 
         # progress(0.8, desc="Analyzing question type")
+        gr.Info("Analyzing question type")
         question_type_gen = guess_question_type(query)
         if '<categorization>' in question_type_gen:
             question_type_gen = question_type_gen.split('<categorization>')[1]
@@ -632,6 +644,7 @@ def run_pathfinder(query, top_k, extra_keywords, toggles, prompt_type, rag_type,
         yield formatted_df, rag_answer['answer'], consensus, qn_type, None
 
         # progress(1.0, desc="Visualizing embeddings")
+        gr.Info("Visualizing embeddings")
         fig = make_embedding_plot(formatted_df, top_k, consensus_answer)
 
         yield formatted_df, rag_answer['answer'], consensus, qn_type, fig
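Both files pick up the same two patterns: the commented-out `progress(...)` calls are superseded by `gr.Info(...)` toast notifications, and `deep_research` now `yield`s intermediate `(df, rag_answer)` pairs so the UI can update before the full answer is assembled. A minimal sketch of how a Gradio generator handler streams such intermediate yields (toy function and component names; the real app wires `run_pathfinder` to its own inputs and outputs):

import gradio as gr

def slow_answer(question):
    # Each yield pushes an intermediate state to the bound output component.
    gr.Info("Starting research")  # toast notification, as in the commit
    yield "Working on: " + question
    partial = "## " + question + "\nFirst findings..."
    yield partial  # the UI updates before the final answer exists
    gr.Info("Generating summary")
    yield partial + "\n### Summary:\nDone."

with gr.Blocks() as demo:
    q = gr.Textbox(label="Question")
    out = gr.Markdown()
    q.submit(slow_answer, inputs=q, outputs=out)

demo.launch()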