kartheikiyer committed c0025fa
1 Parent(s): 2f5de34

more df index fixes

Files changed:
- .ipynb_checkpoints/app_gradio-checkpoint.py (+14 −1)
- app_gradio.py (+14 −1)
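The headline change is `full_df.index = full_df.index + 1` after the `pd.concat(..., ignore_index=True)` in `deep_research`: downstream code such as `evaluate_overall_consensus(query, [formatted_df['abstract'][i+1] for i in range(len(formatted_df))])` reads rows with 1-based keys, so the fresh 0-based index that `ignore_index=True` produces would raise a `KeyError` on the last row. A minimal sketch of the failure mode and the fix (toy data; `atom_qn_dfs` and the `abstract` column are from the app, the rest is illustrative):

import pandas as pd

# Two per-question result frames, as deep_research collects in atom_qn_dfs.
df_a = pd.DataFrame({'abstract': ['paper 1', 'paper 2']})
df_b = pd.DataFrame({'abstract': ['paper 3']})

# ignore_index=True gives a fresh 0-based RangeIndex: 0, 1, 2.
full_df = pd.concat([df_a, df_b], ignore_index=True)

# The consumer indexes rows as i+1 (1-based), so key 3 would be missing:
# [full_df['abstract'][i + 1] for i in range(len(full_df))]  # KeyError: 3

# The commit's fix: shift to a 1-based index so labels 1..N all exist.
full_df.index = full_df.index + 1
print([full_df['abstract'][i + 1] for i in range(len(full_df))])
# ['paper 1', 'paper 2', 'paper 3']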
.ipynb_checkpoints/app_gradio-checkpoint.py
CHANGED
@@ -533,12 +533,16 @@ def compileinfo(query, atom_qns, atom_qn_ans, atom_qn_strs):
 def deep_research(question, top_k, ec):
 
     full_answer = '## ' + question
+    yield None, None
 
     gen_client = openai_llm(temperature=0,model_name='gpt-4o-mini', openai_api_key = openai_key)
     messages = [("system",df_atomic_prompt,),("human", question),]
     rscope_text = gen_client.invoke(messages).content
 
     full_answer = full_answer +' \n'+ rscope_text
+    rag_answer = {}
+    rag_answer['answer'] = full_answer
+    yield None, rag_answer
 
     rscope_messages = [("system","""In the given text, what are the main atomic questions being asked? Please answer as a concise list.""",),("human", rscope_text),]
     rscope_qns = gen_client.invoke(rscope_messages).content
@@ -563,11 +567,15 @@ def deep_research(question, top_k, ec):
         atom_qn_strs.append(linkstr)
         full_answer = full_answer +' \n### '+atom_qns[i]
         full_answer = full_answer +' \n'+smallans
+        rag_answer = {}
+        rag_answer['answer'] = full_answer
+        yield None, rag_answer
 
     finalans, finallinks = compileinfo(question, atom_qns, atom_qn_ans, atom_qn_strs)
     full_answer = full_answer +' \n'+'### Summary:\n'+finalans
 
     full_df = pd.concat(atom_qn_dfs, ignore_index=True)
+    full_df.index = full_df.index + 1
 
     rag_answer = {}
     rag_answer['answer'] = full_answer
@@ -602,26 +610,30 @@ def run_pathfinder(query, top_k, extra_keywords, toggles, prompt_type, rag_type,
     ec.rerank = True
 
     if prompt_type == "Deep Research (BETA)":
-        gr.Info("Starting deep research
+        gr.Info("Starting deep research - this takes a few mins, so grab a drink or stretch your legs.")
         formatted_df, rag_answer = deep_research(query, top_k = top_k, ec=ec)
         yield formatted_df, rag_answer['answer'], None, None, None
 
     else:
         # progress(0.2, desc=search_text_list[np.random.choice(len(search_text_list))])
+        gr.Info(search_text_list[np.random.choice(len(search_text_list))])
         rs, small_df = ec.retrieve(query, top_k = top_k, return_scores=True)
         formatted_df = ec.return_formatted_df(rs, small_df)
         yield formatted_df, None, None, None, None
 
         # progress(0.4, desc=gen_text_list[np.random.choice(len(gen_text_list))])
+        gr.Info(gen_text_list[np.random.choice(len(gen_text_list))])
         rag_answer = run_rag_qa(query, formatted_df, prompt_type)
         yield formatted_df, rag_answer['answer'], None, None, None
 
         # progress(0.6, desc="Generating consensus")
+        gr.Info("Generating consensus")
         consensus_answer = evaluate_overall_consensus(query, [formatted_df['abstract'][i+1] for i in range(len(formatted_df))])
         consensus = '## Consensus \n'+consensus_answer.consensus + '\n\n'+consensus_answer.explanation + '\n\n > Relevance of retrieved papers to answer: %.1f' %consensus_answer.relevance_score
         yield formatted_df, rag_answer['answer'], consensus, None, None
 
         # progress(0.8, desc="Analyzing question type")
+        gr.Info("Analyzing question type")
         question_type_gen = guess_question_type(query)
         if '<categorization>' in question_type_gen:
             question_type_gen = question_type_gen.split('<categorization>')[1]
@@ -632,6 +644,7 @@ def run_pathfinder(query, top_k, extra_keywords, toggles, prompt_type, rag_type,
         yield formatted_df, rag_answer['answer'], consensus, qn_type, None
 
         # progress(1.0, desc="Visualizing embeddings")
+        gr.Info("Visualizing embeddings")
         fig = make_embedding_plot(formatted_df, top_k, consensus_answer)
 
         yield formatted_df, rag_answer['answer'], consensus, qn_type, fig
app_gradio.py
CHANGED
@@ -533,12 +533,16 @@ def compileinfo(query, atom_qns, atom_qn_ans, atom_qn_strs):
 def deep_research(question, top_k, ec):
 
     full_answer = '## ' + question
+    yield None, None
 
     gen_client = openai_llm(temperature=0,model_name='gpt-4o-mini', openai_api_key = openai_key)
     messages = [("system",df_atomic_prompt,),("human", question),]
     rscope_text = gen_client.invoke(messages).content
 
     full_answer = full_answer +' \n'+ rscope_text
+    rag_answer = {}
+    rag_answer['answer'] = full_answer
+    yield None, rag_answer
 
     rscope_messages = [("system","""In the given text, what are the main atomic questions being asked? Please answer as a concise list.""",),("human", rscope_text),]
     rscope_qns = gen_client.invoke(rscope_messages).content
@@ -563,11 +567,15 @@ def deep_research(question, top_k, ec):
         atom_qn_strs.append(linkstr)
         full_answer = full_answer +' \n### '+atom_qns[i]
         full_answer = full_answer +' \n'+smallans
+        rag_answer = {}
+        rag_answer['answer'] = full_answer
+        yield None, rag_answer
 
     finalans, finallinks = compileinfo(question, atom_qns, atom_qn_ans, atom_qn_strs)
     full_answer = full_answer +' \n'+'### Summary:\n'+finalans
 
     full_df = pd.concat(atom_qn_dfs, ignore_index=True)
+    full_df.index = full_df.index + 1
 
     rag_answer = {}
     rag_answer['answer'] = full_answer
@@ -602,26 +610,30 @@ def run_pathfinder(query, top_k, extra_keywords, toggles, prompt_type, rag_type,
     ec.rerank = True
 
     if prompt_type == "Deep Research (BETA)":
-        gr.Info("Starting deep research
+        gr.Info("Starting deep research - this takes a few mins, so grab a drink or stretch your legs.")
         formatted_df, rag_answer = deep_research(query, top_k = top_k, ec=ec)
         yield formatted_df, rag_answer['answer'], None, None, None
 
     else:
         # progress(0.2, desc=search_text_list[np.random.choice(len(search_text_list))])
+        gr.Info(search_text_list[np.random.choice(len(search_text_list))])
         rs, small_df = ec.retrieve(query, top_k = top_k, return_scores=True)
         formatted_df = ec.return_formatted_df(rs, small_df)
         yield formatted_df, None, None, None, None
 
         # progress(0.4, desc=gen_text_list[np.random.choice(len(gen_text_list))])
+        gr.Info(gen_text_list[np.random.choice(len(gen_text_list))])
         rag_answer = run_rag_qa(query, formatted_df, prompt_type)
         yield formatted_df, rag_answer['answer'], None, None, None
 
         # progress(0.6, desc="Generating consensus")
+        gr.Info("Generating consensus")
         consensus_answer = evaluate_overall_consensus(query, [formatted_df['abstract'][i+1] for i in range(len(formatted_df))])
         consensus = '## Consensus \n'+consensus_answer.consensus + '\n\n'+consensus_answer.explanation + '\n\n > Relevance of retrieved papers to answer: %.1f' %consensus_answer.relevance_score
         yield formatted_df, rag_answer['answer'], consensus, None, None
 
         # progress(0.8, desc="Analyzing question type")
+        gr.Info("Analyzing question type")
         question_type_gen = guess_question_type(query)
         if '<categorization>' in question_type_gen:
             question_type_gen = question_type_gen.split('<categorization>')[1]
@@ -632,6 +644,7 @@ def run_pathfinder(query, top_k, extra_keywords, toggles, prompt_type, rag_type,
         yield formatted_df, rag_answer['answer'], consensus, qn_type, None
 
         # progress(1.0, desc="Visualizing embeddings")
+        gr.Info("Visualizing embeddings")
         fig = make_embedding_plot(formatted_df, top_k, consensus_answer)
 
         yield formatted_df, rag_answer['answer'], consensus, qn_type, fig
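Both files pick up the same two patterns: the commented-out `progress(...)` calls are superseded by `gr.Info(...)` toast notifications, and `deep_research` now `yield`s intermediate `(df, rag_answer)` pairs so the UI can update before the full answer is assembled. A minimal sketch of how a Gradio generator handler streams such intermediate yields (toy function and component names; the real app wires `run_pathfinder` to its own inputs and outputs):

import gradio as gr

def slow_answer(question):
    # Each yield pushes an intermediate state to the bound output component.
    gr.Info("Starting research")  # toast notification, as in the commit
    yield "Working on: " + question
    partial = "## " + question + "\nFirst findings..."
    yield partial  # the UI updates before the final answer exists
    gr.Info("Generating summary")
    yield partial + "\n### Summary:\nDone."

with gr.Blocks() as demo:
    q = gr.Textbox(label="Question")
    out = gr.Markdown()
    q.submit(slow_answer, inputs=q, outputs=out)

demo.launch()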