kartheikiyer committed on
Commit
c0025fa
·
1 Parent(s): 2f5de34

more df index fixes

Browse files
.ipynb_checkpoints/app_gradio-checkpoint.py CHANGED
@@ -533,12 +533,16 @@ def compileinfo(query, atom_qns, atom_qn_ans, atom_qn_strs):
533
  def deep_research(question, top_k, ec):
534
 
535
  full_answer = '## ' + question
 
536
 
537
  gen_client = openai_llm(temperature=0,model_name='gpt-4o-mini', openai_api_key = openai_key)
538
  messages = [("system",df_atomic_prompt,),("human", question),]
539
  rscope_text = gen_client.invoke(messages).content
540
 
541
  full_answer = full_answer +' \n'+ rscope_text
 
 
 
542
 
543
  rscope_messages = [("system","""In the given text, what are the main atomic questions being asked? Please answer as a concise list.""",),("human", rscope_text),]
544
  rscope_qns = gen_client.invoke(rscope_messages).content
@@ -563,11 +567,15 @@ def deep_research(question, top_k, ec):
563
  atom_qn_strs.append(linkstr)
564
  full_answer = full_answer +' \n### '+atom_qns[i]
565
  full_answer = full_answer +' \n'+smallans
 
 
 
566
 
567
  finalans, finallinks = compileinfo(question, atom_qns, atom_qn_ans, atom_qn_strs)
568
  full_answer = full_answer +' \n'+'### Summary:\n'+finalans
569
 
570
  full_df = pd.concat(atom_qn_dfs, ignore_index=True)
 
571
 
572
  rag_answer = {}
573
  rag_answer['answer'] = full_answer
@@ -602,26 +610,30 @@ def run_pathfinder(query, top_k, extra_keywords, toggles, prompt_type, rag_type,
602
  ec.rerank = True
603
 
604
  if prompt_type == "Deep Research (BETA)":
605
- gr.Info("Starting deep research, please go grab some coffee - this takes a few mins.")
606
  formatted_df, rag_answer = deep_research(query, top_k = top_k, ec=ec)
607
  yield formatted_df, rag_answer['answer'], None, None, None
608
 
609
  else:
610
  # progress(0.2, desc=search_text_list[np.random.choice(len(search_text_list))])
 
611
  rs, small_df = ec.retrieve(query, top_k = top_k, return_scores=True)
612
  formatted_df = ec.return_formatted_df(rs, small_df)
613
  yield formatted_df, None, None, None, None
614
 
615
  # progress(0.4, desc=gen_text_list[np.random.choice(len(gen_text_list))])
 
616
  rag_answer = run_rag_qa(query, formatted_df, prompt_type)
617
  yield formatted_df, rag_answer['answer'], None, None, None
618
 
619
  # progress(0.6, desc="Generating consensus")
 
620
  consensus_answer = evaluate_overall_consensus(query, [formatted_df['abstract'][i+1] for i in range(len(formatted_df))])
621
  consensus = '## Consensus \n'+consensus_answer.consensus + '\n\n'+consensus_answer.explanation + '\n\n > Relevance of retrieved papers to answer: %.1f' %consensus_answer.relevance_score
622
  yield formatted_df, rag_answer['answer'], consensus, None, None
623
 
624
  # progress(0.8, desc="Analyzing question type")
 
625
  question_type_gen = guess_question_type(query)
626
  if '<categorization>' in question_type_gen:
627
  question_type_gen = question_type_gen.split('<categorization>')[1]
@@ -632,6 +644,7 @@ def run_pathfinder(query, top_k, extra_keywords, toggles, prompt_type, rag_type,
632
  yield formatted_df, rag_answer['answer'], consensus, qn_type, None
633
 
634
  # progress(1.0, desc="Visualizing embeddings")
 
635
  fig = make_embedding_plot(formatted_df, top_k, consensus_answer)
636
 
637
  yield formatted_df, rag_answer['answer'], consensus, qn_type, fig
 
533
  def deep_research(question, top_k, ec):
534
 
535
  full_answer = '## ' + question
536
+ yield None, None
537
 
538
  gen_client = openai_llm(temperature=0,model_name='gpt-4o-mini', openai_api_key = openai_key)
539
  messages = [("system",df_atomic_prompt,),("human", question),]
540
  rscope_text = gen_client.invoke(messages).content
541
 
542
  full_answer = full_answer +' \n'+ rscope_text
543
+ rag_answer = {}
544
+ rag_answer['answer'] = full_answer
545
+ yield None, rag_answer
546
 
547
  rscope_messages = [("system","""In the given text, what are the main atomic questions being asked? Please answer as a concise list.""",),("human", rscope_text),]
548
  rscope_qns = gen_client.invoke(rscope_messages).content
 
567
  atom_qn_strs.append(linkstr)
568
  full_answer = full_answer +' \n### '+atom_qns[i]
569
  full_answer = full_answer +' \n'+smallans
570
+ rag_answer = {}
571
+ rag_answer['answer'] = full_answer
572
+ yield None, rag_answer
573
 
574
  finalans, finallinks = compileinfo(question, atom_qns, atom_qn_ans, atom_qn_strs)
575
  full_answer = full_answer +' \n'+'### Summary:\n'+finalans
576
 
577
  full_df = pd.concat(atom_qn_dfs, ignore_index=True)
578
+ full_df.index = full_df.index + 1
579
 
580
  rag_answer = {}
581
  rag_answer['answer'] = full_answer
 
610
  ec.rerank = True
611
 
612
  if prompt_type == "Deep Research (BETA)":
613
+ gr.Info("Starting deep research - this takes a few mins, so grab a drink or stretch your legs.")
614
  formatted_df, rag_answer = deep_research(query, top_k = top_k, ec=ec)
615
  yield formatted_df, rag_answer['answer'], None, None, None
616
 
617
  else:
618
  # progress(0.2, desc=search_text_list[np.random.choice(len(search_text_list))])
619
+ gr.Info(search_text_list[np.random.choice(len(search_text_list))])
620
  rs, small_df = ec.retrieve(query, top_k = top_k, return_scores=True)
621
  formatted_df = ec.return_formatted_df(rs, small_df)
622
  yield formatted_df, None, None, None, None
623
 
624
  # progress(0.4, desc=gen_text_list[np.random.choice(len(gen_text_list))])
625
+ gr.Info(gen_text_list[np.random.choice(len(gen_text_list))])
626
  rag_answer = run_rag_qa(query, formatted_df, prompt_type)
627
  yield formatted_df, rag_answer['answer'], None, None, None
628
 
629
  # progress(0.6, desc="Generating consensus")
630
+ gr.Info("Generating consensus")
631
  consensus_answer = evaluate_overall_consensus(query, [formatted_df['abstract'][i+1] for i in range(len(formatted_df))])
632
  consensus = '## Consensus \n'+consensus_answer.consensus + '\n\n'+consensus_answer.explanation + '\n\n > Relevance of retrieved papers to answer: %.1f' %consensus_answer.relevance_score
633
  yield formatted_df, rag_answer['answer'], consensus, None, None
634
 
635
  # progress(0.8, desc="Analyzing question type")
636
+ gr.Info("Analyzing question type")
637
  question_type_gen = guess_question_type(query)
638
  if '<categorization>' in question_type_gen:
639
  question_type_gen = question_type_gen.split('<categorization>')[1]
 
644
  yield formatted_df, rag_answer['answer'], consensus, qn_type, None
645
 
646
  # progress(1.0, desc="Visualizing embeddings")
647
+ gr.Info("Visualizing embeddings")
648
  fig = make_embedding_plot(formatted_df, top_k, consensus_answer)
649
 
650
  yield formatted_df, rag_answer['answer'], consensus, qn_type, fig
app_gradio.py CHANGED
@@ -533,12 +533,16 @@ def compileinfo(query, atom_qns, atom_qn_ans, atom_qn_strs):
533
  def deep_research(question, top_k, ec):
534
 
535
  full_answer = '## ' + question
 
536
 
537
  gen_client = openai_llm(temperature=0,model_name='gpt-4o-mini', openai_api_key = openai_key)
538
  messages = [("system",df_atomic_prompt,),("human", question),]
539
  rscope_text = gen_client.invoke(messages).content
540
 
541
  full_answer = full_answer +' \n'+ rscope_text
 
 
 
542
 
543
  rscope_messages = [("system","""In the given text, what are the main atomic questions being asked? Please answer as a concise list.""",),("human", rscope_text),]
544
  rscope_qns = gen_client.invoke(rscope_messages).content
@@ -563,11 +567,15 @@ def deep_research(question, top_k, ec):
563
  atom_qn_strs.append(linkstr)
564
  full_answer = full_answer +' \n### '+atom_qns[i]
565
  full_answer = full_answer +' \n'+smallans
 
 
 
566
 
567
  finalans, finallinks = compileinfo(question, atom_qns, atom_qn_ans, atom_qn_strs)
568
  full_answer = full_answer +' \n'+'### Summary:\n'+finalans
569
 
570
  full_df = pd.concat(atom_qn_dfs, ignore_index=True)
 
571
 
572
  rag_answer = {}
573
  rag_answer['answer'] = full_answer
@@ -602,26 +610,30 @@ def run_pathfinder(query, top_k, extra_keywords, toggles, prompt_type, rag_type,
602
  ec.rerank = True
603
 
604
  if prompt_type == "Deep Research (BETA)":
605
- gr.Info("Starting deep research, please go grab some coffee - this takes a few mins.")
606
  formatted_df, rag_answer = deep_research(query, top_k = top_k, ec=ec)
607
  yield formatted_df, rag_answer['answer'], None, None, None
608
 
609
  else:
610
  # progress(0.2, desc=search_text_list[np.random.choice(len(search_text_list))])
 
611
  rs, small_df = ec.retrieve(query, top_k = top_k, return_scores=True)
612
  formatted_df = ec.return_formatted_df(rs, small_df)
613
  yield formatted_df, None, None, None, None
614
 
615
  # progress(0.4, desc=gen_text_list[np.random.choice(len(gen_text_list))])
 
616
  rag_answer = run_rag_qa(query, formatted_df, prompt_type)
617
  yield formatted_df, rag_answer['answer'], None, None, None
618
 
619
  # progress(0.6, desc="Generating consensus")
 
620
  consensus_answer = evaluate_overall_consensus(query, [formatted_df['abstract'][i+1] for i in range(len(formatted_df))])
621
  consensus = '## Consensus \n'+consensus_answer.consensus + '\n\n'+consensus_answer.explanation + '\n\n > Relevance of retrieved papers to answer: %.1f' %consensus_answer.relevance_score
622
  yield formatted_df, rag_answer['answer'], consensus, None, None
623
 
624
  # progress(0.8, desc="Analyzing question type")
 
625
  question_type_gen = guess_question_type(query)
626
  if '<categorization>' in question_type_gen:
627
  question_type_gen = question_type_gen.split('<categorization>')[1]
@@ -632,6 +644,7 @@ def run_pathfinder(query, top_k, extra_keywords, toggles, prompt_type, rag_type,
632
  yield formatted_df, rag_answer['answer'], consensus, qn_type, None
633
 
634
  # progress(1.0, desc="Visualizing embeddings")
 
635
  fig = make_embedding_plot(formatted_df, top_k, consensus_answer)
636
 
637
  yield formatted_df, rag_answer['answer'], consensus, qn_type, fig
 
533
  def deep_research(question, top_k, ec):
534
 
535
  full_answer = '## ' + question
536
+ yield None, None
537
 
538
  gen_client = openai_llm(temperature=0,model_name='gpt-4o-mini', openai_api_key = openai_key)
539
  messages = [("system",df_atomic_prompt,),("human", question),]
540
  rscope_text = gen_client.invoke(messages).content
541
 
542
  full_answer = full_answer +' \n'+ rscope_text
543
+ rag_answer = {}
544
+ rag_answer['answer'] = full_answer
545
+ yield None, rag_answer
546
 
547
  rscope_messages = [("system","""In the given text, what are the main atomic questions being asked? Please answer as a concise list.""",),("human", rscope_text),]
548
  rscope_qns = gen_client.invoke(rscope_messages).content
 
567
  atom_qn_strs.append(linkstr)
568
  full_answer = full_answer +' \n### '+atom_qns[i]
569
  full_answer = full_answer +' \n'+smallans
570
+ rag_answer = {}
571
+ rag_answer['answer'] = full_answer
572
+ yield None, rag_answer
573
 
574
  finalans, finallinks = compileinfo(question, atom_qns, atom_qn_ans, atom_qn_strs)
575
  full_answer = full_answer +' \n'+'### Summary:\n'+finalans
576
 
577
  full_df = pd.concat(atom_qn_dfs, ignore_index=True)
578
+ full_df.index = full_df.index + 1
579
 
580
  rag_answer = {}
581
  rag_answer['answer'] = full_answer
 
610
  ec.rerank = True
611
 
612
  if prompt_type == "Deep Research (BETA)":
613
+ gr.Info("Starting deep research - this takes a few mins, so grab a drink or stretch your legs.")
614
  formatted_df, rag_answer = deep_research(query, top_k = top_k, ec=ec)
615
  yield formatted_df, rag_answer['answer'], None, None, None
616
 
617
  else:
618
  # progress(0.2, desc=search_text_list[np.random.choice(len(search_text_list))])
619
+ gr.Info(search_text_list[np.random.choice(len(search_text_list))])
620
  rs, small_df = ec.retrieve(query, top_k = top_k, return_scores=True)
621
  formatted_df = ec.return_formatted_df(rs, small_df)
622
  yield formatted_df, None, None, None, None
623
 
624
  # progress(0.4, desc=gen_text_list[np.random.choice(len(gen_text_list))])
625
+ gr.Info(gen_text_list[np.random.choice(len(gen_text_list))])
626
  rag_answer = run_rag_qa(query, formatted_df, prompt_type)
627
  yield formatted_df, rag_answer['answer'], None, None, None
628
 
629
  # progress(0.6, desc="Generating consensus")
630
+ gr.Info("Generating consensus")
631
  consensus_answer = evaluate_overall_consensus(query, [formatted_df['abstract'][i+1] for i in range(len(formatted_df))])
632
  consensus = '## Consensus \n'+consensus_answer.consensus + '\n\n'+consensus_answer.explanation + '\n\n > Relevance of retrieved papers to answer: %.1f' %consensus_answer.relevance_score
633
  yield formatted_df, rag_answer['answer'], consensus, None, None
634
 
635
  # progress(0.8, desc="Analyzing question type")
636
+ gr.Info("Analyzing question type")
637
  question_type_gen = guess_question_type(query)
638
  if '<categorization>' in question_type_gen:
639
  question_type_gen = question_type_gen.split('<categorization>')[1]
 
644
  yield formatted_df, rag_answer['answer'], consensus, qn_type, None
645
 
646
  # progress(1.0, desc="Visualizing embeddings")
647
+ gr.Info("Visualizing embeddings")
648
  fig = make_embedding_plot(formatted_df, top_k, consensus_answer)
649
 
650
  yield formatted_df, rag_answer['answer'], consensus, qn_type, fig