Spaces:

jskim
/

paper-matching

Runtime error

App Files Files Community

jskim committed on Apr 3, 2023

Commit

c2dbf38

1 Parent(s): 6004e76

adding slider to the top results

Browse files

Files changed (2) hide show

app.py +259 -226
input_format.py +0 -1

app.py CHANGED Viewed

@@ -1,17 +1,17 @@
 import gradio as gr
-import os
 from transformers import AutoTokenizer, AutoModel
 from sentence_transformers import SentenceTransformer
 import pickle
 import nltk
-nltk.download('punkt') # tokenizer
-nltk.download('averaged_perceptron_tagger') # postagger
 import time
 from input_format import *
 from score import *
-# load document scoring model
 #torch.cuda.is_available = lambda : False  # uncomment to test with CPU only
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 #pretrained_model = 'allenai/specter'
@@ -20,21 +20,25 @@ tokenizer = AutoTokenizer.from_pretrained(pretrained_model)
 doc_model = AutoModel.from_pretrained(pretrained_model)
 doc_model.to(device)
-# load sentence model
 sent_model = doc_model # have the same model for document and sentence level
 # OR specify different model for sentence level
 #sent_model = SentenceTransformer('sentence-transformers/gtr-t5-base')
 #sent_model.to(device)
 def get_similar_paper(
     title_input,
     abstract_text_input,
     author_id_input,
     results={}, # this state variable will be updated and returned
-):
     progress = gr.Progress()
-    num_papers_show = 10 # number of top papers to show from the reviewer
     if title_input == None:
         title_input = '' # if no title is given, just focus on abstract.
     print('retrieving similar papers...')
@@ -60,17 +64,18 @@ def get_similar_paper(
     results = {
         'name': name,
         'titles': titles,
         'abstracts': abstracts,
         'urls': paper_urls,
         'doc_scores': doc_scores
     }
-    # Select top K choices of papers to show
-    titles = titles[:num_papers_show]
-    abstracts = abstracts[:num_papers_show]
-    doc_scores = doc_scores[:num_papers_show]
-    paper_urls = paper_urls[:num_papers_show]
     display_title = ['[ %0.3f ] %s'%(s, t) for t, s in zip(titles, doc_scores)]
     end = time.time()
@@ -90,8 +95,8 @@ def get_similar_paper(
             tokenizer,
             abstract_text_input,
             ab,
-            K=None, # top two sentences from the candidate
-            top_pair_num=3, # top five sentence pairs to show upfront
         )
         num_cand_sents = sent_ids.shape[1]
@@ -116,104 +121,136 @@ def get_similar_paper(
             'top_pairs': top_pairs_info,
             'url': url
         }
     end = time.time()
     highlight_time = end - start
     print('done in [%0.2f] seconds'%(highlight_time))
-    # debugging only
-    pickle.dump(results, open('info.pkl', 'wb'))
     ## Set up output elements
-    # first the list of top papers, sentences to select from, paper_title, affinity
     title = results[display_title[0]]['title'] # set default title as the top paper
     url = results[display_title[0]]['url']
     aff_score = results[display_title[0]]['doc_score']
     title_out = """<a href="%s" target="_blank"><h5>%s</h5></a>"""%(url, title)
     aff_score_out = '##### Affinity Score: %s'%aff_score
-    out = [
         gr.update(choices=display_title, value=display_title[0],  interactive=True), # set of papers (radio)
         gr.update(choices=input_sentences, value=input_sentences[0], interactive=True), # submission sentences
         gr.update(value=title_out), # paper_title
-        gr.update(value=aff_score_out)  # affinity
     ]
-    # set up elements to visualize upfront
-    top_papers_show = 3 # number of top papers to show upfront
-    top_num_info_show = 2 # number of sentence pairs from each paper to show upfront
-    summary_out = []
     for i in range(top_papers_show):
         if i == 0:
-            out_tmp = [
-                gr.update(value="""<a href="%s" target="_blank"><h4>%s</h4></a>"""%(paper_urls[i], titles[i]), visible=True),
                 gr.update(value="""#### Affinity Score: %0.3f
-                          <div class="help-tip">
                                 <p>Measures how similar the paper's abstract is to the submission abstract.</p>
-                          </div>
-                          """%doc_scores[i],
-                          visible=True) # document affinity
-            ]
         else:
-            out_tmp = [
-                gr.update(value="""<a href="%s" target="_blank"><h4>%s</h4></a>"""%(paper_urls[i], titles[i]), visible=True),
                 gr.update(value='#### Affinity Score: %0.3f'%doc_scores[i], visible=True) # document affinity
-            ]
-        tp = results[display_title[i]]['top_pairs']
         for j in range(top_num_info_show):
             if i == 0 and j == 0:
-                out_tmp += [
                     gr.update(value="""Sentence Relevance:\n%0.3f
                             <div class="help-tip">
                                 <p>Measures how similar the sentence pairs are.</p>
-                            </div>"""%tp[j]['score'], visible=True), # sentence relevance
-                    tp[j]['query']['original'],
-                    tp[j]['query'],
-                    tp[j]['candidate']['original'],
-                    tp[j]['candidate']
-                ]
             else:
-                out_tmp += [
-                    gr.update(value='Sentence Relevance:\n%0.3f'%tp[j]['score'], visible=True), # sentence relevance
-                    tp[j]['query']['original'],
-                    tp[j]['query'],
-                    tp[j]['candidate']['original'],
-                    tp[j]['candidate']
-                ]
-        summary_out += out_tmp
-    # add updates to the show more button
-    out = out + summary_out + [gr.update(visible=True)] # make show more button visible
-    assert(len(out) == (top_num_info_show * 5 + 2) * top_papers_show + 5)
-    out += [gr.update(value="""
-        <h3>Top three relevant papers by the reviewer <a href="%s" target="_blank">%s</a></h3>
-        For each paper, two sentence pairs (one from the submission, one from the paper) with the highest relevance scores are shown.
-        **<span style="color:black;background-color:#65B5E3;">Blue highlights</span>**: phrases that appear in both sentences.
-        """%(author_id_input, results['name']),
-        visible=True)] # result 1 description
-    out += [gr.update(visible=True), gr.update(visible=True)] # demarcation line between results
-    # progress status
-    out += [gr.update(value='Done (in %0.1f seconds)'%(retrieval_time+highlight_time), visible=True)]
-    # result 2 description
-    desc = """
-        ##### Click a paper by %s on the left (sorted by affinity scores), and a sentence from the submission on the right, to see which parts of the paper are relevant.
-    """%results['name']
-    out += [gr.update(value=desc)]
-    # slider to control the number of highlights
-    out += [gr.update(value=1, maximum=len(sent_tokenize(abstracts[0])))]
-    # finally add the search results to pass on to the Gradio State variable
-    out += [results]
-    return tuple(out)
 def show_more(info):
     # show the interactive part of the app
     return (
@@ -290,6 +327,49 @@ def change_num_highlight(
     else:
         return
 with gr.Blocks(css='style.css') as demo:
     info = gr.State({})  # cached search results as a State variable shared throughout
@@ -310,15 +390,13 @@ A typical meta-reviewer workflow lacks supportive information on **what makes th
 R2P2 provides more information about each reviewer. It searches for the **most relevant papers** among the reviewer's previous publications and **highlights relevant parts** within them.
     """
-    # TODO add instruction video link
     # More details (video, addendum)
-    more_details_instruction = """Check out <a href="", target="_blank">this video</a> for a quick demo of what R2P2 is and how it can help. You can find more details <a href="file/details.html", target="_blank">here</a>, along with our privacy policy and disclaimer."""
     gr.Markdown(general_instruction)
     gr.HTML(more_details_instruction)
     gr.Markdown("""---""")
     ### INPUT
     with gr.Row() as input_row:
         with gr.Column(scale=3):
@@ -350,97 +428,57 @@ R2P2 provides more information about each reviewer. It searches for the **most r
     with gr.Row():
         search_status = gr.Textbox(label='Search Status', interactive=False, visible=False)
-    ### OVERVIEW
-    # Paper title, score, and top-ranking sentence pairs -- two sentence pairs per paper, three papers
-    ## ONE BLOCK OF INFO FOR A SINGLE PAPER
-    ## PAPER1
-    with gr.Row():
-        result1_desc = gr.Markdown(value='', visible=False)
     with gr.Row():
-        with gr.Column(scale=3):
-            paper_title1 = gr.Markdown(value='', visible=False)
-        with gr.Column(scale=1):
-            affinity1 = gr.Markdown(value='', visible=False)
-    with gr.Row() as rel1_1:
-        with gr.Column(scale=1):
-            sent_pair_score1_1 = gr.Markdown(interactive=False, value='', visible=False)
-        with gr.Column(scale=4):
-            sent_pair_source1_1 = gr.Textbox(label='Sentence from Submission', visible=False)
-            sent_pair_source1_1_hl = gr.components.Interpretation(sent_pair_source1_1)
-        with gr.Column(scale=4):
-            sent_pair_candidate1_1 = gr.Textbox(label="Sentence from Reviewer's Paper", visible=False)
-            sent_pair_candidate1_1_hl = gr.components.Interpretation(sent_pair_candidate1_1)
-    with gr.Row() as rel1_2:
-        with gr.Column(scale=1):
-            sent_pair_score1_2 = gr.Markdown(interactive=False, value='', visible=False)
-        with gr.Column(scale=4):
-            sent_pair_source1_2 = gr.Textbox(label='Sentence from Submission', visible=False)
-            sent_pair_source1_2_hl = gr.components.Interpretation(sent_pair_source1_2)
-        with gr.Column(scale=4):
-            sent_pair_candidate1_2 = gr.Textbox(label="Sentence from Reviewer's Paper", visible=False)
-            sent_pair_candidate1_2_hl = gr.components.Interpretation(sent_pair_candidate1_2)
-    with gr.Row(visible=False) as demarc1:
-        gr.Markdown(
-            """---"""
-        )
-    ## PAPER 2
-    with gr.Row():
-        with gr.Column(scale=3):
-            paper_title2 = gr.Markdown(value='', visible=False)
-        with gr.Column(scale=1):
-            affinity2 = gr.Markdown(value='', visible=False)
-    with gr.Row() as rel2_1:
-        with gr.Column(scale=1):
-            sent_pair_score2_1 = gr.Markdown(interactive=False, value='', visible=False)
-        with gr.Column(scale=4):
-            sent_pair_source2_1 = gr.Textbox(label='Sentence from Submission', visible=False)
-            sent_pair_source2_1_hl = gr.components.Interpretation(sent_pair_source2_1)
-        with gr.Column(scale=4):
-            sent_pair_candidate2_1 = gr.Textbox(label="Sentence from Reviewer's Paper", visible=False)
-            sent_pair_candidate2_1_hl = gr.components.Interpretation(sent_pair_candidate2_1)
-    with gr.Row() as rel2_2:
-        with gr.Column(scale=1):
-            sent_pair_score2_2 = gr.Markdown(interactive=False, value='', visible=False)
-        with gr.Column(scale=4):
-            sent_pair_source2_2 = gr.Textbox(label='Sentence from Submission', visible=False)
-            sent_pair_source2_2_hl = gr.components.Interpretation(sent_pair_source2_2)
-        with gr.Column(scale=4):
-            sent_pair_candidate2_2 = gr.Textbox(label="Sentence from Reviewer's Paper", visible=False)
-            sent_pair_candidate2_2_hl = gr.components.Interpretation(sent_pair_candidate2_2)
-    with gr.Row(visible=False) as demarc2:
-        gr.Markdown(
-            """---"""
-        )
-    ## PAPER 3
-    with gr.Row():
-        with gr.Column(scale=3):
-            paper_title3 = gr.Markdown(value='', visible=False)
-        with gr.Column(scale=1):
-            affinity3 = gr.Markdown(value='', visible=False)
-    with gr.Row() as rel3_1:
-        with gr.Column(scale=1):
-            sent_pair_score3_1 = gr.Markdown(interactive=False, value='', visible=False)
-        with gr.Column(scale=4):
-            sent_pair_source3_1 = gr.Textbox(label='Sentence from Submission', visible=False)
-            sent_pair_source3_1_hl = gr.components.Interpretation(sent_pair_source3_1)
-        with gr.Column(scale=4):
-            sent_pair_candidate3_1 = gr.Textbox(label="Sentence from Reviewer's Paper", visible=False)
-            sent_pair_candidate3_1_hl = gr.components.Interpretation(sent_pair_candidate3_1)
-    with gr.Row() as rel3_2:
-        with gr.Column(scale=1):
-            sent_pair_score3_2 = gr.Markdown(interactive=False, value='', visible=False)
-        with gr.Column(scale=4):
-            sent_pair_source3_2 = gr.Textbox(label='Sentence from Submission', visible=False)
-            sent_pair_source3_2_hl = gr.components.Interpretation(sent_pair_source3_2)
-        with gr.Column(scale=4):
-            sent_pair_candidate3_2 = gr.Textbox(label="Sentence from Reviewer's Paper", visible=False)
-            sent_pair_candidate3_2_hl = gr.components.Interpretation(sent_pair_candidate3_2)
     ## Show more button
     with gr.Row():
         see_more_rel_btn = gr.Button('Explore more', visible=False)
@@ -499,74 +537,47 @@ R2P2 provides more information about each reviewer. It searches for the **most r
                 # highlighted text from paper
                 highlight = gr.components.Interpretation(paper_abstract)
     ### EVENT LISTENERS
     compute_btn.click(
         fn=show_status,
         inputs=[],
         outputs=search_status
     )
-    # retrieve similar papers and show top results
     compute_btn.click(
         fn=get_similar_paper,
         inputs=[
             title_input,
             abstract_text_input,
             author_id_input,
             info
         ],
-        outputs=[
-            selected_papers_radio, # list of papers for show more section
-            source_sentences, # list of sentences for show more section
-            paper_title, # paper title for show more section
-            affinity, # paper affinity for show more section
-            paper_title1, # paper info
-            affinity1,
-            sent_pair_score1_1,
-            sent_pair_source1_1,
-            sent_pair_source1_1_hl,
-            sent_pair_candidate1_1,
-            sent_pair_candidate1_1_hl,
-            sent_pair_score1_2,
-            sent_pair_source1_2,
-            sent_pair_source1_2_hl,
-            sent_pair_candidate1_2,
-            sent_pair_candidate1_2_hl,
-            paper_title2,
-            affinity2,
-            sent_pair_score2_1,
-            sent_pair_source2_1,
-            sent_pair_source2_1_hl,
-            sent_pair_candidate2_1,
-            sent_pair_candidate2_1_hl,
-            sent_pair_score2_2,
-            sent_pair_source2_2,
-            sent_pair_source2_2_hl,
-            sent_pair_candidate2_2,
-            sent_pair_candidate2_2_hl,
-            paper_title3,
-            affinity3,
-            sent_pair_score3_1,
-            sent_pair_source3_1,
-            sent_pair_source3_1_hl,
-            sent_pair_candidate3_1,
-            sent_pair_candidate3_1_hl,
-            sent_pair_score3_2,
-            sent_pair_source3_2,
-            sent_pair_source3_2_hl,
-            sent_pair_candidate3_2,
-            sent_pair_candidate3_2_hl,
-            see_more_rel_btn,
-            result1_desc,
-            demarc1,
-            demarc2,
-            search_status,
-            result2_desc,
-            highlight_slider,
-            info,
-        ],
         show_progress=True,
         scroll_to_output=True
     )
@@ -617,6 +628,7 @@ R2P2 provides more information about each reviewer. It searches for the **most r
         ]
     )
     highlight_slider.change(
         fn=change_num_highlight,
         inputs=[
@@ -630,6 +642,27 @@ R2P2 provides more information about each reviewer. It searches for the **most r
         ]
     )
-if __name__ == "__main__":
-    demo.queue().launch()  # add ?__theme=light to force light mode

 import gradio as gr
 from transformers import AutoTokenizer, AutoModel
 from sentence_transformers import SentenceTransformer
 import pickle
 import nltk
 import time
 from input_format import *
 from score import *
+nltk.download('punkt') # tokenizer
+nltk.download('averaged_perceptron_tagger') # postagger
+## load document scoring model
 #torch.cuda.is_available = lambda : False  # uncomment to test with CPU only
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 #pretrained_model = 'allenai/specter'
 doc_model = AutoModel.from_pretrained(pretrained_model)
 doc_model.to(device)
+## load sentence model
 sent_model = doc_model # have the same model for document and sentence level
 # OR specify different model for sentence level
 #sent_model = SentenceTransformer('sentence-transformers/gtr-t5-base')
 #sent_model.to(device)
+NUM_PAPERS_SHOW = 5 # max number of top papers to show from the reviewer upfront
+NUM_PAIRS_SHOW = 5 # max number of top sentence pairs to show
 def get_similar_paper(
     title_input,
     abstract_text_input,
     author_id_input,
+    top_paper_slider,
+    top_pair_slider,
     results={}, # this state variable will be updated and returned
+):
     progress = gr.Progress()
     if title_input == None:
         title_input = '' # if no title is given, just focus on abstract.
     print('retrieving similar papers...')
     results = {
         'name': name,
+        'author_url': author_id_input,
         'titles': titles,
         'abstracts': abstracts,
         'urls': paper_urls,
         'doc_scores': doc_scores
     }
+    # Select top 10 papers to show
+    titles = titles[:10]
+    abstracts = abstracts[:10]
+    doc_scores = doc_scores[:10]
+    paper_urls = paper_urls[:10]
     display_title = ['[ %0.3f ] %s'%(s, t) for t, s in zip(titles, doc_scores)]
     end = time.time()
             tokenizer,
             abstract_text_input,
             ab,
+            K=None,
+            top_pair_num=10, # top ten sentence pairs at max to show upfront
         )
         num_cand_sents = sent_ids.shape[1]
             'top_pairs': top_pairs_info,
             'url': url
         }
     end = time.time()
     highlight_time = end - start
     print('done in [%0.2f] seconds'%(highlight_time))
     ## Set up output elements
+    ## Components for Initial Part
+    result1_desc_value = """
+        <h3>Top %d relevant papers by the reviewer <a href="%s" target="_blank">%s</a></h3>
+        For each paper, top %d sentence pairs (one from the submission, one from the paper) with the highest relevance scores are shown.
+        **<span style="color:black;background-color:#65B5E3;">Blue highlights</span>**: phrases that appear in both sentences.
+        """%(int(top_paper_slider), author_id_input, results['name'], int(top_pair_slider))
+    out1 = [
+        gr.update(visible=True), # Explore more button
+        gr.update(value=result1_desc_value, visible=True), # result 1 description
+        gr.update(value='Done (in %0.1f seconds)'%(retrieval_time+highlight_time), visible=True), # search status
+        gr.update(visible=True),  # top paper slider
+        gr.update(visible=True) # top pair slider
+    ]
+    ### Components for Results in Initial Part
+    top_papers_show = int(top_paper_slider) # number of top papers to show upfront
+    top_num_info_show = int(top_pair_slider) # number of sentence pairs from each paper to show upfront
+    output = setup_outputs(results, top_papers_show, top_num_info_show)
+    out2 = []
+    for x in output:
+        out2 += x
+    ### Components for Explore More Section
+    # list of top papers, sentences to select from, paper_title, affinity
     title = results[display_title[0]]['title'] # set default title as the top paper
     url = results[display_title[0]]['url']
     aff_score = results[display_title[0]]['doc_score']
     title_out = """<a href="%s" target="_blank"><h5>%s</h5></a>"""%(url, title)
     aff_score_out = '##### Affinity Score: %s'%aff_score
+    result2_desc_value = """
+        ##### Click a paper by %s (left, sorted by affinity scores), and a sentence from the submission (center), to see which parts of the paper are relevant (right).
+        """%results['name']
+    out3 = [
         gr.update(choices=display_title, value=display_title[0],  interactive=True), # set of papers (radio)
         gr.update(choices=input_sentences, value=input_sentences[0], interactive=True), # submission sentences
         gr.update(value=title_out), # paper_title
+        gr.update(value=aff_score_out),  # affinity
+        gr.update(value=result2_desc_value), # result 2 description (show more section)
+        gr.update(value=1, maximum=len(sent_tokenize(abstracts[0]))), # highlight slider to control
     ]
+    ## Return by adding the State variable info
+    return out1 + out2 + out3 + [results]
+def setup_outputs(info, top_papers_show, top_num_info_show):
+    titles = info['titles']
+    doc_scores = info['doc_scores']
+    paper_urls = info['urls']
+    display_title = ['[ %0.3f ] %s'%(s, t) for t, s in zip(info['titles'], info['doc_scores'])]
+    title = []
+    affinity = []
+    sent_pair_score = []
+    sent_text_query = []
+    sent_text_candidate = []
+    sent_hl_query = []
+    sent_hl_candidate = []
+    demarc_lines = []
     for i in range(top_papers_show):
         if i == 0:
+            title.append(
+                gr.update(value="""<a href="%s" target="_blank"><h4>%s</h4></a>"""%(paper_urls[i], titles[i]), visible=True)
+            )
+            affinity.append(
                 gr.update(value="""#### Affinity Score: %0.3f
+                        <div class="help-tip">
                                 <p>Measures how similar the paper's abstract is to the submission abstract.</p>
+                        </div>
+                        """%doc_scores[i], visible=True) # document affinity
+            )
         else:
+            title.append(
+                gr.update(value="""<a href="%s" target="_blank"><h4>%s</h4></a>"""%(paper_urls[i], titles[i]), visible=True)
+            )
+            affinity.append(
                 gr.update(value='#### Affinity Score: %0.3f'%doc_scores[i], visible=True) # document affinity
+            )
+        demarc_lines.append(gr.Markdown.update(visible=True))
+        # fill in the rest as
+        tp = info[display_title[i]]['top_pairs']
         for j in range(top_num_info_show):
             if i == 0 and j == 0:
+                # for the first entry add help tip
+                sent_pair_score.append(
                     gr.update(value="""Sentence Relevance:\n%0.3f
                             <div class="help-tip">
                                 <p>Measures how similar the sentence pairs are.</p>
+                            </div>"""%tp[j]['score'], visible=True)
+                )
             else:
+                sent_pair_score.append(
+                    gr.Textbox.update(value='Sentence Relevance:\n%0.3f'%tp[j]['score'], visible=True)
+                )
+            sent_text_query.append(gr.Textbox.update(tp[j]['query']['original']))
+            sent_text_candidate.append(gr.Textbox.update(tp[j]['candidate']['original']))
+            sent_hl_query.append(tp[j]['query'])
+            sent_hl_candidate.append(tp[j]['candidate'])
+            #row2.append(gr.update(visible=True))
+        sent_pair_score += [gr.Markdown.update(visible=False)] * (NUM_PAIRS_SHOW - top_num_info_show)
+        sent_text_query += [gr.Textbox.update(value='', visible=False)] * (NUM_PAIRS_SHOW - top_num_info_show)
+        sent_text_candidate += [gr.Textbox.update(value='', visible=False)] * (NUM_PAIRS_SHOW - top_num_info_show)
+        sent_hl_query += [None] * (NUM_PAIRS_SHOW - top_num_info_show)
+        sent_hl_candidate += [None] * (NUM_PAIRS_SHOW - top_num_info_show)
+    # mark others not visible
+    title += [gr.Markdown.update(visible=False)] * (NUM_PAPERS_SHOW - top_papers_show)
+    affinity += [gr.Markdown.update(visible=False)] * (NUM_PAPERS_SHOW - top_papers_show)
+    demarc_lines += [gr.Markdown.update(visible=False)] * (NUM_PAPERS_SHOW - top_papers_show)
+    sent_pair_score += [gr.Markdown.update(visible=False)] * (NUM_PAPERS_SHOW - top_papers_show) * NUM_PAIRS_SHOW
+    sent_text_query += [gr.Textbox.update(value='', visible=False)] * (NUM_PAPERS_SHOW - top_papers_show) * NUM_PAIRS_SHOW
+    sent_text_candidate += [gr.Textbox.update(value='', visible=False)]  * (NUM_PAPERS_SHOW - top_papers_show) * NUM_PAIRS_SHOW
+    sent_hl_query += [None] * (NUM_PAPERS_SHOW - top_papers_show) * NUM_PAIRS_SHOW
+    sent_hl_candidate += [None] * (NUM_PAPERS_SHOW - top_papers_show) * NUM_PAIRS_SHOW
+    assert(len(title) == NUM_PAPERS_SHOW)
+    assert(len(affinity) == NUM_PAPERS_SHOW)
+    assert(len(sent_pair_score) == NUM_PAIRS_SHOW * NUM_PAPERS_SHOW)
+    return title, affinity, demarc_lines, sent_pair_score, sent_text_query, sent_text_candidate, sent_hl_query, sent_hl_candidate
 def show_more(info):
     # show the interactive part of the app
     return (
     else:
         return
+def change_top_output(top_paper_slider, top_pair_slider, info={}):
+    top_papers_show = int(top_paper_slider)
+    top_num_info_show = int(top_pair_slider)
+    result1_desc_value = """
+    <h3>Top %d relevant papers by the reviewer <a href="%s" target="_blank">%s</a></h3>
+    For each paper, top %d sentence pairs (one from the submission, one from the paper) with the highest relevance scores are shown.
+    **<span style="color:black;background-color:#65B5E3;">Blue highlights</span>**: phrases that appear in both sentences.
+    """%(int(top_paper_slider), info['author_url'], info['name'], int(top_pair_slider))
+    if len(info.keys()) != 0:
+        tmp = setup_outputs(info, top_papers_show, top_num_info_show)
+        x = []
+        for t in tmp:
+            x += t
+        return x + [gr.update(value=result1_desc_value)]
+    else:
+        return
+def reinit_hl(top_paper_slider, top_pair_slider, *args):
+    args = list(args)
+    base = 3*NUM_PAPERS_SHOW+NUM_PAPERS_SHOW*NUM_PAIRS_SHOW
+    increment = NUM_PAPERS_SHOW*NUM_PAIRS_SHOW
+    text_query = args[base:base+increment]
+    text_candidate = args[base+increment:base+2*increment]
+    hl_query = args[base+2*increment:base+3*increment]
+    hl_candidate = args[base+3*increment:base+4*increment]
+    for i in range(int(top_paper_slider)):
+        for j in range(int(top_pair_slider),NUM_PAIRS_SHOW):
+            hl_query[i*NUM_PAIRS_SHOW+j] = gr.components.Interpretation(text_query[i*NUM_PAIRS_SHOW+j])
+            hl_candidate[i*NUM_PAIRS_SHOW+j] = gr.components.Interpretation(text_candidate[i*NUM_PAIRS_SHOW+j])
+    for i in range(int(top_paper_slider),NUM_PAPERS_SHOW):
+        for j in range(NUM_PAPERS_SHOW):
+            hl_query[i*NUM_PAIRS_SHOW+j] = gr.components.Interpretation(text_query[i*NUM_PAIRS_SHOW+j])
+            hl_candidate[i*NUM_PAIRS_SHOW+j] = gr.components.Interpretation(text_candidate[i*NUM_PAIRS_SHOW+j])
+    args[base:base+increment] = text_query
+    args[base+increment:base+2*increment] = text_candidate
+    args[base+2*increment:base+3*increment] = hl_query
+    args[base+3*increment:base+4*increment] = hl_candidate
+    return args
 with gr.Blocks(css='style.css') as demo:
     info = gr.State({})  # cached search results as a State variable shared throughout
 R2P2 provides more information about each reviewer. It searches for the **most relevant papers** among the reviewer's previous publications and **highlights relevant parts** within them.
     """
     # More details (video, addendum)
+    more_details_instruction = """Check out <a href="https://drive.google.com/file/d/1Ex_-cOplBitO7riNGliecFc8H3chXUN-/view?usp=share_link", target="_blank">this video</a> for a quick introduction of what R2P2 is and how it can help. You can find more details <a href="file/details.html", target="_blank">here</a>, along with our privacy policy and disclaimer."""
     gr.Markdown(general_instruction)
     gr.HTML(more_details_instruction)
     gr.Markdown("""---""")
     ### INPUT
     with gr.Row() as input_row:
         with gr.Column(scale=3):
     with gr.Row():
         search_status = gr.Textbox(label='Search Status', interactive=False, visible=False)
+    ### OVERVIEW RESULTS
+    # Paper title, score, and top-ranking sentence pairs
+    # sliders for controlling the number of results displayed
     with gr.Row():
+        with gr.Column(scale=5):
+            result1_desc = gr.Markdown(value='', visible=False)
+        with gr.Column(scale=2):
+            with gr.Row():
+                top_paper_slider = gr.Slider(label='Top-K Papers by the Reviewer', value=3, minimum=3, step=1, maximum=NUM_PAPERS_SHOW, visible=False)
+            with gr.Row():
+                top_pair_slider = gr.Slider(label='Top-K Sentence Pairs per Paper', value=2, minimum=2, step=1, maximum=NUM_PAIRS_SHOW, visible=False)
+    paper_title_up = []
+    paper_affinity_up = []
+    sent_pair_score = []
+    sent_text_query = []
+    sent_text_candidate = []
+    sent_hl_query = []
+    sent_hl_candidate = []
+    demarc_lines = []
+    row_elems1 = []
+    row_elems2 = []
+    for i in range(NUM_PAPERS_SHOW):
+        with gr.Row():
+            with gr.Column(scale=3):
+                tt = gr.Markdown(value='', visible=False)
+                paper_title_up.append(tt)
+            with gr.Column(scale=1):
+                aff = gr.Markdown(value='', visible=False)
+                paper_affinity_up.append(aff)
+        for j in range(NUM_PAIRS_SHOW):
+            with gr.Row():
+                with gr.Column(scale=1):
+                    sps = gr.Markdown(value='', visible=False)
+                    sent_pair_score.append(sps)
+                with gr.Column(scale=5):
+                    stq = gr.Textbox(label='Sentence from Submission', visible=False)
+                    shq = gr.components.Interpretation(stq, visible=False)
+                    sent_text_query.append(stq)
+                    sent_hl_query.append(shq)
+                with gr.Column(scale=5):
+                    stc = gr.Textbox(label="Sentence from Reviewer's Paper", visible=False)
+                    shc = gr.components.Interpretation(stc, visible=False)
+                    sent_text_candidate.append(stc)
+                    sent_hl_candidate.append(shc)
+        with gr.Row():
+            dml = gr.Markdown("""---""", visible=False)
+            demarc_lines.append(dml)
     ## Show more button
     with gr.Row():
         see_more_rel_btn = gr.Button('Explore more', visible=False)
                 # highlighted text from paper
                 highlight = gr.components.Interpretation(paper_abstract)
     ### EVENT LISTENERS
+    # components to work with
+    init_components = [
+        see_more_rel_btn, # explore more button
+        result1_desc, # description for first results
+        search_status, # search status
+        top_paper_slider,
+        top_pair_slider
+    ]
+    init_result_components = \
+        paper_title_up + paper_affinity_up + demarc_lines + sent_pair_score + \
+            sent_text_query + sent_text_candidate + sent_hl_query + sent_hl_candidate
+    explore_more_components = [
+        selected_papers_radio, # list of papers for show more section
+        source_sentences, # list of sentences for show more section
+        paper_title, # paper title for show more section
+        affinity, # affinity for show more section
+        result2_desc, # description for explore more
+        highlight_slider, # highlight slider
+    ]
     compute_btn.click(
         fn=show_status,
         inputs=[],
         outputs=search_status
     )
     compute_btn.click(
         fn=get_similar_paper,
         inputs=[
             title_input,
             abstract_text_input,
             author_id_input,
+            top_paper_slider,
+            top_pair_slider,
             info
         ],
+        outputs=init_components + init_result_components + explore_more_components + [info],
         show_progress=True,
         scroll_to_output=True
     )
         ]
     )
+    # change number of highlights to show
     highlight_slider.change(
         fn=change_num_highlight,
         inputs=[
         ]
     )
+    # change number of top papers to show initially
+    top_paper_slider.change(
+        fn=change_top_output,
+        inputs=[
+            top_paper_slider,
+            top_pair_slider,
+            info
+        ],
+        outputs=init_result_components+[result1_desc]
+    )
+    # change number of top sentence pairs to show initially
+    top_pair_slider.change(
+        fn=change_top_output,
+        inputs=[
+            top_paper_slider,
+            top_pair_slider,
+            info
+        ],
+        outputs=init_result_components+[result1_desc]
+    )
+if __name__ == "__main__":
+    demo.queue().launch()  # add ?__theme=light to force light mode

input_format.py CHANGED Viewed

@@ -1,4 +1,3 @@
-import numpy as np
 from pypdf import PdfReader
 from urllib.parse import urlparse
 import requests

 from pypdf import PdfReader
 from urllib.parse import urlparse
 import requests