jskim committed on
Commit
0532283
·
1 Parent(s): 4f8ef7b

adding null check for papers retrieved; made relevant parts pop up without additional refresh button step

Browse files
Files changed (2) hide show
  1. app.py +66 -81
  2. score.py +4 -2
app.py CHANGED
@@ -43,7 +43,8 @@ def get_similar_paper(
43
  name, papers = get_text_from_author_id(author_id_input)
44
 
45
  # Compute Doc-level affinity scores for the Papers
46
- print('computing scores...')
 
47
  titles, abstracts, doc_scores = compute_document_score(
48
  doc_model,
49
  tokenizer,
@@ -57,7 +58,6 @@ def get_similar_paper(
57
  'abstracts': abstracts,
58
  'doc_scores': doc_scores
59
  }
60
- pickle.dump(tmp, open('paper_info.pkl', 'wb'))
61
 
62
  # Select top K choices of papers to show
63
  titles = titles[:num_papers_show]
@@ -66,54 +66,47 @@ def get_similar_paper(
66
 
67
  display_title = ['[ %0.3f ] %s'%(s, t) for t, s in zip(titles, doc_scores)]
68
  end = time.time()
69
- print('retrieval complete in [%0.2f] seconds'%(end - start))
70
 
71
- return (
72
- gr.update(choices=display_title, interactive=True, visible=True), # set of papers
73
- gr.update(choices=input_sentences, interactive=True), # submission sentences
74
- gr.update(visible=True), # title row
75
- gr.update(visible=True), # abstract row
76
- gr.update(visible=True) # button
77
- )
78
-
79
- def get_highlights(
80
- abstract_text_input,
81
- pdf_file_input,
82
- abstract,
83
- K=2
84
- ):
85
  print('obtaining highlights..')
86
  start = time.time()
87
- # Compute sent-level and phrase-level affinity scores for each papers
88
- sent_ids, sent_scores, info = get_highlight_info(
89
- sent_model,
90
- abstract_text_input,
91
- abstract,
92
- K=K
93
- )
94
-
95
  input_sentences = sent_tokenize(abstract_text_input)
96
  num_sents = len(input_sentences)
97
-
98
- word_scores = dict()
99
-
100
- # different highlights for each input sentence
101
- for i in range(num_sents):
102
- word_scores[str(i)] = {
103
- "original": abstract,
104
- "interpretation": list(zip(info['all_words'], info[i]['scores']))
105
- } # format to feed to for Gradio Interpretation component
106
-
107
- tmp = {
108
- 'source_sentences': input_sentences,
109
- 'highlight': word_scores
110
- }
111
- pickle.dump(tmp, open('highlight_info.pkl', 'wb'))
 
 
 
 
 
 
 
 
 
 
 
112
  end = time.time()
113
  print('done in [%0.2f] seconds'%(end - start))
114
 
115
- # update the visibility of radio choices
116
- return gr.update(visible=True)
 
 
 
 
117
 
118
  def update_name(author_id_input):
119
  # update the name of the author based on the id input
@@ -121,13 +114,13 @@ def update_name(author_id_input):
121
 
122
  return gr.update(value=name)
123
 
124
- def change_output_highlight(source_sent_choice):
125
  # change the output highlight based on the sentence selected from the submission
126
- fname = 'highlight_info.pkl'
127
  if os.path.exists(fname):
128
  tmp = pickle.load(open(fname, 'rb'))
129
- source_sents = tmp['source_sentences']
130
- highlights = tmp['highlight']
131
  for i, s in enumerate(source_sents):
132
  #print('changing highlight')
133
  if source_sent_choice == s:
@@ -137,14 +130,15 @@ def change_output_highlight(source_sent_choice):
137
 
138
  def change_paper(selected_papers_radio):
139
  # change the paper to show based on the paper selected
140
- fname = 'paper_info.pkl'
141
  if os.path.exists(fname):
142
  tmp = pickle.load(open(fname, 'rb'))
143
- for title, abstract, aff_score in zip(tmp['titles'], tmp['abstracts'], tmp['doc_scores']):
144
- display_title = '[ %0.3f ] %s'%(aff_score, title)
145
- if display_title == selected_papers_radio:
146
- #print('changing paper')
147
- return title, abstract, aff_score # update title, abstract, and affinity score fields
 
148
  else:
149
  return
150
 
@@ -162,18 +156,19 @@ Below we describe how to use the tool. Also feel free to check out the [video]()
162
 
163
  ##### Input
164
  - The tool requires two inputs: (1) an academic paper's abstract in a text format, (2) and a potential reviewer's [Semantic Scholar](https://www.semanticscholar.org/) profile link. Once you put in a valid profile link, the reviewer's name will be displayed.
165
- - Once the name is confirmed, press the `Search Similar Papers from the Reviewer` button.
166
- ##### Search Similar Papers
167
- - Based on the input information above, the tool will search for similar papers from the reviewer's previous publications using [Semantic Scholar API](https://www.semanticscholar.org/product/api).
168
  - It will list top 10 similar papers along with the **affinity scores** (ranging from 0 -1) for each, computed using text representations from a [language model](https://github.com/allenai/specter/tree/master/specter).
169
  - You can click on different papers to see title, abstract, and affinity scores in detail.
170
- ##### Show Relevant Parts
171
- - Once you have retrieved the similar papers above, and selected a paper that you are interested in, you will have an option to see what parts of the selected paper may be relevant to the submission abstract. Click `Show Relevant Parts from Selected Paper` button.
172
  - On the left, you will see individual sentences from the submission abstract you can select from.
173
  - On the right, you will see the abstract of the selected paper, with **highlights**.
174
  - **<span style="color:black;background-color:#DB7262;">Red highlights</span>**: sentences from the reviewer's paper abstract with high semantic similarity to the selected sentence.
175
  - **<span style="color:black;background-color:#5296D5;">Blue highlights</span>**: phrases from the reviewer's paper abstract that is included in the selected sentence.
176
- - To see relevant parts in a different paper from the reviewer, select another paper above and re-click `Show Relevant Parts from Selected Paper` button to refresh.
 
177
  """
178
  )
179
 
@@ -190,7 +185,7 @@ Below we describe how to use the tool. Also feel free to check out the [video]()
190
  name = gr.Textbox(label='Confirm Reviewer Name', interactive=False)
191
  author_id_input.change(fn=update_name, inputs=author_id_input, outputs=name)
192
  with gr.Row():
193
- compute_btn = gr.Button('Search Similar Papers from the Reviewer')
194
 
195
  ### PAPER INFORMATION
196
 
@@ -199,7 +194,7 @@ Below we describe how to use the tool. Also feel free to check out the [video]()
199
  selected_papers_radio = gr.Radio(
200
  choices=[], # will be udpated with the button click
201
  visible=False, # also will be updated with the button click
202
- label='Selected Top Papers from the Reviewer'
203
  )
204
 
205
  # selected paper information
@@ -208,14 +203,12 @@ Below we describe how to use the tool. Also feel free to check out the [video]()
208
  paper_title = gr.Textbox(label='Title', interactive=False)
209
  with gr.Column(scale=1):
210
  affinity= gr.Number(label='Affinity', interactive=False, value=0)
211
- with gr.Row(visibe=False) as abstract_row:
212
  paper_abstract = gr.Textbox(label='Abstract', interactive=False, visible=False)
213
 
214
- with gr.Row(visible=False) as explain_button_row:
215
- explain_btn = gr.Button('Show Relevant Parts from Selected Paper')
216
-
217
  ### RELEVANT PARTS (HIGHLIGHTS)
218
-
219
  with gr.Row():
220
  with gr.Column(scale=2): # text from submission
221
  source_sentences = gr.Radio(
@@ -240,26 +233,17 @@ Below we describe how to use the tool. Also feel free to check out the [video]()
240
  selected_papers_radio,
241
  source_sentences,
242
  title_row,
243
- paper_abstract,
244
- explain_button_row,
245
  ]
246
  )
247
 
248
- # get highlights
249
- explain_btn.click(
250
- fn=get_highlights,
251
- inputs=[
252
- abstract_text_input,
253
- pdf_file_input,
254
- paper_abstract
255
- ],
256
- outputs=source_sentences
257
- )
258
-
259
  # change highlight based on selected sentences from submission
260
  source_sentences.change(
261
  fn=change_output_highlight,
262
- inputs=source_sentences,
 
 
 
263
  outputs=highlight
264
  )
265
 
@@ -270,7 +254,8 @@ Below we describe how to use the tool. Also feel free to check out the [video]()
270
  outputs= [
271
  paper_title,
272
  paper_abstract,
273
- affinity
 
274
  ]
275
  )
276
 
@@ -280,6 +265,6 @@ Below we describe how to use the tool. Also feel free to check out the [video]()
280
  **Disclaimer.** This tool and its output should not serve as the sole justification for confirming a match for the submission. It is intended as a supplementary tool that the user may use at their discretion; the correctness of the output of the tool is not guaranteed. This may be improved by updating the internal models used to compute the affinity scores and sentence relevance, which may require additional research independently. The tool does not compromise the privacy of the reviewers as it relies only on their publicly-available information (e.g., names and list of previously published papers).
281
  """
282
  )
283
-
284
  if __name__ == "__main__":
285
  demo.launch()
 
43
  name, papers = get_text_from_author_id(author_id_input)
44
 
45
  # Compute Doc-level affinity scores for the Papers
46
+ print('computing scores...')
47
+ # TODO detect duplicate papers?
48
  titles, abstracts, doc_scores = compute_document_score(
49
  doc_model,
50
  tokenizer,
 
58
  'abstracts': abstracts,
59
  'doc_scores': doc_scores
60
  }
 
61
 
62
  # Select top K choices of papers to show
63
  titles = titles[:num_papers_show]
 
66
 
67
  display_title = ['[ %0.3f ] %s'%(s, t) for t, s in zip(titles, doc_scores)]
68
  end = time.time()
69
+ print('paper retrieval complete in [%0.2f] seconds'%(end - start))
70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  print('obtaining highlights..')
72
  start = time.time()
 
 
 
 
 
 
 
 
73
  input_sentences = sent_tokenize(abstract_text_input)
74
  num_sents = len(input_sentences)
75
+ for aa, (tt, ab, ds) in enumerate(zip(titles, abstracts, doc_scores)):
76
+ # Compute sentence-level and phrase-level affinity scores for each paper
77
+ sent_ids, sent_scores, info = get_highlight_info(
78
+ sent_model,
79
+ abstract_text_input,
80
+ ab,
81
+ K=2
82
+ )
83
+
84
+ word_scores = dict()
85
+
86
+ # different highlights for each input sentence
87
+ for i in range(num_sents):
88
+ word_scores[str(i)] = {
89
+ "original": ab,
90
+ "interpretation": list(zip(info['all_words'], info[i]['scores']))
91
+ } # format expected by the Gradio Interpretation component
92
+
93
+ tmp[display_title[aa]] = {
94
+ 'title': tt,
95
+ 'abstract': ab,
96
+ 'doc_score': ds,
97
+ 'source_sentences': input_sentences,
98
+ 'highlight': word_scores
99
+ }
100
+ pickle.dump(tmp, open('info.pkl', 'wb')) # TODO better ways of saving intermediate results?
101
  end = time.time()
102
  print('done in [%0.2f] seconds'%(end - start))
103
 
104
+ return (
105
+ gr.update(choices=display_title, interactive=True, visible=True), # set of papers
106
+ gr.update(choices=input_sentences, interactive=True, visible=True), # submission sentences
107
+ gr.update(visible=True), # title row
108
+ gr.update(visible=True), # abstract row
109
+ )
110
 
111
  def update_name(author_id_input):
112
  # update the name of the author based on the id input
 
114
 
115
  return gr.update(value=name)
116
 
117
+ def change_output_highlight(selected_papers_radio, source_sent_choice):
118
  # change the output highlight based on the sentence selected from the submission
119
+ fname = 'info.pkl'
120
  if os.path.exists(fname):
121
  tmp = pickle.load(open(fname, 'rb'))
122
+ source_sents = tmp[selected_papers_radio]['source_sentences']
123
+ highlights = tmp[selected_papers_radio]['highlight']
124
  for i, s in enumerate(source_sents):
125
  #print('changing highlight')
126
  if source_sent_choice == s:
 
130
 
131
  def change_paper(selected_papers_radio):
132
  # change the paper to show based on the paper selected
133
+ fname = 'info.pkl'
134
  if os.path.exists(fname):
135
  tmp = pickle.load(open(fname, 'rb'))
136
+ title = tmp[selected_papers_radio]['title']
137
+ abstract = tmp[selected_papers_radio]['abstract']
138
+ aff_score = tmp[selected_papers_radio]['doc_score']
139
+ highlights = tmp[selected_papers_radio]['highlight']
140
+ return title, abstract, aff_score, highlights['0']
141
+
142
  else:
143
  return
144
 
 
156
 
157
  ##### Input
158
  - The tool requires two inputs: (1) an academic paper's abstract in text format, and (2) a potential reviewer's [Semantic Scholar](https://www.semanticscholar.org/) profile link. Once you put in a valid profile link, the reviewer's name will be displayed.
159
+ - Once the name is confirmed, press the `What Makes this a Good Match?` button.
160
+ ##### Similar Papers From the Reviewer
161
+ - Based on the input information above, the tool will first search for similar papers from the reviewer's previous publications using [Semantic Scholar API](https://www.semanticscholar.org/product/api).
162
  - It will list the top 10 similar papers along with the **affinity scores** (ranging from 0 to 1) for each, computed using text representations from a [language model](https://github.com/allenai/specter/tree/master/specter).
163
  - You can click on different papers to see title, abstract, and affinity scores in detail.
164
+ ##### Relevant Parts
165
+ - Below the list of papers, we highlight relevant parts in the selected paper compared to the submission abstract.
166
  - On the left, you will see individual sentences from the submission abstract you can select from.
167
  - On the right, you will see the abstract of the selected paper, with **highlights**.
168
  - **<span style="color:black;background-color:#DB7262;">Red highlights</span>**: sentences from the reviewer's paper abstract with high semantic similarity to the selected sentence.
169
  - **<span style="color:black;background-color:#5296D5;">Blue highlights</span>**: phrases from the reviewer's paper abstract that are included in the selected sentence.
170
+ - To see relevant parts in a different paper from the reviewer, select the new paper.
171
+ -------
172
  """
173
  )
174
 
 
185
  name = gr.Textbox(label='Confirm Reviewer Name', interactive=False)
186
  author_id_input.change(fn=update_name, inputs=author_id_input, outputs=name)
187
  with gr.Row():
188
+ compute_btn = gr.Button('What Makes This a Good Match?')
189
 
190
  ### PAPER INFORMATION
191
 
 
194
  selected_papers_radio = gr.Radio(
195
  choices=[], # will be updated with the button click
196
  visible=False, # also will be updated with the button click
197
+ label='Top Relevant Papers from the Reviewer'
198
  )
199
 
200
  # selected paper information
 
203
  paper_title = gr.Textbox(label='Title', interactive=False)
204
  with gr.Column(scale=1):
205
  affinity= gr.Number(label='Affinity', interactive=False, value=0)
206
+ with gr.Row():
207
  paper_abstract = gr.Textbox(label='Abstract', interactive=False, visible=False)
208
 
209
+ ## TODO consider adding more direct information feeding to the users before giving them options for interactions.
210
+
 
211
  ### RELEVANT PARTS (HIGHLIGHTS)
 
212
  with gr.Row():
213
  with gr.Column(scale=2): # text from submission
214
  source_sentences = gr.Radio(
 
233
  selected_papers_radio,
234
  source_sentences,
235
  title_row,
236
+ paper_abstract
 
237
  ]
238
  )
239
 
 
 
 
 
 
 
 
 
 
 
 
240
  # change highlight based on selected sentences from submission
241
  source_sentences.change(
242
  fn=change_output_highlight,
243
+ inputs=[
244
+ selected_papers_radio,
245
+ source_sentences
246
+ ],
247
  outputs=highlight
248
  )
249
 
 
254
  outputs= [
255
  paper_title,
256
  paper_abstract,
257
+ affinity,
258
+ highlight
259
  ]
260
  )
261
 
 
265
  **Disclaimer.** This tool and its output should not serve as the sole justification for confirming a match for the submission. It is intended as a supplementary tool that the user may use at their discretion; the correctness of the output of the tool is not guaranteed. This may be improved by updating the internal models used to compute the affinity scores and sentence relevance, which may require additional research independently. The tool does not compromise the privacy of the reviewers as it relies only on their publicly-available information (e.g., names and list of previously published papers).
266
  """
267
  )
268
+
269
  if __name__ == "__main__":
270
  demo.launch()
score.py CHANGED
@@ -183,9 +183,11 @@ def compute_document_score(doc_model, tokenizer, query, papers, batch=5):
183
  titles = []
184
  abstracts = []
185
  for p in papers:
186
- titles.append(p['title'])
187
- abstracts.append(p['abstract'])
 
188
  scores = predict_docscore(doc_model, tokenizer, query, titles, abstracts, batch=batch)
 
189
  idx_sorted = np.argsort(scores)[::-1]
190
 
191
  titles_sorted = [titles[x] for x in idx_sorted]
 
183
  titles = []
184
  abstracts = []
185
  for p in papers:
186
+ if p['title'] is not None and p['abstract'] is not None:
187
+ titles.append(p['title'])
188
+ abstracts.append(p['abstract'])
189
  scores = predict_docscore(doc_model, tokenizer, query, titles, abstracts, batch=batch)
190
+ assert(len(scores) == len(abstracts))
191
  idx_sorted = np.argsort(scores)[::-1]
192
 
193
  titles_sorted = [titles[x] for x in idx_sorted]