zmbfeng committed on
Commit
78ef37b
·
1 Parent(s): b91bc9f

display original paragraph as well

Browse files
Files changed (1) hide show
  1. app.py +9 -6
app.py CHANGED
@@ -129,7 +129,6 @@ if 'paragraph_sentence_encodings' in st.session_state:
129
  query_tokens = st.session_state.bert_tokenizer(query, return_tensors="pt", padding=True, truncation=True).to(
130
  'cuda')
131
  with torch.no_grad(): # Disable gradient calculation for inference
132
- # Perform the forward pass on the GPU
133
  query_encoding = st.session_state.bert_model(**query_tokens).last_hidden_state[:, 0,
134
  :].cpu().numpy() # Move the result to CPU and convert to NumPy
135
 
@@ -154,7 +153,6 @@ if 'paragraph_sentence_encodings' in st.session_state:
154
 
155
  sentence_similarities.sort(reverse=True, key=lambda x: x[0])
156
 
157
- # Calculate the average of the top three sentence similarities
158
  if len(sentence_similarities) >= 3:
159
  top_three_avg_similarity = np.mean([s[0] for s in sentence_similarities[:3]])
160
  top_three_avg_commonality = np.mean([s[2] for s in sentence_similarities[:3]])
@@ -168,18 +166,23 @@ if 'paragraph_sentence_encodings' in st.session_state:
168
  top_three_avg_commonality = 0
169
  top_three_sentences = []
170
 
171
- # Move top 3 sentences to the beginning of the paragraph
172
  top_three_texts = [s[1] for s in top_three_sentences]
173
  remaining_texts = [s[0] for s in paragraph_sentence_encoding[1] if s and s[0] not in top_three_texts]
174
  reordered_paragraph = top_three_texts + remaining_texts
175
 
 
 
 
176
  paragraph_scores.append(
177
- (top_three_avg_similarity, top_three_avg_commonality, {'text': ' '.join(reordered_paragraph)}))
 
 
178
 
179
  sentence_scores = sorted(sentence_scores, key=lambda x: x[0], reverse=True)
180
  paragraph_scores = sorted(paragraph_scores, key=lambda x: x[0], reverse=True)
181
 
182
  st.write("Top scored paragraphs and their scores:")
183
  for similarity_score, commonality_score, paragraph in paragraph_scores[:5]:
184
- st.write(
185
- f"Similarity Score: {similarity_score}, Commonality Score: {commonality_score}, Paragraph: {paragraph['text']}")
 
 
129
  query_tokens = st.session_state.bert_tokenizer(query, return_tensors="pt", padding=True, truncation=True).to(
130
  'cuda')
131
  with torch.no_grad(): # Disable gradient calculation for inference
 
132
  query_encoding = st.session_state.bert_model(**query_tokens).last_hidden_state[:, 0,
133
  :].cpu().numpy() # Move the result to CPU and convert to NumPy
134
 
 
153
 
154
  sentence_similarities.sort(reverse=True, key=lambda x: x[0])
155
 
 
156
  if len(sentence_similarities) >= 3:
157
  top_three_avg_similarity = np.mean([s[0] for s in sentence_similarities[:3]])
158
  top_three_avg_commonality = np.mean([s[2] for s in sentence_similarities[:3]])
 
166
  top_three_avg_commonality = 0
167
  top_three_sentences = []
168
 
 
169
  top_three_texts = [s[1] for s in top_three_sentences]
170
  remaining_texts = [s[0] for s in paragraph_sentence_encoding[1] if s and s[0] not in top_three_texts]
171
  reordered_paragraph = top_three_texts + remaining_texts
172
 
173
+ original_paragraph = ' '.join([s[0] for s in paragraph_sentence_encoding[1] if s])
174
+ modified_paragraph = ' '.join(reordered_paragraph)
175
+
176
  paragraph_scores.append(
177
+ (top_three_avg_similarity, top_three_avg_commonality,
178
+ {'modified_text': modified_paragraph, 'original_text': original_paragraph})
179
+ )
180
 
181
  sentence_scores = sorted(sentence_scores, key=lambda x: x[0], reverse=True)
182
  paragraph_scores = sorted(paragraph_scores, key=lambda x: x[0], reverse=True)
183
 
184
  st.write("Top scored paragraphs and their scores:")
185
  for similarity_score, commonality_score, paragraph in paragraph_scores[:5]:
186
+ st.write(f"Similarity Score: {similarity_score}, Commonality Score: {commonality_score}")
187
+ st.write("Modified Paragraph: ", paragraph['modified_text'])
188
+ st.write("Original Paragraph: ", paragraph['original_text'])