display original paragraph as well
app.py
CHANGED
@@ -129,7 +129,6 @@ if 'paragraph_sentence_encodings' in st.session_state:
     query_tokens = st.session_state.bert_tokenizer(query, return_tensors="pt", padding=True, truncation=True).to(
         'cuda')
     with torch.no_grad():  # Disable gradient calculation for inference
-        # Perform the forward pass on the GPU
         query_encoding = st.session_state.bert_model(**query_tokens).last_hidden_state[:, 0,
                          :].cpu().numpy()  # Move the result to CPU and convert to NumPy
 
@@ -154,7 +153,6 @@ if 'paragraph_sentence_encodings' in st.session_state:
 
         sentence_similarities.sort(reverse=True, key=lambda x: x[0])
 
-        # Calculate the average of the top three sentence similarities
         if len(sentence_similarities) >= 3:
             top_three_avg_similarity = np.mean([s[0] for s in sentence_similarities[:3]])
             top_three_avg_commonality = np.mean([s[2] for s in sentence_similarities[:3]])
@@ -168,18 +166,23 @@ if 'paragraph_sentence_encodings' in st.session_state:
             top_three_avg_commonality = 0
             top_three_sentences = []
 
-        # Move top 3 sentences to the beginning of the paragraph
         top_three_texts = [s[1] for s in top_three_sentences]
         remaining_texts = [s[0] for s in paragraph_sentence_encoding[1] if s and s[0] not in top_three_texts]
         reordered_paragraph = top_three_texts + remaining_texts
 
+        original_paragraph = ' '.join([s[0] for s in paragraph_sentence_encoding[1] if s])
+        modified_paragraph = ' '.join(reordered_paragraph)
+
         paragraph_scores.append(
-            (top_three_avg_similarity, top_three_avg_commonality,
+            (top_three_avg_similarity, top_three_avg_commonality,
+             {'modified_text': modified_paragraph, 'original_text': original_paragraph})
+        )
 
     sentence_scores = sorted(sentence_scores, key=lambda x: x[0], reverse=True)
     paragraph_scores = sorted(paragraph_scores, key=lambda x: x[0], reverse=True)
 
     st.write("Top scored paragraphs and their scores:")
     for similarity_score, commonality_score, paragraph in paragraph_scores[:5]:
-        st.write(
-
+        st.write(f"Similarity Score: {similarity_score}, Commonality Score: {commonality_score}")
+        st.write("Modified Paragraph: ", paragraph['modified_text'])
+        st.write("Original Paragraph: ", paragraph['original_text'])