display original paragraph as well
app.py
CHANGED
@@ -129,7 +129,6 @@ if 'paragraph_sentence_encodings' in st.session_state:
     query_tokens = st.session_state.bert_tokenizer(query, return_tensors="pt", padding=True, truncation=True).to(
         'cuda')
     with torch.no_grad():  # Disable gradient calculation for inference
-        # Perform the forward pass on the GPU
         query_encoding = st.session_state.bert_model(**query_tokens).last_hidden_state[:, 0,
                          :].cpu().numpy()  # Move the result to CPU and convert to NumPy
 
@@ -154,7 +153,6 @@ if 'paragraph_sentence_encodings' in st.session_state:
 
         sentence_similarities.sort(reverse=True, key=lambda x: x[0])
 
-        # Calculate the average of the top three sentence similarities
         if len(sentence_similarities) >= 3:
             top_three_avg_similarity = np.mean([s[0] for s in sentence_similarities[:3]])
             top_three_avg_commonality = np.mean([s[2] for s in sentence_similarities[:3]])
@@ -168,18 +166,23 @@ if 'paragraph_sentence_encodings' in st.session_state:
             top_three_avg_commonality = 0
             top_three_sentences = []
 
-        # Move top 3 sentences to the beginning of the paragraph
         top_three_texts = [s[1] for s in top_three_sentences]
         remaining_texts = [s[0] for s in paragraph_sentence_encoding[1] if s and s[0] not in top_three_texts]
         reordered_paragraph = top_three_texts + remaining_texts
 
+        original_paragraph = ' '.join([s[0] for s in paragraph_sentence_encoding[1] if s])
+        modified_paragraph = ' '.join(reordered_paragraph)
+
         paragraph_scores.append(
-            (top_three_avg_similarity, top_three_avg_commonality,
+            (top_three_avg_similarity, top_three_avg_commonality,
+             {'modified_text': modified_paragraph, 'original_text': original_paragraph})
+        )
 
     sentence_scores = sorted(sentence_scores, key=lambda x: x[0], reverse=True)
     paragraph_scores = sorted(paragraph_scores, key=lambda x: x[0], reverse=True)
 
     st.write("Top scored paragraphs and their scores:")
     for similarity_score, commonality_score, paragraph in paragraph_scores[:5]:
-        st.write(
-
+        st.write(f"Similarity Score: {similarity_score}, Commonality Score: {commonality_score}")
+        st.write("Modified Paragraph: ", paragraph['modified_text'])
+        st.write("Original Paragraph: ", paragraph['original_text'])