a-guy-from-burma
committed on
Update app.py
Browse files · chunk matching, new way
app.py
CHANGED
@@ -4,6 +4,7 @@ import torch
|
|
4 |
import nltk
|
5 |
from nltk import pos_tag
|
6 |
from nltk.tokenize import word_tokenize
|
|
|
7 |
import requests
|
8 |
|
9 |
nltk.download('averaged_perceptron_tagger')
|
@@ -33,10 +34,7 @@ def calculate_similarity(text1, text2):
|
|
33 |
return f"{similarity.item():.2%} Similarity"
|
34 |
|
35 |
def report_issue(text1, text2, similarity):
|
36 |
-
# Replace '[FORM_ID]' with the actual ID of your Google Form.
|
37 |
url = 'https://docs.google.com/forms/d/e/1FAIpQLSdABQaCNCmHXDyHLsL2lLsxgu386hv9ALU2UbCVL9bUoIwemQ/formResponse'
|
38 |
-
#https://docs.google.com/forms/d/e/1FAIpQLSdABQaCNCmHXDyHLsL2lLsxgu386hv9ALU2UbCVL9bUoIwemQ/viewform?usp=sf_link
|
39 |
-
# Replace 'entry.XXXXX' with the actual entry IDs from your Google Form.
|
40 |
data = {
|
41 |
'entry.1041881480': text1,
|
42 |
'entry.1520964719': text2,
|
@@ -48,6 +46,23 @@ def report_issue(text1, text2, similarity):
|
|
48 |
else:
|
49 |
return "Failed to send report."
|
50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
with gr.Blocks() as app:
|
52 |
with gr.Row():
|
53 |
text1 = gr.Textbox(label="Input Text 1")
|
@@ -55,14 +70,22 @@ with gr.Blocks() as app:
|
|
55 |
with gr.Row():
|
56 |
button = gr.Button("Calculate Similarity")
|
57 |
output = gr.Text(label="Similarity")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
|
59 |
button.click(
|
60 |
-
fn=
|
61 |
inputs=[text1, text2],
|
62 |
-
outputs=output
|
63 |
)
|
64 |
|
65 |
-
report_button = gr.Button("
|
66 |
report_button.click(
|
67 |
fn=report_issue,
|
68 |
inputs=[text1, text2, output],
|
|
|
4 |
import nltk
|
5 |
from nltk import pos_tag
|
6 |
from nltk.tokenize import word_tokenize
|
7 |
+
from nltk.chunk import RegexpParser
|
8 |
import requests
|
9 |
|
10 |
nltk.download('averaged_perceptron_tagger')
|
|
|
34 |
return f"{similarity.item():.2%} Similarity"
|
35 |
|
36 |
def report_issue(text1, text2, similarity):
|
|
|
37 |
url = 'https://docs.google.com/forms/d/e/1FAIpQLSdABQaCNCmHXDyHLsL2lLsxgu386hv9ALU2UbCVL9bUoIwemQ/formResponse'
|
|
|
|
|
38 |
data = {
|
39 |
'entry.1041881480': text1,
|
40 |
'entry.1520964719': text2,
|
|
|
46 |
else:
|
47 |
return "Failed to send report."
|
48 |
|
49 |
+
def extract_chunks(text):
    """Return the NP, PP and VP phrases found in *text* as plain strings.

    The text is tokenized and POS-tagged with NLTK, then parsed with a
    three-stage regular-expression chunk grammar. Every subtree of the
    resulting parse tree whose label is NP, PP or VP contributes one
    space-joined phrase, in the order ``Tree.subtrees()`` yields them.
    Because all subtrees are visited, a chunk nested inside another chunk
    (for example the NP inside a PP) is listed separately as well.

    Args:
        text: The raw input string to analyse.

    Returns:
        A list of phrase strings; empty when no chunk matches.
    """
    # Stages are applied top to bottom, so PP can build on NP and VP on both.
    chunk_grammar = r"""
        NP: {<DT>?<JJ>*<NN>+} # Chunk sequences of DT, JJ, NN
        PP: {<IN><NP>} # Chunk prepositions followed by NP
        VP: {<VB.*><NP|PP>*} # Chunk verbs and their arguments
    """
    wanted_labels = ['NP', 'PP', 'VP']

    tagged_tokens = pos_tag(word_tokenize(text))
    parse_tree = RegexpParser(chunk_grammar).parse(tagged_tokens)

    found = []
    for node in parse_tree.subtrees():
        # The root node carries a different label and is filtered out here.
        if node.label() in wanted_labels:
            found.append(" ".join(token for token, _tag in node.leaves()))
    return found
|
65 |
+
|
66 |
with gr.Blocks() as app:
|
67 |
with gr.Row():
|
68 |
text1 = gr.Textbox(label="Input Text 1")
|
|
|
70 |
with gr.Row():
|
71 |
button = gr.Button("Calculate Similarity")
|
72 |
output = gr.Text(label="Similarity")
|
73 |
+
chunks_output = gr.Text(label="Extracted Chunks")
|
74 |
+
|
75 |
+
def combined_function(text1, text2):
    """Compute the similarity result and the chunk listing for two texts.

    Args:
        text1: Contents of the first input textbox.
        text2: Contents of the second input textbox.

    Returns:
        A ``(similarity, chunks_text)`` tuple: whatever
        ``calculate_similarity`` returns for the pair, followed by a
        two-line string showing the chunks extracted from each text.
    """
    score = calculate_similarity(text1, text2)
    first_chunks, second_chunks = extract_chunks(text1), extract_chunks(text2)
    # One string, two lines: it is rendered in a single text output component.
    listing = f"Chunks in Text 1: {first_chunks}\nChunks in Text 2: {second_chunks}"
    return score, listing
|
81 |
|
82 |
button.click(
|
83 |
+
fn=combined_function,
|
84 |
inputs=[text1, text2],
|
85 |
+
outputs=[output, chunks_output]
|
86 |
)
|
87 |
|
88 |
+
report_button = gr.Button("Send result for better training")
|
89 |
report_button.click(
|
90 |
fn=report_issue,
|
91 |
inputs=[text1, text2, output],
|