a-guy-from-burma committed on
Commit
84949b2
·
verified ·
1 Parent(s): f4e65c8

Update app.py

Browse files

chunk matching, new way

Files changed (1) hide show
  1. app.py +29 -6
app.py CHANGED
@@ -4,6 +4,7 @@ import torch
4
  import nltk
5
  from nltk import pos_tag
6
  from nltk.tokenize import word_tokenize
 
7
  import requests
8
 
9
  nltk.download('averaged_perceptron_tagger')
@@ -33,10 +34,7 @@ def calculate_similarity(text1, text2):
33
  return f"{similarity.item():.2%} Similarity"
34
 
35
  def report_issue(text1, text2, similarity):
36
- # Replace '[FORM_ID]' with the actual ID of your Google Form.
37
  url = 'https://docs.google.com/forms/d/e/1FAIpQLSdABQaCNCmHXDyHLsL2lLsxgu386hv9ALU2UbCVL9bUoIwemQ/formResponse'
38
- #https://docs.google.com/forms/d/e/1FAIpQLSdABQaCNCmHXDyHLsL2lLsxgu386hv9ALU2UbCVL9bUoIwemQ/viewform?usp=sf_link
39
- # Replace 'entry.XXXXX' with the actual entry IDs from your Google Form.
40
  data = {
41
  'entry.1041881480': text1,
42
  'entry.1520964719': text2,
@@ -48,6 +46,23 @@ def report_issue(text1, text2, similarity):
48
  else:
49
  return "Failed to send report."
50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  with gr.Blocks() as app:
52
  with gr.Row():
53
  text1 = gr.Textbox(label="Input Text 1")
@@ -55,14 +70,22 @@ with gr.Blocks() as app:
55
  with gr.Row():
56
  button = gr.Button("Calculate Similarity")
57
  output = gr.Text(label="Similarity")
 
 
 
 
 
 
 
 
58
 
59
  button.click(
60
- fn=calculate_similarity,
61
  inputs=[text1, text2],
62
- outputs=output
63
  )
64
 
65
- report_button = gr.Button("Report to Developer")
66
  report_button.click(
67
  fn=report_issue,
68
  inputs=[text1, text2, output],
 
4
  import nltk
5
  from nltk import pos_tag
6
  from nltk.tokenize import word_tokenize
7
+ from nltk.chunk import RegexpParser
8
  import requests
9
 
10
  nltk.download('averaged_perceptron_tagger')
 
34
  return f"{similarity.item():.2%} Similarity"
35
 
36
  def report_issue(text1, text2, similarity):
 
37
  url = 'https://docs.google.com/forms/d/e/1FAIpQLSdABQaCNCmHXDyHLsL2lLsxgu386hv9ALU2UbCVL9bUoIwemQ/formResponse'
 
 
38
  data = {
39
  'entry.1041881480': text1,
40
  'entry.1520964719': text2,
 
46
  else:
47
  return "Failed to send report."
48
 
49
def extract_chunks(text):
    """Return the noun, prepositional and verb phrases found in *text*.

    The text is tokenized and POS-tagged, then chunked with a small
    regular-expression grammar. Every NP, PP and VP subtree of the parse
    tree contributes one phrase, so a phrase nested inside a larger one
    (for example the NP inside a PP) is listed as well.

    Args:
        text: Raw input text. ``None``, empty and whitespace-only values
            are accepted.

    Returns:
        A list of phrase strings, in the order ``tree.subtrees()`` yields
        the matching subtrees. Empty when there is nothing to chunk.
    """
    # Nothing to chunk: skip tokenizing, tagging and parsing entirely.
    if not text or not text.strip():
        return []

    # <NN.*> covers NN, NNS, NNP and NNPS (mirroring <VB.*> for verbs), so
    # plural and proper nouns are chunked too instead of being dropped.
    grammar = r"""
        NP: {<DT>?<JJ>*<NN.*>+} # Chunk sequences of DT, JJ, and noun tags
        PP: {<IN><NP>} # Chunk prepositions followed by NP
        VP: {<VB.*><NP|PP>*} # Chunk verbs and their arguments
    """

    tagged_words = pos_tag(word_tokenize(text))
    tree = RegexpParser(grammar).parse(tagged_words)

    wanted_labels = {"NP", "PP", "VP"}
    return [
        " ".join(word for word, _tag in subtree.leaves())
        for subtree in tree.subtrees()
        if subtree.label() in wanted_labels
    ]
65
+
66
  with gr.Blocks() as app:
67
  with gr.Row():
68
  text1 = gr.Textbox(label="Input Text 1")
 
70
  with gr.Row():
71
  button = gr.Button("Calculate Similarity")
72
  output = gr.Text(label="Similarity")
73
+ chunks_output = gr.Text(label="Extracted Chunks")
74
+
75
def combined_function(text1, text2):
    """Produce the similarity result and the chunk listing for two texts.

    Calls ``calculate_similarity`` on the pair first, then
    ``extract_chunks`` on each text in turn.

    Returns:
        A 2-tuple: the value returned by ``calculate_similarity`` and a
        two-line string showing the chunks extracted from each text.
    """
    score = calculate_similarity(text1, text2)
    # The generator is consumed left to right, so text1 is chunked before text2.
    first, second = (extract_chunks(source) for source in (text1, text2))
    return score, f"Chunks in Text 1: {first}\nChunks in Text 2: {second}"
81
 
82
  button.click(
83
+ fn=combined_function,
84
  inputs=[text1, text2],
85
+ outputs=[output, chunks_output]
86
  )
87
 
88
+ report_button = gr.Button("Send result for better training")
89
  report_button.click(
90
  fn=report_issue,
91
  inputs=[text1, text2, output],