azaninello committed on
Commit
c3b1412
1 Parent(s): 9703df0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -16
app.py CHANGED
@@ -10,41 +10,94 @@ nltk.download('punkt')
10
 
11
  file = "text.txt"
12
 
13
- spacy_model = 'https://huggingface.co/spacy/it_core_news_sm'
14
-
15
  import spacy
16
- nlp_IT = spacy.load(spacy_model)
17
 
18
  def get_lists(file):
19
  with open(file, 'r', encoding='utf-8') as f:
20
  text = f.read()
21
 
22
- word_tokenized_text = word_tokenize(text, language='italian')
23
- word_tokenized_text_lower = [word.lower() for word in word_tokenized_text]
24
-
25
  sent_tokenized_text = sent_tokenize(text, language='italian')
26
  sent_tokenized_text_lower = [sent.lower() for sent in sent_tokenized_text]
27
 
28
- return word_tokenized_text, word_tokenized_text_lower, sent_tokenized_text, sent_tokenized_text_lower
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
- #words, words_lower, sentences, sentences = get_lists(file)
 
 
 
 
 
 
 
 
 
 
 
31
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
 
 
 
 
 
33
 
34
  demo = gr.Interface(
35
- sentence_builder,
36
  [
37
  gr.Textbox(),
38
- gr.Radio(["park", "zoo", "road"]),
39
- gr.CheckboxGroup(["ran", "swam", "ate", "slept"]),
40
- gr.Checkbox(label="Is it the morning?"),
41
  ],
42
  "text",
43
  examples=[
44
- ["cats", "park", ["ran", "swam"], True],
45
- ["dog", "zoo", ["ate", "swam"], False],
46
- ["bird", "road", ["ran"], False],
47
- ["cat", "zoo", ["ate"], True],
48
  ],
49
  )
50
 
 
10
 
11
# Path of the text corpus the app analyses.
file = "text.txt"

import spacy
# Small Italian spaCy pipeline; the model package must be installed
# separately (e.g. `python -m spacy download it_core_news_sm`) —
# NOTE(review): confirm it is listed in the deployment requirements.
nlp_IT = spacy.load("it_core_news_sm")
15
 
16
def get_lists(file):
    """Read *file* (UTF-8) and split it into Italian sentences.

    Returns a pair ``(originals, lowered)``: the sentences as found in
    the text and the same sentences lower-cased, index-aligned so that
    position *i* refers to the same sentence in both lists.
    """
    with open(file, encoding='utf-8') as handle:
        raw_text = handle.read()

    originals = sent_tokenize(raw_text, language='italian')
    lowered = []
    for sentence in originals:
        lowered.append(sentence.lower())

    return originals, lowered
24
+
25
# Tokenise the corpus once at start-up; the two lists are index-aligned.
sentences, sentences_lower = get_lists(file)
26
+
27
def search_engine_collocations(target='scarto', colloc='azioni', nlp=None, sentences_lower=None, sentences=None):
    """Report the most frequent collocations of *target* in the corpus.

    Scans every lower-cased sentence for *target* as a substring,
    lemmatises the matching sentences with spaCy and tallies the verbs,
    adjectives and nouns that occur alongside the target word.

    Parameters:
        target (str): word to look up (matched case-insensitively).
        colloc (str): category to report — 'azioni' (verbs),
            'caratteristiche' (adjectives) or 'concetti' (nouns).
        nlp: spaCy pipeline (callable); defaults to the module-level
            ``nlp_IT`` model.
        sentences_lower (list[str] | None): lower-cased corpus sentences.
        sentences (list[str] | None): original corpus sentences.  When
            either list is omitted the corpus is re-read via ``get_lists``.

    Returns:
        str | None: a ranked report, a not-found message, or ``None``
        when *colloc* is not one of the three known categories
        (preserving the original fall-through behaviour).
    """
    # Stdlib stand-in for nltk.FreqDist: FreqDist subclasses Counter, so
    # most_common() behaves identically.
    from collections import Counter

    # Resolve defaults lazily: the original eager defaults referenced the
    # misspelled name `nlp_it` (NameError at import) and froze the corpus
    # lists at def time.
    if nlp is None:
        nlp = nlp_IT
    if sentences_lower is None or sentences is None:
        sentences, sentences_lower = get_lists(file)

    verbs = []
    adjectives = []
    nouns = []
    result = 0  # number of sentences that contain the target

    for sent in sentences_lower:
        if target.lower() not in sent:
            continue
        result += 1
        for token in nlp(sent):
            if 'VERB' in token.pos_:
                verbs.append(token.lemma_)
            elif 'ADJ' in token.pos_:
                adjectives.append(token.lemma_)
            elif 'NOUN' in token.pos_:
                nouns.append(token.lemma_)

    if result == 0:
        # BUG FIX: the original literal lacked the f-prefix, so the raw
        # text "{target}" was shown to the user.
        return f"Non ho trovato la parola '{target}'.\n"

    def _report(items, found_msg, empty_msg):
        # One ranked line per distinct lemma: "1: ('lemma', count)\n\n".
        if not items:
            return empty_msg
        stringed_results = ''
        for n, r in enumerate(Counter(items).most_common()):
            stringed_results += str(n + 1) + ': ' + str(r) + '\n\n'
        return found_msg.format(results=stringed_results)

    # BUG FIX: the original used `=` instead of `==` in every comparison
    # (SyntaxError), let the "empty list" elif fire for the wrong
    # category, and built the noun ranking from `verbs` instead of `nouns`.
    if colloc == 'azioni':
        return _report(
            verbs,
            f"Ho trovato {len(verbs)} azioni legate a '{target}'\n{{results}}",
            f"Non ho trovato azioni legate a '{target}'",
        )
    elif colloc == 'caratteristiche':
        return _report(
            adjectives,
            f"Ho trovato {len(adjectives)} caratteristiche legate a '{target}'\n{{results}}",
            f"Non ho trovato caratteristiche legate a '{target}'",
        )
    elif colloc == 'concetti':
        return _report(
            nouns,
            f"Ho trovato {len(nouns)} concetti legati a '{target}'\n{{results}}",
            # "legate" kept byte-for-byte from the original message text.
            f"Non ho trovato concetti legate a '{target}'",
        )
88
+
89
 
90
# Gradio UI: a text box for the target word and a radio button for the
# collocation category; both are forwarded positionally to
# search_engine_collocations (its remaining parameters keep their defaults).
demo = gr.Interface(
    search_engine_collocations,
    [
        gr.Textbox(),  # target word
        gr.Radio(["azioni", "caratteristiche", "concetti"]),  # colloc category
    ],
    "text",  # plain-text output component
    examples=[
        ["scarto", "azioni"],
        ["rifiuto", "caratteristiche"],
        ["sostenibilità", "concetti"],
    ],
)
103