nanom committed on
Commit
f1ad590
·
1 Parent(s): 63f6f81

Added type hinting and config file

Browse files
.gitignore CHANGED
@@ -1,3 +1,3 @@
1
  __pycache__/
2
  .env
3
- logs_edia_lmodels_spanish/
 
1
  __pycache__/
2
  .env
3
+ logs_edia_lmodels_es/
app.py CHANGED
@@ -1,6 +1,7 @@
1
  # --- Imports libs ---
2
  import gradio as gr
3
  import pandas as pd
 
4
 
5
 
6
  # --- Imports modules ---
@@ -8,14 +9,18 @@ from modules.module_languageModel import LanguageModel
8
 
9
 
10
  # --- Imports interfaces ---
11
- from interfaces.interface_sesgoEnFrases import interface as interface_sesgoEnFrases
12
  from interfaces.interface_crowsPairs import interface as interface_crowsPairs
13
 
14
 
15
  # --- Tool config ---
16
- LANGUAGE_MODEL = "dccuchile/bert-base-spanish-wwm-uncased"
17
- LANGUAGE = "spanish" # [spanish]
18
- AVAILABLE_LOGS = True # [True | False]
 
 
 
 
19
 
20
 
21
  # --- Init classes ---
 
1
  # --- Imports libs ---
2
  import gradio as gr
3
  import pandas as pd
4
+ import configparser
5
 
6
 
7
  # --- Imports modules ---
 
9
 
10
 
11
  # --- Imports interfaces ---
12
+ from interfaces.interface_biasPhrase import interface as interface_sesgoEnFrases
13
  from interfaces.interface_crowsPairs import interface as interface_crowsPairs
14
 
15
 
16
  # --- Tool config ---
# Load the tool settings from 'tool.cfg'; the relative path is resolved
# against the current working directory, exactly as before.
cfg = configparser.ConfigParser()

# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed. An empty result therefore means the config was
# not loaded; fail here with a clear message instead of letting the lookups
# below raise an unrelated-looking KeyError.
if not cfg.read('tool.cfg'):
    raise FileNotFoundError("Configuration file 'tool.cfg' could not be read")

LANGUAGE = cfg['INTERFACE']['language']
LANGUAGE_MODEL = cfg['LMODEL']['language_model']
# getboolean() converts the textual option value into a bool.
AVAILABLE_LOGS = cfg['LOGS'].getboolean('available_logs')
23
+
24
 
25
 
26
  # --- Init classes ---
examples/examples_es.py CHANGED
@@ -1,4 +1,90 @@
1
- # Examples Sesgos en frases
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  examples_sesgos_frases = [
3
  ["los * manejan bien la plata.", "argentinos,pobres,millonarios,australianos"],
4
  ["los cordobeses bailan *.", ""],
 
# --- Example words with a sample context each ---
# Every dict below maps an example word to a context string; an empty string
# means no context is supplied for that word.
example_fem = {
    "mujer": "la mente de una mujer que durante los últimos",
    "chica": "enamorado de la misma chica desde la infancia mary",
    "ella": "ella llego a la final",
    "madre": "su padre y su madre margarita de parma",
    "hija": "hija de inmigrantes españoles en",
    "femenino": "campeonato mundial de voleibol femenino fue la duodécima edición",
}
example_joven = {
    "joven": "",
    "inmaduro": "",
    "niño": "",
    "crio": ""
}
example_viejo = {
    "viejo": "",
    "maduro": "",
    "anciano": "",
    "adulto": ""
}


example_masc = {
    "hombre": "deseo innato que todo hombre tiene de comunicar su",
    "chico": "fue un chico interesado en artes",
    "el": "el parque nacional liwonde",
    "padre": "la muerte de su padre en 1832 se formó",
    "hijo": "le dice a su hijo aún no nacido como",
    "masculino": "el mito es esencialmente masculino y entre las causas",
}

example_diagnose = {
    "ario": "establecer que el pueblo ario vivió en inmemoriales tiempos",
    "educación": "sentido de vida religión educación y cultura para cada mujer",
    "pagado": "un rescate muy grande pagado por sus seguidores a",
    "cocinar": "empezó a cocinar una sopa usando",
    "lavar": "era directamente usado para lavar ropa por eso la",
    "deporte": "se convirtió en el deporte más popular del país",
    "ropa": "usan el kimono una ropa tradicional japonesa",
    "pelea": "mal por la violenta pelea entre ambos hermanos",
    "enfermero": "en enfermería el diagnóstico enfermero o diagnóstico de enfermería es",
    "ganar": "una necesidad un modo de ganar",
    "líder": "del estado en manos del líder opositor henrique capriles para el",
    "coser": "realizar tareas domésticas básicas como coser y poner la mesa",
    "cuidar": "de la fpf encargada de cuidar los intereses de los clubes",
    "cirujano": "afrancesado ocupando el puesto de cirujano militar en el ejército josefino",
    "rey": "la princesa jeongsung esposa del rey danjong que ascendió al trono",
    "reina": "año ganó el título de reina de la bahía en el"
}


# --- Comma-separated views of the dicts above ---
# Iterating a dict yields its keys and .values() yields its values, both in
# insertion order, so no intermediate list over .items() is needed.
fem_words = ','.join(example_fem)
fem_contexts = ','.join(example_fem.values())
masc_words = ','.join(example_masc)
masc_contexts = ','.join(example_masc.values())
young_words = ','.join(example_joven)
old_words = ','.join(example_viejo)
diagnose_words = ','.join(example_diagnose)
diagnose_contexts = ','.join(example_diagnose.values())

positive_money_words = 'ahorrar,economizar,administrar,manejar,negocio,beneficios'
negative_money_words = 'malgastar,derrochar'
diagnose_money = 'alemán,australiano,argentino,millonario,rico,pobre'

# NOTE(review): these two lists separate items with ', ' while every other
# list in this module uses ','. Confirm the consumer strips the whitespace.
lazy_words = 'vago, perezoso, gandul'
active_words = 'trabajar, esfuerzo, trabajador'

# --- Example rows: three word lists per row ---
examples1_explorar_sesgo_en_palabras = [
    [fem_words, masc_words, diagnose_words],
    [old_words, young_words, diagnose_words],
    [positive_money_words, negative_money_words, diagnose_money],
    [lazy_words, active_words, diagnose_money]
]

# --- Example rows: five word lists per row, diagnose list last ---
examples2_explorar_sesgo_en_palabras = [
    [fem_words, masc_words, young_words, old_words, diagnose_words],
    [lazy_words, active_words, positive_money_words, negative_money_words, diagnose_money],
]


# --- Example rows: five word lists per row, diagnose list first ---
examples_explorar_relaciones_entre_palabras = [
    [diagnose_words, fem_words, masc_words, young_words, old_words],
    [diagnose_money, lazy_words, active_words, positive_money_words, negative_money_words],
]
85
+
86
+
87
+ # Examples bias phrase
88
  examples_sesgos_frases = [
89
  ["los * manejan bien la plata.", "argentinos,pobres,millonarios,australianos"],
90
  ["los cordobeses bailan *.", ""],
interfaces/{interface_sesgoEnFrases.py → interface_biasPhrase.py} RENAMED
@@ -3,15 +3,21 @@ import pandas as pd
3
  from tool_info import TOOL_INFO
4
  from modules.module_logsManager import HuggingFaceDatasetSaver
5
  from modules.module_connection import PhraseBiasExplorerConnector
6
- from examples.examples_es import examples_sesgos_frases
7
 
8
 
9
  def interface(
10
  language_model: str,
11
  available_logs: bool,
12
- lang: str="spanish"
13
  ) -> gr.Blocks:
14
 
 
 
 
 
 
 
15
  # --- Init logs ---
16
  log_callback = HuggingFaceDatasetSaver(
17
  available_logs=available_logs,
@@ -127,13 +133,13 @@ def interface(
127
  save_field = [sent, word_list]
128
  log_callback.setup(
129
  components=save_field,
130
- flagging_dir="logs_phrases_bias"
131
  )
132
 
133
  btn.click(
134
  fn=lambda *args: log_callback.flag(
135
  flag_data=args,
136
- flag_option="phrases_bias",
137
  username="vialibre"
138
  ),
139
  inputs=save_field,
 
3
  from tool_info import TOOL_INFO
4
  from modules.module_logsManager import HuggingFaceDatasetSaver
5
  from modules.module_connection import PhraseBiasExplorerConnector
6
+
7
 
8
 
9
  def interface(
10
  language_model: str,
11
  available_logs: bool,
12
+ lang: str="es"
13
  ) -> gr.Blocks:
14
 
15
+ # -- Load examples --
16
+ if lang == 'es':
17
+ from examples.examples_es import examples_sesgos_frases
18
+ elif lang == 'en':
19
+ from examples.examples_en import examples_sesgos_frases
20
+
21
  # --- Init logs ---
22
  log_callback = HuggingFaceDatasetSaver(
23
  available_logs=available_logs,
 
133
  save_field = [sent, word_list]
134
  log_callback.setup(
135
  components=save_field,
136
+ flagging_dir="logs_phrase_bias"
137
  )
138
 
139
  btn.click(
140
  fn=lambda *args: log_callback.flag(
141
  flag_data=args,
142
+ flag_option="phrase_bias",
143
  username="vialibre"
144
  ),
145
  inputs=save_field,
interfaces/interface_crowsPairs.py CHANGED
@@ -3,15 +3,21 @@ import pandas as pd
3
  from tool_info import TOOL_INFO
4
  from modules.module_logsManager import HuggingFaceDatasetSaver
5
  from modules.module_connection import CrowsPairsExplorerConnector
6
- from examples.examples_es import examples_crows_pairs
7
 
8
 
9
  def interface(
10
  language_model: str,
11
  available_logs: bool,
12
- lang: str="spanish"
13
  ) -> gr.Blocks:
14
 
 
 
 
 
 
 
15
  # --- Init logs ---
16
  log_callback = HuggingFaceDatasetSaver(
17
  available_logs=available_logs,
@@ -20,7 +26,7 @@ def interface(
20
 
21
  # --- Init vars ---
22
  connector = CrowsPairsExplorerConnector(
23
- language_model=language_model,
24
  )
25
 
26
  # --- Load language ---
@@ -115,7 +121,7 @@ def interface(
115
  save_field = [sent0, sent1, sent2, sent3, sent4, sent5]
116
  log_callback.setup(
117
  components=save_field,
118
- flagging_dir="logs_crows_pairs"
119
  )
120
 
121
  btn.click(
 
3
  from tool_info import TOOL_INFO
4
  from modules.module_logsManager import HuggingFaceDatasetSaver
5
  from modules.module_connection import CrowsPairsExplorerConnector
6
+
7
 
8
 
9
  def interface(
10
  language_model: str,
11
  available_logs: bool,
12
+ lang: str="es"
13
  ) -> gr.Blocks:
14
 
15
+ # -- Load examples --
16
+ if lang == 'es':
17
+ from examples.examples_es import examples_crows_pairs
18
+ elif lang == 'en':
19
+ from examples.examples_en import examples_crows_pairs
20
+
21
  # --- Init logs ---
22
  log_callback = HuggingFaceDatasetSaver(
23
  available_logs=available_logs,
 
26
 
27
  # --- Init vars ---
28
  connector = CrowsPairsExplorerConnector(
29
+ language_model=language_model
30
  )
31
 
32
  # --- Load language ---
 
121
  save_field = [sent0, sent1, sent2, sent3, sent4, sent5]
122
  log_callback.setup(
123
  components=save_field,
124
+ flagging_dir=f"logs_crows_pairs"
125
  )
126
 
127
  btn.click(
language/.gitignore CHANGED
@@ -1,2 +1,2 @@
1
  __pycache__
2
- english.json
 
1
  __pycache__
2
+ en.json
language/{spanish.json → es.json} RENAMED
@@ -1,8 +1,43 @@
1
  {
2
  "app": {
 
 
 
3
  "phraseExplorer": "Sesgo en frases",
4
  "crowsPairsExplorer": "Crows-Pairs"
5
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  "PhraseExplorer_interface": {
7
  "step1": "1. Ingrese una frase",
8
  "step2": "2. Ingrese palabras de interés (Opcional)",
@@ -26,6 +61,20 @@
26
  "plot": "Visualización de proporciones",
27
  "examples": "Ejemplos"
28
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  "CrowsPairs_interface": {
30
  "title": "1. Ingrese frases a comparar",
31
  "sent0": "Frase Nº 1 (*)",
@@ -34,7 +83,7 @@
34
  "sent3": "Frase Nº 4 (Opcional)",
35
  "sent4": "Frase Nº 5 (Opcional)",
36
  "sent5": "Frase Nº 6 (Opcional)",
37
- "commonPlacholder": "Utilice < y > para destacar la/las palabra/as de interés",
38
  "compareButton": "Comparar",
39
  "plot": "Visualización de proporciones",
40
  "examples": "Ejemplos"
 
1
  {
2
  "app": {
3
+ "wordExplorer": "Explorar palabras",
4
+ "biasWordExplorer": "Sesgo en palabras",
5
+ "dataExplorer": "Datos",
6
  "phraseExplorer": "Sesgo en frases",
7
  "crowsPairsExplorer": "Crows-Pairs"
8
  },
9
+ "WordExplorer_interface": {
10
+ "title": "Escribí algunas palabras para visualizar sus palabras relacionadas",
11
+ "wordList1": "Lista de palabras 1",
12
+ "wordList2": "Lista de palabras 2",
13
+ "wordList3": "Lista de palabras 3",
14
+ "wordList4": "Lista de palabras 4",
15
+ "wordListToDiagnose": "Lista de palabras a diagnosticar",
16
+ "plotNeighbours": {
17
+ "title": "Graficar palabras relacionadas",
18
+ "quantity": "Cantidad"
19
+ },
20
+ "options": {
21
+ "font-size": "Tamaño de fuente",
22
+ "transparency": "Transparencia"
23
+ },
24
+ "plot_button": "¡Graficar en el espacio!",
25
+ "examples": "Ejemplos"
26
+ },
27
+ "BiasWordExplorer_interface": {
28
+ "step1": "1. Escribí palabras para diagnosticar separadas por comas",
29
+ "step2&2Spaces": "2. Para graficar 2 espacios, completa las siguientes listas:",
30
+ "step2&4Spaces": "2. Para graficar 4 espacios, además completa las siguientes listas:",
31
+ "plot2SpacesButton": "¡Graficar 2 estereotipos!",
32
+ "plot4SpacesButton": "¡Graficar 4 estereotipos!",
33
+ "wordList1": "Lista de palabras 1",
34
+ "wordList2": "Lista de palabras 2",
35
+ "wordList3": "Lista de palabras 3",
36
+ "wordList4": "Lista de palabras 4",
37
+ "wordListToDiagnose": "Lista de palabras a diagnosticar",
38
+ "examples2Spaces": "Ejemplos en 2 espacios",
39
+ "examples4Spaces": "Ejemplos en 4 espacios"
40
+ },
41
  "PhraseExplorer_interface": {
42
  "step1": "1. Ingrese una frase",
43
  "step2": "2. Ingrese palabras de interés (Opcional)",
 
61
  "plot": "Visualización de proporciones",
62
  "examples": "Ejemplos"
63
  },
64
+ "DataExplorer_interface": {
65
+ "step1": "1. Ingrese una palabra de interés",
66
+ "step2": "2. Seleccione cantidad máxima de contextos a recuperar",
67
+ "step3": "3. Seleccione conjuntos de interés",
68
+ "inputWord": {
69
+ "title": "Palabra",
70
+ "placeholder": "Ingresar aquí la palabra ..."
71
+ },
72
+ "wordInfoButton": "Obtener información de palabra",
73
+ "wordContextButton": "Buscar contextos",
74
+ "wordDistributionTitle": "Distribución de palabra en vocabulario",
75
+ "frequencyPerSetTitle": "Frecuencias de aparición por conjunto",
76
+ "contextList": "Lista de contextos"
77
+ },
78
  "CrowsPairs_interface": {
79
  "title": "1. Ingrese frases a comparar",
80
  "sent0": "Frase Nº 1 (*)",
 
83
  "sent3": "Frase Nº 4 (Opcional)",
84
  "sent4": "Frase Nº 5 (Opcional)",
85
  "sent5": "Frase Nº 6 (Opcional)",
86
+ "commonPlacholder": "Utilice los símbolos < y > para destacar palabra/as de interés",
87
  "compareButton": "Comparar",
88
  "plot": "Visualización de proporciones",
89
  "examples": "Ejemplos"
modules/module_connection.py CHANGED
@@ -1,8 +1,7 @@
 
1
  from modules.module_rankSents import RankSents
2
  from modules.module_crowsPairs import CrowsPairs
3
  from typing import List, Tuple
4
- from abc import ABC
5
-
6
 
7
  class Connector(ABC):
8
  def parse_word(
@@ -20,6 +19,7 @@ class Connector(ABC):
20
  words = array_in_string.strip()
21
  if not words:
22
  return []
 
23
  words = [
24
  self.parse_word(word)
25
  for word in words.split(',') if word.strip() != ''
@@ -31,11 +31,9 @@ class Connector(ABC):
31
  err: str
32
  ) -> str:
33
 
34
- # Mod
35
  if err:
36
  err = "<center><h3>" + err + "</h3></center>"
37
- return err
38
-
39
 
40
  class PhraseBiasExplorerConnector(Connector):
41
  def __init__(
@@ -43,13 +41,8 @@ class PhraseBiasExplorerConnector(Connector):
43
  **kwargs
44
  ) -> None:
45
 
46
- # Mod
47
- if 'language_model' in kwargs:
48
  language_model = kwargs.get('language_model')
49
- else:
50
- raise KeyError
51
-
52
- if 'lang' in kwargs:
53
  lang = kwargs.get('lang')
54
  else:
55
  raise KeyError
@@ -90,7 +83,6 @@ class PhraseBiasExplorerConnector(Connector):
90
  all_plls_scores = self.phrase_bias_explorer.Label.compute(all_plls_scores)
91
  return self.process_error(err), all_plls_scores, ""
92
 
93
-
94
  class CrowsPairsExplorerConnector(Connector):
95
  def __init__(
96
  self,
@@ -116,15 +108,16 @@ class CrowsPairsExplorerConnector(Connector):
116
  sent5: str
117
  ) -> Tuple:
118
 
 
119
  err = self.crows_pairs_explorer.errorChecking(
120
- sent0, sent1, sent2, sent3, sent4, sent5
121
  )
122
 
123
  if err:
124
  return self.process_error(err), "", ""
125
 
126
  all_plls_scores = self.crows_pairs_explorer.rank(
127
- sent0, sent1, sent2, sent3, sent4, sent5
128
  )
129
 
130
  all_plls_scores = self.crows_pairs_explorer.Label.compute(all_plls_scores)
 
1
+ from abc import ABC
2
  from modules.module_rankSents import RankSents
3
  from modules.module_crowsPairs import CrowsPairs
4
  from typing import List, Tuple
 
 
5
 
6
  class Connector(ABC):
7
  def parse_word(
 
19
  words = array_in_string.strip()
20
  if not words:
21
  return []
22
+
23
  words = [
24
  self.parse_word(word)
25
  for word in words.split(',') if word.strip() != ''
 
31
  err: str
32
  ) -> str:
33
 
 
34
  if err:
35
  err = "<center><h3>" + err + "</h3></center>"
36
+ return err
 
37
 
38
  class PhraseBiasExplorerConnector(Connector):
39
  def __init__(
 
41
  **kwargs
42
  ) -> None:
43
 
44
+ if 'language_model' in kwargs and 'lang' in kwargs:
 
45
  language_model = kwargs.get('language_model')
 
 
 
 
46
  lang = kwargs.get('lang')
47
  else:
48
  raise KeyError
 
83
  all_plls_scores = self.phrase_bias_explorer.Label.compute(all_plls_scores)
84
  return self.process_error(err), all_plls_scores, ""
85
 
 
86
  class CrowsPairsExplorerConnector(Connector):
87
  def __init__(
88
  self,
 
108
  sent5: str
109
  ) -> Tuple:
110
 
111
+ sent_list = [sent0, sent1, sent2, sent3, sent4, sent5]
112
  err = self.crows_pairs_explorer.errorChecking(
113
+ sent_list
114
  )
115
 
116
  if err:
117
  return self.process_error(err), "", ""
118
 
119
  all_plls_scores = self.crows_pairs_explorer.rank(
120
+ sent_list
121
  )
122
 
123
  all_plls_scores = self.crows_pairs_explorer.Label.compute(all_plls_scores)
modules/module_crowsPairs.py CHANGED
@@ -1,6 +1,6 @@
1
  from modules.module_customPllLabel import CustomPllLabel
2
  from modules.module_pllScore import PllScore
3
- from typing import Dict
4
 
5
  class CrowsPairs:
6
  def __init__(
@@ -15,19 +15,13 @@ class CrowsPairs:
15
 
16
  def errorChecking(
17
  self,
18
- sent0: str,
19
- sent1: str,
20
- sent2: str,
21
- sent3: str,
22
- sent4: str,
23
- sent5: str
24
  ) -> str:
25
 
26
  out_msj = ""
27
- all_sents = [sent0, sent1, sent2, sent3, sent4, sent5]
28
 
29
  mandatory_sents = [0,1]
30
- for sent_id, sent in enumerate(all_sents):
31
  c_sent = sent.strip()
32
  if c_sent:
33
  if not self.pllScore.sentIsCorrect(c_sent):
@@ -35,28 +29,22 @@ class CrowsPairs:
35
  break
36
  else:
37
  if sent_id in mandatory_sents:
38
- out_msj = f"Error: La farse Nº{sent_id+1} no puede estar vacia!"
39
  break
40
 
41
  return out_msj
42
 
43
  def rank(
44
  self,
45
- sent0: str,
46
- sent1: str,
47
- sent2: str,
48
- sent3: str,
49
- sent4: str,
50
- sent5: str
51
  ) -> Dict[str, float]:
52
 
53
- err = self.errorChecking(sent0, sent1, sent2, sent3, sent4, sent5)
54
  if err:
55
  raise Exception(err)
56
 
57
- all_sents = [sent0, sent1, sent2, sent3, sent4, sent5]
58
  all_plls_scores = {}
59
- for sent in all_sents:
60
  if sent:
61
  all_plls_scores[sent] = self.pllScore.compute(sent)
62
 
 
1
  from modules.module_customPllLabel import CustomPllLabel
2
  from modules.module_pllScore import PllScore
3
+ from typing import Dict, List
4
 
5
  class CrowsPairs:
6
  def __init__(
 
15
 
16
  def errorChecking(
17
  self,
18
+ sent_list: List[str],
 
 
 
 
 
19
  ) -> str:
20
 
21
  out_msj = ""
 
22
 
23
  mandatory_sents = [0,1]
24
+ for sent_id, sent in enumerate(sent_list):
25
  c_sent = sent.strip()
26
  if c_sent:
27
  if not self.pllScore.sentIsCorrect(c_sent):
 
29
  break
30
  else:
31
  if sent_id in mandatory_sents:
32
+ out_msj = f"Error: La frase Nº{sent_id+1} no puede ser vacia!"
33
  break
34
 
35
  return out_msj
36
 
37
  def rank(
38
  self,
39
+ sent_list: List[str],
 
 
 
 
 
40
  ) -> Dict[str, float]:
41
 
42
+ err = self.errorChecking(sent_list)
43
  if err:
44
  raise Exception(err)
45
 
 
46
  all_plls_scores = {}
47
+ for sent in sent_list:
48
  if sent:
49
  all_plls_scores[sent] = self.pllScore.compute(sent)
50
 
modules/module_languageModel.py CHANGED
@@ -1,22 +1,23 @@
1
- # --- Imports libs ---
2
  from transformers import AutoTokenizer, AutoModelForMaskedLM
3
 
4
  class LanguageModel:
5
  def __init__(
6
  self,
7
- model_name: str
8
  ) -> None:
9
-
10
  print("Downloading language model...")
11
  self.__tokenizer = AutoTokenizer.from_pretrained(model_name)
12
  self.__model = AutoModelForMaskedLM.from_pretrained(model_name)
13
 
14
  def initTokenizer(
15
  self
16
- ):
 
17
  return self.__tokenizer
18
 
19
  def initModel(
20
  self
21
- ):
 
22
  return self.__model
 
 
1
  from transformers import AutoTokenizer, AutoModelForMaskedLM
2
 
class LanguageModel:
    """Load and hold the tokenizer and masked language model for one model name."""

    def __init__(
        self,
        model_name: str
    ) -> None:
        """Load the tokenizer and the masked language model.

        Args:
            model_name: Identifier passed unchanged to both
                ``AutoTokenizer.from_pretrained`` and
                ``AutoModelForMaskedLM.from_pretrained``.
        """

        print("Downloading language model...")
        # Double-underscore attributes are name-mangled, so the loaded objects
        # are only reachable through the accessor methods below.
        self.__tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.__model = AutoModelForMaskedLM.from_pretrained(model_name)

    def initTokenizer(
        self
    ) -> AutoTokenizer:
        """Return the tokenizer object created in ``__init__``."""

        # NOTE(review): the annotation names the factory class; the object
        # returned by from_pretrained() is presumably a concrete tokenizer
        # class - confirm before relying on this hint for type checking.
        return self.__tokenizer

    def initModel(
        self
    ) -> AutoModelForMaskedLM:
        """Return the masked language model object created in ``__init__``."""

        # NOTE(review): same caveat as initTokenizer() regarding the factory
        # class used as the return annotation.
        return self.__model
modules/module_pllScore.py CHANGED
@@ -84,7 +84,7 @@ class PllScore:
84
  sent: str
85
  ) -> float:
86
 
87
- assert(self.sentIsCorrect(sent)), f"Error: La frase ({sent}) no posee el formato correcto!"
88
 
89
  outside_words = re.sub("\<.*?\>", "", sent.replace("<", " < ").replace(">", " > "))
90
  outside_words = [w for w in outside_words.split() if w != ""]
 
84
  sent: str
85
  ) -> float:
86
 
87
+ assert(self.sentIsCorrect(sent)), f"Error: The sentence '{sent}' does not have the correct format!"
88
 
89
  outside_words = re.sub("\<.*?\>", "", sent.replace("<", " < ").replace(">", " > "))
90
  outside_words = [w for w in outside_words.split() if w != ""]
modules/module_rankSents.py CHANGED
@@ -21,7 +21,7 @@ class RankSents:
21
  )
22
  self.softmax = torch.nn.Softmax(dim=-1)
23
 
24
- if lang == "spanish":
25
  self.articles = [
26
  'un','una','unos','unas','el','los','la','las','lo'
27
  ]
@@ -32,7 +32,7 @@ class RankSents:
32
  'y','o','ni','que','pero','si'
33
  ]
34
 
35
- elif lang == "english":
36
  self.articles = [
37
  'a','an', 'the'
38
  ]
@@ -50,16 +50,16 @@ class RankSents:
50
 
51
  out_msj = ""
52
  if not sent:
53
- out_msj = "Error: Debe ingresar una frase!"
54
  elif sent.count("*") > 1:
55
  out_msj= " Error: La frase ingresada debe contener solo un ' * '!"
56
  elif sent.count("*") == 0:
57
- out_msj= " Error: La frase ingresada necesita contener un ' * ' para poder predecir la palabra!"
58
  else:
59
  sent_len = len(self.tokenizer.encode(sent.replace("*", self.tokenizer.mask_token)))
60
  max_len = self.tokenizer.max_len_single_sentence
61
  if sent_len > max_len:
62
- out_msj = f"Error: La sentencia posee mas de {max_len} tokens!"
63
 
64
  return out_msj
65
 
@@ -135,11 +135,11 @@ class RankSents:
135
 
136
  def rank(self,
137
  sent: str,
138
- word_list: List[str],
139
- banned_word_list: List[str],
140
- articles: bool,
141
- prepositions: bool,
142
- conjunctions: bool
143
  ) -> Dict[str, float]:
144
 
145
  err = self.errorChecking(sent)
 
21
  )
22
  self.softmax = torch.nn.Softmax(dim=-1)
23
 
24
+ if lang == "es":
25
  self.articles = [
26
  'un','una','unos','unas','el','los','la','las','lo'
27
  ]
 
32
  'y','o','ni','que','pero','si'
33
  ]
34
 
35
+ elif lang == "en":
36
  self.articles = [
37
  'a','an', 'the'
38
  ]
 
50
 
51
  out_msj = ""
52
  if not sent:
53
+ out_msj = "Error: Debes ingresar una frase!"
54
  elif sent.count("*") > 1:
55
  out_msj= " Error: La frase ingresada debe contener solo un ' * '!"
56
  elif sent.count("*") == 0:
57
+ out_msj= " Error: La frase ingresada necesita contener un ' * ' para poder inferir la palabra!"
58
  else:
59
  sent_len = len(self.tokenizer.encode(sent.replace("*", self.tokenizer.mask_token)))
60
  max_len = self.tokenizer.max_len_single_sentence
61
  if sent_len > max_len:
62
+ out_msj = f"Error: La frase ingresada posee mas de {max_len} tokens!"
63
 
64
  return out_msj
65
 
 
135
 
136
  def rank(self,
137
  sent: str,
138
+ word_list: List[str]=[],
139
+ banned_word_list: List[str]=[],
140
+ articles: bool=False,
141
+ prepositions: bool=False,
142
+ conjunctions: bool=False
143
  ) -> Dict[str, float]:
144
 
145
  err = self.errorChecking(sent)
tool.cfg ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [INTERFACE]
2
+ # ['es' | 'en']
3
+ language = es
4
+
5
+ [LMODEL]
6
+ # [bert-base-uncased | dccuchile/bert-base-spanish-wwm-uncased]
7
+ language_model = dccuchile/bert-base-spanish-wwm-uncased
8
+
9
+ [LOGS]
10
+ # [True | False]
11
+ available_logs = True