Spaces:
Runtime error
Runtime error
Added type hinting and config file
Browse files- .gitignore +1 -1
- app.py +9 -4
- examples/examples_es.py +87 -1
- interfaces/{interface_sesgoEnFrases.py → interface_biasPhrase.py} +10 -4
- interfaces/interface_crowsPairs.py +10 -4
- language/.gitignore +1 -1
- language/{spanish.json → es.json} +50 -1
- modules/module_connection.py +7 -14
- modules/module_crowsPairs.py +7 -19
- modules/module_languageModel.py +6 -5
- modules/module_pllScore.py +1 -1
- modules/module_rankSents.py +10 -10
- tool.cfg +11 -0
.gitignore
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
__pycache__/
|
2 |
.env
|
3 |
-
|
|
|
1 |
__pycache__/
|
2 |
.env
|
3 |
+
logs_edia_lmodels_es/
|
app.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
# --- Imports libs ---
|
2 |
import gradio as gr
|
3 |
import pandas as pd
|
|
|
4 |
|
5 |
|
6 |
# --- Imports modules ---
|
@@ -8,14 +9,18 @@ from modules.module_languageModel import LanguageModel
|
|
8 |
|
9 |
|
10 |
# --- Imports interfaces ---
|
11 |
-
from interfaces.
|
12 |
from interfaces.interface_crowsPairs import interface as interface_crowsPairs
|
13 |
|
14 |
|
15 |
# --- Tool config ---
|
16 |
-
|
17 |
-
|
18 |
-
|
|
|
|
|
|
|
|
|
19 |
|
20 |
|
21 |
# --- Init classes ---
|
|
|
1 |
# --- Imports libs ---
|
2 |
import gradio as gr
|
3 |
import pandas as pd
|
4 |
+
import configparser
|
5 |
|
6 |
|
7 |
# --- Imports modules ---
|
|
|
9 |
|
10 |
|
11 |
# --- Imports interfaces ---
|
12 |
+
from interfaces.interface_biasPhrase import interface as interface_sesgoEnFrases
|
13 |
from interfaces.interface_crowsPairs import interface as interface_crowsPairs
|
14 |
|
15 |
|
16 |
# --- Tool config ---
|
17 |
+
cfg = configparser.ConfigParser()
|
18 |
+
cfg.read('tool.cfg')
|
19 |
+
|
20 |
+
LANGUAGE = cfg['INTERFACE']['language']
|
21 |
+
LANGUAGE_MODEL = cfg['LMODEL']['language_model']
|
22 |
+
AVAILABLE_LOGS = cfg['LOGS'].getboolean('available_logs')
|
23 |
+
|
24 |
|
25 |
|
26 |
# --- Init classes ---
|
examples/examples_es.py
CHANGED
@@ -1,4 +1,90 @@
|
|
1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
examples_sesgos_frases = [
|
3 |
["los * manejan bien la plata.", "argentinos,pobres,millonarios,australianos"],
|
4 |
["los cordobeses bailan *.", ""],
|
|
|
1 |
+
example_fem = {
|
2 |
+
"mujer": "la mente de una mujer que durante los últimos",
|
3 |
+
"chica": "enamorado de la misma chica desde la infancia mary",
|
4 |
+
"ella": "ella llego a la final",
|
5 |
+
"madre": "su padre y su madre margarita de parma",
|
6 |
+
"hija": "hija de inmigrantes españoles en",
|
7 |
+
"femenino": "campeonato mundial de voleibol femenino fue la duodécima edición",
|
8 |
+
}
|
9 |
+
example_joven = {
|
10 |
+
"joven": "",
|
11 |
+
"inmaduro": "",
|
12 |
+
"niño": "",
|
13 |
+
"crio": ""
|
14 |
+
}
|
15 |
+
example_viejo = {
|
16 |
+
"viejo": "",
|
17 |
+
"maduro": "",
|
18 |
+
"anciano": "",
|
19 |
+
"adulto": ""
|
20 |
+
}
|
21 |
+
|
22 |
+
|
23 |
+
example_masc = {
|
24 |
+
"hombre": "deseo innato que todo hombre tiene de comunicar su",
|
25 |
+
"chico": "fue un chico interesado en artes",
|
26 |
+
"el": "el parque nacional liwonde",
|
27 |
+
"padre": "la muerte de su padre en 1832 se formó",
|
28 |
+
"hijo": "le dice a su hijo aún no nacido como",
|
29 |
+
"masculino": "el mito es esencialmente masculino y entre las causas",
|
30 |
+
}
|
31 |
+
|
32 |
+
example_diagnose = {
|
33 |
+
"ario": "establecer que el pueblo ario vivió en inmemoriales tiempos",
|
34 |
+
"educación": "sentido de vida religión educación y cultura para cada mujer",
|
35 |
+
"pagado": "un rescate muy grande pagado por sus seguidores a",
|
36 |
+
"cocinar": "empezó a cocinar una sopa usando",
|
37 |
+
"lavar": "era directamente usado para lavar ropa por eso la",
|
38 |
+
"deporte": "se convirtió en el deporte más popular del país",
|
39 |
+
"ropa": "usan el kimono una ropa tradicional japonesa",
|
40 |
+
"pelea": "mal por la violenta pelea entre ambos hermanos",
|
41 |
+
"enfermero": "en enfermería el diagnóstico enfermero o diagnóstico de enfermería es",
|
42 |
+
"ganar": "una necesidad un modo de ganar",
|
43 |
+
"líder": "del estado en manos del líder opositor henrique capriles para el",
|
44 |
+
"coser": "realizar tareas domésticas básicas como coser y poner la mesa",
|
45 |
+
"cuidar": "de la fpf encargada de cuidar los intereses de los clubes",
|
46 |
+
"cirujano": "afrancesado ocupando el puesto de cirujano militar en el ejército josefino",
|
47 |
+
"rey": "la princesa jeongsung esposa del rey danjong que ascendió al trono",
|
48 |
+
"reina": "año ganó el título de reina de la bahía en el"
|
49 |
+
}
|
50 |
+
|
51 |
+
|
52 |
+
fem_words = ','.join([word for word, context in example_fem.items()])
|
53 |
+
fem_contexts = ','.join([context for word, context in example_fem.items()])
|
54 |
+
masc_words = ','.join([word for word, context in example_masc.items()])
|
55 |
+
masc_contexts = ','.join([context for word, context in example_masc.items()])
|
56 |
+
young_words = ','.join([word for word, context in example_joven.items()])
|
57 |
+
old_words = ','.join([word for word, context in example_viejo.items()])
|
58 |
+
diagnose_words = ','.join([word for word, context in example_diagnose.items()])
|
59 |
+
diagnose_contexts = ','.join([context for word, context in example_diagnose.items()])
|
60 |
+
|
61 |
+
positive_money_words = 'ahorrar,economizar,administrar,manejar,negocio,beneficios'
|
62 |
+
negative_money_words = 'malgastar,derrochar'
|
63 |
+
diagnose_money = 'alemán,australiano,argentino,millonario,rico,pobre'
|
64 |
+
|
65 |
+
lazy_words = 'vago, perezoso, gandul'
|
66 |
+
active_words = 'trabajar, esfuerzo, trabajador'
|
67 |
+
|
68 |
+
examples1_explorar_sesgo_en_palabras = [
|
69 |
+
[fem_words, masc_words, diagnose_words],
|
70 |
+
[old_words, young_words, diagnose_words],
|
71 |
+
[positive_money_words, negative_money_words, diagnose_money],
|
72 |
+
[lazy_words, active_words, diagnose_money]
|
73 |
+
]
|
74 |
+
|
75 |
+
examples2_explorar_sesgo_en_palabras = [
|
76 |
+
[fem_words, masc_words, young_words, old_words, diagnose_words],
|
77 |
+
[lazy_words, active_words, positive_money_words, negative_money_words,diagnose_money],
|
78 |
+
]
|
79 |
+
|
80 |
+
|
81 |
+
examples_explorar_relaciones_entre_palabras = [
|
82 |
+
[diagnose_words, fem_words, masc_words, young_words, old_words],
|
83 |
+
[diagnose_money, lazy_words, active_words, positive_money_words, negative_money_words],
|
84 |
+
]
|
85 |
+
|
86 |
+
|
87 |
+
# Examples bias phrase
|
88 |
examples_sesgos_frases = [
|
89 |
["los * manejan bien la plata.", "argentinos,pobres,millonarios,australianos"],
|
90 |
["los cordobeses bailan *.", ""],
|
interfaces/{interface_sesgoEnFrases.py → interface_biasPhrase.py}
RENAMED
@@ -3,15 +3,21 @@ import pandas as pd
|
|
3 |
from tool_info import TOOL_INFO
|
4 |
from modules.module_logsManager import HuggingFaceDatasetSaver
|
5 |
from modules.module_connection import PhraseBiasExplorerConnector
|
6 |
-
|
7 |
|
8 |
|
9 |
def interface(
|
10 |
language_model: str,
|
11 |
available_logs: bool,
|
12 |
-
lang: str="
|
13 |
) -> gr.Blocks:
|
14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
# --- Init logs ---
|
16 |
log_callback = HuggingFaceDatasetSaver(
|
17 |
available_logs=available_logs,
|
@@ -127,13 +133,13 @@ def interface(
|
|
127 |
save_field = [sent, word_list]
|
128 |
log_callback.setup(
|
129 |
components=save_field,
|
130 |
-
flagging_dir="
|
131 |
)
|
132 |
|
133 |
btn.click(
|
134 |
fn=lambda *args: log_callback.flag(
|
135 |
flag_data=args,
|
136 |
-
flag_option="
|
137 |
username="vialibre"
|
138 |
),
|
139 |
inputs=save_field,
|
|
|
3 |
from tool_info import TOOL_INFO
|
4 |
from modules.module_logsManager import HuggingFaceDatasetSaver
|
5 |
from modules.module_connection import PhraseBiasExplorerConnector
|
6 |
+
|
7 |
|
8 |
|
9 |
def interface(
|
10 |
language_model: str,
|
11 |
available_logs: bool,
|
12 |
+
lang: str="es"
|
13 |
) -> gr.Blocks:
|
14 |
|
15 |
+
# -- Load examples --
|
16 |
+
if lang == 'es':
|
17 |
+
from examples.examples_es import examples_sesgos_frases
|
18 |
+
elif lang == 'en':
|
19 |
+
from examples.examples_en import examples_sesgos_frases
|
20 |
+
|
21 |
# --- Init logs ---
|
22 |
log_callback = HuggingFaceDatasetSaver(
|
23 |
available_logs=available_logs,
|
|
|
133 |
save_field = [sent, word_list]
|
134 |
log_callback.setup(
|
135 |
components=save_field,
|
136 |
+
flagging_dir="logs_phrase_bias"
|
137 |
)
|
138 |
|
139 |
btn.click(
|
140 |
fn=lambda *args: log_callback.flag(
|
141 |
flag_data=args,
|
142 |
+
flag_option="phrase_bias",
|
143 |
username="vialibre"
|
144 |
),
|
145 |
inputs=save_field,
|
interfaces/interface_crowsPairs.py
CHANGED
@@ -3,15 +3,21 @@ import pandas as pd
|
|
3 |
from tool_info import TOOL_INFO
|
4 |
from modules.module_logsManager import HuggingFaceDatasetSaver
|
5 |
from modules.module_connection import CrowsPairsExplorerConnector
|
6 |
-
|
7 |
|
8 |
|
9 |
def interface(
|
10 |
language_model: str,
|
11 |
available_logs: bool,
|
12 |
-
lang: str="
|
13 |
) -> gr.Blocks:
|
14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
# --- Init logs ---
|
16 |
log_callback = HuggingFaceDatasetSaver(
|
17 |
available_logs=available_logs,
|
@@ -20,7 +26,7 @@ def interface(
|
|
20 |
|
21 |
# --- Init vars ---
|
22 |
connector = CrowsPairsExplorerConnector(
|
23 |
-
language_model=language_model
|
24 |
)
|
25 |
|
26 |
# --- Load language ---
|
@@ -115,7 +121,7 @@ def interface(
|
|
115 |
save_field = [sent0, sent1, sent2, sent3, sent4, sent5]
|
116 |
log_callback.setup(
|
117 |
components=save_field,
|
118 |
-
flagging_dir="logs_crows_pairs"
|
119 |
)
|
120 |
|
121 |
btn.click(
|
|
|
3 |
from tool_info import TOOL_INFO
|
4 |
from modules.module_logsManager import HuggingFaceDatasetSaver
|
5 |
from modules.module_connection import CrowsPairsExplorerConnector
|
6 |
+
|
7 |
|
8 |
|
9 |
def interface(
|
10 |
language_model: str,
|
11 |
available_logs: bool,
|
12 |
+
lang: str="es"
|
13 |
) -> gr.Blocks:
|
14 |
|
15 |
+
# -- Load examples --
|
16 |
+
if lang == 'es':
|
17 |
+
from examples.examples_es import examples_crows_pairs
|
18 |
+
elif lang == 'en':
|
19 |
+
from examples.examples_en import examples_crows_pairs
|
20 |
+
|
21 |
# --- Init logs ---
|
22 |
log_callback = HuggingFaceDatasetSaver(
|
23 |
available_logs=available_logs,
|
|
|
26 |
|
27 |
# --- Init vars ---
|
28 |
connector = CrowsPairsExplorerConnector(
|
29 |
+
language_model=language_model
|
30 |
)
|
31 |
|
32 |
# --- Load language ---
|
|
|
121 |
save_field = [sent0, sent1, sent2, sent3, sent4, sent5]
|
122 |
log_callback.setup(
|
123 |
components=save_field,
|
124 |
+
flagging_dir=f"logs_crows_pairs"
|
125 |
)
|
126 |
|
127 |
btn.click(
|
language/.gitignore
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
__pycache__
|
2 |
-
|
|
|
1 |
__pycache__
|
2 |
+
en.json
|
language/{spanish.json → es.json}
RENAMED
@@ -1,8 +1,43 @@
|
|
1 |
{
|
2 |
"app": {
|
|
|
|
|
|
|
3 |
"phraseExplorer": "Sesgo en frases",
|
4 |
"crowsPairsExplorer": "Crows-Pairs"
|
5 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
"PhraseExplorer_interface": {
|
7 |
"step1": "1. Ingrese una frase",
|
8 |
"step2": "2. Ingrese palabras de interés (Opcional)",
|
@@ -26,6 +61,20 @@
|
|
26 |
"plot": "Visualización de proporciones",
|
27 |
"examples": "Ejemplos"
|
28 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
"CrowsPairs_interface": {
|
30 |
"title": "1. Ingrese frases a comparar",
|
31 |
"sent0": "Frase Nº 1 (*)",
|
@@ -34,7 +83,7 @@
|
|
34 |
"sent3": "Frase Nº 4 (Opcional)",
|
35 |
"sent4": "Frase Nº 5 (Opcional)",
|
36 |
"sent5": "Frase Nº 6 (Opcional)",
|
37 |
-
"commonPlacholder": "Utilice < y > para destacar
|
38 |
"compareButton": "Comparar",
|
39 |
"plot": "Visualización de proporciones",
|
40 |
"examples": "Ejemplos"
|
|
|
1 |
{
|
2 |
"app": {
|
3 |
+
"wordExplorer": "Explorar palabras",
|
4 |
+
"biasWordExplorer": "Sesgo en palabras",
|
5 |
+
"dataExplorer": "Datos",
|
6 |
"phraseExplorer": "Sesgo en frases",
|
7 |
"crowsPairsExplorer": "Crows-Pairs"
|
8 |
},
|
9 |
+
"WordExplorer_interface": {
|
10 |
+
"title": "Escribi algunas palabras para visualizar sus palabras relacionadas",
|
11 |
+
"wordList1": "Lista de palabras 1",
|
12 |
+
"wordList2": "Lista de palabras 2",
|
13 |
+
"wordList3": "Lista de palabras 3",
|
14 |
+
"wordList4": "Lista de palabras 4",
|
15 |
+
"wordListToDiagnose": "Lista de palabras a diagnosticar",
|
16 |
+
"plotNeighbours": {
|
17 |
+
"title": "Graficar palabras relacionadas",
|
18 |
+
"quantity": "Cantidad"
|
19 |
+
},
|
20 |
+
"options": {
|
21 |
+
"font-size": "Tamaño de fuente",
|
22 |
+
"transparency": "Transparencia"
|
23 |
+
},
|
24 |
+
"plot_button": "¡Graficar en el espacio!",
|
25 |
+
"examples": "Ejemplos"
|
26 |
+
},
|
27 |
+
"BiasWordExplorer_interface": {
|
28 |
+
"step1": "1. Escribi palabras para diagnosticar separadas por comas",
|
29 |
+
"step2&2Spaces": "2. Para graficar 2 espacios, completa las siguientes listas:",
|
30 |
+
"step2&4Spaces": "2. Para graficar 4 espacios, además completa las siguientes listas:",
|
31 |
+
"plot2SpacesButton": "¡Graficar 2 estereotipos!",
|
32 |
+
"plot4SpacesButton": "¡Graficar 4 estereotipos!",
|
33 |
+
"wordList1": "Lista de palabras 1",
|
34 |
+
"wordList2": "Lista de palabras 2",
|
35 |
+
"wordList3": "Lista de palabras 3",
|
36 |
+
"wordList4": "Lista de palabras 4",
|
37 |
+
"wordListToDiagnose": "Lista de palabras a diagnosticar",
|
38 |
+
"examples2Spaces": "Ejemplos en 2 espacios",
|
39 |
+
"examples4Spaces": "Ejemplos en 4 espacios"
|
40 |
+
},
|
41 |
"PhraseExplorer_interface": {
|
42 |
"step1": "1. Ingrese una frase",
|
43 |
"step2": "2. Ingrese palabras de interés (Opcional)",
|
|
|
61 |
"plot": "Visualización de proporciones",
|
62 |
"examples": "Ejemplos"
|
63 |
},
|
64 |
+
"DataExplorer_interface": {
|
65 |
+
"step1": "1. Ingrese una palabra de interés",
|
66 |
+
"step2": "2. Seleccione cantidad máxima de contextos a recuperar",
|
67 |
+
"step3": "3. Seleccione conjuntos de interés",
|
68 |
+
"inputWord": {
|
69 |
+
"title": "Palabra",
|
70 |
+
"placeholder": "Ingresar aquí la palabra ..."
|
71 |
+
},
|
72 |
+
"wordInfoButton": "Obtener información de palabra",
|
73 |
+
"wordContextButton": "Buscar contextos",
|
74 |
+
"wordDistributionTitle": "Distribución de palabra en vocabulario",
|
75 |
+
"frequencyPerSetTitle": "Frecuencias de aparición por conjunto",
|
76 |
+
"contextList": "Lista de contextos"
|
77 |
+
},
|
78 |
"CrowsPairs_interface": {
|
79 |
"title": "1. Ingrese frases a comparar",
|
80 |
"sent0": "Frase Nº 1 (*)",
|
|
|
83 |
"sent3": "Frase Nº 4 (Opcional)",
|
84 |
"sent4": "Frase Nº 5 (Opcional)",
|
85 |
"sent5": "Frase Nº 6 (Opcional)",
|
86 |
+
"commonPlacholder": "Utilice los simbolos < y > para destacar palabra/as de interés",
|
87 |
"compareButton": "Comparar",
|
88 |
"plot": "Visualización de proporciones",
|
89 |
"examples": "Ejemplos"
|
modules/module_connection.py
CHANGED
@@ -1,8 +1,7 @@
|
|
|
|
1 |
from modules.module_rankSents import RankSents
|
2 |
from modules.module_crowsPairs import CrowsPairs
|
3 |
from typing import List, Tuple
|
4 |
-
from abc import ABC
|
5 |
-
|
6 |
|
7 |
class Connector(ABC):
|
8 |
def parse_word(
|
@@ -20,6 +19,7 @@ class Connector(ABC):
|
|
20 |
words = array_in_string.strip()
|
21 |
if not words:
|
22 |
return []
|
|
|
23 |
words = [
|
24 |
self.parse_word(word)
|
25 |
for word in words.split(',') if word.strip() != ''
|
@@ -31,11 +31,9 @@ class Connector(ABC):
|
|
31 |
err: str
|
32 |
) -> str:
|
33 |
|
34 |
-
# Mod
|
35 |
if err:
|
36 |
err = "<center><h3>" + err + "</h3></center>"
|
37 |
-
return err
|
38 |
-
|
39 |
|
40 |
class PhraseBiasExplorerConnector(Connector):
|
41 |
def __init__(
|
@@ -43,13 +41,8 @@ class PhraseBiasExplorerConnector(Connector):
|
|
43 |
**kwargs
|
44 |
) -> None:
|
45 |
|
46 |
-
|
47 |
-
if 'language_model' in kwargs:
|
48 |
language_model = kwargs.get('language_model')
|
49 |
-
else:
|
50 |
-
raise KeyError
|
51 |
-
|
52 |
-
if 'lang' in kwargs:
|
53 |
lang = kwargs.get('lang')
|
54 |
else:
|
55 |
raise KeyError
|
@@ -90,7 +83,6 @@ class PhraseBiasExplorerConnector(Connector):
|
|
90 |
all_plls_scores = self.phrase_bias_explorer.Label.compute(all_plls_scores)
|
91 |
return self.process_error(err), all_plls_scores, ""
|
92 |
|
93 |
-
|
94 |
class CrowsPairsExplorerConnector(Connector):
|
95 |
def __init__(
|
96 |
self,
|
@@ -116,15 +108,16 @@ class CrowsPairsExplorerConnector(Connector):
|
|
116 |
sent5: str
|
117 |
) -> Tuple:
|
118 |
|
|
|
119 |
err = self.crows_pairs_explorer.errorChecking(
|
120 |
-
|
121 |
)
|
122 |
|
123 |
if err:
|
124 |
return self.process_error(err), "", ""
|
125 |
|
126 |
all_plls_scores = self.crows_pairs_explorer.rank(
|
127 |
-
|
128 |
)
|
129 |
|
130 |
all_plls_scores = self.crows_pairs_explorer.Label.compute(all_plls_scores)
|
|
|
1 |
+
from abc import ABC
|
2 |
from modules.module_rankSents import RankSents
|
3 |
from modules.module_crowsPairs import CrowsPairs
|
4 |
from typing import List, Tuple
|
|
|
|
|
5 |
|
6 |
class Connector(ABC):
|
7 |
def parse_word(
|
|
|
19 |
words = array_in_string.strip()
|
20 |
if not words:
|
21 |
return []
|
22 |
+
|
23 |
words = [
|
24 |
self.parse_word(word)
|
25 |
for word in words.split(',') if word.strip() != ''
|
|
|
31 |
err: str
|
32 |
) -> str:
|
33 |
|
|
|
34 |
if err:
|
35 |
err = "<center><h3>" + err + "</h3></center>"
|
36 |
+
return err
|
|
|
37 |
|
38 |
class PhraseBiasExplorerConnector(Connector):
|
39 |
def __init__(
|
|
|
41 |
**kwargs
|
42 |
) -> None:
|
43 |
|
44 |
+
if 'language_model' in kwargs and 'lang' in kwargs:
|
|
|
45 |
language_model = kwargs.get('language_model')
|
|
|
|
|
|
|
|
|
46 |
lang = kwargs.get('lang')
|
47 |
else:
|
48 |
raise KeyError
|
|
|
83 |
all_plls_scores = self.phrase_bias_explorer.Label.compute(all_plls_scores)
|
84 |
return self.process_error(err), all_plls_scores, ""
|
85 |
|
|
|
86 |
class CrowsPairsExplorerConnector(Connector):
|
87 |
def __init__(
|
88 |
self,
|
|
|
108 |
sent5: str
|
109 |
) -> Tuple:
|
110 |
|
111 |
+
sent_list = [sent0, sent1, sent2, sent3, sent4, sent5]
|
112 |
err = self.crows_pairs_explorer.errorChecking(
|
113 |
+
sent_list
|
114 |
)
|
115 |
|
116 |
if err:
|
117 |
return self.process_error(err), "", ""
|
118 |
|
119 |
all_plls_scores = self.crows_pairs_explorer.rank(
|
120 |
+
sent_list
|
121 |
)
|
122 |
|
123 |
all_plls_scores = self.crows_pairs_explorer.Label.compute(all_plls_scores)
|
modules/module_crowsPairs.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
from modules.module_customPllLabel import CustomPllLabel
|
2 |
from modules.module_pllScore import PllScore
|
3 |
-
from typing import Dict
|
4 |
|
5 |
class CrowsPairs:
|
6 |
def __init__(
|
@@ -15,19 +15,13 @@ class CrowsPairs:
|
|
15 |
|
16 |
def errorChecking(
|
17 |
self,
|
18 |
-
|
19 |
-
sent1: str,
|
20 |
-
sent2: str,
|
21 |
-
sent3: str,
|
22 |
-
sent4: str,
|
23 |
-
sent5: str
|
24 |
) -> str:
|
25 |
|
26 |
out_msj = ""
|
27 |
-
all_sents = [sent0, sent1, sent2, sent3, sent4, sent5]
|
28 |
|
29 |
mandatory_sents = [0,1]
|
30 |
-
for sent_id, sent in enumerate(
|
31 |
c_sent = sent.strip()
|
32 |
if c_sent:
|
33 |
if not self.pllScore.sentIsCorrect(c_sent):
|
@@ -35,28 +29,22 @@ class CrowsPairs:
|
|
35 |
break
|
36 |
else:
|
37 |
if sent_id in mandatory_sents:
|
38 |
-
out_msj = f"Error: La
|
39 |
break
|
40 |
|
41 |
return out_msj
|
42 |
|
43 |
def rank(
|
44 |
self,
|
45 |
-
|
46 |
-
sent1: str,
|
47 |
-
sent2: str,
|
48 |
-
sent3: str,
|
49 |
-
sent4: str,
|
50 |
-
sent5: str
|
51 |
) -> Dict[str, float]:
|
52 |
|
53 |
-
err = self.errorChecking(
|
54 |
if err:
|
55 |
raise Exception(err)
|
56 |
|
57 |
-
all_sents = [sent0, sent1, sent2, sent3, sent4, sent5]
|
58 |
all_plls_scores = {}
|
59 |
-
for sent in
|
60 |
if sent:
|
61 |
all_plls_scores[sent] = self.pllScore.compute(sent)
|
62 |
|
|
|
1 |
from modules.module_customPllLabel import CustomPllLabel
|
2 |
from modules.module_pllScore import PllScore
|
3 |
+
from typing import Dict, List
|
4 |
|
5 |
class CrowsPairs:
|
6 |
def __init__(
|
|
|
15 |
|
16 |
def errorChecking(
|
17 |
self,
|
18 |
+
sent_list: List[str],
|
|
|
|
|
|
|
|
|
|
|
19 |
) -> str:
|
20 |
|
21 |
out_msj = ""
|
|
|
22 |
|
23 |
mandatory_sents = [0,1]
|
24 |
+
for sent_id, sent in enumerate(sent_list):
|
25 |
c_sent = sent.strip()
|
26 |
if c_sent:
|
27 |
if not self.pllScore.sentIsCorrect(c_sent):
|
|
|
29 |
break
|
30 |
else:
|
31 |
if sent_id in mandatory_sents:
|
32 |
+
out_msj = f"Error: La frase Nº{sent_id+1} no puede ser vacia!"
|
33 |
break
|
34 |
|
35 |
return out_msj
|
36 |
|
37 |
def rank(
|
38 |
self,
|
39 |
+
sent_list: List[str],
|
|
|
|
|
|
|
|
|
|
|
40 |
) -> Dict[str, float]:
|
41 |
|
42 |
+
err = self.errorChecking(sent_list)
|
43 |
if err:
|
44 |
raise Exception(err)
|
45 |
|
|
|
46 |
all_plls_scores = {}
|
47 |
+
for sent in sent_list:
|
48 |
if sent:
|
49 |
all_plls_scores[sent] = self.pllScore.compute(sent)
|
50 |
|
modules/module_languageModel.py
CHANGED
@@ -1,22 +1,23 @@
|
|
1 |
-
# --- Imports libs ---
|
2 |
from transformers import AutoTokenizer, AutoModelForMaskedLM
|
3 |
|
4 |
class LanguageModel:
|
5 |
def __init__(
|
6 |
self,
|
7 |
-
model_name
|
8 |
) -> None:
|
9 |
-
|
10 |
print("Downloading language model...")
|
11 |
self.__tokenizer = AutoTokenizer.from_pretrained(model_name)
|
12 |
self.__model = AutoModelForMaskedLM.from_pretrained(model_name)
|
13 |
|
14 |
def initTokenizer(
|
15 |
self
|
16 |
-
):
|
|
|
17 |
return self.__tokenizer
|
18 |
|
19 |
def initModel(
|
20 |
self
|
21 |
-
):
|
|
|
22 |
return self.__model
|
|
|
|
|
1 |
from transformers import AutoTokenizer, AutoModelForMaskedLM
|
2 |
|
3 |
class LanguageModel:
|
4 |
def __init__(
|
5 |
self,
|
6 |
+
model_name
|
7 |
) -> None:
|
8 |
+
|
9 |
print("Downloading language model...")
|
10 |
self.__tokenizer = AutoTokenizer.from_pretrained(model_name)
|
11 |
self.__model = AutoModelForMaskedLM.from_pretrained(model_name)
|
12 |
|
13 |
def initTokenizer(
|
14 |
self
|
15 |
+
) -> AutoTokenizer:
|
16 |
+
|
17 |
return self.__tokenizer
|
18 |
|
19 |
def initModel(
|
20 |
self
|
21 |
+
) -> AutoModelForMaskedLM:
|
22 |
+
|
23 |
return self.__model
|
modules/module_pllScore.py
CHANGED
@@ -84,7 +84,7 @@ class PllScore:
|
|
84 |
sent: str
|
85 |
) -> float:
|
86 |
|
87 |
-
assert(self.sentIsCorrect(sent)), f"Error:
|
88 |
|
89 |
outside_words = re.sub("\<.*?\>", "", sent.replace("<", " < ").replace(">", " > "))
|
90 |
outside_words = [w for w in outside_words.split() if w != ""]
|
|
|
84 |
sent: str
|
85 |
) -> float:
|
86 |
|
87 |
+
assert(self.sentIsCorrect(sent)), f"Error: The sentence '{sent}' does not have the correct format!"
|
88 |
|
89 |
outside_words = re.sub("\<.*?\>", "", sent.replace("<", " < ").replace(">", " > "))
|
90 |
outside_words = [w for w in outside_words.split() if w != ""]
|
modules/module_rankSents.py
CHANGED
@@ -21,7 +21,7 @@ class RankSents:
|
|
21 |
)
|
22 |
self.softmax = torch.nn.Softmax(dim=-1)
|
23 |
|
24 |
-
if lang == "
|
25 |
self.articles = [
|
26 |
'un','una','unos','unas','el','los','la','las','lo'
|
27 |
]
|
@@ -32,7 +32,7 @@ class RankSents:
|
|
32 |
'y','o','ni','que','pero','si'
|
33 |
]
|
34 |
|
35 |
-
elif lang == "
|
36 |
self.articles = [
|
37 |
'a','an', 'the'
|
38 |
]
|
@@ -50,16 +50,16 @@ class RankSents:
|
|
50 |
|
51 |
out_msj = ""
|
52 |
if not sent:
|
53 |
-
out_msj = "Error:
|
54 |
elif sent.count("*") > 1:
|
55 |
out_msj= " Error: La frase ingresada debe contener solo un ' * '!"
|
56 |
elif sent.count("*") == 0:
|
57 |
-
out_msj= " Error: La frase ingresada necesita contener un ' * ' para poder
|
58 |
else:
|
59 |
sent_len = len(self.tokenizer.encode(sent.replace("*", self.tokenizer.mask_token)))
|
60 |
max_len = self.tokenizer.max_len_single_sentence
|
61 |
if sent_len > max_len:
|
62 |
-
out_msj = f"Error: La
|
63 |
|
64 |
return out_msj
|
65 |
|
@@ -135,11 +135,11 @@ class RankSents:
|
|
135 |
|
136 |
def rank(self,
|
137 |
sent: str,
|
138 |
-
word_list: List[str],
|
139 |
-
banned_word_list: List[str],
|
140 |
-
articles: bool,
|
141 |
-
prepositions: bool,
|
142 |
-
conjunctions: bool
|
143 |
) -> Dict[str, float]:
|
144 |
|
145 |
err = self.errorChecking(sent)
|
|
|
21 |
)
|
22 |
self.softmax = torch.nn.Softmax(dim=-1)
|
23 |
|
24 |
+
if lang == "es":
|
25 |
self.articles = [
|
26 |
'un','una','unos','unas','el','los','la','las','lo'
|
27 |
]
|
|
|
32 |
'y','o','ni','que','pero','si'
|
33 |
]
|
34 |
|
35 |
+
elif lang == "en":
|
36 |
self.articles = [
|
37 |
'a','an', 'the'
|
38 |
]
|
|
|
50 |
|
51 |
out_msj = ""
|
52 |
if not sent:
|
53 |
+
out_msj = "Error: Debes ingresar una frase!"
|
54 |
elif sent.count("*") > 1:
|
55 |
out_msj= " Error: La frase ingresada debe contener solo un ' * '!"
|
56 |
elif sent.count("*") == 0:
|
57 |
+
out_msj= " Error: La frase ingresada necesita contener un ' * ' para poder inferir la palabra!"
|
58 |
else:
|
59 |
sent_len = len(self.tokenizer.encode(sent.replace("*", self.tokenizer.mask_token)))
|
60 |
max_len = self.tokenizer.max_len_single_sentence
|
61 |
if sent_len > max_len:
|
62 |
+
out_msj = f"Error: La frase ingresada posee mas de {max_len} tokens!"
|
63 |
|
64 |
return out_msj
|
65 |
|
|
|
135 |
|
136 |
def rank(self,
|
137 |
sent: str,
|
138 |
+
word_list: List[str]=[],
|
139 |
+
banned_word_list: List[str]=[],
|
140 |
+
articles: bool=False,
|
141 |
+
prepositions: bool=False,
|
142 |
+
conjunctions: bool=False
|
143 |
) -> Dict[str, float]:
|
144 |
|
145 |
err = self.errorChecking(sent)
|
tool.cfg
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[INTERFACE]
|
2 |
+
# ['es' | 'en']
|
3 |
+
language = es
|
4 |
+
|
5 |
+
[LMODEL]
|
6 |
+
# [bert-base-uncased | dccuchile/bert-base-spanish-wwm-uncased]
|
7 |
+
language_model = dccuchile/bert-base-spanish-wwm-uncased
|
8 |
+
|
9 |
+
[LOGS]
|
10 |
+
# [True | False]
|
11 |
+
available_logs = True
|