import pandas as pd
import gradio as gr
from abc import ABC
from modules.module_word2Context import Word2Context
from typing import List, Tuple
class Connector(ABC):
    """Base class holding the text helpers shared by the interface connectors."""

    def parse_word(
        self,
        word: str
    ) -> str:
        """Return ``word`` lower-cased with surrounding whitespace removed."""
        return word.lower().strip()

    def parse_words(
        self,
        array_in_string: str
    ) -> List[str]:
        """Split a comma-separated string into a list of normalised words.

        Each comma-separated item is normalised with :meth:`parse_word`.
        Items that are empty or whitespace-only are dropped, so an empty or
        blank input yields an empty list.
        """
        return [
            self.parse_word(word)
            for word in array_in_string.split(',')
            if word.strip()
        ]

    def process_error(
        self,
        err: str
    ) -> str:
        """Wrap a non-empty error message; return falsy values untouched.

        A truthy ``err`` is returned with a newline before and after it.
        Anything falsy (e.g. an empty string) is passed back unchanged so
        callers can forward "no error" as-is.
        """
        if err:
            # The newlines are written as escapes: a quoted string literal
            # cannot span several physical lines.
            # NOTE(review): if the UI expects markup around the message rather
            # than plain newlines, confirm the intended wrapper text.
            err = "\n" + err + "\n"
        return err
class Word2ContextExplorerConnector(Connector):
    """Connector between the interface callbacks and a ``Word2Context`` explorer."""

    def __init__(
        self,
        **kwargs
    ) -> None:
        """Create the underlying ``Word2Context`` explorer.

        Keyword Args:
            vocabulary: Vocabulary class instance.
            context: Context dataset HF name or path.

        Raises:
            KeyError: If ``vocabulary`` or ``context`` is absent or ``None``.
        """
        vocabulary = kwargs.get('vocabulary')
        context = kwargs.get('context')

        missing = [
            name
            for name, value in (('vocabulary', vocabulary), ('context', context))
            if value is None
        ]
        if missing:
            # Same exception type as before, but now it says what is missing.
            raise KeyError(
                "Missing required keyword argument(s): " + ", ".join(missing)
            )

        self.word2context_explorer = Word2Context(
            context,    # Context dataset HF name | path
            vocabulary  # Vocabulary class instance
        )

    def get_word_info(
        self,
        word: str
    ) -> Tuple:
        """Collect the subset information and plots for ``word``.

        The word is normalised with ``parse_word`` and then checked with the
        explorer's ``errorChecking``. If that returns a truthy value, it is
        treated as an error message and the remaining outputs keep their
        empty defaults.

        Returns:
            A 6-tuple ``(error, contexts, subsets_info, distribution_plot,
            word_cloud_plot, subsets_choice)``. ``contexts`` is always an
            empty placeholder DataFrame in this method.
        """
        word = self.parse_word(word)

        # Defaults returned as-is when the word does not pass validation.
        contexts = pd.DataFrame([], columns=[''])
        subsets_info = ""
        distribution_plot = None
        word_cloud_plot = None
        subsets_choice = gr.CheckboxGroup.update(choices=[])

        err = self.word2context_explorer.errorChecking(word)
        if not err:
            explorer = self.word2context_explorer
            subsets_info, subsets_origin_info = explorer.getSubsetsInfo(word)

            # Keep only the text before the first space of each key as the
            # checkbox label.
            clean_keys = [key.split(" ")[0].strip() for key in subsets_origin_info]
            subsets_choice = gr.CheckboxGroup.update(choices=clean_keys)

            distribution_plot = explorer.genDistributionPlot(word)
            word_cloud_plot = explorer.genWordCloudPlot(word)

        return (
            self.process_error(err),
            contexts,
            subsets_info,
            distribution_plot,
            word_cloud_plot,
            subsets_choice,
        )

    def get_word_context(
        self,
        word: str,
        n_context: int,
        subset_choice: List[str]
    ) -> Tuple:
        """Fetch contexts for ``word`` from the selected subsets.

        Args:
            word: Word to look up; normalised with ``parse_word`` first.
            n_context: Forwarded to the explorer's ``getContexts``.
            subset_choice: Selected subset names; must be non-empty.

        Returns:
            A pair ``(error, contexts)``. On failure ``contexts`` is an empty
            placeholder DataFrame. On success it has the columns ``#``,
            ``contexto``, ``conjunto`` and ``buscar``.
        """
        word = self.parse_word(word)
        contexts = pd.DataFrame([], columns=[''])

        err = self.word2context_explorer.errorChecking(word)
        if err:
            return self.process_error(err), contexts

        # Truthiness check also covers ``None`` (no selection made), which
        # ``len()`` would reject with a TypeError.
        if not subset_choice:
            err = self.process_error("Error: Palabra no ingresada y/o conjunto/s de interés no seleccionado/s!")
            return err, contexts

        ds = self.word2context_explorer.findSplits(word, subset_choice)
        list_of_contexts = self.word2context_explorer.getContexts(word, n_context, ds)

        contexts = pd.DataFrame(list_of_contexts, columns=['#', 'contexto', 'conjunto'])
        # One web link per context text, produced by the explorer's genWebLink.
        contexts["buscar"] = contexts["contexto"].apply(self.word2context_explorer.genWebLink)

        return self.process_error(err), contexts