# Page-header residue from the hosting site ("Spaces: Runtime error"); not part of the module.
import pandas as pd | |
import gradio as gr | |
from abc import ABC | |
from modules.module_word2Context import Word2Context | |
from typing import List, Tuple | |
class Connector(ABC):
    """Base class holding the text helpers shared by the connectors."""

    def parse_word(self, word: str) -> str:
        """Return *word* lower-cased with surrounding whitespace removed."""
        lowered = word.lower()
        return lowered.strip()

    def parse_words(self, array_in_string: str) -> List[str]:
        """Split a comma-separated string into normalized words.

        Args:
            array_in_string: Words separated by commas.

        Returns:
            Each non-blank item passed through ``parse_word``, in input
            order. An empty or whitespace-only input gives an empty list.
        """
        trimmed = array_in_string.strip()
        if trimmed == "":
            return []

        parsed: List[str] = []
        for chunk in trimmed.split(","):
            # Blank items (e.g. produced by ",,") are skipped.
            if chunk.strip():
                parsed.append(self.parse_word(chunk))
        return parsed

    def process_error(self, err: str) -> str:
        """Wrap a non-empty error message in centered ``<h3>`` markup.

        A falsy *err* is returned unchanged.
        """
        if not err:
            return err
        return "<center><h3>" + err + "</h3></center>"
class Word2ContextExplorerConnector(Connector):
    """Connect the interface callbacks to a ``Word2Context`` explorer."""

    def __init__(self, **kwargs) -> None:
        """Build the underlying ``Word2Context`` explorer.

        Keyword Args:
            vocabulary: Vocabulary class instance.
            context: Context dataset HF name or path.

        Raises:
            KeyError: If ``vocabulary`` or ``context`` is missing or ``None``.
        """
        vocabulary = kwargs.get('vocabulary')
        context = kwargs.get('context')

        missing = [
            name
            for name, value in (('vocabulary', vocabulary), ('context', context))
            if value is None
        ]
        if missing:
            # Same exception type as before, but now says what is missing.
            raise KeyError(
                "Missing required keyword argument(s): " + ", ".join(missing)
            )

        self.word2context_explorer = Word2Context(
            context,     # Context dataset HF name | path
            vocabulary   # Vocabulary class instance
        )

    def get_word_info(self, word: str) -> Tuple:
        """Collect the subset information and plots for *word*.

        Args:
            word: Word to look up; normalized with ``parse_word`` first.

        Returns:
            A 6-tuple ``(error, contexts, subsets_info, distribution_plot,
            word_cloud_plot, subsets_choice)``. ``contexts`` is always an
            empty DataFrame here. ``subsets_choice`` is a Gradio update
            carrying the checkbox choices. When the explorer's
            ``errorChecking`` reports an error, ``error`` holds the
            formatted message, the info string is empty, both plots are
            ``None`` and the choices list is empty.
        """
        word = self.parse_word(word)
        contexts = pd.DataFrame([], columns=[''])

        err = self.word2context_explorer.errorChecking(word)
        if err:
            # gr.update() is used instead of gr.CheckboxGroup.update(): the
            # per-component classmethod was removed in Gradio 4, while
            # gr.update() is available in both Gradio 3 and later releases.
            return (
                self.process_error(err),
                contexts,
                "",
                None,
                None,
                gr.update(choices=[]),
            )

        subsets_info, subsets_origin_info = self.word2context_explorer.getSubsetsInfo(word)

        # Only the text before the first space of each key is offered as a choice.
        clean_keys = [key.split(" ")[0].strip() for key in subsets_origin_info]
        subsets_choice = gr.update(choices=clean_keys)

        distribution_plot = self.word2context_explorer.genDistributionPlot(word)
        word_cloud_plot = self.word2context_explorer.genWordCloudPlot(word)

        return (
            self.process_error(err),
            contexts,
            subsets_info,
            distribution_plot,
            word_cloud_plot,
            subsets_choice,
        )

    def get_word_context(
        self,
        word: str,
        n_context: int,
        subset_choice: List[str]
    ) -> Tuple:
        """Fetch contexts of *word* from the selected subsets.

        Args:
            word: Word to look up; normalized with ``parse_word`` first.
            n_context: Forwarded unchanged to the explorer's ``getContexts``.
            subset_choice: Selected subsets. An empty selection (or ``None``)
                produces an error message instead of a lookup.

        Returns:
            A 2-tuple ``(error, contexts)``. On success ``contexts`` has the
            columns ``#``, ``contexto``, ``conjunto`` and ``buscar``, where
            ``buscar`` is the explorer's ``genWebLink`` applied to each
            ``contexto`` value. On failure ``error`` holds the formatted
            message and ``contexts`` is an empty DataFrame.
        """
        word = self.parse_word(word)
        contexts = pd.DataFrame([], columns=[''])

        err = self.word2context_explorer.errorChecking(word)
        if err:
            return self.process_error(err), contexts

        # Truthiness check also covers a None selection, which len() would
        # reject with a TypeError.
        if not subset_choice:
            err = self.process_error("Error: Palabra no ingresada y/o conjunto/s de interés no seleccionado/s!")
            return err, contexts

        ds = self.word2context_explorer.findSplits(word, subset_choice)
        list_of_contexts = self.word2context_explorer.getContexts(word, n_context, ds)

        contexts = pd.DataFrame(list_of_contexts, columns=['#', 'contexto', 'conjunto'])
        contexts["buscar"] = contexts["contexto"].apply(self.word2context_explorer.genWebLink)

        return self.process_error(err), contexts