from abc import ABC
from typing import List, Tuple

import gradio as gr
import pandas as pd

from modules.module_word2Context import Word2Context


class Connector(ABC):
    """Shared input-parsing and error-formatting helpers for UI connectors."""

    def parse_word(
        self,
        word: str
    ) -> str:
        """Return ``word`` lower-cased with surrounding whitespace removed."""
        return word.lower().strip()

    def parse_words(
        self,
        array_in_string: str
    ) -> List[str]:
        """Split a comma-separated string into a list of normalized words.

        Args:
            array_in_string: Raw text holding words separated by commas.

        Returns:
            One entry per non-blank item, each normalized by ``parse_word``.
            An empty or whitespace-only input yields an empty list.
        """
        words = array_in_string.strip()
        if not words:
            return []

        return [
            self.parse_word(word)
            for word in words.split(',')
            if word.strip() != ''
        ]

    def process_error(
        self,
        err: str
    ) -> str:
        """Wrap a non-empty error message for display.

        Args:
            err: Error text; a falsy value means "no error".

        Returns:
            The message padded with two newlines on each side, or ``err``
            unchanged when it is falsy.
        """
        if err:
            # The wrapper is spelled with escape sequences: a quoted string
            # literal cannot span physical lines.
            # NOTE(review): only newlines are visible around the message here;
            # if markup originally wrapped it, restore it - confirm upstream.
            err = "\n\n" + err + "\n\n"
        return err


class Word2ContextExplorerConnector(Connector):
    """Connector between the UI callbacks and a ``Word2Context`` explorer."""

    def __init__(
        self,
        **kwargs
    ) -> None:
        """Build the underlying ``Word2Context`` explorer.

        Keyword Args:
            vocabulary: Vocabulary class instance.
            context: Context dataset HF name or path.

        Raises:
            KeyError: If ``vocabulary`` or ``context`` is missing or ``None``.
        """
        vocabulary = kwargs.get('vocabulary', None)
        context = kwargs.get('context', None)

        if vocabulary is None or context is None:
            raise KeyError(
                "Both 'vocabulary' and 'context' keyword arguments are required"
            )

        self.word2context_explorer = Word2Context(
            context,        # Context dataset HF name | path
            vocabulary      # Vocabulary class instance
        )

    def get_word_info(
        self,
        word: str
    ) -> Tuple:
        """Collect subset information and plots for ``word``.

        Args:
            word: Word typed by the user; normalized with ``parse_word``.

        Returns:
            A 6-tuple ``(err, contexts, subsets_info, distribution_plot,
            word_cloud_plot, subsets_choice)``. ``contexts`` is always an
            empty placeholder DataFrame. When the explorer's ``errorChecking``
            reports a problem, ``err`` is the formatted message, the plots
            are ``None``, ``subsets_info`` is ``""`` and the checkbox update
            has no choices.
        """
        word = self.parse_word(word)
        contexts = pd.DataFrame([], columns=[''])

        err = self.word2context_explorer.errorChecking(word)
        if err:
            # Error path: hand back neutral values for every output.
            return (
                self.process_error(err),
                contexts,
                "",
                None,
                None,
                gr.CheckboxGroup.update(choices=[]),
            )

        subsets_info, subsets_origin_info = \
            self.word2context_explorer.getSubsetsInfo(word)

        # Each choice is the text before the first space of an entry.
        clean_keys = [key.split(" ")[0].strip() for key in subsets_origin_info]
        subsets_choice = gr.CheckboxGroup.update(choices=clean_keys)

        distribution_plot = self.word2context_explorer.genDistributionPlot(word)
        word_cloud_plot = self.word2context_explorer.genWordCloudPlot(word)

        return (
            self.process_error(err),
            contexts,
            subsets_info,
            distribution_plot,
            word_cloud_plot,
            subsets_choice,
        )

    def get_word_context(
        self,
        word: str,
        n_context: int,
        subset_choice: List[str]
    ) -> Tuple:
        """Fetch contexts of ``word`` from the selected subsets.

        Args:
            word: Word typed by the user; normalized with ``parse_word``.
            n_context: Forwarded to the explorer's ``getContexts``
                (presumably the number of contexts to fetch - confirm there).
            subset_choice: Subsets selected by the user.

        Returns:
            A pair ``(err, contexts)``. On success ``contexts`` has the
            columns ``#``, ``contexto``, ``conjunto`` and ``buscar``; on
            failure it is an empty placeholder DataFrame and ``err`` holds
            the formatted message.
        """
        word = self.parse_word(word)
        contexts = pd.DataFrame([], columns=[''])

        err = self.word2context_explorer.errorChecking(word)
        if err:
            return self.process_error(err), contexts

        # Truthiness also covers ``None``, on which ``len()`` would raise.
        if not subset_choice:
            err = self.process_error(
                "Error: Palabra no ingresada y/o conjunto/s de interés no seleccionado/s!"
            )
            return err, contexts

        ds = self.word2context_explorer.findSplits(word, subset_choice)
        list_of_contexts = self.word2context_explorer.getContexts(
            word, n_context, ds
        )

        contexts = pd.DataFrame(
            list_of_contexts,
            columns=['#', 'contexto', 'conjunto']
        )
        # One link per context, built by the explorer's genWebLink.
        contexts["buscar"] = contexts["contexto"].apply(
            self.word2context_explorer.genWebLink
        )

        return self.process_error(err), contexts