# Spaces:
# Runtime error
# Runtime error
from transformers import * | |
import gradio as gr | |
# Hub checkpoint identifiers, each named once so a tokenizer and its model
# cannot drift apart through a typo in a repeated string literal.
_SIMPLIFICATION_CHECKPOINT = "oskrmiguel/mt5-simplification-spanish"
_SUMMARIZATION_CHECKPOINT = 'mrm8488/bert2bert_shared-spanish-finetuned-summarization'
_QUESTION_CHECKPOINT = "mrm8488/bert2bert-spanish-question-generation"
_ANSWER_CHECKPOINT = "mrm8488/distill-bert-base-spanish-wwm-cased-finetuned-spa-squad2-es"

# Text simplification (used by generate_simple_text).
tokenizer_s = AutoTokenizer.from_pretrained(_SIMPLIFICATION_CHECKPOINT)
model_s = AutoModelForSeq2SeqLM.from_pretrained(_SIMPLIFICATION_CHECKPOINT)

# Summarization (used by generate_summary).
tokenizer = BertTokenizerFast.from_pretrained(_SUMMARIZATION_CHECKPOINT)
model = EncoderDecoderModel.from_pretrained(_SUMMARIZATION_CHECKPOINT)

# Question generation (used by generate_questions).
model_q = AutoModelForSeq2SeqLM.from_pretrained(_QUESTION_CHECKPOINT)
tokenizer_q = AutoTokenizer.from_pretrained(_QUESTION_CHECKPOINT)

# Extractive question answering (used by generate_answer through nlp_a).
tokenizer_a = AutoTokenizer.from_pretrained(_ANSWER_CHECKPOINT)
model_a = AutoModelForQuestionAnswering.from_pretrained(_ANSWER_CHECKPOINT)

# Hand the pipeline the already-instantiated model_a so the checkpoint
# weights are held in memory once instead of being loaded again by name.
# The tokenizer is still requested as (identifier, kwargs) so the pipeline
# keeps using the slow (use_fast=False) tokenizer.
nlp_a = pipeline(
    'question-answering',
    model=model_a,
    tokenizer=(
        _ANSWER_CHECKPOINT,
        {"use_fast": False},
    ),
)
def generate_summary(text):
    """Summarise ``text`` with the module-level ``tokenizer`` / ``model`` pair.

    The text is tokenised as a one-item batch that is padded or truncated
    to exactly 64 tokens. The first generated sequence is decoded with
    special tokens removed and returned as a string.
    """
    encoded = tokenizer(
        [text],
        padding="max_length",
        truncation=True,
        max_length=64,
        return_tensors="pt",
    )
    generated = model.generate(
        encoded.input_ids,
        attention_mask=encoded.attention_mask,
    )
    first_sequence = generated[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)
def generate_simple_text(data):
    """Simplify ``data`` one sentence at a time.

    ``data`` is split on ``"."`` and every non-blank fragment is run through
    the module-level ``tokenizer_s`` / ``model_s`` pair.

    Returns:
        A list with one entry per processed fragment. Each entry is itself a
        single-element list holding that fragment's decoded generations
        joined by newlines. The list is never empty.
    """
    # Splitting on "." leaves an empty tail after a final period and
    # whitespace-only pieces between consecutive periods; generating from
    # those only wastes a model call and yields noise.
    fragments = [piece for piece in data.split(".") if piece.strip()]
    if not fragments:
        # Callers index the first result, so always process at least one
        # input, exactly as an unfiltered split would.
        fragments = [data]

    outputs = []
    for fragment in fragments:
        inputs = tokenizer_s(
            fragment,
            max_length=1024,
            padding=True,
            truncation=True,
            return_tensors='pt',
        )
        generated = model_s.generate(inputs['input_ids'], max_length=100)
        decoded = [
            tokenizer_s.decode(
                sequence,
                skip_special_tokens=True,
                clean_up_tokenization_spaces=False,
            )
            for sequence in generated
        ]
        outputs.append(['\n'.join(decoded)])
    return outputs
def generate_questions(data):
    """Generate one question per sentence of ``data``.

    ``data`` is split on ``"."`` and every non-blank fragment is run through
    the module-level ``tokenizer_q`` / ``model_q`` pair.

    Returns:
        A list of decoded question strings, one per processed fragment.
        The list is never empty.
    """
    # Splitting on "." leaves an empty tail after a final period and
    # whitespace-only pieces between consecutive periods; skip them instead
    # of asking the model to generate a question from nothing.
    fragments = [piece for piece in data.split(".") if piece.strip()]
    if not fragments:
        # Callers index the first question, so always process at least one
        # input, exactly as an unfiltered split would.
        fragments = [data]

    outputs = []
    for fragment in fragments:
        inputs_q = tokenizer_q(fragment, return_tensors="pt")
        outputs_q = model_q.generate(inputs_q['input_ids'], max_length=100)
        # Only the first generated sequence is kept for each fragment.
        outputs.append(tokenizer_q.decode(outputs_q[0], skip_special_tokens=True))
    return outputs
def generate_answer(question_texts, context_text):
    """Ask the ``nlp_a`` pipeline ``question_texts`` about ``context_text``.

    Returns the ``'answer'`` field of the pipeline's result.
    """
    qa_request = {'question': question_texts, 'context': context_text}
    prediction = nlp_a(qa_request)
    return prediction['answer']
def generate_paragraph(data):
    """Split ``data`` into paragraphs, one per non-blank line.

    Lines are separated on ``"\\n"``. Empty lines and lines that contain only
    whitespace are dropped, so they never reach the models or show up as
    blank rows in the rendered result. Kept lines are returned unchanged.

    Returns:
        A list of paragraph strings in their original order.
    """
    return [line for line in data.split('\n') if line.strip()]
# The gr.inputs / gr.outputs namespaces are the legacy location of these
# component classes and are absent from newer Gradio releases, where the
# same classes are exposed directly on the gradio module. Resolve whichever
# location exists so the app starts on either API generation.
_gr_inputs = getattr(gr, "inputs", gr)
_gr_outputs = getattr(gr, "outputs", gr)

# Free-text box where the story is pasted.
contexto = _gr_inputs.Textbox(lines=10, placeholder="Ingresa un cuento de niños")
# HTML panel that shows the assembled result.
resultado = _gr_outputs.HTML(label="Resultado")
# Processing steps the user can request.
opciones = _gr_inputs.CheckboxGroup(["Resumir", "Facil Lectura", "Generar Preguntas", "Ver Respuestas"])
# Character-length threshold below which a paragraph is used as-is.
parrafo_longitud = _gr_inputs.Slider(50, 500)
def generate_question(contexto, opciones, parrafo_longitud):
    """Build the HTML report for the story in ``contexto``.

    The story is split into paragraphs with ``generate_paragraph``. A
    paragraph shorter than ``parrafo_longitud`` characters is used verbatim;
    a longer one is first summarised with ``generate_summary`` and the
    optional steps then work on that summary.

    Args:
        contexto: Raw story text entered by the user.
        opciones: Collection of selected option labels. The labels read here
            are "Facil Lectura", "Generar Preguntas" and "Ver Respuestas".
        parrafo_longitud: Length threshold, in characters, that decides
            whether a paragraph is summarised.

    Returns:
        An HTML string with four ``<p>`` sections (Resumen, Texto Simple,
        Preguntas, Respuestas) whose entries are separated by ``<br/>``.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    resumen = []
    preguntas = []
    simples = []
    respuestas = []

    for i, text in enumerate(generate_paragraph(contexto)):
        if len(text) < parrafo_longitud:
            # Short paragraph: keep it as-is, no summary or questions.
            resumen.append(text)
            if "Facil Lectura" in opciones:
                simples.append(text)
            continue

        sumarize = generate_summary(text)
        resumen.append(sumarize)
        if "Generar Preguntas" in opciones:
            questions = generate_questions(sumarize)
            preguntas.append(str(i + 1) + "-> " + questions[0])
            # Answers are only available for a question generated above.
            if "Ver Respuestas" in opciones:
                respuestas.append(str(i + 1) + "-> " + generate_answer(questions[0], sumarize))
        if "Facil Lectura" in opciones:
            simples.append(generate_simple_text(sumarize)[0][0])

    secciones = (
        ("Resumen", resumen),
        ("Texto Simple", simples),
        ("Preguntas", preguntas),
        ("Respuestas", respuestas),
    )
    # Entries come from user input and model output, so escape them before
    # embedding in markup; otherwise "<" or "&" in the story would be
    # interpreted as HTML by the output component.
    return "".join(
        "<p><b>" + titulo + ":</b> "
        + '<br/>'.join(escape(entrada) for entrada in entradas)
        + "</p>"
        for titulo, entradas in secciones
    )
# Bind the report builder to its three input widgets and the HTML output,
# then start the app with debug mode enabled.
iface = gr.Interface(
    fn=generate_question,
    inputs=[
        contexto,
        opciones,
        parrafo_longitud,
    ],
    outputs=resultado,
)
iface.launch(debug=True)