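# NOTE: the next words are page-capture residue, not program text. They appear to be the hosting Space's status banner at capture time: "Spaces: Runtime error".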
import gradio as gr | |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM | |
# Example sentences offered to the user. Each sentence is wrapped in its own
# single-element list, giving one row per example.
_frases_de_ejemplo = (
    "La cocina de los gallegos es fabulosa",
    "Los niños juegan a la pelota",
    "Los científicos son muy trabajadores",
    "Las enfermeras se esforzaron mucho durante la pandemia",
    "Los ciudadanos Marcos y Ernesto no están contentos con los políticos",
)
mis_ejemplos = [[frase] for frase in _frases_de_ejemplo]
# Load complete model in 4bits | |
################## | |
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig | |
import torch | |
# Hub repository id used for both the tokenizer and the model weights.
hub_model = 'somosnlp/es-inclusivo-translator'

# Tokenizer comes from the same repository as the model.
tokenizer = AutoTokenizer.from_pretrained(hub_model, trust_remote_code=True)

# Quantization settings: 4-bit NF4 weights, bfloat16 compute, no double
# quantization.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_use_double_quant=False,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type='nf4',
    load_in_4bit=True,
)

# Causal LM loaded with the quantization settings above; device placement is
# delegated to the library via device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    hub_model,
    device_map="auto",
    trust_remote_code=True,
    quantization_config=bnb_config,
)
# Generation settings. `generation_config` is the model's own config object
# (an alias, not a copy), so every assignment below also changes
# `model.generation_config`.
generation_config = model.generation_config

# Applied in this order. Pad and EOS both use the tokenizer's EOS token id.
# `do_sample` is switched on so that `temperature` / `top_p` are actually used.
_generation_overrides = {
    "max_new_tokens": 100,
    "temperature": 0.7,
    "top_p": 0.7,
    "num_return_sequences": 1,
    "pad_token_id": tokenizer.eos_token_id,
    "eos_token_id": tokenizer.eos_token_id,
    "do_sample": True,
}
for _option, _setting in _generation_overrides.items():
    setattr(generation_config, _option, _setting)
# Define inference function
def translate_es_inclusivo(exclusive_text):
    """Rewrite a Spanish text in inclusive language with the loaded model.

    Args:
        exclusive_text: Spanish text to rewrite.

    Returns:
        The text generated after the prompt, decoded with special tokens
        skipped. Empty or whitespace-only input returns an empty string
        without calling the model.
    """
    # Nothing to rewrite: skip tokenization and generation entirely.
    if not exclusive_text or not exclusive_text.strip():
        return ""

    # Instruction prompt; the model's continuation is the rewritten text.
    # NOTE(review): the string value matches the text visible in this file
    # (blank line between sections, no leading spaces). Confirm it matches the
    # prompt format the model was fine-tuned on.
    eval_prompt = (
        "Reescribe el siguiente texto utilizando lenguaje inclusivo.\n\n"
        f"Texto: {exclusive_text}\n\n"
        "Texto en lenguaje inclusivo:"
    )

    model_input = tokenizer(eval_prompt, return_tensors="pt").to(model.device)
    prompt_token_len = model_input["input_ids"].shape[-1]

    # Prompts longer than 80 tokens get a budget of the prompt length plus
    # 20 %. The budget is an int (it is a token count) and is passed per call,
    # so the shared generation config is never modified and one long request
    # cannot change the budget of later ones.
    max_new_tokens = generation_config.max_new_tokens
    if prompt_token_len > 80:
        max_new_tokens = prompt_token_len + prompt_token_len // 5

    # Inference only: no gradients needed.
    with torch.no_grad():
        output_ids = model.generate(
            **model_input,
            generation_config=generation_config,
            max_new_tokens=max_new_tokens,
        )

    # The output sequence starts with the prompt tokens; keep only what
    # follows them.
    return tokenizer.decode(
        output_ids[0][prompt_token_len:],
        skip_special_tokens=True,
    )
# <-- set article variable -->
# Markdown text with one bullet per section (motivation, team, social impact).
# Built with parenthesised string concatenation so that source indentation can
# never leak into the text, which a backslash continuation inside a string
# literal allows.
article = (
    "- **Motivation:** Languages are powerful tools to communicate ideas, but their use is not impartial. "
    "The selection of words carries inherent biases and reflects subjective perspectives. "
    "In some cases, language is wielded to enforce ideologies. "
    "The purpose of this app is to automatically translate Spanish phrases into neutral/inclusive phrases, "
    "while maintaining grammar correctness and consistency.\n"
    "- **Team Members:** Gaia Quintana Fleitas (gaiaq), Andrés Martínez Fernández-Salguero (andresmfs), "
    "Imanuel Rozenberg (manu_20392), Miguel López (wizmik12), Josué Sauca (josue_sauca).\n"
    "- **Social Impact:** An inclusive translator holds significant social impact by promoting equity and "
    "representation within texts. By rectifying biases ingrained in language and fostering inclusivity, "
    "it combats discrimination, amplifies the visibility of marginalized groups, and contributes to the "
    "cultivation of a more inclusive and respectful society."
)
# Build the web UI: one text input and one text output wired to the inference
# function, with the example sentences and the article text attached.
iface = gr.Interface(
    translate_es_inclusivo,
    "text",
    "text",
    examples=mis_ejemplos,
    title="ES Inclusive Language (Hackathon SomosNLP '24)",
    description="Enter a Spanish phrase and get it converted into neutral/inclusive form.",
    article=article,
)

# Start serving the interface.
iface.launch()