Update app.py
app.py CHANGED
@@ -2,36 +2,86 @@ import gradio as gr
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 
 
-
-# Model on the Hugging Face Hub
-tokenizer = AutoTokenizer.from_pretrained("Andresmfs/st5s-es-inclusivo")
-model = AutoModelForSeq2SeqLM.from_pretrained("Andresmfs/st5s-es-inclusivo")
-
-def make_neutral(phrase):
-    # Define prompt for converting gendered text to neutral
-    input_ids = tokenizer(phrase, return_tensors="pt").input_ids
-
-    # Call the LLM to generate neutral text
-    outputs = model.generate(input_ids)
-
-    return tokenizer.decode(outputs[0], skip_special_tokens=True)
-
 # Example phrases
 mis_ejemplos = [
     ["La cocina de los gallegos es fabulosa."],
     ["Los niños juegan a la pelota."],
-    ["Los científicos son muy listos"],
+    ["Los científicos son muy listos."],
     ["Las enfermeras se esforzaron mucho durante la pandemia."],
+    ["Los políticos no son del agrado de los ciudadanos."]
 
 ]
 
+# Load the complete model in 4-bit
+##################
+from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+import torch
+
+hub_model = 'Andresmfs/merged_aguila-prueba-guardado'
+
+# Load the tokenizer
+tokenizer = AutoTokenizer.from_pretrained(hub_model, trust_remote_code=True)
+
+## Load the model in 4-bit
+# bnb configuration
+bnb_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_quant_type='nf4',
+    bnb_4bit_compute_dtype=torch.bfloat16,
+    bnb_4bit_use_double_quant=False
+)
+
+# Model
+model = AutoModelForCausalLM.from_pretrained(
+    hub_model,
+    quantization_config=bnb_config,
+    trust_remote_code=True,
+    device_map="auto"
+)
+
+# Generation config
+generation_config = model.generation_config
+generation_config.max_new_tokens = 100
+generation_config.temperature = 0.7
+generation_config.top_p = 0.7
+generation_config.num_return_sequences = 1
+generation_config.pad_token_id = tokenizer.eos_token_id
+generation_config.eos_token_id = tokenizer.eos_token_id
+generation_config.do_sample = True
+
+# Define the inference function
+def translate_es_inclusivo(exclusive_text):
+
+    # Build the input prompt
+    eval_prompt = f"""Reescribe el siguiente texto utilizando lenguaje inclusivo.\n
+    Texto: {exclusive_text}\n
+    Texto en lenguaje inclusivo:"""
+
+    # Tokenize the input
+    model_input = tokenizer(eval_prompt, return_tensors="pt").to(model.device)
+
+    # Allow longer outputs for long prompts (roughly 1.2x the prompt length)
+    if len(model_input['input_ids'][0]) > 80:
+        model.generation_config.max_new_tokens = int(1.2 * len(model_input['input_ids'][0]))
+
+    # Length of the encoded prompt
+    prompt_token_len = len(model_input['input_ids'][0])
+
+    # Generate and decode only the tokens after the prompt
+    with torch.no_grad():
+        inclusive_text = tokenizer.decode(model.generate(**model_input, generation_config=generation_config)[0][prompt_token_len:],
+                                          skip_special_tokens=True)
+
+    return inclusive_text
+
+
 
 iface = gr.Interface(
-    fn=make_neutral,
+    fn=translate_es_inclusivo,
     inputs="text",
     outputs="text",
     title="ES Inclusive Language",
     description="Enter a Spanish phrase and get it converted into neutral/inclusive form.",
     examples = mis_ejemplos
 )
-
+iface.launch()
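
The core of the change: the old st5s-es-inclusivo seq2seq checkpoint is replaced by the causal LM Andresmfs/merged_aguila-prueba-guardado, loaded in 4-bit NF4 with bfloat16 compute, which keeps the weights at roughly a quarter of their fp16 size. A quick sanity check of what actually got loaded (a sketch, not part of the commit), run right after the model loads:

# Report how much memory the quantized weights occupy and where they landed.
print(f"Model footprint: {model.get_memory_footprint() / 1e9:.2f} GB")
print(f"Device map: {model.hf_device_map}")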
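
Unlike the encoder-decoder model, a causal LM's generate() output begins with the prompt tokens, which is why translate_es_inclusivo slices the output at prompt_token_len before decoding. The same step written out more explicitly (a sketch reusing the function's local names):

# generate() returns prompt + completion; drop the echoed prompt tokens.
output_ids = model.generate(**model_input, generation_config=generation_config)
completion = tokenizer.decode(output_ids[0, prompt_token_len:], skip_special_tokens=True)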
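
Because the inference function is a plain Python callable, it can be smoke-tested from a shell before the Gradio UI comes up; a minimal check (hypothetical, not part of the commit) reusing the examples already defined in the app:

# Run the first two examples through the model and print the rewrites.
for (ejemplo,) in mis_ejemplos[:2]:
    print(ejemplo, "->", translate_es_inclusivo(ejemplo))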
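
One caveat with the new generation settings: do_sample=True at temperature 0.7 means the Space can return a different rewrite on each call for the same input. If reproducible outputs are wanted, for example when comparing prompt variants, transformers' set_seed utility can pin the sampling (optional, not in the commit):

from transformers import set_seed

# Fix the RNGs (Python, NumPy, torch) so sampled generations repeat.
set_seed(42)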