Wizmik12 committed on
Commit
022b401
1 Parent(s): f53a293

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -17
app.py CHANGED
@@ -2,36 +2,86 @@ import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
3
 
4
 
5
-
6
- # Model in Hugging Hub
7
- tokenizer = AutoTokenizer.from_pretrained("Andresmfs/st5s-es-inclusivo")
8
- model = AutoModelForSeq2SeqLM.from_pretrained("Andresmfs/st5s-es-inclusivo")
9
-
10
- def make_neutral(phrase):
11
- # Define prompt for converting gendered text to neutral
12
- input_ids = tokenizer(phrase, return_tensors="pt").input_ids
13
-
14
- # Call the LLM to generate neutral text
15
- outputs = model.generate(input_ids)
16
-
17
- return tokenizer.decode(outputs[0], skip_special_tokens=True)
18
-
19
  # Ejemplos de preguntas
20
  mis_ejemplos = [
21
  ["La cocina de los gallegos es fabulosa."],
22
  ["Los niños juegan a la pelota."],
23
- ["Los científicos son muy listos"],
24
  ["Las enfermeras se esforzaron mucho durante la pandemia."],
 
25
 
26
  ]
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
  iface = gr.Interface(
30
- fn=make_neutral,
31
  inputs="text",
32
  outputs="text",
33
  title="ES Inclusive Language",
34
  description="Enter a Spanish phrase and get it converted into neutral/inclusive form.",
35
  examples = mis_ejemplos
36
  )
37
- iface.launch()
 
2
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
3
 
4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
# Example phrases offered in the UI (Spanish gendered sentences).
mis_ejemplos = [
    ["La cocina de los gallegos es fabulosa."],
    ["Los niños juegan a la pelota."],
    ["Los científicos son muy listos."],
    ["Las enfermeras se esforzaron mucho durante la pandemia."],
    ["Los políticos no son del agrado de los ciudadanos."],
]
14
 
15
# ---------------------------------------------------------------
# Load the complete model quantized to 4 bits
# ---------------------------------------------------------------
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

hub_model = 'Andresmfs/merged_aguila-prueba-guardado'

# Tokenizer from the Hub (custom code allowed by the repo).
tokenizer = AutoTokenizer.from_pretrained(hub_model, trust_remote_code=True)

# 4-bit quantization settings: NF4 weights, bfloat16 compute,
# single (not double) quantization.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=False,
)

# Quantized causal LM, placed automatically on available devices.
model = AutoModelForCausalLM.from_pretrained(
    hub_model,
    quantization_config=bnb_config,
    trust_remote_code=True,
    device_map="auto",
)

# Sampling configuration shared by every generate() call below.
generation_config = model.generation_config
generation_config.max_new_tokens = 100
generation_config.temperature = 0.7
generation_config.top_p = 0.7
generation_config.num_return_sequences = 1
generation_config.pad_token_id = tokenizer.eos_token_id
generation_config.eos_token_id = tokenizer.eos_token_id
generation_config.do_sample = True
51
+
52
# Define inference function
def translate_es_inclusivo(exclusive_text):
    """Rewrite a Spanish phrase into inclusive/neutral language.

    Builds an instruction prompt around ``exclusive_text``, runs the
    module-level 4-bit causal LM, and returns only the newly generated
    continuation (the prompt tokens are sliced off before decoding).

    :param exclusive_text: Spanish text in gendered/exclusive form.
    :return: the model's inclusive-language rewrite as a string.
    """
    # generate input prompt
    eval_prompt = f"""Reescribe el siguiente texto utilizando lenguaje inclusivo.\n
Texto: {exclusive_text}\n
Texto en lenguaje inclusivo:"""

    # tokenize input and move it to the model's device
    model_input = tokenizer(eval_prompt, return_tensors="pt").to(model.device)

    # get length of encoded prompt (used both for the budget and the slice)
    prompt_token_len = len(model_input['input_ids'][0])

    # For long prompts, allow generation proportional to the input length.
    # BUG FIX: the original assigned a float (len + 0.2 * len) to
    # max_new_tokens; generate() requires an integer, so cast explicitly.
    if prompt_token_len > 80:
        model.generation_config.max_new_tokens = int(prompt_token_len * 1.2)

    # generate and decode, dropping the echoed prompt tokens
    with torch.no_grad():
        inclusive_text = tokenizer.decode(
            model.generate(**model_input, generation_config=generation_config)[0][prompt_token_len:],
            skip_special_tokens=True,
        )

    return inclusive_text
76
+
77
+
78
 
79
# Gradio UI: single text box in, inclusive rewrite out.
iface = gr.Interface(
    fn=translate_es_inclusivo,
    inputs="text",
    outputs="text",
    title="ES Inclusive Language",
    description="Enter a Spanish phrase and get it converted into neutral/inclusive form.",
    examples=mis_ejemplos,
)
# BUG FIX: the interface is bound to `iface`, but the original called
# `demo.launch()` — an undefined name that raises NameError at startup.
iface.launch()