cecilemacaire committed
Commit 4bb7f61 · verified · 1 Parent(s): 89be0dc

Create app.py

Files changed (1)
  1. app.py +55 -0
app.py ADDED
@@ -0,0 +1,55 @@
+ import streamlit as st
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+ import pandas as pd
+
+ # Load the model and tokenizer
+ checkpoint = "Propicto/t2p-t5-large-orfeo"
+ tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+ model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
+
+ # Read the lexicon mapping lemmas to ARASAAC pictogram IDs
+ @st.cache_data
+ def read_lexicon(lexicon):
+     df = pd.read_csv(lexicon, sep='\t')
+     # Drop the " #category" suffix and join multi-word lemmas with "_"
+     df['keyword_no_cat'] = df['lemma'].str.split(' #').str[0].str.strip().str.replace(' ', '_')
+     return df
+
+ lexicon = read_lexicon("lexicon.csv")
+
+ # Post-process the translation output: split the prediction into lemmas
+ def process_output_trad(pred):
+     return pred.split()
+
+ def get_id_picto_from_predicted_lemma(df_lexicon, lemma):
+     id_picto = df_lexicon.loc[df_lexicon['keyword_no_cat'] == lemma, 'id_picto'].tolist()
+     return (id_picto[0], lemma) if id_picto else (0, lemma)
+
+ # Generate the HTML content that displays the pictograms
+ def generate_html(ids):
+     html_content = '<html><body>'
+     for picto_id, lemma in ids:
+         if picto_id != 0:  # ignore lemmas with no matching pictogram
+             img_url = f"https://static.arasaac.org/pictograms/{picto_id}/{picto_id}_500.png"
+             html_content += f'''
+             <figure style="display:inline-block; margin:1px;">
+                 <img src="{img_url}" alt="{lemma}" width="200" height="200" />
+                 <figcaption>{lemma}</figcaption>
+             </figure>
+             '''
+     html_content += '</body></html>'
+     return html_content
+
+ # User interface
+ st.title("Pictogram Translation Generator")
+
+ sentence = st.text_input("Enter a sentence in French:")
+ if sentence:
+     inputs = tokenizer(sentence, return_tensors="pt").input_ids
+     outputs = model.generate(inputs, max_new_tokens=40, do_sample=True, top_k=30, top_p=0.95)
+     pred = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+     sentence_to_map = process_output_trad(pred)
+     pictogram_ids = [get_id_picto_from_predicted_lemma(lexicon, lemma) for lemma in sentence_to_map]
+
+     html = generate_html(pictogram_ids)
+     st.components.v1.html(html, height=600, scrolling=True)
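
Note: the app expects a tab-separated lexicon.csv (not part of this commit) with at least a lemma column and an id_picto column holding ARASAAC pictogram IDs. A minimal sketch of the normalization read_lexicon performs, using a hypothetical in-memory lexicon with placeholder lemmas and IDs:

import pandas as pd

# Hypothetical two-row lexicon; the real rows come from lexicon.csv and the
# real IDs from ARASAAC -- these values are placeholders for illustration.
df = pd.DataFrame({
    'lemma': ['manger #V', 'petit déjeuner #N'],
    'id_picto': [1111, 2222],
})
# Same normalization as read_lexicon: drop the " #category" suffix and join
# multi-word lemmas with "_" so they match the model's predicted tokens.
df['keyword_no_cat'] = df['lemma'].str.split(' #').str[0].str.strip().str.replace(' ', '_')
print(df['keyword_no_cat'].tolist())  # ['manger', 'petit_déjeuner']

With a lexicon file in place, the app can be launched with: streamlit run app.py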