Update app.py
app.py CHANGED
@@ -6,6 +6,7 @@ from datetime import datetime
 # Model description
 description = """
 # 🇫🇷 Lucie-7B-Instruct
+
 Lucie is a French language model based on Mistral-7B, fine-tuned on French data and instructions.
 This demo allows you to interact with the model and adjust various generation parameters.
 """
@@ -23,13 +24,37 @@ On 🌐Github: [Tonic-AI](https://github.com/tonic-ai) & contribute to🌟 [Buil
 model_id = "OpenLLM-France/Lucie-7B-Instruct-v1"
 device = "cuda" if torch.cuda.is_available() else "cpu"

-
+# Get the token from environment variables
+hf_token = os.getenv('READTOKEN')
+if not hf_token:
+    raise ValueError("Please set the READTOKEN environment variable")
+
+# Initialize tokenizer and model with token authentication
+tokenizer = AutoTokenizer.from_pretrained(
+    model_id,
+    token=hf_token,
+    trust_remote_code=True
+)
+
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
+    token=hf_token,
     device_map="auto",
-    torch_dtype=torch.bfloat16
+    torch_dtype=torch.bfloat16,
+    trust_remote_code=True
 )

+def format_model_info(config):
+    info = []
+    important_keys = [
+        "model_type", "vocab_size", "hidden_size", "num_attention_heads",
+        "num_hidden_layers", "max_position_embeddings", "torch_dtype"
+    ]
+    for key in important_keys:
+        if key in config:
+            info.append(f"**{key}:** {config[key]}")
+    return "\n".join(info)
+
 @spaces.GPU
 def generate_response(system_prompt, user_prompt, temperature, max_new_tokens, top_p, repetition_penalty, top_k):
     # Construct the full prompt with system and user messages
@@ -59,7 +84,24 @@ def generate_response(system_prompt, user_prompt, temperature, max_new_tokens, t

 # Create the Gradio interface
 with gr.Blocks() as demo:
-    gr.Markdown(
+    gr.Markdown(Title)
+    with gr.Row():
+        with gr.Column():
+            with gr.Group():
+                gr.Markdown("### Model Configuration")
+                gr.Markdown(format_model_info(config_json))
+
+        with gr.Column():
+            with gr.Group():
+                gr.Markdown("### Tokenizer Configuration")
+                gr.Markdown(f"""
+                **Vocabulary Size:** {tokenizer.vocab_size}
+                **Model Max Length:** {tokenizer.model_max_length}
+                **Padding Token:** {tokenizer.pad_token}
+                **EOS Token:** {tokenizer.eos_token}
+                """)
+    with gr.Row():
+        gr.Markdown(join_us)

     with gr.Row():
         with gr.Column():
@@ -123,18 +165,67 @@ with gr.Blocks() as demo:
                 lines=10
             )

-    # Example prompts
+    # Example prompts with all parameters
     gr.Examples(
         examples=[
-
-            [
-
-
-
+            # Format: [system_prompt, user_prompt, temperature, max_tokens, top_p, rep_penalty, top_k]
+            [
+                "Tu es Lucie, une assistante IA française serviable et amicale.",
+                "Bonjour! Comment vas-tu aujourd'hui?",
+                0.7,   # temperature
+                512,   # max_new_tokens
+                0.9,   # top_p
+                1.2,   # repetition_penalty
+                50     # top_k
+            ],
+            [
+                "Tu es une experte en intelligence artificielle.",
+                "Peux-tu m'expliquer ce qu'est l'intelligence artificielle?",
+                0.8,   # higher temperature for more creative explanation
+                1024,  # longer response
+                0.95,  # higher top_p for more diverse output
+                1.1,   # lower repetition penalty
+                40     # lower top_k for more focused output
+            ],
+            [
+                "Tu es une poétesse française.",
+                "Écris un court poème sur Paris.",
+                0.9,   # higher temperature for more creativity
+                256,   # shorter for poetry
+                0.95,  # higher top_p for more creative language
+                1.3,   # higher repetition penalty for unique words
+                60     # higher top_k for more varied vocabulary
+            ],
+            [
+                "Tu es une experte en gastronomie française.",
+                "Quels sont les plats traditionnels français les plus connus?",
+                0.7,   # moderate temperature for factual response
+                768,   # medium length
+                0.9,   # balanced top_p
+                1.2,   # standard repetition penalty
+                50     # standard top_k
+            ],
+            [
+                "Tu es une historienne spécialisée dans l'histoire de France.",
+                "Explique-moi l'histoire de la Révolution française en quelques phrases.",
+                0.6,   # lower temperature for more factual response
+                1024,  # longer for historical context
+                0.85,  # lower top_p for more focused output
+                1.1,   # lower repetition penalty
+                30     # lower top_k for more consistent output
+            ]
+        ],
+        inputs=[
+            system_prompt,
+            user_prompt,
+            temperature,
+            max_new_tokens,
+            top_p,
+            repetition_penalty,
+            top_k
         ],
-        inputs=[system_prompt, user_prompt],
         outputs=output,
-        label="Exemples de prompts"
+        label="Exemples de prompts avec paramètres optimisés"
     )

     # Set up the generation event
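Two names the added code relies on are worth flagging. `os.getenv('READTOKEN')` assumes the read token is exposed as an environment variable; on a Hugging Face Space that is normally done by adding READTOKEN as a repository secret. The new layout also renders `format_model_info(config_json)`, but `config_json` is not defined in the hunks shown above and is presumably built elsewhere in app.py. A minimal sketch of how such a dict could be produced, assuming `config_json` is a plain configuration dict (the use of `AutoConfig` here is an illustration, not part of this commit):

```python
from transformers import AutoConfig

# Assumption: config_json is a plain dict, matching how the new
# format_model_info() probes it with `key in config` / `config[key]`.
config = AutoConfig.from_pretrained(
    model_id,              # "OpenLLM-France/Lucie-7B-Instruct-v1"
    token=hf_token,        # read token taken from the READTOKEN secret
    trust_remote_code=True,
)
config_json = config.to_dict()

# format_model_info() (added in this commit) then renders lines such as
# "**model_type:** <value>" for whichever of its important_keys are present.
print(format_model_info(config_json))
```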