import spaces
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from datetime import datetime
import os

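# Markdown copy rendered in the UI: title, model description, training details,
# and community links.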
Title = """# Welcome to 🌟Tonic's 🌠Lucie-7B-Instruct Demo""" |
|
|
|
description = """ |
|
🌠Lucie-7B-Instruct is a fine-tuned version of [Lucie-7B](https://huggingface.co./OpenLLM-France/Lucie-7B), an open-source, multilingual causal language model created by OpenLLM-France. |
|
|
|
🌠Lucie-7B-Instruct is fine-tuned on synthetic instructions produced by ChatGPT and Gemma and a small set of customized prompts about OpenLLM and Lucie. |
|
""" |
|
|
|
training = """ |
|
## Training details |
|
|
|
### Training data |
|
|
|
Lucie-7B-Instruct is trained on the following datasets: |
|
* [Alpaca-cleaned](https://huggingface.co./datasets/yahma/alpaca-cleaned) (English; 51604 samples) |
|
* [Alpaca-cleaned-fr](https://huggingface.co./datasets/cmh/alpaca_data_cleaned_fr_52k) (French; 51655 samples) |
|
* [Magpie-Gemma](https://huggingface.co./datasets/Magpie-Align/Magpie-Gemma2-Pro-200K-Filtered) (English; 195167 samples) |
|
* [Wildchat](https://huggingface.co./datasets/allenai/WildChat-1M) (French subset; 26436 samples) |
|
* Hard-coded prompts concerning OpenLLM and Lucie (based on [allenai/tulu-3-hard-coded-10x](https://huggingface.co./datasets/allenai/tulu-3-hard-coded-10x)) |
|
* French: openllm_french.jsonl (24x10 samples) |
|
* English: openllm_english.jsonl (24x10 samples)""" |
|
|
|
join_us = """ |
|
## Join us: |
|
🌟TeamTonic🌟 is always making cool demos! Join our active builder's 🛠️community 👻 |
|
[![Join us on Discord](https://img.shields.io/discord/1109943800132010065?label=Discord&logo=discord&style=flat-square)](https://discord.gg/qdfnvSPcqP) |
|
On 🤗Huggingface: [MultiTransformer](https://huggingface.co./MultiTransformer) |
|
On 🌐Github: [Tonic-AI](https://github.com/tonic-ai) & contribute to🌟 [Build Tonic](https://git.tonic-ai.com/contribute) |
|
🤗Big thanks to Yuvi Sharma and all the folks at huggingface for the community grant 🤗 |
|
""" |
|
|
|
|
|
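# Model checkpoint and the device string used when preparing inputs below.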
model_id = "OpenLLM-France/Lucie-7B-Instruct" |
|
device = "cuda" if torch.cuda.is_available() else "cpu" |
|
|
|
|
|
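# Hugging Face read token, expected in the READTOKEN environment variable
# (e.g. a Space secret); fail fast if it is missing.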
hf_token = os.getenv('READTOKEN')
if not hf_token:
    raise ValueError("Please set the READTOKEN environment variable")

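# Load the tokenizer and model once at startup; bfloat16 weights with
# device_map="auto" let accelerate place the model on the available hardware.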
tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    token=hf_token,
    trust_remote_code=True
)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    token=hf_token,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True
)

config_json = model.config.to_dict()

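# Build a short markdown summary of the most informative entries in the model config.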
def format_model_info(config):
    info = []
    important_keys = [
        "model_type", "vocab_size", "hidden_size", "num_attention_heads",
        "num_hidden_layers", "max_position_embeddings", "torch_dtype"
    ]
    for key in important_keys:
        if key in config:
            value = config[key]
            if key == "torch_dtype" and hasattr(value, "name"):
                value = value.name
            info.append(f"**{key}:** {value}")
    return "\n".join(info)

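# Generation entry point; on ZeroGPU hardware, @spaces.GPU requests a GPU for the
# duration of each call.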
@spaces.GPU
def generate_response(system_prompt, user_prompt, temperature, max_new_tokens, top_p, repetition_penalty, top_k):
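    # Prompt markup used by this demo. If the tokenizer ships a chat template, an
    # equivalent alternative (not used here) would be:
    #   messages = [{"role": "system", "content": system_prompt},
    #               {"role": "user", "content": user_prompt}]
    #   full_prompt = tokenizer.apply_chat_template(
    #       messages, tokenize=False, add_generation_prompt=True)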
full_prompt = f"""<|system|>{system_prompt}</s> |
|
<|user|>{user_prompt}</s> |
|
<|assistant|>""" |
|
|
|
|
|
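    # Tokenize the prompt and move the tensors to the target device.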
    inputs = tokenizer(full_prompt, return_tensors="pt").to(device)

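    # Sample a completion; the UI sliders control temperature, top-p, top-k,
    # repetition penalty and the maximum number of new tokens.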
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id
    )

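    # Decode and return only the text after the assistant marker (the model's reply).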
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response.split("<|assistant|>")[-1].strip()

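# Gradio UI: title and model card copy, model/tokenizer configuration summaries,
# community links, then the prompt controls, sampling sliders and the output box.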
with gr.Blocks() as demo:
    gr.Markdown(Title)

    with gr.Row():
        with gr.Column():
            gr.Markdown(description)
        with gr.Column():
            gr.Markdown(training)

    with gr.Row():
        with gr.Column():
            with gr.Group():
                gr.Markdown("### Model Configuration")
                gr.Markdown(format_model_info(config_json))

        with gr.Column():
            with gr.Group():
                gr.Markdown("### Tokenizer Configuration")
                gr.Markdown(f"""
**Vocabulary Size:** {tokenizer.vocab_size}
**Model Max Length:** {tokenizer.model_max_length}
**Padding Token:** {tokenizer.pad_token}
**EOS Token:** {tokenizer.eos_token}
""")

    with gr.Row():
        with gr.Group():
            gr.Markdown(join_us)

    with gr.Row():
        with gr.Column():
            system_prompt = gr.Textbox(
                label="Message Système",
                value="Tu es Lucie, une assistante IA française serviable et amicale. Tu réponds toujours en français de manière précise et utile. Tu es honnête et si tu ne sais pas quelque chose, tu le dis simplement.",
                lines=3
            )

            user_prompt = gr.Textbox(
                label="🗣️Votre message",
                placeholder="Entrez votre texte ici...",
                lines=5
            )

            with gr.Accordion("🧪Paramètres avancés", open=False):
                temperature = gr.Slider(
                    minimum=0.1,
                    maximum=2.0,
                    value=0.7,
                    step=0.1,
                    label="🌡️Temperature"
                )
                max_new_tokens = gr.Slider(
                    minimum=1,
                    maximum=2048,
                    value=512,
                    step=1,
                    label="💶Longueur maximale"
                )
                top_p = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.9,
                    step=0.1,
                    label="🏅Top-p"
                )
                top_k = gr.Slider(
                    minimum=1,
                    maximum=100,
                    value=50,
                    step=1,
                    label="🏆Top-k"
                )
                repetition_penalty = gr.Slider(
                    minimum=1.0,
                    maximum=2.0,
                    value=1.2,
                    step=0.1,
                    label="🦜Pénalité de répétition"
                )

            generate_btn = gr.Button("🌠Générer")

        with gr.Column():
            output = gr.Textbox(
                label="🌠Lucie",
                lines=10
            )

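    # Clickable examples; each row follows the order of the inputs list below
    # (system prompt, user prompt, temperature, max_new_tokens, top_p,
    # repetition_penalty, top_k).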
    gr.Examples(
        examples=[
            [
                "Tu es Lucie, une assistante IA française serviable et amicale.",
                "Bonjour! Comment vas-tu aujourd'hui?",
                0.7,
                512,
                0.9,
                1.2,
                50
            ],
            [
                "Tu es une experte en intelligence artificielle.",
                "Peux-tu m'expliquer ce qu'est l'intelligence artificielle?",
                0.8,
                1024,
                0.95,
                1.1,
                40
            ],
            [
                "Tu es une poétesse française.",
                "Écris un court poème sur Paris.",
                0.9,
                256,
                0.95,
                1.3,
                60
            ],
            [
                "Tu es une experte en gastronomie française.",
                "Quels sont les plats traditionnels français les plus connus?",
                0.7,
                768,
                0.9,
                1.2,
                50
            ],
            [
                "Tu es une historienne spécialisée dans l'histoire de France.",
                "Explique-moi l'histoire de la Révolution française en quelques phrases.",
                0.6,
                1024,
                0.85,
                1.1,
                30
            ]
        ],
        inputs=[
            system_prompt,
            user_prompt,
            temperature,
            max_new_tokens,
            top_p,
            repetition_penalty,
            top_k
        ],
        outputs=output,
        label="Exemples"
    )

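    # Wire the button to the generation function.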
    generate_btn.click(
        fn=generate_response,
        inputs=[system_prompt, user_prompt, temperature, max_new_tokens, top_p, repetition_penalty, top_k],
        outputs=output
    )

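# ssr_mode=False disables Gradio's server-side rendering when launching the demo.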
if __name__ == "__main__":
    demo.launch(ssr_mode=False)