"""Gradio front-end for a fine-tuned causal language model acting as a maths tutor.

The model and tokenizer are loaded once at import time and reused for every
request coming from the web UI.
"""

import logging

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

logger = logging.getLogger(__name__)

# Path (or Hub repository id) of the folder holding the model files
# (.bin weights, tokenizer). Replace with the actual location of your model.
MODEL_PATH = "TOOTLE/deepseek_finetuning_model_education"

# Marker that separates the prompt from the model's answer in the template.
RESPONSE_MARKER = "### Response:"

# Generation settings.
MAX_PROMPT_TOKENS = 512
MAX_NEW_TOKENS = 1024
TEMPERATURE = 0.7

# NOTE(review): the template text is kept exactly as it appears in the file
# (single spaces between sentences and section headers). If the model was
# fine-tuned with line breaks around the "###" headers, restore them here so
# inference matches the training prompt -- TODO confirm against training code.
prompt_style = (
    "Below is an instruction that describes a task, paired with an input "
    "that provides further context. "
    "Write a response that appropriately completes the request. "
    "Before answering, think carefully about the question and create a "
    "step-by-step chain of thoughts to ensure a logical and accurate response. "
    "Your goal is to teach maths a beginner so make it friendly and accessible. "
    "Break down your chain of thoughts as for him/her to understand. "
    "### Instruction: "
    "You are a maths expert with advanced knowledge in pedagogy, arithmetics, "
    "geometry, analysis, calculus. "
    "Please answer the following questions. "
    "### Question: {} "
    "### Response: "
)


def load_model():
    """Load the model and its tokenizer from ``MODEL_PATH``.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_PATH,
        torch_dtype=torch.float16,  # float16 to save memory
        device_map="auto",
        offload_folder="offload",  # folder used when weights are offloaded
    )
    return model, tokenizer


def chatbot_response(prompt: str) -> str:
    """Generate the model's answer to a user question.

    Args:
        prompt: The question typed by the user.

    Returns:
        The generated answer text, without the prompt template. A short hint
        is returned instead when the question is empty.
    """
    if not prompt or not prompt.strip():
        # Avoid spending a full generation on an empty question.
        return "Please enter a question."

    inputs = tokenizer(
        prompt_style.format(prompt),
        return_tensors="pt",
        truncation=True,
        max_length=MAX_PROMPT_TOKENS,
    ).to(model.device)  # inputs must live on the same device as the model
    logger.debug("Tokenized inputs: %s", inputs)

    outputs = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=MAX_NEW_TOKENS,
        temperature=TEMPERATURE,
        do_sample=True,
    )
    logger.debug("Generated token ids: %s", outputs)

    # generate() returns prompt + continuation; decode only the continuation so
    # the answer is correct even if truncation removed the response marker
    # from the prompt.
    prompt_length = inputs["input_ids"].shape[1]
    decoded = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    logger.debug("Decoded continuation: %s", decoded)

    # Keep the text after the last marker in case the model repeats it.
    answer = decoded.split(RESPONSE_MARKER)[-1].strip()
    logger.debug("Final answer: %s", answer)
    return answer


# Load model and tokenizer once, at import time.
model, tokenizer = load_model()

# Gradio interface.
# NOTE(review): the title mentions "Gemma" and the placeholder mentions
# relativity, while MODEL_PATH points at a DeepSeek maths fine-tune -- confirm
# the intended UI wording.
with gr.Blocks() as demo:
    gr.Markdown("# 💬 Chat with Gemma Model")
    with gr.Row():
        input_text = gr.Textbox(
            label="Ask your question:",
            placeholder="Example: explain relativity",
        )
        output_text = gr.Textbox(label="Model response:")
    submit_button = gr.Button("Send")
    submit_button.click(chatbot_response, inputs=input_text, outputs=output_text)

if __name__ == "__main__":
    demo.launch()