import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
# Path to the folder containing the model files (.bin, tokenizer)
MODEL_PATH = "TOOTLE/deepseek_finetuning_model_education"  # Replace with the actual path to your model


# Prompt template filled with str.format: the single `{}` placeholder receives
# the user's question. The template ends with an opening `<think>` tag, so the
# model's generated text continues right after it.
prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context.
Write a response that appropriately completes the request.
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.
Your goal is to teach maths a beginner so make it friendly and accessible. Break down your chain of thoughts as for him/her to understand.

### Instruction:
You are a maths expert with advanced knowledge in pedagogy, arithmetics, geometry, analysis, calculus.
Please answer the following questions.

### Question:
{}

### Response:
<think>"""

def load_model():
    """Load the tokenizer and the causal language model from ``MODEL_PATH``.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    loading_options = {
        "torch_dtype": torch.float16,  # float16 to save memory
        "device_map": "auto",
        "offload_folder": "offload",  # folder used for offloading weights
    }
    tok = AutoTokenizer.from_pretrained(MODEL_PATH)
    lm = AutoModelForCausalLM.from_pretrained(MODEL_PATH, **loading_options)
    return lm, tok


def chatbot_response(prompt):
    """Generate the model's answer to a user question.

    The question is inserted into ``prompt_style``, tokenized (truncated to
    512 tokens) and passed to ``model.generate`` with sampling enabled.

    Args:
        prompt: The question text typed by the user.

    Returns:
        The decoded text that follows the last ``### Response:`` marker, or a
        short hint when ``prompt`` is empty or contains only whitespace.
    """
    if not prompt or not prompt.strip():
        # Nothing to answer: skip a full generation pass.
        return "Please enter a question."

    inputs = tokenizer(
        prompt_style.format(prompt),
        return_tensors="pt",
        truncation=True,
        max_length=512,
    )
    # The tokenizer returns CPU tensors while the model may have been placed
    # on another device (it is loaded with device_map="auto"); generate()
    # needs both on the same device.
    inputs = inputs.to(model.device)

    outputs = model.generate(
        input_ids=inputs["input_ids"],
        # Pass the mask explicitly so generate() does not have to infer it.
        attention_mask=inputs["attention_mask"],
        max_new_tokens=1024,
        temperature=0.7,
        do_sample=True,
    )

    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # The decoded sequence echoes the prompt; keep only what follows the
    # response marker.
    return decoded.split("### Response:")[-1]

# Load the model and tokenizer once at import time; chatbot_response reads
# both as module-level globals.
model, tokenizer = load_model()

# Gradio interface: a question box and an answer box side by side, plus a
# "Send" button that runs chatbot_response on the question.
with gr.Blocks() as demo:
    # The title names the model family actually configured in MODEL_PATH.
    gr.Markdown("# 💬 Chat with the DeepSeek Maths Tutor")

    with gr.Row():
        input_text = gr.Textbox(
            label="Ask your question:",
            placeholder="Example: explain the Pythagorean theorem",
        )
        output_text = gr.Textbox(label="Model response:")

    submit_button = gr.Button("Send")
    submit_button.click(chatbot_response, inputs=input_text, outputs=output_text)

if __name__ == "__main__":
    demo.launch()