import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Model path - use the actual Hugging Face model ID or local path
MODEL_PATH = "TOOTLE/Gemma_instruct_model_gguf"  # or your local model path

alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
You are a software engineering expert and your job is to help your junior solve coding problems.

### Input:
{}

### Response:
"""


def load_model():
    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_PATH,
        torch_dtype=torch.float16,  # use float16 to save memory
        device_map="auto",
        offload_folder="offload",  # folder for offloading weights that don't fit in memory
    )
    return model, tokenizer


def chatbot_response(prompt):
    # Format the user question into the Alpaca-style prompt and tokenize it
    inputs = tokenizer(
        alpaca_prompt.format(prompt),
        return_tensors="pt",
        truncation=True,
        max_length=512,
    ).to(model.device)
    outputs = model.generate(
        **inputs,  # pass input_ids and attention_mask together
        max_new_tokens=1024,
        temperature=0.7,
        do_sample=True,
    )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Keep only the text after the "### Response:" marker
    return response.split("### Response:")[-1].strip()


# Load model and tokenizer
model, tokenizer = load_model()

# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# 💬 Chat with Gemma Model")
    with gr.Row():
        input_text = gr.Textbox(
            label="Ask your question:",
            placeholder="Example: Write a Python function that adds two float numbers...",
        )
        output_text = gr.Textbox(label="Model response:")
    submit_button = gr.Button("Send")
    submit_button.click(chatbot_response, inputs=input_text, outputs=output_text)

if __name__ == "__main__":
    demo.launch()
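
# --- Optional: loading GGUF weights directly (a sketch, not part of the original script) ---
# The repo name suggests this checkpoint may be stored in GGUF format. If the plain
# from_pretrained call above fails, recent transformers versions can load GGUF
# checkpoints by naming the file explicitly via gguf_file (this requires the `gguf`
# package). The filename "model.gguf" below is a placeholder assumption - check the
# repo for the actual file name:
#
#   tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, gguf_file="model.gguf")
#   model = AutoModelForCausalLM.from_pretrained(MODEL_PATH, gguf_file="model.gguf")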