"""Gradio Space: Arabic text generation with CohereForAI/c4ai-command-r7b-arabic-02-2025."""
import gradio as gr
import os
from huggingface_hub import login
import spaces

# Model information
model_id = "CohereForAI/c4ai-command-r7b-arabic-02-2025"

# Try to authenticate explicitly using the HF_TOKEN environment variable
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    print(f"Found HF_TOKEN in environment (length: {len(hf_token)})")
    try:
        login(token=hf_token)
        print("Logged in to Hugging Face with token")
    except Exception as e:
        print(f"Error logging in: {str(e)}")
else:
    print("No HF_TOKEN found in environment variables")


# This function is GPU-accelerated via ZeroGPU when a GPU is available
@spaces.GPU
def generate_text(prompt, temperature=0.3, max_length=100):
    try:
        import torch
        from transformers import AutoTokenizer, AutoModelForCausalLM

        print(f"Starting model loading process for {model_id}")

        # Load tokenizer (authentication is provided via the token parameter)
        print("Loading tokenizer...")
        tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
        print("Tokenizer loaded successfully")

        # Load model (low_cpu_mem_usage and offload_state_dict help with large checkpoints)
        print("Loading model...")
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch.float16,
            device_map="auto",
            token=hf_token,
            low_cpu_mem_usage=True,
            offload_state_dict=True,
        )
        print("Model loaded successfully")

        # Build the chat-formatted input for the prompt
        print(f"Generating text for prompt: {prompt[:30]}...")
        messages = [{"role": "user", "content": prompt}]
        input_ids = tokenizer.apply_chat_template(
            messages,
            tokenize=True,
            add_generation_prompt=True,
            return_tensors="pt",
        ).to(model.device)

        # Generate the response tokens
        gen_tokens = model.generate(
            input_ids,
            max_new_tokens=max_length,
            do_sample=True,
            temperature=temperature,
        )

        # Decode only the newly generated tokens so the prompt is not echoed back.
        # (Comparing the decoded text against the raw chat-template string is
        # unreliable once special tokens are skipped, so slice by token count.)
        gen_text = tokenizer.decode(
            gen_tokens[0][input_ids.shape[-1]:], skip_special_tokens=True
        ).strip()

        return gen_text
    except Exception as e:
        import traceback
        traceback.print_exc()
        return f"Error: {str(e)}"


# Build the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown(f"# {model_id} Text Generation")

    # Display authentication status
    auth_status = "✅ Token found" if hf_token else "❌ No token found"
    gr.Markdown(f"**Auth Status:** {auth_status} | **Using LFS-optimized loading**")

    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(
                label="أدخل النص هنا",  # "Enter the text here"
                placeholder="أدخل سؤالك أو موضوعك هنا...",  # "Enter your question or topic here..."
                lines=5,
            )
            with gr.Row():
                submit_btn = gr.Button("إرسال", variant="primary")  # "Submit"
                clear_btn = gr.Button("مسح")  # "Clear"
            with gr.Accordion("إعدادات متقدمة", open=False):  # "Advanced settings"
                temperature_slider = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.3,
                    step=0.1,
                    label="درجة الحرارة",  # "Temperature"
                )
                max_length_slider = gr.Slider(
                    minimum=10,
                    maximum=500,
                    value=100,
                    step=10,
                    label="أقصى طول للنص",  # "Maximum text length"
                )
            gr.Markdown("### أمثلة")  # "Examples"
            gr.Examples(
                examples=[
                    ["كيف أطبخ الكبسة؟"],  # "How do I cook kabsa?"
                    ["نظرية النسبية في الفيزياء"],  # "The theory of relativity in physics"
                    ["متوسط طول ليلة في الصيف"],  # "The average length of a night in summer"
                    ["كيفية قيادة سيارة"],  # "How to drive a car"
                    ["ما هو مصدر الطاقة المتجددة؟"],  # "What is the source of renewable energy?"
                ],
                inputs=input_text,
            )
        with gr.Column():
            output_text = gr.Textbox(
                label="النص المُوَلَّد",  # "Generated text"
                lines=20,
            )

    # Set up event handlers
    submit_btn.click(
        fn=generate_text,
        inputs=[input_text, temperature_slider, max_length_slider],
        outputs=output_text,
    )
    clear_btn.click(
        fn=lambda: ("", ""),
        inputs=None,
        outputs=[input_text, output_text],
    )

demo.launch()
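
# Note: a possible refinement, sketched here as a comment (an assumption, not
# part of the original app). generate_text() reloads the full checkpoint on
# every request, which adds substantial latency. On ZeroGPU Spaces a common
# pattern is to load the model once at import time and only move it to the
# GPU inside the @spaces.GPU-decorated function:
#
#     tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
#     model = AutoModelForCausalLM.from_pretrained(
#         model_id, torch_dtype=torch.float16, token=hf_token
#     )
#
#     @spaces.GPU
#     def generate_text(prompt, temperature=0.3, max_length=100):
#         model.to("cuda")
#         ...
#
# Whether this fits depends on the Space's available CPU RAM; the per-call
# loading above trades request latency for a smaller resident memory footprint.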