"""Gradio Space: Arabic text generation with CohereForAI/c4ai-command-r7b-arabic-02-2025."""
import gradio as gr
import os
from huggingface_hub import login
import spaces

# Model information
model_id = "CohereForAI/c4ai-command-r7b-arabic-02-2025"

# Try to authenticate explicitly using the HF_TOKEN environment variable
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    print(f"Found HF_TOKEN in environment (length: {len(hf_token)})")
    try:
        login(token=hf_token)
        print("Logged in to Hugging Face with token")
    except Exception as e:
        print(f"Error logging in: {str(e)}")
else:
    print("No HF_TOKEN found in environment variables")


# This function is GPU-accelerated via ZeroGPU when a GPU is available
@spaces.GPU
def generate_text(prompt, temperature=0.3, max_length=100):
    try:
        import torch
        from transformers import AutoTokenizer, AutoModelForCausalLM

        print(f"Starting model loading process for {model_id}")

        # Load tokenizer (authentication is provided via the token parameter)
        print("Loading tokenizer...")
        tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
        print("Tokenizer loaded successfully")

        # Load model (low_cpu_mem_usage and offload_state_dict help with large checkpoints)
        print("Loading model...")
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch.float16,
            device_map="auto",
            token=hf_token,
            low_cpu_mem_usage=True,
            offload_state_dict=True,
        )
        print("Model loaded successfully")

        # Build the chat-formatted input for the prompt
        print(f"Generating text for prompt: {prompt[:30]}...")
        messages = [{"role": "user", "content": prompt}]
        input_ids = tokenizer.apply_chat_template(
            messages,
            tokenize=True,
            add_generation_prompt=True,
            return_tensors="pt",
        ).to(model.device)

        # Generate the response tokens
        gen_tokens = model.generate(
            input_ids,
            max_new_tokens=max_length,
            do_sample=True,
            temperature=temperature,
        )

        # Decode only the newly generated tokens so the prompt is not echoed back.
        # (Comparing the decoded text against the raw chat-template string is
        # unreliable once special tokens are skipped, so slice by token count.)
        gen_text = tokenizer.decode(
            gen_tokens[0][input_ids.shape[-1]:], skip_special_tokens=True
        ).strip()

        return gen_text
    except Exception as e:
        import traceback
        traceback.print_exc()
        return f"Error: {str(e)}"


# Build the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown(f"# {model_id} Text Generation")

    # Display authentication status
    auth_status = "✅ Token found" if hf_token else "❌ No token found"
    gr.Markdown(f"**Auth Status:** {auth_status} | **Using LFS-optimized loading**")

    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(
                label="أدخل النص هنا",  # "Enter the text here"
                placeholder="أدخل سؤالك أو موضوعك هنا...",  # "Enter your question or topic here..."
                lines=5,
            )
            with gr.Row():
                submit_btn = gr.Button("إرسال", variant="primary")  # "Submit"
                clear_btn = gr.Button("مسح")  # "Clear"
            with gr.Accordion("إعدادات متقدمة", open=False):  # "Advanced settings"
                temperature_slider = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.3,
                    step=0.1,
                    label="درجة الحرارة",  # "Temperature"
                )
                max_length_slider = gr.Slider(
                    minimum=10,
                    maximum=500,
                    value=100,
                    step=10,
                    label="أقصى طول للنص",  # "Maximum text length"
                )
            gr.Markdown("### أمثلة")  # "Examples"
            gr.Examples(
                examples=[
                    ["كيف أطبخ الكبسة؟"],  # "How do I cook kabsa?"
                    ["نظرية النسبية في الفيزياء"],  # "The theory of relativity in physics"
                    ["متوسط طول ليلة في الصيف"],  # "The average length of a night in summer"
                    ["كيفية قيادة سيارة"],  # "How to drive a car"
                    ["ما هو مصدر الطاقة المتجددة؟"],  # "What is the source of renewable energy?"
                ],
                inputs=input_text,
            )
        with gr.Column():
            output_text = gr.Textbox(
                label="النص المُوَلَّد",  # "Generated text"
                lines=20,
            )

    # Set up event handlers
    submit_btn.click(
        fn=generate_text,
        inputs=[input_text, temperature_slider, max_length_slider],
        outputs=output_text,
    )
    clear_btn.click(
        fn=lambda: ("", ""),
        inputs=None,
        outputs=[input_text, output_text],
    )

demo.launch()
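
# Note: a possible refinement, sketched here as a comment (an assumption, not
# part of the original app). generate_text() reloads the full checkpoint on
# every request, which adds substantial latency. On ZeroGPU Spaces a common
# pattern is to load the model once at import time and only move it to the
# GPU inside the @spaces.GPU-decorated function:
#
#     tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
#     model = AutoModelForCausalLM.from_pretrained(
#         model_id, torch_dtype=torch.float16, token=hf_token
#     )
#
#     @spaces.GPU
#     def generate_text(prompt, temperature=0.3, max_length=100):
#         model.to("cuda")
#         ...
#
# Whether this fits depends on the Space's available CPU RAM; the per-call
# loading above trades request latency for a smaller resident memory footprint.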