Spaces: Running on Zero

Update app.py

app.py CHANGED
@@ -1,9 +1,8 @@
 import gradio as gr
 import os
 from huggingface_hub import login
-import spaces  # Provided by the Spaces runtime
 
-# Define the model repository
+# Define the model repository (gated model)
 model_id = "CohereForAI/c4ai-command-r7b-arabic-02-2025"
 
 # Get your Hugging Face token from environment variables (ensure HF_TOKEN is set)
@@ -18,34 +17,34 @@ if hf_token:
 else:
     print("No HF_TOKEN found. Please set the HF_TOKEN environment variable.")
 
-# This function will be GPU-accelerated via ZeroGPU when using @spaces.GPU.
-@spaces.GPU
 def chat(prompt):
     try:
         import torch
         from transformers import AutoTokenizer, AutoModelForCausalLM
 
-        print(f"Loading model and tokenizer for {model_id}...")
-        # Load
+        print(f"Loading model and tokenizer for {model_id} on CPU...")
+        # Load tokenizer using your authentication token
         tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=hf_token)
+
+        # Load model on CPU (use float32 for CPU)
         model = AutoModelForCausalLM.from_pretrained(
             model_id,
             use_auth_token=hf_token,
-            torch_dtype=torch.
-            device_map="
+            torch_dtype=torch.float32,
+            device_map="cpu"
         )
         print("Model and tokenizer loaded successfully.")
 
-        # Prepare the input using the
+        # Prepare the input using the chat template
        messages = [{"role": "user", "content": prompt}]
         input_ids = tokenizer.apply_chat_template(
             messages,
             tokenize=True,
             add_generation_prompt=True,
             return_tensors="pt"
-        ).to(
+        ).to("cpu")
 
-        # Generate the response
+        # Generate the response tokens
         gen_tokens = model.generate(
             input_ids,
             max_new_tokens=100,
@@ -56,7 +55,7 @@ def chat(prompt):
         # Decode the generated tokens
         gen_text = tokenizer.decode(gen_tokens[0], skip_special_tokens=True)
 
-        # Optionally remove the prompt
+        # Optionally remove the prompt portion (chat template) from the generated text
         conversation = tokenizer.apply_chat_template(
             messages,
             tokenize=False,
@@ -72,13 +71,13 @@ def chat(prompt):
         traceback.print_exc()
         return f"Error: {str(e)}"
 
-# Create a simple Gradio interface for
+# Create a simple Gradio interface for chatting with the model on CPU
 demo = gr.Interface(
     fn=chat,
     inputs=gr.Textbox(label="أدخل نص الدردشة", placeholder="مرحبا، كيف حالك؟", lines=3),
     outputs=gr.Textbox(label="النص المُوَلَّد"),
-    title="Chat with CohereForAI/c4ai-command-r7b-arabic-02-2025",
-    description="A simple chat interface
+    title="Chat with CohereForAI/c4ai-command-r7b-arabic-02-2025 (CPU Mode)",
+    description="A simple chat interface running on CPU with HF_TOKEN authentication."
 )
 
 demo.launch()
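The token-handling block (old lines 10-17, visible only through the "if hf_token:" hunk context) is elided from the diff. Judging from the surrounding comment, the login import, and the else branch, it presumably reads the secret and logs in, roughly like this hedged reconstruction:

import os

from huggingface_hub import login

# HF_TOKEN comes from a Space secret (Settings -> Variables and secrets)
# or from the shell environment when running locally.
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    login(token=hf_token)  # authenticates downloads of the gated model
else:
    print("No HF_TOKEN found. Please set the HF_TOKEN environment variable.")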
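Several of the removed lines are cut off in the diff view (torch_dtype=torch., device_map=", and ).to( are truncated), so the GPU-era values cannot be recovered from this page. Below is a minimal sketch of the removed ZeroGPU path, assuming half-precision weights, automatic device placement, and inputs moved to the model's device; all three are assumptions, not visible in the truncated lines.

import os

import spaces  # Provided by the Spaces runtime (ZeroGPU)
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

model_id = "CohereForAI/c4ai-command-r7b-arabic-02-2025"
hf_token = os.environ.get("HF_TOKEN")

@spaces.GPU  # ZeroGPU attaches a GPU only while this function runs
def chat(prompt):
    tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=hf_token)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        use_auth_token=hf_token,
        torch_dtype=torch.float16,  # assumed: the original dtype is truncated in the diff
        device_map="auto",          # assumed: the original value is truncated in the diff
    )
    messages = [{"role": "user", "content": prompt}]
    input_ids = tokenizer.apply_chat_template(
        messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)  # assumed target; the original .to(...) is truncated in the diff
    gen_tokens = model.generate(input_ids, max_new_tokens=100)
    return tokenizer.decode(gen_tokens[0], skip_special_tokens=True)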
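The code that actually strips the prompt from gen_text falls between the last two hunks and is not shown; the re-rendered template (tokenize=False) is presumably removed from the decoded output as a string prefix. A more robust pattern, reusing the names from app.py, is to decode only the tokens generated after the prompt:

# Decode only the tokens produced after the prompt, so the chat-template
# prefix never appears in the reply and no string matching is needed.
new_tokens = gen_tokens[0][input_ids.shape[-1]:]
reply = tokenizer.decode(new_tokens, skip_special_tokens=True)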
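Both the old and new versions pass use_auth_token=hf_token, which recent transformers releases deprecate in favor of token=. If the Space pins a current transformers version, the same loads would look like this sketch (only the keyword changes; behavior is unchanged):

import os

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

model_id = "CohereForAI/c4ai-command-r7b-arabic-02-2025"
hf_token = os.environ.get("HF_TOKEN")

tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    token=hf_token,             # replaces the deprecated use_auth_token=
    torch_dtype=torch.float32,  # float32 for CPU, as in the new app.py
    device_map="cpu",
)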