Spaces: Running on Zero

Update app.py

app.py CHANGED
@@ -1,9 +1,8 @@
 import gradio as gr
 import os
 from huggingface_hub import login
-import spaces  # Provided by the Spaces runtime
 
-# Define the model repository
+# Define the model repository (gated model)
 model_id = "CohereForAI/c4ai-command-r7b-arabic-02-2025"
 
 # Get your Hugging Face token from environment variables (ensure HF_TOKEN is set)
@@ -18,34 +17,34 @@ if hf_token:
 else:
     print("No HF_TOKEN found. Please set the HF_TOKEN environment variable.")
 
-# This function will be GPU-accelerated via ZeroGPU when using @spaces.GPU.
-@spaces.GPU
 def chat(prompt):
     try:
         import torch
         from transformers import AutoTokenizer, AutoModelForCausalLM
 
-        print(f"Loading model and tokenizer for {model_id}...")
-        # Load
+        print(f"Loading model and tokenizer for {model_id} on CPU...")
+        # Load tokenizer using your authentication token
         tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=hf_token)
+
+        # Load model on CPU (use float32 for CPU)
         model = AutoModelForCausalLM.from_pretrained(
             model_id,
             use_auth_token=hf_token,
-            torch_dtype=torch.
-            device_map="
+            torch_dtype=torch.float32,
+            device_map="cpu"
         )
         print("Model and tokenizer loaded successfully.")
 
-        # Prepare the input using the
+        # Prepare the input using the chat template
        messages = [{"role": "user", "content": prompt}]
         input_ids = tokenizer.apply_chat_template(
             messages,
             tokenize=True,
             add_generation_prompt=True,
             return_tensors="pt"
-        ).to(
+        ).to("cpu")
 
-        # Generate the response
+        # Generate the response tokens
         gen_tokens = model.generate(
             input_ids,
             max_new_tokens=100,
@@ -56,7 +55,7 @@ def chat(prompt):
         # Decode the generated tokens
         gen_text = tokenizer.decode(gen_tokens[0], skip_special_tokens=True)
 
-        # Optionally remove the prompt
+        # Optionally remove the prompt portion (chat template) from the generated text
         conversation = tokenizer.apply_chat_template(
             messages,
             tokenize=False,
@@ -72,13 +71,13 @@ def chat(prompt):
         traceback.print_exc()
         return f"Error: {str(e)}"
 
-# Create a simple Gradio interface for
+# Create a simple Gradio interface for chatting with the model on CPU
 demo = gr.Interface(
     fn=chat,
     inputs=gr.Textbox(label="أدخل نص الدردشة", placeholder="مرحبا، كيف حالك؟", lines=3),
     outputs=gr.Textbox(label="النص المُوَلَّد"),
-    title="Chat with CohereForAI/c4ai-command-r7b-arabic-02-2025",
-    description="A simple chat interface
+    title="Chat with CohereForAI/c4ai-command-r7b-arabic-02-2025 (CPU Mode)",
+    description="A simple chat interface running on CPU with HF_TOKEN authentication."
 )
 
 demo.launch()
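The token-handling block (old lines 10-17, visible only through the "if hf_token:" hunk context) is elided from the diff. Judging from the surrounding comment, the login import, and the else branch, it presumably reads the secret and logs in, roughly like this hedged reconstruction:

import os

from huggingface_hub import login

# HF_TOKEN comes from a Space secret (Settings -> Variables and secrets)
# or from the shell environment when running locally.
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    login(token=hf_token)  # authenticates downloads of the gated model
else:
    print("No HF_TOKEN found. Please set the HF_TOKEN environment variable.")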
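Several of the removed lines are cut off in the diff view (torch_dtype=torch., device_map=", and ).to( are truncated), so the GPU-era values cannot be recovered from this page. Below is a minimal sketch of the removed ZeroGPU path, assuming half-precision weights, automatic device placement, and inputs moved to the model's device; all three are assumptions, not visible in the truncated lines.

import os

import spaces  # Provided by the Spaces runtime (ZeroGPU)
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

model_id = "CohereForAI/c4ai-command-r7b-arabic-02-2025"
hf_token = os.environ.get("HF_TOKEN")

@spaces.GPU  # ZeroGPU attaches a GPU only while this function runs
def chat(prompt):
    tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=hf_token)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        use_auth_token=hf_token,
        torch_dtype=torch.float16,  # assumed: the original dtype is truncated in the diff
        device_map="auto",          # assumed: the original value is truncated in the diff
    )
    messages = [{"role": "user", "content": prompt}]
    input_ids = tokenizer.apply_chat_template(
        messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)  # assumed target; the original .to(...) is truncated in the diff
    gen_tokens = model.generate(input_ids, max_new_tokens=100)
    return tokenizer.decode(gen_tokens[0], skip_special_tokens=True)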
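The code that actually strips the prompt from gen_text falls between the last two hunks and is not shown; the re-rendered template (tokenize=False) is presumably removed from the decoded output as a string prefix. A more robust pattern, reusing the names from app.py, is to decode only the tokens generated after the prompt:

# Decode only the tokens produced after the prompt, so the chat-template
# prefix never appears in the reply and no string matching is needed.
new_tokens = gen_tokens[0][input_ids.shape[-1]:]
reply = tokenizer.decode(new_tokens, skip_special_tokens=True)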
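Both the old and new versions pass use_auth_token=hf_token, which recent transformers releases deprecate in favor of token=. If the Space pins a current transformers version, the same loads would look like this sketch (only the keyword changes; behavior is unchanged):

import os

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

model_id = "CohereForAI/c4ai-command-r7b-arabic-02-2025"
hf_token = os.environ.get("HF_TOKEN")

tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    token=hf_token,             # replaces the deprecated use_auth_token=
    torch_dtype=torch.float32,  # float32 for CPU, as in the new app.py
    device_map="cpu",
)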