abdull4h committed
Commit ba3ca90 · verified · 1 Parent(s): 52b9a6b

Update app.py

Files changed (1)
app.py +14 -15
app.py CHANGED
@@ -1,9 +1,8 @@
 import gradio as gr
 import os
 from huggingface_hub import login
-import spaces  # Provided by the Spaces runtime
 
-# Define the model repository
+# Define the model repository (gated model)
 model_id = "CohereForAI/c4ai-command-r7b-arabic-02-2025"
 
 # Get your Hugging Face token from environment variables (ensure HF_TOKEN is set)
@@ -18,34 +17,34 @@ if hf_token:
 else:
     print("No HF_TOKEN found. Please set the HF_TOKEN environment variable.")
 
-# This function will be GPU-accelerated via ZeroGPU when using @spaces.GPU.
-@spaces.GPU
 def chat(prompt):
     try:
         import torch
         from transformers import AutoTokenizer, AutoModelForCausalLM
 
-        print(f"Loading model and tokenizer for {model_id}...")
-        # Load the tokenizer and model using the authentication token
+        print(f"Loading model and tokenizer for {model_id} on CPU...")
+        # Load tokenizer using your authentication token
         tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=hf_token)
+
+        # Load model on CPU (use float32 for CPU)
         model = AutoModelForCausalLM.from_pretrained(
             model_id,
             use_auth_token=hf_token,
-            torch_dtype=torch.float16,
-            device_map="auto"
+            torch_dtype=torch.float32,
+            device_map="cpu"
         )
         print("Model and tokenizer loaded successfully.")
 
-        # Prepare the input using the model's chat template
+        # Prepare the input using the chat template
         messages = [{"role": "user", "content": prompt}]
         input_ids = tokenizer.apply_chat_template(
             messages,
             tokenize=True,
             add_generation_prompt=True,
             return_tensors="pt"
-        ).to(model.device)
+        ).to("cpu")
 
-        # Generate the response
+        # Generate the response tokens
         gen_tokens = model.generate(
             input_ids,
             max_new_tokens=100,
@@ -56,7 +55,7 @@ def chat(prompt):
         # Decode the generated tokens
         gen_text = tokenizer.decode(gen_tokens[0], skip_special_tokens=True)
 
-        # Optionally remove the prompt/chat template from the generated text
+        # Optionally remove the prompt portion (chat template) from the generated text
         conversation = tokenizer.apply_chat_template(
             messages,
             tokenize=False,
@@ -72,13 +71,13 @@ def chat(prompt):
         traceback.print_exc()
         return f"Error: {str(e)}"
 
-# Create a simple Gradio interface for chat
+# Create a simple Gradio interface for chatting with the model on CPU
 demo = gr.Interface(
     fn=chat,
     inputs=gr.Textbox(label="أدخل نص الدردشة", placeholder="مرحبا، كيف حالك؟", lines=3),
     outputs=gr.Textbox(label="النص المُوَلَّد"),
-    title="Chat with CohereForAI/c4ai-command-r7b-arabic-02-2025",
-    description="A simple chat interface using ZeroGPU and HF_TOKEN authentication."
+    title="Chat with CohereForAI/c4ai-command-r7b-arabic-02-2025 (CPU Mode)",
+    description="A simple chat interface running on CPU with HF_TOKEN authentication."
 )
 
 demo.launch()
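
As committed, chat() loads the tokenizer and the 7B model from scratch on every request, which is especially slow in CPU mode. A minimal sketch of loading once at startup instead, under the same model_id and HF_TOKEN assumptions as the file above (recent transformers releases prefer token= over the deprecated use_auth_token=):

import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

model_id = "CohereForAI/c4ai-command-r7b-arabic-02-2025"
hf_token = os.environ.get("HF_TOKEN")

# Load once at import time; chat() then only tokenizes and generates.
tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    token=hf_token,
    torch_dtype=torch.float32,  # float16 is poorly supported on most CPUs
    device_map="cpu",
)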
 
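The committed code strips the prompt by re-rendering the chat template as a string and removing it from the decoded output. An alternative sketch (not what this commit does) that slices off the prompt tokens instead, so the result does not depend on how the template renders as text:

def generate_reply(tokenizer, model, messages, max_new_tokens=100):
    # Tokenize the conversation with the generation prompt appended.
    input_ids = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to("cpu")
    gen_tokens = model.generate(input_ids, max_new_tokens=max_new_tokens)
    # Everything past the prompt length is newly generated text.
    new_tokens = gen_tokens[0][input_ids.shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)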
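Because the repository is gated, a missing or invalid token only surfaces later as a load failure inside chat(). A small sketch of validating the token at startup with huggingface_hub's whoami (the placement is a suggestion, not part of this commit):

import os
from huggingface_hub import login, whoami

hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    login(token=hf_token)
    # whoami() raises if the token is rejected; otherwise it names the account.
    print(f"Authenticated as {whoami(token=hf_token)['name']}.")
else:
    print("No HF_TOKEN found. Please set the HF_TOKEN environment variable.")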