ruslanmv committed
Commit 9122113 · verified · 1 Parent(s): 381d2e1

Update app.py

Files changed (1):
  1. app.py +17 -24
app.py CHANGED
@@ -1,34 +1,21 @@
 import gradio as gr
 import subprocess
 
-# Function to load a model using Hugging Face Spaces and enable GPU
-def load_model_with_gpu(model_name):
-    print(f"Attempting to load {model_name} with GPU enabled...")
+# Function to load a model using Hugging Face Spaces
+def load_model_from_space(model_name):
+    print(f"Attempting to load {model_name}...")
     try:
-        # Use subprocess to run hf.space_info and get GPU setting
-        result = subprocess.run(
-            ["python", "-c", f"from huggingface_hub import space_info; print(space_info('{model_name}').hardware)"],
-            capture_output=True,
-            text=True,
-            check=True
-        )
-        hardware = result.stdout.strip()
-        print(f"Hardware for {model_name}: {hardware}")
-
         demo = gr.load(name=model_name, src="spaces")
-
-        # Return the loaded model demo
         print(f"Successfully loaded {model_name}")
         return demo
-
     except Exception as e:
         print(f"Error loading model {model_name}: {e}")
         return None
 
-# Load the models with GPU enabled (if available)
-deepseek_r1_distill = load_model_with_gpu("deepseek-ai/DeepSeek-R1-Distill-Qwen-32B")
-deepseek_r1 = load_model_with_gpu("deepseek-ai/DeepSeek-R1")
-deepseek_r1_zero = load_model_with_gpu("deepseek-ai/DeepSeek-R1-Zero")
+# Load the models
+deepseek_r1_distill = load_model_from_space("deepseek-ai/DeepSeek-R1-Distill-Qwen-32B")
+deepseek_r1 = load_model_from_space("deepseek-ai/DeepSeek-R1")
+deepseek_r1_zero = load_model_from_space("deepseek-ai/DeepSeek-R1-Zero")
 
 # --- Chatbot function ---
 def chatbot(input_text, history, model_choice, system_message, max_new_tokens, temperature, top_p):
@@ -46,16 +33,22 @@ def chatbot(input_text, history, model_choice, system_message, max_new_tokens, temperature, top_p):
         default_response = "Model not selected or could not be loaded."
         history.append((input_text, default_response))
         return history, history, "", model_choice, system_message, max_new_tokens, temperature, top_p
-
-    # Adjust the call to the model, remove default_value if not applicable
-    model_output = model_demo(input_text, history, max_new_tokens, temperature, top_p, system_message)
+
+    # Call the model's 'predict' function.
+    try:
+        model_output = model_demo(input_text, history, max_new_tokens, temperature, top_p, system_message, fn_index=0)
+    except Exception as e:
+        print(f"An error occurred: {e}")
+        model_output = "An error occurred; please check the model and try again."
+        history.append((input_text, model_output))
+        return history, history, "", model_choice, system_message, max_new_tokens, temperature, top_p
 
     # Check if model_output is iterable and has expected number of elements
     if not isinstance(model_output, (list, tuple)) or len(model_output) < 2:
         error_message = "Model output does not have the expected format."
         history.append((input_text, error_message))
         return history, history, "", model_choice, system_message, max_new_tokens, temperature, top_p
-
+
     response = model_output[-1][1] if model_output[-1][1] else "Model did not return a response."
     history.append((input_text, response))
     return history, history, "", model_choice, system_message, max_new_tokens, temperature, top_p
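Note: the first hunk deletes the subprocess-based hardware probe outright; the removed code only printed the Space's reported hardware and never changed it, so gr.load is all that remains. A minimal sketch of the pattern the new loader relies on (names and model IDs taken from the diff; the demo.launch() usage at the end is a hypothetical addition):

import gradio as gr

def load_model_from_space(model_name):
    # Proxy the named Hugging Face Space as a Gradio demo. The returned
    # Blocks object can be mounted in a UI or called like a function.
    try:
        return gr.load(name=model_name, src="spaces")
    except Exception as e:
        # Loading fails if the Space is down, renamed, or gated.
        print(f"Error loading model {model_name}: {e}")
        return None

demo = load_model_from_space("deepseek-ai/DeepSeek-R1")
if demo is not None:
    demo.launch()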
 
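For reference, a demo returned by gr.load can be invoked like a function, and the fn_index=0 added in the second hunk routes the call to the Space's first registered endpoint; the argument order is an assumption about that Space's chat signature. A hedged sketch of the defensive response extraction the commit settles on (extract_response is a hypothetical helper; the output shape, a history of (user, assistant) pairs, is likewise an assumption about the remote function):

def extract_response(model_output):
    # Mirrors the committed checks: expect a history of (user, assistant)
    # pairs and fall back to a readable message on any other shape.
    if not isinstance(model_output, (list, tuple)) or len(model_output) < 2:
        return "Model output does not have the expected format."
    return model_output[-1][1] or "Model did not return a response."

print(extract_response([("hi", "hello"), ("2+2?", "4")]))  # -> "4"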