Update app.py
app.py CHANGED
@@ -11,7 +11,7 @@ HF_TOKEN = os.environ.get("HF_TOKEN", None)
 
 DESCRIPTION = '''
 <div>
-<h1 style="text-align: center;">deepseek-ai/DeepSeek-R1-Distill-Llama-
+<h1 style="text-align: center;">deepseek-ai/DeepSeek-R1-Distill-Llama-32B</h1>
 </div>
 '''
 
@@ -23,7 +23,7 @@ LICENSE = """
 
 PLACEHOLDER = """
 <div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
-<h1 style="font-size: 28px; margin-bottom: 2px; opacity: 0.55;">DeepSeek-R1-Distill-Llama-
+<h1 style="font-size: 28px; margin-bottom: 2px; opacity: 0.55;">DeepSeek-R1-Distill-Llama-32B</h1>
 <p style="font-size: 18px; margin-bottom: 2px; opacity: 0.65;">Ask me anything...</p>
 </div>
 """
@@ -44,8 +44,8 @@ h1 {
 """
 
 # Load the tokenizer and model
-tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-R1-Distill-Llama-
-model = AutoModelForCausalLM.from_pretrained("deepseek-ai/DeepSeek-R1-Distill-Llama-
+tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-R1-Distill-Llama-32B")
+model = AutoModelForCausalLM.from_pretrained("deepseek-ai/DeepSeek-R1-Distill-Llama-32B", device_map="auto")  # to("cuda:0")
 terminators = [
     tokenizer.eos_token_id,
     tokenizer.convert_tokens_to_ids("<|eot_id|>")
@@ -58,7 +58,7 @@ def chat_llama3_8b(message: str,
              max_new_tokens: int
             ) -> str:
     """
-    Generate a streaming response using the llama3-
+    Generate a streaming response using the llama3-32B model.
     Args:
         message (str): The input message.
         history (list): The conversation history used by ChatInterface.
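For context, here is a minimal, self-contained sketch of how the lines touched by this commit typically fit together in a Gradio chat Space of this kind. Only the from_pretrained calls, the terminators list, and the chat_llama3_8b signature appear in the diff above; the TextIteratorStreamer wiring, the torch_dtype, the tuple-style history handling, and the threading are assumptions inferred from that signature, not part of this commit.

# Minimal sketch, not this Space's exact app.py; assumptions are marked below.
from threading import Thread

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

MODEL_ID = "deepseek-ai/DeepSeek-R1-Distill-Llama-32B"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",           # shard across available GPUs instead of .to("cuda:0")
    torch_dtype=torch.bfloat16,  # assumption: half precision to fit the 32B weights
)

terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]

def chat_llama3_8b(message: str,
                   history: list,
                   temperature: float,
                   max_new_tokens: int
                   ) -> str:
    # Rebuild the conversation in chat-message format
    # (assumes ChatInterface's tuple-style history).
    conversation = []
    for user, assistant in history:
        conversation.append({"role": "user", "content": user})
        conversation.append({"role": "assistant", "content": assistant})
    conversation.append({"role": "user", "content": message})

    input_ids = tokenizer.apply_chat_template(
        conversation, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)

    # Stream tokens back to the UI as they are generated.
    streamer = TextIteratorStreamer(
        tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
    )
    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=temperature > 0,
        temperature=temperature,
        eos_token_id=terminators,
    )
    Thread(target=model.generate, kwargs=generate_kwargs).start()

    outputs = []
    for text in streamer:
        outputs.append(text)
        yield "".join(outputs)

On the design choice visible in the diff: device_map="auto" lets accelerate place the 32B weights across all available devices, which is presumably why the single-device .to("cuda:0") call survives only as a trailing comment on the new line.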