DeepSeek-R1-Distill-Qwen-32B-bnb-4bit

Runtime error

Aratako committed 20 days ago

Commit

17ae551

verified ·

1 Parent(s): bd4a583

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -45,9 +45,7 @@ h1 {
 # Load the tokenizer and model
 tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-R1-Distill-Qwen-32B")
-tokenizer.add_special_tokens({
-    'additional_special_tokens': ['<think>', '</think>']
-})
 model = AutoModelForCausalLM.from_pretrained("unsloth/DeepSeek-R1-Distill-Qwen-32B-bnb-4bit", device_map="auto")  # to("cuda:0")
 terminators = [
     tokenizer.eos_token_id,
@@ -79,7 +77,7 @@ def chat_llama3_8b(message: str,
     print(tokenizer.apply_chat_template(conversation, add_generation_prompt=True, tokenize=False))
     print(input_ids)
-    streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = dict(
         input_ids= input_ids,

 # Load the tokenizer and model
 tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-R1-Distill-Qwen-32B")
 model = AutoModelForCausalLM.from_pretrained("unsloth/DeepSeek-R1-Distill-Qwen-32B-bnb-4bit", device_map="auto")  # to("cuda:0")
 terminators = [
     tokenizer.eos_token_id,
     print(tokenizer.apply_chat_template(conversation, add_generation_prompt=True, tokenize=False))
     print(input_ids)
+    streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=False)
     generate_kwargs = dict(
         input_ids= input_ids,