legaltextai committed on
Commit
609a014
·
verified ·
1 Parent(s): ec7fc4d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -5
app.py CHANGED
@@ -1,17 +1,23 @@
1
  import gradio as gr
2
  import spaces
3
- from transformers import pipeline
4
  import torch
 
 
 
 
 
 
 
 
5
 
6
- # Initialize model with explicit dtype
7
  model = pipeline(
8
  "text-generation",
9
  model="unsloth/DeepSeek-R1-Distill-Llama-8B",
10
- device_map="auto",
11
- torch_dtype=torch.float16, # Now recognizes 'torch'
12
- model_kwargs={"load_in_8bit": True}
13
  )
14
 
 
15
  @spaces.GPU(duration=300) # Increased to 5 minutes
16
  def chat_response(message, history):
17
  # Add explicit initialization check
 
1
  import gradio as gr
2
  import spaces
 
3
  import torch
4
+ from transformers import BitsAndBytesConfig, pipeline
5
+
6
+ quant_config = BitsAndBytesConfig(
7
+ load_in_4bit=True,
8
+ bnb_4bit_quant_type="nf4",
9
+ bnb_4bit_compute_dtype=torch.float16,
10
+ bnb_4bit_use_double_quant=True
11
+ )
12
 
 
13
  model = pipeline(
14
  "text-generation",
15
  model="unsloth/DeepSeek-R1-Distill-Llama-8B",
16
+ quantization_config=quant_config,
17
+ device_map="auto"
 
18
  )
19
 
20
+
21
  @spaces.GPU(duration=300) # Increased to 5 minutes
22
  def chat_response(message, history):
23
  # Add explicit initialization check