Sadmanteemi committed on
Commit
57b0e45
·
verified ·
1 Parent(s): ebc169c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -29
app.py CHANGED
@@ -33,17 +33,14 @@ As a derivate work of [Llama-3.2-3B-Instruct](https://huggingface.co/meta-llama/
33
  this demo is governed by the original [license](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/LICENSE).
34
  """
35
 
36
- # if not torch.cuda.is_available():
37
- # DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
38
-
39
-
40
  if torch.cuda.is_available() or os.getenv("ZERO_GPU_SUPPORT", False):
41
  model_id = "chuanli11/Llama-3.2-3B-Instruct-uncensored"
42
  model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype=torch.bfloat16)
43
  tokenizer = AutoTokenizer.from_pretrained(model_id)
44
  else:
45
- raise RuntimeError("No compatible GPU environment found for this model.")
46
-
 
47
 
48
  @spaces.GPU
49
  def generate(
@@ -102,27 +99,6 @@ chat_interface = gr.ChatInterface(
102
  step=0.1,
103
  value=0.6,
104
  ),
105
- # gr.Slider(
106
- # label="Top-p (nucleus sampling)",
107
- # minimum=0.05,
108
- # maximum=1.0,
109
- # step=0.05,
110
- # value=0.9,
111
- # ),
112
- # gr.Slider(
113
- # label="Top-k",
114
- # minimum=1,
115
- # maximum=1000,
116
- # step=1,
117
- # value=50,
118
- # ),
119
- # gr.Slider(
120
- # label="Repetition penalty",
121
- # minimum=1.0,
122
- # maximum=2.0,
123
- # step=0.05,
124
- # value=1.2,
125
- # ),
126
  ],
127
  stop_btn=None,
128
  examples=[
@@ -133,9 +109,8 @@ chat_interface = gr.ChatInterface(
133
 
134
  with gr.Blocks(css="style.css", fill_height=True) as demo:
135
  gr.Markdown(DESCRIPTION)
136
- # gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
137
  chat_interface.render()
138
  gr.Markdown(LICENSE)
139
 
140
  if __name__ == "__main__":
141
- demo.queue(max_size=20).launch()
 
33
  this demo is governed by the original [license](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/LICENSE).
34
  """
35
 
 
 
 
 
36
  if torch.cuda.is_available() or os.getenv("ZERO_GPU_SUPPORT", False):
37
  model_id = "chuanli11/Llama-3.2-3B-Instruct-uncensored"
38
  model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype=torch.bfloat16)
39
  tokenizer = AutoTokenizer.from_pretrained(model_id)
40
  else:
41
+ model_id = "chuanli11/Llama-3.2-3B-Instruct-uncensored"
42
+ model = AutoModelForCausalLM.from_pretrained(model_id)
43
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
44
 
45
  @spaces.GPU
46
  def generate(
 
99
  step=0.1,
100
  value=0.6,
101
  ),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  ],
103
  stop_btn=None,
104
  examples=[
 
109
 
110
  with gr.Blocks(css="style.css", fill_height=True) as demo:
111
  gr.Markdown(DESCRIPTION)
 
112
  chat_interface.render()
113
  gr.Markdown(LICENSE)
114
 
115
  if __name__ == "__main__":
116
+ demo.queue(max_size=20).launch()