yangwang92 committed
Commit: cc040f7
Parent: e772d15

add 70b models

Files changed (1): app.py (+22, -2)
app.py CHANGED
@@ -12,14 +12,34 @@ from huggingface_hub import snapshot_download
 from vptq.app_utils import get_chat_loop_generator
 
 models = [
+    {
+        "name": "VPTQ-community/Meta-Llama-3.1-70B-Instruct-v16-k65536-65536-woft",
+        "bits": "2 bits"
+    },
+    {
+        "name": "VPTQ-community/Meta-Llama-3.1-70B-Instruct-v8-k65536-256-woft",
+        "bits": "3 bits"
+    },
+    {
+        "name": "VPTQ-community/Meta-Llama-3.1-70B-Instruct-v8-k65536-65536-woft",
+        "bits": "4 bits"
+    },
     {
         "name": "VPTQ-community/Meta-Llama-3.1-8B-Instruct-v8-k65536-65536-woft",
         "bits": "4 bits"
     },
     {
-        "name": "VPTQ-community/Meta-Llama-3.1-8B-Instruct-v8-k65536-256-woft",
+        "name": "VPTQ-community/Qwen2.5-72B-Instruct-v8-k65536-65536-woft",
+        "bits": "4 bits"
+    },
+    {
+        "name": "VPTQ-community/Qwen2.5-72B-Instruct-v8-k65536-256-woft",
         "bits": "3 bits"
     },
+    {
+        "name": "VPTQ-community/Qwen2.5-72B-Instruct-v16-k65536-65536-woft",
+        "bits": "2 bits"
+    },
 ]
 
 # Queues for storing historical data (saving the last 100 GPU utilization and memory usage values)
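The "bits" labels in the list above line up with the repo-name suffixes, which appear to encode the VPTQ vector length (v8, v16) and the two codebook sizes (for example k65536-256). A minimal sketch of that arithmetic, assuming each length-L weight vector is stored as one index per codebook; the function and regex here are illustrative, not part of app.py:

import math
import re

def estimated_bits(name: str) -> float:
    # Hypothetical reading of the suffix "v<L>-k<K0>-<K1>":
    # L = vector length, K0/K1 = main/residual codebook sizes.
    # Each length-L vector then costs log2(K0) + log2(K1) index bits.
    m = re.search(r"v(\d+)-k(\d+)-(\d+)", name)
    assert m, "unrecognized VPTQ repo name"
    length, k0, k1 = (int(g) for g in m.groups())
    index_bits = math.log2(k0) + (math.log2(k1) if k1 > 1 else 0.0)
    return index_bits / length

# Reproduces the labels above: 2.0, 3.0, and 4.0 bits per weight.
print(estimated_bits("VPTQ-community/Meta-Llama-3.1-70B-Instruct-v16-k65536-65536-woft"))
print(estimated_bits("VPTQ-community/Meta-Llama-3.1-70B-Instruct-v8-k65536-256-woft"))
print(estimated_bits("VPTQ-community/Qwen2.5-72B-Instruct-v8-k65536-65536-woft"))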
@@ -153,7 +173,7 @@ download_thread.start()
 
 loaded_models = {}
 
-@spaces.GPU
+@spaces.GPU(duration=120)
 def respond(
     message,
     history: list[tuple[str, str]],
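The only other change swaps the bare @spaces.GPU decorator for @spaces.GPU(duration=120). On ZeroGPU Spaces, the duration argument raises the per-call GPU allocation window from the default (60 seconds) to the given number of seconds, presumably so the 70B/72B models added above have enough time to stream a full reply. A minimal sketch of the pattern; the body here is a placeholder, not the app's actual respond logic:

import spaces  # ZeroGPU helper, available inside Hugging Face Spaces

@spaces.GPU(duration=120)  # hold the GPU for up to ~120 seconds per call
def respond(message, history):
    # Placeholder: the real function streams tokens from the selected
    # VPTQ model via get_chat_loop_generator.
    ...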
 