OpenSourceRonin committed on
Commit
0ec2418
1 Parent(s): 9da61be

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -11
app.py CHANGED
@@ -29,21 +29,13 @@ models = [
29
  "bits": "3 bits"
30
  },
31
  {
32
- "name": "VPTQ-community/Meta-Llama-3.1-70B-Instruct-v8-k65536-65536-woft",
33
- "bits": "4 bits"
34
- },
35
- {
36
- "name": "VPTQ-community/Qwen2.5-72B-Instruct-v8-k65536-65536-woft",
37
- "bits": "4 bits"
38
  },
39
  {
40
  "name": "VPTQ-community/Qwen2.5-72B-Instruct-v8-k65536-256-woft",
41
  "bits": "3 bits"
42
  },
43
- {
44
- "name": "VPTQ-community/Qwen2.5-72B-Instruct-v16-k65536-65536-woft",
45
- "bits": "2 bits"
46
- },
47
  ]
48
 
49
  # Queues for storing historical data (saving the last 100 GPU utilization and memory usage values)
@@ -177,7 +169,7 @@ download_thread.start()
177
 
178
  loaded_models = {}
179
 
180
- @spaces.GPU(duration=120)
181
  def respond(
182
  message,
183
  history: list[tuple[str, str]],
 
29
  "bits": "3 bits"
30
  },
31
  {
32
+ "name": "VPTQ-community/Qwen2.5-72B-Instruct-v16-k65536-65536-woft",
33
+ "bits": "2 bits"
 
 
 
 
34
  },
35
  {
36
  "name": "VPTQ-community/Qwen2.5-72B-Instruct-v8-k65536-256-woft",
37
  "bits": "3 bits"
38
  },
 
 
 
 
39
  ]
40
 
41
  # Queues for storing historical data (saving the last 100 GPU utilization and memory usage values)
 
169
 
170
  loaded_models = {}
171
 
172
+ @spaces.GPU
173
  def respond(
174
  message,
175
  history: list[tuple[str, str]],