OpenSourceRonin committed on
Commit
d1789cc
1 Parent(s): a005089

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -5
app.py CHANGED
@@ -12,18 +12,22 @@ from huggingface_hub import snapshot_download
12
  from vptq.app_utils import get_chat_loop_generator
13
 
14
  models = [
 
 
 
 
15
  {
16
  "name": "VPTQ-community/Meta-Llama-3.1-8B-Instruct-v8-k65536-256-woft",
17
  "bits": "3 bits"
18
  },
19
  {
20
- "name": "VPTQ-community/Meta-Llama-3.1-70B-Instruct-v16-k65536-65536-woft",
21
- "bits": "2 bits"
22
  },
23
  {
24
- "name": "VPTQ-community/Qwen2.5-72B-Instruct-v16-k65536-65536-woft",
25
- "bits": "2 bits"
26
- }
27
  ]
28
 
29
  # Queues for storing historical data (saving the last 100 GPU utilization and memory usage values)
 
12
  from vptq.app_utils import get_chat_loop_generator
13
 
14
  models = [
15
+ {
16
+ "name": "VPTQ-community/Meta-Llama-3.1-8B-Instruct-v12-k65536-4096-woft",
17
+ "bits": "2.3 bits"
18
+ },
19
  {
20
  "name": "VPTQ-community/Meta-Llama-3.1-8B-Instruct-v8-k65536-256-woft",
21
  "bits": "3 bits"
22
  },
23
  {
24
+ "name": "VPTQ-community/Meta-Llama-3.1-8B-Instruct-v8-k65536-4096-woft",
25
+ "bits": "3.5 bits"
26
  },
27
  {
28
+ "name": "VPTQ-community/Meta-Llama-3.1-70B-Instruct-v8-k32768-0-woft",
29
+ "bits": "1.85 bits"
30
+ },
31
  ]
32
 
33
  # Queues for storing historical data (saving the last 100 GPU utilization and memory usage values)