File size: 1,023 Bytes
2631c60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
{
    "api_key": null,
    "verify_url": "http://johnrachwan.pythonanywhere.com",
    "smash_config": {
        "pruners": "None",
        "pruning_ratio": 0.0,
        "factorizers": "None",
        "quantizers": "['awq']",
        "weight_quantization_bits": 4,
        "output_deviation": 0.005,
        "compilers": "None",
        "static_batch": true,
        "static_shape": true,
        "controlnet": "None",
        "unet_dim": 4,
        "device": "cuda",
        "cache_dir": "/ceph/hdd/staff/charpent/.cache/models_27dp2il",
        "batch_size": 1,
        "model_name": "OpenLLM-Ro/RoLlama2-7b-Instruct",
        "task": "text_text_generation",
        "max_batch_size": 1,
        "qtype_weight": "torch.qint8",
        "qtype_activation": "torch.quint8",
        "qobserver": "<class 'torch.ao.quantization.observer.MinMaxObserver'>",
        "qscheme": "torch.per_tensor_symmetric",
        "qconfig": "x86",
        "group_size": 128,
        "damp_percent": 0.1,
        "save_load_fn": "hf-awq"
    }
}