kajdun
/

iubaris-13b-v3_GPTQ

Text Generation

Inference Endpoints

4-bit precision

Model card | Files and versions | Community

kajdun committed on Aug 21, 2023

Commit

46814d3

·

1 Parent(s): b5a0ba1

Update handler.py

Files changed (1) hide show

handler.py +4 -4

handler.py CHANGED Viewed

@@ -14,10 +14,10 @@ class EndpointHandler():
                                         "bits": 4,
                                         "group_size": 128,
                                         "damp_percent": 0.01,
-                                        "desc_act": false,
-                                        "static_groups": false,
-                                        "sym": true,
-                                        "true_sequential": true
                                       })
         # load the optimized model
         model = AutoGPTQForCausalLM.from_quantized(path, device="cuda:0", quantize_config=quantize_config, use_safetensors=True) #file_name="model-quantized.onnx")

                                         "bits": 4,
                                         "group_size": 128,
                                         "damp_percent": 0.01,
+                                        "desc_act": False,
+                                        "static_groups": False,
+                                        "sym": True,
+                                        "true_sequential": True
                                       })
         # load the optimized model
         model = AutoGPTQForCausalLM.from_quantized(path, device="cuda:0", quantize_config=quantize_config, use_safetensors=True) #file_name="model-quantized.onnx")