import gradio as gr
from transformers import AutoTokenizer, pipeline, logging
from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
quantized_model_dir = "FPHam/Jackson_The_Formalizer_V2_13b_GPTQ"
# auto-gptq appends ".safetensors" itself when use_safetensors=True, so the
# basename is given without the file extension.
model_basename = "Jackson2-4bit-128g-GPTQ"

use_strict = False
use_triton = False
tokenizer = AutoTokenizer.from_pretrained(quantized_model_dir, use_fast=True)
quantize_config = BaseQuantizeConfig(
    bits=4,
    group_size=128,
    desc_act=False,
)
model = AutoGPTQForCausalLM.from_quantized(
    quantized_model_dir,
    use_safetensors=True,
    strict=use_strict,
    model_basename=model_basename,
    device="cuda:0",
    use_triton=use_triton,
    quantize_config=quantize_config,
)
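
# The original file calls demo.launch() without ever defining `demo`. The
# wiring below is a minimal assumed sketch, not the original UI: a
# text-generation pipeline behind a simple Gradio text-to-text interface.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer,
                max_new_tokens=256, do_sample=True, temperature=0.7)

def rephrase(text: str) -> str:
    # Hypothetical prompt format; adjust to the template the model expects.
    prompt = f"Rephrase the following text formally:\n\n{text}\n\n"
    generated = pipe(prompt)[0]["generated_text"]
    return generated[len(prompt):].strip()  # drop the echoed prompt

demo = gr.Interface(fn=rephrase,
                    inputs=gr.Textbox(lines=6, label="Text to rephrase"),
                    outputs=gr.Textbox(lines=6, label="Rephrased text"),
                    title="Jackson The Formalizer V2 13B (GPTQ)")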
demo.launch()