Spaces:

rodrigomasini
/

rephrase

Paused

rephrase / app_v2.py

Update app_v2.py

136e5a5 12 months ago

1.4 kB

	import os

	import streamlit as st
	from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
	from huggingface_hub import snapshot_download
	from transformers import AutoTokenizer

	# Streamlit script: download a GPTQ-quantized model snapshot into a local
	# cache folder (if its weights are not already there), load the tokenizer
	# and the quantized model onto the first CUDA device, and display the
	# model's device map.
	#
	# NOTE(review): Streamlit re-executes the script on every interaction, so
	# the load below presumably runs again on each rerun; consider wrapping it
	# in a cached loader once the deployed Streamlit version is confirmed.

	# All downloaded artifacts live under ./cache relative to the working dir.
	cwd = os.getcwd()
	cachedir = os.path.join(cwd, "cache")
	local_folder = os.path.join(cachedir, "model")

	# exist_ok avoids the check-then-create race of a separate exists() test.
	os.makedirs(cachedir, exist_ok=True)

	# Hub repository holding the pretrained, already-quantized model.
	pretrained_quantized_model_dir = "FPHam/Jackson_The_Formalizer_V2_13b_GPTQ"
	quantized_model_dir = pretrained_quantized_model_dir

	# Path (without extension) of the weights file handed to the loader below.
	model_basename = os.path.join(local_folder, "Jackson2-4bit-128g-GPTQ")

	# Only download when the weights the loader will read are missing. The
	# loader is called with use_safetensors=True, so the marker file is the
	# .safetensors weights rather than pytorch_model.bin.
	model_path = model_basename + ".safetensors"
	if not os.path.isfile(model_path):
	    snapshot_download(
	        repo_id=quantized_model_dir,
	        local_dir=local_folder,
	        local_dir_use_symlinks=True,
	    )

	use_strict = False
	use_triton = False

	# Load tokenizer and model from the local snapshot.
	tokenizer = AutoTokenizer.from_pretrained(local_folder, use_fast=True)

	# Quantization parameters passed explicitly to the loader:
	# 4-bit weights, group size 128, desc_act disabled.
	quantize_config = BaseQuantizeConfig(
	    bits=4,
	    group_size=128,
	    desc_act=False,
	)

	model = AutoGPTQForCausalLM.from_quantized(
	    local_folder,
	    use_safetensors=True,
	    strict=use_strict,
	    model_basename=model_basename,
	    device="cuda:0",
	    use_triton=use_triton,
	    quantize_config=quantize_config,
	)

	# Show where the model's modules were placed.
	st.write(model.hf_device_map)