Spaces:

rodrigomasini
/

rephrase

Paused

App Files Files Community

rephrase / app_v1.py

rodrigomasini

Update app_v1.py

22bcba4 12 months ago

raw

history blame

1.91 kB

	import streamlit as st
	from transformers import AutoTokenizer
	from auto_gptq import AutoGPTQForCausalLM
	from huggingface_hub import snapshot_download

	import os
	import threading

	# Keep every downloaded artefact in a "cache" folder under the current
	# working directory.
	cwd = os.getcwd()
	cachedir = os.path.join(cwd, 'cache')

	# exist_ok=True replaces the exists()/mkdir() pair: that pair can race when
	# the script is executed more than once at the same time, and the second
	# mkdir() would then raise FileExistsError.
	os.makedirs(cachedir, exist_ok=True)

	# NOTE(review): huggingface_hub and transformers are already imported at
	# this point and appear to read HF_HOME at import time, so this assignment
	# may only affect code imported later or child processes -- confirm, and
	# move it ahead of those imports if the cache location matters.
	os.environ['HF_HOME'] = cachedir

	# Destination folder for the model snapshot.
	local_folder = os.path.join(cachedir, "model")

	# Hub repository id of the GPTQ-quantized checkpoint to download.
	quantized_model_dir = "FPHam/Jackson_The_Formalizer_V2_13b_GPTQ"

	# Download the repository snapshot into local_folder.
	snapshot_download(repo_id=quantized_model_dir, local_dir=local_folder, local_dir_use_symlinks=True)

	# Not referenced anywhere else in the visible code; presumably the basename
	# of the quantized weight file inside the snapshot -- TODO confirm.
	model_basename = os.path.join(local_folder, "Jackson2-4bit-128g-GPTQ")

	class QuantizedModel:
	    """Bundle a tokenizer and a GPTQ-quantized causal LM behind one text-in/text-out call."""

	    def __init__(self, model_dir, device="cuda:0"):
	        """Load the tokenizer and the quantized model from ``model_dir``.

	        Args:
	            model_dir: Directory (or model id) handed to both
	                ``AutoTokenizer.from_pretrained`` and
	                ``AutoGPTQForCausalLM.from_quantized``.
	            device: Device string the model is loaded on and that prompt
	                tensors are moved to. Defaults to ``"cuda:0"``, the value
	                that was previously hard-coded.
	        """
	        # Remembered so generate() moves its inputs to the same device the
	        # model was loaded on instead of repeating the literal.
	        self.device = device
	        self.tokenizer = AutoTokenizer.from_pretrained(model_dir, use_fast=False)
	        self.model = AutoGPTQForCausalLM.from_quantized(
	            model_dir,
	            use_safetensors=True,
	            strict=False,
	            device=device,
	            use_triton=False,
	        )

	    def generate(self, prompt, max_new_tokens=512, temperature=0.1, top_p=0.95,
	                 repetition_penalty=1.15, do_sample=True):
	        """Run the model on ``prompt`` and return the decoded first sequence.

	        Args:
	            prompt: Text to tokenize and feed to the model.
	            max_new_tokens: Upper bound on the number of generated tokens.
	            temperature: Forwarded to the model's ``generate``.
	            top_p: Forwarded to the model's ``generate``.
	            repetition_penalty: Forwarded to the model's ``generate``.
	            do_sample: Forwarded to the model's ``generate``. It is passed
	                explicitly because ``temperature`` and ``top_p`` are
	                sampling controls; with greedy decoding they would be
	                ignored. Pass ``False`` for greedy decoding.

	        Returns:
	            The first output sequence decoded with special tokens skipped.
	            NOTE(review): for a causal LM this presumably still contains
	            the prompt text -- confirm if callers need only the completion.
	        """
	        encoded = self.tokenizer(prompt, return_tensors="pt")
	        input_ids = encoded['input_ids'].to(self.device)
	        attention_mask = encoded['attention_mask'].to(self.device)

	        # max_new_tokens states the budget directly; the earlier form
	        # computed max_length as prompt length + max_new_tokens by hand.
	        generated = self.model.generate(
	            input_ids=input_ids,
	            attention_mask=attention_mask,
	            max_new_tokens=max_new_tokens,
	            do_sample=do_sample,
	            temperature=temperature,
	            top_p=top_p,
	            repetition_penalty=repetition_penalty,
	        )
	        return self.tokenizer.decode(generated[0], skip_special_tokens=True)

	@st.cache_resource
	def _load_quantized_model(model_dir):
	    """Build the QuantizedModel for ``model_dir`` once and reuse it.

	    Streamlit re-executes this script on every widget interaction; without
	    caching, the tokenizer and model would be loaded again on each rerun.
	    """
	    return QuantizedModel(model_dir)


	quantized_model = _load_quantized_model(local_folder)

	user_input = st.text_input("Input a phrase")

	# Prompt in the USER/ASSISTANT layout sent to the model.
	prompt_template = f'USER: {user_input}\nASSISTANT:'

	# Generate output when the "Generate" button is pressed
	if st.button("Generate the prompt"):
	    if user_input.strip():
	        output = quantized_model.generate(prompt_template)
	        st.text_area("Prompt", value=output)
	    else:
	        # Nothing to rephrase: skip the model call for a blank phrase.
	        st.warning("Please enter a phrase first.")