# Hugging Face Spaces app (the scraped page header showed "Runtime error" —
# see the pipeline-task fix below).
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Model to load — a pre-quantized 4-bit (bitsandbytes) checkpoint.
#MODEL_NAME = "ssirikon/Gemma7b-bnb-Unsloth"
#MODEL_NAME = "unsloth/gemma-7b-bnb-4bit"
MODEL_NAME = "unsloth/mistral-7b-bnb-4bit"

# Load the model and tokenizer.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",           # spread layers across available devices
    torch_dtype=torch.float16,
    load_in_4bit=True,           # load the model in 4-bit precision
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Create a pipeline for text generation.
# BUG FIX: task was "summarization", which requires a seq2seq model and
# returns a "summary_text" key. This model is a causal LM and the handler
# below reads result[0]["generated_text"], which is what the
# "text-generation" pipeline produces — so the correct task is
# "text-generation".
generator = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=50,  # length budget for the generated subject line
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
)
def generate_text(email):
    """Generate an email subject line with the module-level `generator` pipeline.

    Parameters:
        email: the raw email body typed into the Gradio textbox.

    Returns:
        The pipeline's "generated_text" field for the first (only) returned
        sequence. NOTE(review): with a text-generation pipeline this string
        includes the prompt as a prefix — confirm whether callers want it
        stripped.
    """
    prompt = "Generate a subject line for the following email.\n" + email
    result = generator(prompt)
    return result[0]["generated_text"]
# Create the Gradio interface: one text box in (the email body), one text
# box out (the generated subject), wired to generate_text above.
demo = gr.Interface(
    fn=generate_text,
    inputs=gr.Textbox(lines=5, label="Enter your Email here:"),
    outputs=gr.Textbox(label="Generated Subject"),
    title="Email Subject Generation demo",
    description="Enter an email and let the model generate the subject for you!",
)
# debug=True surfaces tracebacks in the Spaces log.
demo.launch(debug=True)