mattcracker's picture
Update app.py
f5262bf verified
import os

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
# Read the Hugging Face access token from the environment. os.getenv returns
# None when HF_TOKEN is unset, in which case None is passed as `token` below.
hf_token = os.getenv("HF_TOKEN")

# Load the tokenizer and the model from the same repository.
model_path = "meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8"
tokenizer = AutoTokenizer.from_pretrained(
    model_path,
    token=hf_token,
)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    token=hf_token,
    device_map="auto",
    # Enable 4-bit quantized loading. Passing `load_in_4bit=True` directly to
    # from_pretrained is deprecated; the supported form is a
    # BitsAndBytesConfig handed over as `quantization_config`.
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
)
def generate_text(prompt, max_length=512, temperature=0.7, top_p=0.9):
    """Generate text for *prompt* with the module-level model and tokenizer.

    Args:
        prompt: Input text to tokenize and feed to the model.
        max_length: Value forwarded to ``generate(max_length=...)``. It is
            coerced to ``int`` because UI sliders may deliver a float.
        temperature: Sampling temperature forwarded to ``generate``.
        top_p: Nucleus-sampling threshold forwarded to ``generate``.

    Returns:
        The first generated sequence decoded to a string with special tokens
        removed.
        NOTE(review): for causal LMs ``generate`` normally returns the prompt
        tokens followed by the continuation, so the prompt is presumably
        echoed in the result -- confirm this is intended.
    """
    # Tokenize the prompt and move the tensors to the model's device.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Some tokenizers define no pad token; fall back to EOS so that
    # generate() is not handed pad_token_id=None.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    # Generate the answer without tracking gradients.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_length=int(max_length),
            # temperature / top_p only take effect when sampling is enabled.
            do_sample=True,
            temperature=float(temperature),
            top_p=float(top_p),
            pad_token_id=pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

    # Decode the first (and only) returned sequence.
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# Build the Gradio interface: one prompt box and three generation sliders in,
# one text box out. Components are created in the same order as they appear
# in the UI.
prompt_box = gr.Textbox(lines=5, label="输入提示")
max_length_slider = gr.Slider(minimum=64, maximum=1024, value=512, label="最大长度")
temperature_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.7, label="温度")
top_p_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.9, label="Top-p")
result_box = gr.Textbox(lines=5, label="生成的文本")

iface = gr.Interface(
    fn=generate_text,
    inputs=[prompt_box, max_length_slider, temperature_slider, top_p_slider],
    outputs=result_box,
    title="Llama-3.2-1B-Instruct 演示",
    description="输入提示,获取AI生成的回答",
)

# Start the app.
iface.launch()