import json
import os

import gradio as gr
import spaces
import torch
import transformers
from huggingface_hub import login
from transformers import AutoTokenizer

# Authenticate with the Hugging Face Hub; a valid token is required to
# download the gated Llama 3 weights.
HF_TOKEN = os.getenv("HF_TOKEN")
login(HF_TOKEN)

# Load the model
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id, add_special_tokens=True)
pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device="cuda",
)
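# Note: torch.bfloat16 matches Llama 3's native weight precision and halves
# memory use relative to float32; device="cuda" assumes the Space has a GPU.
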
# Load the model configuration
with open("model_configs.json", "r") as f:
    model_configs = json.load(f)
model_config = model_configs[model_id]

# Pre-query template used to elicit a user instruction from the model
extract_input = model_config["extract_input"]
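# For Llama 3, extract_input is presumably the chat template truncated right
# after the user header (an assumption about model_configs.json, not read
# from it), e.g.:
#   "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n"
# Given only this prefix, the model's most likely continuation is a user
# instruction, which is exactly what Magpie harvests.
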
# Stop generation at either the end-of-sequence token or Llama 3's
# end-of-turn token ("<|eot_id|>")
terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]

@spaces.GPU
def generate_instruction():
    # Generate from the bare pre-query template; the model's completion is a
    # synthetic user instruction.
    instruction = pipeline(
        extract_input,
        max_new_tokens=2048,
        eos_token_id=terminators,
        do_sample=True,
        temperature=1,
        top_p=1,
    )
    # Strip the prompt prefix and keep only the first line of the completion.
    return instruction[0]["generated_text"][len(extract_input) :].split("\n")[0]

@spaces.GPU
def generate_response(response_template):
    # This call also runs the model on the GPU, so it needs @spaces.GPU as
    # well when the Space uses ZeroGPU hardware (a no-op otherwise).
    return pipeline(
        response_template,
        max_new_tokens=2048,
        eos_token_id=terminators,
        do_sample=True,
        temperature=1,
        top_p=1,
    )

def generate_instruction_response():
    # Stage 1: sample a user instruction from the bare pre-query template.
    sanitized_instruction = generate_instruction()
    # Stage 2: wrap that instruction in the full Llama 3 chat template and
    # sample the assistant's answer.
    response_template = f"""<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n{sanitized_instruction}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"""
    user_message = sanitized_instruction
    response = generate_response(response_template)
    assistant_response = response[0]["generated_text"][len(response_template) :]
    return user_message, assistant_response
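
# Looping generate_instruction_response() is, in essence, how Magpie builds an
# alignment dataset: each call yields one (instruction, response) pair. This
# demo surfaces a single pair in the UI.
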
title = "Magpie demo"
description = """
This Gradio demo allows you to explore the approach outlined in the Magpie paper. "Magpie is a data synthesis pipeline that generates high-quality alignment data. Magpie does not rely on prompt engineering or seed questions. Instead, it directly constructs instruction data by prompting aligned LLMs with a pre-query template for sampling instructions." Essentially, instead of prompting the model with a question or a starting query, this approach relies on the pre-query template of the model to generate instructions. Essentially, you are giving the model only the template up to the point where a user instruction would start, and then the model generates the instruction and the response.
In this demo, you can see how the model generates a user instruction and a model response.
You can learn more about the approach [in the paper](https://huggingface.co./papers/2406.08464).
"""
# Create the Gradio interface
iface = gr.Interface(
    fn=generate_instruction_response,
    inputs=[],
    outputs=[
        gr.Text(label="Generated User Instruction"),
        gr.Text(label="Generated Model Response"),
    ],
    title=title,
    description=description,
)

# Launch the app
iface.launch(debug=True)