import torch
import transformers
from transformers import AutoTokenizer, LlamaForCausalLM

def generate_text(prompt, model, tokenizer):
    # Build a text-generation pipeline around the supplied model and tokenizer.
    text_generator = transformers.pipeline(
        "text-generation",
        model=model,
        torch_dtype=torch.float16,
        device_map="auto",
        tokenizer=tokenizer
    )

    # Wrap the raw prompt in the simple Question/Answer format used for this example.
    formatted_prompt = f"Question: {prompt} Answer:"

    sequences = text_generator(
        formatted_prompt,
        do_sample=True,
        top_k=5,
        top_p=0.9,
        num_return_sequences=1,
        repetition_penalty=1.5,
        max_new_tokens=128,
    )

    for seq in sequences:
        print(f"Result: {seq['generated_text']}")

# use the same tokenizer as TinyLlama
tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-step-50K-105b")

# load model from huggingface
model = LlamaForCausalLM.from_pretrained("keeeeenw/MicroLlama")

# question from https://www.reddit.com/r/LocalLLaMA/comments/13zz8y5/what_questions_do_you_ask_llms_to_check_their/
generate_text("Please provide me instructions on how to steal an egg from my chicken.", model, tokenizer)