from unsloth import FastLanguageModel  # import unsloth before transformers so its patches are applied
from transformers import TextStreamer

max_seq_length = 2048  # maximum context length used when loading the model
dtype = None           # None lets Unsloth auto-detect the dtype (float16 or bfloat16)
load_in_4bit = False   # set to True to load the weights with 4-bit quantization
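
# Alpaca-style prompt template: the first placeholder takes the user prompt,
# the second is left empty so the model fills in the response.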
alpaca_prompt = """Provide a helpful and informative response to the following prompt.
### Prompt:
{}
### Response:
{}"""
prompt = "What is your base model?"
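
# Load the merged TinyLlama base model and its tokenizer with Unsloth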
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="merged_tinyllama_base_model",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)
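
# Switch the model into Unsloth's optimized inference mode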
FastLanguageModel.for_inference(model)

# Tokenize the formatted prompt and move the token ids to the GPU
inputs = tokenizer(
    [alpaca_prompt.format(prompt, "")],
    return_tensors="pt"
).to("cuda")

# Generate text, streaming tokens to stdout as they are produced
text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer=text_streamer, max_new_tokens=2000)