import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
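# Requires: pip install gradio huggingface_hub llama-cpp-python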

# Download the quantized GGUF weights from the Hugging Face Hub and load them with llama.cpp.
model_name = "SantaBot/Jokestral_4bit_guff"
model_file = "unsloth.Q4_K_M.gguf"
model_path = hf_hub_download(model_name, filename=model_file)
llm = Llama(model_path=model_path)
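# If needed, Llama() also accepts tuning parameters such as n_ctx (context window) and n_threads.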


def make_inference(user_prompt, temperature=0.8, max_new_tokens=64, number_of_outputs=3):
    """Generate number_of_outputs joke completions for the given prompt."""
    outputs = ""
    # gr.Number components return floats, so cast the counts to int before using them.
    for i in range(int(number_of_outputs)):
        output = llm(
            user_prompt,
            max_tokens=int(max_new_tokens),
            stop=["</s>", "<s>"],
            echo=True,  # include the prompt so each joke reads as one piece
            temperature=temperature,
        )
        outputs += f'{i + 1}. {output["choices"][0]["text"]}\n\n'
    return outputs
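
# Quick local sanity check (hypothetical prompt; assumes the model downloaded successfully):
# print(make_inference("Why did the chicken", number_of_outputs=1))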


demo = gr.Interface(
    fn=make_inference,
    inputs=[
        gr.Text(value="What's the difference", label="Your prompt"),
        gr.Slider(minimum=0, maximum=1, value=0.8, step=0.05, label="Temperature"),
        gr.Number(minimum=10, maximum=1024, value=64, label="Max new tokens"),
        gr.Number(minimum=1, maximum=10, value=3, label="Number of outputs"),
    ],
    outputs=[gr.Text(label="Output")],
    examples=[
        ["What's the difference", 0.8, 64, 1],
        ["Once a priest", 0.8, 64, 1],
        ["My doctor", 0.8, 64, 1],
        ["I saw", 0.8, 64, 1],
    ],
    allow_flagging="never",
    title="Jokestral 🤣🫵🤡",
    description="Jokestral is Mistral-7B-v0.3 fine-tuned on the [Short jokes dataset](https://www.kaggle.com/datasets/abhinavmoudgil95/short-jokes). Just type the first few words and get your joke. [More information](https://huggingface.co./SantaBot/Jokestral_16bit)",
)

demo.launch()