import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
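# Requires: pip install gradio huggingface_hub llama-cpp-python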

# Download the quantized GGUF weights from the Hugging Face Hub and load them with llama.cpp.
model_name = "SantaBot/Jokestral_4bit_guff"
model_file = "unsloth.Q4_K_M.gguf"
model_path = hf_hub_download(model_name, filename=model_file)
llm = Llama(model_path=model_path)
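# If needed, Llama() also accepts tuning parameters such as n_ctx (context window) and n_threads.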


def make_inference(user_prompt, temperature=0.8, max_new_tokens=64, number_of_outputs=3):
    """Generate number_of_outputs joke completions for the given prompt."""
    outputs = ""
    # gr.Number components return floats, so cast the counts to int before using them.
    for i in range(int(number_of_outputs)):
        output = llm(
            user_prompt,
            max_tokens=int(max_new_tokens),
            stop=["</s>", "<s>"],
            echo=True,  # include the prompt so each joke reads as one piece
            temperature=temperature,
        )
        outputs += f'{i + 1}. {output["choices"][0]["text"]}\n\n'
    return outputs
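
# Quick local sanity check (hypothetical prompt; assumes the model downloaded successfully):
# print(make_inference("Why did the chicken", number_of_outputs=1))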


demo = gr.Interface(
    fn=make_inference,
    inputs=[
        gr.Text(value="What's the difference", label="Your prompt"),
        gr.Slider(minimum=0, maximum=1, value=0.8, step=0.05, label="Temperature"),
        gr.Number(minimum=10, maximum=1024, value=64, label="Max new tokens"),
        gr.Number(minimum=1, maximum=10, value=3, label="Number of outputs"),
    ],
    outputs=[gr.Text(label="Output")],
    examples=[
        ["What's the difference", 0.8, 64, 1],
        ["Once a priest", 0.8, 64, 1],
        ["My doctor", 0.8, 64, 1],
        ["I saw", 0.8, 64, 1],
    ],
    allow_flagging="never",
    title="Jokestral 🤣🫵🤡",
    description="Jokestral is Mistral-7B-v0.3 fine-tuned on the [Short jokes dataset](https://www.kaggle.com/datasets/abhinavmoudgil95/short-jokes). Just type the first few words and get your joke. [More information](https://huggingface.co./SantaBot/Jokestral_16bit)",
)

demo.launch()