# NOTE(review): removed HuggingFace Spaces page-scrape residue that preceded the
# code (status lines, file size, commit hashes, and a line-number gutter).
# It was not part of the source file and made it invalid Python.
import gradio as gr
from gradio_client import Client
from huggingface_hub import get_token, InferenceClient
from llama_cpp import Llama
# Module-level model handle: downloads (if needed) and loads a quantized
# SmolLM2-135M-Instruct GGUF checkpoint via llama-cpp-python.
# NOTE: this runs at import time and may hit the network on first use.
llm = Llama.from_pretrained(
    repo_id="prithivMLmods/SmolLM2-135M-Instruct-GGUF",
    filename="SmolLM2-135M-Instruct.Q5_K_M.gguf",  # Q5_K_M quantization
    verbose=False,
    n_ctx=7000  # context window; `generate`'s default max_tokens (8192-7000) appears derived from this
)
def generate(
    user_prompt: str,
    system_prompt: str = "You are a helpful assistant.",
    max_tokens: int = 1192,  # == 8192 - 7000 in the original; presumably headroom over n_ctx=7000 -- TODO confirm intent
    temperature: float = 0.2,
    top_p: float = 0.95,
    top_k: int = 40,
):
    """Run one chat completion against the module-level ``llm``.

    Args:
        user_prompt: The user's query.
        system_prompt: System message prepended to the conversation.
        max_tokens: Generation cap forwarded to llama.cpp.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling threshold.
        top_k: Top-k sampling cutoff.

    Returns:
        The raw llama-cpp-python chat-completion dict; the reply text lives at
        ``result["choices"][0]["message"]["content"]``.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    # gr.Number widgets deliver floats by default; llama.cpp expects integers
    # for max_tokens and top_k, so cast defensively.
    return llm.create_chat_completion(
        messages=messages,
        max_tokens=int(max_tokens),
        temperature=temperature,
        top_p=top_p,
        top_k=int(top_k),
    )
def _completion_text(completion: dict) -> str:
    """Extract the assistant's reply text from a llama.cpp chat-completion dict."""
    return completion["choices"][0]["message"]["content"]


def _respond(user_prompt, system_prompt, max_tokens, temperature, top_p, top_k):
    """Gradio adapter around `generate`.

    Returns the plain reply text instead of the raw completion dict, so the
    Response textbox shows only the answer rather than the full JSON payload.
    """
    result = generate(user_prompt, system_prompt, max_tokens, temperature, top_p, top_k)
    return _completion_text(result)


with gr.Blocks() as demo:
    # NOTE(review): the checkpoint loaded above is the 135M variant; the
    # original markdown advertised 360M -- corrected to match the actual model.
    gr.Markdown("""# RAG - generate
Generate a response to a query using a [prithivMLmods/SmolLM2-135M-Instruct-GGUF and llama-cpp-python](https://huggingface.co./prithivMLmods/SmolLM2-135M-Instruct-GGUF?library=llama-cpp-python).
Part of [ai-blueprint](https://github.com/huggingface/ai-blueprint) - a blueprint for AI development, focusing on applied examples of RAG, information extraction, analysis and fine-tuning in the age of LLMs and agents.""")
    with gr.Row():
        system_prompt = gr.Textbox(label="System prompt", lines=3, value="You are a helpful assistant.")
        user_prompt = gr.Textbox(label="Query", lines=3)
    with gr.Accordion("kwargs"):
        with gr.Row(variant="panel"):
            max_tokens = gr.Number(label="Max tokens", value=1100)
            temperature = gr.Number(label="Temperature", value=0.2)
            top_p = gr.Number(label="Top p", value=0.95)
            top_k = gr.Number(label="Top k", value=40)
    submit_btn = gr.Button("Submit")
    response_output = gr.Textbox(label="Response", lines=10)
    # NOTE(review): declared but never written to -- the retrieval step that
    # would populate this dataframe is not wired up anywhere in this file.
    documents_output = gr.Dataframe(
        label="Documents", headers=["chunk", "url", "distance", "rank"], wrap=True
    )
    submit_btn.click(
        fn=_respond,
        inputs=[
            user_prompt,
            system_prompt,
            max_tokens,
            temperature,
            top_p,
            top_k,
        ],
        outputs=[response_output],
    )
demo.launch()