chat_bot / app.py
ARiiN's picture
Updated app.py to use GGUF model
e351d4a
raw
history blame
423 Bytes
from llama_cpp import Llama
import gradio as gr
# Hugging Face Hub repo that hosts the quantized GGUF weights.
model_path = "matrixportal/Mistral-7B-Instruct-v0.3-Q4_K_M-GGUF"  # Update with the correct filename
# BUG FIX: Llama(model_path=...) expects a LOCAL .gguf file path, not a Hub
# repo id, so the original call could never load this model. from_pretrained
# downloads (and caches) the matching file from the repo, then loads it.
# NOTE: requires the huggingface_hub package at runtime.
llm = Llama.from_pretrained(
    repo_id=model_path,
    filename="*Q4_K_M.gguf",  # glob matched against the .gguf files in the repo
    n_ctx=2048,   # context window in tokens
    n_threads=8,  # CPU threads used for inference
)
def chat_response(prompt):
    """Return the model's text completion for *prompt*.

    Runs the module-level ``llm`` with a 200-token cap and temperature 0.7,
    and extracts the text of the first (only) choice from the result dict.
    """
    result = llm(prompt, max_tokens=200, temperature=0.7)
    first_choice = result["choices"][0]
    return first_choice["text"]
# Wire the chat function into a minimal Gradio UI: one text input box,
# one text output box ("text" is Gradio shorthand for a Textbox component).
iface = gr.Interface(fn=chat_response, inputs="text", outputs="text")
# Start the local web server (blocks until the process is interrupted).
iface.launch()