nazimali's picture
Update app.py
9d2349f verified
raw
history blame
1.43 kB
import os
import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
import spaces
# Hub access token for authenticated downloads; None when the env var is unset.
huggingface_token = os.getenv("HF_TOKEN")
# Alpaca-style Arabic instruction template. Roughly: "Below is an instruction
# that describes a task. Write a response that appropriately completes the
# request.\n\n### Instruction:\n{}\n\n### Answer:\n"
infer_prompt = "فيما يلي تعليمات تصف مهمة. اكتب استجابة تكمل الطلب بشكل مناسب.\n\n### تعليمات:\n{}\n\n### إجابة:\n"
model_id = "nazimali/mistral-7b-v0.3-instruct-arabic"
file_name = "Q8_0.gguf"
# Model handle; deliberately left None so the weights are loaded lazily inside
# respond(), i.e. only once a GPU request actually arrives (see @spaces.GPU).
llm = None
# Fetch the quantized GGUF weights once at startup into ./models.
hf_hub_download(
    repo_id=model_id,
    filename=file_name,
    local_dir="./models",
    token=huggingface_token,
)
@spaces.GPU
def respond(
    message,
    history,
):
    """Stream a chat completion for *message*, yielding the growing reply.

    Parameters
    ----------
    message : str
        The user's latest input text.
    history : list
        Prior turns supplied by gr.ChatInterface — either legacy
        ``(user, assistant)`` tuples or ``{"role": ..., "content": ...}``
        dicts, depending on the gradio version.

    Yields
    ------
    str
        The accumulated assistant response so far (Gradio streaming style).
    """
    global llm
    # Lazy one-time load: read the GGUF file and allocate GPU layers only on
    # the first request, inside the @spaces.GPU execution context.
    if llm is None:
        llm = Llama(
            model_path=f"./models/{file_name}",
            flash_attn=True,
            n_gpu_layers=-1,
            n_ctx=1024,
            verbose=True,
        )
    # Rebuild the conversation so the model sees prior turns. (The previous
    # implementation discarded `history`, losing all multi-turn context.)
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            # gradio "messages" format: {"role": ..., "content": ...}
            role = turn.get("role")
            content = turn.get("content")
            if not content:
                continue
            if role == "user":
                # Wrap user turns in the same template used for the live message.
                content = infer_prompt.format(content)
            messages.append({"role": role, "content": content})
        else:
            # legacy tuple format: (user_message, assistant_message)
            user_msg, bot_msg = turn
            if user_msg:
                messages.append({"role": "user", "content": infer_prompt.format(user_msg)})
            if bot_msg:
                messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": infer_prompt.format(message)})
    stream = llm.create_chat_completion(
        messages=messages,
        max_tokens=512,
        repeat_penalty=1.2,
        stream=True,
        temperature=0.7,
        top_k=40,
        top_p=0.95,
    )
    outputs = ""
    for output in stream:
        # First streamed chunk may carry only {"role": ...}; .get covers that.
        outputs += output["choices"][0]["delta"].get("content", "")
        yield outputs
# Chat UI wired to the streaming generator; examples seed the input box
# (an Arabic greeting and an English "hello").
demo = gr.ChatInterface(respond, examples=["السلام عليكم كيف حالك؟", "hello"], title="Mistral 7B Arabic Fine-tuned")
# Launch only when run as a script (Spaces also imports app.py this way).
if __name__ == "__main__":
    demo.launch()