import os
import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
import spaces
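
# Hugging Face access token (e.g. a Space secret) used to download the model weights.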
huggingface_token = os.getenv("HF_TOKEN")
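
# Alpaca-style Arabic instruction template. In English:
# "Below is an instruction that describes a task. Write a response that
#  appropriately completes the request.\n\n### Instruction:\n{}\n\n### Answer:\n"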
infer_prompt = "فيما يلي تعليمات تصف مهمة. اكتب استجابة تكمل الطلب بشكل مناسب.\n\n### تعليمات:\n{}\n\n### إجابة:\n"
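# Q8_0.gguf is the 8-bit-quantized GGUF build published in the model repo.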
model_id = "nazimali/mistral-7b-v0.3-instruct-arabic"
file_name = "Q8_0.gguf"
llm = None
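
# Fetch the quantized weights once at startup; the Llama instance itself is
# created lazily inside respond() below.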
hf_hub_download(
    repo_id=model_id,
    filename=file_name,
    local_dir="./models",
    token=huggingface_token,
)
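
# On ZeroGPU Spaces, @spaces.GPU attaches a GPU to the process only while
# the decorated function is running.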
@spaces.GPU
def respond(message, history):
    # Load the model on the first request so it is created inside the
    # GPU-attached process rather than at import time.
    global llm
    if llm is None:
        llm = Llama(
            model_path=f"./models/{file_name}",
            flash_attn=True,
            n_gpu_layers=-1,  # offload all layers to the GPU
            n_ctx=1024,
            verbose=True,
        )
    # Stream a chat completion, wrapping the user message in the Arabic
    # instruction template.
    stream = llm.create_chat_completion(
        messages=[{"role": "user", "content": infer_prompt.format(message)}],
        max_tokens=512,
        repeat_penalty=1.2,
        stream=True,
        temperature=0.7,
        top_k=40,
        top_p=0.95,
    )
outputs = ""
for output in stream:
print(output)
outputs += output["choices"][0]["delta"].get("content", "")
yield outputs
# The first example message means "Peace be upon you, how are you?"
demo = gr.ChatInterface(
    respond,
    examples=["السلام عليكم كيف حالك؟", "hello"],
    title="Mistral 7B Arabic Fine-tuned",
)
if __name__ == "__main__":
    demo.launch()
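
# A sketch for running this outside a Space (assumes a CUDA build of
# llama-cpp-python and HF_TOKEN exported in the shell): @spaces.GPU should be
# a no-op off ZeroGPU hardware, so `python app.py` serves the same Gradio app.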