nazimali commited on
Commit
6a6d6d2
1 Parent(s): 0867e0e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -0
app.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ import gradio as gr
4
+ from huggingface_hub import hf_hub_download
5
+ from llama_cpp import Llama
6
+ import spaces
7
+
8
# Hugging Face access token, read from the environment so it is never
# hard-coded; may be None, which hf_hub_download accepts for public repos.
huggingface_token = os.getenv("HF_TOKEN")

# Arabic Alpaca-style instruction template. Translation: "Below is an
# instruction that describes a task. Write a response that appropriately
# completes the request. ### Instruction: {} ### Answer:"
infer_prompt = "فيما يلي تعليمات تصف مهمة. اكتب استجابة تكمل الطلب بشكل مناسب.\n\n### تعليمات:\n{}\n\n### إجابة:\n"
# Repo holding the fine-tuned model and the quantized weights file within it.
model_id = "nazimali/mistral-7b-v0.3-instruct-arabic"
file_name = "Q8_0.gguf"
# Model handle; left None here and populated lazily on the first request
# (see respond()), so the weights are loaded inside the GPU context.
llm = None


# Download the GGUF weights at startup so the first chat request only pays
# the model-load cost, not the network download.
hf_hub_download(
    repo_id=model_id,
    filename=file_name,
    local_dir="./models",
    token=huggingface_token,
)
22
+
23
+
24
@spaces.GPU
def respond(
    message,
    history,
):
    """Stream a chat completion for *message* from the local GGUF model.

    Parameters
    ----------
    message : str
        The user's latest message; it is wrapped in the Arabic
        instruction template ``infer_prompt`` before being sent to the model.
    history : list
        Prior turns supplied by ``gr.ChatInterface``. Currently unused —
        each reply is generated from the latest message alone, so the bot
        has no conversational memory. (NOTE(review): wiring history into
        the prompt would need care with the single-turn fine-tune format.)

    Yields
    ------
    str
        The accumulated response text so far; Gradio re-renders the chat
        bubble on every yield to produce a streaming effect.
    """
    global llm
    # Lazy-load the model on the first request so construction happens
    # inside the GPU allocation provided by the @spaces.GPU decorator.
    if llm is None:
        llm = Llama(
            model_path=f"./models/{file_name}",
            flash_attn=True,
            n_gpu_layers=-1,  # offload all layers to the GPU
            n_ctx=2048,
            verbose=True,
        )

    stream = llm.create_chat_completion(
        messages=[{"role": "user", "content": infer_prompt.format(message)}],
        max_tokens=50,
        repeat_penalty=1.2,
        stream=True,
        temperature=0.7,
        top_k=40,
        top_p=0.95,
    )

    outputs = ""
    # Fix: dropped the leftover debug `print(output)` that dumped every raw
    # streaming chunk to stdout on each token.
    for chunk in stream:
        # The first streamed delta carries only the role (no "content"),
        # hence the .get() with an empty-string default.
        outputs += chunk["choices"][0]["delta"].get("content", "")
        yield outputs
54
+
55
+
56
+
57
# Wire respond() into a Gradio chat UI; the generator return value of
# respond() makes the interface stream tokens as they arrive.
demo = gr.ChatInterface(respond, examples=["السلام عليكم", "hello"], title="Mistral 7B Arabic Fine-tuned")


# Standard script entry guard: start the web server only when this module
# is executed directly.
if __name__ == "__main__":
    demo.launch()