nazimali commited on
Commit
6a6d6d2
1 Parent(s): 0867e0e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -0
app.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ import gradio as gr
4
+ from huggingface_hub import hf_hub_download
5
+ from llama_cpp import Llama
6
+ import spaces
7
+
8
# Hugging Face access token, read from the environment so it is never
# hard-coded; may be None, which hf_hub_download accepts for public repos.
huggingface_token = os.getenv("HF_TOKEN")

# Arabic Alpaca-style instruction template. Translation: "Below is an
# instruction that describes a task. Write a response that appropriately
# completes the request. ### Instruction: {} ### Answer:"
infer_prompt = "فيما يلي تعليمات تصف مهمة. اكتب استجابة تكمل الطلب بشكل مناسب.\n\n### تعليمات:\n{}\n\n### إجابة:\n"
# Repo holding the fine-tuned model and the quantized weights file within it.
model_id = "nazimali/mistral-7b-v0.3-instruct-arabic"
file_name = "Q8_0.gguf"
# Model handle; left None here and populated lazily on the first request
# (see respond()), so the weights are loaded inside the GPU context.
llm = None


# Download the GGUF weights at startup so the first chat request only pays
# the model-load cost, not the network download.
hf_hub_download(
    repo_id=model_id,
    filename=file_name,
    local_dir="./models",
    token=huggingface_token,
)
22
+
23
+
24
@spaces.GPU
def respond(
    message,
    history,
):
    """Stream a chat completion for *message* from the local GGUF model.

    Parameters
    ----------
    message : str
        The user's latest message; it is wrapped in the Arabic
        instruction template ``infer_prompt`` before being sent to the model.
    history : list
        Prior turns supplied by ``gr.ChatInterface``. Currently unused —
        each reply is generated from the latest message alone, so the bot
        has no conversational memory. (NOTE(review): wiring history into
        the prompt would need care with the single-turn fine-tune format.)

    Yields
    ------
    str
        The accumulated response text so far; Gradio re-renders the chat
        bubble on every yield to produce a streaming effect.
    """
    global llm
    # Lazy-load the model on the first request so construction happens
    # inside the GPU allocation provided by the @spaces.GPU decorator.
    if llm is None:
        llm = Llama(
            model_path=f"./models/{file_name}",
            flash_attn=True,
            n_gpu_layers=-1,  # offload all layers to the GPU
            n_ctx=2048,
            verbose=True,
        )

    stream = llm.create_chat_completion(
        messages=[{"role": "user", "content": infer_prompt.format(message)}],
        max_tokens=50,
        repeat_penalty=1.2,
        stream=True,
        temperature=0.7,
        top_k=40,
        top_p=0.95,
    )

    outputs = ""
    # Fix: dropped the leftover debug `print(output)` that dumped every raw
    # streaming chunk to stdout on each token.
    for chunk in stream:
        # The first streamed delta carries only the role (no "content"),
        # hence the .get() with an empty-string default.
        outputs += chunk["choices"][0]["delta"].get("content", "")
        yield outputs
54
+
55
+
56
+
57
# Wire respond() into a Gradio chat UI; the generator return value of
# respond() makes the interface stream tokens as they arrive.
demo = gr.ChatInterface(respond, examples=["السلام عليكم", "hello"], title="Mistral 7B Arabic Fine-tuned")


# Standard script entry guard: start the web server only when this module
# is executed directly.
if __name__ == "__main__":
    demo.launch()