Spaces:

nazimali
/

mistral-7b-v0.3-instruct-arabic

Sleeping

Create app.py

6a6d6d2 verified about 2 months ago

1.41 kB

	import os

	import gradio as gr
	from huggingface_hub import hf_hub_download
	from llama_cpp import Llama
	import spaces

	# Hugging Face access token taken from the environment (None when HF_TOKEN is unset).
	huggingface_token = os.getenv("HF_TOKEN")

	# Hub repository and the weights file fetched from it.
	model_id = "nazimali/mistral-7b-v0.3-instruct-arabic"
	file_name = "Q8_0.gguf"

	# Arabic instruction/response prompt template; "{}" is replaced with the
	# user's message via str.format. Roughly: "Below is an instruction that
	# describes a task. Write a response that appropriately completes the
	# request." followed by "### Instruction:" and "### Response:" headers.
	infer_prompt = (
	    "فيما يلي تعليمات تصف مهمة. اكتب استجابة تكمل الطلب بشكل مناسب.\n\n"
	    "### تعليمات:\n{}\n\n"
	    "### إجابة:\n"
	)

	# Model handle; stays None until respond() constructs it on first use.
	llm = None


	# Fetch the weights file into ./models at import time; the returned path is
	# not kept, respond() rebuilds it from file_name.
	hf_hub_download(repo_id=model_id, filename=file_name, local_dir="./models", token=huggingface_token)


	@spaces.GPU
	def respond(message, history):
	    """Stream the model's reply to ``message`` as a growing string.

	    Args:
	        message: Text of the latest user message; it is inserted into
	            ``infer_prompt`` before being sent to the model.
	        history: Earlier turns supplied by the caller. Accepted for the
	            callback signature but not read, so each reply is generated
	            from ``message`` alone.

	    Yields:
	        The reply accumulated so far, once per streamed chunk.
	    """
	    global llm

	    # Build the model on the first call only and keep it in the module-level
	    # ``llm`` for later calls.
	    # NOTE(review): presumably deferred so construction happens inside the
	    # @spaces.GPU-decorated call -- confirm.
	    if llm is None:
	        llm = Llama(
	            model_path=f"./models/{file_name}",
	            flash_attn=True,
	            n_gpu_layers=-1,
	            n_ctx=2048,
	            verbose=True,
	        )

	    prompt = infer_prompt.format(message)
	    sampling_options = {
	        "max_tokens": 50,
	        "repeat_penalty": 1.2,
	        "temperature": 0.7,
	        "top_k": 40,
	        "top_p": 0.95,
	    }
	    chunks = llm.create_chat_completion(
	        messages=[{"role": "user", "content": prompt}],
	        stream=True,
	        **sampling_options,
	    )

	    reply_so_far = ""
	    for chunk in chunks:
	        print(chunk)
	        delta = chunk["choices"][0]["delta"]
	        # A delta without a "content" key contributes nothing to the reply.
	        reply_so_far += delta.get("content", "")
	        yield reply_so_far



	# Chat UI that routes each user message through respond(), with one Arabic
	# and one English example prompt.
	demo = gr.ChatInterface(
	    respond,
	    examples=["السلام عليكم", "hello"],
	    title="Mistral 7B Arabic Fine-tuned",
	)


	# Launch the app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()