ARiiN committed on
Commit e351d4a · 1 Parent(s): 0fb23b0

Updated app.py to use GGUF model

Files changed (1)
  1. app.py +6 -20
app.py CHANGED
@@ -1,27 +1,13 @@
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
-import torch
+from llama_cpp import Llama
 import gradio as gr
 
-model_name = "mistralai/Mistral-7B-Instruct-v0.2"
+model_path = "matrixportal/Mistral-7B-Instruct-v0.3-Q4_K_M-GGUF"  # Update with the correct filename
 
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    torch_dtype=torch.float16,
-    low_cpu_mem_usage=True
-)
-
-pipe = pipeline(
-    "text-generation",
-    model=model,
-    tokenizer=tokenizer,
-    device=0
-)
+llm = Llama(model_path=model_path, n_ctx=2048, n_threads=8)
 
 def chat_response(prompt):
-    response = pipe(prompt, max_new_tokens=100, do_sample=True, temperature=0.7)
-    return response[0]["generated_text"]
+    output = llm(prompt, max_tokens=200, temperature=0.7)
+    return output["choices"][0]["text"]
 
 iface = gr.Interface(fn=chat_response, inputs="text", outputs="text")
-iface.launch(server_name="0.0.0.0", server_port=7860)
+iface.launch()
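
Note (not part of the commit): Llama(model_path=...) expects a path to a local .gguf file, while the value assigned to model_path in this diff is a Hub repo id, as the inline comment itself flags. A minimal sketch of one way to resolve that, assuming llama-cpp-python with huggingface-hub installed and using its Llama.from_pretrained helper; the filename pattern is a guess at the quantized file in that repo, and extra keyword arguments are forwarded to the Llama constructor just like in the committed code:

# Sketch only, not part of this commit. Llama.from_pretrained downloads the
# GGUF file from the Hugging Face Hub and then loads it locally.
from llama_cpp import Llama

llm = Llama.from_pretrained(
    repo_id="matrixportal/Mistral-7B-Instruct-v0.3-Q4_K_M-GGUF",
    filename="*Q4_K_M.gguf",  # glob pattern; assumed to match the quantized file
    n_ctx=2048,
    n_threads=8,
)

# Same call pattern as chat_response() in the new app.py
output = llm("What is a GGUF file?", max_tokens=64, temperature=0.7)
print(output["choices"][0]["text"])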