mostafaamiri committed on
Commit
ee71205
1 Parent(s): 301be21

Create app.py

Files changed (1)
app.py +55 -0
app.py ADDED
@@ -0,0 +1,55 @@
+ import torch
+ from transformers import LlamaForCausalLM, LlamaTokenizer
+ from peft import PeftModel
+ import re
+ import gradio as gr
+
+ # The fine-tuned tokenizer can include added Persian tokens, so the base
+ # model's embedding matrix is resized to match it below.
+ tokenizer = LlamaTokenizer.from_pretrained('mostafaamiri/persian_llama_7b')
+
+ base_model = LlamaForCausalLM.from_pretrained(
+     "meta-llama/Llama-2-7b-hf",
+     load_in_8bit=False,  # full-precision weights
+ )
+ base_model.resize_token_embeddings(len(tokenizer))
+
+ # Attach the fine-tuned PEFT adapter on top of the base model.
+ model = PeftModel.from_pretrained(
+     base_model,
+     "mostafaamiri/persian_llama_7b",
+ )
+
+
+ # Alpaca-style instruction prompt template.
+ prompt_input = (
+     "Below is an instruction that describes a task. "
+     "Write a response that appropriately completes the request.\n\n"
+     "### Instruction:\n\n{instruction}\n\n### Response:\n\n"
+ )
+
+
+ def generate_prompt(instruction, input=None):
+     # Fold an optional input into the instruction before templating.
+     if input:
+         instruction = instruction + '\n' + input
+     return prompt_input.format_map({'instruction': instruction})
+
+
+ # Sampling parameters forwarded to model.generate().
+ config = dict(
+     temperature=0.2,
+     top_k=40,
+     top_p=0.9,
+     do_sample=True,
+     num_beams=1,
+     repetition_penalty=1.2,
+     max_new_tokens=300,
+ )
+
+
+ def launch_model(text):
+     input_tokens = tokenizer(generate_prompt(text), return_tensors="pt")
+     outputs = model.generate(**input_tokens, **config)
+     output = tokenizer.decode(outputs[0], skip_special_tokens=True)
+     # Strip the echoed prompt so only the model's response is returned.
+     output = re.sub(
+         r"Below is an instruction that describes a task\. "
+         r"Write a response that appropriately completes the request\.\n\n"
+         r"### Instruction:\n\n.*\n\n### Response:\n\n",
+         "",
+         output,
+         flags=re.DOTALL,
+     )
+     return output
+
+
+ iface = gr.Interface(fn=launch_model, inputs="text", outputs="text")
+ iface.launch()
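
If memory is tight when running this app locally, loading the base model in half precision is a common alternative to the full-precision load above. A minimal sketch, assuming a CUDA GPU and the accelerate package are installed; the torch_dtype and device_map arguments are illustrative substitutions, not part of this commit:

    import torch
    from transformers import LlamaForCausalLM

    # Assumption: fp16 roughly halves memory versus the default fp32 load
    # (about 14 GB vs. 28 GB for 7B parameters); device_map="auto" requires
    # the accelerate package and places layers on available devices.
    base_model = LlamaForCausalLM.from_pretrained(
        "meta-llama/Llama-2-7b-hf",
        torch_dtype=torch.float16,
        device_map="auto",
    )

With that swap the rest of app.py is unchanged; running `python app.py` starts the Gradio server and prints a local URL.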