Fan21 committed on
Commit
31dffe8
1 Parent(s): b9dda93

Update README.md

Files changed (1)
  1. README.md +81 -15
README.md CHANGED
@@ -12,19 +12,85 @@ This model is fine-tuned with LLaMA with 8 Nvidia RTX 1080Ti GPUs and enhanced w
 ### Here is how to use it with texts in HuggingFace
 ```python
 # A list of special tokens the model was trained with
-special_tokens_dict = {
-    'additional_special_tokens': [
-        '[SAFE]','[UNSAFE]', '[OK]', '[SELF_M]','[SELF_F]', '[SELF_N]',
-        '[PARTNER_M]', '[PARTNER_F]', '[PARTNER_N]',
-        '[ABOUT_M]', '[ABOUT_F]', '[ABOUT_N]', '<speaker1>', '<speaker2>'
-    ],
-    'bos_token': '<bos>',
-    'eos_token': '<eos>',
-}
-from transformers import AutoTokenizer, AutoModelForCausalLM
-math_bot_tokenizer = AutoTokenizer.from_pretrained('uf-aice-lab/SafeMathBot')
-safe_math_bot = AutoModelForCausalLM.from_pretrained('uf-aice-lab/SafeMathBot')
-text = "Replace me by any text you'd like."
-encoded_input = math_bot_tokenizer(text, return_tensors='pt')
-output = safe_math_bot(**encoded_input)
+
+import torch
+from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig
+
+tokenizer = LlamaTokenizer.from_pretrained("Fan21/Llama-mt-lora")
+BASE_MODEL = "Fan21/Llama-mt-lora"
+if torch.cuda.is_available():
+    device = "cuda"
+else:
+    device = "cpu"
+
+# Load the model with settings appropriate for the available device
+if device == "cuda":
+    model = LlamaForCausalLM.from_pretrained(
+        BASE_MODEL,
+        load_in_8bit=False,
+        torch_dtype=torch.float16,
+        device_map="auto",
+    )
+elif device == "mps":
+    model = LlamaForCausalLM.from_pretrained(
+        BASE_MODEL,
+        device_map={"": device},
+        torch_dtype=torch.float16,
+    )
+else:
+    model = LlamaForCausalLM.from_pretrained(
+        BASE_MODEL, device_map={"": device}, low_cpu_mem_usage=True
+    )
+
+def generate_prompt(instruction, input=None):
+    # Build an Alpaca-style prompt, with or without additional input context
+    if input:
+        return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
+### Instruction:
+{instruction}
+### Input:
+{input}
+### Response:"""
+    else:
+        return f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
+### Instruction:
+{instruction}
+### Response:"""
+
+if device != "cpu":
+    model.half()
+model.eval()
+if torch.__version__ >= "2":
+    model = torch.compile(model)
+
+def evaluate(
+    instruction,
+    input=None,
+    temperature=0.1,
+    top_p=0.75,
+    top_k=40,
+    num_beams=4,
+    max_new_tokens=128,
+    **kwargs,
+):
+    prompt = generate_prompt(instruction, input)
+    inputs = tokenizer(prompt, return_tensors="pt")
+    input_ids = inputs["input_ids"].to(device)
+    generation_config = GenerationConfig(
+        temperature=temperature,
+        top_p=top_p,
+        top_k=top_k,
+        num_beams=num_beams,
+        **kwargs,
+    )
+    with torch.no_grad():
+        generation_output = model.generate(
+            input_ids=input_ids,
+            generation_config=generation_config,
+            return_dict_in_generate=True,
+            output_scores=True,
+            max_new_tokens=max_new_tokens,
+        )
+    s = generation_output.sequences[0]
+    output = tokenizer.decode(s)
+    # Keep only the text that follows the "### Response:" marker
+    return output.split("### Response:")[1].strip()
+
  ```
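
For quick reference, here is a minimal sketch of how the `evaluate` helper defined in the updated card might be called once the setup code above has been run; the instruction string is an illustrative placeholder, not an example taken from the model card.

```python
# Minimal usage sketch (assumes the setup code above has already been executed).
# The instruction text below is a hypothetical placeholder.
instruction = "Explain how to solve 2x + 3 = 11 step by step."
answer = evaluate(instruction, max_new_tokens=256)
print(answer)
```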