Sandiago21 commited on
Commit
590318d
1 Parent(s): c2eb01d

Update README.md with an improved way to load and use the model

Browse files
Files changed (1) hide show
  1. README.md +33 -58
README.md CHANGED
@@ -96,75 +96,50 @@ def generate_prompt(instruction: str, input_ctxt: str = None) -> str:
96
  Use the code below to get started with the model.
97
 
98
  ```python
99
- from transformers import LlamaTokenizer, LlamaForCausalLM
100
- from peft import PeftModel
101
-
102
- MODEL_NAME = "decapoda-research/llama-13b-hf"
103
- tokenizer = LlamaTokenizer.from_pretrained(MODEL_NAME, add_eos_token=True)
104
- tokenizer.pad_token_id = 0
105
-
106
- model = LlamaForCausalLM.from_pretrained(MODEL_NAME, load_in_8bit=True, device_map="auto")
107
- model = PeftModel.from_pretrained(model, "Sandiago21/llama-13b-hf")
108
- ```
109
-
110
- ### Example of Usage
111
- ```python
112
- from transformers import GenerationConfig
113
-
114
- PROMPT = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\nWhich is the capital city of Greece and with which countries does Greece border?\n\n### Input:\nQuestion answering\n\n### Response:\n"""
115
- DEVICE = "cuda"
116
-
117
- inputs = tokenizer(
118
- PROMPT,
119
- return_tensors="pt",
120
  )
121
-
122
- input_ids = inputs["input_ids"].to(DEVICE)
123
-
124
  generation_config = GenerationConfig(
125
- temperature=0.1,
126
- top_p=0.95,
127
- repetition_penalty=1.2,
 
 
128
  )
129
 
130
- print("Generating Response ... ")
131
- with torch.no_grad():
132
- generation_output = model.generate(
133
- input_ids=input_ids,
134
- generation_config=generation_config,
135
- return_dict_in_generate=True,
136
- output_scores=True,
137
- max_new_tokens=256,
138
- )
139
-
140
- for s in generation_output.sequences:
141
- print(tokenizer.decode(s))
142
  ```
143
 
144
- ### Example Output
145
  ```python
146
- Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
147
-
148
- ### Instruction:
149
- Which is the capital city of Greece and with which countries does Greece border?
150
-
151
- ### Input:
152
- Question answering
153
-
154
- ### Response:
155
 
156
-
157
- Generating...
158
- <unk> Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
159
 
160
- ### Instruction:
161
- Which is the capital city of Greece and with which countries does Greece border?
 
 
 
 
 
162
 
163
- ### Input:
164
- Question answering
165
 
166
- ### Response:
167
- <unk>capital city of Athens and it borders Albania to the northwest, North Macedonia and Bulgaria to the northeast, Turkey to the east, and Libya to the southeast across the Mediterranean Sea.
168
  ```
169
 
170
  ## Training Details
 
96
  Use the code below to get started with the model.
97
 
98
  ```python
99
+ import torch
100
+ from transformers import GenerationConfig, LlamaTokenizer, LlamaForCausalLM
101
+
102
+ tokenizer = LlamaTokenizer.from_pretrained("chainyo/alpaca-lora-7b")
103
+ model = LlamaForCausalLM.from_pretrained(
104
+ "chainyo/alpaca-lora-7b",
105
+ load_in_8bit=True,
106
+ torch_dtype=torch.float16,
107
+ device_map="auto",
 
 
 
 
 
 
 
 
 
 
 
 
108
  )
 
 
 
109
  generation_config = GenerationConfig(
110
+ temperature=0.2,
111
+ top_p=0.75,
112
+ top_k=40,
113
+ num_beams=4,
114
+ max_new_tokens=128,
115
  )
116
 
117
+ model.eval()
118
+ if torch.__version__ >= "2":
119
+ model = torch.compile(model)
 
 
 
 
 
 
 
 
 
120
  ```
121
 
122
+ ### Example of Usage
123
  ```python
124
+ instruction = "What is the capital city of Greece and with which countries does Greece border?"
125
+ input_ctxt = None # For some tasks, you can provide an input context to help the model generate a better response.
 
 
 
 
 
 
 
126
 
127
+ prompt = generate_prompt(instruction, input_ctxt)
128
+ input_ids = tokenizer(prompt, return_tensors="pt").input_ids
129
+ input_ids = input_ids.to(model.device)
130
 
131
+ with torch.no_grad():
132
+ outputs = model.generate(
133
+ input_ids=input_ids,
134
+ generation_config=generation_config,
135
+ return_dict_in_generate=True,
136
+ output_scores=True,
137
+ )
138
 
139
+ response = tokenizer.decode(outputs.sequences[0], skip_special_tokens=True)
140
+ print(response)
141
 
142
+ >>> The capital city of Greece is Athens and it borders Albania, Macedonia, Bulgaria and Turkey.
 
143
  ```
144
 
145
  ## Training Details