LeroyDyer committed on
Commit
fc20bf5
·
verified ·
1 Parent(s): 6a7441e

Upload 2 files

Browse files
Files changed (1) hide show
  1. _Create_Instance.py +261 -0
_Create_Instance.py ADDED
@@ -0,0 +1,261 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## CREATE MODEL FROM SCRATCH
2
+
3
+ ## TO BE REMOVED
4
+ # pip install reportlab
5
+
6
+ from transformers import AutoTokenizer, AutoModelForCausalLM, TextGenerationPipeline, AutoConfig, BitsAndBytesConfig,AutoConfig
7
+ import time
8
+ import torch
9
+ torch.backends.cuda.matmul.allow_tf32 = True
10
+ import random
11
+ from datasets import load_dataset
12
+ from transformers import TrainingArguments
13
+ from trl import SFTTrainer
14
+ from peft import LoraConfig
15
+ # from accelerate import infer_auto_device_map, init_empty_weights, dispatch_model
16
+ from torch.nn import CrossEntropyLoss
17
# Autograd anomaly detection is switched on for the whole process.
# NOTE(review): this is normally a debugging aid - confirm it is meant to stay
# enabled for real training runs.
torch.autograd.set_detect_anomaly(True)

# One fixed seed feeds both Python's and torch's random number generators.
random_seed = 42
random.seed(random_seed)
torch.manual_seed(random_seed)

# Use CUDA when torch reports it as available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Module-level defaults; model_init() reads the first three when the caller
# does not supply its own values.
n_ahead_talk_global = 4
n_passes_global = 2
n_ahead_global = 8
n_examples = 0
31
+
32
def model_init(params):
    """Load the base checkpoint and tokenizer and apply the thought settings.

    Args:
        params: ``None`` to use the defaults, or an object whose ``.params``
            attribute is a mapping of setting name to value (only ``.get`` is
            called on it).

    Returns:
        A ``(model, tokenizer)`` tuple. The tokenizer is also attached to the
        model as ``model.tokenizer``.
    """
    original = False
    params = {} if params is None else params.params

    # Look-ahead settings default to the module-level globals unless the
    # "original" mode flag is set, in which case they default to 1.
    n_ahead = params.get("n_ahead", 1 if original else n_ahead_global)
    n_ahead_talk = params.get("n_ahead_talk", 1 if original else n_ahead_talk_global)
    n_passes = params.get("n_passes", 1 if original else n_passes_global)
    gumbel_temperature = params.get("gumbel_temperature", 1)
    use_start_thought_token = params.get("use_start_thought_token", True)
    use_end_thought_token = params.get("use_end_thought_token", True)
    include_policy_loss = params.get("include_policy_loss", True)
    gumbel_detach = params.get("gumbel_detach", True)
    merged_talk_heads = params.get("merged_talk_heads", True)
    residual_think_head = params.get("residual_think_head", False)
    optimize_lm_head_only_at_start = params.get("optimize_lm_head_only_at_start", False)

    # The tokenizer is loaded from the same repository as the weights.
    model_id = "LeroyDyer/_Spydaz_Web_AI_V2_Aligned"
    tokenizer_id = model_id

    print("Loading model")
    weights_dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=weights_dtype,
        max_thoughts=n_ahead + n_ahead_talk + 1,
        merged_talk_heads=merged_talk_heads,
        merged_lm_and_talk_heads=False,
        merged_lm_and_think_heads=True,
        use_concat_talk_head=True,
        use_shallow_think=True,
        use_shallow_talk=False,
        use_complex_think_head=False,
        use_complex_talk_head=True,
        use_weighted_talk_head=True,
        trust_remote_code=True,
        device_map="auto",
    )
    print("Loaded model")

    tokenizer = AutoTokenizer.from_pretrained(
        tokenizer_id, truncation=True, padding_side="right"
    )
    # Padding reuses the end-of-sequence token id.
    tokenizer.pad_token_id = tokenizer.eos_token_id

    # NOTE(review): the flags consulted here are the ones already on the loaded
    # model, not the params-derived values assigned further down - confirm
    # that ordering is intended.
    thought_tokens = [
        token
        for enabled, token in (
            (model.use_start_thought_token, "<|startthought|>"),
            (model.use_end_thought_token, "<|endthought|>"),
        )
        if enabled
    ]
    if thought_tokens:
        tokenizer.add_special_tokens({"additional_special_tokens": thought_tokens})
        # Resize the embedding table to the tokenizer's new length.
        model.resize_token_embeddings(len(tokenizer))
    model.tokenizer = tokenizer

    # Print every sub-module whose name contains "embed".
    for module_name, submodule in model.named_modules():
        if "embed" not in module_name:
            continue
        print(submodule, flush=True)

    # Copy the resolved settings onto the model, in a fixed order.
    overrides = {
        "gumbel_detach": gumbel_detach,
        "include_policy_loss": include_policy_loss,
        "use_end_thought_token": use_end_thought_token,
        "use_start_thought_token": use_start_thought_token,
        "n_ahead": n_ahead,
        "n_ahead_talk": n_ahead_talk,
        "n_passes": n_passes,
        "residual_think_head": residual_think_head,
        "optimize_lm_head_only_at_start": optimize_lm_head_only_at_start,
        "gumbel_temperature": gumbel_temperature,
        "original_mode": original,
        "config_params": params,
    }
    for attribute, value in overrides.items():
        setattr(model, attribute, value)

    return model, tokenizer
102
+
103
+
104
+
105
+
106
# Build the model and tokenizer with default settings (no params object).
model, tokenizer = model_init(None)

model  # bare expression kept from the original; it has no effect in a script

# Write the tokenizer first, then the model, into the local "IModel" folder.
for artifact in (tokenizer, model):
    artifact.save_pretrained("IModel")
111
+
112
+ import os
113
+ import huggingface_hub
114
+ from huggingface_hub import notebook_login
115
+ from huggingface_hub import create_repo, HfApi
116
+ from huggingface_hub import hf_hub_download
117
+ from huggingface_hub import create_repo, HfApi
118
+ from huggingface_hub import snapshot_download
119
# Hub credentials. The token is read from the HF_TOKEN environment variable so
# no secret has to be written into this file; when the variable is unset the
# value is the same empty string the script used before.
WRITE_TOKEN = os.environ.get("HF_TOKEN", "")
username = "LeroyDyer"
huggingface_hub.login(WRITE_TOKEN)
api = HfApi(token=WRITE_TOKEN)

MODEL_NAME = "_Spydaz_Web_AI_MistralStar"
Folderinput = "IModel"

# Target repository id, built once and reused for both Hub calls below.
repo_id = f"{username}/{MODEL_NAME}"

# Create the model repo; exist_ok=True makes this a no-op if it already exists.
api.create_repo(
    repo_id=repo_id,
    repo_type="model",
    exist_ok=True,
)

# Upload the contents of the local folder to that repo.
api.upload_folder(
    repo_id=repo_id,
    folder_path=Folderinput,
)
139
+
140
+
141
+
142
+
143
+ import huggingface_hub
144
+ from trl import SFTTrainer
145
+ from transformers import TrainingArguments
146
+ from datasets import load_dataset
147
+ from unsloth import FastLanguageModel
148
+ import torch
149
import os

# Hub credentials. The token is read from the HF_TOKEN environment variable so
# no secret has to be written into this file; when the variable is unset the
# value is the same empty string the script used before.
WRITE_TOKEN = os.environ.get("HF_TOKEN", "")
username = "LeroyDyer"
huggingface_hub.login(WRITE_TOKEN)

MODEL_ID = "LeroyDyer/_Spydaz_Web_AI_MistralStar"
max_seq_length = 1512  # Choose any! We auto support RoPE Scaling internally!
dtype = None  # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True  # Use 4bit quantization to reduce memory usage. Can be False.

# Load the checkpoint and its tokenizer through Unsloth.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=MODEL_ID,  # Choose ANY! eg teknium/OpenHermes-2.5-Mistral-7B
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
    # token = "", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

# Wrap the model with LoRA adapters on the attention and MLP projections.
model = FastLanguageModel.get_peft_model(
    model,
    r=32,  # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    lora_alpha=64,
    lora_dropout=0,  # Supports any, but = 0 is optimized
    bias="none",  # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing="unsloth",  # True or "unsloth" for very long context
    random_state=644993,
    use_rslora=False,  # We support rank stabilized LoRA
    loftq_config=None,  # And LoftQ
)
179
+
180
+
181
+
182
# Alpaca-style prompt template. It has three positional "{}" slots, filled in
# order with the instruction, the input and the response.
alpaca_prompt = (
    "Below is an instruction that describes a task, paired with an input that "
    "provides further context. Write a response that appropriately completes "
    "the request.\n"
    "\n"
    "### Instruction:\n"
    "{}\n"
    "\n"
    "### Input:\n"
    "{}\n"
    "\n"
    "### Response:\n"
    "{}"
)
192
+
193
+
194
# End-of-sequence marker taken from the tokenizer; it is appended to every
# formatted sample (must add EOS_TOKEN, otherwise generation goes on forever).
EOS_TOKEN = tokenizer.eos_token


def formatting_prompts_func(examples):
    """Render a batch of records into prompt strings.

    Args:
        examples: mapping with parallel sequences under the keys
            "instruction", "input" and "output".

    Returns:
        A dict with a single key "text" holding one string per record: the
        filled-in ``alpaca_prompt`` followed by ``EOS_TOKEN``.
    """
    records = zip(examples["instruction"], examples["input"], examples["output"])
    rendered = [
        alpaca_prompt.format(task, context, answer) + EOS_TOKEN
        for task, context, answer in records
    ]
    return {"text": rendered}
206
+
207
+ from datasets import load_dataset
208
# Take the first 1000 training rows, shuffle them with a fixed seed, then add
# the rendered "text" column in batched mode.
dataset = (
    load_dataset("gate369/Alpaca-Star", split="train[:1000]")
    .shuffle(seed=9969)
    .map(formatting_prompts_func, batched=True)
)
211
+
212
+
213
+ from trl import SFTTrainer
214
+ from transformers import TrainingArguments
215
+ from unsloth import is_bfloat16_supported
216
+ from unsloth import UnslothTrainer, UnslothTrainingArguments
217
+
218
# Pick the mixed-precision mode: bf16 where supported, fp16 otherwise.
bf16_supported = is_bfloat16_supported()

# Optimisation and checkpointing settings for the run.
training_args = UnslothTrainingArguments(
    per_device_train_batch_size=10,
    gradient_accumulation_steps=8,
    warmup_ratio=0.1,
    num_train_epochs=2,
    learning_rate=2e-4,
    embedding_learning_rate=2e-5,
    output_dir="outputs",
    save_strategy="steps",
    save_steps=50,
    fp16=not bf16_supported,
    bf16=bf16_supported,
    logging_steps=1,
    optim="adamw_8bit",
    weight_decay=0.00,
    lr_scheduler_type="cosine",
    seed=3607,
)

# The trainer reads its samples from the dataset's "text" column.
trainer = UnslothTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=8,
    args=training_args,
)

# Run training and keep the returned statistics object.
trainer_stats = trainer.train()
248
+
249
+
250
+
251
import os

# Token for the Hub pushes below. It is read from the HF_TOKEN environment
# variable so no secret has to be written into this file; when the variable is
# unset the value is the same empty string the script used before.
_hub_token = os.environ.get("HF_TOKEN", "")

# Each export is switched on or off by editing its literal True/False guard.

# Merge to 16bit
if False:
    model.save_pretrained_merged("LCARS_AI_015", tokenizer, save_method="merged_16bit")
if True:
    model.push_to_hub_merged(
        "_Spydaz_Web_AI_STAR_Aligned",
        tokenizer,
        save_method="merged_16bit",
        token=_hub_token,
    )

# Merge to 4bit
if False:
    model.save_pretrained_merged("model", tokenizer, save_method="merged_4bit_forced")
if True:
    model.push_to_hub_merged(
        "_Spydaz_Web_AI_STAR_Aligned_4_BIT",
        tokenizer,
        save_method="merged_4bit_forced",
        token=_hub_token,
    )

# Just LoRA adapters
if False:
    model.save_pretrained_merged("model", tokenizer, save_method="lora")
if False:
    model.push_to_hub_merged(
        "Test_Lora",
        tokenizer,
        save_method="lora",
        token=_hub_token,
    )