alexkueck committed
Commit d663d84 · 1 Parent(s): b775bb9

Update app.py

Files changed (1)
  1. app.py +60 -2
app.py CHANGED
@@ -80,7 +80,7 @@ login(token=os.environ["HF_ACCESS_TOKEN"]) #for read access!!!!
 #Modelle und Tokenizer
 
 #Alternativ mit beliebigen Modellen:
-base_model = "project-baize/baize-v2-7b" #load_8bit = False (in load_tokenizer_and_model)
+base_model = "project-baize/baize-v2-7b" #load_8bit = True (in load_tokenizer_and_model)
 #base_model = "TheBloke/airoboros-13B-HF" #load_8bit = False (in load_tokenizer_and_model)
 #base_model = "EleutherAI/gpt-neo-1.3B" #load_8bit = False (in load_tokenizer_and_model)
 #base_model = "TheBloke/airoboros-13B-HF" #load_8bit = True
@@ -91,7 +91,7 @@ base_model = "project-baize/baize-v2-7b" #load_8bit = False (in load_tokenizer_
 # Load model directly
 
 #Tokenizer und Model laden
-tokenizer,model,device = load_tokenizer_and_model(base_model, False)
+tokenizer,model,device = load_tokenizer_and_model(base_model, True)
 #tokenizer.add_special_tokens({'pad_token': '[PAD]'}) #not necessary with fast Toekenizers like GPT2
 
 #Datensets für Finetuning laden
@@ -147,6 +147,8 @@ print ("training args")
 #Training Args
 batch_size = 2
 
+'''
+# Training Argumente setzen (kleinere LLMs)
 training_args = TrainingArguments(
     output_dir="alexkueck/li-tis-tuned-2",
     overwrite_output_dir = 'True',
@@ -174,12 +176,14 @@ training_args = TrainingArguments(
     #load_best_model_at_end=True
     #push_to_hub=True,
 )
+'''
 
 ############################################
 #def trainieren_neu(name):
 #Trainer zusammenstellen
 print ("################################")
 print ("trainer")
+'''
 trainer = Trainer(
     model=model,
     args=training_args,
@@ -188,6 +192,7 @@ trainer = Trainer(
     #tokenizer=tokenizer,
     compute_metrics=compute_metrics,
 )
+'''
 
 ###############################################
 #Special QA Trainer...#
@@ -206,6 +211,59 @@ trainer = QuestionAnsweringTrainer(
 '''
 #################################################
 
+#################################################
+# special Trainer Baize Model
+# Parameters
+MICRO_BATCH_SIZE = int(sys.argv[2])
+BATCH_SIZE = 64
+size = sys.argv[1]
+GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // MICRO_BATCH_SIZE
+EPOCHS = 1
+LEARNING_RATE = float(sys.argv[3])
+CUTOFF_LEN = 512
+LORA_R = 8
+LORA_ALPHA = 16
+LORA_DROPOUT = 0.05
+VAL_SET_SIZE = 2000
+TARGET_MODULES = [
+    "q_proj",
+    "k_proj",
+    "v_proj",
+    "down_proj",
+    "gate_proj",
+    "up_proj",
+]
+#DATA_PATH = "data/data_tmp.json"
+OUTPUT_DIR = "alexkueck/li-tis-tuned-2"
+trainer = transformers.Trainer(
+    model=model,
+    train_dataset=lm_datasets["train"],
+    eval_dataset=lm_datasets["test"],
+    args=transformers.TrainingArguments(
+        output_dir="alexkueck/li-tis-tuned-2",
+        overwrite_output_dir = 'True',
+        per_device_train_batch_size=MICRO_BATCH_SIZE,
+        per_device_eval_batch_size=MICRO_BATCH_SIZE,
+        gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
+        warmup_steps=100,
+        num_train_epochs=EPOCHS,
+        learning_rate=LEARNING_RATE,
+        fp16=True,
+        logging_steps=20,
+        evaluation_strategy="steps" if VAL_SET_SIZE > 0 else "no",
+        save_strategy="steps",
+        eval_steps=200 if VAL_SET_SIZE > 0 else None,
+        save_steps=200,
+        save_total_limit=100,
+        load_best_model_at_end=True if VAL_SET_SIZE > 0 else False,
+        ddp_find_unused_parameters=False if ddp else None,
+    ),
+    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
+)
+model.config.use_cache = False
+
+
+
 
 #trainer ausführen
 trainer.train()
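
Note on the added block: the commit defines the LoRA hyperparameters from the Baize fine-tuning recipe (LORA_R, LORA_ALPHA, LORA_DROPOUT, TARGET_MODULES) but does not attach them to the model before building the Trainer. Below is a minimal sketch, not part of this commit, of how these values are usually applied with the peft library to a model loaded in 8-bit; peft, prepare_model_for_int8_training, and the surrounding variables are assumptions.

    # Sketch (assumption, not in this commit): wire the LoRA hyperparameters
    # defined above into the 8-bit base model before constructing the Trainer.
    from peft import LoraConfig, get_peft_model, prepare_model_for_int8_training

    # The base model is loaded with load_8bit=True, so prepare it for int8
    # training first (newer peft releases name this prepare_model_for_kbit_training).
    model = prepare_model_for_int8_training(model)

    lora_config = LoraConfig(
        r=LORA_R,
        lora_alpha=LORA_ALPHA,
        lora_dropout=LORA_DROPOUT,
        target_modules=TARGET_MODULES,
        bias="none",
        task_type="CAUSAL_LM",
    )
    model = get_peft_model(model, lora_config)
    model.print_trainable_parameters()  # only the LoRA adapter weights should be trainable

With the adapters attached, the transformers.Trainer added in this commit updates only the low-rank adapter matrices, which is what makes fine-tuning the 7B Baize model feasible in 8-bit on a single GPU.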