Update app.py
app.py
CHANGED
@@ -80,7 +80,7 @@ login(token=os.environ["HF_ACCESS_TOKEN"]) #for read access!!!!
 #Models and tokenizer
 
 #Alternatively with arbitrary models:
-base_model = "project-baize/baize-v2-7b" #load_8bit = False (in load_tokenizer_and_model)
+base_model = "project-baize/baize-v2-7b" #load_8bit = True (in load_tokenizer_and_model)
 #base_model = "TheBloke/airoboros-13B-HF" #load_8bit = False (in load_tokenizer_and_model)
 #base_model = "EleutherAI/gpt-neo-1.3B" #load_8bit = False (in load_tokenizer_and_model)
 #base_model = "TheBloke/airoboros-13B-HF" #load_8bit = True
@@ -91,7 +91,7 @@ base_model = "project-baize/baize-v2-7b" #load_8bit = False (in load_tokenizer_and_model)
 # Load model directly
 
 #Load tokenizer and model
-tokenizer,model,device = load_tokenizer_and_model(base_model, False)
+tokenizer,model,device = load_tokenizer_and_model(base_model, True)
 #tokenizer.add_special_tokens({'pad_token': '[PAD]'}) #not necessary with fast tokenizers like GPT2
 
 #Load datasets for fine-tuning
@@ -147,6 +147,8 @@ print ("training args")
 #Training Args
 batch_size = 2
 
+'''
+# Set training arguments (smaller LLMs)
 training_args = TrainingArguments(
     output_dir="alexkueck/li-tis-tuned-2",
     overwrite_output_dir = 'True',
@@ -174,12 +176,14 @@ training_args = TrainingArguments(
     #load_best_model_at_end=True
     #push_to_hub=True,
 )
+'''
 
 ############################################
 #def trainieren_neu(name):
 #Assemble the trainer
 print ("################################")
 print ("trainer")
+'''
 trainer = Trainer(
     model=model,
     args=training_args,
@@ -188,6 +192,7 @@ trainer = Trainer(
     #tokenizer=tokenizer,
     compute_metrics=compute_metrics,
 )
+'''
 
 ###############################################
 #Special QA Trainer...#
@@ -206,6 +211,59 @@ trainer = QuestionAnsweringTrainer(
 '''
 #################################################
 
+#################################################
+# special Trainer Baize Model
+# Parameters
+MICRO_BATCH_SIZE = int(sys.argv[2])
+BATCH_SIZE = 64
+size = sys.argv[1]
+GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // MICRO_BATCH_SIZE
+EPOCHS = 1
+LEARNING_RATE = float(sys.argv[3])
+CUTOFF_LEN = 512
+LORA_R = 8
+LORA_ALPHA = 16
+LORA_DROPOUT = 0.05
+VAL_SET_SIZE = 2000
+TARGET_MODULES = [
+    "q_proj",
+    "k_proj",
+    "v_proj",
+    "down_proj",
+    "gate_proj",
+    "up_proj",
+]
+#DATA_PATH = "data/data_tmp.json"
+OUTPUT_DIR = "alexkueck/li-tis-tuned-2"
+trainer = transformers.Trainer(
+    model=model,
+    train_dataset=lm_datasets["train"],
+    eval_dataset=lm_datasets["test"],
+    args=transformers.TrainingArguments(
+        output_dir="alexkueck/li-tis-tuned-2",
+        overwrite_output_dir = 'True',
+        per_device_train_batch_size=MICRO_BATCH_SIZE,
+        per_device_eval_batch_size=MICRO_BATCH_SIZE,
+        gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
+        warmup_steps=100,
+        num_train_epochs=EPOCHS,
+        learning_rate=LEARNING_RATE,
+        fp16=True,
+        logging_steps=20,
+        evaluation_strategy="steps" if VAL_SET_SIZE > 0 else "no",
+        save_strategy="steps",
+        eval_steps=200 if VAL_SET_SIZE > 0 else None,
+        save_steps=200,
+        save_total_limit=100,
+        load_best_model_at_end=True if VAL_SET_SIZE > 0 else False,
+        ddp_find_unused_parameters=False if ddp else None,
+    ),
+    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
+)
+model.config.use_cache = False
+
+
+
 
 #run the trainer
 trainer.train()
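The @@ -91,7 +91,7 @@ hunk only changes the call site: the second argument of load_tokenizer_and_model is flipped to True, matching the "#load_8bit = True" comment above it. The helper itself is not part of this diff, so the following is only a minimal sketch of what such a loader typically looks like with transformers, assuming the (base_model, load_8bit) signature implied by the call; the real implementation in app.py may differ.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

def load_tokenizer_and_model(base_model, load_8bit=False):
    # Hypothetical sketch -- the actual helper in app.py is not shown in this diff.
    tokenizer = AutoTokenizer.from_pretrained(base_model)
    if load_8bit:
        # 8-bit loading requires bitsandbytes and accelerate to be installed
        model = AutoModelForCausalLM.from_pretrained(
            base_model, load_in_8bit=True, device_map="auto"
        )
    else:
        model = AutoModelForCausalLM.from_pretrained(
            base_model, torch_dtype=torch.float16
        )
    device = "cuda" if torch.cuda.is_available() else "cpu"
    if not load_8bit:
        model = model.to(device)
    return tokenizer, model, device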
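The new Baize-specific block at the end defines LoRA hyperparameters (LORA_R, LORA_ALPHA, LORA_DROPOUT, TARGET_MODULES) and reads the model size, micro-batch size, and learning rate from sys.argv (suggesting an invocation like python app.py 7b 4 3e-4), but the hunk shown here never applies those LoRA settings to the model; it also relies on lm_datasets and a ddp flag presumably defined elsewhere in app.py. In the usual Baize/PEFT fine-tuning recipe the 8-bit base model is wrapped with LoRA adapters before the Trainer is built, roughly as sketched below. This wiring is an assumption about code outside the diff, not something the commit shows.

from peft import LoraConfig, get_peft_model, prepare_model_for_int8_training

# Assumed wiring (not shown in the diff): make the 8-bit base model trainable
# and inject LoRA adapters before handing it to transformers.Trainer.
model = prepare_model_for_int8_training(model)

lora_config = LoraConfig(
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    target_modules=TARGET_MODULES,
    lora_dropout=LORA_DROPOUT,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # only the adapter weights should be trainable

With TARGET_MODULES covering the LLaMA attention and MLP projections, only the injected low-rank matrices are updated during training, which is what makes fine-tuning a 7B model in 8-bit feasible on a single GPU.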
|