amy-hyunji-lee
committed on
Add files using upload-large-folder tool
Browse files
- config.yaml +12 -11
- model.pt +1 -1
- train.pt +2 -2
config.yaml
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
run_name: olmo-400M-
|
2 |
seed: 6198
|
3 |
epoch: null
|
4 |
dry_run: false
|
@@ -366,10 +366,10 @@ eval_interval: 2384
|
|
366 |
tokenizer:
|
367 |
identifier: tokenizers/allenai_gpt-neox-olmo-dolma-v1_5.json
|
368 |
truncate_direction: right
|
369 |
-
save_folder: /apdcephfs_sh2/share_300000800/user/kaixinma/amylee/fineweb-edu/workspace/olmo-400M-
|
370 |
remote_save_folder: null
|
371 |
canceled_check_interval: 50
|
372 |
-
save_interval:
|
373 |
save_interval_unsharded: 2384
|
374 |
save_interval_ephemeral: null
|
375 |
save_num_checkpoints_to_keep: 2
|
@@ -377,7 +377,7 @@ save_num_unsharded_checkpoints_to_keep: -1
|
|
377 |
save_overwrite: true
|
378 |
force_save_unsharded: false
|
379 |
no_pre_train_checkpoint: false
|
380 |
-
load_path: /apdcephfs_sh2/share_300000800/user/kaixinma/amylee/fineweb-edu/workspace/olmo-400M-base/
|
381 |
load_path_sharded_checkpointer: null
|
382 |
try_load_latest_save: false
|
383 |
reset_optimizer_state: false
|
@@ -387,11 +387,11 @@ new_style_checkpoints: null
|
|
387 |
max_duration: 1ep
|
388 |
global_train_batch_size: 1024
|
389 |
device_train_batch_size: 128
|
390 |
-
device_train_microbatch_size:
|
391 |
-
device_eval_batch_size:
|
392 |
eval_subset_num_batches: -1
|
393 |
-
eval_on_load:
|
394 |
-
device_train_grad_accum:
|
395 |
max_grad_norm: 1.0
|
396 |
max_grad_norm_ratio: null
|
397 |
precision: amp_bf16
|
@@ -399,7 +399,7 @@ wandb:
|
|
399 |
project: olmo-pretrain-ablation
|
400 |
entity: alee6868
|
401 |
group: null
|
402 |
-
name: olmo-400M-
|
403 |
tags:
|
404 |
- watching
|
405 |
log_artifacts: false
|
@@ -438,10 +438,11 @@ inject_interval: null
|
|
438 |
resus_portion: 1.0
|
439 |
resus_ratio: 1.0
|
440 |
data_shuffling: true
|
441 |
-
KE_loss:
|
442 |
sum_CE_KE_loss: true
|
443 |
-
lambda_ke_loss:
|
444 |
grad_ascent: false
|
445 |
trainable_parameter: ''
|
|
|
446 |
hf_datasets_cache_dir: null
|
447 |
module_outputs_save_steps: null
|
|
|
1 |
+
run_name: olmo-400M-keloss_0.0015_base.23840
|
2 |
seed: 6198
|
3 |
epoch: null
|
4 |
dry_run: false
|
|
|
366 |
tokenizer:
|
367 |
identifier: tokenizers/allenai_gpt-neox-olmo-dolma-v1_5.json
|
368 |
truncate_direction: right
|
369 |
+
save_folder: /apdcephfs_sh2/share_300000800/user/kaixinma/amylee/fineweb-edu/workspace/olmo-400M-keloss_0.0015_base.23840
|
370 |
remote_save_folder: null
|
371 |
canceled_check_interval: 50
|
372 |
+
save_interval: 10
|
373 |
save_interval_unsharded: 2384
|
374 |
save_interval_ephemeral: null
|
375 |
save_num_checkpoints_to_keep: 2
|
|
|
377 |
save_overwrite: true
|
378 |
force_save_unsharded: false
|
379 |
no_pre_train_checkpoint: false
|
380 |
+
load_path: /apdcephfs_sh2/share_300000800/user/kaixinma/amylee/fineweb-edu/workspace/olmo-400M-base/step23840-unsharded
|
381 |
load_path_sharded_checkpointer: null
|
382 |
try_load_latest_save: false
|
383 |
reset_optimizer_state: false
|
|
|
387 |
max_duration: 1ep
|
388 |
global_train_batch_size: 1024
|
389 |
device_train_batch_size: 128
|
390 |
+
device_train_microbatch_size: 4
|
391 |
+
device_eval_batch_size: 4
|
392 |
eval_subset_num_batches: -1
|
393 |
+
eval_on_load: false
|
394 |
+
device_train_grad_accum: 32
|
395 |
max_grad_norm: 1.0
|
396 |
max_grad_norm_ratio: null
|
397 |
precision: amp_bf16
|
|
|
399 |
project: olmo-pretrain-ablation
|
400 |
entity: alee6868
|
401 |
group: null
|
402 |
+
name: olmo-400M-keloss_0.0015_base.23840
|
403 |
tags:
|
404 |
- watching
|
405 |
log_artifacts: false
|
|
|
438 |
resus_portion: 1.0
|
439 |
resus_ratio: 1.0
|
440 |
data_shuffling: true
|
441 |
+
KE_loss: true
|
442 |
sum_CE_KE_loss: true
|
443 |
+
lambda_ke_loss: 0.0015
|
444 |
grad_ascent: false
|
445 |
trainable_parameter: ''
|
446 |
+
name_value: 0
|
447 |
hf_datasets_cache_dir: null
|
448 |
module_outputs_save_steps: null
|
model.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1754478590
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:da22665aa908a8f73177c027c4172164c0102ca2f4940974adbb4f5b629ef8c4
|
3 |
size 1754478590
|
train.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4c3837341016560f4ea86049487da46847d61926b2e0d61e0d811fd880d583a5
|
3 |
+
size 14988
|