amy-hyunji-lee committed on
Commit
4d983c3
·
verified ·
1 Parent(s): 14387c3

Add files using upload-large-folder tool

Browse files
Files changed (3) hide show
  1. config.yaml +12 -11
  2. model.pt +1 -1
  3. train.pt +2 -2
config.yaml CHANGED
@@ -1,4 +1,4 @@
1
- run_name: olmo-400M-base
2
  seed: 6198
3
  epoch: null
4
  dry_run: false
@@ -366,10 +366,10 @@ eval_interval: 2384
366
  tokenizer:
367
  identifier: tokenizers/allenai_gpt-neox-olmo-dolma-v1_5.json
368
  truncate_direction: right
369
- save_folder: /apdcephfs_sh2/share_300000800/user/kaixinma/amylee/fineweb-edu/workspace/olmo-400M-base
370
  remote_save_folder: null
371
  canceled_check_interval: 50
372
- save_interval: 2384
373
  save_interval_unsharded: 2384
374
  save_interval_ephemeral: null
375
  save_num_checkpoints_to_keep: 2
@@ -377,7 +377,7 @@ save_num_unsharded_checkpoints_to_keep: -1
377
  save_overwrite: true
378
  force_save_unsharded: false
379
  no_pre_train_checkpoint: false
380
- load_path: /apdcephfs_sh2/share_300000800/user/kaixinma/amylee/fineweb-edu/workspace/olmo-400M-base/step9536-unsharded
381
  load_path_sharded_checkpointer: null
382
  try_load_latest_save: false
383
  reset_optimizer_state: false
@@ -387,11 +387,11 @@ new_style_checkpoints: null
387
  max_duration: 1ep
388
  global_train_batch_size: 1024
389
  device_train_batch_size: 128
390
- device_train_microbatch_size: 16
391
- device_eval_batch_size: 16
392
  eval_subset_num_batches: -1
393
- eval_on_load: true
394
- device_train_grad_accum: 8
395
  max_grad_norm: 1.0
396
  max_grad_norm_ratio: null
397
  precision: amp_bf16
@@ -399,7 +399,7 @@ wandb:
399
  project: olmo-pretrain-ablation
400
  entity: alee6868
401
  group: null
402
- name: olmo-400M-base
403
  tags:
404
  - watching
405
  log_artifacts: false
@@ -438,10 +438,11 @@ inject_interval: null
438
  resus_portion: 1.0
439
  resus_ratio: 1.0
440
  data_shuffling: true
441
- KE_loss: false
442
  sum_CE_KE_loss: true
443
- lambda_ke_loss: null
444
  grad_ascent: false
445
  trainable_parameter: ''
 
446
  hf_datasets_cache_dir: null
447
  module_outputs_save_steps: null
 
1
+ run_name: olmo-400M-keloss_0.0015_base.23840
2
  seed: 6198
3
  epoch: null
4
  dry_run: false
 
366
  tokenizer:
367
  identifier: tokenizers/allenai_gpt-neox-olmo-dolma-v1_5.json
368
  truncate_direction: right
369
+ save_folder: /apdcephfs_sh2/share_300000800/user/kaixinma/amylee/fineweb-edu/workspace/olmo-400M-keloss_0.0015_base.23840
370
  remote_save_folder: null
371
  canceled_check_interval: 50
372
+ save_interval: 10
373
  save_interval_unsharded: 2384
374
  save_interval_ephemeral: null
375
  save_num_checkpoints_to_keep: 2
 
377
  save_overwrite: true
378
  force_save_unsharded: false
379
  no_pre_train_checkpoint: false
380
+ load_path: /apdcephfs_sh2/share_300000800/user/kaixinma/amylee/fineweb-edu/workspace/olmo-400M-base/step23840-unsharded
381
  load_path_sharded_checkpointer: null
382
  try_load_latest_save: false
383
  reset_optimizer_state: false
 
387
  max_duration: 1ep
388
  global_train_batch_size: 1024
389
  device_train_batch_size: 128
390
+ device_train_microbatch_size: 4
391
+ device_eval_batch_size: 4
392
  eval_subset_num_batches: -1
393
+ eval_on_load: false
394
+ device_train_grad_accum: 32
395
  max_grad_norm: 1.0
396
  max_grad_norm_ratio: null
397
  precision: amp_bf16
 
399
  project: olmo-pretrain-ablation
400
  entity: alee6868
401
  group: null
402
+ name: olmo-400M-keloss_0.0015_base.23840
403
  tags:
404
  - watching
405
  log_artifacts: false
 
438
  resus_portion: 1.0
439
  resus_ratio: 1.0
440
  data_shuffling: true
441
+ KE_loss: true
442
  sum_CE_KE_loss: true
443
+ lambda_ke_loss: 0.0015
444
  grad_ascent: false
445
  trainable_parameter: ''
446
+ name_value: 0
447
  hf_datasets_cache_dir: null
448
  module_outputs_save_steps: null
model.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:baa07c0cc94018b964838804f08f870aff022b78780e4d8682d146240be29a4d
3
  size 1754478590
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da22665aa908a8f73177c027c4172164c0102ca2f4940974adbb4f5b629ef8c4
3
  size 1754478590
train.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:413241207beb41cdb169d4c41aa66d66b93e9943539a2b355cad15b95314ec51
3
- size 15500
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c3837341016560f4ea86049487da46847d61926b2e0d61e0d811fd880d583a5
3
+ size 14988