plip commited on
Commit
695a3fd
1 Parent(s): 556424a

Training in progress, step 400000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:016da91c7614d6a57ffcc5938a0b68675bc967aee110a7fe186334a10cf0dffd
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a07b83dacbaddaed6e5cad0c80115438e4840da7d3099679de039cec028aef36
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da03f29f9d43fcaa0d12888b54defcd8a6a1be294c2c1e7d74429358a9688082
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aebaf503b44bc5d1dd314e475075123bf9e2c562de76e39616ae5b46cfba21a4
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c4ee1f5ea05fd5b5af4e685b13d34537bf6d8bfafcf82bbf12ab3c078f9c700
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae0c5aa717c3a9161f73c918d736998d943d82bab8038706d83985bab7118cb4
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c4ee1f5ea05fd5b5af4e685b13d34537bf6d8bfafcf82bbf12ab3c078f9c700
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae0c5aa717c3a9161f73c918d736998d943d82bab8038706d83985bab7118cb4
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c4ee1f5ea05fd5b5af4e685b13d34537bf6d8bfafcf82bbf12ab3c078f9c700
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae0c5aa717c3a9161f73c918d736998d943d82bab8038706d83985bab7118cb4
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c4ee1f5ea05fd5b5af4e685b13d34537bf6d8bfafcf82bbf12ab3c078f9c700
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae0c5aa717c3a9161f73c918d736998d943d82bab8038706d83985bab7118cb4
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c4ee1f5ea05fd5b5af4e685b13d34537bf6d8bfafcf82bbf12ab3c078f9c700
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae0c5aa717c3a9161f73c918d736998d943d82bab8038706d83985bab7118cb4
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c4ee1f5ea05fd5b5af4e685b13d34537bf6d8bfafcf82bbf12ab3c078f9c700
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae0c5aa717c3a9161f73c918d736998d943d82bab8038706d83985bab7118cb4
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c4ee1f5ea05fd5b5af4e685b13d34537bf6d8bfafcf82bbf12ab3c078f9c700
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae0c5aa717c3a9161f73c918d736998d943d82bab8038706d83985bab7118cb4
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c4ee1f5ea05fd5b5af4e685b13d34537bf6d8bfafcf82bbf12ab3c078f9c700
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae0c5aa717c3a9161f73c918d736998d943d82bab8038706d83985bab7118cb4
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:864647684ab3694f7aa2a258c1806e10c4abf99f67ed5e54443050e485ac9436
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ba83cbca80cb672828600b248dd69c4c050beb355cdcf7faf0b56212421edca
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 9.939344512972118,
5
- "global_step": 390000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -7806,11 +7806,211 @@
7806
  "eval_samples_per_second": 761.14,
7807
  "eval_steps_per_second": 12.178,
7808
  "step": 390000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7809
  }
7810
  ],
7811
  "max_steps": 500000,
7812
  "num_train_epochs": 13,
7813
- "total_flos": 1.2459877101988095e+22,
7814
  "trial_name": null,
7815
  "trial_params": null
7816
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 10.194199500484224,
5
+ "global_step": 400000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
7806
  "eval_samples_per_second": 761.14,
7807
  "eval_steps_per_second": 12.178,
7808
  "step": 390000
7809
+ },
7810
+ {
7811
+ "epoch": 9.95,
7812
+ "learning_rate": 4.639259509788768e-05,
7813
+ "loss": 0.2727,
7814
+ "step": 390500
7815
+ },
7816
+ {
7817
+ "epoch": 9.96,
7818
+ "learning_rate": 4.60754920716572e-05,
7819
+ "loss": 0.2727,
7820
+ "step": 391000
7821
+ },
7822
+ {
7823
+ "epoch": 9.96,
7824
+ "eval_loss": 0.8076984882354736,
7825
+ "eval_runtime": 1.3566,
7826
+ "eval_samples_per_second": 737.152,
7827
+ "eval_steps_per_second": 11.794,
7828
+ "step": 391000
7829
+ },
7830
+ {
7831
+ "epoch": 9.98,
7832
+ "learning_rate": 4.5759580226394167e-05,
7833
+ "loss": 0.2727,
7834
+ "step": 391500
7835
+ },
7836
+ {
7837
+ "epoch": 9.99,
7838
+ "learning_rate": 4.544486301685993e-05,
7839
+ "loss": 0.2726,
7840
+ "step": 392000
7841
+ },
7842
+ {
7843
+ "epoch": 9.99,
7844
+ "eval_loss": 0.8174979090690613,
7845
+ "eval_runtime": 1.2981,
7846
+ "eval_samples_per_second": 770.339,
7847
+ "eval_steps_per_second": 12.325,
7848
+ "step": 392000
7849
+ },
7850
+ {
7851
+ "epoch": 10.0,
7852
+ "learning_rate": 4.5131343884751484e-05,
7853
+ "loss": 0.2723,
7854
+ "step": 392500
7855
+ },
7856
+ {
7857
+ "epoch": 10.02,
7858
+ "learning_rate": 4.4819026258663774e-05,
7859
+ "loss": 0.2722,
7860
+ "step": 393000
7861
+ },
7862
+ {
7863
+ "epoch": 10.02,
7864
+ "eval_loss": 0.8073344826698303,
7865
+ "eval_runtime": 1.3446,
7866
+ "eval_samples_per_second": 743.69,
7867
+ "eval_steps_per_second": 11.899,
7868
+ "step": 393000
7869
+ },
7870
+ {
7871
+ "epoch": 10.03,
7872
+ "learning_rate": 4.450791355405234e-05,
7873
+ "loss": 0.2722,
7874
+ "step": 393500
7875
+ },
7876
+ {
7877
+ "epoch": 10.04,
7878
+ "learning_rate": 4.419800917319588e-05,
7879
+ "loss": 0.2725,
7880
+ "step": 394000
7881
+ },
7882
+ {
7883
+ "epoch": 10.04,
7884
+ "eval_loss": 0.8089223504066467,
7885
+ "eval_runtime": 1.4579,
7886
+ "eval_samples_per_second": 685.895,
7887
+ "eval_steps_per_second": 10.974,
7888
+ "step": 394000
7889
+ },
7890
+ {
7891
+ "epoch": 10.05,
7892
+ "learning_rate": 4.3889316505159056e-05,
7893
+ "loss": 0.2723,
7894
+ "step": 394500
7895
+ },
7896
+ {
7897
+ "epoch": 10.07,
7898
+ "learning_rate": 4.3581838925755465e-05,
7899
+ "loss": 0.2721,
7900
+ "step": 395000
7901
+ },
7902
+ {
7903
+ "epoch": 10.07,
7904
+ "eval_loss": 0.8180590867996216,
7905
+ "eval_runtime": 1.4056,
7906
+ "eval_samples_per_second": 711.416,
7907
+ "eval_steps_per_second": 11.383,
7908
+ "step": 395000
7909
+ },
7910
+ {
7911
+ "epoch": 10.08,
7912
+ "learning_rate": 4.327557979751057e-05,
7913
+ "loss": 0.2722,
7914
+ "step": 395500
7915
+ },
7916
+ {
7917
+ "epoch": 10.09,
7918
+ "learning_rate": 4.297054246962517e-05,
7919
+ "loss": 0.2722,
7920
+ "step": 396000
7921
+ },
7922
+ {
7923
+ "epoch": 10.09,
7924
+ "eval_loss": 0.8067134022712708,
7925
+ "eval_runtime": 1.3816,
7926
+ "eval_samples_per_second": 723.788,
7927
+ "eval_steps_per_second": 11.581,
7928
+ "step": 396000
7929
+ },
7930
+ {
7931
+ "epoch": 10.11,
7932
+ "learning_rate": 4.266673027793864e-05,
7933
+ "loss": 0.2717,
7934
+ "step": 396500
7935
+ },
7936
+ {
7937
+ "epoch": 10.12,
7938
+ "learning_rate": 4.236414654489242e-05,
7939
+ "loss": 0.2721,
7940
+ "step": 397000
7941
+ },
7942
+ {
7943
+ "epoch": 10.12,
7944
+ "eval_loss": 0.8154900074005127,
7945
+ "eval_runtime": 1.3572,
7946
+ "eval_samples_per_second": 736.809,
7947
+ "eval_steps_per_second": 11.789,
7948
+ "step": 397000
7949
+ },
7950
+ {
7951
+ "epoch": 10.13,
7952
+ "learning_rate": 4.206279457949371e-05,
7953
+ "loss": 0.272,
7954
+ "step": 397500
7955
+ },
7956
+ {
7957
+ "epoch": 10.14,
7958
+ "learning_rate": 4.1762677677279335e-05,
7959
+ "loss": 0.2718,
7960
+ "step": 398000
7961
+ },
7962
+ {
7963
+ "epoch": 10.14,
7964
+ "eval_loss": 0.814974844455719,
7965
+ "eval_runtime": 1.3197,
7966
+ "eval_samples_per_second": 757.75,
7967
+ "eval_steps_per_second": 12.124,
7968
+ "step": 398000
7969
+ },
7970
+ {
7971
+ "epoch": 10.16,
7972
+ "learning_rate": 4.146379912027964e-05,
7973
+ "loss": 0.2715,
7974
+ "step": 398500
7975
+ },
7976
+ {
7977
+ "epoch": 10.17,
7978
+ "learning_rate": 4.1166162176982664e-05,
7979
+ "loss": 0.272,
7980
+ "step": 399000
7981
+ },
7982
+ {
7983
+ "epoch": 10.17,
7984
+ "eval_loss": 0.8131051063537598,
7985
+ "eval_runtime": 1.3374,
7986
+ "eval_samples_per_second": 747.697,
7987
+ "eval_steps_per_second": 11.963,
7988
+ "step": 399000
7989
+ },
7990
+ {
7991
+ "epoch": 10.18,
7992
+ "learning_rate": 4.086977010229838e-05,
7993
+ "loss": 0.2717,
7994
+ "step": 399500
7995
+ },
7996
+ {
7997
+ "epoch": 10.19,
7998
+ "learning_rate": 4.057462613752294e-05,
7999
+ "loss": 0.2721,
8000
+ "step": 400000
8001
+ },
8002
+ {
8003
+ "epoch": 10.19,
8004
+ "eval_loss": 0.8092121481895447,
8005
+ "eval_runtime": 1.3464,
8006
+ "eval_samples_per_second": 742.696,
8007
+ "eval_steps_per_second": 11.883,
8008
+ "step": 400000
8009
  }
8010
  ],
8011
  "max_steps": 500000,
8012
  "num_train_epochs": 13,
8013
+ "total_flos": 1.277935076184955e+22,
8014
  "trial_name": null,
8015
  "trial_params": null
8016
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da03f29f9d43fcaa0d12888b54defcd8a6a1be294c2c1e7d74429358a9688082
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aebaf503b44bc5d1dd314e475075123bf9e2c562de76e39616ae5b46cfba21a4
3
  size 102501541