plip commited on
Commit
dccc613
1 Parent(s): 5d192d4

Training in progress, step 400000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d1df67f6183cc09f42fe2f0f37f8357c9b8e65f61395be8418801628a4f2e406
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0052ea96d09049ac49c449aedeb504aaa0702f05c2816a11a3a3ddaec5b81730
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a319b7f2b744152a6d0b7c7b011dcbab2f50e31847aad07fbf0452468b5c5506
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c24a6ee4938298e902a6aac60f7e4dbd5863d20fb13bdb77f169f65d5ebb0f24
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b801257833dcb303ed75576841dd61289770bb2e540d74ef6ab937039253da2
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e807b27cec95286b3bd60cbc83a1b68ff438706d30976e63e3494b79245fec8
3
+ size 14567
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b6243d1d6e37c5feee8bc48e8b6c7c5cd2d209aa34c8943a605523b56be5d5e4
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfa21a62b639444b92fd71394cab9bd093866e1e95bfe1d8e5b6de28be79d78c
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1abc03aa32f692919d10687fb19b346a26a86c47bfcdc5586f5937683d9a753d
3
- size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b36797d3e65ccb749a8a1d0f61a264827cdf623c6e7863e3dfa8e9c3447a0a8
3
+ size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:05b257bebecd01fe3120913d4e730e44697707b0fb5a5618a441dd1149c501fa
3
  size 14439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0003365be88deed59bbfa00e162d3630d1e08211c08c414200b29c59541ecb7c
3
  size 14439
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:593b188cbaecd147825c6ab6a7428985fef9dedd306035b01be6e3046807b5ca
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b337da7a0b88d9b474d84be36e393eab1c1ad113d73339c380d95ef63a86215
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ab242969f427542b52610398ae9f7ae92f28c7f51c41a80b11d327fe5508415b
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66dcedf6f85c5051262e76eedab8ba3cfcd6591d68fb4105fa9e6200b9ada3de
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d23f4669a6b401e22fd0094f76b78d30e5af5448c611b04bb84563f4723f22b5
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80c55158bbb5df46a8134172c7b9fd2596d23ab49cc5ead3b21e0f52b5035ea2
3
+ size 14439
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c94a5e21ff6cb02edd9d7611a60f73f17f810262fe1570be2488fda0b577fd53
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf0f19d16897e4a2b672fed3e2c7deef05b6f1ddc375a14dbbca90f09eb7f307
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:864647684ab3694f7aa2a258c1806e10c4abf99f67ed5e54443050e485ac9436
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ba83cbca80cb672828600b248dd69c4c050beb355cdcf7faf0b56212421edca
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 11.948895493121725,
5
- "global_step": 390000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -7806,11 +7806,211 @@
7806
  "eval_samples_per_second": 1955.764,
7807
  "eval_steps_per_second": 31.292,
7808
  "step": 390000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7809
  }
7810
  ],
7811
  "max_steps": 500000,
7812
  "num_train_epochs": 16,
7813
- "total_flos": 1.2459910544733766e+22,
7814
  "trial_name": null,
7815
  "trial_params": null
7816
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 12.255277428842795,
5
+ "global_step": 400000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
7806
  "eval_samples_per_second": 1955.764,
7807
  "eval_steps_per_second": 31.292,
7808
  "step": 390000
7809
+ },
7810
+ {
7811
+ "epoch": 11.96,
7812
+ "learning_rate": 4.639259509788768e-05,
7813
+ "loss": 0.3189,
7814
+ "step": 390500
7815
+ },
7816
+ {
7817
+ "epoch": 11.98,
7818
+ "learning_rate": 4.60754920716572e-05,
7819
+ "loss": 0.3193,
7820
+ "step": 391000
7821
+ },
7822
+ {
7823
+ "epoch": 11.98,
7824
+ "eval_loss": 0.7768589854240417,
7825
+ "eval_runtime": 0.4937,
7826
+ "eval_samples_per_second": 2025.701,
7827
+ "eval_steps_per_second": 32.411,
7828
+ "step": 391000
7829
+ },
7830
+ {
7831
+ "epoch": 11.99,
7832
+ "learning_rate": 4.5759580226394167e-05,
7833
+ "loss": 0.3191,
7834
+ "step": 391500
7835
+ },
7836
+ {
7837
+ "epoch": 12.01,
7838
+ "learning_rate": 4.544486301685993e-05,
7839
+ "loss": 0.3191,
7840
+ "step": 392000
7841
+ },
7842
+ {
7843
+ "epoch": 12.01,
7844
+ "eval_loss": 0.7793305516242981,
7845
+ "eval_runtime": 0.4963,
7846
+ "eval_samples_per_second": 2015.052,
7847
+ "eval_steps_per_second": 32.241,
7848
+ "step": 392000
7849
+ },
7850
+ {
7851
+ "epoch": 12.03,
7852
+ "learning_rate": 4.5131343884751484e-05,
7853
+ "loss": 0.3189,
7854
+ "step": 392500
7855
+ },
7856
+ {
7857
+ "epoch": 12.04,
7858
+ "learning_rate": 4.4819026258663774e-05,
7859
+ "loss": 0.3188,
7860
+ "step": 393000
7861
+ },
7862
+ {
7863
+ "epoch": 12.04,
7864
+ "eval_loss": 0.7801252007484436,
7865
+ "eval_runtime": 0.5034,
7866
+ "eval_samples_per_second": 1986.635,
7867
+ "eval_steps_per_second": 31.786,
7868
+ "step": 393000
7869
+ },
7870
+ {
7871
+ "epoch": 12.06,
7872
+ "learning_rate": 4.450791355405234e-05,
7873
+ "loss": 0.3188,
7874
+ "step": 393500
7875
+ },
7876
+ {
7877
+ "epoch": 12.07,
7878
+ "learning_rate": 4.419800917319588e-05,
7879
+ "loss": 0.3188,
7880
+ "step": 394000
7881
+ },
7882
+ {
7883
+ "epoch": 12.07,
7884
+ "eval_loss": 0.7790648937225342,
7885
+ "eval_runtime": 0.5072,
7886
+ "eval_samples_per_second": 1971.795,
7887
+ "eval_steps_per_second": 31.549,
7888
+ "step": 394000
7889
+ },
7890
+ {
7891
+ "epoch": 12.09,
7892
+ "learning_rate": 4.3889316505159056e-05,
7893
+ "loss": 0.3185,
7894
+ "step": 394500
7895
+ },
7896
+ {
7897
+ "epoch": 12.1,
7898
+ "learning_rate": 4.3581838925755465e-05,
7899
+ "loss": 0.3187,
7900
+ "step": 395000
7901
+ },
7902
+ {
7903
+ "epoch": 12.1,
7904
+ "eval_loss": 0.7797361016273499,
7905
+ "eval_runtime": 0.4981,
7906
+ "eval_samples_per_second": 2007.807,
7907
+ "eval_steps_per_second": 32.125,
7908
+ "step": 395000
7909
+ },
7910
+ {
7911
+ "epoch": 12.12,
7912
+ "learning_rate": 4.327557979751057e-05,
7913
+ "loss": 0.3187,
7914
+ "step": 395500
7915
+ },
7916
+ {
7917
+ "epoch": 12.13,
7918
+ "learning_rate": 4.297054246962517e-05,
7919
+ "loss": 0.3186,
7920
+ "step": 396000
7921
+ },
7922
+ {
7923
+ "epoch": 12.13,
7924
+ "eval_loss": 0.7804706692695618,
7925
+ "eval_runtime": 0.5208,
7926
+ "eval_samples_per_second": 1920.29,
7927
+ "eval_steps_per_second": 30.725,
7928
+ "step": 396000
7929
+ },
7930
+ {
7931
+ "epoch": 12.15,
7932
+ "learning_rate": 4.266673027793864e-05,
7933
+ "loss": 0.3184,
7934
+ "step": 396500
7935
+ },
7936
+ {
7937
+ "epoch": 12.16,
7938
+ "learning_rate": 4.236414654489242e-05,
7939
+ "loss": 0.3185,
7940
+ "step": 397000
7941
+ },
7942
+ {
7943
+ "epoch": 12.16,
7944
+ "eval_loss": 0.7770272493362427,
7945
+ "eval_runtime": 0.5197,
7946
+ "eval_samples_per_second": 1924.081,
7947
+ "eval_steps_per_second": 30.785,
7948
+ "step": 397000
7949
+ },
7950
+ {
7951
+ "epoch": 12.18,
7952
+ "learning_rate": 4.206279457949371e-05,
7953
+ "loss": 0.3183,
7954
+ "step": 397500
7955
+ },
7956
+ {
7957
+ "epoch": 12.19,
7958
+ "learning_rate": 4.1762677677279335e-05,
7959
+ "loss": 0.3185,
7960
+ "step": 398000
7961
+ },
7962
+ {
7963
+ "epoch": 12.19,
7964
+ "eval_loss": 0.7785659432411194,
7965
+ "eval_runtime": 0.5333,
7966
+ "eval_samples_per_second": 1875.117,
7967
+ "eval_steps_per_second": 30.002,
7968
+ "step": 398000
7969
+ },
7970
+ {
7971
+ "epoch": 12.21,
7972
+ "learning_rate": 4.146379912027964e-05,
7973
+ "loss": 0.3184,
7974
+ "step": 398500
7975
+ },
7976
+ {
7977
+ "epoch": 12.22,
7978
+ "learning_rate": 4.1166162176982664e-05,
7979
+ "loss": 0.3187,
7980
+ "step": 399000
7981
+ },
7982
+ {
7983
+ "epoch": 12.22,
7984
+ "eval_loss": 0.7719516754150391,
7985
+ "eval_runtime": 0.5091,
7986
+ "eval_samples_per_second": 1964.072,
7987
+ "eval_steps_per_second": 31.425,
7988
+ "step": 399000
7989
+ },
7990
+ {
7991
+ "epoch": 12.24,
7992
+ "learning_rate": 4.086977010229838e-05,
7993
+ "loss": 0.3182,
7994
+ "step": 399500
7995
+ },
7996
+ {
7997
+ "epoch": 12.26,
7998
+ "learning_rate": 4.057462613752294e-05,
7999
+ "loss": 0.3181,
8000
+ "step": 400000
8001
+ },
8002
+ {
8003
+ "epoch": 12.26,
8004
+ "eval_loss": 0.7777762413024902,
8005
+ "eval_runtime": 0.5134,
8006
+ "eval_samples_per_second": 1947.706,
8007
+ "eval_steps_per_second": 31.163,
8008
+ "step": 400000
8009
  }
8010
  ],
8011
  "max_steps": 500000,
8012
  "num_train_epochs": 16,
8013
+ "total_flos": 1.277938969555629e+22,
8014
  "trial_name": null,
8015
  "trial_params": null
8016
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a319b7f2b744152a6d0b7c7b011dcbab2f50e31847aad07fbf0452468b5c5506
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c24a6ee4938298e902a6aac60f7e4dbd5863d20fb13bdb77f169f65d5ebb0f24
3
  size 102501541