Femboyuwu2000 committed on
Commit 6940d52
1 Parent(s): 0104963

Training in progress, step 8900, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2c29d62447db5ea142b6e63446fb0723995c9371071fb12f07c5dbfcd273e66f
+ oid sha256:630a910b2d5b8c9504add11b070a2b95445874621f6de2ea3077628d0553ffe3
  size 13982248
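
The file above is a PEFT adapter checkpoint (adapter_model.safetensors) that is overwritten as training progresses. A minimal sketch of attaching it to a base model follows; the base-model id is a placeholder, since this commit does not identify the underlying model, and it assumes the checkpoint directory also contains the usual adapter_config.json.

    from transformers import AutoModelForCausalLM
    from peft import PeftModel

    # Placeholder: the base model is not named anywhere in this commit.
    BASE_MODEL_ID = "your-base-model-id"

    base = AutoModelForCausalLM.from_pretrained(BASE_MODEL_ID)
    # Loads adapter_model.safetensors (and adapter_config.json) from the checkpoint dir.
    model = PeftModel.from_pretrained(base, "last-checkpoint")
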
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:8ac9456df1bbb624ec6b29c344fddfbe6e0ff7895487589b37e69161278d0602
+ oid sha256:da3ec0e6441f43d24431895b711504845f578f35580f2b0f333b91a363d641c1
  size 7062522
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f2387e9a9fd0494921fc8ca09e34d3a00b5294dbcdd3c6d17ee624f943511e33
+ oid sha256:7e3f344cbf26501e998b8de868d6d509ad3fe3171ec390a602388814ddd9df38
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1b1bd4ceabb39cc61ab83f832a18b2856f5381545b694264fd51377760218275
+ oid sha256:0dde4a2f54c5119ed26c871b2368383ec34337eb67d04dfb138caacf14a881ef
  size 1064
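
Each of the four binary files above is tracked with Git LFS, so the diff only swaps the sha256 object id while the byte size stays the same. A short sketch, assuming the files have been downloaded locally, for checking a file against the oid and size recorded in its pointer (values taken from the adapter entry at the top):

    import hashlib
    import os

    path = "last-checkpoint/adapter_model.safetensors"
    expected_oid = "630a910b2d5b8c9504add11b070a2b95445874621f6de2ea3077628d0553ffe3"
    expected_size = 13982248

    # Hash the file in 1 MiB chunks to avoid loading it all at once.
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)

    assert os.path.getsize(path) == expected_size, "size mismatch"
    assert digest.hexdigest() == expected_oid, "sha256 mismatch"
    print("local file matches the LFS pointer")
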
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.6672,
+ "epoch": 0.712,
  "eval_steps": 500,
- "global_step": 8340,
+ "global_step": 8900,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -2926,6 +2926,202 @@
  "learning_rate": 1.9681434114209617e-05,
  "loss": 3.4415,
  "step": 8340
+ },
+ {
+ "epoch": 0.67,
+ "grad_norm": 30.5143985748291,
+ "learning_rate": 1.963525491562421e-05,
+ "loss": 3.4806,
+ "step": 8360
+ },
+ {
+ "epoch": 0.67,
+ "grad_norm": 27.770360946655273,
+ "learning_rate": 1.958902709540811e-05,
+ "loss": 3.4379,
+ "step": 8380
+ },
+ {
+ "epoch": 0.67,
+ "grad_norm": 24.266944885253906,
+ "learning_rate": 1.954275113846926e-05,
+ "loss": 3.4933,
+ "step": 8400
+ },
+ {
+ "epoch": 0.67,
+ "grad_norm": 43.798301696777344,
+ "learning_rate": 1.9496427530220567e-05,
+ "loss": 3.4107,
+ "step": 8420
+ },
+ {
+ "epoch": 0.68,
+ "grad_norm": 32.54145431518555,
+ "learning_rate": 1.9450056756574753e-05,
+ "loss": 3.507,
+ "step": 8440
+ },
+ {
+ "epoch": 0.68,
+ "grad_norm": 29.06185531616211,
+ "learning_rate": 1.9403639303939293e-05,
+ "loss": 3.4434,
+ "step": 8460
+ },
+ {
+ "epoch": 0.68,
+ "grad_norm": 26.419170379638672,
+ "learning_rate": 1.93571756592113e-05,
+ "loss": 3.4684,
+ "step": 8480
+ },
+ {
+ "epoch": 0.68,
+ "grad_norm": 31.831510543823242,
+ "learning_rate": 1.9310666309772426e-05,
+ "loss": 3.4565,
+ "step": 8500
+ },
+ {
+ "epoch": 0.68,
+ "grad_norm": 33.18935775756836,
+ "learning_rate": 1.926411174348373e-05,
+ "loss": 3.4568,
+ "step": 8520
+ },
+ {
+ "epoch": 0.68,
+ "grad_norm": 25.60289192199707,
+ "learning_rate": 1.9217512448680586e-05,
+ "loss": 3.3513,
+ "step": 8540
+ },
+ {
+ "epoch": 0.68,
+ "grad_norm": 27.03973960876465,
+ "learning_rate": 1.917086891416755e-05,
+ "loss": 3.4551,
+ "step": 8560
+ },
+ {
+ "epoch": 0.69,
+ "grad_norm": 39.20319366455078,
+ "learning_rate": 1.9124181629213228e-05,
+ "loss": 3.4217,
+ "step": 8580
+ },
+ {
+ "epoch": 0.69,
+ "grad_norm": 25.287826538085938,
+ "learning_rate": 1.9077451083545144e-05,
+ "loss": 3.4172,
+ "step": 8600
+ },
+ {
+ "epoch": 0.69,
+ "grad_norm": 51.33893585205078,
+ "learning_rate": 1.903067776734461e-05,
+ "loss": 3.4487,
+ "step": 8620
+ },
+ {
+ "epoch": 0.69,
+ "grad_norm": 23.41849708557129,
+ "learning_rate": 1.8983862171241577e-05,
+ "loss": 3.4675,
+ "step": 8640
+ },
+ {
+ "epoch": 0.69,
+ "grad_norm": 37.373104095458984,
+ "learning_rate": 1.8937004786309504e-05,
+ "loss": 3.4782,
+ "step": 8660
+ },
+ {
+ "epoch": 0.69,
+ "grad_norm": 26.743480682373047,
+ "learning_rate": 1.8890106104060177e-05,
+ "loss": 3.432,
+ "step": 8680
+ },
+ {
+ "epoch": 0.7,
+ "grad_norm": 39.051124572753906,
+ "learning_rate": 1.8843166616438585e-05,
+ "loss": 3.4937,
+ "step": 8700
+ },
+ {
+ "epoch": 0.7,
+ "grad_norm": 32.51453399658203,
+ "learning_rate": 1.8796186815817743e-05,
+ "loss": 3.4618,
+ "step": 8720
+ },
+ {
+ "epoch": 0.7,
+ "grad_norm": 22.23604393005371,
+ "learning_rate": 1.874916719499353e-05,
+ "loss": 3.5259,
+ "step": 8740
+ },
+ {
+ "epoch": 0.7,
+ "grad_norm": 27.168733596801758,
+ "learning_rate": 1.8702108247179512e-05,
+ "loss": 3.4829,
+ "step": 8760
+ },
+ {
+ "epoch": 0.7,
+ "grad_norm": 28.35675621032715,
+ "learning_rate": 1.8655010466001794e-05,
+ "loss": 3.4133,
+ "step": 8780
+ },
+ {
+ "epoch": 0.7,
+ "grad_norm": 30.800825119018555,
+ "learning_rate": 1.8607874345493806e-05,
+ "loss": 3.4527,
+ "step": 8800
+ },
+ {
+ "epoch": 0.71,
+ "grad_norm": 26.275304794311523,
+ "learning_rate": 1.856070038009115e-05,
+ "loss": 3.4386,
+ "step": 8820
+ },
+ {
+ "epoch": 0.71,
+ "grad_norm": 27.828401565551758,
+ "learning_rate": 1.85134890646264e-05,
+ "loss": 3.4806,
+ "step": 8840
+ },
+ {
+ "epoch": 0.71,
+ "grad_norm": 31.448450088500977,
+ "learning_rate": 1.846624089432392e-05,
+ "loss": 3.4659,
+ "step": 8860
+ },
+ {
+ "epoch": 0.71,
+ "grad_norm": 30.396820068359375,
+ "learning_rate": 1.8418956364794655e-05,
+ "loss": 3.4717,
+ "step": 8880
+ },
+ {
+ "epoch": 0.71,
+ "grad_norm": 65.90203857421875,
+ "learning_rate": 1.8371635972030942e-05,
+ "loss": 3.4938,
+ "step": 8900
  }
  ],
  "logging_steps": 20,
@@ -2933,7 +3129,7 @@
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 20,
- "total_flos": 1.9720585420898304e+16,
+ "total_flos": 2.103984238922957e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null