Upload folder using huggingface_hub
Browse files- model.safetensors +1 -1
- optimizer.pt +1 -1
- rng_state_0.pth +1 -1
- rng_state_1.pth +1 -1
- rng_state_2.pth +1 -1
- rng_state_3.pth +1 -1
- rng_state_4.pth +1 -1
- rng_state_5.pth +1 -1
- rng_state_6.pth +1 -1
- rng_state_7.pth +1 -1
- rng_state_8.pth +1 -1
- rng_state_9.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +3 -1424
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 213625344
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c9b2c4db3a2d556ee001eb8fdf128d644b789b40b2f7cf64684b6fe78989053b
|
3 |
size 213625344
|
optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 427334458
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:129003e0eae631ea59961c42baa93d6ece566b523e5b57d228356535ea34946d
|
3 |
size 427334458
|
rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 16433
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:593cfa780b4b09ba583a139eb81c2eae72992c19fc5f8f38c81bd37ea47dbe04
|
3 |
size 16433
|
rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 16433
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9560f8c460a856a55828494146d2d52ecf0d95a3dec5919d8f29a972450cec34
|
3 |
size 16433
|
rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 16433
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3e7b7bce88125710e1c78de933cf62b48d6ec5a97b36fc43a09d7f70aebd0307
|
3 |
size 16433
|
rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 16433
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4cb51f987fa17a879db4d368b0564564ae49379d5c5ce803d79d24b4b5a43c13
|
3 |
size 16433
|
rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 16433
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:33ed1a64882ec192e7bdbd0b9dda7c3dd977bc8ef889d26ddca3e2380d9debae
|
3 |
size 16433
|
rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 16433
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4dd0f015bb5215ff40f1555f5c47c89a7bd89b00e7ef4568ca045dc1c2b5514a
|
3 |
size 16433
|
rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 16433
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ac5bc7ba9f4a9e405864d41e902bc7509a5b6fa554a6cf09f24491e00dac06fb
|
3 |
size 16433
|
rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 16433
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:77e384968f192bdc497600d0108b82695a12247413143c0c9bd4e09fbb718212
|
3 |
size 16433
|
rng_state_8.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 16433
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cc46c9518ec829f507eb5833115c977024d13a12bc4e0ecff2238d818e6eb6dc
|
3 |
size 16433
|
rng_state_9.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 16433
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:56cb5d3bf2f9602568c76013ebf1c626061418df7f27c779c448921d362d5232
|
3 |
size 16433
|
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:83a9c85c7b2c29125f99f000e54e900b05be0859260af7e4a0abf634beb2c469
|
3 |
size 1064
|
trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 2318,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -2856,1427 +2856,6 @@
|
|
2856 |
"eval_samples_per_second": 610.045,
|
2857 |
"eval_steps_per_second": 7.626,
|
2858 |
"step": 9272
|
2859 |
-
},
|
2860 |
-
{
|
2861 |
-
"epoch": 0.4007417949713201,
|
2862 |
-
"grad_norm": 0.408203125,
|
2863 |
-
"learning_rate": 0.0005998532205145915,
|
2864 |
-
"loss": 1.0191,
|
2865 |
-
"step": 9292
|
2866 |
-
},
|
2867 |
-
{
|
2868 |
-
"epoch": 0.4017337301073878,
|
2869 |
-
"grad_norm": 0.46484375,
|
2870 |
-
"learning_rate": 0.0005988603004662408,
|
2871 |
-
"loss": 1.0074,
|
2872 |
-
"step": 9315
|
2873 |
-
},
|
2874 |
-
{
|
2875 |
-
"epoch": 0.4027256652434554,
|
2876 |
-
"grad_norm": 0.353515625,
|
2877 |
-
"learning_rate": 0.0005978673804178898,
|
2878 |
-
"loss": 1.0052,
|
2879 |
-
"step": 9338
|
2880 |
-
},
|
2881 |
-
{
|
2882 |
-
"epoch": 0.40371760037952303,
|
2883 |
-
"grad_norm": 0.361328125,
|
2884 |
-
"learning_rate": 0.000596874460369539,
|
2885 |
-
"loss": 1.0248,
|
2886 |
-
"step": 9361
|
2887 |
-
},
|
2888 |
-
{
|
2889 |
-
"epoch": 0.40470953551559063,
|
2890 |
-
"grad_norm": 0.376953125,
|
2891 |
-
"learning_rate": 0.000595881540321188,
|
2892 |
-
"loss": 1.0328,
|
2893 |
-
"step": 9384
|
2894 |
-
},
|
2895 |
-
{
|
2896 |
-
"epoch": 0.40570147065165824,
|
2897 |
-
"grad_norm": 0.4453125,
|
2898 |
-
"learning_rate": 0.0005948886202728372,
|
2899 |
-
"loss": 1.0254,
|
2900 |
-
"step": 9407
|
2901 |
-
},
|
2902 |
-
{
|
2903 |
-
"epoch": 0.4066934057877259,
|
2904 |
-
"grad_norm": 0.302734375,
|
2905 |
-
"learning_rate": 0.0005938957002244862,
|
2906 |
-
"loss": 1.0285,
|
2907 |
-
"step": 9430
|
2908 |
-
},
|
2909 |
-
{
|
2910 |
-
"epoch": 0.4076853409237935,
|
2911 |
-
"grad_norm": 0.439453125,
|
2912 |
-
"learning_rate": 0.0005929027801761354,
|
2913 |
-
"loss": 1.0077,
|
2914 |
-
"step": 9453
|
2915 |
-
},
|
2916 |
-
{
|
2917 |
-
"epoch": 0.40867727605986115,
|
2918 |
-
"grad_norm": 0.3515625,
|
2919 |
-
"learning_rate": 0.0005919098601277845,
|
2920 |
-
"loss": 1.0181,
|
2921 |
-
"step": 9476
|
2922 |
-
},
|
2923 |
-
{
|
2924 |
-
"epoch": 0.40966921119592875,
|
2925 |
-
"grad_norm": 0.33203125,
|
2926 |
-
"learning_rate": 0.0005909169400794336,
|
2927 |
-
"loss": 1.0427,
|
2928 |
-
"step": 9499
|
2929 |
-
},
|
2930 |
-
{
|
2931 |
-
"epoch": 0.41066114633199635,
|
2932 |
-
"grad_norm": 0.380859375,
|
2933 |
-
"learning_rate": 0.0005899240200310827,
|
2934 |
-
"loss": 1.0133,
|
2935 |
-
"step": 9522
|
2936 |
-
},
|
2937 |
-
{
|
2938 |
-
"epoch": 0.411653081468064,
|
2939 |
-
"grad_norm": 0.40234375,
|
2940 |
-
"learning_rate": 0.0005889310999827318,
|
2941 |
-
"loss": 1.0187,
|
2942 |
-
"step": 9545
|
2943 |
-
},
|
2944 |
-
{
|
2945 |
-
"epoch": 0.4126450166041316,
|
2946 |
-
"grad_norm": 0.44921875,
|
2947 |
-
"learning_rate": 0.000587938179934381,
|
2948 |
-
"loss": 1.0091,
|
2949 |
-
"step": 9568
|
2950 |
-
},
|
2951 |
-
{
|
2952 |
-
"epoch": 0.41363695174019927,
|
2953 |
-
"grad_norm": 0.37109375,
|
2954 |
-
"learning_rate": 0.0005869452598860301,
|
2955 |
-
"loss": 1.002,
|
2956 |
-
"step": 9591
|
2957 |
-
},
|
2958 |
-
{
|
2959 |
-
"epoch": 0.41462888687626687,
|
2960 |
-
"grad_norm": 0.47265625,
|
2961 |
-
"learning_rate": 0.0005859523398376792,
|
2962 |
-
"loss": 1.0321,
|
2963 |
-
"step": 9614
|
2964 |
-
},
|
2965 |
-
{
|
2966 |
-
"epoch": 0.4156208220123345,
|
2967 |
-
"grad_norm": 0.4453125,
|
2968 |
-
"learning_rate": 0.0005849594197893283,
|
2969 |
-
"loss": 1.0244,
|
2970 |
-
"step": 9637
|
2971 |
-
},
|
2972 |
-
{
|
2973 |
-
"epoch": 0.4166127571484021,
|
2974 |
-
"grad_norm": 0.4375,
|
2975 |
-
"learning_rate": 0.0005839664997409773,
|
2976 |
-
"loss": 1.0345,
|
2977 |
-
"step": 9660
|
2978 |
-
},
|
2979 |
-
{
|
2980 |
-
"epoch": 0.4176046922844697,
|
2981 |
-
"grad_norm": 0.357421875,
|
2982 |
-
"learning_rate": 0.0005829735796926266,
|
2983 |
-
"loss": 1.0157,
|
2984 |
-
"step": 9683
|
2985 |
-
},
|
2986 |
-
{
|
2987 |
-
"epoch": 0.4185966274205374,
|
2988 |
-
"grad_norm": 0.3515625,
|
2989 |
-
"learning_rate": 0.0005819806596442756,
|
2990 |
-
"loss": 0.9774,
|
2991 |
-
"step": 9706
|
2992 |
-
},
|
2993 |
-
{
|
2994 |
-
"epoch": 0.419588562556605,
|
2995 |
-
"grad_norm": 0.408203125,
|
2996 |
-
"learning_rate": 0.0005809877395959248,
|
2997 |
-
"loss": 1.0011,
|
2998 |
-
"step": 9729
|
2999 |
-
},
|
3000 |
-
{
|
3001 |
-
"epoch": 0.42058049769267264,
|
3002 |
-
"grad_norm": 0.41015625,
|
3003 |
-
"learning_rate": 0.0005799948195475738,
|
3004 |
-
"loss": 1.0148,
|
3005 |
-
"step": 9752
|
3006 |
-
},
|
3007 |
-
{
|
3008 |
-
"epoch": 0.42157243282874024,
|
3009 |
-
"grad_norm": 0.53515625,
|
3010 |
-
"learning_rate": 0.000579001899499223,
|
3011 |
-
"loss": 1.0058,
|
3012 |
-
"step": 9775
|
3013 |
-
},
|
3014 |
-
{
|
3015 |
-
"epoch": 0.42256436796480784,
|
3016 |
-
"grad_norm": 0.37890625,
|
3017 |
-
"learning_rate": 0.000578008979450872,
|
3018 |
-
"loss": 1.0241,
|
3019 |
-
"step": 9798
|
3020 |
-
},
|
3021 |
-
{
|
3022 |
-
"epoch": 0.4235563031008755,
|
3023 |
-
"grad_norm": 0.38671875,
|
3024 |
-
"learning_rate": 0.0005770160594025212,
|
3025 |
-
"loss": 1.0067,
|
3026 |
-
"step": 9821
|
3027 |
-
},
|
3028 |
-
{
|
3029 |
-
"epoch": 0.4245482382369431,
|
3030 |
-
"grad_norm": 0.3359375,
|
3031 |
-
"learning_rate": 0.0005760231393541703,
|
3032 |
-
"loss": 1.0105,
|
3033 |
-
"step": 9844
|
3034 |
-
},
|
3035 |
-
{
|
3036 |
-
"epoch": 0.42554017337301075,
|
3037 |
-
"grad_norm": 0.40625,
|
3038 |
-
"learning_rate": 0.0005750302193058193,
|
3039 |
-
"loss": 1.0226,
|
3040 |
-
"step": 9867
|
3041 |
-
},
|
3042 |
-
{
|
3043 |
-
"epoch": 0.42653210850907836,
|
3044 |
-
"grad_norm": 0.392578125,
|
3045 |
-
"learning_rate": 0.0005740372992574685,
|
3046 |
-
"loss": 1.0243,
|
3047 |
-
"step": 9890
|
3048 |
-
},
|
3049 |
-
{
|
3050 |
-
"epoch": 0.427524043645146,
|
3051 |
-
"grad_norm": 0.419921875,
|
3052 |
-
"learning_rate": 0.0005730443792091175,
|
3053 |
-
"loss": 1.0128,
|
3054 |
-
"step": 9913
|
3055 |
-
},
|
3056 |
-
{
|
3057 |
-
"epoch": 0.4285159787812136,
|
3058 |
-
"grad_norm": 0.373046875,
|
3059 |
-
"learning_rate": 0.0005720514591607668,
|
3060 |
-
"loss": 0.9993,
|
3061 |
-
"step": 9936
|
3062 |
-
},
|
3063 |
-
{
|
3064 |
-
"epoch": 0.4295079139172812,
|
3065 |
-
"grad_norm": 0.365234375,
|
3066 |
-
"learning_rate": 0.0005710585391124158,
|
3067 |
-
"loss": 1.012,
|
3068 |
-
"step": 9959
|
3069 |
-
},
|
3070 |
-
{
|
3071 |
-
"epoch": 0.43049984905334887,
|
3072 |
-
"grad_norm": 0.302734375,
|
3073 |
-
"learning_rate": 0.000570065619064065,
|
3074 |
-
"loss": 1.0178,
|
3075 |
-
"step": 9982
|
3076 |
-
},
|
3077 |
-
{
|
3078 |
-
"epoch": 0.43149178418941647,
|
3079 |
-
"grad_norm": 0.3671875,
|
3080 |
-
"learning_rate": 0.000569072699015714,
|
3081 |
-
"loss": 1.0192,
|
3082 |
-
"step": 10005
|
3083 |
-
},
|
3084 |
-
{
|
3085 |
-
"epoch": 0.43248371932548413,
|
3086 |
-
"grad_norm": 0.345703125,
|
3087 |
-
"learning_rate": 0.0005680797789673631,
|
3088 |
-
"loss": 0.9992,
|
3089 |
-
"step": 10028
|
3090 |
-
},
|
3091 |
-
{
|
3092 |
-
"epoch": 0.43347565446155173,
|
3093 |
-
"grad_norm": 0.4140625,
|
3094 |
-
"learning_rate": 0.0005670868589190123,
|
3095 |
-
"loss": 1.0033,
|
3096 |
-
"step": 10051
|
3097 |
-
},
|
3098 |
-
{
|
3099 |
-
"epoch": 0.43446758959761933,
|
3100 |
-
"grad_norm": 0.359375,
|
3101 |
-
"learning_rate": 0.0005660939388706614,
|
3102 |
-
"loss": 1.0153,
|
3103 |
-
"step": 10074
|
3104 |
-
},
|
3105 |
-
{
|
3106 |
-
"epoch": 0.435459524733687,
|
3107 |
-
"grad_norm": 0.36328125,
|
3108 |
-
"learning_rate": 0.0005651010188223105,
|
3109 |
-
"loss": 0.997,
|
3110 |
-
"step": 10097
|
3111 |
-
},
|
3112 |
-
{
|
3113 |
-
"epoch": 0.4364514598697546,
|
3114 |
-
"grad_norm": 0.3671875,
|
3115 |
-
"learning_rate": 0.0005641080987739596,
|
3116 |
-
"loss": 1.0147,
|
3117 |
-
"step": 10120
|
3118 |
-
},
|
3119 |
-
{
|
3120 |
-
"epoch": 0.43744339500582224,
|
3121 |
-
"grad_norm": 0.3515625,
|
3122 |
-
"learning_rate": 0.0005631151787256087,
|
3123 |
-
"loss": 1.0046,
|
3124 |
-
"step": 10143
|
3125 |
-
},
|
3126 |
-
{
|
3127 |
-
"epoch": 0.43843533014188985,
|
3128 |
-
"grad_norm": 0.37890625,
|
3129 |
-
"learning_rate": 0.0005621222586772579,
|
3130 |
-
"loss": 1.0008,
|
3131 |
-
"step": 10166
|
3132 |
-
},
|
3133 |
-
{
|
3134 |
-
"epoch": 0.4394272652779575,
|
3135 |
-
"grad_norm": 0.41796875,
|
3136 |
-
"learning_rate": 0.0005611293386289069,
|
3137 |
-
"loss": 1.0315,
|
3138 |
-
"step": 10189
|
3139 |
-
},
|
3140 |
-
{
|
3141 |
-
"epoch": 0.4404192004140251,
|
3142 |
-
"grad_norm": 0.365234375,
|
3143 |
-
"learning_rate": 0.0005601364185805561,
|
3144 |
-
"loss": 1.0164,
|
3145 |
-
"step": 10212
|
3146 |
-
},
|
3147 |
-
{
|
3148 |
-
"epoch": 0.4414111355500927,
|
3149 |
-
"grad_norm": 0.361328125,
|
3150 |
-
"learning_rate": 0.0005591434985322051,
|
3151 |
-
"loss": 1.0044,
|
3152 |
-
"step": 10235
|
3153 |
-
},
|
3154 |
-
{
|
3155 |
-
"epoch": 0.44240307068616036,
|
3156 |
-
"grad_norm": 0.439453125,
|
3157 |
-
"learning_rate": 0.0005581505784838543,
|
3158 |
-
"loss": 1.0236,
|
3159 |
-
"step": 10258
|
3160 |
-
},
|
3161 |
-
{
|
3162 |
-
"epoch": 0.44339500582222796,
|
3163 |
-
"grad_norm": 0.341796875,
|
3164 |
-
"learning_rate": 0.0005571576584355034,
|
3165 |
-
"loss": 1.0156,
|
3166 |
-
"step": 10281
|
3167 |
-
},
|
3168 |
-
{
|
3169 |
-
"epoch": 0.4443869409582956,
|
3170 |
-
"grad_norm": 0.373046875,
|
3171 |
-
"learning_rate": 0.0005561647383871526,
|
3172 |
-
"loss": 0.9916,
|
3173 |
-
"step": 10304
|
3174 |
-
},
|
3175 |
-
{
|
3176 |
-
"epoch": 0.4453788760943632,
|
3177 |
-
"grad_norm": 0.33203125,
|
3178 |
-
"learning_rate": 0.0005551718183388016,
|
3179 |
-
"loss": 0.9961,
|
3180 |
-
"step": 10327
|
3181 |
-
},
|
3182 |
-
{
|
3183 |
-
"epoch": 0.4463708112304308,
|
3184 |
-
"grad_norm": 0.392578125,
|
3185 |
-
"learning_rate": 0.0005541788982904507,
|
3186 |
-
"loss": 1.0021,
|
3187 |
-
"step": 10350
|
3188 |
-
},
|
3189 |
-
{
|
3190 |
-
"epoch": 0.4473627463664985,
|
3191 |
-
"grad_norm": 0.375,
|
3192 |
-
"learning_rate": 0.0005531859782420998,
|
3193 |
-
"loss": 1.0219,
|
3194 |
-
"step": 10373
|
3195 |
-
},
|
3196 |
-
{
|
3197 |
-
"epoch": 0.4483546815025661,
|
3198 |
-
"grad_norm": 0.4140625,
|
3199 |
-
"learning_rate": 0.000552193058193749,
|
3200 |
-
"loss": 0.9982,
|
3201 |
-
"step": 10396
|
3202 |
-
},
|
3203 |
-
{
|
3204 |
-
"epoch": 0.44934661663863373,
|
3205 |
-
"grad_norm": 0.392578125,
|
3206 |
-
"learning_rate": 0.0005512001381453981,
|
3207 |
-
"loss": 0.994,
|
3208 |
-
"step": 10419
|
3209 |
-
},
|
3210 |
-
{
|
3211 |
-
"epoch": 0.45033855177470133,
|
3212 |
-
"grad_norm": 0.34765625,
|
3213 |
-
"learning_rate": 0.0005502072180970471,
|
3214 |
-
"loss": 0.9899,
|
3215 |
-
"step": 10442
|
3216 |
-
},
|
3217 |
-
{
|
3218 |
-
"epoch": 0.451330486910769,
|
3219 |
-
"grad_norm": 0.3828125,
|
3220 |
-
"learning_rate": 0.0005492142980486963,
|
3221 |
-
"loss": 1.0096,
|
3222 |
-
"step": 10465
|
3223 |
-
},
|
3224 |
-
{
|
3225 |
-
"epoch": 0.4523224220468366,
|
3226 |
-
"grad_norm": 0.3984375,
|
3227 |
-
"learning_rate": 0.0005482213780003453,
|
3228 |
-
"loss": 0.9882,
|
3229 |
-
"step": 10488
|
3230 |
-
},
|
3231 |
-
{
|
3232 |
-
"epoch": 0.4533143571829042,
|
3233 |
-
"grad_norm": 0.390625,
|
3234 |
-
"learning_rate": 0.0005472284579519945,
|
3235 |
-
"loss": 0.999,
|
3236 |
-
"step": 10511
|
3237 |
-
},
|
3238 |
-
{
|
3239 |
-
"epoch": 0.45430629231897185,
|
3240 |
-
"grad_norm": 0.3984375,
|
3241 |
-
"learning_rate": 0.0005462355379036436,
|
3242 |
-
"loss": 1.0087,
|
3243 |
-
"step": 10534
|
3244 |
-
},
|
3245 |
-
{
|
3246 |
-
"epoch": 0.45529822745503945,
|
3247 |
-
"grad_norm": 0.388671875,
|
3248 |
-
"learning_rate": 0.0005452426178552927,
|
3249 |
-
"loss": 0.9985,
|
3250 |
-
"step": 10557
|
3251 |
-
},
|
3252 |
-
{
|
3253 |
-
"epoch": 0.4562901625911071,
|
3254 |
-
"grad_norm": 0.455078125,
|
3255 |
-
"learning_rate": 0.0005442496978069418,
|
3256 |
-
"loss": 1.0104,
|
3257 |
-
"step": 10580
|
3258 |
-
},
|
3259 |
-
{
|
3260 |
-
"epoch": 0.4572820977271747,
|
3261 |
-
"grad_norm": 0.61328125,
|
3262 |
-
"learning_rate": 0.0005432567777585909,
|
3263 |
-
"loss": 1.0056,
|
3264 |
-
"step": 10603
|
3265 |
-
},
|
3266 |
-
{
|
3267 |
-
"epoch": 0.4582740328632423,
|
3268 |
-
"grad_norm": 0.3359375,
|
3269 |
-
"learning_rate": 0.00054226385771024,
|
3270 |
-
"loss": 1.0115,
|
3271 |
-
"step": 10626
|
3272 |
-
},
|
3273 |
-
{
|
3274 |
-
"epoch": 0.45926596799930997,
|
3275 |
-
"grad_norm": 0.3515625,
|
3276 |
-
"learning_rate": 0.0005412709376618892,
|
3277 |
-
"loss": 1.0143,
|
3278 |
-
"step": 10649
|
3279 |
-
},
|
3280 |
-
{
|
3281 |
-
"epoch": 0.46025790313537757,
|
3282 |
-
"grad_norm": 0.388671875,
|
3283 |
-
"learning_rate": 0.0005402780176135383,
|
3284 |
-
"loss": 0.9916,
|
3285 |
-
"step": 10672
|
3286 |
-
},
|
3287 |
-
{
|
3288 |
-
"epoch": 0.4612498382714452,
|
3289 |
-
"grad_norm": 0.396484375,
|
3290 |
-
"learning_rate": 0.0005392850975651874,
|
3291 |
-
"loss": 0.9967,
|
3292 |
-
"step": 10695
|
3293 |
-
},
|
3294 |
-
{
|
3295 |
-
"epoch": 0.4622417734075128,
|
3296 |
-
"grad_norm": 0.41796875,
|
3297 |
-
"learning_rate": 0.0005382921775168364,
|
3298 |
-
"loss": 1.0009,
|
3299 |
-
"step": 10718
|
3300 |
-
},
|
3301 |
-
{
|
3302 |
-
"epoch": 0.4632337085435805,
|
3303 |
-
"grad_norm": 0.34765625,
|
3304 |
-
"learning_rate": 0.0005372992574684856,
|
3305 |
-
"loss": 0.9919,
|
3306 |
-
"step": 10741
|
3307 |
-
},
|
3308 |
-
{
|
3309 |
-
"epoch": 0.4642256436796481,
|
3310 |
-
"grad_norm": 0.3515625,
|
3311 |
-
"learning_rate": 0.0005363063374201347,
|
3312 |
-
"loss": 1.0128,
|
3313 |
-
"step": 10764
|
3314 |
-
},
|
3315 |
-
{
|
3316 |
-
"epoch": 0.4652175788157157,
|
3317 |
-
"grad_norm": 0.400390625,
|
3318 |
-
"learning_rate": 0.0005353134173717839,
|
3319 |
-
"loss": 0.9982,
|
3320 |
-
"step": 10787
|
3321 |
-
},
|
3322 |
-
{
|
3323 |
-
"epoch": 0.46620951395178334,
|
3324 |
-
"grad_norm": 0.3515625,
|
3325 |
-
"learning_rate": 0.0005343204973234329,
|
3326 |
-
"loss": 0.9998,
|
3327 |
-
"step": 10810
|
3328 |
-
},
|
3329 |
-
{
|
3330 |
-
"epoch": 0.46720144908785094,
|
3331 |
-
"grad_norm": 0.5390625,
|
3332 |
-
"learning_rate": 0.0005333275772750821,
|
3333 |
-
"loss": 1.0177,
|
3334 |
-
"step": 10833
|
3335 |
-
},
|
3336 |
-
{
|
3337 |
-
"epoch": 0.4681933842239186,
|
3338 |
-
"grad_norm": 0.37890625,
|
3339 |
-
"learning_rate": 0.0005323346572267311,
|
3340 |
-
"loss": 0.9899,
|
3341 |
-
"step": 10856
|
3342 |
-
},
|
3343 |
-
{
|
3344 |
-
"epoch": 0.4691853193599862,
|
3345 |
-
"grad_norm": 0.38671875,
|
3346 |
-
"learning_rate": 0.0005313417371783802,
|
3347 |
-
"loss": 1.0052,
|
3348 |
-
"step": 10879
|
3349 |
-
},
|
3350 |
-
{
|
3351 |
-
"epoch": 0.4701772544960538,
|
3352 |
-
"grad_norm": 0.36328125,
|
3353 |
-
"learning_rate": 0.0005303488171300294,
|
3354 |
-
"loss": 0.9741,
|
3355 |
-
"step": 10902
|
3356 |
-
},
|
3357 |
-
{
|
3358 |
-
"epoch": 0.47116918963212145,
|
3359 |
-
"grad_norm": 0.4453125,
|
3360 |
-
"learning_rate": 0.0005293558970816785,
|
3361 |
-
"loss": 1.0021,
|
3362 |
-
"step": 10925
|
3363 |
-
},
|
3364 |
-
{
|
3365 |
-
"epoch": 0.47216112476818906,
|
3366 |
-
"grad_norm": 0.322265625,
|
3367 |
-
"learning_rate": 0.0005283629770333276,
|
3368 |
-
"loss": 0.9896,
|
3369 |
-
"step": 10948
|
3370 |
-
},
|
3371 |
-
{
|
3372 |
-
"epoch": 0.4731530599042567,
|
3373 |
-
"grad_norm": 0.36328125,
|
3374 |
-
"learning_rate": 0.0005273700569849767,
|
3375 |
-
"loss": 1.0046,
|
3376 |
-
"step": 10971
|
3377 |
-
},
|
3378 |
-
{
|
3379 |
-
"epoch": 0.4741449950403243,
|
3380 |
-
"grad_norm": 0.345703125,
|
3381 |
-
"learning_rate": 0.0005263771369366258,
|
3382 |
-
"loss": 1.0004,
|
3383 |
-
"step": 10994
|
3384 |
-
},
|
3385 |
-
{
|
3386 |
-
"epoch": 0.47513693017639197,
|
3387 |
-
"grad_norm": 0.357421875,
|
3388 |
-
"learning_rate": 0.0005253842168882749,
|
3389 |
-
"loss": 1.0031,
|
3390 |
-
"step": 11017
|
3391 |
-
},
|
3392 |
-
{
|
3393 |
-
"epoch": 0.47612886531245957,
|
3394 |
-
"grad_norm": 0.359375,
|
3395 |
-
"learning_rate": 0.0005243912968399241,
|
3396 |
-
"loss": 1.007,
|
3397 |
-
"step": 11040
|
3398 |
-
},
|
3399 |
-
{
|
3400 |
-
"epoch": 0.47712080044852717,
|
3401 |
-
"grad_norm": 0.38671875,
|
3402 |
-
"learning_rate": 0.0005233983767915731,
|
3403 |
-
"loss": 1.0046,
|
3404 |
-
"step": 11063
|
3405 |
-
},
|
3406 |
-
{
|
3407 |
-
"epoch": 0.47811273558459483,
|
3408 |
-
"grad_norm": 0.341796875,
|
3409 |
-
"learning_rate": 0.0005224054567432222,
|
3410 |
-
"loss": 0.9956,
|
3411 |
-
"step": 11086
|
3412 |
-
},
|
3413 |
-
{
|
3414 |
-
"epoch": 0.47910467072066243,
|
3415 |
-
"grad_norm": 0.3515625,
|
3416 |
-
"learning_rate": 0.0005214125366948713,
|
3417 |
-
"loss": 1.01,
|
3418 |
-
"step": 11109
|
3419 |
-
},
|
3420 |
-
{
|
3421 |
-
"epoch": 0.4800966058567301,
|
3422 |
-
"grad_norm": 0.431640625,
|
3423 |
-
"learning_rate": 0.0005204196166465205,
|
3424 |
-
"loss": 1.0211,
|
3425 |
-
"step": 11132
|
3426 |
-
},
|
3427 |
-
{
|
3428 |
-
"epoch": 0.4810885409927977,
|
3429 |
-
"grad_norm": 0.375,
|
3430 |
-
"learning_rate": 0.0005194266965981696,
|
3431 |
-
"loss": 1.0039,
|
3432 |
-
"step": 11155
|
3433 |
-
},
|
3434 |
-
{
|
3435 |
-
"epoch": 0.4820804761288653,
|
3436 |
-
"grad_norm": 0.392578125,
|
3437 |
-
"learning_rate": 0.0005184337765498187,
|
3438 |
-
"loss": 0.9886,
|
3439 |
-
"step": 11178
|
3440 |
-
},
|
3441 |
-
{
|
3442 |
-
"epoch": 0.48307241126493294,
|
3443 |
-
"grad_norm": 0.515625,
|
3444 |
-
"learning_rate": 0.0005174408565014678,
|
3445 |
-
"loss": 0.9973,
|
3446 |
-
"step": 11201
|
3447 |
-
},
|
3448 |
-
{
|
3449 |
-
"epoch": 0.48406434640100054,
|
3450 |
-
"grad_norm": 0.396484375,
|
3451 |
-
"learning_rate": 0.0005164479364531169,
|
3452 |
-
"loss": 1.013,
|
3453 |
-
"step": 11224
|
3454 |
-
},
|
3455 |
-
{
|
3456 |
-
"epoch": 0.4850562815370682,
|
3457 |
-
"grad_norm": 0.451171875,
|
3458 |
-
"learning_rate": 0.000515455016404766,
|
3459 |
-
"loss": 0.9876,
|
3460 |
-
"step": 11247
|
3461 |
-
},
|
3462 |
-
{
|
3463 |
-
"epoch": 0.4860482166731358,
|
3464 |
-
"grad_norm": 0.375,
|
3465 |
-
"learning_rate": 0.0005144620963564152,
|
3466 |
-
"loss": 0.9984,
|
3467 |
-
"step": 11270
|
3468 |
-
},
|
3469 |
-
{
|
3470 |
-
"epoch": 0.48704015180920346,
|
3471 |
-
"grad_norm": 0.416015625,
|
3472 |
-
"learning_rate": 0.0005134691763080642,
|
3473 |
-
"loss": 0.9875,
|
3474 |
-
"step": 11293
|
3475 |
-
},
|
3476 |
-
{
|
3477 |
-
"epoch": 0.48803208694527106,
|
3478 |
-
"grad_norm": 0.369140625,
|
3479 |
-
"learning_rate": 0.0005124762562597134,
|
3480 |
-
"loss": 0.9954,
|
3481 |
-
"step": 11316
|
3482 |
-
},
|
3483 |
-
{
|
3484 |
-
"epoch": 0.48902402208133866,
|
3485 |
-
"grad_norm": 0.337890625,
|
3486 |
-
"learning_rate": 0.0005114833362113624,
|
3487 |
-
"loss": 0.9825,
|
3488 |
-
"step": 11339
|
3489 |
-
},
|
3490 |
-
{
|
3491 |
-
"epoch": 0.4900159572174063,
|
3492 |
-
"grad_norm": 0.37890625,
|
3493 |
-
"learning_rate": 0.0005104904161630117,
|
3494 |
-
"loss": 0.9983,
|
3495 |
-
"step": 11362
|
3496 |
-
},
|
3497 |
-
{
|
3498 |
-
"epoch": 0.4910078923534739,
|
3499 |
-
"grad_norm": 0.328125,
|
3500 |
-
"learning_rate": 0.0005094974961146607,
|
3501 |
-
"loss": 0.9818,
|
3502 |
-
"step": 11385
|
3503 |
-
},
|
3504 |
-
{
|
3505 |
-
"epoch": 0.4919998274895416,
|
3506 |
-
"grad_norm": 0.357421875,
|
3507 |
-
"learning_rate": 0.0005085045760663098,
|
3508 |
-
"loss": 0.9928,
|
3509 |
-
"step": 11408
|
3510 |
-
},
|
3511 |
-
{
|
3512 |
-
"epoch": 0.4929917626256092,
|
3513 |
-
"grad_norm": 0.4921875,
|
3514 |
-
"learning_rate": 0.0005075116560179589,
|
3515 |
-
"loss": 0.9771,
|
3516 |
-
"step": 11431
|
3517 |
-
},
|
3518 |
-
{
|
3519 |
-
"epoch": 0.4939836977616768,
|
3520 |
-
"grad_norm": 0.341796875,
|
3521 |
-
"learning_rate": 0.000506518735969608,
|
3522 |
-
"loss": 1.0059,
|
3523 |
-
"step": 11454
|
3524 |
-
},
|
3525 |
-
{
|
3526 |
-
"epoch": 0.49497563289774443,
|
3527 |
-
"grad_norm": 0.400390625,
|
3528 |
-
"learning_rate": 0.0005055258159212571,
|
3529 |
-
"loss": 1.0058,
|
3530 |
-
"step": 11477
|
3531 |
-
},
|
3532 |
-
{
|
3533 |
-
"epoch": 0.49596756803381203,
|
3534 |
-
"grad_norm": 0.328125,
|
3535 |
-
"learning_rate": 0.0005045328958729063,
|
3536 |
-
"loss": 0.9962,
|
3537 |
-
"step": 11500
|
3538 |
-
},
|
3539 |
-
{
|
3540 |
-
"epoch": 0.4969595031698797,
|
3541 |
-
"grad_norm": 0.326171875,
|
3542 |
-
"learning_rate": 0.0005035399758245554,
|
3543 |
-
"loss": 0.9828,
|
3544 |
-
"step": 11523
|
3545 |
-
},
|
3546 |
-
{
|
3547 |
-
"epoch": 0.4979514383059473,
|
3548 |
-
"grad_norm": 0.5078125,
|
3549 |
-
"learning_rate": 0.0005025470557762045,
|
3550 |
-
"loss": 0.9881,
|
3551 |
-
"step": 11546
|
3552 |
-
},
|
3553 |
-
{
|
3554 |
-
"epoch": 0.49894337344201495,
|
3555 |
-
"grad_norm": 0.43359375,
|
3556 |
-
"learning_rate": 0.0005015541357278536,
|
3557 |
-
"loss": 0.9863,
|
3558 |
-
"step": 11569
|
3559 |
-
},
|
3560 |
-
{
|
3561 |
-
"epoch": 0.49984905334885926,
|
3562 |
-
"eval_runtime": 163.9862,
|
3563 |
-
"eval_samples_per_second": 609.807,
|
3564 |
-
"eval_steps_per_second": 7.623,
|
3565 |
-
"step": 11590
|
3566 |
-
},
|
3567 |
-
{
|
3568 |
-
"epoch": 0.49993530857808255,
|
3569 |
-
"grad_norm": 0.353515625,
|
3570 |
-
"learning_rate": 0.0005005612156795026,
|
3571 |
-
"loss": 0.9764,
|
3572 |
-
"step": 11592
|
3573 |
-
},
|
3574 |
-
{
|
3575 |
-
"epoch": 0.5009272437141502,
|
3576 |
-
"grad_norm": 0.36328125,
|
3577 |
-
"learning_rate": 0.0004995682956311518,
|
3578 |
-
"loss": 0.9923,
|
3579 |
-
"step": 11615
|
3580 |
-
},
|
3581 |
-
{
|
3582 |
-
"epoch": 0.5019191788502178,
|
3583 |
-
"grad_norm": 0.39453125,
|
3584 |
-
"learning_rate": 0.0004985753755828009,
|
3585 |
-
"loss": 0.9738,
|
3586 |
-
"step": 11638
|
3587 |
-
},
|
3588 |
-
{
|
3589 |
-
"epoch": 0.5029111139862854,
|
3590 |
-
"grad_norm": 0.48828125,
|
3591 |
-
"learning_rate": 0.00049758245553445,
|
3592 |
-
"loss": 0.973,
|
3593 |
-
"step": 11661
|
3594 |
-
},
|
3595 |
-
{
|
3596 |
-
"epoch": 0.5039030491223531,
|
3597 |
-
"grad_norm": 0.384765625,
|
3598 |
-
"learning_rate": 0.0004965895354860991,
|
3599 |
-
"loss": 0.9741,
|
3600 |
-
"step": 11684
|
3601 |
-
},
|
3602 |
-
{
|
3603 |
-
"epoch": 0.5048949842584207,
|
3604 |
-
"grad_norm": 0.359375,
|
3605 |
-
"learning_rate": 0.0004955966154377482,
|
3606 |
-
"loss": 0.9842,
|
3607 |
-
"step": 11707
|
3608 |
-
},
|
3609 |
-
{
|
3610 |
-
"epoch": 0.5058869193944883,
|
3611 |
-
"grad_norm": 0.443359375,
|
3612 |
-
"learning_rate": 0.0004946036953893974,
|
3613 |
-
"loss": 0.9927,
|
3614 |
-
"step": 11730
|
3615 |
-
},
|
3616 |
-
{
|
3617 |
-
"epoch": 0.5068788545305559,
|
3618 |
-
"grad_norm": 0.44921875,
|
3619 |
-
"learning_rate": 0.0004936107753410465,
|
3620 |
-
"loss": 0.9921,
|
3621 |
-
"step": 11753
|
3622 |
-
},
|
3623 |
-
{
|
3624 |
-
"epoch": 0.5078707896666236,
|
3625 |
-
"grad_norm": 0.40625,
|
3626 |
-
"learning_rate": 0.0004926178552926956,
|
3627 |
-
"loss": 0.9827,
|
3628 |
-
"step": 11776
|
3629 |
-
},
|
3630 |
-
{
|
3631 |
-
"epoch": 0.5088627248026911,
|
3632 |
-
"grad_norm": 0.416015625,
|
3633 |
-
"learning_rate": 0.0004916249352443447,
|
3634 |
-
"loss": 0.9836,
|
3635 |
-
"step": 11799
|
3636 |
-
},
|
3637 |
-
{
|
3638 |
-
"epoch": 0.5098546599387588,
|
3639 |
-
"grad_norm": 0.36328125,
|
3640 |
-
"learning_rate": 0.0004906320151959938,
|
3641 |
-
"loss": 0.9783,
|
3642 |
-
"step": 11822
|
3643 |
-
},
|
3644 |
-
{
|
3645 |
-
"epoch": 0.5108465950748264,
|
3646 |
-
"grad_norm": 0.357421875,
|
3647 |
-
"learning_rate": 0.0004896390951476428,
|
3648 |
-
"loss": 1.0003,
|
3649 |
-
"step": 11845
|
3650 |
-
},
|
3651 |
-
{
|
3652 |
-
"epoch": 0.511838530210894,
|
3653 |
-
"grad_norm": 0.39453125,
|
3654 |
-
"learning_rate": 0.000488646175099292,
|
3655 |
-
"loss": 0.995,
|
3656 |
-
"step": 11868
|
3657 |
-
},
|
3658 |
-
{
|
3659 |
-
"epoch": 0.5128304653469616,
|
3660 |
-
"grad_norm": 0.376953125,
|
3661 |
-
"learning_rate": 0.0004876532550509411,
|
3662 |
-
"loss": 0.9952,
|
3663 |
-
"step": 11891
|
3664 |
-
},
|
3665 |
-
{
|
3666 |
-
"epoch": 0.5138224004830293,
|
3667 |
-
"grad_norm": 0.3828125,
|
3668 |
-
"learning_rate": 0.0004866603350025902,
|
3669 |
-
"loss": 0.9912,
|
3670 |
-
"step": 11914
|
3671 |
-
},
|
3672 |
-
{
|
3673 |
-
"epoch": 0.514814335619097,
|
3674 |
-
"grad_norm": 0.34375,
|
3675 |
-
"learning_rate": 0.00048566741495423933,
|
3676 |
-
"loss": 0.995,
|
3677 |
-
"step": 11937
|
3678 |
-
},
|
3679 |
-
{
|
3680 |
-
"epoch": 0.5158062707551645,
|
3681 |
-
"grad_norm": 0.408203125,
|
3682 |
-
"learning_rate": 0.00048467449490588845,
|
3683 |
-
"loss": 0.9856,
|
3684 |
-
"step": 11960
|
3685 |
-
},
|
3686 |
-
{
|
3687 |
-
"epoch": 0.5167982058912322,
|
3688 |
-
"grad_norm": 0.427734375,
|
3689 |
-
"learning_rate": 0.00048368157485753757,
|
3690 |
-
"loss": 0.9887,
|
3691 |
-
"step": 11983
|
3692 |
-
},
|
3693 |
-
{
|
3694 |
-
"epoch": 0.5177901410272998,
|
3695 |
-
"grad_norm": 0.376953125,
|
3696 |
-
"learning_rate": 0.0004826886548091867,
|
3697 |
-
"loss": 0.9815,
|
3698 |
-
"step": 12006
|
3699 |
-
},
|
3700 |
-
{
|
3701 |
-
"epoch": 0.5187820761633674,
|
3702 |
-
"grad_norm": 0.369140625,
|
3703 |
-
"learning_rate": 0.00048169573476083575,
|
3704 |
-
"loss": 0.9944,
|
3705 |
-
"step": 12029
|
3706 |
-
},
|
3707 |
-
{
|
3708 |
-
"epoch": 0.519774011299435,
|
3709 |
-
"grad_norm": 0.427734375,
|
3710 |
-
"learning_rate": 0.0004807028147124849,
|
3711 |
-
"loss": 0.9714,
|
3712 |
-
"step": 12052
|
3713 |
-
},
|
3714 |
-
{
|
3715 |
-
"epoch": 0.5207659464355027,
|
3716 |
-
"grad_norm": 0.326171875,
|
3717 |
-
"learning_rate": 0.000479709894664134,
|
3718 |
-
"loss": 0.9849,
|
3719 |
-
"step": 12075
|
3720 |
-
},
|
3721 |
-
{
|
3722 |
-
"epoch": 0.5217578815715703,
|
3723 |
-
"grad_norm": 0.427734375,
|
3724 |
-
"learning_rate": 0.0004787169746157831,
|
3725 |
-
"loss": 0.9861,
|
3726 |
-
"step": 12098
|
3727 |
-
},
|
3728 |
-
{
|
3729 |
-
"epoch": 0.5227498167076379,
|
3730 |
-
"grad_norm": 0.4765625,
|
3731 |
-
"learning_rate": 0.00047772405456743223,
|
3732 |
-
"loss": 1.0009,
|
3733 |
-
"step": 12121
|
3734 |
-
},
|
3735 |
-
{
|
3736 |
-
"epoch": 0.5237417518437055,
|
3737 |
-
"grad_norm": 0.345703125,
|
3738 |
-
"learning_rate": 0.00047673113451908135,
|
3739 |
-
"loss": 0.9892,
|
3740 |
-
"step": 12144
|
3741 |
-
},
|
3742 |
-
{
|
3743 |
-
"epoch": 0.5247336869797732,
|
3744 |
-
"grad_norm": 0.345703125,
|
3745 |
-
"learning_rate": 0.00047573821447073047,
|
3746 |
-
"loss": 0.9843,
|
3747 |
-
"step": 12167
|
3748 |
-
},
|
3749 |
-
{
|
3750 |
-
"epoch": 0.5257256221158407,
|
3751 |
-
"grad_norm": 0.40234375,
|
3752 |
-
"learning_rate": 0.0004747452944223796,
|
3753 |
-
"loss": 0.9767,
|
3754 |
-
"step": 12190
|
3755 |
-
},
|
3756 |
-
{
|
3757 |
-
"epoch": 0.5267175572519084,
|
3758 |
-
"grad_norm": 0.359375,
|
3759 |
-
"learning_rate": 0.00047375237437402866,
|
3760 |
-
"loss": 0.9599,
|
3761 |
-
"step": 12213
|
3762 |
-
},
|
3763 |
-
{
|
3764 |
-
"epoch": 0.527709492387976,
|
3765 |
-
"grad_norm": 0.388671875,
|
3766 |
-
"learning_rate": 0.0004727594543256778,
|
3767 |
-
"loss": 0.9797,
|
3768 |
-
"step": 12236
|
3769 |
-
},
|
3770 |
-
{
|
3771 |
-
"epoch": 0.5287014275240437,
|
3772 |
-
"grad_norm": 0.3359375,
|
3773 |
-
"learning_rate": 0.0004717665342773269,
|
3774 |
-
"loss": 0.9952,
|
3775 |
-
"step": 12259
|
3776 |
-
},
|
3777 |
-
{
|
3778 |
-
"epoch": 0.5296933626601112,
|
3779 |
-
"grad_norm": 0.359375,
|
3780 |
-
"learning_rate": 0.000470773614228976,
|
3781 |
-
"loss": 0.9851,
|
3782 |
-
"step": 12282
|
3783 |
-
},
|
3784 |
-
{
|
3785 |
-
"epoch": 0.5306852977961789,
|
3786 |
-
"grad_norm": 0.4140625,
|
3787 |
-
"learning_rate": 0.00046978069418062514,
|
3788 |
-
"loss": 0.9728,
|
3789 |
-
"step": 12305
|
3790 |
-
},
|
3791 |
-
{
|
3792 |
-
"epoch": 0.5316772329322466,
|
3793 |
-
"grad_norm": 0.376953125,
|
3794 |
-
"learning_rate": 0.00046878777413227426,
|
3795 |
-
"loss": 0.9813,
|
3796 |
-
"step": 12328
|
3797 |
-
},
|
3798 |
-
{
|
3799 |
-
"epoch": 0.5326691680683141,
|
3800 |
-
"grad_norm": 0.326171875,
|
3801 |
-
"learning_rate": 0.0004677948540839234,
|
3802 |
-
"loss": 0.9729,
|
3803 |
-
"step": 12351
|
3804 |
-
},
|
3805 |
-
{
|
3806 |
-
"epoch": 0.5336611032043818,
|
3807 |
-
"grad_norm": 0.33203125,
|
3808 |
-
"learning_rate": 0.0004668019340355725,
|
3809 |
-
"loss": 0.969,
|
3810 |
-
"step": 12374
|
3811 |
-
},
|
3812 |
-
{
|
3813 |
-
"epoch": 0.5346530383404494,
|
3814 |
-
"grad_norm": 0.43359375,
|
3815 |
-
"learning_rate": 0.00046580901398722156,
|
3816 |
-
"loss": 0.9786,
|
3817 |
-
"step": 12397
|
3818 |
-
},
|
3819 |
-
{
|
3820 |
-
"epoch": 0.535644973476517,
|
3821 |
-
"grad_norm": 0.388671875,
|
3822 |
-
"learning_rate": 0.00046481609393887063,
|
3823 |
-
"loss": 0.9773,
|
3824 |
-
"step": 12420
|
3825 |
-
},
|
3826 |
-
{
|
3827 |
-
"epoch": 0.5366369086125846,
|
3828 |
-
"grad_norm": 0.451171875,
|
3829 |
-
"learning_rate": 0.00046382317389051975,
|
3830 |
-
"loss": 0.9972,
|
3831 |
-
"step": 12443
|
3832 |
-
},
|
3833 |
-
{
|
3834 |
-
"epoch": 0.5376288437486523,
|
3835 |
-
"grad_norm": 0.408203125,
|
3836 |
-
"learning_rate": 0.00046283025384216887,
|
3837 |
-
"loss": 0.9893,
|
3838 |
-
"step": 12466
|
3839 |
-
},
|
3840 |
-
{
|
3841 |
-
"epoch": 0.5386207788847199,
|
3842 |
-
"grad_norm": 0.400390625,
|
3843 |
-
"learning_rate": 0.000461837333793818,
|
3844 |
-
"loss": 0.9747,
|
3845 |
-
"step": 12489
|
3846 |
-
},
|
3847 |
-
{
|
3848 |
-
"epoch": 0.5396127140207875,
|
3849 |
-
"grad_norm": 0.4921875,
|
3850 |
-
"learning_rate": 0.0004608444137454671,
|
3851 |
-
"loss": 0.9795,
|
3852 |
-
"step": 12512
|
3853 |
-
},
|
3854 |
-
{
|
3855 |
-
"epoch": 0.5406046491568551,
|
3856 |
-
"grad_norm": 0.37109375,
|
3857 |
-
"learning_rate": 0.00045985149369711623,
|
3858 |
-
"loss": 0.9608,
|
3859 |
-
"step": 12535
|
3860 |
-
},
|
3861 |
-
{
|
3862 |
-
"epoch": 0.5415965842929228,
|
3863 |
-
"grad_norm": 0.3515625,
|
3864 |
-
"learning_rate": 0.00045885857364876535,
|
3865 |
-
"loss": 0.966,
|
3866 |
-
"step": 12558
|
3867 |
-
},
|
3868 |
-
{
|
3869 |
-
"epoch": 0.5425885194289903,
|
3870 |
-
"grad_norm": 0.361328125,
|
3871 |
-
"learning_rate": 0.0004578656536004144,
|
3872 |
-
"loss": 0.9689,
|
3873 |
-
"step": 12581
|
3874 |
-
},
|
3875 |
-
{
|
3876 |
-
"epoch": 0.543580454565058,
|
3877 |
-
"grad_norm": 0.404296875,
|
3878 |
-
"learning_rate": 0.00045687273355206353,
|
3879 |
-
"loss": 0.9655,
|
3880 |
-
"step": 12604
|
3881 |
-
},
|
3882 |
-
{
|
3883 |
-
"epoch": 0.5445723897011256,
|
3884 |
-
"grad_norm": 0.37890625,
|
3885 |
-
"learning_rate": 0.00045587981350371265,
|
3886 |
-
"loss": 0.9693,
|
3887 |
-
"step": 12627
|
3888 |
-
},
|
3889 |
-
{
|
3890 |
-
"epoch": 0.5455643248371933,
|
3891 |
-
"grad_norm": 0.41015625,
|
3892 |
-
"learning_rate": 0.00045488689345536177,
|
3893 |
-
"loss": 0.9986,
|
3894 |
-
"step": 12650
|
3895 |
-
},
|
3896 |
-
{
|
3897 |
-
"epoch": 0.5465562599732608,
|
3898 |
-
"grad_norm": 0.345703125,
|
3899 |
-
"learning_rate": 0.0004538939734070109,
|
3900 |
-
"loss": 0.9715,
|
3901 |
-
"step": 12673
|
3902 |
-
},
|
3903 |
-
{
|
3904 |
-
"epoch": 0.5475481951093285,
|
3905 |
-
"grad_norm": 0.37890625,
|
3906 |
-
"learning_rate": 0.00045290105335866,
|
3907 |
-
"loss": 0.9781,
|
3908 |
-
"step": 12696
|
3909 |
-
},
|
3910 |
-
{
|
3911 |
-
"epoch": 0.5485401302453962,
|
3912 |
-
"grad_norm": 0.42578125,
|
3913 |
-
"learning_rate": 0.00045190813331030913,
|
3914 |
-
"loss": 1.0001,
|
3915 |
-
"step": 12719
|
3916 |
-
},
|
3917 |
-
{
|
3918 |
-
"epoch": 0.5495320653814637,
|
3919 |
-
"grad_norm": 0.43359375,
|
3920 |
-
"learning_rate": 0.0004509152132619582,
|
3921 |
-
"loss": 0.9811,
|
3922 |
-
"step": 12742
|
3923 |
-
},
|
3924 |
-
{
|
3925 |
-
"epoch": 0.5505240005175314,
|
3926 |
-
"grad_norm": 0.341796875,
|
3927 |
-
"learning_rate": 0.0004499222932136073,
|
3928 |
-
"loss": 0.9584,
|
3929 |
-
"step": 12765
|
3930 |
-
},
|
3931 |
-
{
|
3932 |
-
"epoch": 0.551515935653599,
|
3933 |
-
"grad_norm": 0.419921875,
|
3934 |
-
"learning_rate": 0.00044892937316525644,
|
3935 |
-
"loss": 0.977,
|
3936 |
-
"step": 12788
|
3937 |
-
},
|
3938 |
-
{
|
3939 |
-
"epoch": 0.5525078707896667,
|
3940 |
-
"grad_norm": 0.416015625,
|
3941 |
-
"learning_rate": 0.00044793645311690556,
|
3942 |
-
"loss": 0.9746,
|
3943 |
-
"step": 12811
|
3944 |
-
},
|
3945 |
-
{
|
3946 |
-
"epoch": 0.5534998059257342,
|
3947 |
-
"grad_norm": 0.390625,
|
3948 |
-
"learning_rate": 0.0004469435330685547,
|
3949 |
-
"loss": 0.9811,
|
3950 |
-
"step": 12834
|
3951 |
-
},
|
3952 |
-
{
|
3953 |
-
"epoch": 0.5544917410618019,
|
3954 |
-
"grad_norm": 0.35546875,
|
3955 |
-
"learning_rate": 0.0004459506130202038,
|
3956 |
-
"loss": 0.9523,
|
3957 |
-
"step": 12857
|
3958 |
-
},
|
3959 |
-
{
|
3960 |
-
"epoch": 0.5554836761978695,
|
3961 |
-
"grad_norm": 0.37890625,
|
3962 |
-
"learning_rate": 0.0004449576929718529,
|
3963 |
-
"loss": 0.9641,
|
3964 |
-
"step": 12880
|
3965 |
-
},
|
3966 |
-
{
|
3967 |
-
"epoch": 0.5564756113339371,
|
3968 |
-
"grad_norm": 0.36328125,
|
3969 |
-
"learning_rate": 0.00044396477292350204,
|
3970 |
-
"loss": 0.9845,
|
3971 |
-
"step": 12903
|
3972 |
-
},
|
3973 |
-
{
|
3974 |
-
"epoch": 0.5574675464700047,
|
3975 |
-
"grad_norm": 0.365234375,
|
3976 |
-
"learning_rate": 0.0004429718528751511,
|
3977 |
-
"loss": 0.9788,
|
3978 |
-
"step": 12926
|
3979 |
-
},
|
3980 |
-
{
|
3981 |
-
"epoch": 0.5584594816060724,
|
3982 |
-
"grad_norm": 0.390625,
|
3983 |
-
"learning_rate": 0.0004419789328268002,
|
3984 |
-
"loss": 0.9795,
|
3985 |
-
"step": 12949
|
3986 |
-
},
|
3987 |
-
{
|
3988 |
-
"epoch": 0.5594514167421399,
|
3989 |
-
"grad_norm": 0.37109375,
|
3990 |
-
"learning_rate": 0.00044098601277844934,
|
3991 |
-
"loss": 0.9716,
|
3992 |
-
"step": 12972
|
3993 |
-
},
|
3994 |
-
{
|
3995 |
-
"epoch": 0.5604433518782076,
|
3996 |
-
"grad_norm": 0.38671875,
|
3997 |
-
"learning_rate": 0.00043999309273009846,
|
3998 |
-
"loss": 0.9814,
|
3999 |
-
"step": 12995
|
4000 |
-
},
|
4001 |
-
{
|
4002 |
-
"epoch": 0.5614352870142753,
|
4003 |
-
"grad_norm": 0.34765625,
|
4004 |
-
"learning_rate": 0.00043900017268174753,
|
4005 |
-
"loss": 0.9724,
|
4006 |
-
"step": 13018
|
4007 |
-
},
|
4008 |
-
{
|
4009 |
-
"epoch": 0.5624272221503429,
|
4010 |
-
"grad_norm": 0.44921875,
|
4011 |
-
"learning_rate": 0.00043800725263339665,
|
4012 |
-
"loss": 0.9538,
|
4013 |
-
"step": 13041
|
4014 |
-
},
|
4015 |
-
{
|
4016 |
-
"epoch": 0.5634191572864105,
|
4017 |
-
"grad_norm": 0.3828125,
|
4018 |
-
"learning_rate": 0.00043701433258504577,
|
4019 |
-
"loss": 0.9744,
|
4020 |
-
"step": 13064
|
4021 |
-
},
|
4022 |
-
{
|
4023 |
-
"epoch": 0.5644110924224781,
|
4024 |
-
"grad_norm": 0.423828125,
|
4025 |
-
"learning_rate": 0.0004360214125366949,
|
4026 |
-
"loss": 0.9777,
|
4027 |
-
"step": 13087
|
4028 |
-
},
|
4029 |
-
{
|
4030 |
-
"epoch": 0.5654030275585458,
|
4031 |
-
"grad_norm": 0.365234375,
|
4032 |
-
"learning_rate": 0.00043502849248834395,
|
4033 |
-
"loss": 0.9688,
|
4034 |
-
"step": 13110
|
4035 |
-
},
|
4036 |
-
{
|
4037 |
-
"epoch": 0.5663949626946133,
|
4038 |
-
"grad_norm": 0.470703125,
|
4039 |
-
"learning_rate": 0.00043403557243999307,
|
4040 |
-
"loss": 0.988,
|
4041 |
-
"step": 13133
|
4042 |
-
},
|
4043 |
-
{
|
4044 |
-
"epoch": 0.567386897830681,
|
4045 |
-
"grad_norm": 0.341796875,
|
4046 |
-
"learning_rate": 0.0004330426523916422,
|
4047 |
-
"loss": 0.9678,
|
4048 |
-
"step": 13156
|
4049 |
-
},
|
4050 |
-
{
|
4051 |
-
"epoch": 0.5683788329667486,
|
4052 |
-
"grad_norm": 0.345703125,
|
4053 |
-
"learning_rate": 0.0004320497323432913,
|
4054 |
-
"loss": 0.9735,
|
4055 |
-
"step": 13179
|
4056 |
-
},
|
4057 |
-
{
|
4058 |
-
"epoch": 0.5693707681028163,
|
4059 |
-
"grad_norm": 0.416015625,
|
4060 |
-
"learning_rate": 0.00043105681229494043,
|
4061 |
-
"loss": 0.9612,
|
4062 |
-
"step": 13202
|
4063 |
-
},
|
4064 |
-
{
|
4065 |
-
"epoch": 0.5703627032388838,
|
4066 |
-
"grad_norm": 0.375,
|
4067 |
-
"learning_rate": 0.00043006389224658955,
|
4068 |
-
"loss": 0.9428,
|
4069 |
-
"step": 13225
|
4070 |
-
},
|
4071 |
-
{
|
4072 |
-
"epoch": 0.5713546383749515,
|
4073 |
-
"grad_norm": 0.4296875,
|
4074 |
-
"learning_rate": 0.00042907097219823867,
|
4075 |
-
"loss": 0.9654,
|
4076 |
-
"step": 13248
|
4077 |
-
},
|
4078 |
-
{
|
4079 |
-
"epoch": 0.5723465735110191,
|
4080 |
-
"grad_norm": 0.353515625,
|
4081 |
-
"learning_rate": 0.0004280780521498878,
|
4082 |
-
"loss": 0.9739,
|
4083 |
-
"step": 13271
|
4084 |
-
},
|
4085 |
-
{
|
4086 |
-
"epoch": 0.5733385086470867,
|
4087 |
-
"grad_norm": 0.380859375,
|
4088 |
-
"learning_rate": 0.00042708513210153686,
|
4089 |
-
"loss": 0.9755,
|
4090 |
-
"step": 13294
|
4091 |
-
},
|
4092 |
-
{
|
4093 |
-
"epoch": 0.5743304437831543,
|
4094 |
-
"grad_norm": 0.357421875,
|
4095 |
-
"learning_rate": 0.000426092212053186,
|
4096 |
-
"loss": 0.9784,
|
4097 |
-
"step": 13317
|
4098 |
-
},
|
4099 |
-
{
|
4100 |
-
"epoch": 0.575322378919222,
|
4101 |
-
"grad_norm": 0.3125,
|
4102 |
-
"learning_rate": 0.0004250992920048351,
|
4103 |
-
"loss": 0.9625,
|
4104 |
-
"step": 13340
|
4105 |
-
},
|
4106 |
-
{
|
4107 |
-
"epoch": 0.5763143140552897,
|
4108 |
-
"grad_norm": 0.345703125,
|
4109 |
-
"learning_rate": 0.0004241063719564842,
|
4110 |
-
"loss": 0.9521,
|
4111 |
-
"step": 13363
|
4112 |
-
},
|
4113 |
-
{
|
4114 |
-
"epoch": 0.5773062491913572,
|
4115 |
-
"grad_norm": 0.333984375,
|
4116 |
-
"learning_rate": 0.00042311345190813334,
|
4117 |
-
"loss": 0.984,
|
4118 |
-
"step": 13386
|
4119 |
-
},
|
4120 |
-
{
|
4121 |
-
"epoch": 0.5782981843274249,
|
4122 |
-
"grad_norm": 0.45703125,
|
4123 |
-
"learning_rate": 0.00042212053185978246,
|
4124 |
-
"loss": 0.9794,
|
4125 |
-
"step": 13409
|
4126 |
-
},
|
4127 |
-
{
|
4128 |
-
"epoch": 0.5792901194634925,
|
4129 |
-
"grad_norm": 0.396484375,
|
4130 |
-
"learning_rate": 0.0004211276118114316,
|
4131 |
-
"loss": 0.9705,
|
4132 |
-
"step": 13432
|
4133 |
-
},
|
4134 |
-
{
|
4135 |
-
"epoch": 0.5802820545995601,
|
4136 |
-
"grad_norm": 0.400390625,
|
4137 |
-
"learning_rate": 0.00042013469176308064,
|
4138 |
-
"loss": 0.97,
|
4139 |
-
"step": 13455
|
4140 |
-
},
|
4141 |
-
{
|
4142 |
-
"epoch": 0.5812739897356277,
|
4143 |
-
"grad_norm": 0.37890625,
|
4144 |
-
"learning_rate": 0.00041914177171472976,
|
4145 |
-
"loss": 0.968,
|
4146 |
-
"step": 13478
|
4147 |
-
},
|
4148 |
-
{
|
4149 |
-
"epoch": 0.5822659248716954,
|
4150 |
-
"grad_norm": 0.365234375,
|
4151 |
-
"learning_rate": 0.0004181488516663789,
|
4152 |
-
"loss": 0.9664,
|
4153 |
-
"step": 13501
|
4154 |
-
},
|
4155 |
-
{
|
4156 |
-
"epoch": 0.5832578600077629,
|
4157 |
-
"grad_norm": 0.361328125,
|
4158 |
-
"learning_rate": 0.000417155931618028,
|
4159 |
-
"loss": 0.9722,
|
4160 |
-
"step": 13524
|
4161 |
-
},
|
4162 |
-
{
|
4163 |
-
"epoch": 0.5842497951438306,
|
4164 |
-
"grad_norm": 0.369140625,
|
4165 |
-
"learning_rate": 0.0004161630115696771,
|
4166 |
-
"loss": 0.9695,
|
4167 |
-
"step": 13547
|
4168 |
-
},
|
4169 |
-
{
|
4170 |
-
"epoch": 0.5852417302798982,
|
4171 |
-
"grad_norm": 0.337890625,
|
4172 |
-
"learning_rate": 0.00041517009152132624,
|
4173 |
-
"loss": 0.9628,
|
4174 |
-
"step": 13570
|
4175 |
-
},
|
4176 |
-
{
|
4177 |
-
"epoch": 0.5862336654159659,
|
4178 |
-
"grad_norm": 0.330078125,
|
4179 |
-
"learning_rate": 0.0004141771714729753,
|
4180 |
-
"loss": 0.9515,
|
4181 |
-
"step": 13593
|
4182 |
-
},
|
4183 |
-
{
|
4184 |
-
"epoch": 0.5872256005520334,
|
4185 |
-
"grad_norm": 0.359375,
|
4186 |
-
"learning_rate": 0.0004131842514246244,
|
4187 |
-
"loss": 0.965,
|
4188 |
-
"step": 13616
|
4189 |
-
},
|
4190 |
-
{
|
4191 |
-
"epoch": 0.5882175356881011,
|
4192 |
-
"grad_norm": 0.392578125,
|
4193 |
-
"learning_rate": 0.0004121913313762735,
|
4194 |
-
"loss": 0.9598,
|
4195 |
-
"step": 13639
|
4196 |
-
},
|
4197 |
-
{
|
4198 |
-
"epoch": 0.5892094708241687,
|
4199 |
-
"grad_norm": 0.41796875,
|
4200 |
-
"learning_rate": 0.0004111984113279226,
|
4201 |
-
"loss": 0.9575,
|
4202 |
-
"step": 13662
|
4203 |
-
},
|
4204 |
-
{
|
4205 |
-
"epoch": 0.5902014059602363,
|
4206 |
-
"grad_norm": 0.5234375,
|
4207 |
-
"learning_rate": 0.00041020549127957173,
|
4208 |
-
"loss": 0.9933,
|
4209 |
-
"step": 13685
|
4210 |
-
},
|
4211 |
-
{
|
4212 |
-
"epoch": 0.591193341096304,
|
4213 |
-
"grad_norm": 0.423828125,
|
4214 |
-
"learning_rate": 0.00040921257123122085,
|
4215 |
-
"loss": 0.9621,
|
4216 |
-
"step": 13708
|
4217 |
-
},
|
4218 |
-
{
|
4219 |
-
"epoch": 0.5921852762323716,
|
4220 |
-
"grad_norm": 0.33203125,
|
4221 |
-
"learning_rate": 0.00040821965118286997,
|
4222 |
-
"loss": 0.964,
|
4223 |
-
"step": 13731
|
4224 |
-
},
|
4225 |
-
{
|
4226 |
-
"epoch": 0.5931772113684393,
|
4227 |
-
"grad_norm": 0.423828125,
|
4228 |
-
"learning_rate": 0.0004072267311345191,
|
4229 |
-
"loss": 0.9854,
|
4230 |
-
"step": 13754
|
4231 |
-
},
|
4232 |
-
{
|
4233 |
-
"epoch": 0.5941691465045068,
|
4234 |
-
"grad_norm": 0.3515625,
|
4235 |
-
"learning_rate": 0.0004062338110861682,
|
4236 |
-
"loss": 0.9883,
|
4237 |
-
"step": 13777
|
4238 |
-
},
|
4239 |
-
{
|
4240 |
-
"epoch": 0.5951610816405745,
|
4241 |
-
"grad_norm": 0.408203125,
|
4242 |
-
"learning_rate": 0.00040524089103781733,
|
4243 |
-
"loss": 0.9853,
|
4244 |
-
"step": 13800
|
4245 |
-
},
|
4246 |
-
{
|
4247 |
-
"epoch": 0.5961530167766421,
|
4248 |
-
"grad_norm": 0.408203125,
|
4249 |
-
"learning_rate": 0.0004042479709894664,
|
4250 |
-
"loss": 0.9557,
|
4251 |
-
"step": 13823
|
4252 |
-
},
|
4253 |
-
{
|
4254 |
-
"epoch": 0.5971449519127097,
|
4255 |
-
"grad_norm": 0.42578125,
|
4256 |
-
"learning_rate": 0.0004032550509411155,
|
4257 |
-
"loss": 0.9587,
|
4258 |
-
"step": 13846
|
4259 |
-
},
|
4260 |
-
{
|
4261 |
-
"epoch": 0.5981368870487773,
|
4262 |
-
"grad_norm": 0.44921875,
|
4263 |
-
"learning_rate": 0.00040226213089276464,
|
4264 |
-
"loss": 0.9771,
|
4265 |
-
"step": 13869
|
4266 |
-
},
|
4267 |
-
{
|
4268 |
-
"epoch": 0.599128822184845,
|
4269 |
-
"grad_norm": 0.431640625,
|
4270 |
-
"learning_rate": 0.00040126921084441376,
|
4271 |
-
"loss": 0.9661,
|
4272 |
-
"step": 13892
|
4273 |
-
},
|
4274 |
-
{
|
4275 |
-
"epoch": 0.5998188640186312,
|
4276 |
-
"eval_runtime": 163.7921,
|
4277 |
-
"eval_samples_per_second": 610.53,
|
4278 |
-
"eval_steps_per_second": 7.632,
|
4279 |
-
"step": 13908
|
4280 |
}
|
4281 |
],
|
4282 |
"logging_steps": 23,
|
@@ -4296,7 +2875,7 @@
|
|
4296 |
"attributes": {}
|
4297 |
}
|
4298 |
},
|
4299 |
-
"total_flos":
|
4300 |
"train_batch_size": 8,
|
4301 |
"trial_name": null,
|
4302 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.3998792426790874,
|
5 |
"eval_steps": 2318,
|
6 |
+
"global_step": 9272,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
2856 |
"eval_samples_per_second": 610.045,
|
2857 |
"eval_steps_per_second": 7.626,
|
2858 |
"step": 9272
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2859 |
}
|
2860 |
],
|
2861 |
"logging_steps": 23,
|
|
|
2875 |
"attributes": {}
|
2876 |
}
|
2877 |
},
|
2878 |
+
"total_flos": 6.778106242599485e+17,
|
2879 |
"train_batch_size": 8,
|
2880 |
"trial_name": null,
|
2881 |
"trial_params": null
|