Femboyuwu2000
commited on
Commit
•
6940d52
1
Parent(s):
0104963
Training in progress, step 8900, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 13982248
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:630a910b2d5b8c9504add11b070a2b95445874621f6de2ea3077628d0553ffe3
|
3 |
size 13982248
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 7062522
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:da3ec0e6441f43d24431895b711504845f578f35580f2b0f333b91a363d641c1
|
3 |
size 7062522
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7e3f344cbf26501e998b8de868d6d509ad3fe3171ec390a602388814ddd9df38
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0dde4a2f54c5119ed26c871b2368383ec34337eb67d04dfb138caacf14a881ef
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -2926,6 +2926,202 @@
|
|
2926 |
"learning_rate": 1.9681434114209617e-05,
|
2927 |
"loss": 3.4415,
|
2928 |
"step": 8340
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2929 |
}
|
2930 |
],
|
2931 |
"logging_steps": 20,
|
@@ -2933,7 +3129,7 @@
|
|
2933 |
"num_input_tokens_seen": 0,
|
2934 |
"num_train_epochs": 2,
|
2935 |
"save_steps": 20,
|
2936 |
-
"total_flos":
|
2937 |
"train_batch_size": 8,
|
2938 |
"trial_name": null,
|
2939 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.712,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 8900,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
2926 |
"learning_rate": 1.9681434114209617e-05,
|
2927 |
"loss": 3.4415,
|
2928 |
"step": 8340
|
2929 |
+
},
|
2930 |
+
{
|
2931 |
+
"epoch": 0.67,
|
2932 |
+
"grad_norm": 30.5143985748291,
|
2933 |
+
"learning_rate": 1.963525491562421e-05,
|
2934 |
+
"loss": 3.4806,
|
2935 |
+
"step": 8360
|
2936 |
+
},
|
2937 |
+
{
|
2938 |
+
"epoch": 0.67,
|
2939 |
+
"grad_norm": 27.770360946655273,
|
2940 |
+
"learning_rate": 1.958902709540811e-05,
|
2941 |
+
"loss": 3.4379,
|
2942 |
+
"step": 8380
|
2943 |
+
},
|
2944 |
+
{
|
2945 |
+
"epoch": 0.67,
|
2946 |
+
"grad_norm": 24.266944885253906,
|
2947 |
+
"learning_rate": 1.954275113846926e-05,
|
2948 |
+
"loss": 3.4933,
|
2949 |
+
"step": 8400
|
2950 |
+
},
|
2951 |
+
{
|
2952 |
+
"epoch": 0.67,
|
2953 |
+
"grad_norm": 43.798301696777344,
|
2954 |
+
"learning_rate": 1.9496427530220567e-05,
|
2955 |
+
"loss": 3.4107,
|
2956 |
+
"step": 8420
|
2957 |
+
},
|
2958 |
+
{
|
2959 |
+
"epoch": 0.68,
|
2960 |
+
"grad_norm": 32.54145431518555,
|
2961 |
+
"learning_rate": 1.9450056756574753e-05,
|
2962 |
+
"loss": 3.507,
|
2963 |
+
"step": 8440
|
2964 |
+
},
|
2965 |
+
{
|
2966 |
+
"epoch": 0.68,
|
2967 |
+
"grad_norm": 29.06185531616211,
|
2968 |
+
"learning_rate": 1.9403639303939293e-05,
|
2969 |
+
"loss": 3.4434,
|
2970 |
+
"step": 8460
|
2971 |
+
},
|
2972 |
+
{
|
2973 |
+
"epoch": 0.68,
|
2974 |
+
"grad_norm": 26.419170379638672,
|
2975 |
+
"learning_rate": 1.93571756592113e-05,
|
2976 |
+
"loss": 3.4684,
|
2977 |
+
"step": 8480
|
2978 |
+
},
|
2979 |
+
{
|
2980 |
+
"epoch": 0.68,
|
2981 |
+
"grad_norm": 31.831510543823242,
|
2982 |
+
"learning_rate": 1.9310666309772426e-05,
|
2983 |
+
"loss": 3.4565,
|
2984 |
+
"step": 8500
|
2985 |
+
},
|
2986 |
+
{
|
2987 |
+
"epoch": 0.68,
|
2988 |
+
"grad_norm": 33.18935775756836,
|
2989 |
+
"learning_rate": 1.926411174348373e-05,
|
2990 |
+
"loss": 3.4568,
|
2991 |
+
"step": 8520
|
2992 |
+
},
|
2993 |
+
{
|
2994 |
+
"epoch": 0.68,
|
2995 |
+
"grad_norm": 25.60289192199707,
|
2996 |
+
"learning_rate": 1.9217512448680586e-05,
|
2997 |
+
"loss": 3.3513,
|
2998 |
+
"step": 8540
|
2999 |
+
},
|
3000 |
+
{
|
3001 |
+
"epoch": 0.68,
|
3002 |
+
"grad_norm": 27.03973960876465,
|
3003 |
+
"learning_rate": 1.917086891416755e-05,
|
3004 |
+
"loss": 3.4551,
|
3005 |
+
"step": 8560
|
3006 |
+
},
|
3007 |
+
{
|
3008 |
+
"epoch": 0.69,
|
3009 |
+
"grad_norm": 39.20319366455078,
|
3010 |
+
"learning_rate": 1.9124181629213228e-05,
|
3011 |
+
"loss": 3.4217,
|
3012 |
+
"step": 8580
|
3013 |
+
},
|
3014 |
+
{
|
3015 |
+
"epoch": 0.69,
|
3016 |
+
"grad_norm": 25.287826538085938,
|
3017 |
+
"learning_rate": 1.9077451083545144e-05,
|
3018 |
+
"loss": 3.4172,
|
3019 |
+
"step": 8600
|
3020 |
+
},
|
3021 |
+
{
|
3022 |
+
"epoch": 0.69,
|
3023 |
+
"grad_norm": 51.33893585205078,
|
3024 |
+
"learning_rate": 1.903067776734461e-05,
|
3025 |
+
"loss": 3.4487,
|
3026 |
+
"step": 8620
|
3027 |
+
},
|
3028 |
+
{
|
3029 |
+
"epoch": 0.69,
|
3030 |
+
"grad_norm": 23.41849708557129,
|
3031 |
+
"learning_rate": 1.8983862171241577e-05,
|
3032 |
+
"loss": 3.4675,
|
3033 |
+
"step": 8640
|
3034 |
+
},
|
3035 |
+
{
|
3036 |
+
"epoch": 0.69,
|
3037 |
+
"grad_norm": 37.373104095458984,
|
3038 |
+
"learning_rate": 1.8937004786309504e-05,
|
3039 |
+
"loss": 3.4782,
|
3040 |
+
"step": 8660
|
3041 |
+
},
|
3042 |
+
{
|
3043 |
+
"epoch": 0.69,
|
3044 |
+
"grad_norm": 26.743480682373047,
|
3045 |
+
"learning_rate": 1.8890106104060177e-05,
|
3046 |
+
"loss": 3.432,
|
3047 |
+
"step": 8680
|
3048 |
+
},
|
3049 |
+
{
|
3050 |
+
"epoch": 0.7,
|
3051 |
+
"grad_norm": 39.051124572753906,
|
3052 |
+
"learning_rate": 1.8843166616438585e-05,
|
3053 |
+
"loss": 3.4937,
|
3054 |
+
"step": 8700
|
3055 |
+
},
|
3056 |
+
{
|
3057 |
+
"epoch": 0.7,
|
3058 |
+
"grad_norm": 32.51453399658203,
|
3059 |
+
"learning_rate": 1.8796186815817743e-05,
|
3060 |
+
"loss": 3.4618,
|
3061 |
+
"step": 8720
|
3062 |
+
},
|
3063 |
+
{
|
3064 |
+
"epoch": 0.7,
|
3065 |
+
"grad_norm": 22.23604393005371,
|
3066 |
+
"learning_rate": 1.874916719499353e-05,
|
3067 |
+
"loss": 3.5259,
|
3068 |
+
"step": 8740
|
3069 |
+
},
|
3070 |
+
{
|
3071 |
+
"epoch": 0.7,
|
3072 |
+
"grad_norm": 27.168733596801758,
|
3073 |
+
"learning_rate": 1.8702108247179512e-05,
|
3074 |
+
"loss": 3.4829,
|
3075 |
+
"step": 8760
|
3076 |
+
},
|
3077 |
+
{
|
3078 |
+
"epoch": 0.7,
|
3079 |
+
"grad_norm": 28.35675621032715,
|
3080 |
+
"learning_rate": 1.8655010466001794e-05,
|
3081 |
+
"loss": 3.4133,
|
3082 |
+
"step": 8780
|
3083 |
+
},
|
3084 |
+
{
|
3085 |
+
"epoch": 0.7,
|
3086 |
+
"grad_norm": 30.800825119018555,
|
3087 |
+
"learning_rate": 1.8607874345493806e-05,
|
3088 |
+
"loss": 3.4527,
|
3089 |
+
"step": 8800
|
3090 |
+
},
|
3091 |
+
{
|
3092 |
+
"epoch": 0.71,
|
3093 |
+
"grad_norm": 26.275304794311523,
|
3094 |
+
"learning_rate": 1.856070038009115e-05,
|
3095 |
+
"loss": 3.4386,
|
3096 |
+
"step": 8820
|
3097 |
+
},
|
3098 |
+
{
|
3099 |
+
"epoch": 0.71,
|
3100 |
+
"grad_norm": 27.828401565551758,
|
3101 |
+
"learning_rate": 1.85134890646264e-05,
|
3102 |
+
"loss": 3.4806,
|
3103 |
+
"step": 8840
|
3104 |
+
},
|
3105 |
+
{
|
3106 |
+
"epoch": 0.71,
|
3107 |
+
"grad_norm": 31.448450088500977,
|
3108 |
+
"learning_rate": 1.846624089432392e-05,
|
3109 |
+
"loss": 3.4659,
|
3110 |
+
"step": 8860
|
3111 |
+
},
|
3112 |
+
{
|
3113 |
+
"epoch": 0.71,
|
3114 |
+
"grad_norm": 30.396820068359375,
|
3115 |
+
"learning_rate": 1.8418956364794655e-05,
|
3116 |
+
"loss": 3.4717,
|
3117 |
+
"step": 8880
|
3118 |
+
},
|
3119 |
+
{
|
3120 |
+
"epoch": 0.71,
|
3121 |
+
"grad_norm": 65.90203857421875,
|
3122 |
+
"learning_rate": 1.8371635972030942e-05,
|
3123 |
+
"loss": 3.4938,
|
3124 |
+
"step": 8900
|
3125 |
}
|
3126 |
],
|
3127 |
"logging_steps": 20,
|
|
|
3129 |
"num_input_tokens_seen": 0,
|
3130 |
"num_train_epochs": 2,
|
3131 |
"save_steps": 20,
|
3132 |
+
"total_flos": 2.103984238922957e+16,
|
3133 |
"train_batch_size": 8,
|
3134 |
"trial_name": null,
|
3135 |
"trial_params": null
|