BryanSagbay commited on
Commit
53911bf
1 Parent(s): 835e889

Upload 10 files

Browse files
config.json ADDED
@@ -0,0 +1,249 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "sentence-transformers/all-mpnet-base-v2",
3
+ "architectures": [
4
+ "MPNetForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "eos_token_id": 2,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "control de bienes",
14
+ "1": "capacitaciones",
15
+ "2": "aval",
16
+ "3": "adquisiciones informe rt",
17
+ "4": "adquisiciones",
18
+ "5": "erp crm",
19
+ "6": "control de bienes prestamos",
20
+ "7": "control de cambios",
21
+ "8": "erp bienestar",
22
+ "9": "erp centros de produccion",
23
+ "10": "erp gestion academica evaluacion docente",
24
+ "11": "erp conceder acceso",
25
+ "12": "erp eliminar accesos",
26
+ "13": "erp financiero",
27
+ "14": "erp gestion academica",
28
+ "15": "erp no reportes",
29
+ "16": "erp gestion de biblioteca",
30
+ "17": "erp gestion logistica",
31
+ "18": "erp internacionalizacion",
32
+ "19": "erp investigacion cientifica",
33
+ "20": "erp talento humano",
34
+ "21": "erp salud ocupacional",
35
+ "22": "erp planificacion",
36
+ "23": "erp otros",
37
+ "24": "erp odoo monica otros",
38
+ "25": "evea conceder acceso",
39
+ "26": "erp vinculacion",
40
+ "27": "erp votaciones",
41
+ "28": "evea acceso al servicio evea",
42
+ "29": "evea cursos",
43
+ "30": "evea modificacion de la informacion",
44
+ "31": "evea eliminar accesos",
45
+ "32": "evea encuesta cuestionario tareas examenes",
46
+ "33": "evea interrupcion del servicio",
47
+ "34": "evea matriculacion en cursos",
48
+ "35": "evea reportes",
49
+ "36": "evea nuevo",
50
+ "37": "evea otro",
51
+ "38": "evea plataforma",
52
+ "39": "evea problema con la configuracion",
53
+ "40": "firma electronica quipux",
54
+ "41": "encuesta",
55
+ "42": "evea sincronizacion",
56
+ "43": "evea sincronizacion proveedor",
57
+ "44": "evea respaldos",
58
+ "45": "infraestructura respaldo y recuperacion",
59
+ "46": "gestion",
60
+ "47": "infraestructura conectividad",
61
+ "48": "infraestructura mantenimiento preventivo",
62
+ "49": "infraestructura uso de infraestructura fisica",
63
+ "50": "office 365 conceder acceso",
64
+ "51": "informe",
65
+ "52": "office 365",
66
+ "53": "office 365 acceso al servicio",
67
+ "54": "office 365 bloqueo de servicio",
68
+ "55": "office 365 problema con el servicio",
69
+ "56": "office 365 creacion de cuenta",
70
+ "57": "office 365 eliminar accesos",
71
+ "58": "office 365 interrupcion del servicio",
72
+ "59": "office 365 otro",
73
+ "60": "poa",
74
+ "61": "pagina web",
75
+ "62": "otros",
76
+ "63": "oficio",
77
+ "64": "office 365 roles y permisos",
78
+ "65": "reportes",
79
+ "66": "proveedor",
80
+ "67": "redes sociales",
81
+ "68": "proyectos",
82
+ "69": "repositorio base digital",
83
+ "70": "soporte equipos mantenimiento de hardware",
84
+ "71": "revistas",
85
+ "72": "servicios conceder accesos",
86
+ "73": "servicios elimnar accesos",
87
+ "74": "soporte equipos garantia",
88
+ "75": "soporte erp gestion logistica",
89
+ "76": "soporte equipos mantenimiento de software",
90
+ "77": "soporte erp bienestar",
91
+ "78": "soporte erp financiero",
92
+ "79": "soporte erp gestion academica",
93
+ "80": "soporte infraestructura",
94
+ "81": "soporte evea sincronizacion",
95
+ "82": "soporte erp talento humano",
96
+ "83": "soporte evea plataforma",
97
+ "84": "soporte erp otros",
98
+ "85": "soporte seguridad",
99
+ "86": "soporte otros",
100
+ "87": "soporte pagina web",
101
+ "88": "soporte repositorio base digital",
102
+ "89": "soporte revistas",
103
+ "90": "soporte odoo monica otros",
104
+ "91": "soporte infraestructura conectividad",
105
+ "92": "soporte infraestructura respaldo y recuperacion",
106
+ "93": "soporte mantenimiento correctivo",
107
+ "94": "soporte office 365",
108
+ "95": "streaming eventos",
109
+ "96": "soporte streaming eventos",
110
+ "97": "soporte turniting",
111
+ "98": "soporte zoom",
112
+ "99": "sin datos",
113
+ "100": "turnitin",
114
+ "101": "zoom credenciales zoom",
115
+ "102": "zoom interrupcion del servicio",
116
+ "103": "zoom licenciamiento",
117
+ "104": "zoom otros",
118
+ "105": "zoom webinar",
119
+ "106": "zoom conceder accesos",
120
+ "107": "zoom eliminar accesos",
121
+ "108": "zoom videoconferencia",
122
+ "109": "reporte informe"
123
+ },
124
+ "initializer_range": 0.02,
125
+ "intermediate_size": 3072,
126
+ "label2id": {
127
+ "adquisiciones": 4,
128
+ "adquisiciones informe rt": 3,
129
+ "aval": 2,
130
+ "capacitaciones": 1,
131
+ "control de bienes": 0,
132
+ "control de bienes prestamos": 6,
133
+ "control de cambios": 7,
134
+ "encuesta": 41,
135
+ "erp bienestar": 8,
136
+ "erp centros de produccion": 9,
137
+ "erp conceder acceso": 11,
138
+ "erp crm": 5,
139
+ "erp eliminar accesos": 12,
140
+ "erp financiero": 13,
141
+ "erp gestion academica": 14,
142
+ "erp gestion academica evaluacion docente": 10,
143
+ "erp gestion de biblioteca": 16,
144
+ "erp gestion logistica": 17,
145
+ "erp internacionalizacion": 18,
146
+ "erp investigacion cientifica": 19,
147
+ "erp no reportes": 15,
148
+ "erp odoo monica otros": 24,
149
+ "erp otros": 23,
150
+ "erp planificacion": 22,
151
+ "erp salud ocupacional": 21,
152
+ "erp talento humano": 20,
153
+ "erp vinculacion": 26,
154
+ "erp votaciones": 27,
155
+ "evea acceso al servicio evea": 28,
156
+ "evea conceder acceso": 25,
157
+ "evea cursos": 29,
158
+ "evea eliminar accesos": 31,
159
+ "evea encuesta cuestionario tareas examenes": 32,
160
+ "evea interrupcion del servicio": 33,
161
+ "evea matriculacion en cursos": 34,
162
+ "evea modificacion de la informacion": 30,
163
+ "evea nuevo": 36,
164
+ "evea otro": 37,
165
+ "evea plataforma": 38,
166
+ "evea problema con la configuracion": 39,
167
+ "evea reportes": 35,
168
+ "evea respaldos": 44,
169
+ "evea sincronizacion": 42,
170
+ "evea sincronizacion proveedor": 43,
171
+ "firma electronica quipux": 40,
172
+ "gestion": 46,
173
+ "informe": 51,
174
+ "infraestructura conectividad": 47,
175
+ "infraestructura mantenimiento preventivo": 48,
176
+ "infraestructura respaldo y recuperacion": 45,
177
+ "infraestructura uso de infraestructura fisica": 49,
178
+ "office 365": 52,
179
+ "office 365 acceso al servicio": 53,
180
+ "office 365 bloqueo de servicio": 54,
181
+ "office 365 conceder acceso": 50,
182
+ "office 365 creacion de cuenta": 56,
183
+ "office 365 eliminar accesos": 57,
184
+ "office 365 interrupcion del servicio": 58,
185
+ "office 365 otro": 59,
186
+ "office 365 problema con el servicio": 55,
187
+ "office 365 roles y permisos": 64,
188
+ "oficio": 63,
189
+ "otros": 62,
190
+ "pagina web": 61,
191
+ "poa": 60,
192
+ "proveedor": 66,
193
+ "proyectos": 68,
194
+ "redes sociales": 67,
195
+ "reporte informe": 109,
196
+ "reportes": 65,
197
+ "repositorio base digital": 69,
198
+ "revistas": 71,
199
+ "servicios conceder accesos": 72,
200
+ "servicios elimnar accesos": 73,
201
+ "sin datos": 99,
202
+ "soporte equipos garantia": 74,
203
+ "soporte equipos mantenimiento de hardware": 70,
204
+ "soporte equipos mantenimiento de software": 76,
205
+ "soporte erp bienestar": 77,
206
+ "soporte erp financiero": 78,
207
+ "soporte erp gestion academica": 79,
208
+ "soporte erp gestion logistica": 75,
209
+ "soporte erp otros": 84,
210
+ "soporte erp talento humano": 82,
211
+ "soporte evea plataforma": 83,
212
+ "soporte evea sincronizacion": 81,
213
+ "soporte infraestructura": 80,
214
+ "soporte infraestructura conectividad": 91,
215
+ "soporte infraestructura respaldo y recuperacion": 92,
216
+ "soporte mantenimiento correctivo": 93,
217
+ "soporte odoo monica otros": 90,
218
+ "soporte office 365": 94,
219
+ "soporte otros": 86,
220
+ "soporte pagina web": 87,
221
+ "soporte repositorio base digital": 88,
222
+ "soporte revistas": 89,
223
+ "soporte seguridad": 85,
224
+ "soporte streaming eventos": 96,
225
+ "soporte turniting": 97,
226
+ "soporte zoom": 98,
227
+ "streaming eventos": 95,
228
+ "turnitin": 100,
229
+ "zoom conceder accesos": 106,
230
+ "zoom credenciales zoom": 101,
231
+ "zoom eliminar accesos": 107,
232
+ "zoom interrupcion del servicio": 102,
233
+ "zoom licenciamiento": 103,
234
+ "zoom otros": 104,
235
+ "zoom videoconferencia": 108,
236
+ "zoom webinar": 105
237
+ },
238
+ "layer_norm_eps": 1e-05,
239
+ "max_position_embeddings": 514,
240
+ "model_type": "mpnet",
241
+ "num_attention_heads": 12,
242
+ "num_hidden_layers": 12,
243
+ "pad_token_id": 1,
244
+ "problem_type": "single_label_classification",
245
+ "relative_attention_num_buckets": 32,
246
+ "torch_dtype": "float32",
247
+ "transformers_version": "4.39.3",
248
+ "vocab_size": 30527
249
+ }
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3c34868aeab3207ec7c2c4e275ceadaefae544de0922c88fc3579f2be57654d
3
+ size 876736442
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd1fd254665e11824f0cb7c37c036de4909ac73fafa6d0dc380b4c0408a4f806
3
+ size 14244
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf06e273b5aeea9357f7cc59fe206491bb60037d5e532e3fcf971a7cf99a8c15
3
+ size 1064
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "[UNK]",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<pad>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "104": {
36
+ "content": "[UNK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "30526": {
44
+ "content": "<mask>",
45
+ "lstrip": true,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ }
51
+ },
52
+ "bos_token": "<s>",
53
+ "clean_up_tokenization_spaces": true,
54
+ "cls_token": "<s>",
55
+ "do_lower_case": true,
56
+ "eos_token": "</s>",
57
+ "mask_token": "<mask>",
58
+ "max_length": 128,
59
+ "model_max_length": 512,
60
+ "pad_to_multiple_of": null,
61
+ "pad_token": "<pad>",
62
+ "pad_token_type_id": 0,
63
+ "padding_side": "right",
64
+ "sep_token": "</s>",
65
+ "stride": 0,
66
+ "strip_accents": null,
67
+ "tokenize_chinese_chars": true,
68
+ "tokenizer_class": "MPNetTokenizer",
69
+ "truncation_side": "right",
70
+ "truncation_strategy": "longest_first",
71
+ "unk_token": "[UNK]"
72
+ }
trainer_state.json ADDED
@@ -0,0 +1,787 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.022735346358792183,
3
+ "best_model_checkpoint": "./results/checkpoint-10634",
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 10634,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "grad_norm": 0.7369279265403748,
14
+ "learning_rate": 4.952981004325748e-05,
15
+ "loss": 4.7032,
16
+ "step": 100
17
+ },
18
+ {
19
+ "epoch": 0.04,
20
+ "grad_norm": 0.5547892451286316,
21
+ "learning_rate": 4.905962008651496e-05,
22
+ "loss": 4.7033,
23
+ "step": 200
24
+ },
25
+ {
26
+ "epoch": 0.06,
27
+ "grad_norm": 0.5557618737220764,
28
+ "learning_rate": 4.858943012977243e-05,
29
+ "loss": 4.7015,
30
+ "step": 300
31
+ },
32
+ {
33
+ "epoch": 0.08,
34
+ "grad_norm": 0.5398121476173401,
35
+ "learning_rate": 4.8119240173029906e-05,
36
+ "loss": 4.7038,
37
+ "step": 400
38
+ },
39
+ {
40
+ "epoch": 0.09,
41
+ "grad_norm": 0.5128409266471863,
42
+ "learning_rate": 4.7649050216287384e-05,
43
+ "loss": 4.7003,
44
+ "step": 500
45
+ },
46
+ {
47
+ "epoch": 0.11,
48
+ "grad_norm": 0.57951420545578,
49
+ "learning_rate": 4.717886025954486e-05,
50
+ "loss": 4.7018,
51
+ "step": 600
52
+ },
53
+ {
54
+ "epoch": 0.13,
55
+ "grad_norm": 0.48418277502059937,
56
+ "learning_rate": 4.670867030280233e-05,
57
+ "loss": 4.7005,
58
+ "step": 700
59
+ },
60
+ {
61
+ "epoch": 0.15,
62
+ "grad_norm": 0.5264779329299927,
63
+ "learning_rate": 4.623848034605981e-05,
64
+ "loss": 4.7034,
65
+ "step": 800
66
+ },
67
+ {
68
+ "epoch": 0.17,
69
+ "grad_norm": 0.4631044268608093,
70
+ "learning_rate": 4.576829038931729e-05,
71
+ "loss": 4.7018,
72
+ "step": 900
73
+ },
74
+ {
75
+ "epoch": 0.19,
76
+ "grad_norm": 0.49229735136032104,
77
+ "learning_rate": 4.5298100432574765e-05,
78
+ "loss": 4.6994,
79
+ "step": 1000
80
+ },
81
+ {
82
+ "epoch": 0.21,
83
+ "grad_norm": 0.5025375485420227,
84
+ "learning_rate": 4.482791047583224e-05,
85
+ "loss": 4.7019,
86
+ "step": 1100
87
+ },
88
+ {
89
+ "epoch": 0.23,
90
+ "grad_norm": 0.5106560587882996,
91
+ "learning_rate": 4.4357720519089714e-05,
92
+ "loss": 4.7017,
93
+ "step": 1200
94
+ },
95
+ {
96
+ "epoch": 0.24,
97
+ "grad_norm": 0.4811244308948517,
98
+ "learning_rate": 4.388753056234719e-05,
99
+ "loss": 4.7015,
100
+ "step": 1300
101
+ },
102
+ {
103
+ "epoch": 0.26,
104
+ "grad_norm": 0.5047605037689209,
105
+ "learning_rate": 4.341734060560467e-05,
106
+ "loss": 4.7011,
107
+ "step": 1400
108
+ },
109
+ {
110
+ "epoch": 0.28,
111
+ "grad_norm": 0.4720272123813629,
112
+ "learning_rate": 4.2947150648862146e-05,
113
+ "loss": 4.7004,
114
+ "step": 1500
115
+ },
116
+ {
117
+ "epoch": 0.3,
118
+ "grad_norm": 0.3932195007801056,
119
+ "learning_rate": 4.247696069211962e-05,
120
+ "loss": 4.7028,
121
+ "step": 1600
122
+ },
123
+ {
124
+ "epoch": 0.32,
125
+ "grad_norm": 0.39717328548431396,
126
+ "learning_rate": 4.2006770735377095e-05,
127
+ "loss": 4.702,
128
+ "step": 1700
129
+ },
130
+ {
131
+ "epoch": 0.34,
132
+ "grad_norm": 0.46395114064216614,
133
+ "learning_rate": 4.153658077863457e-05,
134
+ "loss": 4.7005,
135
+ "step": 1800
136
+ },
137
+ {
138
+ "epoch": 0.36,
139
+ "grad_norm": 0.4270722270011902,
140
+ "learning_rate": 4.106639082189205e-05,
141
+ "loss": 4.7,
142
+ "step": 1900
143
+ },
144
+ {
145
+ "epoch": 0.38,
146
+ "grad_norm": 0.4068189561367035,
147
+ "learning_rate": 4.059620086514952e-05,
148
+ "loss": 4.7016,
149
+ "step": 2000
150
+ },
151
+ {
152
+ "epoch": 0.39,
153
+ "grad_norm": 0.4495134651660919,
154
+ "learning_rate": 4.0126010908407e-05,
155
+ "loss": 4.7,
156
+ "step": 2100
157
+ },
158
+ {
159
+ "epoch": 0.41,
160
+ "grad_norm": 0.4774065613746643,
161
+ "learning_rate": 3.9655820951664476e-05,
162
+ "loss": 4.7022,
163
+ "step": 2200
164
+ },
165
+ {
166
+ "epoch": 0.43,
167
+ "grad_norm": 0.4217697083950043,
168
+ "learning_rate": 3.9185630994921954e-05,
169
+ "loss": 4.7016,
170
+ "step": 2300
171
+ },
172
+ {
173
+ "epoch": 0.45,
174
+ "grad_norm": 0.4176025092601776,
175
+ "learning_rate": 3.871544103817943e-05,
176
+ "loss": 4.7005,
177
+ "step": 2400
178
+ },
179
+ {
180
+ "epoch": 0.47,
181
+ "grad_norm": 0.43538960814476013,
182
+ "learning_rate": 3.82452510814369e-05,
183
+ "loss": 4.7002,
184
+ "step": 2500
185
+ },
186
+ {
187
+ "epoch": 0.49,
188
+ "grad_norm": 0.3962569236755371,
189
+ "learning_rate": 3.777506112469438e-05,
190
+ "loss": 4.7015,
191
+ "step": 2600
192
+ },
193
+ {
194
+ "epoch": 0.51,
195
+ "grad_norm": 0.41195255517959595,
196
+ "learning_rate": 3.730487116795186e-05,
197
+ "loss": 4.7014,
198
+ "step": 2700
199
+ },
200
+ {
201
+ "epoch": 0.53,
202
+ "grad_norm": 0.3937325179576874,
203
+ "learning_rate": 3.6834681211209335e-05,
204
+ "loss": 4.7015,
205
+ "step": 2800
206
+ },
207
+ {
208
+ "epoch": 0.55,
209
+ "grad_norm": 0.39071130752563477,
210
+ "learning_rate": 3.6364491254466806e-05,
211
+ "loss": 4.6994,
212
+ "step": 2900
213
+ },
214
+ {
215
+ "epoch": 0.56,
216
+ "grad_norm": 0.40909039974212646,
217
+ "learning_rate": 3.5894301297724284e-05,
218
+ "loss": 4.7019,
219
+ "step": 3000
220
+ },
221
+ {
222
+ "epoch": 0.58,
223
+ "grad_norm": 0.3900412321090698,
224
+ "learning_rate": 3.542411134098176e-05,
225
+ "loss": 4.6996,
226
+ "step": 3100
227
+ },
228
+ {
229
+ "epoch": 0.6,
230
+ "grad_norm": 0.5122581720352173,
231
+ "learning_rate": 3.495392138423923e-05,
232
+ "loss": 4.6992,
233
+ "step": 3200
234
+ },
235
+ {
236
+ "epoch": 0.62,
237
+ "grad_norm": 0.38495704531669617,
238
+ "learning_rate": 3.448373142749671e-05,
239
+ "loss": 4.7024,
240
+ "step": 3300
241
+ },
242
+ {
243
+ "epoch": 0.64,
244
+ "grad_norm": 0.40592727065086365,
245
+ "learning_rate": 3.401354147075419e-05,
246
+ "loss": 4.7002,
247
+ "step": 3400
248
+ },
249
+ {
250
+ "epoch": 0.66,
251
+ "grad_norm": 0.4175421893596649,
252
+ "learning_rate": 3.354335151401166e-05,
253
+ "loss": 4.6993,
254
+ "step": 3500
255
+ },
256
+ {
257
+ "epoch": 0.68,
258
+ "grad_norm": 0.43954411149024963,
259
+ "learning_rate": 3.3073161557269136e-05,
260
+ "loss": 4.7016,
261
+ "step": 3600
262
+ },
263
+ {
264
+ "epoch": 0.7,
265
+ "grad_norm": 0.501674473285675,
266
+ "learning_rate": 3.2602971600526614e-05,
267
+ "loss": 4.6995,
268
+ "step": 3700
269
+ },
270
+ {
271
+ "epoch": 0.71,
272
+ "grad_norm": 0.3901698589324951,
273
+ "learning_rate": 3.2132781643784084e-05,
274
+ "loss": 4.7006,
275
+ "step": 3800
276
+ },
277
+ {
278
+ "epoch": 0.73,
279
+ "grad_norm": 0.3993096649646759,
280
+ "learning_rate": 3.166259168704156e-05,
281
+ "loss": 4.699,
282
+ "step": 3900
283
+ },
284
+ {
285
+ "epoch": 0.75,
286
+ "grad_norm": 0.43237388134002686,
287
+ "learning_rate": 3.119240173029904e-05,
288
+ "loss": 4.7001,
289
+ "step": 4000
290
+ },
291
+ {
292
+ "epoch": 0.77,
293
+ "grad_norm": 2.5795607566833496,
294
+ "learning_rate": 3.072221177355652e-05,
295
+ "loss": 4.6928,
296
+ "step": 4100
297
+ },
298
+ {
299
+ "epoch": 0.79,
300
+ "grad_norm": 0.9452407956123352,
301
+ "learning_rate": 3.0252021816814e-05,
302
+ "loss": 4.7014,
303
+ "step": 4200
304
+ },
305
+ {
306
+ "epoch": 0.81,
307
+ "grad_norm": 0.7060949802398682,
308
+ "learning_rate": 2.978183186007147e-05,
309
+ "loss": 4.707,
310
+ "step": 4300
311
+ },
312
+ {
313
+ "epoch": 0.83,
314
+ "grad_norm": 1.052825689315796,
315
+ "learning_rate": 2.9311641903328947e-05,
316
+ "loss": 4.7014,
317
+ "step": 4400
318
+ },
319
+ {
320
+ "epoch": 0.85,
321
+ "grad_norm": 0.7108451128005981,
322
+ "learning_rate": 2.8841451946586424e-05,
323
+ "loss": 4.7027,
324
+ "step": 4500
325
+ },
326
+ {
327
+ "epoch": 0.87,
328
+ "grad_norm": 0.6421079635620117,
329
+ "learning_rate": 2.83712619898439e-05,
330
+ "loss": 4.7033,
331
+ "step": 4600
332
+ },
333
+ {
334
+ "epoch": 0.88,
335
+ "grad_norm": 0.5927799940109253,
336
+ "learning_rate": 2.7901072033101373e-05,
337
+ "loss": 4.7044,
338
+ "step": 4700
339
+ },
340
+ {
341
+ "epoch": 0.9,
342
+ "grad_norm": 0.5713520050048828,
343
+ "learning_rate": 2.743088207635885e-05,
344
+ "loss": 4.7022,
345
+ "step": 4800
346
+ },
347
+ {
348
+ "epoch": 0.92,
349
+ "grad_norm": 0.5557407140731812,
350
+ "learning_rate": 2.6960692119616325e-05,
351
+ "loss": 4.7036,
352
+ "step": 4900
353
+ },
354
+ {
355
+ "epoch": 0.94,
356
+ "grad_norm": 0.37561553716659546,
357
+ "learning_rate": 2.6490502162873802e-05,
358
+ "loss": 4.7011,
359
+ "step": 5000
360
+ },
361
+ {
362
+ "epoch": 0.96,
363
+ "grad_norm": 0.404910683631897,
364
+ "learning_rate": 2.602031220613128e-05,
365
+ "loss": 4.7007,
366
+ "step": 5100
367
+ },
368
+ {
369
+ "epoch": 0.98,
370
+ "grad_norm": 0.39087265729904175,
371
+ "learning_rate": 2.5550122249388754e-05,
372
+ "loss": 4.7021,
373
+ "step": 5200
374
+ },
375
+ {
376
+ "epoch": 1.0,
377
+ "grad_norm": 0.3689398169517517,
378
+ "learning_rate": 2.507993229264623e-05,
379
+ "loss": 4.7004,
380
+ "step": 5300
381
+ },
382
+ {
383
+ "epoch": 1.0,
384
+ "eval_accuracy": 0.008644168146832444,
385
+ "eval_f1": 0.00014816254395838658,
386
+ "eval_loss": 4.700786113739014,
387
+ "eval_precision": 7.472164295071267e-05,
388
+ "eval_recall": 0.008644168146832444,
389
+ "eval_runtime": 96.2388,
390
+ "eval_samples_per_second": 87.75,
391
+ "eval_steps_per_second": 4.884,
392
+ "step": 5317
393
+ },
394
+ {
395
+ "epoch": 1.02,
396
+ "grad_norm": 0.3822825849056244,
397
+ "learning_rate": 2.4609742335903706e-05,
398
+ "loss": 4.7002,
399
+ "step": 5400
400
+ },
401
+ {
402
+ "epoch": 1.03,
403
+ "grad_norm": 0.421373575925827,
404
+ "learning_rate": 2.413955237916118e-05,
405
+ "loss": 4.6991,
406
+ "step": 5500
407
+ },
408
+ {
409
+ "epoch": 1.05,
410
+ "grad_norm": 0.4438491463661194,
411
+ "learning_rate": 2.3669362422418658e-05,
412
+ "loss": 4.7001,
413
+ "step": 5600
414
+ },
415
+ {
416
+ "epoch": 1.07,
417
+ "grad_norm": 0.812016487121582,
418
+ "learning_rate": 2.3199172465676132e-05,
419
+ "loss": 4.6941,
420
+ "step": 5700
421
+ },
422
+ {
423
+ "epoch": 1.09,
424
+ "grad_norm": 1.119163155555725,
425
+ "learning_rate": 2.272898250893361e-05,
426
+ "loss": 4.6807,
427
+ "step": 5800
428
+ },
429
+ {
430
+ "epoch": 1.11,
431
+ "grad_norm": 1.1431705951690674,
432
+ "learning_rate": 2.2258792552191084e-05,
433
+ "loss": 4.6842,
434
+ "step": 5900
435
+ },
436
+ {
437
+ "epoch": 1.13,
438
+ "grad_norm": 1.4402434825897217,
439
+ "learning_rate": 2.1788602595448562e-05,
440
+ "loss": 4.6653,
441
+ "step": 6000
442
+ },
443
+ {
444
+ "epoch": 1.15,
445
+ "grad_norm": 2.073824644088745,
446
+ "learning_rate": 2.131841263870604e-05,
447
+ "loss": 4.6543,
448
+ "step": 6100
449
+ },
450
+ {
451
+ "epoch": 1.17,
452
+ "grad_norm": 1.6333357095718384,
453
+ "learning_rate": 2.0848222681963514e-05,
454
+ "loss": 4.6394,
455
+ "step": 6200
456
+ },
457
+ {
458
+ "epoch": 1.18,
459
+ "grad_norm": 1.206061840057373,
460
+ "learning_rate": 2.037803272522099e-05,
461
+ "loss": 4.6213,
462
+ "step": 6300
463
+ },
464
+ {
465
+ "epoch": 1.2,
466
+ "grad_norm": 0.5230748653411865,
467
+ "learning_rate": 1.9907842768478466e-05,
468
+ "loss": 4.7044,
469
+ "step": 6400
470
+ },
471
+ {
472
+ "epoch": 1.22,
473
+ "grad_norm": 0.4634922444820404,
474
+ "learning_rate": 1.9437652811735943e-05,
475
+ "loss": 4.7031,
476
+ "step": 6500
477
+ },
478
+ {
479
+ "epoch": 1.24,
480
+ "grad_norm": 0.47288939356803894,
481
+ "learning_rate": 1.8967462854993417e-05,
482
+ "loss": 4.7027,
483
+ "step": 6600
484
+ },
485
+ {
486
+ "epoch": 1.26,
487
+ "grad_norm": 0.4731047749519348,
488
+ "learning_rate": 1.8497272898250895e-05,
489
+ "loss": 4.7014,
490
+ "step": 6700
491
+ },
492
+ {
493
+ "epoch": 1.28,
494
+ "grad_norm": 0.5110020637512207,
495
+ "learning_rate": 1.802708294150837e-05,
496
+ "loss": 4.6995,
497
+ "step": 6800
498
+ },
499
+ {
500
+ "epoch": 1.3,
501
+ "grad_norm": 0.5348946452140808,
502
+ "learning_rate": 1.7556892984765847e-05,
503
+ "loss": 4.7013,
504
+ "step": 6900
505
+ },
506
+ {
507
+ "epoch": 1.32,
508
+ "grad_norm": 0.5182259678840637,
509
+ "learning_rate": 1.708670302802332e-05,
510
+ "loss": 4.6991,
511
+ "step": 7000
512
+ },
513
+ {
514
+ "epoch": 1.34,
515
+ "grad_norm": 0.6097771525382996,
516
+ "learning_rate": 1.66165130712808e-05,
517
+ "loss": 4.7022,
518
+ "step": 7100
519
+ },
520
+ {
521
+ "epoch": 1.35,
522
+ "grad_norm": 0.49007490277290344,
523
+ "learning_rate": 1.6146323114538276e-05,
524
+ "loss": 4.7012,
525
+ "step": 7200
526
+ },
527
+ {
528
+ "epoch": 1.37,
529
+ "grad_norm": 0.5180237293243408,
530
+ "learning_rate": 1.567613315779575e-05,
531
+ "loss": 4.6998,
532
+ "step": 7300
533
+ },
534
+ {
535
+ "epoch": 1.39,
536
+ "grad_norm": 0.4303135275840759,
537
+ "learning_rate": 1.5205943201053227e-05,
538
+ "loss": 4.7025,
539
+ "step": 7400
540
+ },
541
+ {
542
+ "epoch": 1.41,
543
+ "grad_norm": 0.6103301644325256,
544
+ "learning_rate": 1.47357532443107e-05,
545
+ "loss": 4.7027,
546
+ "step": 7500
547
+ },
548
+ {
549
+ "epoch": 1.43,
550
+ "grad_norm": 0.5991978049278259,
551
+ "learning_rate": 1.4265563287568178e-05,
552
+ "loss": 4.7016,
553
+ "step": 7600
554
+ },
555
+ {
556
+ "epoch": 1.45,
557
+ "grad_norm": 0.590713381767273,
558
+ "learning_rate": 1.3795373330825653e-05,
559
+ "loss": 4.6992,
560
+ "step": 7700
561
+ },
562
+ {
563
+ "epoch": 1.47,
564
+ "grad_norm": 0.6496405601501465,
565
+ "learning_rate": 1.332518337408313e-05,
566
+ "loss": 4.7005,
567
+ "step": 7800
568
+ },
569
+ {
570
+ "epoch": 1.49,
571
+ "grad_norm": 0.5405462980270386,
572
+ "learning_rate": 1.2854993417340605e-05,
573
+ "loss": 4.704,
574
+ "step": 7900
575
+ },
576
+ {
577
+ "epoch": 1.5,
578
+ "grad_norm": 0.5655048489570618,
579
+ "learning_rate": 1.2384803460598082e-05,
580
+ "loss": 4.701,
581
+ "step": 8000
582
+ },
583
+ {
584
+ "epoch": 1.52,
585
+ "grad_norm": 1.1468925476074219,
586
+ "learning_rate": 1.1914613503855558e-05,
587
+ "loss": 4.6547,
588
+ "step": 8100
589
+ },
590
+ {
591
+ "epoch": 1.54,
592
+ "grad_norm": 3.2609410285949707,
593
+ "learning_rate": 1.1444423547113034e-05,
594
+ "loss": 4.6054,
595
+ "step": 8200
596
+ },
597
+ {
598
+ "epoch": 1.56,
599
+ "grad_norm": 3.0458478927612305,
600
+ "learning_rate": 1.097423359037051e-05,
601
+ "loss": 4.5575,
602
+ "step": 8300
603
+ },
604
+ {
605
+ "epoch": 1.58,
606
+ "grad_norm": 4.3110527992248535,
607
+ "learning_rate": 1.0504043633627986e-05,
608
+ "loss": 4.5948,
609
+ "step": 8400
610
+ },
611
+ {
612
+ "epoch": 1.6,
613
+ "grad_norm": 1.7174781560897827,
614
+ "learning_rate": 1.0033853676885462e-05,
615
+ "loss": 4.5585,
616
+ "step": 8500
617
+ },
618
+ {
619
+ "epoch": 1.62,
620
+ "grad_norm": 3.2902700901031494,
621
+ "learning_rate": 9.56366372014294e-06,
622
+ "loss": 4.5886,
623
+ "step": 8600
624
+ },
625
+ {
626
+ "epoch": 1.64,
627
+ "grad_norm": 1.1743800640106201,
628
+ "learning_rate": 9.093473763400415e-06,
629
+ "loss": 4.5534,
630
+ "step": 8700
631
+ },
632
+ {
633
+ "epoch": 1.66,
634
+ "grad_norm": 1.5308388471603394,
635
+ "learning_rate": 8.623283806657891e-06,
636
+ "loss": 4.547,
637
+ "step": 8800
638
+ },
639
+ {
640
+ "epoch": 1.67,
641
+ "grad_norm": 2.839127540588379,
642
+ "learning_rate": 8.153093849915366e-06,
643
+ "loss": 4.5339,
644
+ "step": 8900
645
+ },
646
+ {
647
+ "epoch": 1.69,
648
+ "grad_norm": 1.9120197296142578,
649
+ "learning_rate": 7.682903893172842e-06,
650
+ "loss": 4.5439,
651
+ "step": 9000
652
+ },
653
+ {
654
+ "epoch": 1.71,
655
+ "grad_norm": 1.549185872077942,
656
+ "learning_rate": 7.212713936430318e-06,
657
+ "loss": 4.5276,
658
+ "step": 9100
659
+ },
660
+ {
661
+ "epoch": 1.73,
662
+ "grad_norm": 2.3087658882141113,
663
+ "learning_rate": 6.742523979687794e-06,
664
+ "loss": 4.5081,
665
+ "step": 9200
666
+ },
667
+ {
668
+ "epoch": 1.75,
669
+ "grad_norm": 1.4830306768417358,
670
+ "learning_rate": 6.27233402294527e-06,
671
+ "loss": 4.494,
672
+ "step": 9300
673
+ },
674
+ {
675
+ "epoch": 1.77,
676
+ "grad_norm": 2.1838178634643555,
677
+ "learning_rate": 5.802144066202746e-06,
678
+ "loss": 4.4904,
679
+ "step": 9400
680
+ },
681
+ {
682
+ "epoch": 1.79,
683
+ "grad_norm": 3.3637304306030273,
684
+ "learning_rate": 5.331954109460222e-06,
685
+ "loss": 4.4807,
686
+ "step": 9500
687
+ },
688
+ {
689
+ "epoch": 1.81,
690
+ "grad_norm": 1.637593388557434,
691
+ "learning_rate": 4.861764152717698e-06,
692
+ "loss": 4.474,
693
+ "step": 9600
694
+ },
695
+ {
696
+ "epoch": 1.82,
697
+ "grad_norm": 1.833465576171875,
698
+ "learning_rate": 4.391574195975174e-06,
699
+ "loss": 4.4719,
700
+ "step": 9700
701
+ },
702
+ {
703
+ "epoch": 1.84,
704
+ "grad_norm": 2.6578447818756104,
705
+ "learning_rate": 3.921384239232651e-06,
706
+ "loss": 4.4593,
707
+ "step": 9800
708
+ },
709
+ {
710
+ "epoch": 1.86,
711
+ "grad_norm": 2.121244192123413,
712
+ "learning_rate": 3.451194282490126e-06,
713
+ "loss": 4.4514,
714
+ "step": 9900
715
+ },
716
+ {
717
+ "epoch": 1.88,
718
+ "grad_norm": 2.080278158187866,
719
+ "learning_rate": 2.981004325747602e-06,
720
+ "loss": 4.4407,
721
+ "step": 10000
722
+ },
723
+ {
724
+ "epoch": 1.9,
725
+ "grad_norm": 2.2840731143951416,
726
+ "learning_rate": 2.510814369005078e-06,
727
+ "loss": 4.4289,
728
+ "step": 10100
729
+ },
730
+ {
731
+ "epoch": 1.92,
732
+ "grad_norm": 2.203077554702759,
733
+ "learning_rate": 2.0406244122625544e-06,
734
+ "loss": 4.4071,
735
+ "step": 10200
736
+ },
737
+ {
738
+ "epoch": 1.94,
739
+ "grad_norm": 2.4338624477386475,
740
+ "learning_rate": 1.5704344555200301e-06,
741
+ "loss": 4.4166,
742
+ "step": 10300
743
+ },
744
+ {
745
+ "epoch": 1.96,
746
+ "grad_norm": 1.855125069618225,
747
+ "learning_rate": 1.1002444987775063e-06,
748
+ "loss": 4.4079,
749
+ "step": 10400
750
+ },
751
+ {
752
+ "epoch": 1.97,
753
+ "grad_norm": 2.9419732093811035,
754
+ "learning_rate": 6.300545420349821e-07,
755
+ "loss": 4.4227,
756
+ "step": 10500
757
+ },
758
+ {
759
+ "epoch": 1.99,
760
+ "grad_norm": 3.1695730686187744,
761
+ "learning_rate": 1.5986458529245815e-07,
762
+ "loss": 4.4244,
763
+ "step": 10600
764
+ },
765
+ {
766
+ "epoch": 2.0,
767
+ "eval_accuracy": 0.022735346358792183,
768
+ "eval_f1": 0.00212270922930165,
769
+ "eval_loss": 4.410290241241455,
770
+ "eval_precision": 0.0011595953132254675,
771
+ "eval_recall": 0.022735346358792183,
772
+ "eval_runtime": 96.0024,
773
+ "eval_samples_per_second": 87.967,
774
+ "eval_steps_per_second": 4.896,
775
+ "step": 10634
776
+ }
777
+ ],
778
+ "logging_steps": 100,
779
+ "max_steps": 10634,
780
+ "num_input_tokens_seen": 0,
781
+ "num_train_epochs": 2,
782
+ "save_steps": 500,
783
+ "total_flos": 5.041092263921664e+16,
784
+ "train_batch_size": 18,
785
+ "trial_name": null,
786
+ "trial_params": null
787
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ac24511dda067ae8ccff9fc3cfe145f435c4e027dd443fec4e9340547a2236d
3
+ size 4856
vocab.txt ADDED
The diff for this file is too large to render. See raw diff