kafikani committed 12 days ago

Commit

e84b4f8

•

1 Parent(s): 59b9a83

Upload folder using huggingface_hub

Browse files

Files changed (19) hide show

README.md +36 -0
checkpoint-6201/config.json +55 -0
checkpoint-6201/model.safetensors +3 -0
checkpoint-6201/optimizer.pt +3 -0
checkpoint-6201/rng_state.pth +3 -0
checkpoint-6201/scheduler.pt +3 -0
checkpoint-6201/trainer_state.json +1832 -0
checkpoint-6201/training_args.bin +3 -0
config.json +55 -0
merges.txt +0 -0
model.safetensors +3 -0
runs/Nov01_08-49-04_r-kafikani-longformer-va66dsc0-0e056-fjgin/events.out.tfevents.1730450948.r-kafikani-longformer-va66dsc0-0e056-fjgin.465.0 +2 -2
runs/Nov01_08-49-04_r-kafikani-longformer-va66dsc0-0e056-fjgin/events.out.tfevents.1730870444.r-kafikani-longformer-va66dsc0-0e056-fjgin.465.1 +3 -0
special_tokens_map.json +15 -0
tokenizer.json +0 -0
tokenizer_config.json +57 -0
training_args.bin +3 -0
training_params.json +30 -0
vocab.json +0 -0

README.md ADDED Viewed

	@@ -0,0 +1,36 @@

+---
+tags:
+- autotrain
+- text-classification
+base_model: allenai/longformer-base-4096
+widget:
+- text: "I love AutoTrain"
+---
+# Model Trained Using AutoTrain
+- Problem type: Text Classification
+## Validation Metrics
+loss: 1.307055115699768
+f1_macro: 0.5244016249451032
+f1_micro: 0.7504835589941973
+f1_weighted: 0.714761195760481
+precision_macro: 0.5012229210342417
+precision_micro: 0.7504835589941973
+precision_weighted: 0.6860840439724423
+recall_macro: 0.5532259049014222
+recall_micro: 0.7504835589941973
+recall_weighted: 0.7504835589941973
+accuracy: 0.7504835589941973

checkpoint-6201/config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "_name_or_path": "allenai/longformer-base-4096",
+  "_num_labels": 3,
+  "architectures": [
+    "LongformerForSequenceClassification"
+  ],
+  "attention_mode": "longformer",
+  "attention_probs_dropout_prob": 0.1,
+  "attention_window": [
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512
+  ],
+  "bos_token_id": 0,
+  "eos_token_id": 2,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "negative",
+    "1": "neutral",
+    "2": "positive"
+  },
+  "ignore_attention_mask": false,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "negative": 0,
+    "neutral": 1,
+    "positive": 2
+  },
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 4098,
+  "model_type": "longformer",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "onnx_export": false,
+  "pad_token_id": 1,
+  "problem_type": "single_label_classification",
+  "sep_token_id": 2,
+  "torch_dtype": "float32",
+  "transformers_version": "4.45.0",
+  "type_vocab_size": 1,
+  "vocab_size": 50265
+}

checkpoint-6201/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:322dae133d3ce467a820df6105d8c7973c40d913fb2ce1117801d5c7b57c139f
+size 594681260

checkpoint-6201/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0b69385ae3f0d8b25a998fc24255da1a205051f30b2e71450956ea059fb08b4e
+size 1189514810

checkpoint-6201/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:66988915a0a37f08074f2bbf10041158cae43ca697efd6a7f943e09e0e6bd7e4
+size 13990

checkpoint-6201/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8311d9af8bc0939f4f57ff934d5efeb067fdfe2d024c64e6f9d34a9e095e315b
+size 1064

checkpoint-6201/trainer_state.json ADDED Viewed

	@@ -0,0 +1,1832 @@

+{
+  "best_metric": 1.307055115699768,
+  "best_model_checkpoint": "autotrain-iinjh-0wh75/checkpoint-6201",
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 6201,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.012094823415578132,
+      "grad_norm": 10.670669555664062,
+      "learning_rate": 2.012882447665056e-06,
+      "loss": 1.0613,
+      "step": 25
+    },
+    {
+      "epoch": 0.024189646831156264,
+      "grad_norm": 17.839488983154297,
+      "learning_rate": 4.025764895330112e-06,
+      "loss": 1.0539,
+      "step": 50
+    },
+    {
+      "epoch": 0.036284470246734396,
+      "grad_norm": 9.838277816772461,
+      "learning_rate": 6.038647342995169e-06,
+      "loss": 1.0026,
+      "step": 75
+    },
+    {
+      "epoch": 0.04837929366231253,
+      "grad_norm": 18.464771270751953,
+      "learning_rate": 8.051529790660225e-06,
+      "loss": 0.9234,
+      "step": 100
+    },
+    {
+      "epoch": 0.06047411707789066,
+      "grad_norm": 14.502671241760254,
+      "learning_rate": 1.0064412238325282e-05,
+      "loss": 1.2115,
+      "step": 125
+    },
+    {
+      "epoch": 0.07256894049346879,
+      "grad_norm": 13.285282135009766,
+      "learning_rate": 1.2077294685990338e-05,
+      "loss": 1.0436,
+      "step": 150
+    },
+    {
+      "epoch": 0.08466376390904692,
+      "grad_norm": 10.353692054748535,
+      "learning_rate": 1.4090177133655394e-05,
+      "loss": 0.9726,
+      "step": 175
+    },
+    {
+      "epoch": 0.09675858732462506,
+      "grad_norm": 8.703958511352539,
+      "learning_rate": 1.610305958132045e-05,
+      "loss": 0.8265,
+      "step": 200
+    },
+    {
+      "epoch": 0.10885341074020319,
+      "grad_norm": 13.100640296936035,
+      "learning_rate": 1.8115942028985507e-05,
+      "loss": 1.2616,
+      "step": 225
+    },
+    {
+      "epoch": 0.12094823415578132,
+      "grad_norm": 20.051851272583008,
+      "learning_rate": 2.0128824476650564e-05,
+      "loss": 1.0757,
+      "step": 250
+    },
+    {
+      "epoch": 0.13304305757135945,
+      "grad_norm": 10.080187797546387,
+      "learning_rate": 2.214170692431562e-05,
+      "loss": 1.1058,
+      "step": 275
+    },
+    {
+      "epoch": 0.14513788098693758,
+      "grad_norm": 1.0888252258300781,
+      "learning_rate": 2.4154589371980676e-05,
+      "loss": 1.0442,
+      "step": 300
+    },
+    {
+      "epoch": 0.15723270440251572,
+      "grad_norm": 216.78359985351562,
+      "learning_rate": 2.6167471819645733e-05,
+      "loss": 0.4581,
+      "step": 325
+    },
+    {
+      "epoch": 0.16932752781809385,
+      "grad_norm": 3.2515599727630615,
+      "learning_rate": 2.8180354267310787e-05,
+      "loss": 1.7329,
+      "step": 350
+    },
+    {
+      "epoch": 0.18142235123367198,
+      "grad_norm": 4.606472969055176,
+      "learning_rate": 3.0193236714975848e-05,
+      "loss": 1.8193,
+      "step": 375
+    },
+    {
+      "epoch": 0.1935171746492501,
+      "grad_norm": 4.541548252105713,
+      "learning_rate": 3.22061191626409e-05,
+      "loss": 1.9815,
+      "step": 400
+    },
+    {
+      "epoch": 0.20561199806482824,
+      "grad_norm": 0.2636414170265198,
+      "learning_rate": 3.421900161030596e-05,
+      "loss": 1.1492,
+      "step": 425
+    },
+    {
+      "epoch": 0.21770682148040638,
+      "grad_norm": 5.543747425079346,
+      "learning_rate": 3.6231884057971014e-05,
+      "loss": 1.4995,
+      "step": 450
+    },
+    {
+      "epoch": 0.2298016448959845,
+      "grad_norm": 29.730493545532227,
+      "learning_rate": 3.824476650563607e-05,
+      "loss": 1.736,
+      "step": 475
+    },
+    {
+      "epoch": 0.24189646831156264,
+      "grad_norm": 0.5111683011054993,
+      "learning_rate": 4.025764895330113e-05,
+      "loss": 1.7303,
+      "step": 500
+    },
+    {
+      "epoch": 0.2539912917271408,
+      "grad_norm": 1.9221380949020386,
+      "learning_rate": 4.2270531400966186e-05,
+      "loss": 1.3904,
+      "step": 525
+    },
+    {
+      "epoch": 0.2660861151427189,
+      "grad_norm": 0.305082768201828,
+      "learning_rate": 4.428341384863124e-05,
+      "loss": 1.6028,
+      "step": 550
+    },
+    {
+      "epoch": 0.27818093855829706,
+      "grad_norm": 49.53160858154297,
+      "learning_rate": 4.62962962962963e-05,
+      "loss": 1.5488,
+      "step": 575
+    },
+    {
+      "epoch": 0.29027576197387517,
+      "grad_norm": 0.5875459313392639,
+      "learning_rate": 4.830917874396135e-05,
+      "loss": 2.5069,
+      "step": 600
+    },
+    {
+      "epoch": 0.30237058538945333,
+      "grad_norm": 0.1444765031337738,
+      "learning_rate": 4.996415770609319e-05,
+      "loss": 1.1429,
+      "step": 625
+    },
+    {
+      "epoch": 0.31446540880503143,
+      "grad_norm": 6.752070903778076,
+      "learning_rate": 4.974014336917563e-05,
+      "loss": 1.1868,
+      "step": 650
+    },
+    {
+      "epoch": 0.3265602322206096,
+      "grad_norm": 2.889754056930542,
+      "learning_rate": 4.951612903225807e-05,
+      "loss": 1.2563,
+      "step": 675
+    },
+    {
+      "epoch": 0.3386550556361877,
+      "grad_norm": 0.23045390844345093,
+      "learning_rate": 4.92921146953405e-05,
+      "loss": 1.5123,
+      "step": 700
+    },
+    {
+      "epoch": 0.35074987905176586,
+      "grad_norm": 0.20135998725891113,
+      "learning_rate": 4.906810035842294e-05,
+      "loss": 1.7428,
+      "step": 725
+    },
+    {
+      "epoch": 0.36284470246734396,
+      "grad_norm": 0.06913596391677856,
+      "learning_rate": 4.884408602150538e-05,
+      "loss": 1.2342,
+      "step": 750
+    },
+    {
+      "epoch": 0.3749395258829221,
+      "grad_norm": 2.32104754447937,
+      "learning_rate": 4.8620071684587816e-05,
+      "loss": 1.4426,
+      "step": 775
+    },
+    {
+      "epoch": 0.3870343492985002,
+      "grad_norm": 1.5299639701843262,
+      "learning_rate": 4.8396057347670255e-05,
+      "loss": 1.3624,
+      "step": 800
+    },
+    {
+      "epoch": 0.3991291727140784,
+      "grad_norm": 0.35129979252815247,
+      "learning_rate": 4.8172043010752693e-05,
+      "loss": 1.9102,
+      "step": 825
+    },
+    {
+      "epoch": 0.4112239961296565,
+      "grad_norm": 11.400070190429688,
+      "learning_rate": 4.7948028673835125e-05,
+      "loss": 1.5198,
+      "step": 850
+    },
+    {
+      "epoch": 0.42331881954523465,
+      "grad_norm": 13.278685569763184,
+      "learning_rate": 4.7724014336917564e-05,
+      "loss": 1.146,
+      "step": 875
+    },
+    {
+      "epoch": 0.43541364296081275,
+      "grad_norm": 14.47873592376709,
+      "learning_rate": 4.75e-05,
+      "loss": 0.8003,
+      "step": 900
+    },
+    {
+      "epoch": 0.4475084663763909,
+      "grad_norm": 13.323905944824219,
+      "learning_rate": 4.727598566308244e-05,
+      "loss": 1.0161,
+      "step": 925
+    },
+    {
+      "epoch": 0.459603289791969,
+      "grad_norm": 29.149803161621094,
+      "learning_rate": 4.705197132616488e-05,
+      "loss": 2.1808,
+      "step": 950
+    },
+    {
+      "epoch": 0.4716981132075472,
+      "grad_norm": 14.055343627929688,
+      "learning_rate": 4.682795698924731e-05,
+      "loss": 1.9727,
+      "step": 975
+    },
+    {
+      "epoch": 0.4837929366231253,
+      "grad_norm": 13.733134269714355,
+      "learning_rate": 4.660394265232975e-05,
+      "loss": 1.3969,
+      "step": 1000
+    },
+    {
+      "epoch": 0.49588776003870344,
+      "grad_norm": 12.515105247497559,
+      "learning_rate": 4.637992831541219e-05,
+      "loss": 1.1516,
+      "step": 1025
+    },
+    {
+      "epoch": 0.5079825834542816,
+      "grad_norm": 20.534067153930664,
+      "learning_rate": 4.615591397849463e-05,
+      "loss": 2.2554,
+      "step": 1050
+    },
+    {
+      "epoch": 0.5200774068698597,
+      "grad_norm": 16.800443649291992,
+      "learning_rate": 4.5931899641577066e-05,
+      "loss": 1.6032,
+      "step": 1075
+    },
+    {
+      "epoch": 0.5321722302854378,
+      "grad_norm": 2.256089448928833,
+      "learning_rate": 4.57078853046595e-05,
+      "loss": 1.7488,
+      "step": 1100
+    },
+    {
+      "epoch": 0.5442670537010159,
+      "grad_norm": 13.562662124633789,
+      "learning_rate": 4.548387096774194e-05,
+      "loss": 1.2443,
+      "step": 1125
+    },
+    {
+      "epoch": 0.5563618771165941,
+      "grad_norm": 12.777020454406738,
+      "learning_rate": 4.5259856630824375e-05,
+      "loss": 1.3996,
+      "step": 1150
+    },
+    {
+      "epoch": 0.5684567005321722,
+      "grad_norm": 11.443547248840332,
+      "learning_rate": 4.503584229390681e-05,
+      "loss": 1.2757,
+      "step": 1175
+    },
+    {
+      "epoch": 0.5805515239477503,
+      "grad_norm": 3.251011848449707,
+      "learning_rate": 4.481182795698925e-05,
+      "loss": 1.0835,
+      "step": 1200
+    },
+    {
+      "epoch": 0.5926463473633284,
+      "grad_norm": 12.59429931640625,
+      "learning_rate": 4.458781362007169e-05,
+      "loss": 1.4228,
+      "step": 1225
+    },
+    {
+      "epoch": 0.6047411707789067,
+      "grad_norm": 1.7253248691558838,
+      "learning_rate": 4.436379928315412e-05,
+      "loss": 1.7177,
+      "step": 1250
+    },
+    {
+      "epoch": 0.6168359941944848,
+      "grad_norm": 15.0354642868042,
+      "learning_rate": 4.413978494623656e-05,
+      "loss": 1.7989,
+      "step": 1275
+    },
+    {
+      "epoch": 0.6289308176100629,
+      "grad_norm": 13.84261417388916,
+      "learning_rate": 4.3915770609318994e-05,
+      "loss": 1.4853,
+      "step": 1300
+    },
+    {
+      "epoch": 0.6410256410256411,
+      "grad_norm": 13.903498649597168,
+      "learning_rate": 4.369175627240143e-05,
+      "loss": 1.1268,
+      "step": 1325
+    },
+    {
+      "epoch": 0.6531204644412192,
+      "grad_norm": 1.7544844150543213,
+      "learning_rate": 4.346774193548388e-05,
+      "loss": 1.3478,
+      "step": 1350
+    },
+    {
+      "epoch": 0.6652152878567973,
+      "grad_norm": 16.601131439208984,
+      "learning_rate": 4.324372759856631e-05,
+      "loss": 1.5037,
+      "step": 1375
+    },
+    {
+      "epoch": 0.6773101112723754,
+      "grad_norm": 0.15384919941425323,
+      "learning_rate": 4.301971326164875e-05,
+      "loss": 1.8697,
+      "step": 1400
+    },
+    {
+      "epoch": 0.6894049346879536,
+      "grad_norm": 16.029470443725586,
+      "learning_rate": 4.279569892473119e-05,
+      "loss": 1.7013,
+      "step": 1425
+    },
+    {
+      "epoch": 0.7014997581035317,
+      "grad_norm": 1.9329349994659424,
+      "learning_rate": 4.257168458781362e-05,
+      "loss": 1.852,
+      "step": 1450
+    },
+    {
+      "epoch": 0.7135945815191098,
+      "grad_norm": 34.523712158203125,
+      "learning_rate": 4.234767025089606e-05,
+      "loss": 1.1739,
+      "step": 1475
+    },
+    {
+      "epoch": 0.7256894049346879,
+      "grad_norm": 18.080400466918945,
+      "learning_rate": 4.2123655913978496e-05,
+      "loss": 1.1161,
+      "step": 1500
+    },
+    {
+      "epoch": 0.7377842283502661,
+      "grad_norm": 13.704357147216797,
+      "learning_rate": 4.1899641577060935e-05,
+      "loss": 1.1861,
+      "step": 1525
+    },
+    {
+      "epoch": 0.7498790517658442,
+      "grad_norm": 10.594738006591797,
+      "learning_rate": 4.167562724014337e-05,
+      "loss": 1.2265,
+      "step": 1550
+    },
+    {
+      "epoch": 0.7619738751814223,
+      "grad_norm": 1.4613639116287231,
+      "learning_rate": 4.1451612903225805e-05,
+      "loss": 1.173,
+      "step": 1575
+    },
+    {
+      "epoch": 0.7740686985970004,
+      "grad_norm": 0.7370879054069519,
+      "learning_rate": 4.1227598566308244e-05,
+      "loss": 1.9062,
+      "step": 1600
+    },
+    {
+      "epoch": 0.7861635220125787,
+      "grad_norm": 1.4284896850585938,
+      "learning_rate": 4.100358422939068e-05,
+      "loss": 1.9307,
+      "step": 1625
+    },
+    {
+      "epoch": 0.7982583454281568,
+      "grad_norm": 3.1330041885375977,
+      "learning_rate": 4.077956989247312e-05,
+      "loss": 1.4191,
+      "step": 1650
+    },
+    {
+      "epoch": 0.8103531688437349,
+      "grad_norm": 25.01806640625,
+      "learning_rate": 4.055555555555556e-05,
+      "loss": 1.8695,
+      "step": 1675
+    },
+    {
+      "epoch": 0.822447992259313,
+      "grad_norm": 24.93403434753418,
+      "learning_rate": 4.0331541218638e-05,
+      "loss": 1.0304,
+      "step": 1700
+    },
+    {
+      "epoch": 0.8345428156748912,
+      "grad_norm": 0.39635589718818665,
+      "learning_rate": 4.010752688172043e-05,
+      "loss": 1.5612,
+      "step": 1725
+    },
+    {
+      "epoch": 0.8466376390904693,
+      "grad_norm": 0.25706031918525696,
+      "learning_rate": 3.988351254480287e-05,
+      "loss": 1.6167,
+      "step": 1750
+    },
+    {
+      "epoch": 0.8587324625060474,
+      "grad_norm": 204.7973175048828,
+      "learning_rate": 3.965949820788531e-05,
+      "loss": 1.033,
+      "step": 1775
+    },
+    {
+      "epoch": 0.8708272859216255,
+      "grad_norm": 0.1547696739435196,
+      "learning_rate": 3.9435483870967746e-05,
+      "loss": 0.9405,
+      "step": 1800
+    },
+    {
+      "epoch": 0.8829221093372037,
+      "grad_norm": 25.602577209472656,
+      "learning_rate": 3.9211469534050185e-05,
+      "loss": 1.2914,
+      "step": 1825
+    },
+    {
+      "epoch": 0.8950169327527818,
+      "grad_norm": 25.09197425842285,
+      "learning_rate": 3.8987455197132616e-05,
+      "loss": 0.8189,
+      "step": 1850
+    },
+    {
+      "epoch": 0.9071117561683599,
+      "grad_norm": 0.08802329748868942,
+      "learning_rate": 3.8763440860215055e-05,
+      "loss": 1.7206,
+      "step": 1875
+    },
+    {
+      "epoch": 0.919206579583938,
+      "grad_norm": 23.29674530029297,
+      "learning_rate": 3.8539426523297494e-05,
+      "loss": 2.5356,
+      "step": 1900
+    },
+    {
+      "epoch": 0.9313014029995162,
+      "grad_norm": 24.580432891845703,
+      "learning_rate": 3.8315412186379926e-05,
+      "loss": 1.6752,
+      "step": 1925
+    },
+    {
+      "epoch": 0.9433962264150944,
+      "grad_norm": 2.269275665283203,
+      "learning_rate": 3.809139784946237e-05,
+      "loss": 1.9433,
+      "step": 1950
+    },
+    {
+      "epoch": 0.9554910498306725,
+      "grad_norm": 3.1146695613861084,
+      "learning_rate": 3.786738351254481e-05,
+      "loss": 1.562,
+      "step": 1975
+    },
+    {
+      "epoch": 0.9675858732462506,
+      "grad_norm": 0.2862231135368347,
+      "learning_rate": 3.764336917562724e-05,
+      "loss": 0.6273,
+      "step": 2000
+    },
+    {
+      "epoch": 0.9796806966618288,
+      "grad_norm": 0.39030689001083374,
+      "learning_rate": 3.741935483870968e-05,
+      "loss": 1.7117,
+      "step": 2025
+    },
+    {
+      "epoch": 0.9917755200774069,
+      "grad_norm": 25.142972946166992,
+      "learning_rate": 3.719534050179211e-05,
+      "loss": 1.8362,
+      "step": 2050
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.6054158607350096,
+      "eval_f1_macro": 0.41404572439055193,
+      "eval_f1_micro": 0.6054158607350096,
+      "eval_f1_weighted": 0.5589758737227555,
+      "eval_loss": 1.5838946104049683,
+      "eval_precision_macro": 0.46238977618202576,
+      "eval_precision_micro": 0.6054158607350096,
+      "eval_precision_weighted": 0.6418784559670553,
+      "eval_recall_macro": 0.46003784410669596,
+      "eval_recall_micro": 0.6054158607350096,
+      "eval_recall_weighted": 0.6054158607350096,
+      "eval_runtime": 7116.055,
+      "eval_samples_per_second": 0.073,
+      "eval_steps_per_second": 0.036,
+      "step": 2067
+    },
+    {
+      "epoch": 1.003870343492985,
+      "grad_norm": 23.918968200683594,
+      "learning_rate": 3.697132616487455e-05,
+      "loss": 1.7229,
+      "step": 2075
+    },
+    {
+      "epoch": 1.0159651669085632,
+      "grad_norm": 24.551406860351562,
+      "learning_rate": 3.6747311827956996e-05,
+      "loss": 0.7897,
+      "step": 2100
+    },
+    {
+      "epoch": 1.0280599903241412,
+      "grad_norm": 0.6501819491386414,
+      "learning_rate": 3.652329749103943e-05,
+      "loss": 1.4349,
+      "step": 2125
+    },
+    {
+      "epoch": 1.0401548137397194,
+      "grad_norm": 0.4270811676979065,
+      "learning_rate": 3.6299283154121866e-05,
+      "loss": 1.4771,
+      "step": 2150
+    },
+    {
+      "epoch": 1.0522496371552976,
+      "grad_norm": 25.46353530883789,
+      "learning_rate": 3.6075268817204305e-05,
+      "loss": 1.6769,
+      "step": 2175
+    },
+    {
+      "epoch": 1.0643444605708756,
+      "grad_norm": 24.238040924072266,
+      "learning_rate": 3.585125448028674e-05,
+      "loss": 2.2981,
+      "step": 2200
+    },
+    {
+      "epoch": 1.0764392839864538,
+      "grad_norm": 24.645816802978516,
+      "learning_rate": 3.5627240143369176e-05,
+      "loss": 1.9655,
+      "step": 2225
+    },
+    {
+      "epoch": 1.0885341074020318,
+      "grad_norm": 0.16327662765979767,
+      "learning_rate": 3.5403225806451614e-05,
+      "loss": 1.2296,
+      "step": 2250
+    },
+    {
+      "epoch": 1.10062893081761,
+      "grad_norm": 3.4786908626556396,
+      "learning_rate": 3.517921146953405e-05,
+      "loss": 1.3128,
+      "step": 2275
+    },
+    {
+      "epoch": 1.1127237542331883,
+      "grad_norm": 0.18501241505146027,
+      "learning_rate": 3.495519713261649e-05,
+      "loss": 1.1883,
+      "step": 2300
+    },
+    {
+      "epoch": 1.1248185776487662,
+      "grad_norm": 0.35296130180358887,
+      "learning_rate": 3.473118279569892e-05,
+      "loss": 1.5325,
+      "step": 2325
+    },
+    {
+      "epoch": 1.1369134010643445,
+      "grad_norm": 0.24219243228435516,
+      "learning_rate": 3.450716845878136e-05,
+      "loss": 0.8073,
+      "step": 2350
+    },
+    {
+      "epoch": 1.1490082244799227,
+      "grad_norm": 0.16509315371513367,
+      "learning_rate": 3.42831541218638e-05,
+      "loss": 0.4453,
+      "step": 2375
+    },
+    {
+      "epoch": 1.1611030478955007,
+      "grad_norm": 25.64842987060547,
+      "learning_rate": 3.405913978494624e-05,
+      "loss": 1.8413,
+      "step": 2400
+    },
+    {
+      "epoch": 1.1731978713110789,
+      "grad_norm": 0.2987683117389679,
+      "learning_rate": 3.383512544802868e-05,
+      "loss": 1.3856,
+      "step": 2425
+    },
+    {
+      "epoch": 1.185292694726657,
+      "grad_norm": 0.1390484720468521,
+      "learning_rate": 3.3611111111111116e-05,
+      "loss": 1.0936,
+      "step": 2450
+    },
+    {
+      "epoch": 1.197387518142235,
+      "grad_norm": 152.2466278076172,
+      "learning_rate": 3.338709677419355e-05,
+      "loss": 1.29,
+      "step": 2475
+    },
+    {
+      "epoch": 1.2094823415578133,
+      "grad_norm": 0.5964694619178772,
+      "learning_rate": 3.316308243727599e-05,
+      "loss": 2.0098,
+      "step": 2500
+    },
+    {
+      "epoch": 1.2215771649733913,
+      "grad_norm": 0.7620899677276611,
+      "learning_rate": 3.2939068100358426e-05,
+      "loss": 0.9353,
+      "step": 2525
+    },
+    {
+      "epoch": 1.2336719883889695,
+      "grad_norm": 0.4900791347026825,
+      "learning_rate": 3.2715053763440864e-05,
+      "loss": 1.1481,
+      "step": 2550
+    },
+    {
+      "epoch": 1.2457668118045477,
+      "grad_norm": 0.8056408762931824,
+      "learning_rate": 3.24910394265233e-05,
+      "loss": 1.6924,
+      "step": 2575
+    },
+    {
+      "epoch": 1.2578616352201257,
+      "grad_norm": 0.34215426445007324,
+      "learning_rate": 3.2267025089605735e-05,
+      "loss": 1.5408,
+      "step": 2600
+    },
+    {
+      "epoch": 1.269956458635704,
+      "grad_norm": 0.32340100407600403,
+      "learning_rate": 3.204301075268817e-05,
+      "loss": 1.9354,
+      "step": 2625
+    },
+    {
+      "epoch": 1.282051282051282,
+      "grad_norm": 24.62626075744629,
+      "learning_rate": 3.181899641577061e-05,
+      "loss": 1.9551,
+      "step": 2650
+    },
+    {
+      "epoch": 1.2941461054668602,
+      "grad_norm": 1.058280348777771,
+      "learning_rate": 3.1594982078853044e-05,
+      "loss": 1.3507,
+      "step": 2675
+    },
+    {
+      "epoch": 1.3062409288824384,
+      "grad_norm": 1.5900629758834839,
+      "learning_rate": 3.137096774193549e-05,
+      "loss": 2.1607,
+      "step": 2700
+    },
+    {
+      "epoch": 1.3183357522980166,
+      "grad_norm": 24.93858528137207,
+      "learning_rate": 3.114695340501792e-05,
+      "loss": 1.328,
+      "step": 2725
+    },
+    {
+      "epoch": 1.3304305757135946,
+      "grad_norm": 24.877849578857422,
+      "learning_rate": 3.092293906810036e-05,
+      "loss": 1.6651,
+      "step": 2750
+    },
+    {
+      "epoch": 1.3425253991291728,
+      "grad_norm": 0.48590317368507385,
+      "learning_rate": 3.06989247311828e-05,
+      "loss": 0.6887,
+      "step": 2775
+    },
+    {
+      "epoch": 1.3546202225447508,
+      "grad_norm": 0.09108546376228333,
+      "learning_rate": 3.0474910394265234e-05,
+      "loss": 0.9513,
+      "step": 2800
+    },
+    {
+      "epoch": 1.366715045960329,
+      "grad_norm": 0.23763756453990936,
+      "learning_rate": 3.0250896057347672e-05,
+      "loss": 1.5187,
+      "step": 2825
+    },
+    {
+      "epoch": 1.3788098693759072,
+      "grad_norm": 0.2408967763185501,
+      "learning_rate": 3.002688172043011e-05,
+      "loss": 1.3683,
+      "step": 2850
+    },
+    {
+      "epoch": 1.3909046927914852,
+      "grad_norm": 2.7443923950195312,
+      "learning_rate": 2.9802867383512546e-05,
+      "loss": 1.7777,
+      "step": 2875
+    },
+    {
+      "epoch": 1.4029995162070634,
+      "grad_norm": 26.244659423828125,
+      "learning_rate": 2.9578853046594985e-05,
+      "loss": 2.1378,
+      "step": 2900
+    },
+    {
+      "epoch": 1.4150943396226414,
+      "grad_norm": 158.08462524414062,
+      "learning_rate": 2.9354838709677417e-05,
+      "loss": 1.28,
+      "step": 2925
+    },
+    {
+      "epoch": 1.4271891630382196,
+      "grad_norm": 27.04939842224121,
+      "learning_rate": 2.913082437275986e-05,
+      "loss": 0.8604,
+      "step": 2950
+    },
+    {
+      "epoch": 1.4392839864537978,
+      "grad_norm": 0.1699460744857788,
+      "learning_rate": 2.8906810035842297e-05,
+      "loss": 2.7105,
+      "step": 2975
+    },
+    {
+      "epoch": 1.4513788098693758,
+      "grad_norm": 23.923328399658203,
+      "learning_rate": 2.868279569892473e-05,
+      "loss": 1.3738,
+      "step": 3000
+    },
+    {
+      "epoch": 1.463473633284954,
+      "grad_norm": 23.95648765563965,
+      "learning_rate": 2.845878136200717e-05,
+      "loss": 1.4941,
+      "step": 3025
+    },
+    {
+      "epoch": 1.475568456700532,
+      "grad_norm": 0.05188923701643944,
+      "learning_rate": 2.823476702508961e-05,
+      "loss": 0.9538,
+      "step": 3050
+    },
+    {
+      "epoch": 1.4876632801161103,
+      "grad_norm": 2.9171254634857178,
+      "learning_rate": 2.801075268817204e-05,
+      "loss": 1.7475,
+      "step": 3075
+    },
+    {
+      "epoch": 1.4997581035316885,
+      "grad_norm": 6.571992874145508,
+      "learning_rate": 2.7786738351254484e-05,
+      "loss": 1.1107,
+      "step": 3100
+    },
+    {
+      "epoch": 1.5118529269472667,
+      "grad_norm": 26.21474838256836,
+      "learning_rate": 2.7562724014336922e-05,
+      "loss": 1.1843,
+      "step": 3125
+    },
+    {
+      "epoch": 1.5239477503628447,
+      "grad_norm": 0.16156063973903656,
+      "learning_rate": 2.7338709677419354e-05,
+      "loss": 1.5585,
+      "step": 3150
+    },
+    {
+      "epoch": 1.5360425737784227,
+      "grad_norm": 0.13359041512012482,
+      "learning_rate": 2.7114695340501796e-05,
+      "loss": 1.1763,
+      "step": 3175
+    },
+    {
+      "epoch": 1.548137397194001,
+      "grad_norm": 0.2726369798183441,
+      "learning_rate": 2.6890681003584228e-05,
+      "loss": 1.6273,
+      "step": 3200
+    },
+    {
+      "epoch": 1.5602322206095791,
+      "grad_norm": 0.41835281252861023,
+      "learning_rate": 2.6666666666666667e-05,
+      "loss": 1.3093,
+      "step": 3225
+    },
+    {
+      "epoch": 1.5723270440251573,
+      "grad_norm": 0.2902648448944092,
+      "learning_rate": 2.6442652329749105e-05,
+      "loss": 0.8197,
+      "step": 3250
+    },
+    {
+      "epoch": 1.5844218674407353,
+      "grad_norm": 0.3506704866886139,
+      "learning_rate": 2.621863799283154e-05,
+      "loss": 0.8084,
+      "step": 3275
+    },
+    {
+      "epoch": 1.5965166908563135,
+      "grad_norm": 0.07854276150465012,
+      "learning_rate": 2.599462365591398e-05,
+      "loss": 0.6503,
+      "step": 3300
+    },
+    {
+      "epoch": 1.6086115142718915,
+      "grad_norm": 0.24219046533107758,
+      "learning_rate": 2.5770609318996418e-05,
+      "loss": 0.9614,
+      "step": 3325
+    },
+    {
+      "epoch": 1.6207063376874697,
+      "grad_norm": 26.107500076293945,
+      "learning_rate": 2.5546594982078853e-05,
+      "loss": 1.6199,
+      "step": 3350
+    },
+    {
+      "epoch": 1.632801161103048,
+      "grad_norm": 26.309776306152344,
+      "learning_rate": 2.532258064516129e-05,
+      "loss": 1.1816,
+      "step": 3375
+    },
+    {
+      "epoch": 1.6448959845186262,
+      "grad_norm": 0.2523309886455536,
+      "learning_rate": 2.5098566308243727e-05,
+      "loss": 1.1721,
+      "step": 3400
+    },
+    {
+      "epoch": 1.6569908079342042,
+      "grad_norm": 26.82816505432129,
+      "learning_rate": 2.4874551971326165e-05,
+      "loss": 0.696,
+      "step": 3425
+    },
+    {
+      "epoch": 1.6690856313497822,
+      "grad_norm": 0.15189094841480255,
+      "learning_rate": 2.46505376344086e-05,
+      "loss": 1.1234,
+      "step": 3450
+    },
+    {
+      "epoch": 1.6811804547653604,
+      "grad_norm": 0.19785544276237488,
+      "learning_rate": 2.4426523297491043e-05,
+      "loss": 1.6574,
+      "step": 3475
+    },
+    {
+      "epoch": 1.6932752781809386,
+      "grad_norm": 0.66933673620224,
+      "learning_rate": 2.4202508960573478e-05,
+      "loss": 2.0303,
+      "step": 3500
+    },
+    {
+      "epoch": 1.7053701015965168,
+      "grad_norm": 23.572277069091797,
+      "learning_rate": 2.3978494623655913e-05,
+      "loss": 2.2247,
+      "step": 3525
+    },
+    {
+      "epoch": 1.7174649250120948,
+      "grad_norm": 0.5169154405593872,
+      "learning_rate": 2.3754480286738355e-05,
+      "loss": 0.7378,
+      "step": 3550
+    },
+    {
+      "epoch": 1.7295597484276728,
+      "grad_norm": 0.28006285429000854,
+      "learning_rate": 2.353046594982079e-05,
+      "loss": 1.823,
+      "step": 3575
+    },
+    {
+      "epoch": 1.741654571843251,
+      "grad_norm": 0.47209933400154114,
+      "learning_rate": 2.3306451612903226e-05,
+      "loss": 2.0323,
+      "step": 3600
+    },
+    {
+      "epoch": 1.7537493952588292,
+      "grad_norm": 0.23533566296100616,
+      "learning_rate": 2.3082437275985664e-05,
+      "loss": 0.5909,
+      "step": 3625
+    },
+    {
+      "epoch": 1.7658442186744074,
+      "grad_norm": 0.17464753985404968,
+      "learning_rate": 2.2858422939068103e-05,
+      "loss": 1.0246,
+      "step": 3650
+    },
+    {
+      "epoch": 1.7779390420899854,
+      "grad_norm": 0.18633712828159332,
+      "learning_rate": 2.2634408602150538e-05,
+      "loss": 0.7297,
+      "step": 3675
+    },
+    {
+      "epoch": 1.7900338655055636,
+      "grad_norm": 0.19488303363323212,
+      "learning_rate": 2.2410394265232977e-05,
+      "loss": 1.0616,
+      "step": 3700
+    },
+    {
+      "epoch": 1.8021286889211416,
+      "grad_norm": 0.21784909069538116,
+      "learning_rate": 2.2186379928315412e-05,
+      "loss": 1.5585,
+      "step": 3725
+    },
+    {
+      "epoch": 1.8142235123367199,
+      "grad_norm": 0.2788207530975342,
+      "learning_rate": 2.196236559139785e-05,
+      "loss": 1.201,
+      "step": 3750
+    },
+    {
+      "epoch": 1.826318335752298,
+      "grad_norm": 24.099077224731445,
+      "learning_rate": 2.173835125448029e-05,
+      "loss": 2.076,
+      "step": 3775
+    },
+    {
+      "epoch": 1.8384131591678763,
+      "grad_norm": 0.18294040858745575,
+      "learning_rate": 2.1514336917562725e-05,
+      "loss": 1.384,
+      "step": 3800
+    },
+    {
+      "epoch": 1.8505079825834543,
+      "grad_norm": 24.9930477142334,
+      "learning_rate": 2.129032258064516e-05,
+      "loss": 1.0263,
+      "step": 3825
+    },
+    {
+      "epoch": 1.8626028059990323,
+      "grad_norm": 0.19692493975162506,
+      "learning_rate": 2.1066308243727602e-05,
+      "loss": 1.4213,
+      "step": 3850
+    },
+    {
+      "epoch": 1.8746976294146105,
+      "grad_norm": 0.32238996028900146,
+      "learning_rate": 2.0842293906810037e-05,
+      "loss": 0.9261,
+      "step": 3875
+    },
+    {
+      "epoch": 1.8867924528301887,
+      "grad_norm": 0.16958071291446686,
+      "learning_rate": 2.0618279569892472e-05,
+      "loss": 0.4504,
+      "step": 3900
+    },
+    {
+      "epoch": 1.898887276245767,
+      "grad_norm": 0.187217578291893,
+      "learning_rate": 2.039426523297491e-05,
+      "loss": 1.5949,
+      "step": 3925
+    },
+    {
+      "epoch": 1.910982099661345,
+      "grad_norm": 0.644129753112793,
+      "learning_rate": 2.017025089605735e-05,
+      "loss": 2.5123,
+      "step": 3950
+    },
+    {
+      "epoch": 1.9230769230769231,
+      "grad_norm": 0.2620396018028259,
+      "learning_rate": 1.9946236559139785e-05,
+      "loss": 1.0002,
+      "step": 3975
+    },
+    {
+      "epoch": 1.9351717464925011,
+      "grad_norm": 0.2608689069747925,
+      "learning_rate": 1.9722222222222224e-05,
+      "loss": 1.3737,
+      "step": 4000
+    },
+    {
+      "epoch": 1.9472665699080793,
+      "grad_norm": 24.944629669189453,
+      "learning_rate": 1.9498207885304662e-05,
+      "loss": 1.1471,
+      "step": 4025
+    },
+    {
+      "epoch": 1.9593613933236576,
+      "grad_norm": 23.796247482299805,
+      "learning_rate": 1.9274193548387097e-05,
+      "loss": 2.678,
+      "step": 4050
+    },
+    {
+      "epoch": 1.9714562167392358,
+      "grad_norm": 0.3903225362300873,
+      "learning_rate": 1.9050179211469536e-05,
+      "loss": 1.7349,
+      "step": 4075
+    },
+    {
+      "epoch": 1.9835510401548138,
+      "grad_norm": 23.73769760131836,
+      "learning_rate": 1.882616487455197e-05,
+      "loss": 1.1653,
+      "step": 4100
+    },
+    {
+      "epoch": 1.9956458635703918,
+      "grad_norm": 22.70951271057129,
+      "learning_rate": 1.860215053763441e-05,
+      "loss": 1.9917,
+      "step": 4125
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.6731141199226306,
+      "eval_f1_macro": 0.4615303615303616,
+      "eval_f1_micro": 0.6731141199226306,
+      "eval_f1_weighted": 0.6321285489564019,
+      "eval_loss": 1.3909244537353516,
+      "eval_precision_macro": 0.4655498128675815,
+      "eval_precision_micro": 0.6731141199226306,
+      "eval_precision_weighted": 0.6287401060434273,
+      "eval_recall_macro": 0.4847830067753159,
+      "eval_recall_micro": 0.6731141199226306,
+      "eval_recall_weighted": 0.6731141199226306,
+      "eval_runtime": 6650.8901,
+      "eval_samples_per_second": 0.078,
+      "eval_steps_per_second": 0.039,
+      "step": 4134
+    },
+    {
+      "epoch": 2.00774068698597,
+      "grad_norm": 0.9518312811851501,
+      "learning_rate": 1.837813620071685e-05,
+      "loss": 0.8235,
+      "step": 4150
+    },
+    {
+      "epoch": 2.019835510401548,
+      "grad_norm": 0.2822599411010742,
+      "learning_rate": 1.8154121863799284e-05,
+      "loss": 0.7381,
+      "step": 4175
+    },
+    {
+      "epoch": 2.0319303338171264,
+      "grad_norm": 0.2420988231897354,
+      "learning_rate": 1.793010752688172e-05,
+      "loss": 1.0998,
+      "step": 4200
+    },
+    {
+      "epoch": 2.0440251572327046,
+      "grad_norm": 0.23866595327854156,
+      "learning_rate": 1.770609318996416e-05,
+      "loss": 1.5128,
+      "step": 4225
+    },
+    {
+      "epoch": 2.0561199806482824,
+      "grad_norm": 0.20324940979480743,
+      "learning_rate": 1.7482078853046596e-05,
+      "loss": 0.6954,
+      "step": 4250
+    },
+    {
+      "epoch": 2.0682148040638606,
+      "grad_norm": 0.23257912695407867,
+      "learning_rate": 1.725806451612903e-05,
+      "loss": 2.1005,
+      "step": 4275
+    },
+    {
+      "epoch": 2.080309627479439,
+      "grad_norm": 0.28216153383255005,
+      "learning_rate": 1.703405017921147e-05,
+      "loss": 0.6184,
+      "step": 4300
+    },
+    {
+      "epoch": 2.092404450895017,
+      "grad_norm": 0.2500057518482208,
+      "learning_rate": 1.681003584229391e-05,
+      "loss": 1.82,
+      "step": 4325
+    },
+    {
+      "epoch": 2.1044992743105952,
+      "grad_norm": 0.442717045545578,
+      "learning_rate": 1.6586021505376344e-05,
+      "loss": 2.2307,
+      "step": 4350
+    },
+    {
+      "epoch": 2.116594097726173,
+      "grad_norm": 44.982303619384766,
+      "learning_rate": 1.6362007168458783e-05,
+      "loss": 1.16,
+      "step": 4375
+    },
+    {
+      "epoch": 2.1286889211417512,
+      "grad_norm": 34.185569763183594,
+      "learning_rate": 1.6137992831541218e-05,
+      "loss": 1.8138,
+      "step": 4400
+    },
+    {
+      "epoch": 2.1407837445573294,
+      "grad_norm": 0.5167245864868164,
+      "learning_rate": 1.5913978494623657e-05,
+      "loss": 1.5199,
+      "step": 4425
+    },
+    {
+      "epoch": 2.1528785679729077,
+      "grad_norm": 25.86376190185547,
+      "learning_rate": 1.5689964157706095e-05,
+      "loss": 1.1301,
+      "step": 4450
+    },
+    {
+      "epoch": 2.164973391388486,
+      "grad_norm": 0.7654205560684204,
+      "learning_rate": 1.546594982078853e-05,
+      "loss": 0.9802,
+      "step": 4475
+    },
+    {
+      "epoch": 2.1770682148040637,
+      "grad_norm": 0.22726771235466003,
+      "learning_rate": 1.5241935483870967e-05,
+      "loss": 1.3874,
+      "step": 4500
+    },
+    {
+      "epoch": 2.189163038219642,
+      "grad_norm": 0.3605280816555023,
+      "learning_rate": 1.5017921146953406e-05,
+      "loss": 1.9075,
+      "step": 4525
+    },
+    {
+      "epoch": 2.20125786163522,
+      "grad_norm": 0.28213346004486084,
+      "learning_rate": 1.4793906810035843e-05,
+      "loss": 1.0298,
+      "step": 4550
+    },
+    {
+      "epoch": 2.2133526850507983,
+      "grad_norm": 0.18000219762325287,
+      "learning_rate": 1.456989247311828e-05,
+      "loss": 1.5587,
+      "step": 4575
+    },
+    {
+      "epoch": 2.2254475084663765,
+      "grad_norm": 25.92272186279297,
+      "learning_rate": 1.4345878136200718e-05,
+      "loss": 1.1183,
+      "step": 4600
+    },
+    {
+      "epoch": 2.2375423318819547,
+      "grad_norm": 0.14410781860351562,
+      "learning_rate": 1.4121863799283155e-05,
+      "loss": 1.6175,
+      "step": 4625
+    },
+    {
+      "epoch": 2.2496371552975325,
+      "grad_norm": 0.16350312530994415,
+      "learning_rate": 1.3897849462365592e-05,
+      "loss": 1.2121,
+      "step": 4650
+    },
+    {
+      "epoch": 2.2617319787131107,
+      "grad_norm": 48.49122619628906,
+      "learning_rate": 1.367383512544803e-05,
+      "loss": 1.9733,
+      "step": 4675
+    },
+    {
+      "epoch": 2.273826802128689,
+      "grad_norm": 0.27995195984840393,
+      "learning_rate": 1.3449820788530468e-05,
+      "loss": 0.8415,
+      "step": 4700
+    },
+    {
+      "epoch": 2.285921625544267,
+      "grad_norm": 0.35172316431999207,
+      "learning_rate": 1.3225806451612905e-05,
+      "loss": 1.1809,
+      "step": 4725
+    },
+    {
+      "epoch": 2.2980164489598454,
+      "grad_norm": 0.33857014775276184,
+      "learning_rate": 1.300179211469534e-05,
+      "loss": 0.992,
+      "step": 4750
+    },
+    {
+      "epoch": 2.310111272375423,
+      "grad_norm": 0.9367401003837585,
+      "learning_rate": 1.2777777777777777e-05,
+      "loss": 1.4335,
+      "step": 4775
+    },
+    {
+      "epoch": 2.3222060957910013,
+      "grad_norm": 0.2820034921169281,
+      "learning_rate": 1.2553763440860217e-05,
+      "loss": 0.6884,
+      "step": 4800
+    },
+    {
+      "epoch": 2.3343009192065796,
+      "grad_norm": 0.37094831466674805,
+      "learning_rate": 1.2329749103942653e-05,
+      "loss": 1.672,
+      "step": 4825
+    },
+    {
+      "epoch": 2.3463957426221578,
+      "grad_norm": 0.32186359167099,
+      "learning_rate": 1.210573476702509e-05,
+      "loss": 1.2877,
+      "step": 4850
+    },
+    {
+      "epoch": 2.358490566037736,
+      "grad_norm": 54.59464645385742,
+      "learning_rate": 1.1881720430107528e-05,
+      "loss": 1.83,
+      "step": 4875
+    },
+    {
+      "epoch": 2.370585389453314,
+      "grad_norm": 0.36730891466140747,
+      "learning_rate": 1.1657706093189963e-05,
+      "loss": 1.6348,
+      "step": 4900
+    },
+    {
+      "epoch": 2.382680212868892,
+      "grad_norm": 0.23865163326263428,
+      "learning_rate": 1.1433691756272402e-05,
+      "loss": 1.2163,
+      "step": 4925
+    },
+    {
+      "epoch": 2.39477503628447,
+      "grad_norm": 27.761423110961914,
+      "learning_rate": 1.1209677419354839e-05,
+      "loss": 0.821,
+      "step": 4950
+    },
+    {
+      "epoch": 2.4068698597000484,
+      "grad_norm": 0.162750706076622,
+      "learning_rate": 1.0985663082437276e-05,
+      "loss": 0.7309,
+      "step": 4975
+    },
+    {
+      "epoch": 2.4189646831156266,
+      "grad_norm": 25.199321746826172,
+      "learning_rate": 1.0761648745519713e-05,
+      "loss": 0.7073,
+      "step": 5000
+    },
+    {
+      "epoch": 2.431059506531205,
+      "grad_norm": 0.1526053547859192,
+      "learning_rate": 1.0537634408602151e-05,
+      "loss": 1.2978,
+      "step": 5025
+    },
+    {
+      "epoch": 2.4431543299467826,
+      "grad_norm": 0.1368321031332016,
+      "learning_rate": 1.0313620071684588e-05,
+      "loss": 1.6805,
+      "step": 5050
+    },
+    {
+      "epoch": 2.455249153362361,
+      "grad_norm": 0.23694339394569397,
+      "learning_rate": 1.0089605734767025e-05,
+      "loss": 1.2977,
+      "step": 5075
+    },
+    {
+      "epoch": 2.467343976777939,
+      "grad_norm": 0.7356523871421814,
+      "learning_rate": 9.865591397849464e-06,
+      "loss": 1.8933,
+      "step": 5100
+    },
+    {
+      "epoch": 2.4794388001935173,
+      "grad_norm": 0.3766566812992096,
+      "learning_rate": 9.6415770609319e-06,
+      "loss": 1.2868,
+      "step": 5125
+    },
+    {
+      "epoch": 2.4915336236090955,
+      "grad_norm": 0.14354300498962402,
+      "learning_rate": 9.417562724014338e-06,
+      "loss": 1.5716,
+      "step": 5150
+    },
+    {
+      "epoch": 2.5036284470246732,
+      "grad_norm": 0.2801443934440613,
+      "learning_rate": 9.193548387096775e-06,
+      "loss": 1.1805,
+      "step": 5175
+    },
+    {
+      "epoch": 2.5157232704402515,
+      "grad_norm": 26.191604614257812,
+      "learning_rate": 8.969534050179212e-06,
+      "loss": 1.5799,
+      "step": 5200
+    },
+    {
+      "epoch": 2.5278180938558297,
+      "grad_norm": 0.29439038038253784,
+      "learning_rate": 8.745519713261649e-06,
+      "loss": 0.7503,
+      "step": 5225
+    },
+    {
+      "epoch": 2.539912917271408,
+      "grad_norm": 0.25986921787261963,
+      "learning_rate": 8.521505376344087e-06,
+      "loss": 1.5983,
+      "step": 5250
+    },
+    {
+      "epoch": 2.552007740686986,
+      "grad_norm": 0.37119609117507935,
+      "learning_rate": 8.297491039426524e-06,
+      "loss": 1.3133,
+      "step": 5275
+    },
+    {
+      "epoch": 2.564102564102564,
+      "grad_norm": 0.29802656173706055,
+      "learning_rate": 8.073476702508961e-06,
+      "loss": 1.2844,
+      "step": 5300
+    },
+    {
+      "epoch": 2.576197387518142,
+      "grad_norm": 0.22544977068901062,
+      "learning_rate": 7.849462365591398e-06,
+      "loss": 1.5217,
+      "step": 5325
+    },
+    {
+      "epoch": 2.5882922109337203,
+      "grad_norm": 0.2867962718009949,
+      "learning_rate": 7.625448028673836e-06,
+      "loss": 0.637,
+      "step": 5350
+    },
+    {
+      "epoch": 2.6003870343492985,
+      "grad_norm": 0.25557562708854675,
+      "learning_rate": 7.401433691756272e-06,
+      "loss": 1.1052,
+      "step": 5375
+    },
+    {
+      "epoch": 2.6124818577648767,
+      "grad_norm": 25.903554916381836,
+      "learning_rate": 7.177419354838711e-06,
+      "loss": 1.2246,
+      "step": 5400
+    },
+    {
+      "epoch": 2.6245766811804545,
+      "grad_norm": 24.891738891601562,
+      "learning_rate": 6.953405017921147e-06,
+      "loss": 1.3789,
+      "step": 5425
+    },
+    {
+      "epoch": 2.636671504596033,
+      "grad_norm": 0.23630130290985107,
+      "learning_rate": 6.7293906810035845e-06,
+      "loss": 1.1069,
+      "step": 5450
+    },
+    {
+      "epoch": 2.648766328011611,
+      "grad_norm": 0.27478015422821045,
+      "learning_rate": 6.5053763440860214e-06,
+      "loss": 0.5998,
+      "step": 5475
+    },
+    {
+      "epoch": 2.660861151427189,
+      "grad_norm": 25.27565574645996,
+      "learning_rate": 6.281362007168459e-06,
+      "loss": 1.2044,
+      "step": 5500
+    },
+    {
+      "epoch": 2.6729559748427674,
+      "grad_norm": 25.68202781677246,
+      "learning_rate": 6.057347670250896e-06,
+      "loss": 1.0829,
+      "step": 5525
+    },
+    {
+      "epoch": 2.6850507982583456,
+      "grad_norm": 0.17175991833209991,
+      "learning_rate": 5.833333333333334e-06,
+      "loss": 1.8226,
+      "step": 5550
+    },
+    {
+      "epoch": 2.697145621673924,
+      "grad_norm": 0.19636030495166779,
+      "learning_rate": 5.609318996415771e-06,
+      "loss": 0.8312,
+      "step": 5575
+    },
+    {
+      "epoch": 2.7092404450895016,
+      "grad_norm": 0.16146661341190338,
+      "learning_rate": 5.385304659498208e-06,
+      "loss": 0.8461,
+      "step": 5600
+    },
+    {
+      "epoch": 2.72133526850508,
+      "grad_norm": 0.17099538445472717,
+      "learning_rate": 5.161290322580646e-06,
+      "loss": 0.667,
+      "step": 5625
+    },
+    {
+      "epoch": 2.733430091920658,
+      "grad_norm": 0.48382991552352905,
+      "learning_rate": 4.9372759856630825e-06,
+      "loss": 0.6893,
+      "step": 5650
+    },
+    {
+      "epoch": 2.745524915336236,
+      "grad_norm": 25.40802574157715,
+      "learning_rate": 4.7132616487455195e-06,
+      "loss": 1.7727,
+      "step": 5675
+    },
+    {
+      "epoch": 2.7576197387518144,
+      "grad_norm": 45.407470703125,
+      "learning_rate": 4.489247311827957e-06,
+      "loss": 1.7406,
+      "step": 5700
+    },
+    {
+      "epoch": 2.769714562167392,
+      "grad_norm": 0.16706956923007965,
+      "learning_rate": 4.265232974910394e-06,
+      "loss": 1.0508,
+      "step": 5725
+    },
+    {
+      "epoch": 2.7818093855829704,
+      "grad_norm": 0.14837704598903656,
+      "learning_rate": 4.041218637992832e-06,
+      "loss": 0.9122,
+      "step": 5750
+    },
+    {
+      "epoch": 2.7939042089985486,
+      "grad_norm": 0.19028626382350922,
+      "learning_rate": 3.817204301075269e-06,
+      "loss": 0.9756,
+      "step": 5775
+    },
+    {
+      "epoch": 2.805999032414127,
+      "grad_norm": 29.7191162109375,
+      "learning_rate": 3.593189964157706e-06,
+      "loss": 1.4132,
+      "step": 5800
+    },
+    {
+      "epoch": 2.818093855829705,
+      "grad_norm": 25.76336097717285,
+      "learning_rate": 3.3691756272401432e-06,
+      "loss": 1.9683,
+      "step": 5825
+    },
+    {
+      "epoch": 2.830188679245283,
+      "grad_norm": 0.4251428544521332,
+      "learning_rate": 3.1451612903225806e-06,
+      "loss": 0.6718,
+      "step": 5850
+    },
+    {
+      "epoch": 2.842283502660861,
+      "grad_norm": 0.26770126819610596,
+      "learning_rate": 2.921146953405018e-06,
+      "loss": 0.6973,
+      "step": 5875
+    },
+    {
+      "epoch": 2.8543783260764393,
+      "grad_norm": 0.7866289615631104,
+      "learning_rate": 2.6971326164874553e-06,
+      "loss": 0.7933,
+      "step": 5900
+    },
+    {
+      "epoch": 2.8664731494920175,
+      "grad_norm": 46.420658111572266,
+      "learning_rate": 2.4731182795698927e-06,
+      "loss": 2.0253,
+      "step": 5925
+    },
+    {
+      "epoch": 2.8785679729075957,
+      "grad_norm": 0.2426643818616867,
+      "learning_rate": 2.2491039426523296e-06,
+      "loss": 1.0285,
+      "step": 5950
+    },
+    {
+      "epoch": 2.8906627963231735,
+      "grad_norm": 34.91679382324219,
+      "learning_rate": 2.025089605734767e-06,
+      "loss": 1.3188,
+      "step": 5975
+    },
+    {
+      "epoch": 2.9027576197387517,
+      "grad_norm": 0.22075743973255157,
+      "learning_rate": 1.8010752688172043e-06,
+      "loss": 1.6779,
+      "step": 6000
+    },
+    {
+      "epoch": 2.91485244315433,
+      "grad_norm": 25.831626892089844,
+      "learning_rate": 1.577060931899642e-06,
+      "loss": 1.2721,
+      "step": 6025
+    },
+    {
+      "epoch": 2.926947266569908,
+      "grad_norm": 0.1505293846130371,
+      "learning_rate": 1.3530465949820788e-06,
+      "loss": 2.161,
+      "step": 6050
+    },
+    {
+      "epoch": 2.9390420899854863,
+      "grad_norm": 0.18153157830238342,
+      "learning_rate": 1.1290322580645162e-06,
+      "loss": 1.1947,
+      "step": 6075
+    },
+    {
+      "epoch": 2.951136913401064,
+      "grad_norm": 25.609390258789062,
+      "learning_rate": 9.050179211469536e-07,
+      "loss": 0.6296,
+      "step": 6100
+    },
+    {
+      "epoch": 2.9632317368166423,
+      "grad_norm": 0.3544562757015228,
+      "learning_rate": 6.810035842293907e-07,
+      "loss": 1.2337,
+      "step": 6125
+    },
+    {
+      "epoch": 2.9753265602322205,
+      "grad_norm": 25.893449783325195,
+      "learning_rate": 4.5698924731182797e-07,
+      "loss": 1.7994,
+      "step": 6150
+    },
+    {
+      "epoch": 2.9874213836477987,
+      "grad_norm": 1.0282210111618042,
+      "learning_rate": 2.3297491039426527e-07,
+      "loss": 1.2547,
+      "step": 6175
+    },
+    {
+      "epoch": 2.999516207063377,
+      "grad_norm": 0.2006077915430069,
+      "learning_rate": 8.960573476702509e-09,
+      "loss": 0.87,
+      "step": 6200
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.7504835589941973,
+      "eval_f1_macro": 0.5244016249451032,
+      "eval_f1_micro": 0.7504835589941973,
+      "eval_f1_weighted": 0.714761195760481,
+      "eval_loss": 1.307055115699768,
+      "eval_precision_macro": 0.5012229210342417,
+      "eval_precision_micro": 0.7504835589941973,
+      "eval_precision_weighted": 0.6860840439724423,
+      "eval_recall_macro": 0.5532259049014222,
+      "eval_recall_micro": 0.7504835589941973,
+      "eval_recall_weighted": 0.7504835589941973,
+      "eval_runtime": 6673.8059,
+      "eval_samples_per_second": 0.077,
+      "eval_steps_per_second": 0.039,
+      "step": 6201
+    }
+  ],
+  "logging_steps": 25,
+  "max_steps": 6201,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "EarlyStoppingCallback": {
+      "args": {
+        "early_stopping_patience": 5,
+        "early_stopping_threshold": 0.01
+      },
+      "attributes": {
+        "early_stopping_patience_counter": 0
+      }
+    },
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.6292664567668736e+16,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-6201/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cab6e4694c69fe840eda2a160ba8d0e45613204efc679c3084b6aa54b4cd418c
+size 5240

config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "_name_or_path": "allenai/longformer-base-4096",
+  "_num_labels": 3,
+  "architectures": [
+    "LongformerForSequenceClassification"
+  ],
+  "attention_mode": "longformer",
+  "attention_probs_dropout_prob": 0.1,
+  "attention_window": [
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512
+  ],
+  "bos_token_id": 0,
+  "eos_token_id": 2,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "negative",
+    "1": "neutral",
+    "2": "positive"
+  },
+  "ignore_attention_mask": false,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "negative": 0,
+    "neutral": 1,
+    "positive": 2
+  },
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 4098,
+  "model_type": "longformer",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "onnx_export": false,
+  "pad_token_id": 1,
+  "problem_type": "single_label_classification",
+  "sep_token_id": 2,
+  "torch_dtype": "float32",
+  "transformers_version": "4.45.0",
+  "type_vocab_size": 1,
+  "vocab_size": 50265
+}

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:322dae133d3ce467a820df6105d8c7973c40d913fb2ce1117801d5c7b57c139f
+size 594681260

runs/Nov01_08-49-04_r-kafikani-longformer-va66dsc0-0e056-fjgin/events.out.tfevents.1730450948.r-kafikani-longformer-va66dsc0-0e056-fjgin.465.0 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b5e51598b89ad0ea04ec1934e8c888cb1bcd479588893d4ba3ee21ab8784db3e
-size 59074

 version https://git-lfs.github.com/spec/v1
+oid sha256:cfdf7e559ab1ee1a97c3a4c242ada4f77098d01eb43f82c4493442ef596eef4a
+size 60472

runs/Nov01_08-49-04_r-kafikani-longformer-va66dsc0-0e056-fjgin/events.out.tfevents.1730870444.r-kafikani-longformer-va66dsc0-0e056-fjgin.465.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:232870b490734f0c789391c0d5c8c5bdeca5087a951488bf94a57efb56d76b58
+size 921

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+  "bos_token": "<s>",
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "unk_token": "<unk>"
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50264": {
+      "content": "<mask>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "errors": "replace",
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "tokenizer_class": "LongformerTokenizer",
+  "trim_offsets": true,
+  "unk_token": "<unk>"
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cab6e4694c69fe840eda2a160ba8d0e45613204efc679c3084b6aa54b4cd418c
+size 5240

training_params.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+    "data_path": "autotrain-iinjh-0wh75/autotrain-data",
+    "model": "allenai/longformer-base-4096",
+    "lr": 5e-05,
+    "epochs": 3,
+    "max_seq_length": 4096,
+    "batch_size": 1,
+    "warmup_ratio": 0.1,
+    "gradient_accumulation": 1,
+    "optimizer": "adamw_torch",
+    "scheduler": "linear",
+    "weight_decay": 0.0,
+    "max_grad_norm": 1.0,
+    "seed": 42,
+    "train_split": "train",
+    "valid_split": "validation",
+    "text_column": "autotrain_text",
+    "target_column": "autotrain_label",
+    "logging_steps": -1,
+    "project_name": "autotrain-iinjh-0wh75",
+    "auto_find_batch_size": false,
+    "mixed_precision": "fp16",
+    "save_total_limit": 1,
+    "push_to_hub": true,
+    "eval_strategy": "epoch",
+    "username": "kafikani",
+    "log": "tensorboard",
+    "early_stopping_patience": 5,
+    "early_stopping_threshold": 0.01
+}

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff