mariagrandury committed · verified · Commit 1c7d2cb · 1 parent: d615e1f

End of training

Files changed (50). This view is limited to 50 files because the commit contains too many changes; see the raw diff for the full list.
  1. README.md +19 -12
  2. config.json +18 -15
  3. logs/events.out.tfevents.1740157634.a2d653e866f9.226.3 +3 -0
  4. logs/events.out.tfevents.1740162891.0a0ba32201a8.2151.21 +3 -0
  5. logs/events.out.tfevents.1740163730.0a0ba32201a8.2151.22 +3 -0
  6. model.safetensors +2 -2
  7. model_stats.json +157 -238
  8. tokenizer.json +2 -2
  9. tokenizer_config.json +1 -1
  10. training_args.bin +2 -2
  11. trial_0/checkpoint-292/config.json +50 -0
  12. trial_0/checkpoint-292/model.safetensors +3 -0
  13. trial_0/checkpoint-292/optimizer.pt +3 -0
  14. trial_0/checkpoint-292/rng_state.pth +3 -0
  15. trial_0/checkpoint-292/scheduler.pt +3 -0
  16. trial_0/checkpoint-292/special_tokens_map.json +37 -0
  17. trial_0/checkpoint-292/tokenizer.json +0 -0
  18. trial_0/checkpoint-292/tokenizer_config.json +58 -0
  19. trial_0/checkpoint-292/trainer_state.json +314 -0
  20. trial_0/checkpoint-292/training_args.bin +3 -0
  21. trial_0/checkpoint-292/vocab.txt +0 -0
  22. trial_0/checkpoint-584/config.json +50 -0
  23. trial_0/checkpoint-584/model.safetensors +3 -0
  24. trial_0/checkpoint-584/optimizer.pt +3 -0
  25. trial_0/checkpoint-584/rng_state.pth +3 -0
  26. trial_0/checkpoint-584/scheduler.pt +3 -0
  27. trial_0/checkpoint-584/special_tokens_map.json +37 -0
  28. trial_0/checkpoint-584/tokenizer.json +0 -0
  29. trial_0/checkpoint-584/tokenizer_config.json +58 -0
  30. trial_0/checkpoint-584/trainer_state.json +595 -0
  31. trial_0/checkpoint-584/training_args.bin +3 -0
  32. trial_0/checkpoint-584/vocab.txt +0 -0
  33. trial_0/checkpoint-876/config.json +50 -0
  34. trial_0/checkpoint-876/model.safetensors +3 -0
  35. trial_0/checkpoint-876/optimizer.pt +3 -0
  36. trial_0/checkpoint-876/rng_state.pth +3 -0
  37. trial_0/checkpoint-876/scheduler.pt +3 -0
  38. trial_0/checkpoint-876/special_tokens_map.json +37 -0
  39. trial_0/checkpoint-876/tokenizer.json +0 -0
  40. trial_0/checkpoint-876/tokenizer_config.json +58 -0
  41. trial_0/checkpoint-876/trainer_state.json +876 -0
  42. trial_0/checkpoint-876/training_args.bin +3 -0
  43. trial_0/checkpoint-876/vocab.txt +0 -0
  44. trial_0/logs/events.out.tfevents.1740158198.a2d653e866f9.226.4 +3 -0
  45. trial_0/logs/events.out.tfevents.1740161296.0a0ba32201a8.2151.0 +3 -0
  46. trial_0/logs/events.out.tfevents.1740161531.0a0ba32201a8.2151.1 +3 -0
  47. trial_0/logs/events.out.tfevents.1740161645.0a0ba32201a8.2151.2 +3 -0
  48. trial_1/checkpoint-146/config.json +50 -0
  49. trial_1/checkpoint-146/model.safetensors +3 -0
  50. trial_1/checkpoint-146/optimizer.pt +3 -0
README.md CHANGED
@@ -1,8 +1,10 @@
1
  ---
2
  library_name: transformers
3
- base_model: dccuchile/distilbert-base-spanish-uncased
4
  tags:
5
  - generated_from_trainer
 
 
6
  model-index:
7
  - name: bluesky-spanish-classifier
8
  results: []
@@ -13,10 +15,11 @@ should probably proofread and complete it, then remove this comment. -->
13
 
14
  # bluesky-spanish-classifier
15
 
16
- This model is a fine-tuned version of [dccuchile/distilbert-base-spanish-uncased](https://huggingface.co/dccuchile/distilbert-base-spanish-uncased) on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
- - Loss: 2.2149
19
- - Classification Report: {'ar': {'precision': 0.125, 'recall': 0.014492753623188406, 'f1-score': 0.025974025974025976, 'support': 207.0}, 'cl': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 263.0}, 'co': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 171.0}, 'es': {'precision': 0.6666666666666666, 'recall': 0.006024096385542169, 'f1-score': 0.011940298507462687, 'support': 332.0}, 'mx': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 203.0}, 'pe': {'precision': 0.03333333333333333, 'recall': 0.006211180124223602, 'f1-score': 0.010471204188481676, 'support': 161.0}, 'pr': {'precision': 0.09401709401709402, 'recall': 0.22, 'f1-score': 0.1317365269461078, 'support': 50.0}, 'uy': {'precision': 0.07073715562174236, 'recall': 0.8260869565217391, 'f1-score': 0.13031550068587106, 'support': 115.0}, 've': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 22.0}, 'accuracy': 0.07349081364829396, 'macro avg': {'precision': 0.10997269440431515, 'recall': 0.1192016651838548, 'f1-score': 0.03449306181132769, 'support': 1524.0}, 'weighted avg': {'precision': 0.17415395511637471, 'recall': 0.07349081364829396, 'f1-score': 0.021390928662484977, 'support': 1524.0}}
 
20
 
21
  ## Model description
22
 
@@ -35,20 +38,24 @@ More information needed
35
  ### Training hyperparameters
36
 
37
  The following hyperparameters were used during training:
38
- - learning_rate: 2e-05
39
- - train_batch_size: 4
40
- - eval_batch_size: 4
41
  - seed: 42
42
  - optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
43
  - lr_scheduler_type: linear
44
- - lr_scheduler_warmup_steps: 2
45
- - training_steps: 2
46
 
47
  ### Training results
48
 
49
- | Training Loss | Epoch | Step | Validation Loss | Classification Report |
50
- |:-------------:|:------:|:----:|:---------------:|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|
51
- | 2.2248 | 0.0022 | 2 | 2.2149 | {'ar': {'precision': 0.125, 'recall': 0.014492753623188406, 'f1-score': 0.025974025974025976, 'support': 207.0}, 'cl': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 263.0}, 'co': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 171.0}, 'es': {'precision': 0.6666666666666666, 'recall': 0.006024096385542169, 'f1-score': 0.011940298507462687, 'support': 332.0}, 'mx': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 203.0}, 'pe': {'precision': 0.03333333333333333, 'recall': 0.006211180124223602, 'f1-score': 0.010471204188481676, 'support': 161.0}, 'pr': {'precision': 0.09401709401709402, 'recall': 0.22, 'f1-score': 0.1317365269461078, 'support': 50.0}, 'uy': {'precision': 0.07073715562174236, 'recall': 0.8260869565217391, 'f1-score': 0.13031550068587106, 'support': 115.0}, 've': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 22.0}, 'accuracy': 0.07349081364829396, 'macro avg': {'precision': 0.10997269440431515, 'recall': 0.1192016651838548, 'f1-score': 0.03449306181132769, 'support': 1524.0}, 'weighted avg': {'precision': 0.17415395511637471, 'recall': 0.07349081364829396, 'f1-score': 0.021390928662484977, 'support': 1524.0}} |
 
 
 
 
52
 
53
 
54
  ### Framework versions
 
1
  ---
2
  library_name: transformers
3
+ base_model: dccuchile/bert-base-spanish-wwm-uncased
4
  tags:
5
  - generated_from_trainer
6
+ metrics:
7
+ - f1
8
  model-index:
9
  - name: bluesky-spanish-classifier
10
  results: []
 
15
 
16
  # bluesky-spanish-classifier
17
 
18
+ This model is a fine-tuned version of [dccuchile/bert-base-spanish-wwm-uncased](https://huggingface.co/dccuchile/bert-base-spanish-wwm-uncased) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Loss: 2.3731
21
+ - Classification Report: {'ar': {'precision': 0.4898785425101215, 'recall': 0.32180851063829785, 'f1-score': 0.3884430176565008, 'support': 376.0}, 'cl': {'precision': 0.3626666666666667, 'recall': 0.4722222222222222, 'f1-score': 0.41025641025641024, 'support': 576.0}, 'co': {'precision': 0.34656084656084657, 'recall': 0.3808139534883721, 'f1-score': 0.3628808864265928, 'support': 344.0}, 'es': {'precision': 0.4630738522954092, 'recall': 0.427255985267035, 'f1-score': 0.4444444444444444, 'support': 543.0}, 'mx': {'precision': 0.43380855397148677, 'recall': 0.43917525773195876, 'f1-score': 0.4364754098360656, 'support': 485.0}, 'pe': {'precision': 0.3769968051118211, 'recall': 0.3390804597701149, 'f1-score': 0.35703479576399394, 'support': 348.0}, 'pr': {'precision': 0.5736434108527132, 'recall': 0.7326732673267327, 'f1-score': 0.6434782608695652, 'support': 101.0}, 'uy': {'precision': 0.35096153846153844, 'recall': 0.3201754385964912, 'f1-score': 0.3348623853211009, 'support': 228.0}, 've': {'precision': 0.16666666666666666, 'recall': 0.045454545454545456, 'f1-score': 0.07142857142857142, 'support': 22.0}, 'accuracy': 0.4085345683096262, 'macro avg': {'precision': 0.39602854256636333, 'recall': 0.38651773783286336, 'f1-score': 0.3832560202225828, 'support': 3023.0}, 'weighted avg': {'precision': 0.4124949665181113, 'recall': 0.4085345683096262, 'f1-score': 0.40601279016852304, 'support': 3023.0}}
22
+ - F1: 0.3833
23
 
24
  ## Model description
25
 
 
38
  ### Training hyperparameters
39
 
40
  The following hyperparameters were used during training:
41
+ - learning_rate: 2.8600231011639855e-05
42
+ - train_batch_size: 8
43
+ - eval_batch_size: 8
44
  - seed: 42
45
  - optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
46
  - lr_scheduler_type: linear
47
+ - lr_scheduler_warmup_ratio: 0.11531859504380029
48
+ - num_epochs: 5
49
 
50
  ### Training results
51
 
52
+ | Training Loss | Epoch | Step | Validation Loss | Classification Report | F1 |
53
+ |:-------------:|:-----:|:----:|:---------------:|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------:|
54
+ | 1.7603 | 1.0 | 882 | 1.7406 | {'ar': {'precision': 0.35516372795969775, 'recall': 0.375, 'f1-score': 0.3648124191461837, 'support': 376.0}, 'cl': {'precision': 0.3016759776536313, 'recall': 0.28125, 'f1-score': 0.29110512129380056, 'support': 576.0}, 'co': {'precision': 0.3670886075949367, 'recall': 0.25290697674418605, 'f1-score': 0.29948364888123924, 'support': 344.0}, 'es': {'precision': 0.3584905660377358, 'recall': 0.4548802946593002, 'f1-score': 0.400974025974026, 'support': 543.0}, 'mx': {'precision': 0.32465753424657534, 'recall': 0.488659793814433, 'f1-score': 0.39012345679012345, 'support': 485.0}, 'pe': {'precision': 0.3958333333333333, 'recall': 0.27298850574712646, 'f1-score': 0.3231292517006803, 'support': 348.0}, 'pr': {'precision': 0.5631067961165048, 'recall': 0.5742574257425742, 'f1-score': 0.5686274509803921, 'support': 101.0}, 'uy': {'precision': 0.4666666666666667, 'recall': 0.18421052631578946, 'f1-score': 0.2641509433962264, 'support': 228.0}, 've': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 22.0}, 'accuracy': 0.3536222295732716, 'macro avg': {'precision': 0.34807591217878686, 'recall': 0.3204615025581566, 'f1-score': 0.32248959090696355, 'support': 3023.0}, 'weighted avg': {'precision': 0.35948675942105285, 'recall': 0.3536222295732716, 'f1-score': 0.3456546260541325, 'support': 3023.0}} | 0.3225 |
55
+ | 1.4223 | 2.0 | 1764 | 1.6758 | {'ar': {'precision': 0.4349315068493151, 'recall': 0.3377659574468085, 'f1-score': 0.38023952095808383, 'support': 376.0}, 'cl': {'precision': 0.336996336996337, 'recall': 0.3194444444444444, 'f1-score': 0.32798573975044565, 'support': 576.0}, 'co': {'precision': 0.36333333333333334, 'recall': 0.3168604651162791, 'f1-score': 0.3385093167701863, 'support': 344.0}, 'es': {'precision': 0.38980716253443526, 'recall': 0.5211786372007366, 'f1-score': 0.44602048857368004, 'support': 543.0}, 'mx': {'precision': 0.35246995994659547, 'recall': 0.5443298969072164, 'f1-score': 0.42787682333873583, 'support': 485.0}, 'pe': {'precision': 0.44308943089430897, 'recall': 0.3132183908045977, 'f1-score': 0.367003367003367, 'support': 348.0}, 'pr': {'precision': 0.759493670886076, 'recall': 0.594059405940594, 'f1-score': 0.6666666666666666, 'support': 101.0}, 'uy': {'precision': 0.5542168674698795, 'recall': 0.20175438596491227, 'f1-score': 0.2958199356913183, 'support': 228.0}, 've': {'precision': 1.0, 'recall': 0.09090909090909091, 'f1-score': 0.16666666666666666, 'support': 22.0}, 'accuracy': 0.3916639100231558, 'macro avg': {'precision': 0.5149264743233645, 'recall': 0.35994674163718665, 'f1-score': 0.3796431694910167, 'support': 3023.0}, 'weighted avg': {'precision': 0.4116802685001794, 'recall': 0.3916639100231558, 'f1-score': 0.3851176158170783, 'support': 3023.0}} | 0.3796 |
56
+ | 0.9068 | 3.0 | 2646 | 1.9523 | {'ar': {'precision': 0.39574468085106385, 'recall': 0.4946808510638298, 'f1-score': 0.4397163120567376, 'support': 376.0}, 'cl': {'precision': 0.35144927536231885, 'recall': 0.3368055555555556, 'f1-score': 0.34397163120567376, 'support': 576.0}, 'co': {'precision': 0.31555555555555553, 'recall': 0.4127906976744186, 'f1-score': 0.35768261964735515, 'support': 344.0}, 'es': {'precision': 0.47113163972286376, 'recall': 0.3756906077348066, 'f1-score': 0.4180327868852459, 'support': 543.0}, 'mx': {'precision': 0.43680709534368073, 'recall': 0.4061855670103093, 'f1-score': 0.42094017094017094, 'support': 485.0}, 'pe': {'precision': 0.38661710037174724, 'recall': 0.2988505747126437, 'f1-score': 0.3371150729335494, 'support': 348.0}, 'pr': {'precision': 0.64, 'recall': 0.6336633663366337, 'f1-score': 0.6368159203980099, 'support': 101.0}, 'uy': {'precision': 0.30662020905923343, 'recall': 0.38596491228070173, 'f1-score': 0.341747572815534, 'support': 228.0}, 've': {'precision': 0.18181818181818182, 'recall': 0.09090909090909091, 'f1-score': 0.12121212121212122, 'support': 22.0}, 'accuracy': 0.3906715183592458, 'macro avg': {'precision': 0.3873048597871828, 'recall': 0.3817268025864433, 'f1-score': 0.37969268978826637, 'support': 3023.0}, 'weighted avg': {'precision': 0.3971399185993649, 'recall': 0.3906715183592458, 'f1-score': 0.3902981034934984, 'support': 3023.0}} | 0.3797 |
57
+ | 0.4818 | 4.0 | 3528 | 2.3731 | {'ar': {'precision': 0.4898785425101215, 'recall': 0.32180851063829785, 'f1-score': 0.3884430176565008, 'support': 376.0}, 'cl': {'precision': 0.3626666666666667, 'recall': 0.4722222222222222, 'f1-score': 0.41025641025641024, 'support': 576.0}, 'co': {'precision': 0.34656084656084657, 'recall': 0.3808139534883721, 'f1-score': 0.3628808864265928, 'support': 344.0}, 'es': {'precision': 0.4630738522954092, 'recall': 0.427255985267035, 'f1-score': 0.4444444444444444, 'support': 543.0}, 'mx': {'precision': 0.43380855397148677, 'recall': 0.43917525773195876, 'f1-score': 0.4364754098360656, 'support': 485.0}, 'pe': {'precision': 0.3769968051118211, 'recall': 0.3390804597701149, 'f1-score': 0.35703479576399394, 'support': 348.0}, 'pr': {'precision': 0.5736434108527132, 'recall': 0.7326732673267327, 'f1-score': 0.6434782608695652, 'support': 101.0}, 'uy': {'precision': 0.35096153846153844, 'recall': 0.3201754385964912, 'f1-score': 0.3348623853211009, 'support': 228.0}, 've': {'precision': 0.16666666666666666, 'recall': 0.045454545454545456, 'f1-score': 0.07142857142857142, 'support': 22.0}, 'accuracy': 0.4085345683096262, 'macro avg': {'precision': 0.39602854256636333, 'recall': 0.38651773783286336, 'f1-score': 0.3832560202225828, 'support': 3023.0}, 'weighted avg': {'precision': 0.4124949665181113, 'recall': 0.4085345683096262, 'f1-score': 0.40601279016852304, 'support': 3023.0}} | 0.3833 |
58
+ | 0.2357 | 5.0 | 4410 | 2.7721 | {'ar': {'precision': 0.42168674698795183, 'recall': 0.3723404255319149, 'f1-score': 0.3954802259887006, 'support': 376.0}, 'cl': {'precision': 0.38753799392097266, 'recall': 0.4427083333333333, 'f1-score': 0.413290113452188, 'support': 576.0}, 'co': {'precision': 0.35051546391752575, 'recall': 0.3953488372093023, 'f1-score': 0.37158469945355194, 'support': 344.0}, 'es': {'precision': 0.4642857142857143, 'recall': 0.40699815837937386, 'f1-score': 0.4337585868498528, 'support': 543.0}, 'mx': {'precision': 0.43089430894308944, 'recall': 0.43711340206185567, 'f1-score': 0.43398157625383826, 'support': 485.0}, 'pe': {'precision': 0.3407960199004975, 'recall': 0.3936781609195402, 'f1-score': 0.36533333333333334, 'support': 348.0}, 'pr': {'precision': 0.6601941747572816, 'recall': 0.6732673267326733, 'f1-score': 0.6666666666666666, 'support': 101.0}, 'uy': {'precision': 0.40853658536585363, 'recall': 0.29385964912280704, 'f1-score': 0.34183673469387754, 'support': 228.0}, 've': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 22.0}, 'accuracy': 0.40886536553092956, 'macro avg': {'precision': 0.3849385564532096, 'recall': 0.3794793659212001, 'f1-score': 0.38021465963244544, 'support': 3023.0}, 'weighted avg': {'precision': 0.41080624270175103, 'recall': 0.40886536553092956, 'f1-score': 0.4078732692419294, 'support': 3023.0}} | 0.3802 |
59
 
60
 
61
  ### Framework versions
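The updated card switches the base model to BETO (dccuchile/bert-base-spanish-wwm-uncased) and reports a macro F1 of about 0.38 over nine country labels. As a quick sanity check of the published weights, a minimal inference sketch follows; the Hub namespace is not stated in this diff, so the repo id below is an assumption, and the example sentence is taken from the dataset sample shown in model_stats.json.

```python
# Minimal sketch (not the training code): load the fine-tuned classifier and
# predict a country label for a Spanish post. The repo id is an assumption;
# the diff only names the model "bluesky-spanish-classifier".
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

repo_id = "mariagrandury/bluesky-spanish-classifier"  # hypothetical namespace

tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForSequenceClassification.from_pretrained(repo_id)

inputs = tokenizer(
    "Me lloran los ojos sin pedir permiso ni razón.",
    return_tensors="pt",
    truncation=True,
    max_length=64,  # matches the truncation length configured in tokenizer.json
)
with torch.no_grad():
    logits = model(**inputs).logits
print(model.config.id2label[logits.argmax(dim=-1).item()])  # one of ar, cl, co, es, mx, pe, pr, uy, ve
```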
config.json CHANGED
@@ -1,13 +1,14 @@
1
  {
2
- "_name_or_path": "dccuchile/distilbert-base-spanish-uncased",
3
- "activation": "gelu",
4
  "architectures": [
5
- "DistilBertForSequenceClassification"
6
  ],
7
- "attention_dropout": 0.1,
8
- "dim": 768,
9
- "dropout": 0.1,
10
- "hidden_dim": 3072,
 
 
11
  "id2label": {
12
  "0": "ar",
13
  "1": "cl",
@@ -20,6 +21,7 @@
20
  "8": "ve"
21
  },
22
  "initializer_range": 0.02,
 
23
  "label2id": {
24
  "ar": 0,
25
  "cl": 1,
@@ -31,17 +33,18 @@
31
  "uy": 7,
32
  "ve": 8
33
  },
 
34
  "max_position_embeddings": 512,
35
- "model_type": "distilbert",
36
- "n_heads": 12,
37
- "n_layers": 6,
38
- "pad_token_id": 0,
 
 
39
  "problem_type": "single_label_classification",
40
- "qa_dropout": 0.1,
41
- "seq_classif_dropout": 0.2,
42
- "sinusoidal_pos_embds": true,
43
- "tie_weights_": true,
44
  "torch_dtype": "float32",
45
  "transformers_version": "4.48.3",
 
 
46
  "vocab_size": 31002
47
  }
 
1
  {
2
+ "_name_or_path": "dccuchile/bert-base-spanish-wwm-uncased",
 
3
  "architectures": [
4
+ "BertForSequenceClassification"
5
  ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
  "id2label": {
13
  "0": "ar",
14
  "1": "cl",
 
21
  "8": "ve"
22
  },
23
  "initializer_range": 0.02,
24
+ "intermediate_size": 3072,
25
  "label2id": {
26
  "ar": 0,
27
  "cl": 1,
 
33
  "uy": 7,
34
  "ve": 8
35
  },
36
+ "layer_norm_eps": 1e-12,
37
  "max_position_embeddings": 512,
38
+ "model_type": "bert",
39
+ "num_attention_heads": 12,
40
+ "num_hidden_layers": 12,
41
+ "output_past": true,
42
+ "pad_token_id": 1,
43
+ "position_embedding_type": "absolute",
44
  "problem_type": "single_label_classification",
 
 
 
 
45
  "torch_dtype": "float32",
46
  "transformers_version": "4.48.3",
47
+ "type_vocab_size": 2,
48
+ "use_cache": true,
49
  "vocab_size": 31002
50
  }
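The config diff replaces the DistilBERT-specific fields (dim, n_heads, n_layers, sinusoidal_pos_embds, ...) with the standard BERT ones and keeps the nine-country label mapping. A small sketch for verifying the switch from a local checkout of this repo; only keys visible in the diff above are used.

```python
# Sketch: confirm the architecture switch and label mapping recorded in the
# updated config.json (assumes the file is available locally).
import json

with open("config.json") as f:
    cfg = json.load(f)

print(cfg["model_type"])         # "bert" after this commit (previously "distilbert")
print(cfg["architectures"])      # ["BertForSequenceClassification"]
print(cfg["num_hidden_layers"])  # 12 (the DistilBERT base had 6 layers)
print(sorted(cfg["label2id"], key=cfg["label2id"].get))
# ['ar', 'cl', 'co', 'es', 'mx', 'pe', 'pr', 'uy', 've']
```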
logs/events.out.tfevents.1740157634.a2d653e866f9.226.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:646126b025c968bf4bd3116e12b41d96bfb11ffd3e476cc70526ebb920ce8b86
3
+ size 354
logs/events.out.tfevents.1740162891.0a0ba32201a8.2151.21 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ab77507abc6135face2f24050935579841c0edd04ed429b2b14a790d9de3346
3
+ size 100428
logs/events.out.tfevents.1740163730.0a0ba32201a8.2151.22 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c9e1f0c540ff78c3cd5ed026bc180b0a9e5b858cf3411c46ddbc70bd2f355ef
3
+ size 405
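The three new logs/events.out.tfevents.* entries are Git LFS pointers to TensorBoard event files, so only the oid/size metadata appears in the diff. Once the LFS objects are pulled (git lfs pull), the logged scalars can be read with TensorBoard's event accumulator; the snippet below is a sketch, and the exact scalar tags depend on how the Trainer logged the run.

```python
# Sketch: list and dump the scalar series stored in the new TensorBoard event
# files. Requires the LFS objects to be fetched first (e.g. `git lfs pull`).
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

acc = EventAccumulator("logs")  # directory holding the events.out.tfevents.* files
acc.Reload()

scalar_tags = acc.Tags()["scalars"]
print(scalar_tags)  # tag names depend on the run (e.g. train/loss, eval/loss)
for event in acc.Scalars(scalar_tags[0]):
    print(event.step, event.value)
```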
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2e9ad0f28c73e32d964399d0a61f45d861c8a1af2235599dd7b8c5d38517ce4a
3
- size 269328660
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9640bf7ca7b6a541089109fcdb8dd4b103d14e514f0b0b70e6032897467855b5
3
+ size 439454740
model_stats.json CHANGED
@@ -1,177 +1,96 @@
1
  {
2
  "dataset": {
3
- "splits": {
 
 
4
  "train": {
5
- "num_examples": 3553,
6
- "features": [
7
- "author",
8
- "country",
9
- "post_text",
10
- "created_at",
11
- "likes",
12
- "quotes",
13
- "replies",
14
- "reposts",
15
- "post_id",
16
- "lang",
17
- "tags",
18
- "image_links",
19
- "profile_info",
20
- "processed_text",
21
- "__index_level_0__"
22
- ],
23
- "first_example": {
24
- "author": "apalet.bsky.social",
25
- "country": "cl",
26
- "post_text": "Me lloran los ojos sin pedir permiso ni razón.",
27
- "created_at": "2024-12-31T01:15:04.751Z",
28
- "likes": 8,
29
- "quotes": 0,
30
- "replies": 1,
31
- "reposts": 0,
32
- "post_id": "at://did:plc:y4bvfcu7wrx7jlyjg6qglqzl/app.bsky.feed.post/3lekuofg3xk2v",
33
- "lang": "es",
34
- "tags": "",
35
- "image_links": "",
36
- "profile_info": {
37
- "description": "Leo y olvido \nwww.laurel.cl\nhttps://revistadossier.udp.cl",
38
- "display_name": "Andrea Palet",
39
- "feed_uri": null,
40
- "followers_count": 1355,
41
- "follows_count": 304,
42
- "indexed_at": "2024-11-30T20:22:38.048Z",
43
- "posts_count": 515
44
- },
45
- "processed_text": "lloran ojos pedir permiso razón",
46
- "__index_level_0__": 4653
47
  },
48
- "country_distribution": {
49
- "ar": {
50
- "count": 483,
51
- "percentage": 13.594145792288206
52
- },
53
- "cl": {
54
- "count": 614,
55
- "percentage": 17.28117084154236
56
- },
57
- "co": {
58
- "count": 399,
59
- "percentage": 11.229946524064172
60
- },
61
- "es": {
62
- "count": 774,
63
- "percentage": 21.784407542921475
64
- },
65
- "mx": {
66
- "count": 473,
67
- "percentage": 13.312693498452013
68
- },
69
- "pe": {
70
- "count": 376,
71
- "percentage": 10.582606248240923
72
- },
73
- "pr": {
74
- "count": 117,
75
- "percentage": 3.2929918378834784
76
- },
77
- "uy": {
78
- "count": 267,
79
- "percentage": 7.5147762454264
80
- },
81
- "ve": {
82
- "count": 50,
83
- "percentage": 1.407261469180974
84
- }
85
  }
86
  },
87
  "test": {
88
- "num_examples": 1524,
89
- "features": [
90
- "author",
91
- "country",
92
- "post_text",
93
- "created_at",
94
- "likes",
95
- "quotes",
96
- "replies",
97
- "reposts",
98
- "post_id",
99
- "lang",
100
- "tags",
101
- "image_links",
102
- "profile_info",
103
- "processed_text",
104
- "__index_level_0__"
105
- ],
106
- "first_example": {
107
- "author": "rpalomino.bsky.social",
108
- "country": "pe",
109
- "post_text": "Dos libros que he estado esperando leer hace mucho.",
110
- "created_at": "2024-12-25T16:32:48.934Z",
111
- "likes": 2,
112
- "quotes": 0,
113
- "replies": 1,
114
- "reposts": 0,
115
- "post_id": "at://did:plc:65xzu4se42blbypbgmki3xxv/app.bsky.feed.post/3le5f5vxtmc2h",
116
- "lang": "es",
117
- "tags": "",
118
- "image_links": "",
119
- "profile_info": {
120
- "description": "Literatura y sociedad.",
121
- "display_name": "Roy Palomino",
122
- "feed_uri": null,
123
- "followers_count": 174,
124
- "follows_count": 283,
125
- "indexed_at": "2024-11-25T05:12:19.445Z",
126
- "posts_count": 90
127
- },
128
- "processed_text": "libros esperando leer",
129
- "__index_level_0__": 3477
130
  },
131
- "country_distribution": {
132
- "ar": {
133
- "count": 207,
134
- "percentage": 13.582677165354331
135
- },
136
- "cl": {
137
- "count": 263,
138
- "percentage": 17.25721784776903
139
- },
140
- "co": {
141
- "count": 171,
142
- "percentage": 11.220472440944881
143
- },
144
- "es": {
145
- "count": 332,
146
- "percentage": 21.784776902887142
147
- },
148
- "mx": {
149
- "count": 203,
150
- "percentage": 13.32020997375328
151
- },
152
- "pe": {
153
- "count": 161,
154
- "percentage": 10.564304461942257
155
- },
156
- "pr": {
157
- "count": 50,
158
- "percentage": 3.2808398950131235
159
- },
160
- "uy": {
161
- "count": 115,
162
- "percentage": 7.545931758530183
163
- },
164
- "ve": {
165
- "count": 22,
166
- "percentage": 1.4435695538057742
167
- }
168
  }
169
  }
170
  },
171
- "processed": {
172
- "train_examples": 3553,
173
- "test_examples": 1524
174
- }
 
 
 
 
 
 
 
175
  },
176
  "model": {
177
  "name": "dccuchile/distilbert-base-spanish-uncased",
@@ -185,152 +104,152 @@
185
  "hub_url": "https://huggingface.co/None/bluesky-spanish-classifier"
186
  },
187
  "training": {
188
- "runtime_seconds": 97.8884,
189
- "loss": 2.1454918384552,
190
- "steps_per_second": 0.02
191
  },
192
  "evaluation": {
193
  "ar": {
194
- "precision": 0.10526315789473684,
195
- "recall": 0.08695652173913043,
196
- "f1-score": 0.09523809523809523,
197
  "support": 207.0
198
  },
199
  "cl": {
200
- "precision": 0.17346938775510204,
201
- "recall": 0.19391634980988592,
202
- "f1-score": 0.18312387791741472,
203
  "support": 263.0
204
  },
205
  "co": {
206
- "precision": 0.06451612903225806,
207
- "recall": 0.011695906432748537,
208
- "f1-score": 0.019801980198019802,
209
  "support": 171.0
210
  },
211
  "es": {
212
- "precision": 0.24783362218370883,
213
- "recall": 0.4307228915662651,
214
- "f1-score": 0.3146314631463146,
215
  "support": 332.0
216
  },
217
  "mx": {
218
- "precision": 0.15086206896551724,
219
- "recall": 0.1724137931034483,
220
- "f1-score": 0.16091954022988506,
221
  "support": 203.0
222
  },
223
  "pe": {
224
- "precision": 0.061946902654867256,
225
- "recall": 0.043478260869565216,
226
- "f1-score": 0.051094890510948905,
227
  "support": 161.0
228
  },
229
  "pr": {
230
- "precision": 0.5,
231
- "recall": 0.02,
232
- "f1-score": 0.038461538461538464,
233
  "support": 50.0
234
  },
235
  "uy": {
236
- "precision": 0.3181818181818182,
237
- "recall": 0.06086956521739131,
238
- "f1-score": 0.10218978102189781,
239
  "support": 115.0
240
  },
241
  "ve": {
242
- "precision": 0.012195121951219513,
243
- "recall": 0.045454545454545456,
244
- "f1-score": 0.019230769230769232,
245
  "support": 22.0
246
  },
247
- "accuracy": 0.17388451443569553,
248
  "macro avg": {
249
- "precision": 0.18158535651324753,
250
- "recall": 0.11838975935477558,
251
- "f1-score": 0.10941021510609822,
252
  "support": 1524.0
253
  },
254
  "weighted avg": {
255
- "precision": 0.1726919923848946,
256
- "recall": 0.17388451443569553,
257
- "f1-score": 0.151384890214964,
258
  "support": 1524.0
259
  },
260
  "final": {
261
- "eval_loss": 2.181152582168579,
262
  "eval_classification_report": {
263
  "ar": {
264
- "precision": 0.10526315789473684,
265
- "recall": 0.08695652173913043,
266
- "f1-score": 0.09523809523809523,
267
  "support": 207.0
268
  },
269
  "cl": {
270
- "precision": 0.17346938775510204,
271
- "recall": 0.19391634980988592,
272
- "f1-score": 0.18312387791741472,
273
  "support": 263.0
274
  },
275
  "co": {
276
- "precision": 0.06451612903225806,
277
- "recall": 0.011695906432748537,
278
- "f1-score": 0.019801980198019802,
279
  "support": 171.0
280
  },
281
  "es": {
282
- "precision": 0.24783362218370883,
283
- "recall": 0.4307228915662651,
284
- "f1-score": 0.3146314631463146,
285
  "support": 332.0
286
  },
287
  "mx": {
288
- "precision": 0.15086206896551724,
289
- "recall": 0.1724137931034483,
290
- "f1-score": 0.16091954022988506,
291
  "support": 203.0
292
  },
293
  "pe": {
294
- "precision": 0.061946902654867256,
295
- "recall": 0.043478260869565216,
296
- "f1-score": 0.051094890510948905,
297
  "support": 161.0
298
  },
299
  "pr": {
300
- "precision": 0.5,
301
- "recall": 0.02,
302
- "f1-score": 0.038461538461538464,
303
  "support": 50.0
304
  },
305
  "uy": {
306
- "precision": 0.3181818181818182,
307
- "recall": 0.06086956521739131,
308
- "f1-score": 0.10218978102189781,
309
  "support": 115.0
310
  },
311
  "ve": {
312
- "precision": 0.012195121951219513,
313
- "recall": 0.045454545454545456,
314
- "f1-score": 0.019230769230769232,
315
  "support": 22.0
316
  },
317
- "accuracy": 0.17388451443569553,
318
  "macro avg": {
319
- "precision": 0.18158535651324753,
320
- "recall": 0.11838975935477558,
321
- "f1-score": 0.10941021510609822,
322
  "support": 1524.0
323
  },
324
  "weighted avg": {
325
- "precision": 0.1726919923848946,
326
- "recall": 0.17388451443569553,
327
- "f1-score": 0.151384890214964,
328
  "support": 1524.0
329
  }
330
  },
331
- "eval_runtime": 97.6642,
332
- "eval_samples_per_second": 15.604,
333
- "eval_steps_per_second": 3.901,
334
  "epoch": 0.0022497187851518562
335
  }
336
  }
 
1
  {
2
  "dataset": {
3
+ "train_size": 3553,
4
+ "test_size": 1524,
5
+ "country_distribution": {
6
  "train": {
7
+ "ar": {
8
+ "count": 483,
9
+ "percentage": 13.594145792288206
10
+ },
11
+ "cl": {
12
+ "count": 614,
13
+ "percentage": 17.28117084154236
14
+ },
15
+ "co": {
16
+ "count": 399,
17
+ "percentage": 11.229946524064172
18
+ },
19
+ "es": {
20
+ "count": 774,
21
+ "percentage": 21.784407542921475
22
+ },
23
+ "mx": {
24
+ "count": 473,
25
+ "percentage": 13.312693498452013
 
 
26
  },
27
+ "pe": {
28
+ "count": 376,
29
+ "percentage": 10.582606248240923
30
+ },
31
+ "pr": {
32
+ "count": 117,
33
+ "percentage": 3.2929918378834784
34
+ },
35
+ "uy": {
36
+ "count": 267,
37
+ "percentage": 7.5147762454264
38
+ },
39
+ "ve": {
40
+ "count": 50,
41
+ "percentage": 1.407261469180974
 
 
42
  }
43
  },
44
  "test": {
45
+ "ar": {
46
+ "count": 207,
47
+ "percentage": 13.582677165354331
 
 
48
  },
49
+ "cl": {
50
+ "count": 263,
51
+ "percentage": 17.25721784776903
52
+ },
53
+ "co": {
54
+ "count": 171,
55
+ "percentage": 11.220472440944881
56
+ },
57
+ "es": {
58
+ "count": 332,
59
+ "percentage": 21.784776902887142
60
+ },
61
+ "mx": {
62
+ "count": 203,
63
+ "percentage": 13.32020997375328
64
+ },
65
+ "pe": {
66
+ "count": 161,
67
+ "percentage": 10.564304461942257
68
+ },
69
+ "pr": {
70
+ "count": 50,
71
+ "percentage": 3.2808398950131235
72
+ },
73
+ "uy": {
74
+ "count": 115,
75
+ "percentage": 7.545931758530183
76
+ },
77
+ "ve": {
78
+ "count": 22,
79
+ "percentage": 1.4435695538057742
 
 
 
 
 
 
80
  }
81
  }
82
  },
83
+ "unique_countries": [
84
+ "ar",
85
+ "cl",
86
+ "co",
87
+ "es",
88
+ "mx",
89
+ "pe",
90
+ "pr",
91
+ "uy",
92
+ "ve"
93
+ ]
94
  },
95
  "model": {
96
  "name": "dccuchile/distilbert-base-spanish-uncased",
 
104
  "hub_url": "https://huggingface.co/None/bluesky-spanish-classifier"
105
  },
106
  "training": {
107
+ "runtime_seconds": 105.9611,
108
+ "loss": 2.257941484451294,
109
+ "steps_per_second": 0.019
110
  },
111
  "evaluation": {
112
  "ar": {
113
+ "precision": 0.125,
114
+ "recall": 0.014492753623188406,
115
+ "f1-score": 0.025974025974025976,
116
  "support": 207.0
117
  },
118
  "cl": {
119
+ "precision": 0.0,
120
+ "recall": 0.0,
121
+ "f1-score": 0.0,
122
  "support": 263.0
123
  },
124
  "co": {
125
+ "precision": 0.0,
126
+ "recall": 0.0,
127
+ "f1-score": 0.0,
128
  "support": 171.0
129
  },
130
  "es": {
131
+ "precision": 0.6666666666666666,
132
+ "recall": 0.006024096385542169,
133
+ "f1-score": 0.011940298507462687,
134
  "support": 332.0
135
  },
136
  "mx": {
137
+ "precision": 0.0,
138
+ "recall": 0.0,
139
+ "f1-score": 0.0,
140
  "support": 203.0
141
  },
142
  "pe": {
143
+ "precision": 0.03333333333333333,
144
+ "recall": 0.006211180124223602,
145
+ "f1-score": 0.010471204188481676,
146
  "support": 161.0
147
  },
148
  "pr": {
149
+ "precision": 0.09401709401709402,
150
+ "recall": 0.22,
151
+ "f1-score": 0.1317365269461078,
152
  "support": 50.0
153
  },
154
  "uy": {
155
+ "precision": 0.07073715562174236,
156
+ "recall": 0.8260869565217391,
157
+ "f1-score": 0.13031550068587106,
158
  "support": 115.0
159
  },
160
  "ve": {
161
+ "precision": 0.0,
162
+ "recall": 0.0,
163
+ "f1-score": 0.0,
164
  "support": 22.0
165
  },
166
+ "accuracy": 0.07349081364829396,
167
  "macro avg": {
168
+ "precision": 0.10997269440431515,
169
+ "recall": 0.1192016651838548,
170
+ "f1-score": 0.03449306181132769,
171
  "support": 1524.0
172
  },
173
  "weighted avg": {
174
+ "precision": 0.17415395511637471,
175
+ "recall": 0.07349081364829396,
176
+ "f1-score": 0.021390928662484977,
177
  "support": 1524.0
178
  },
179
  "final": {
180
+ "eval_loss": 2.2149105072021484,
181
  "eval_classification_report": {
182
  "ar": {
183
+ "precision": 0.125,
184
+ "recall": 0.014492753623188406,
185
+ "f1-score": 0.025974025974025976,
186
  "support": 207.0
187
  },
188
  "cl": {
189
+ "precision": 0.0,
190
+ "recall": 0.0,
191
+ "f1-score": 0.0,
192
  "support": 263.0
193
  },
194
  "co": {
195
+ "precision": 0.0,
196
+ "recall": 0.0,
197
+ "f1-score": 0.0,
198
  "support": 171.0
199
  },
200
  "es": {
201
+ "precision": 0.6666666666666666,
202
+ "recall": 0.006024096385542169,
203
+ "f1-score": 0.011940298507462687,
204
  "support": 332.0
205
  },
206
  "mx": {
207
+ "precision": 0.0,
208
+ "recall": 0.0,
209
+ "f1-score": 0.0,
210
  "support": 203.0
211
  },
212
  "pe": {
213
+ "precision": 0.03333333333333333,
214
+ "recall": 0.006211180124223602,
215
+ "f1-score": 0.010471204188481676,
216
  "support": 161.0
217
  },
218
  "pr": {
219
+ "precision": 0.09401709401709402,
220
+ "recall": 0.22,
221
+ "f1-score": 0.1317365269461078,
222
  "support": 50.0
223
  },
224
  "uy": {
225
+ "precision": 0.07073715562174236,
226
+ "recall": 0.8260869565217391,
227
+ "f1-score": 0.13031550068587106,
228
  "support": 115.0
229
  },
230
  "ve": {
231
+ "precision": 0.0,
232
+ "recall": 0.0,
233
+ "f1-score": 0.0,
234
  "support": 22.0
235
  },
236
+ "accuracy": 0.07349081364829396,
237
  "macro avg": {
238
+ "precision": 0.10997269440431515,
239
+ "recall": 0.1192016651838548,
240
+ "f1-score": 0.03449306181132769,
241
  "support": 1524.0
242
  },
243
  "weighted avg": {
244
+ "precision": 0.17415395511637471,
245
+ "recall": 0.07349081364829396,
246
+ "f1-score": 0.021390928662484977,
247
  "support": 1524.0
248
  }
249
  },
250
+ "eval_runtime": 98.563,
251
+ "eval_samples_per_second": 15.462,
252
+ "eval_steps_per_second": 3.866,
253
  "epoch": 0.0022497187851518562
254
  }
255
  }
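model_stats.json was restructured in this commit: the per-split feature lists and first-example dumps were dropped in favour of flat train_size/test_size fields, a country_distribution block keyed by split, and a unique_countries list, alongside the updated training and evaluation numbers. A short sketch for reading the new layout (assumes a local checkout):

```python
# Sketch: summarise the restructured model_stats.json introduced by this commit.
import json

with open("model_stats.json") as f:
    stats = json.load(f)

dataset = stats["dataset"]
print(dataset["train_size"], dataset["test_size"])  # 3553 1524
for country, info in dataset["country_distribution"]["train"].items():
    print(f"{country}: {info['count']} posts ({info['percentage']:.1f}%)")
print(stats["evaluation"]["accuracy"])  # 0.0734... as recorded in this file
```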
tokenizer.json CHANGED
@@ -2,13 +2,13 @@
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
- "max_length": 32,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
9
  "padding": {
10
  "strategy": {
11
- "Fixed": 32
12
  },
13
  "direction": "Right",
14
  "pad_to_multiple_of": null,
 
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
+ "max_length": 64,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
9
  "padding": {
10
  "strategy": {
11
+ "Fixed": 64
12
  },
13
  "direction": "Right",
14
  "pad_to_multiple_of": null,
tokenizer_config.json CHANGED
@@ -53,6 +53,6 @@
53
  "sep_token": "[SEP]",
54
  "strip_accents": false,
55
  "tokenize_chinese_chars": true,
56
- "tokenizer_class": "DistilBertTokenizer",
57
  "unk_token": "[UNK]"
58
  }
 
53
  "sep_token": "[SEP]",
54
  "strip_accents": false,
55
  "tokenize_chinese_chars": true,
56
+ "tokenizer_class": "BertTokenizer",
57
  "unk_token": "[UNK]"
58
  }
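Two tokenizer-side changes land here: the fixed truncation/padding length in tokenizer.json moves from 32 to 64 tokens, and tokenizer_config.json now declares BertTokenizer instead of DistilBertTokenizer. Below is a sketch of the effect at encode time, assuming a local checkout of the repo; the sample sentence is the test-set example shown in model_stats.json.

```python
# Sketch: with the updated tokenizer files, encoding with the configured
# settings yields fixed-width 64-token sequences (previously 32).
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(".")  # local checkout containing tokenizer.json
enc = tokenizer(
    "Dos libros que he estado esperando leer hace mucho.",
    padding="max_length",
    truncation=True,
    max_length=64,
)
print(type(tokenizer).__name__)  # BertTokenizerFast (per the updated tokenizer_config.json)
print(len(enc["input_ids"]))     # 64
```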
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a931bb071306bc2c9f6731ed0c069d5875fa5eab471d84947b67f65d2ef43f3
3
- size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2de174b6ea17c52db95df91a80925879ae293fdc8fa93e84445ba29d1a5e95b
3
+ size 5432
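training_args.bin is the Trainer's pickled TrainingArguments object, which is why its size changes along with the hyperparameters. It can be inspected with torch.load, but only for files from a trusted source, since unpickling executes code; this is a sketch, not part of the training pipeline.

```python
# Sketch: inspect the pickled TrainingArguments. Only do this for trusted
# files; torch.load with weights_only=False unpickles arbitrary objects.
import torch

args = torch.load("training_args.bin", weights_only=False)
print(args.learning_rate, args.per_device_train_batch_size, args.num_train_epochs)
```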
trial_0/checkpoint-292/config.json ADDED
@@ -0,0 +1,50 @@
 
 
1
+ {
2
+ "_name_or_path": "dccuchile/bert-base-spanish-wwm-uncased",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "ar",
14
+ "1": "cl",
15
+ "2": "co",
16
+ "3": "es",
17
+ "4": "mx",
18
+ "5": "pe",
19
+ "6": "pr",
20
+ "7": "uy",
21
+ "8": "ve"
22
+ },
23
+ "initializer_range": 0.02,
24
+ "intermediate_size": 3072,
25
+ "label2id": {
26
+ "ar": 0,
27
+ "cl": 1,
28
+ "co": 2,
29
+ "es": 3,
30
+ "mx": 4,
31
+ "pe": 5,
32
+ "pr": 6,
33
+ "uy": 7,
34
+ "ve": 8
35
+ },
36
+ "layer_norm_eps": 1e-12,
37
+ "max_position_embeddings": 512,
38
+ "model_type": "bert",
39
+ "num_attention_heads": 12,
40
+ "num_hidden_layers": 12,
41
+ "output_past": true,
42
+ "pad_token_id": 1,
43
+ "position_embedding_type": "absolute",
44
+ "problem_type": "single_label_classification",
45
+ "torch_dtype": "float32",
46
+ "transformers_version": "4.48.3",
47
+ "type_vocab_size": 2,
48
+ "use_cache": true,
49
+ "vocab_size": 31002
50
+ }
trial_0/checkpoint-292/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76e0b013258938faa56136402a7e40a933eef90038e3f931c5780cf4f69b10bd
3
+ size 439454740
trial_0/checkpoint-292/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:751bd43e029cbd1d226e60e6a4fd7af47fee03b3990b3f6a8ecbc3ec83eeba06
3
+ size 879030522
trial_0/checkpoint-292/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0df68380976b6dbabd78e6cbea5eb7a814681ecd61ae85ed04b274788a2e922
3
+ size 14244
trial_0/checkpoint-292/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1778d089cccff46f89357266568881d0aa63539ed46bc3a93cfb7fba7fc007c3
3
+ size 1064
trial_0/checkpoint-292/special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
trial_0/checkpoint-292/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
trial_0/checkpoint-292/tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[MASK]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[PAD]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "3": {
20
+ "content": "[UNK]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "4": {
28
+ "content": "[CLS]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "5": {
36
+ "content": "[SEP]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "extra_special_tokens": {},
49
+ "mask_token": "[MASK]",
50
+ "model_max_length": 512,
51
+ "never_split": null,
52
+ "pad_token": "[PAD]",
53
+ "sep_token": "[SEP]",
54
+ "strip_accents": false,
55
+ "tokenize_chinese_chars": true,
56
+ "tokenizer_class": "BertTokenizer",
57
+ "unk_token": "[UNK]"
58
+ }
trial_0/checkpoint-292/trainer_state.json ADDED
@@ -0,0 +1,314 @@
 
 
1
+ {
2
+ "best_metric": 0.2140692864641761,
3
+ "best_model_checkpoint": "/content/drive/MyDrive/model_outputs/trial_0/checkpoint-292",
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 292,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.03424657534246575,
13
+ "grad_norm": 8.432939529418945,
14
+ "learning_rate": 8.18270817933484e-07,
15
+ "loss": 2.2234,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.0684931506849315,
20
+ "grad_norm": 8.3140230178833,
21
+ "learning_rate": 1.636541635866968e-06,
22
+ "loss": 2.2403,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.10273972602739725,
27
+ "grad_norm": 7.506945610046387,
28
+ "learning_rate": 2.454812453800452e-06,
29
+ "loss": 2.2297,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.136986301369863,
34
+ "grad_norm": 8.531285285949707,
35
+ "learning_rate": 3.273083271733936e-06,
36
+ "loss": 2.1823,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.17123287671232876,
41
+ "grad_norm": 7.951968669891357,
42
+ "learning_rate": 4.091354089667421e-06,
43
+ "loss": 2.1696,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.2054794520547945,
48
+ "grad_norm": 8.328391075134277,
49
+ "learning_rate": 4.909624907600904e-06,
50
+ "loss": 2.1415,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.23972602739726026,
55
+ "grad_norm": 7.337668418884277,
56
+ "learning_rate": 5.727895725534388e-06,
57
+ "loss": 2.1206,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.273972602739726,
62
+ "grad_norm": 7.09800386428833,
63
+ "learning_rate": 6.546166543467872e-06,
64
+ "loss": 2.0949,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.3082191780821918,
69
+ "grad_norm": 6.549777507781982,
70
+ "learning_rate": 7.364437361401356e-06,
71
+ "loss": 2.076,
72
+ "step": 90
73
+ },
74
+ {
75
+ "epoch": 0.3424657534246575,
76
+ "grad_norm": 7.464539051055908,
77
+ "learning_rate": 8.182708179334841e-06,
78
+ "loss": 2.096,
79
+ "step": 100
80
+ },
81
+ {
82
+ "epoch": 0.3767123287671233,
83
+ "grad_norm": 6.982883930206299,
84
+ "learning_rate": 9.000978997268324e-06,
85
+ "loss": 2.0615,
86
+ "step": 110
87
+ },
88
+ {
89
+ "epoch": 0.410958904109589,
90
+ "grad_norm": 7.149113655090332,
91
+ "learning_rate": 9.819249815201808e-06,
92
+ "loss": 2.0785,
93
+ "step": 120
94
+ },
95
+ {
96
+ "epoch": 0.4452054794520548,
97
+ "grad_norm": 6.507534503936768,
98
+ "learning_rate": 1.0637520633135292e-05,
99
+ "loss": 2.0865,
100
+ "step": 130
101
+ },
102
+ {
103
+ "epoch": 0.4794520547945205,
104
+ "grad_norm": 6.1932196617126465,
105
+ "learning_rate": 1.1455791451068777e-05,
106
+ "loss": 2.0533,
107
+ "step": 140
108
+ },
109
+ {
110
+ "epoch": 0.5136986301369864,
111
+ "grad_norm": 7.072916507720947,
112
+ "learning_rate": 1.1783771409556479e-05,
113
+ "loss": 2.0613,
114
+ "step": 150
115
+ },
116
+ {
117
+ "epoch": 0.547945205479452,
118
+ "grad_norm": 8.155049324035645,
119
+ "learning_rate": 1.1621460508598402e-05,
120
+ "loss": 2.0284,
121
+ "step": 160
122
+ },
123
+ {
124
+ "epoch": 0.5821917808219178,
125
+ "grad_norm": 9.3668851852417,
126
+ "learning_rate": 1.1459149607640322e-05,
127
+ "loss": 2.0137,
128
+ "step": 170
129
+ },
130
+ {
131
+ "epoch": 0.6164383561643836,
132
+ "grad_norm": 8.165146827697754,
133
+ "learning_rate": 1.1296838706682245e-05,
134
+ "loss": 1.9835,
135
+ "step": 180
136
+ },
137
+ {
138
+ "epoch": 0.6506849315068494,
139
+ "grad_norm": 9.0431547164917,
140
+ "learning_rate": 1.1134527805724166e-05,
141
+ "loss": 1.9934,
142
+ "step": 190
143
+ },
144
+ {
145
+ "epoch": 0.684931506849315,
146
+ "grad_norm": 8.92784309387207,
147
+ "learning_rate": 1.0972216904766088e-05,
148
+ "loss": 2.0081,
149
+ "step": 200
150
+ },
151
+ {
152
+ "epoch": 0.7191780821917808,
153
+ "grad_norm": 9.184441566467285,
154
+ "learning_rate": 1.0809906003808009e-05,
155
+ "loss": 1.9567,
156
+ "step": 210
157
+ },
158
+ {
159
+ "epoch": 0.7534246575342466,
160
+ "grad_norm": 10.262762069702148,
161
+ "learning_rate": 1.0647595102849931e-05,
162
+ "loss": 1.966,
163
+ "step": 220
164
+ },
165
+ {
166
+ "epoch": 0.7876712328767124,
167
+ "grad_norm": 7.367489337921143,
168
+ "learning_rate": 1.0485284201891854e-05,
169
+ "loss": 2.051,
170
+ "step": 230
171
+ },
172
+ {
173
+ "epoch": 0.821917808219178,
174
+ "grad_norm": 8.76405143737793,
175
+ "learning_rate": 1.0322973300933776e-05,
176
+ "loss": 2.0785,
177
+ "step": 240
178
+ },
179
+ {
180
+ "epoch": 0.8561643835616438,
181
+ "grad_norm": 7.100874900817871,
182
+ "learning_rate": 1.0160662399975697e-05,
183
+ "loss": 1.9977,
184
+ "step": 250
185
+ },
186
+ {
187
+ "epoch": 0.8904109589041096,
188
+ "grad_norm": 8.239387512207031,
189
+ "learning_rate": 9.998351499017618e-06,
190
+ "loss": 2.0099,
191
+ "step": 260
192
+ },
193
+ {
194
+ "epoch": 0.9246575342465754,
195
+ "grad_norm": 7.704412460327148,
196
+ "learning_rate": 9.83604059805954e-06,
197
+ "loss": 1.9851,
198
+ "step": 270
199
+ },
200
+ {
201
+ "epoch": 0.958904109589041,
202
+ "grad_norm": 7.8163652420043945,
203
+ "learning_rate": 9.673729697101463e-06,
204
+ "loss": 2.0085,
205
+ "step": 280
206
+ },
207
+ {
208
+ "epoch": 0.9931506849315068,
209
+ "grad_norm": 8.153071403503418,
210
+ "learning_rate": 9.511418796143383e-06,
211
+ "loss": 1.9962,
212
+ "step": 290
213
+ },
214
+ {
215
+ "epoch": 1.0,
216
+ "eval_classification_report": {
217
+ "accuracy": 0.2375,
218
+ "ar": {
219
+ "f1-score": 0.0622568093385214,
220
+ "precision": 0.1568627450980392,
221
+ "recall": 0.038834951456310676,
222
+ "support": 206.0
223
+ },
224
+ "cl": {
225
+ "f1-score": 0.2011173184357542,
226
+ "precision": 0.16901408450704225,
227
+ "recall": 0.2482758620689655,
228
+ "support": 290.0
229
+ },
230
+ "co": {
231
+ "f1-score": 0.31543624161073824,
232
+ "precision": 0.23383084577114427,
233
+ "recall": 0.4845360824742268,
234
+ "support": 291.0
235
+ },
236
+ "es": {
237
+ "f1-score": 0.27364185110663986,
238
+ "precision": 0.3119266055045872,
239
+ "recall": 0.24372759856630824,
240
+ "support": 279.0
241
+ },
242
+ "macro avg": {
243
+ "f1-score": 0.2140692864641761,
244
+ "precision": 0.24922489036795273,
245
+ "recall": 0.22042150190811827,
246
+ "support": 2000.0
247
+ },
248
+ "mx": {
249
+ "f1-score": 0.22950819672131148,
250
+ "precision": 0.28426395939086296,
251
+ "recall": 0.19243986254295534,
252
+ "support": 291.0
253
+ },
254
+ "pe": {
255
+ "f1-score": 0.10610079575596817,
256
+ "precision": 0.23255813953488372,
257
+ "recall": 0.06872852233676977,
258
+ "support": 291.0
259
+ },
260
+ "pr": {
261
+ "f1-score": 0.5030674846625767,
262
+ "precision": 0.6612903225806451,
263
+ "recall": 0.40594059405940597,
264
+ "support": 101.0
265
+ },
266
+ "uy": {
267
+ "f1-score": 0.2354948805460751,
268
+ "precision": 0.19327731092436976,
269
+ "recall": 0.30131004366812225,
270
+ "support": 229.0
271
+ },
272
+ "ve": {
273
+ "f1-score": 0.0,
274
+ "precision": 0.0,
275
+ "recall": 0.0,
276
+ "support": 22.0
277
+ },
278
+ "weighted avg": {
279
+ "f1-score": 0.22084365412222065,
280
+ "precision": 0.24892308331106963,
281
+ "recall": 0.2375,
282
+ "support": 2000.0
283
+ }
284
+ },
285
+ "eval_f1": 0.2140692864641761,
286
+ "eval_loss": 1.964627742767334,
287
+ "eval_runtime": 3.948,
288
+ "eval_samples_per_second": 506.591,
289
+ "eval_steps_per_second": 31.662,
290
+ "step": 292
291
+ }
292
+ ],
293
+ "logging_steps": 10,
294
+ "max_steps": 876,
295
+ "num_input_tokens_seen": 0,
296
+ "num_train_epochs": 3,
297
+ "save_steps": 500,
298
+ "stateful_callbacks": {
299
+ "TrainerControl": {
300
+ "args": {
301
+ "should_epoch_stop": false,
302
+ "should_evaluate": false,
303
+ "should_log": false,
304
+ "should_save": true,
305
+ "should_training_stop": false
306
+ },
307
+ "attributes": {}
308
+ }
309
+ },
310
+ "total_flos": 306938335993344.0,
311
+ "train_batch_size": 16,
312
+ "trial_name": null,
313
+ "trial_params": null
314
+ }
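Each checkpoint's trainer_state.json carries the best metric so far (macro F1 in this run), the path of the best checkpoint, and the step-by-step log history shown above. A small sketch for pulling those fields out of the file added here:

```python
# Sketch: read the checkpoint's trainer_state.json and report the tracked
# best metric plus the most recent logged training entry.
import json

with open("trial_0/checkpoint-292/trainer_state.json") as f:
    state = json.load(f)

print(state["best_metric"])            # 0.2140... (eval macro F1 at this checkpoint)
print(state["best_model_checkpoint"])
for entry in state["log_history"]:
    if "loss" in entry:                # training entries; eval entries use eval_* keys
        last = entry
print(last["step"], last["loss"], last["learning_rate"])
```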
trial_0/checkpoint-292/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf983bab71d39717083fd85c30b7e0fd950e8d457e647f61a7ff596cb390b138
3
+ size 5368
trial_0/checkpoint-292/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
trial_0/checkpoint-584/config.json ADDED
@@ -0,0 +1,50 @@
 
 
1
+ {
2
+ "_name_or_path": "dccuchile/bert-base-spanish-wwm-uncased",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "ar",
14
+ "1": "cl",
15
+ "2": "co",
16
+ "3": "es",
17
+ "4": "mx",
18
+ "5": "pe",
19
+ "6": "pr",
20
+ "7": "uy",
21
+ "8": "ve"
22
+ },
23
+ "initializer_range": 0.02,
24
+ "intermediate_size": 3072,
25
+ "label2id": {
26
+ "ar": 0,
27
+ "cl": 1,
28
+ "co": 2,
29
+ "es": 3,
30
+ "mx": 4,
31
+ "pe": 5,
32
+ "pr": 6,
33
+ "uy": 7,
34
+ "ve": 8
35
+ },
36
+ "layer_norm_eps": 1e-12,
37
+ "max_position_embeddings": 512,
38
+ "model_type": "bert",
39
+ "num_attention_heads": 12,
40
+ "num_hidden_layers": 12,
41
+ "output_past": true,
42
+ "pad_token_id": 1,
43
+ "position_embedding_type": "absolute",
44
+ "problem_type": "single_label_classification",
45
+ "torch_dtype": "float32",
46
+ "transformers_version": "4.48.3",
47
+ "type_vocab_size": 2,
48
+ "use_cache": true,
49
+ "vocab_size": 31002
50
+ }
trial_0/checkpoint-584/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4bd824eba668c9f99dbdde707cc6253632a9e5404c4c7bef800e5dba0b97408
3
+ size 439454740
trial_0/checkpoint-584/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9e5a12946127f3bcd02446e83b50c11c5b9bd0d42c2769ab3843e2416a6ea45
3
+ size 879030522
trial_0/checkpoint-584/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29298f33af39c877ad364d333a5cea911258abeb5e708833407727da7ed21d36
3
+ size 14244
trial_0/checkpoint-584/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3ae5ac764505bf16f8c0111375ef98ea91c30113225dcd6204117dc37e31e3b
3
+ size 1064
trial_0/checkpoint-584/special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
trial_0/checkpoint-584/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
trial_0/checkpoint-584/tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[MASK]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[PAD]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "3": {
20
+ "content": "[UNK]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "4": {
28
+ "content": "[CLS]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "5": {
36
+ "content": "[SEP]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "extra_special_tokens": {},
49
+ "mask_token": "[MASK]",
50
+ "model_max_length": 512,
51
+ "never_split": null,
52
+ "pad_token": "[PAD]",
53
+ "sep_token": "[SEP]",
54
+ "strip_accents": false,
55
+ "tokenize_chinese_chars": true,
56
+ "tokenizer_class": "BertTokenizer",
57
+ "unk_token": "[UNK]"
58
+ }
trial_0/checkpoint-584/trainer_state.json ADDED
@@ -0,0 +1,595 @@
 
 
1
+ {
2
+ "best_metric": 0.28404923689784706,
3
+ "best_model_checkpoint": "/content/drive/MyDrive/model_outputs/trial_0/checkpoint-584",
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 584,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.03424657534246575,
13
+ "grad_norm": 8.432939529418945,
14
+ "learning_rate": 8.18270817933484e-07,
15
+ "loss": 2.2234,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.0684931506849315,
20
+ "grad_norm": 8.3140230178833,
21
+ "learning_rate": 1.636541635866968e-06,
22
+ "loss": 2.2403,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.10273972602739725,
27
+ "grad_norm": 7.506945610046387,
28
+ "learning_rate": 2.454812453800452e-06,
29
+ "loss": 2.2297,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.136986301369863,
34
+ "grad_norm": 8.531285285949707,
35
+ "learning_rate": 3.273083271733936e-06,
36
+ "loss": 2.1823,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.17123287671232876,
41
+ "grad_norm": 7.951968669891357,
42
+ "learning_rate": 4.091354089667421e-06,
43
+ "loss": 2.1696,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.2054794520547945,
48
+ "grad_norm": 8.328391075134277,
49
+ "learning_rate": 4.909624907600904e-06,
50
+ "loss": 2.1415,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.23972602739726026,
55
+ "grad_norm": 7.337668418884277,
56
+ "learning_rate": 5.727895725534388e-06,
57
+ "loss": 2.1206,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.273972602739726,
62
+ "grad_norm": 7.09800386428833,
63
+ "learning_rate": 6.546166543467872e-06,
64
+ "loss": 2.0949,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.3082191780821918,
69
+ "grad_norm": 6.549777507781982,
70
+ "learning_rate": 7.364437361401356e-06,
71
+ "loss": 2.076,
72
+ "step": 90
73
+ },
74
+ {
75
+ "epoch": 0.3424657534246575,
76
+ "grad_norm": 7.464539051055908,
77
+ "learning_rate": 8.182708179334841e-06,
78
+ "loss": 2.096,
79
+ "step": 100
80
+ },
81
+ {
82
+ "epoch": 0.3767123287671233,
83
+ "grad_norm": 6.982883930206299,
84
+ "learning_rate": 9.000978997268324e-06,
85
+ "loss": 2.0615,
86
+ "step": 110
87
+ },
88
+ {
89
+ "epoch": 0.410958904109589,
90
+ "grad_norm": 7.149113655090332,
91
+ "learning_rate": 9.819249815201808e-06,
92
+ "loss": 2.0785,
93
+ "step": 120
94
+ },
95
+ {
96
+ "epoch": 0.4452054794520548,
97
+ "grad_norm": 6.507534503936768,
98
+ "learning_rate": 1.0637520633135292e-05,
99
+ "loss": 2.0865,
100
+ "step": 130
101
+ },
102
+ {
103
+ "epoch": 0.4794520547945205,
104
+ "grad_norm": 6.1932196617126465,
105
+ "learning_rate": 1.1455791451068777e-05,
106
+ "loss": 2.0533,
107
+ "step": 140
108
+ },
109
+ {
110
+ "epoch": 0.5136986301369864,
111
+ "grad_norm": 7.072916507720947,
112
+ "learning_rate": 1.1783771409556479e-05,
113
+ "loss": 2.0613,
114
+ "step": 150
115
+ },
116
+ {
117
+ "epoch": 0.547945205479452,
118
+ "grad_norm": 8.155049324035645,
119
+ "learning_rate": 1.1621460508598402e-05,
120
+ "loss": 2.0284,
121
+ "step": 160
122
+ },
123
+ {
124
+ "epoch": 0.5821917808219178,
125
+ "grad_norm": 9.3668851852417,
126
+ "learning_rate": 1.1459149607640322e-05,
127
+ "loss": 2.0137,
128
+ "step": 170
129
+ },
130
+ {
131
+ "epoch": 0.6164383561643836,
132
+ "grad_norm": 8.165146827697754,
133
+ "learning_rate": 1.1296838706682245e-05,
134
+ "loss": 1.9835,
135
+ "step": 180
136
+ },
137
+ {
138
+ "epoch": 0.6506849315068494,
139
+ "grad_norm": 9.0431547164917,
140
+ "learning_rate": 1.1134527805724166e-05,
141
+ "loss": 1.9934,
142
+ "step": 190
143
+ },
144
+ {
145
+ "epoch": 0.684931506849315,
146
+ "grad_norm": 8.92784309387207,
147
+ "learning_rate": 1.0972216904766088e-05,
148
+ "loss": 2.0081,
149
+ "step": 200
150
+ },
151
+ {
152
+ "epoch": 0.7191780821917808,
153
+ "grad_norm": 9.184441566467285,
154
+ "learning_rate": 1.0809906003808009e-05,
155
+ "loss": 1.9567,
156
+ "step": 210
157
+ },
158
+ {
159
+ "epoch": 0.7534246575342466,
160
+ "grad_norm": 10.262762069702148,
161
+ "learning_rate": 1.0647595102849931e-05,
162
+ "loss": 1.966,
163
+ "step": 220
164
+ },
165
+ {
166
+ "epoch": 0.7876712328767124,
167
+ "grad_norm": 7.367489337921143,
168
+ "learning_rate": 1.0485284201891854e-05,
169
+ "loss": 2.051,
170
+ "step": 230
171
+ },
172
+ {
173
+ "epoch": 0.821917808219178,
174
+ "grad_norm": 8.76405143737793,
175
+ "learning_rate": 1.0322973300933776e-05,
176
+ "loss": 2.0785,
177
+ "step": 240
178
+ },
179
+ {
180
+ "epoch": 0.8561643835616438,
181
+ "grad_norm": 7.100874900817871,
182
+ "learning_rate": 1.0160662399975697e-05,
183
+ "loss": 1.9977,
184
+ "step": 250
185
+ },
186
+ {
187
+ "epoch": 0.8904109589041096,
188
+ "grad_norm": 8.239387512207031,
189
+ "learning_rate": 9.998351499017618e-06,
190
+ "loss": 2.0099,
191
+ "step": 260
192
+ },
193
+ {
194
+ "epoch": 0.9246575342465754,
195
+ "grad_norm": 7.704412460327148,
196
+ "learning_rate": 9.83604059805954e-06,
197
+ "loss": 1.9851,
198
+ "step": 270
199
+ },
200
+ {
201
+ "epoch": 0.958904109589041,
202
+ "grad_norm": 7.8163652420043945,
203
+ "learning_rate": 9.673729697101463e-06,
204
+ "loss": 2.0085,
205
+ "step": 280
206
+ },
207
+ {
208
+ "epoch": 0.9931506849315068,
209
+ "grad_norm": 8.153071403503418,
210
+ "learning_rate": 9.511418796143383e-06,
211
+ "loss": 1.9962,
212
+ "step": 290
213
+ },
214
+ {
215
+ "epoch": 1.0,
216
+ "eval_classification_report": {
217
+ "accuracy": 0.2375,
218
+ "ar": {
219
+ "f1-score": 0.0622568093385214,
220
+ "precision": 0.1568627450980392,
221
+ "recall": 0.038834951456310676,
222
+ "support": 206.0
223
+ },
224
+ "cl": {
225
+ "f1-score": 0.2011173184357542,
226
+ "precision": 0.16901408450704225,
227
+ "recall": 0.2482758620689655,
228
+ "support": 290.0
229
+ },
230
+ "co": {
231
+ "f1-score": 0.31543624161073824,
232
+ "precision": 0.23383084577114427,
233
+ "recall": 0.4845360824742268,
234
+ "support": 291.0
235
+ },
236
+ "es": {
237
+ "f1-score": 0.27364185110663986,
238
+ "precision": 0.3119266055045872,
239
+ "recall": 0.24372759856630824,
240
+ "support": 279.0
241
+ },
242
+ "macro avg": {
243
+ "f1-score": 0.2140692864641761,
244
+ "precision": 0.24922489036795273,
245
+ "recall": 0.22042150190811827,
246
+ "support": 2000.0
247
+ },
248
+ "mx": {
249
+ "f1-score": 0.22950819672131148,
250
+ "precision": 0.28426395939086296,
251
+ "recall": 0.19243986254295534,
252
+ "support": 291.0
253
+ },
254
+ "pe": {
255
+ "f1-score": 0.10610079575596817,
256
+ "precision": 0.23255813953488372,
257
+ "recall": 0.06872852233676977,
258
+ "support": 291.0
259
+ },
260
+ "pr": {
261
+ "f1-score": 0.5030674846625767,
262
+ "precision": 0.6612903225806451,
263
+ "recall": 0.40594059405940597,
264
+ "support": 101.0
265
+ },
266
+ "uy": {
267
+ "f1-score": 0.2354948805460751,
268
+ "precision": 0.19327731092436976,
269
+ "recall": 0.30131004366812225,
270
+ "support": 229.0
271
+ },
272
+ "ve": {
273
+ "f1-score": 0.0,
274
+ "precision": 0.0,
275
+ "recall": 0.0,
276
+ "support": 22.0
277
+ },
278
+ "weighted avg": {
279
+ "f1-score": 0.22084365412222065,
280
+ "precision": 0.24892308331106963,
281
+ "recall": 0.2375,
282
+ "support": 2000.0
283
+ }
284
+ },
285
+ "eval_f1": 0.2140692864641761,
286
+ "eval_loss": 1.964627742767334,
287
+ "eval_runtime": 3.948,
288
+ "eval_samples_per_second": 506.591,
289
+ "eval_steps_per_second": 31.662,
290
+ "step": 292
291
+ },
292
+ {
293
+ "epoch": 1.0273972602739727,
294
+ "grad_norm": 7.403143882751465,
295
+ "learning_rate": 9.349107895185306e-06,
296
+ "loss": 1.9144,
297
+ "step": 300
298
+ },
299
+ {
300
+ "epoch": 1.0616438356164384,
301
+ "grad_norm": 7.673084735870361,
302
+ "learning_rate": 9.186796994227228e-06,
303
+ "loss": 1.8783,
304
+ "step": 310
305
+ },
306
+ {
307
+ "epoch": 1.095890410958904,
308
+ "grad_norm": 7.875649452209473,
309
+ "learning_rate": 9.024486093269149e-06,
310
+ "loss": 1.9275,
311
+ "step": 320
312
+ },
313
+ {
314
+ "epoch": 1.13013698630137,
315
+ "grad_norm": 8.519344329833984,
316
+ "learning_rate": 8.86217519231107e-06,
317
+ "loss": 1.9603,
318
+ "step": 330
319
+ },
320
+ {
321
+ "epoch": 1.1643835616438356,
322
+ "grad_norm": 11.323283195495605,
323
+ "learning_rate": 8.699864291352992e-06,
324
+ "loss": 1.923,
325
+ "step": 340
326
+ },
327
+ {
328
+ "epoch": 1.1986301369863013,
329
+ "grad_norm": 8.269103050231934,
330
+ "learning_rate": 8.537553390394915e-06,
331
+ "loss": 1.9427,
332
+ "step": 350
333
+ },
334
+ {
335
+ "epoch": 1.2328767123287672,
336
+ "grad_norm": 9.630481719970703,
337
+ "learning_rate": 8.375242489436837e-06,
338
+ "loss": 1.8944,
339
+ "step": 360
340
+ },
341
+ {
342
+ "epoch": 1.2671232876712328,
343
+ "grad_norm": 10.00069808959961,
344
+ "learning_rate": 8.212931588478758e-06,
345
+ "loss": 1.8854,
346
+ "step": 370
347
+ },
348
+ {
349
+ "epoch": 1.3013698630136985,
350
+ "grad_norm": 10.448683738708496,
351
+ "learning_rate": 8.05062068752068e-06,
352
+ "loss": 1.9484,
353
+ "step": 380
354
+ },
355
+ {
356
+ "epoch": 1.3356164383561644,
357
+ "grad_norm": 9.772107124328613,
358
+ "learning_rate": 7.888309786562601e-06,
359
+ "loss": 1.8663,
360
+ "step": 390
361
+ },
362
+ {
363
+ "epoch": 1.36986301369863,
364
+ "grad_norm": 10.358853340148926,
365
+ "learning_rate": 7.725998885604523e-06,
366
+ "loss": 1.9082,
367
+ "step": 400
368
+ },
369
+ {
370
+ "epoch": 1.404109589041096,
371
+ "grad_norm": 11.836031913757324,
372
+ "learning_rate": 7.563687984646444e-06,
373
+ "loss": 1.881,
374
+ "step": 410
375
+ },
376
+ {
377
+ "epoch": 1.4383561643835616,
378
+ "grad_norm": 11.461163520812988,
379
+ "learning_rate": 7.401377083688367e-06,
380
+ "loss": 1.964,
381
+ "step": 420
382
+ },
383
+ {
384
+ "epoch": 1.4726027397260273,
385
+ "grad_norm": 9.836813926696777,
386
+ "learning_rate": 7.239066182730289e-06,
387
+ "loss": 1.8552,
388
+ "step": 430
389
+ },
390
+ {
391
+ "epoch": 1.5068493150684932,
392
+ "grad_norm": 12.651612281799316,
393
+ "learning_rate": 7.076755281772211e-06,
394
+ "loss": 1.8211,
395
+ "step": 440
396
+ },
397
+ {
398
+ "epoch": 1.541095890410959,
399
+ "grad_norm": 10.353775024414062,
400
+ "learning_rate": 6.9144443808141315e-06,
401
+ "loss": 1.905,
402
+ "step": 450
403
+ },
404
+ {
405
+ "epoch": 1.5753424657534247,
406
+ "grad_norm": 9.688141822814941,
407
+ "learning_rate": 6.752133479856054e-06,
408
+ "loss": 1.8525,
409
+ "step": 460
410
+ },
411
+ {
412
+ "epoch": 1.6095890410958904,
413
+ "grad_norm": 10.979829788208008,
414
+ "learning_rate": 6.5898225788979756e-06,
415
+ "loss": 1.8468,
416
+ "step": 470
417
+ },
418
+ {
419
+ "epoch": 1.643835616438356,
420
+ "grad_norm": 11.913898468017578,
421
+ "learning_rate": 6.427511677939898e-06,
422
+ "loss": 1.8301,
423
+ "step": 480
424
+ },
425
+ {
426
+ "epoch": 1.678082191780822,
427
+ "grad_norm": 9.506439208984375,
428
+ "learning_rate": 6.265200776981819e-06,
429
+ "loss": 1.9175,
430
+ "step": 490
431
+ },
432
+ {
433
+ "epoch": 1.7123287671232876,
434
+ "grad_norm": 11.259939193725586,
435
+ "learning_rate": 6.102889876023741e-06,
436
+ "loss": 1.8674,
437
+ "step": 500
438
+ },
439
+ {
440
+ "epoch": 1.7465753424657535,
441
+ "grad_norm": 10.506684303283691,
442
+ "learning_rate": 5.940578975065663e-06,
443
+ "loss": 1.8201,
444
+ "step": 510
445
+ },
446
+ {
447
+ "epoch": 1.7808219178082192,
448
+ "grad_norm": 10.255843162536621,
449
+ "learning_rate": 5.7782680741075844e-06,
450
+ "loss": 1.83,
451
+ "step": 520
452
+ },
453
+ {
454
+ "epoch": 1.8150684931506849,
455
+ "grad_norm": 10.053559303283691,
456
+ "learning_rate": 5.615957173149507e-06,
457
+ "loss": 1.8929,
458
+ "step": 530
459
+ },
460
+ {
461
+ "epoch": 1.8493150684931505,
462
+ "grad_norm": 11.122831344604492,
463
+ "learning_rate": 5.4536462721914285e-06,
464
+ "loss": 1.8836,
465
+ "step": 540
466
+ },
467
+ {
468
+ "epoch": 1.8835616438356164,
469
+ "grad_norm": 10.520340919494629,
470
+ "learning_rate": 5.29133537123335e-06,
471
+ "loss": 1.8587,
472
+ "step": 550
473
+ },
474
+ {
475
+ "epoch": 1.9178082191780823,
476
+ "grad_norm": 10.444989204406738,
477
+ "learning_rate": 5.129024470275272e-06,
478
+ "loss": 1.9133,
479
+ "step": 560
480
+ },
481
+ {
482
+ "epoch": 1.952054794520548,
483
+ "grad_norm": 9.19580078125,
484
+ "learning_rate": 4.966713569317194e-06,
485
+ "loss": 1.7403,
486
+ "step": 570
487
+ },
488
+ {
489
+ "epoch": 1.9863013698630136,
490
+ "grad_norm": 9.467530250549316,
491
+ "learning_rate": 4.804402668359115e-06,
492
+ "loss": 1.854,
493
+ "step": 580
494
+ },
495
+ {
496
+ "epoch": 2.0,
497
+ "eval_classification_report": {
498
+ "accuracy": 0.3015,
499
+ "ar": {
500
+ "f1-score": 0.20817843866171004,
501
+ "precision": 0.4444444444444444,
502
+ "recall": 0.13592233009708737,
503
+ "support": 206.0
504
+ },
505
+ "cl": {
506
+ "f1-score": 0.24404761904761904,
507
+ "precision": 0.21465968586387435,
508
+ "recall": 0.2827586206896552,
509
+ "support": 290.0
510
+ },
511
+ "co": {
512
+ "f1-score": 0.3091190108191654,
513
+ "precision": 0.2808988764044944,
514
+ "recall": 0.3436426116838488,
515
+ "support": 291.0
516
+ },
517
+ "es": {
518
+ "f1-score": 0.36363636363636365,
519
+ "precision": 0.31891891891891894,
520
+ "recall": 0.4229390681003584,
521
+ "support": 279.0
522
+ },
523
+ "macro avg": {
524
+ "f1-score": 0.28404923689784706,
525
+ "precision": 0.313232548351025,
526
+ "recall": 0.28845705885930073,
527
+ "support": 2000.0
528
+ },
529
+ "mx": {
530
+ "f1-score": 0.3058350100603622,
531
+ "precision": 0.36893203883495146,
532
+ "recall": 0.2611683848797251,
533
+ "support": 291.0
534
+ },
535
+ "pe": {
536
+ "f1-score": 0.23582089552238805,
537
+ "precision": 0.20844327176781002,
538
+ "recall": 0.27147766323024053,
539
+ "support": 291.0
540
+ },
541
+ "pr": {
542
+ "f1-score": 0.5688888888888889,
543
+ "precision": 0.5161290322580645,
544
+ "recall": 0.6336633663366337,
545
+ "support": 101.0
546
+ },
547
+ "uy": {
548
+ "f1-score": 0.3209169054441261,
549
+ "precision": 0.4666666666666667,
550
+ "recall": 0.2445414847161572,
551
+ "support": 229.0
552
+ },
553
+ "ve": {
554
+ "f1-score": 0.0,
555
+ "precision": 0.0,
556
+ "recall": 0.0,
557
+ "support": 22.0
558
+ },
559
+ "weighted avg": {
560
+ "f1-score": 0.29681818157005363,
561
+ "precision": 0.3257693650891501,
562
+ "recall": 0.3015,
563
+ "support": 2000.0
564
+ }
565
+ },
566
+ "eval_f1": 0.28404923689784706,
567
+ "eval_loss": 1.8588982820510864,
568
+ "eval_runtime": 3.9624,
569
+ "eval_samples_per_second": 504.741,
570
+ "eval_steps_per_second": 31.546,
571
+ "step": 584
572
+ }
573
+ ],
574
+ "logging_steps": 10,
575
+ "max_steps": 876,
576
+ "num_input_tokens_seen": 0,
577
+ "num_train_epochs": 3,
578
+ "save_steps": 500,
579
+ "stateful_callbacks": {
580
+ "TrainerControl": {
581
+ "args": {
582
+ "should_epoch_stop": false,
583
+ "should_evaluate": false,
584
+ "should_log": false,
585
+ "should_save": true,
586
+ "should_training_stop": false
587
+ },
588
+ "attributes": {}
589
+ }
590
+ },
591
+ "total_flos": 613876671986688.0,
592
+ "train_batch_size": 16,
593
+ "trial_name": null,
594
+ "trial_params": null
595
+ }
trial_0/checkpoint-584/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cf983bab71d39717083fd85c30b7e0fd950e8d457e647f61a7ff596cb390b138
+ size 5368
trial_0/checkpoint-584/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
trial_0/checkpoint-876/config.json ADDED
@@ -0,0 +1,50 @@
+ {
+ "_name_or_path": "dccuchile/bert-base-spanish-wwm-uncased",
+ "architectures": [
+ "BertForSequenceClassification"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "classifier_dropout": null,
+ "gradient_checkpointing": false,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 768,
+ "id2label": {
+ "0": "ar",
+ "1": "cl",
+ "2": "co",
+ "3": "es",
+ "4": "mx",
+ "5": "pe",
+ "6": "pr",
+ "7": "uy",
+ "8": "ve"
+ },
+ "initializer_range": 0.02,
+ "intermediate_size": 3072,
+ "label2id": {
+ "ar": 0,
+ "cl": 1,
+ "co": 2,
+ "es": 3,
+ "mx": 4,
+ "pe": 5,
+ "pr": 6,
+ "uy": 7,
+ "ve": 8
+ },
+ "layer_norm_eps": 1e-12,
+ "max_position_embeddings": 512,
+ "model_type": "bert",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
+ "output_past": true,
+ "pad_token_id": 1,
+ "position_embedding_type": "absolute",
+ "problem_type": "single_label_classification",
+ "torch_dtype": "float32",
+ "transformers_version": "4.48.3",
+ "type_vocab_size": 2,
+ "use_cache": true,
+ "vocab_size": 31002
+ }
trial_0/checkpoint-876/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cf854bf20a145ed4aea50832d905cbc3b7c185076aa225249307cba72e17e404
+ size 439454740
trial_0/checkpoint-876/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a57e2c02488f59dc935a6f5d7b68b0210f98125fd0c15d37ff039eb0f9a9cb9e
+ size 879030522
trial_0/checkpoint-876/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a5ec19cccb67f774cb5d9d70fcb09d0964e6b44cd2249c3f0f947bc6c27a2d1c
+ size 14244
trial_0/checkpoint-876/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5f3e075f9e406a57a0eebb4f878639933cb4c2996bf03eaa3ac48b55e42597a6
+ size 1064
trial_0/checkpoint-876/special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
+ {
+ "cls_token": {
+ "content": "[CLS]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "mask_token": {
+ "content": "[MASK]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "[PAD]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "sep_token": {
+ "content": "[SEP]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "[UNK]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
trial_0/checkpoint-876/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
trial_0/checkpoint-876/tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
+ {
+ "added_tokens_decoder": {
+ "0": {
+ "content": "[MASK]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "[PAD]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "3": {
+ "content": "[UNK]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "4": {
+ "content": "[CLS]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "5": {
+ "content": "[SEP]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "clean_up_tokenization_spaces": false,
+ "cls_token": "[CLS]",
+ "do_basic_tokenize": true,
+ "do_lower_case": true,
+ "extra_special_tokens": {},
+ "mask_token": "[MASK]",
+ "model_max_length": 512,
+ "never_split": null,
+ "pad_token": "[PAD]",
+ "sep_token": "[SEP]",
+ "strip_accents": false,
+ "tokenize_chinese_chars": true,
+ "tokenizer_class": "BertTokenizer",
+ "unk_token": "[UNK]"
+ }
trial_0/checkpoint-876/trainer_state.json ADDED
@@ -0,0 +1,876 @@
1
+ {
2
+ "best_metric": 0.30473656895821705,
3
+ "best_model_checkpoint": "/content/drive/MyDrive/model_outputs/trial_0/checkpoint-876",
4
+ "epoch": 3.0,
5
+ "eval_steps": 500,
6
+ "global_step": 876,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.03424657534246575,
13
+ "grad_norm": 8.432939529418945,
14
+ "learning_rate": 8.18270817933484e-07,
15
+ "loss": 2.2234,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.0684931506849315,
20
+ "grad_norm": 8.3140230178833,
21
+ "learning_rate": 1.636541635866968e-06,
22
+ "loss": 2.2403,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.10273972602739725,
27
+ "grad_norm": 7.506945610046387,
28
+ "learning_rate": 2.454812453800452e-06,
29
+ "loss": 2.2297,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.136986301369863,
34
+ "grad_norm": 8.531285285949707,
35
+ "learning_rate": 3.273083271733936e-06,
36
+ "loss": 2.1823,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.17123287671232876,
41
+ "grad_norm": 7.951968669891357,
42
+ "learning_rate": 4.091354089667421e-06,
43
+ "loss": 2.1696,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.2054794520547945,
48
+ "grad_norm": 8.328391075134277,
49
+ "learning_rate": 4.909624907600904e-06,
50
+ "loss": 2.1415,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.23972602739726026,
55
+ "grad_norm": 7.337668418884277,
56
+ "learning_rate": 5.727895725534388e-06,
57
+ "loss": 2.1206,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.273972602739726,
62
+ "grad_norm": 7.09800386428833,
63
+ "learning_rate": 6.546166543467872e-06,
64
+ "loss": 2.0949,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.3082191780821918,
69
+ "grad_norm": 6.549777507781982,
70
+ "learning_rate": 7.364437361401356e-06,
71
+ "loss": 2.076,
72
+ "step": 90
73
+ },
74
+ {
75
+ "epoch": 0.3424657534246575,
76
+ "grad_norm": 7.464539051055908,
77
+ "learning_rate": 8.182708179334841e-06,
78
+ "loss": 2.096,
79
+ "step": 100
80
+ },
81
+ {
82
+ "epoch": 0.3767123287671233,
83
+ "grad_norm": 6.982883930206299,
84
+ "learning_rate": 9.000978997268324e-06,
85
+ "loss": 2.0615,
86
+ "step": 110
87
+ },
88
+ {
89
+ "epoch": 0.410958904109589,
90
+ "grad_norm": 7.149113655090332,
91
+ "learning_rate": 9.819249815201808e-06,
92
+ "loss": 2.0785,
93
+ "step": 120
94
+ },
95
+ {
96
+ "epoch": 0.4452054794520548,
97
+ "grad_norm": 6.507534503936768,
98
+ "learning_rate": 1.0637520633135292e-05,
99
+ "loss": 2.0865,
100
+ "step": 130
101
+ },
102
+ {
103
+ "epoch": 0.4794520547945205,
104
+ "grad_norm": 6.1932196617126465,
105
+ "learning_rate": 1.1455791451068777e-05,
106
+ "loss": 2.0533,
107
+ "step": 140
108
+ },
109
+ {
110
+ "epoch": 0.5136986301369864,
111
+ "grad_norm": 7.072916507720947,
112
+ "learning_rate": 1.1783771409556479e-05,
113
+ "loss": 2.0613,
114
+ "step": 150
115
+ },
116
+ {
117
+ "epoch": 0.547945205479452,
118
+ "grad_norm": 8.155049324035645,
119
+ "learning_rate": 1.1621460508598402e-05,
120
+ "loss": 2.0284,
121
+ "step": 160
122
+ },
123
+ {
124
+ "epoch": 0.5821917808219178,
125
+ "grad_norm": 9.3668851852417,
126
+ "learning_rate": 1.1459149607640322e-05,
127
+ "loss": 2.0137,
128
+ "step": 170
129
+ },
130
+ {
131
+ "epoch": 0.6164383561643836,
132
+ "grad_norm": 8.165146827697754,
133
+ "learning_rate": 1.1296838706682245e-05,
134
+ "loss": 1.9835,
135
+ "step": 180
136
+ },
137
+ {
138
+ "epoch": 0.6506849315068494,
139
+ "grad_norm": 9.0431547164917,
140
+ "learning_rate": 1.1134527805724166e-05,
141
+ "loss": 1.9934,
142
+ "step": 190
143
+ },
144
+ {
145
+ "epoch": 0.684931506849315,
146
+ "grad_norm": 8.92784309387207,
147
+ "learning_rate": 1.0972216904766088e-05,
148
+ "loss": 2.0081,
149
+ "step": 200
150
+ },
151
+ {
152
+ "epoch": 0.7191780821917808,
153
+ "grad_norm": 9.184441566467285,
154
+ "learning_rate": 1.0809906003808009e-05,
155
+ "loss": 1.9567,
156
+ "step": 210
157
+ },
158
+ {
159
+ "epoch": 0.7534246575342466,
160
+ "grad_norm": 10.262762069702148,
161
+ "learning_rate": 1.0647595102849931e-05,
162
+ "loss": 1.966,
163
+ "step": 220
164
+ },
165
+ {
166
+ "epoch": 0.7876712328767124,
167
+ "grad_norm": 7.367489337921143,
168
+ "learning_rate": 1.0485284201891854e-05,
169
+ "loss": 2.051,
170
+ "step": 230
171
+ },
172
+ {
173
+ "epoch": 0.821917808219178,
174
+ "grad_norm": 8.76405143737793,
175
+ "learning_rate": 1.0322973300933776e-05,
176
+ "loss": 2.0785,
177
+ "step": 240
178
+ },
179
+ {
180
+ "epoch": 0.8561643835616438,
181
+ "grad_norm": 7.100874900817871,
182
+ "learning_rate": 1.0160662399975697e-05,
183
+ "loss": 1.9977,
184
+ "step": 250
185
+ },
186
+ {
187
+ "epoch": 0.8904109589041096,
188
+ "grad_norm": 8.239387512207031,
189
+ "learning_rate": 9.998351499017618e-06,
190
+ "loss": 2.0099,
191
+ "step": 260
192
+ },
193
+ {
194
+ "epoch": 0.9246575342465754,
195
+ "grad_norm": 7.704412460327148,
196
+ "learning_rate": 9.83604059805954e-06,
197
+ "loss": 1.9851,
198
+ "step": 270
199
+ },
200
+ {
201
+ "epoch": 0.958904109589041,
202
+ "grad_norm": 7.8163652420043945,
203
+ "learning_rate": 9.673729697101463e-06,
204
+ "loss": 2.0085,
205
+ "step": 280
206
+ },
207
+ {
208
+ "epoch": 0.9931506849315068,
209
+ "grad_norm": 8.153071403503418,
210
+ "learning_rate": 9.511418796143383e-06,
211
+ "loss": 1.9962,
212
+ "step": 290
213
+ },
214
+ {
215
+ "epoch": 1.0,
216
+ "eval_classification_report": {
217
+ "accuracy": 0.2375,
218
+ "ar": {
219
+ "f1-score": 0.0622568093385214,
220
+ "precision": 0.1568627450980392,
221
+ "recall": 0.038834951456310676,
222
+ "support": 206.0
223
+ },
224
+ "cl": {
225
+ "f1-score": 0.2011173184357542,
226
+ "precision": 0.16901408450704225,
227
+ "recall": 0.2482758620689655,
228
+ "support": 290.0
229
+ },
230
+ "co": {
231
+ "f1-score": 0.31543624161073824,
232
+ "precision": 0.23383084577114427,
233
+ "recall": 0.4845360824742268,
234
+ "support": 291.0
235
+ },
236
+ "es": {
237
+ "f1-score": 0.27364185110663986,
238
+ "precision": 0.3119266055045872,
239
+ "recall": 0.24372759856630824,
240
+ "support": 279.0
241
+ },
242
+ "macro avg": {
243
+ "f1-score": 0.2140692864641761,
244
+ "precision": 0.24922489036795273,
245
+ "recall": 0.22042150190811827,
246
+ "support": 2000.0
247
+ },
248
+ "mx": {
249
+ "f1-score": 0.22950819672131148,
250
+ "precision": 0.28426395939086296,
251
+ "recall": 0.19243986254295534,
252
+ "support": 291.0
253
+ },
254
+ "pe": {
255
+ "f1-score": 0.10610079575596817,
256
+ "precision": 0.23255813953488372,
257
+ "recall": 0.06872852233676977,
258
+ "support": 291.0
259
+ },
260
+ "pr": {
261
+ "f1-score": 0.5030674846625767,
262
+ "precision": 0.6612903225806451,
263
+ "recall": 0.40594059405940597,
264
+ "support": 101.0
265
+ },
266
+ "uy": {
267
+ "f1-score": 0.2354948805460751,
268
+ "precision": 0.19327731092436976,
269
+ "recall": 0.30131004366812225,
270
+ "support": 229.0
271
+ },
272
+ "ve": {
273
+ "f1-score": 0.0,
274
+ "precision": 0.0,
275
+ "recall": 0.0,
276
+ "support": 22.0
277
+ },
278
+ "weighted avg": {
279
+ "f1-score": 0.22084365412222065,
280
+ "precision": 0.24892308331106963,
281
+ "recall": 0.2375,
282
+ "support": 2000.0
283
+ }
284
+ },
285
+ "eval_f1": 0.2140692864641761,
286
+ "eval_loss": 1.964627742767334,
287
+ "eval_runtime": 3.948,
288
+ "eval_samples_per_second": 506.591,
289
+ "eval_steps_per_second": 31.662,
290
+ "step": 292
291
+ },
292
+ {
293
+ "epoch": 1.0273972602739727,
294
+ "grad_norm": 7.403143882751465,
295
+ "learning_rate": 9.349107895185306e-06,
296
+ "loss": 1.9144,
297
+ "step": 300
298
+ },
299
+ {
300
+ "epoch": 1.0616438356164384,
301
+ "grad_norm": 7.673084735870361,
302
+ "learning_rate": 9.186796994227228e-06,
303
+ "loss": 1.8783,
304
+ "step": 310
305
+ },
306
+ {
307
+ "epoch": 1.095890410958904,
308
+ "grad_norm": 7.875649452209473,
309
+ "learning_rate": 9.024486093269149e-06,
310
+ "loss": 1.9275,
311
+ "step": 320
312
+ },
313
+ {
314
+ "epoch": 1.13013698630137,
315
+ "grad_norm": 8.519344329833984,
316
+ "learning_rate": 8.86217519231107e-06,
317
+ "loss": 1.9603,
318
+ "step": 330
319
+ },
320
+ {
321
+ "epoch": 1.1643835616438356,
322
+ "grad_norm": 11.323283195495605,
323
+ "learning_rate": 8.699864291352992e-06,
324
+ "loss": 1.923,
325
+ "step": 340
326
+ },
327
+ {
328
+ "epoch": 1.1986301369863013,
329
+ "grad_norm": 8.269103050231934,
330
+ "learning_rate": 8.537553390394915e-06,
331
+ "loss": 1.9427,
332
+ "step": 350
333
+ },
334
+ {
335
+ "epoch": 1.2328767123287672,
336
+ "grad_norm": 9.630481719970703,
337
+ "learning_rate": 8.375242489436837e-06,
338
+ "loss": 1.8944,
339
+ "step": 360
340
+ },
341
+ {
342
+ "epoch": 1.2671232876712328,
343
+ "grad_norm": 10.00069808959961,
344
+ "learning_rate": 8.212931588478758e-06,
345
+ "loss": 1.8854,
346
+ "step": 370
347
+ },
348
+ {
349
+ "epoch": 1.3013698630136985,
350
+ "grad_norm": 10.448683738708496,
351
+ "learning_rate": 8.05062068752068e-06,
352
+ "loss": 1.9484,
353
+ "step": 380
354
+ },
355
+ {
356
+ "epoch": 1.3356164383561644,
357
+ "grad_norm": 9.772107124328613,
358
+ "learning_rate": 7.888309786562601e-06,
359
+ "loss": 1.8663,
360
+ "step": 390
361
+ },
362
+ {
363
+ "epoch": 1.36986301369863,
364
+ "grad_norm": 10.358853340148926,
365
+ "learning_rate": 7.725998885604523e-06,
366
+ "loss": 1.9082,
367
+ "step": 400
368
+ },
369
+ {
370
+ "epoch": 1.404109589041096,
371
+ "grad_norm": 11.836031913757324,
372
+ "learning_rate": 7.563687984646444e-06,
373
+ "loss": 1.881,
374
+ "step": 410
375
+ },
376
+ {
377
+ "epoch": 1.4383561643835616,
378
+ "grad_norm": 11.461163520812988,
379
+ "learning_rate": 7.401377083688367e-06,
380
+ "loss": 1.964,
381
+ "step": 420
382
+ },
383
+ {
384
+ "epoch": 1.4726027397260273,
385
+ "grad_norm": 9.836813926696777,
386
+ "learning_rate": 7.239066182730289e-06,
387
+ "loss": 1.8552,
388
+ "step": 430
389
+ },
390
+ {
391
+ "epoch": 1.5068493150684932,
392
+ "grad_norm": 12.651612281799316,
393
+ "learning_rate": 7.076755281772211e-06,
394
+ "loss": 1.8211,
395
+ "step": 440
396
+ },
397
+ {
398
+ "epoch": 1.541095890410959,
399
+ "grad_norm": 10.353775024414062,
400
+ "learning_rate": 6.9144443808141315e-06,
401
+ "loss": 1.905,
402
+ "step": 450
403
+ },
404
+ {
405
+ "epoch": 1.5753424657534247,
406
+ "grad_norm": 9.688141822814941,
407
+ "learning_rate": 6.752133479856054e-06,
408
+ "loss": 1.8525,
409
+ "step": 460
410
+ },
411
+ {
412
+ "epoch": 1.6095890410958904,
413
+ "grad_norm": 10.979829788208008,
414
+ "learning_rate": 6.5898225788979756e-06,
415
+ "loss": 1.8468,
416
+ "step": 470
417
+ },
418
+ {
419
+ "epoch": 1.643835616438356,
420
+ "grad_norm": 11.913898468017578,
421
+ "learning_rate": 6.427511677939898e-06,
422
+ "loss": 1.8301,
423
+ "step": 480
424
+ },
425
+ {
426
+ "epoch": 1.678082191780822,
427
+ "grad_norm": 9.506439208984375,
428
+ "learning_rate": 6.265200776981819e-06,
429
+ "loss": 1.9175,
430
+ "step": 490
431
+ },
432
+ {
433
+ "epoch": 1.7123287671232876,
434
+ "grad_norm": 11.259939193725586,
435
+ "learning_rate": 6.102889876023741e-06,
436
+ "loss": 1.8674,
437
+ "step": 500
438
+ },
439
+ {
440
+ "epoch": 1.7465753424657535,
441
+ "grad_norm": 10.506684303283691,
442
+ "learning_rate": 5.940578975065663e-06,
443
+ "loss": 1.8201,
444
+ "step": 510
445
+ },
446
+ {
447
+ "epoch": 1.7808219178082192,
448
+ "grad_norm": 10.255843162536621,
449
+ "learning_rate": 5.7782680741075844e-06,
450
+ "loss": 1.83,
451
+ "step": 520
452
+ },
453
+ {
454
+ "epoch": 1.8150684931506849,
455
+ "grad_norm": 10.053559303283691,
456
+ "learning_rate": 5.615957173149507e-06,
457
+ "loss": 1.8929,
458
+ "step": 530
459
+ },
460
+ {
461
+ "epoch": 1.8493150684931505,
462
+ "grad_norm": 11.122831344604492,
463
+ "learning_rate": 5.4536462721914285e-06,
464
+ "loss": 1.8836,
465
+ "step": 540
466
+ },
467
+ {
468
+ "epoch": 1.8835616438356164,
469
+ "grad_norm": 10.520340919494629,
470
+ "learning_rate": 5.29133537123335e-06,
471
+ "loss": 1.8587,
472
+ "step": 550
473
+ },
474
+ {
475
+ "epoch": 1.9178082191780823,
476
+ "grad_norm": 10.444989204406738,
477
+ "learning_rate": 5.129024470275272e-06,
478
+ "loss": 1.9133,
479
+ "step": 560
480
+ },
481
+ {
482
+ "epoch": 1.952054794520548,
483
+ "grad_norm": 9.19580078125,
484
+ "learning_rate": 4.966713569317194e-06,
485
+ "loss": 1.7403,
486
+ "step": 570
487
+ },
488
+ {
489
+ "epoch": 1.9863013698630136,
490
+ "grad_norm": 9.467530250549316,
491
+ "learning_rate": 4.804402668359115e-06,
492
+ "loss": 1.854,
493
+ "step": 580
494
+ },
495
+ {
496
+ "epoch": 2.0,
497
+ "eval_classification_report": {
498
+ "accuracy": 0.3015,
499
+ "ar": {
500
+ "f1-score": 0.20817843866171004,
501
+ "precision": 0.4444444444444444,
502
+ "recall": 0.13592233009708737,
503
+ "support": 206.0
504
+ },
505
+ "cl": {
506
+ "f1-score": 0.24404761904761904,
507
+ "precision": 0.21465968586387435,
508
+ "recall": 0.2827586206896552,
509
+ "support": 290.0
510
+ },
511
+ "co": {
512
+ "f1-score": 0.3091190108191654,
513
+ "precision": 0.2808988764044944,
514
+ "recall": 0.3436426116838488,
515
+ "support": 291.0
516
+ },
517
+ "es": {
518
+ "f1-score": 0.36363636363636365,
519
+ "precision": 0.31891891891891894,
520
+ "recall": 0.4229390681003584,
521
+ "support": 279.0
522
+ },
523
+ "macro avg": {
524
+ "f1-score": 0.28404923689784706,
525
+ "precision": 0.313232548351025,
526
+ "recall": 0.28845705885930073,
527
+ "support": 2000.0
528
+ },
529
+ "mx": {
530
+ "f1-score": 0.3058350100603622,
531
+ "precision": 0.36893203883495146,
532
+ "recall": 0.2611683848797251,
533
+ "support": 291.0
534
+ },
535
+ "pe": {
536
+ "f1-score": 0.23582089552238805,
537
+ "precision": 0.20844327176781002,
538
+ "recall": 0.27147766323024053,
539
+ "support": 291.0
540
+ },
541
+ "pr": {
542
+ "f1-score": 0.5688888888888889,
543
+ "precision": 0.5161290322580645,
544
+ "recall": 0.6336633663366337,
545
+ "support": 101.0
546
+ },
547
+ "uy": {
548
+ "f1-score": 0.3209169054441261,
549
+ "precision": 0.4666666666666667,
550
+ "recall": 0.2445414847161572,
551
+ "support": 229.0
552
+ },
553
+ "ve": {
554
+ "f1-score": 0.0,
555
+ "precision": 0.0,
556
+ "recall": 0.0,
557
+ "support": 22.0
558
+ },
559
+ "weighted avg": {
560
+ "f1-score": 0.29681818157005363,
561
+ "precision": 0.3257693650891501,
562
+ "recall": 0.3015,
563
+ "support": 2000.0
564
+ }
565
+ },
566
+ "eval_f1": 0.28404923689784706,
567
+ "eval_loss": 1.8588982820510864,
568
+ "eval_runtime": 3.9624,
569
+ "eval_samples_per_second": 504.741,
570
+ "eval_steps_per_second": 31.546,
571
+ "step": 584
572
+ },
573
+ {
574
+ "epoch": 2.0205479452054793,
575
+ "grad_norm": 10.664342880249023,
576
+ "learning_rate": 4.642091767401037e-06,
577
+ "loss": 1.7675,
578
+ "step": 590
579
+ },
580
+ {
581
+ "epoch": 2.0547945205479454,
582
+ "grad_norm": 10.18624210357666,
583
+ "learning_rate": 4.479780866442959e-06,
584
+ "loss": 1.7303,
585
+ "step": 600
586
+ },
587
+ {
588
+ "epoch": 2.089041095890411,
589
+ "grad_norm": 9.922106742858887,
590
+ "learning_rate": 4.3174699654848806e-06,
591
+ "loss": 1.6809,
592
+ "step": 610
593
+ },
594
+ {
595
+ "epoch": 2.1232876712328768,
596
+ "grad_norm": 10.778169631958008,
597
+ "learning_rate": 4.155159064526802e-06,
598
+ "loss": 1.6858,
599
+ "step": 620
600
+ },
601
+ {
602
+ "epoch": 2.1575342465753424,
603
+ "grad_norm": 11.85101318359375,
604
+ "learning_rate": 3.992848163568725e-06,
605
+ "loss": 1.7164,
606
+ "step": 630
607
+ },
608
+ {
609
+ "epoch": 2.191780821917808,
610
+ "grad_norm": 10.55038833618164,
611
+ "learning_rate": 3.830537262610646e-06,
612
+ "loss": 1.7627,
613
+ "step": 640
614
+ },
615
+ {
616
+ "epoch": 2.2260273972602738,
617
+ "grad_norm": 12.37063217163086,
618
+ "learning_rate": 3.6682263616525683e-06,
619
+ "loss": 1.7254,
620
+ "step": 650
621
+ },
622
+ {
623
+ "epoch": 2.26027397260274,
624
+ "grad_norm": 12.823184967041016,
625
+ "learning_rate": 3.5059154606944894e-06,
626
+ "loss": 1.6745,
627
+ "step": 660
628
+ },
629
+ {
630
+ "epoch": 2.2945205479452055,
631
+ "grad_norm": 10.982261657714844,
632
+ "learning_rate": 3.3436045597364115e-06,
633
+ "loss": 1.6329,
634
+ "step": 670
635
+ },
636
+ {
637
+ "epoch": 2.328767123287671,
638
+ "grad_norm": 11.901023864746094,
639
+ "learning_rate": 3.181293658778333e-06,
640
+ "loss": 1.7054,
641
+ "step": 680
642
+ },
643
+ {
644
+ "epoch": 2.363013698630137,
645
+ "grad_norm": 12.643646240234375,
646
+ "learning_rate": 3.018982757820255e-06,
647
+ "loss": 1.6988,
648
+ "step": 690
649
+ },
650
+ {
651
+ "epoch": 2.3972602739726026,
652
+ "grad_norm": 11.213497161865234,
653
+ "learning_rate": 2.8566718568621767e-06,
654
+ "loss": 1.6856,
655
+ "step": 700
656
+ },
657
+ {
658
+ "epoch": 2.4315068493150687,
659
+ "grad_norm": 13.69401741027832,
660
+ "learning_rate": 2.6943609559040983e-06,
661
+ "loss": 1.7865,
662
+ "step": 710
663
+ },
664
+ {
665
+ "epoch": 2.4657534246575343,
666
+ "grad_norm": 11.204819679260254,
667
+ "learning_rate": 2.5320500549460203e-06,
668
+ "loss": 1.6012,
669
+ "step": 720
670
+ },
671
+ {
672
+ "epoch": 2.5,
673
+ "grad_norm": 11.135315895080566,
674
+ "learning_rate": 2.369739153987942e-06,
675
+ "loss": 1.7297,
676
+ "step": 730
677
+ },
678
+ {
679
+ "epoch": 2.5342465753424657,
680
+ "grad_norm": 11.281685829162598,
681
+ "learning_rate": 2.207428253029864e-06,
682
+ "loss": 1.7066,
683
+ "step": 740
684
+ },
685
+ {
686
+ "epoch": 2.5684931506849313,
687
+ "grad_norm": 12.174514770507812,
688
+ "learning_rate": 2.0451173520717856e-06,
689
+ "loss": 1.7291,
690
+ "step": 750
691
+ },
692
+ {
693
+ "epoch": 2.602739726027397,
694
+ "grad_norm": 11.84288501739502,
695
+ "learning_rate": 1.8828064511137074e-06,
696
+ "loss": 1.7143,
697
+ "step": 760
698
+ },
699
+ {
700
+ "epoch": 2.636986301369863,
701
+ "grad_norm": 13.132065773010254,
702
+ "learning_rate": 1.7204955501556292e-06,
703
+ "loss": 1.6739,
704
+ "step": 770
705
+ },
706
+ {
707
+ "epoch": 2.671232876712329,
708
+ "grad_norm": 10.684150695800781,
709
+ "learning_rate": 1.5581846491975508e-06,
710
+ "loss": 1.7677,
711
+ "step": 780
712
+ },
713
+ {
714
+ "epoch": 2.7054794520547945,
715
+ "grad_norm": 10.924251556396484,
716
+ "learning_rate": 1.3958737482394726e-06,
717
+ "loss": 1.7319,
718
+ "step": 790
719
+ },
720
+ {
721
+ "epoch": 2.73972602739726,
722
+ "grad_norm": 11.701458930969238,
723
+ "learning_rate": 1.2335628472813945e-06,
724
+ "loss": 1.633,
725
+ "step": 800
726
+ },
727
+ {
728
+ "epoch": 2.7739726027397262,
729
+ "grad_norm": 15.176057815551758,
730
+ "learning_rate": 1.0712519463233163e-06,
731
+ "loss": 1.7776,
732
+ "step": 810
733
+ },
734
+ {
735
+ "epoch": 2.808219178082192,
736
+ "grad_norm": 11.823851585388184,
737
+ "learning_rate": 9.08941045365238e-07,
738
+ "loss": 1.6718,
739
+ "step": 820
740
+ },
741
+ {
742
+ "epoch": 2.8424657534246576,
743
+ "grad_norm": 16.096837997436523,
744
+ "learning_rate": 7.466301444071598e-07,
745
+ "loss": 1.8125,
746
+ "step": 830
747
+ },
748
+ {
749
+ "epoch": 2.8767123287671232,
750
+ "grad_norm": 14.548500061035156,
751
+ "learning_rate": 5.843192434490816e-07,
752
+ "loss": 1.6709,
753
+ "step": 840
754
+ },
755
+ {
756
+ "epoch": 2.910958904109589,
757
+ "grad_norm": 14.510014533996582,
758
+ "learning_rate": 4.2200834249100337e-07,
759
+ "loss": 1.7332,
760
+ "step": 850
761
+ },
762
+ {
763
+ "epoch": 2.9452054794520546,
764
+ "grad_norm": 12.800880432128906,
765
+ "learning_rate": 2.5969744153292514e-07,
766
+ "loss": 1.656,
767
+ "step": 860
768
+ },
769
+ {
770
+ "epoch": 2.9794520547945207,
771
+ "grad_norm": 11.786734580993652,
772
+ "learning_rate": 9.738654057484693e-08,
773
+ "loss": 1.6054,
774
+ "step": 870
775
+ },
776
+ {
777
+ "epoch": 3.0,
778
+ "eval_classification_report": {
779
+ "accuracy": 0.321,
780
+ "ar": {
781
+ "f1-score": 0.24539877300613497,
782
+ "precision": 0.3333333333333333,
783
+ "recall": 0.1941747572815534,
784
+ "support": 206.0
785
+ },
786
+ "cl": {
787
+ "f1-score": 0.22151898734177214,
788
+ "precision": 0.2046783625730994,
789
+ "recall": 0.2413793103448276,
790
+ "support": 290.0
791
+ },
792
+ "co": {
793
+ "f1-score": 0.327212020033389,
794
+ "precision": 0.3181818181818182,
795
+ "recall": 0.33676975945017185,
796
+ "support": 291.0
797
+ },
798
+ "es": {
799
+ "f1-score": 0.39095315024232635,
800
+ "precision": 0.3558823529411765,
801
+ "recall": 0.4336917562724014,
802
+ "support": 279.0
803
+ },
804
+ "macro avg": {
805
+ "f1-score": 0.30473656895821705,
806
+ "precision": 0.31299377782399707,
807
+ "recall": 0.3074325006319518,
808
+ "support": 2000.0
809
+ },
810
+ "mx": {
811
+ "f1-score": 0.36075949367088606,
812
+ "precision": 0.3343108504398827,
813
+ "recall": 0.3917525773195876,
814
+ "support": 291.0
815
+ },
816
+ "pe": {
817
+ "f1-score": 0.23850085178875638,
818
+ "precision": 0.23648648648648649,
819
+ "recall": 0.24054982817869416,
820
+ "support": 291.0
821
+ },
822
+ "pr": {
823
+ "f1-score": 0.616822429906542,
824
+ "precision": 0.584070796460177,
825
+ "recall": 0.6534653465346535,
826
+ "support": 101.0
827
+ },
828
+ "uy": {
829
+ "f1-score": 0.34146341463414637,
830
+ "precision": 0.45,
831
+ "recall": 0.27510917030567683,
832
+ "support": 229.0
833
+ },
834
+ "ve": {
835
+ "f1-score": 0.0,
836
+ "precision": 0.0,
837
+ "recall": 0.0,
838
+ "support": 22.0
839
+ },
840
+ "weighted avg": {
841
+ "f1-score": 0.3169831141081196,
842
+ "precision": 0.3240243264312071,
843
+ "recall": 0.321,
844
+ "support": 2000.0
845
+ }
846
+ },
847
+ "eval_f1": 0.30473656895821705,
848
+ "eval_loss": 1.8237080574035645,
849
+ "eval_runtime": 3.9808,
850
+ "eval_samples_per_second": 502.417,
851
+ "eval_steps_per_second": 31.401,
852
+ "step": 876
853
+ }
854
+ ],
855
+ "logging_steps": 10,
856
+ "max_steps": 876,
857
+ "num_input_tokens_seen": 0,
858
+ "num_train_epochs": 3,
859
+ "save_steps": 500,
860
+ "stateful_callbacks": {
861
+ "TrainerControl": {
862
+ "args": {
863
+ "should_epoch_stop": false,
864
+ "should_evaluate": false,
865
+ "should_log": false,
866
+ "should_save": true,
867
+ "should_training_stop": true
868
+ },
869
+ "attributes": {}
870
+ }
871
+ },
872
+ "total_flos": 920815007980032.0,
873
+ "train_batch_size": 16,
874
+ "trial_name": null,
875
+ "trial_params": null
876
+ }
trial_0/checkpoint-876/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cf983bab71d39717083fd85c30b7e0fd950e8d457e647f61a7ff596cb390b138
+ size 5368
trial_0/checkpoint-876/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
trial_0/logs/events.out.tfevents.1740158198.a2d653e866f9.226.4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d93a76ea98cda8f1a1f299c4559552e6ff63b7201898fd12d91c00f7d63ab861
+ size 6723
trial_0/logs/events.out.tfevents.1740161296.0a0ba32201a8.2151.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0e04278ab83601bb3ec7661f515626f6d5cc7e973b6bc6ef2761183a848daed5
+ size 30179
trial_0/logs/events.out.tfevents.1740161531.0a0ba32201a8.2151.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cc6a4056a5185e518d4124b6179202d83d9aa7b9da6f6957c3776875c2345405
+ size 25089
trial_0/logs/events.out.tfevents.1740161645.0a0ba32201a8.2151.2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:76747c3b41f44365b9f8d4eabb3389f8487d7441a12322dc7488712efccf4d86
+ size 405
trial_1/checkpoint-146/config.json ADDED
@@ -0,0 +1,50 @@
+ {
+ "_name_or_path": "dccuchile/bert-base-spanish-wwm-uncased",
+ "architectures": [
+ "BertForSequenceClassification"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "classifier_dropout": null,
+ "gradient_checkpointing": false,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 768,
+ "id2label": {
+ "0": "ar",
+ "1": "cl",
+ "2": "co",
+ "3": "es",
+ "4": "mx",
+ "5": "pe",
+ "6": "pr",
+ "7": "uy",
+ "8": "ve"
+ },
+ "initializer_range": 0.02,
+ "intermediate_size": 3072,
+ "label2id": {
+ "ar": 0,
+ "cl": 1,
+ "co": 2,
+ "es": 3,
+ "mx": 4,
+ "pe": 5,
+ "pr": 6,
+ "uy": 7,
+ "ve": 8
+ },
+ "layer_norm_eps": 1e-12,
+ "max_position_embeddings": 512,
+ "model_type": "bert",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
+ "output_past": true,
+ "pad_token_id": 1,
+ "position_embedding_type": "absolute",
+ "problem_type": "single_label_classification",
+ "torch_dtype": "float32",
+ "transformers_version": "4.48.3",
+ "type_vocab_size": 2,
+ "use_cache": true,
+ "vocab_size": 31002
+ }
trial_1/checkpoint-146/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6f148025042e8766377f58bd5b2cb8ab568f58cfee85f2f67d50d7b13949b896
+ size 439454740
trial_1/checkpoint-146/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d5b16f9046371de70837c5fcfbbb97032ca47fd7b0f2f5f70b053c6733259e41
+ size 879030522
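
For reference, a minimal sketch (not part of the commit itself) of how one of the checkpoints added here could be loaded for inference with `transformers`. It assumes the `trial_0/checkpoint-876/` files above have been downloaded locally and that `torch` is installed; the example text is hypothetical.

```python
# Minimal sketch, assuming trial_0/checkpoint-876/ (config.json, model.safetensors,
# tokenizer files) is available locally and transformers + torch are installed.
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

checkpoint_dir = "trial_0/checkpoint-876"  # checkpoint directory added in this commit
tokenizer = AutoTokenizer.from_pretrained(checkpoint_dir)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint_dir)
model.eval()

text = "ejemplo de publicación en español"  # hypothetical input post
inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
with torch.no_grad():
    logits = model(**inputs).logits
pred_id = logits.argmax(dim=-1).item()

# id2label in config.json maps class ids to country codes (ar, cl, co, es, mx, pe, pr, uy, ve)
print(model.config.id2label[pred_id])
```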