Model save

Browse files

Files changed (4) hide show

README.md +64 -0
all_results.json +9 -0
train_results.json +9 -0
trainer_state.json +492 -0

README.md ADDED Viewed

	@@ -0,0 +1,64 @@

+---
+base_model: barc0/Llama-3.1-ARC-Potpourri-Transduction-8B
+library_name: peft
+license: llama3.1
+tags:
+- trl
+- sft
+- generated_from_trainer
+model-index:
+- name: problem39_model_more_aug_30
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# problem39_model_more_aug_30
+This model is a fine-tuned version of [barc0/Llama-3.1-ARC-Potpourri-Transduction-8B](https://huggingface.co/barc0/Llama-3.1-ARC-Potpourri-Transduction-8B) on an unknown dataset.
+It achieves the following results on the evaluation set:
+- Loss: 0.0029
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 5e-05
+- train_batch_size: 2
+- eval_batch_size: 2
+- seed: 42
+- distributed_type: multi-GPU
+- optimizer: adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 (no additional optimizer arguments)
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_ratio: 0.1
+- num_epochs: 2
+### Training results
+| Training Loss | Epoch | Step | Validation Loss |
+|:-------------:|:-----:|:----:|:---------------:|
+| 0.0           | 1.0   | 31   | 0.0026          |
+| 0.0001        | 2.0   | 62   | 0.0029          |
+### Framework versions
+- PEFT 0.13.2
+- Transformers 4.47.0.dev0
+- Pytorch 2.4.0+cu121
+- Datasets 3.1.0
+- Tokenizers 0.20.3

all_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 2.0,
+    "total_flos": 1144521031680.0,
+    "train_loss": 0.0004356121934755896,
+    "train_runtime": 331.8626,
+    "train_samples": 62,
+    "train_samples_per_second": 0.374,
+    "train_steps_per_second": 0.187
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 2.0,
+    "total_flos": 1144521031680.0,
+    "train_loss": 0.0004356121934755896,
+    "train_runtime": 331.8626,
+    "train_samples": 62,
+    "train_samples_per_second": 0.374,
+    "train_steps_per_second": 0.187
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,492 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 62,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.03225806451612903,
+      "grad_norm": 0.0003888311526422569,
+      "learning_rate": 7.142857142857143e-06,
+      "loss": 0.0,
+      "step": 1
+    },
+    {
+      "epoch": 0.06451612903225806,
+      "grad_norm": 0.006705866500275534,
+      "learning_rate": 1.4285714285714285e-05,
+      "loss": 0.0018,
+      "step": 2
+    },
+    {
+      "epoch": 0.0967741935483871,
+      "grad_norm": 0.0004288404580521471,
+      "learning_rate": 2.1428571428571428e-05,
+      "loss": 0.0001,
+      "step": 3
+    },
+    {
+      "epoch": 0.12903225806451613,
+      "grad_norm": 0.006721843317752399,
+      "learning_rate": 2.857142857142857e-05,
+      "loss": 0.0019,
+      "step": 4
+    },
+    {
+      "epoch": 0.16129032258064516,
+      "grad_norm": 0.009519105488016918,
+      "learning_rate": 3.571428571428572e-05,
+      "loss": 0.0032,
+      "step": 5
+    },
+    {
+      "epoch": 0.1935483870967742,
+      "grad_norm": 0.009983139237850786,
+      "learning_rate": 4.2857142857142856e-05,
+      "loss": 0.003,
+      "step": 6
+    },
+    {
+      "epoch": 0.22580645161290322,
+      "grad_norm": 0.006711102239168073,
+      "learning_rate": 5e-05,
+      "loss": 0.0014,
+      "step": 7
+    },
+    {
+      "epoch": 0.25806451612903225,
+      "grad_norm": 0.0001960930857351409,
+      "learning_rate": 4.995922759815339e-05,
+      "loss": 0.0,
+      "step": 8
+    },
+    {
+      "epoch": 0.2903225806451613,
+      "grad_norm": 9.143550595543499e-05,
+      "learning_rate": 4.9837043383713753e-05,
+      "loss": 0.0,
+      "step": 9
+    },
+    {
+      "epoch": 0.3225806451612903,
+      "grad_norm": 0.007417075139109489,
+      "learning_rate": 4.963384589619233e-05,
+      "loss": 0.0017,
+      "step": 10
+    },
+    {
+      "epoch": 0.3548387096774194,
+      "grad_norm": 0.002566821879916539,
+      "learning_rate": 4.935029792355834e-05,
+      "loss": 0.0006,
+      "step": 11
+    },
+    {
+      "epoch": 0.3870967741935484,
+      "grad_norm": 0.007642635295730325,
+      "learning_rate": 4.898732434036244e-05,
+      "loss": 0.0009,
+      "step": 12
+    },
+    {
+      "epoch": 0.41935483870967744,
+      "grad_norm": 0.00826117485142426,
+      "learning_rate": 4.854610909098812e-05,
+      "loss": 0.0011,
+      "step": 13
+    },
+    {
+      "epoch": 0.45161290322580644,
+      "grad_norm": 0.005359952597223366,
+      "learning_rate": 4.802809132787125e-05,
+      "loss": 0.0005,
+      "step": 14
+    },
+    {
+      "epoch": 0.4838709677419355,
+      "grad_norm": 0.002749396030753739,
+      "learning_rate": 4.743496071728396e-05,
+      "loss": 0.0002,
+      "step": 15
+    },
+    {
+      "epoch": 0.5161290322580645,
+      "grad_norm": 0.0011744439020923403,
+      "learning_rate": 4.6768651927994434e-05,
+      "loss": 0.0002,
+      "step": 16
+    },
+    {
+      "epoch": 0.5483870967741935,
+      "grad_norm": 0.0017986768332600579,
+      "learning_rate": 4.6031338320779534e-05,
+      "loss": 0.0003,
+      "step": 17
+    },
+    {
+      "epoch": 0.5806451612903226,
+      "grad_norm": 0.0027352310170441737,
+      "learning_rate": 4.522542485937369e-05,
+      "loss": 0.0006,
+      "step": 18
+    },
+    {
+      "epoch": 0.6129032258064516,
+      "grad_norm": 0.00021274929498089895,
+      "learning_rate": 4.4353540265977064e-05,
+      "loss": 0.0,
+      "step": 19
+    },
+    {
+      "epoch": 0.6451612903225806,
+      "grad_norm": 0.00015254003122006633,
+      "learning_rate": 4.341852844691012e-05,
+      "loss": 0.0,
+      "step": 20
+    },
+    {
+      "epoch": 0.6774193548387096,
+      "grad_norm": 0.00014754669500526948,
+      "learning_rate": 4.242343921638234e-05,
+      "loss": 0.0,
+      "step": 21
+    },
+    {
+      "epoch": 0.7096774193548387,
+      "grad_norm": 0.014701207072580798,
+      "learning_rate": 4.137151834863213e-05,
+      "loss": 0.0053,
+      "step": 22
+    },
+    {
+      "epoch": 0.7419354838709677,
+      "grad_norm": 0.00040132537069888165,
+      "learning_rate": 4.0266196990885955e-05,
+      "loss": 0.0001,
+      "step": 23
+    },
+    {
+      "epoch": 0.7741935483870968,
+      "grad_norm": 0.000310253267324018,
+      "learning_rate": 3.911108047166924e-05,
+      "loss": 0.0,
+      "step": 24
+    },
+    {
+      "epoch": 0.8064516129032258,
+      "grad_norm": 0.0032453162218567663,
+      "learning_rate": 3.790993654097405e-05,
+      "loss": 0.0004,
+      "step": 25
+    },
+    {
+      "epoch": 0.8387096774193549,
+      "grad_norm": 0.0011259240533704532,
+      "learning_rate": 3.6666683080641846e-05,
+      "loss": 0.0002,
+      "step": 26
+    },
+    {
+      "epoch": 0.8709677419354839,
+      "grad_norm": 6.593340410831557e-05,
+      "learning_rate": 3.5385375325047166e-05,
+      "loss": 0.0,
+      "step": 27
+    },
+    {
+      "epoch": 0.9032258064516129,
+      "grad_norm": 5.1984982035018296e-05,
+      "learning_rate": 3.4070192633766025e-05,
+      "loss": 0.0,
+      "step": 28
+    },
+    {
+      "epoch": 0.9354838709677419,
+      "grad_norm": 0.0012978852470741332,
+      "learning_rate": 3.272542485937369e-05,
+      "loss": 0.0002,
+      "step": 29
+    },
+    {
+      "epoch": 0.967741935483871,
+      "grad_norm": 0.0011559129352493,
+      "learning_rate": 3.135545835483718e-05,
+      "loss": 0.0001,
+      "step": 30
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 0.00016768764915493187,
+      "learning_rate": 2.996476166614364e-05,
+      "loss": 0.0,
+      "step": 31
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.002580386819317937,
+      "eval_runtime": 0.6554,
+      "eval_samples_per_second": 1.526,
+      "eval_steps_per_second": 1.526,
+      "step": 31
+    },
+    {
+      "epoch": 1.032258064516129,
+      "grad_norm": 9.698020571956317e-05,
+      "learning_rate": 2.8557870956832132e-05,
+      "loss": 0.0,
+      "step": 32
+    },
+    {
+      "epoch": 1.064516129032258,
+      "grad_norm": 0.0017065509209019311,
+      "learning_rate": 2.7139375211970996e-05,
+      "loss": 0.0001,
+      "step": 33
+    },
+    {
+      "epoch": 1.096774193548387,
+      "grad_norm": 0.008080666632729714,
+      "learning_rate": 2.5713901269842404e-05,
+      "loss": 0.0022,
+      "step": 34
+    },
+    {
+      "epoch": 1.129032258064516,
+      "grad_norm": 0.00014016671601073153,
+      "learning_rate": 2.42860987301576e-05,
+      "loss": 0.0,
+      "step": 35
+    },
+    {
+      "epoch": 1.1612903225806452,
+      "grad_norm": 0.0002183282791861122,
+      "learning_rate": 2.2860624788029013e-05,
+      "loss": 0.0,
+      "step": 36
+    },
+    {
+      "epoch": 1.1935483870967742,
+      "grad_norm": 3.4706191979932056e-05,
+      "learning_rate": 2.1442129043167874e-05,
+      "loss": 0.0,
+      "step": 37
+    },
+    {
+      "epoch": 1.2258064516129032,
+      "grad_norm": 6.274199180955771e-05,
+      "learning_rate": 2.003523833385637e-05,
+      "loss": 0.0,
+      "step": 38
+    },
+    {
+      "epoch": 1.2580645161290323,
+      "grad_norm": 0.00011240971099287573,
+      "learning_rate": 1.8644541645162834e-05,
+      "loss": 0.0,
+      "step": 39
+    },
+    {
+      "epoch": 1.2903225806451613,
+      "grad_norm": 8.379442890594463e-05,
+      "learning_rate": 1.7274575140626318e-05,
+      "loss": 0.0,
+      "step": 40
+    },
+    {
+      "epoch": 1.3225806451612903,
+      "grad_norm": 0.0006950311671464802,
+      "learning_rate": 1.5929807366233977e-05,
+      "loss": 0.0001,
+      "step": 41
+    },
+    {
+      "epoch": 1.3548387096774195,
+      "grad_norm": 6.554897296239256e-05,
+      "learning_rate": 1.4614624674952842e-05,
+      "loss": 0.0,
+      "step": 42
+    },
+    {
+      "epoch": 1.3870967741935485,
+      "grad_norm": 8.946111691896196e-05,
+      "learning_rate": 1.3333316919358157e-05,
+      "loss": 0.0,
+      "step": 43
+    },
+    {
+      "epoch": 1.4193548387096775,
+      "grad_norm": 0.00021237835272870894,
+      "learning_rate": 1.2090063459025955e-05,
+      "loss": 0.0,
+      "step": 44
+    },
+    {
+      "epoch": 1.4516129032258065,
+      "grad_norm": 8.780703503531893e-05,
+      "learning_rate": 1.0888919528330777e-05,
+      "loss": 0.0,
+      "step": 45
+    },
+    {
+      "epoch": 1.4838709677419355,
+      "grad_norm": 9.310185931359011e-05,
+      "learning_rate": 9.733803009114045e-06,
+      "loss": 0.0,
+      "step": 46
+    },
+    {
+      "epoch": 1.5161290322580645,
+      "grad_norm": 0.0022132919985140046,
+      "learning_rate": 8.628481651367876e-06,
+      "loss": 0.0002,
+      "step": 47
+    },
+    {
+      "epoch": 1.5483870967741935,
+      "grad_norm": 0.0001803834028245764,
+      "learning_rate": 7.576560783617668e-06,
+      "loss": 0.0,
+      "step": 48
+    },
+    {
+      "epoch": 1.5806451612903225,
+      "grad_norm": 0.0011069882126815845,
+      "learning_rate": 6.5814715530898745e-06,
+      "loss": 0.0001,
+      "step": 49
+    },
+    {
+      "epoch": 1.6129032258064515,
+      "grad_norm": 0.00015904430381639932,
+      "learning_rate": 5.646459734022938e-06,
+      "loss": 0.0,
+      "step": 50
+    },
+    {
+      "epoch": 1.6451612903225805,
+      "grad_norm": 0.00023196882598516344,
+      "learning_rate": 4.7745751406263165e-06,
+      "loss": 0.0,
+      "step": 51
+    },
+    {
+      "epoch": 1.6774193548387095,
+      "grad_norm": 8.494839968665877e-05,
+      "learning_rate": 3.968661679220468e-06,
+      "loss": 0.0,
+      "step": 52
+    },
+    {
+      "epoch": 1.7096774193548387,
+      "grad_norm": 7.495013324600562e-05,
+      "learning_rate": 3.2313480720055745e-06,
+      "loss": 0.0,
+      "step": 53
+    },
+    {
+      "epoch": 1.7419354838709677,
+      "grad_norm": 0.00011153696481041336,
+      "learning_rate": 2.565039282716045e-06,
+      "loss": 0.0,
+      "step": 54
+    },
+    {
+      "epoch": 1.7741935483870968,
+      "grad_norm": 9.790228118890396e-05,
+      "learning_rate": 1.97190867212875e-06,
+      "loss": 0.0,
+      "step": 55
+    },
+    {
+      "epoch": 1.8064516129032258,
+      "grad_norm": 0.0005323293049174912,
+      "learning_rate": 1.4538909090118846e-06,
+      "loss": 0.0,
+      "step": 56
+    },
+    {
+      "epoch": 1.838709677419355,
+      "grad_norm": 7.75389691903468e-05,
+      "learning_rate": 1.0126756596375686e-06,
+      "loss": 0.0,
+      "step": 57
+    },
+    {
+      "epoch": 1.870967741935484,
+      "grad_norm": 9.419564261080586e-05,
+      "learning_rate": 6.497020764416633e-07,
+      "loss": 0.0,
+      "step": 58
+    },
+    {
+      "epoch": 1.903225806451613,
+      "grad_norm": 1.6085484123530773e-05,
+      "learning_rate": 3.6615410380767544e-07,
+      "loss": 0.0,
+      "step": 59
+    },
+    {
+      "epoch": 1.935483870967742,
+      "grad_norm": 0.0005223311340339994,
+      "learning_rate": 1.6295661628624447e-07,
+      "loss": 0.0001,
+      "step": 60
+    },
+    {
+      "epoch": 1.967741935483871,
+      "grad_norm": 9.50629744971249e-05,
+      "learning_rate": 4.07724018466088e-08,
+      "loss": 0.0,
+      "step": 61
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 0.0005980841644283981,
+      "learning_rate": 0.0,
+      "loss": 0.0001,
+      "step": 62
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.0029119281098246574,
+      "eval_runtime": 1.4832,
+      "eval_samples_per_second": 0.674,
+      "eval_steps_per_second": 0.674,
+      "step": 62
+    },
+    {
+      "epoch": 2.0,
+      "step": 62,
+      "total_flos": 1144521031680.0,
+      "train_loss": 0.0004356121934755896,
+      "train_runtime": 331.8626,
+      "train_samples_per_second": 0.374,
+      "train_steps_per_second": 0.187
+    }
+  ],
+  "logging_steps": 1,
+  "max_steps": 62,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1144521031680.0,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}