Model save

Browse files

Files changed (4) hide show

README.md +64 -0
all_results.json +9 -0
train_results.json +9 -0
trainer_state.json +688 -0

README.md ADDED Viewed

	@@ -0,0 +1,64 @@

+---
+base_model: barc0/Llama-3.1-ARC-Potpourri-Transduction-8B
+library_name: peft
+license: llama3.1
+tags:
+- trl
+- sft
+- generated_from_trainer
+model-index:
+- name: problem79_model_aug_30
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# problem79_model_aug_30
+This model is a fine-tuned version of [barc0/Llama-3.1-ARC-Potpourri-Transduction-8B](https://huggingface.co/barc0/Llama-3.1-ARC-Potpourri-Transduction-8B) on an unknown dataset.
+It achieves the following results on the evaluation set:
+- Loss: 0.0041
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 5e-05
+- train_batch_size: 2
+- eval_batch_size: 2
+- seed: 42
+- distributed_type: multi-GPU
+- optimizer: adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 (no additional optimizer arguments)
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_ratio: 0.1
+- num_epochs: 2
+### Training results
+| Training Loss | Epoch | Step | Validation Loss |
+|:-------------:|:-----:|:----:|:---------------:|
+| 0.0061        | 1.0   | 45   | 0.0086          |
+| 0.0043        | 2.0   | 90   | 0.0041          |
+### Framework versions
+- PEFT 0.13.2
+- Transformers 4.47.0.dev0
+- Pytorch 2.4.0+cu121
+- Datasets 3.1.0
+- Tokenizers 0.20.3

all_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 2.0,
+    "total_flos": 1178207846400.0,
+    "train_loss": 0.010592084623769753,
+    "train_runtime": 335.2271,
+    "train_samples": 90,
+    "train_samples_per_second": 0.537,
+    "train_steps_per_second": 0.268
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 2.0,
+    "total_flos": 1178207846400.0,
+    "train_loss": 0.010592084623769753,
+    "train_runtime": 335.2271,
+    "train_samples": 90,
+    "train_samples_per_second": 0.537,
+    "train_steps_per_second": 0.268
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,688 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 90,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.022222222222222223,
+      "grad_norm": 0.06182662126426935,
+      "learning_rate": 5.555555555555556e-06,
+      "loss": 0.0629,
+      "step": 1
+    },
+    {
+      "epoch": 0.044444444444444446,
+      "grad_norm": 0.01589702341098145,
+      "learning_rate": 1.1111111111111112e-05,
+      "loss": 0.0152,
+      "step": 2
+    },
+    {
+      "epoch": 0.06666666666666667,
+      "grad_norm": 0.009011508424448374,
+      "learning_rate": 1.6666666666666667e-05,
+      "loss": 0.0026,
+      "step": 3
+    },
+    {
+      "epoch": 0.08888888888888889,
+      "grad_norm": 0.034235923175647005,
+      "learning_rate": 2.2222222222222223e-05,
+      "loss": 0.0332,
+      "step": 4
+    },
+    {
+      "epoch": 0.1111111111111111,
+      "grad_norm": 0.02163238904889027,
+      "learning_rate": 2.777777777777778e-05,
+      "loss": 0.0162,
+      "step": 5
+    },
+    {
+      "epoch": 0.13333333333333333,
+      "grad_norm": 0.016464683927634675,
+      "learning_rate": 3.3333333333333335e-05,
+      "loss": 0.0166,
+      "step": 6
+    },
+    {
+      "epoch": 0.15555555555555556,
+      "grad_norm": 0.049353869295811564,
+      "learning_rate": 3.888888888888889e-05,
+      "loss": 0.0308,
+      "step": 7
+    },
+    {
+      "epoch": 0.17777777777777778,
+      "grad_norm": 0.0275078773129142,
+      "learning_rate": 4.4444444444444447e-05,
+      "loss": 0.0135,
+      "step": 8
+    },
+    {
+      "epoch": 0.2,
+      "grad_norm": 0.010117806940033707,
+      "learning_rate": 5e-05,
+      "loss": 0.0038,
+      "step": 9
+    },
+    {
+      "epoch": 0.2222222222222222,
+      "grad_norm": 0.015063898747815097,
+      "learning_rate": 4.998119881260576e-05,
+      "loss": 0.0103,
+      "step": 10
+    },
+    {
+      "epoch": 0.24444444444444444,
+      "grad_norm": 0.06977881827872869,
+      "learning_rate": 4.99248235291948e-05,
+      "loss": 0.0507,
+      "step": 11
+    },
+    {
+      "epoch": 0.26666666666666666,
+      "grad_norm": 0.051533656076912064,
+      "learning_rate": 4.983095894354858e-05,
+      "loss": 0.0308,
+      "step": 12
+    },
+    {
+      "epoch": 0.28888888888888886,
+      "grad_norm": 0.10116870744477119,
+      "learning_rate": 4.969974623692023e-05,
+      "loss": 0.0654,
+      "step": 13
+    },
+    {
+      "epoch": 0.3111111111111111,
+      "grad_norm": 0.014325180509067602,
+      "learning_rate": 4.953138276568462e-05,
+      "loss": 0.0073,
+      "step": 14
+    },
+    {
+      "epoch": 0.3333333333333333,
+      "grad_norm": 0.01979978006856134,
+      "learning_rate": 4.9326121764495596e-05,
+      "loss": 0.0176,
+      "step": 15
+    },
+    {
+      "epoch": 0.35555555555555557,
+      "grad_norm": 0.015300139316187096,
+      "learning_rate": 4.9084271965397014e-05,
+      "loss": 0.008,
+      "step": 16
+    },
+    {
+      "epoch": 0.37777777777777777,
+      "grad_norm": 0.07521864372886254,
+      "learning_rate": 4.880619713346039e-05,
+      "loss": 0.0245,
+      "step": 17
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 0.025173450961016364,
+      "learning_rate": 4.849231551964771e-05,
+      "loss": 0.0109,
+      "step": 18
+    },
+    {
+      "epoch": 0.4222222222222222,
+      "grad_norm": 0.008528461150650984,
+      "learning_rate": 4.814309923172227e-05,
+      "loss": 0.0025,
+      "step": 19
+    },
+    {
+      "epoch": 0.4444444444444444,
+      "grad_norm": 0.03981127057713206,
+      "learning_rate": 4.775907352415367e-05,
+      "loss": 0.014,
+      "step": 20
+    },
+    {
+      "epoch": 0.4666666666666667,
+      "grad_norm": 0.06436066544229031,
+      "learning_rate": 4.734081600808531e-05,
+      "loss": 0.0232,
+      "step": 21
+    },
+    {
+      "epoch": 0.4888888888888889,
+      "grad_norm": 0.07361515847522783,
+      "learning_rate": 4.6888955782552274e-05,
+      "loss": 0.0285,
+      "step": 22
+    },
+    {
+      "epoch": 0.5111111111111111,
+      "grad_norm": 0.016771501914059777,
+      "learning_rate": 4.640417248825667e-05,
+      "loss": 0.0056,
+      "step": 23
+    },
+    {
+      "epoch": 0.5333333333333333,
+      "grad_norm": 0.019982413157502734,
+      "learning_rate": 4.588719528532342e-05,
+      "loss": 0.0121,
+      "step": 24
+    },
+    {
+      "epoch": 0.5555555555555556,
+      "grad_norm": 0.011731032346215246,
+      "learning_rate": 4.533880175657419e-05,
+      "loss": 0.0037,
+      "step": 25
+    },
+    {
+      "epoch": 0.5777777777777777,
+      "grad_norm": 0.023635800121382996,
+      "learning_rate": 4.475981673796899e-05,
+      "loss": 0.0131,
+      "step": 26
+    },
+    {
+      "epoch": 0.6,
+      "grad_norm": 0.05619303358469727,
+      "learning_rate": 4.415111107797445e-05,
+      "loss": 0.0331,
+      "step": 27
+    },
+    {
+      "epoch": 0.6222222222222222,
+      "grad_norm": 0.05558849670893856,
+      "learning_rate": 4.351360032772512e-05,
+      "loss": 0.0293,
+      "step": 28
+    },
+    {
+      "epoch": 0.6444444444444445,
+      "grad_norm": 0.011555616757588632,
+      "learning_rate": 4.2848243363947484e-05,
+      "loss": 0.0027,
+      "step": 29
+    },
+    {
+      "epoch": 0.6666666666666666,
+      "grad_norm": 0.01073787599815852,
+      "learning_rate": 4.215604094671835e-05,
+      "loss": 0.0028,
+      "step": 30
+    },
+    {
+      "epoch": 0.6888888888888889,
+      "grad_norm": 0.05167251748328779,
+      "learning_rate": 4.14380342142266e-05,
+      "loss": 0.0145,
+      "step": 31
+    },
+    {
+      "epoch": 0.7111111111111111,
+      "grad_norm": 0.0689265472213532,
+      "learning_rate": 4.069530311680247e-05,
+      "loss": 0.0349,
+      "step": 32
+    },
+    {
+      "epoch": 0.7333333333333333,
+      "grad_norm": 0.008400104323732366,
+      "learning_rate": 3.9928964792569655e-05,
+      "loss": 0.0045,
+      "step": 33
+    },
+    {
+      "epoch": 0.7555555555555555,
+      "grad_norm": 0.08018920636711299,
+      "learning_rate": 3.914017188716347e-05,
+      "loss": 0.0214,
+      "step": 34
+    },
+    {
+      "epoch": 0.7777777777777778,
+      "grad_norm": 0.011680157844149832,
+      "learning_rate": 3.8330110820042285e-05,
+      "loss": 0.0047,
+      "step": 35
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 0.08749984752999368,
+      "learning_rate": 3.7500000000000003e-05,
+      "loss": 0.0202,
+      "step": 36
+    },
+    {
+      "epoch": 0.8222222222222222,
+      "grad_norm": 0.013666773676813073,
+      "learning_rate": 3.665108799256348e-05,
+      "loss": 0.0092,
+      "step": 37
+    },
+    {
+      "epoch": 0.8444444444444444,
+      "grad_norm": 0.06655828244115725,
+      "learning_rate": 3.578465164203134e-05,
+      "loss": 0.022,
+      "step": 38
+    },
+    {
+      "epoch": 0.8666666666666667,
+      "grad_norm": 0.03500700900820066,
+      "learning_rate": 3.490199415097892e-05,
+      "loss": 0.0089,
+      "step": 39
+    },
+    {
+      "epoch": 0.8888888888888888,
+      "grad_norm": 0.009397023547674558,
+      "learning_rate": 3.400444312011776e-05,
+      "loss": 0.0023,
+      "step": 40
+    },
+    {
+      "epoch": 0.9111111111111111,
+      "grad_norm": 0.06659563035933518,
+      "learning_rate": 3.309334855145803e-05,
+      "loss": 0.0112,
+      "step": 41
+    },
+    {
+      "epoch": 0.9333333333333333,
+      "grad_norm": 0.018530712937046168,
+      "learning_rate": 3.217008081777726e-05,
+      "loss": 0.0104,
+      "step": 42
+    },
+    {
+      "epoch": 0.9555555555555556,
+      "grad_norm": 0.036374253122435134,
+      "learning_rate": 3.1236028601449534e-05,
+      "loss": 0.0078,
+      "step": 43
+    },
+    {
+      "epoch": 0.9777777777777777,
+      "grad_norm": 0.007598122085381065,
+      "learning_rate": 3.0292596805735274e-05,
+      "loss": 0.0021,
+      "step": 44
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 0.01154681575748284,
+      "learning_rate": 2.9341204441673266e-05,
+      "loss": 0.0061,
+      "step": 45
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.008554333820939064,
+      "eval_runtime": 0.4003,
+      "eval_samples_per_second": 2.498,
+      "eval_steps_per_second": 2.498,
+      "step": 45
+    },
+    {
+      "epoch": 1.0222222222222221,
+      "grad_norm": 0.01037823501158092,
+      "learning_rate": 2.8383282493753283e-05,
+      "loss": 0.0026,
+      "step": 46
+    },
+    {
+      "epoch": 1.0444444444444445,
+      "grad_norm": 0.011839423396842186,
+      "learning_rate": 2.742027176757948e-05,
+      "loss": 0.0056,
+      "step": 47
+    },
+    {
+      "epoch": 1.0666666666666667,
+      "grad_norm": 0.03371854178303596,
+      "learning_rate": 2.6453620722761896e-05,
+      "loss": 0.0045,
+      "step": 48
+    },
+    {
+      "epoch": 1.0888888888888888,
+      "grad_norm": 0.03300585669501468,
+      "learning_rate": 2.548478329429561e-05,
+      "loss": 0.0064,
+      "step": 49
+    },
+    {
+      "epoch": 1.1111111111111112,
+      "grad_norm": 0.013449148332427016,
+      "learning_rate": 2.4515216705704395e-05,
+      "loss": 0.008,
+      "step": 50
+    },
+    {
+      "epoch": 1.1333333333333333,
+      "grad_norm": 0.021034590041630902,
+      "learning_rate": 2.3546379277238107e-05,
+      "loss": 0.0032,
+      "step": 51
+    },
+    {
+      "epoch": 1.1555555555555554,
+      "grad_norm": 0.017485685122905388,
+      "learning_rate": 2.2579728232420525e-05,
+      "loss": 0.0033,
+      "step": 52
+    },
+    {
+      "epoch": 1.1777777777777778,
+      "grad_norm": 0.01444946951960895,
+      "learning_rate": 2.161671750624673e-05,
+      "loss": 0.0029,
+      "step": 53
+    },
+    {
+      "epoch": 1.2,
+      "grad_norm": 0.00732110018420229,
+      "learning_rate": 2.0658795558326743e-05,
+      "loss": 0.0017,
+      "step": 54
+    },
+    {
+      "epoch": 1.2222222222222223,
+      "grad_norm": 0.020238449396738974,
+      "learning_rate": 1.970740319426474e-05,
+      "loss": 0.006,
+      "step": 55
+    },
+    {
+      "epoch": 1.2444444444444445,
+      "grad_norm": 0.006366192592726735,
+      "learning_rate": 1.876397139855047e-05,
+      "loss": 0.0009,
+      "step": 56
+    },
+    {
+      "epoch": 1.2666666666666666,
+      "grad_norm": 0.007366265452970751,
+      "learning_rate": 1.7829919182222752e-05,
+      "loss": 0.0016,
+      "step": 57
+    },
+    {
+      "epoch": 1.2888888888888888,
+      "grad_norm": 0.009830493382021098,
+      "learning_rate": 1.690665144854198e-05,
+      "loss": 0.0048,
+      "step": 58
+    },
+    {
+      "epoch": 1.3111111111111111,
+      "grad_norm": 0.03796420169500345,
+      "learning_rate": 1.5995556879882246e-05,
+      "loss": 0.0152,
+      "step": 59
+    },
+    {
+      "epoch": 1.3333333333333333,
+      "grad_norm": 0.006007504069616182,
+      "learning_rate": 1.509800584902108e-05,
+      "loss": 0.0016,
+      "step": 60
+    },
+    {
+      "epoch": 1.3555555555555556,
+      "grad_norm": 0.012483540160593455,
+      "learning_rate": 1.4215348357968669e-05,
+      "loss": 0.003,
+      "step": 61
+    },
+    {
+      "epoch": 1.3777777777777778,
+      "grad_norm": 0.013173843939312408,
+      "learning_rate": 1.3348912007436537e-05,
+      "loss": 0.0031,
+      "step": 62
+    },
+    {
+      "epoch": 1.4,
+      "grad_norm": 0.03260985697608295,
+      "learning_rate": 1.2500000000000006e-05,
+      "loss": 0.0068,
+      "step": 63
+    },
+    {
+      "epoch": 1.4222222222222223,
+      "grad_norm": 0.01905871598459963,
+      "learning_rate": 1.1669889179957725e-05,
+      "loss": 0.0034,
+      "step": 64
+    },
+    {
+      "epoch": 1.4444444444444444,
+      "grad_norm": 0.011202548052787911,
+      "learning_rate": 1.085982811283654e-05,
+      "loss": 0.0026,
+      "step": 65
+    },
+    {
+      "epoch": 1.4666666666666668,
+      "grad_norm": 0.010947806429369628,
+      "learning_rate": 1.0071035207430352e-05,
+      "loss": 0.0011,
+      "step": 66
+    },
+    {
+      "epoch": 1.488888888888889,
+      "grad_norm": 0.011158261215536286,
+      "learning_rate": 9.304696883197542e-06,
+      "loss": 0.0015,
+      "step": 67
+    },
+    {
+      "epoch": 1.511111111111111,
+      "grad_norm": 0.053644402213161864,
+      "learning_rate": 8.561965785773413e-06,
+      "loss": 0.0044,
+      "step": 68
+    },
+    {
+      "epoch": 1.5333333333333332,
+      "grad_norm": 0.00811995614867166,
+      "learning_rate": 7.843959053281663e-06,
+      "loss": 0.0048,
+      "step": 69
+    },
+    {
+      "epoch": 1.5555555555555556,
+      "grad_norm": 0.003948942088982283,
+      "learning_rate": 7.1517566360525284e-06,
+      "loss": 0.0008,
+      "step": 70
+    },
+    {
+      "epoch": 1.5777777777777777,
+      "grad_norm": 0.00787073010942699,
+      "learning_rate": 6.48639967227489e-06,
+      "loss": 0.0021,
+      "step": 71
+    },
+    {
+      "epoch": 1.6,
+      "grad_norm": 0.006610555855784759,
+      "learning_rate": 5.848888922025553e-06,
+      "loss": 0.0011,
+      "step": 72
+    },
+    {
+      "epoch": 1.6222222222222222,
+      "grad_norm": 0.010134326783885872,
+      "learning_rate": 5.240183262031021e-06,
+      "loss": 0.002,
+      "step": 73
+    },
+    {
+      "epoch": 1.6444444444444444,
+      "grad_norm": 0.005001526466726832,
+      "learning_rate": 4.661198243425813e-06,
+      "loss": 0.0007,
+      "step": 74
+    },
+    {
+      "epoch": 1.6666666666666665,
+      "grad_norm": 0.007348066819530326,
+      "learning_rate": 4.112804714676594e-06,
+      "loss": 0.0013,
+      "step": 75
+    },
+    {
+      "epoch": 1.6888888888888889,
+      "grad_norm": 0.03371638893848461,
+      "learning_rate": 3.595827511743341e-06,
+      "loss": 0.0155,
+      "step": 76
+    },
+    {
+      "epoch": 1.7111111111111112,
+      "grad_norm": 0.00709656147687496,
+      "learning_rate": 3.111044217447731e-06,
+      "loss": 0.0011,
+      "step": 77
+    },
+    {
+      "epoch": 1.7333333333333334,
+      "grad_norm": 0.011299389181676027,
+      "learning_rate": 2.659183991914696e-06,
+      "loss": 0.003,
+      "step": 78
+    },
+    {
+      "epoch": 1.7555555555555555,
+      "grad_norm": 0.007881490979262289,
+      "learning_rate": 2.2409264758463363e-06,
+      "loss": 0.0031,
+      "step": 79
+    },
+    {
+      "epoch": 1.7777777777777777,
+      "grad_norm": 0.021330992455981652,
+      "learning_rate": 1.8569007682777417e-06,
+      "loss": 0.0069,
+      "step": 80
+    },
+    {
+      "epoch": 1.8,
+      "grad_norm": 0.016774598008309008,
+      "learning_rate": 1.5076844803522922e-06,
+      "loss": 0.0048,
+      "step": 81
+    },
+    {
+      "epoch": 1.8222222222222222,
+      "grad_norm": 0.010869532162782105,
+      "learning_rate": 1.1938028665396173e-06,
+      "loss": 0.0051,
+      "step": 82
+    },
+    {
+      "epoch": 1.8444444444444446,
+      "grad_norm": 0.035520474855902366,
+      "learning_rate": 9.157280346029918e-07,
+      "loss": 0.0158,
+      "step": 83
+    },
+    {
+      "epoch": 1.8666666666666667,
+      "grad_norm": 0.011978338454831126,
+      "learning_rate": 6.738782355044049e-07,
+      "loss": 0.0041,
+      "step": 84
+    },
+    {
+      "epoch": 1.8888888888888888,
+      "grad_norm": 0.010527929609233001,
+      "learning_rate": 4.6861723431538276e-07,
+      "loss": 0.0015,
+      "step": 85
+    },
+    {
+      "epoch": 1.911111111111111,
+      "grad_norm": 0.013486579199822386,
+      "learning_rate": 3.002537630797747e-07,
+      "loss": 0.0025,
+      "step": 86
+    },
+    {
+      "epoch": 1.9333333333333333,
+      "grad_norm": 0.021473459166054858,
+      "learning_rate": 1.6904105645142444e-07,
+      "loss": 0.0039,
+      "step": 87
+    },
+    {
+      "epoch": 1.9555555555555557,
+      "grad_norm": 0.00814137505575892,
+      "learning_rate": 7.51764708051994e-08,
+      "loss": 0.0014,
+      "step": 88
+    },
+    {
+      "epoch": 1.9777777777777779,
+      "grad_norm": 0.007668567820707523,
+      "learning_rate": 1.8801187394248965e-08,
+      "loss": 0.0018,
+      "step": 89
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 0.043157962637548175,
+      "learning_rate": 0.0,
+      "loss": 0.0043,
+      "step": 90
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.0041035814210772514,
+      "eval_runtime": 1.4053,
+      "eval_samples_per_second": 0.712,
+      "eval_steps_per_second": 0.712,
+      "step": 90
+    },
+    {
+      "epoch": 2.0,
+      "step": 90,
+      "total_flos": 1178207846400.0,
+      "train_loss": 0.010592084623769753,
+      "train_runtime": 335.2271,
+      "train_samples_per_second": 0.537,
+      "train_steps_per_second": 0.268
+    }
+  ],
+  "logging_steps": 1,
+  "max_steps": 90,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1178207846400.0,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}