diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..52373fe24473b1aa44333d318f578ae6bf04b49b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..211fae2fabcaab1d8be8d074b0d5b164bb5ef7cc --- /dev/null +++ b/README.md @@ -0,0 +1,62 @@ +--- +tags: +- trl +- sft +- generated_from_trainer +datasets: +- generator +model-index: +- name: zephyr-2b-gemma-dft-debug + results: [] +--- + + + +# zephyr-2b-gemma-dft-debug + +This model was trained from scratch on the generator dataset. +It achieves the following results on the evaluation set: +- Loss: 2957.4309 + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 2e-05 +- train_batch_size: 8 +- eval_batch_size: 8 +- seed: 42 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08 +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.1 +- num_epochs: 1 + +### Training results + +| Training Loss | Epoch | Step | Validation Loss | +|:-------------:|:------:|:----:|:---------------:| +| 2978.2934 | 0.9982 | 270 | 2957.4309 | + + +### Framework versions + +- Transformers 4.43.3 +- Pytorch 2.3.1+cu121 +- Datasets 2.20.0 +- Tokenizers 0.19.1 diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f2a29d93e7b495521e9157f922770e8b7547f302 --- /dev/null +++ b/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 0.9981515711645101, + "total_flos": 5.260333472022528e+16, + "train_loss": 3746.6009367766205, + "train_runtime": 2500.0335, + "train_samples": 6750, + "train_samples_per_second": 3.461, + "train_steps_per_second": 0.108 +} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..753f14bb4f84c576605b7b2051e3b0edbe86f869 --- /dev/null +++ b/config.json @@ -0,0 +1,29 @@ +{ + "_name_or_path": "data/gemma-2b", + "architectures": [ + "GemmaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 2, + "eos_token_id": 1, + "head_dim": 256, + "hidden_act": "gelu", + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 16384, + "max_position_embeddings": 8192, + "model_type": "gemma", + "num_attention_heads": 8, + "num_hidden_layers": 18, + "num_key_value_heads": 1, + "pad_token_id": 0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "torch_dtype": "bfloat16", + "transformers_version": "4.43.3", + "use_cache": false, + "vocab_size": 256000 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d2d9fefd5d5cf7703bfd78cc1d52d17529e8bc95 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 2, + "eos_token_id": 1, + "pad_token_id": 0, + "transformers_version": "4.43.3" +} diff --git a/model-00001-of-00002.safetensors b/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..120143ea4a173a5700ee44000677a305fd14c087 --- /dev/null +++ b/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:673f21f6511aa4d6965bb497bbc307de1fecdcdb64045e3d677f47944e65fa5a +size 4945242264 diff --git a/model-00002-of-00002.safetensors b/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ec524b8ca7bb9f77e89bfcc130b48689c64ea0a1 --- /dev/null +++ b/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b8d5752b1fa47fe467d63499fa2ea4087cf9ace96733181c06492817e07a1f7 +size 67121608 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..b8fcee94bedd135b67735027dc217936f7444ab2 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,171 @@ +{ + "metadata": { + "total_size": 5012344832 + }, + "weight_map": { + "model.embed_tokens.weight": "model-00001-of-00002.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.norm.weight": "model-00002-of-00002.safetensors" + } +} diff --git a/runs/Aug01_08-13-23_ale-distillm-8-0-0/events.out.tfevents.1722496416.ale-distillm-8-0-0.150135.0 b/runs/Aug01_08-13-23_ale-distillm-8-0-0/events.out.tfevents.1722496416.ale-distillm-8-0-0.150135.0 new file mode 100644 index 0000000000000000000000000000000000000000..66586e8ba4f249586de98fd44e1c881ebcd662e5 --- /dev/null +++ b/runs/Aug01_08-13-23_ale-distillm-8-0-0/events.out.tfevents.1722496416.ale-distillm-8-0-0.150135.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1b21cdd4d55dd68cfa415502ad9e43df20cb1f421d9e10311bc60cc3e568eb7 +size 5801 diff --git a/runs/Aug01_08-19-49_ale-distillm-8-0-0/events.out.tfevents.1722496802.ale-distillm-8-0-0.150944.0 b/runs/Aug01_08-19-49_ale-distillm-8-0-0/events.out.tfevents.1722496802.ale-distillm-8-0-0.150944.0 new file mode 100644 index 0000000000000000000000000000000000000000..7b0c3eaf1f33f9d370a2a03dd90140f52911f515 --- /dev/null +++ b/runs/Aug01_08-19-49_ale-distillm-8-0-0/events.out.tfevents.1722496802.ale-distillm-8-0-0.150944.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a991f1dfcaf80b984ee9f5682235fef8c8046ea2216af91d4aa3dcc6d5f1124 +size 5801 diff --git a/runs/Aug01_08-23-09_ale-distillm-8-0-0/events.out.tfevents.1722497020.ale-distillm-8-0-0.151618.0 b/runs/Aug01_08-23-09_ale-distillm-8-0-0/events.out.tfevents.1722497020.ale-distillm-8-0-0.151618.0 new file mode 100644 index 0000000000000000000000000000000000000000..8ea26e8ec4ba5c0b8883f5f468c8620dd6879a90 --- /dev/null +++ b/runs/Aug01_08-23-09_ale-distillm-8-0-0/events.out.tfevents.1722497020.ale-distillm-8-0-0.151618.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:775f30c0c4c1ac6f5300c0c214e81e3b3560c8d75da600a2ce715cb699ef3b17 +size 5180 diff --git a/runs/Aug01_08-43-38_ale-distillm-8-0-0/events.out.tfevents.1722498230.ale-distillm-8-0-0.155112.0 b/runs/Aug01_08-43-38_ale-distillm-8-0-0/events.out.tfevents.1722498230.ale-distillm-8-0-0.155112.0 new file mode 100644 index 0000000000000000000000000000000000000000..02f6cf5cc6214ae60a4ca74fda156d5243a67120 --- /dev/null +++ b/runs/Aug01_08-43-38_ale-distillm-8-0-0/events.out.tfevents.1722498230.ale-distillm-8-0-0.155112.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c46f3350073f6cbb8dca2704e1fb48412e88e9adcb64c41193b3406aa68557a4 +size 5180 diff --git a/runs/Aug01_08-50-20_ale-distillm-8-0-0/events.out.tfevents.1722498632.ale-distillm-8-0-0.155311.0 b/runs/Aug01_08-50-20_ale-distillm-8-0-0/events.out.tfevents.1722498632.ale-distillm-8-0-0.155311.0 new file mode 100644 index 0000000000000000000000000000000000000000..bc84cb4731b9fd8c72853b4fb2f615920b26bd9e --- /dev/null +++ b/runs/Aug01_08-50-20_ale-distillm-8-0-0/events.out.tfevents.1722498632.ale-distillm-8-0-0.155311.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee34231a8d43aff5288de9719c8097ccbf96600eeab75c1881281e7d5964184e +size 5180 diff --git a/runs/Aug01_08-51-49_ale-distillm-8-0-0/events.out.tfevents.1722498721.ale-distillm-8-0-0.155510.0 b/runs/Aug01_08-51-49_ale-distillm-8-0-0/events.out.tfevents.1722498721.ale-distillm-8-0-0.155510.0 new file mode 100644 index 0000000000000000000000000000000000000000..510e7b6199ad7269b3c6f9c07d7b83634f21c397 --- /dev/null +++ b/runs/Aug01_08-51-49_ale-distillm-8-0-0/events.out.tfevents.1722498721.ale-distillm-8-0-0.155510.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bd7d4392525199c21c57688fdb450a139382247d2059f1686302c536532d98d +size 6008 diff --git a/runs/Aug01_08-57-31_ale-distillm-8-0-0/events.out.tfevents.1722499073.ale-distillm-8-0-0.156427.0 b/runs/Aug01_08-57-31_ale-distillm-8-0-0/events.out.tfevents.1722499073.ale-distillm-8-0-0.156427.0 new file mode 100644 index 0000000000000000000000000000000000000000..0851c29b4f28249be56af86ed2625f911d590afd --- /dev/null +++ b/runs/Aug01_08-57-31_ale-distillm-8-0-0/events.out.tfevents.1722499073.ale-distillm-8-0-0.156427.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3421344c3c805aa705d7c21ed8e5b8c8cdcc521987a1b4a8aeac0681aa1fb3e +size 5180 diff --git a/runs/Aug01_10-28-04_ale-distillm-8-0-0/events.out.tfevents.1722504496.ale-distillm-8-0-0.158148.0 b/runs/Aug01_10-28-04_ale-distillm-8-0-0/events.out.tfevents.1722504496.ale-distillm-8-0-0.158148.0 new file mode 100644 index 0000000000000000000000000000000000000000..9e9e3c8932e1431e3d2165221b023d88e1c3395b --- /dev/null +++ b/runs/Aug01_10-28-04_ale-distillm-8-0-0/events.out.tfevents.1722504496.ale-distillm-8-0-0.158148.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:570b9947f2c8485235fa2eb477461dad72c6272e4273b55afceb55afc428b21f +size 5180 diff --git a/runs/Aug01_10-37-23_ale-distillm-8-0-0/events.out.tfevents.1722505055.ale-distillm-8-0-0.158347.0 b/runs/Aug01_10-37-23_ale-distillm-8-0-0/events.out.tfevents.1722505055.ale-distillm-8-0-0.158347.0 new file mode 100644 index 0000000000000000000000000000000000000000..8fb0ae8511c7a3e0349195afa05d2ae353b2f668 --- /dev/null +++ b/runs/Aug01_10-37-23_ale-distillm-8-0-0/events.out.tfevents.1722505055.ale-distillm-8-0-0.158347.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31c7bd0695f11af4ba2ea2b717f9226ac2a0708c4985767a6ed79896cafbcc77 +size 5387 diff --git a/runs/Aug01_10-40-13_ale-distillm-8-0-0/events.out.tfevents.1722505225.ale-distillm-8-0-0.158858.0 b/runs/Aug01_10-40-13_ale-distillm-8-0-0/events.out.tfevents.1722505225.ale-distillm-8-0-0.158858.0 new file mode 100644 index 0000000000000000000000000000000000000000..35f3e8d30e814fb43db108cd242a1f88ee46e8b9 --- /dev/null +++ b/runs/Aug01_10-40-13_ale-distillm-8-0-0/events.out.tfevents.1722505225.ale-distillm-8-0-0.158858.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1c6a83095901a7e40018b745aa8fc6f069f431615dc7e7269d716b27a077a6a +size 7250 diff --git a/runs/Aug01_11-08-21_ale-distillm-8-0-0/events.out.tfevents.1722506913.ale-distillm-8-0-0.165077.0 b/runs/Aug01_11-08-21_ale-distillm-8-0-0/events.out.tfevents.1722506913.ale-distillm-8-0-0.165077.0 new file mode 100644 index 0000000000000000000000000000000000000000..cbf8699e298bf06edef5a6589475b68d32b9b21d --- /dev/null +++ b/runs/Aug01_11-08-21_ale-distillm-8-0-0/events.out.tfevents.1722506913.ale-distillm-8-0-0.165077.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b62e11a461dfd9a2958947d0a733728c4e44cac8042d01b420fe0a27b63024c +size 7250 diff --git a/runs/Aug01_11-18-26_ale-distillm-8-0-0/events.out.tfevents.1722507521.ale-distillm-8-0-0.166956.0 b/runs/Aug01_11-18-26_ale-distillm-8-0-0/events.out.tfevents.1722507521.ale-distillm-8-0-0.166956.0 new file mode 100644 index 0000000000000000000000000000000000000000..a875dae76302db4f6712316a23beb682425f5942 --- /dev/null +++ b/runs/Aug01_11-18-26_ale-distillm-8-0-0/events.out.tfevents.1722507521.ale-distillm-8-0-0.166956.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9851a828a33ffaf5843a42c2d07312f096d21ddbf5e40dc938a35874a4ecdce3 +size 5387 diff --git a/runs/Aug01_11-55-49_ale-distillm-8-0-0/events.out.tfevents.1722509761.ale-distillm-8-0-0.167857.0 b/runs/Aug01_11-55-49_ale-distillm-8-0-0/events.out.tfevents.1722509761.ale-distillm-8-0-0.167857.0 new file mode 100644 index 0000000000000000000000000000000000000000..a85d1cc4fbc1c1cf5e34c0be290c47928e549e72 --- /dev/null +++ b/runs/Aug01_11-55-49_ale-distillm-8-0-0/events.out.tfevents.1722509761.ale-distillm-8-0-0.167857.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:083938771864a047148f66da3ae73f1acad7a45f81760f1f4afb83d99a769beb +size 5801 diff --git a/runs/Aug01_11-59-21_ale-distillm-8-0-0/events.out.tfevents.1722509973.ale-distillm-8-0-0.168664.0 b/runs/Aug01_11-59-21_ale-distillm-8-0-0/events.out.tfevents.1722509973.ale-distillm-8-0-0.168664.0 new file mode 100644 index 0000000000000000000000000000000000000000..1caf3b946fcca1afbd7e0cc36adefc6e6778c14b --- /dev/null +++ b/runs/Aug01_11-59-21_ale-distillm-8-0-0/events.out.tfevents.1722509973.ale-distillm-8-0-0.168664.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc45f5f5e6a02580796a5b351860408570c047e8bbd1225b03ace29a37a83a8c +size 5594 diff --git a/runs/Aug01_12-11-31_ale-distillm-8-0-0/events.out.tfevents.1722510703.ale-distillm-8-0-0.169234.0 b/runs/Aug01_12-11-31_ale-distillm-8-0-0/events.out.tfevents.1722510703.ale-distillm-8-0-0.169234.0 new file mode 100644 index 0000000000000000000000000000000000000000..babb90ccac5df842c1ac645e48ba777758b1074e --- /dev/null +++ b/runs/Aug01_12-11-31_ale-distillm-8-0-0/events.out.tfevents.1722510703.ale-distillm-8-0-0.169234.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c34fe08825a30697bab13b6a4d47be84a730ee4d2f2129d167e8e0a1d0cd1bc9 +size 5387 diff --git a/runs/Aug01_12-15-56_ale-distillm-8-0-0/events.out.tfevents.1722510981.ale-distillm-8-0-0.169880.0 b/runs/Aug01_12-15-56_ale-distillm-8-0-0/events.out.tfevents.1722510981.ale-distillm-8-0-0.169880.0 new file mode 100644 index 0000000000000000000000000000000000000000..f371e4a5bc6896d356b8eebbbd84650c865a9f8b --- /dev/null +++ b/runs/Aug01_12-15-56_ale-distillm-8-0-0/events.out.tfevents.1722510981.ale-distillm-8-0-0.169880.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9011fe468442bbd47dbf08db5d4a2444bff31c1f04a393521acb2d183dd9a80b +size 5180 diff --git a/runs/Aug01_12-45-33_ale-distillm-8-0-0/events.out.tfevents.1722512949.ale-distillm-8-0-0.172568.0 b/runs/Aug01_12-45-33_ale-distillm-8-0-0/events.out.tfevents.1722512949.ale-distillm-8-0-0.172568.0 new file mode 100644 index 0000000000000000000000000000000000000000..73a65d6f97d8c2c9ed7487edcf68729f65f21d6a --- /dev/null +++ b/runs/Aug01_12-45-33_ale-distillm-8-0-0/events.out.tfevents.1722512949.ale-distillm-8-0-0.172568.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d463a3a08a22d2214bb763be5fc06aa26c1c9783755e2a9aab6bab633f35b182 +size 5180 diff --git a/runs/Aug01_12-57-42_ale-distillm-8-0-0/events.out.tfevents.1722513474.ale-distillm-8-0-0.174648.0 b/runs/Aug01_12-57-42_ale-distillm-8-0-0/events.out.tfevents.1722513474.ale-distillm-8-0-0.174648.0 new file mode 100644 index 0000000000000000000000000000000000000000..32eef7a1d42399c8cbca29d5fa987e8e5ecbfe14 --- /dev/null +++ b/runs/Aug01_12-57-42_ale-distillm-8-0-0/events.out.tfevents.1722513474.ale-distillm-8-0-0.174648.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b53ffdee6aec5ac4f86c1cab71d5a4d7983b6cac2676ac0c4e8a045aeb890d3 +size 7457 diff --git a/runs/Aug01_13-06-02_ale-distillm-8-0-0/events.out.tfevents.1722513974.ale-distillm-8-0-0.176733.0 b/runs/Aug01_13-06-02_ale-distillm-8-0-0/events.out.tfevents.1722513974.ale-distillm-8-0-0.176733.0 new file mode 100644 index 0000000000000000000000000000000000000000..e61cc5b9356b603188c3e4610f59885fd2113346 --- /dev/null +++ b/runs/Aug01_13-06-02_ale-distillm-8-0-0/events.out.tfevents.1722513974.ale-distillm-8-0-0.176733.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdab057e1f0f37f5775610a57e5d3f99c5ee7b6a850c490006b70f872d77b7d6 +size 8906 diff --git a/runs/Aug01_13-19-22_ale-distillm-8-0-0/events.out.tfevents.1722514774.ale-distillm-8-0-0.179826.0 b/runs/Aug01_13-19-22_ale-distillm-8-0-0/events.out.tfevents.1722514774.ale-distillm-8-0-0.179826.0 new file mode 100644 index 0000000000000000000000000000000000000000..1ee9e4f0baf558e51aab55e0aaf1f87f6306c6f8 --- /dev/null +++ b/runs/Aug01_13-19-22_ale-distillm-8-0-0/events.out.tfevents.1722514774.ale-distillm-8-0-0.179826.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa29afbf9ae47268905683a16d6ea3bfb42326af1404807e836285cbee93307a +size 17099 diff --git a/runs/Aug01_14-32-12_ale-distillm-8-0-0/events.out.tfevents.1722519239.ale-distillm-8-0-0.189662.0 b/runs/Aug01_14-32-12_ale-distillm-8-0-0/events.out.tfevents.1722519239.ale-distillm-8-0-0.189662.0 new file mode 100644 index 0000000000000000000000000000000000000000..ac06832e3baa91348f628a32d04b30286d693764 --- /dev/null +++ b/runs/Aug01_14-32-12_ale-distillm-8-0-0/events.out.tfevents.1722519239.ale-distillm-8-0-0.189662.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98e10f45e6c730a87860d507ca9c5428a28577a2b0cd5f89966f90dc889097a3 +size 5180 diff --git a/runs/Aug01_15-40-54_ale-distillm-8-0-0/events.out.tfevents.1722523316.ale-distillm-8-0-0.190122.0 b/runs/Aug01_15-40-54_ale-distillm-8-0-0/events.out.tfevents.1722523316.ale-distillm-8-0-0.190122.0 new file mode 100644 index 0000000000000000000000000000000000000000..d09da16ece086ba40decc617c35aa345f34b4e4d --- /dev/null +++ b/runs/Aug01_15-40-54_ale-distillm-8-0-0/events.out.tfevents.1722523316.ale-distillm-8-0-0.190122.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d90361df317feaa69353a091006c324614ec2a802efbb818693406141b80d15 +size 5180 diff --git a/runs/Aug01_16-19-27_ale-distillm-8-0-0/events.out.tfevents.1722525579.ale-distillm-8-0-0.190777.0 b/runs/Aug01_16-19-27_ale-distillm-8-0-0/events.out.tfevents.1722525579.ale-distillm-8-0-0.190777.0 new file mode 100644 index 0000000000000000000000000000000000000000..f2c9856ba0e8472fd65bb39298883e2bca80303e --- /dev/null +++ b/runs/Aug01_16-19-27_ale-distillm-8-0-0/events.out.tfevents.1722525579.ale-distillm-8-0-0.190777.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:959e6f64280ee78c16e4516a0f932366000c51266e277adb641a24edf0a24542 +size 17099 diff --git a/runs/Jul24_15-15-56_ale-distillm-8-0-0/events.out.tfevents.1721830680.ale-distillm-8-0-0.1471.0 b/runs/Jul24_15-15-56_ale-distillm-8-0-0/events.out.tfevents.1721830680.ale-distillm-8-0-0.1471.0 new file mode 100644 index 0000000000000000000000000000000000000000..6c14171d000ca9487f786fc3efa52b6fc92087b3 --- /dev/null +++ b/runs/Jul24_15-15-56_ale-distillm-8-0-0/events.out.tfevents.1721830680.ale-distillm-8-0-0.1471.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:966f2e545e0ed76fbdc9b9d47c1b3bbde6432052c7f7e552eec8c1cb5dcb14fc +size 5180 diff --git a/runs/Jul24_15-22-07_ale-distillm-8-0-0/events.out.tfevents.1721830970.ale-distillm-8-0-0.2114.0 b/runs/Jul24_15-22-07_ale-distillm-8-0-0/events.out.tfevents.1721830970.ale-distillm-8-0-0.2114.0 new file mode 100644 index 0000000000000000000000000000000000000000..30c543ca2a770af4a9079e7ed093179bf81cdb15 --- /dev/null +++ b/runs/Jul24_15-22-07_ale-distillm-8-0-0/events.out.tfevents.1721830970.ale-distillm-8-0-0.2114.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54f666a2711fcedc5dd27a57a32290ed14ab4229d2d6ba70c357fc027fabe86c +size 5180 diff --git a/runs/Jul24_15-28-05_ale-distillm-8-0-0/events.out.tfevents.1721831314.ale-distillm-8-0-0.3143.0 b/runs/Jul24_15-28-05_ale-distillm-8-0-0/events.out.tfevents.1721831314.ale-distillm-8-0-0.3143.0 new file mode 100644 index 0000000000000000000000000000000000000000..1811d4aad4fa64a10d7f22db1361d3b866272654 --- /dev/null +++ b/runs/Jul24_15-28-05_ale-distillm-8-0-0/events.out.tfevents.1721831314.ale-distillm-8-0-0.3143.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fd46aaee1504e368e5490dd9e61567fb04d26016e4e53dce0fc78be2dc9a218 +size 5180 diff --git a/runs/Jul25_09-25-47_ale-distillm-8-0-0/events.out.tfevents.1721895984.ale-distillm-8-0-0.16592.0 b/runs/Jul25_09-25-47_ale-distillm-8-0-0/events.out.tfevents.1721895984.ale-distillm-8-0-0.16592.0 new file mode 100644 index 0000000000000000000000000000000000000000..8200488eb63d2645ab5b038472c35e040dd413fe --- /dev/null +++ b/runs/Jul25_09-25-47_ale-distillm-8-0-0/events.out.tfevents.1721895984.ale-distillm-8-0-0.16592.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:900c848a3363f4cdc17cd2a28a0c9be4595a47cfbafefffe4f02d718e1d4645f +size 5180 diff --git a/runs/Jul25_14-53-34_ale-distillm-8-0-0/events.out.tfevents.1721915707.ale-distillm-8-0-0.28984.0 b/runs/Jul25_14-53-34_ale-distillm-8-0-0/events.out.tfevents.1721915707.ale-distillm-8-0-0.28984.0 new file mode 100644 index 0000000000000000000000000000000000000000..cc40dc195c9822b6b3dc4fcc2e94e27f8ee4f30d --- /dev/null +++ b/runs/Jul25_14-53-34_ale-distillm-8-0-0/events.out.tfevents.1721915707.ale-distillm-8-0-0.28984.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27ce9fefcf320cef6bf74593a9ad816a27d309e3657470304c7f6759956c4c68 +size 5594 diff --git a/runs/Jul25_16-32-18_ale-distillm-8-0-0/events.out.tfevents.1721921563.ale-distillm-8-0-0.29850.0 b/runs/Jul25_16-32-18_ale-distillm-8-0-0/events.out.tfevents.1721921563.ale-distillm-8-0-0.29850.0 new file mode 100644 index 0000000000000000000000000000000000000000..a92fb44ceb3ad1c5af9b5dfdda7cb64c7cab0644 --- /dev/null +++ b/runs/Jul25_16-32-18_ale-distillm-8-0-0/events.out.tfevents.1721921563.ale-distillm-8-0-0.29850.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9529257f55422b3a834eb32912332de4bd7b6f13d9f93fffa5a7d4c0f0063a18 +size 5180 diff --git a/runs/Jul25_17-32-25_ale-distillm-8-0-0/events.out.tfevents.1721925178.ale-distillm-8-0-0.30782.0 b/runs/Jul25_17-32-25_ale-distillm-8-0-0/events.out.tfevents.1721925178.ale-distillm-8-0-0.30782.0 new file mode 100644 index 0000000000000000000000000000000000000000..3811c9fe62699c3a3469d725010747af9984ee70 --- /dev/null +++ b/runs/Jul25_17-32-25_ale-distillm-8-0-0/events.out.tfevents.1721925178.ale-distillm-8-0-0.30782.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a06f62cd84736eadf6aaecc96166d0d4d1bc24b8bfefc70294a76e8d0d3d390 +size 5180 diff --git a/runs/Jul25_17-39-32_ale-distillm-8-0-0/events.out.tfevents.1721925610.ale-distillm-8-0-0.31222.0 b/runs/Jul25_17-39-32_ale-distillm-8-0-0/events.out.tfevents.1721925610.ale-distillm-8-0-0.31222.0 new file mode 100644 index 0000000000000000000000000000000000000000..ff97f7303042604ff6d124d43cc65b71d0198509 --- /dev/null +++ b/runs/Jul25_17-39-32_ale-distillm-8-0-0/events.out.tfevents.1721925610.ale-distillm-8-0-0.31222.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22f8cb7ef8f864ca40c3a2b788d149bd1f299a83b81a6d55e15f09076300a087 +size 5180 diff --git a/runs/Jul25_17-52-25_ale-distillm-8-0-0/events.out.tfevents.1721926378.ale-distillm-8-0-0.32561.0 b/runs/Jul25_17-52-25_ale-distillm-8-0-0/events.out.tfevents.1721926378.ale-distillm-8-0-0.32561.0 new file mode 100644 index 0000000000000000000000000000000000000000..ac0fbfd13f0077ce017e10fdd961c589055028d0 --- /dev/null +++ b/runs/Jul25_17-52-25_ale-distillm-8-0-0/events.out.tfevents.1721926378.ale-distillm-8-0-0.32561.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d21dbb13ce056a50ca4ea17be5f72c25a4ad1d8aedab350244ca1e559f8c627 +size 5180 diff --git a/runs/Jul25_18-09-33_ale-distillm-8-0-0/events.out.tfevents.1721927396.ale-distillm-8-0-0.34375.0 b/runs/Jul25_18-09-33_ale-distillm-8-0-0/events.out.tfevents.1721927396.ale-distillm-8-0-0.34375.0 new file mode 100644 index 0000000000000000000000000000000000000000..deb975267c6081e3d5e1c742d1bcace454302a00 --- /dev/null +++ b/runs/Jul25_18-09-33_ale-distillm-8-0-0/events.out.tfevents.1721927396.ale-distillm-8-0-0.34375.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82da5f2f40568f733299257ccd2d61a8dd008c8d1599060d7abf3fb9c5f7a67f +size 5180 diff --git a/runs/Jul26_09-07-00_ale-distillm-8-0-0/events.out.tfevents.1721981241.ale-distillm-8-0-0.37979.0 b/runs/Jul26_09-07-00_ale-distillm-8-0-0/events.out.tfevents.1721981241.ale-distillm-8-0-0.37979.0 new file mode 100644 index 0000000000000000000000000000000000000000..1e261f521b36b8e1301f9709e8f29deefd7dd286 --- /dev/null +++ b/runs/Jul26_09-07-00_ale-distillm-8-0-0/events.out.tfevents.1721981241.ale-distillm-8-0-0.37979.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ade46f78113518d29f16a04bd8c0af98e65dea3042522869b9b270ffa2353d18 +size 5180 diff --git a/runs/Jul26_09-46-13_ale-distillm-8-0-0/events.out.tfevents.1721983603.ale-distillm-8-0-0.41839.0 b/runs/Jul26_09-46-13_ale-distillm-8-0-0/events.out.tfevents.1721983603.ale-distillm-8-0-0.41839.0 new file mode 100644 index 0000000000000000000000000000000000000000..897d240741263a95dd83ee6bb1a730f883cb8787 --- /dev/null +++ b/runs/Jul26_09-46-13_ale-distillm-8-0-0/events.out.tfevents.1721983603.ale-distillm-8-0-0.41839.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4003a0c8d70f0ee539df81cadb7eb4a9031d77d23a7c191c84843244d1986d1a +size 5180 diff --git a/runs/Jul26_09-48-57_ale-distillm-8-0-0/events.out.tfevents.1721983776.ale-distillm-8-0-0.42185.0 b/runs/Jul26_09-48-57_ale-distillm-8-0-0/events.out.tfevents.1721983776.ale-distillm-8-0-0.42185.0 new file mode 100644 index 0000000000000000000000000000000000000000..6a46da98cce7f08efdf8029996f8eca17c07f53e --- /dev/null +++ b/runs/Jul26_09-48-57_ale-distillm-8-0-0/events.out.tfevents.1721983776.ale-distillm-8-0-0.42185.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:499e34cef3c0389706938d6bbe091aa6cad4e6257cd8c341b2646a2ab5c4dc7d +size 5180 diff --git a/runs/Jul26_09-55-11_ale-distillm-8-0-0/events.out.tfevents.1721984142.ale-distillm-8-0-0.42830.0 b/runs/Jul26_09-55-11_ale-distillm-8-0-0/events.out.tfevents.1721984142.ale-distillm-8-0-0.42830.0 new file mode 100644 index 0000000000000000000000000000000000000000..41bf43a3c54b5ce3b97e3809e3d9482d12e2a1b4 --- /dev/null +++ b/runs/Jul26_09-55-11_ale-distillm-8-0-0/events.out.tfevents.1721984142.ale-distillm-8-0-0.42830.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a293268a3a33410f3dc98ca69f6f2837c08cfa95b72dfd136fda61b6f833b3d2 +size 5180 diff --git a/runs/Jul26_10-11-22_ale-distillm-8-0-0/events.out.tfevents.1721985387.ale-distillm-8-0-0.44021.0 b/runs/Jul26_10-11-22_ale-distillm-8-0-0/events.out.tfevents.1721985387.ale-distillm-8-0-0.44021.0 new file mode 100644 index 0000000000000000000000000000000000000000..7925996df23d99cd5c1ae1dbec7b8b6a4238e258 --- /dev/null +++ b/runs/Jul26_10-11-22_ale-distillm-8-0-0/events.out.tfevents.1721985387.ale-distillm-8-0-0.44021.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c4772baeb03eedcf4f36814f85fb5b4c656782cc5cb4aa901fe8533df400118 +size 5180 diff --git a/runs/Jul26_10-30-54_ale-distillm-8-0-0/events.out.tfevents.1721986269.ale-distillm-8-0-0.46148.0 b/runs/Jul26_10-30-54_ale-distillm-8-0-0/events.out.tfevents.1721986269.ale-distillm-8-0-0.46148.0 new file mode 100644 index 0000000000000000000000000000000000000000..a40c6b3a37e66abfba260a697b0d3b65482210a0 --- /dev/null +++ b/runs/Jul26_10-30-54_ale-distillm-8-0-0/events.out.tfevents.1721986269.ale-distillm-8-0-0.46148.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78df162956e38c5d80368b7b263d2315389eb2f19f9dd29967beb90b7a6623ec +size 5180 diff --git a/runs/Jul26_10-34-46_ale-distillm-8-0-0/events.out.tfevents.1721986500.ale-distillm-8-0-0.46693.0 b/runs/Jul26_10-34-46_ale-distillm-8-0-0/events.out.tfevents.1721986500.ale-distillm-8-0-0.46693.0 new file mode 100644 index 0000000000000000000000000000000000000000..323a776ebc376823070ae21b54929829270b94b9 --- /dev/null +++ b/runs/Jul26_10-34-46_ale-distillm-8-0-0/events.out.tfevents.1721986500.ale-distillm-8-0-0.46693.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce7b815838cb3359c3116a7ce9faa68e37d1d0944fd2806b6b01d0149d0c9497 +size 5180 diff --git a/runs/Jul26_10-50-00_ale-distillm-8-0-0/events.out.tfevents.1721987414.ale-distillm-8-0-0.47867.0 b/runs/Jul26_10-50-00_ale-distillm-8-0-0/events.out.tfevents.1721987414.ale-distillm-8-0-0.47867.0 new file mode 100644 index 0000000000000000000000000000000000000000..46385f10ba43ab4e0889ba189de3b47d8b6d85ac --- /dev/null +++ b/runs/Jul26_10-50-00_ale-distillm-8-0-0/events.out.tfevents.1721987414.ale-distillm-8-0-0.47867.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10a8667472f2787fe03b5643ff93967c42377088f8243b356f7fa18e621fd054 +size 7043 diff --git a/runs/Jul31_17-04-25_ale-distillm-8-0-0/events.out.tfevents.1722441943.ale-distillm-8-0-0.139252.0 b/runs/Jul31_17-04-25_ale-distillm-8-0-0/events.out.tfevents.1722441943.ale-distillm-8-0-0.139252.0 new file mode 100644 index 0000000000000000000000000000000000000000..1025887f56d22a2a96f47916d6e19414119d35f0 --- /dev/null +++ b/runs/Jul31_17-04-25_ale-distillm-8-0-0/events.out.tfevents.1722441943.ale-distillm-8-0-0.139252.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27a526ad1b8807a242af99b41aad18c43856a96e8c17a1d99f421503ad7dc216 +size 5180 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..fa665d525e921bb8f3a1e7d18c7ed6fd16348fd1 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,34 @@ +{ + "additional_special_tokens": [ + "<|im_start|>", + "<|im_end|>" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..eaa6221f1e83e2d3737feafd66674c0768929e58 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:322a5f52ab5cab196761ab397a022d6fa3a2e1418585e532bb6efb2fedd2ae94 +size 17477501 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3208450a3248ebed7a0266609fcf02b3110d3573 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,70 @@ +{ + "add_bos_token": false, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "106": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "<|im_start|>", + "<|im_end|>" + ], + "bos_token": "", + "chat_template": "{% if messages[0]['role'] == 'user' or messages[0]['role'] == 'system' %}{{ bos_token }}{% endif %}{% for message in messages %}{{ '<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n' }}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% elif messages[-1]['role'] == 'assistant' %}{{ eos_token }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": null, + "model_max_length": 2048, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f2a29d93e7b495521e9157f922770e8b7547f302 --- /dev/null +++ b/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 0.9981515711645101, + "total_flos": 5.260333472022528e+16, + "train_loss": 3746.6009367766205, + "train_runtime": 2500.0335, + "train_samples": 6750, + "train_samples_per_second": 3.461, + "train_steps_per_second": 0.108 +} \ No newline at end of file diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..110537c6a044c4b71e5801526444b90996f8f054 --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,428 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9981515711645101, + "eval_steps": 500, + "global_step": 270, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.018484288354898338, + "grad_norm": 190464.0, + "learning_rate": 3.7037037037037037e-06, + "loss": 10605.0938, + "step": 5 + }, + { + "epoch": 0.036968576709796676, + "grad_norm": 89088.0, + "learning_rate": 7.4074074074074075e-06, + "loss": 9817.9641, + "step": 10 + }, + { + "epoch": 0.05545286506469501, + "grad_norm": 65280.0, + "learning_rate": 1.1111111111111113e-05, + "loss": 8481.5859, + "step": 15 + }, + { + "epoch": 0.07393715341959335, + "grad_norm": 35072.0, + "learning_rate": 1.4814814814814815e-05, + "loss": 7176.0742, + "step": 20 + }, + { + "epoch": 0.09242144177449169, + "grad_norm": 32640.0, + "learning_rate": 1.851851851851852e-05, + "loss": 5997.027, + "step": 25 + }, + { + "epoch": 0.11090573012939002, + "grad_norm": 46336.0, + "learning_rate": 1.9992479525042305e-05, + "loss": 5191.1363, + "step": 30 + }, + { + "epoch": 0.12939001848428835, + "grad_norm": 34048.0, + "learning_rate": 1.9946562024066018e-05, + "loss": 4804.0578, + "step": 35 + }, + { + "epoch": 0.1478743068391867, + "grad_norm": 9280.0, + "learning_rate": 1.9859096633447965e-05, + "loss": 4586.0977, + "step": 40 + }, + { + "epoch": 0.16635859519408502, + "grad_norm": 43520.0, + "learning_rate": 1.973044870579824e-05, + "loss": 4305.1379, + "step": 45 + }, + { + "epoch": 0.18484288354898337, + "grad_norm": 19072.0, + "learning_rate": 1.95611556177388e-05, + "loss": 4172.2984, + "step": 50 + }, + { + "epoch": 0.2033271719038817, + "grad_norm": 23808.0, + "learning_rate": 1.93519245252219e-05, + "loss": 3893.0977, + "step": 55 + }, + { + "epoch": 0.22181146025878004, + "grad_norm": 6752.0, + "learning_rate": 1.9103629409661468e-05, + "loss": 3707.5781, + "step": 60 + }, + { + "epoch": 0.24029574861367836, + "grad_norm": 35584.0, + "learning_rate": 1.881730742721608e-05, + "loss": 3645.1086, + "step": 65 + }, + { + "epoch": 0.2587800369685767, + "grad_norm": 66560.0, + "learning_rate": 1.8494154576472976e-05, + "loss": 3538.7551, + "step": 70 + }, + { + "epoch": 0.27726432532347506, + "grad_norm": 35328.0, + "learning_rate": 1.8135520702629677e-05, + "loss": 3496.9391, + "step": 75 + }, + { + "epoch": 0.2957486136783734, + "grad_norm": 33536.0, + "learning_rate": 1.7742903859041324e-05, + "loss": 3434.5762, + "step": 80 + }, + { + "epoch": 0.3142329020332717, + "grad_norm": 22272.0, + "learning_rate": 1.7317944049686125e-05, + "loss": 3348.5082, + "step": 85 + }, + { + "epoch": 0.33271719038817005, + "grad_norm": 9408.0, + "learning_rate": 1.686241637868734e-05, + "loss": 3297.6758, + "step": 90 + }, + { + "epoch": 0.3512014787430684, + "grad_norm": 12864.0, + "learning_rate": 1.637822363550706e-05, + "loss": 3260.8111, + "step": 95 + }, + { + "epoch": 0.36968576709796674, + "grad_norm": 7936.0, + "learning_rate": 1.586738834678418e-05, + "loss": 3206.3494, + "step": 100 + }, + { + "epoch": 0.38817005545286504, + "grad_norm": 19200.0, + "learning_rate": 1.5332044328016916e-05, + "loss": 3263.5463, + "step": 105 + }, + { + "epoch": 0.4066543438077634, + "grad_norm": 25984.0, + "learning_rate": 1.4774427770379492e-05, + "loss": 3248.7541, + "step": 110 + }, + { + "epoch": 0.42513863216266173, + "grad_norm": 33536.0, + "learning_rate": 1.4196867899904292e-05, + "loss": 3195.2939, + "step": 115 + }, + { + "epoch": 0.4436229205175601, + "grad_norm": 34560.0, + "learning_rate": 1.3601777248047105e-05, + "loss": 3080.999, + "step": 120 + }, + { + "epoch": 0.46210720887245843, + "grad_norm": 24320.0, + "learning_rate": 1.2991641574276419e-05, + "loss": 3109.1754, + "step": 125 + }, + { + "epoch": 0.4805914972273567, + "grad_norm": 15488.0, + "learning_rate": 1.2369009482781191e-05, + "loss": 3104.5951, + "step": 130 + }, + { + "epoch": 0.49907578558225507, + "grad_norm": 23168.0, + "learning_rate": 1.1736481776669307e-05, + "loss": 3047.1744, + "step": 135 + }, + { + "epoch": 0.5175600739371534, + "grad_norm": 41728.0, + "learning_rate": 1.1096700594125318e-05, + "loss": 3073.7598, + "step": 140 + }, + { + "epoch": 0.5360443622920518, + "grad_norm": 19200.0, + "learning_rate": 1.0452338371907065e-05, + "loss": 2990.3996, + "step": 145 + }, + { + "epoch": 0.5545286506469501, + "grad_norm": 35584.0, + "learning_rate": 9.806086682281759e-06, + "loss": 3001.843, + "step": 150 + }, + { + "epoch": 0.5730129390018485, + "grad_norm": 19072.0, + "learning_rate": 9.160644990030932e-06, + "loss": 3061.5664, + "step": 155 + }, + { + "epoch": 0.5914972273567468, + "grad_norm": 32000.0, + "learning_rate": 8.518709376487515e-06, + "loss": 3027.4182, + "step": 160 + }, + { + "epoch": 0.609981515711645, + "grad_norm": 16640.0, + "learning_rate": 7.882961277705897e-06, + "loss": 3005.9816, + "step": 165 + }, + { + "epoch": 0.6284658040665434, + "grad_norm": 24448.0, + "learning_rate": 7.256056283806987e-06, + "loss": 3036.1979, + "step": 170 + }, + { + "epoch": 0.6469500924214417, + "grad_norm": 23424.0, + "learning_rate": 6.640613046284581e-06, + "loss": 3033.5047, + "step": 175 + }, + { + "epoch": 0.6654343807763401, + "grad_norm": 12288.0, + "learning_rate": 6.039202339608432e-06, + "loss": 2887.5133, + "step": 180 + }, + { + "epoch": 0.6839186691312384, + "grad_norm": 14400.0, + "learning_rate": 5.454336322814995e-06, + "loss": 2917.3609, + "step": 185 + }, + { + "epoch": 0.7024029574861368, + "grad_norm": 19968.0, + "learning_rate": 4.888458045941269e-06, + "loss": 2940.7945, + "step": 190 + }, + { + "epoch": 0.7208872458410351, + "grad_norm": 7488.0, + "learning_rate": 4.343931245134616e-06, + "loss": 2938.1154, + "step": 195 + }, + { + "epoch": 0.7393715341959335, + "grad_norm": 5664.0, + "learning_rate": 3.823030469065431e-06, + "loss": 2921.0854, + "step": 200 + }, + { + "epoch": 0.7578558225508318, + "grad_norm": 15104.0, + "learning_rate": 3.3279315778858034e-06, + "loss": 3014.4, + "step": 205 + }, + { + "epoch": 0.7763401109057301, + "grad_norm": 24832.0, + "learning_rate": 2.8607026544210115e-06, + "loss": 2952.8635, + "step": 210 + }, + { + "epoch": 0.7948243992606284, + "grad_norm": 26624.0, + "learning_rate": 2.423295365558821e-06, + "loss": 2933.4004, + "step": 215 + }, + { + "epoch": 0.8133086876155268, + "grad_norm": 9792.0, + "learning_rate": 2.01753680992107e-06, + "loss": 3003.6742, + "step": 220 + }, + { + "epoch": 0.8317929759704251, + "grad_norm": 12032.0, + "learning_rate": 1.6451218858706374e-06, + "loss": 2939.7191, + "step": 225 + }, + { + "epoch": 0.8502772643253235, + "grad_norm": 26752.0, + "learning_rate": 1.307606211733522e-06, + "loss": 2912.9416, + "step": 230 + }, + { + "epoch": 0.8687615526802218, + "grad_norm": 14272.0, + "learning_rate": 1.0063996278090704e-06, + "loss": 2914.6553, + "step": 235 + }, + { + "epoch": 0.8872458410351202, + "grad_norm": 8704.0, + "learning_rate": 7.427603073110967e-07, + "loss": 3010.8492, + "step": 240 + }, + { + "epoch": 0.9057301293900185, + "grad_norm": 4960.0, + "learning_rate": 5.177895008392353e-07, + "loss": 2934.9787, + "step": 245 + }, + { + "epoch": 0.9242144177449169, + "grad_norm": 31616.0, + "learning_rate": 3.3242693633337986e-07, + "loss": 3034.4797, + "step": 250 + }, + { + "epoch": 0.9426987060998152, + "grad_norm": 6656.0, + "learning_rate": 1.874468937261531e-07, + "loss": 2923.3105, + "step": 255 + }, + { + "epoch": 0.9611829944547134, + "grad_norm": 9344.0, + "learning_rate": 8.345497068998897e-08, + "loss": 2966.9092, + "step": 260 + }, + { + "epoch": 0.9796672828096118, + "grad_norm": 6240.0, + "learning_rate": 2.088555298867978e-08, + "loss": 2945.0246, + "step": 265 + }, + { + "epoch": 0.9981515711645101, + "grad_norm": 19456.0, + "learning_rate": 0.0, + "loss": 2978.2934, + "step": 270 + }, + { + "epoch": 0.9981515711645101, + "eval_loss": 2957.430908203125, + "eval_runtime": 375.38, + "eval_samples_per_second": 2.587, + "eval_steps_per_second": 0.325, + "step": 270 + }, + { + "epoch": 0.9981515711645101, + "step": 270, + "total_flos": 5.260333472022528e+16, + "train_loss": 3746.6009367766205, + "train_runtime": 2500.0335, + "train_samples_per_second": 3.461, + "train_steps_per_second": 0.108 + } + ], + "logging_steps": 5, + "max_steps": 270, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5.260333472022528e+16, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4f55fc8d74f5efd6705e607ecfb4330a325bc46b --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a88849e77f99bae98597a9ae316882630d4dd9dbd06b89258d46a48aeba9e018 +size 5368