diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..e8a17591b24538f7ef40db19f3b7b4cfded1101c 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,12 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +checkpoint-100/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-200/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-300/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-400/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-500/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-600/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-700/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-792/tokenizer.json filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e55e0f24da737d09f58d064b3aff74ea31324a89 --- /dev/null +++ b/README.md @@ -0,0 +1,61 @@ +--- +base_model: unsloth/gemma-2-2b-it +library_name: peft +license: other +tags: +- llama-factory +- lora +- generated_from_trainer +model-index: +- name: 4k_train_2024-10-17-07-48-45 + results: [] +--- + + + +# 4k_train_2024-10-17-07-48-45 + +This model is a fine-tuned version of [unsloth/gemma-2-2b-it](https://huggingface.co/unsloth/gemma-2-2b-it) on the identity dataset. 
+ +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 5e-05 +- train_batch_size: 2 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 2 +- gradient_accumulation_steps: 8 +- total_train_batch_size: 32 +- total_eval_batch_size: 16 +- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08 +- lr_scheduler_type: cosine +- num_epochs: 6.0 + +### Training results + + + +### Framework versions + +- PEFT 0.12.0 +- Transformers 4.45.0 +- Pytorch 2.3.1+cu121 +- Datasets 2.21.0 +- Tokenizers 0.20.1 \ No newline at end of file diff --git a/adapter_config.json b/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..84f5e74794795afa599846c26565a6f523155345 --- /dev/null +++ b/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/gemma-2-2b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "k_proj", + "o_proj", + "down_proj", + "up_proj", + "q_proj", + "gate_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/adapter_model.safetensors b/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7df8a3d3ce234cbf35dc2645f7a28936757043ae --- /dev/null +++ b/adapter_model.safetensors @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d4cae9c229b3fadbf4be0636f819d244c0aa2cded89cf97568dfc8ee4052cbd +size 41581360 diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..dd477c643160dd09539290d5310457e09cccfb4c --- /dev/null +++ b/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 5.971724787935909, + "num_input_tokens_seen": 8063376, + "total_flos": 9.844904305885184e+16, + "train_loss": 2.431757736085641, + "train_runtime": 2652.6299, + "train_samples_per_second": 9.6, + "train_steps_per_second": 0.299 +} \ No newline at end of file diff --git a/checkpoint-100/README.md b/checkpoint-100/README.md new file mode 100644 index 0000000000000000000000000000000000000000..9f8f0d832be44b45f0bbeda414aed6d1440221e7 --- /dev/null +++ b/checkpoint-100/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/gemma-2-2b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. 
More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.12.0 \ No newline at end of file diff --git a/checkpoint-100/adapter_config.json b/checkpoint-100/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..84f5e74794795afa599846c26565a6f523155345 --- /dev/null +++ b/checkpoint-100/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/gemma-2-2b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "k_proj", + "o_proj", + "down_proj", + "up_proj", + "q_proj", + "gate_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-100/adapter_model.safetensors 
b/checkpoint-100/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f4016a355b74c9fa4580f948f0460ad7dcc16aa0 --- /dev/null +++ b/checkpoint-100/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23c46731de00ad39baf6709b85512a8bbfcf18f18306c2b390d6197277ecb7aa +size 41581360 diff --git a/checkpoint-100/optimizer.pt b/checkpoint-100/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..be61f2ee1bb37f36258ae02aa50da9d48e95125b --- /dev/null +++ b/checkpoint-100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bc6d88c9e93014545dd1cd086d3416ebdb3cb9ddc156ae4a319f328b6450de3 +size 83372758 diff --git a/checkpoint-100/rng_state_0.pth b/checkpoint-100/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..b24ba5257472a7c82c4d4247a4c0210ee74f9e61 --- /dev/null +++ b/checkpoint-100/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8d6a959372d5e0c2ea025dd26c9d0ad2046fce19352056cae8074dcbd0a6fd4 +size 14512 diff --git a/checkpoint-100/rng_state_1.pth b/checkpoint-100/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..9350a8206512bf8b857f4064425716468c2b7465 --- /dev/null +++ b/checkpoint-100/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f68a37892a1b445d21bb35cc10bf7a058a6f9ec8c363f5ed156ff4f49d90fb6 +size 14512 diff --git a/checkpoint-100/scheduler.pt b/checkpoint-100/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a53ef2174c184393d666a31d31361036b4e0ed9a --- /dev/null +++ b/checkpoint-100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:007585e9be6fcea10724fc5c4f995ce0d7c1a0cb64ea3e579daa75bb93d29802 +size 1064 diff --git a/checkpoint-100/special_tokens_map.json b/checkpoint-100/special_tokens_map.json new file 
mode 100644 index 0000000000000000000000000000000000000000..8d6368f7e735fbe4781bf6e956b7c6ad0586df80 --- /dev/null +++ b/checkpoint-100/special_tokens_map.json @@ -0,0 +1,34 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-100/tokenizer.json b/checkpoint-100/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..a4a305d1de4d8f47c0252b4d7fe65a10dd8e2c22 --- /dev/null +++ b/checkpoint-100/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f7eee611703c5ce5d1eee32d9cdcfe465647b8aff0c1dfb3bed7ad7dbb05060 +size 34362873 diff --git a/checkpoint-100/tokenizer.model b/checkpoint-100/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/checkpoint-100/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/checkpoint-100/tokenizer_config.json b/checkpoint-100/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..97f33b16e8481f3849fb6d02d1656628e9e22c91 --- /dev/null +++ b/checkpoint-100/tokenizer_config.json @@ -0,0 +1,2015 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { 
+ "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": 
{ + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": 
{ + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": 
"\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { 
+ "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255968": { + "content": "[toxicity=0]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255969": { + "content": "\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255970": { + "content": "\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255971": { + "content": "\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + 
"255972": { + "content": "\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255973": { + "content": "\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255974": { + "content": "\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255975": { + "content": "\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255976": { + "content": "\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255977": { + "content": "\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255978": { + "content": "\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255979": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255980": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255981": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255982": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255983": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255984": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255985": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255986": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255987": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255988": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255989": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255990": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255991": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255992": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255993": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255994": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255995": { + "content": 
"\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255996": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255997": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255998": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255999": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ system_message + '\n' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ 'Human: ' + content + '\nAssistant:' }}{% elif message['role'] == 'assistant' %}{{ content + '' + '\n' }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "padding_side": "right", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "split_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/checkpoint-100/trainer_state.json b/checkpoint-100/trainer_state.json new file mode 100644 index 
0000000000000000000000000000000000000000..10c2ddadac37eaa936e804989e0919a017cbacd2 --- /dev/null +++ b/checkpoint-100/trainer_state.json @@ -0,0 +1,193 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.7540056550424128, + "eval_steps": 500, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.03770028275212064, + "grad_norm": 3.2918148040771484, + "learning_rate": 4.9995083170283816e-05, + "loss": 4.3742, + "num_input_tokens_seen": 50544, + "step": 5 + }, + { + "epoch": 0.07540056550424128, + "grad_norm": 3.0331127643585205, + "learning_rate": 4.998033461515242e-05, + "loss": 3.8434, + "num_input_tokens_seen": 104080, + "step": 10 + }, + { + "epoch": 0.11310084825636192, + "grad_norm": 3.322950601577759, + "learning_rate": 4.9955760135896534e-05, + "loss": 3.6027, + "num_input_tokens_seen": 155776, + "step": 15 + }, + { + "epoch": 0.15080113100848255, + "grad_norm": 2.7705531120300293, + "learning_rate": 4.992136939879856e-05, + "loss": 3.4102, + "num_input_tokens_seen": 203184, + "step": 20 + }, + { + "epoch": 0.1885014137606032, + "grad_norm": 3.4572091102600098, + "learning_rate": 4.9877175931330346e-05, + "loss": 3.153, + "num_input_tokens_seen": 255744, + "step": 25 + }, + { + "epoch": 0.22620169651272384, + "grad_norm": 2.0412893295288086, + "learning_rate": 4.982319711683221e-05, + "loss": 3.0949, + "num_input_tokens_seen": 307808, + "step": 30 + }, + { + "epoch": 0.2639019792648445, + "grad_norm": 2.0765998363494873, + "learning_rate": 4.975945418767529e-05, + "loss": 3.0392, + "num_input_tokens_seen": 358192, + "step": 35 + }, + { + "epoch": 0.3016022620169651, + "grad_norm": 2.2433698177337646, + "learning_rate": 4.968597221690986e-05, + "loss": 3.0685, + "num_input_tokens_seen": 408832, + "step": 40 + }, + { + "epoch": 0.3393025447690858, + "grad_norm": 2.7639973163604736, + "learning_rate": 4.96027801084029e-05, + "loss": 
3.0385, + "num_input_tokens_seen": 458368, + "step": 45 + }, + { + "epoch": 0.3770028275212064, + "grad_norm": 2.0680019855499268, + "learning_rate": 4.950991058546893e-05, + "loss": 3.1079, + "num_input_tokens_seen": 512160, + "step": 50 + }, + { + "epoch": 0.41470311027332707, + "grad_norm": 2.181554079055786, + "learning_rate": 4.940740017799833e-05, + "loss": 2.9883, + "num_input_tokens_seen": 562928, + "step": 55 + }, + { + "epoch": 0.4524033930254477, + "grad_norm": 2.0724411010742188, + "learning_rate": 4.929528920808854e-05, + "loss": 2.969, + "num_input_tokens_seen": 613072, + "step": 60 + }, + { + "epoch": 0.49010367577756836, + "grad_norm": 2.5529158115386963, + "learning_rate": 4.917362177418342e-05, + "loss": 3.0019, + "num_input_tokens_seen": 664640, + "step": 65 + }, + { + "epoch": 0.527803958529689, + "grad_norm": 2.3983142375946045, + "learning_rate": 4.904244573372733e-05, + "loss": 2.9984, + "num_input_tokens_seen": 716800, + "step": 70 + }, + { + "epoch": 0.5655042412818096, + "grad_norm": 2.206209182739258, + "learning_rate": 4.8901812684340564e-05, + "loss": 2.9848, + "num_input_tokens_seen": 766528, + "step": 75 + }, + { + "epoch": 0.6032045240339302, + "grad_norm": 2.22698974609375, + "learning_rate": 4.8751777943523634e-05, + "loss": 2.9169, + "num_input_tokens_seen": 817376, + "step": 80 + }, + { + "epoch": 0.6409048067860509, + "grad_norm": 2.4569709300994873, + "learning_rate": 4.8592400526898314e-05, + "loss": 2.8844, + "num_input_tokens_seen": 864688, + "step": 85 + }, + { + "epoch": 0.6786050895381716, + "grad_norm": 2.195237636566162, + "learning_rate": 4.842374312499405e-05, + "loss": 2.9422, + "num_input_tokens_seen": 913216, + "step": 90 + }, + { + "epoch": 0.7163053722902922, + "grad_norm": 2.54585599899292, + "learning_rate": 4.824587207858888e-05, + "loss": 2.8431, + "num_input_tokens_seen": 964144, + "step": 95 + }, + { + "epoch": 0.7540056550424128, + "grad_norm": 2.3841867446899414, + "learning_rate": 4.805885735261454e-05, 
+ "loss": 2.8377, + "num_input_tokens_seen": 1018768, + "step": 100 + } + ], + "logging_steps": 5, + "max_steps": 792, + "num_input_tokens_seen": 1018768, + "num_train_epochs": 6, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.243855378710528e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-100/training_args.bin b/checkpoint-100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d325c35219dd8c0eea8279f17307e1d0d8c29b12 --- /dev/null +++ b/checkpoint-100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72d28ce688790f3a28f1dba3064866610463622e1d4141255300e136617f5673 +size 5432 diff --git a/checkpoint-200/README.md b/checkpoint-200/README.md new file mode 100644 index 0000000000000000000000000000000000000000..9f8f0d832be44b45f0bbeda414aed6d1440221e7 --- /dev/null +++ b/checkpoint-200/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/gemma-2-2b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information 
Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.12.0 \ No newline at end of file diff --git a/checkpoint-200/adapter_config.json b/checkpoint-200/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..84f5e74794795afa599846c26565a6f523155345 --- /dev/null +++ b/checkpoint-200/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/gemma-2-2b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "k_proj", + "o_proj", + "down_proj", + "up_proj", + "q_proj", + "gate_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-200/adapter_model.safetensors 
b/checkpoint-200/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bae457d2285c896c27efea4ddeb8d6e655dca9cf --- /dev/null +++ b/checkpoint-200/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c5c492739561d3d93898dbaabc7a71197e1b6d4bce77880a084407c8a6be42c +size 41581360 diff --git a/checkpoint-200/optimizer.pt b/checkpoint-200/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..aa50a0ad5edc080303e98bbb65c962ce5f896f70 --- /dev/null +++ b/checkpoint-200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1681b5a10bb46b6c382c8c9703f85520e4165292c0c81c3a346c7a473cdffe9 +size 83372758 diff --git a/checkpoint-200/rng_state_0.pth b/checkpoint-200/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..d46a9ba7690e83fef48d0cf5f4c34bd9df6cc737 --- /dev/null +++ b/checkpoint-200/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cb795a5cea0baa625c50007a6c9da09c6bbb5c16b560424070384a479e7d8a6 +size 14512 diff --git a/checkpoint-200/rng_state_1.pth b/checkpoint-200/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..23784d04394ff924f7fca03236f62241ce5f4b6e --- /dev/null +++ b/checkpoint-200/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f19604377bd828eb366c68946ad997a4ff4d69beaeea93ee58915135768ec63 +size 14512 diff --git a/checkpoint-200/scheduler.pt b/checkpoint-200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..256a7585cec7c0986317830863d14bfa567b9de0 --- /dev/null +++ b/checkpoint-200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa006138eee6fa0ff47a9911fb5414a64744afc4fa750d29d1a25fc9da929eba +size 1064 diff --git a/checkpoint-200/special_tokens_map.json b/checkpoint-200/special_tokens_map.json new file 
mode 100644 index 0000000000000000000000000000000000000000..8d6368f7e735fbe4781bf6e956b7c6ad0586df80 --- /dev/null +++ b/checkpoint-200/special_tokens_map.json @@ -0,0 +1,34 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-200/tokenizer.json b/checkpoint-200/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..a4a305d1de4d8f47c0252b4d7fe65a10dd8e2c22 --- /dev/null +++ b/checkpoint-200/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f7eee611703c5ce5d1eee32d9cdcfe465647b8aff0c1dfb3bed7ad7dbb05060 +size 34362873 diff --git a/checkpoint-200/tokenizer.model b/checkpoint-200/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/checkpoint-200/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/checkpoint-200/tokenizer_config.json b/checkpoint-200/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..97f33b16e8481f3849fb6d02d1656628e9e22c91 --- /dev/null +++ b/checkpoint-200/tokenizer_config.json @@ -0,0 +1,2015 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { 
+ "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": 
{ + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": 
{ + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": 
"\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { 
+ "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255968": { + "content": "[toxicity=0]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255969": { + "content": "\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255970": { + "content": "\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255971": { + "content": "\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + 
"255972": { + "content": "\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255973": { + "content": "\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255974": { + "content": "\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255975": { + "content": "\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255976": { + "content": "\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255977": { + "content": "\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255978": { + "content": "\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255979": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255980": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255981": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255982": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255983": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255984": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255985": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255986": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255987": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255988": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255989": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255990": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255991": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255992": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255993": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255994": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255995": { + "content": 
"\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255996": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255997": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255998": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255999": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ system_message + '\n' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ 'Human: ' + content + '\nAssistant:' }}{% elif message['role'] == 'assistant' %}{{ content + '' + '\n' }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "padding_side": "right", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "split_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/checkpoint-200/trainer_state.json b/checkpoint-200/trainer_state.json new file mode 100644 index 
0000000000000000000000000000000000000000..fd024fcb80eaba411d6a8265682cf41d203980b8 --- /dev/null +++ b/checkpoint-200/trainer_state.json @@ -0,0 +1,353 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.5080113100848256, + "eval_steps": 500, + "global_step": 200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.03770028275212064, + "grad_norm": 3.2918148040771484, + "learning_rate": 4.9995083170283816e-05, + "loss": 4.3742, + "num_input_tokens_seen": 50544, + "step": 5 + }, + { + "epoch": 0.07540056550424128, + "grad_norm": 3.0331127643585205, + "learning_rate": 4.998033461515242e-05, + "loss": 3.8434, + "num_input_tokens_seen": 104080, + "step": 10 + }, + { + "epoch": 0.11310084825636192, + "grad_norm": 3.322950601577759, + "learning_rate": 4.9955760135896534e-05, + "loss": 3.6027, + "num_input_tokens_seen": 155776, + "step": 15 + }, + { + "epoch": 0.15080113100848255, + "grad_norm": 2.7705531120300293, + "learning_rate": 4.992136939879856e-05, + "loss": 3.4102, + "num_input_tokens_seen": 203184, + "step": 20 + }, + { + "epoch": 0.1885014137606032, + "grad_norm": 3.4572091102600098, + "learning_rate": 4.9877175931330346e-05, + "loss": 3.153, + "num_input_tokens_seen": 255744, + "step": 25 + }, + { + "epoch": 0.22620169651272384, + "grad_norm": 2.0412893295288086, + "learning_rate": 4.982319711683221e-05, + "loss": 3.0949, + "num_input_tokens_seen": 307808, + "step": 30 + }, + { + "epoch": 0.2639019792648445, + "grad_norm": 2.0765998363494873, + "learning_rate": 4.975945418767529e-05, + "loss": 3.0392, + "num_input_tokens_seen": 358192, + "step": 35 + }, + { + "epoch": 0.3016022620169651, + "grad_norm": 2.2433698177337646, + "learning_rate": 4.968597221690986e-05, + "loss": 3.0685, + "num_input_tokens_seen": 408832, + "step": 40 + }, + { + "epoch": 0.3393025447690858, + "grad_norm": 2.7639973163604736, + "learning_rate": 4.96027801084029e-05, + "loss": 
3.0385, + "num_input_tokens_seen": 458368, + "step": 45 + }, + { + "epoch": 0.3770028275212064, + "grad_norm": 2.0680019855499268, + "learning_rate": 4.950991058546893e-05, + "loss": 3.1079, + "num_input_tokens_seen": 512160, + "step": 50 + }, + { + "epoch": 0.41470311027332707, + "grad_norm": 2.181554079055786, + "learning_rate": 4.940740017799833e-05, + "loss": 2.9883, + "num_input_tokens_seen": 562928, + "step": 55 + }, + { + "epoch": 0.4524033930254477, + "grad_norm": 2.0724411010742188, + "learning_rate": 4.929528920808854e-05, + "loss": 2.969, + "num_input_tokens_seen": 613072, + "step": 60 + }, + { + "epoch": 0.49010367577756836, + "grad_norm": 2.5529158115386963, + "learning_rate": 4.917362177418342e-05, + "loss": 3.0019, + "num_input_tokens_seen": 664640, + "step": 65 + }, + { + "epoch": 0.527803958529689, + "grad_norm": 2.3983142375946045, + "learning_rate": 4.904244573372733e-05, + "loss": 2.9984, + "num_input_tokens_seen": 716800, + "step": 70 + }, + { + "epoch": 0.5655042412818096, + "grad_norm": 2.206209182739258, + "learning_rate": 4.8901812684340564e-05, + "loss": 2.9848, + "num_input_tokens_seen": 766528, + "step": 75 + }, + { + "epoch": 0.6032045240339302, + "grad_norm": 2.22698974609375, + "learning_rate": 4.8751777943523634e-05, + "loss": 2.9169, + "num_input_tokens_seen": 817376, + "step": 80 + }, + { + "epoch": 0.6409048067860509, + "grad_norm": 2.4569709300994873, + "learning_rate": 4.8592400526898314e-05, + "loss": 2.8844, + "num_input_tokens_seen": 864688, + "step": 85 + }, + { + "epoch": 0.6786050895381716, + "grad_norm": 2.195237636566162, + "learning_rate": 4.842374312499405e-05, + "loss": 2.9422, + "num_input_tokens_seen": 913216, + "step": 90 + }, + { + "epoch": 0.7163053722902922, + "grad_norm": 2.54585599899292, + "learning_rate": 4.824587207858888e-05, + "loss": 2.8431, + "num_input_tokens_seen": 964144, + "step": 95 + }, + { + "epoch": 0.7540056550424128, + "grad_norm": 2.3841867446899414, + "learning_rate": 4.805885735261454e-05, 
+ "loss": 2.8377, + "num_input_tokens_seen": 1018768, + "step": 100 + }, + { + "epoch": 0.7917059377945335, + "grad_norm": 2.359386920928955, + "learning_rate": 4.786277250863599e-05, + "loss": 2.7843, + "num_input_tokens_seen": 1070592, + "step": 105 + }, + { + "epoch": 0.8294062205466541, + "grad_norm": 2.0952038764953613, + "learning_rate": 4.765769467591625e-05, + "loss": 2.8218, + "num_input_tokens_seen": 1125392, + "step": 110 + }, + { + "epoch": 0.8671065032987747, + "grad_norm": 2.222754955291748, + "learning_rate": 4.744370452107789e-05, + "loss": 2.877, + "num_input_tokens_seen": 1176256, + "step": 115 + }, + { + "epoch": 0.9048067860508954, + "grad_norm": 2.3233537673950195, + "learning_rate": 4.722088621637309e-05, + "loss": 2.9028, + "num_input_tokens_seen": 1225376, + "step": 120 + }, + { + "epoch": 0.942507068803016, + "grad_norm": 2.400575876235962, + "learning_rate": 4.698932740657479e-05, + "loss": 2.762, + "num_input_tokens_seen": 1277024, + "step": 125 + }, + { + "epoch": 0.9802073515551367, + "grad_norm": 2.592116117477417, + "learning_rate": 4.6749119174501975e-05, + "loss": 2.9164, + "num_input_tokens_seen": 1322912, + "step": 130 + }, + { + "epoch": 1.0179076343072573, + "grad_norm": 2.563326120376587, + "learning_rate": 4.6500356005192514e-05, + "loss": 2.736, + "num_input_tokens_seen": 1368624, + "step": 135 + }, + { + "epoch": 1.055607917059378, + "grad_norm": 2.3927371501922607, + "learning_rate": 4.6243135748737864e-05, + "loss": 2.7414, + "num_input_tokens_seen": 1417664, + "step": 140 + }, + { + "epoch": 1.0933081998114986, + "grad_norm": 2.597975492477417, + "learning_rate": 4.597755958179406e-05, + "loss": 2.6961, + "num_input_tokens_seen": 1469120, + "step": 145 + }, + { + "epoch": 1.1310084825636193, + "grad_norm": 2.932581663131714, + "learning_rate": 4.570373196778427e-05, + "loss": 2.6134, + "num_input_tokens_seen": 1521632, + "step": 150 + }, + { + "epoch": 1.1687087653157398, + "grad_norm": 3.28389310836792, + 
"learning_rate": 4.5421760615808474e-05, + "loss": 2.7548, + "num_input_tokens_seen": 1565296, + "step": 155 + }, + { + "epoch": 1.2064090480678604, + "grad_norm": 2.927279233932495, + "learning_rate": 4.513175643827647e-05, + "loss": 2.702, + "num_input_tokens_seen": 1617088, + "step": 160 + }, + { + "epoch": 1.244109330819981, + "grad_norm": 2.840573310852051, + "learning_rate": 4.4833833507280884e-05, + "loss": 2.6893, + "num_input_tokens_seen": 1663584, + "step": 165 + }, + { + "epoch": 1.2818096135721018, + "grad_norm": 2.6322081089019775, + "learning_rate": 4.4528109009727336e-05, + "loss": 2.5671, + "num_input_tokens_seen": 1713744, + "step": 170 + }, + { + "epoch": 1.3195098963242224, + "grad_norm": 3.1280879974365234, + "learning_rate": 4.42147032012394e-05, + "loss": 2.7682, + "num_input_tokens_seen": 1762768, + "step": 175 + }, + { + "epoch": 1.3572101790763431, + "grad_norm": 3.1128265857696533, + "learning_rate": 4.389373935885646e-05, + "loss": 2.7062, + "num_input_tokens_seen": 1815808, + "step": 180 + }, + { + "epoch": 1.3949104618284638, + "grad_norm": 2.952150344848633, + "learning_rate": 4.356534373254316e-05, + "loss": 2.7092, + "num_input_tokens_seen": 1871040, + "step": 185 + }, + { + "epoch": 1.4326107445805842, + "grad_norm": 3.0146102905273438, + "learning_rate": 4.322964549552943e-05, + "loss": 2.6518, + "num_input_tokens_seen": 1924048, + "step": 190 + }, + { + "epoch": 1.4703110273327051, + "grad_norm": 2.999300956726074, + "learning_rate": 4.288677669350066e-05, + "loss": 2.6592, + "num_input_tokens_seen": 1972720, + "step": 195 + }, + { + "epoch": 1.5080113100848256, + "grad_norm": 3.575253963470459, + "learning_rate": 4.2536872192658036e-05, + "loss": 2.716, + "num_input_tokens_seen": 2022112, + "step": 200 + } + ], + "logging_steps": 5, + "max_steps": 792, + "num_input_tokens_seen": 2022112, + "num_train_epochs": 6, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + 
"should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.4688789632843776e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-200/training_args.bin b/checkpoint-200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d325c35219dd8c0eea8279f17307e1d0d8c29b12 --- /dev/null +++ b/checkpoint-200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72d28ce688790f3a28f1dba3064866610463622e1d4141255300e136617f5673 +size 5432 diff --git a/checkpoint-300/README.md b/checkpoint-300/README.md new file mode 100644 index 0000000000000000000000000000000000000000..9f8f0d832be44b45f0bbeda414aed6d1440221e7 --- /dev/null +++ b/checkpoint-300/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/gemma-2-2b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. 
More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.12.0 \ No newline at end of file diff --git a/checkpoint-300/adapter_config.json b/checkpoint-300/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..84f5e74794795afa599846c26565a6f523155345 --- /dev/null +++ b/checkpoint-300/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/gemma-2-2b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "k_proj", + "o_proj", + "down_proj", + "up_proj", + "q_proj", + "gate_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-300/adapter_model.safetensors 
b/checkpoint-300/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..572b3c44664aaf8f77841b5a258e74bd35c80011 --- /dev/null +++ b/checkpoint-300/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0940084858a3e25e8d34c9d116eed73b709fa8708e9490fe30e4d5201ef9733b +size 41581360 diff --git a/checkpoint-300/optimizer.pt b/checkpoint-300/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..12aec96ca8ac26854493e8a0349d773ff8110cf0 --- /dev/null +++ b/checkpoint-300/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12e3730e8862211fa04893513d49f062ced4a18fa664933cdd0655f098c0424e +size 83372758 diff --git a/checkpoint-300/rng_state_0.pth b/checkpoint-300/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..9959dfa0d32cf7a8deece6c5a778423e8a10619a --- /dev/null +++ b/checkpoint-300/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34bcae41c589c7e4cab7b2ef263b878c90c2741404a6af11994dc31537b2319b +size 14512 diff --git a/checkpoint-300/rng_state_1.pth b/checkpoint-300/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..b8d192967011a6873fc38efe91068e31262ad585 --- /dev/null +++ b/checkpoint-300/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d05dc84075e8f7dd1191c36f3be9dda12073208e12f7d2cef433c38d6336774a +size 14512 diff --git a/checkpoint-300/scheduler.pt b/checkpoint-300/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b81ae62f1b91366bf4f31885e9f5a6cfe9d897b7 --- /dev/null +++ b/checkpoint-300/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8ec54f6a0aea46713028de7bebd175e905179497901d87a9a354db5ce43f81a +size 1064 diff --git a/checkpoint-300/special_tokens_map.json b/checkpoint-300/special_tokens_map.json new file 
mode 100644 index 0000000000000000000000000000000000000000..8d6368f7e735fbe4781bf6e956b7c6ad0586df80 --- /dev/null +++ b/checkpoint-300/special_tokens_map.json @@ -0,0 +1,34 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-300/tokenizer.json b/checkpoint-300/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..a4a305d1de4d8f47c0252b4d7fe65a10dd8e2c22 --- /dev/null +++ b/checkpoint-300/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f7eee611703c5ce5d1eee32d9cdcfe465647b8aff0c1dfb3bed7ad7dbb05060 +size 34362873 diff --git a/checkpoint-300/tokenizer.model b/checkpoint-300/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/checkpoint-300/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/checkpoint-300/tokenizer_config.json b/checkpoint-300/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..97f33b16e8481f3849fb6d02d1656628e9e22c91 --- /dev/null +++ b/checkpoint-300/tokenizer_config.json @@ -0,0 +1,2015 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { 
+ "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": 
{ + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": 
{ + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": 
"\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { 
+ "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255968": { + "content": "[toxicity=0]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255969": { + "content": "\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255970": { + "content": "\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255971": { + "content": "\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + 
"255972": { + "content": "\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255973": { + "content": "\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255974": { + "content": "\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255975": { + "content": "\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255976": { + "content": "\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255977": { + "content": "\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255978": { + "content": "\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255979": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255980": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255981": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255982": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255983": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255984": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255985": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255986": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255987": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255988": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255989": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255990": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255991": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255992": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255993": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255994": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255995": { + "content": 
"\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255996": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255997": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255998": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255999": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ system_message + '\n' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ 'Human: ' + content + '\nAssistant:' }}{% elif message['role'] == 'assistant' %}{{ content + '' + '\n' }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "padding_side": "right", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "split_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/checkpoint-300/trainer_state.json b/checkpoint-300/trainer_state.json new file mode 100644 index 
0000000000000000000000000000000000000000..03d301c87a50c610bf04a169b7a606238189c76e --- /dev/null +++ b/checkpoint-300/trainer_state.json @@ -0,0 +1,513 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.2620169651272386, + "eval_steps": 500, + "global_step": 300, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.03770028275212064, + "grad_norm": 3.2918148040771484, + "learning_rate": 4.9995083170283816e-05, + "loss": 4.3742, + "num_input_tokens_seen": 50544, + "step": 5 + }, + { + "epoch": 0.07540056550424128, + "grad_norm": 3.0331127643585205, + "learning_rate": 4.998033461515242e-05, + "loss": 3.8434, + "num_input_tokens_seen": 104080, + "step": 10 + }, + { + "epoch": 0.11310084825636192, + "grad_norm": 3.322950601577759, + "learning_rate": 4.9955760135896534e-05, + "loss": 3.6027, + "num_input_tokens_seen": 155776, + "step": 15 + }, + { + "epoch": 0.15080113100848255, + "grad_norm": 2.7705531120300293, + "learning_rate": 4.992136939879856e-05, + "loss": 3.4102, + "num_input_tokens_seen": 203184, + "step": 20 + }, + { + "epoch": 0.1885014137606032, + "grad_norm": 3.4572091102600098, + "learning_rate": 4.9877175931330346e-05, + "loss": 3.153, + "num_input_tokens_seen": 255744, + "step": 25 + }, + { + "epoch": 0.22620169651272384, + "grad_norm": 2.0412893295288086, + "learning_rate": 4.982319711683221e-05, + "loss": 3.0949, + "num_input_tokens_seen": 307808, + "step": 30 + }, + { + "epoch": 0.2639019792648445, + "grad_norm": 2.0765998363494873, + "learning_rate": 4.975945418767529e-05, + "loss": 3.0392, + "num_input_tokens_seen": 358192, + "step": 35 + }, + { + "epoch": 0.3016022620169651, + "grad_norm": 2.2433698177337646, + "learning_rate": 4.968597221690986e-05, + "loss": 3.0685, + "num_input_tokens_seen": 408832, + "step": 40 + }, + { + "epoch": 0.3393025447690858, + "grad_norm": 2.7639973163604736, + "learning_rate": 4.96027801084029e-05, + "loss": 
3.0385, + "num_input_tokens_seen": 458368, + "step": 45 + }, + { + "epoch": 0.3770028275212064, + "grad_norm": 2.0680019855499268, + "learning_rate": 4.950991058546893e-05, + "loss": 3.1079, + "num_input_tokens_seen": 512160, + "step": 50 + }, + { + "epoch": 0.41470311027332707, + "grad_norm": 2.181554079055786, + "learning_rate": 4.940740017799833e-05, + "loss": 2.9883, + "num_input_tokens_seen": 562928, + "step": 55 + }, + { + "epoch": 0.4524033930254477, + "grad_norm": 2.0724411010742188, + "learning_rate": 4.929528920808854e-05, + "loss": 2.969, + "num_input_tokens_seen": 613072, + "step": 60 + }, + { + "epoch": 0.49010367577756836, + "grad_norm": 2.5529158115386963, + "learning_rate": 4.917362177418342e-05, + "loss": 3.0019, + "num_input_tokens_seen": 664640, + "step": 65 + }, + { + "epoch": 0.527803958529689, + "grad_norm": 2.3983142375946045, + "learning_rate": 4.904244573372733e-05, + "loss": 2.9984, + "num_input_tokens_seen": 716800, + "step": 70 + }, + { + "epoch": 0.5655042412818096, + "grad_norm": 2.206209182739258, + "learning_rate": 4.8901812684340564e-05, + "loss": 2.9848, + "num_input_tokens_seen": 766528, + "step": 75 + }, + { + "epoch": 0.6032045240339302, + "grad_norm": 2.22698974609375, + "learning_rate": 4.8751777943523634e-05, + "loss": 2.9169, + "num_input_tokens_seen": 817376, + "step": 80 + }, + { + "epoch": 0.6409048067860509, + "grad_norm": 2.4569709300994873, + "learning_rate": 4.8592400526898314e-05, + "loss": 2.8844, + "num_input_tokens_seen": 864688, + "step": 85 + }, + { + "epoch": 0.6786050895381716, + "grad_norm": 2.195237636566162, + "learning_rate": 4.842374312499405e-05, + "loss": 2.9422, + "num_input_tokens_seen": 913216, + "step": 90 + }, + { + "epoch": 0.7163053722902922, + "grad_norm": 2.54585599899292, + "learning_rate": 4.824587207858888e-05, + "loss": 2.8431, + "num_input_tokens_seen": 964144, + "step": 95 + }, + { + "epoch": 0.7540056550424128, + "grad_norm": 2.3841867446899414, + "learning_rate": 4.805885735261454e-05, 
+ "loss": 2.8377, + "num_input_tokens_seen": 1018768, + "step": 100 + }, + { + "epoch": 0.7917059377945335, + "grad_norm": 2.359386920928955, + "learning_rate": 4.786277250863599e-05, + "loss": 2.7843, + "num_input_tokens_seen": 1070592, + "step": 105 + }, + { + "epoch": 0.8294062205466541, + "grad_norm": 2.0952038764953613, + "learning_rate": 4.765769467591625e-05, + "loss": 2.8218, + "num_input_tokens_seen": 1125392, + "step": 110 + }, + { + "epoch": 0.8671065032987747, + "grad_norm": 2.222754955291748, + "learning_rate": 4.744370452107789e-05, + "loss": 2.877, + "num_input_tokens_seen": 1176256, + "step": 115 + }, + { + "epoch": 0.9048067860508954, + "grad_norm": 2.3233537673950195, + "learning_rate": 4.722088621637309e-05, + "loss": 2.9028, + "num_input_tokens_seen": 1225376, + "step": 120 + }, + { + "epoch": 0.942507068803016, + "grad_norm": 2.400575876235962, + "learning_rate": 4.698932740657479e-05, + "loss": 2.762, + "num_input_tokens_seen": 1277024, + "step": 125 + }, + { + "epoch": 0.9802073515551367, + "grad_norm": 2.592116117477417, + "learning_rate": 4.6749119174501975e-05, + "loss": 2.9164, + "num_input_tokens_seen": 1322912, + "step": 130 + }, + { + "epoch": 1.0179076343072573, + "grad_norm": 2.563326120376587, + "learning_rate": 4.6500356005192514e-05, + "loss": 2.736, + "num_input_tokens_seen": 1368624, + "step": 135 + }, + { + "epoch": 1.055607917059378, + "grad_norm": 2.3927371501922607, + "learning_rate": 4.6243135748737864e-05, + "loss": 2.7414, + "num_input_tokens_seen": 1417664, + "step": 140 + }, + { + "epoch": 1.0933081998114986, + "grad_norm": 2.597975492477417, + "learning_rate": 4.597755958179406e-05, + "loss": 2.6961, + "num_input_tokens_seen": 1469120, + "step": 145 + }, + { + "epoch": 1.1310084825636193, + "grad_norm": 2.932581663131714, + "learning_rate": 4.570373196778427e-05, + "loss": 2.6134, + "num_input_tokens_seen": 1521632, + "step": 150 + }, + { + "epoch": 1.1687087653157398, + "grad_norm": 3.28389310836792, + 
"learning_rate": 4.5421760615808474e-05, + "loss": 2.7548, + "num_input_tokens_seen": 1565296, + "step": 155 + }, + { + "epoch": 1.2064090480678604, + "grad_norm": 2.927279233932495, + "learning_rate": 4.513175643827647e-05, + "loss": 2.702, + "num_input_tokens_seen": 1617088, + "step": 160 + }, + { + "epoch": 1.244109330819981, + "grad_norm": 2.840573310852051, + "learning_rate": 4.4833833507280884e-05, + "loss": 2.6893, + "num_input_tokens_seen": 1663584, + "step": 165 + }, + { + "epoch": 1.2818096135721018, + "grad_norm": 2.6322081089019775, + "learning_rate": 4.4528109009727336e-05, + "loss": 2.5671, + "num_input_tokens_seen": 1713744, + "step": 170 + }, + { + "epoch": 1.3195098963242224, + "grad_norm": 3.1280879974365234, + "learning_rate": 4.42147032012394e-05, + "loss": 2.7682, + "num_input_tokens_seen": 1762768, + "step": 175 + }, + { + "epoch": 1.3572101790763431, + "grad_norm": 3.1128265857696533, + "learning_rate": 4.389373935885646e-05, + "loss": 2.7062, + "num_input_tokens_seen": 1815808, + "step": 180 + }, + { + "epoch": 1.3949104618284638, + "grad_norm": 2.952150344848633, + "learning_rate": 4.356534373254316e-05, + "loss": 2.7092, + "num_input_tokens_seen": 1871040, + "step": 185 + }, + { + "epoch": 1.4326107445805842, + "grad_norm": 3.0146102905273438, + "learning_rate": 4.322964549552943e-05, + "loss": 2.6518, + "num_input_tokens_seen": 1924048, + "step": 190 + }, + { + "epoch": 1.4703110273327051, + "grad_norm": 2.999300956726074, + "learning_rate": 4.288677669350066e-05, + "loss": 2.6592, + "num_input_tokens_seen": 1972720, + "step": 195 + }, + { + "epoch": 1.5080113100848256, + "grad_norm": 3.575253963470459, + "learning_rate": 4.2536872192658036e-05, + "loss": 2.716, + "num_input_tokens_seen": 2022112, + "step": 200 + }, + { + "epoch": 1.5457115928369463, + "grad_norm": 3.1339428424835205, + "learning_rate": 4.218006962666934e-05, + "loss": 2.6746, + "num_input_tokens_seen": 2072000, + "step": 205 + }, + { + "epoch": 1.583411875589067, + 
"grad_norm": 3.130823850631714, + "learning_rate": 4.181650934253132e-05, + "loss": 2.6306, + "num_input_tokens_seen": 2125632, + "step": 210 + }, + { + "epoch": 1.6211121583411876, + "grad_norm": 3.1828997135162354, + "learning_rate": 4.144633434536467e-05, + "loss": 2.5678, + "num_input_tokens_seen": 2174464, + "step": 215 + }, + { + "epoch": 1.6588124410933083, + "grad_norm": 3.3879778385162354, + "learning_rate": 4.1069690242163484e-05, + "loss": 2.7334, + "num_input_tokens_seen": 2223408, + "step": 220 + }, + { + "epoch": 1.6965127238454287, + "grad_norm": 3.5921592712402344, + "learning_rate": 4.06867251845213e-05, + "loss": 2.6658, + "num_input_tokens_seen": 2281296, + "step": 225 + }, + { + "epoch": 1.7342130065975496, + "grad_norm": 3.4126648902893066, + "learning_rate": 4.0297589810356165e-05, + "loss": 2.6567, + "num_input_tokens_seen": 2334176, + "step": 230 + }, + { + "epoch": 1.77191328934967, + "grad_norm": 3.504786252975464, + "learning_rate": 3.9902437184657784e-05, + "loss": 2.6131, + "num_input_tokens_seen": 2389296, + "step": 235 + }, + { + "epoch": 1.8096135721017907, + "grad_norm": 3.459768056869507, + "learning_rate": 3.9501422739279956e-05, + "loss": 2.662, + "num_input_tokens_seen": 2442800, + "step": 240 + }, + { + "epoch": 1.8473138548539114, + "grad_norm": 2.944490909576416, + "learning_rate": 3.909470421180201e-05, + "loss": 2.6339, + "num_input_tokens_seen": 2494912, + "step": 245 + }, + { + "epoch": 1.885014137606032, + "grad_norm": 3.4897429943084717, + "learning_rate": 3.8682441583483314e-05, + "loss": 2.6338, + "num_input_tokens_seen": 2544624, + "step": 250 + }, + { + "epoch": 1.9227144203581528, + "grad_norm": 3.15039324760437, + "learning_rate": 3.8264797016335205e-05, + "loss": 2.6844, + "num_input_tokens_seen": 2597792, + "step": 255 + }, + { + "epoch": 1.9604147031102732, + "grad_norm": 3.5575578212738037, + "learning_rate": 3.7841934789335164e-05, + "loss": 2.5939, + "num_input_tokens_seen": 2646544, + "step": 260 + }, + { + 
"epoch": 1.998114985862394, + "grad_norm": 3.97521710395813, + "learning_rate": 3.741402123380828e-05, + "loss": 2.7393, + "num_input_tokens_seen": 2700224, + "step": 265 + }, + { + "epoch": 2.0358152686145146, + "grad_norm": 3.1736276149749756, + "learning_rate": 3.6981224668001424e-05, + "loss": 2.4495, + "num_input_tokens_seen": 2749104, + "step": 270 + }, + { + "epoch": 2.0735155513666355, + "grad_norm": 3.633033275604248, + "learning_rate": 3.654371533087586e-05, + "loss": 2.3823, + "num_input_tokens_seen": 2802992, + "step": 275 + }, + { + "epoch": 2.111215834118756, + "grad_norm": 3.4200243949890137, + "learning_rate": 3.610166531514436e-05, + "loss": 2.474, + "num_input_tokens_seen": 2844512, + "step": 280 + }, + { + "epoch": 2.1489161168708764, + "grad_norm": 4.055693626403809, + "learning_rate": 3.565524849957921e-05, + "loss": 2.4776, + "num_input_tokens_seen": 2894400, + "step": 285 + }, + { + "epoch": 2.1866163996229973, + "grad_norm": 4.254647254943848, + "learning_rate": 3.520464048061758e-05, + "loss": 2.4709, + "num_input_tokens_seen": 2944688, + "step": 290 + }, + { + "epoch": 2.2243166823751177, + "grad_norm": 4.337226390838623, + "learning_rate": 3.47500185032913e-05, + "loss": 2.5009, + "num_input_tokens_seen": 2994672, + "step": 295 + }, + { + "epoch": 2.2620169651272386, + "grad_norm": 4.424121379852295, + "learning_rate": 3.4291561391508185e-05, + "loss": 2.4524, + "num_input_tokens_seen": 3045344, + "step": 300 + } + ], + "logging_steps": 5, + "max_steps": 792, + "num_input_tokens_seen": 3045344, + "num_train_epochs": 6, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.718184608517325e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-300/training_args.bin 
b/checkpoint-300/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d325c35219dd8c0eea8279f17307e1d0d8c29b12 --- /dev/null +++ b/checkpoint-300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72d28ce688790f3a28f1dba3064866610463622e1d4141255300e136617f5673 +size 5432 diff --git a/checkpoint-400/README.md b/checkpoint-400/README.md new file mode 100644 index 0000000000000000000000000000000000000000..9f8f0d832be44b45f0bbeda414aed6d1440221e7 --- /dev/null +++ b/checkpoint-400/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/gemma-2-2b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. 
+ +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.12.0 \ No newline at end of file diff --git a/checkpoint-400/adapter_config.json b/checkpoint-400/adapter_config.json new file mode 100644 index 
0000000000000000000000000000000000000000..84f5e74794795afa599846c26565a6f523155345 --- /dev/null +++ b/checkpoint-400/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/gemma-2-2b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "k_proj", + "o_proj", + "down_proj", + "up_proj", + "q_proj", + "gate_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-400/adapter_model.safetensors b/checkpoint-400/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d135eb400410589f360a27d9dc35d0ca801b7b74 --- /dev/null +++ b/checkpoint-400/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16d42e907058fb4573e70dd4a71b1b82c4b2d7244af3eeb5fa3f139259a47c34 +size 41581360 diff --git a/checkpoint-400/optimizer.pt b/checkpoint-400/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..608cb3f6a77359eb3a62d0f0aa931527e02fdbd0 --- /dev/null +++ b/checkpoint-400/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e71ce1f112a0e3736d74cc41d199304192f9d1f01d5dbf7382582448be62e9ff +size 83372758 diff --git a/checkpoint-400/rng_state_0.pth b/checkpoint-400/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..8e39cd89edd6409a9e49b8db7f0d371695a2623d --- /dev/null +++ b/checkpoint-400/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:a9affc1541e7e94c18354d5173bc55400c5f07faf3d080c6d453d48e7a8d6ac3 +size 14512 diff --git a/checkpoint-400/rng_state_1.pth b/checkpoint-400/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..d1b839d26b0a64f427c73c634fb491ba9ddf3381 --- /dev/null +++ b/checkpoint-400/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4748c3ebf0e4c051c58b92e4a8c5b87cdb39d55cfdc2aec81a1baef0f02fc113 +size 14512 diff --git a/checkpoint-400/scheduler.pt b/checkpoint-400/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8f81b56725280cc11fc5689d291a095dbb95bdb6 --- /dev/null +++ b/checkpoint-400/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f22ca2b24be2724497925de4a88dc3d652a6dcb3cb655731486697897d39d9be +size 1064 diff --git a/checkpoint-400/special_tokens_map.json b/checkpoint-400/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..8d6368f7e735fbe4781bf6e956b7c6ad0586df80 --- /dev/null +++ b/checkpoint-400/special_tokens_map.json @@ -0,0 +1,34 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-400/tokenizer.json b/checkpoint-400/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..a4a305d1de4d8f47c0252b4d7fe65a10dd8e2c22 --- /dev/null +++ b/checkpoint-400/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:5f7eee611703c5ce5d1eee32d9cdcfe465647b8aff0c1dfb3bed7ad7dbb05060 +size 34362873 diff --git a/checkpoint-400/tokenizer.model b/checkpoint-400/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/checkpoint-400/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/checkpoint-400/tokenizer_config.json b/checkpoint-400/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..97f33b16e8481f3849fb6d02d1656628e9e22c91 --- /dev/null +++ b/checkpoint-400/tokenizer_config.json @@ -0,0 +1,2015 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": 
false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + 
}, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": 
"\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", 
+ "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255968": { + "content": "[toxicity=0]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255969": { + "content": "\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255970": { + "content": "\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255971": { + "content": "\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + 
"255972": { + "content": "\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255973": { + "content": "\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255974": { + "content": "\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255975": { + "content": "\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255976": { + "content": "\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255977": { + "content": "\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255978": { + "content": "\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255979": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255980": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255981": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255982": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255983": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255984": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255985": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255986": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255987": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255988": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255989": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255990": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255991": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255992": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255993": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255994": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255995": { + "content": 
"\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255996": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255997": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255998": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255999": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ system_message + '\n' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ 'Human: ' + content + '\nAssistant:' }}{% elif message['role'] == 'assistant' %}{{ content + '' + '\n' }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "padding_side": "right", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "split_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/checkpoint-400/trainer_state.json b/checkpoint-400/trainer_state.json new file mode 100644 index 
0000000000000000000000000000000000000000..ee0d283d192bd76948273bf8f2d40a6f8e0a6190 --- /dev/null +++ b/checkpoint-400/trainer_state.json @@ -0,0 +1,673 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.016022620169651, + "eval_steps": 500, + "global_step": 400, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.03770028275212064, + "grad_norm": 3.2918148040771484, + "learning_rate": 4.9995083170283816e-05, + "loss": 4.3742, + "num_input_tokens_seen": 50544, + "step": 5 + }, + { + "epoch": 0.07540056550424128, + "grad_norm": 3.0331127643585205, + "learning_rate": 4.998033461515242e-05, + "loss": 3.8434, + "num_input_tokens_seen": 104080, + "step": 10 + }, + { + "epoch": 0.11310084825636192, + "grad_norm": 3.322950601577759, + "learning_rate": 4.9955760135896534e-05, + "loss": 3.6027, + "num_input_tokens_seen": 155776, + "step": 15 + }, + { + "epoch": 0.15080113100848255, + "grad_norm": 2.7705531120300293, + "learning_rate": 4.992136939879856e-05, + "loss": 3.4102, + "num_input_tokens_seen": 203184, + "step": 20 + }, + { + "epoch": 0.1885014137606032, + "grad_norm": 3.4572091102600098, + "learning_rate": 4.9877175931330346e-05, + "loss": 3.153, + "num_input_tokens_seen": 255744, + "step": 25 + }, + { + "epoch": 0.22620169651272384, + "grad_norm": 2.0412893295288086, + "learning_rate": 4.982319711683221e-05, + "loss": 3.0949, + "num_input_tokens_seen": 307808, + "step": 30 + }, + { + "epoch": 0.2639019792648445, + "grad_norm": 2.0765998363494873, + "learning_rate": 4.975945418767529e-05, + "loss": 3.0392, + "num_input_tokens_seen": 358192, + "step": 35 + }, + { + "epoch": 0.3016022620169651, + "grad_norm": 2.2433698177337646, + "learning_rate": 4.968597221690986e-05, + "loss": 3.0685, + "num_input_tokens_seen": 408832, + "step": 40 + }, + { + "epoch": 0.3393025447690858, + "grad_norm": 2.7639973163604736, + "learning_rate": 4.96027801084029e-05, + "loss": 
3.0385, + "num_input_tokens_seen": 458368, + "step": 45 + }, + { + "epoch": 0.3770028275212064, + "grad_norm": 2.0680019855499268, + "learning_rate": 4.950991058546893e-05, + "loss": 3.1079, + "num_input_tokens_seen": 512160, + "step": 50 + }, + { + "epoch": 0.41470311027332707, + "grad_norm": 2.181554079055786, + "learning_rate": 4.940740017799833e-05, + "loss": 2.9883, + "num_input_tokens_seen": 562928, + "step": 55 + }, + { + "epoch": 0.4524033930254477, + "grad_norm": 2.0724411010742188, + "learning_rate": 4.929528920808854e-05, + "loss": 2.969, + "num_input_tokens_seen": 613072, + "step": 60 + }, + { + "epoch": 0.49010367577756836, + "grad_norm": 2.5529158115386963, + "learning_rate": 4.917362177418342e-05, + "loss": 3.0019, + "num_input_tokens_seen": 664640, + "step": 65 + }, + { + "epoch": 0.527803958529689, + "grad_norm": 2.3983142375946045, + "learning_rate": 4.904244573372733e-05, + "loss": 2.9984, + "num_input_tokens_seen": 716800, + "step": 70 + }, + { + "epoch": 0.5655042412818096, + "grad_norm": 2.206209182739258, + "learning_rate": 4.8901812684340564e-05, + "loss": 2.9848, + "num_input_tokens_seen": 766528, + "step": 75 + }, + { + "epoch": 0.6032045240339302, + "grad_norm": 2.22698974609375, + "learning_rate": 4.8751777943523634e-05, + "loss": 2.9169, + "num_input_tokens_seen": 817376, + "step": 80 + }, + { + "epoch": 0.6409048067860509, + "grad_norm": 2.4569709300994873, + "learning_rate": 4.8592400526898314e-05, + "loss": 2.8844, + "num_input_tokens_seen": 864688, + "step": 85 + }, + { + "epoch": 0.6786050895381716, + "grad_norm": 2.195237636566162, + "learning_rate": 4.842374312499405e-05, + "loss": 2.9422, + "num_input_tokens_seen": 913216, + "step": 90 + }, + { + "epoch": 0.7163053722902922, + "grad_norm": 2.54585599899292, + "learning_rate": 4.824587207858888e-05, + "loss": 2.8431, + "num_input_tokens_seen": 964144, + "step": 95 + }, + { + "epoch": 0.7540056550424128, + "grad_norm": 2.3841867446899414, + "learning_rate": 4.805885735261454e-05, 
+ "loss": 2.8377, + "num_input_tokens_seen": 1018768, + "step": 100 + }, + { + "epoch": 0.7917059377945335, + "grad_norm": 2.359386920928955, + "learning_rate": 4.786277250863599e-05, + "loss": 2.7843, + "num_input_tokens_seen": 1070592, + "step": 105 + }, + { + "epoch": 0.8294062205466541, + "grad_norm": 2.0952038764953613, + "learning_rate": 4.765769467591625e-05, + "loss": 2.8218, + "num_input_tokens_seen": 1125392, + "step": 110 + }, + { + "epoch": 0.8671065032987747, + "grad_norm": 2.222754955291748, + "learning_rate": 4.744370452107789e-05, + "loss": 2.877, + "num_input_tokens_seen": 1176256, + "step": 115 + }, + { + "epoch": 0.9048067860508954, + "grad_norm": 2.3233537673950195, + "learning_rate": 4.722088621637309e-05, + "loss": 2.9028, + "num_input_tokens_seen": 1225376, + "step": 120 + }, + { + "epoch": 0.942507068803016, + "grad_norm": 2.400575876235962, + "learning_rate": 4.698932740657479e-05, + "loss": 2.762, + "num_input_tokens_seen": 1277024, + "step": 125 + }, + { + "epoch": 0.9802073515551367, + "grad_norm": 2.592116117477417, + "learning_rate": 4.6749119174501975e-05, + "loss": 2.9164, + "num_input_tokens_seen": 1322912, + "step": 130 + }, + { + "epoch": 1.0179076343072573, + "grad_norm": 2.563326120376587, + "learning_rate": 4.6500356005192514e-05, + "loss": 2.736, + "num_input_tokens_seen": 1368624, + "step": 135 + }, + { + "epoch": 1.055607917059378, + "grad_norm": 2.3927371501922607, + "learning_rate": 4.6243135748737864e-05, + "loss": 2.7414, + "num_input_tokens_seen": 1417664, + "step": 140 + }, + { + "epoch": 1.0933081998114986, + "grad_norm": 2.597975492477417, + "learning_rate": 4.597755958179406e-05, + "loss": 2.6961, + "num_input_tokens_seen": 1469120, + "step": 145 + }, + { + "epoch": 1.1310084825636193, + "grad_norm": 2.932581663131714, + "learning_rate": 4.570373196778427e-05, + "loss": 2.6134, + "num_input_tokens_seen": 1521632, + "step": 150 + }, + { + "epoch": 1.1687087653157398, + "grad_norm": 3.28389310836792, + 
"learning_rate": 4.5421760615808474e-05, + "loss": 2.7548, + "num_input_tokens_seen": 1565296, + "step": 155 + }, + { + "epoch": 1.2064090480678604, + "grad_norm": 2.927279233932495, + "learning_rate": 4.513175643827647e-05, + "loss": 2.702, + "num_input_tokens_seen": 1617088, + "step": 160 + }, + { + "epoch": 1.244109330819981, + "grad_norm": 2.840573310852051, + "learning_rate": 4.4833833507280884e-05, + "loss": 2.6893, + "num_input_tokens_seen": 1663584, + "step": 165 + }, + { + "epoch": 1.2818096135721018, + "grad_norm": 2.6322081089019775, + "learning_rate": 4.4528109009727336e-05, + "loss": 2.5671, + "num_input_tokens_seen": 1713744, + "step": 170 + }, + { + "epoch": 1.3195098963242224, + "grad_norm": 3.1280879974365234, + "learning_rate": 4.42147032012394e-05, + "loss": 2.7682, + "num_input_tokens_seen": 1762768, + "step": 175 + }, + { + "epoch": 1.3572101790763431, + "grad_norm": 3.1128265857696533, + "learning_rate": 4.389373935885646e-05, + "loss": 2.7062, + "num_input_tokens_seen": 1815808, + "step": 180 + }, + { + "epoch": 1.3949104618284638, + "grad_norm": 2.952150344848633, + "learning_rate": 4.356534373254316e-05, + "loss": 2.7092, + "num_input_tokens_seen": 1871040, + "step": 185 + }, + { + "epoch": 1.4326107445805842, + "grad_norm": 3.0146102905273438, + "learning_rate": 4.322964549552943e-05, + "loss": 2.6518, + "num_input_tokens_seen": 1924048, + "step": 190 + }, + { + "epoch": 1.4703110273327051, + "grad_norm": 2.999300956726074, + "learning_rate": 4.288677669350066e-05, + "loss": 2.6592, + "num_input_tokens_seen": 1972720, + "step": 195 + }, + { + "epoch": 1.5080113100848256, + "grad_norm": 3.575253963470459, + "learning_rate": 4.2536872192658036e-05, + "loss": 2.716, + "num_input_tokens_seen": 2022112, + "step": 200 + }, + { + "epoch": 1.5457115928369463, + "grad_norm": 3.1339428424835205, + "learning_rate": 4.218006962666934e-05, + "loss": 2.6746, + "num_input_tokens_seen": 2072000, + "step": 205 + }, + { + "epoch": 1.583411875589067, + 
"grad_norm": 3.130823850631714, + "learning_rate": 4.181650934253132e-05, + "loss": 2.6306, + "num_input_tokens_seen": 2125632, + "step": 210 + }, + { + "epoch": 1.6211121583411876, + "grad_norm": 3.1828997135162354, + "learning_rate": 4.144633434536467e-05, + "loss": 2.5678, + "num_input_tokens_seen": 2174464, + "step": 215 + }, + { + "epoch": 1.6588124410933083, + "grad_norm": 3.3879778385162354, + "learning_rate": 4.1069690242163484e-05, + "loss": 2.7334, + "num_input_tokens_seen": 2223408, + "step": 220 + }, + { + "epoch": 1.6965127238454287, + "grad_norm": 3.5921592712402344, + "learning_rate": 4.06867251845213e-05, + "loss": 2.6658, + "num_input_tokens_seen": 2281296, + "step": 225 + }, + { + "epoch": 1.7342130065975496, + "grad_norm": 3.4126648902893066, + "learning_rate": 4.0297589810356165e-05, + "loss": 2.6567, + "num_input_tokens_seen": 2334176, + "step": 230 + }, + { + "epoch": 1.77191328934967, + "grad_norm": 3.504786252975464, + "learning_rate": 3.9902437184657784e-05, + "loss": 2.6131, + "num_input_tokens_seen": 2389296, + "step": 235 + }, + { + "epoch": 1.8096135721017907, + "grad_norm": 3.459768056869507, + "learning_rate": 3.9501422739279956e-05, + "loss": 2.662, + "num_input_tokens_seen": 2442800, + "step": 240 + }, + { + "epoch": 1.8473138548539114, + "grad_norm": 2.944490909576416, + "learning_rate": 3.909470421180201e-05, + "loss": 2.6339, + "num_input_tokens_seen": 2494912, + "step": 245 + }, + { + "epoch": 1.885014137606032, + "grad_norm": 3.4897429943084717, + "learning_rate": 3.8682441583483314e-05, + "loss": 2.6338, + "num_input_tokens_seen": 2544624, + "step": 250 + }, + { + "epoch": 1.9227144203581528, + "grad_norm": 3.15039324760437, + "learning_rate": 3.8264797016335205e-05, + "loss": 2.6844, + "num_input_tokens_seen": 2597792, + "step": 255 + }, + { + "epoch": 1.9604147031102732, + "grad_norm": 3.5575578212738037, + "learning_rate": 3.7841934789335164e-05, + "loss": 2.5939, + "num_input_tokens_seen": 2646544, + "step": 260 + }, + { + 
"epoch": 1.998114985862394, + "grad_norm": 3.97521710395813, + "learning_rate": 3.741402123380828e-05, + "loss": 2.7393, + "num_input_tokens_seen": 2700224, + "step": 265 + }, + { + "epoch": 2.0358152686145146, + "grad_norm": 3.1736276149749756, + "learning_rate": 3.6981224668001424e-05, + "loss": 2.4495, + "num_input_tokens_seen": 2749104, + "step": 270 + }, + { + "epoch": 2.0735155513666355, + "grad_norm": 3.633033275604248, + "learning_rate": 3.654371533087586e-05, + "loss": 2.3823, + "num_input_tokens_seen": 2802992, + "step": 275 + }, + { + "epoch": 2.111215834118756, + "grad_norm": 3.4200243949890137, + "learning_rate": 3.610166531514436e-05, + "loss": 2.474, + "num_input_tokens_seen": 2844512, + "step": 280 + }, + { + "epoch": 2.1489161168708764, + "grad_norm": 4.055693626403809, + "learning_rate": 3.565524849957921e-05, + "loss": 2.4776, + "num_input_tokens_seen": 2894400, + "step": 285 + }, + { + "epoch": 2.1866163996229973, + "grad_norm": 4.254647254943848, + "learning_rate": 3.520464048061758e-05, + "loss": 2.4709, + "num_input_tokens_seen": 2944688, + "step": 290 + }, + { + "epoch": 2.2243166823751177, + "grad_norm": 4.337226390838623, + "learning_rate": 3.47500185032913e-05, + "loss": 2.5009, + "num_input_tokens_seen": 2994672, + "step": 295 + }, + { + "epoch": 2.2620169651272386, + "grad_norm": 4.424121379852295, + "learning_rate": 3.4291561391508185e-05, + "loss": 2.4524, + "num_input_tokens_seen": 3045344, + "step": 300 + }, + { + "epoch": 2.299717247879359, + "grad_norm": 4.178481101989746, + "learning_rate": 3.3829449477712324e-05, + "loss": 2.4083, + "num_input_tokens_seen": 3100688, + "step": 305 + }, + { + "epoch": 2.3374175306314795, + "grad_norm": 4.315364837646484, + "learning_rate": 3.336386453195088e-05, + "loss": 2.4713, + "num_input_tokens_seen": 3154896, + "step": 310 + }, + { + "epoch": 2.3751178133836004, + "grad_norm": 4.445486545562744, + "learning_rate": 3.2894989690375626e-05, + "loss": 2.3986, + "num_input_tokens_seen": 3209120, 
+ "step": 315 + }, + { + "epoch": 2.412818096135721, + "grad_norm": 4.80281400680542, + "learning_rate": 3.2423009383206876e-05, + "loss": 2.4258, + "num_input_tokens_seen": 3257984, + "step": 320 + }, + { + "epoch": 2.4505183788878417, + "grad_norm": 4.705103397369385, + "learning_rate": 3.194810926218861e-05, + "loss": 2.4126, + "num_input_tokens_seen": 3309424, + "step": 325 + }, + { + "epoch": 2.488218661639962, + "grad_norm": 4.281659126281738, + "learning_rate": 3.147047612756302e-05, + "loss": 2.3639, + "num_input_tokens_seen": 3359152, + "step": 330 + }, + { + "epoch": 2.525918944392083, + "grad_norm": 4.493980407714844, + "learning_rate": 3.099029785459328e-05, + "loss": 2.5176, + "num_input_tokens_seen": 3406784, + "step": 335 + }, + { + "epoch": 2.5636192271442035, + "grad_norm": 4.339905261993408, + "learning_rate": 3.0507763319663517e-05, + "loss": 2.4779, + "num_input_tokens_seen": 3459328, + "step": 340 + }, + { + "epoch": 2.6013195098963244, + "grad_norm": 4.67468786239624, + "learning_rate": 3.002306232598497e-05, + "loss": 2.3833, + "num_input_tokens_seen": 3511056, + "step": 345 + }, + { + "epoch": 2.639019792648445, + "grad_norm": 4.905932426452637, + "learning_rate": 2.9536385528937567e-05, + "loss": 2.3794, + "num_input_tokens_seen": 3562352, + "step": 350 + }, + { + "epoch": 2.6767200754005653, + "grad_norm": 4.684591770172119, + "learning_rate": 2.9047924361076345e-05, + "loss": 2.4883, + "num_input_tokens_seen": 3615664, + "step": 355 + }, + { + "epoch": 2.7144203581526862, + "grad_norm": 4.377009868621826, + "learning_rate": 2.8557870956832132e-05, + "loss": 2.3423, + "num_input_tokens_seen": 3661424, + "step": 360 + }, + { + "epoch": 2.7521206409048067, + "grad_norm": 5.146539688110352, + "learning_rate": 2.8066418076936167e-05, + "loss": 2.4092, + "num_input_tokens_seen": 3710592, + "step": 365 + }, + { + "epoch": 2.7898209236569276, + "grad_norm": 5.5937910079956055, + "learning_rate": 2.7573759032598366e-05, + "loss": 2.4818, + 
"num_input_tokens_seen": 3765728, + "step": 370 + }, + { + "epoch": 2.827521206409048, + "grad_norm": 4.4958696365356445, + "learning_rate": 2.7080087609469062e-05, + "loss": 2.4587, + "num_input_tokens_seen": 3815360, + "step": 375 + }, + { + "epoch": 2.8652214891611685, + "grad_norm": 4.503344535827637, + "learning_rate": 2.6585597991414114e-05, + "loss": 2.3462, + "num_input_tokens_seen": 3868096, + "step": 380 + }, + { + "epoch": 2.9029217719132894, + "grad_norm": 4.773792743682861, + "learning_rate": 2.6090484684133404e-05, + "loss": 2.3713, + "num_input_tokens_seen": 3913696, + "step": 385 + }, + { + "epoch": 2.9406220546654103, + "grad_norm": 4.8644537925720215, + "learning_rate": 2.5594942438652688e-05, + "loss": 2.4618, + "num_input_tokens_seen": 3971840, + "step": 390 + }, + { + "epoch": 2.9783223374175307, + "grad_norm": 5.539215087890625, + "learning_rate": 2.509916617471903e-05, + "loss": 2.5454, + "num_input_tokens_seen": 4025040, + "step": 395 + }, + { + "epoch": 3.016022620169651, + "grad_norm": 4.475778579711914, + "learning_rate": 2.46033509041298e-05, + "loss": 2.3007, + "num_input_tokens_seen": 4075488, + "step": 400 + } + ], + "logging_steps": 5, + "max_steps": 792, + "num_input_tokens_seen": 4075488, + "num_train_epochs": 6, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4.975929388013978e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-400/training_args.bin b/checkpoint-400/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d325c35219dd8c0eea8279f17307e1d0d8c29b12 --- /dev/null +++ b/checkpoint-400/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:72d28ce688790f3a28f1dba3064866610463622e1d4141255300e136617f5673 +size 5432 diff --git a/checkpoint-500/README.md b/checkpoint-500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..9f8f0d832be44b45f0bbeda414aed6d1440221e7 --- /dev/null +++ b/checkpoint-500/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/gemma-2-2b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. 
+ +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.12.0 \ No newline at end of file diff --git a/checkpoint-500/adapter_config.json b/checkpoint-500/adapter_config.json new file mode 100644 index 
0000000000000000000000000000000000000000..84f5e74794795afa599846c26565a6f523155345 --- /dev/null +++ b/checkpoint-500/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/gemma-2-2b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "k_proj", + "o_proj", + "down_proj", + "up_proj", + "q_proj", + "gate_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-500/adapter_model.safetensors b/checkpoint-500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..aa6388d8af97ef866d200849dafed69816148ecb --- /dev/null +++ b/checkpoint-500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7708dc9c94784ba63b7f8a9a43b20407ecc2d316903841610da8d5585343f88f +size 41581360 diff --git a/checkpoint-500/optimizer.pt b/checkpoint-500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e2266a2ef8fccee79dc346d7ae5c0a9e44295b42 --- /dev/null +++ b/checkpoint-500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0aa6a233b483d02cdf19ba093174170f8c9f16c8800784427a6d128ad17e289a +size 83372758 diff --git a/checkpoint-500/rng_state_0.pth b/checkpoint-500/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..8e39cd89edd6409a9e49b8db7f0d371695a2623d --- /dev/null +++ b/checkpoint-500/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:a9affc1541e7e94c18354d5173bc55400c5f07faf3d080c6d453d48e7a8d6ac3 +size 14512 diff --git a/checkpoint-500/rng_state_1.pth b/checkpoint-500/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..d1b839d26b0a64f427c73c634fb491ba9ddf3381 --- /dev/null +++ b/checkpoint-500/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4748c3ebf0e4c051c58b92e4a8c5b87cdb39d55cfdc2aec81a1baef0f02fc113 +size 14512 diff --git a/checkpoint-500/scheduler.pt b/checkpoint-500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2513a608779571bfcb21ffa99571a895303f452a --- /dev/null +++ b/checkpoint-500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d076f944dc4da9aa8bf3d62dfd4d58d668f38c8a828d05146d0cbc3944cf8eb +size 1064 diff --git a/checkpoint-500/special_tokens_map.json b/checkpoint-500/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..8d6368f7e735fbe4781bf6e956b7c6ad0586df80 --- /dev/null +++ b/checkpoint-500/special_tokens_map.json @@ -0,0 +1,34 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-500/tokenizer.json b/checkpoint-500/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..a4a305d1de4d8f47c0252b4d7fe65a10dd8e2c22 --- /dev/null +++ b/checkpoint-500/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:5f7eee611703c5ce5d1eee32d9cdcfe465647b8aff0c1dfb3bed7ad7dbb05060 +size 34362873 diff --git a/checkpoint-500/tokenizer.model b/checkpoint-500/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/checkpoint-500/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/checkpoint-500/tokenizer_config.json b/checkpoint-500/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..97f33b16e8481f3849fb6d02d1656628e9e22c91 --- /dev/null +++ b/checkpoint-500/tokenizer_config.json @@ -0,0 +1,2015 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": 
false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + 
}, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": 
"\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", 
+ "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255968": { + "content": "[toxicity=0]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255969": { + "content": "\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255970": { + "content": "\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255971": { + "content": "\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + 
"255972": { + "content": "\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255973": { + "content": "\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255974": { + "content": "\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255975": { + "content": "\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255976": { + "content": "\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255977": { + "content": "\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255978": { + "content": "\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255979": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255980": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255981": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255982": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255983": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255984": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255985": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255986": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255987": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255988": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255989": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255990": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255991": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255992": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255993": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255994": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255995": { + "content": 
"\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255996": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255997": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255998": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255999": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ system_message + '\n' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ 'Human: ' + content + '\nAssistant:' }}{% elif message['role'] == 'assistant' %}{{ content + '' + '\n' }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "padding_side": "right", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "split_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/checkpoint-500/trainer_state.json b/checkpoint-500/trainer_state.json new file mode 100644 index 
0000000000000000000000000000000000000000..4ce0615285fc0380bfe5bbfe6b5cf4022f67f795 --- /dev/null +++ b/checkpoint-500/trainer_state.json @@ -0,0 +1,833 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.770028275212064, + "eval_steps": 500, + "global_step": 500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.03770028275212064, + "grad_norm": 3.2918148040771484, + "learning_rate": 4.9995083170283816e-05, + "loss": 4.3742, + "num_input_tokens_seen": 50544, + "step": 5 + }, + { + "epoch": 0.07540056550424128, + "grad_norm": 3.0331127643585205, + "learning_rate": 4.998033461515242e-05, + "loss": 3.8434, + "num_input_tokens_seen": 104080, + "step": 10 + }, + { + "epoch": 0.11310084825636192, + "grad_norm": 3.322950601577759, + "learning_rate": 4.9955760135896534e-05, + "loss": 3.6027, + "num_input_tokens_seen": 155776, + "step": 15 + }, + { + "epoch": 0.15080113100848255, + "grad_norm": 2.7705531120300293, + "learning_rate": 4.992136939879856e-05, + "loss": 3.4102, + "num_input_tokens_seen": 203184, + "step": 20 + }, + { + "epoch": 0.1885014137606032, + "grad_norm": 3.4572091102600098, + "learning_rate": 4.9877175931330346e-05, + "loss": 3.153, + "num_input_tokens_seen": 255744, + "step": 25 + }, + { + "epoch": 0.22620169651272384, + "grad_norm": 2.0412893295288086, + "learning_rate": 4.982319711683221e-05, + "loss": 3.0949, + "num_input_tokens_seen": 307808, + "step": 30 + }, + { + "epoch": 0.2639019792648445, + "grad_norm": 2.0765998363494873, + "learning_rate": 4.975945418767529e-05, + "loss": 3.0392, + "num_input_tokens_seen": 358192, + "step": 35 + }, + { + "epoch": 0.3016022620169651, + "grad_norm": 2.2433698177337646, + "learning_rate": 4.968597221690986e-05, + "loss": 3.0685, + "num_input_tokens_seen": 408832, + "step": 40 + }, + { + "epoch": 0.3393025447690858, + "grad_norm": 2.7639973163604736, + "learning_rate": 4.96027801084029e-05, + "loss": 
3.0385, + "num_input_tokens_seen": 458368, + "step": 45 + }, + { + "epoch": 0.3770028275212064, + "grad_norm": 2.0680019855499268, + "learning_rate": 4.950991058546893e-05, + "loss": 3.1079, + "num_input_tokens_seen": 512160, + "step": 50 + }, + { + "epoch": 0.41470311027332707, + "grad_norm": 2.181554079055786, + "learning_rate": 4.940740017799833e-05, + "loss": 2.9883, + "num_input_tokens_seen": 562928, + "step": 55 + }, + { + "epoch": 0.4524033930254477, + "grad_norm": 2.0724411010742188, + "learning_rate": 4.929528920808854e-05, + "loss": 2.969, + "num_input_tokens_seen": 613072, + "step": 60 + }, + { + "epoch": 0.49010367577756836, + "grad_norm": 2.5529158115386963, + "learning_rate": 4.917362177418342e-05, + "loss": 3.0019, + "num_input_tokens_seen": 664640, + "step": 65 + }, + { + "epoch": 0.527803958529689, + "grad_norm": 2.3983142375946045, + "learning_rate": 4.904244573372733e-05, + "loss": 2.9984, + "num_input_tokens_seen": 716800, + "step": 70 + }, + { + "epoch": 0.5655042412818096, + "grad_norm": 2.206209182739258, + "learning_rate": 4.8901812684340564e-05, + "loss": 2.9848, + "num_input_tokens_seen": 766528, + "step": 75 + }, + { + "epoch": 0.6032045240339302, + "grad_norm": 2.22698974609375, + "learning_rate": 4.8751777943523634e-05, + "loss": 2.9169, + "num_input_tokens_seen": 817376, + "step": 80 + }, + { + "epoch": 0.6409048067860509, + "grad_norm": 2.4569709300994873, + "learning_rate": 4.8592400526898314e-05, + "loss": 2.8844, + "num_input_tokens_seen": 864688, + "step": 85 + }, + { + "epoch": 0.6786050895381716, + "grad_norm": 2.195237636566162, + "learning_rate": 4.842374312499405e-05, + "loss": 2.9422, + "num_input_tokens_seen": 913216, + "step": 90 + }, + { + "epoch": 0.7163053722902922, + "grad_norm": 2.54585599899292, + "learning_rate": 4.824587207858888e-05, + "loss": 2.8431, + "num_input_tokens_seen": 964144, + "step": 95 + }, + { + "epoch": 0.7540056550424128, + "grad_norm": 2.3841867446899414, + "learning_rate": 4.805885735261454e-05, 
+ "loss": 2.8377, + "num_input_tokens_seen": 1018768, + "step": 100 + }, + { + "epoch": 0.7917059377945335, + "grad_norm": 2.359386920928955, + "learning_rate": 4.786277250863599e-05, + "loss": 2.7843, + "num_input_tokens_seen": 1070592, + "step": 105 + }, + { + "epoch": 0.8294062205466541, + "grad_norm": 2.0952038764953613, + "learning_rate": 4.765769467591625e-05, + "loss": 2.8218, + "num_input_tokens_seen": 1125392, + "step": 110 + }, + { + "epoch": 0.8671065032987747, + "grad_norm": 2.222754955291748, + "learning_rate": 4.744370452107789e-05, + "loss": 2.877, + "num_input_tokens_seen": 1176256, + "step": 115 + }, + { + "epoch": 0.9048067860508954, + "grad_norm": 2.3233537673950195, + "learning_rate": 4.722088621637309e-05, + "loss": 2.9028, + "num_input_tokens_seen": 1225376, + "step": 120 + }, + { + "epoch": 0.942507068803016, + "grad_norm": 2.400575876235962, + "learning_rate": 4.698932740657479e-05, + "loss": 2.762, + "num_input_tokens_seen": 1277024, + "step": 125 + }, + { + "epoch": 0.9802073515551367, + "grad_norm": 2.592116117477417, + "learning_rate": 4.6749119174501975e-05, + "loss": 2.9164, + "num_input_tokens_seen": 1322912, + "step": 130 + }, + { + "epoch": 1.0179076343072573, + "grad_norm": 2.563326120376587, + "learning_rate": 4.6500356005192514e-05, + "loss": 2.736, + "num_input_tokens_seen": 1368624, + "step": 135 + }, + { + "epoch": 1.055607917059378, + "grad_norm": 2.3927371501922607, + "learning_rate": 4.6243135748737864e-05, + "loss": 2.7414, + "num_input_tokens_seen": 1417664, + "step": 140 + }, + { + "epoch": 1.0933081998114986, + "grad_norm": 2.597975492477417, + "learning_rate": 4.597755958179406e-05, + "loss": 2.6961, + "num_input_tokens_seen": 1469120, + "step": 145 + }, + { + "epoch": 1.1310084825636193, + "grad_norm": 2.932581663131714, + "learning_rate": 4.570373196778427e-05, + "loss": 2.6134, + "num_input_tokens_seen": 1521632, + "step": 150 + }, + { + "epoch": 1.1687087653157398, + "grad_norm": 3.28389310836792, + 
"learning_rate": 4.5421760615808474e-05, + "loss": 2.7548, + "num_input_tokens_seen": 1565296, + "step": 155 + }, + { + "epoch": 1.2064090480678604, + "grad_norm": 2.927279233932495, + "learning_rate": 4.513175643827647e-05, + "loss": 2.702, + "num_input_tokens_seen": 1617088, + "step": 160 + }, + { + "epoch": 1.244109330819981, + "grad_norm": 2.840573310852051, + "learning_rate": 4.4833833507280884e-05, + "loss": 2.6893, + "num_input_tokens_seen": 1663584, + "step": 165 + }, + { + "epoch": 1.2818096135721018, + "grad_norm": 2.6322081089019775, + "learning_rate": 4.4528109009727336e-05, + "loss": 2.5671, + "num_input_tokens_seen": 1713744, + "step": 170 + }, + { + "epoch": 1.3195098963242224, + "grad_norm": 3.1280879974365234, + "learning_rate": 4.42147032012394e-05, + "loss": 2.7682, + "num_input_tokens_seen": 1762768, + "step": 175 + }, + { + "epoch": 1.3572101790763431, + "grad_norm": 3.1128265857696533, + "learning_rate": 4.389373935885646e-05, + "loss": 2.7062, + "num_input_tokens_seen": 1815808, + "step": 180 + }, + { + "epoch": 1.3949104618284638, + "grad_norm": 2.952150344848633, + "learning_rate": 4.356534373254316e-05, + "loss": 2.7092, + "num_input_tokens_seen": 1871040, + "step": 185 + }, + { + "epoch": 1.4326107445805842, + "grad_norm": 3.0146102905273438, + "learning_rate": 4.322964549552943e-05, + "loss": 2.6518, + "num_input_tokens_seen": 1924048, + "step": 190 + }, + { + "epoch": 1.4703110273327051, + "grad_norm": 2.999300956726074, + "learning_rate": 4.288677669350066e-05, + "loss": 2.6592, + "num_input_tokens_seen": 1972720, + "step": 195 + }, + { + "epoch": 1.5080113100848256, + "grad_norm": 3.575253963470459, + "learning_rate": 4.2536872192658036e-05, + "loss": 2.716, + "num_input_tokens_seen": 2022112, + "step": 200 + }, + { + "epoch": 1.5457115928369463, + "grad_norm": 3.1339428424835205, + "learning_rate": 4.218006962666934e-05, + "loss": 2.6746, + "num_input_tokens_seen": 2072000, + "step": 205 + }, + { + "epoch": 1.583411875589067, + 
"grad_norm": 3.130823850631714, + "learning_rate": 4.181650934253132e-05, + "loss": 2.6306, + "num_input_tokens_seen": 2125632, + "step": 210 + }, + { + "epoch": 1.6211121583411876, + "grad_norm": 3.1828997135162354, + "learning_rate": 4.144633434536467e-05, + "loss": 2.5678, + "num_input_tokens_seen": 2174464, + "step": 215 + }, + { + "epoch": 1.6588124410933083, + "grad_norm": 3.3879778385162354, + "learning_rate": 4.1069690242163484e-05, + "loss": 2.7334, + "num_input_tokens_seen": 2223408, + "step": 220 + }, + { + "epoch": 1.6965127238454287, + "grad_norm": 3.5921592712402344, + "learning_rate": 4.06867251845213e-05, + "loss": 2.6658, + "num_input_tokens_seen": 2281296, + "step": 225 + }, + { + "epoch": 1.7342130065975496, + "grad_norm": 3.4126648902893066, + "learning_rate": 4.0297589810356165e-05, + "loss": 2.6567, + "num_input_tokens_seen": 2334176, + "step": 230 + }, + { + "epoch": 1.77191328934967, + "grad_norm": 3.504786252975464, + "learning_rate": 3.9902437184657784e-05, + "loss": 2.6131, + "num_input_tokens_seen": 2389296, + "step": 235 + }, + { + "epoch": 1.8096135721017907, + "grad_norm": 3.459768056869507, + "learning_rate": 3.9501422739279956e-05, + "loss": 2.662, + "num_input_tokens_seen": 2442800, + "step": 240 + }, + { + "epoch": 1.8473138548539114, + "grad_norm": 2.944490909576416, + "learning_rate": 3.909470421180201e-05, + "loss": 2.6339, + "num_input_tokens_seen": 2494912, + "step": 245 + }, + { + "epoch": 1.885014137606032, + "grad_norm": 3.4897429943084717, + "learning_rate": 3.8682441583483314e-05, + "loss": 2.6338, + "num_input_tokens_seen": 2544624, + "step": 250 + }, + { + "epoch": 1.9227144203581528, + "grad_norm": 3.15039324760437, + "learning_rate": 3.8264797016335205e-05, + "loss": 2.6844, + "num_input_tokens_seen": 2597792, + "step": 255 + }, + { + "epoch": 1.9604147031102732, + "grad_norm": 3.5575578212738037, + "learning_rate": 3.7841934789335164e-05, + "loss": 2.5939, + "num_input_tokens_seen": 2646544, + "step": 260 + }, + { + 
"epoch": 1.998114985862394, + "grad_norm": 3.97521710395813, + "learning_rate": 3.741402123380828e-05, + "loss": 2.7393, + "num_input_tokens_seen": 2700224, + "step": 265 + }, + { + "epoch": 2.0358152686145146, + "grad_norm": 3.1736276149749756, + "learning_rate": 3.6981224668001424e-05, + "loss": 2.4495, + "num_input_tokens_seen": 2749104, + "step": 270 + }, + { + "epoch": 2.0735155513666355, + "grad_norm": 3.633033275604248, + "learning_rate": 3.654371533087586e-05, + "loss": 2.3823, + "num_input_tokens_seen": 2802992, + "step": 275 + }, + { + "epoch": 2.111215834118756, + "grad_norm": 3.4200243949890137, + "learning_rate": 3.610166531514436e-05, + "loss": 2.474, + "num_input_tokens_seen": 2844512, + "step": 280 + }, + { + "epoch": 2.1489161168708764, + "grad_norm": 4.055693626403809, + "learning_rate": 3.565524849957921e-05, + "loss": 2.4776, + "num_input_tokens_seen": 2894400, + "step": 285 + }, + { + "epoch": 2.1866163996229973, + "grad_norm": 4.254647254943848, + "learning_rate": 3.520464048061758e-05, + "loss": 2.4709, + "num_input_tokens_seen": 2944688, + "step": 290 + }, + { + "epoch": 2.2243166823751177, + "grad_norm": 4.337226390838623, + "learning_rate": 3.47500185032913e-05, + "loss": 2.5009, + "num_input_tokens_seen": 2994672, + "step": 295 + }, + { + "epoch": 2.2620169651272386, + "grad_norm": 4.424121379852295, + "learning_rate": 3.4291561391508185e-05, + "loss": 2.4524, + "num_input_tokens_seen": 3045344, + "step": 300 + }, + { + "epoch": 2.299717247879359, + "grad_norm": 4.178481101989746, + "learning_rate": 3.3829449477712324e-05, + "loss": 2.4083, + "num_input_tokens_seen": 3100688, + "step": 305 + }, + { + "epoch": 2.3374175306314795, + "grad_norm": 4.315364837646484, + "learning_rate": 3.336386453195088e-05, + "loss": 2.4713, + "num_input_tokens_seen": 3154896, + "step": 310 + }, + { + "epoch": 2.3751178133836004, + "grad_norm": 4.445486545562744, + "learning_rate": 3.2894989690375626e-05, + "loss": 2.3986, + "num_input_tokens_seen": 3209120, 
+ "step": 315 + }, + { + "epoch": 2.412818096135721, + "grad_norm": 4.80281400680542, + "learning_rate": 3.2423009383206876e-05, + "loss": 2.4258, + "num_input_tokens_seen": 3257984, + "step": 320 + }, + { + "epoch": 2.4505183788878417, + "grad_norm": 4.705103397369385, + "learning_rate": 3.194810926218861e-05, + "loss": 2.4126, + "num_input_tokens_seen": 3309424, + "step": 325 + }, + { + "epoch": 2.488218661639962, + "grad_norm": 4.281659126281738, + "learning_rate": 3.147047612756302e-05, + "loss": 2.3639, + "num_input_tokens_seen": 3359152, + "step": 330 + }, + { + "epoch": 2.525918944392083, + "grad_norm": 4.493980407714844, + "learning_rate": 3.099029785459328e-05, + "loss": 2.5176, + "num_input_tokens_seen": 3406784, + "step": 335 + }, + { + "epoch": 2.5636192271442035, + "grad_norm": 4.339905261993408, + "learning_rate": 3.0507763319663517e-05, + "loss": 2.4779, + "num_input_tokens_seen": 3459328, + "step": 340 + }, + { + "epoch": 2.6013195098963244, + "grad_norm": 4.67468786239624, + "learning_rate": 3.002306232598497e-05, + "loss": 2.3833, + "num_input_tokens_seen": 3511056, + "step": 345 + }, + { + "epoch": 2.639019792648445, + "grad_norm": 4.905932426452637, + "learning_rate": 2.9536385528937567e-05, + "loss": 2.3794, + "num_input_tokens_seen": 3562352, + "step": 350 + }, + { + "epoch": 2.6767200754005653, + "grad_norm": 4.684591770172119, + "learning_rate": 2.9047924361076345e-05, + "loss": 2.4883, + "num_input_tokens_seen": 3615664, + "step": 355 + }, + { + "epoch": 2.7144203581526862, + "grad_norm": 4.377009868621826, + "learning_rate": 2.8557870956832132e-05, + "loss": 2.3423, + "num_input_tokens_seen": 3661424, + "step": 360 + }, + { + "epoch": 2.7521206409048067, + "grad_norm": 5.146539688110352, + "learning_rate": 2.8066418076936167e-05, + "loss": 2.4092, + "num_input_tokens_seen": 3710592, + "step": 365 + }, + { + "epoch": 2.7898209236569276, + "grad_norm": 5.5937910079956055, + "learning_rate": 2.7573759032598366e-05, + "loss": 2.4818, + 
"num_input_tokens_seen": 3765728, + "step": 370 + }, + { + "epoch": 2.827521206409048, + "grad_norm": 4.4958696365356445, + "learning_rate": 2.7080087609469062e-05, + "loss": 2.4587, + "num_input_tokens_seen": 3815360, + "step": 375 + }, + { + "epoch": 2.8652214891611685, + "grad_norm": 4.503344535827637, + "learning_rate": 2.6585597991414114e-05, + "loss": 2.3462, + "num_input_tokens_seen": 3868096, + "step": 380 + }, + { + "epoch": 2.9029217719132894, + "grad_norm": 4.773792743682861, + "learning_rate": 2.6090484684133404e-05, + "loss": 2.3713, + "num_input_tokens_seen": 3913696, + "step": 385 + }, + { + "epoch": 2.9406220546654103, + "grad_norm": 4.8644537925720215, + "learning_rate": 2.5594942438652688e-05, + "loss": 2.4618, + "num_input_tokens_seen": 3971840, + "step": 390 + }, + { + "epoch": 2.9783223374175307, + "grad_norm": 5.539215087890625, + "learning_rate": 2.509916617471903e-05, + "loss": 2.5454, + "num_input_tokens_seen": 4025040, + "step": 395 + }, + { + "epoch": 3.016022620169651, + "grad_norm": 4.475778579711914, + "learning_rate": 2.46033509041298e-05, + "loss": 2.3007, + "num_input_tokens_seen": 4075488, + "step": 400 + }, + { + "epoch": 3.053722902921772, + "grad_norm": 4.82028341293335, + "learning_rate": 2.410769165402549e-05, + "loss": 2.274, + "num_input_tokens_seen": 4130496, + "step": 405 + }, + { + "epoch": 3.0914231856738925, + "grad_norm": 5.513036251068115, + "learning_rate": 2.3612383390176503e-05, + "loss": 2.2457, + "num_input_tokens_seen": 4181504, + "step": 410 + }, + { + "epoch": 3.1291234684260134, + "grad_norm": 5.219841957092285, + "learning_rate": 2.3117620940294048e-05, + "loss": 2.2595, + "num_input_tokens_seen": 4236816, + "step": 415 + }, + { + "epoch": 3.166823751178134, + "grad_norm": 5.527017593383789, + "learning_rate": 2.2623598917395438e-05, + "loss": 2.2326, + "num_input_tokens_seen": 4289488, + "step": 420 + }, + { + "epoch": 3.2045240339302543, + "grad_norm": 5.297417163848877, + "learning_rate": 
2.213051164325366e-05, + "loss": 2.2595, + "num_input_tokens_seen": 4334704, + "step": 425 + }, + { + "epoch": 3.242224316682375, + "grad_norm": 5.43293571472168, + "learning_rate": 2.1638553071961708e-05, + "loss": 2.2084, + "num_input_tokens_seen": 4377360, + "step": 430 + }, + { + "epoch": 3.2799245994344957, + "grad_norm": 5.788747310638428, + "learning_rate": 2.1147916713641367e-05, + "loss": 2.21, + "num_input_tokens_seen": 4428544, + "step": 435 + }, + { + "epoch": 3.3176248821866166, + "grad_norm": 6.185176372528076, + "learning_rate": 2.0658795558326743e-05, + "loss": 2.2449, + "num_input_tokens_seen": 4477664, + "step": 440 + }, + { + "epoch": 3.355325164938737, + "grad_norm": 5.564029693603516, + "learning_rate": 2.017138200005236e-05, + "loss": 2.2457, + "num_input_tokens_seen": 4533792, + "step": 445 + }, + { + "epoch": 3.3930254476908575, + "grad_norm": 5.952132225036621, + "learning_rate": 1.9685867761175584e-05, + "loss": 2.3035, + "num_input_tokens_seen": 4584800, + "step": 450 + }, + { + "epoch": 3.4307257304429783, + "grad_norm": 5.484558582305908, + "learning_rate": 1.9202443816963425e-05, + "loss": 2.3204, + "num_input_tokens_seen": 4634976, + "step": 455 + }, + { + "epoch": 3.468426013195099, + "grad_norm": 6.121850490570068, + "learning_rate": 1.872130032047302e-05, + "loss": 2.2055, + "num_input_tokens_seen": 4691344, + "step": 460 + }, + { + "epoch": 3.5061262959472197, + "grad_norm": 6.345475673675537, + "learning_rate": 1.824262652775568e-05, + "loss": 2.3327, + "num_input_tokens_seen": 4745536, + "step": 465 + }, + { + "epoch": 3.54382657869934, + "grad_norm": 6.4370574951171875, + "learning_rate": 1.7766610723413684e-05, + "loss": 2.2214, + "num_input_tokens_seen": 4798128, + "step": 470 + }, + { + "epoch": 3.581526861451461, + "grad_norm": 6.708219528198242, + "learning_rate": 1.7293440146539196e-05, + "loss": 2.3097, + "num_input_tokens_seen": 4847632, + "step": 475 + }, + { + "epoch": 3.6192271442035815, + "grad_norm": 
5.383622169494629, + "learning_rate": 1.682330091706446e-05, + "loss": 2.2966, + "num_input_tokens_seen": 4905648, + "step": 480 + }, + { + "epoch": 3.6569274269557024, + "grad_norm": 6.210061550140381, + "learning_rate": 1.6356377962552238e-05, + "loss": 2.2006, + "num_input_tokens_seen": 4955600, + "step": 485 + }, + { + "epoch": 3.694627709707823, + "grad_norm": 6.637734889984131, + "learning_rate": 1.589285494545514e-05, + "loss": 2.2463, + "num_input_tokens_seen": 5007424, + "step": 490 + }, + { + "epoch": 3.7323279924599433, + "grad_norm": 5.893795967102051, + "learning_rate": 1.5432914190872757e-05, + "loss": 2.2045, + "num_input_tokens_seen": 5058848, + "step": 495 + }, + { + "epoch": 3.770028275212064, + "grad_norm": 5.891578674316406, + "learning_rate": 1.4976736614834664e-05, + "loss": 2.135, + "num_input_tokens_seen": 5109904, + "step": 500 + } + ], + "logging_steps": 5, + "max_steps": 792, + "num_input_tokens_seen": 5109904, + "num_train_epochs": 6, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 6.238890032837427e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-500/training_args.bin b/checkpoint-500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d325c35219dd8c0eea8279f17307e1d0d8c29b12 --- /dev/null +++ b/checkpoint-500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72d28ce688790f3a28f1dba3064866610463622e1d4141255300e136617f5673 +size 5432 diff --git a/checkpoint-600/README.md b/checkpoint-600/README.md new file mode 100644 index 0000000000000000000000000000000000000000..9f8f0d832be44b45f0bbeda414aed6d1440221e7 --- /dev/null +++ b/checkpoint-600/README.md @@ -0,0 +1,202 @@ +--- +base_model: 
unsloth/gemma-2-2b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. 
+ +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.12.0 \ No newline at end of file diff --git a/checkpoint-600/adapter_config.json b/checkpoint-600/adapter_config.json new file mode 100644 index 
0000000000000000000000000000000000000000..84f5e74794795afa599846c26565a6f523155345 --- /dev/null +++ b/checkpoint-600/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/gemma-2-2b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "k_proj", + "o_proj", + "down_proj", + "up_proj", + "q_proj", + "gate_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-600/adapter_model.safetensors b/checkpoint-600/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fea691e92ffce309ff8d7b53262c3085d470f184 --- /dev/null +++ b/checkpoint-600/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b370659adbf60898dcdaf7eb26f455b105a4f249ed21fea30422cb7d94861660 +size 41581360 diff --git a/checkpoint-600/optimizer.pt b/checkpoint-600/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..360a74cb8499df588ca4fa25d2abf52403d3eefe --- /dev/null +++ b/checkpoint-600/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:852904b0355c9c1c9b2b66d5ae172afcb432ad90813e631e1b5653997b256e4a +size 83372758 diff --git a/checkpoint-600/rng_state_0.pth b/checkpoint-600/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..43606f86a072ecc959e43371efd6a451e74daac3 --- /dev/null +++ b/checkpoint-600/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:d7e52325e9d729519836af640f8f754a93ee06730fb2953b5309434b53b17562 +size 14512 diff --git a/checkpoint-600/rng_state_1.pth b/checkpoint-600/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..fbadae7125c23bf749649bf0b9cd0044d56679d3 --- /dev/null +++ b/checkpoint-600/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a93593cf0342eb47876986e1063102e1546354426a2324c46ddcf1cbecae803 +size 14512 diff --git a/checkpoint-600/scheduler.pt b/checkpoint-600/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..cf2f8c18e9d74869c988eab02430ad6319f17840 --- /dev/null +++ b/checkpoint-600/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:352d333818407c00acc00fba3e8fa8bd42f6825ddbcda95decb7921507cceb6c +size 1064 diff --git a/checkpoint-600/special_tokens_map.json b/checkpoint-600/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..8d6368f7e735fbe4781bf6e956b7c6ad0586df80 --- /dev/null +++ b/checkpoint-600/special_tokens_map.json @@ -0,0 +1,34 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-600/tokenizer.json b/checkpoint-600/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..a4a305d1de4d8f47c0252b4d7fe65a10dd8e2c22 --- /dev/null +++ b/checkpoint-600/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:5f7eee611703c5ce5d1eee32d9cdcfe465647b8aff0c1dfb3bed7ad7dbb05060 +size 34362873 diff --git a/checkpoint-600/tokenizer.model b/checkpoint-600/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/checkpoint-600/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/checkpoint-600/tokenizer_config.json b/checkpoint-600/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..97f33b16e8481f3849fb6d02d1656628e9e22c91 --- /dev/null +++ b/checkpoint-600/tokenizer_config.json @@ -0,0 +1,2015 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": 
false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + 
}, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": 
"\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", 
+ "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255968": { + "content": "[toxicity=0]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255969": { + "content": "\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255970": { + "content": "\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255971": { + "content": "\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + 
"255972": { + "content": "\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255973": { + "content": "\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255974": { + "content": "\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255975": { + "content": "\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255976": { + "content": "\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255977": { + "content": "\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255978": { + "content": "\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255979": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255980": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255981": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255982": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255983": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255984": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255985": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255986": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255987": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255988": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255989": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255990": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255991": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255992": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255993": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255994": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255995": { + "content": 
"\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255996": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255997": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255998": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255999": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ system_message + '\n' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ 'Human: ' + content + '\nAssistant:' }}{% elif message['role'] == 'assistant' %}{{ content + '' + '\n' }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "padding_side": "right", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "split_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/checkpoint-600/trainer_state.json b/checkpoint-600/trainer_state.json new file mode 100644 index 
0000000000000000000000000000000000000000..ada8633a1206019ef425aaf92d7a9bda0f368bab --- /dev/null +++ b/checkpoint-600/trainer_state.json @@ -0,0 +1,993 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 4.524033930254477, + "eval_steps": 500, + "global_step": 600, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.03770028275212064, + "grad_norm": 3.2918148040771484, + "learning_rate": 4.9995083170283816e-05, + "loss": 4.3742, + "num_input_tokens_seen": 50544, + "step": 5 + }, + { + "epoch": 0.07540056550424128, + "grad_norm": 3.0331127643585205, + "learning_rate": 4.998033461515242e-05, + "loss": 3.8434, + "num_input_tokens_seen": 104080, + "step": 10 + }, + { + "epoch": 0.11310084825636192, + "grad_norm": 3.322950601577759, + "learning_rate": 4.9955760135896534e-05, + "loss": 3.6027, + "num_input_tokens_seen": 155776, + "step": 15 + }, + { + "epoch": 0.15080113100848255, + "grad_norm": 2.7705531120300293, + "learning_rate": 4.992136939879856e-05, + "loss": 3.4102, + "num_input_tokens_seen": 203184, + "step": 20 + }, + { + "epoch": 0.1885014137606032, + "grad_norm": 3.4572091102600098, + "learning_rate": 4.9877175931330346e-05, + "loss": 3.153, + "num_input_tokens_seen": 255744, + "step": 25 + }, + { + "epoch": 0.22620169651272384, + "grad_norm": 2.0412893295288086, + "learning_rate": 4.982319711683221e-05, + "loss": 3.0949, + "num_input_tokens_seen": 307808, + "step": 30 + }, + { + "epoch": 0.2639019792648445, + "grad_norm": 2.0765998363494873, + "learning_rate": 4.975945418767529e-05, + "loss": 3.0392, + "num_input_tokens_seen": 358192, + "step": 35 + }, + { + "epoch": 0.3016022620169651, + "grad_norm": 2.2433698177337646, + "learning_rate": 4.968597221690986e-05, + "loss": 3.0685, + "num_input_tokens_seen": 408832, + "step": 40 + }, + { + "epoch": 0.3393025447690858, + "grad_norm": 2.7639973163604736, + "learning_rate": 4.96027801084029e-05, + "loss": 
3.0385, + "num_input_tokens_seen": 458368, + "step": 45 + }, + { + "epoch": 0.3770028275212064, + "grad_norm": 2.0680019855499268, + "learning_rate": 4.950991058546893e-05, + "loss": 3.1079, + "num_input_tokens_seen": 512160, + "step": 50 + }, + { + "epoch": 0.41470311027332707, + "grad_norm": 2.181554079055786, + "learning_rate": 4.940740017799833e-05, + "loss": 2.9883, + "num_input_tokens_seen": 562928, + "step": 55 + }, + { + "epoch": 0.4524033930254477, + "grad_norm": 2.0724411010742188, + "learning_rate": 4.929528920808854e-05, + "loss": 2.969, + "num_input_tokens_seen": 613072, + "step": 60 + }, + { + "epoch": 0.49010367577756836, + "grad_norm": 2.5529158115386963, + "learning_rate": 4.917362177418342e-05, + "loss": 3.0019, + "num_input_tokens_seen": 664640, + "step": 65 + }, + { + "epoch": 0.527803958529689, + "grad_norm": 2.3983142375946045, + "learning_rate": 4.904244573372733e-05, + "loss": 2.9984, + "num_input_tokens_seen": 716800, + "step": 70 + }, + { + "epoch": 0.5655042412818096, + "grad_norm": 2.206209182739258, + "learning_rate": 4.8901812684340564e-05, + "loss": 2.9848, + "num_input_tokens_seen": 766528, + "step": 75 + }, + { + "epoch": 0.6032045240339302, + "grad_norm": 2.22698974609375, + "learning_rate": 4.8751777943523634e-05, + "loss": 2.9169, + "num_input_tokens_seen": 817376, + "step": 80 + }, + { + "epoch": 0.6409048067860509, + "grad_norm": 2.4569709300994873, + "learning_rate": 4.8592400526898314e-05, + "loss": 2.8844, + "num_input_tokens_seen": 864688, + "step": 85 + }, + { + "epoch": 0.6786050895381716, + "grad_norm": 2.195237636566162, + "learning_rate": 4.842374312499405e-05, + "loss": 2.9422, + "num_input_tokens_seen": 913216, + "step": 90 + }, + { + "epoch": 0.7163053722902922, + "grad_norm": 2.54585599899292, + "learning_rate": 4.824587207858888e-05, + "loss": 2.8431, + "num_input_tokens_seen": 964144, + "step": 95 + }, + { + "epoch": 0.7540056550424128, + "grad_norm": 2.3841867446899414, + "learning_rate": 4.805885735261454e-05, 
+ "loss": 2.8377, + "num_input_tokens_seen": 1018768, + "step": 100 + }, + { + "epoch": 0.7917059377945335, + "grad_norm": 2.359386920928955, + "learning_rate": 4.786277250863599e-05, + "loss": 2.7843, + "num_input_tokens_seen": 1070592, + "step": 105 + }, + { + "epoch": 0.8294062205466541, + "grad_norm": 2.0952038764953613, + "learning_rate": 4.765769467591625e-05, + "loss": 2.8218, + "num_input_tokens_seen": 1125392, + "step": 110 + }, + { + "epoch": 0.8671065032987747, + "grad_norm": 2.222754955291748, + "learning_rate": 4.744370452107789e-05, + "loss": 2.877, + "num_input_tokens_seen": 1176256, + "step": 115 + }, + { + "epoch": 0.9048067860508954, + "grad_norm": 2.3233537673950195, + "learning_rate": 4.722088621637309e-05, + "loss": 2.9028, + "num_input_tokens_seen": 1225376, + "step": 120 + }, + { + "epoch": 0.942507068803016, + "grad_norm": 2.400575876235962, + "learning_rate": 4.698932740657479e-05, + "loss": 2.762, + "num_input_tokens_seen": 1277024, + "step": 125 + }, + { + "epoch": 0.9802073515551367, + "grad_norm": 2.592116117477417, + "learning_rate": 4.6749119174501975e-05, + "loss": 2.9164, + "num_input_tokens_seen": 1322912, + "step": 130 + }, + { + "epoch": 1.0179076343072573, + "grad_norm": 2.563326120376587, + "learning_rate": 4.6500356005192514e-05, + "loss": 2.736, + "num_input_tokens_seen": 1368624, + "step": 135 + }, + { + "epoch": 1.055607917059378, + "grad_norm": 2.3927371501922607, + "learning_rate": 4.6243135748737864e-05, + "loss": 2.7414, + "num_input_tokens_seen": 1417664, + "step": 140 + }, + { + "epoch": 1.0933081998114986, + "grad_norm": 2.597975492477417, + "learning_rate": 4.597755958179406e-05, + "loss": 2.6961, + "num_input_tokens_seen": 1469120, + "step": 145 + }, + { + "epoch": 1.1310084825636193, + "grad_norm": 2.932581663131714, + "learning_rate": 4.570373196778427e-05, + "loss": 2.6134, + "num_input_tokens_seen": 1521632, + "step": 150 + }, + { + "epoch": 1.1687087653157398, + "grad_norm": 3.28389310836792, + 
"learning_rate": 4.5421760615808474e-05, + "loss": 2.7548, + "num_input_tokens_seen": 1565296, + "step": 155 + }, + { + "epoch": 1.2064090480678604, + "grad_norm": 2.927279233932495, + "learning_rate": 4.513175643827647e-05, + "loss": 2.702, + "num_input_tokens_seen": 1617088, + "step": 160 + }, + { + "epoch": 1.244109330819981, + "grad_norm": 2.840573310852051, + "learning_rate": 4.4833833507280884e-05, + "loss": 2.6893, + "num_input_tokens_seen": 1663584, + "step": 165 + }, + { + "epoch": 1.2818096135721018, + "grad_norm": 2.6322081089019775, + "learning_rate": 4.4528109009727336e-05, + "loss": 2.5671, + "num_input_tokens_seen": 1713744, + "step": 170 + }, + { + "epoch": 1.3195098963242224, + "grad_norm": 3.1280879974365234, + "learning_rate": 4.42147032012394e-05, + "loss": 2.7682, + "num_input_tokens_seen": 1762768, + "step": 175 + }, + { + "epoch": 1.3572101790763431, + "grad_norm": 3.1128265857696533, + "learning_rate": 4.389373935885646e-05, + "loss": 2.7062, + "num_input_tokens_seen": 1815808, + "step": 180 + }, + { + "epoch": 1.3949104618284638, + "grad_norm": 2.952150344848633, + "learning_rate": 4.356534373254316e-05, + "loss": 2.7092, + "num_input_tokens_seen": 1871040, + "step": 185 + }, + { + "epoch": 1.4326107445805842, + "grad_norm": 3.0146102905273438, + "learning_rate": 4.322964549552943e-05, + "loss": 2.6518, + "num_input_tokens_seen": 1924048, + "step": 190 + }, + { + "epoch": 1.4703110273327051, + "grad_norm": 2.999300956726074, + "learning_rate": 4.288677669350066e-05, + "loss": 2.6592, + "num_input_tokens_seen": 1972720, + "step": 195 + }, + { + "epoch": 1.5080113100848256, + "grad_norm": 3.575253963470459, + "learning_rate": 4.2536872192658036e-05, + "loss": 2.716, + "num_input_tokens_seen": 2022112, + "step": 200 + }, + { + "epoch": 1.5457115928369463, + "grad_norm": 3.1339428424835205, + "learning_rate": 4.218006962666934e-05, + "loss": 2.6746, + "num_input_tokens_seen": 2072000, + "step": 205 + }, + { + "epoch": 1.583411875589067, + 
"grad_norm": 3.130823850631714, + "learning_rate": 4.181650934253132e-05, + "loss": 2.6306, + "num_input_tokens_seen": 2125632, + "step": 210 + }, + { + "epoch": 1.6211121583411876, + "grad_norm": 3.1828997135162354, + "learning_rate": 4.144633434536467e-05, + "loss": 2.5678, + "num_input_tokens_seen": 2174464, + "step": 215 + }, + { + "epoch": 1.6588124410933083, + "grad_norm": 3.3879778385162354, + "learning_rate": 4.1069690242163484e-05, + "loss": 2.7334, + "num_input_tokens_seen": 2223408, + "step": 220 + }, + { + "epoch": 1.6965127238454287, + "grad_norm": 3.5921592712402344, + "learning_rate": 4.06867251845213e-05, + "loss": 2.6658, + "num_input_tokens_seen": 2281296, + "step": 225 + }, + { + "epoch": 1.7342130065975496, + "grad_norm": 3.4126648902893066, + "learning_rate": 4.0297589810356165e-05, + "loss": 2.6567, + "num_input_tokens_seen": 2334176, + "step": 230 + }, + { + "epoch": 1.77191328934967, + "grad_norm": 3.504786252975464, + "learning_rate": 3.9902437184657784e-05, + "loss": 2.6131, + "num_input_tokens_seen": 2389296, + "step": 235 + }, + { + "epoch": 1.8096135721017907, + "grad_norm": 3.459768056869507, + "learning_rate": 3.9501422739279956e-05, + "loss": 2.662, + "num_input_tokens_seen": 2442800, + "step": 240 + }, + { + "epoch": 1.8473138548539114, + "grad_norm": 2.944490909576416, + "learning_rate": 3.909470421180201e-05, + "loss": 2.6339, + "num_input_tokens_seen": 2494912, + "step": 245 + }, + { + "epoch": 1.885014137606032, + "grad_norm": 3.4897429943084717, + "learning_rate": 3.8682441583483314e-05, + "loss": 2.6338, + "num_input_tokens_seen": 2544624, + "step": 250 + }, + { + "epoch": 1.9227144203581528, + "grad_norm": 3.15039324760437, + "learning_rate": 3.8264797016335205e-05, + "loss": 2.6844, + "num_input_tokens_seen": 2597792, + "step": 255 + }, + { + "epoch": 1.9604147031102732, + "grad_norm": 3.5575578212738037, + "learning_rate": 3.7841934789335164e-05, + "loss": 2.5939, + "num_input_tokens_seen": 2646544, + "step": 260 + }, + { + 
"epoch": 1.998114985862394, + "grad_norm": 3.97521710395813, + "learning_rate": 3.741402123380828e-05, + "loss": 2.7393, + "num_input_tokens_seen": 2700224, + "step": 265 + }, + { + "epoch": 2.0358152686145146, + "grad_norm": 3.1736276149749756, + "learning_rate": 3.6981224668001424e-05, + "loss": 2.4495, + "num_input_tokens_seen": 2749104, + "step": 270 + }, + { + "epoch": 2.0735155513666355, + "grad_norm": 3.633033275604248, + "learning_rate": 3.654371533087586e-05, + "loss": 2.3823, + "num_input_tokens_seen": 2802992, + "step": 275 + }, + { + "epoch": 2.111215834118756, + "grad_norm": 3.4200243949890137, + "learning_rate": 3.610166531514436e-05, + "loss": 2.474, + "num_input_tokens_seen": 2844512, + "step": 280 + }, + { + "epoch": 2.1489161168708764, + "grad_norm": 4.055693626403809, + "learning_rate": 3.565524849957921e-05, + "loss": 2.4776, + "num_input_tokens_seen": 2894400, + "step": 285 + }, + { + "epoch": 2.1866163996229973, + "grad_norm": 4.254647254943848, + "learning_rate": 3.520464048061758e-05, + "loss": 2.4709, + "num_input_tokens_seen": 2944688, + "step": 290 + }, + { + "epoch": 2.2243166823751177, + "grad_norm": 4.337226390838623, + "learning_rate": 3.47500185032913e-05, + "loss": 2.5009, + "num_input_tokens_seen": 2994672, + "step": 295 + }, + { + "epoch": 2.2620169651272386, + "grad_norm": 4.424121379852295, + "learning_rate": 3.4291561391508185e-05, + "loss": 2.4524, + "num_input_tokens_seen": 3045344, + "step": 300 + }, + { + "epoch": 2.299717247879359, + "grad_norm": 4.178481101989746, + "learning_rate": 3.3829449477712324e-05, + "loss": 2.4083, + "num_input_tokens_seen": 3100688, + "step": 305 + }, + { + "epoch": 2.3374175306314795, + "grad_norm": 4.315364837646484, + "learning_rate": 3.336386453195088e-05, + "loss": 2.4713, + "num_input_tokens_seen": 3154896, + "step": 310 + }, + { + "epoch": 2.3751178133836004, + "grad_norm": 4.445486545562744, + "learning_rate": 3.2894989690375626e-05, + "loss": 2.3986, + "num_input_tokens_seen": 3209120, 
+ "step": 315 + }, + { + "epoch": 2.412818096135721, + "grad_norm": 4.80281400680542, + "learning_rate": 3.2423009383206876e-05, + "loss": 2.4258, + "num_input_tokens_seen": 3257984, + "step": 320 + }, + { + "epoch": 2.4505183788878417, + "grad_norm": 4.705103397369385, + "learning_rate": 3.194810926218861e-05, + "loss": 2.4126, + "num_input_tokens_seen": 3309424, + "step": 325 + }, + { + "epoch": 2.488218661639962, + "grad_norm": 4.281659126281738, + "learning_rate": 3.147047612756302e-05, + "loss": 2.3639, + "num_input_tokens_seen": 3359152, + "step": 330 + }, + { + "epoch": 2.525918944392083, + "grad_norm": 4.493980407714844, + "learning_rate": 3.099029785459328e-05, + "loss": 2.5176, + "num_input_tokens_seen": 3406784, + "step": 335 + }, + { + "epoch": 2.5636192271442035, + "grad_norm": 4.339905261993408, + "learning_rate": 3.0507763319663517e-05, + "loss": 2.4779, + "num_input_tokens_seen": 3459328, + "step": 340 + }, + { + "epoch": 2.6013195098963244, + "grad_norm": 4.67468786239624, + "learning_rate": 3.002306232598497e-05, + "loss": 2.3833, + "num_input_tokens_seen": 3511056, + "step": 345 + }, + { + "epoch": 2.639019792648445, + "grad_norm": 4.905932426452637, + "learning_rate": 2.9536385528937567e-05, + "loss": 2.3794, + "num_input_tokens_seen": 3562352, + "step": 350 + }, + { + "epoch": 2.6767200754005653, + "grad_norm": 4.684591770172119, + "learning_rate": 2.9047924361076345e-05, + "loss": 2.4883, + "num_input_tokens_seen": 3615664, + "step": 355 + }, + { + "epoch": 2.7144203581526862, + "grad_norm": 4.377009868621826, + "learning_rate": 2.8557870956832132e-05, + "loss": 2.3423, + "num_input_tokens_seen": 3661424, + "step": 360 + }, + { + "epoch": 2.7521206409048067, + "grad_norm": 5.146539688110352, + "learning_rate": 2.8066418076936167e-05, + "loss": 2.4092, + "num_input_tokens_seen": 3710592, + "step": 365 + }, + { + "epoch": 2.7898209236569276, + "grad_norm": 5.5937910079956055, + "learning_rate": 2.7573759032598366e-05, + "loss": 2.4818, + 
"num_input_tokens_seen": 3765728, + "step": 370 + }, + { + "epoch": 2.827521206409048, + "grad_norm": 4.4958696365356445, + "learning_rate": 2.7080087609469062e-05, + "loss": 2.4587, + "num_input_tokens_seen": 3815360, + "step": 375 + }, + { + "epoch": 2.8652214891611685, + "grad_norm": 4.503344535827637, + "learning_rate": 2.6585597991414114e-05, + "loss": 2.3462, + "num_input_tokens_seen": 3868096, + "step": 380 + }, + { + "epoch": 2.9029217719132894, + "grad_norm": 4.773792743682861, + "learning_rate": 2.6090484684133404e-05, + "loss": 2.3713, + "num_input_tokens_seen": 3913696, + "step": 385 + }, + { + "epoch": 2.9406220546654103, + "grad_norm": 4.8644537925720215, + "learning_rate": 2.5594942438652688e-05, + "loss": 2.4618, + "num_input_tokens_seen": 3971840, + "step": 390 + }, + { + "epoch": 2.9783223374175307, + "grad_norm": 5.539215087890625, + "learning_rate": 2.509916617471903e-05, + "loss": 2.5454, + "num_input_tokens_seen": 4025040, + "step": 395 + }, + { + "epoch": 3.016022620169651, + "grad_norm": 4.475778579711914, + "learning_rate": 2.46033509041298e-05, + "loss": 2.3007, + "num_input_tokens_seen": 4075488, + "step": 400 + }, + { + "epoch": 3.053722902921772, + "grad_norm": 4.82028341293335, + "learning_rate": 2.410769165402549e-05, + "loss": 2.274, + "num_input_tokens_seen": 4130496, + "step": 405 + }, + { + "epoch": 3.0914231856738925, + "grad_norm": 5.513036251068115, + "learning_rate": 2.3612383390176503e-05, + "loss": 2.2457, + "num_input_tokens_seen": 4181504, + "step": 410 + }, + { + "epoch": 3.1291234684260134, + "grad_norm": 5.219841957092285, + "learning_rate": 2.3117620940294048e-05, + "loss": 2.2595, + "num_input_tokens_seen": 4236816, + "step": 415 + }, + { + "epoch": 3.166823751178134, + "grad_norm": 5.527017593383789, + "learning_rate": 2.2623598917395438e-05, + "loss": 2.2326, + "num_input_tokens_seen": 4289488, + "step": 420 + }, + { + "epoch": 3.2045240339302543, + "grad_norm": 5.297417163848877, + "learning_rate": 
2.213051164325366e-05, + "loss": 2.2595, + "num_input_tokens_seen": 4334704, + "step": 425 + }, + { + "epoch": 3.242224316682375, + "grad_norm": 5.43293571472168, + "learning_rate": 2.1638553071961708e-05, + "loss": 2.2084, + "num_input_tokens_seen": 4377360, + "step": 430 + }, + { + "epoch": 3.2799245994344957, + "grad_norm": 5.788747310638428, + "learning_rate": 2.1147916713641367e-05, + "loss": 2.21, + "num_input_tokens_seen": 4428544, + "step": 435 + }, + { + "epoch": 3.3176248821866166, + "grad_norm": 6.185176372528076, + "learning_rate": 2.0658795558326743e-05, + "loss": 2.2449, + "num_input_tokens_seen": 4477664, + "step": 440 + }, + { + "epoch": 3.355325164938737, + "grad_norm": 5.564029693603516, + "learning_rate": 2.017138200005236e-05, + "loss": 2.2457, + "num_input_tokens_seen": 4533792, + "step": 445 + }, + { + "epoch": 3.3930254476908575, + "grad_norm": 5.952132225036621, + "learning_rate": 1.9685867761175584e-05, + "loss": 2.3035, + "num_input_tokens_seen": 4584800, + "step": 450 + }, + { + "epoch": 3.4307257304429783, + "grad_norm": 5.484558582305908, + "learning_rate": 1.9202443816963425e-05, + "loss": 2.3204, + "num_input_tokens_seen": 4634976, + "step": 455 + }, + { + "epoch": 3.468426013195099, + "grad_norm": 6.121850490570068, + "learning_rate": 1.872130032047302e-05, + "loss": 2.2055, + "num_input_tokens_seen": 4691344, + "step": 460 + }, + { + "epoch": 3.5061262959472197, + "grad_norm": 6.345475673675537, + "learning_rate": 1.824262652775568e-05, + "loss": 2.3327, + "num_input_tokens_seen": 4745536, + "step": 465 + }, + { + "epoch": 3.54382657869934, + "grad_norm": 6.4370574951171875, + "learning_rate": 1.7766610723413684e-05, + "loss": 2.2214, + "num_input_tokens_seen": 4798128, + "step": 470 + }, + { + "epoch": 3.581526861451461, + "grad_norm": 6.708219528198242, + "learning_rate": 1.7293440146539196e-05, + "loss": 2.3097, + "num_input_tokens_seen": 4847632, + "step": 475 + }, + { + "epoch": 3.6192271442035815, + "grad_norm": 
5.383622169494629, + "learning_rate": 1.682330091706446e-05, + "loss": 2.2966, + "num_input_tokens_seen": 4905648, + "step": 480 + }, + { + "epoch": 3.6569274269557024, + "grad_norm": 6.210061550140381, + "learning_rate": 1.6356377962552238e-05, + "loss": 2.2006, + "num_input_tokens_seen": 4955600, + "step": 485 + }, + { + "epoch": 3.694627709707823, + "grad_norm": 6.637734889984131, + "learning_rate": 1.589285494545514e-05, + "loss": 2.2463, + "num_input_tokens_seen": 5007424, + "step": 490 + }, + { + "epoch": 3.7323279924599433, + "grad_norm": 5.893795967102051, + "learning_rate": 1.5432914190872757e-05, + "loss": 2.2045, + "num_input_tokens_seen": 5058848, + "step": 495 + }, + { + "epoch": 3.770028275212064, + "grad_norm": 5.891578674316406, + "learning_rate": 1.4976736614834664e-05, + "loss": 2.135, + "num_input_tokens_seen": 5109904, + "step": 500 + }, + { + "epoch": 3.8077285579641846, + "grad_norm": 6.888275623321533, + "learning_rate": 1.4524501653137787e-05, + "loss": 2.3023, + "num_input_tokens_seen": 5156496, + "step": 505 + }, + { + "epoch": 3.8454288407163055, + "grad_norm": 5.987553119659424, + "learning_rate": 1.4076387190766017e-05, + "loss": 2.2636, + "num_input_tokens_seen": 5207824, + "step": 510 + }, + { + "epoch": 3.883129123468426, + "grad_norm": 6.791808128356934, + "learning_rate": 1.363256949191972e-05, + "loss": 2.1942, + "num_input_tokens_seen": 5256608, + "step": 515 + }, + { + "epoch": 3.9208294062205464, + "grad_norm": 6.749192714691162, + "learning_rate": 1.3193223130682936e-05, + "loss": 2.2731, + "num_input_tokens_seen": 5305584, + "step": 520 + }, + { + "epoch": 3.9585296889726673, + "grad_norm": 5.9771647453308105, + "learning_rate": 1.2758520922355226e-05, + "loss": 2.1669, + "num_input_tokens_seen": 5349712, + "step": 525 + }, + { + "epoch": 3.9962299717247878, + "grad_norm": 6.35679817199707, + "learning_rate": 1.2328633855475429e-05, + "loss": 2.2146, + "num_input_tokens_seen": 5400336, + "step": 530 + }, + { + "epoch": 
4.033930254476909, + "grad_norm": 6.124676704406738, + "learning_rate": 1.1903731024563966e-05, + "loss": 2.0324, + "num_input_tokens_seen": 5451776, + "step": 535 + }, + { + "epoch": 4.071630537229029, + "grad_norm": 6.622769355773926, + "learning_rate": 1.148397956361007e-05, + "loss": 2.0762, + "num_input_tokens_seen": 5507520, + "step": 540 + }, + { + "epoch": 4.10933081998115, + "grad_norm": 7.076972961425781, + "learning_rate": 1.106954458033026e-05, + "loss": 2.0949, + "num_input_tokens_seen": 5562688, + "step": 545 + }, + { + "epoch": 4.147031102733271, + "grad_norm": 6.695667266845703, + "learning_rate": 1.0660589091223855e-05, + "loss": 2.1853, + "num_input_tokens_seen": 5610800, + "step": 550 + }, + { + "epoch": 4.184731385485391, + "grad_norm": 7.716992378234863, + "learning_rate": 1.025727395745095e-05, + "loss": 2.1706, + "num_input_tokens_seen": 5657616, + "step": 555 + }, + { + "epoch": 4.222431668237512, + "grad_norm": 6.8223419189453125, + "learning_rate": 9.859757821558337e-06, + "loss": 2.1199, + "num_input_tokens_seen": 5710960, + "step": 560 + }, + { + "epoch": 4.260131950989632, + "grad_norm": 6.87358283996582, + "learning_rate": 9.468197045077976e-06, + "loss": 2.0353, + "num_input_tokens_seen": 5756608, + "step": 565 + }, + { + "epoch": 4.297832233741753, + "grad_norm": 7.660863399505615, + "learning_rate": 9.082745647022797e-06, + "loss": 2.1101, + "num_input_tokens_seen": 5812304, + "step": 570 + }, + { + "epoch": 4.335532516493874, + "grad_norm": 7.863148212432861, + "learning_rate": 8.703555243303835e-06, + "loss": 2.1076, + "num_input_tokens_seen": 5859808, + "step": 575 + }, + { + "epoch": 4.3732327992459945, + "grad_norm": 7.298150539398193, + "learning_rate": 8.330774987092712e-06, + "loss": 2.0305, + "num_input_tokens_seen": 5908784, + "step": 580 + }, + { + "epoch": 4.410933081998115, + "grad_norm": 6.835300922393799, + "learning_rate": 7.96455151015272e-06, + "loss": 2.1132, + "num_input_tokens_seen": 5958672, + "step": 585 + }, 
+ { + "epoch": 4.448633364750235, + "grad_norm": 6.710065841674805, + "learning_rate": 7.605028865161809e-06, + "loss": 2.1527, + "num_input_tokens_seen": 6010720, + "step": 590 + }, + { + "epoch": 4.486333647502356, + "grad_norm": 6.827284812927246, + "learning_rate": 7.25234846904993e-06, + "loss": 2.0948, + "num_input_tokens_seen": 6061440, + "step": 595 + }, + { + "epoch": 4.524033930254477, + "grad_norm": 7.066997528076172, + "learning_rate": 6.906649047373246e-06, + "loss": 2.1222, + "num_input_tokens_seen": 6115216, + "step": 600 + } + ], + "logging_steps": 5, + "max_steps": 792, + "num_input_tokens_seen": 6115216, + "num_train_epochs": 6, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 7.466316423443251e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-600/training_args.bin b/checkpoint-600/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d325c35219dd8c0eea8279f17307e1d0d8c29b12 --- /dev/null +++ b/checkpoint-600/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72d28ce688790f3a28f1dba3064866610463622e1d4141255300e136617f5673 +size 5432 diff --git a/checkpoint-700/README.md b/checkpoint-700/README.md new file mode 100644 index 0000000000000000000000000000000000000000..9f8f0d832be44b45f0bbeda414aed6d1440221e7 --- /dev/null +++ b/checkpoint-700/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/gemma-2-2b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information 
Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.12.0 \ No newline at end of file diff --git a/checkpoint-700/adapter_config.json b/checkpoint-700/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..84f5e74794795afa599846c26565a6f523155345 --- /dev/null +++ b/checkpoint-700/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/gemma-2-2b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "k_proj", + "o_proj", + "down_proj", + "up_proj", + "q_proj", + "gate_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-700/adapter_model.safetensors 
b/checkpoint-700/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..38e73691e6a0d172922af30e01cbc0dd3334439b --- /dev/null +++ b/checkpoint-700/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bfe59b185037502f2ab497f0254e1c02b625d0c7537e3a4c145547c137ff0cc +size 41581360 diff --git a/checkpoint-700/optimizer.pt b/checkpoint-700/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..bb7569e34bfc8758e240580805d5873ff928a95b --- /dev/null +++ b/checkpoint-700/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5b30f27413f61fdcbf0004200abd2f81f16900e842bb8d0ce610d9dd3e4ef6c +size 83372758 diff --git a/checkpoint-700/rng_state_0.pth b/checkpoint-700/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..07a546a3d8fa499648a42db76ea9733d09e5ca98 --- /dev/null +++ b/checkpoint-700/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7a17ffe4d1cfad70857491e1fd7e427c0413a789e2cb4398c4af3ca8efd92a5 +size 14512 diff --git a/checkpoint-700/rng_state_1.pth b/checkpoint-700/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..5552726456b4cc7d1cc941b486f870e723d6ab42 --- /dev/null +++ b/checkpoint-700/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8430d63cfb7960c36461376f5e1ef952c23b5128eae3a1f763753f4c308fd4aa +size 14512 diff --git a/checkpoint-700/scheduler.pt b/checkpoint-700/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d06407581f27555700c0fbd2722a54c288fe6f84 --- /dev/null +++ b/checkpoint-700/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e73e049cc9ae3a3165d3a65ed3a46b19ca504dd75a711ee23051792926d4b31 +size 1064 diff --git a/checkpoint-700/special_tokens_map.json b/checkpoint-700/special_tokens_map.json new file 
mode 100644 index 0000000000000000000000000000000000000000..8d6368f7e735fbe4781bf6e956b7c6ad0586df80 --- /dev/null +++ b/checkpoint-700/special_tokens_map.json @@ -0,0 +1,34 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-700/tokenizer.json b/checkpoint-700/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..a4a305d1de4d8f47c0252b4d7fe65a10dd8e2c22 --- /dev/null +++ b/checkpoint-700/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f7eee611703c5ce5d1eee32d9cdcfe465647b8aff0c1dfb3bed7ad7dbb05060 +size 34362873 diff --git a/checkpoint-700/tokenizer.model b/checkpoint-700/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/checkpoint-700/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/checkpoint-700/tokenizer_config.json b/checkpoint-700/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..97f33b16e8481f3849fb6d02d1656628e9e22c91 --- /dev/null +++ b/checkpoint-700/tokenizer_config.json @@ -0,0 +1,2015 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { 
+ "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": 
{ + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": 
{ + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": 
"\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { 
+ "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255968": { + "content": "[toxicity=0]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255969": { + "content": "\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255970": { + "content": "\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255971": { + "content": "\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + 
"255972": { + "content": "\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255973": { + "content": "\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255974": { + "content": "\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255975": { + "content": "\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255976": { + "content": "\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255977": { + "content": "\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255978": { + "content": "\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255979": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255980": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255981": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255982": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255983": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255984": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255985": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255986": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255987": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255988": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255989": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255990": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255991": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255992": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255993": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255994": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255995": { + "content": 
"\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255996": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255997": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255998": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255999": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ system_message + '\n' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ 'Human: ' + content + '\nAssistant:' }}{% elif message['role'] == 'assistant' %}{{ content + '' + '\n' }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "padding_side": "right", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "split_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/checkpoint-700/trainer_state.json b/checkpoint-700/trainer_state.json new file mode 100644 index 
0000000000000000000000000000000000000000..c882d0ab414879f33b275b3c0b718401fe3b29ee --- /dev/null +++ b/checkpoint-700/trainer_state.json @@ -0,0 +1,1153 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 5.27803958529689, + "eval_steps": 500, + "global_step": 700, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.03770028275212064, + "grad_norm": 3.2918148040771484, + "learning_rate": 4.9995083170283816e-05, + "loss": 4.3742, + "num_input_tokens_seen": 50544, + "step": 5 + }, + { + "epoch": 0.07540056550424128, + "grad_norm": 3.0331127643585205, + "learning_rate": 4.998033461515242e-05, + "loss": 3.8434, + "num_input_tokens_seen": 104080, + "step": 10 + }, + { + "epoch": 0.11310084825636192, + "grad_norm": 3.322950601577759, + "learning_rate": 4.9955760135896534e-05, + "loss": 3.6027, + "num_input_tokens_seen": 155776, + "step": 15 + }, + { + "epoch": 0.15080113100848255, + "grad_norm": 2.7705531120300293, + "learning_rate": 4.992136939879856e-05, + "loss": 3.4102, + "num_input_tokens_seen": 203184, + "step": 20 + }, + { + "epoch": 0.1885014137606032, + "grad_norm": 3.4572091102600098, + "learning_rate": 4.9877175931330346e-05, + "loss": 3.153, + "num_input_tokens_seen": 255744, + "step": 25 + }, + { + "epoch": 0.22620169651272384, + "grad_norm": 2.0412893295288086, + "learning_rate": 4.982319711683221e-05, + "loss": 3.0949, + "num_input_tokens_seen": 307808, + "step": 30 + }, + { + "epoch": 0.2639019792648445, + "grad_norm": 2.0765998363494873, + "learning_rate": 4.975945418767529e-05, + "loss": 3.0392, + "num_input_tokens_seen": 358192, + "step": 35 + }, + { + "epoch": 0.3016022620169651, + "grad_norm": 2.2433698177337646, + "learning_rate": 4.968597221690986e-05, + "loss": 3.0685, + "num_input_tokens_seen": 408832, + "step": 40 + }, + { + "epoch": 0.3393025447690858, + "grad_norm": 2.7639973163604736, + "learning_rate": 4.96027801084029e-05, + "loss": 
3.0385, + "num_input_tokens_seen": 458368, + "step": 45 + }, + { + "epoch": 0.3770028275212064, + "grad_norm": 2.0680019855499268, + "learning_rate": 4.950991058546893e-05, + "loss": 3.1079, + "num_input_tokens_seen": 512160, + "step": 50 + }, + { + "epoch": 0.41470311027332707, + "grad_norm": 2.181554079055786, + "learning_rate": 4.940740017799833e-05, + "loss": 2.9883, + "num_input_tokens_seen": 562928, + "step": 55 + }, + { + "epoch": 0.4524033930254477, + "grad_norm": 2.0724411010742188, + "learning_rate": 4.929528920808854e-05, + "loss": 2.969, + "num_input_tokens_seen": 613072, + "step": 60 + }, + { + "epoch": 0.49010367577756836, + "grad_norm": 2.5529158115386963, + "learning_rate": 4.917362177418342e-05, + "loss": 3.0019, + "num_input_tokens_seen": 664640, + "step": 65 + }, + { + "epoch": 0.527803958529689, + "grad_norm": 2.3983142375946045, + "learning_rate": 4.904244573372733e-05, + "loss": 2.9984, + "num_input_tokens_seen": 716800, + "step": 70 + }, + { + "epoch": 0.5655042412818096, + "grad_norm": 2.206209182739258, + "learning_rate": 4.8901812684340564e-05, + "loss": 2.9848, + "num_input_tokens_seen": 766528, + "step": 75 + }, + { + "epoch": 0.6032045240339302, + "grad_norm": 2.22698974609375, + "learning_rate": 4.8751777943523634e-05, + "loss": 2.9169, + "num_input_tokens_seen": 817376, + "step": 80 + }, + { + "epoch": 0.6409048067860509, + "grad_norm": 2.4569709300994873, + "learning_rate": 4.8592400526898314e-05, + "loss": 2.8844, + "num_input_tokens_seen": 864688, + "step": 85 + }, + { + "epoch": 0.6786050895381716, + "grad_norm": 2.195237636566162, + "learning_rate": 4.842374312499405e-05, + "loss": 2.9422, + "num_input_tokens_seen": 913216, + "step": 90 + }, + { + "epoch": 0.7163053722902922, + "grad_norm": 2.54585599899292, + "learning_rate": 4.824587207858888e-05, + "loss": 2.8431, + "num_input_tokens_seen": 964144, + "step": 95 + }, + { + "epoch": 0.7540056550424128, + "grad_norm": 2.3841867446899414, + "learning_rate": 4.805885735261454e-05, 
+ "loss": 2.8377, + "num_input_tokens_seen": 1018768, + "step": 100 + }, + { + "epoch": 0.7917059377945335, + "grad_norm": 2.359386920928955, + "learning_rate": 4.786277250863599e-05, + "loss": 2.7843, + "num_input_tokens_seen": 1070592, + "step": 105 + }, + { + "epoch": 0.8294062205466541, + "grad_norm": 2.0952038764953613, + "learning_rate": 4.765769467591625e-05, + "loss": 2.8218, + "num_input_tokens_seen": 1125392, + "step": 110 + }, + { + "epoch": 0.8671065032987747, + "grad_norm": 2.222754955291748, + "learning_rate": 4.744370452107789e-05, + "loss": 2.877, + "num_input_tokens_seen": 1176256, + "step": 115 + }, + { + "epoch": 0.9048067860508954, + "grad_norm": 2.3233537673950195, + "learning_rate": 4.722088621637309e-05, + "loss": 2.9028, + "num_input_tokens_seen": 1225376, + "step": 120 + }, + { + "epoch": 0.942507068803016, + "grad_norm": 2.400575876235962, + "learning_rate": 4.698932740657479e-05, + "loss": 2.762, + "num_input_tokens_seen": 1277024, + "step": 125 + }, + { + "epoch": 0.9802073515551367, + "grad_norm": 2.592116117477417, + "learning_rate": 4.6749119174501975e-05, + "loss": 2.9164, + "num_input_tokens_seen": 1322912, + "step": 130 + }, + { + "epoch": 1.0179076343072573, + "grad_norm": 2.563326120376587, + "learning_rate": 4.6500356005192514e-05, + "loss": 2.736, + "num_input_tokens_seen": 1368624, + "step": 135 + }, + { + "epoch": 1.055607917059378, + "grad_norm": 2.3927371501922607, + "learning_rate": 4.6243135748737864e-05, + "loss": 2.7414, + "num_input_tokens_seen": 1417664, + "step": 140 + }, + { + "epoch": 1.0933081998114986, + "grad_norm": 2.597975492477417, + "learning_rate": 4.597755958179406e-05, + "loss": 2.6961, + "num_input_tokens_seen": 1469120, + "step": 145 + }, + { + "epoch": 1.1310084825636193, + "grad_norm": 2.932581663131714, + "learning_rate": 4.570373196778427e-05, + "loss": 2.6134, + "num_input_tokens_seen": 1521632, + "step": 150 + }, + { + "epoch": 1.1687087653157398, + "grad_norm": 3.28389310836792, + 
"learning_rate": 4.5421760615808474e-05, + "loss": 2.7548, + "num_input_tokens_seen": 1565296, + "step": 155 + }, + { + "epoch": 1.2064090480678604, + "grad_norm": 2.927279233932495, + "learning_rate": 4.513175643827647e-05, + "loss": 2.702, + "num_input_tokens_seen": 1617088, + "step": 160 + }, + { + "epoch": 1.244109330819981, + "grad_norm": 2.840573310852051, + "learning_rate": 4.4833833507280884e-05, + "loss": 2.6893, + "num_input_tokens_seen": 1663584, + "step": 165 + }, + { + "epoch": 1.2818096135721018, + "grad_norm": 2.6322081089019775, + "learning_rate": 4.4528109009727336e-05, + "loss": 2.5671, + "num_input_tokens_seen": 1713744, + "step": 170 + }, + { + "epoch": 1.3195098963242224, + "grad_norm": 3.1280879974365234, + "learning_rate": 4.42147032012394e-05, + "loss": 2.7682, + "num_input_tokens_seen": 1762768, + "step": 175 + }, + { + "epoch": 1.3572101790763431, + "grad_norm": 3.1128265857696533, + "learning_rate": 4.389373935885646e-05, + "loss": 2.7062, + "num_input_tokens_seen": 1815808, + "step": 180 + }, + { + "epoch": 1.3949104618284638, + "grad_norm": 2.952150344848633, + "learning_rate": 4.356534373254316e-05, + "loss": 2.7092, + "num_input_tokens_seen": 1871040, + "step": 185 + }, + { + "epoch": 1.4326107445805842, + "grad_norm": 3.0146102905273438, + "learning_rate": 4.322964549552943e-05, + "loss": 2.6518, + "num_input_tokens_seen": 1924048, + "step": 190 + }, + { + "epoch": 1.4703110273327051, + "grad_norm": 2.999300956726074, + "learning_rate": 4.288677669350066e-05, + "loss": 2.6592, + "num_input_tokens_seen": 1972720, + "step": 195 + }, + { + "epoch": 1.5080113100848256, + "grad_norm": 3.575253963470459, + "learning_rate": 4.2536872192658036e-05, + "loss": 2.716, + "num_input_tokens_seen": 2022112, + "step": 200 + }, + { + "epoch": 1.5457115928369463, + "grad_norm": 3.1339428424835205, + "learning_rate": 4.218006962666934e-05, + "loss": 2.6746, + "num_input_tokens_seen": 2072000, + "step": 205 + }, + { + "epoch": 1.583411875589067, + 
"grad_norm": 3.130823850631714, + "learning_rate": 4.181650934253132e-05, + "loss": 2.6306, + "num_input_tokens_seen": 2125632, + "step": 210 + }, + { + "epoch": 1.6211121583411876, + "grad_norm": 3.1828997135162354, + "learning_rate": 4.144633434536467e-05, + "loss": 2.5678, + "num_input_tokens_seen": 2174464, + "step": 215 + }, + { + "epoch": 1.6588124410933083, + "grad_norm": 3.3879778385162354, + "learning_rate": 4.1069690242163484e-05, + "loss": 2.7334, + "num_input_tokens_seen": 2223408, + "step": 220 + }, + { + "epoch": 1.6965127238454287, + "grad_norm": 3.5921592712402344, + "learning_rate": 4.06867251845213e-05, + "loss": 2.6658, + "num_input_tokens_seen": 2281296, + "step": 225 + }, + { + "epoch": 1.7342130065975496, + "grad_norm": 3.4126648902893066, + "learning_rate": 4.0297589810356165e-05, + "loss": 2.6567, + "num_input_tokens_seen": 2334176, + "step": 230 + }, + { + "epoch": 1.77191328934967, + "grad_norm": 3.504786252975464, + "learning_rate": 3.9902437184657784e-05, + "loss": 2.6131, + "num_input_tokens_seen": 2389296, + "step": 235 + }, + { + "epoch": 1.8096135721017907, + "grad_norm": 3.459768056869507, + "learning_rate": 3.9501422739279956e-05, + "loss": 2.662, + "num_input_tokens_seen": 2442800, + "step": 240 + }, + { + "epoch": 1.8473138548539114, + "grad_norm": 2.944490909576416, + "learning_rate": 3.909470421180201e-05, + "loss": 2.6339, + "num_input_tokens_seen": 2494912, + "step": 245 + }, + { + "epoch": 1.885014137606032, + "grad_norm": 3.4897429943084717, + "learning_rate": 3.8682441583483314e-05, + "loss": 2.6338, + "num_input_tokens_seen": 2544624, + "step": 250 + }, + { + "epoch": 1.9227144203581528, + "grad_norm": 3.15039324760437, + "learning_rate": 3.8264797016335205e-05, + "loss": 2.6844, + "num_input_tokens_seen": 2597792, + "step": 255 + }, + { + "epoch": 1.9604147031102732, + "grad_norm": 3.5575578212738037, + "learning_rate": 3.7841934789335164e-05, + "loss": 2.5939, + "num_input_tokens_seen": 2646544, + "step": 260 + }, + { + 
"epoch": 1.998114985862394, + "grad_norm": 3.97521710395813, + "learning_rate": 3.741402123380828e-05, + "loss": 2.7393, + "num_input_tokens_seen": 2700224, + "step": 265 + }, + { + "epoch": 2.0358152686145146, + "grad_norm": 3.1736276149749756, + "learning_rate": 3.6981224668001424e-05, + "loss": 2.4495, + "num_input_tokens_seen": 2749104, + "step": 270 + }, + { + "epoch": 2.0735155513666355, + "grad_norm": 3.633033275604248, + "learning_rate": 3.654371533087586e-05, + "loss": 2.3823, + "num_input_tokens_seen": 2802992, + "step": 275 + }, + { + "epoch": 2.111215834118756, + "grad_norm": 3.4200243949890137, + "learning_rate": 3.610166531514436e-05, + "loss": 2.474, + "num_input_tokens_seen": 2844512, + "step": 280 + }, + { + "epoch": 2.1489161168708764, + "grad_norm": 4.055693626403809, + "learning_rate": 3.565524849957921e-05, + "loss": 2.4776, + "num_input_tokens_seen": 2894400, + "step": 285 + }, + { + "epoch": 2.1866163996229973, + "grad_norm": 4.254647254943848, + "learning_rate": 3.520464048061758e-05, + "loss": 2.4709, + "num_input_tokens_seen": 2944688, + "step": 290 + }, + { + "epoch": 2.2243166823751177, + "grad_norm": 4.337226390838623, + "learning_rate": 3.47500185032913e-05, + "loss": 2.5009, + "num_input_tokens_seen": 2994672, + "step": 295 + }, + { + "epoch": 2.2620169651272386, + "grad_norm": 4.424121379852295, + "learning_rate": 3.4291561391508185e-05, + "loss": 2.4524, + "num_input_tokens_seen": 3045344, + "step": 300 + }, + { + "epoch": 2.299717247879359, + "grad_norm": 4.178481101989746, + "learning_rate": 3.3829449477712324e-05, + "loss": 2.4083, + "num_input_tokens_seen": 3100688, + "step": 305 + }, + { + "epoch": 2.3374175306314795, + "grad_norm": 4.315364837646484, + "learning_rate": 3.336386453195088e-05, + "loss": 2.4713, + "num_input_tokens_seen": 3154896, + "step": 310 + }, + { + "epoch": 2.3751178133836004, + "grad_norm": 4.445486545562744, + "learning_rate": 3.2894989690375626e-05, + "loss": 2.3986, + "num_input_tokens_seen": 3209120, 
+ "step": 315 + }, + { + "epoch": 2.412818096135721, + "grad_norm": 4.80281400680542, + "learning_rate": 3.2423009383206876e-05, + "loss": 2.4258, + "num_input_tokens_seen": 3257984, + "step": 320 + }, + { + "epoch": 2.4505183788878417, + "grad_norm": 4.705103397369385, + "learning_rate": 3.194810926218861e-05, + "loss": 2.4126, + "num_input_tokens_seen": 3309424, + "step": 325 + }, + { + "epoch": 2.488218661639962, + "grad_norm": 4.281659126281738, + "learning_rate": 3.147047612756302e-05, + "loss": 2.3639, + "num_input_tokens_seen": 3359152, + "step": 330 + }, + { + "epoch": 2.525918944392083, + "grad_norm": 4.493980407714844, + "learning_rate": 3.099029785459328e-05, + "loss": 2.5176, + "num_input_tokens_seen": 3406784, + "step": 335 + }, + { + "epoch": 2.5636192271442035, + "grad_norm": 4.339905261993408, + "learning_rate": 3.0507763319663517e-05, + "loss": 2.4779, + "num_input_tokens_seen": 3459328, + "step": 340 + }, + { + "epoch": 2.6013195098963244, + "grad_norm": 4.67468786239624, + "learning_rate": 3.002306232598497e-05, + "loss": 2.3833, + "num_input_tokens_seen": 3511056, + "step": 345 + }, + { + "epoch": 2.639019792648445, + "grad_norm": 4.905932426452637, + "learning_rate": 2.9536385528937567e-05, + "loss": 2.3794, + "num_input_tokens_seen": 3562352, + "step": 350 + }, + { + "epoch": 2.6767200754005653, + "grad_norm": 4.684591770172119, + "learning_rate": 2.9047924361076345e-05, + "loss": 2.4883, + "num_input_tokens_seen": 3615664, + "step": 355 + }, + { + "epoch": 2.7144203581526862, + "grad_norm": 4.377009868621826, + "learning_rate": 2.8557870956832132e-05, + "loss": 2.3423, + "num_input_tokens_seen": 3661424, + "step": 360 + }, + { + "epoch": 2.7521206409048067, + "grad_norm": 5.146539688110352, + "learning_rate": 2.8066418076936167e-05, + "loss": 2.4092, + "num_input_tokens_seen": 3710592, + "step": 365 + }, + { + "epoch": 2.7898209236569276, + "grad_norm": 5.5937910079956055, + "learning_rate": 2.7573759032598366e-05, + "loss": 2.4818, + 
"num_input_tokens_seen": 3765728, + "step": 370 + }, + { + "epoch": 2.827521206409048, + "grad_norm": 4.4958696365356445, + "learning_rate": 2.7080087609469062e-05, + "loss": 2.4587, + "num_input_tokens_seen": 3815360, + "step": 375 + }, + { + "epoch": 2.8652214891611685, + "grad_norm": 4.503344535827637, + "learning_rate": 2.6585597991414114e-05, + "loss": 2.3462, + "num_input_tokens_seen": 3868096, + "step": 380 + }, + { + "epoch": 2.9029217719132894, + "grad_norm": 4.773792743682861, + "learning_rate": 2.6090484684133404e-05, + "loss": 2.3713, + "num_input_tokens_seen": 3913696, + "step": 385 + }, + { + "epoch": 2.9406220546654103, + "grad_norm": 4.8644537925720215, + "learning_rate": 2.5594942438652688e-05, + "loss": 2.4618, + "num_input_tokens_seen": 3971840, + "step": 390 + }, + { + "epoch": 2.9783223374175307, + "grad_norm": 5.539215087890625, + "learning_rate": 2.509916617471903e-05, + "loss": 2.5454, + "num_input_tokens_seen": 4025040, + "step": 395 + }, + { + "epoch": 3.016022620169651, + "grad_norm": 4.475778579711914, + "learning_rate": 2.46033509041298e-05, + "loss": 2.3007, + "num_input_tokens_seen": 4075488, + "step": 400 + }, + { + "epoch": 3.053722902921772, + "grad_norm": 4.82028341293335, + "learning_rate": 2.410769165402549e-05, + "loss": 2.274, + "num_input_tokens_seen": 4130496, + "step": 405 + }, + { + "epoch": 3.0914231856738925, + "grad_norm": 5.513036251068115, + "learning_rate": 2.3612383390176503e-05, + "loss": 2.2457, + "num_input_tokens_seen": 4181504, + "step": 410 + }, + { + "epoch": 3.1291234684260134, + "grad_norm": 5.219841957092285, + "learning_rate": 2.3117620940294048e-05, + "loss": 2.2595, + "num_input_tokens_seen": 4236816, + "step": 415 + }, + { + "epoch": 3.166823751178134, + "grad_norm": 5.527017593383789, + "learning_rate": 2.2623598917395438e-05, + "loss": 2.2326, + "num_input_tokens_seen": 4289488, + "step": 420 + }, + { + "epoch": 3.2045240339302543, + "grad_norm": 5.297417163848877, + "learning_rate": 
2.213051164325366e-05, + "loss": 2.2595, + "num_input_tokens_seen": 4334704, + "step": 425 + }, + { + "epoch": 3.242224316682375, + "grad_norm": 5.43293571472168, + "learning_rate": 2.1638553071961708e-05, + "loss": 2.2084, + "num_input_tokens_seen": 4377360, + "step": 430 + }, + { + "epoch": 3.2799245994344957, + "grad_norm": 5.788747310638428, + "learning_rate": 2.1147916713641367e-05, + "loss": 2.21, + "num_input_tokens_seen": 4428544, + "step": 435 + }, + { + "epoch": 3.3176248821866166, + "grad_norm": 6.185176372528076, + "learning_rate": 2.0658795558326743e-05, + "loss": 2.2449, + "num_input_tokens_seen": 4477664, + "step": 440 + }, + { + "epoch": 3.355325164938737, + "grad_norm": 5.564029693603516, + "learning_rate": 2.017138200005236e-05, + "loss": 2.2457, + "num_input_tokens_seen": 4533792, + "step": 445 + }, + { + "epoch": 3.3930254476908575, + "grad_norm": 5.952132225036621, + "learning_rate": 1.9685867761175584e-05, + "loss": 2.3035, + "num_input_tokens_seen": 4584800, + "step": 450 + }, + { + "epoch": 3.4307257304429783, + "grad_norm": 5.484558582305908, + "learning_rate": 1.9202443816963425e-05, + "loss": 2.3204, + "num_input_tokens_seen": 4634976, + "step": 455 + }, + { + "epoch": 3.468426013195099, + "grad_norm": 6.121850490570068, + "learning_rate": 1.872130032047302e-05, + "loss": 2.2055, + "num_input_tokens_seen": 4691344, + "step": 460 + }, + { + "epoch": 3.5061262959472197, + "grad_norm": 6.345475673675537, + "learning_rate": 1.824262652775568e-05, + "loss": 2.3327, + "num_input_tokens_seen": 4745536, + "step": 465 + }, + { + "epoch": 3.54382657869934, + "grad_norm": 6.4370574951171875, + "learning_rate": 1.7766610723413684e-05, + "loss": 2.2214, + "num_input_tokens_seen": 4798128, + "step": 470 + }, + { + "epoch": 3.581526861451461, + "grad_norm": 6.708219528198242, + "learning_rate": 1.7293440146539196e-05, + "loss": 2.3097, + "num_input_tokens_seen": 4847632, + "step": 475 + }, + { + "epoch": 3.6192271442035815, + "grad_norm": 
5.383622169494629, + "learning_rate": 1.682330091706446e-05, + "loss": 2.2966, + "num_input_tokens_seen": 4905648, + "step": 480 + }, + { + "epoch": 3.6569274269557024, + "grad_norm": 6.210061550140381, + "learning_rate": 1.6356377962552238e-05, + "loss": 2.2006, + "num_input_tokens_seen": 4955600, + "step": 485 + }, + { + "epoch": 3.694627709707823, + "grad_norm": 6.637734889984131, + "learning_rate": 1.589285494545514e-05, + "loss": 2.2463, + "num_input_tokens_seen": 5007424, + "step": 490 + }, + { + "epoch": 3.7323279924599433, + "grad_norm": 5.893795967102051, + "learning_rate": 1.5432914190872757e-05, + "loss": 2.2045, + "num_input_tokens_seen": 5058848, + "step": 495 + }, + { + "epoch": 3.770028275212064, + "grad_norm": 5.891578674316406, + "learning_rate": 1.4976736614834664e-05, + "loss": 2.135, + "num_input_tokens_seen": 5109904, + "step": 500 + }, + { + "epoch": 3.8077285579641846, + "grad_norm": 6.888275623321533, + "learning_rate": 1.4524501653137787e-05, + "loss": 2.3023, + "num_input_tokens_seen": 5156496, + "step": 505 + }, + { + "epoch": 3.8454288407163055, + "grad_norm": 5.987553119659424, + "learning_rate": 1.4076387190766017e-05, + "loss": 2.2636, + "num_input_tokens_seen": 5207824, + "step": 510 + }, + { + "epoch": 3.883129123468426, + "grad_norm": 6.791808128356934, + "learning_rate": 1.363256949191972e-05, + "loss": 2.1942, + "num_input_tokens_seen": 5256608, + "step": 515 + }, + { + "epoch": 3.9208294062205464, + "grad_norm": 6.749192714691162, + "learning_rate": 1.3193223130682936e-05, + "loss": 2.2731, + "num_input_tokens_seen": 5305584, + "step": 520 + }, + { + "epoch": 3.9585296889726673, + "grad_norm": 5.9771647453308105, + "learning_rate": 1.2758520922355226e-05, + "loss": 2.1669, + "num_input_tokens_seen": 5349712, + "step": 525 + }, + { + "epoch": 3.9962299717247878, + "grad_norm": 6.35679817199707, + "learning_rate": 1.2328633855475429e-05, + "loss": 2.2146, + "num_input_tokens_seen": 5400336, + "step": 530 + }, + { + "epoch": 
4.033930254476909, + "grad_norm": 6.124676704406738, + "learning_rate": 1.1903731024563966e-05, + "loss": 2.0324, + "num_input_tokens_seen": 5451776, + "step": 535 + }, + { + "epoch": 4.071630537229029, + "grad_norm": 6.622769355773926, + "learning_rate": 1.148397956361007e-05, + "loss": 2.0762, + "num_input_tokens_seen": 5507520, + "step": 540 + }, + { + "epoch": 4.10933081998115, + "grad_norm": 7.076972961425781, + "learning_rate": 1.106954458033026e-05, + "loss": 2.0949, + "num_input_tokens_seen": 5562688, + "step": 545 + }, + { + "epoch": 4.147031102733271, + "grad_norm": 6.695667266845703, + "learning_rate": 1.0660589091223855e-05, + "loss": 2.1853, + "num_input_tokens_seen": 5610800, + "step": 550 + }, + { + "epoch": 4.184731385485391, + "grad_norm": 7.716992378234863, + "learning_rate": 1.025727395745095e-05, + "loss": 2.1706, + "num_input_tokens_seen": 5657616, + "step": 555 + }, + { + "epoch": 4.222431668237512, + "grad_norm": 6.8223419189453125, + "learning_rate": 9.859757821558337e-06, + "loss": 2.1199, + "num_input_tokens_seen": 5710960, + "step": 560 + }, + { + "epoch": 4.260131950989632, + "grad_norm": 6.87358283996582, + "learning_rate": 9.468197045077976e-06, + "loss": 2.0353, + "num_input_tokens_seen": 5756608, + "step": 565 + }, + { + "epoch": 4.297832233741753, + "grad_norm": 7.660863399505615, + "learning_rate": 9.082745647022797e-06, + "loss": 2.1101, + "num_input_tokens_seen": 5812304, + "step": 570 + }, + { + "epoch": 4.335532516493874, + "grad_norm": 7.863148212432861, + "learning_rate": 8.703555243303835e-06, + "loss": 2.1076, + "num_input_tokens_seen": 5859808, + "step": 575 + }, + { + "epoch": 4.3732327992459945, + "grad_norm": 7.298150539398193, + "learning_rate": 8.330774987092712e-06, + "loss": 2.0305, + "num_input_tokens_seen": 5908784, + "step": 580 + }, + { + "epoch": 4.410933081998115, + "grad_norm": 6.835300922393799, + "learning_rate": 7.96455151015272e-06, + "loss": 2.1132, + "num_input_tokens_seen": 5958672, + "step": 585 + }, 
+ { + "epoch": 4.448633364750235, + "grad_norm": 6.710065841674805, + "learning_rate": 7.605028865161809e-06, + "loss": 2.1527, + "num_input_tokens_seen": 6010720, + "step": 590 + }, + { + "epoch": 4.486333647502356, + "grad_norm": 6.827284812927246, + "learning_rate": 7.25234846904993e-06, + "loss": 2.0948, + "num_input_tokens_seen": 6061440, + "step": 595 + }, + { + "epoch": 4.524033930254477, + "grad_norm": 7.066997528076172, + "learning_rate": 6.906649047373246e-06, + "loss": 2.1222, + "num_input_tokens_seen": 6115216, + "step": 600 + }, + { + "epoch": 4.561734213006598, + "grad_norm": 7.966955184936523, + "learning_rate": 6.568066579746901e-06, + "loss": 2.0719, + "num_input_tokens_seen": 6160944, + "step": 605 + }, + { + "epoch": 4.599434495758718, + "grad_norm": 7.257175922393799, + "learning_rate": 6.2367342463579475e-06, + "loss": 2.1399, + "num_input_tokens_seen": 6209008, + "step": 610 + }, + { + "epoch": 4.6371347785108386, + "grad_norm": 7.445122241973877, + "learning_rate": 5.912782375579412e-06, + "loss": 2.0671, + "num_input_tokens_seen": 6258176, + "step": 615 + }, + { + "epoch": 4.674835061262959, + "grad_norm": 7.934208869934082, + "learning_rate": 5.596338392706077e-06, + "loss": 2.152, + "num_input_tokens_seen": 6308496, + "step": 620 + }, + { + "epoch": 4.71253534401508, + "grad_norm": 6.131651878356934, + "learning_rate": 5.2875267698322325e-06, + "loss": 2.0834, + "num_input_tokens_seen": 6358896, + "step": 625 + }, + { + "epoch": 4.750235626767201, + "grad_norm": 6.925292015075684, + "learning_rate": 4.986468976890993e-06, + "loss": 2.1373, + "num_input_tokens_seen": 6410896, + "step": 630 + }, + { + "epoch": 4.787935909519321, + "grad_norm": 6.970002174377441, + "learning_rate": 4.693283433874565e-06, + "loss": 2.1276, + "num_input_tokens_seen": 6459120, + "step": 635 + }, + { + "epoch": 4.825636192271442, + "grad_norm": 7.309933662414551, + "learning_rate": 4.408085464254183e-06, + "loss": 2.1481, + "num_input_tokens_seen": 6506048, + 
"step": 640 + }, + { + "epoch": 4.863336475023563, + "grad_norm": 6.540215492248535, + "learning_rate": 4.130987249617993e-06, + "loss": 2.0872, + "num_input_tokens_seen": 6560448, + "step": 645 + }, + { + "epoch": 4.9010367577756835, + "grad_norm": 6.6013360023498535, + "learning_rate": 3.8620977855448935e-06, + "loss": 2.1141, + "num_input_tokens_seen": 6616704, + "step": 650 + }, + { + "epoch": 4.938737040527804, + "grad_norm": 7.337521553039551, + "learning_rate": 3.601522838731461e-06, + "loss": 2.0778, + "num_input_tokens_seen": 6670192, + "step": 655 + }, + { + "epoch": 4.976437323279924, + "grad_norm": 7.133378505706787, + "learning_rate": 3.3493649053890326e-06, + "loss": 2.1827, + "num_input_tokens_seen": 6721600, + "step": 660 + }, + { + "epoch": 5.014137606032045, + "grad_norm": 7.371194839477539, + "learning_rate": 3.1057231709272077e-06, + "loss": 2.0695, + "num_input_tokens_seen": 6773936, + "step": 665 + }, + { + "epoch": 5.051837888784166, + "grad_norm": 7.494382858276367, + "learning_rate": 2.8706934709395892e-06, + "loss": 2.0689, + "num_input_tokens_seen": 6823488, + "step": 670 + }, + { + "epoch": 5.089538171536287, + "grad_norm": 7.376400947570801, + "learning_rate": 2.6443682535072177e-06, + "loss": 1.9623, + "num_input_tokens_seen": 6870576, + "step": 675 + }, + { + "epoch": 5.127238454288407, + "grad_norm": 7.1367316246032715, + "learning_rate": 2.4268365428344736e-06, + "loss": 2.0186, + "num_input_tokens_seen": 6923488, + "step": 680 + }, + { + "epoch": 5.1649387370405275, + "grad_norm": 9.0476655960083, + "learning_rate": 2.21818390423168e-06, + "loss": 1.9324, + "num_input_tokens_seen": 6968384, + "step": 685 + }, + { + "epoch": 5.202639019792649, + "grad_norm": 6.944507122039795, + "learning_rate": 2.0184924104583613e-06, + "loss": 1.9769, + "num_input_tokens_seen": 7012784, + "step": 690 + }, + { + "epoch": 5.240339302544769, + "grad_norm": 7.558785438537598, + "learning_rate": 1.8278406094401623e-06, + "loss": 1.9815, + 
"num_input_tokens_seen": 7058992, + "step": 695 + }, + { + "epoch": 5.27803958529689, + "grad_norm": 7.789961338043213, + "learning_rate": 1.6463034933723337e-06, + "loss": 2.0789, + "num_input_tokens_seen": 7107360, + "step": 700 + } + ], + "logging_steps": 5, + "max_steps": 792, + "num_input_tokens_seen": 7107360, + "num_train_epochs": 6, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 8.677665426060083e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-700/training_args.bin b/checkpoint-700/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d325c35219dd8c0eea8279f17307e1d0d8c29b12 --- /dev/null +++ b/checkpoint-700/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72d28ce688790f3a28f1dba3064866610463622e1d4141255300e136617f5673 +size 5432 diff --git a/checkpoint-792/README.md b/checkpoint-792/README.md new file mode 100644 index 0000000000000000000000000000000000000000..9f8f0d832be44b45f0bbeda414aed6d1440221e7 --- /dev/null +++ b/checkpoint-792/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/gemma-2-2b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo 
[optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.12.0 \ No newline at end of file diff --git a/checkpoint-792/adapter_config.json b/checkpoint-792/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..84f5e74794795afa599846c26565a6f523155345 --- /dev/null +++ b/checkpoint-792/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/gemma-2-2b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "k_proj", + "o_proj", + "down_proj", + "up_proj", + "q_proj", + "gate_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-792/adapter_model.safetensors 
b/checkpoint-792/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7df8a3d3ce234cbf35dc2645f7a28936757043ae --- /dev/null +++ b/checkpoint-792/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d4cae9c229b3fadbf4be0636f819d244c0aa2cded89cf97568dfc8ee4052cbd +size 41581360 diff --git a/checkpoint-792/optimizer.pt b/checkpoint-792/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..2fb15c3467ea276f0f74623f22aa69f6697b8823 --- /dev/null +++ b/checkpoint-792/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:609277d7c79e08288d84a6ff7285f85c78f3aecec5e16e7edfbcd43374b4a9b8 +size 83372758 diff --git a/checkpoint-792/rng_state_0.pth b/checkpoint-792/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..07a546a3d8fa499648a42db76ea9733d09e5ca98 --- /dev/null +++ b/checkpoint-792/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7a17ffe4d1cfad70857491e1fd7e427c0413a789e2cb4398c4af3ca8efd92a5 +size 14512 diff --git a/checkpoint-792/rng_state_1.pth b/checkpoint-792/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..5552726456b4cc7d1cc941b486f870e723d6ab42 --- /dev/null +++ b/checkpoint-792/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8430d63cfb7960c36461376f5e1ef952c23b5128eae3a1f763753f4c308fd4aa +size 14512 diff --git a/checkpoint-792/scheduler.pt b/checkpoint-792/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..20399683afba80b004d44fb0c6ae5f70cf36ae7c --- /dev/null +++ b/checkpoint-792/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f73652ede104d225238e53c163ef7c39023640a346bd2fa7bdc03199391ac285 +size 1064 diff --git a/checkpoint-792/special_tokens_map.json b/checkpoint-792/special_tokens_map.json new file 
mode 100644 index 0000000000000000000000000000000000000000..8d6368f7e735fbe4781bf6e956b7c6ad0586df80 --- /dev/null +++ b/checkpoint-792/special_tokens_map.json @@ -0,0 +1,34 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-792/tokenizer.json b/checkpoint-792/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..a4a305d1de4d8f47c0252b4d7fe65a10dd8e2c22 --- /dev/null +++ b/checkpoint-792/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f7eee611703c5ce5d1eee32d9cdcfe465647b8aff0c1dfb3bed7ad7dbb05060 +size 34362873 diff --git a/checkpoint-792/tokenizer.model b/checkpoint-792/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/checkpoint-792/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/checkpoint-792/tokenizer_config.json b/checkpoint-792/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..97f33b16e8481f3849fb6d02d1656628e9e22c91 --- /dev/null +++ b/checkpoint-792/tokenizer_config.json @@ -0,0 +1,2015 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { 
+ "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": 
{ + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": 
{ + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": 
"\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { 
+ "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255968": { + "content": "[toxicity=0]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255969": { + "content": "\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255970": { + "content": "\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255971": { + "content": "\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + 
"255972": { + "content": "\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255973": { + "content": "\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255974": { + "content": "\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255975": { + "content": "\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255976": { + "content": "\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255977": { + "content": "\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255978": { + "content": "\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255979": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255980": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255981": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255982": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255983": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255984": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255985": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255986": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255987": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255988": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255989": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255990": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255991": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255992": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255993": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255994": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255995": { + "content": 
"\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255996": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255997": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255998": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255999": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ system_message + '\n' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ 'Human: ' + content + '\nAssistant:' }}{% elif message['role'] == 'assistant' %}{{ content + '' + '\n' }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "padding_side": "right", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "split_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/checkpoint-792/trainer_state.json b/checkpoint-792/trainer_state.json new file mode 100644 index 
0000000000000000000000000000000000000000..d53ca8b98908c3930ca7deca9f676ebd7010e06e --- /dev/null +++ b/checkpoint-792/trainer_state.json @@ -0,0 +1,1297 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 5.971724787935909, + "eval_steps": 500, + "global_step": 792, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.03770028275212064, + "grad_norm": 3.2918148040771484, + "learning_rate": 4.9995083170283816e-05, + "loss": 4.3742, + "num_input_tokens_seen": 50544, + "step": 5 + }, + { + "epoch": 0.07540056550424128, + "grad_norm": 3.0331127643585205, + "learning_rate": 4.998033461515242e-05, + "loss": 3.8434, + "num_input_tokens_seen": 104080, + "step": 10 + }, + { + "epoch": 0.11310084825636192, + "grad_norm": 3.322950601577759, + "learning_rate": 4.9955760135896534e-05, + "loss": 3.6027, + "num_input_tokens_seen": 155776, + "step": 15 + }, + { + "epoch": 0.15080113100848255, + "grad_norm": 2.7705531120300293, + "learning_rate": 4.992136939879856e-05, + "loss": 3.4102, + "num_input_tokens_seen": 203184, + "step": 20 + }, + { + "epoch": 0.1885014137606032, + "grad_norm": 3.4572091102600098, + "learning_rate": 4.9877175931330346e-05, + "loss": 3.153, + "num_input_tokens_seen": 255744, + "step": 25 + }, + { + "epoch": 0.22620169651272384, + "grad_norm": 2.0412893295288086, + "learning_rate": 4.982319711683221e-05, + "loss": 3.0949, + "num_input_tokens_seen": 307808, + "step": 30 + }, + { + "epoch": 0.2639019792648445, + "grad_norm": 2.0765998363494873, + "learning_rate": 4.975945418767529e-05, + "loss": 3.0392, + "num_input_tokens_seen": 358192, + "step": 35 + }, + { + "epoch": 0.3016022620169651, + "grad_norm": 2.2433698177337646, + "learning_rate": 4.968597221690986e-05, + "loss": 3.0685, + "num_input_tokens_seen": 408832, + "step": 40 + }, + { + "epoch": 0.3393025447690858, + "grad_norm": 2.7639973163604736, + "learning_rate": 4.96027801084029e-05, + "loss": 
3.0385, + "num_input_tokens_seen": 458368, + "step": 45 + }, + { + "epoch": 0.3770028275212064, + "grad_norm": 2.0680019855499268, + "learning_rate": 4.950991058546893e-05, + "loss": 3.1079, + "num_input_tokens_seen": 512160, + "step": 50 + }, + { + "epoch": 0.41470311027332707, + "grad_norm": 2.181554079055786, + "learning_rate": 4.940740017799833e-05, + "loss": 2.9883, + "num_input_tokens_seen": 562928, + "step": 55 + }, + { + "epoch": 0.4524033930254477, + "grad_norm": 2.0724411010742188, + "learning_rate": 4.929528920808854e-05, + "loss": 2.969, + "num_input_tokens_seen": 613072, + "step": 60 + }, + { + "epoch": 0.49010367577756836, + "grad_norm": 2.5529158115386963, + "learning_rate": 4.917362177418342e-05, + "loss": 3.0019, + "num_input_tokens_seen": 664640, + "step": 65 + }, + { + "epoch": 0.527803958529689, + "grad_norm": 2.3983142375946045, + "learning_rate": 4.904244573372733e-05, + "loss": 2.9984, + "num_input_tokens_seen": 716800, + "step": 70 + }, + { + "epoch": 0.5655042412818096, + "grad_norm": 2.206209182739258, + "learning_rate": 4.8901812684340564e-05, + "loss": 2.9848, + "num_input_tokens_seen": 766528, + "step": 75 + }, + { + "epoch": 0.6032045240339302, + "grad_norm": 2.22698974609375, + "learning_rate": 4.8751777943523634e-05, + "loss": 2.9169, + "num_input_tokens_seen": 817376, + "step": 80 + }, + { + "epoch": 0.6409048067860509, + "grad_norm": 2.4569709300994873, + "learning_rate": 4.8592400526898314e-05, + "loss": 2.8844, + "num_input_tokens_seen": 864688, + "step": 85 + }, + { + "epoch": 0.6786050895381716, + "grad_norm": 2.195237636566162, + "learning_rate": 4.842374312499405e-05, + "loss": 2.9422, + "num_input_tokens_seen": 913216, + "step": 90 + }, + { + "epoch": 0.7163053722902922, + "grad_norm": 2.54585599899292, + "learning_rate": 4.824587207858888e-05, + "loss": 2.8431, + "num_input_tokens_seen": 964144, + "step": 95 + }, + { + "epoch": 0.7540056550424128, + "grad_norm": 2.3841867446899414, + "learning_rate": 4.805885735261454e-05, 
+ "loss": 2.8377, + "num_input_tokens_seen": 1018768, + "step": 100 + }, + { + "epoch": 0.7917059377945335, + "grad_norm": 2.359386920928955, + "learning_rate": 4.786277250863599e-05, + "loss": 2.7843, + "num_input_tokens_seen": 1070592, + "step": 105 + }, + { + "epoch": 0.8294062205466541, + "grad_norm": 2.0952038764953613, + "learning_rate": 4.765769467591625e-05, + "loss": 2.8218, + "num_input_tokens_seen": 1125392, + "step": 110 + }, + { + "epoch": 0.8671065032987747, + "grad_norm": 2.222754955291748, + "learning_rate": 4.744370452107789e-05, + "loss": 2.877, + "num_input_tokens_seen": 1176256, + "step": 115 + }, + { + "epoch": 0.9048067860508954, + "grad_norm": 2.3233537673950195, + "learning_rate": 4.722088621637309e-05, + "loss": 2.9028, + "num_input_tokens_seen": 1225376, + "step": 120 + }, + { + "epoch": 0.942507068803016, + "grad_norm": 2.400575876235962, + "learning_rate": 4.698932740657479e-05, + "loss": 2.762, + "num_input_tokens_seen": 1277024, + "step": 125 + }, + { + "epoch": 0.9802073515551367, + "grad_norm": 2.592116117477417, + "learning_rate": 4.6749119174501975e-05, + "loss": 2.9164, + "num_input_tokens_seen": 1322912, + "step": 130 + }, + { + "epoch": 1.0179076343072573, + "grad_norm": 2.563326120376587, + "learning_rate": 4.6500356005192514e-05, + "loss": 2.736, + "num_input_tokens_seen": 1368624, + "step": 135 + }, + { + "epoch": 1.055607917059378, + "grad_norm": 2.3927371501922607, + "learning_rate": 4.6243135748737864e-05, + "loss": 2.7414, + "num_input_tokens_seen": 1417664, + "step": 140 + }, + { + "epoch": 1.0933081998114986, + "grad_norm": 2.597975492477417, + "learning_rate": 4.597755958179406e-05, + "loss": 2.6961, + "num_input_tokens_seen": 1469120, + "step": 145 + }, + { + "epoch": 1.1310084825636193, + "grad_norm": 2.932581663131714, + "learning_rate": 4.570373196778427e-05, + "loss": 2.6134, + "num_input_tokens_seen": 1521632, + "step": 150 + }, + { + "epoch": 1.1687087653157398, + "grad_norm": 3.28389310836792, + 
"learning_rate": 4.5421760615808474e-05, + "loss": 2.7548, + "num_input_tokens_seen": 1565296, + "step": 155 + }, + { + "epoch": 1.2064090480678604, + "grad_norm": 2.927279233932495, + "learning_rate": 4.513175643827647e-05, + "loss": 2.702, + "num_input_tokens_seen": 1617088, + "step": 160 + }, + { + "epoch": 1.244109330819981, + "grad_norm": 2.840573310852051, + "learning_rate": 4.4833833507280884e-05, + "loss": 2.6893, + "num_input_tokens_seen": 1663584, + "step": 165 + }, + { + "epoch": 1.2818096135721018, + "grad_norm": 2.6322081089019775, + "learning_rate": 4.4528109009727336e-05, + "loss": 2.5671, + "num_input_tokens_seen": 1713744, + "step": 170 + }, + { + "epoch": 1.3195098963242224, + "grad_norm": 3.1280879974365234, + "learning_rate": 4.42147032012394e-05, + "loss": 2.7682, + "num_input_tokens_seen": 1762768, + "step": 175 + }, + { + "epoch": 1.3572101790763431, + "grad_norm": 3.1128265857696533, + "learning_rate": 4.389373935885646e-05, + "loss": 2.7062, + "num_input_tokens_seen": 1815808, + "step": 180 + }, + { + "epoch": 1.3949104618284638, + "grad_norm": 2.952150344848633, + "learning_rate": 4.356534373254316e-05, + "loss": 2.7092, + "num_input_tokens_seen": 1871040, + "step": 185 + }, + { + "epoch": 1.4326107445805842, + "grad_norm": 3.0146102905273438, + "learning_rate": 4.322964549552943e-05, + "loss": 2.6518, + "num_input_tokens_seen": 1924048, + "step": 190 + }, + { + "epoch": 1.4703110273327051, + "grad_norm": 2.999300956726074, + "learning_rate": 4.288677669350066e-05, + "loss": 2.6592, + "num_input_tokens_seen": 1972720, + "step": 195 + }, + { + "epoch": 1.5080113100848256, + "grad_norm": 3.575253963470459, + "learning_rate": 4.2536872192658036e-05, + "loss": 2.716, + "num_input_tokens_seen": 2022112, + "step": 200 + }, + { + "epoch": 1.5457115928369463, + "grad_norm": 3.1339428424835205, + "learning_rate": 4.218006962666934e-05, + "loss": 2.6746, + "num_input_tokens_seen": 2072000, + "step": 205 + }, + { + "epoch": 1.583411875589067, + 
"grad_norm": 3.130823850631714, + "learning_rate": 4.181650934253132e-05, + "loss": 2.6306, + "num_input_tokens_seen": 2125632, + "step": 210 + }, + { + "epoch": 1.6211121583411876, + "grad_norm": 3.1828997135162354, + "learning_rate": 4.144633434536467e-05, + "loss": 2.5678, + "num_input_tokens_seen": 2174464, + "step": 215 + }, + { + "epoch": 1.6588124410933083, + "grad_norm": 3.3879778385162354, + "learning_rate": 4.1069690242163484e-05, + "loss": 2.7334, + "num_input_tokens_seen": 2223408, + "step": 220 + }, + { + "epoch": 1.6965127238454287, + "grad_norm": 3.5921592712402344, + "learning_rate": 4.06867251845213e-05, + "loss": 2.6658, + "num_input_tokens_seen": 2281296, + "step": 225 + }, + { + "epoch": 1.7342130065975496, + "grad_norm": 3.4126648902893066, + "learning_rate": 4.0297589810356165e-05, + "loss": 2.6567, + "num_input_tokens_seen": 2334176, + "step": 230 + }, + { + "epoch": 1.77191328934967, + "grad_norm": 3.504786252975464, + "learning_rate": 3.9902437184657784e-05, + "loss": 2.6131, + "num_input_tokens_seen": 2389296, + "step": 235 + }, + { + "epoch": 1.8096135721017907, + "grad_norm": 3.459768056869507, + "learning_rate": 3.9501422739279956e-05, + "loss": 2.662, + "num_input_tokens_seen": 2442800, + "step": 240 + }, + { + "epoch": 1.8473138548539114, + "grad_norm": 2.944490909576416, + "learning_rate": 3.909470421180201e-05, + "loss": 2.6339, + "num_input_tokens_seen": 2494912, + "step": 245 + }, + { + "epoch": 1.885014137606032, + "grad_norm": 3.4897429943084717, + "learning_rate": 3.8682441583483314e-05, + "loss": 2.6338, + "num_input_tokens_seen": 2544624, + "step": 250 + }, + { + "epoch": 1.9227144203581528, + "grad_norm": 3.15039324760437, + "learning_rate": 3.8264797016335205e-05, + "loss": 2.6844, + "num_input_tokens_seen": 2597792, + "step": 255 + }, + { + "epoch": 1.9604147031102732, + "grad_norm": 3.5575578212738037, + "learning_rate": 3.7841934789335164e-05, + "loss": 2.5939, + "num_input_tokens_seen": 2646544, + "step": 260 + }, + { + 
"epoch": 1.998114985862394, + "grad_norm": 3.97521710395813, + "learning_rate": 3.741402123380828e-05, + "loss": 2.7393, + "num_input_tokens_seen": 2700224, + "step": 265 + }, + { + "epoch": 2.0358152686145146, + "grad_norm": 3.1736276149749756, + "learning_rate": 3.6981224668001424e-05, + "loss": 2.4495, + "num_input_tokens_seen": 2749104, + "step": 270 + }, + { + "epoch": 2.0735155513666355, + "grad_norm": 3.633033275604248, + "learning_rate": 3.654371533087586e-05, + "loss": 2.3823, + "num_input_tokens_seen": 2802992, + "step": 275 + }, + { + "epoch": 2.111215834118756, + "grad_norm": 3.4200243949890137, + "learning_rate": 3.610166531514436e-05, + "loss": 2.474, + "num_input_tokens_seen": 2844512, + "step": 280 + }, + { + "epoch": 2.1489161168708764, + "grad_norm": 4.055693626403809, + "learning_rate": 3.565524849957921e-05, + "loss": 2.4776, + "num_input_tokens_seen": 2894400, + "step": 285 + }, + { + "epoch": 2.1866163996229973, + "grad_norm": 4.254647254943848, + "learning_rate": 3.520464048061758e-05, + "loss": 2.4709, + "num_input_tokens_seen": 2944688, + "step": 290 + }, + { + "epoch": 2.2243166823751177, + "grad_norm": 4.337226390838623, + "learning_rate": 3.47500185032913e-05, + "loss": 2.5009, + "num_input_tokens_seen": 2994672, + "step": 295 + }, + { + "epoch": 2.2620169651272386, + "grad_norm": 4.424121379852295, + "learning_rate": 3.4291561391508185e-05, + "loss": 2.4524, + "num_input_tokens_seen": 3045344, + "step": 300 + }, + { + "epoch": 2.299717247879359, + "grad_norm": 4.178481101989746, + "learning_rate": 3.3829449477712324e-05, + "loss": 2.4083, + "num_input_tokens_seen": 3100688, + "step": 305 + }, + { + "epoch": 2.3374175306314795, + "grad_norm": 4.315364837646484, + "learning_rate": 3.336386453195088e-05, + "loss": 2.4713, + "num_input_tokens_seen": 3154896, + "step": 310 + }, + { + "epoch": 2.3751178133836004, + "grad_norm": 4.445486545562744, + "learning_rate": 3.2894989690375626e-05, + "loss": 2.3986, + "num_input_tokens_seen": 3209120, 
+ "step": 315 + }, + { + "epoch": 2.412818096135721, + "grad_norm": 4.80281400680542, + "learning_rate": 3.2423009383206876e-05, + "loss": 2.4258, + "num_input_tokens_seen": 3257984, + "step": 320 + }, + { + "epoch": 2.4505183788878417, + "grad_norm": 4.705103397369385, + "learning_rate": 3.194810926218861e-05, + "loss": 2.4126, + "num_input_tokens_seen": 3309424, + "step": 325 + }, + { + "epoch": 2.488218661639962, + "grad_norm": 4.281659126281738, + "learning_rate": 3.147047612756302e-05, + "loss": 2.3639, + "num_input_tokens_seen": 3359152, + "step": 330 + }, + { + "epoch": 2.525918944392083, + "grad_norm": 4.493980407714844, + "learning_rate": 3.099029785459328e-05, + "loss": 2.5176, + "num_input_tokens_seen": 3406784, + "step": 335 + }, + { + "epoch": 2.5636192271442035, + "grad_norm": 4.339905261993408, + "learning_rate": 3.0507763319663517e-05, + "loss": 2.4779, + "num_input_tokens_seen": 3459328, + "step": 340 + }, + { + "epoch": 2.6013195098963244, + "grad_norm": 4.67468786239624, + "learning_rate": 3.002306232598497e-05, + "loss": 2.3833, + "num_input_tokens_seen": 3511056, + "step": 345 + }, + { + "epoch": 2.639019792648445, + "grad_norm": 4.905932426452637, + "learning_rate": 2.9536385528937567e-05, + "loss": 2.3794, + "num_input_tokens_seen": 3562352, + "step": 350 + }, + { + "epoch": 2.6767200754005653, + "grad_norm": 4.684591770172119, + "learning_rate": 2.9047924361076345e-05, + "loss": 2.4883, + "num_input_tokens_seen": 3615664, + "step": 355 + }, + { + "epoch": 2.7144203581526862, + "grad_norm": 4.377009868621826, + "learning_rate": 2.8557870956832132e-05, + "loss": 2.3423, + "num_input_tokens_seen": 3661424, + "step": 360 + }, + { + "epoch": 2.7521206409048067, + "grad_norm": 5.146539688110352, + "learning_rate": 2.8066418076936167e-05, + "loss": 2.4092, + "num_input_tokens_seen": 3710592, + "step": 365 + }, + { + "epoch": 2.7898209236569276, + "grad_norm": 5.5937910079956055, + "learning_rate": 2.7573759032598366e-05, + "loss": 2.4818, + 
"num_input_tokens_seen": 3765728, + "step": 370 + }, + { + "epoch": 2.827521206409048, + "grad_norm": 4.4958696365356445, + "learning_rate": 2.7080087609469062e-05, + "loss": 2.4587, + "num_input_tokens_seen": 3815360, + "step": 375 + }, + { + "epoch": 2.8652214891611685, + "grad_norm": 4.503344535827637, + "learning_rate": 2.6585597991414114e-05, + "loss": 2.3462, + "num_input_tokens_seen": 3868096, + "step": 380 + }, + { + "epoch": 2.9029217719132894, + "grad_norm": 4.773792743682861, + "learning_rate": 2.6090484684133404e-05, + "loss": 2.3713, + "num_input_tokens_seen": 3913696, + "step": 385 + }, + { + "epoch": 2.9406220546654103, + "grad_norm": 4.8644537925720215, + "learning_rate": 2.5594942438652688e-05, + "loss": 2.4618, + "num_input_tokens_seen": 3971840, + "step": 390 + }, + { + "epoch": 2.9783223374175307, + "grad_norm": 5.539215087890625, + "learning_rate": 2.509916617471903e-05, + "loss": 2.5454, + "num_input_tokens_seen": 4025040, + "step": 395 + }, + { + "epoch": 3.016022620169651, + "grad_norm": 4.475778579711914, + "learning_rate": 2.46033509041298e-05, + "loss": 2.3007, + "num_input_tokens_seen": 4075488, + "step": 400 + }, + { + "epoch": 3.053722902921772, + "grad_norm": 4.82028341293335, + "learning_rate": 2.410769165402549e-05, + "loss": 2.274, + "num_input_tokens_seen": 4130496, + "step": 405 + }, + { + "epoch": 3.0914231856738925, + "grad_norm": 5.513036251068115, + "learning_rate": 2.3612383390176503e-05, + "loss": 2.2457, + "num_input_tokens_seen": 4181504, + "step": 410 + }, + { + "epoch": 3.1291234684260134, + "grad_norm": 5.219841957092285, + "learning_rate": 2.3117620940294048e-05, + "loss": 2.2595, + "num_input_tokens_seen": 4236816, + "step": 415 + }, + { + "epoch": 3.166823751178134, + "grad_norm": 5.527017593383789, + "learning_rate": 2.2623598917395438e-05, + "loss": 2.2326, + "num_input_tokens_seen": 4289488, + "step": 420 + }, + { + "epoch": 3.2045240339302543, + "grad_norm": 5.297417163848877, + "learning_rate": 
2.213051164325366e-05, + "loss": 2.2595, + "num_input_tokens_seen": 4334704, + "step": 425 + }, + { + "epoch": 3.242224316682375, + "grad_norm": 5.43293571472168, + "learning_rate": 2.1638553071961708e-05, + "loss": 2.2084, + "num_input_tokens_seen": 4377360, + "step": 430 + }, + { + "epoch": 3.2799245994344957, + "grad_norm": 5.788747310638428, + "learning_rate": 2.1147916713641367e-05, + "loss": 2.21, + "num_input_tokens_seen": 4428544, + "step": 435 + }, + { + "epoch": 3.3176248821866166, + "grad_norm": 6.185176372528076, + "learning_rate": 2.0658795558326743e-05, + "loss": 2.2449, + "num_input_tokens_seen": 4477664, + "step": 440 + }, + { + "epoch": 3.355325164938737, + "grad_norm": 5.564029693603516, + "learning_rate": 2.017138200005236e-05, + "loss": 2.2457, + "num_input_tokens_seen": 4533792, + "step": 445 + }, + { + "epoch": 3.3930254476908575, + "grad_norm": 5.952132225036621, + "learning_rate": 1.9685867761175584e-05, + "loss": 2.3035, + "num_input_tokens_seen": 4584800, + "step": 450 + }, + { + "epoch": 3.4307257304429783, + "grad_norm": 5.484558582305908, + "learning_rate": 1.9202443816963425e-05, + "loss": 2.3204, + "num_input_tokens_seen": 4634976, + "step": 455 + }, + { + "epoch": 3.468426013195099, + "grad_norm": 6.121850490570068, + "learning_rate": 1.872130032047302e-05, + "loss": 2.2055, + "num_input_tokens_seen": 4691344, + "step": 460 + }, + { + "epoch": 3.5061262959472197, + "grad_norm": 6.345475673675537, + "learning_rate": 1.824262652775568e-05, + "loss": 2.3327, + "num_input_tokens_seen": 4745536, + "step": 465 + }, + { + "epoch": 3.54382657869934, + "grad_norm": 6.4370574951171875, + "learning_rate": 1.7766610723413684e-05, + "loss": 2.2214, + "num_input_tokens_seen": 4798128, + "step": 470 + }, + { + "epoch": 3.581526861451461, + "grad_norm": 6.708219528198242, + "learning_rate": 1.7293440146539196e-05, + "loss": 2.3097, + "num_input_tokens_seen": 4847632, + "step": 475 + }, + { + "epoch": 3.6192271442035815, + "grad_norm": 
5.383622169494629, + "learning_rate": 1.682330091706446e-05, + "loss": 2.2966, + "num_input_tokens_seen": 4905648, + "step": 480 + }, + { + "epoch": 3.6569274269557024, + "grad_norm": 6.210061550140381, + "learning_rate": 1.6356377962552238e-05, + "loss": 2.2006, + "num_input_tokens_seen": 4955600, + "step": 485 + }, + { + "epoch": 3.694627709707823, + "grad_norm": 6.637734889984131, + "learning_rate": 1.589285494545514e-05, + "loss": 2.2463, + "num_input_tokens_seen": 5007424, + "step": 490 + }, + { + "epoch": 3.7323279924599433, + "grad_norm": 5.893795967102051, + "learning_rate": 1.5432914190872757e-05, + "loss": 2.2045, + "num_input_tokens_seen": 5058848, + "step": 495 + }, + { + "epoch": 3.770028275212064, + "grad_norm": 5.891578674316406, + "learning_rate": 1.4976736614834664e-05, + "loss": 2.135, + "num_input_tokens_seen": 5109904, + "step": 500 + }, + { + "epoch": 3.8077285579641846, + "grad_norm": 6.888275623321533, + "learning_rate": 1.4524501653137787e-05, + "loss": 2.3023, + "num_input_tokens_seen": 5156496, + "step": 505 + }, + { + "epoch": 3.8454288407163055, + "grad_norm": 5.987553119659424, + "learning_rate": 1.4076387190766017e-05, + "loss": 2.2636, + "num_input_tokens_seen": 5207824, + "step": 510 + }, + { + "epoch": 3.883129123468426, + "grad_norm": 6.791808128356934, + "learning_rate": 1.363256949191972e-05, + "loss": 2.1942, + "num_input_tokens_seen": 5256608, + "step": 515 + }, + { + "epoch": 3.9208294062205464, + "grad_norm": 6.749192714691162, + "learning_rate": 1.3193223130682936e-05, + "loss": 2.2731, + "num_input_tokens_seen": 5305584, + "step": 520 + }, + { + "epoch": 3.9585296889726673, + "grad_norm": 5.9771647453308105, + "learning_rate": 1.2758520922355226e-05, + "loss": 2.1669, + "num_input_tokens_seen": 5349712, + "step": 525 + }, + { + "epoch": 3.9962299717247878, + "grad_norm": 6.35679817199707, + "learning_rate": 1.2328633855475429e-05, + "loss": 2.2146, + "num_input_tokens_seen": 5400336, + "step": 530 + }, + { + "epoch": 
4.033930254476909, + "grad_norm": 6.124676704406738, + "learning_rate": 1.1903731024563966e-05, + "loss": 2.0324, + "num_input_tokens_seen": 5451776, + "step": 535 + }, + { + "epoch": 4.071630537229029, + "grad_norm": 6.622769355773926, + "learning_rate": 1.148397956361007e-05, + "loss": 2.0762, + "num_input_tokens_seen": 5507520, + "step": 540 + }, + { + "epoch": 4.10933081998115, + "grad_norm": 7.076972961425781, + "learning_rate": 1.106954458033026e-05, + "loss": 2.0949, + "num_input_tokens_seen": 5562688, + "step": 545 + }, + { + "epoch": 4.147031102733271, + "grad_norm": 6.695667266845703, + "learning_rate": 1.0660589091223855e-05, + "loss": 2.1853, + "num_input_tokens_seen": 5610800, + "step": 550 + }, + { + "epoch": 4.184731385485391, + "grad_norm": 7.716992378234863, + "learning_rate": 1.025727395745095e-05, + "loss": 2.1706, + "num_input_tokens_seen": 5657616, + "step": 555 + }, + { + "epoch": 4.222431668237512, + "grad_norm": 6.8223419189453125, + "learning_rate": 9.859757821558337e-06, + "loss": 2.1199, + "num_input_tokens_seen": 5710960, + "step": 560 + }, + { + "epoch": 4.260131950989632, + "grad_norm": 6.87358283996582, + "learning_rate": 9.468197045077976e-06, + "loss": 2.0353, + "num_input_tokens_seen": 5756608, + "step": 565 + }, + { + "epoch": 4.297832233741753, + "grad_norm": 7.660863399505615, + "learning_rate": 9.082745647022797e-06, + "loss": 2.1101, + "num_input_tokens_seen": 5812304, + "step": 570 + }, + { + "epoch": 4.335532516493874, + "grad_norm": 7.863148212432861, + "learning_rate": 8.703555243303835e-06, + "loss": 2.1076, + "num_input_tokens_seen": 5859808, + "step": 575 + }, + { + "epoch": 4.3732327992459945, + "grad_norm": 7.298150539398193, + "learning_rate": 8.330774987092712e-06, + "loss": 2.0305, + "num_input_tokens_seen": 5908784, + "step": 580 + }, + { + "epoch": 4.410933081998115, + "grad_norm": 6.835300922393799, + "learning_rate": 7.96455151015272e-06, + "loss": 2.1132, + "num_input_tokens_seen": 5958672, + "step": 585 + }, 
+ { + "epoch": 4.448633364750235, + "grad_norm": 6.710065841674805, + "learning_rate": 7.605028865161809e-06, + "loss": 2.1527, + "num_input_tokens_seen": 6010720, + "step": 590 + }, + { + "epoch": 4.486333647502356, + "grad_norm": 6.827284812927246, + "learning_rate": 7.25234846904993e-06, + "loss": 2.0948, + "num_input_tokens_seen": 6061440, + "step": 595 + }, + { + "epoch": 4.524033930254477, + "grad_norm": 7.066997528076172, + "learning_rate": 6.906649047373246e-06, + "loss": 2.1222, + "num_input_tokens_seen": 6115216, + "step": 600 + }, + { + "epoch": 4.561734213006598, + "grad_norm": 7.966955184936523, + "learning_rate": 6.568066579746901e-06, + "loss": 2.0719, + "num_input_tokens_seen": 6160944, + "step": 605 + }, + { + "epoch": 4.599434495758718, + "grad_norm": 7.257175922393799, + "learning_rate": 6.2367342463579475e-06, + "loss": 2.1399, + "num_input_tokens_seen": 6209008, + "step": 610 + }, + { + "epoch": 4.6371347785108386, + "grad_norm": 7.445122241973877, + "learning_rate": 5.912782375579412e-06, + "loss": 2.0671, + "num_input_tokens_seen": 6258176, + "step": 615 + }, + { + "epoch": 4.674835061262959, + "grad_norm": 7.934208869934082, + "learning_rate": 5.596338392706077e-06, + "loss": 2.152, + "num_input_tokens_seen": 6308496, + "step": 620 + }, + { + "epoch": 4.71253534401508, + "grad_norm": 6.131651878356934, + "learning_rate": 5.2875267698322325e-06, + "loss": 2.0834, + "num_input_tokens_seen": 6358896, + "step": 625 + }, + { + "epoch": 4.750235626767201, + "grad_norm": 6.925292015075684, + "learning_rate": 4.986468976890993e-06, + "loss": 2.1373, + "num_input_tokens_seen": 6410896, + "step": 630 + }, + { + "epoch": 4.787935909519321, + "grad_norm": 6.970002174377441, + "learning_rate": 4.693283433874565e-06, + "loss": 2.1276, + "num_input_tokens_seen": 6459120, + "step": 635 + }, + { + "epoch": 4.825636192271442, + "grad_norm": 7.309933662414551, + "learning_rate": 4.408085464254183e-06, + "loss": 2.1481, + "num_input_tokens_seen": 6506048, + 
"step": 640 + }, + { + "epoch": 4.863336475023563, + "grad_norm": 6.540215492248535, + "learning_rate": 4.130987249617993e-06, + "loss": 2.0872, + "num_input_tokens_seen": 6560448, + "step": 645 + }, + { + "epoch": 4.9010367577756835, + "grad_norm": 6.6013360023498535, + "learning_rate": 3.8620977855448935e-06, + "loss": 2.1141, + "num_input_tokens_seen": 6616704, + "step": 650 + }, + { + "epoch": 4.938737040527804, + "grad_norm": 7.337521553039551, + "learning_rate": 3.601522838731461e-06, + "loss": 2.0778, + "num_input_tokens_seen": 6670192, + "step": 655 + }, + { + "epoch": 4.976437323279924, + "grad_norm": 7.133378505706787, + "learning_rate": 3.3493649053890326e-06, + "loss": 2.1827, + "num_input_tokens_seen": 6721600, + "step": 660 + }, + { + "epoch": 5.014137606032045, + "grad_norm": 7.371194839477539, + "learning_rate": 3.1057231709272077e-06, + "loss": 2.0695, + "num_input_tokens_seen": 6773936, + "step": 665 + }, + { + "epoch": 5.051837888784166, + "grad_norm": 7.494382858276367, + "learning_rate": 2.8706934709395892e-06, + "loss": 2.0689, + "num_input_tokens_seen": 6823488, + "step": 670 + }, + { + "epoch": 5.089538171536287, + "grad_norm": 7.376400947570801, + "learning_rate": 2.6443682535072177e-06, + "loss": 1.9623, + "num_input_tokens_seen": 6870576, + "step": 675 + }, + { + "epoch": 5.127238454288407, + "grad_norm": 7.1367316246032715, + "learning_rate": 2.4268365428344736e-06, + "loss": 2.0186, + "num_input_tokens_seen": 6923488, + "step": 680 + }, + { + "epoch": 5.1649387370405275, + "grad_norm": 9.0476655960083, + "learning_rate": 2.21818390423168e-06, + "loss": 1.9324, + "num_input_tokens_seen": 6968384, + "step": 685 + }, + { + "epoch": 5.202639019792649, + "grad_norm": 6.944507122039795, + "learning_rate": 2.0184924104583613e-06, + "loss": 1.9769, + "num_input_tokens_seen": 7012784, + "step": 690 + }, + { + "epoch": 5.240339302544769, + "grad_norm": 7.558785438537598, + "learning_rate": 1.8278406094401623e-06, + "loss": 1.9815, + 
"num_input_tokens_seen": 7058992, + "step": 695 + }, + { + "epoch": 5.27803958529689, + "grad_norm": 7.789961338043213, + "learning_rate": 1.6463034933723337e-06, + "loss": 2.0789, + "num_input_tokens_seen": 7107360, + "step": 700 + }, + { + "epoch": 5.31573986804901, + "grad_norm": 7.385551929473877, + "learning_rate": 1.4739524692218314e-06, + "loss": 2.0299, + "num_input_tokens_seen": 7154560, + "step": 705 + }, + { + "epoch": 5.353440150801131, + "grad_norm": 8.215983390808105, + "learning_rate": 1.3108553306396265e-06, + "loss": 2.0888, + "num_input_tokens_seen": 7207760, + "step": 710 + }, + { + "epoch": 5.391140433553252, + "grad_norm": 7.679669380187988, + "learning_rate": 1.1570762312943295e-06, + "loss": 2.054, + "num_input_tokens_seen": 7261248, + "step": 715 + }, + { + "epoch": 5.4288407163053725, + "grad_norm": 7.84255313873291, + "learning_rate": 1.0126756596375686e-06, + "loss": 1.9551, + "num_input_tokens_seen": 7309552, + "step": 720 + }, + { + "epoch": 5.466540999057493, + "grad_norm": 7.932110786437988, + "learning_rate": 8.777104151110826e-07, + "loss": 2.0533, + "num_input_tokens_seen": 7368576, + "step": 725 + }, + { + "epoch": 5.504241281809613, + "grad_norm": 8.4814453125, + "learning_rate": 7.522335858048707e-07, + "loss": 2.0151, + "num_input_tokens_seen": 7425056, + "step": 730 + }, + { + "epoch": 5.541941564561734, + "grad_norm": 8.199661254882812, + "learning_rate": 6.362945275751736e-07, + "loss": 2.0525, + "num_input_tokens_seen": 7473488, + "step": 735 + }, + { + "epoch": 5.579641847313855, + "grad_norm": 7.404112339019775, + "learning_rate": 5.299388446305343e-07, + "loss": 2.0759, + "num_input_tokens_seen": 7522656, + "step": 740 + }, + { + "epoch": 5.617342130065976, + "grad_norm": 6.568804740905762, + "learning_rate": 4.3320837159353813e-07, + "loss": 2.0374, + "num_input_tokens_seen": 7577104, + "step": 745 + }, + { + "epoch": 5.655042412818096, + "grad_norm": 6.717598915100098, + "learning_rate": 3.4614115704533767e-07, + 
"loss": 2.0137, + "num_input_tokens_seen": 7633024, + "step": 750 + }, + { + "epoch": 5.6927426955702165, + "grad_norm": 8.680829048156738, + "learning_rate": 2.687714485593462e-07, + "loss": 2.1025, + "num_input_tokens_seen": 7682736, + "step": 755 + }, + { + "epoch": 5.730442978322337, + "grad_norm": 7.93720817565918, + "learning_rate": 2.011296792301165e-07, + "loss": 2.1403, + "num_input_tokens_seen": 7737888, + "step": 760 + }, + { + "epoch": 5.768143261074458, + "grad_norm": 7.161721706390381, + "learning_rate": 1.4324245570256633e-07, + "loss": 2.0965, + "num_input_tokens_seen": 7788384, + "step": 765 + }, + { + "epoch": 5.805843543826579, + "grad_norm": 7.347177028656006, + "learning_rate": 9.513254770636137e-08, + "loss": 2.0826, + "num_input_tokens_seen": 7835600, + "step": 770 + }, + { + "epoch": 5.843543826578699, + "grad_norm": 7.278431415557861, + "learning_rate": 5.681887909952388e-08, + "loss": 2.1508, + "num_input_tokens_seen": 7888336, + "step": 775 + }, + { + "epoch": 5.88124410933082, + "grad_norm": 8.160250663757324, + "learning_rate": 2.831652042480093e-08, + "loss": 2.0338, + "num_input_tokens_seen": 7938960, + "step": 780 + }, + { + "epoch": 5.918944392082941, + "grad_norm": 7.380901336669922, + "learning_rate": 9.636682981720158e-09, + "loss": 2.0466, + "num_input_tokens_seen": 7991296, + "step": 785 + }, + { + "epoch": 5.956644674835061, + "grad_norm": 7.834613800048828, + "learning_rate": 7.867144166728846e-10, + "loss": 2.0436, + "num_input_tokens_seen": 8044336, + "step": 790 + } + ], + "logging_steps": 5, + "max_steps": 792, + "num_input_tokens_seen": 8063376, + "num_train_epochs": 6, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 9.844904305885184e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null 
+} diff --git a/checkpoint-792/training_args.bin b/checkpoint-792/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d325c35219dd8c0eea8279f17307e1d0d8c29b12 --- /dev/null +++ b/checkpoint-792/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72d28ce688790f3a28f1dba3064866610463622e1d4141255300e136617f5673 +size 5432 diff --git a/llamaboard_config.yaml b/llamaboard_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a7a5169f556872ef15472ea02e3e4cdda2787e79 --- /dev/null +++ b/llamaboard_config.yaml @@ -0,0 +1,66 @@ +top.booster: auto +top.checkpoint_path: [] +top.finetuning_type: lora +top.model_name: Gemma-2B +top.quantization_bit: none +top.quantization_method: bitsandbytes +top.rope_scaling: none +top.template: default +train.additional_target: '' +train.badam_mode: layer +train.badam_switch_interval: 50 +train.badam_switch_mode: ascending +train.badam_update_ratio: 0.05 +train.batch_size: 2 +train.compute_type: bf16 +train.create_new_adapter: false +train.cutoff_len: 1024 +train.dataset: +- identity +train.dataset_dir: data +train.ds_offload: false +train.ds_stage: none +train.freeze_extra_modules: '' +train.freeze_trainable_layers: 2 +train.freeze_trainable_modules: all +train.galore_rank: 16 +train.galore_scale: 0.25 +train.galore_target: all +train.galore_update_interval: 200 +train.gradient_accumulation_steps: 8 +train.learning_rate: 5e-5 +train.logging_steps: 5 +train.lora_alpha: 16 +train.lora_dropout: 0 +train.lora_rank: 8 +train.lora_target: '' +train.loraplus_lr_ratio: 0 +train.lr_scheduler_type: cosine +train.mask_history: false +train.max_grad_norm: '3.0' +train.max_samples: '100000' +train.neat_packing: false +train.neftune_alpha: 0 +train.num_train_epochs: '6.0' +train.optim: adamw_torch +train.packing: false +train.ppo_score_norm: false +train.ppo_whiten_rewards: false +train.pref_beta: 0.1 +train.pref_ftx: 0 +train.pref_loss: sigmoid 
+train.report_to: false +train.resize_vocab: false +train.reward_model: [] +train.save_steps: 100 +train.shift_attn: false +train.train_on_prompt: false +train.training_stage: Supervised Fine-Tuning +train.use_badam: false +train.use_dora: false +train.use_galore: false +train.use_llama_pro: false +train.use_pissa: false +train.use_rslora: false +train.val_size: 0 +train.warmup_steps: 0 diff --git a/running_log.txt b/running_log.txt new file mode 100644 index 0000000000000000000000000000000000000000..27916ded53b8478a962e6aaaea7cd89e90e52765 --- /dev/null +++ b/running_log.txt @@ -0,0 +1,628 @@ +[INFO|configuration_utils.py:672] 2024-10-17 07:49:30,560 >> loading configuration file config.json from cache at /home/.cache/huggingface/hub/models--unsloth--gemma-2-2b-it/snapshots/457f2e15bf550c227ce6ad86e2ec108d3e42c106/config.json + +[INFO|configuration_utils.py:739] 2024-10-17 07:49:30,563 >> Model config Gemma2Config { + "_name_or_path": "unsloth/gemma-2-2b-it", + "architectures": [ + "Gemma2ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": 50.0, + "bos_token_id": 2, + "cache_implementation": "hybrid", + "eos_token_id": [ + 1, + 107 + ], + "final_logit_softcapping": 30.0, + "head_dim": 256, + "hidden_act": "gelu_pytorch_tanh", + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 2304, + "initializer_range": 0.02, + "intermediate_size": 9216, + "max_position_embeddings": 8192, + "model_type": "gemma2", + "num_attention_heads": 8, + "num_hidden_layers": 26, + "num_key_value_heads": 4, + "pad_token_id": 0, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_theta": 10000.0, + "sliding_window": 4096, + "torch_dtype": "bfloat16", + "transformers_version": "4.45.0", + "unsloth_version": "2024.9", + "use_cache": true, + "vocab_size": 256000 +} + + +[INFO|tokenization_utils_base.py:2214] 2024-10-17 07:49:30,971 >> loading file tokenizer.model from cache at 
/home/.cache/huggingface/hub/models--unsloth--gemma-2-2b-it/snapshots/457f2e15bf550c227ce6ad86e2ec108d3e42c106/tokenizer.model + +[INFO|tokenization_utils_base.py:2214] 2024-10-17 07:49:30,971 >> loading file tokenizer.json from cache at /home/.cache/huggingface/hub/models--unsloth--gemma-2-2b-it/snapshots/457f2e15bf550c227ce6ad86e2ec108d3e42c106/tokenizer.json + +[INFO|tokenization_utils_base.py:2214] 2024-10-17 07:49:30,971 >> loading file added_tokens.json from cache at None + +[INFO|tokenization_utils_base.py:2214] 2024-10-17 07:49:30,971 >> loading file special_tokens_map.json from cache at /home/.cache/huggingface/hub/models--unsloth--gemma-2-2b-it/snapshots/457f2e15bf550c227ce6ad86e2ec108d3e42c106/special_tokens_map.json + +[INFO|tokenization_utils_base.py:2214] 2024-10-17 07:49:30,971 >> loading file tokenizer_config.json from cache at /home/.cache/huggingface/hub/models--unsloth--gemma-2-2b-it/snapshots/457f2e15bf550c227ce6ad86e2ec108d3e42c106/tokenizer_config.json + +[INFO|configuration_utils.py:672] 2024-10-17 07:49:33,042 >> loading configuration file config.json from cache at /home/.cache/huggingface/hub/models--unsloth--gemma-2-2b-it/snapshots/457f2e15bf550c227ce6ad86e2ec108d3e42c106/config.json + +[INFO|configuration_utils.py:739] 2024-10-17 07:49:33,043 >> Model config Gemma2Config { + "_name_or_path": "unsloth/gemma-2-2b-it", + "architectures": [ + "Gemma2ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": 50.0, + "bos_token_id": 2, + "cache_implementation": "hybrid", + "eos_token_id": [ + 1, + 107 + ], + "final_logit_softcapping": 30.0, + "head_dim": 256, + "hidden_act": "gelu_pytorch_tanh", + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 2304, + "initializer_range": 0.02, + "intermediate_size": 9216, + "max_position_embeddings": 8192, + "model_type": "gemma2", + "num_attention_heads": 8, + "num_hidden_layers": 26, + "num_key_value_heads": 4, + "pad_token_id": 0, + 
"query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_theta": 10000.0, + "sliding_window": 4096, + "torch_dtype": "bfloat16", + "transformers_version": "4.45.0", + "unsloth_version": "2024.9", + "use_cache": true, + "vocab_size": 256000 +} + + +[INFO|tokenization_utils_base.py:2214] 2024-10-17 07:49:33,316 >> loading file tokenizer.model from cache at /home/.cache/huggingface/hub/models--unsloth--gemma-2-2b-it/snapshots/457f2e15bf550c227ce6ad86e2ec108d3e42c106/tokenizer.model + +[INFO|tokenization_utils_base.py:2214] 2024-10-17 07:49:33,316 >> loading file tokenizer.json from cache at /home/.cache/huggingface/hub/models--unsloth--gemma-2-2b-it/snapshots/457f2e15bf550c227ce6ad86e2ec108d3e42c106/tokenizer.json + +[INFO|tokenization_utils_base.py:2214] 2024-10-17 07:49:33,317 >> loading file added_tokens.json from cache at None + +[INFO|tokenization_utils_base.py:2214] 2024-10-17 07:49:33,317 >> loading file special_tokens_map.json from cache at /home/.cache/huggingface/hub/models--unsloth--gemma-2-2b-it/snapshots/457f2e15bf550c227ce6ad86e2ec108d3e42c106/special_tokens_map.json + +[INFO|tokenization_utils_base.py:2214] 2024-10-17 07:49:33,317 >> loading file tokenizer_config.json from cache at /home/.cache/huggingface/hub/models--unsloth--gemma-2-2b-it/snapshots/457f2e15bf550c227ce6ad86e2ec108d3e42c106/tokenizer_config.json + +[INFO|configuration_utils.py:672] 2024-10-17 07:49:43,349 >> loading configuration file config.json from cache at /home/.cache/huggingface/hub/models--unsloth--gemma-2-2b-it/snapshots/457f2e15bf550c227ce6ad86e2ec108d3e42c106/config.json + +[INFO|configuration_utils.py:739] 2024-10-17 07:49:43,350 >> Model config Gemma2Config { + "_name_or_path": "unsloth/gemma-2-2b-it", + "architectures": [ + "Gemma2ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": 50.0, + "bos_token_id": 2, + "cache_implementation": "hybrid", + "eos_token_id": [ + 1, + 107 + ], + "final_logit_softcapping": 30.0, + 
"head_dim": 256, + "hidden_act": "gelu_pytorch_tanh", + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 2304, + "initializer_range": 0.02, + "intermediate_size": 9216, + "max_position_embeddings": 8192, + "model_type": "gemma2", + "num_attention_heads": 8, + "num_hidden_layers": 26, + "num_key_value_heads": 4, + "pad_token_id": 0, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_theta": 10000.0, + "sliding_window": 4096, + "torch_dtype": "bfloat16", + "transformers_version": "4.45.0", + "unsloth_version": "2024.9", + "use_cache": true, + "vocab_size": 256000 +} + + +[INFO|modeling_utils.py:3726] 2024-10-17 07:49:43,408 >> loading weights file model.safetensors from cache at /home/.cache/huggingface/hub/models--unsloth--gemma-2-2b-it/snapshots/457f2e15bf550c227ce6ad86e2ec108d3e42c106/model.safetensors + +[INFO|modeling_utils.py:1622] 2024-10-17 07:49:52,501 >> Instantiating Gemma2ForCausalLM model under default dtype torch.bfloat16. + +[INFO|configuration_utils.py:1099] 2024-10-17 07:49:52,505 >> Generate config GenerationConfig { + "bos_token_id": 2, + "cache_implementation": "hybrid", + "eos_token_id": [ + 1, + 107 + ], + "pad_token_id": 0 +} + + +[INFO|modeling_utils.py:4568] 2024-10-17 07:52:51,954 >> All model checkpoint weights were used when initializing Gemma2ForCausalLM. + + +[INFO|modeling_utils.py:4576] 2024-10-17 07:52:51,954 >> All the weights of Gemma2ForCausalLM were initialized from the model checkpoint at unsloth/gemma-2-2b-it. +If your task is similar to the task the model of the checkpoint was trained on, you can already use Gemma2ForCausalLM for predictions without further training. 
+ +[INFO|configuration_utils.py:1054] 2024-10-17 07:52:52,225 >> loading configuration file generation_config.json from cache at /home/.cache/huggingface/hub/models--unsloth--gemma-2-2b-it/snapshots/457f2e15bf550c227ce6ad86e2ec108d3e42c106/generation_config.json + +[INFO|configuration_utils.py:1099] 2024-10-17 07:52:52,225 >> Generate config GenerationConfig { + "bos_token_id": 2, + "cache_implementation": "hybrid", + "eos_token_id": [ + 1, + 107 + ], + "max_length": 8192, + "pad_token_id": 0 +} + + +[INFO|trainer.py:667] 2024-10-17 07:52:52,594 >> Using auto half precision backend + +[INFO|trainer.py:2243] 2024-10-17 07:52:53,383 >> ***** Running training ***** + +[INFO|trainer.py:2244] 2024-10-17 07:52:53,383 >> Num examples = 4,244 + +[INFO|trainer.py:2245] 2024-10-17 07:52:53,383 >> Num Epochs = 6 + +[INFO|trainer.py:2246] 2024-10-17 07:52:53,383 >> Instantaneous batch size per device = 2 + +[INFO|trainer.py:2249] 2024-10-17 07:52:53,383 >> Total train batch size (w. parallel, distributed & accumulation) = 32 + +[INFO|trainer.py:2250] 2024-10-17 07:52:53,383 >> Gradient Accumulation steps = 8 + +[INFO|trainer.py:2251] 2024-10-17 07:52:53,383 >> Total optimization steps = 792 + +[INFO|trainer.py:2252] 2024-10-17 07:52:53,386 >> Number of trainable parameters = 10,383,360 + +[INFO|trainer.py:3705] 2024-10-17 07:58:27,005 >> Saving model checkpoint to saves/Gemma-2B/lora/4k_train_2024-10-17-07-48-45/checkpoint-100 + +[INFO|configuration_utils.py:672] 2024-10-17 07:58:27,602 >> loading configuration file config.json from cache at /home/.cache/huggingface/hub/models--unsloth--gemma-2-2b-it/snapshots/457f2e15bf550c227ce6ad86e2ec108d3e42c106/config.json + +[INFO|configuration_utils.py:739] 2024-10-17 07:58:27,603 >> Model config Gemma2Config { + "_name_or_path": "unsloth/gemma-2-2b-it", + "architectures": [ + "Gemma2ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": 50.0, + "bos_token_id": 2, + "cache_implementation": 
"hybrid", + "eos_token_id": [ + 1, + 107 + ], + "final_logit_softcapping": 30.0, + "head_dim": 256, + "hidden_act": "gelu_pytorch_tanh", + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 2304, + "initializer_range": 0.02, + "intermediate_size": 9216, + "max_position_embeddings": 8192, + "model_type": "gemma2", + "num_attention_heads": 8, + "num_hidden_layers": 26, + "num_key_value_heads": 4, + "pad_token_id": 0, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_theta": 10000.0, + "sliding_window": 4096, + "torch_dtype": "bfloat16", + "transformers_version": "4.45.0", + "unsloth_version": "2024.9", + "use_cache": true, + "vocab_size": 256000 +} + + +[INFO|tokenization_utils_base.py:2649] 2024-10-17 07:58:27,704 >> tokenizer config file saved in saves/Gemma-2B/lora/4k_train_2024-10-17-07-48-45/checkpoint-100/tokenizer_config.json + +[INFO|tokenization_utils_base.py:2658] 2024-10-17 07:58:27,704 >> Special tokens file saved in saves/Gemma-2B/lora/4k_train_2024-10-17-07-48-45/checkpoint-100/special_tokens_map.json + +[INFO|trainer.py:3705] 2024-10-17 08:03:56,426 >> Saving model checkpoint to saves/Gemma-2B/lora/4k_train_2024-10-17-07-48-45/checkpoint-200 + +[INFO|configuration_utils.py:672] 2024-10-17 08:04:00,522 >> loading configuration file config.json from cache at /home/.cache/huggingface/hub/models--unsloth--gemma-2-2b-it/snapshots/457f2e15bf550c227ce6ad86e2ec108d3e42c106/config.json + +[INFO|configuration_utils.py:739] 2024-10-17 08:04:00,523 >> Model config Gemma2Config { + "_name_or_path": "unsloth/gemma-2-2b-it", + "architectures": [ + "Gemma2ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": 50.0, + "bos_token_id": 2, + "cache_implementation": "hybrid", + "eos_token_id": [ + 1, + 107 + ], + "final_logit_softcapping": 30.0, + "head_dim": 256, + "hidden_act": "gelu_pytorch_tanh", + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 2304, + "initializer_range": 0.02, + 
"intermediate_size": 9216, + "max_position_embeddings": 8192, + "model_type": "gemma2", + "num_attention_heads": 8, + "num_hidden_layers": 26, + "num_key_value_heads": 4, + "pad_token_id": 0, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_theta": 10000.0, + "sliding_window": 4096, + "torch_dtype": "bfloat16", + "transformers_version": "4.45.0", + "unsloth_version": "2024.9", + "use_cache": true, + "vocab_size": 256000 +} + + +[INFO|tokenization_utils_base.py:2649] 2024-10-17 08:04:00,623 >> tokenizer config file saved in saves/Gemma-2B/lora/4k_train_2024-10-17-07-48-45/checkpoint-200/tokenizer_config.json + +[INFO|tokenization_utils_base.py:2658] 2024-10-17 08:04:00,623 >> Special tokens file saved in saves/Gemma-2B/lora/4k_train_2024-10-17-07-48-45/checkpoint-200/special_tokens_map.json + +[INFO|trainer.py:3705] 2024-10-17 08:09:34,776 >> Saving model checkpoint to saves/Gemma-2B/lora/4k_train_2024-10-17-07-48-45/checkpoint-300 + +[INFO|configuration_utils.py:672] 2024-10-17 08:09:35,375 >> loading configuration file config.json from cache at /home/.cache/huggingface/hub/models--unsloth--gemma-2-2b-it/snapshots/457f2e15bf550c227ce6ad86e2ec108d3e42c106/config.json + +[INFO|configuration_utils.py:739] 2024-10-17 08:09:35,376 >> Model config Gemma2Config { + "_name_or_path": "unsloth/gemma-2-2b-it", + "architectures": [ + "Gemma2ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": 50.0, + "bos_token_id": 2, + "cache_implementation": "hybrid", + "eos_token_id": [ + 1, + 107 + ], + "final_logit_softcapping": 30.0, + "head_dim": 256, + "hidden_act": "gelu_pytorch_tanh", + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 2304, + "initializer_range": 0.02, + "intermediate_size": 9216, + "max_position_embeddings": 8192, + "model_type": "gemma2", + "num_attention_heads": 8, + "num_hidden_layers": 26, + "num_key_value_heads": 4, + "pad_token_id": 0, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 
1e-06, + "rope_theta": 10000.0, + "sliding_window": 4096, + "torch_dtype": "bfloat16", + "transformers_version": "4.45.0", + "unsloth_version": "2024.9", + "use_cache": true, + "vocab_size": 256000 +} + + +[INFO|tokenization_utils_base.py:2649] 2024-10-17 08:09:35,447 >> tokenizer config file saved in saves/Gemma-2B/lora/4k_train_2024-10-17-07-48-45/checkpoint-300/tokenizer_config.json + +[INFO|tokenization_utils_base.py:2658] 2024-10-17 08:09:35,448 >> Special tokens file saved in saves/Gemma-2B/lora/4k_train_2024-10-17-07-48-45/checkpoint-300/special_tokens_map.json + +[INFO|trainer.py:3705] 2024-10-17 08:15:13,571 >> Saving model checkpoint to saves/Gemma-2B/lora/4k_train_2024-10-17-07-48-45/checkpoint-400 + +[INFO|configuration_utils.py:672] 2024-10-17 08:15:15,143 >> loading configuration file config.json from cache at /home/.cache/huggingface/hub/models--unsloth--gemma-2-2b-it/snapshots/457f2e15bf550c227ce6ad86e2ec108d3e42c106/config.json + +[INFO|configuration_utils.py:739] 2024-10-17 08:15:15,145 >> Model config Gemma2Config { + "_name_or_path": "unsloth/gemma-2-2b-it", + "architectures": [ + "Gemma2ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": 50.0, + "bos_token_id": 2, + "cache_implementation": "hybrid", + "eos_token_id": [ + 1, + 107 + ], + "final_logit_softcapping": 30.0, + "head_dim": 256, + "hidden_act": "gelu_pytorch_tanh", + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 2304, + "initializer_range": 0.02, + "intermediate_size": 9216, + "max_position_embeddings": 8192, + "model_type": "gemma2", + "num_attention_heads": 8, + "num_hidden_layers": 26, + "num_key_value_heads": 4, + "pad_token_id": 0, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_theta": 10000.0, + "sliding_window": 4096, + "torch_dtype": "bfloat16", + "transformers_version": "4.45.0", + "unsloth_version": "2024.9", + "use_cache": true, + "vocab_size": 256000 +} + + 
+[INFO|tokenization_utils_base.py:2649] 2024-10-17 08:15:15,245 >> tokenizer config file saved in saves/Gemma-2B/lora/4k_train_2024-10-17-07-48-45/checkpoint-400/tokenizer_config.json + +[INFO|tokenization_utils_base.py:2658] 2024-10-17 08:15:15,245 >> Special tokens file saved in saves/Gemma-2B/lora/4k_train_2024-10-17-07-48-45/checkpoint-400/special_tokens_map.json + +[INFO|trainer.py:3705] 2024-10-17 08:20:53,112 >> Saving model checkpoint to saves/Gemma-2B/lora/4k_train_2024-10-17-07-48-45/checkpoint-500 + +[INFO|configuration_utils.py:672] 2024-10-17 08:20:53,739 >> loading configuration file config.json from cache at /home/.cache/huggingface/hub/models--unsloth--gemma-2-2b-it/snapshots/457f2e15bf550c227ce6ad86e2ec108d3e42c106/config.json + +[INFO|configuration_utils.py:739] 2024-10-17 08:20:53,741 >> Model config Gemma2Config { + "_name_or_path": "unsloth/gemma-2-2b-it", + "architectures": [ + "Gemma2ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": 50.0, + "bos_token_id": 2, + "cache_implementation": "hybrid", + "eos_token_id": [ + 1, + 107 + ], + "final_logit_softcapping": 30.0, + "head_dim": 256, + "hidden_act": "gelu_pytorch_tanh", + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 2304, + "initializer_range": 0.02, + "intermediate_size": 9216, + "max_position_embeddings": 8192, + "model_type": "gemma2", + "num_attention_heads": 8, + "num_hidden_layers": 26, + "num_key_value_heads": 4, + "pad_token_id": 0, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_theta": 10000.0, + "sliding_window": 4096, + "torch_dtype": "bfloat16", + "transformers_version": "4.45.0", + "unsloth_version": "2024.9", + "use_cache": true, + "vocab_size": 256000 +} + + +[INFO|tokenization_utils_base.py:2649] 2024-10-17 08:20:53,840 >> tokenizer config file saved in saves/Gemma-2B/lora/4k_train_2024-10-17-07-48-45/checkpoint-500/tokenizer_config.json + +[INFO|tokenization_utils_base.py:2658] 2024-10-17 
08:20:53,840 >> Special tokens file saved in saves/Gemma-2B/lora/4k_train_2024-10-17-07-48-45/checkpoint-500/special_tokens_map.json + +[INFO|trainer.py:3705] 2024-10-17 08:26:23,393 >> Saving model checkpoint to saves/Gemma-2B/lora/4k_train_2024-10-17-07-48-45/checkpoint-600 + +[INFO|configuration_utils.py:672] 2024-10-17 08:26:25,621 >> loading configuration file config.json from cache at /home/.cache/huggingface/hub/models--unsloth--gemma-2-2b-it/snapshots/457f2e15bf550c227ce6ad86e2ec108d3e42c106/config.json + +[INFO|configuration_utils.py:739] 2024-10-17 08:26:25,622 >> Model config Gemma2Config { + "_name_or_path": "unsloth/gemma-2-2b-it", + "architectures": [ + "Gemma2ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": 50.0, + "bos_token_id": 2, + "cache_implementation": "hybrid", + "eos_token_id": [ + 1, + 107 + ], + "final_logit_softcapping": 30.0, + "head_dim": 256, + "hidden_act": "gelu_pytorch_tanh", + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 2304, + "initializer_range": 0.02, + "intermediate_size": 9216, + "max_position_embeddings": 8192, + "model_type": "gemma2", + "num_attention_heads": 8, + "num_hidden_layers": 26, + "num_key_value_heads": 4, + "pad_token_id": 0, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_theta": 10000.0, + "sliding_window": 4096, + "torch_dtype": "bfloat16", + "transformers_version": "4.45.0", + "unsloth_version": "2024.9", + "use_cache": true, + "vocab_size": 256000 +} + + +[INFO|tokenization_utils_base.py:2649] 2024-10-17 08:26:25,722 >> tokenizer config file saved in saves/Gemma-2B/lora/4k_train_2024-10-17-07-48-45/checkpoint-600/tokenizer_config.json + +[INFO|tokenization_utils_base.py:2658] 2024-10-17 08:26:25,722 >> Special tokens file saved in saves/Gemma-2B/lora/4k_train_2024-10-17-07-48-45/checkpoint-600/special_tokens_map.json + +[INFO|trainer.py:3705] 2024-10-17 08:31:53,889 >> Saving model checkpoint to 
saves/Gemma-2B/lora/4k_train_2024-10-17-07-48-45/checkpoint-700 + +[INFO|configuration_utils.py:672] 2024-10-17 08:31:54,915 >> loading configuration file config.json from cache at /home/.cache/huggingface/hub/models--unsloth--gemma-2-2b-it/snapshots/457f2e15bf550c227ce6ad86e2ec108d3e42c106/config.json + +[INFO|configuration_utils.py:739] 2024-10-17 08:31:54,917 >> Model config Gemma2Config { + "_name_or_path": "unsloth/gemma-2-2b-it", + "architectures": [ + "Gemma2ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": 50.0, + "bos_token_id": 2, + "cache_implementation": "hybrid", + "eos_token_id": [ + 1, + 107 + ], + "final_logit_softcapping": 30.0, + "head_dim": 256, + "hidden_act": "gelu_pytorch_tanh", + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 2304, + "initializer_range": 0.02, + "intermediate_size": 9216, + "max_position_embeddings": 8192, + "model_type": "gemma2", + "num_attention_heads": 8, + "num_hidden_layers": 26, + "num_key_value_heads": 4, + "pad_token_id": 0, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_theta": 10000.0, + "sliding_window": 4096, + "torch_dtype": "bfloat16", + "transformers_version": "4.45.0", + "unsloth_version": "2024.9", + "use_cache": true, + "vocab_size": 256000 +} + + +[INFO|tokenization_utils_base.py:2649] 2024-10-17 08:31:55,024 >> tokenizer config file saved in saves/Gemma-2B/lora/4k_train_2024-10-17-07-48-45/checkpoint-700/tokenizer_config.json + +[INFO|tokenization_utils_base.py:2658] 2024-10-17 08:31:55,024 >> Special tokens file saved in saves/Gemma-2B/lora/4k_train_2024-10-17-07-48-45/checkpoint-700/special_tokens_map.json + +[INFO|trainer.py:3705] 2024-10-17 08:37:04,811 >> Saving model checkpoint to saves/Gemma-2B/lora/4k_train_2024-10-17-07-48-45/checkpoint-792 + +[INFO|configuration_utils.py:672] 2024-10-17 08:37:05,459 >> loading configuration file config.json from cache at 
/home/.cache/huggingface/hub/models--unsloth--gemma-2-2b-it/snapshots/457f2e15bf550c227ce6ad86e2ec108d3e42c106/config.json + +[INFO|configuration_utils.py:739] 2024-10-17 08:37:05,460 >> Model config Gemma2Config { + "_name_or_path": "unsloth/gemma-2-2b-it", + "architectures": [ + "Gemma2ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": 50.0, + "bos_token_id": 2, + "cache_implementation": "hybrid", + "eos_token_id": [ + 1, + 107 + ], + "final_logit_softcapping": 30.0, + "head_dim": 256, + "hidden_act": "gelu_pytorch_tanh", + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 2304, + "initializer_range": 0.02, + "intermediate_size": 9216, + "max_position_embeddings": 8192, + "model_type": "gemma2", + "num_attention_heads": 8, + "num_hidden_layers": 26, + "num_key_value_heads": 4, + "pad_token_id": 0, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_theta": 10000.0, + "sliding_window": 4096, + "torch_dtype": "bfloat16", + "transformers_version": "4.45.0", + "unsloth_version": "2024.9", + "use_cache": true, + "vocab_size": 256000 +} + + +[INFO|tokenization_utils_base.py:2649] 2024-10-17 08:37:05,563 >> tokenizer config file saved in saves/Gemma-2B/lora/4k_train_2024-10-17-07-48-45/checkpoint-792/tokenizer_config.json + +[INFO|tokenization_utils_base.py:2658] 2024-10-17 08:37:05,566 >> Special tokens file saved in saves/Gemma-2B/lora/4k_train_2024-10-17-07-48-45/checkpoint-792/special_tokens_map.json + +[INFO|trainer.py:2505] 2024-10-17 08:37:06,016 >> + +Training completed. 
Do not forget to share your model on huggingface.co/models =) + + + +[INFO|trainer.py:3705] 2024-10-17 08:37:06,018 >> Saving model checkpoint to saves/Gemma-2B/lora/4k_train_2024-10-17-07-48-45 + +[INFO|configuration_utils.py:672] 2024-10-17 08:37:07,036 >> loading configuration file config.json from cache at /home/.cache/huggingface/hub/models--unsloth--gemma-2-2b-it/snapshots/457f2e15bf550c227ce6ad86e2ec108d3e42c106/config.json + +[INFO|configuration_utils.py:739] 2024-10-17 08:37:07,036 >> Model config Gemma2Config { + "_name_or_path": "unsloth/gemma-2-2b-it", + "architectures": [ + "Gemma2ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": 50.0, + "bos_token_id": 2, + "cache_implementation": "hybrid", + "eos_token_id": [ + 1, + 107 + ], + "final_logit_softcapping": 30.0, + "head_dim": 256, + "hidden_act": "gelu_pytorch_tanh", + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 2304, + "initializer_range": 0.02, + "intermediate_size": 9216, + "max_position_embeddings": 8192, + "model_type": "gemma2", + "num_attention_heads": 8, + "num_hidden_layers": 26, + "num_key_value_heads": 4, + "pad_token_id": 0, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_theta": 10000.0, + "sliding_window": 4096, + "torch_dtype": "bfloat16", + "transformers_version": "4.45.0", + "unsloth_version": "2024.9", + "use_cache": true, + "vocab_size": 256000 +} + + +[INFO|tokenization_utils_base.py:2649] 2024-10-17 08:37:07,112 >> tokenizer config file saved in saves/Gemma-2B/lora/4k_train_2024-10-17-07-48-45/tokenizer_config.json + +[INFO|tokenization_utils_base.py:2658] 2024-10-17 08:37:07,112 >> Special tokens file saved in saves/Gemma-2B/lora/4k_train_2024-10-17-07-48-45/special_tokens_map.json + +[INFO|modelcard.py:449] 2024-10-17 08:37:07,533 >> Dropping the following result as it does not have all the necessary fields: +{'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}} + diff --git 
a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..8d6368f7e735fbe4781bf6e956b7c6ad0586df80 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,34 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..a4a305d1de4d8f47c0252b4d7fe65a10dd8e2c22 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f7eee611703c5ce5d1eee32d9cdcfe465647b8aff0c1dfb3bed7ad7dbb05060 +size 34362873 diff --git a/tokenizer.model b/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..97f33b16e8481f3849fb6d02d1656628e9e22c91 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,2015 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, 
+ "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + 
"31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": 
{ + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + 
"content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, 
+ "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255968": { + "content": "[toxicity=0]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255969": { + "content": "\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255970": { + "content": "\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255971": { + "content": "\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + 
"255972": { + "content": "\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255973": { + "content": "\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255974": { + "content": "\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255975": { + "content": "\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255976": { + "content": "\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255977": { + "content": "\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255978": { + "content": "\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255979": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255980": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255981": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255982": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255983": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255984": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255985": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255986": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255987": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255988": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255989": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255990": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255991": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255992": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255993": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255994": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255995": { + "content": 
"\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255996": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255997": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255998": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255999": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ system_message + '\n' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ 'Human: ' + content + '\nAssistant:' }}{% elif message['role'] == 'assistant' %}{{ content + '' + '\n' }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "padding_side": "right", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "split_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/train_results.json b/train_results.json new file mode 100644 index 
0000000000000000000000000000000000000000..dd477c643160dd09539290d5310457e09cccfb4c --- /dev/null +++ b/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 5.971724787935909, + "num_input_tokens_seen": 8063376, + "total_flos": 9.844904305885184e+16, + "train_loss": 2.431757736085641, + "train_runtime": 2652.6299, + "train_samples_per_second": 9.6, + "train_steps_per_second": 0.299 +} \ No newline at end of file diff --git a/trainer_log.jsonl b/trainer_log.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..df6e514155a189e27f66b8c726319507e03718da --- /dev/null +++ b/trainer_log.jsonl @@ -0,0 +1,159 @@ +{"current_steps": 5, "total_steps": 792, "loss": 4.3742, "learning_rate": 4.9995083170283816e-05, "epoch": 0.03770028275212064, "percentage": 0.63, "elapsed_time": "0:00:17", "remaining_time": "0:45:21", "throughput": 2923.58, "total_tokens": 50544} +{"current_steps": 10, "total_steps": 792, "loss": 3.8434, "learning_rate": 4.998033461515242e-05, "epoch": 0.07540056550424128, "percentage": 1.26, "elapsed_time": "0:00:34", "remaining_time": "0:44:34", "throughput": 3042.77, "total_tokens": 104080} +{"current_steps": 15, "total_steps": 792, "loss": 3.6027, "learning_rate": 4.9955760135896534e-05, "epoch": 0.11310084825636192, "percentage": 1.89, "elapsed_time": "0:00:51", "remaining_time": "0:44:28", "throughput": 3023.63, "total_tokens": 155776} +{"current_steps": 20, "total_steps": 792, "loss": 3.4102, "learning_rate": 4.992136939879856e-05, "epoch": 0.15080113100848255, "percentage": 2.53, "elapsed_time": "0:01:07", "remaining_time": "0:43:23", "throughput": 3012.04, "total_tokens": 203184} +{"current_steps": 25, "total_steps": 792, "loss": 3.153, "learning_rate": 4.9877175931330346e-05, "epoch": 0.1885014137606032, "percentage": 3.16, "elapsed_time": "0:01:24", "remaining_time": "0:43:26", "throughput": 3009.96, "total_tokens": 255744} +{"current_steps": 30, "total_steps": 792, "loss": 3.0949, "learning_rate": 4.982319711683221e-05, "epoch": 
0.22620169651272384, "percentage": 3.79, "elapsed_time": "0:01:42", "remaining_time": "0:43:16", "throughput": 3010.76, "total_tokens": 307808} +{"current_steps": 35, "total_steps": 792, "loss": 3.0392, "learning_rate": 4.975945418767529e-05, "epoch": 0.2639019792648445, "percentage": 4.42, "elapsed_time": "0:01:58", "remaining_time": "0:42:35", "throughput": 3031.72, "total_tokens": 358192} +{"current_steps": 40, "total_steps": 792, "loss": 3.0685, "learning_rate": 4.968597221690986e-05, "epoch": 0.3016022620169651, "percentage": 5.05, "elapsed_time": "0:02:14", "remaining_time": "0:42:10", "throughput": 3037.08, "total_tokens": 408832} +{"current_steps": 45, "total_steps": 792, "loss": 3.0385, "learning_rate": 4.96027801084029e-05, "epoch": 0.3393025447690858, "percentage": 5.68, "elapsed_time": "0:02:30", "remaining_time": "0:41:37", "throughput": 3047.02, "total_tokens": 458368} +{"current_steps": 50, "total_steps": 792, "loss": 3.1079, "learning_rate": 4.950991058546893e-05, "epoch": 0.3770028275212064, "percentage": 6.31, "elapsed_time": "0:02:47", "remaining_time": "0:41:31", "throughput": 3049.95, "total_tokens": 512160} +{"current_steps": 55, "total_steps": 792, "loss": 2.9883, "learning_rate": 4.940740017799833e-05, "epoch": 0.41470311027332707, "percentage": 6.94, "elapsed_time": "0:03:04", "remaining_time": "0:41:13", "throughput": 3049.6, "total_tokens": 562928} +{"current_steps": 60, "total_steps": 792, "loss": 2.969, "learning_rate": 4.929528920808854e-05, "epoch": 0.4524033930254477, "percentage": 7.58, "elapsed_time": "0:03:20", "remaining_time": "0:40:45", "throughput": 3057.85, "total_tokens": 613072} +{"current_steps": 65, "total_steps": 792, "loss": 3.0019, "learning_rate": 4.917362177418342e-05, "epoch": 0.49010367577756836, "percentage": 8.21, "elapsed_time": "0:03:37", "remaining_time": "0:40:29", "throughput": 3059.22, "total_tokens": 664640} +{"current_steps": 70, "total_steps": 792, "loss": 2.9984, "learning_rate": 4.904244573372733e-05, 
"epoch": 0.527803958529689, "percentage": 8.84, "elapsed_time": "0:03:54", "remaining_time": "0:40:14", "throughput": 3062.56, "total_tokens": 716800} +{"current_steps": 75, "total_steps": 792, "loss": 2.9848, "learning_rate": 4.8901812684340564e-05, "epoch": 0.5655042412818096, "percentage": 9.47, "elapsed_time": "0:04:09", "remaining_time": "0:39:47", "throughput": 3069.75, "total_tokens": 766528} +{"current_steps": 80, "total_steps": 792, "loss": 2.9169, "learning_rate": 4.8751777943523634e-05, "epoch": 0.6032045240339302, "percentage": 10.1, "elapsed_time": "0:04:26", "remaining_time": "0:39:30", "throughput": 3068.71, "total_tokens": 817376} +{"current_steps": 85, "total_steps": 792, "loss": 2.8844, "learning_rate": 4.8592400526898314e-05, "epoch": 0.6409048067860509, "percentage": 10.73, "elapsed_time": "0:04:42", "remaining_time": "0:39:12", "throughput": 3056.83, "total_tokens": 864688} +{"current_steps": 90, "total_steps": 792, "loss": 2.9422, "learning_rate": 4.842374312499405e-05, "epoch": 0.6786050895381716, "percentage": 11.36, "elapsed_time": "0:04:58", "remaining_time": "0:38:49", "throughput": 3057.87, "total_tokens": 913216} +{"current_steps": 95, "total_steps": 792, "loss": 2.8431, "learning_rate": 4.824587207858888e-05, "epoch": 0.7163053722902922, "percentage": 11.99, "elapsed_time": "0:05:15", "remaining_time": "0:38:36", "throughput": 3053.36, "total_tokens": 964144} +{"current_steps": 100, "total_steps": 792, "loss": 2.8377, "learning_rate": 4.805885735261454e-05, "epoch": 0.7540056550424128, "percentage": 12.63, "elapsed_time": "0:05:33", "remaining_time": "0:38:27", "throughput": 3055.29, "total_tokens": 1018768} +{"current_steps": 105, "total_steps": 792, "loss": 2.7843, "learning_rate": 4.786277250863599e-05, "epoch": 0.7917059377945335, "percentage": 13.26, "elapsed_time": "0:05:51", "remaining_time": "0:38:22", "throughput": 3041.61, "total_tokens": 1070592} +{"current_steps": 110, "total_steps": 792, "loss": 2.8218, "learning_rate": 
4.765769467591625e-05, "epoch": 0.8294062205466541, "percentage": 13.89, "elapsed_time": "0:06:09", "remaining_time": "0:38:08", "throughput": 3049.16, "total_tokens": 1125392} +{"current_steps": 115, "total_steps": 792, "loss": 2.877, "learning_rate": 4.744370452107789e-05, "epoch": 0.8671065032987747, "percentage": 14.52, "elapsed_time": "0:06:25", "remaining_time": "0:37:49", "throughput": 3051.46, "total_tokens": 1176256} +{"current_steps": 120, "total_steps": 792, "loss": 2.9028, "learning_rate": 4.722088621637309e-05, "epoch": 0.9048067860508954, "percentage": 15.15, "elapsed_time": "0:06:41", "remaining_time": "0:37:25", "throughput": 3055.51, "total_tokens": 1225376} +{"current_steps": 125, "total_steps": 792, "loss": 2.762, "learning_rate": 4.698932740657479e-05, "epoch": 0.942507068803016, "percentage": 15.78, "elapsed_time": "0:06:58", "remaining_time": "0:37:12", "throughput": 3051.65, "total_tokens": 1277024} +{"current_steps": 130, "total_steps": 792, "loss": 2.9164, "learning_rate": 4.6749119174501975e-05, "epoch": 0.9802073515551367, "percentage": 16.41, "elapsed_time": "0:07:13", "remaining_time": "0:36:47", "throughput": 3051.75, "total_tokens": 1322912} +{"current_steps": 135, "total_steps": 792, "loss": 2.736, "learning_rate": 4.6500356005192514e-05, "epoch": 1.0179076343072573, "percentage": 17.05, "elapsed_time": "0:07:29", "remaining_time": "0:36:27", "throughput": 3045.06, "total_tokens": 1368624} +{"current_steps": 140, "total_steps": 792, "loss": 2.7414, "learning_rate": 4.6243135748737864e-05, "epoch": 1.055607917059378, "percentage": 17.68, "elapsed_time": "0:07:44", "remaining_time": "0:36:05", "throughput": 3049.46, "total_tokens": 1417664} +{"current_steps": 145, "total_steps": 792, "loss": 2.6961, "learning_rate": 4.597755958179406e-05, "epoch": 1.0933081998114986, "percentage": 18.31, "elapsed_time": "0:08:01", "remaining_time": "0:35:47", "throughput": 3051.85, "total_tokens": 1469120} +{"current_steps": 150, "total_steps": 792, 
"loss": 2.6134, "learning_rate": 4.570373196778427e-05, "epoch": 1.1310084825636193, "percentage": 18.94, "elapsed_time": "0:08:18", "remaining_time": "0:35:35", "throughput": 3049.73, "total_tokens": 1521632} +{"current_steps": 155, "total_steps": 792, "loss": 2.7548, "learning_rate": 4.5421760615808474e-05, "epoch": 1.1687087653157398, "percentage": 19.57, "elapsed_time": "0:08:33", "remaining_time": "0:35:11", "throughput": 3046.59, "total_tokens": 1565296} +{"current_steps": 160, "total_steps": 792, "loss": 2.702, "learning_rate": 4.513175643827647e-05, "epoch": 1.2064090480678604, "percentage": 20.2, "elapsed_time": "0:08:50", "remaining_time": "0:34:56", "throughput": 3047.17, "total_tokens": 1617088} +{"current_steps": 165, "total_steps": 792, "loss": 2.6893, "learning_rate": 4.4833833507280884e-05, "epoch": 1.244109330819981, "percentage": 20.83, "elapsed_time": "0:09:05", "remaining_time": "0:34:34", "throughput": 3047.1, "total_tokens": 1663584} +{"current_steps": 170, "total_steps": 792, "loss": 2.5671, "learning_rate": 4.4528109009727336e-05, "epoch": 1.2818096135721018, "percentage": 21.46, "elapsed_time": "0:09:22", "remaining_time": "0:34:16", "throughput": 3048.75, "total_tokens": 1713744} +{"current_steps": 175, "total_steps": 792, "loss": 2.7682, "learning_rate": 4.42147032012394e-05, "epoch": 1.3195098963242224, "percentage": 22.1, "elapsed_time": "0:09:37", "remaining_time": "0:33:56", "throughput": 3051.21, "total_tokens": 1762768} +{"current_steps": 180, "total_steps": 792, "loss": 2.7062, "learning_rate": 4.389373935885646e-05, "epoch": 1.3572101790763431, "percentage": 22.73, "elapsed_time": "0:09:55", "remaining_time": "0:33:43", "throughput": 3051.2, "total_tokens": 1815808} +{"current_steps": 185, "total_steps": 792, "loss": 2.7092, "learning_rate": 4.356534373254316e-05, "epoch": 1.3949104618284638, "percentage": 23.36, "elapsed_time": "0:10:13", "remaining_time": "0:33:34", "throughput": 3048.11, "total_tokens": 1871040} 
+{"current_steps": 190, "total_steps": 792, "loss": 2.6518, "learning_rate": 4.322964549552943e-05, "epoch": 1.4326107445805842, "percentage": 23.99, "elapsed_time": "0:10:30", "remaining_time": "0:33:18", "throughput": 3050.86, "total_tokens": 1924048} +{"current_steps": 195, "total_steps": 792, "loss": 2.6592, "learning_rate": 4.288677669350066e-05, "epoch": 1.4703110273327051, "percentage": 24.62, "elapsed_time": "0:10:46", "remaining_time": "0:33:00", "throughput": 3049.64, "total_tokens": 1972720} +{"current_steps": 200, "total_steps": 792, "loss": 2.716, "learning_rate": 4.2536872192658036e-05, "epoch": 1.5080113100848256, "percentage": 25.25, "elapsed_time": "0:11:02", "remaining_time": "0:32:42", "throughput": 3050.56, "total_tokens": 2022112} +{"current_steps": 205, "total_steps": 792, "loss": 2.6746, "learning_rate": 4.218006962666934e-05, "epoch": 1.5457115928369463, "percentage": 25.88, "elapsed_time": "0:11:23", "remaining_time": "0:32:38", "throughput": 3029.26, "total_tokens": 2072000} +{"current_steps": 210, "total_steps": 792, "loss": 2.6306, "learning_rate": 4.181650934253132e-05, "epoch": 1.583411875589067, "percentage": 26.52, "elapsed_time": "0:11:40", "remaining_time": "0:32:21", "throughput": 3033.93, "total_tokens": 2125632} +{"current_steps": 215, "total_steps": 792, "loss": 2.5678, "learning_rate": 4.144633434536467e-05, "epoch": 1.6211121583411876, "percentage": 27.15, "elapsed_time": "0:11:56", "remaining_time": "0:32:03", "throughput": 3033.71, "total_tokens": 2174464} +{"current_steps": 220, "total_steps": 792, "loss": 2.7334, "learning_rate": 4.1069690242163484e-05, "epoch": 1.6588124410933083, "percentage": 27.78, "elapsed_time": "0:12:12", "remaining_time": "0:31:45", "throughput": 3033.93, "total_tokens": 2223408} +{"current_steps": 225, "total_steps": 792, "loss": 2.6658, "learning_rate": 4.06867251845213e-05, "epoch": 1.6965127238454287, "percentage": 28.41, "elapsed_time": "0:12:32", "remaining_time": "0:31:35", "throughput": 
3032.2, "total_tokens": 2281296} +{"current_steps": 230, "total_steps": 792, "loss": 2.6567, "learning_rate": 4.0297589810356165e-05, "epoch": 1.7342130065975496, "percentage": 29.04, "elapsed_time": "0:12:49", "remaining_time": "0:31:21", "throughput": 3032.13, "total_tokens": 2334176} +{"current_steps": 235, "total_steps": 792, "loss": 2.6131, "learning_rate": 3.9902437184657784e-05, "epoch": 1.77191328934967, "percentage": 29.67, "elapsed_time": "0:13:07", "remaining_time": "0:31:07", "throughput": 3032.28, "total_tokens": 2389296} +{"current_steps": 240, "total_steps": 792, "loss": 2.662, "learning_rate": 3.9501422739279956e-05, "epoch": 1.8096135721017907, "percentage": 30.3, "elapsed_time": "0:13:25", "remaining_time": "0:30:52", "throughput": 3033.37, "total_tokens": 2442800} +{"current_steps": 245, "total_steps": 792, "loss": 2.6339, "learning_rate": 3.909470421180201e-05, "epoch": 1.8473138548539114, "percentage": 30.93, "elapsed_time": "0:13:42", "remaining_time": "0:30:35", "throughput": 3034.95, "total_tokens": 2494912} +{"current_steps": 250, "total_steps": 792, "loss": 2.6338, "learning_rate": 3.8682441583483314e-05, "epoch": 1.885014137606032, "percentage": 31.57, "elapsed_time": "0:13:57", "remaining_time": "0:30:16", "throughput": 3037.24, "total_tokens": 2544624} +{"current_steps": 255, "total_steps": 792, "loss": 2.6844, "learning_rate": 3.8264797016335205e-05, "epoch": 1.9227144203581528, "percentage": 32.2, "elapsed_time": "0:14:14", "remaining_time": "0:29:59", "throughput": 3039.87, "total_tokens": 2597792} +{"current_steps": 260, "total_steps": 792, "loss": 2.5939, "learning_rate": 3.7841934789335164e-05, "epoch": 1.9604147031102732, "percentage": 32.83, "elapsed_time": "0:14:30", "remaining_time": "0:29:41", "throughput": 3039.11, "total_tokens": 2646544} +{"current_steps": 265, "total_steps": 792, "loss": 2.7393, "learning_rate": 3.741402123380828e-05, "epoch": 1.998114985862394, "percentage": 33.46, "elapsed_time": "0:14:48", 
"remaining_time": "0:29:26", "throughput": 3040.38, "total_tokens": 2700224} +{"current_steps": 270, "total_steps": 792, "loss": 2.4495, "learning_rate": 3.6981224668001424e-05, "epoch": 2.0358152686145146, "percentage": 34.09, "elapsed_time": "0:15:04", "remaining_time": "0:29:08", "throughput": 3039.17, "total_tokens": 2749104} +{"current_steps": 275, "total_steps": 792, "loss": 2.3823, "learning_rate": 3.654371533087586e-05, "epoch": 2.0735155513666355, "percentage": 34.72, "elapsed_time": "0:15:22", "remaining_time": "0:28:54", "throughput": 3037.96, "total_tokens": 2802992} +{"current_steps": 280, "total_steps": 792, "loss": 2.474, "learning_rate": 3.610166531514436e-05, "epoch": 2.111215834118756, "percentage": 35.35, "elapsed_time": "0:15:36", "remaining_time": "0:28:32", "throughput": 3037.94, "total_tokens": 2844512} +{"current_steps": 285, "total_steps": 792, "loss": 2.4776, "learning_rate": 3.565524849957921e-05, "epoch": 2.1489161168708764, "percentage": 35.98, "elapsed_time": "0:15:52", "remaining_time": "0:28:15", "throughput": 3037.45, "total_tokens": 2894400} +{"current_steps": 290, "total_steps": 792, "loss": 2.4709, "learning_rate": 3.520464048061758e-05, "epoch": 2.1866163996229973, "percentage": 36.62, "elapsed_time": "0:16:08", "remaining_time": "0:27:56", "throughput": 3040.06, "total_tokens": 2944688} +{"current_steps": 295, "total_steps": 792, "loss": 2.5009, "learning_rate": 3.47500185032913e-05, "epoch": 2.2243166823751177, "percentage": 37.25, "elapsed_time": "0:16:24", "remaining_time": "0:27:37", "throughput": 3043.14, "total_tokens": 2994672} +{"current_steps": 300, "total_steps": 792, "loss": 2.4524, "learning_rate": 3.4291561391508185e-05, "epoch": 2.2620169651272386, "percentage": 37.88, "elapsed_time": "0:16:41", "remaining_time": "0:27:21", "throughput": 3041.65, "total_tokens": 3045344} +{"current_steps": 305, "total_steps": 792, "loss": 2.4083, "learning_rate": 3.3829449477712324e-05, "epoch": 2.299717247879359, "percentage": 
38.51, "elapsed_time": "0:17:00", "remaining_time": "0:27:09", "throughput": 3038.29, "total_tokens": 3100688} +{"current_steps": 310, "total_steps": 792, "loss": 2.4713, "learning_rate": 3.336386453195088e-05, "epoch": 2.3374175306314795, "percentage": 39.14, "elapsed_time": "0:17:18", "remaining_time": "0:26:53", "throughput": 3039.26, "total_tokens": 3154896} +{"current_steps": 315, "total_steps": 792, "loss": 2.3986, "learning_rate": 3.2894989690375626e-05, "epoch": 2.3751178133836004, "percentage": 39.77, "elapsed_time": "0:17:35", "remaining_time": "0:26:37", "throughput": 3041.26, "total_tokens": 3209120} +{"current_steps": 320, "total_steps": 792, "loss": 2.4258, "learning_rate": 3.2423009383206876e-05, "epoch": 2.412818096135721, "percentage": 40.4, "elapsed_time": "0:17:51", "remaining_time": "0:26:19", "throughput": 3041.69, "total_tokens": 3257984} +{"current_steps": 325, "total_steps": 792, "loss": 2.4126, "learning_rate": 3.194810926218861e-05, "epoch": 2.4505183788878417, "percentage": 41.04, "elapsed_time": "0:18:08", "remaining_time": "0:26:03", "throughput": 3041.61, "total_tokens": 3309424} +{"current_steps": 330, "total_steps": 792, "loss": 2.3639, "learning_rate": 3.147047612756302e-05, "epoch": 2.488218661639962, "percentage": 41.67, "elapsed_time": "0:18:24", "remaining_time": "0:25:46", "throughput": 3040.63, "total_tokens": 3359152} +{"current_steps": 335, "total_steps": 792, "loss": 2.5176, "learning_rate": 3.099029785459328e-05, "epoch": 2.525918944392083, "percentage": 42.3, "elapsed_time": "0:18:41", "remaining_time": "0:25:29", "throughput": 3039.03, "total_tokens": 3406784} +{"current_steps": 340, "total_steps": 792, "loss": 2.4779, "learning_rate": 3.0507763319663517e-05, "epoch": 2.5636192271442035, "percentage": 42.93, "elapsed_time": "0:18:57", "remaining_time": "0:25:12", "throughput": 3040.31, "total_tokens": 3459328} +{"current_steps": 345, "total_steps": 792, "loss": 2.3833, "learning_rate": 3.002306232598497e-05, "epoch": 
2.6013195098963244, "percentage": 43.56, "elapsed_time": "0:19:14", "remaining_time": "0:24:56", "throughput": 3040.38, "total_tokens": 3511056} +{"current_steps": 350, "total_steps": 792, "loss": 2.3794, "learning_rate": 2.9536385528937567e-05, "epoch": 2.639019792648445, "percentage": 44.19, "elapsed_time": "0:19:31", "remaining_time": "0:24:39", "throughput": 3040.19, "total_tokens": 3562352} +{"current_steps": 355, "total_steps": 792, "loss": 2.4883, "learning_rate": 2.9047924361076345e-05, "epoch": 2.6767200754005653, "percentage": 44.82, "elapsed_time": "0:19:49", "remaining_time": "0:24:24", "throughput": 3039.87, "total_tokens": 3615664} +{"current_steps": 360, "total_steps": 792, "loss": 2.3423, "learning_rate": 2.8557870956832132e-05, "epoch": 2.7144203581526862, "percentage": 45.45, "elapsed_time": "0:20:04", "remaining_time": "0:24:04", "throughput": 3040.67, "total_tokens": 3661424} +{"current_steps": 365, "total_steps": 792, "loss": 2.4092, "learning_rate": 2.8066418076936167e-05, "epoch": 2.7521206409048067, "percentage": 46.09, "elapsed_time": "0:20:20", "remaining_time": "0:23:48", "throughput": 3039.05, "total_tokens": 3710592} +{"current_steps": 370, "total_steps": 792, "loss": 2.4818, "learning_rate": 2.7573759032598366e-05, "epoch": 2.7898209236569276, "percentage": 46.72, "elapsed_time": "0:20:38", "remaining_time": "0:23:32", "throughput": 3039.87, "total_tokens": 3765728} +{"current_steps": 375, "total_steps": 792, "loss": 2.4587, "learning_rate": 2.7080087609469062e-05, "epoch": 2.827521206409048, "percentage": 47.35, "elapsed_time": "0:20:55", "remaining_time": "0:23:16", "throughput": 3038.84, "total_tokens": 3815360} +{"current_steps": 380, "total_steps": 792, "loss": 2.3462, "learning_rate": 2.6585597991414114e-05, "epoch": 2.8652214891611685, "percentage": 47.98, "elapsed_time": "0:21:12", "remaining_time": "0:22:59", "throughput": 3039.66, "total_tokens": 3868096} +{"current_steps": 385, "total_steps": 792, "loss": 2.3713, 
"learning_rate": 2.6090484684133404e-05, "epoch": 2.9029217719132894, "percentage": 48.61, "elapsed_time": "0:21:27", "remaining_time": "0:22:40", "throughput": 3040.83, "total_tokens": 3913696} +{"current_steps": 390, "total_steps": 792, "loss": 2.4618, "learning_rate": 2.5594942438652688e-05, "epoch": 2.9406220546654103, "percentage": 49.24, "elapsed_time": "0:21:45", "remaining_time": "0:22:25", "throughput": 3043.08, "total_tokens": 3971840} +{"current_steps": 395, "total_steps": 792, "loss": 2.5454, "learning_rate": 2.509916617471903e-05, "epoch": 2.9783223374175307, "percentage": 49.87, "elapsed_time": "0:22:03", "remaining_time": "0:22:10", "throughput": 3041.45, "total_tokens": 4025040} +{"current_steps": 400, "total_steps": 792, "loss": 2.3007, "learning_rate": 2.46033509041298e-05, "epoch": 3.016022620169651, "percentage": 50.51, "elapsed_time": "0:22:20", "remaining_time": "0:21:53", "throughput": 3041.39, "total_tokens": 4075488} +{"current_steps": 405, "total_steps": 792, "loss": 2.274, "learning_rate": 2.410769165402549e-05, "epoch": 3.053722902921772, "percentage": 51.14, "elapsed_time": "0:22:39", "remaining_time": "0:21:39", "throughput": 3037.23, "total_tokens": 4130496} +{"current_steps": 410, "total_steps": 792, "loss": 2.2457, "learning_rate": 2.3612383390176503e-05, "epoch": 3.0914231856738925, "percentage": 51.77, "elapsed_time": "0:22:56", "remaining_time": "0:21:22", "throughput": 3038.87, "total_tokens": 4181504} +{"current_steps": 415, "total_steps": 792, "loss": 2.2595, "learning_rate": 2.3117620940294048e-05, "epoch": 3.1291234684260134, "percentage": 52.4, "elapsed_time": "0:23:14", "remaining_time": "0:21:06", "throughput": 3039.02, "total_tokens": 4236816} +{"current_steps": 420, "total_steps": 792, "loss": 2.2326, "learning_rate": 2.2623598917395438e-05, "epoch": 3.166823751178134, "percentage": 53.03, "elapsed_time": "0:23:30", "remaining_time": "0:20:49", "throughput": 3040.26, "total_tokens": 4289488} +{"current_steps": 425, 
"total_steps": 792, "loss": 2.2595, "learning_rate": 2.213051164325366e-05, "epoch": 3.2045240339302543, "percentage": 53.66, "elapsed_time": "0:23:45", "remaining_time": "0:20:31", "throughput": 3040.5, "total_tokens": 4334704} +{"current_steps": 430, "total_steps": 792, "loss": 2.2084, "learning_rate": 2.1638553071961708e-05, "epoch": 3.242224316682375, "percentage": 54.29, "elapsed_time": "0:24:00", "remaining_time": "0:20:12", "throughput": 3038.95, "total_tokens": 4377360} +{"current_steps": 435, "total_steps": 792, "loss": 2.21, "learning_rate": 2.1147916713641367e-05, "epoch": 3.2799245994344957, "percentage": 54.92, "elapsed_time": "0:24:17", "remaining_time": "0:19:55", "throughput": 3039.08, "total_tokens": 4428544} +{"current_steps": 440, "total_steps": 792, "loss": 2.2449, "learning_rate": 2.0658795558326743e-05, "epoch": 3.3176248821866166, "percentage": 55.56, "elapsed_time": "0:24:33", "remaining_time": "0:19:38", "throughput": 3039.7, "total_tokens": 4477664} +{"current_steps": 445, "total_steps": 792, "loss": 2.2457, "learning_rate": 2.017138200005236e-05, "epoch": 3.355325164938737, "percentage": 56.19, "elapsed_time": "0:24:50", "remaining_time": "0:19:22", "throughput": 3041.13, "total_tokens": 4533792} +{"current_steps": 450, "total_steps": 792, "loss": 2.3035, "learning_rate": 1.9685867761175584e-05, "epoch": 3.3930254476908575, "percentage": 56.82, "elapsed_time": "0:25:07", "remaining_time": "0:19:05", "throughput": 3040.89, "total_tokens": 4584800} +{"current_steps": 455, "total_steps": 792, "loss": 2.3204, "learning_rate": 1.9202443816963425e-05, "epoch": 3.4307257304429783, "percentage": 57.45, "elapsed_time": "0:25:24", "remaining_time": "0:18:49", "throughput": 3039.95, "total_tokens": 4634976} +{"current_steps": 460, "total_steps": 792, "loss": 2.2055, "learning_rate": 1.872130032047302e-05, "epoch": 3.468426013195099, "percentage": 58.08, "elapsed_time": "0:25:42", "remaining_time": "0:18:33", "throughput": 3040.94, "total_tokens": 
4691344} +{"current_steps": 465, "total_steps": 792, "loss": 2.3327, "learning_rate": 1.824262652775568e-05, "epoch": 3.5061262959472197, "percentage": 58.71, "elapsed_time": "0:25:59", "remaining_time": "0:18:16", "throughput": 3043.14, "total_tokens": 4745536} +{"current_steps": 470, "total_steps": 792, "loss": 2.2214, "learning_rate": 1.7766610723413684e-05, "epoch": 3.54382657869934, "percentage": 59.34, "elapsed_time": "0:26:17", "remaining_time": "0:18:00", "throughput": 3041.92, "total_tokens": 4798128} +{"current_steps": 475, "total_steps": 792, "loss": 2.3097, "learning_rate": 1.7293440146539196e-05, "epoch": 3.581526861451461, "percentage": 59.97, "elapsed_time": "0:26:33", "remaining_time": "0:17:43", "throughput": 3042.01, "total_tokens": 4847632} +{"current_steps": 480, "total_steps": 792, "loss": 2.2966, "learning_rate": 1.682330091706446e-05, "epoch": 3.6192271442035815, "percentage": 60.61, "elapsed_time": "0:26:52", "remaining_time": "0:17:28", "throughput": 3042.15, "total_tokens": 4905648} +{"current_steps": 485, "total_steps": 792, "loss": 2.2006, "learning_rate": 1.6356377962552238e-05, "epoch": 3.6569274269557024, "percentage": 61.24, "elapsed_time": "0:27:09", "remaining_time": "0:17:11", "throughput": 3042.05, "total_tokens": 4955600} +{"current_steps": 490, "total_steps": 792, "loss": 2.2463, "learning_rate": 1.589285494545514e-05, "epoch": 3.694627709707823, "percentage": 61.87, "elapsed_time": "0:27:26", "remaining_time": "0:16:54", "throughput": 3041.35, "total_tokens": 5007424} +{"current_steps": 495, "total_steps": 792, "loss": 2.2045, "learning_rate": 1.5432914190872757e-05, "epoch": 3.7323279924599433, "percentage": 62.5, "elapsed_time": "0:27:43", "remaining_time": "0:16:37", "throughput": 3041.88, "total_tokens": 5058848} +{"current_steps": 500, "total_steps": 792, "loss": 2.135, "learning_rate": 1.4976736614834664e-05, "epoch": 3.770028275212064, "percentage": 63.13, "elapsed_time": "0:27:59", "remaining_time": "0:16:20", 
"throughput": 3042.42, "total_tokens": 5109904} +{"current_steps": 505, "total_steps": 792, "loss": 2.3023, "learning_rate": 1.4524501653137787e-05, "epoch": 3.8077285579641846, "percentage": 63.76, "elapsed_time": "0:28:16", "remaining_time": "0:16:03", "throughput": 3040.38, "total_tokens": 5156496} +{"current_steps": 510, "total_steps": 792, "loss": 2.2636, "learning_rate": 1.4076387190766017e-05, "epoch": 3.8454288407163055, "percentage": 64.39, "elapsed_time": "0:28:32", "remaining_time": "0:15:46", "throughput": 3040.79, "total_tokens": 5207824} +{"current_steps": 515, "total_steps": 792, "loss": 2.1942, "learning_rate": 1.363256949191972e-05, "epoch": 3.883129123468426, "percentage": 65.03, "elapsed_time": "0:28:49", "remaining_time": "0:15:30", "throughput": 3039.19, "total_tokens": 5256608} +{"current_steps": 520, "total_steps": 792, "loss": 2.2731, "learning_rate": 1.3193223130682936e-05, "epoch": 3.9208294062205464, "percentage": 65.66, "elapsed_time": "0:29:05", "remaining_time": "0:15:13", "throughput": 3039.12, "total_tokens": 5305584} +{"current_steps": 525, "total_steps": 792, "loss": 2.1669, "learning_rate": 1.2758520922355226e-05, "epoch": 3.9585296889726673, "percentage": 66.29, "elapsed_time": "0:29:20", "remaining_time": "0:14:55", "throughput": 3039.05, "total_tokens": 5349712} +{"current_steps": 530, "total_steps": 792, "loss": 2.2146, "learning_rate": 1.2328633855475429e-05, "epoch": 3.9962299717247878, "percentage": 66.92, "elapsed_time": "0:29:37", "remaining_time": "0:14:38", "throughput": 3038.65, "total_tokens": 5400336} +{"current_steps": 535, "total_steps": 792, "loss": 2.0324, "learning_rate": 1.1903731024563966e-05, "epoch": 4.033930254476909, "percentage": 67.55, "elapsed_time": "0:29:53", "remaining_time": "0:14:21", "throughput": 3039.56, "total_tokens": 5451776} +{"current_steps": 540, "total_steps": 792, "loss": 2.0762, "learning_rate": 1.148397956361007e-05, "epoch": 4.071630537229029, "percentage": 68.18, "elapsed_time": 
"0:30:11", "remaining_time": "0:14:05", "throughput": 3040.36, "total_tokens": 5507520} +{"current_steps": 545, "total_steps": 792, "loss": 2.0949, "learning_rate": 1.106954458033026e-05, "epoch": 4.10933081998115, "percentage": 68.81, "elapsed_time": "0:30:29", "remaining_time": "0:13:49", "throughput": 3039.99, "total_tokens": 5562688} +{"current_steps": 550, "total_steps": 792, "loss": 2.1853, "learning_rate": 1.0660589091223855e-05, "epoch": 4.147031102733271, "percentage": 69.44, "elapsed_time": "0:30:45", "remaining_time": "0:13:31", "throughput": 3040.69, "total_tokens": 5610800} +{"current_steps": 555, "total_steps": 792, "loss": 2.1706, "learning_rate": 1.025727395745095e-05, "epoch": 4.184731385485391, "percentage": 70.08, "elapsed_time": "0:31:00", "remaining_time": "0:13:14", "throughput": 3041.11, "total_tokens": 5657616} +{"current_steps": 560, "total_steps": 792, "loss": 2.1199, "learning_rate": 9.859757821558337e-06, "epoch": 4.222431668237512, "percentage": 70.71, "elapsed_time": "0:31:17", "remaining_time": "0:12:57", "throughput": 3041.42, "total_tokens": 5710960} +{"current_steps": 565, "total_steps": 792, "loss": 2.0353, "learning_rate": 9.468197045077976e-06, "epoch": 4.260131950989632, "percentage": 71.34, "elapsed_time": "0:31:32", "remaining_time": "0:12:40", "throughput": 3041.1, "total_tokens": 5756608} +{"current_steps": 570, "total_steps": 792, "loss": 2.1101, "learning_rate": 9.082745647022797e-06, "epoch": 4.297832233741753, "percentage": 71.97, "elapsed_time": "0:31:51", "remaining_time": "0:12:24", "throughput": 3041.4, "total_tokens": 5812304} +{"current_steps": 575, "total_steps": 792, "loss": 2.1076, "learning_rate": 8.703555243303835e-06, "epoch": 4.335532516493874, "percentage": 72.6, "elapsed_time": "0:32:06", "remaining_time": "0:12:07", "throughput": 3041.66, "total_tokens": 5859808} +{"current_steps": 580, "total_steps": 792, "loss": 2.0305, "learning_rate": 8.330774987092712e-06, "epoch": 4.3732327992459945, "percentage": 
73.23, "elapsed_time": "0:32:22", "remaining_time": "0:11:50", "throughput": 3041.75, "total_tokens": 5908784} +{"current_steps": 585, "total_steps": 792, "loss": 2.1132, "learning_rate": 7.96455151015272e-06, "epoch": 4.410933081998115, "percentage": 73.86, "elapsed_time": "0:32:39", "remaining_time": "0:11:33", "throughput": 3041.48, "total_tokens": 5958672} +{"current_steps": 590, "total_steps": 792, "loss": 2.1527, "learning_rate": 7.605028865161809e-06, "epoch": 4.448633364750235, "percentage": 74.49, "elapsed_time": "0:32:56", "remaining_time": "0:11:16", "throughput": 3041.34, "total_tokens": 6010720} +{"current_steps": 595, "total_steps": 792, "loss": 2.0948, "learning_rate": 7.25234846904993e-06, "epoch": 4.486333647502356, "percentage": 75.13, "elapsed_time": "0:33:12", "remaining_time": "0:10:59", "throughput": 3041.88, "total_tokens": 6061440} +{"current_steps": 600, "total_steps": 792, "loss": 2.1222, "learning_rate": 6.906649047373246e-06, "epoch": 4.524033930254477, "percentage": 75.76, "elapsed_time": "0:33:29", "remaining_time": "0:10:43", "throughput": 3042.65, "total_tokens": 6115216} +{"current_steps": 605, "total_steps": 792, "loss": 2.0719, "learning_rate": 6.568066579746901e-06, "epoch": 4.561734213006598, "percentage": 76.39, "elapsed_time": "0:33:47", "remaining_time": "0:10:26", "throughput": 3039.27, "total_tokens": 6160944} +{"current_steps": 610, "total_steps": 792, "loss": 2.1399, "learning_rate": 6.2367342463579475e-06, "epoch": 4.599434495758718, "percentage": 77.02, "elapsed_time": "0:34:03", "remaining_time": "0:10:09", "throughput": 3037.86, "total_tokens": 6209008} +{"current_steps": 615, "total_steps": 792, "loss": 2.0671, "learning_rate": 5.912782375579412e-06, "epoch": 4.6371347785108386, "percentage": 77.65, "elapsed_time": "0:34:20", "remaining_time": "0:09:53", "throughput": 3037.12, "total_tokens": 6258176} +{"current_steps": 620, "total_steps": 792, "loss": 2.152, "learning_rate": 5.596338392706077e-06, "epoch": 
4.674835061262959, "percentage": 78.28, "elapsed_time": "0:34:36", "remaining_time": "0:09:36", "throughput": 3037.74, "total_tokens": 6308496} +{"current_steps": 625, "total_steps": 792, "loss": 2.0834, "learning_rate": 5.2875267698322325e-06, "epoch": 4.71253534401508, "percentage": 78.91, "elapsed_time": "0:34:52", "remaining_time": "0:09:19", "throughput": 3038.39, "total_tokens": 6358896} +{"current_steps": 630, "total_steps": 792, "loss": 2.1373, "learning_rate": 4.986468976890993e-06, "epoch": 4.750235626767201, "percentage": 79.55, "elapsed_time": "0:35:09", "remaining_time": "0:09:02", "throughput": 3039.38, "total_tokens": 6410896} +{"current_steps": 635, "total_steps": 792, "loss": 2.1276, "learning_rate": 4.693283433874565e-06, "epoch": 4.787935909519321, "percentage": 80.18, "elapsed_time": "0:35:24", "remaining_time": "0:08:45", "throughput": 3040.16, "total_tokens": 6459120} +{"current_steps": 640, "total_steps": 792, "loss": 2.1481, "learning_rate": 4.408085464254183e-06, "epoch": 4.825636192271442, "percentage": 80.81, "elapsed_time": "0:35:40", "remaining_time": "0:08:28", "throughput": 3039.88, "total_tokens": 6506048} +{"current_steps": 645, "total_steps": 792, "loss": 2.0872, "learning_rate": 4.130987249617993e-06, "epoch": 4.863336475023563, "percentage": 81.44, "elapsed_time": "0:35:57", "remaining_time": "0:08:11", "throughput": 3040.67, "total_tokens": 6560448} +{"current_steps": 650, "total_steps": 792, "loss": 2.1141, "learning_rate": 3.8620977855448935e-06, "epoch": 4.9010367577756835, "percentage": 82.07, "elapsed_time": "0:36:15", "remaining_time": "0:07:55", "throughput": 3041.25, "total_tokens": 6616704} +{"current_steps": 655, "total_steps": 792, "loss": 2.0778, "learning_rate": 3.601522838731461e-06, "epoch": 4.938737040527804, "percentage": 82.7, "elapsed_time": "0:36:33", "remaining_time": "0:07:38", "throughput": 3041.38, "total_tokens": 6670192} +{"current_steps": 660, "total_steps": 792, "loss": 2.1827, "learning_rate": 
3.3493649053890326e-06, "epoch": 4.976437323279924, "percentage": 83.33, "elapsed_time": "0:36:50", "remaining_time": "0:07:22", "throughput": 3040.71, "total_tokens": 6721600} +{"current_steps": 665, "total_steps": 792, "loss": 2.0695, "learning_rate": 3.1057231709272077e-06, "epoch": 5.014137606032045, "percentage": 83.96, "elapsed_time": "0:37:08", "remaining_time": "0:07:05", "throughput": 3039.71, "total_tokens": 6773936} +{"current_steps": 670, "total_steps": 792, "loss": 2.0689, "learning_rate": 2.8706934709395892e-06, "epoch": 5.051837888784166, "percentage": 84.6, "elapsed_time": "0:37:25", "remaining_time": "0:06:48", "throughput": 3039.31, "total_tokens": 6823488} +{"current_steps": 675, "total_steps": 792, "loss": 1.9623, "learning_rate": 2.6443682535072177e-06, "epoch": 5.089538171536287, "percentage": 85.23, "elapsed_time": "0:37:40", "remaining_time": "0:06:31", "throughput": 3039.45, "total_tokens": 6870576} +{"current_steps": 680, "total_steps": 792, "loss": 2.0186, "learning_rate": 2.4268365428344736e-06, "epoch": 5.127238454288407, "percentage": 85.86, "elapsed_time": "0:37:58", "remaining_time": "0:06:15", "throughput": 3039.03, "total_tokens": 6923488} +{"current_steps": 685, "total_steps": 792, "loss": 1.9324, "learning_rate": 2.21818390423168e-06, "epoch": 5.1649387370405275, "percentage": 86.49, "elapsed_time": "0:38:13", "remaining_time": "0:05:58", "throughput": 3038.24, "total_tokens": 6968384} +{"current_steps": 690, "total_steps": 792, "loss": 1.9769, "learning_rate": 2.0184924104583613e-06, "epoch": 5.202639019792649, "percentage": 87.12, "elapsed_time": "0:38:28", "remaining_time": "0:05:41", "throughput": 3037.41, "total_tokens": 7012784} +{"current_steps": 695, "total_steps": 792, "loss": 1.9815, "learning_rate": 1.8278406094401623e-06, "epoch": 5.240339302544769, "percentage": 87.75, "elapsed_time": "0:38:44", "remaining_time": "0:05:24", "throughput": 3037.0, "total_tokens": 7058992} +{"current_steps": 700, "total_steps": 792, 
"loss": 2.0789, "learning_rate": 1.6463034933723337e-06, "epoch": 5.27803958529689, "percentage": 88.38, "elapsed_time": "0:39:00", "remaining_time": "0:05:07", "throughput": 3036.91, "total_tokens": 7107360} +{"current_steps": 705, "total_steps": 792, "loss": 2.0299, "learning_rate": 1.4739524692218314e-06, "epoch": 5.31573986804901, "percentage": 89.02, "elapsed_time": "0:39:17", "remaining_time": "0:04:50", "throughput": 3034.31, "total_tokens": 7154560} +{"current_steps": 710, "total_steps": 792, "loss": 2.0888, "learning_rate": 1.3108553306396265e-06, "epoch": 5.353440150801131, "percentage": 89.65, "elapsed_time": "0:39:34", "remaining_time": "0:04:34", "throughput": 3035.0, "total_tokens": 7207760} +{"current_steps": 715, "total_steps": 792, "loss": 2.054, "learning_rate": 1.1570762312943295e-06, "epoch": 5.391140433553252, "percentage": 90.28, "elapsed_time": "0:39:52", "remaining_time": "0:04:17", "throughput": 3035.4, "total_tokens": 7261248} +{"current_steps": 720, "total_steps": 792, "loss": 1.9551, "learning_rate": 1.0126756596375686e-06, "epoch": 5.4288407163053725, "percentage": 90.91, "elapsed_time": "0:40:08", "remaining_time": "0:04:00", "throughput": 3035.28, "total_tokens": 7309552} +{"current_steps": 725, "total_steps": 792, "loss": 2.0533, "learning_rate": 8.777104151110826e-07, "epoch": 5.466540999057493, "percentage": 91.54, "elapsed_time": "0:40:27", "remaining_time": "0:03:44", "throughput": 3036.06, "total_tokens": 7368576} +{"current_steps": 730, "total_steps": 792, "loss": 2.0151, "learning_rate": 7.522335858048707e-07, "epoch": 5.504241281809613, "percentage": 92.17, "elapsed_time": "0:40:45", "remaining_time": "0:03:27", "throughput": 3036.25, "total_tokens": 7425056} +{"current_steps": 735, "total_steps": 792, "loss": 2.0525, "learning_rate": 6.362945275751736e-07, "epoch": 5.541941564561734, "percentage": 92.8, "elapsed_time": "0:41:00", "remaining_time": "0:03:10", "throughput": 3037.05, "total_tokens": 7473488} +{"current_steps": 
740, "total_steps": 792, "loss": 2.0759, "learning_rate": 5.299388446305343e-07, "epoch": 5.579641847313855, "percentage": 93.43, "elapsed_time": "0:41:16", "remaining_time": "0:02:54", "throughput": 3037.57, "total_tokens": 7522656} +{"current_steps": 745, "total_steps": 792, "loss": 2.0374, "learning_rate": 4.3320837159353813e-07, "epoch": 5.617342130065976, "percentage": 94.07, "elapsed_time": "0:41:34", "remaining_time": "0:02:37", "throughput": 3037.5, "total_tokens": 7577104} +{"current_steps": 750, "total_steps": 792, "loss": 2.0137, "learning_rate": 3.4614115704533767e-07, "epoch": 5.655042412818096, "percentage": 94.7, "elapsed_time": "0:41:52", "remaining_time": "0:02:20", "throughput": 3037.99, "total_tokens": 7633024} +{"current_steps": 755, "total_steps": 792, "loss": 2.1025, "learning_rate": 2.687714485593462e-07, "epoch": 5.6927426955702165, "percentage": 95.33, "elapsed_time": "0:42:08", "remaining_time": "0:02:03", "throughput": 3038.45, "total_tokens": 7682736} +{"current_steps": 760, "total_steps": 792, "loss": 2.1403, "learning_rate": 2.011296792301165e-07, "epoch": 5.730442978322337, "percentage": 95.96, "elapsed_time": "0:42:26", "remaining_time": "0:01:47", "throughput": 3038.56, "total_tokens": 7737888} +{"current_steps": 765, "total_steps": 792, "loss": 2.0965, "learning_rate": 1.4324245570256633e-07, "epoch": 5.768143261074458, "percentage": 96.59, "elapsed_time": "0:42:42", "remaining_time": "0:01:30", "throughput": 3039.29, "total_tokens": 7788384} +{"current_steps": 770, "total_steps": 792, "loss": 2.0826, "learning_rate": 9.513254770636137e-08, "epoch": 5.805843543826579, "percentage": 97.22, "elapsed_time": "0:42:58", "remaining_time": "0:01:13", "throughput": 3038.83, "total_tokens": 7835600} +{"current_steps": 775, "total_steps": 792, "loss": 2.1508, "learning_rate": 5.681887909952388e-08, "epoch": 5.843543826578699, "percentage": 97.85, "elapsed_time": "0:43:15", "remaining_time": "0:00:56", "throughput": 3039.1, "total_tokens": 
7888336} +{"current_steps": 780, "total_steps": 792, "loss": 2.0338, "learning_rate": 2.831652042480093e-08, "epoch": 5.88124410933082, "percentage": 98.48, "elapsed_time": "0:43:31", "remaining_time": "0:00:40", "throughput": 3039.64, "total_tokens": 7938960} +{"current_steps": 785, "total_steps": 792, "loss": 2.0466, "learning_rate": 9.636682981720158e-09, "epoch": 5.918944392082941, "percentage": 99.12, "elapsed_time": "0:43:48", "remaining_time": "0:00:23", "throughput": 3040.43, "total_tokens": 7991296} +{"current_steps": 790, "total_steps": 792, "loss": 2.0436, "learning_rate": 7.867144166728846e-10, "epoch": 5.956644674835061, "percentage": 99.75, "elapsed_time": "0:44:05", "remaining_time": "0:00:06", "throughput": 3040.67, "total_tokens": 8044336} +{"current_steps": 792, "total_steps": 792, "epoch": 5.971724787935909, "percentage": 100.0, "elapsed_time": "0:44:12", "remaining_time": "0:00:00", "throughput": 3039.97, "total_tokens": 8063376} diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..de2f78a9924732deb21160eace4c07083e765119 --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,1307 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 5.971724787935909, + "eval_steps": 500, + "global_step": 792, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.03770028275212064, + "grad_norm": 3.2918148040771484, + "learning_rate": 4.9995083170283816e-05, + "loss": 4.3742, + "num_input_tokens_seen": 50544, + "step": 5 + }, + { + "epoch": 0.07540056550424128, + "grad_norm": 3.0331127643585205, + "learning_rate": 4.998033461515242e-05, + "loss": 3.8434, + "num_input_tokens_seen": 104080, + "step": 10 + }, + { + "epoch": 0.11310084825636192, + "grad_norm": 3.322950601577759, + "learning_rate": 4.9955760135896534e-05, + "loss": 3.6027, + "num_input_tokens_seen": 155776, + "step": 15 + }, + { + 
"epoch": 0.15080113100848255, + "grad_norm": 2.7705531120300293, + "learning_rate": 4.992136939879856e-05, + "loss": 3.4102, + "num_input_tokens_seen": 203184, + "step": 20 + }, + { + "epoch": 0.1885014137606032, + "grad_norm": 3.4572091102600098, + "learning_rate": 4.9877175931330346e-05, + "loss": 3.153, + "num_input_tokens_seen": 255744, + "step": 25 + }, + { + "epoch": 0.22620169651272384, + "grad_norm": 2.0412893295288086, + "learning_rate": 4.982319711683221e-05, + "loss": 3.0949, + "num_input_tokens_seen": 307808, + "step": 30 + }, + { + "epoch": 0.2639019792648445, + "grad_norm": 2.0765998363494873, + "learning_rate": 4.975945418767529e-05, + "loss": 3.0392, + "num_input_tokens_seen": 358192, + "step": 35 + }, + { + "epoch": 0.3016022620169651, + "grad_norm": 2.2433698177337646, + "learning_rate": 4.968597221690986e-05, + "loss": 3.0685, + "num_input_tokens_seen": 408832, + "step": 40 + }, + { + "epoch": 0.3393025447690858, + "grad_norm": 2.7639973163604736, + "learning_rate": 4.96027801084029e-05, + "loss": 3.0385, + "num_input_tokens_seen": 458368, + "step": 45 + }, + { + "epoch": 0.3770028275212064, + "grad_norm": 2.0680019855499268, + "learning_rate": 4.950991058546893e-05, + "loss": 3.1079, + "num_input_tokens_seen": 512160, + "step": 50 + }, + { + "epoch": 0.41470311027332707, + "grad_norm": 2.181554079055786, + "learning_rate": 4.940740017799833e-05, + "loss": 2.9883, + "num_input_tokens_seen": 562928, + "step": 55 + }, + { + "epoch": 0.4524033930254477, + "grad_norm": 2.0724411010742188, + "learning_rate": 4.929528920808854e-05, + "loss": 2.969, + "num_input_tokens_seen": 613072, + "step": 60 + }, + { + "epoch": 0.49010367577756836, + "grad_norm": 2.5529158115386963, + "learning_rate": 4.917362177418342e-05, + "loss": 3.0019, + "num_input_tokens_seen": 664640, + "step": 65 + }, + { + "epoch": 0.527803958529689, + "grad_norm": 2.3983142375946045, + "learning_rate": 4.904244573372733e-05, + "loss": 2.9984, + "num_input_tokens_seen": 716800, + "step": 
70 + }, + { + "epoch": 0.5655042412818096, + "grad_norm": 2.206209182739258, + "learning_rate": 4.8901812684340564e-05, + "loss": 2.9848, + "num_input_tokens_seen": 766528, + "step": 75 + }, + { + "epoch": 0.6032045240339302, + "grad_norm": 2.22698974609375, + "learning_rate": 4.8751777943523634e-05, + "loss": 2.9169, + "num_input_tokens_seen": 817376, + "step": 80 + }, + { + "epoch": 0.6409048067860509, + "grad_norm": 2.4569709300994873, + "learning_rate": 4.8592400526898314e-05, + "loss": 2.8844, + "num_input_tokens_seen": 864688, + "step": 85 + }, + { + "epoch": 0.6786050895381716, + "grad_norm": 2.195237636566162, + "learning_rate": 4.842374312499405e-05, + "loss": 2.9422, + "num_input_tokens_seen": 913216, + "step": 90 + }, + { + "epoch": 0.7163053722902922, + "grad_norm": 2.54585599899292, + "learning_rate": 4.824587207858888e-05, + "loss": 2.8431, + "num_input_tokens_seen": 964144, + "step": 95 + }, + { + "epoch": 0.7540056550424128, + "grad_norm": 2.3841867446899414, + "learning_rate": 4.805885735261454e-05, + "loss": 2.8377, + "num_input_tokens_seen": 1018768, + "step": 100 + }, + { + "epoch": 0.7917059377945335, + "grad_norm": 2.359386920928955, + "learning_rate": 4.786277250863599e-05, + "loss": 2.7843, + "num_input_tokens_seen": 1070592, + "step": 105 + }, + { + "epoch": 0.8294062205466541, + "grad_norm": 2.0952038764953613, + "learning_rate": 4.765769467591625e-05, + "loss": 2.8218, + "num_input_tokens_seen": 1125392, + "step": 110 + }, + { + "epoch": 0.8671065032987747, + "grad_norm": 2.222754955291748, + "learning_rate": 4.744370452107789e-05, + "loss": 2.877, + "num_input_tokens_seen": 1176256, + "step": 115 + }, + { + "epoch": 0.9048067860508954, + "grad_norm": 2.3233537673950195, + "learning_rate": 4.722088621637309e-05, + "loss": 2.9028, + "num_input_tokens_seen": 1225376, + "step": 120 + }, + { + "epoch": 0.942507068803016, + "grad_norm": 2.400575876235962, + "learning_rate": 4.698932740657479e-05, + "loss": 2.762, + "num_input_tokens_seen": 
1277024, + "step": 125 + }, + { + "epoch": 0.9802073515551367, + "grad_norm": 2.592116117477417, + "learning_rate": 4.6749119174501975e-05, + "loss": 2.9164, + "num_input_tokens_seen": 1322912, + "step": 130 + }, + { + "epoch": 1.0179076343072573, + "grad_norm": 2.563326120376587, + "learning_rate": 4.6500356005192514e-05, + "loss": 2.736, + "num_input_tokens_seen": 1368624, + "step": 135 + }, + { + "epoch": 1.055607917059378, + "grad_norm": 2.3927371501922607, + "learning_rate": 4.6243135748737864e-05, + "loss": 2.7414, + "num_input_tokens_seen": 1417664, + "step": 140 + }, + { + "epoch": 1.0933081998114986, + "grad_norm": 2.597975492477417, + "learning_rate": 4.597755958179406e-05, + "loss": 2.6961, + "num_input_tokens_seen": 1469120, + "step": 145 + }, + { + "epoch": 1.1310084825636193, + "grad_norm": 2.932581663131714, + "learning_rate": 4.570373196778427e-05, + "loss": 2.6134, + "num_input_tokens_seen": 1521632, + "step": 150 + }, + { + "epoch": 1.1687087653157398, + "grad_norm": 3.28389310836792, + "learning_rate": 4.5421760615808474e-05, + "loss": 2.7548, + "num_input_tokens_seen": 1565296, + "step": 155 + }, + { + "epoch": 1.2064090480678604, + "grad_norm": 2.927279233932495, + "learning_rate": 4.513175643827647e-05, + "loss": 2.702, + "num_input_tokens_seen": 1617088, + "step": 160 + }, + { + "epoch": 1.244109330819981, + "grad_norm": 2.840573310852051, + "learning_rate": 4.4833833507280884e-05, + "loss": 2.6893, + "num_input_tokens_seen": 1663584, + "step": 165 + }, + { + "epoch": 1.2818096135721018, + "grad_norm": 2.6322081089019775, + "learning_rate": 4.4528109009727336e-05, + "loss": 2.5671, + "num_input_tokens_seen": 1713744, + "step": 170 + }, + { + "epoch": 1.3195098963242224, + "grad_norm": 3.1280879974365234, + "learning_rate": 4.42147032012394e-05, + "loss": 2.7682, + "num_input_tokens_seen": 1762768, + "step": 175 + }, + { + "epoch": 1.3572101790763431, + "grad_norm": 3.1128265857696533, + "learning_rate": 4.389373935885646e-05, + "loss": 
2.7062, + "num_input_tokens_seen": 1815808, + "step": 180 + }, + { + "epoch": 1.3949104618284638, + "grad_norm": 2.952150344848633, + "learning_rate": 4.356534373254316e-05, + "loss": 2.7092, + "num_input_tokens_seen": 1871040, + "step": 185 + }, + { + "epoch": 1.4326107445805842, + "grad_norm": 3.0146102905273438, + "learning_rate": 4.322964549552943e-05, + "loss": 2.6518, + "num_input_tokens_seen": 1924048, + "step": 190 + }, + { + "epoch": 1.4703110273327051, + "grad_norm": 2.999300956726074, + "learning_rate": 4.288677669350066e-05, + "loss": 2.6592, + "num_input_tokens_seen": 1972720, + "step": 195 + }, + { + "epoch": 1.5080113100848256, + "grad_norm": 3.575253963470459, + "learning_rate": 4.2536872192658036e-05, + "loss": 2.716, + "num_input_tokens_seen": 2022112, + "step": 200 + }, + { + "epoch": 1.5457115928369463, + "grad_norm": 3.1339428424835205, + "learning_rate": 4.218006962666934e-05, + "loss": 2.6746, + "num_input_tokens_seen": 2072000, + "step": 205 + }, + { + "epoch": 1.583411875589067, + "grad_norm": 3.130823850631714, + "learning_rate": 4.181650934253132e-05, + "loss": 2.6306, + "num_input_tokens_seen": 2125632, + "step": 210 + }, + { + "epoch": 1.6211121583411876, + "grad_norm": 3.1828997135162354, + "learning_rate": 4.144633434536467e-05, + "loss": 2.5678, + "num_input_tokens_seen": 2174464, + "step": 215 + }, + { + "epoch": 1.6588124410933083, + "grad_norm": 3.3879778385162354, + "learning_rate": 4.1069690242163484e-05, + "loss": 2.7334, + "num_input_tokens_seen": 2223408, + "step": 220 + }, + { + "epoch": 1.6965127238454287, + "grad_norm": 3.5921592712402344, + "learning_rate": 4.06867251845213e-05, + "loss": 2.6658, + "num_input_tokens_seen": 2281296, + "step": 225 + }, + { + "epoch": 1.7342130065975496, + "grad_norm": 3.4126648902893066, + "learning_rate": 4.0297589810356165e-05, + "loss": 2.6567, + "num_input_tokens_seen": 2334176, + "step": 230 + }, + { + "epoch": 1.77191328934967, + "grad_norm": 3.504786252975464, + "learning_rate": 
3.9902437184657784e-05, + "loss": 2.6131, + "num_input_tokens_seen": 2389296, + "step": 235 + }, + { + "epoch": 1.8096135721017907, + "grad_norm": 3.459768056869507, + "learning_rate": 3.9501422739279956e-05, + "loss": 2.662, + "num_input_tokens_seen": 2442800, + "step": 240 + }, + { + "epoch": 1.8473138548539114, + "grad_norm": 2.944490909576416, + "learning_rate": 3.909470421180201e-05, + "loss": 2.6339, + "num_input_tokens_seen": 2494912, + "step": 245 + }, + { + "epoch": 1.885014137606032, + "grad_norm": 3.4897429943084717, + "learning_rate": 3.8682441583483314e-05, + "loss": 2.6338, + "num_input_tokens_seen": 2544624, + "step": 250 + }, + { + "epoch": 1.9227144203581528, + "grad_norm": 3.15039324760437, + "learning_rate": 3.8264797016335205e-05, + "loss": 2.6844, + "num_input_tokens_seen": 2597792, + "step": 255 + }, + { + "epoch": 1.9604147031102732, + "grad_norm": 3.5575578212738037, + "learning_rate": 3.7841934789335164e-05, + "loss": 2.5939, + "num_input_tokens_seen": 2646544, + "step": 260 + }, + { + "epoch": 1.998114985862394, + "grad_norm": 3.97521710395813, + "learning_rate": 3.741402123380828e-05, + "loss": 2.7393, + "num_input_tokens_seen": 2700224, + "step": 265 + }, + { + "epoch": 2.0358152686145146, + "grad_norm": 3.1736276149749756, + "learning_rate": 3.6981224668001424e-05, + "loss": 2.4495, + "num_input_tokens_seen": 2749104, + "step": 270 + }, + { + "epoch": 2.0735155513666355, + "grad_norm": 3.633033275604248, + "learning_rate": 3.654371533087586e-05, + "loss": 2.3823, + "num_input_tokens_seen": 2802992, + "step": 275 + }, + { + "epoch": 2.111215834118756, + "grad_norm": 3.4200243949890137, + "learning_rate": 3.610166531514436e-05, + "loss": 2.474, + "num_input_tokens_seen": 2844512, + "step": 280 + }, + { + "epoch": 2.1489161168708764, + "grad_norm": 4.055693626403809, + "learning_rate": 3.565524849957921e-05, + "loss": 2.4776, + "num_input_tokens_seen": 2894400, + "step": 285 + }, + { + "epoch": 2.1866163996229973, + "grad_norm": 
4.254647254943848, + "learning_rate": 3.520464048061758e-05, + "loss": 2.4709, + "num_input_tokens_seen": 2944688, + "step": 290 + }, + { + "epoch": 2.2243166823751177, + "grad_norm": 4.337226390838623, + "learning_rate": 3.47500185032913e-05, + "loss": 2.5009, + "num_input_tokens_seen": 2994672, + "step": 295 + }, + { + "epoch": 2.2620169651272386, + "grad_norm": 4.424121379852295, + "learning_rate": 3.4291561391508185e-05, + "loss": 2.4524, + "num_input_tokens_seen": 3045344, + "step": 300 + }, + { + "epoch": 2.299717247879359, + "grad_norm": 4.178481101989746, + "learning_rate": 3.3829449477712324e-05, + "loss": 2.4083, + "num_input_tokens_seen": 3100688, + "step": 305 + }, + { + "epoch": 2.3374175306314795, + "grad_norm": 4.315364837646484, + "learning_rate": 3.336386453195088e-05, + "loss": 2.4713, + "num_input_tokens_seen": 3154896, + "step": 310 + }, + { + "epoch": 2.3751178133836004, + "grad_norm": 4.445486545562744, + "learning_rate": 3.2894989690375626e-05, + "loss": 2.3986, + "num_input_tokens_seen": 3209120, + "step": 315 + }, + { + "epoch": 2.412818096135721, + "grad_norm": 4.80281400680542, + "learning_rate": 3.2423009383206876e-05, + "loss": 2.4258, + "num_input_tokens_seen": 3257984, + "step": 320 + }, + { + "epoch": 2.4505183788878417, + "grad_norm": 4.705103397369385, + "learning_rate": 3.194810926218861e-05, + "loss": 2.4126, + "num_input_tokens_seen": 3309424, + "step": 325 + }, + { + "epoch": 2.488218661639962, + "grad_norm": 4.281659126281738, + "learning_rate": 3.147047612756302e-05, + "loss": 2.3639, + "num_input_tokens_seen": 3359152, + "step": 330 + }, + { + "epoch": 2.525918944392083, + "grad_norm": 4.493980407714844, + "learning_rate": 3.099029785459328e-05, + "loss": 2.5176, + "num_input_tokens_seen": 3406784, + "step": 335 + }, + { + "epoch": 2.5636192271442035, + "grad_norm": 4.339905261993408, + "learning_rate": 3.0507763319663517e-05, + "loss": 2.4779, + "num_input_tokens_seen": 3459328, + "step": 340 + }, + { + "epoch": 
2.6013195098963244, + "grad_norm": 4.67468786239624, + "learning_rate": 3.002306232598497e-05, + "loss": 2.3833, + "num_input_tokens_seen": 3511056, + "step": 345 + }, + { + "epoch": 2.639019792648445, + "grad_norm": 4.905932426452637, + "learning_rate": 2.9536385528937567e-05, + "loss": 2.3794, + "num_input_tokens_seen": 3562352, + "step": 350 + }, + { + "epoch": 2.6767200754005653, + "grad_norm": 4.684591770172119, + "learning_rate": 2.9047924361076345e-05, + "loss": 2.4883, + "num_input_tokens_seen": 3615664, + "step": 355 + }, + { + "epoch": 2.7144203581526862, + "grad_norm": 4.377009868621826, + "learning_rate": 2.8557870956832132e-05, + "loss": 2.3423, + "num_input_tokens_seen": 3661424, + "step": 360 + }, + { + "epoch": 2.7521206409048067, + "grad_norm": 5.146539688110352, + "learning_rate": 2.8066418076936167e-05, + "loss": 2.4092, + "num_input_tokens_seen": 3710592, + "step": 365 + }, + { + "epoch": 2.7898209236569276, + "grad_norm": 5.5937910079956055, + "learning_rate": 2.7573759032598366e-05, + "loss": 2.4818, + "num_input_tokens_seen": 3765728, + "step": 370 + }, + { + "epoch": 2.827521206409048, + "grad_norm": 4.4958696365356445, + "learning_rate": 2.7080087609469062e-05, + "loss": 2.4587, + "num_input_tokens_seen": 3815360, + "step": 375 + }, + { + "epoch": 2.8652214891611685, + "grad_norm": 4.503344535827637, + "learning_rate": 2.6585597991414114e-05, + "loss": 2.3462, + "num_input_tokens_seen": 3868096, + "step": 380 + }, + { + "epoch": 2.9029217719132894, + "grad_norm": 4.773792743682861, + "learning_rate": 2.6090484684133404e-05, + "loss": 2.3713, + "num_input_tokens_seen": 3913696, + "step": 385 + }, + { + "epoch": 2.9406220546654103, + "grad_norm": 4.8644537925720215, + "learning_rate": 2.5594942438652688e-05, + "loss": 2.4618, + "num_input_tokens_seen": 3971840, + "step": 390 + }, + { + "epoch": 2.9783223374175307, + "grad_norm": 5.539215087890625, + "learning_rate": 2.509916617471903e-05, + "loss": 2.5454, + "num_input_tokens_seen": 4025040, 
+ "step": 395 + }, + { + "epoch": 3.016022620169651, + "grad_norm": 4.475778579711914, + "learning_rate": 2.46033509041298e-05, + "loss": 2.3007, + "num_input_tokens_seen": 4075488, + "step": 400 + }, + { + "epoch": 3.053722902921772, + "grad_norm": 4.82028341293335, + "learning_rate": 2.410769165402549e-05, + "loss": 2.274, + "num_input_tokens_seen": 4130496, + "step": 405 + }, + { + "epoch": 3.0914231856738925, + "grad_norm": 5.513036251068115, + "learning_rate": 2.3612383390176503e-05, + "loss": 2.2457, + "num_input_tokens_seen": 4181504, + "step": 410 + }, + { + "epoch": 3.1291234684260134, + "grad_norm": 5.219841957092285, + "learning_rate": 2.3117620940294048e-05, + "loss": 2.2595, + "num_input_tokens_seen": 4236816, + "step": 415 + }, + { + "epoch": 3.166823751178134, + "grad_norm": 5.527017593383789, + "learning_rate": 2.2623598917395438e-05, + "loss": 2.2326, + "num_input_tokens_seen": 4289488, + "step": 420 + }, + { + "epoch": 3.2045240339302543, + "grad_norm": 5.297417163848877, + "learning_rate": 2.213051164325366e-05, + "loss": 2.2595, + "num_input_tokens_seen": 4334704, + "step": 425 + }, + { + "epoch": 3.242224316682375, + "grad_norm": 5.43293571472168, + "learning_rate": 2.1638553071961708e-05, + "loss": 2.2084, + "num_input_tokens_seen": 4377360, + "step": 430 + }, + { + "epoch": 3.2799245994344957, + "grad_norm": 5.788747310638428, + "learning_rate": 2.1147916713641367e-05, + "loss": 2.21, + "num_input_tokens_seen": 4428544, + "step": 435 + }, + { + "epoch": 3.3176248821866166, + "grad_norm": 6.185176372528076, + "learning_rate": 2.0658795558326743e-05, + "loss": 2.2449, + "num_input_tokens_seen": 4477664, + "step": 440 + }, + { + "epoch": 3.355325164938737, + "grad_norm": 5.564029693603516, + "learning_rate": 2.017138200005236e-05, + "loss": 2.2457, + "num_input_tokens_seen": 4533792, + "step": 445 + }, + { + "epoch": 3.3930254476908575, + "grad_norm": 5.952132225036621, + "learning_rate": 1.9685867761175584e-05, + "loss": 2.3035, + 
"num_input_tokens_seen": 4584800, + "step": 450 + }, + { + "epoch": 3.4307257304429783, + "grad_norm": 5.484558582305908, + "learning_rate": 1.9202443816963425e-05, + "loss": 2.3204, + "num_input_tokens_seen": 4634976, + "step": 455 + }, + { + "epoch": 3.468426013195099, + "grad_norm": 6.121850490570068, + "learning_rate": 1.872130032047302e-05, + "loss": 2.2055, + "num_input_tokens_seen": 4691344, + "step": 460 + }, + { + "epoch": 3.5061262959472197, + "grad_norm": 6.345475673675537, + "learning_rate": 1.824262652775568e-05, + "loss": 2.3327, + "num_input_tokens_seen": 4745536, + "step": 465 + }, + { + "epoch": 3.54382657869934, + "grad_norm": 6.4370574951171875, + "learning_rate": 1.7766610723413684e-05, + "loss": 2.2214, + "num_input_tokens_seen": 4798128, + "step": 470 + }, + { + "epoch": 3.581526861451461, + "grad_norm": 6.708219528198242, + "learning_rate": 1.7293440146539196e-05, + "loss": 2.3097, + "num_input_tokens_seen": 4847632, + "step": 475 + }, + { + "epoch": 3.6192271442035815, + "grad_norm": 5.383622169494629, + "learning_rate": 1.682330091706446e-05, + "loss": 2.2966, + "num_input_tokens_seen": 4905648, + "step": 480 + }, + { + "epoch": 3.6569274269557024, + "grad_norm": 6.210061550140381, + "learning_rate": 1.6356377962552238e-05, + "loss": 2.2006, + "num_input_tokens_seen": 4955600, + "step": 485 + }, + { + "epoch": 3.694627709707823, + "grad_norm": 6.637734889984131, + "learning_rate": 1.589285494545514e-05, + "loss": 2.2463, + "num_input_tokens_seen": 5007424, + "step": 490 + }, + { + "epoch": 3.7323279924599433, + "grad_norm": 5.893795967102051, + "learning_rate": 1.5432914190872757e-05, + "loss": 2.2045, + "num_input_tokens_seen": 5058848, + "step": 495 + }, + { + "epoch": 3.770028275212064, + "grad_norm": 5.891578674316406, + "learning_rate": 1.4976736614834664e-05, + "loss": 2.135, + "num_input_tokens_seen": 5109904, + "step": 500 + }, + { + "epoch": 3.8077285579641846, + "grad_norm": 6.888275623321533, + "learning_rate": 
1.4524501653137787e-05, + "loss": 2.3023, + "num_input_tokens_seen": 5156496, + "step": 505 + }, + { + "epoch": 3.8454288407163055, + "grad_norm": 5.987553119659424, + "learning_rate": 1.4076387190766017e-05, + "loss": 2.2636, + "num_input_tokens_seen": 5207824, + "step": 510 + }, + { + "epoch": 3.883129123468426, + "grad_norm": 6.791808128356934, + "learning_rate": 1.363256949191972e-05, + "loss": 2.1942, + "num_input_tokens_seen": 5256608, + "step": 515 + }, + { + "epoch": 3.9208294062205464, + "grad_norm": 6.749192714691162, + "learning_rate": 1.3193223130682936e-05, + "loss": 2.2731, + "num_input_tokens_seen": 5305584, + "step": 520 + }, + { + "epoch": 3.9585296889726673, + "grad_norm": 5.9771647453308105, + "learning_rate": 1.2758520922355226e-05, + "loss": 2.1669, + "num_input_tokens_seen": 5349712, + "step": 525 + }, + { + "epoch": 3.9962299717247878, + "grad_norm": 6.35679817199707, + "learning_rate": 1.2328633855475429e-05, + "loss": 2.2146, + "num_input_tokens_seen": 5400336, + "step": 530 + }, + { + "epoch": 4.033930254476909, + "grad_norm": 6.124676704406738, + "learning_rate": 1.1903731024563966e-05, + "loss": 2.0324, + "num_input_tokens_seen": 5451776, + "step": 535 + }, + { + "epoch": 4.071630537229029, + "grad_norm": 6.622769355773926, + "learning_rate": 1.148397956361007e-05, + "loss": 2.0762, + "num_input_tokens_seen": 5507520, + "step": 540 + }, + { + "epoch": 4.10933081998115, + "grad_norm": 7.076972961425781, + "learning_rate": 1.106954458033026e-05, + "loss": 2.0949, + "num_input_tokens_seen": 5562688, + "step": 545 + }, + { + "epoch": 4.147031102733271, + "grad_norm": 6.695667266845703, + "learning_rate": 1.0660589091223855e-05, + "loss": 2.1853, + "num_input_tokens_seen": 5610800, + "step": 550 + }, + { + "epoch": 4.184731385485391, + "grad_norm": 7.716992378234863, + "learning_rate": 1.025727395745095e-05, + "loss": 2.1706, + "num_input_tokens_seen": 5657616, + "step": 555 + }, + { + "epoch": 4.222431668237512, + "grad_norm": 
6.8223419189453125, + "learning_rate": 9.859757821558337e-06, + "loss": 2.1199, + "num_input_tokens_seen": 5710960, + "step": 560 + }, + { + "epoch": 4.260131950989632, + "grad_norm": 6.87358283996582, + "learning_rate": 9.468197045077976e-06, + "loss": 2.0353, + "num_input_tokens_seen": 5756608, + "step": 565 + }, + { + "epoch": 4.297832233741753, + "grad_norm": 7.660863399505615, + "learning_rate": 9.082745647022797e-06, + "loss": 2.1101, + "num_input_tokens_seen": 5812304, + "step": 570 + }, + { + "epoch": 4.335532516493874, + "grad_norm": 7.863148212432861, + "learning_rate": 8.703555243303835e-06, + "loss": 2.1076, + "num_input_tokens_seen": 5859808, + "step": 575 + }, + { + "epoch": 4.3732327992459945, + "grad_norm": 7.298150539398193, + "learning_rate": 8.330774987092712e-06, + "loss": 2.0305, + "num_input_tokens_seen": 5908784, + "step": 580 + }, + { + "epoch": 4.410933081998115, + "grad_norm": 6.835300922393799, + "learning_rate": 7.96455151015272e-06, + "loss": 2.1132, + "num_input_tokens_seen": 5958672, + "step": 585 + }, + { + "epoch": 4.448633364750235, + "grad_norm": 6.710065841674805, + "learning_rate": 7.605028865161809e-06, + "loss": 2.1527, + "num_input_tokens_seen": 6010720, + "step": 590 + }, + { + "epoch": 4.486333647502356, + "grad_norm": 6.827284812927246, + "learning_rate": 7.25234846904993e-06, + "loss": 2.0948, + "num_input_tokens_seen": 6061440, + "step": 595 + }, + { + "epoch": 4.524033930254477, + "grad_norm": 7.066997528076172, + "learning_rate": 6.906649047373246e-06, + "loss": 2.1222, + "num_input_tokens_seen": 6115216, + "step": 600 + }, + { + "epoch": 4.561734213006598, + "grad_norm": 7.966955184936523, + "learning_rate": 6.568066579746901e-06, + "loss": 2.0719, + "num_input_tokens_seen": 6160944, + "step": 605 + }, + { + "epoch": 4.599434495758718, + "grad_norm": 7.257175922393799, + "learning_rate": 6.2367342463579475e-06, + "loss": 2.1399, + "num_input_tokens_seen": 6209008, + "step": 610 + }, + { + "epoch": 4.6371347785108386, 
+ "grad_norm": 7.445122241973877, + "learning_rate": 5.912782375579412e-06, + "loss": 2.0671, + "num_input_tokens_seen": 6258176, + "step": 615 + }, + { + "epoch": 4.674835061262959, + "grad_norm": 7.934208869934082, + "learning_rate": 5.596338392706077e-06, + "loss": 2.152, + "num_input_tokens_seen": 6308496, + "step": 620 + }, + { + "epoch": 4.71253534401508, + "grad_norm": 6.131651878356934, + "learning_rate": 5.2875267698322325e-06, + "loss": 2.0834, + "num_input_tokens_seen": 6358896, + "step": 625 + }, + { + "epoch": 4.750235626767201, + "grad_norm": 6.925292015075684, + "learning_rate": 4.986468976890993e-06, + "loss": 2.1373, + "num_input_tokens_seen": 6410896, + "step": 630 + }, + { + "epoch": 4.787935909519321, + "grad_norm": 6.970002174377441, + "learning_rate": 4.693283433874565e-06, + "loss": 2.1276, + "num_input_tokens_seen": 6459120, + "step": 635 + }, + { + "epoch": 4.825636192271442, + "grad_norm": 7.309933662414551, + "learning_rate": 4.408085464254183e-06, + "loss": 2.1481, + "num_input_tokens_seen": 6506048, + "step": 640 + }, + { + "epoch": 4.863336475023563, + "grad_norm": 6.540215492248535, + "learning_rate": 4.130987249617993e-06, + "loss": 2.0872, + "num_input_tokens_seen": 6560448, + "step": 645 + }, + { + "epoch": 4.9010367577756835, + "grad_norm": 6.6013360023498535, + "learning_rate": 3.8620977855448935e-06, + "loss": 2.1141, + "num_input_tokens_seen": 6616704, + "step": 650 + }, + { + "epoch": 4.938737040527804, + "grad_norm": 7.337521553039551, + "learning_rate": 3.601522838731461e-06, + "loss": 2.0778, + "num_input_tokens_seen": 6670192, + "step": 655 + }, + { + "epoch": 4.976437323279924, + "grad_norm": 7.133378505706787, + "learning_rate": 3.3493649053890326e-06, + "loss": 2.1827, + "num_input_tokens_seen": 6721600, + "step": 660 + }, + { + "epoch": 5.014137606032045, + "grad_norm": 7.371194839477539, + "learning_rate": 3.1057231709272077e-06, + "loss": 2.0695, + "num_input_tokens_seen": 6773936, + "step": 665 + }, + { + "epoch": 
5.051837888784166, + "grad_norm": 7.494382858276367, + "learning_rate": 2.8706934709395892e-06, + "loss": 2.0689, + "num_input_tokens_seen": 6823488, + "step": 670 + }, + { + "epoch": 5.089538171536287, + "grad_norm": 7.376400947570801, + "learning_rate": 2.6443682535072177e-06, + "loss": 1.9623, + "num_input_tokens_seen": 6870576, + "step": 675 + }, + { + "epoch": 5.127238454288407, + "grad_norm": 7.1367316246032715, + "learning_rate": 2.4268365428344736e-06, + "loss": 2.0186, + "num_input_tokens_seen": 6923488, + "step": 680 + }, + { + "epoch": 5.1649387370405275, + "grad_norm": 9.0476655960083, + "learning_rate": 2.21818390423168e-06, + "loss": 1.9324, + "num_input_tokens_seen": 6968384, + "step": 685 + }, + { + "epoch": 5.202639019792649, + "grad_norm": 6.944507122039795, + "learning_rate": 2.0184924104583613e-06, + "loss": 1.9769, + "num_input_tokens_seen": 7012784, + "step": 690 + }, + { + "epoch": 5.240339302544769, + "grad_norm": 7.558785438537598, + "learning_rate": 1.8278406094401623e-06, + "loss": 1.9815, + "num_input_tokens_seen": 7058992, + "step": 695 + }, + { + "epoch": 5.27803958529689, + "grad_norm": 7.789961338043213, + "learning_rate": 1.6463034933723337e-06, + "loss": 2.0789, + "num_input_tokens_seen": 7107360, + "step": 700 + }, + { + "epoch": 5.31573986804901, + "grad_norm": 7.385551929473877, + "learning_rate": 1.4739524692218314e-06, + "loss": 2.0299, + "num_input_tokens_seen": 7154560, + "step": 705 + }, + { + "epoch": 5.353440150801131, + "grad_norm": 8.215983390808105, + "learning_rate": 1.3108553306396265e-06, + "loss": 2.0888, + "num_input_tokens_seen": 7207760, + "step": 710 + }, + { + "epoch": 5.391140433553252, + "grad_norm": 7.679669380187988, + "learning_rate": 1.1570762312943295e-06, + "loss": 2.054, + "num_input_tokens_seen": 7261248, + "step": 715 + }, + { + "epoch": 5.4288407163053725, + "grad_norm": 7.84255313873291, + "learning_rate": 1.0126756596375686e-06, + "loss": 1.9551, + "num_input_tokens_seen": 7309552, + "step": 720 
+ }, + { + "epoch": 5.466540999057493, + "grad_norm": 7.932110786437988, + "learning_rate": 8.777104151110826e-07, + "loss": 2.0533, + "num_input_tokens_seen": 7368576, + "step": 725 + }, + { + "epoch": 5.504241281809613, + "grad_norm": 8.4814453125, + "learning_rate": 7.522335858048707e-07, + "loss": 2.0151, + "num_input_tokens_seen": 7425056, + "step": 730 + }, + { + "epoch": 5.541941564561734, + "grad_norm": 8.199661254882812, + "learning_rate": 6.362945275751736e-07, + "loss": 2.0525, + "num_input_tokens_seen": 7473488, + "step": 735 + }, + { + "epoch": 5.579641847313855, + "grad_norm": 7.404112339019775, + "learning_rate": 5.299388446305343e-07, + "loss": 2.0759, + "num_input_tokens_seen": 7522656, + "step": 740 + }, + { + "epoch": 5.617342130065976, + "grad_norm": 6.568804740905762, + "learning_rate": 4.3320837159353813e-07, + "loss": 2.0374, + "num_input_tokens_seen": 7577104, + "step": 745 + }, + { + "epoch": 5.655042412818096, + "grad_norm": 6.717598915100098, + "learning_rate": 3.4614115704533767e-07, + "loss": 2.0137, + "num_input_tokens_seen": 7633024, + "step": 750 + }, + { + "epoch": 5.6927426955702165, + "grad_norm": 8.680829048156738, + "learning_rate": 2.687714485593462e-07, + "loss": 2.1025, + "num_input_tokens_seen": 7682736, + "step": 755 + }, + { + "epoch": 5.730442978322337, + "grad_norm": 7.93720817565918, + "learning_rate": 2.011296792301165e-07, + "loss": 2.1403, + "num_input_tokens_seen": 7737888, + "step": 760 + }, + { + "epoch": 5.768143261074458, + "grad_norm": 7.161721706390381, + "learning_rate": 1.4324245570256633e-07, + "loss": 2.0965, + "num_input_tokens_seen": 7788384, + "step": 765 + }, + { + "epoch": 5.805843543826579, + "grad_norm": 7.347177028656006, + "learning_rate": 9.513254770636137e-08, + "loss": 2.0826, + "num_input_tokens_seen": 7835600, + "step": 770 + }, + { + "epoch": 5.843543826578699, + "grad_norm": 7.278431415557861, + "learning_rate": 5.681887909952388e-08, + "loss": 2.1508, + "num_input_tokens_seen": 7888336, + 
"step": 775 + }, + { + "epoch": 5.88124410933082, + "grad_norm": 8.160250663757324, + "learning_rate": 2.831652042480093e-08, + "loss": 2.0338, + "num_input_tokens_seen": 7938960, + "step": 780 + }, + { + "epoch": 5.918944392082941, + "grad_norm": 7.380901336669922, + "learning_rate": 9.636682981720158e-09, + "loss": 2.0466, + "num_input_tokens_seen": 7991296, + "step": 785 + }, + { + "epoch": 5.956644674835061, + "grad_norm": 7.834613800048828, + "learning_rate": 7.867144166728846e-10, + "loss": 2.0436, + "num_input_tokens_seen": 8044336, + "step": 790 + }, + { + "epoch": 5.971724787935909, + "num_input_tokens_seen": 8063376, + "step": 792, + "total_flos": 9.844904305885184e+16, + "train_loss": 2.431757736085641, + "train_runtime": 2652.6299, + "train_samples_per_second": 9.6, + "train_steps_per_second": 0.299 + } + ], + "logging_steps": 5, + "max_steps": 792, + "num_input_tokens_seen": 8063376, + "num_train_epochs": 6, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 9.844904305885184e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d325c35219dd8c0eea8279f17307e1d0d8c29b12 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72d28ce688790f3a28f1dba3064866610463622e1d4141255300e136617f5673 +size 5432 diff --git a/training_args.yaml b/training_args.yaml new file mode 100644 index 0000000000000000000000000000000000000000..53e314bacf36f3d090af2b432412a2e429e90fe5 --- /dev/null +++ b/training_args.yaml @@ -0,0 +1,32 @@ +bf16: true +cutoff_len: 1024 +dataset: identity +dataset_dir: data +ddp_timeout: 180000000 +do_train: true +finetuning_type: lora 
+flash_attn: auto +gradient_accumulation_steps: 8 +include_num_input_tokens_seen: true +learning_rate: 5.0e-05 +logging_steps: 5 +lora_alpha: 16 +lora_dropout: 0 +lora_rank: 8 +lora_target: all +lr_scheduler_type: cosine +max_grad_norm: 3.0 +max_samples: 100000 +model_name_or_path: unsloth/gemma-2-2b-it +num_train_epochs: 6.0 +optim: adamw_torch +output_dir: saves/Gemma-2B/lora/4k_train_2024-10-17-07-48-45 +packing: false +per_device_train_batch_size: 2 +plot_loss: true +preprocessing_num_workers: 16 +report_to: none +save_steps: 100 +stage: sft +template: default +warmup_steps: 0 diff --git a/training_loss.png b/training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..0d08b09bed12137067589a72c390415dc8e0158d Binary files /dev/null and b/training_loss.png differ