mtasic85 committed
Commit: cd9e649
Parent(s): 60b8a72
Files changed (2):
  1. config.json +40 -0
  2. scripts/prepare_contrain_dataset.py +18 -29
config.json ADDED
@@ -0,0 +1,40 @@
+{
+  "_name_or_path": "tangledgroup/tangled-llama-j-128k-v0.1",
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "eos_token_id": [
+    1,
+    4,
+    5
+  ],
+  "head_dim": 64,
+  "hidden_act": "silu",
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 2048,
+  "max_position_embeddings": 131072,
+  "mlp_bias": false,
+  "model_type": "llama",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 32,
+  "num_key_value_heads": 4,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": {
+    "factor": 32.0,
+    "high_freq_factor": 4.0,
+    "low_freq_factor": 1.0,
+    "original_max_position_embeddings": 8192,
+    "rope_type": "llama3"
+  },
+  "rope_theta": 1000000.0,
+  "tie_word_embeddings": true,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.45.0.dev0",
+  "use_cache": true,
+  "vocab_size": 65536
+}
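The `rope_scaling` block (`rope_type: "llama3"`, `factor: 32.0`) rescales RoPE frequencies so a model with an 8,192-token pretraining window can address the declared `max_position_embeddings` of 131,072 (128k). A minimal sketch of loading this config follows; `LlamaConfig.from_json_file` is the real transformers API, while the parameter arithmetic is a back-of-the-envelope estimate that omits norm weights:

from transformers import LlamaConfig

# Assumes the JSON above is saved locally as config.json.
config = LlamaConfig.from_json_file('config.json')

# Back-of-the-envelope parameter count implied by the config
# (RMSNorm weights omitted; all projections here are bias-free,
# since attention_bias and mlp_bias are both false).
attn_dim = config.num_attention_heads * config.head_dim    # 16 * 64 = 1024
kv_dim = config.num_key_value_heads * config.head_dim      # 4 * 64 = 256 (GQA)

per_layer = (
    config.hidden_size * attn_dim * 2                      # q_proj + o_proj
    + config.hidden_size * kv_dim * 2                      # k_proj + v_proj
    + config.hidden_size * config.intermediate_size * 3    # gate/up/down MLP
)
# tie_word_embeddings=true, so the LM head reuses this matrix.
embeddings = config.vocab_size * config.hidden_size

total = embeddings + config.num_hidden_layers * per_layer
print(f'~{total / 1e6:.0f}M parameters')                   # ~264M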
scripts/prepare_contrain_dataset.py CHANGED
@@ -93,6 +93,24 @@ datasets_configs = [
     #
     # general instructs
     #
+
+    # mlabonne/open-perfectblend - 1.48 GB, 1,420,909
+    #   meta-math/MetaMathQA                                 395,000
+    #   openbmb/UltraInteract_sft                            288,579
+    #   HuggingFaceH4/ultrachat_200k                         207,865
+    #   microsoft/orca-math-word-problems-200k               200,035
+    #   HuggingFaceH4/ultrafeedback_binarized                187,405
+    #   theblackcat102/evol-codealpaca-v1                    111,272
+    #   Post-training-Data-Flywheel/AutoIF-instruct-61k       61,492
+    #   mlabonne/lmsys-arena-human-preference-55k-sharegpt    57,362
+    [
+        {'path': 'mlabonne/open-perfectblend', 'split': f'train[{i}%:{i + 20}%]', 'field': 'conversations', 'transform': lambda msgs: [
+            {'role': roles_map[m['from']], 'content': m['value']}
+            for m in msgs
+        ]}
+        for i in range(0, 100, 20)
+    ],
+
     # arcee-ai/The-Tome - 4.58 GB, 1,752,473
     # - arcee-ai/infini-instruct-top-500k (BAAI/Infinity-Instruct)
     # - TIGER-Lab/WebInstructSub (top-500k) - IGNORE
@@ -174,36 +192,7 @@ datasets_configs = [
         for i in range(0, 100, 20)
     ],
 
-    # mlabonne/open-perfectblend - 1.48 GB, 1,420,909
-    #   meta-math/MetaMathQA                                 395,000
-    #   openbmb/UltraInteract_sft                            288,579
-    #   HuggingFaceH4/ultrachat_200k                         207,865
-    #   microsoft/orca-math-word-problems-200k               200,035
-    #   HuggingFaceH4/ultrafeedback_binarized                187,405
-    #   theblackcat102/evol-codealpaca-v1                    111,272
-    #   Post-training-Data-Flywheel/AutoIF-instruct-61k       61,492
-    #   mlabonne/lmsys-arena-human-preference-55k-sharegpt    57,362
-    [
-        {'path': 'mlabonne/open-perfectblend', 'split': f'train[{i}%:{i + 20}%]', 'field': 'conversations', 'transform': lambda msgs: [
-            {'role': roles_map[m['from']], 'content': m['value']}
-            for m in msgs
-        ]}
-        for i in range(0, 100, 20)
-    ],
 
-    #
-    # math
-    #
-    ## 6.07 GB, 11,402,286
-    # [
-    #     {'path': 'ai2-adapt-dev/openmath-2-math', 'split': f'train[{i}%:{i + 10}%]', 'field': 'messages'}
-    #     for i in range(0, 100, 10)
-    # ],
-    ## 912 MB, 2,570,505
-    # [
-    #     {'path': 'ai2-adapt-dev/openmath-2-gsm8k', 'split': f'train[{i}%:{i + 10}%]', 'field': 'messages'}
-    #     for i in range(0, 100, 10)
-    # ],
 
     #
     # tool/function calling
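Each entry's `split` f-string shards the dataset into five 20% slices (`train[0%:20%]` through `train[80%:100%]`), and the `transform` lambda remaps ShareGPT-style `{'from', 'value'}` messages into `{'role', 'content'}` chat records. The consuming loop is not part of this diff, so the sketch below is a hypothetical reading of how one entry could be resolved; `load_dataset` and the percent-slice split syntax are real Hugging Face `datasets` APIs, while `iter_config` and the `roles_map` values are illustrative stand-ins:

from datasets import load_dataset

# Assumed ShareGPT-role mapping; the script defines its own roles_map.
roles_map = {'system': 'system', 'human': 'user', 'gpt': 'assistant'}

def iter_config(config: dict):
    # Percent slices like 'train[0%:20%]' are standard datasets split syntax.
    dataset = load_dataset(config['path'], split=config['split'])
    transform = config.get('transform')

    for row in dataset:
        messages = row[config['field']]

        if transform is not None:
            # Remap {'from': ..., 'value': ...} to {'role': ..., 'content': ...}.
            messages = transform(messages)

        yield messages

# First 20% shard of open-perfectblend, remapped on the fly:
config = {
    'path': 'mlabonne/open-perfectblend',
    'split': 'train[0%:20%]',
    'field': 'conversations',
    'transform': lambda msgs: [
        {'role': roles_map[m['from']], 'content': m['value']}
        for m in msgs
    ],
}

for messages in iter_config(config):
    print(messages[0])
    break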