diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..52373fe24473b1aa44333d318f578ae6bf04b49b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/mlc-chat-config.json b/mlc-chat-config.json new file mode 100644 index 0000000000000000000000000000000000000000..ab1d31f0c777bedf52e475d82bc3d4406644cb6b --- /dev/null +++ b/mlc-chat-config.json @@ -0,0 +1,44 @@ +{ + "model_type": "gemma", + "quantization": "q0f16", + "model_config": { + "hidden_size": 2048, + "hidden_act": "gelu", + "intermediate_size": 16384, + "attention_bias": false, + "num_attention_heads": 8, + "num_key_value_heads": 1, + "head_dim": 256, + "num_hidden_layers": 18, + "rms_norm_eps": 1e-06, + "vocab_size": 256000, + "position_embedding_base": 10000.0, + "context_window_size": 8192, + "prefill_chunk_size": 8192, + "tensor_parallel_shards": 1, + "max_batch_size": 80 + }, + "vocab_size": 256000, + "context_window_size": 8192, + "sliding_window_size": -1, + "prefill_chunk_size": 8192, + "attention_sink_size": -1, + "tensor_parallel_shards": 1, + "mean_gen_len": 128, + "max_gen_len": 512, + "shift_fill_factor": 0.3, + "temperature": 0.7, + "repetition_penalty": 1.0, + "frequency_penalty": 1.0, + "top_p": 0.95, + "conv_template": "gemma_instruction", + "pad_token_id": 0, + "bos_token_id": 2, + "eos_token_id": 1, + "tokenizer_files": [ + "tokenizer.model", + "tokenizer.json", + "tokenizer_config.json" + ], + "version": "0.1.0" +} \ No newline at end of file diff --git a/ndarray-cache.json b/ndarray-cache.json new file mode 100644 index 0000000000000000000000000000000000000000..6c4963e31121464e569327b6205738516f9da783 --- /dev/null +++ b/ndarray-cache.json @@ -0,0 +1,1574 @@ +{ + "metadata": { + "ParamSize": 110, + "ParamBytes": 5012344832.0, + "BitsPerParam": 16.0 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 1048576000, + "records": [ + { + "name": "model.embed_tokens.weight", + "shape": [ + 256000, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576000, + "byteOffset": 0 + } + ], + "md5sum": "206485d87a22f62128d5b2494bafe7db" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.weight", + "shape": [ + 2048, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "0af16f8f82b18456b292f26c2e72f63b" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "2ba31ef002c5e642855d91dbaf959171" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.weight", + "shape": [ + 2048, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "c7d1d36c62d66bea4696e965fff1917e" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "362ff38d501c9881a63569d0cfa88605" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 0 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 4096 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 8192 + }, + { + "name": "model.layers.0.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 10493952 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 18882560 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 18886656 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 18890752 + } + ], + "md5sum": "5d30e8d1452ab145a0564794c26cd791" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.weight", + "shape": [ + 2048, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "fe63b3dbe2543296f8112fac2ade5290" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "629331e7d43d063ae03d036060febb5f" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.weight", + "shape": [ + 2048, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "cd3f99f63fc39b1170222cb6f4717b72" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "c9193e83b6a383f990b1d7d703a6f80f" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 27279360, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8388608 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8392704 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 8396800 + }, + { + "name": "model.layers.10.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 18882560 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 27271168 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 27275264 + } + ], + "md5sum": "59d0b4f1bd55ceeda6e5ab96633e9451" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.weight", + "shape": [ + 2048, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "820b534e89d40aa248659ca0081f0674" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "256dabcc684ebacdfdbbcfa0adc035e1" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 29368320, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 0 + }, + { + "name": "model.layers.11.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 10485760 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 18874368 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 18878464 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 18882560 + } + ], + "md5sum": "7d790e2cfa6142474ac62a0ec5aea10c" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.weight", + "shape": [ + 2048, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "908e1855eb654380dfcd3cfd85a1e631" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "ea87b38d7fde1e77b55f18aa11c0ed31" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.weight", + "shape": [ + 2048, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "f795202d6edb9f663561fa0cf03fa27a" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "88378b8b2905d8b91937c001c73403cb" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 27279360, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8388608 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8392704 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 8396800 + }, + { + "name": "model.layers.13.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 18882560 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 27271168 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 27275264 + } + ], + "md5sum": "071c71fc33170a2d3a1a2ae1f03b140d" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.weight", + "shape": [ + 2048, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "b58913c0a9d042f1469b2c4929efb8d3" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "7f3594a3e54595cf7f92b5901e0f6465" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 29368320, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 0 + }, + { + "name": "model.layers.14.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 10485760 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 18874368 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 18878464 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 18882560 + } + ], + "md5sum": "b7c100b48dd50547c5c74f03a68863aa" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.weight", + "shape": [ + 2048, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "497e575ba76a1ae6e33393fd7c9c4c12" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "7e13a326187148b0ea54878998af1dbe" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "3e8d823c128094965bc0b63996bd30e0" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 27271168, + "records": [ + { + "name": "model.layers.15.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8388608 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8392704 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 8396800 + }, + { + "name": "model.layers.16.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 18882560 + } + ], + "md5sum": "c51345fdd445c9c55206b86ee31d704a" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.weight", + "shape": [ + 2048, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "708872d176d1deddd88435c817e93d2f" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "0eef3929b28890b79faf9d02dfa2487f" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 29368320, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 0 + }, + { + "name": "model.layers.17.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 10485760 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 18874368 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 18878464 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 18882560 + } + ], + "md5sum": "6328df0ffee1d1160e0fc6fa300f4c9a" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.weight", + "shape": [ + 2048, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "30f748096a7ec18a6e349c49a7d14822" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "a15a4fe50c6c5b78f36ec6351732a345" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.weight", + "shape": [ + 2048, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "5b697bbb99453187d0eee6b6127d2008" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "76327282d65a9c80614e17129cb9e5f2" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 27279360, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8388608 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8392704 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 8396800 + }, + { + "name": "model.layers.3.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 18882560 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 27271168 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 27275264 + } + ], + "md5sum": "1f73ecde85ffc44fa98fba7534c55356" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.weight", + "shape": [ + 2048, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "c983dfde03cb18ee85a7d80d50f4d165" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "afb45d7056b90f804e2c75489be5792e" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 29368320, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 0 + }, + { + "name": "model.layers.4.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 10485760 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 18874368 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 18878464 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 18882560 + } + ], + "md5sum": "a8c9cd2cc4d6179ffa0d181051ef4f7e" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.weight", + "shape": [ + 2048, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "89e90749de11be2bc1ee29c5450a4f3d" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "bde36e490b2c3a905f40e1285f4873dd" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.weight", + "shape": [ + 2048, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "eefc32a2ccf311c1a95e01ca979aaa9f" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "c31b3371a8c2294135e993653a25d153" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 27279360, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8388608 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8392704 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 8396800 + }, + { + "name": "model.layers.6.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 18882560 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 27271168 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 27275264 + } + ], + "md5sum": "a9a738d8b541d5d1517049e641a72c6f" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.weight", + "shape": [ + 2048, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "b4d55621d295f8693b39a501ea581847" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "1031e7308cb30a67747b65b4fdaa94af" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 29368320, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 0 + }, + { + "name": "model.layers.7.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 10485760 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 18874368 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 18878464 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 18882560 + } + ], + "md5sum": "f74050232fb038c2c717ca99af7cdada" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.weight", + "shape": [ + 2048, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "2639e2fc57901dac06e9d4183ce2ee34" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "cdcce0c795b9df58125fb24aa22f62e8" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.weight", + "shape": [ + 2048, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "135969d29acd56ae5e9ac553cb476528" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 27283456, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8388608 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8392704 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 8396800 + }, + { + "name": "model.layers.9.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 18882560 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 27271168 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 27275264 + }, + { + "name": "model.norm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 27279360 + } + ], + "md5sum": "7bca8fca53ef5ba8a977e4733d66824a" + } + ] +} \ No newline at end of file diff --git a/params_shard_0.bin b/params_shard_0.bin new file mode 100644 index 0000000000000000000000000000000000000000..db52d8a35d356ac8c700bc97d62d5fecff4cfc98 --- /dev/null +++ b/params_shard_0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17b87ab2c01c80657855a5413d0457b4a041afaeda0cc785080e44e2f04acf07 +size 1048576000 diff --git a/params_shard_1.bin b/params_shard_1.bin new file mode 100644 index 0000000000000000000000000000000000000000..838cc77e809f817b1103229a688bc2d9fb61832c --- /dev/null +++ b/params_shard_1.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2feb5eb3d572c57c5bafbf0ab506862df1160fe40965dcfe4b9fd855c08bed7 +size 67108864 diff --git a/params_shard_10.bin b/params_shard_10.bin new file mode 100644 index 0000000000000000000000000000000000000000..16e6252c5aa7dec8f4c8a582ea74807a00f32954 --- /dev/null +++ b/params_shard_10.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4288cfe3944cd6191f94ae6fd63104118d42b835452603ecb0f5e696e981c581 +size 27279360 diff --git a/params_shard_11.bin b/params_shard_11.bin new file mode 100644 index 0000000000000000000000000000000000000000..d155d0d1250eb4915670707ee353302a143f6baf --- /dev/null +++ b/params_shard_11.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d10c790a4e3dc44c2dc36d96251ae97cdf30a4fa04d4c43e31bfbd038e6a7b7 +size 67108864 diff --git a/params_shard_12.bin b/params_shard_12.bin new file mode 100644 index 0000000000000000000000000000000000000000..ef5dc0695f9cd0b1a8d5c666ad37032425ba2e1b --- /dev/null +++ b/params_shard_12.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:929a0a900fe314ea93709c197a4445238dba61029cf0af354bc16288e5c84677 +size 134217728 diff --git a/params_shard_13.bin b/params_shard_13.bin new file mode 100644 index 0000000000000000000000000000000000000000..10ac7132cdf47913e5565f0c53889c8f2864fa9c --- /dev/null +++ b/params_shard_13.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72e09274b0fa41657ace50d78b2207c9dc4cf4406da769866baf5c33515f0b69 +size 29368320 diff --git a/params_shard_14.bin b/params_shard_14.bin new file mode 100644 index 0000000000000000000000000000000000000000..c0d6beb376c1f809815aa6cc5e512a2b8f57c1fe --- /dev/null +++ b/params_shard_14.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a07d3abbfcacf48ba028df2cab895be32cc15022d23389a745286e79c1b1d1fd +size 67108864 diff --git a/params_shard_15.bin b/params_shard_15.bin new file mode 100644 index 0000000000000000000000000000000000000000..e1bfe87fb6f7143179c1b278456c0bc24a8c0902 --- /dev/null +++ b/params_shard_15.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8cbd5f35ffa92cacb31f6207322b6c168c574685ad853e77aacb99578d7c7fb +size 134217728 diff --git a/params_shard_16.bin b/params_shard_16.bin new file mode 100644 index 0000000000000000000000000000000000000000..9b5903d229e46731684a968e0fac3fadcd98f9b4 --- /dev/null +++ b/params_shard_16.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef6694dff1e05820aac0cd2b22f39ac7788b4967afc9250775575554c66aab2c +size 67108864 diff --git a/params_shard_17.bin b/params_shard_17.bin new file mode 100644 index 0000000000000000000000000000000000000000..67237d3811252f2eabd13e3534175eccb775d28e --- /dev/null +++ b/params_shard_17.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84f1d49021144f3b5c1adb13f87eb0fd517b5a686694621efe9a0a00982c517e +size 134217728 diff --git a/params_shard_18.bin b/params_shard_18.bin new file mode 100644 index 0000000000000000000000000000000000000000..12c4a6a5cfb72a4eec2878c0a296fb5e4b86816d --- /dev/null +++ b/params_shard_18.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:965a1e20c10241269df95ed52b8e0d5a93e9e025fbe2f34263c72c5bf6bb8b7c +size 27279360 diff --git a/params_shard_19.bin b/params_shard_19.bin new file mode 100644 index 0000000000000000000000000000000000000000..4ea0334ecd026ad864e673ae346aab269ac97966 --- /dev/null +++ b/params_shard_19.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d02eb439da852246d2ca67e9b7b6de0b090b80744355e64728a23e41926505b +size 67108864 diff --git a/params_shard_2.bin b/params_shard_2.bin new file mode 100644 index 0000000000000000000000000000000000000000..740f96793620bd9b7865afc1e9601eedd47854f7 --- /dev/null +++ b/params_shard_2.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c82659b0b4a5643b009e3dbf0444a36e6b43bd01115ebff385f2e06c98463a23 +size 134217728 diff --git a/params_shard_20.bin b/params_shard_20.bin new file mode 100644 index 0000000000000000000000000000000000000000..0913be023ecaf66463e6395cb941ebd16ac66c6b --- /dev/null +++ b/params_shard_20.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7a62e531fc6fde459eecb716576da9d34073a7b2a4857c0073a8b71d41b412c +size 134217728 diff --git a/params_shard_21.bin b/params_shard_21.bin new file mode 100644 index 0000000000000000000000000000000000000000..6d2d6742c45e36e2ac12a360adb866e485282595 --- /dev/null +++ b/params_shard_21.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57a37e915fc860f83056174c6f3bc0c2bed5349cc5333f91052f430369fb7558 +size 29368320 diff --git a/params_shard_22.bin b/params_shard_22.bin new file mode 100644 index 0000000000000000000000000000000000000000..c80f1642642d2c0444f545f87577d9501baacec5 --- /dev/null +++ b/params_shard_22.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14006f5e4acb2f9416271ae562e299359cd2585739c7fc77ccbca54495563948 +size 67108864 diff --git a/params_shard_23.bin b/params_shard_23.bin new file mode 100644 index 0000000000000000000000000000000000000000..727c60e711e904273192eaf79565a121dc9d2307 --- /dev/null +++ b/params_shard_23.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1863a339688b84962200c43e2fb530bd2d702b124ac22f29996efb71df0f2058 +size 134217728 diff --git a/params_shard_24.bin b/params_shard_24.bin new file mode 100644 index 0000000000000000000000000000000000000000..623d7c20e905223f57e1521d4131dbd78f55e6cd --- /dev/null +++ b/params_shard_24.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d4ef29f4bf6d085ea2c19f51cebf3aa080722ab84140e51c67e3ed04de3e84c +size 134217728 diff --git a/params_shard_25.bin b/params_shard_25.bin new file mode 100644 index 0000000000000000000000000000000000000000..a846531037687a84ab4fcf6564f81cf14354fa59 --- /dev/null +++ b/params_shard_25.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea38daa1761e491f236e2064f7f82f8690a502936328d858cf184c07e10bf0c0 +size 27271168 diff --git a/params_shard_26.bin b/params_shard_26.bin new file mode 100644 index 0000000000000000000000000000000000000000..65d6f1aeb1af32c01d2535008046d7f867368a4d --- /dev/null +++ b/params_shard_26.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b970ff1b0b6494165defe2fbfa1d31425766ed71e64de9ec4e66ac3955c8bc5f +size 67108864 diff --git a/params_shard_27.bin b/params_shard_27.bin new file mode 100644 index 0000000000000000000000000000000000000000..c9402ebd72cdf585e2fe57d779221e94cf95d126 --- /dev/null +++ b/params_shard_27.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a30f91d2ca8ca264213e5907d5f61978224d57acf67c9bd507316dc07a97d565 +size 134217728 diff --git a/params_shard_28.bin b/params_shard_28.bin new file mode 100644 index 0000000000000000000000000000000000000000..b3011e053f3870681826064137612a1b1ce25a12 --- /dev/null +++ b/params_shard_28.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83dd496a9e6d90035c1406fb35a6bb23a242ec93ef86a31008ada167940c927c +size 29368320 diff --git a/params_shard_29.bin b/params_shard_29.bin new file mode 100644 index 0000000000000000000000000000000000000000..0a5238411e9bf93eadcc26b1967de77779bf7a01 --- /dev/null +++ b/params_shard_29.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58821a8d87008d47d122427911c6fad5272aca70c448bbae223256a74bacd07e +size 67108864 diff --git a/params_shard_3.bin b/params_shard_3.bin new file mode 100644 index 0000000000000000000000000000000000000000..575c7a83a42c5b87c58f9002dbfe62ac56d05496 --- /dev/null +++ b/params_shard_3.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ff53811038b661a7b8f2bfdf213bebfb185ec1a6060b662f063714f33584d79 +size 67108864 diff --git a/params_shard_30.bin b/params_shard_30.bin new file mode 100644 index 0000000000000000000000000000000000000000..23f9eb93c6ccbb1e611a150a5d29755c892641c4 --- /dev/null +++ b/params_shard_30.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56678666ffc37800588c58212de45e0a55320566354ae07d79bc0470b23d4fb7 +size 134217728 diff --git a/params_shard_31.bin b/params_shard_31.bin new file mode 100644 index 0000000000000000000000000000000000000000..eda94e724ef1928cb7be2d3d5c2cc95196390657 --- /dev/null +++ b/params_shard_31.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d61e8856d8941d2b1fd138116d015f63840d0fa1e31e20e20a5ceca1536ceec +size 67108864 diff --git a/params_shard_32.bin b/params_shard_32.bin new file mode 100644 index 0000000000000000000000000000000000000000..20706dd0a26b3c15113e52ef0e61d0c9976b4871 --- /dev/null +++ b/params_shard_32.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1c738e4f7968eecbbbf19e76459971d3820702d6cb7fee86a432ebe63783dc3 +size 134217728 diff --git a/params_shard_33.bin b/params_shard_33.bin new file mode 100644 index 0000000000000000000000000000000000000000..2275f5d37aea9007c1177dc3948f663002241e0e --- /dev/null +++ b/params_shard_33.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fe8c9f67714a936fa25f96fe1a424c4936a2bbcdd7d888566a0e8641b8d211d +size 27279360 diff --git a/params_shard_34.bin b/params_shard_34.bin new file mode 100644 index 0000000000000000000000000000000000000000..bb2d64c32a8c60bbea37289a490080a6c41a8a74 --- /dev/null +++ b/params_shard_34.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9b0efd2212981f16d956d8571f054b68780ad01f4917033647e359b557a4653 +size 67108864 diff --git a/params_shard_35.bin b/params_shard_35.bin new file mode 100644 index 0000000000000000000000000000000000000000..fd669708ecddc00ce65a163a3e6a0218e80a7c6c --- /dev/null +++ b/params_shard_35.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c61e71814db43d2d486b377c0264e25e6741fbfe12edc118bfdfc98d952fff6d +size 134217728 diff --git a/params_shard_36.bin b/params_shard_36.bin new file mode 100644 index 0000000000000000000000000000000000000000..aa6faff4b605ad18dc4d3e58c4b26a0b4e8fe149 --- /dev/null +++ b/params_shard_36.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9dda75e2e9d2dad79e4261ac571b953bde6c0f503be680e86cdf6b0bab69873d +size 29368320 diff --git a/params_shard_37.bin b/params_shard_37.bin new file mode 100644 index 0000000000000000000000000000000000000000..5943aa0de8430472499169c0462c7ac194cffdbe --- /dev/null +++ b/params_shard_37.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:622dd1ce1706355cbc659a8ab2c4509678ffe0f3ad34258e5e25ed2a5d951bcd +size 67108864 diff --git a/params_shard_38.bin b/params_shard_38.bin new file mode 100644 index 0000000000000000000000000000000000000000..09db725f9a7b0f9f6cf384469570c2bb394cd765 --- /dev/null +++ b/params_shard_38.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b9aee3aa472a570ec238e4f36c2d8b90e3de2c9558379847dc68721a90ce2e2 +size 134217728 diff --git a/params_shard_39.bin b/params_shard_39.bin new file mode 100644 index 0000000000000000000000000000000000000000..3542d82cc0b420df3082debb4587445a8baa42e6 --- /dev/null +++ b/params_shard_39.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e75db42468800849a9a7da603d0072c5e86c8ed2b4d8b20a312a51fb86a7a10 +size 67108864 diff --git a/params_shard_4.bin b/params_shard_4.bin new file mode 100644 index 0000000000000000000000000000000000000000..f4d682c38526c96ce52599ee4a10f02e4865a5b1 --- /dev/null +++ b/params_shard_4.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e685f76b32d22107033791ee3ff63793a388b06ab79cc0cf2bc3fd342eb0d3db +size 134217728 diff --git a/params_shard_40.bin b/params_shard_40.bin new file mode 100644 index 0000000000000000000000000000000000000000..53a30412f6ef5eed4ba195a004932f79adba98ca --- /dev/null +++ b/params_shard_40.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc165aa4ca15d220bcdb2f4acb46ebc75d30f265372c1c77b2ff0b16e7e51ba7 +size 134217728 diff --git a/params_shard_41.bin b/params_shard_41.bin new file mode 100644 index 0000000000000000000000000000000000000000..42d3faf3250334c96587b1a262ff87665b3520d0 --- /dev/null +++ b/params_shard_41.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2dbabc5ff2074349355cc6072ad87d01e912cb022b9c337b2ee06320421fd43 +size 27279360 diff --git a/params_shard_42.bin b/params_shard_42.bin new file mode 100644 index 0000000000000000000000000000000000000000..708bf1289cd819505568f9b4dfbc809eb85a4af2 --- /dev/null +++ b/params_shard_42.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd69d2c9172974fff154441b237b4787fb53b2d185325442d5048130ef5bc4ef +size 67108864 diff --git a/params_shard_43.bin b/params_shard_43.bin new file mode 100644 index 0000000000000000000000000000000000000000..3b69989555f2ffe9a06ac40dea8f9ec179e839a1 --- /dev/null +++ b/params_shard_43.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03a78c1c5b85b250c644f280d3067121aab38632646ce8e13e7bfac5e21ee210 +size 134217728 diff --git a/params_shard_44.bin b/params_shard_44.bin new file mode 100644 index 0000000000000000000000000000000000000000..eb06233aac4bfe775a3963c2f44d15ceee326104 --- /dev/null +++ b/params_shard_44.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5a6b1d6c5bf02832dd3acb1df723d3ecb411545935b0fed42f7baf7fe59cf8a +size 29368320 diff --git a/params_shard_45.bin b/params_shard_45.bin new file mode 100644 index 0000000000000000000000000000000000000000..447092193f5926cf98ea4da191999c50ad679c3a --- /dev/null +++ b/params_shard_45.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbf43ec84becb40c9359a181ab0e641fd7faae7d34b549501f7cfb7afdc3d764 +size 67108864 diff --git a/params_shard_46.bin b/params_shard_46.bin new file mode 100644 index 0000000000000000000000000000000000000000..da23a9f8dea516916d7ab533f91daf6a0467fc81 --- /dev/null +++ b/params_shard_46.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:955153723aa8e3419e79bfd03af98fcabb4af95e8dcd5920f2f8956725be526d +size 134217728 diff --git a/params_shard_47.bin b/params_shard_47.bin new file mode 100644 index 0000000000000000000000000000000000000000..6d842913a47bb3860efc6787034e06ede7ea6a0a --- /dev/null +++ b/params_shard_47.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c086166241e0664f8963fd1ca4ed74c737abfb2525ec20f8435821ff50158f3 +size 67108864 diff --git a/params_shard_48.bin b/params_shard_48.bin new file mode 100644 index 0000000000000000000000000000000000000000..175c8a892e838d86eb63895830a47e9ec55045bf --- /dev/null +++ b/params_shard_48.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29e03404838af61a8741de8cf35a5de1f6af115a1a5295319d83af4f64f3bf87 +size 27283456 diff --git a/params_shard_5.bin b/params_shard_5.bin new file mode 100644 index 0000000000000000000000000000000000000000..3188bbcb5347152e84947ebff5a95ec6be0050ef --- /dev/null +++ b/params_shard_5.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd15ff66098ec09268baef9fffb86f474faed2a678b173dac4260ecad9effc19 +size 29376512 diff --git a/params_shard_6.bin b/params_shard_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..3f97f4b532b28b2c8360cc1636145568bfcb700f --- /dev/null +++ b/params_shard_6.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf3b3cb4cace435809ac7b4cc933f20853af12f1f272d3dcefe7f19c0f203b8b +size 67108864 diff --git a/params_shard_7.bin b/params_shard_7.bin new file mode 100644 index 0000000000000000000000000000000000000000..5e21e85eea4e8ee0e060d8319f04dad9f31ad987 --- /dev/null +++ b/params_shard_7.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57a66e0268cd685d66a96425a81b6f6042720fcc156b23faf56fa7ff13ce20bb +size 134217728 diff --git a/params_shard_8.bin b/params_shard_8.bin new file mode 100644 index 0000000000000000000000000000000000000000..b2c7d7159186bad60843053f7856281030e8dd17 --- /dev/null +++ b/params_shard_8.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ba40b8e12c09c5fb2006b77a771cb01ce894e88a3b3e1877f927a5b89c91709 +size 67108864 diff --git a/params_shard_9.bin b/params_shard_9.bin new file mode 100644 index 0000000000000000000000000000000000000000..4c7151bfeabb732c872d74f38e5dd9c0e772020d --- /dev/null +++ b/params_shard_9.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f81ac658699fb66027a8de3e1849e3413b5ef7d7f29cd3091a86b3b1728f366 +size 134217728 diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..10d1ca087d5e78cbfc41ae083201df86aa031044 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05e97791a5e007260de1db7e1692e53150e08cea481e2bf25435553380c147ee +size 17477929 diff --git a/tokenizer.model b/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..0fb99fadba40bda6e559565d903f62cfde63b88a --- /dev/null +++ b/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6969e64047744a44bb3abfb5c50f8de0f7ed8b571d5444426ef931f651d1a0ef +size 4241111 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f66eacb3b9c5afce19c15caa4f822b39bc0aac45 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,70 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": null, + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +}