Add files using upload-large-folder tool
This view is limited to 50 files because it contains too many changes. See the raw diff for the full list.
- checkpoint_metadata.json +18 -0
- config.yaml +154 -0
- model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors +3 -0
- model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
- model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors +3 -0
- model/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
- model/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors +3 -0
- model/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
- model/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors +3 -0
- model/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
- model/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors +3 -0
- model/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
- model/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors +3 -0
- model/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
- model/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors +3 -0
- model/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
- model/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors +3 -0
- model/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
- model/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
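Each entry above is one module's weight stored as its own safetensors file, following the nanotron sharded-checkpoint layout: tensor- and pipeline-parallel projections carry a `pp-rank`/`tp-rank` suffix, while unsharded weights such as the layernorms do not. A minimal sketch of how such a path could be reconstructed from a layer index and module name (the `shard_path` helper below is hypothetical, not part of this repository):

```python
# Illustrative only: rebuild the expected shard path used in this checkpoint,
# assuming the pp=1 / tp=1 layout visible in the file names above.
def shard_path(layer: int, module: str, sharded: bool = True) -> str:
    base = f"model/model/decoder/{layer}/pp_block/{module}"
    if sharded:
        return f"{base}/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors"
    return f"{base}/model_weight.safetensors"

print(shard_path(10, "attn/qkv_proj"))           # rank-suffixed projection weight
print(shard_path(10, "input_layernorm", False))  # unsharded layernorm weight
```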
checkpoint_metadata.json
ADDED
@@ -0,0 +1,18 @@
+{
+    "custom_metas": null,
+    "dp": 256,
+    "metas": {
+        "consumed_train_samples": 3927040000,
+        "data_stages": [
+            {
+                "consumed_train_samples": 3927040000,
+                "name": "stable",
+                "start_training_step": 1
+            }
+        ],
+        "last_stage_idx": 0,
+        "last_train_step": 3835000
+    },
+    "tp": 1,
+    "version": "1.4"
+}
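checkpoint_metadata.json records the parallelism layout (dp=256, tp=1) and how far training had progressed when this checkpoint was written. A minimal sketch of how it could be inspected (the 2048 sequence length is taken from config.yaml below; the script itself is illustrative and not shipped with the checkpoint):

```python
import json

# Read the checkpoint metadata and derive the approximate token count.
with open("checkpoint_metadata.json") as f:
    meta = json.load(f)

samples = meta["metas"]["consumed_train_samples"]  # 3,927,040,000
step = meta["metas"]["last_train_step"]            # 3,835,000
tokens = samples * 2048                            # sequence_length from config.yaml
print(f"step {step}: {samples:,} samples, ~{tokens / 1e12:.2f}T tokens")  # ~8.04T
```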
config.yaml
ADDED
@@ -0,0 +1,154 @@
+checkpoints:
+  checkpoint_interval: 1000
+  checkpoints_path: /scratch/loubna/checkpoints/smollm-big-run-1p81G-smollm-1.7B-8T-seed-0-
+  checkpoints_path_is_shared_file_system: false
+  overwrite_datastage: false
+  resume_checkpoint_path: /scratch/loubna/checkpoints/smollm-big-run-1p81G-smollm-1.7B-8T-seed-0-/3074000
+  save_initial_state: true
+data_stages:
+- data:
+    dataset:
+      dataloader_type: cyclic
+      dataset_max_tokens: null
+      dataset_weights:
+      - 1
+      datasets:
+      - filename_pattern: .*\.ds$
+        folder: /fsx/loubna/tokenized_for_exps/phase2_mixture
+        seed: 0
+        shuffle: true
+        skip_tokens: 0
+      pad_samples_to_global_batch_size: false
+      skip_in_stream: true
+    num_loading_workers: 0
+    seed: 0
+  name: stable
+  start_training_step: 1
+experiment_logger:
+  tensorboard_logger:
+    push_to_hub_interval: 50
+    repo_id: HuggingFaceTB/smollm-big-run
+    repo_public: false
+    tensorboard_dir: /scratch/loubna/tensorboard-cosmo-smollm-big-run
+  wandb_logger:
+    wandb_entity: loubnabnl
+    wandb_project: smollm-big-run
+general:
+  benchmark_csv_path: null
+  consumed_train_samples: 3927040000
+  ignore_sanity_checks: true
+  project: smollm-big-run
+  run: smollm-big-run-1p81G-smollm-1.7B-8T-seed-0-
+  seed: 42
+  step: 3835000
+kill_switch_path: null
+lighteval:
+  batch_size: 16
+  checkpoints_path: null
+  generation: null
+  logging:
+    hub_repo_details: null
+    hub_repo_results: HuggingFaceTB/smollm-big-run
+    hub_repo_tensorboard: HuggingFaceTB/smollm-big-run
+    local_output_path: /scratch/loubna/lighteval/smollm-big-run-1p81G-smollm-1.7B-8T-seed-0-
+    push_details_to_hub: false
+    push_results_to_hub: true
+    push_results_to_tensorboard: true
+    tensorboard_metric_prefix: e
+  parallelism:
+    dp: 8
+    expert_parallel_size: 1
+    pp: 1
+    pp_engine: 1f1b
+    tp: 1
+    tp_linear_async_communication: false
+    tp_mode: ALL_REDUCE
+  slurm_script_dir: /fsx/loubna/logs/smollmv2/eval-scripts
+  slurm_template: /fsx/loubna/projects/brrr/examples/loubna/eval_1b.slurm.jinja
+  tasks:
+    custom_tasks: brrr.lighteval.evaluation_tasks
+    dataset_loading_processes: 8
+    max_samples: 1000
+    multichoice_continuations_start_space: null
+    no_multichoice_continuations_start_space: null
+    num_fewshot_seeds: null
+    tasks: early-signal
+  wandb:
+    wandb_entity: loubnabnl
+    wandb_project: smollm-big-run
+    wandb_run_name: smollm-big-run-1p81G-smollm-1.7B-8T-seed-0-_evals
+logging:
+  iteration_step_info_interval: 1
+  log_level: info
+  log_level_replica: info
+model:
+  ddp_bucket_cap_mb: 25
+  dtype: bfloat16
+  init_method:
+    std: 0.02
+  make_vocab_size_divisible_by: 1
+  model_config:
+    bos_token_id: 0
+    eos_token_id: 0
+    hidden_act: silu
+    hidden_size: 2048
+    initializer_range: 0.02
+    intermediate_size: 8192
+    is_llama_config: true
+    max_position_embeddings: 2048
+    num_attention_heads: 32
+    num_hidden_layers: 24
+    num_key_value_heads: 32
+    pad_token_id: null
+    pretraining_tp: 1
+    rms_norm_eps: 1.0e-05
+    rope_scaling: null
+    tie_word_embeddings: true
+    use_cache: true
+    vocab_size: 49152
+optimizer:
+  accumulate_grad_in_fp32: true
+  clip_grad: 1.0
+  learning_rate_scheduler:
+    learning_rate: 0.0005
+    lr_decay_starting_step: 3870000
+    lr_decay_steps: 430000
+    lr_decay_style: linear
+    lr_warmup_steps: 2000
+    lr_warmup_style: linear
+    min_decay_lr: 0
+  optimizer_factory:
+    adam_beta1: 0.9
+    adam_beta2: 0.95
+    adam_eps: 1.0e-08
+    name: adamW
+    torch_adam_is_fused: true
+  weight_decay: 0.01
+  zero_stage: 0
+parallelism:
+  dp: 256
+  expert_parallel_size: 1
+  pp: 1
+  pp_engine: 1f1b
+  tp: 1
+  tp_linear_async_communication: true
+  tp_mode: REDUCE_SCATTER
+profiler: null
+s3_upload:
+  remove_after_upload: true
+  s5cmd_concurrency: 5
+  s5cmd_numworkers: 16
+  s5cmd_path: /admin/home/loubna/miniconda3/envs/nanotron/bin/s5cmd
+  upload_s3_path: s3://synthetic-project-models/big-run-5T/smollm-big-run-1p81G-smollm-1.7B-8T-seed-0-
+tokenizer:
+  tokenizer_max_length: null
+  tokenizer_name_or_path: HuggingFaceTB/cosmo2-tokenizer
+  tokenizer_revision: null
+tokens:
+  batch_accumulation_per_replica: 1
+  limit_test_batches: 0
+  limit_val_batches: 0
+  micro_batch_size: 4
+  sequence_length: 2048
+  train_steps: 4300000
+  val_check_interval: 100
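The `parallelism` and `tokens` sections together determine the effective batch size, and the counters in `general` are consistent with it: 256 data-parallel replicas × 4 micro-batch × 1 accumulation step = 1024 samples per step, and 3,927,040,000 consumed samples / 1024 = 3,835,000 steps, matching `general.step`. A small sketch of that check (assumes PyYAML is installed; not part of the upload):

```python
import yaml

# Derive the effective batch size from the training config and check it
# against the consumed-sample counter in the `general` section.
with open("config.yaml") as f:
    cfg = yaml.safe_load(f)

dp = cfg["parallelism"]["dp"]                               # 256 replicas
mbs = cfg["tokens"]["micro_batch_size"]                     # 4
grad_acc = cfg["tokens"]["batch_accumulation_per_replica"]  # 1
seq_len = cfg["tokens"]["sequence_length"]                  # 2048

global_batch = dp * mbs * grad_acc      # 1024 samples per step
tokens_per_step = global_batch * seq_len  # 2,097,152 tokens per step

# 3,927,040,000 samples / 1024 samples per step = 3,835,000 steps.
assert cfg["general"]["consumed_train_samples"] // global_batch == cfg["general"]["step"]
print(global_batch, tokens_per_step)
```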
model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0df8c6ef0e6b6765d34a2f7925b5d554f60178786f7b11574253a946d94b5242
+size 8388848
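The safetensors entries in this commit are Git LFS pointer files: only the `version`/`oid`/`size` triplet is stored in git, and the weight data itself lives in LFS storage. The recorded sizes line up with bfloat16 tensors of the shapes implied by `model_config`, plus a small safetensors header; the sketch below just redoes that arithmetic (illustrative only):

```python
# The o_proj pointer's recorded size matches a bf16 weight of shape
# (hidden_size, hidden_size) = (2048, 2048) plus a small safetensors header.
hidden = 2048
payload = hidden * hidden * 2   # bf16 -> 2 bytes/element = 8,388,608 bytes
header = 8_388_848 - payload    # remaining 240 bytes: safetensors JSON header
print(payload, header)

# The other shard sizes follow the same pattern:
#   qkv_proj      3 * 2048 * 2048 * 2 = 25,165,824  (~25,166,176 with header)
#   gate_up_proj  2 * 8192 * 2048 * 2 = 67,108,864  (~67,109,160 with header)
#   down_proj         8192 * 2048 * 2 = 33,554,432  (~33,554,672 with header)
#   layernorm                 2048 * 2 =      4,096  (~4,192 with header)
```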
model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ab8796f38fe9bb2dac138f34c692ac9f6117b23c8ce418a8a728455571d31673
+size 25166176
model/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a5c895b868d611297edfd5c1f4aab87760eeae49fc26e006a60676f656f4bbc7
+size 4192
model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7a0dbcebf05b34144d645792fb3fe6eff91c86805e0c053b8ba542042c2e7e15
+size 33554672
model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:40b3b8ce534d8e2ed1f47119016fe312b954a541657dcd20c0b048c71ec47a18
+size 67109160
model/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:006e5c4380b961927f1ba842100fcb04c6ed926ea90698dbef2ad0b1cc32469a
+size 4192
model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:48080c61c6dbfaea481c8ed1476ad32c550355e0aacdc0b5ba15b333ab205d60
+size 8388848
model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0dc24e3f9f7a37b47cf198dea098d353c2963a76be30af053edd3008625559b3
+size 25166176
model/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:603f8bdecd8c1e58e6d07e2934fd4e39e59d16199334d82683360384768efba7
+size 4192
model/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8b16a02681152efc5ecf60441a2946fcb819b37bc8403c76ba182dc3d457091e
+size 33554672
model/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1b8764a664692dd1185234e1e12e670d714f53a60fd50b33e26bc2a56502c51a
+size 67109160
model/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0376bdcb798935572b53f6f5e8eb79aff5c0cd830ea9a2fd3e01f565771720af
+size 4192
model/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c597cd4d5049325257d323a5e4f84794428fbf501df2634ad16f9bec4efac234
+size 8388848
model/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ded8bfa83bf89d082fd04e02d3d8377557c2f1e09a4a2ad1993feedea9cbeec5
+size 25166176
model/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2170b9eb9597297aed1a2fc619a7098d1c6517c39ff4d846de75a5afb1fa75dd
+size 4192
model/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:547e7ef5c94e2ef8e89c8434fe745d43377c14a7c4c87248c04c7645043137ef
+size 33554672
model/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:30333788f612dee918339620b44ff34171fc619b11211288f7d5439c2f57baca
+size 67109160
model/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5c63998b611b39bfe475a95234e79eea533d421e495fe6a761a7a483a5f74e3b
+size 4192
model/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b58c88d7bfd335ec6bc9c49920e16666837c708a9dd2e16a842b5474009a184b
+size 8388848
model/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a7d67183df2c73626aa940a8950abe90feb528d565c4ea2ce65939b2cf7b9878
+size 25166176
model/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:23e4ac4c4b859f51a4b63c0b5dcf31efa306eb62d9bc78ada5ef5eb39af7009f
+size 4192
model/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:59edfedcf83b562b0b61b9cb74a5b45000af930b216142a4a7d0e6ce32351b56
+size 33554672
model/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:15bdf4dcabc0f76463183aaf7bc16ca50fa59121ef21b584403dcbc907ed25d7
+size 67109160
model/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:110a693fea2c5dcd62eedf2474d7fc4b523ed8609093786b8b4f9635c84cc394
+size 4192
model/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b032ce45fbbf89acd43ecd6589ed7265631c69d9034f98e762adaaa89ca2b5e2
+size 8388848
model/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a3574082e2108aa2cdfa206b7ec02a70ccc8bcac916fe3972e301213f057ac02
+size 25166176
model/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6937f759ae5fe67413e111b13f5f3723946101e5b9ace33b00954d7afa39218d
+size 4192
model/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:34b43f13e06541f78b1f8a27773e6726a1dc20365bd1e0a80e4cbaba4fdc5166
+size 33554672
model/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6a5c2e80c933c79bec2ff8ed1fa1b70baaf59c1851ced3dd40058861cbd5f450
+size 67109160
model/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c6f0adb6483cdefa9f4db36c4ecfcf695f2536db798e142140e6f885d00732b3
+size 4192
model/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9d321e7be1aeac9ae9af4f4b98336ed65da4caf671e48fe07847feb3db1dab7c
+size 8388848
model/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fb572f72db675305f01bfd35f623e263c910488e3ea494bdb41b8abc75ffdd91
+size 25166176
model/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:882532a0c2bccaec8ffe09b385aa18d0fecac5839b5a97d705ab726252f0f152
+size 4192
model/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:777f15d9ffaeae9258d03218ed6cced46b2fe1e628adff3f02f7cc90f09d8945
+size 33554672
model/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0436ff9f279dbf07962b596474c13df283f911811f016e68235226c07c158092
+size 4192
model/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d40f707b73e00084e4b2f450e9e73f030780e6b08f1a1e5ec26e879ea8617f50
+size 8388848
model/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a251966982f9414f4c7e20d0d596e47fd0c24cfbb6852e339f3cfd938cfe482d
+size 25166176
model/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9c3c553079a10e2d0ad91776ad070165da3082b02fb4b77165cafec3d79ad8b3
+size 4192
model/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cbcea22b811bb7c4434117d2785b3b4c25c1dfe2d38be80d4f51feaf75f8a81f
+size 33554672
model/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5931a0012da68afd6a5e092f1b385fc854e29707d6c1b9ce70a9f841fcabe5d0
+size 67109160
model/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1741a9491e626a804399a2599493b1310db2b044253d65aa758bf7267f0a67dc
+size 4192
model/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e670c343754276fb61ae9959680ec5ccb0ab6989bf880224144c157442b14130
+size 8388848
model/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:095f588265dbf2b3d80cfcfdb3bfca6014007a7954950a74e5863d79663e4916
+size 25166176
model/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fff67b97846515617b0161d06ed06d6faedeedb2141269a4660dc8e564d0c5ba
+size 4192
model/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8f358b102c0943cfac36fdd97ee224ba2f4e7378d92782d50954217cc2f2ac60
+size 33554672
model/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a4914277c311a6e6dce6b6635c16cdf0856c831e19b73352dbfa02e34d102f9d
+size 67109160
model/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a959a8f6bf4a58ac2a0cc3c95d6e1818cd09bb60f9af19f2666edd2b57ff15a
+size 4192
model/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e4c897e0163e9a7e45feb48889e009b3127e8d34a927505ebea26d1f75930231
+size 8388848