Upload folder using huggingface_hub
Browse files
step100000-unsharded/config.yaml
ADDED
@@ -0,0 +1,465 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
run_name: OLMo-560M-17values
|
2 |
+
seed: 6198
|
3 |
+
epoch: null
|
4 |
+
dry_run: false
|
5 |
+
model:
|
6 |
+
d_model: 1280
|
7 |
+
n_heads: 20
|
8 |
+
n_kv_heads: null
|
9 |
+
clip_qkv: null
|
10 |
+
n_layers: 24
|
11 |
+
mlp_ratio: 4
|
12 |
+
mlp_hidden_size: 6144
|
13 |
+
activation_type: swiglu
|
14 |
+
block_type: sequential
|
15 |
+
block_group_size: 1
|
16 |
+
alibi: false
|
17 |
+
alibi_bias_max: 8.0
|
18 |
+
rope: true
|
19 |
+
rope_full_precision: true
|
20 |
+
rope_theta: 10000
|
21 |
+
flash_attention: false
|
22 |
+
attention_dropout: 0.0
|
23 |
+
multi_query_attention: false
|
24 |
+
attention_layer_norm: false
|
25 |
+
residual_dropout: 0.0
|
26 |
+
embedding_dropout: 0.0
|
27 |
+
embedding_layer_norm: false
|
28 |
+
layer_norm_type: rms
|
29 |
+
layer_norm_with_affine: true
|
30 |
+
layer_norm_eps: 1.0e-05
|
31 |
+
attention_layer_norm_with_affine: false
|
32 |
+
max_sequence_length: 2048
|
33 |
+
include_bias: false
|
34 |
+
bias_for_layer_norm: false
|
35 |
+
scale_logits: false
|
36 |
+
vocab_size: 50277
|
37 |
+
embedding_size: 50304
|
38 |
+
weight_tying: false
|
39 |
+
eos_token_id: 50277
|
40 |
+
pad_token_id: 1
|
41 |
+
init_device: meta
|
42 |
+
init_fn: mitchell
|
43 |
+
init_std: 0.02
|
44 |
+
init_cutoff_factor: null
|
45 |
+
precision: amp_bf16
|
46 |
+
scale_emb_init: false
|
47 |
+
emb_init_std: null
|
48 |
+
norm_after: false
|
49 |
+
linear_type: values17
|
50 |
+
num_trilm_matrix_scales: 1
|
51 |
+
optimizer:
|
52 |
+
name: adamw
|
53 |
+
learning_rate: 0.00028
|
54 |
+
weight_decay: 0.1
|
55 |
+
betas:
|
56 |
+
- 0.9
|
57 |
+
- 0.95
|
58 |
+
eps: 1.0e-05
|
59 |
+
no_decay_norm_and_bias: null
|
60 |
+
selective_updates: false
|
61 |
+
decay_norm_and_bias: false
|
62 |
+
decay_embeddings: false
|
63 |
+
metrics_log_interval: 10
|
64 |
+
record_update_metrics: false
|
65 |
+
scheduler:
|
66 |
+
name: cosine_with_warmup
|
67 |
+
units: steps
|
68 |
+
t_warmup: 375
|
69 |
+
t_max: null
|
70 |
+
alpha_f: 0.1
|
71 |
+
grad_clip_warmup_steps: null
|
72 |
+
grad_clip_warmup_factor: null
|
73 |
+
warmup_min_lr: null
|
74 |
+
remove_weight_decay_in_second_half: false
|
75 |
+
data:
|
76 |
+
paths:
|
77 |
+
- ../slimp/train/all_combined/combined_1/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
78 |
+
- ../slimp/train/all_combined/combined_1/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
79 |
+
- ../slimp/train/all_combined/combined_1/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
80 |
+
- ../slimp/train/all_combined/combined_1/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
81 |
+
- ../slimp/train/all_combined/combined_1/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
82 |
+
- ../slimp/train/all_combined/combined_1/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
83 |
+
- ../slimp/train/all_combined/combined_1/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
84 |
+
- ../slimp/train/all_combined/combined_1/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
85 |
+
- ../slimp/train/all_combined/combined_1/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
86 |
+
- ../slimp/train/all_combined/combined_1/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
87 |
+
- ../slimp/train/all_combined/combined_1/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
88 |
+
- ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
89 |
+
- ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
90 |
+
- ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
91 |
+
- ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
92 |
+
- ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
93 |
+
- ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
94 |
+
- ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
95 |
+
- ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
96 |
+
- ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
97 |
+
- ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
98 |
+
- ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
99 |
+
- ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
100 |
+
- ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
101 |
+
- ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
102 |
+
- ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
103 |
+
- ../slimp/train/all_combined/combined_1/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
104 |
+
- ../slimp/train/all_combined/combined_1/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
105 |
+
- ../slimp/train/all_combined/combined_1/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
106 |
+
- ../slimp/train/all_combined/combined_1/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
107 |
+
- ../slimp/train/all_combined/combined_1/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
108 |
+
- ../slimp/train/all_combined/combined_2/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
109 |
+
- ../slimp/train/all_combined/combined_2/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
110 |
+
- ../slimp/train/all_combined/combined_2/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
111 |
+
- ../slimp/train/all_combined/combined_2/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
112 |
+
- ../slimp/train/all_combined/combined_2/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
113 |
+
- ../slimp/train/all_combined/combined_2/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
114 |
+
- ../slimp/train/all_combined/combined_2/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
115 |
+
- ../slimp/train/all_combined/combined_2/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
116 |
+
- ../slimp/train/all_combined/combined_2/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
117 |
+
- ../slimp/train/all_combined/combined_2/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
118 |
+
- ../slimp/train/all_combined/combined_2/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
119 |
+
- ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
120 |
+
- ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
121 |
+
- ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
122 |
+
- ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
123 |
+
- ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
124 |
+
- ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
125 |
+
- ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
126 |
+
- ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
127 |
+
- ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
128 |
+
- ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
129 |
+
- ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
130 |
+
- ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
131 |
+
- ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
132 |
+
- ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
133 |
+
- ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
134 |
+
- ../slimp/train/all_combined/combined_2/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
135 |
+
- ../slimp/train/all_combined/combined_2/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
136 |
+
- ../slimp/train/all_combined/combined_2/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
137 |
+
- ../slimp/train/all_combined/combined_2/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
138 |
+
- ../slimp/train/all_combined/combined_2/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
139 |
+
- ../slimp/train/all_combined/combined_3/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
140 |
+
- ../slimp/train/all_combined/combined_3/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
141 |
+
- ../slimp/train/all_combined/combined_3/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
142 |
+
- ../slimp/train/all_combined/combined_3/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
143 |
+
- ../slimp/train/all_combined/combined_3/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
144 |
+
- ../slimp/train/all_combined/combined_3/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
145 |
+
- ../slimp/train/all_combined/combined_3/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
146 |
+
- ../slimp/train/all_combined/combined_3/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
147 |
+
- ../slimp/train/all_combined/combined_3/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
148 |
+
- ../slimp/train/all_combined/combined_3/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
149 |
+
- ../slimp/train/all_combined/combined_3/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
150 |
+
- ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
151 |
+
- ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
152 |
+
- ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
153 |
+
- ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
154 |
+
- ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
155 |
+
- ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
156 |
+
- ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
157 |
+
- ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
158 |
+
- ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
159 |
+
- ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
160 |
+
- ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
161 |
+
- ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
162 |
+
- ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
163 |
+
- ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
164 |
+
- ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
165 |
+
- ../slimp/train/all_combined/combined_3/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
166 |
+
- ../slimp/train/all_combined/combined_3/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
167 |
+
- ../slimp/train/all_combined/combined_3/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
168 |
+
- ../slimp/train/all_combined/combined_3/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
169 |
+
- ../slimp/train/all_combined/combined_3/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
170 |
+
- ../slimp/train/all_combined/combined_4/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
171 |
+
- ../slimp/train/all_combined/combined_4/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
172 |
+
- ../slimp/train/all_combined/combined_4/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
173 |
+
- ../slimp/train/all_combined/combined_4/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
174 |
+
- ../slimp/train/all_combined/combined_4/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
175 |
+
- ../slimp/train/all_combined/combined_4/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
176 |
+
- ../slimp/train/all_combined/combined_4/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
177 |
+
- ../slimp/train/all_combined/combined_4/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
178 |
+
- ../slimp/train/all_combined/combined_4/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
179 |
+
- ../slimp/train/all_combined/combined_4/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
180 |
+
- ../slimp/train/all_combined/combined_4/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
181 |
+
- ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
182 |
+
- ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
183 |
+
- ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
184 |
+
- ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
185 |
+
- ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
186 |
+
- ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
187 |
+
- ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
188 |
+
- ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
189 |
+
- ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
190 |
+
- ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
191 |
+
- ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
192 |
+
- ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
193 |
+
- ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
194 |
+
- ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
195 |
+
- ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
196 |
+
- ../slimp/train/all_combined/combined_4/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
197 |
+
- ../slimp/train/all_combined/combined_4/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
198 |
+
- ../slimp/train/all_combined/combined_4/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
199 |
+
- ../slimp/train/all_combined/combined_4/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
200 |
+
- ../slimp/train/all_combined/combined_4/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
201 |
+
- ../slimp/train/all_combined/combined_5/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
202 |
+
- ../slimp/train/all_combined/combined_5/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
203 |
+
- ../slimp/train/all_combined/combined_5/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
204 |
+
- ../slimp/train/all_combined/combined_5/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
205 |
+
- ../slimp/train/all_combined/combined_5/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
206 |
+
- ../slimp/train/all_combined/combined_5/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
207 |
+
- ../slimp/train/all_combined/combined_5/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
208 |
+
- ../slimp/train/all_combined/combined_5/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
209 |
+
- ../slimp/train/all_combined/combined_5/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
210 |
+
- ../slimp/train/all_combined/combined_5/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
211 |
+
- ../slimp/train/all_combined/combined_5/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
212 |
+
- ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
213 |
+
- ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
214 |
+
- ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
215 |
+
- ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
216 |
+
- ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
217 |
+
- ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
218 |
+
- ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
219 |
+
- ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
220 |
+
- ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
221 |
+
- ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
222 |
+
- ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
223 |
+
- ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
224 |
+
- ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
225 |
+
- ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
226 |
+
- ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
227 |
+
- ../slimp/train/all_combined/combined_5/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
228 |
+
- ../slimp/train/all_combined/combined_5/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
229 |
+
- ../slimp/train/all_combined/combined_5/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
230 |
+
- ../slimp/train/all_combined/combined_5/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
231 |
+
- ../slimp/train/all_combined/combined_5/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
232 |
+
- ../slimp/train/all_combined/combined_6/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
233 |
+
- ../slimp/train/all_combined/combined_6/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
234 |
+
- ../slimp/train/all_combined/combined_6/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
235 |
+
- ../slimp/train/all_combined/combined_6/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
236 |
+
- ../slimp/train/all_combined/combined_6/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
237 |
+
- ../slimp/train/all_combined/combined_6/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
238 |
+
- ../slimp/train/all_combined/combined_6/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
239 |
+
- ../slimp/train/all_combined/combined_6/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
240 |
+
- ../slimp/train/all_combined/combined_6/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
241 |
+
- ../slimp/train/all_combined/combined_6/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
242 |
+
- ../slimp/train/all_combined/combined_6/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
243 |
+
- ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
244 |
+
- ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
245 |
+
- ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
246 |
+
- ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
247 |
+
- ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
248 |
+
- ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
249 |
+
- ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
250 |
+
- ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
251 |
+
- ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
252 |
+
- ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
253 |
+
- ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
254 |
+
- ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
255 |
+
- ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
256 |
+
- ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
257 |
+
- ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
258 |
+
- ../slimp/train/all_combined/combined_6/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
259 |
+
- ../slimp/train/all_combined/combined_6/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
260 |
+
- ../slimp/train/all_combined/combined_6/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
261 |
+
- ../slimp/train/all_combined/combined_6/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
262 |
+
- ../slimp/train/all_combined/combined_6/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
263 |
+
- ../slimp/train/all_combined/combined_7/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
264 |
+
- ../slimp/train/all_combined/combined_7/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
265 |
+
- ../slimp/train/all_combined/combined_7/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
266 |
+
- ../slimp/train/all_combined/combined_7/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
267 |
+
- ../slimp/train/all_combined/combined_7/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
268 |
+
- ../slimp/train/all_combined/combined_7/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
269 |
+
- ../slimp/train/all_combined/combined_7/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
270 |
+
- ../slimp/train/all_combined/combined_7/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
271 |
+
- ../slimp/train/all_combined/combined_7/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
272 |
+
- ../slimp/train/all_combined/combined_7/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
273 |
+
- ../slimp/train/all_combined/combined_7/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
274 |
+
- ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
275 |
+
- ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
276 |
+
- ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
277 |
+
- ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
278 |
+
- ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
279 |
+
- ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
280 |
+
- ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
281 |
+
- ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
282 |
+
- ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
283 |
+
- ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
284 |
+
- ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
285 |
+
- ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
286 |
+
- ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
287 |
+
- ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
288 |
+
- ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
289 |
+
- ../slimp/train/all_combined/combined_7/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
290 |
+
- ../slimp/train/all_combined/combined_7/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
291 |
+
- ../slimp/train/all_combined/combined_7/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
292 |
+
- ../slimp/train/all_combined/combined_7/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
293 |
+
- ../slimp/train/all_combined/combined_7/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
294 |
+
- ../slimp/train/all_combined/combined_8/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
295 |
+
- ../slimp/train/all_combined/combined_8/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
296 |
+
- ../slimp/train/all_combined/combined_8/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
297 |
+
- ../slimp/train/all_combined/combined_8/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
298 |
+
- ../slimp/train/all_combined/combined_8/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
299 |
+
- ../slimp/train/all_combined/combined_8/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
300 |
+
- ../slimp/train/all_combined/combined_8/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
301 |
+
- ../slimp/train/all_combined/combined_8/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
302 |
+
- ../slimp/train/all_combined/combined_8/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
303 |
+
- ../slimp/train/all_combined/combined_8/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
304 |
+
- ../slimp/train/all_combined/combined_8/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
305 |
+
- ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
306 |
+
- ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
307 |
+
- ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
308 |
+
- ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
309 |
+
- ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
310 |
+
- ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
311 |
+
- ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
312 |
+
- ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
313 |
+
- ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
314 |
+
- ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
315 |
+
- ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
316 |
+
- ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
317 |
+
- ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
318 |
+
- ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
319 |
+
- ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
320 |
+
- ../slimp/train/all_combined/combined_8/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
321 |
+
- ../slimp/train/all_combined/combined_8/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
322 |
+
- ../slimp/train/all_combined/combined_8/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
323 |
+
- ../slimp/train/all_combined/combined_8/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
324 |
+
- ../slimp/train/all_combined/combined_8/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
325 |
+
- ../slimp/train/all_combined/combined_9/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
326 |
+
- ../slimp/train/all_combined/combined_9/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
327 |
+
- ../slimp/train/all_combined/combined_9/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
328 |
+
- ../slimp/train/all_combined/combined_9/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
329 |
+
- ../slimp/train/all_combined/combined_9/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
330 |
+
- ../slimp/train/all_combined/combined_9/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
331 |
+
- ../slimp/train/all_combined/combined_9/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
332 |
+
- ../slimp/train/all_combined/combined_9/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
333 |
+
- ../slimp/train/all_combined/combined_9/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
334 |
+
- ../slimp/train/all_combined/combined_9/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
335 |
+
- ../slimp/train/all_combined/combined_9/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
336 |
+
- ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
337 |
+
- ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
338 |
+
- ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
339 |
+
- ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
340 |
+
- ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
341 |
+
- ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
342 |
+
- ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
343 |
+
- ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
344 |
+
- ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
345 |
+
- ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
346 |
+
- ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
347 |
+
- ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
348 |
+
- ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
349 |
+
- ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
350 |
+
- ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
351 |
+
- ../slimp/train/all_combined/combined_9/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
352 |
+
- ../slimp/train/all_combined/combined_9/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
353 |
+
- ../slimp/train/all_combined/combined_9/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
354 |
+
- ../slimp/train/all_combined/combined_9/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
355 |
+
- ../slimp/train/all_combined/combined_9/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
356 |
+
- ../slimp/train/all_combined/combined_10/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
357 |
+
- ../slimp/train/all_combined/combined_10/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
358 |
+
- ../slimp/train/all_combined/combined_10/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
359 |
+
- ../slimp/train/all_combined/combined_10/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
360 |
+
- ../slimp/train/all_combined/combined_10/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
361 |
+
- ../slimp/train/all_combined/combined_10/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
362 |
+
- ../slimp/train/all_combined/combined_10/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
363 |
+
- ../slimp/train/all_combined/combined_10/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
364 |
+
- ../slimp/train/all_combined/combined_10/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
365 |
+
- ../slimp/train/all_combined/combined_10/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
366 |
+
- ../slimp/train/all_combined/combined_10/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
367 |
+
- ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
368 |
+
- ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
369 |
+
- ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
370 |
+
- ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
371 |
+
- ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
372 |
+
- ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
373 |
+
- ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
374 |
+
- ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
375 |
+
- ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
376 |
+
- ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
377 |
+
- ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
378 |
+
- ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
379 |
+
- ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
380 |
+
- ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
381 |
+
- ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
382 |
+
- ../slimp/train/all_combined/combined_10/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
383 |
+
- ../slimp/train/all_combined/combined_10/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
384 |
+
- ../slimp/train/all_combined/combined_10/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
385 |
+
- ../slimp/train/all_combined/combined_10/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
386 |
+
- ../slimp/train/all_combined/combined_10/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
|
387 |
+
memmap_dtype: uint16
|
388 |
+
datasets: null
|
389 |
+
label_mask_paths: null
|
390 |
+
pad_direction: right
|
391 |
+
generate_attention_mask: false
|
392 |
+
generate_doc_lengths: false
|
393 |
+
num_workers: 0
|
394 |
+
drop_last: true
|
395 |
+
pin_memory: true
|
396 |
+
prefetch_factor: 16
|
397 |
+
persistent_workers: true
|
398 |
+
timeout: 0
|
399 |
+
seed: null
|
400 |
+
instance_filter: null
|
401 |
+
restore_dataloader: true
|
402 |
+
fast_forward_batches: null
|
403 |
+
evaluators: []
|
404 |
+
eval_interval: 500
|
405 |
+
tokenizer:
|
406 |
+
identifier: ../spectra_tokenizer/tokenizer.json
|
407 |
+
truncate_direction: right
|
408 |
+
save_folder: checkpoints/olmo-560M-17values/
|
409 |
+
remote_save_folder: null
|
410 |
+
canceled_check_interval: 50
|
411 |
+
save_interval: 500
|
412 |
+
save_interval_unsharded: 10000
|
413 |
+
save_interval_ephemeral: null
|
414 |
+
save_num_checkpoints_to_keep: 4
|
415 |
+
save_num_unsharded_checkpoints_to_keep: -1
|
416 |
+
save_overwrite: true
|
417 |
+
force_save_unsharded: false
|
418 |
+
no_pre_train_checkpoint: false
|
419 |
+
load_path: checkpoints/olmo-560M-17values//latest
|
420 |
+
load_path_sharded_checkpointer: null
|
421 |
+
try_load_latest_save: false
|
422 |
+
reset_optimizer_state: false
|
423 |
+
reset_trainer_state: false
|
424 |
+
sharded_checkpointer: torch_legacy
|
425 |
+
new_style_checkpoints: null
|
426 |
+
max_duration: 150000
|
427 |
+
global_train_batch_size: 1024
|
428 |
+
device_train_batch_size: 4
|
429 |
+
device_train_microbatch_size: 4
|
430 |
+
device_eval_batch_size: 4
|
431 |
+
eval_subset_num_batches: -1
|
432 |
+
eval_on_load: false
|
433 |
+
device_train_grad_accum: 1
|
434 |
+
max_grad_norm: 1.0
|
435 |
+
max_grad_norm_ratio: null
|
436 |
+
precision: amp_bf16
|
437 |
+
wandb: null
|
438 |
+
speed_monitor:
|
439 |
+
window_size: 20
|
440 |
+
gpu_flops_available: null
|
441 |
+
console_log_interval: 1
|
442 |
+
gen1_gc_interval: 1
|
443 |
+
compile: null
|
444 |
+
distributed_strategy: fsdp
|
445 |
+
fsdp:
|
446 |
+
use_orig_params: true
|
447 |
+
sharding_strategy: _HYBRID_SHARD_ZERO2
|
448 |
+
wrapping_strategy: null
|
449 |
+
precision: pure
|
450 |
+
hybrid_sharding_num_model_replicas: null
|
451 |
+
ddp: null
|
452 |
+
softmax_auxiliary_loss: false
|
453 |
+
auxiliary_loss_multiplier: 0.0001
|
454 |
+
time_limit: null
|
455 |
+
extra_steps_after_cancel: 10
|
456 |
+
early_stopping_factor: null
|
457 |
+
save_data_indices: true
|
458 |
+
python_profiling: false
|
459 |
+
torch_profiling: false
|
460 |
+
stop_at: 150010
|
461 |
+
stop_after: null
|
462 |
+
activation_checkpointing: null
|
463 |
+
fused_loss: null
|
464 |
+
hf_datasets_cache_dir: null
|
465 |
+
module_outputs_save_steps: null
|
step100000-unsharded/model.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:246668c628a469ee588498153096bf6e4472e630efb81dcaff87232fc5bf8f1a
|
3 |
+
size 2277037726
|
step100000-unsharded/optim.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d429be84374428d11d10fa21b32289d7641f3566b2807bd0c0017f826247fb8d
|
3 |
+
size 4554069146
|
step100000-unsharded/train.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fc67462c32feb8cba67dccca8ba51c0762256c43d3ad15438a0207d9841b01cf
|
3 |
+
size 14924
|