luciaquirke
committed on
Add files using upload-large-folder tool
Browse files
- config.json +1 -0
- layers.0.mlp/cfg.json +1 -0
- layers.0.mlp/sae.safetensors +3 -0
- layers.1.mlp/cfg.json +1 -0
- layers.1.mlp/sae.safetensors +3 -0
- layers.10.mlp/cfg.json +1 -0
- layers.10.mlp/sae.safetensors +3 -0
- layers.11.mlp/cfg.json +1 -0
- layers.11.mlp/sae.safetensors +3 -0
- layers.12.mlp/cfg.json +1 -0
- layers.12.mlp/sae.safetensors +3 -0
- layers.13.mlp/cfg.json +1 -0
- layers.13.mlp/sae.safetensors +3 -0
- layers.14.mlp/cfg.json +1 -0
- layers.14.mlp/sae.safetensors +3 -0
- layers.15.mlp/cfg.json +1 -0
- layers.15.mlp/sae.safetensors +3 -0
- layers.2.mlp/cfg.json +1 -0
- layers.2.mlp/sae.safetensors +3 -0
- layers.3.mlp/cfg.json +1 -0
- layers.3.mlp/sae.safetensors +3 -0
- layers.4.mlp/cfg.json +1 -0
- layers.4.mlp/sae.safetensors +3 -0
- layers.5.mlp/cfg.json +1 -0
- layers.5.mlp/sae.safetensors +3 -0
- layers.6.mlp/cfg.json +1 -0
- layers.6.mlp/sae.safetensors +3 -0
- layers.7.mlp/cfg.json +1 -0
- layers.7.mlp/sae.safetensors +3 -0
- layers.8.mlp/cfg.json +1 -0
- layers.8.mlp/sae.safetensors +3 -0
- layers.9.mlp/cfg.json +1 -0
- layers.9.mlp/sae.safetensors +3 -0
- lr_scheduler.pt +3 -0
- optimizer.pt +3 -0
- state.pt +3 -0
config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sae": {"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": true}, "batch_size": 2, "grad_acc_steps": 4, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.0.mlp", "layers.1.mlp", "layers.2.mlp", "layers.3.mlp", "layers.4.mlp", "layers.5.mlp", "layers.6.mlp", "layers.7.mlp", "layers.8.mlp", "layers.9.mlp", "layers.10.mlp", "layers.11.mlp", "layers.12.mlp", "layers.13.mlp", "layers.14.mlp", "layers.15.mlp"], "init_seeds": [0], "layers": [], "layer_stride": 1, "transcode": true, "distribute_modules": true, "save_every": 1000, "log_to_wandb": true, "run_name": "DeepSeek-R1-Distill-Qwen-1.5B-skip-65k", "wandb_log_frequency": 1, "model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "HuggingFaceTB/smollm-corpus", "subset": "fineweb-edu-dedup", "split": "train", "ctx_len": 2048, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": null, "resume": false, "text_column": "text", "finetune": null, "shuffle_seed": 42, "data_preprocessing_num_proc": 48}
|
layers.0.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": true, "d_in": 1536}
|
layers.0.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:334dac676c92f48a8485144ca0ec27eb0f5cd54a128aed8eb2b2d9c980de6aee
|
3 |
+
size 815012256
|
layers.1.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": true, "d_in": 1536}
|
layers.1.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2b920d3ab037f0829d2114cfad921918e6a831fb813e26b813ee0b60b059feb0
|
3 |
+
size 815012256
|
layers.10.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": true, "d_in": 1536}
|
layers.10.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c4972e144cd9401d4fc62f901dd2abbb0eb13a4c98f3b713abd26a6ccf1cdc27
|
3 |
+
size 815012256
|
layers.11.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": true, "d_in": 1536}
|
layers.11.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:808caf16ec7149c21ba5bb1aac19cd03c26ab14220e5d2afa7e19d719e034872
|
3 |
+
size 815012256
|
layers.12.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": true, "d_in": 1536}
|
layers.12.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d258eb670ef722cf52d684e7140a9909229f814b1276615cca175c60627ac843
|
3 |
+
size 815012256
|
layers.13.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": true, "d_in": 1536}
|
layers.13.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4b6d1ca59e7a1275fd496b96c6febdda992f5e2c02d69f150bcd147a5b9525ff
|
3 |
+
size 815012256
|
layers.14.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": true, "d_in": 1536}
|
layers.14.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:07e6edc13422eb91b7cf45b1a88c05b4e7e22da0147888914012429192609b1d
|
3 |
+
size 815012256
|
layers.15.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": true, "d_in": 1536}
|
layers.15.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5ad88666a4d9f984c3cba5ec3944593d2099999abada1299353b0e92876c055d
|
3 |
+
size 815012256
|
layers.2.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": true, "d_in": 1536}
|
layers.2.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ff2571f48926a19a32e4c6295b0d46e78fe1f2b7847accd86f44a61098505a9d
|
3 |
+
size 815012256
|
layers.3.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": true, "d_in": 1536}
|
layers.3.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:59110f87520f1abaac62e166fafa9a55d0c83a483e860e2ce1972399e45646ca
|
3 |
+
size 815012256
|
layers.4.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": true, "d_in": 1536}
|
layers.4.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca674f0745eeae08856e1811fcdaba362f55500bf4cb26313482ee004b884c79
|
3 |
+
size 815012256
|
layers.5.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": true, "d_in": 1536}
|
layers.5.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b7c83b1854c5ee79c05aac200e66fde659a355a1a298cac582debf65e59ba187
|
3 |
+
size 815012256
|
layers.6.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": true, "d_in": 1536}
|
layers.6.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5b728bcc7c80af0036879320d7cd3416473b292f7c062ad1d08a50b8fa4fc180
|
3 |
+
size 815012256
|
layers.7.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": true, "d_in": 1536}
|
layers.7.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:749cd150989959ab55cf31ebf8f5ce3ced56ede6ef8f152477733cf8d2f9b753
|
3 |
+
size 815012256
|
layers.8.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": true, "d_in": 1536}
|
layers.8.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d652abbea28769118e0bb7802645466b278a4c4e25b1a107076995892f746cfc
|
3 |
+
size 815012256
|
layers.9.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": true, "d_in": 1536}
|
layers.9.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c0569e4fe1dc2adee6529272560375164a8893ac47460975c4bf31bf0317085f
|
3 |
+
size 815012256
|
lr_scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bf208a9524911862838668e1825fa34dc5e29ef9c92e1eb7f9258ce6cad96710
|
3 |
+
size 1076
|
optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0b0502f69511e1bdd4d788a12e53f297d0cecf237dbd5076634faf11c4ae1cb9
|
3 |
+
size 827778488
|
state.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bab6cf8405eed8bdb4848027cc5d9fa1534050b8bed156097418c712a333347b
|
3 |
+
size 1049996
|