luciaquirke commited on
Commit
80fb707
·
verified ·
1 Parent(s): 496a70b

Add files using upload-large-folder tool

Browse files
config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sae": {"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": true}, "batch_size": 2, "grad_acc_steps": 4, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.0.mlp", "layers.1.mlp", "layers.2.mlp", "layers.3.mlp", "layers.4.mlp", "layers.5.mlp", "layers.6.mlp", "layers.7.mlp", "layers.8.mlp", "layers.9.mlp", "layers.10.mlp", "layers.11.mlp", "layers.12.mlp", "layers.13.mlp", "layers.14.mlp", "layers.15.mlp"], "init_seeds": [0], "layers": [], "layer_stride": 1, "transcode": true, "distribute_modules": true, "save_every": 1000, "log_to_wandb": true, "run_name": "DeepSeek-R1-Distill-Qwen-1.5B-skip-65k", "wandb_log_frequency": 1, "model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "HuggingFaceTB/smollm-corpus", "subset": "fineweb-edu-dedup", "split": "train", "ctx_len": 2048, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": null, "resume": false, "text_column": "text", "finetune": null, "shuffle_seed": 42, "data_preprocessing_num_proc": 48}
layers.0.mlp/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": true, "d_in": 1536}
layers.0.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:334dac676c92f48a8485144ca0ec27eb0f5cd54a128aed8eb2b2d9c980de6aee
3
+ size 815012256
layers.1.mlp/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": true, "d_in": 1536}
layers.1.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b920d3ab037f0829d2114cfad921918e6a831fb813e26b813ee0b60b059feb0
3
+ size 815012256
layers.10.mlp/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": true, "d_in": 1536}
layers.10.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4972e144cd9401d4fc62f901dd2abbb0eb13a4c98f3b713abd26a6ccf1cdc27
3
+ size 815012256
layers.11.mlp/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": true, "d_in": 1536}
layers.11.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:808caf16ec7149c21ba5bb1aac19cd03c26ab14220e5d2afa7e19d719e034872
3
+ size 815012256
layers.12.mlp/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": true, "d_in": 1536}
layers.12.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d258eb670ef722cf52d684e7140a9909229f814b1276615cca175c60627ac843
3
+ size 815012256
layers.13.mlp/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": true, "d_in": 1536}
layers.13.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b6d1ca59e7a1275fd496b96c6febdda992f5e2c02d69f150bcd147a5b9525ff
3
+ size 815012256
layers.14.mlp/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": true, "d_in": 1536}
layers.14.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07e6edc13422eb91b7cf45b1a88c05b4e7e22da0147888914012429192609b1d
3
+ size 815012256
layers.15.mlp/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": true, "d_in": 1536}
layers.15.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ad88666a4d9f984c3cba5ec3944593d2099999abada1299353b0e92876c055d
3
+ size 815012256
layers.2.mlp/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": true, "d_in": 1536}
layers.2.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff2571f48926a19a32e4c6295b0d46e78fe1f2b7847accd86f44a61098505a9d
3
+ size 815012256
layers.3.mlp/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": true, "d_in": 1536}
layers.3.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59110f87520f1abaac62e166fafa9a55d0c83a483e860e2ce1972399e45646ca
3
+ size 815012256
layers.4.mlp/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": true, "d_in": 1536}
layers.4.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca674f0745eeae08856e1811fcdaba362f55500bf4cb26313482ee004b884c79
3
+ size 815012256
layers.5.mlp/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": true, "d_in": 1536}
layers.5.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7c83b1854c5ee79c05aac200e66fde659a355a1a298cac582debf65e59ba187
3
+ size 815012256
layers.6.mlp/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": true, "d_in": 1536}
layers.6.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b728bcc7c80af0036879320d7cd3416473b292f7c062ad1d08a50b8fa4fc180
3
+ size 815012256
layers.7.mlp/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": true, "d_in": 1536}
layers.7.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:749cd150989959ab55cf31ebf8f5ce3ced56ede6ef8f152477733cf8d2f9b753
3
+ size 815012256
layers.8.mlp/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": true, "d_in": 1536}
layers.8.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d652abbea28769118e0bb7802645466b278a4c4e25b1a107076995892f746cfc
3
+ size 815012256
layers.9.mlp/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": true, "d_in": 1536}
layers.9.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0569e4fe1dc2adee6529272560375164a8893ac47460975c4bf31bf0317085f
3
+ size 815012256
lr_scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf208a9524911862838668e1825fa34dc5e29ef9c92e1eb7f9258ce6cad96710
3
+ size 1076
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b0502f69511e1bdd4d788a12e53f297d0cecf237dbd5076634faf11c4ae1cb9
3
+ size 827778488
state.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bab6cf8405eed8bdb4848027cc5d9fa1534050b8bed156097418c712a333347b
3
+ size 1049996