schalnev committed on
Commit
377869b
·
verified ·
1 Parent(s): f56a4ff

Upload folder using huggingface_hub

Browse files
Files changed (24) hide show
  1. final_sparse_autoencoder_gpt2-small_blocks.0.hook_resid_pre_24576/cfg.json +37 -0
  2. final_sparse_autoencoder_gpt2-small_blocks.0.hook_resid_pre_24576/model.pt +3 -0
  3. final_sparse_autoencoder_gpt2-small_blocks.1.hook_resid_pre_24576/cfg.json +37 -0
  4. final_sparse_autoencoder_gpt2-small_blocks.1.hook_resid_pre_24576/model.pt +3 -0
  5. final_sparse_autoencoder_gpt2-small_blocks.10.hook_resid_pre_24576/cfg.json +37 -0
  6. final_sparse_autoencoder_gpt2-small_blocks.10.hook_resid_pre_24576/model.pt +3 -0
  7. final_sparse_autoencoder_gpt2-small_blocks.11.hook_resid_pre_24576/cfg.json +37 -0
  8. final_sparse_autoencoder_gpt2-small_blocks.11.hook_resid_pre_24576/model.pt +3 -0
  9. final_sparse_autoencoder_gpt2-small_blocks.2.hook_resid_pre_24576/cfg.json +37 -0
  10. final_sparse_autoencoder_gpt2-small_blocks.2.hook_resid_pre_24576/model.pt +3 -0
  11. final_sparse_autoencoder_gpt2-small_blocks.3.hook_resid_pre_24576/cfg.json +37 -0
  12. final_sparse_autoencoder_gpt2-small_blocks.3.hook_resid_pre_24576/model.pt +3 -0
  13. final_sparse_autoencoder_gpt2-small_blocks.4.hook_resid_pre_24576/cfg.json +37 -0
  14. final_sparse_autoencoder_gpt2-small_blocks.4.hook_resid_pre_24576/model.pt +3 -0
  15. final_sparse_autoencoder_gpt2-small_blocks.5.hook_resid_pre_24576/cfg.json +37 -0
  16. final_sparse_autoencoder_gpt2-small_blocks.5.hook_resid_pre_24576/model.pt +3 -0
  17. final_sparse_autoencoder_gpt2-small_blocks.6.hook_resid_pre_24576/cfg.json +37 -0
  18. final_sparse_autoencoder_gpt2-small_blocks.6.hook_resid_pre_24576/model.pt +3 -0
  19. final_sparse_autoencoder_gpt2-small_blocks.7.hook_resid_pre_24576/cfg.json +37 -0
  20. final_sparse_autoencoder_gpt2-small_blocks.7.hook_resid_pre_24576/model.pt +3 -0
  21. final_sparse_autoencoder_gpt2-small_blocks.8.hook_resid_pre_24576/cfg.json +37 -0
  22. final_sparse_autoencoder_gpt2-small_blocks.8.hook_resid_pre_24576/model.pt +3 -0
  23. final_sparse_autoencoder_gpt2-small_blocks.9.hook_resid_pre_24576/cfg.json +37 -0
  24. final_sparse_autoencoder_gpt2-small_blocks.9.hook_resid_pre_24576/model.pt +3 -0
final_sparse_autoencoder_gpt2-small_blocks.0.hook_resid_pre_24576/cfg.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_name": "gpt2-small",
3
+ "hook_point": "blocks.0.hook_resid_pre",
4
+ "hook_point_layer": 0,
5
+ "hook_point_head_index": null,
6
+ "dataset_path": "Skylion007/openwebtext",
7
+ "is_dataset_tokenized": false,
8
+ "context_size": 128,
9
+ "use_cached_activations": false,
10
+ "cached_activations_path": "activations/Skylion007_openwebtext/gpt2-small/blocks.0.hook_resid_pre",
11
+ "d_in": 768,
12
+ "n_batches_in_buffer": 128,
13
+ "total_training_tokens": 300000000,
14
+ "store_batch_size": 32,
15
+ "seed": 42,
16
+ "b_dec_init_method": "geometric_median",
17
+ "expansion_factor": 32,
18
+ "from_pretrained_path": null,
19
+ "d_sae": 24576,
20
+ "l1_coefficient": 8e-05,
21
+ "lp_norm": 1,
22
+ "lr": 0.0004,
23
+ "lr_scheduler_name": null,
24
+ "lr_warm_up_steps": 5000,
25
+ "train_batch_size": 4096,
26
+ "use_ghost_grads": true,
27
+ "feature_sampling_window": 1000,
28
+ "dead_feature_window": 5000,
29
+ "dead_feature_threshold": 1e-08,
30
+ "log_to_wandb": true,
31
+ "wandb_project": "mats_sae_training_gpt2_small_resid_pre_5",
32
+ "run_name": "24576-L1-8e-05-LR-0.0004-Tokens-3.000e+08",
33
+ "wandb_entity": null,
34
+ "wandb_log_frequency": 100,
35
+ "n_checkpoints": 10,
36
+ "checkpoint_path": "checkpoints/y1t51byy"
37
+ }
final_sparse_autoencoder_gpt2-small_blocks.0.hook_resid_pre_24576/model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae7d93e5df9a295de6381f059ac7d66361df359a9488c5b34c0d823c23a2138e
3
+ size 151098176
final_sparse_autoencoder_gpt2-small_blocks.1.hook_resid_pre_24576/cfg.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_name": "gpt2-small",
3
+ "hook_point": "blocks.1.hook_resid_pre",
4
+ "hook_point_layer": 1,
5
+ "hook_point_head_index": null,
6
+ "dataset_path": "Skylion007/openwebtext",
7
+ "is_dataset_tokenized": false,
8
+ "context_size": 128,
9
+ "use_cached_activations": false,
10
+ "cached_activations_path": "activations/Skylion007_openwebtext/gpt2-small/blocks.1.hook_resid_pre",
11
+ "d_in": 768,
12
+ "n_batches_in_buffer": 128,
13
+ "total_training_tokens": 300000000,
14
+ "store_batch_size": 32,
15
+ "seed": 42,
16
+ "b_dec_init_method": "geometric_median",
17
+ "expansion_factor": 32,
18
+ "from_pretrained_path": null,
19
+ "d_sae": 24576,
20
+ "l1_coefficient": 8e-05,
21
+ "lp_norm": 1,
22
+ "lr": 0.0004,
23
+ "lr_scheduler_name": null,
24
+ "lr_warm_up_steps": 5000,
25
+ "train_batch_size": 4096,
26
+ "use_ghost_grads": true,
27
+ "feature_sampling_window": 1000,
28
+ "dead_feature_window": 5000,
29
+ "dead_feature_threshold": 1e-08,
30
+ "log_to_wandb": true,
31
+ "wandb_project": "mats_sae_training_gpt2_small_resid_pre_5",
32
+ "run_name": "24576-L1-8e-05-LR-0.0004-Tokens-3.000e+08",
33
+ "wandb_entity": null,
34
+ "wandb_log_frequency": 100,
35
+ "n_checkpoints": 10,
36
+ "checkpoint_path": "checkpoints/mm179kd2"
37
+ }
final_sparse_autoencoder_gpt2-small_blocks.1.hook_resid_pre_24576/model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f604d4db127855c9b879a7d369435ba2295aaaba445d12803c602fd1901f351
3
+ size 151098176
final_sparse_autoencoder_gpt2-small_blocks.10.hook_resid_pre_24576/cfg.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_name": "gpt2-small",
3
+ "hook_point": "blocks.10.hook_resid_pre",
4
+ "hook_point_layer": 10,
5
+ "hook_point_head_index": null,
6
+ "dataset_path": "Skylion007/openwebtext",
7
+ "is_dataset_tokenized": false,
8
+ "context_size": 128,
9
+ "use_cached_activations": false,
10
+ "cached_activations_path": "activations/Skylion007_openwebtext/gpt2-small/blocks.10.hook_resid_pre",
11
+ "d_in": 768,
12
+ "n_batches_in_buffer": 128,
13
+ "total_training_tokens": 300000000,
14
+ "store_batch_size": 32,
15
+ "seed": 42,
16
+ "b_dec_init_method": "geometric_median",
17
+ "expansion_factor": 32,
18
+ "from_pretrained_path": null,
19
+ "d_sae": 24576,
20
+ "l1_coefficient": 8e-05,
21
+ "lp_norm": 1,
22
+ "lr": 0.0004,
23
+ "lr_scheduler_name": null,
24
+ "lr_warm_up_steps": 5000,
25
+ "train_batch_size": 4096,
26
+ "use_ghost_grads": true,
27
+ "feature_sampling_window": 1000,
28
+ "dead_feature_window": 5000,
29
+ "dead_feature_threshold": 1e-08,
30
+ "log_to_wandb": true,
31
+ "wandb_project": "mats_sae_training_gpt2_small_resid_pre_5",
32
+ "run_name": "24576-L1-8e-05-LR-0.0004-Tokens-3.000e+08",
33
+ "wandb_entity": null,
34
+ "wandb_log_frequency": 100,
35
+ "n_checkpoints": 10,
36
+ "checkpoint_path": "checkpoints/9vu4ulem"
37
+ }
final_sparse_autoencoder_gpt2-small_blocks.10.hook_resid_pre_24576/model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:012f006deef081ab33ad36bbe1e5dfef7a2353d33e412767463145dacc27aa51
3
+ size 151098176
final_sparse_autoencoder_gpt2-small_blocks.11.hook_resid_pre_24576/cfg.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_name": "gpt2-small",
3
+ "hook_point": "blocks.11.hook_resid_pre",
4
+ "hook_point_layer": 11,
5
+ "hook_point_head_index": null,
6
+ "dataset_path": "Skylion007/openwebtext",
7
+ "is_dataset_tokenized": false,
8
+ "context_size": 128,
9
+ "use_cached_activations": false,
10
+ "cached_activations_path": "activations/Skylion007_openwebtext/gpt2-small/blocks.11.hook_resid_pre",
11
+ "d_in": 768,
12
+ "n_batches_in_buffer": 128,
13
+ "total_training_tokens": 300000000,
14
+ "store_batch_size": 32,
15
+ "seed": 42,
16
+ "b_dec_init_method": "geometric_median",
17
+ "expansion_factor": 32,
18
+ "from_pretrained_path": null,
19
+ "d_sae": 24576,
20
+ "l1_coefficient": 8e-05,
21
+ "lp_norm": 1,
22
+ "lr": 0.0004,
23
+ "lr_scheduler_name": null,
24
+ "lr_warm_up_steps": 5000,
25
+ "train_batch_size": 4096,
26
+ "use_ghost_grads": true,
27
+ "feature_sampling_window": 1000,
28
+ "dead_feature_window": 5000,
29
+ "dead_feature_threshold": 1e-08,
30
+ "log_to_wandb": true,
31
+ "wandb_project": "mats_sae_training_gpt2_small_resid_pre_5",
32
+ "run_name": "24576-L1-8e-05-LR-0.0004-Tokens-3.000e+08",
33
+ "wandb_entity": null,
34
+ "wandb_log_frequency": 100,
35
+ "n_checkpoints": 10,
36
+ "checkpoint_path": "checkpoints/gf296egd"
37
+ }
final_sparse_autoencoder_gpt2-small_blocks.11.hook_resid_pre_24576/model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5953fa562360a790e05214544a70752331ae832e68750b65c676efc4a84853f
3
+ size 151098176
final_sparse_autoencoder_gpt2-small_blocks.2.hook_resid_pre_24576/cfg.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_name": "gpt2-small",
3
+ "hook_point": "blocks.2.hook_resid_pre",
4
+ "hook_point_layer": 2,
5
+ "hook_point_head_index": null,
6
+ "dataset_path": "Skylion007/openwebtext",
7
+ "is_dataset_tokenized": false,
8
+ "context_size": 128,
9
+ "use_cached_activations": false,
10
+ "cached_activations_path": "activations/Skylion007_openwebtext/gpt2-small/blocks.2.hook_resid_pre",
11
+ "d_in": 768,
12
+ "n_batches_in_buffer": 128,
13
+ "total_training_tokens": 300000000,
14
+ "store_batch_size": 32,
15
+ "seed": 42,
16
+ "b_dec_init_method": "geometric_median",
17
+ "expansion_factor": 32,
18
+ "from_pretrained_path": null,
19
+ "d_sae": 24576,
20
+ "l1_coefficient": 8e-05,
21
+ "lp_norm": 1,
22
+ "lr": 0.0004,
23
+ "lr_scheduler_name": null,
24
+ "lr_warm_up_steps": 5000,
25
+ "train_batch_size": 4096,
26
+ "use_ghost_grads": true,
27
+ "feature_sampling_window": 1000,
28
+ "dead_feature_window": 5000,
29
+ "dead_feature_threshold": 1e-08,
30
+ "log_to_wandb": true,
31
+ "wandb_project": "mats_sae_training_gpt2_small_resid_pre_5",
32
+ "run_name": "24576-L1-8e-05-LR-0.0004-Tokens-3.000e+08",
33
+ "wandb_entity": null,
34
+ "wandb_log_frequency": 100,
35
+ "n_checkpoints": 10,
36
+ "checkpoint_path": "checkpoints/59dzvtdt"
37
+ }
final_sparse_autoencoder_gpt2-small_blocks.2.hook_resid_pre_24576/model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:780da3a17f33bb2f6bd82ce1b7802e8c2ab6e41369746dc94915c11b0f1274e5
3
+ size 151098176
final_sparse_autoencoder_gpt2-small_blocks.3.hook_resid_pre_24576/cfg.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_name": "gpt2-small",
3
+ "hook_point": "blocks.3.hook_resid_pre",
4
+ "hook_point_layer": 3,
5
+ "hook_point_head_index": null,
6
+ "dataset_path": "Skylion007/openwebtext",
7
+ "is_dataset_tokenized": false,
8
+ "context_size": 128,
9
+ "use_cached_activations": false,
10
+ "cached_activations_path": "activations/Skylion007_openwebtext/gpt2-small/blocks.3.hook_resid_pre",
11
+ "d_in": 768,
12
+ "n_batches_in_buffer": 128,
13
+ "total_training_tokens": 300000000,
14
+ "store_batch_size": 32,
15
+ "seed": 42,
16
+ "b_dec_init_method": "geometric_median",
17
+ "expansion_factor": 32,
18
+ "from_pretrained_path": null,
19
+ "d_sae": 24576,
20
+ "l1_coefficient": 8e-05,
21
+ "lp_norm": 1,
22
+ "lr": 0.0004,
23
+ "lr_scheduler_name": null,
24
+ "lr_warm_up_steps": 5000,
25
+ "train_batch_size": 4096,
26
+ "use_ghost_grads": true,
27
+ "feature_sampling_window": 1000,
28
+ "dead_feature_window": 5000,
29
+ "dead_feature_threshold": 1e-08,
30
+ "log_to_wandb": true,
31
+ "wandb_project": "mats_sae_training_gpt2_small_resid_pre_5",
32
+ "run_name": "24576-L1-8e-05-LR-0.0004-Tokens-3.000e+08",
33
+ "wandb_entity": null,
34
+ "wandb_log_frequency": 100,
35
+ "n_checkpoints": 10,
36
+ "checkpoint_path": "checkpoints/0sgl1gqz"
37
+ }
final_sparse_autoencoder_gpt2-small_blocks.3.hook_resid_pre_24576/model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0d6126e8799d90e9f77dcd0e37b8e379b3ef5aa6e1c656e2abfd6bcd137160d
3
+ size 151098176
final_sparse_autoencoder_gpt2-small_blocks.4.hook_resid_pre_24576/cfg.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_name": "gpt2-small",
3
+ "hook_point": "blocks.4.hook_resid_pre",
4
+ "hook_point_layer": 4,
5
+ "hook_point_head_index": null,
6
+ "dataset_path": "Skylion007/openwebtext",
7
+ "is_dataset_tokenized": false,
8
+ "context_size": 128,
9
+ "use_cached_activations": false,
10
+ "cached_activations_path": "activations/Skylion007_openwebtext/gpt2-small/blocks.4.hook_resid_pre",
11
+ "d_in": 768,
12
+ "n_batches_in_buffer": 128,
13
+ "total_training_tokens": 300000000,
14
+ "store_batch_size": 32,
15
+ "seed": 42,
16
+ "b_dec_init_method": "geometric_median",
17
+ "expansion_factor": 32,
18
+ "from_pretrained_path": null,
19
+ "d_sae": 24576,
20
+ "l1_coefficient": 8e-05,
21
+ "lp_norm": 1,
22
+ "lr": 0.0004,
23
+ "lr_scheduler_name": null,
24
+ "lr_warm_up_steps": 5000,
25
+ "train_batch_size": 4096,
26
+ "use_ghost_grads": true,
27
+ "feature_sampling_window": 1000,
28
+ "dead_feature_window": 5000,
29
+ "dead_feature_threshold": 1e-08,
30
+ "log_to_wandb": true,
31
+ "wandb_project": "mats_sae_training_gpt2_small_resid_pre_5",
32
+ "run_name": "24576-L1-8e-05-LR-0.0004-Tokens-3.000e+08",
33
+ "wandb_entity": null,
34
+ "wandb_log_frequency": 100,
35
+ "n_checkpoints": 10,
36
+ "checkpoint_path": "checkpoints/7rzeo1iv"
37
+ }
final_sparse_autoencoder_gpt2-small_blocks.4.hook_resid_pre_24576/model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:213adff21b849e290a2baa229ced82e352ec1bb122c36d30295c84b76bc27ad5
3
+ size 151098176
final_sparse_autoencoder_gpt2-small_blocks.5.hook_resid_pre_24576/cfg.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_name": "gpt2-small",
3
+ "hook_point": "blocks.5.hook_resid_pre",
4
+ "hook_point_layer": 5,
5
+ "hook_point_head_index": null,
6
+ "dataset_path": "Skylion007/openwebtext",
7
+ "is_dataset_tokenized": false,
8
+ "context_size": 128,
9
+ "use_cached_activations": false,
10
+ "cached_activations_path": "activations/Skylion007_openwebtext/gpt2-small/blocks.5.hook_resid_pre",
11
+ "d_in": 768,
12
+ "n_batches_in_buffer": 128,
13
+ "total_training_tokens": 300000000,
14
+ "store_batch_size": 32,
15
+ "seed": 42,
16
+ "b_dec_init_method": "geometric_median",
17
+ "expansion_factor": 32,
18
+ "from_pretrained_path": null,
19
+ "d_sae": 24576,
20
+ "l1_coefficient": 8e-05,
21
+ "lp_norm": 1,
22
+ "lr": 0.0004,
23
+ "lr_scheduler_name": null,
24
+ "lr_warm_up_steps": 5000,
25
+ "train_batch_size": 4096,
26
+ "use_ghost_grads": true,
27
+ "feature_sampling_window": 1000,
28
+ "dead_feature_window": 5000,
29
+ "dead_feature_threshold": 1e-08,
30
+ "log_to_wandb": true,
31
+ "wandb_project": "mats_sae_training_gpt2_small_resid_pre_5",
32
+ "run_name": "24576-L1-8e-05-LR-0.0004-Tokens-3.000e+08",
33
+ "wandb_entity": null,
34
+ "wandb_log_frequency": 100,
35
+ "n_checkpoints": 10,
36
+ "checkpoint_path": "checkpoints/65ufbyeo"
37
+ }
final_sparse_autoencoder_gpt2-small_blocks.5.hook_resid_pre_24576/model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebae89dc4890c9d26411247355b933bce207c115a4ade7b5e11b8177060dbe4f
3
+ size 151098176
final_sparse_autoencoder_gpt2-small_blocks.6.hook_resid_pre_24576/cfg.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_name": "gpt2-small",
3
+ "hook_point": "blocks.6.hook_resid_pre",
4
+ "hook_point_layer": 6,
5
+ "hook_point_head_index": null,
6
+ "dataset_path": "Skylion007/openwebtext",
7
+ "is_dataset_tokenized": false,
8
+ "context_size": 128,
9
+ "use_cached_activations": false,
10
+ "cached_activations_path": "activations/Skylion007_openwebtext/gpt2-small/blocks.6.hook_resid_pre",
11
+ "d_in": 768,
12
+ "n_batches_in_buffer": 128,
13
+ "total_training_tokens": 300000000,
14
+ "store_batch_size": 32,
15
+ "seed": 42,
16
+ "b_dec_init_method": "geometric_median",
17
+ "expansion_factor": 32,
18
+ "from_pretrained_path": null,
19
+ "d_sae": 24576,
20
+ "l1_coefficient": 8e-05,
21
+ "lp_norm": 1,
22
+ "lr": 0.0004,
23
+ "lr_scheduler_name": null,
24
+ "lr_warm_up_steps": 5000,
25
+ "train_batch_size": 4096,
26
+ "use_ghost_grads": true,
27
+ "feature_sampling_window": 1000,
28
+ "dead_feature_window": 5000,
29
+ "dead_feature_threshold": 1e-08,
30
+ "log_to_wandb": true,
31
+ "wandb_project": "mats_sae_training_gpt2_small_resid_pre_5",
32
+ "run_name": "24576-L1-8e-05-LR-0.0004-Tokens-3.000e+08",
33
+ "wandb_entity": null,
34
+ "wandb_log_frequency": 100,
35
+ "n_checkpoints": 10,
36
+ "checkpoint_path": "checkpoints/vjvlfpxa"
37
+ }
final_sparse_autoencoder_gpt2-small_blocks.6.hook_resid_pre_24576/model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4af77d18a0d13065cb41a953e222b150804bc84b71786d374248fe8c4f4263f4
3
+ size 151098176
final_sparse_autoencoder_gpt2-small_blocks.7.hook_resid_pre_24576/cfg.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_name": "gpt2-small",
3
+ "hook_point": "blocks.7.hook_resid_pre",
4
+ "hook_point_layer": 7,
5
+ "hook_point_head_index": null,
6
+ "dataset_path": "Skylion007/openwebtext",
7
+ "is_dataset_tokenized": false,
8
+ "context_size": 128,
9
+ "use_cached_activations": false,
10
+ "cached_activations_path": "activations/Skylion007_openwebtext/gpt2-small/blocks.7.hook_resid_pre",
11
+ "d_in": 768,
12
+ "n_batches_in_buffer": 128,
13
+ "total_training_tokens": 300000000,
14
+ "store_batch_size": 32,
15
+ "seed": 42,
16
+ "b_dec_init_method": "geometric_median",
17
+ "expansion_factor": 32,
18
+ "from_pretrained_path": null,
19
+ "d_sae": 24576,
20
+ "l1_coefficient": 8e-05,
21
+ "lp_norm": 1,
22
+ "lr": 0.0004,
23
+ "lr_scheduler_name": null,
24
+ "lr_warm_up_steps": 5000,
25
+ "train_batch_size": 4096,
26
+ "use_ghost_grads": true,
27
+ "feature_sampling_window": 1000,
28
+ "dead_feature_window": 5000,
29
+ "dead_feature_threshold": 1e-08,
30
+ "log_to_wandb": true,
31
+ "wandb_project": "mats_sae_training_gpt2_small_resid_pre_5",
32
+ "run_name": "24576-L1-8e-05-LR-0.0004-Tokens-3.000e+08",
33
+ "wandb_entity": null,
34
+ "wandb_log_frequency": 100,
35
+ "n_checkpoints": 10,
36
+ "checkpoint_path": "checkpoints/n6gbpj7s"
37
+ }
final_sparse_autoencoder_gpt2-small_blocks.7.hook_resid_pre_24576/model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc9b9cd6cc5da7c5c332a9657b678f63e88fefb0966f612201dbc06ff8d096b2
3
+ size 151098176
final_sparse_autoencoder_gpt2-small_blocks.8.hook_resid_pre_24576/cfg.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_name": "gpt2-small",
3
+ "hook_point": "blocks.8.hook_resid_pre",
4
+ "hook_point_layer": 8,
5
+ "hook_point_head_index": null,
6
+ "dataset_path": "Skylion007/openwebtext",
7
+ "is_dataset_tokenized": false,
8
+ "context_size": 128,
9
+ "use_cached_activations": false,
10
+ "cached_activations_path": "activations/Skylion007_openwebtext/gpt2-small/blocks.8.hook_resid_pre",
11
+ "d_in": 768,
12
+ "n_batches_in_buffer": 128,
13
+ "total_training_tokens": 300000000,
14
+ "store_batch_size": 32,
15
+ "seed": 42,
16
+ "b_dec_init_method": "geometric_median",
17
+ "expansion_factor": 32,
18
+ "from_pretrained_path": null,
19
+ "d_sae": 24576,
20
+ "l1_coefficient": 8e-05,
21
+ "lp_norm": 1,
22
+ "lr": 0.0004,
23
+ "lr_scheduler_name": null,
24
+ "lr_warm_up_steps": 5000,
25
+ "train_batch_size": 4096,
26
+ "use_ghost_grads": true,
27
+ "feature_sampling_window": 1000,
28
+ "dead_feature_window": 5000,
29
+ "dead_feature_threshold": 1e-08,
30
+ "log_to_wandb": true,
31
+ "wandb_project": "mats_sae_training_gpt2_small_resid_pre_5",
32
+ "run_name": "24576-L1-8e-05-LR-0.0004-Tokens-3.000e+08",
33
+ "wandb_entity": null,
34
+ "wandb_log_frequency": 100,
35
+ "n_checkpoints": 10,
36
+ "checkpoint_path": "checkpoints/ut7lhl4q"
37
+ }
final_sparse_autoencoder_gpt2-small_blocks.8.hook_resid_pre_24576/model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd0c772df44ce5885ad0ec48313cf1f8edc6b0e13533d8898705c47678c62e06
3
+ size 151098176
final_sparse_autoencoder_gpt2-small_blocks.9.hook_resid_pre_24576/cfg.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_name": "gpt2-small",
3
+ "hook_point": "blocks.9.hook_resid_pre",
4
+ "hook_point_layer": 9,
5
+ "hook_point_head_index": null,
6
+ "dataset_path": "Skylion007/openwebtext",
7
+ "is_dataset_tokenized": false,
8
+ "context_size": 128,
9
+ "use_cached_activations": false,
10
+ "cached_activations_path": "activations/Skylion007_openwebtext/gpt2-small/blocks.9.hook_resid_pre",
11
+ "d_in": 768,
12
+ "n_batches_in_buffer": 128,
13
+ "total_training_tokens": 300000000,
14
+ "store_batch_size": 32,
15
+ "seed": 42,
16
+ "b_dec_init_method": "geometric_median",
17
+ "expansion_factor": 32,
18
+ "from_pretrained_path": null,
19
+ "d_sae": 24576,
20
+ "l1_coefficient": 8e-05,
21
+ "lp_norm": 1,
22
+ "lr": 0.0004,
23
+ "lr_scheduler_name": null,
24
+ "lr_warm_up_steps": 5000,
25
+ "train_batch_size": 4096,
26
+ "use_ghost_grads": true,
27
+ "feature_sampling_window": 1000,
28
+ "dead_feature_window": 5000,
29
+ "dead_feature_threshold": 1e-08,
30
+ "log_to_wandb": true,
31
+ "wandb_project": "mats_sae_training_gpt2_small_resid_pre_5",
32
+ "run_name": "24576-L1-8e-05-LR-0.0004-Tokens-3.000e+08",
33
+ "wandb_entity": null,
34
+ "wandb_log_frequency": 100,
35
+ "n_checkpoints": 10,
36
+ "checkpoint_path": "checkpoints/wg1xo7vo"
37
+ }
final_sparse_autoencoder_gpt2-small_blocks.9.hook_resid_pre_24576/model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f08b5d7483364e3b2bc2b8aa82968ddaa10007756aa2a906a2b91a6c01f5d9a
3
+ size 151098176