Upload folder using huggingface_hub
Browse files- final_sparse_autoencoder_gpt2-small_blocks.0.hook_resid_pre_24576/cfg.json +37 -0
- final_sparse_autoencoder_gpt2-small_blocks.0.hook_resid_pre_24576/model.pt +3 -0
- final_sparse_autoencoder_gpt2-small_blocks.1.hook_resid_pre_24576/cfg.json +37 -0
- final_sparse_autoencoder_gpt2-small_blocks.1.hook_resid_pre_24576/model.pt +3 -0
- final_sparse_autoencoder_gpt2-small_blocks.10.hook_resid_pre_24576/cfg.json +37 -0
- final_sparse_autoencoder_gpt2-small_blocks.10.hook_resid_pre_24576/model.pt +3 -0
- final_sparse_autoencoder_gpt2-small_blocks.11.hook_resid_pre_24576/cfg.json +37 -0
- final_sparse_autoencoder_gpt2-small_blocks.11.hook_resid_pre_24576/model.pt +3 -0
- final_sparse_autoencoder_gpt2-small_blocks.2.hook_resid_pre_24576/cfg.json +37 -0
- final_sparse_autoencoder_gpt2-small_blocks.2.hook_resid_pre_24576/model.pt +3 -0
- final_sparse_autoencoder_gpt2-small_blocks.3.hook_resid_pre_24576/cfg.json +37 -0
- final_sparse_autoencoder_gpt2-small_blocks.3.hook_resid_pre_24576/model.pt +3 -0
- final_sparse_autoencoder_gpt2-small_blocks.4.hook_resid_pre_24576/cfg.json +37 -0
- final_sparse_autoencoder_gpt2-small_blocks.4.hook_resid_pre_24576/model.pt +3 -0
- final_sparse_autoencoder_gpt2-small_blocks.5.hook_resid_pre_24576/cfg.json +37 -0
- final_sparse_autoencoder_gpt2-small_blocks.5.hook_resid_pre_24576/model.pt +3 -0
- final_sparse_autoencoder_gpt2-small_blocks.6.hook_resid_pre_24576/cfg.json +37 -0
- final_sparse_autoencoder_gpt2-small_blocks.6.hook_resid_pre_24576/model.pt +3 -0
- final_sparse_autoencoder_gpt2-small_blocks.7.hook_resid_pre_24576/cfg.json +37 -0
- final_sparse_autoencoder_gpt2-small_blocks.7.hook_resid_pre_24576/model.pt +3 -0
- final_sparse_autoencoder_gpt2-small_blocks.8.hook_resid_pre_24576/cfg.json +37 -0
- final_sparse_autoencoder_gpt2-small_blocks.8.hook_resid_pre_24576/model.pt +3 -0
- final_sparse_autoencoder_gpt2-small_blocks.9.hook_resid_pre_24576/cfg.json +37 -0
- final_sparse_autoencoder_gpt2-small_blocks.9.hook_resid_pre_24576/model.pt +3 -0
final_sparse_autoencoder_gpt2-small_blocks.0.hook_resid_pre_24576/cfg.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model_name": "gpt2-small",
|
3 |
+
"hook_point": "blocks.0.hook_resid_pre",
|
4 |
+
"hook_point_layer": 0,
|
5 |
+
"hook_point_head_index": null,
|
6 |
+
"dataset_path": "Skylion007/openwebtext",
|
7 |
+
"is_dataset_tokenized": false,
|
8 |
+
"context_size": 128,
|
9 |
+
"use_cached_activations": false,
|
10 |
+
"cached_activations_path": "activations/Skylion007_openwebtext/gpt2-small/blocks.0.hook_resid_pre",
|
11 |
+
"d_in": 768,
|
12 |
+
"n_batches_in_buffer": 128,
|
13 |
+
"total_training_tokens": 300000000,
|
14 |
+
"store_batch_size": 32,
|
15 |
+
"seed": 42,
|
16 |
+
"b_dec_init_method": "geometric_median",
|
17 |
+
"expansion_factor": 32,
|
18 |
+
"from_pretrained_path": null,
|
19 |
+
"d_sae": 24576,
|
20 |
+
"l1_coefficient": 8e-05,
|
21 |
+
"lp_norm": 1,
|
22 |
+
"lr": 0.0004,
|
23 |
+
"lr_scheduler_name": null,
|
24 |
+
"lr_warm_up_steps": 5000,
|
25 |
+
"train_batch_size": 4096,
|
26 |
+
"use_ghost_grads": true,
|
27 |
+
"feature_sampling_window": 1000,
|
28 |
+
"dead_feature_window": 5000,
|
29 |
+
"dead_feature_threshold": 1e-08,
|
30 |
+
"log_to_wandb": true,
|
31 |
+
"wandb_project": "mats_sae_training_gpt2_small_resid_pre_5",
|
32 |
+
"run_name": "24576-L1-8e-05-LR-0.0004-Tokens-3.000e+08",
|
33 |
+
"wandb_entity": null,
|
34 |
+
"wandb_log_frequency": 100,
|
35 |
+
"n_checkpoints": 10,
|
36 |
+
"checkpoint_path": "checkpoints/y1t51byy"
|
37 |
+
}
|
final_sparse_autoencoder_gpt2-small_blocks.0.hook_resid_pre_24576/model.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ae7d93e5df9a295de6381f059ac7d66361df359a9488c5b34c0d823c23a2138e
|
3 |
+
size 151098176
|
final_sparse_autoencoder_gpt2-small_blocks.1.hook_resid_pre_24576/cfg.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model_name": "gpt2-small",
|
3 |
+
"hook_point": "blocks.1.hook_resid_pre",
|
4 |
+
"hook_point_layer": 1,
|
5 |
+
"hook_point_head_index": null,
|
6 |
+
"dataset_path": "Skylion007/openwebtext",
|
7 |
+
"is_dataset_tokenized": false,
|
8 |
+
"context_size": 128,
|
9 |
+
"use_cached_activations": false,
|
10 |
+
"cached_activations_path": "activations/Skylion007_openwebtext/gpt2-small/blocks.1.hook_resid_pre",
|
11 |
+
"d_in": 768,
|
12 |
+
"n_batches_in_buffer": 128,
|
13 |
+
"total_training_tokens": 300000000,
|
14 |
+
"store_batch_size": 32,
|
15 |
+
"seed": 42,
|
16 |
+
"b_dec_init_method": "geometric_median",
|
17 |
+
"expansion_factor": 32,
|
18 |
+
"from_pretrained_path": null,
|
19 |
+
"d_sae": 24576,
|
20 |
+
"l1_coefficient": 8e-05,
|
21 |
+
"lp_norm": 1,
|
22 |
+
"lr": 0.0004,
|
23 |
+
"lr_scheduler_name": null,
|
24 |
+
"lr_warm_up_steps": 5000,
|
25 |
+
"train_batch_size": 4096,
|
26 |
+
"use_ghost_grads": true,
|
27 |
+
"feature_sampling_window": 1000,
|
28 |
+
"dead_feature_window": 5000,
|
29 |
+
"dead_feature_threshold": 1e-08,
|
30 |
+
"log_to_wandb": true,
|
31 |
+
"wandb_project": "mats_sae_training_gpt2_small_resid_pre_5",
|
32 |
+
"run_name": "24576-L1-8e-05-LR-0.0004-Tokens-3.000e+08",
|
33 |
+
"wandb_entity": null,
|
34 |
+
"wandb_log_frequency": 100,
|
35 |
+
"n_checkpoints": 10,
|
36 |
+
"checkpoint_path": "checkpoints/mm179kd2"
|
37 |
+
}
|
final_sparse_autoencoder_gpt2-small_blocks.1.hook_resid_pre_24576/model.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0f604d4db127855c9b879a7d369435ba2295aaaba445d12803c602fd1901f351
|
3 |
+
size 151098176
|
final_sparse_autoencoder_gpt2-small_blocks.10.hook_resid_pre_24576/cfg.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model_name": "gpt2-small",
|
3 |
+
"hook_point": "blocks.10.hook_resid_pre",
|
4 |
+
"hook_point_layer": 10,
|
5 |
+
"hook_point_head_index": null,
|
6 |
+
"dataset_path": "Skylion007/openwebtext",
|
7 |
+
"is_dataset_tokenized": false,
|
8 |
+
"context_size": 128,
|
9 |
+
"use_cached_activations": false,
|
10 |
+
"cached_activations_path": "activations/Skylion007_openwebtext/gpt2-small/blocks.10.hook_resid_pre",
|
11 |
+
"d_in": 768,
|
12 |
+
"n_batches_in_buffer": 128,
|
13 |
+
"total_training_tokens": 300000000,
|
14 |
+
"store_batch_size": 32,
|
15 |
+
"seed": 42,
|
16 |
+
"b_dec_init_method": "geometric_median",
|
17 |
+
"expansion_factor": 32,
|
18 |
+
"from_pretrained_path": null,
|
19 |
+
"d_sae": 24576,
|
20 |
+
"l1_coefficient": 8e-05,
|
21 |
+
"lp_norm": 1,
|
22 |
+
"lr": 0.0004,
|
23 |
+
"lr_scheduler_name": null,
|
24 |
+
"lr_warm_up_steps": 5000,
|
25 |
+
"train_batch_size": 4096,
|
26 |
+
"use_ghost_grads": true,
|
27 |
+
"feature_sampling_window": 1000,
|
28 |
+
"dead_feature_window": 5000,
|
29 |
+
"dead_feature_threshold": 1e-08,
|
30 |
+
"log_to_wandb": true,
|
31 |
+
"wandb_project": "mats_sae_training_gpt2_small_resid_pre_5",
|
32 |
+
"run_name": "24576-L1-8e-05-LR-0.0004-Tokens-3.000e+08",
|
33 |
+
"wandb_entity": null,
|
34 |
+
"wandb_log_frequency": 100,
|
35 |
+
"n_checkpoints": 10,
|
36 |
+
"checkpoint_path": "checkpoints/9vu4ulem"
|
37 |
+
}
|
final_sparse_autoencoder_gpt2-small_blocks.10.hook_resid_pre_24576/model.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:012f006deef081ab33ad36bbe1e5dfef7a2353d33e412767463145dacc27aa51
|
3 |
+
size 151098176
|
final_sparse_autoencoder_gpt2-small_blocks.11.hook_resid_pre_24576/cfg.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model_name": "gpt2-small",
|
3 |
+
"hook_point": "blocks.11.hook_resid_pre",
|
4 |
+
"hook_point_layer": 11,
|
5 |
+
"hook_point_head_index": null,
|
6 |
+
"dataset_path": "Skylion007/openwebtext",
|
7 |
+
"is_dataset_tokenized": false,
|
8 |
+
"context_size": 128,
|
9 |
+
"use_cached_activations": false,
|
10 |
+
"cached_activations_path": "activations/Skylion007_openwebtext/gpt2-small/blocks.11.hook_resid_pre",
|
11 |
+
"d_in": 768,
|
12 |
+
"n_batches_in_buffer": 128,
|
13 |
+
"total_training_tokens": 300000000,
|
14 |
+
"store_batch_size": 32,
|
15 |
+
"seed": 42,
|
16 |
+
"b_dec_init_method": "geometric_median",
|
17 |
+
"expansion_factor": 32,
|
18 |
+
"from_pretrained_path": null,
|
19 |
+
"d_sae": 24576,
|
20 |
+
"l1_coefficient": 8e-05,
|
21 |
+
"lp_norm": 1,
|
22 |
+
"lr": 0.0004,
|
23 |
+
"lr_scheduler_name": null,
|
24 |
+
"lr_warm_up_steps": 5000,
|
25 |
+
"train_batch_size": 4096,
|
26 |
+
"use_ghost_grads": true,
|
27 |
+
"feature_sampling_window": 1000,
|
28 |
+
"dead_feature_window": 5000,
|
29 |
+
"dead_feature_threshold": 1e-08,
|
30 |
+
"log_to_wandb": true,
|
31 |
+
"wandb_project": "mats_sae_training_gpt2_small_resid_pre_5",
|
32 |
+
"run_name": "24576-L1-8e-05-LR-0.0004-Tokens-3.000e+08",
|
33 |
+
"wandb_entity": null,
|
34 |
+
"wandb_log_frequency": 100,
|
35 |
+
"n_checkpoints": 10,
|
36 |
+
"checkpoint_path": "checkpoints/gf296egd"
|
37 |
+
}
|
final_sparse_autoencoder_gpt2-small_blocks.11.hook_resid_pre_24576/model.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d5953fa562360a790e05214544a70752331ae832e68750b65c676efc4a84853f
|
3 |
+
size 151098176
|
final_sparse_autoencoder_gpt2-small_blocks.2.hook_resid_pre_24576/cfg.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model_name": "gpt2-small",
|
3 |
+
"hook_point": "blocks.2.hook_resid_pre",
|
4 |
+
"hook_point_layer": 2,
|
5 |
+
"hook_point_head_index": null,
|
6 |
+
"dataset_path": "Skylion007/openwebtext",
|
7 |
+
"is_dataset_tokenized": false,
|
8 |
+
"context_size": 128,
|
9 |
+
"use_cached_activations": false,
|
10 |
+
"cached_activations_path": "activations/Skylion007_openwebtext/gpt2-small/blocks.2.hook_resid_pre",
|
11 |
+
"d_in": 768,
|
12 |
+
"n_batches_in_buffer": 128,
|
13 |
+
"total_training_tokens": 300000000,
|
14 |
+
"store_batch_size": 32,
|
15 |
+
"seed": 42,
|
16 |
+
"b_dec_init_method": "geometric_median",
|
17 |
+
"expansion_factor": 32,
|
18 |
+
"from_pretrained_path": null,
|
19 |
+
"d_sae": 24576,
|
20 |
+
"l1_coefficient": 8e-05,
|
21 |
+
"lp_norm": 1,
|
22 |
+
"lr": 0.0004,
|
23 |
+
"lr_scheduler_name": null,
|
24 |
+
"lr_warm_up_steps": 5000,
|
25 |
+
"train_batch_size": 4096,
|
26 |
+
"use_ghost_grads": true,
|
27 |
+
"feature_sampling_window": 1000,
|
28 |
+
"dead_feature_window": 5000,
|
29 |
+
"dead_feature_threshold": 1e-08,
|
30 |
+
"log_to_wandb": true,
|
31 |
+
"wandb_project": "mats_sae_training_gpt2_small_resid_pre_5",
|
32 |
+
"run_name": "24576-L1-8e-05-LR-0.0004-Tokens-3.000e+08",
|
33 |
+
"wandb_entity": null,
|
34 |
+
"wandb_log_frequency": 100,
|
35 |
+
"n_checkpoints": 10,
|
36 |
+
"checkpoint_path": "checkpoints/59dzvtdt"
|
37 |
+
}
|
final_sparse_autoencoder_gpt2-small_blocks.2.hook_resid_pre_24576/model.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:780da3a17f33bb2f6bd82ce1b7802e8c2ab6e41369746dc94915c11b0f1274e5
|
3 |
+
size 151098176
|
final_sparse_autoencoder_gpt2-small_blocks.3.hook_resid_pre_24576/cfg.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model_name": "gpt2-small",
|
3 |
+
"hook_point": "blocks.3.hook_resid_pre",
|
4 |
+
"hook_point_layer": 3,
|
5 |
+
"hook_point_head_index": null,
|
6 |
+
"dataset_path": "Skylion007/openwebtext",
|
7 |
+
"is_dataset_tokenized": false,
|
8 |
+
"context_size": 128,
|
9 |
+
"use_cached_activations": false,
|
10 |
+
"cached_activations_path": "activations/Skylion007_openwebtext/gpt2-small/blocks.3.hook_resid_pre",
|
11 |
+
"d_in": 768,
|
12 |
+
"n_batches_in_buffer": 128,
|
13 |
+
"total_training_tokens": 300000000,
|
14 |
+
"store_batch_size": 32,
|
15 |
+
"seed": 42,
|
16 |
+
"b_dec_init_method": "geometric_median",
|
17 |
+
"expansion_factor": 32,
|
18 |
+
"from_pretrained_path": null,
|
19 |
+
"d_sae": 24576,
|
20 |
+
"l1_coefficient": 8e-05,
|
21 |
+
"lp_norm": 1,
|
22 |
+
"lr": 0.0004,
|
23 |
+
"lr_scheduler_name": null,
|
24 |
+
"lr_warm_up_steps": 5000,
|
25 |
+
"train_batch_size": 4096,
|
26 |
+
"use_ghost_grads": true,
|
27 |
+
"feature_sampling_window": 1000,
|
28 |
+
"dead_feature_window": 5000,
|
29 |
+
"dead_feature_threshold": 1e-08,
|
30 |
+
"log_to_wandb": true,
|
31 |
+
"wandb_project": "mats_sae_training_gpt2_small_resid_pre_5",
|
32 |
+
"run_name": "24576-L1-8e-05-LR-0.0004-Tokens-3.000e+08",
|
33 |
+
"wandb_entity": null,
|
34 |
+
"wandb_log_frequency": 100,
|
35 |
+
"n_checkpoints": 10,
|
36 |
+
"checkpoint_path": "checkpoints/0sgl1gqz"
|
37 |
+
}
|
final_sparse_autoencoder_gpt2-small_blocks.3.hook_resid_pre_24576/model.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b0d6126e8799d90e9f77dcd0e37b8e379b3ef5aa6e1c656e2abfd6bcd137160d
|
3 |
+
size 151098176
|
final_sparse_autoencoder_gpt2-small_blocks.4.hook_resid_pre_24576/cfg.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model_name": "gpt2-small",
|
3 |
+
"hook_point": "blocks.4.hook_resid_pre",
|
4 |
+
"hook_point_layer": 4,
|
5 |
+
"hook_point_head_index": null,
|
6 |
+
"dataset_path": "Skylion007/openwebtext",
|
7 |
+
"is_dataset_tokenized": false,
|
8 |
+
"context_size": 128,
|
9 |
+
"use_cached_activations": false,
|
10 |
+
"cached_activations_path": "activations/Skylion007_openwebtext/gpt2-small/blocks.4.hook_resid_pre",
|
11 |
+
"d_in": 768,
|
12 |
+
"n_batches_in_buffer": 128,
|
13 |
+
"total_training_tokens": 300000000,
|
14 |
+
"store_batch_size": 32,
|
15 |
+
"seed": 42,
|
16 |
+
"b_dec_init_method": "geometric_median",
|
17 |
+
"expansion_factor": 32,
|
18 |
+
"from_pretrained_path": null,
|
19 |
+
"d_sae": 24576,
|
20 |
+
"l1_coefficient": 8e-05,
|
21 |
+
"lp_norm": 1,
|
22 |
+
"lr": 0.0004,
|
23 |
+
"lr_scheduler_name": null,
|
24 |
+
"lr_warm_up_steps": 5000,
|
25 |
+
"train_batch_size": 4096,
|
26 |
+
"use_ghost_grads": true,
|
27 |
+
"feature_sampling_window": 1000,
|
28 |
+
"dead_feature_window": 5000,
|
29 |
+
"dead_feature_threshold": 1e-08,
|
30 |
+
"log_to_wandb": true,
|
31 |
+
"wandb_project": "mats_sae_training_gpt2_small_resid_pre_5",
|
32 |
+
"run_name": "24576-L1-8e-05-LR-0.0004-Tokens-3.000e+08",
|
33 |
+
"wandb_entity": null,
|
34 |
+
"wandb_log_frequency": 100,
|
35 |
+
"n_checkpoints": 10,
|
36 |
+
"checkpoint_path": "checkpoints/7rzeo1iv"
|
37 |
+
}
|
final_sparse_autoencoder_gpt2-small_blocks.4.hook_resid_pre_24576/model.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:213adff21b849e290a2baa229ced82e352ec1bb122c36d30295c84b76bc27ad5
|
3 |
+
size 151098176
|
final_sparse_autoencoder_gpt2-small_blocks.5.hook_resid_pre_24576/cfg.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model_name": "gpt2-small",
|
3 |
+
"hook_point": "blocks.5.hook_resid_pre",
|
4 |
+
"hook_point_layer": 5,
|
5 |
+
"hook_point_head_index": null,
|
6 |
+
"dataset_path": "Skylion007/openwebtext",
|
7 |
+
"is_dataset_tokenized": false,
|
8 |
+
"context_size": 128,
|
9 |
+
"use_cached_activations": false,
|
10 |
+
"cached_activations_path": "activations/Skylion007_openwebtext/gpt2-small/blocks.5.hook_resid_pre",
|
11 |
+
"d_in": 768,
|
12 |
+
"n_batches_in_buffer": 128,
|
13 |
+
"total_training_tokens": 300000000,
|
14 |
+
"store_batch_size": 32,
|
15 |
+
"seed": 42,
|
16 |
+
"b_dec_init_method": "geometric_median",
|
17 |
+
"expansion_factor": 32,
|
18 |
+
"from_pretrained_path": null,
|
19 |
+
"d_sae": 24576,
|
20 |
+
"l1_coefficient": 8e-05,
|
21 |
+
"lp_norm": 1,
|
22 |
+
"lr": 0.0004,
|
23 |
+
"lr_scheduler_name": null,
|
24 |
+
"lr_warm_up_steps": 5000,
|
25 |
+
"train_batch_size": 4096,
|
26 |
+
"use_ghost_grads": true,
|
27 |
+
"feature_sampling_window": 1000,
|
28 |
+
"dead_feature_window": 5000,
|
29 |
+
"dead_feature_threshold": 1e-08,
|
30 |
+
"log_to_wandb": true,
|
31 |
+
"wandb_project": "mats_sae_training_gpt2_small_resid_pre_5",
|
32 |
+
"run_name": "24576-L1-8e-05-LR-0.0004-Tokens-3.000e+08",
|
33 |
+
"wandb_entity": null,
|
34 |
+
"wandb_log_frequency": 100,
|
35 |
+
"n_checkpoints": 10,
|
36 |
+
"checkpoint_path": "checkpoints/65ufbyeo"
|
37 |
+
}
|
final_sparse_autoencoder_gpt2-small_blocks.5.hook_resid_pre_24576/model.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ebae89dc4890c9d26411247355b933bce207c115a4ade7b5e11b8177060dbe4f
|
3 |
+
size 151098176
|
final_sparse_autoencoder_gpt2-small_blocks.6.hook_resid_pre_24576/cfg.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model_name": "gpt2-small",
|
3 |
+
"hook_point": "blocks.6.hook_resid_pre",
|
4 |
+
"hook_point_layer": 6,
|
5 |
+
"hook_point_head_index": null,
|
6 |
+
"dataset_path": "Skylion007/openwebtext",
|
7 |
+
"is_dataset_tokenized": false,
|
8 |
+
"context_size": 128,
|
9 |
+
"use_cached_activations": false,
|
10 |
+
"cached_activations_path": "activations/Skylion007_openwebtext/gpt2-small/blocks.6.hook_resid_pre",
|
11 |
+
"d_in": 768,
|
12 |
+
"n_batches_in_buffer": 128,
|
13 |
+
"total_training_tokens": 300000000,
|
14 |
+
"store_batch_size": 32,
|
15 |
+
"seed": 42,
|
16 |
+
"b_dec_init_method": "geometric_median",
|
17 |
+
"expansion_factor": 32,
|
18 |
+
"from_pretrained_path": null,
|
19 |
+
"d_sae": 24576,
|
20 |
+
"l1_coefficient": 8e-05,
|
21 |
+
"lp_norm": 1,
|
22 |
+
"lr": 0.0004,
|
23 |
+
"lr_scheduler_name": null,
|
24 |
+
"lr_warm_up_steps": 5000,
|
25 |
+
"train_batch_size": 4096,
|
26 |
+
"use_ghost_grads": true,
|
27 |
+
"feature_sampling_window": 1000,
|
28 |
+
"dead_feature_window": 5000,
|
29 |
+
"dead_feature_threshold": 1e-08,
|
30 |
+
"log_to_wandb": true,
|
31 |
+
"wandb_project": "mats_sae_training_gpt2_small_resid_pre_5",
|
32 |
+
"run_name": "24576-L1-8e-05-LR-0.0004-Tokens-3.000e+08",
|
33 |
+
"wandb_entity": null,
|
34 |
+
"wandb_log_frequency": 100,
|
35 |
+
"n_checkpoints": 10,
|
36 |
+
"checkpoint_path": "checkpoints/vjvlfpxa"
|
37 |
+
}
|
final_sparse_autoencoder_gpt2-small_blocks.6.hook_resid_pre_24576/model.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4af77d18a0d13065cb41a953e222b150804bc84b71786d374248fe8c4f4263f4
|
3 |
+
size 151098176
|
final_sparse_autoencoder_gpt2-small_blocks.7.hook_resid_pre_24576/cfg.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model_name": "gpt2-small",
|
3 |
+
"hook_point": "blocks.7.hook_resid_pre",
|
4 |
+
"hook_point_layer": 7,
|
5 |
+
"hook_point_head_index": null,
|
6 |
+
"dataset_path": "Skylion007/openwebtext",
|
7 |
+
"is_dataset_tokenized": false,
|
8 |
+
"context_size": 128,
|
9 |
+
"use_cached_activations": false,
|
10 |
+
"cached_activations_path": "activations/Skylion007_openwebtext/gpt2-small/blocks.7.hook_resid_pre",
|
11 |
+
"d_in": 768,
|
12 |
+
"n_batches_in_buffer": 128,
|
13 |
+
"total_training_tokens": 300000000,
|
14 |
+
"store_batch_size": 32,
|
15 |
+
"seed": 42,
|
16 |
+
"b_dec_init_method": "geometric_median",
|
17 |
+
"expansion_factor": 32,
|
18 |
+
"from_pretrained_path": null,
|
19 |
+
"d_sae": 24576,
|
20 |
+
"l1_coefficient": 8e-05,
|
21 |
+
"lp_norm": 1,
|
22 |
+
"lr": 0.0004,
|
23 |
+
"lr_scheduler_name": null,
|
24 |
+
"lr_warm_up_steps": 5000,
|
25 |
+
"train_batch_size": 4096,
|
26 |
+
"use_ghost_grads": true,
|
27 |
+
"feature_sampling_window": 1000,
|
28 |
+
"dead_feature_window": 5000,
|
29 |
+
"dead_feature_threshold": 1e-08,
|
30 |
+
"log_to_wandb": true,
|
31 |
+
"wandb_project": "mats_sae_training_gpt2_small_resid_pre_5",
|
32 |
+
"run_name": "24576-L1-8e-05-LR-0.0004-Tokens-3.000e+08",
|
33 |
+
"wandb_entity": null,
|
34 |
+
"wandb_log_frequency": 100,
|
35 |
+
"n_checkpoints": 10,
|
36 |
+
"checkpoint_path": "checkpoints/n6gbpj7s"
|
37 |
+
}
|
final_sparse_autoencoder_gpt2-small_blocks.7.hook_resid_pre_24576/model.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dc9b9cd6cc5da7c5c332a9657b678f63e88fefb0966f612201dbc06ff8d096b2
|
3 |
+
size 151098176
|
final_sparse_autoencoder_gpt2-small_blocks.8.hook_resid_pre_24576/cfg.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model_name": "gpt2-small",
|
3 |
+
"hook_point": "blocks.8.hook_resid_pre",
|
4 |
+
"hook_point_layer": 8,
|
5 |
+
"hook_point_head_index": null,
|
6 |
+
"dataset_path": "Skylion007/openwebtext",
|
7 |
+
"is_dataset_tokenized": false,
|
8 |
+
"context_size": 128,
|
9 |
+
"use_cached_activations": false,
|
10 |
+
"cached_activations_path": "activations/Skylion007_openwebtext/gpt2-small/blocks.8.hook_resid_pre",
|
11 |
+
"d_in": 768,
|
12 |
+
"n_batches_in_buffer": 128,
|
13 |
+
"total_training_tokens": 300000000,
|
14 |
+
"store_batch_size": 32,
|
15 |
+
"seed": 42,
|
16 |
+
"b_dec_init_method": "geometric_median",
|
17 |
+
"expansion_factor": 32,
|
18 |
+
"from_pretrained_path": null,
|
19 |
+
"d_sae": 24576,
|
20 |
+
"l1_coefficient": 8e-05,
|
21 |
+
"lp_norm": 1,
|
22 |
+
"lr": 0.0004,
|
23 |
+
"lr_scheduler_name": null,
|
24 |
+
"lr_warm_up_steps": 5000,
|
25 |
+
"train_batch_size": 4096,
|
26 |
+
"use_ghost_grads": true,
|
27 |
+
"feature_sampling_window": 1000,
|
28 |
+
"dead_feature_window": 5000,
|
29 |
+
"dead_feature_threshold": 1e-08,
|
30 |
+
"log_to_wandb": true,
|
31 |
+
"wandb_project": "mats_sae_training_gpt2_small_resid_pre_5",
|
32 |
+
"run_name": "24576-L1-8e-05-LR-0.0004-Tokens-3.000e+08",
|
33 |
+
"wandb_entity": null,
|
34 |
+
"wandb_log_frequency": 100,
|
35 |
+
"n_checkpoints": 10,
|
36 |
+
"checkpoint_path": "checkpoints/ut7lhl4q"
|
37 |
+
}
|
final_sparse_autoencoder_gpt2-small_blocks.8.hook_resid_pre_24576/model.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bd0c772df44ce5885ad0ec48313cf1f8edc6b0e13533d8898705c47678c62e06
|
3 |
+
size 151098176
|
final_sparse_autoencoder_gpt2-small_blocks.9.hook_resid_pre_24576/cfg.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model_name": "gpt2-small",
|
3 |
+
"hook_point": "blocks.9.hook_resid_pre",
|
4 |
+
"hook_point_layer": 9,
|
5 |
+
"hook_point_head_index": null,
|
6 |
+
"dataset_path": "Skylion007/openwebtext",
|
7 |
+
"is_dataset_tokenized": false,
|
8 |
+
"context_size": 128,
|
9 |
+
"use_cached_activations": false,
|
10 |
+
"cached_activations_path": "activations/Skylion007_openwebtext/gpt2-small/blocks.9.hook_resid_pre",
|
11 |
+
"d_in": 768,
|
12 |
+
"n_batches_in_buffer": 128,
|
13 |
+
"total_training_tokens": 300000000,
|
14 |
+
"store_batch_size": 32,
|
15 |
+
"seed": 42,
|
16 |
+
"b_dec_init_method": "geometric_median",
|
17 |
+
"expansion_factor": 32,
|
18 |
+
"from_pretrained_path": null,
|
19 |
+
"d_sae": 24576,
|
20 |
+
"l1_coefficient": 8e-05,
|
21 |
+
"lp_norm": 1,
|
22 |
+
"lr": 0.0004,
|
23 |
+
"lr_scheduler_name": null,
|
24 |
+
"lr_warm_up_steps": 5000,
|
25 |
+
"train_batch_size": 4096,
|
26 |
+
"use_ghost_grads": true,
|
27 |
+
"feature_sampling_window": 1000,
|
28 |
+
"dead_feature_window": 5000,
|
29 |
+
"dead_feature_threshold": 1e-08,
|
30 |
+
"log_to_wandb": true,
|
31 |
+
"wandb_project": "mats_sae_training_gpt2_small_resid_pre_5",
|
32 |
+
"run_name": "24576-L1-8e-05-LR-0.0004-Tokens-3.000e+08",
|
33 |
+
"wandb_entity": null,
|
34 |
+
"wandb_log_frequency": 100,
|
35 |
+
"n_checkpoints": 10,
|
36 |
+
"checkpoint_path": "checkpoints/wg1xo7vo"
|
37 |
+
}
|
final_sparse_autoencoder_gpt2-small_blocks.9.hook_resid_pre_24576/model.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4f08b5d7483364e3b2bc2b8aa82968ddaa10007756aa2a906a2b91a6c01f5d9a
|
3 |
+
size 151098176
|