Upload folder using huggingface_hub
Browse files- checkpoints/embed_0.pt +3 -0
- checkpoints/embed_1024.pt +3 -0
- checkpoints/embed_1536.pt +3 -0
- checkpoints/embed_2048.pt +3 -0
- checkpoints/embed_512.pt +3 -0
- checkpoints/torso[0].attn_0.pt +3 -0
- checkpoints/torso[0].attn_1024.pt +3 -0
- checkpoints/torso[0].attn_1536.pt +3 -0
- checkpoints/torso[0].attn_2048.pt +3 -0
- checkpoints/torso[0].attn_512.pt +3 -0
- checkpoints/torso[0].res_final_0.pt +3 -0
- checkpoints/torso[0].res_final_1024.pt +3 -0
- checkpoints/torso[0].res_final_1536.pt +3 -0
- checkpoints/torso[0].res_final_2048.pt +3 -0
- checkpoints/torso[0].res_final_512.pt +3 -0
- checkpoints/torso[0].res_mlp_0.pt +3 -0
- checkpoints/torso[0].res_mlp_1024.pt +3 -0
- checkpoints/torso[0].res_mlp_1536.pt +3 -0
- checkpoints/torso[0].res_mlp_2048.pt +3 -0
- checkpoints/torso[0].res_mlp_512.pt +3 -0
- trainer_0/config.json +26 -0
- trainer_0/embed.pt +3 -0
- trainer_0/torso[0].attn.pt +3 -0
- trainer_0/torso[0].res_final.pt +3 -0
- trainer_0/torso[0].res_mlp.pt +3 -0
checkpoints/embed_0.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:78dbb0e1385cb5a48e0a8fec6416a7de71b9513d183816a2aa0d8a171b2ac6c1
|
3 |
+
size 37778320
|
checkpoints/embed_1024.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e75456de3cf26bd7b88ef20bcc9c2a22a391bc3a8ff0b3691bf094eee21a84e6
|
3 |
+
size 37778536
|
checkpoints/embed_1536.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2a844c7b43fb927f7f2fcc5f9e1ea2465fde6cd04a0a684206cd46403c1a74f9
|
3 |
+
size 37778536
|
checkpoints/embed_2048.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a38fb4dbfa7d266d17495d5b75647e2802f57bce78115a98245e215a48af5144
|
3 |
+
size 37778536
|
checkpoints/embed_512.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:55b71ff59a4592a9130d6ff6ba5e714320a469c48f68842790b11711ac8e7a6e
|
3 |
+
size 37778528
|
checkpoints/torso[0].attn_0.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ffc354c3a5802b66bbccd7d7df0772ed0743cbd1cae5c9d84b80f3e2bdfbdc6f
|
3 |
+
size 37778576
|
checkpoints/torso[0].attn_1024.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:40fb99b721ae4cb0d4fdcce9543cfdfa5a2daf9fe9e72f3bfb70dd29773ed114
|
3 |
+
size 37778600
|
checkpoints/torso[0].attn_1536.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:58c79cb57a8f7fcbb60f00d2698c9ef162e5b816b077562656290cc6a9669c90
|
3 |
+
size 37778600
|
checkpoints/torso[0].attn_2048.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aeb2496155bcb1cf217b0da6ee56e4179aef4093bd55e8117bf0e611ff2f4490
|
3 |
+
size 37778600
|
checkpoints/torso[0].attn_512.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6a507f1a4449fd383003a101c49f638598905ed73cc70789beba2446b5077d7a
|
3 |
+
size 37778592
|
checkpoints/torso[0].res_final_0.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:97c6e916cbcd13b1865aa13704d9d6b6bc63ea69b284fd56b0b1eb2c23936be2
|
3 |
+
size 37778616
|
checkpoints/torso[0].res_final_1024.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8a4837bbf7fef17d48c43c7aea35e6b1fcfd32067e5fa5d482744164f628136e
|
3 |
+
size 37778704
|
checkpoints/torso[0].res_final_1536.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0cf42177c199eb38692325e9568bb6bc67e6cd2330e59e3b75a6eee3ae6b72a2
|
3 |
+
size 37778704
|
checkpoints/torso[0].res_final_2048.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9168742cc04fda0349066f415b4dfe74b56e8f12135fe9a52792a1cf54aa2e24
|
3 |
+
size 37778704
|
checkpoints/torso[0].res_final_512.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8d500ccab3728578705ff528ca42e193002d99acd4fcf41d4bed50a902c1b153
|
3 |
+
size 37778696
|
checkpoints/torso[0].res_mlp_0.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f739e58039152b9ae31520b5c9ed20ab8dcf14095c3150eed62a9fe58d0f5217
|
3 |
+
size 37778600
|
checkpoints/torso[0].res_mlp_1024.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fb70a24bf8c2800778308e672e93ff5463b0d6b2c707f83196a92e7185276437
|
3 |
+
size 37778624
|
checkpoints/torso[0].res_mlp_1536.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bb1c74517d8bc739b5361723bc0c61c152d5590d350bf207796ef52bd2d1a946
|
3 |
+
size 37778624
|
checkpoints/torso[0].res_mlp_2048.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:281f8f6b1e2faad6b4613c9d8f724e49f204de7b2d0ea8c25b5275c5c4039ff2
|
3 |
+
size 37778624
|
checkpoints/torso[0].res_mlp_512.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aa183208f68c3ae3bff9bd694a3e11b57e0c48928c46ef7bf5903678f2a503c2
|
3 |
+
size 37778616
|
trainer_0/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.00032659863237109043,
|
6 |
+
"steps": 30000,
|
7 |
+
"seed": null,
|
8 |
+
"activation_dim": 768,
|
9 |
+
"dict_size": 6144,
|
10 |
+
"k": 30,
|
11 |
+
"device": "cuda",
|
12 |
+
"layer": 0,
|
13 |
+
"lm_name": "TinyModel_2L_3E",
|
14 |
+
"wandb_name": "AutoEncoderTopK",
|
15 |
+
"submodule_name": null
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 768,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 128,
|
21 |
+
"ctx_len": 256,
|
22 |
+
"refresh_batch_size": 512,
|
23 |
+
"out_batch_size": 1024,
|
24 |
+
"device": "cuda"
|
25 |
+
}
|
26 |
+
}
|
trainer_0/embed.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ac2202e94acf00b7e1002697a4f85773e4165a6ccc4b1c5dae7a77a58d5ef173
|
3 |
+
size 37778240
|
trainer_0/torso[0].attn.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e5db6e6dc98b98f9c8878409d639a6c9c5cb08ff1c0c8f65cb3a926ada45400f
|
3 |
+
size 37778560
|
trainer_0/torso[0].res_final.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:248b8f82d5be897347adc718b74cab1f77d3270cebff95c7bc95551206247db8
|
3 |
+
size 37778600
|
trainer_0/torso[0].res_mlp.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:daa6428ed71295587a885e0f9bf08606208f15df31a137635ad0cf3404ceb70b
|
3 |
+
size 37778584
|