jbloom committed on
Commit
1fe73d5
·
verified ·
1 Parent(s): d1b17cb

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. benchmark_stats.csv +13 -0
  3. benchmark_stats.html +650 -0
  4. benchmark_stats.png +3 -0
  5. v5_128k_layer_0/cfg.json +1 -0
  6. v5_128k_layer_0/metrics.json +1 -0
  7. v5_128k_layer_0/sae_weights.safetensors +3 -0
  8. v5_128k_layer_0/sparsity.safetensors +3 -0
  9. v5_128k_layer_1/cfg.json +1 -0
  10. v5_128k_layer_1/metrics.json +1 -0
  11. v5_128k_layer_1/sae_weights.safetensors +3 -0
  12. v5_128k_layer_1/sparsity.safetensors +3 -0
  13. v5_128k_layer_10/cfg.json +1 -0
  14. v5_128k_layer_10/metrics.json +1 -0
  15. v5_128k_layer_10/sae_weights.safetensors +3 -0
  16. v5_128k_layer_10/sparsity.safetensors +3 -0
  17. v5_128k_layer_11/cfg.json +1 -0
  18. v5_128k_layer_11/metrics.json +1 -0
  19. v5_128k_layer_11/sae_weights.safetensors +3 -0
  20. v5_128k_layer_11/sparsity.safetensors +3 -0
  21. v5_128k_layer_2/cfg.json +1 -0
  22. v5_128k_layer_2/metrics.json +1 -0
  23. v5_128k_layer_2/sae_weights.safetensors +3 -0
  24. v5_128k_layer_2/sparsity.safetensors +3 -0
  25. v5_128k_layer_3/cfg.json +1 -0
  26. v5_128k_layer_3/metrics.json +1 -0
  27. v5_128k_layer_3/sae_weights.safetensors +3 -0
  28. v5_128k_layer_3/sparsity.safetensors +3 -0
  29. v5_128k_layer_4/cfg.json +1 -0
  30. v5_128k_layer_4/metrics.json +1 -0
  31. v5_128k_layer_4/sae_weights.safetensors +3 -0
  32. v5_128k_layer_4/sparsity.safetensors +3 -0
  33. v5_128k_layer_5/cfg.json +1 -0
  34. v5_128k_layer_5/metrics.json +1 -0
  35. v5_128k_layer_5/sae_weights.safetensors +3 -0
  36. v5_128k_layer_5/sparsity.safetensors +3 -0
  37. v5_128k_layer_6/cfg.json +1 -0
  38. v5_128k_layer_6/metrics.json +1 -0
  39. v5_128k_layer_6/sae_weights.safetensors +3 -0
  40. v5_128k_layer_6/sparsity.safetensors +3 -0
  41. v5_128k_layer_7/cfg.json +1 -0
  42. v5_128k_layer_7/metrics.json +1 -0
  43. v5_128k_layer_7/sae_weights.safetensors +3 -0
  44. v5_128k_layer_7/sparsity.safetensors +3 -0
  45. v5_128k_layer_8/cfg.json +1 -0
  46. v5_128k_layer_8/metrics.json +1 -0
  47. v5_128k_layer_8/sae_weights.safetensors +3 -0
  48. v5_128k_layer_8/sparsity.safetensors +3 -0
  49. v5_128k_layer_9/cfg.json +1 -0
  50. v5_128k_layer_9/metrics.json +1 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ benchmark_stats.png filter=lfs diff=lfs merge=lfs -text
benchmark_stats.csv ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ,version,d_sae,layer,kl_div_with_sae,kl_div_with_ablation,ce_loss_with_sae,ce_loss_without_sae,ce_loss_with_ablation,kl_div_score,ce_loss_score,l2_norm_in,l2_norm_out,l2_ratio,l0,l1,explained_variance,mse,total_tokens_evaluated,filepath
2
+ OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_0/metrics.json,5,128,0,0.003102128393948078,2.121527671813965,3.6000306606292725,3.599064588546753,5.74860143661499,0.9985377855612434,0.9995505673311031,32.01313781738281,31.91054916381836,0.9968054294586182,31.989421844482422,39.967445373535156,0.9732184410095215,6.201825141906738,6144.0,OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_0/metrics.json
3
+ OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_1/metrics.json,5,128,1,0.001699602697044611,0.024065840989351273,3.60137939453125,3.599064588546753,3.6206326484680176,0.9293769663899693,0.8926743530504184,9.714648246765137,9.198001861572266,0.9423195719718933,31.9990234375,78.19619750976562,0.8797982931137085,8.594531059265137,6144.0,OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_1/metrics.json
4
+ OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_2/metrics.json,5,128,2,0.001999747008085251,0.031004613265395164,3.5997862815856934,3.599064588546753,3.626660108566284,0.9355016303229556,0.9738474564556262,8.641822814941406,8.086132049560547,0.9341656565666199,31.98714256286621,79.24567413330078,0.8692671060562134,8.59189224243164,6144.0,OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_2/metrics.json
5
+ OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_3/metrics.json,5,128,3,0.00250535411760211,0.025133918970823288,3.601374387741089,3.599064588546753,3.6286609172821045,0.900319798097923,0.9219565637687698,8.571012496948242,7.854372024536133,0.9161635637283325,32.0,77.34489440917969,0.8376470804214478,11.842028617858887,6144.0,OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_3/metrics.json
6
+ OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_4/metrics.json,5,128,4,0.0034840195439755917,0.026722650974988937,3.6015777587890625,3.599064588546753,3.6321334838867188,0.8696229821196835,0.9240019898919258,9.123016357421875,8.11795425415039,0.891417384147644,31.99934959411621,75.46078491210938,0.7981663942337036,17.370716094970703,6144.0,OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_4/metrics.json
7
+ OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_5/metrics.json,5,128,5,0.003555751871317625,0.031378373503685,3.6017022132873535,3.599064588546753,3.6277596950531006,0.8866814473063734,0.9080810262886769,10.034396171569824,8.960555076599121,0.8944891691207886,31.99837303161621,73.32916259765625,0.8041481375694275,22.21611785888672,6144.0,OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_5/metrics.json
8
+ OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_6/metrics.json,5,128,6,0.004435557406395674,0.03278880566358566,3.6031253337860107,3.599064588546753,3.634286403656006,0.8647234226246404,0.8847093704097312,11.67806625366211,10.348960876464844,0.8881208300590515,31.998699188232422,70.52044677734375,0.7761087417602539,31.851516723632812,6144.0,OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_6/metrics.json
9
+ OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_7/metrics.json,5,128,7,0.004355770535767078,0.03466065973043442,3.602255344390869,3.599064588546753,3.634833812713623,0.8743309974581235,0.9107960567097922,13.65020751953125,12.425106048583984,0.9115623235702515,31.99934959411621,66.42544555664062,0.8032481074333191,37.54448699951172,6144.0,OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_7/metrics.json
10
+ OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_8/metrics.json,5,128,8,0.005139736924320459,0.02938206121325493,3.602754592895508,3.599064588546753,3.6258018016815186,0.8250722817907069,0.8619899415037808,16.137948989868164,14.539435386657715,0.902074933052063,32.0,66.75920867919922,0.7767739295959473,52.779170989990234,6144.0,OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_8/metrics.json
11
+ OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_9/metrics.json,5,128,9,0.004128233529627323,0.028918448835611343,3.601983070373535,3.599064588546753,3.636500835418701,0.8572456789403016,0.922041281628338,20.912498474121094,19.252647399902344,0.9233078956604004,31.99934959411621,60.197113037109375,0.800841212272644,70.06503295898438,6144.0,OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_9/metrics.json
12
+ OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_10/metrics.json,5,128,10,0.003733165329322219,0.02475181221961975,3.601586103439331,3.599064588546753,3.6404881477355957,0.8491760806765053,0.9391284828741302,31.82137680053711,30.27041244506836,0.9501370787620544,32.0,52.3072624206543,0.8368248343467712,112.1676254272461,6144.0,OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_10/metrics.json
13
+ OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_11/metrics.json,5,128,11,0.003717603161931038,0.10687470436096191,3.6006953716278076,3.599064588546753,3.730869770050049,0.9652153127893099,0.9876273219121214,280.86444091796875,280.55767822265625,0.9987168312072754,31.75,20.949716567993164,0.9697019457817078,169.45314025878906,6144.0,OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_11/metrics.json
benchmark_stats.html ADDED
@@ -0,0 +1,650 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <style type="text/css">
2
+ #T_2e50c_row0_col2, #T_2e50c_row0_col6, #T_2e50c_row0_col16, #T_2e50c_row0_col17, #T_2e50c_row1_col3, #T_2e50c_row1_col4, #T_2e50c_row1_col6, #T_2e50c_row1_col7, #T_2e50c_row1_col17, #T_2e50c_row2_col4, #T_2e50c_row2_col5, #T_2e50c_row2_col6, #T_2e50c_row2_col7, #T_2e50c_row2_col10, #T_2e50c_row2_col11, #T_2e50c_row2_col17, #T_2e50c_row3_col4, #T_2e50c_row3_col6, #T_2e50c_row3_col7, #T_2e50c_row3_col10, #T_2e50c_row3_col11, #T_2e50c_row3_col17, #T_2e50c_row4_col4, #T_2e50c_row4_col6, #T_2e50c_row4_col10, #T_2e50c_row4_col11, #T_2e50c_row4_col17, #T_2e50c_row5_col4, #T_2e50c_row5_col6, #T_2e50c_row5_col7, #T_2e50c_row5_col17, #T_2e50c_row6_col6, #T_2e50c_row6_col12, #T_2e50c_row6_col15, #T_2e50c_row6_col17, #T_2e50c_row7_col6, #T_2e50c_row7_col17, #T_2e50c_row8_col4, #T_2e50c_row8_col6, #T_2e50c_row8_col7, #T_2e50c_row8_col8, #T_2e50c_row8_col9, #T_2e50c_row8_col15, #T_2e50c_row8_col17, #T_2e50c_row9_col4, #T_2e50c_row9_col6, #T_2e50c_row9_col17, #T_2e50c_row10_col4, #T_2e50c_row10_col6, #T_2e50c_row10_col17, #T_2e50c_row11_col6, #T_2e50c_row11_col13, #T_2e50c_row11_col14, #T_2e50c_row11_col17 {
3
+ background-color: #440154;
4
+ color: #f1f1f1;
5
+ }
6
+ #T_2e50c_row0_col3 {
7
+ background-color: #297a8e;
8
+ color: #f1f1f1;
9
+ }
10
+ #T_2e50c_row0_col4, #T_2e50c_row0_col7, #T_2e50c_row0_col8, #T_2e50c_row0_col9, #T_2e50c_row0_col15, #T_2e50c_row1_col13, #T_2e50c_row2_col14, #T_2e50c_row3_col13, #T_2e50c_row4_col13, #T_2e50c_row6_col5, #T_2e50c_row7_col13, #T_2e50c_row8_col3, #T_2e50c_row8_col13, #T_2e50c_row9_col13, #T_2e50c_row10_col13, #T_2e50c_row11_col2, #T_2e50c_row11_col10, #T_2e50c_row11_col11, #T_2e50c_row11_col12, #T_2e50c_row11_col16 {
11
+ background-color: #fde725;
12
+ color: #000000;
13
+ }
14
+ #T_2e50c_row0_col5 {
15
+ background-color: #481b6d;
16
+ color: #f1f1f1;
17
+ }
18
+ #T_2e50c_row0_col10, #T_2e50c_row0_col11, #T_2e50c_row2_col3 {
19
+ background-color: #482071;
20
+ color: #f1f1f1;
21
+ }
22
+ #T_2e50c_row0_col12, #T_2e50c_row1_col14, #T_2e50c_row11_col15 {
23
+ background-color: #f4e61e;
24
+ color: #000000;
25
+ }
26
+ #T_2e50c_row0_col13 {
27
+ background-color: #e5e419;
28
+ color: #000000;
29
+ }
30
+ #T_2e50c_row0_col14 {
31
+ background-color: #31668e;
32
+ color: #f1f1f1;
33
+ }
34
+ #T_2e50c_row1_col2 {
35
+ background-color: #482173;
36
+ color: #f1f1f1;
37
+ }
38
+ #T_2e50c_row1_col5 {
39
+ background-color: #228b8d;
40
+ color: #f1f1f1;
41
+ }
42
+ #T_2e50c_row1_col8 {
43
+ background-color: #22a884;
44
+ color: #f1f1f1;
45
+ }
46
+ #T_2e50c_row1_col9 {
47
+ background-color: #3e4a89;
48
+ color: #f1f1f1;
49
+ }
50
+ #T_2e50c_row1_col10, #T_2e50c_row1_col11, #T_2e50c_row4_col7, #T_2e50c_row5_col10, #T_2e50c_row5_col11, #T_2e50c_row6_col4, #T_2e50c_row6_col7, #T_2e50c_row7_col4, #T_2e50c_row7_col7, #T_2e50c_row9_col7 {
51
+ background-color: #440256;
52
+ color: #f1f1f1;
53
+ }
54
+ #T_2e50c_row1_col12 {
55
+ background-color: #218e8d;
56
+ color: #f1f1f1;
57
+ }
58
+ #T_2e50c_row1_col15 {
59
+ background-color: #1f968b;
60
+ color: #f1f1f1;
61
+ }
62
+ #T_2e50c_row1_col16, #T_2e50c_row2_col16 {
63
+ background-color: #450559;
64
+ color: #f1f1f1;
65
+ }
66
+ #T_2e50c_row2_col2 {
67
+ background-color: #433e85;
68
+ color: #f1f1f1;
69
+ }
70
+ #T_2e50c_row2_col8, #T_2e50c_row7_col2 {
71
+ background-color: #2ab07f;
72
+ color: #f1f1f1;
73
+ }
74
+ #T_2e50c_row2_col9 {
75
+ background-color: #84d44b;
76
+ color: #000000;
77
+ }
78
+ #T_2e50c_row2_col12 {
79
+ background-color: #287c8e;
80
+ color: #f1f1f1;
81
+ }
82
+ #T_2e50c_row2_col13 {
83
+ background-color: #dde318;
84
+ color: #000000;
85
+ }
86
+ #T_2e50c_row2_col15 {
87
+ background-color: #23898e;
88
+ color: #f1f1f1;
89
+ }
90
+ #T_2e50c_row3_col2, #T_2e50c_row11_col5 {
91
+ background-color: #38588c;
92
+ color: #f1f1f1;
93
+ }
94
+ #T_2e50c_row3_col3 {
95
+ background-color: #3d4d8a;
96
+ color: #f1f1f1;
97
+ }
98
+ #T_2e50c_row3_col5 {
99
+ background-color: #238a8d;
100
+ color: #f1f1f1;
101
+ }
102
+ #T_2e50c_row3_col8, #T_2e50c_row3_col9, #T_2e50c_row9_col9 {
103
+ background-color: #26818e;
104
+ color: #f1f1f1;
105
+ }
106
+ #T_2e50c_row3_col12 {
107
+ background-color: #3b528b;
108
+ color: #f1f1f1;
109
+ }
110
+ #T_2e50c_row3_col14 {
111
+ background-color: #eae51a;
112
+ color: #000000;
113
+ }
114
+ #T_2e50c_row3_col15 {
115
+ background-color: #33628d;
116
+ color: #f1f1f1;
117
+ }
118
+ #T_2e50c_row3_col16 {
119
+ background-color: #470d60;
120
+ color: #f1f1f1;
121
+ }
122
+ #T_2e50c_row4_col2 {
123
+ background-color: #2d708e;
124
+ color: #f1f1f1;
125
+ }
126
+ #T_2e50c_row4_col3 {
127
+ background-color: #1f948c;
128
+ color: #f1f1f1;
129
+ }
130
+ #T_2e50c_row4_col5, #T_2e50c_row10_col5, #T_2e50c_row10_col14 {
131
+ background-color: #1f998a;
132
+ color: #f1f1f1;
133
+ }
134
+ #T_2e50c_row4_col8 {
135
+ background-color: #3a538b;
136
+ color: #f1f1f1;
137
+ }
138
+ #T_2e50c_row4_col9 {
139
+ background-color: #25848e;
140
+ color: #f1f1f1;
141
+ }
142
+ #T_2e50c_row4_col12, #T_2e50c_row8_col10 {
143
+ background-color: #460b5e;
144
+ color: #f1f1f1;
145
+ }
146
+ #T_2e50c_row4_col14 {
147
+ background-color: #d5e21a;
148
+ color: #000000;
149
+ }
150
+ #T_2e50c_row4_col15 {
151
+ background-color: #482878;
152
+ color: #f1f1f1;
153
+ }
154
+ #T_2e50c_row4_col16 {
155
+ background-color: #481a6c;
156
+ color: #f1f1f1;
157
+ }
158
+ #T_2e50c_row5_col2 {
159
+ background-color: #25858e;
160
+ color: #f1f1f1;
161
+ }
162
+ #T_2e50c_row5_col3 {
163
+ background-color: #1f9a8a;
164
+ color: #f1f1f1;
165
+ }
166
+ #T_2e50c_row5_col5 {
167
+ background-color: #1fa187;
168
+ color: #f1f1f1;
169
+ }
170
+ #T_2e50c_row5_col8, #T_2e50c_row7_col9 {
171
+ background-color: #2e6d8e;
172
+ color: #f1f1f1;
173
+ }
174
+ #T_2e50c_row5_col9 {
175
+ background-color: #31688e;
176
+ color: #f1f1f1;
177
+ }
178
+ #T_2e50c_row5_col12 {
179
+ background-color: #481668;
180
+ color: #f1f1f1;
181
+ }
182
+ #T_2e50c_row5_col13, #T_2e50c_row6_col13 {
183
+ background-color: #fbe723;
184
+ color: #000000;
185
+ }
186
+ #T_2e50c_row5_col14 {
187
+ background-color: #bddf26;
188
+ color: #000000;
189
+ }
190
+ #T_2e50c_row5_col15 {
191
+ background-color: #46327e;
192
+ color: #f1f1f1;
193
+ }
194
+ #T_2e50c_row5_col16 {
195
+ background-color: #482475;
196
+ color: #f1f1f1;
197
+ }
198
+ #T_2e50c_row6_col2 {
199
+ background-color: #1e9b8a;
200
+ color: #f1f1f1;
201
+ }
202
+ #T_2e50c_row6_col3 {
203
+ background-color: #77d153;
204
+ color: #000000;
205
+ }
206
+ #T_2e50c_row6_col8 {
207
+ background-color: #3e4c8a;
208
+ color: #f1f1f1;
209
+ }
210
+ #T_2e50c_row6_col9 {
211
+ background-color: #443983;
212
+ color: #f1f1f1;
213
+ }
214
+ #T_2e50c_row6_col10, #T_2e50c_row6_col11, #T_2e50c_row10_col7 {
215
+ background-color: #450457;
216
+ color: #f1f1f1;
217
+ }
218
+ #T_2e50c_row6_col14 {
219
+ background-color: #9bd93c;
220
+ color: #000000;
221
+ }
222
+ #T_2e50c_row6_col16 {
223
+ background-color: #453781;
224
+ color: #f1f1f1;
225
+ }
226
+ #T_2e50c_row7_col3 {
227
+ background-color: #69cd5b;
228
+ color: #000000;
229
+ }
230
+ #T_2e50c_row7_col5 {
231
+ background-color: #58c765;
232
+ color: #000000;
233
+ }
234
+ #T_2e50c_row7_col8 {
235
+ background-color: #375b8d;
236
+ color: #f1f1f1;
237
+ }
238
+ #T_2e50c_row7_col10, #T_2e50c_row7_col11 {
239
+ background-color: #46075a;
240
+ color: #f1f1f1;
241
+ }
242
+ #T_2e50c_row7_col12 {
243
+ background-color: #3f4788;
244
+ color: #f1f1f1;
245
+ }
246
+ #T_2e50c_row7_col14 {
247
+ background-color: #6ece58;
248
+ color: #000000;
249
+ }
250
+ #T_2e50c_row7_col15, #T_2e50c_row10_col8 {
251
+ background-color: #46307e;
252
+ color: #f1f1f1;
253
+ }
254
+ #T_2e50c_row7_col16 {
255
+ background-color: #424186;
256
+ color: #f1f1f1;
257
+ }
258
+ #T_2e50c_row8_col2 {
259
+ background-color: #52c569;
260
+ color: #000000;
261
+ }
262
+ #T_2e50c_row8_col5 {
263
+ background-color: #b5de2b;
264
+ color: #000000;
265
+ }
266
+ #T_2e50c_row8_col11 {
267
+ background-color: #460a5d;
268
+ color: #f1f1f1;
269
+ }
270
+ #T_2e50c_row8_col12, #T_2e50c_row9_col15 {
271
+ background-color: #472d7b;
272
+ color: #f1f1f1;
273
+ }
274
+ #T_2e50c_row8_col14 {
275
+ background-color: #73d056;
276
+ color: #000000;
277
+ }
278
+ #T_2e50c_row8_col16 {
279
+ background-color: #365c8d;
280
+ color: #f1f1f1;
281
+ }
282
+ #T_2e50c_row9_col2 {
283
+ background-color: #86d549;
284
+ color: #000000;
285
+ }
286
+ #T_2e50c_row9_col3 {
287
+ background-color: #46c06f;
288
+ color: #f1f1f1;
289
+ }
290
+ #T_2e50c_row9_col5 {
291
+ background-color: #32b67a;
292
+ color: #f1f1f1;
293
+ }
294
+ #T_2e50c_row9_col8 {
295
+ background-color: #423f85;
296
+ color: #f1f1f1;
297
+ }
298
+ #T_2e50c_row9_col10 {
299
+ background-color: #471164;
300
+ color: #f1f1f1;
301
+ }
302
+ #T_2e50c_row9_col11, #T_2e50c_row11_col4 {
303
+ background-color: #471063;
304
+ color: #f1f1f1;
305
+ }
306
+ #T_2e50c_row9_col12 {
307
+ background-color: #32648e;
308
+ color: #f1f1f1;
309
+ }
310
+ #T_2e50c_row9_col14 {
311
+ background-color: #38b977;
312
+ color: #f1f1f1;
313
+ }
314
+ #T_2e50c_row9_col16 {
315
+ background-color: #2a768e;
316
+ color: #f1f1f1;
317
+ }
318
+ #T_2e50c_row10_col2 {
319
+ background-color: #c2df23;
320
+ color: #000000;
321
+ }
322
+ #T_2e50c_row10_col3 {
323
+ background-color: #21a685;
324
+ color: #f1f1f1;
325
+ }
326
+ #T_2e50c_row10_col9, #T_2e50c_row10_col12 {
327
+ background-color: #1f9f88;
328
+ color: #f1f1f1;
329
+ }
330
+ #T_2e50c_row10_col10, #T_2e50c_row10_col11 {
331
+ background-color: #481f70;
332
+ color: #f1f1f1;
333
+ }
334
+ #T_2e50c_row10_col15 {
335
+ background-color: #34618d;
336
+ color: #f1f1f1;
337
+ }
338
+ #T_2e50c_row10_col16 {
339
+ background-color: #2fb47c;
340
+ color: #f1f1f1;
341
+ }
342
+ #T_2e50c_row11_col3 {
343
+ background-color: #21a585;
344
+ color: #f1f1f1;
345
+ }
346
+ #T_2e50c_row11_col7 {
347
+ background-color: #481467;
348
+ color: #f1f1f1;
349
+ }
350
+ #T_2e50c_row11_col8 {
351
+ background-color: #7fd34e;
352
+ color: #000000;
353
+ }
354
+ #T_2e50c_row11_col9 {
355
+ background-color: #c5e021;
356
+ color: #000000;
357
+ }
358
+ </style>
359
+ <table id="T_2e50c">
360
+ <thead>
361
+ <tr>
362
+ <th class="blank level0" >&nbsp;</th>
363
+ <th id="T_2e50c_level0_col0" class="col_heading level0 col0" >version</th>
364
+ <th id="T_2e50c_level0_col1" class="col_heading level0 col1" >d_sae</th>
365
+ <th id="T_2e50c_level0_col2" class="col_heading level0 col2" >layer</th>
366
+ <th id="T_2e50c_level0_col3" class="col_heading level0 col3" >kl_div_with_sae</th>
367
+ <th id="T_2e50c_level0_col4" class="col_heading level0 col4" >kl_div_with_ablation</th>
368
+ <th id="T_2e50c_level0_col5" class="col_heading level0 col5" >ce_loss_with_sae</th>
369
+ <th id="T_2e50c_level0_col6" class="col_heading level0 col6" >ce_loss_without_sae</th>
370
+ <th id="T_2e50c_level0_col7" class="col_heading level0 col7" >ce_loss_with_ablation</th>
371
+ <th id="T_2e50c_level0_col8" class="col_heading level0 col8" >kl_div_score</th>
372
+ <th id="T_2e50c_level0_col9" class="col_heading level0 col9" >ce_loss_score</th>
373
+ <th id="T_2e50c_level0_col10" class="col_heading level0 col10" >l2_norm_in</th>
374
+ <th id="T_2e50c_level0_col11" class="col_heading level0 col11" >l2_norm_out</th>
375
+ <th id="T_2e50c_level0_col12" class="col_heading level0 col12" >l2_ratio</th>
376
+ <th id="T_2e50c_level0_col13" class="col_heading level0 col13" >l0</th>
377
+ <th id="T_2e50c_level0_col14" class="col_heading level0 col14" >l1</th>
378
+ <th id="T_2e50c_level0_col15" class="col_heading level0 col15" >explained_variance</th>
379
+ <th id="T_2e50c_level0_col16" class="col_heading level0 col16" >mse</th>
380
+ <th id="T_2e50c_level0_col17" class="col_heading level0 col17" >total_tokens_evaluated</th>
381
+ <th id="T_2e50c_level0_col18" class="col_heading level0 col18" >filepath</th>
382
+ </tr>
383
+ </thead>
384
+ <tbody>
385
+ <tr>
386
+ <th id="T_2e50c_level0_row0" class="row_heading level0 row0" >OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_0/metrics.json</th>
387
+ <td id="T_2e50c_row0_col0" class="data row0 col0" >5</td>
388
+ <td id="T_2e50c_row0_col1" class="data row0 col1" >128</td>
389
+ <td id="T_2e50c_row0_col2" class="data row0 col2" >0</td>
390
+ <td id="T_2e50c_row0_col3" class="data row0 col3" >0.003102</td>
391
+ <td id="T_2e50c_row0_col4" class="data row0 col4" >2.121528</td>
392
+ <td id="T_2e50c_row0_col5" class="data row0 col5" >3.600031</td>
393
+ <td id="T_2e50c_row0_col6" class="data row0 col6" >3.599065</td>
394
+ <td id="T_2e50c_row0_col7" class="data row0 col7" >5.748601</td>
395
+ <td id="T_2e50c_row0_col8" class="data row0 col8" >0.998538</td>
396
+ <td id="T_2e50c_row0_col9" class="data row0 col9" >0.999551</td>
397
+ <td id="T_2e50c_row0_col10" class="data row0 col10" >32.013138</td>
398
+ <td id="T_2e50c_row0_col11" class="data row0 col11" >31.910549</td>
399
+ <td id="T_2e50c_row0_col12" class="data row0 col12" >0.996805</td>
400
+ <td id="T_2e50c_row0_col13" class="data row0 col13" >31.989422</td>
401
+ <td id="T_2e50c_row0_col14" class="data row0 col14" >39.967445</td>
402
+ <td id="T_2e50c_row0_col15" class="data row0 col15" >0.973218</td>
403
+ <td id="T_2e50c_row0_col16" class="data row0 col16" >6.201825</td>
404
+ <td id="T_2e50c_row0_col17" class="data row0 col17" >6144.000000</td>
405
+ <td id="T_2e50c_row0_col18" class="data row0 col18" >OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_0/metrics.json</td>
406
+ </tr>
407
+ <tr>
408
+ <th id="T_2e50c_level0_row1" class="row_heading level0 row1" >OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_1/metrics.json</th>
409
+ <td id="T_2e50c_row1_col0" class="data row1 col0" >5</td>
410
+ <td id="T_2e50c_row1_col1" class="data row1 col1" >128</td>
411
+ <td id="T_2e50c_row1_col2" class="data row1 col2" >1</td>
412
+ <td id="T_2e50c_row1_col3" class="data row1 col3" >0.001700</td>
413
+ <td id="T_2e50c_row1_col4" class="data row1 col4" >0.024066</td>
414
+ <td id="T_2e50c_row1_col5" class="data row1 col5" >3.601379</td>
415
+ <td id="T_2e50c_row1_col6" class="data row1 col6" >3.599065</td>
416
+ <td id="T_2e50c_row1_col7" class="data row1 col7" >3.620633</td>
417
+ <td id="T_2e50c_row1_col8" class="data row1 col8" >0.929377</td>
418
+ <td id="T_2e50c_row1_col9" class="data row1 col9" >0.892674</td>
419
+ <td id="T_2e50c_row1_col10" class="data row1 col10" >9.714648</td>
420
+ <td id="T_2e50c_row1_col11" class="data row1 col11" >9.198002</td>
421
+ <td id="T_2e50c_row1_col12" class="data row1 col12" >0.942320</td>
422
+ <td id="T_2e50c_row1_col13" class="data row1 col13" >31.999023</td>
423
+ <td id="T_2e50c_row1_col14" class="data row1 col14" >78.196198</td>
424
+ <td id="T_2e50c_row1_col15" class="data row1 col15" >0.879798</td>
425
+ <td id="T_2e50c_row1_col16" class="data row1 col16" >8.594531</td>
426
+ <td id="T_2e50c_row1_col17" class="data row1 col17" >6144.000000</td>
427
+ <td id="T_2e50c_row1_col18" class="data row1 col18" >OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_1/metrics.json</td>
428
+ </tr>
429
+ <tr>
430
+ <th id="T_2e50c_level0_row2" class="row_heading level0 row2" >OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_2/metrics.json</th>
431
+ <td id="T_2e50c_row2_col0" class="data row2 col0" >5</td>
432
+ <td id="T_2e50c_row2_col1" class="data row2 col1" >128</td>
433
+ <td id="T_2e50c_row2_col2" class="data row2 col2" >2</td>
434
+ <td id="T_2e50c_row2_col3" class="data row2 col3" >0.002000</td>
435
+ <td id="T_2e50c_row2_col4" class="data row2 col4" >0.031005</td>
436
+ <td id="T_2e50c_row2_col5" class="data row2 col5" >3.599786</td>
437
+ <td id="T_2e50c_row2_col6" class="data row2 col6" >3.599065</td>
438
+ <td id="T_2e50c_row2_col7" class="data row2 col7" >3.626660</td>
439
+ <td id="T_2e50c_row2_col8" class="data row2 col8" >0.935502</td>
440
+ <td id="T_2e50c_row2_col9" class="data row2 col9" >0.973847</td>
441
+ <td id="T_2e50c_row2_col10" class="data row2 col10" >8.641823</td>
442
+ <td id="T_2e50c_row2_col11" class="data row2 col11" >8.086132</td>
443
+ <td id="T_2e50c_row2_col12" class="data row2 col12" >0.934166</td>
444
+ <td id="T_2e50c_row2_col13" class="data row2 col13" >31.987143</td>
445
+ <td id="T_2e50c_row2_col14" class="data row2 col14" >79.245674</td>
446
+ <td id="T_2e50c_row2_col15" class="data row2 col15" >0.869267</td>
447
+ <td id="T_2e50c_row2_col16" class="data row2 col16" >8.591892</td>
448
+ <td id="T_2e50c_row2_col17" class="data row2 col17" >6144.000000</td>
449
+ <td id="T_2e50c_row2_col18" class="data row2 col18" >OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_2/metrics.json</td>
450
+ </tr>
451
+ <tr>
452
+ <th id="T_2e50c_level0_row3" class="row_heading level0 row3" >OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_3/metrics.json</th>
453
+ <td id="T_2e50c_row3_col0" class="data row3 col0" >5</td>
454
+ <td id="T_2e50c_row3_col1" class="data row3 col1" >128</td>
455
+ <td id="T_2e50c_row3_col2" class="data row3 col2" >3</td>
456
+ <td id="T_2e50c_row3_col3" class="data row3 col3" >0.002505</td>
457
+ <td id="T_2e50c_row3_col4" class="data row3 col4" >0.025134</td>
458
+ <td id="T_2e50c_row3_col5" class="data row3 col5" >3.601374</td>
459
+ <td id="T_2e50c_row3_col6" class="data row3 col6" >3.599065</td>
460
+ <td id="T_2e50c_row3_col7" class="data row3 col7" >3.628661</td>
461
+ <td id="T_2e50c_row3_col8" class="data row3 col8" >0.900320</td>
462
+ <td id="T_2e50c_row3_col9" class="data row3 col9" >0.921957</td>
463
+ <td id="T_2e50c_row3_col10" class="data row3 col10" >8.571012</td>
464
+ <td id="T_2e50c_row3_col11" class="data row3 col11" >7.854372</td>
465
+ <td id="T_2e50c_row3_col12" class="data row3 col12" >0.916164</td>
466
+ <td id="T_2e50c_row3_col13" class="data row3 col13" >32.000000</td>
467
+ <td id="T_2e50c_row3_col14" class="data row3 col14" >77.344894</td>
468
+ <td id="T_2e50c_row3_col15" class="data row3 col15" >0.837647</td>
469
+ <td id="T_2e50c_row3_col16" class="data row3 col16" >11.842029</td>
470
+ <td id="T_2e50c_row3_col17" class="data row3 col17" >6144.000000</td>
471
+ <td id="T_2e50c_row3_col18" class="data row3 col18" >OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_3/metrics.json</td>
472
+ </tr>
473
+ <tr>
474
+ <th id="T_2e50c_level0_row4" class="row_heading level0 row4" >OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_4/metrics.json</th>
475
+ <td id="T_2e50c_row4_col0" class="data row4 col0" >5</td>
476
+ <td id="T_2e50c_row4_col1" class="data row4 col1" >128</td>
477
+ <td id="T_2e50c_row4_col2" class="data row4 col2" >4</td>
478
+ <td id="T_2e50c_row4_col3" class="data row4 col3" >0.003484</td>
479
+ <td id="T_2e50c_row4_col4" class="data row4 col4" >0.026723</td>
480
+ <td id="T_2e50c_row4_col5" class="data row4 col5" >3.601578</td>
481
+ <td id="T_2e50c_row4_col6" class="data row4 col6" >3.599065</td>
482
+ <td id="T_2e50c_row4_col7" class="data row4 col7" >3.632133</td>
483
+ <td id="T_2e50c_row4_col8" class="data row4 col8" >0.869623</td>
484
+ <td id="T_2e50c_row4_col9" class="data row4 col9" >0.924002</td>
485
+ <td id="T_2e50c_row4_col10" class="data row4 col10" >9.123016</td>
486
+ <td id="T_2e50c_row4_col11" class="data row4 col11" >8.117954</td>
487
+ <td id="T_2e50c_row4_col12" class="data row4 col12" >0.891417</td>
488
+ <td id="T_2e50c_row4_col13" class="data row4 col13" >31.999350</td>
489
+ <td id="T_2e50c_row4_col14" class="data row4 col14" >75.460785</td>
490
+ <td id="T_2e50c_row4_col15" class="data row4 col15" >0.798166</td>
491
+ <td id="T_2e50c_row4_col16" class="data row4 col16" >17.370716</td>
492
+ <td id="T_2e50c_row4_col17" class="data row4 col17" >6144.000000</td>
493
+ <td id="T_2e50c_row4_col18" class="data row4 col18" >OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_4/metrics.json</td>
494
+ </tr>
495
+ <tr>
496
+ <th id="T_2e50c_level0_row5" class="row_heading level0 row5" >OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_5/metrics.json</th>
497
+ <td id="T_2e50c_row5_col0" class="data row5 col0" >5</td>
498
+ <td id="T_2e50c_row5_col1" class="data row5 col1" >128</td>
499
+ <td id="T_2e50c_row5_col2" class="data row5 col2" >5</td>
500
+ <td id="T_2e50c_row5_col3" class="data row5 col3" >0.003556</td>
501
+ <td id="T_2e50c_row5_col4" class="data row5 col4" >0.031378</td>
502
+ <td id="T_2e50c_row5_col5" class="data row5 col5" >3.601702</td>
503
+ <td id="T_2e50c_row5_col6" class="data row5 col6" >3.599065</td>
504
+ <td id="T_2e50c_row5_col7" class="data row5 col7" >3.627760</td>
505
+ <td id="T_2e50c_row5_col8" class="data row5 col8" >0.886681</td>
506
+ <td id="T_2e50c_row5_col9" class="data row5 col9" >0.908081</td>
507
+ <td id="T_2e50c_row5_col10" class="data row5 col10" >10.034396</td>
508
+ <td id="T_2e50c_row5_col11" class="data row5 col11" >8.960555</td>
509
+ <td id="T_2e50c_row5_col12" class="data row5 col12" >0.894489</td>
510
+ <td id="T_2e50c_row5_col13" class="data row5 col13" >31.998373</td>
511
+ <td id="T_2e50c_row5_col14" class="data row5 col14" >73.329163</td>
512
+ <td id="T_2e50c_row5_col15" class="data row5 col15" >0.804148</td>
513
+ <td id="T_2e50c_row5_col16" class="data row5 col16" >22.216118</td>
514
+ <td id="T_2e50c_row5_col17" class="data row5 col17" >6144.000000</td>
515
+ <td id="T_2e50c_row5_col18" class="data row5 col18" >OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_5/metrics.json</td>
516
+ </tr>
517
+ <tr>
518
+ <th id="T_2e50c_level0_row6" class="row_heading level0 row6" >OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_6/metrics.json</th>
519
+ <td id="T_2e50c_row6_col0" class="data row6 col0" >5</td>
520
+ <td id="T_2e50c_row6_col1" class="data row6 col1" >128</td>
521
+ <td id="T_2e50c_row6_col2" class="data row6 col2" >6</td>
522
+ <td id="T_2e50c_row6_col3" class="data row6 col3" >0.004436</td>
523
+ <td id="T_2e50c_row6_col4" class="data row6 col4" >0.032789</td>
524
+ <td id="T_2e50c_row6_col5" class="data row6 col5" >3.603125</td>
525
+ <td id="T_2e50c_row6_col6" class="data row6 col6" >3.599065</td>
526
+ <td id="T_2e50c_row6_col7" class="data row6 col7" >3.634286</td>
527
+ <td id="T_2e50c_row6_col8" class="data row6 col8" >0.864723</td>
528
+ <td id="T_2e50c_row6_col9" class="data row6 col9" >0.884709</td>
529
+ <td id="T_2e50c_row6_col10" class="data row6 col10" >11.678066</td>
530
+ <td id="T_2e50c_row6_col11" class="data row6 col11" >10.348961</td>
531
+ <td id="T_2e50c_row6_col12" class="data row6 col12" >0.888121</td>
532
+ <td id="T_2e50c_row6_col13" class="data row6 col13" >31.998699</td>
533
+ <td id="T_2e50c_row6_col14" class="data row6 col14" >70.520447</td>
534
+ <td id="T_2e50c_row6_col15" class="data row6 col15" >0.776109</td>
535
+ <td id="T_2e50c_row6_col16" class="data row6 col16" >31.851517</td>
536
+ <td id="T_2e50c_row6_col17" class="data row6 col17" >6144.000000</td>
537
+ <td id="T_2e50c_row6_col18" class="data row6 col18" >OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_6/metrics.json</td>
538
+ </tr>
539
+ <tr>
540
+ <th id="T_2e50c_level0_row7" class="row_heading level0 row7" >OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_7/metrics.json</th>
541
+ <td id="T_2e50c_row7_col0" class="data row7 col0" >5</td>
542
+ <td id="T_2e50c_row7_col1" class="data row7 col1" >128</td>
543
+ <td id="T_2e50c_row7_col2" class="data row7 col2" >7</td>
544
+ <td id="T_2e50c_row7_col3" class="data row7 col3" >0.004356</td>
545
+ <td id="T_2e50c_row7_col4" class="data row7 col4" >0.034661</td>
546
+ <td id="T_2e50c_row7_col5" class="data row7 col5" >3.602255</td>
547
+ <td id="T_2e50c_row7_col6" class="data row7 col6" >3.599065</td>
548
+ <td id="T_2e50c_row7_col7" class="data row7 col7" >3.634834</td>
549
+ <td id="T_2e50c_row7_col8" class="data row7 col8" >0.874331</td>
550
+ <td id="T_2e50c_row7_col9" class="data row7 col9" >0.910796</td>
551
+ <td id="T_2e50c_row7_col10" class="data row7 col10" >13.650208</td>
552
+ <td id="T_2e50c_row7_col11" class="data row7 col11" >12.425106</td>
553
+ <td id="T_2e50c_row7_col12" class="data row7 col12" >0.911562</td>
554
+ <td id="T_2e50c_row7_col13" class="data row7 col13" >31.999350</td>
555
+ <td id="T_2e50c_row7_col14" class="data row7 col14" >66.425446</td>
556
+ <td id="T_2e50c_row7_col15" class="data row7 col15" >0.803248</td>
557
+ <td id="T_2e50c_row7_col16" class="data row7 col16" >37.544487</td>
558
+ <td id="T_2e50c_row7_col17" class="data row7 col17" >6144.000000</td>
559
+ <td id="T_2e50c_row7_col18" class="data row7 col18" >OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_7/metrics.json</td>
560
+ </tr>
561
+ <tr>
562
+ <th id="T_2e50c_level0_row8" class="row_heading level0 row8" >OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_8/metrics.json</th>
563
+ <td id="T_2e50c_row8_col0" class="data row8 col0" >5</td>
564
+ <td id="T_2e50c_row8_col1" class="data row8 col1" >128</td>
565
+ <td id="T_2e50c_row8_col2" class="data row8 col2" >8</td>
566
+ <td id="T_2e50c_row8_col3" class="data row8 col3" >0.005140</td>
567
+ <td id="T_2e50c_row8_col4" class="data row8 col4" >0.029382</td>
568
+ <td id="T_2e50c_row8_col5" class="data row8 col5" >3.602755</td>
569
+ <td id="T_2e50c_row8_col6" class="data row8 col6" >3.599065</td>
570
+ <td id="T_2e50c_row8_col7" class="data row8 col7" >3.625802</td>
571
+ <td id="T_2e50c_row8_col8" class="data row8 col8" >0.825072</td>
572
+ <td id="T_2e50c_row8_col9" class="data row8 col9" >0.861990</td>
573
+ <td id="T_2e50c_row8_col10" class="data row8 col10" >16.137949</td>
574
+ <td id="T_2e50c_row8_col11" class="data row8 col11" >14.539435</td>
575
+ <td id="T_2e50c_row8_col12" class="data row8 col12" >0.902075</td>
576
+ <td id="T_2e50c_row8_col13" class="data row8 col13" >32.000000</td>
577
+ <td id="T_2e50c_row8_col14" class="data row8 col14" >66.759209</td>
578
+ <td id="T_2e50c_row8_col15" class="data row8 col15" >0.776774</td>
579
+ <td id="T_2e50c_row8_col16" class="data row8 col16" >52.779171</td>
580
+ <td id="T_2e50c_row8_col17" class="data row8 col17" >6144.000000</td>
581
+ <td id="T_2e50c_row8_col18" class="data row8 col18" >OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_8/metrics.json</td>
582
+ </tr>
583
+ <tr>
584
+ <th id="T_2e50c_level0_row9" class="row_heading level0 row9" >OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_9/metrics.json</th>
585
+ <td id="T_2e50c_row9_col0" class="data row9 col0" >5</td>
586
+ <td id="T_2e50c_row9_col1" class="data row9 col1" >128</td>
587
+ <td id="T_2e50c_row9_col2" class="data row9 col2" >9</td>
588
+ <td id="T_2e50c_row9_col3" class="data row9 col3" >0.004128</td>
589
+ <td id="T_2e50c_row9_col4" class="data row9 col4" >0.028918</td>
590
+ <td id="T_2e50c_row9_col5" class="data row9 col5" >3.601983</td>
591
+ <td id="T_2e50c_row9_col6" class="data row9 col6" >3.599065</td>
592
+ <td id="T_2e50c_row9_col7" class="data row9 col7" >3.636501</td>
593
+ <td id="T_2e50c_row9_col8" class="data row9 col8" >0.857246</td>
594
+ <td id="T_2e50c_row9_col9" class="data row9 col9" >0.922041</td>
595
+ <td id="T_2e50c_row9_col10" class="data row9 col10" >20.912498</td>
596
+ <td id="T_2e50c_row9_col11" class="data row9 col11" >19.252647</td>
597
+ <td id="T_2e50c_row9_col12" class="data row9 col12" >0.923308</td>
598
+ <td id="T_2e50c_row9_col13" class="data row9 col13" >31.999350</td>
599
+ <td id="T_2e50c_row9_col14" class="data row9 col14" >60.197113</td>
600
+ <td id="T_2e50c_row9_col15" class="data row9 col15" >0.800841</td>
601
+ <td id="T_2e50c_row9_col16" class="data row9 col16" >70.065033</td>
602
+ <td id="T_2e50c_row9_col17" class="data row9 col17" >6144.000000</td>
603
+ <td id="T_2e50c_row9_col18" class="data row9 col18" >OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_9/metrics.json</td>
604
+ </tr>
605
+ <tr>
606
+ <th id="T_2e50c_level0_row10" class="row_heading level0 row10" >OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_10/metrics.json</th>
607
+ <td id="T_2e50c_row10_col0" class="data row10 col0" >5</td>
608
+ <td id="T_2e50c_row10_col1" class="data row10 col1" >128</td>
609
+ <td id="T_2e50c_row10_col2" class="data row10 col2" >10</td>
610
+ <td id="T_2e50c_row10_col3" class="data row10 col3" >0.003733</td>
611
+ <td id="T_2e50c_row10_col4" class="data row10 col4" >0.024752</td>
612
+ <td id="T_2e50c_row10_col5" class="data row10 col5" >3.601586</td>
613
+ <td id="T_2e50c_row10_col6" class="data row10 col6" >3.599065</td>
614
+ <td id="T_2e50c_row10_col7" class="data row10 col7" >3.640488</td>
615
+ <td id="T_2e50c_row10_col8" class="data row10 col8" >0.849176</td>
616
+ <td id="T_2e50c_row10_col9" class="data row10 col9" >0.939128</td>
617
+ <td id="T_2e50c_row10_col10" class="data row10 col10" >31.821377</td>
618
+ <td id="T_2e50c_row10_col11" class="data row10 col11" >30.270412</td>
619
+ <td id="T_2e50c_row10_col12" class="data row10 col12" >0.950137</td>
620
+ <td id="T_2e50c_row10_col13" class="data row10 col13" >32.000000</td>
621
+ <td id="T_2e50c_row10_col14" class="data row10 col14" >52.307262</td>
622
+ <td id="T_2e50c_row10_col15" class="data row10 col15" >0.836825</td>
623
+ <td id="T_2e50c_row10_col16" class="data row10 col16" >112.167625</td>
624
+ <td id="T_2e50c_row10_col17" class="data row10 col17" >6144.000000</td>
625
+ <td id="T_2e50c_row10_col18" class="data row10 col18" >OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_10/metrics.json</td>
626
+ </tr>
627
+ <tr>
628
+ <th id="T_2e50c_level0_row11" class="row_heading level0 row11" >OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_11/metrics.json</th>
629
+ <td id="T_2e50c_row11_col0" class="data row11 col0" >5</td>
630
+ <td id="T_2e50c_row11_col1" class="data row11 col1" >128</td>
631
+ <td id="T_2e50c_row11_col2" class="data row11 col2" >11</td>
632
+ <td id="T_2e50c_row11_col3" class="data row11 col3" >0.003718</td>
633
+ <td id="T_2e50c_row11_col4" class="data row11 col4" >0.106875</td>
634
+ <td id="T_2e50c_row11_col5" class="data row11 col5" >3.600695</td>
635
+ <td id="T_2e50c_row11_col6" class="data row11 col6" >3.599065</td>
636
+ <td id="T_2e50c_row11_col7" class="data row11 col7" >3.730870</td>
637
+ <td id="T_2e50c_row11_col8" class="data row11 col8" >0.965215</td>
638
+ <td id="T_2e50c_row11_col9" class="data row11 col9" >0.987627</td>
639
+ <td id="T_2e50c_row11_col10" class="data row11 col10" >280.864441</td>
640
+ <td id="T_2e50c_row11_col11" class="data row11 col11" >280.557678</td>
641
+ <td id="T_2e50c_row11_col12" class="data row11 col12" >0.998717</td>
642
+ <td id="T_2e50c_row11_col13" class="data row11 col13" >31.750000</td>
643
+ <td id="T_2e50c_row11_col14" class="data row11 col14" >20.949717</td>
644
+ <td id="T_2e50c_row11_col15" class="data row11 col15" >0.969702</td>
645
+ <td id="T_2e50c_row11_col16" class="data row11 col16" >169.453140</td>
646
+ <td id="T_2e50c_row11_col17" class="data row11 col17" >6144.000000</td>
647
+ <td id="T_2e50c_row11_col18" class="data row11 col18" >OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_11/metrics.json</td>
648
+ </tr>
649
+ </tbody>
650
+ </table>
benchmark_stats.png ADDED

Git LFS Details

  • SHA256: 5b7fcd1de8bd4a582795139b685a75c5b6d81e1fa22d0e9f220bf451dd04dc2d
  • Pointer size: 132 Bytes
  • Size of remote file: 4.61 MB
v5_128k_layer_0/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.0.hook_attn_out", "hook_layer": 0, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
v5_128k_layer_0/metrics.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metrics/kl_div_with_sae": 0.003102128393948078, "metrics/kl_div_with_ablation": 2.121527671813965, "metrics/ce_loss_with_sae": 3.6000306606292725, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 5.74860143661499, "metrics/kl_div_score": 0.9985377855612434, "metrics/ce_loss_score": 0.9995505673311031, "metrics/l2_norm_in": 32.01313781738281, "metrics/l2_norm_out": 31.91054916381836, "metrics/l2_ratio": 0.9968054294586182, "metrics/l0": 31.989421844482422, "metrics/l1": 39.967445373535156, "metrics/explained_variance": 0.9732184410095215, "metrics/mse": 6.201825141906738, "metrics/total_tokens_evaluated": 6144}
v5_128k_layer_0/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5801db318f259a3e5fe1b5817b9ebba6935b6c9b142e783205a92f4a82beb7b6
3
+ size 805834048
v5_128k_layer_0/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:414c210bf7d6fbddb053ceaa011fac25ceb9f5a073178d9e5d5d9d18053a06e5
3
+ size 524368
v5_128k_layer_1/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.1.hook_attn_out", "hook_layer": 1, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
v5_128k_layer_1/metrics.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metrics/kl_div_with_sae": 0.001699602697044611, "metrics/kl_div_with_ablation": 0.024065840989351273, "metrics/ce_loss_with_sae": 3.60137939453125, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.6206326484680176, "metrics/kl_div_score": 0.9293769663899693, "metrics/ce_loss_score": 0.8926743530504184, "metrics/l2_norm_in": 9.714648246765137, "metrics/l2_norm_out": 9.198001861572266, "metrics/l2_ratio": 0.9423195719718933, "metrics/l0": 31.9990234375, "metrics/l1": 78.19619750976562, "metrics/explained_variance": 0.8797982931137085, "metrics/mse": 8.594531059265137, "metrics/total_tokens_evaluated": 6144}
v5_128k_layer_1/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57d70f600a4846151fd7ea487915802ab8bbb90015c1b0e65460cfe90e46da64
3
+ size 805834048
v5_128k_layer_1/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1eabe96ac25facfe3ade666f181005b41973249ec88b0ac22be2f85103668a5
3
+ size 524368
v5_128k_layer_10/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.10.hook_attn_out", "hook_layer": 10, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
v5_128k_layer_10/metrics.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metrics/kl_div_with_sae": 0.003733165329322219, "metrics/kl_div_with_ablation": 0.02475181221961975, "metrics/ce_loss_with_sae": 3.601586103439331, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.6404881477355957, "metrics/kl_div_score": 0.8491760806765053, "metrics/ce_loss_score": 0.9391284828741302, "metrics/l2_norm_in": 31.82137680053711, "metrics/l2_norm_out": 30.27041244506836, "metrics/l2_ratio": 0.9501370787620544, "metrics/l0": 32.0, "metrics/l1": 52.3072624206543, "metrics/explained_variance": 0.8368248343467712, "metrics/mse": 112.1676254272461, "metrics/total_tokens_evaluated": 6144}
v5_128k_layer_10/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e39ee1ce57e86bb93fe7a87c6b6a37605c32e576aa1cf4c699a92400db705633
3
+ size 805834048
v5_128k_layer_10/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4e7b306d4f7029f5a36e85a70a927aaaaeeff535cae93ce9cae14e3b221f5e0
3
+ size 524368
v5_128k_layer_11/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.11.hook_attn_out", "hook_layer": 11, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
v5_128k_layer_11/metrics.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metrics/kl_div_with_sae": 0.003717603161931038, "metrics/kl_div_with_ablation": 0.10687470436096191, "metrics/ce_loss_with_sae": 3.6006953716278076, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.730869770050049, "metrics/kl_div_score": 0.9652153127893099, "metrics/ce_loss_score": 0.9876273219121214, "metrics/l2_norm_in": 280.86444091796875, "metrics/l2_norm_out": 280.55767822265625, "metrics/l2_ratio": 0.9987168312072754, "metrics/l0": 31.75, "metrics/l1": 20.949716567993164, "metrics/explained_variance": 0.9697019457817078, "metrics/mse": 169.45314025878906, "metrics/total_tokens_evaluated": 6144}
v5_128k_layer_11/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6eaa35a91eea1a55d199c911f529aaabdb6055e20d83f043ba8055c4f845405
3
+ size 805834048
v5_128k_layer_11/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:678bdda55d4ad6ee4d75de36b9d5b3f79350f6c5405a71d2b7f1e0c1aecd632e
3
+ size 524368
v5_128k_layer_2/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.2.hook_attn_out", "hook_layer": 2, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
v5_128k_layer_2/metrics.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metrics/kl_div_with_sae": 0.001999747008085251, "metrics/kl_div_with_ablation": 0.031004613265395164, "metrics/ce_loss_with_sae": 3.5997862815856934, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.626660108566284, "metrics/kl_div_score": 0.9355016303229556, "metrics/ce_loss_score": 0.9738474564556262, "metrics/l2_norm_in": 8.641822814941406, "metrics/l2_norm_out": 8.086132049560547, "metrics/l2_ratio": 0.9341656565666199, "metrics/l0": 31.98714256286621, "metrics/l1": 79.24567413330078, "metrics/explained_variance": 0.8692671060562134, "metrics/mse": 8.59189224243164, "metrics/total_tokens_evaluated": 6144}
v5_128k_layer_2/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6be094d591c4d3d2c6789824417ba08da1d01f21fb8e2b56a7442f551cb991c
3
+ size 805834048
v5_128k_layer_2/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:faf997fff4f66a268b08854209d7f8edda454fc1f8d8b500bfef44cba5c94d91
3
+ size 524368
v5_128k_layer_3/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.3.hook_attn_out", "hook_layer": 3, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
v5_128k_layer_3/metrics.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metrics/kl_div_with_sae": 0.00250535411760211, "metrics/kl_div_with_ablation": 0.025133918970823288, "metrics/ce_loss_with_sae": 3.601374387741089, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.6286609172821045, "metrics/kl_div_score": 0.900319798097923, "metrics/ce_loss_score": 0.9219565637687698, "metrics/l2_norm_in": 8.571012496948242, "metrics/l2_norm_out": 7.854372024536133, "metrics/l2_ratio": 0.9161635637283325, "metrics/l0": 32.0, "metrics/l1": 77.34489440917969, "metrics/explained_variance": 0.8376470804214478, "metrics/mse": 11.842028617858887, "metrics/total_tokens_evaluated": 6144}
v5_128k_layer_3/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97cb9d4eb7f67689e8f61c614e0ca03de47cbf44e3e356737e3d99d19f4d3807
3
+ size 805834048
v5_128k_layer_3/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4be4718eddb1f57d87446d7402af2afc7d46700fd05d7178594a7b2a3896806d
3
+ size 524368
v5_128k_layer_4/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.4.hook_attn_out", "hook_layer": 4, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
v5_128k_layer_4/metrics.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metrics/kl_div_with_sae": 0.0034840195439755917, "metrics/kl_div_with_ablation": 0.026722650974988937, "metrics/ce_loss_with_sae": 3.6015777587890625, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.6321334838867188, "metrics/kl_div_score": 0.8696229821196835, "metrics/ce_loss_score": 0.9240019898919258, "metrics/l2_norm_in": 9.123016357421875, "metrics/l2_norm_out": 8.11795425415039, "metrics/l2_ratio": 0.891417384147644, "metrics/l0": 31.99934959411621, "metrics/l1": 75.46078491210938, "metrics/explained_variance": 0.7981663942337036, "metrics/mse": 17.370716094970703, "metrics/total_tokens_evaluated": 6144}
v5_128k_layer_4/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9642c61b839b69ffba9d3b957e3536548290d94358610ec2da586e2a6ec16f4
3
+ size 805834048
v5_128k_layer_4/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffa846f2de01f62de07be9ed953834d3bd1ea5a456f73f59243972db266de1b9
3
+ size 524368
v5_128k_layer_5/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.5.hook_attn_out", "hook_layer": 5, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
v5_128k_layer_5/metrics.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metrics/kl_div_with_sae": 0.003555751871317625, "metrics/kl_div_with_ablation": 0.031378373503685, "metrics/ce_loss_with_sae": 3.6017022132873535, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.6277596950531006, "metrics/kl_div_score": 0.8866814473063734, "metrics/ce_loss_score": 0.9080810262886769, "metrics/l2_norm_in": 10.034396171569824, "metrics/l2_norm_out": 8.960555076599121, "metrics/l2_ratio": 0.8944891691207886, "metrics/l0": 31.99837303161621, "metrics/l1": 73.32916259765625, "metrics/explained_variance": 0.8041481375694275, "metrics/mse": 22.21611785888672, "metrics/total_tokens_evaluated": 6144}
v5_128k_layer_5/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c69062f1ae86549c68612dc574d78024749e1b2f6b9cf2338fd74800747aed52
3
+ size 805834048
v5_128k_layer_5/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09a893b71eb1bc60fe9d9ee06770f4853c0c145bf61883acbb36d00b3d4daea7
3
+ size 524368
v5_128k_layer_6/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.6.hook_attn_out", "hook_layer": 6, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
v5_128k_layer_6/metrics.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metrics/kl_div_with_sae": 0.004435557406395674, "metrics/kl_div_with_ablation": 0.03278880566358566, "metrics/ce_loss_with_sae": 3.6031253337860107, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.634286403656006, "metrics/kl_div_score": 0.8647234226246404, "metrics/ce_loss_score": 0.8847093704097312, "metrics/l2_norm_in": 11.67806625366211, "metrics/l2_norm_out": 10.348960876464844, "metrics/l2_ratio": 0.8881208300590515, "metrics/l0": 31.998699188232422, "metrics/l1": 70.52044677734375, "metrics/explained_variance": 0.7761087417602539, "metrics/mse": 31.851516723632812, "metrics/total_tokens_evaluated": 6144}
v5_128k_layer_6/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d57cca0ea3db86f6e07a251c29fa046056c2aced38bfb97fb9c14a72e1235e86
3
+ size 805834048
v5_128k_layer_6/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ca493d7aa8d26ea18ce5a93b247b29c43b0f058f4dcde9582c8dd9e569d1dc1
3
+ size 524368
v5_128k_layer_7/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.7.hook_attn_out", "hook_layer": 7, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
v5_128k_layer_7/metrics.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metrics/kl_div_with_sae": 0.004355770535767078, "metrics/kl_div_with_ablation": 0.03466065973043442, "metrics/ce_loss_with_sae": 3.602255344390869, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.634833812713623, "metrics/kl_div_score": 0.8743309974581235, "metrics/ce_loss_score": 0.9107960567097922, "metrics/l2_norm_in": 13.65020751953125, "metrics/l2_norm_out": 12.425106048583984, "metrics/l2_ratio": 0.9115623235702515, "metrics/l0": 31.99934959411621, "metrics/l1": 66.42544555664062, "metrics/explained_variance": 0.8032481074333191, "metrics/mse": 37.54448699951172, "metrics/total_tokens_evaluated": 6144}
v5_128k_layer_7/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3d55f6e9221c68f5bce0af83fd51b437c33d0ce4e1fd7f66f14b56072026585
3
+ size 805834048
v5_128k_layer_7/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:781f838bccc120af03ede7b27ed850d78e9534c5139949122572aebb4d56c260
3
+ size 524368
v5_128k_layer_8/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.8.hook_attn_out", "hook_layer": 8, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
v5_128k_layer_8/metrics.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metrics/kl_div_with_sae": 0.005139736924320459, "metrics/kl_div_with_ablation": 0.02938206121325493, "metrics/ce_loss_with_sae": 3.602754592895508, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.6258018016815186, "metrics/kl_div_score": 0.8250722817907069, "metrics/ce_loss_score": 0.8619899415037808, "metrics/l2_norm_in": 16.137948989868164, "metrics/l2_norm_out": 14.539435386657715, "metrics/l2_ratio": 0.902074933052063, "metrics/l0": 32.0, "metrics/l1": 66.75920867919922, "metrics/explained_variance": 0.7767739295959473, "metrics/mse": 52.779170989990234, "metrics/total_tokens_evaluated": 6144}
v5_128k_layer_8/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3479c0a21589142e220e0585a8b03d3c9e7be8960f764d23627badf132c98dff
3
+ size 805834048
v5_128k_layer_8/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9034f92998777870d690662ab5c26afcd3c34281b50e8c8a17debe53d7c773c
3
+ size 524368
v5_128k_layer_9/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.9.hook_attn_out", "hook_layer": 9, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
v5_128k_layer_9/metrics.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metrics/kl_div_with_sae": 0.004128233529627323, "metrics/kl_div_with_ablation": 0.028918448835611343, "metrics/ce_loss_with_sae": 3.601983070373535, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.636500835418701, "metrics/kl_div_score": 0.8572456789403016, "metrics/ce_loss_score": 0.922041281628338, "metrics/l2_norm_in": 20.912498474121094, "metrics/l2_norm_out": 19.252647399902344, "metrics/l2_ratio": 0.9233078956604004, "metrics/l0": 31.99934959411621, "metrics/l1": 60.197113037109375, "metrics/explained_variance": 0.800841212272644, "metrics/mse": 70.06503295898438, "metrics/total_tokens_evaluated": 6144}