GPT2-Small-OAI-v5-128k-attn-out-SAEs / benchmark_stats.html
jbloom's picture
Upload folder using huggingface_hub
1fe73d5 verified
<style>
/*
 * Per-cell heat-map colours for the table with id "T_2e50c".
 * Each selector targets a data cell by id, T_2e50c_row<R>_col<C>, and each rule
 * sets that cell's background colour plus a light (#f1f1f1) or dark (#000000)
 * text colour. Only columns 2-17 are styled; col0, col1 and col18 have no rule.
 * NOTE(review): this looks machine-generated (presumably pandas Styler) -
 * regenerate rather than hand-edit if the data changes; confirm the generator.
 * NOTE(review): light text on the mid-green backgrounds (e.g. #46c06f, #38b977)
 * is only about 2:1 contrast, below the WCAG AA 4.5:1 minimum - consider a lower
 * light/dark text threshold when regenerating.
 */
#T_2e50c_row0_col2, #T_2e50c_row0_col6, #T_2e50c_row0_col16, #T_2e50c_row0_col17, #T_2e50c_row1_col3, #T_2e50c_row1_col4, #T_2e50c_row1_col6, #T_2e50c_row1_col7, #T_2e50c_row1_col17, #T_2e50c_row2_col4, #T_2e50c_row2_col5, #T_2e50c_row2_col6, #T_2e50c_row2_col7, #T_2e50c_row2_col10, #T_2e50c_row2_col11, #T_2e50c_row2_col17, #T_2e50c_row3_col4, #T_2e50c_row3_col6, #T_2e50c_row3_col7, #T_2e50c_row3_col10, #T_2e50c_row3_col11, #T_2e50c_row3_col17, #T_2e50c_row4_col4, #T_2e50c_row4_col6, #T_2e50c_row4_col10, #T_2e50c_row4_col11, #T_2e50c_row4_col17, #T_2e50c_row5_col4, #T_2e50c_row5_col6, #T_2e50c_row5_col7, #T_2e50c_row5_col17, #T_2e50c_row6_col6, #T_2e50c_row6_col12, #T_2e50c_row6_col15, #T_2e50c_row6_col17, #T_2e50c_row7_col6, #T_2e50c_row7_col17, #T_2e50c_row8_col4, #T_2e50c_row8_col6, #T_2e50c_row8_col7, #T_2e50c_row8_col8, #T_2e50c_row8_col9, #T_2e50c_row8_col15, #T_2e50c_row8_col17, #T_2e50c_row9_col4, #T_2e50c_row9_col6, #T_2e50c_row9_col17, #T_2e50c_row10_col4, #T_2e50c_row10_col6, #T_2e50c_row10_col17, #T_2e50c_row11_col6, #T_2e50c_row11_col13, #T_2e50c_row11_col14, #T_2e50c_row11_col17 {
  background-color: #440154;
  color: #f1f1f1;
}
#T_2e50c_row0_col3 {
  background-color: #297a8e;
  color: #f1f1f1;
}
#T_2e50c_row0_col4, #T_2e50c_row0_col7, #T_2e50c_row0_col8, #T_2e50c_row0_col9, #T_2e50c_row0_col15, #T_2e50c_row1_col13, #T_2e50c_row2_col14, #T_2e50c_row3_col13, #T_2e50c_row4_col13, #T_2e50c_row6_col5, #T_2e50c_row7_col13, #T_2e50c_row8_col3, #T_2e50c_row8_col13, #T_2e50c_row9_col13, #T_2e50c_row10_col13, #T_2e50c_row11_col2, #T_2e50c_row11_col10, #T_2e50c_row11_col11, #T_2e50c_row11_col12, #T_2e50c_row11_col16 {
  background-color: #fde725;
  color: #000000;
}
#T_2e50c_row0_col5 {
  background-color: #481b6d;
  color: #f1f1f1;
}
#T_2e50c_row0_col10, #T_2e50c_row0_col11, #T_2e50c_row2_col3 {
  background-color: #482071;
  color: #f1f1f1;
}
#T_2e50c_row0_col12, #T_2e50c_row1_col14, #T_2e50c_row11_col15 {
  background-color: #f4e61e;
  color: #000000;
}
#T_2e50c_row0_col13 {
  background-color: #e5e419;
  color: #000000;
}
#T_2e50c_row0_col14 {
  background-color: #31668e;
  color: #f1f1f1;
}
#T_2e50c_row1_col2 {
  background-color: #482173;
  color: #f1f1f1;
}
#T_2e50c_row1_col5 {
  background-color: #228b8d;
  color: #f1f1f1;
}
#T_2e50c_row1_col8 {
  background-color: #22a884;
  color: #f1f1f1;
}
#T_2e50c_row1_col9 {
  background-color: #3e4a89;
  color: #f1f1f1;
}
#T_2e50c_row1_col10, #T_2e50c_row1_col11, #T_2e50c_row4_col7, #T_2e50c_row5_col10, #T_2e50c_row5_col11, #T_2e50c_row6_col4, #T_2e50c_row6_col7, #T_2e50c_row7_col4, #T_2e50c_row7_col7, #T_2e50c_row9_col7 {
  background-color: #440256;
  color: #f1f1f1;
}
#T_2e50c_row1_col12 {
  background-color: #218e8d;
  color: #f1f1f1;
}
#T_2e50c_row1_col15 {
  background-color: #1f968b;
  color: #f1f1f1;
}
#T_2e50c_row1_col16, #T_2e50c_row2_col16 {
  background-color: #450559;
  color: #f1f1f1;
}
#T_2e50c_row2_col2 {
  background-color: #433e85;
  color: #f1f1f1;
}
#T_2e50c_row2_col8, #T_2e50c_row7_col2 {
  background-color: #2ab07f;
  color: #f1f1f1;
}
#T_2e50c_row2_col9 {
  background-color: #84d44b;
  color: #000000;
}
#T_2e50c_row2_col12 {
  background-color: #287c8e;
  color: #f1f1f1;
}
#T_2e50c_row2_col13 {
  background-color: #dde318;
  color: #000000;
}
#T_2e50c_row2_col15 {
  background-color: #23898e;
  color: #f1f1f1;
}
#T_2e50c_row3_col2, #T_2e50c_row11_col5 {
  background-color: #38588c;
  color: #f1f1f1;
}
#T_2e50c_row3_col3 {
  background-color: #3d4d8a;
  color: #f1f1f1;
}
#T_2e50c_row3_col5 {
  background-color: #238a8d;
  color: #f1f1f1;
}
#T_2e50c_row3_col8, #T_2e50c_row3_col9, #T_2e50c_row9_col9 {
  background-color: #26818e;
  color: #f1f1f1;
}
#T_2e50c_row3_col12 {
  background-color: #3b528b;
  color: #f1f1f1;
}
#T_2e50c_row3_col14 {
  background-color: #eae51a;
  color: #000000;
}
#T_2e50c_row3_col15 {
  background-color: #33628d;
  color: #f1f1f1;
}
#T_2e50c_row3_col16 {
  background-color: #470d60;
  color: #f1f1f1;
}
#T_2e50c_row4_col2 {
  background-color: #2d708e;
  color: #f1f1f1;
}
#T_2e50c_row4_col3 {
  background-color: #1f948c;
  color: #f1f1f1;
}
#T_2e50c_row4_col5, #T_2e50c_row10_col5, #T_2e50c_row10_col14 {
  background-color: #1f998a;
  color: #f1f1f1;
}
#T_2e50c_row4_col8 {
  background-color: #3a538b;
  color: #f1f1f1;
}
#T_2e50c_row4_col9 {
  background-color: #25848e;
  color: #f1f1f1;
}
#T_2e50c_row4_col12, #T_2e50c_row8_col10 {
  background-color: #460b5e;
  color: #f1f1f1;
}
#T_2e50c_row4_col14 {
  background-color: #d5e21a;
  color: #000000;
}
#T_2e50c_row4_col15 {
  background-color: #482878;
  color: #f1f1f1;
}
#T_2e50c_row4_col16 {
  background-color: #481a6c;
  color: #f1f1f1;
}
#T_2e50c_row5_col2 {
  background-color: #25858e;
  color: #f1f1f1;
}
#T_2e50c_row5_col3 {
  background-color: #1f9a8a;
  color: #f1f1f1;
}
#T_2e50c_row5_col5 {
  background-color: #1fa187;
  color: #f1f1f1;
}
#T_2e50c_row5_col8, #T_2e50c_row7_col9 {
  background-color: #2e6d8e;
  color: #f1f1f1;
}
#T_2e50c_row5_col9 {
  background-color: #31688e;
  color: #f1f1f1;
}
#T_2e50c_row5_col12 {
  background-color: #481668;
  color: #f1f1f1;
}
#T_2e50c_row5_col13, #T_2e50c_row6_col13 {
  background-color: #fbe723;
  color: #000000;
}
#T_2e50c_row5_col14 {
  background-color: #bddf26;
  color: #000000;
}
#T_2e50c_row5_col15 {
  background-color: #46327e;
  color: #f1f1f1;
}
#T_2e50c_row5_col16 {
  background-color: #482475;
  color: #f1f1f1;
}
#T_2e50c_row6_col2 {
  background-color: #1e9b8a;
  color: #f1f1f1;
}
#T_2e50c_row6_col3 {
  background-color: #77d153;
  color: #000000;
}
#T_2e50c_row6_col8 {
  background-color: #3e4c8a;
  color: #f1f1f1;
}
#T_2e50c_row6_col9 {
  background-color: #443983;
  color: #f1f1f1;
}
#T_2e50c_row6_col10, #T_2e50c_row6_col11, #T_2e50c_row10_col7 {
  background-color: #450457;
  color: #f1f1f1;
}
#T_2e50c_row6_col14 {
  background-color: #9bd93c;
  color: #000000;
}
#T_2e50c_row6_col16 {
  background-color: #453781;
  color: #f1f1f1;
}
#T_2e50c_row7_col3 {
  background-color: #69cd5b;
  color: #000000;
}
#T_2e50c_row7_col5 {
  background-color: #58c765;
  color: #000000;
}
#T_2e50c_row7_col8 {
  background-color: #375b8d;
  color: #f1f1f1;
}
#T_2e50c_row7_col10, #T_2e50c_row7_col11 {
  background-color: #46075a;
  color: #f1f1f1;
}
#T_2e50c_row7_col12 {
  background-color: #3f4788;
  color: #f1f1f1;
}
#T_2e50c_row7_col14 {
  background-color: #6ece58;
  color: #000000;
}
#T_2e50c_row7_col15, #T_2e50c_row10_col8 {
  background-color: #46307e;
  color: #f1f1f1;
}
#T_2e50c_row7_col16 {
  background-color: #424186;
  color: #f1f1f1;
}
#T_2e50c_row8_col2 {
  background-color: #52c569;
  color: #000000;
}
#T_2e50c_row8_col5 {
  background-color: #b5de2b;
  color: #000000;
}
#T_2e50c_row8_col11 {
  background-color: #460a5d;
  color: #f1f1f1;
}
#T_2e50c_row8_col12, #T_2e50c_row9_col15 {
  background-color: #472d7b;
  color: #f1f1f1;
}
#T_2e50c_row8_col14 {
  background-color: #73d056;
  color: #000000;
}
#T_2e50c_row8_col16 {
  background-color: #365c8d;
  color: #f1f1f1;
}
#T_2e50c_row9_col2 {
  background-color: #86d549;
  color: #000000;
}
#T_2e50c_row9_col3 {
  background-color: #46c06f;
  color: #f1f1f1;
}
#T_2e50c_row9_col5 {
  background-color: #32b67a;
  color: #f1f1f1;
}
#T_2e50c_row9_col8 {
  background-color: #423f85;
  color: #f1f1f1;
}
#T_2e50c_row9_col10 {
  background-color: #471164;
  color: #f1f1f1;
}
#T_2e50c_row9_col11, #T_2e50c_row11_col4 {
  background-color: #471063;
  color: #f1f1f1;
}
#T_2e50c_row9_col12 {
  background-color: #32648e;
  color: #f1f1f1;
}
#T_2e50c_row9_col14 {
  background-color: #38b977;
  color: #f1f1f1;
}
#T_2e50c_row9_col16 {
  background-color: #2a768e;
  color: #f1f1f1;
}
#T_2e50c_row10_col2 {
  background-color: #c2df23;
  color: #000000;
}
#T_2e50c_row10_col3 {
  background-color: #21a685;
  color: #f1f1f1;
}
#T_2e50c_row10_col9, #T_2e50c_row10_col12 {
  background-color: #1f9f88;
  color: #f1f1f1;
}
#T_2e50c_row10_col10, #T_2e50c_row10_col11 {
  background-color: #481f70;
  color: #f1f1f1;
}
#T_2e50c_row10_col15 {
  background-color: #34618d;
  color: #f1f1f1;
}
#T_2e50c_row10_col16 {
  background-color: #2fb47c;
  color: #f1f1f1;
}
#T_2e50c_row11_col3 {
  background-color: #21a585;
  color: #f1f1f1;
}
#T_2e50c_row11_col7 {
  background-color: #481467;
  color: #f1f1f1;
}
#T_2e50c_row11_col8 {
  background-color: #7fd34e;
  color: #000000;
}
#T_2e50c_row11_col9 {
  background-color: #c5e021;
  color: #000000;
}
</style>
<!--
  Per-layer benchmark metrics table (one row per metrics.json, layers 0 to 11).
  Cell ids (T_2e50c_row<R>_col<C>) are the hooks used by the per-cell colour
  rules, so they must stay unchanged.
  NOTE(review): d_sae reads 128 while every path says "128k" - confirm units.
  NOTE(review): the filepath column repeats the row header text verbatim.
-->
<table id="T_2e50c">
  <caption>Benchmark statistics per layer (0 to 11) for the GPT2-Small OAI v5 128k attn-out SAEs</caption>
  <thead>
    <tr>
      <th class="blank level0">&nbsp;</th>
      <th id="T_2e50c_level0_col0" class="col_heading level0 col0" scope="col">version</th>
      <th id="T_2e50c_level0_col1" class="col_heading level0 col1" scope="col">d_sae</th>
      <th id="T_2e50c_level0_col2" class="col_heading level0 col2" scope="col">layer</th>
      <th id="T_2e50c_level0_col3" class="col_heading level0 col3" scope="col">kl_div_with_sae</th>
      <th id="T_2e50c_level0_col4" class="col_heading level0 col4" scope="col">kl_div_with_ablation</th>
      <th id="T_2e50c_level0_col5" class="col_heading level0 col5" scope="col">ce_loss_with_sae</th>
      <th id="T_2e50c_level0_col6" class="col_heading level0 col6" scope="col">ce_loss_without_sae</th>
      <th id="T_2e50c_level0_col7" class="col_heading level0 col7" scope="col">ce_loss_with_ablation</th>
      <th id="T_2e50c_level0_col8" class="col_heading level0 col8" scope="col">kl_div_score</th>
      <th id="T_2e50c_level0_col9" class="col_heading level0 col9" scope="col">ce_loss_score</th>
      <th id="T_2e50c_level0_col10" class="col_heading level0 col10" scope="col">l2_norm_in</th>
      <th id="T_2e50c_level0_col11" class="col_heading level0 col11" scope="col">l2_norm_out</th>
      <th id="T_2e50c_level0_col12" class="col_heading level0 col12" scope="col">l2_ratio</th>
      <th id="T_2e50c_level0_col13" class="col_heading level0 col13" scope="col">l0</th>
      <th id="T_2e50c_level0_col14" class="col_heading level0 col14" scope="col">l1</th>
      <th id="T_2e50c_level0_col15" class="col_heading level0 col15" scope="col">explained_variance</th>
      <th id="T_2e50c_level0_col16" class="col_heading level0 col16" scope="col">mse</th>
      <th id="T_2e50c_level0_col17" class="col_heading level0 col17" scope="col">total_tokens_evaluated</th>
      <th id="T_2e50c_level0_col18" class="col_heading level0 col18" scope="col">filepath</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th id="T_2e50c_level0_row0" class="row_heading level0 row0" scope="row">OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_0/metrics.json</th>
      <td id="T_2e50c_row0_col0" class="data row0 col0">5</td>
      <td id="T_2e50c_row0_col1" class="data row0 col1">128</td>
      <td id="T_2e50c_row0_col2" class="data row0 col2">0</td>
      <td id="T_2e50c_row0_col3" class="data row0 col3">0.003102</td>
      <td id="T_2e50c_row0_col4" class="data row0 col4">2.121528</td>
      <td id="T_2e50c_row0_col5" class="data row0 col5">3.600031</td>
      <td id="T_2e50c_row0_col6" class="data row0 col6">3.599065</td>
      <td id="T_2e50c_row0_col7" class="data row0 col7">5.748601</td>
      <td id="T_2e50c_row0_col8" class="data row0 col8">0.998538</td>
      <td id="T_2e50c_row0_col9" class="data row0 col9">0.999551</td>
      <td id="T_2e50c_row0_col10" class="data row0 col10">32.013138</td>
      <td id="T_2e50c_row0_col11" class="data row0 col11">31.910549</td>
      <td id="T_2e50c_row0_col12" class="data row0 col12">0.996805</td>
      <td id="T_2e50c_row0_col13" class="data row0 col13">31.989422</td>
      <td id="T_2e50c_row0_col14" class="data row0 col14">39.967445</td>
      <td id="T_2e50c_row0_col15" class="data row0 col15">0.973218</td>
      <td id="T_2e50c_row0_col16" class="data row0 col16">6.201825</td>
      <td id="T_2e50c_row0_col17" class="data row0 col17">6144.000000</td>
      <td id="T_2e50c_row0_col18" class="data row0 col18">OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_0/metrics.json</td>
    </tr>
    <tr>
      <th id="T_2e50c_level0_row1" class="row_heading level0 row1" scope="row">OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_1/metrics.json</th>
      <td id="T_2e50c_row1_col0" class="data row1 col0">5</td>
      <td id="T_2e50c_row1_col1" class="data row1 col1">128</td>
      <td id="T_2e50c_row1_col2" class="data row1 col2">1</td>
      <td id="T_2e50c_row1_col3" class="data row1 col3">0.001700</td>
      <td id="T_2e50c_row1_col4" class="data row1 col4">0.024066</td>
      <td id="T_2e50c_row1_col5" class="data row1 col5">3.601379</td>
      <td id="T_2e50c_row1_col6" class="data row1 col6">3.599065</td>
      <td id="T_2e50c_row1_col7" class="data row1 col7">3.620633</td>
      <td id="T_2e50c_row1_col8" class="data row1 col8">0.929377</td>
      <td id="T_2e50c_row1_col9" class="data row1 col9">0.892674</td>
      <td id="T_2e50c_row1_col10" class="data row1 col10">9.714648</td>
      <td id="T_2e50c_row1_col11" class="data row1 col11">9.198002</td>
      <td id="T_2e50c_row1_col12" class="data row1 col12">0.942320</td>
      <td id="T_2e50c_row1_col13" class="data row1 col13">31.999023</td>
      <td id="T_2e50c_row1_col14" class="data row1 col14">78.196198</td>
      <td id="T_2e50c_row1_col15" class="data row1 col15">0.879798</td>
      <td id="T_2e50c_row1_col16" class="data row1 col16">8.594531</td>
      <td id="T_2e50c_row1_col17" class="data row1 col17">6144.000000</td>
      <td id="T_2e50c_row1_col18" class="data row1 col18">OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_1/metrics.json</td>
    </tr>
    <tr>
      <th id="T_2e50c_level0_row2" class="row_heading level0 row2" scope="row">OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_2/metrics.json</th>
      <td id="T_2e50c_row2_col0" class="data row2 col0">5</td>
      <td id="T_2e50c_row2_col1" class="data row2 col1">128</td>
      <td id="T_2e50c_row2_col2" class="data row2 col2">2</td>
      <td id="T_2e50c_row2_col3" class="data row2 col3">0.002000</td>
      <td id="T_2e50c_row2_col4" class="data row2 col4">0.031005</td>
      <td id="T_2e50c_row2_col5" class="data row2 col5">3.599786</td>
      <td id="T_2e50c_row2_col6" class="data row2 col6">3.599065</td>
      <td id="T_2e50c_row2_col7" class="data row2 col7">3.626660</td>
      <td id="T_2e50c_row2_col8" class="data row2 col8">0.935502</td>
      <td id="T_2e50c_row2_col9" class="data row2 col9">0.973847</td>
      <td id="T_2e50c_row2_col10" class="data row2 col10">8.641823</td>
      <td id="T_2e50c_row2_col11" class="data row2 col11">8.086132</td>
      <td id="T_2e50c_row2_col12" class="data row2 col12">0.934166</td>
      <td id="T_2e50c_row2_col13" class="data row2 col13">31.987143</td>
      <td id="T_2e50c_row2_col14" class="data row2 col14">79.245674</td>
      <td id="T_2e50c_row2_col15" class="data row2 col15">0.869267</td>
      <td id="T_2e50c_row2_col16" class="data row2 col16">8.591892</td>
      <td id="T_2e50c_row2_col17" class="data row2 col17">6144.000000</td>
      <td id="T_2e50c_row2_col18" class="data row2 col18">OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_2/metrics.json</td>
    </tr>
    <tr>
      <th id="T_2e50c_level0_row3" class="row_heading level0 row3" scope="row">OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_3/metrics.json</th>
      <td id="T_2e50c_row3_col0" class="data row3 col0">5</td>
      <td id="T_2e50c_row3_col1" class="data row3 col1">128</td>
      <td id="T_2e50c_row3_col2" class="data row3 col2">3</td>
      <td id="T_2e50c_row3_col3" class="data row3 col3">0.002505</td>
      <td id="T_2e50c_row3_col4" class="data row3 col4">0.025134</td>
      <td id="T_2e50c_row3_col5" class="data row3 col5">3.601374</td>
      <td id="T_2e50c_row3_col6" class="data row3 col6">3.599065</td>
      <td id="T_2e50c_row3_col7" class="data row3 col7">3.628661</td>
      <td id="T_2e50c_row3_col8" class="data row3 col8">0.900320</td>
      <td id="T_2e50c_row3_col9" class="data row3 col9">0.921957</td>
      <td id="T_2e50c_row3_col10" class="data row3 col10">8.571012</td>
      <td id="T_2e50c_row3_col11" class="data row3 col11">7.854372</td>
      <td id="T_2e50c_row3_col12" class="data row3 col12">0.916164</td>
      <td id="T_2e50c_row3_col13" class="data row3 col13">32.000000</td>
      <td id="T_2e50c_row3_col14" class="data row3 col14">77.344894</td>
      <td id="T_2e50c_row3_col15" class="data row3 col15">0.837647</td>
      <td id="T_2e50c_row3_col16" class="data row3 col16">11.842029</td>
      <td id="T_2e50c_row3_col17" class="data row3 col17">6144.000000</td>
      <td id="T_2e50c_row3_col18" class="data row3 col18">OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_3/metrics.json</td>
    </tr>
    <tr>
      <th id="T_2e50c_level0_row4" class="row_heading level0 row4" scope="row">OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_4/metrics.json</th>
      <td id="T_2e50c_row4_col0" class="data row4 col0">5</td>
      <td id="T_2e50c_row4_col1" class="data row4 col1">128</td>
      <td id="T_2e50c_row4_col2" class="data row4 col2">4</td>
      <td id="T_2e50c_row4_col3" class="data row4 col3">0.003484</td>
      <td id="T_2e50c_row4_col4" class="data row4 col4">0.026723</td>
      <td id="T_2e50c_row4_col5" class="data row4 col5">3.601578</td>
      <td id="T_2e50c_row4_col6" class="data row4 col6">3.599065</td>
      <td id="T_2e50c_row4_col7" class="data row4 col7">3.632133</td>
      <td id="T_2e50c_row4_col8" class="data row4 col8">0.869623</td>
      <td id="T_2e50c_row4_col9" class="data row4 col9">0.924002</td>
      <td id="T_2e50c_row4_col10" class="data row4 col10">9.123016</td>
      <td id="T_2e50c_row4_col11" class="data row4 col11">8.117954</td>
      <td id="T_2e50c_row4_col12" class="data row4 col12">0.891417</td>
      <td id="T_2e50c_row4_col13" class="data row4 col13">31.999350</td>
      <td id="T_2e50c_row4_col14" class="data row4 col14">75.460785</td>
      <td id="T_2e50c_row4_col15" class="data row4 col15">0.798166</td>
      <td id="T_2e50c_row4_col16" class="data row4 col16">17.370716</td>
      <td id="T_2e50c_row4_col17" class="data row4 col17">6144.000000</td>
      <td id="T_2e50c_row4_col18" class="data row4 col18">OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_4/metrics.json</td>
    </tr>
    <tr>
      <th id="T_2e50c_level0_row5" class="row_heading level0 row5" scope="row">OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_5/metrics.json</th>
      <td id="T_2e50c_row5_col0" class="data row5 col0">5</td>
      <td id="T_2e50c_row5_col1" class="data row5 col1">128</td>
      <td id="T_2e50c_row5_col2" class="data row5 col2">5</td>
      <td id="T_2e50c_row5_col3" class="data row5 col3">0.003556</td>
      <td id="T_2e50c_row5_col4" class="data row5 col4">0.031378</td>
      <td id="T_2e50c_row5_col5" class="data row5 col5">3.601702</td>
      <td id="T_2e50c_row5_col6" class="data row5 col6">3.599065</td>
      <td id="T_2e50c_row5_col7" class="data row5 col7">3.627760</td>
      <td id="T_2e50c_row5_col8" class="data row5 col8">0.886681</td>
      <td id="T_2e50c_row5_col9" class="data row5 col9">0.908081</td>
      <td id="T_2e50c_row5_col10" class="data row5 col10">10.034396</td>
      <td id="T_2e50c_row5_col11" class="data row5 col11">8.960555</td>
      <td id="T_2e50c_row5_col12" class="data row5 col12">0.894489</td>
      <td id="T_2e50c_row5_col13" class="data row5 col13">31.998373</td>
      <td id="T_2e50c_row5_col14" class="data row5 col14">73.329163</td>
      <td id="T_2e50c_row5_col15" class="data row5 col15">0.804148</td>
      <td id="T_2e50c_row5_col16" class="data row5 col16">22.216118</td>
      <td id="T_2e50c_row5_col17" class="data row5 col17">6144.000000</td>
      <td id="T_2e50c_row5_col18" class="data row5 col18">OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_5/metrics.json</td>
    </tr>
    <tr>
      <th id="T_2e50c_level0_row6" class="row_heading level0 row6" scope="row">OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_6/metrics.json</th>
      <td id="T_2e50c_row6_col0" class="data row6 col0">5</td>
      <td id="T_2e50c_row6_col1" class="data row6 col1">128</td>
      <td id="T_2e50c_row6_col2" class="data row6 col2">6</td>
      <td id="T_2e50c_row6_col3" class="data row6 col3">0.004436</td>
      <td id="T_2e50c_row6_col4" class="data row6 col4">0.032789</td>
      <td id="T_2e50c_row6_col5" class="data row6 col5">3.603125</td>
      <td id="T_2e50c_row6_col6" class="data row6 col6">3.599065</td>
      <td id="T_2e50c_row6_col7" class="data row6 col7">3.634286</td>
      <td id="T_2e50c_row6_col8" class="data row6 col8">0.864723</td>
      <td id="T_2e50c_row6_col9" class="data row6 col9">0.884709</td>
      <td id="T_2e50c_row6_col10" class="data row6 col10">11.678066</td>
      <td id="T_2e50c_row6_col11" class="data row6 col11">10.348961</td>
      <td id="T_2e50c_row6_col12" class="data row6 col12">0.888121</td>
      <td id="T_2e50c_row6_col13" class="data row6 col13">31.998699</td>
      <td id="T_2e50c_row6_col14" class="data row6 col14">70.520447</td>
      <td id="T_2e50c_row6_col15" class="data row6 col15">0.776109</td>
      <td id="T_2e50c_row6_col16" class="data row6 col16">31.851517</td>
      <td id="T_2e50c_row6_col17" class="data row6 col17">6144.000000</td>
      <td id="T_2e50c_row6_col18" class="data row6 col18">OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_6/metrics.json</td>
    </tr>
    <tr>
      <th id="T_2e50c_level0_row7" class="row_heading level0 row7" scope="row">OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_7/metrics.json</th>
      <td id="T_2e50c_row7_col0" class="data row7 col0">5</td>
      <td id="T_2e50c_row7_col1" class="data row7 col1">128</td>
      <td id="T_2e50c_row7_col2" class="data row7 col2">7</td>
      <td id="T_2e50c_row7_col3" class="data row7 col3">0.004356</td>
      <td id="T_2e50c_row7_col4" class="data row7 col4">0.034661</td>
      <td id="T_2e50c_row7_col5" class="data row7 col5">3.602255</td>
      <td id="T_2e50c_row7_col6" class="data row7 col6">3.599065</td>
      <td id="T_2e50c_row7_col7" class="data row7 col7">3.634834</td>
      <td id="T_2e50c_row7_col8" class="data row7 col8">0.874331</td>
      <td id="T_2e50c_row7_col9" class="data row7 col9">0.910796</td>
      <td id="T_2e50c_row7_col10" class="data row7 col10">13.650208</td>
      <td id="T_2e50c_row7_col11" class="data row7 col11">12.425106</td>
      <td id="T_2e50c_row7_col12" class="data row7 col12">0.911562</td>
      <td id="T_2e50c_row7_col13" class="data row7 col13">31.999350</td>
      <td id="T_2e50c_row7_col14" class="data row7 col14">66.425446</td>
      <td id="T_2e50c_row7_col15" class="data row7 col15">0.803248</td>
      <td id="T_2e50c_row7_col16" class="data row7 col16">37.544487</td>
      <td id="T_2e50c_row7_col17" class="data row7 col17">6144.000000</td>
      <td id="T_2e50c_row7_col18" class="data row7 col18">OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_7/metrics.json</td>
    </tr>
    <tr>
      <th id="T_2e50c_level0_row8" class="row_heading level0 row8" scope="row">OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_8/metrics.json</th>
      <td id="T_2e50c_row8_col0" class="data row8 col0">5</td>
      <td id="T_2e50c_row8_col1" class="data row8 col1">128</td>
      <td id="T_2e50c_row8_col2" class="data row8 col2">8</td>
      <td id="T_2e50c_row8_col3" class="data row8 col3">0.005140</td>
      <td id="T_2e50c_row8_col4" class="data row8 col4">0.029382</td>
      <td id="T_2e50c_row8_col5" class="data row8 col5">3.602755</td>
      <td id="T_2e50c_row8_col6" class="data row8 col6">3.599065</td>
      <td id="T_2e50c_row8_col7" class="data row8 col7">3.625802</td>
      <td id="T_2e50c_row8_col8" class="data row8 col8">0.825072</td>
      <td id="T_2e50c_row8_col9" class="data row8 col9">0.861990</td>
      <td id="T_2e50c_row8_col10" class="data row8 col10">16.137949</td>
      <td id="T_2e50c_row8_col11" class="data row8 col11">14.539435</td>
      <td id="T_2e50c_row8_col12" class="data row8 col12">0.902075</td>
      <td id="T_2e50c_row8_col13" class="data row8 col13">32.000000</td>
      <td id="T_2e50c_row8_col14" class="data row8 col14">66.759209</td>
      <td id="T_2e50c_row8_col15" class="data row8 col15">0.776774</td>
      <td id="T_2e50c_row8_col16" class="data row8 col16">52.779171</td>
      <td id="T_2e50c_row8_col17" class="data row8 col17">6144.000000</td>
      <td id="T_2e50c_row8_col18" class="data row8 col18">OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_8/metrics.json</td>
    </tr>
    <tr>
      <th id="T_2e50c_level0_row9" class="row_heading level0 row9" scope="row">OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_9/metrics.json</th>
      <td id="T_2e50c_row9_col0" class="data row9 col0">5</td>
      <td id="T_2e50c_row9_col1" class="data row9 col1">128</td>
      <td id="T_2e50c_row9_col2" class="data row9 col2">9</td>
      <td id="T_2e50c_row9_col3" class="data row9 col3">0.004128</td>
      <td id="T_2e50c_row9_col4" class="data row9 col4">0.028918</td>
      <td id="T_2e50c_row9_col5" class="data row9 col5">3.601983</td>
      <td id="T_2e50c_row9_col6" class="data row9 col6">3.599065</td>
      <td id="T_2e50c_row9_col7" class="data row9 col7">3.636501</td>
      <td id="T_2e50c_row9_col8" class="data row9 col8">0.857246</td>
      <td id="T_2e50c_row9_col9" class="data row9 col9">0.922041</td>
      <td id="T_2e50c_row9_col10" class="data row9 col10">20.912498</td>
      <td id="T_2e50c_row9_col11" class="data row9 col11">19.252647</td>
      <td id="T_2e50c_row9_col12" class="data row9 col12">0.923308</td>
      <td id="T_2e50c_row9_col13" class="data row9 col13">31.999350</td>
      <td id="T_2e50c_row9_col14" class="data row9 col14">60.197113</td>
      <td id="T_2e50c_row9_col15" class="data row9 col15">0.800841</td>
      <td id="T_2e50c_row9_col16" class="data row9 col16">70.065033</td>
      <td id="T_2e50c_row9_col17" class="data row9 col17">6144.000000</td>
      <td id="T_2e50c_row9_col18" class="data row9 col18">OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_9/metrics.json</td>
    </tr>
    <tr>
      <th id="T_2e50c_level0_row10" class="row_heading level0 row10" scope="row">OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_10/metrics.json</th>
      <td id="T_2e50c_row10_col0" class="data row10 col0">5</td>
      <td id="T_2e50c_row10_col1" class="data row10 col1">128</td>
      <td id="T_2e50c_row10_col2" class="data row10 col2">10</td>
      <td id="T_2e50c_row10_col3" class="data row10 col3">0.003733</td>
      <td id="T_2e50c_row10_col4" class="data row10 col4">0.024752</td>
      <td id="T_2e50c_row10_col5" class="data row10 col5">3.601586</td>
      <td id="T_2e50c_row10_col6" class="data row10 col6">3.599065</td>
      <td id="T_2e50c_row10_col7" class="data row10 col7">3.640488</td>
      <td id="T_2e50c_row10_col8" class="data row10 col8">0.849176</td>
      <td id="T_2e50c_row10_col9" class="data row10 col9">0.939128</td>
      <td id="T_2e50c_row10_col10" class="data row10 col10">31.821377</td>
      <td id="T_2e50c_row10_col11" class="data row10 col11">30.270412</td>
      <td id="T_2e50c_row10_col12" class="data row10 col12">0.950137</td>
      <td id="T_2e50c_row10_col13" class="data row10 col13">32.000000</td>
      <td id="T_2e50c_row10_col14" class="data row10 col14">52.307262</td>
      <td id="T_2e50c_row10_col15" class="data row10 col15">0.836825</td>
      <td id="T_2e50c_row10_col16" class="data row10 col16">112.167625</td>
      <td id="T_2e50c_row10_col17" class="data row10 col17">6144.000000</td>
      <td id="T_2e50c_row10_col18" class="data row10 col18">OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_10/metrics.json</td>
    </tr>
    <tr>
      <th id="T_2e50c_level0_row11" class="row_heading level0 row11" scope="row">OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_11/metrics.json</th>
      <td id="T_2e50c_row11_col0" class="data row11 col0">5</td>
      <td id="T_2e50c_row11_col1" class="data row11 col1">128</td>
      <td id="T_2e50c_row11_col2" class="data row11 col2">11</td>
      <td id="T_2e50c_row11_col3" class="data row11 col3">0.003718</td>
      <td id="T_2e50c_row11_col4" class="data row11 col4">0.106875</td>
      <td id="T_2e50c_row11_col5" class="data row11 col5">3.600695</td>
      <td id="T_2e50c_row11_col6" class="data row11 col6">3.599065</td>
      <td id="T_2e50c_row11_col7" class="data row11 col7">3.730870</td>
      <td id="T_2e50c_row11_col8" class="data row11 col8">0.965215</td>
      <td id="T_2e50c_row11_col9" class="data row11 col9">0.987627</td>
      <td id="T_2e50c_row11_col10" class="data row11 col10">280.864441</td>
      <td id="T_2e50c_row11_col11" class="data row11 col11">280.557678</td>
      <td id="T_2e50c_row11_col12" class="data row11 col12">0.998717</td>
      <td id="T_2e50c_row11_col13" class="data row11 col13">31.750000</td>
      <td id="T_2e50c_row11_col14" class="data row11 col14">20.949717</td>
      <td id="T_2e50c_row11_col15" class="data row11 col15">0.969702</td>
      <td id="T_2e50c_row11_col16" class="data row11 col16">169.453140</td>
      <td id="T_2e50c_row11_col17" class="data row11 col17">6144.000000</td>
      <td id="T_2e50c_row11_col18" class="data row11 col18">OAI_GPT2Small_v5_128k_resid_delta_attn/v5_128k_layer_11/metrics.json</td>
    </tr>
  </tbody>
</table>