|
"""Show random samples. Simple method, but it often turns up interesting things.""" |
|
import pandas as pd |
|
import streamlit as st |
|
|
|
from src.subpages.page import Context, Page |
|
from src.utils import htmlify_labeled_example |
|
|
|
|
|
class RandomSamplesPage(Page):
    """Streamlit page that shows a random selection of labeled examples.

    Each displayed example is accompanied by a small badge with its running
    number, its index, and the sum of its ``losses`` column.
    """

    name = "Random Samples"
    icon = "shuffle"

    def _get_widget_defaults(self):
        """Return the default values for this page's widget/session keys.

        ``render`` reads ``random_sample_size_min`` from ``st.session_state``;
        presumably the ``Page`` base class copies these defaults there
        (TODO confirm against ``Page``).
        """
        return {
            "random_sample_size_min": 128,
        }

    def render(self, context: Context):
        """Render the page: title, sample-size input, resample button, samples.

        Args:
            context: Supplies ``df`` (sampled for indices),
                ``df_tokens_merged`` (looked up by those indices) and
                ``split_sample_size`` (upper bound for the default size).
        """
        st.title("🎲 Random Samples")
        with st.expander("💡", expanded=True):
            st.write(
                "Show random samples. Simple method, but it often turns up interesting things."
            )

        random_sample_size = st.number_input(
            "Random sample size:",
            value=min(st.session_state.random_sample_size_min, context.split_sample_size),
            step=16,
            key="random_sample_size",
        )

        if st.button("🎲 Resample"):
            st.experimental_rerun()

        # The number input is unbounded, so clamp the request: DataFrame.sample
        # raises ValueError for a negative n or for n larger than the frame
        # (when sampling without replacement).
        sample_size = max(0, min(int(random_sample_size), len(context.df)))
        random_indices = context.df.sample(sample_size).index
        samples = context.df_tokens_merged.loc[random_indices]

        for i, idx in enumerate(random_indices):
            sample = samples.loc[idx]

            # .loc yields a Series when idx matches exactly one row; only
            # multi-row (DataFrame) results are rendered. Presumably a
            # single-row match is a one-token example — verify with the
            # schema of df_tokens_merged.
            if isinstance(sample, pd.Series):
                continue

            col1, _, col2 = st.columns([0.08, 0.025, 0.8])

            counter = f"<span title='#sample | index' style='display: block; background-color: black; opacity: 1; color: white; padding: 0 5px'>[{i+1} | {idx}]</span>"
            loss = f"<span title='total loss' style='display: block; background-color: yellow; color: gray; padding: 0 5px;'>𝐿 {sample.losses.sum():.3f}</span>"
            col1.write(f"{counter}{loss}", unsafe_allow_html=True)
            col1.write("")
            col2.write(htmlify_labeled_example(sample), unsafe_allow_html=True)
|
|