|
import html

import gradio as gr
import pandas as pd
|
|
|
|
|
# Columns displayed in the leaderboard table, in display order.
# These strings are used verbatim as DataFrame column keys.
UGI_COLS = [
    '#P',
    'Model',
    'UGI π',
    'W/10 π',
    'Unruly',
    'Internet',
    'CrimeStats',
    'Stories/Jokes',
    'PolContro',
]
|
|
|
|
|
def _render_model_cell(model, link):
    """Return the content of one 'Model' cell.

    String model names are HTML-escaped. When *link* is present the name is
    wrapped in an anchor that opens in a new tab; when *link* is missing the
    (escaped) name is returned on its own.
    """
    # Only strings are escaped; other values (e.g. NaN) pass through as-is.
    label = html.escape(model) if isinstance(model, str) else model
    if pd.isna(link):
        return label
    # quote=True so a '"' in the URL cannot terminate the href attribute.
    href = html.escape(str(link), quote=True)
    return (
        f'<a href="{href}" target="_blank" '
        f'style="color: blue; text-decoration: none;">{label}</a>'
    )


def load_leaderboard_data(csv_file_path):
    """Load the leaderboard CSV and turn model names into HTML cells.

    Args:
        csv_file_path: Path (or anything ``pd.read_csv`` accepts) of the CSV.

    Returns:
        The CSV contents as a DataFrame in which 'Model' holds HTML-escaped
        names, wrapped in a link where the row has a non-missing 'Link'
        value, and the 'Link' column is removed. A CSV without a 'Link'
        column is accepted and simply yields unlinked names. If loading or
        processing fails, the error is printed and an empty DataFrame with
        the ``UGI_COLS`` columns plus 'Params' is returned.
    """
    try:
        df = pd.read_csv(csv_file_path)

        has_links = 'Link' in df.columns
        links = df['Link'] if has_links else [None] * len(df)

        # Built with a plain list rather than a row-wise DataFrame.apply so
        # that a CSV with a header but no rows is handled like any other.
        df['Model'] = [
            _render_model_cell(model, link)
            for model, link in zip(df['Model'], links)
        ]

        if has_links:
            df = df.drop(columns=['Link'])
        return df
    except Exception as e:
        # Deliberate best-effort: report the problem and hand back an empty
        # table instead of failing.
        print(f"Error loading CSV file: {e}")
        # 'Params' is included because update_table filters on that column.
        return pd.DataFrame(columns=[*UGI_COLS, 'Params'])
|
|
|
|
|
# Size-filter labels mapped to half-open [low, high) bounds on the 'Params'
# column. None means the range is unbounded on that side.
_PARAM_RANGE_BOUNDS = {
    '~1.5': (None, 2.5),
    '~3': (2.5, 6),
    '~7': (6, 9.5),
    '~13': (9.5, 16),
    '~20': (16, 28),
    '~34': (28, 40),
    '~50': (40, 60),
    '~70+': (60, None),
}


def update_table(df: pd.DataFrame, query: str, param_ranges: list) -> pd.DataFrame:
    """Filter the leaderboard by size buckets and search text.

    Args:
        df: Leaderboard data. Must contain the ``UGI_COLS`` columns, and a
            'Params' column whenever a known size label is selected.
        query: Text to look for in the 'Model' column. Matched literally and
            case-insensitively; an empty or None query disables the search.
        param_ranges: Selected size labels (keys of ``_PARAM_RANGE_BOUNDS``).
            A row is kept when its 'Params' value falls in ANY selected
            range. Unknown labels are ignored; an empty list or None
            disables size filtering.

    Returns:
        The rows passing both filters, restricted to the ``UGI_COLS`` columns.
    """
    filtered_df = df

    size_mask = None
    for label in param_ranges or []:
        bounds = _PARAM_RANGE_BOUNDS.get(label)
        if bounds is None:
            continue
        low, high = bounds
        params = filtered_df['Params']
        if low is None:
            in_range = params < high
        elif high is None:
            in_range = params >= low
        else:
            in_range = (params >= low) & (params < high)
        # OR the buckets together: a row only needs to match one of them.
        size_mask = in_range if size_mask is None else size_mask | in_range

    if size_mask is not None:
        filtered_df = filtered_df[size_mask]

    if query:
        # regex=False: the query is user-typed text, so characters such as
        # '(' or '+' must be matched literally rather than parsed as a
        # pattern. na=False: rows with a missing model name never match
        # instead of producing an NaN mask that cannot be used for indexing.
        # NOTE: 'Model' cells may contain anchor markup, so the match runs
        # against the whole cell text, markup included.
        matches = filtered_df['Model'].str.contains(
            query, case=False, regex=False, na=False
        )
        filtered_df = filtered_df[matches]

    return filtered_df[UGI_COLS]
|
|
|
|
|
# Static page header: a contact link above the centered leaderboard title.
_HEADER_HTML = """
<div style="display: flex; flex-direction: column; align-items: center;">
<div style="align-self: flex-start;">
<a href="mailto:[email protected]" target="_blank" style="color: blue; text-decoration: none;">Contact</a>
</div>
<h1 style="margin: 0;">UGI Leaderboard</h1>
</div>
"""

# Explanation of the score columns, shown under the header.
_ABOUT_MARKDOWN = """
**UGI: Uncensored General Intelligence**. The average score from 5 different subjects that LLMs are commonly steered away from. The leaderboard is made of roughly 60 questions/tasks, measuring both "willingness to answer" and "accuracy" in controversial fact-based questions.

**W/10:** A more narrow, 10-point score, solely measuring the LLM's Willingness to answer controversial questions.

**Unruly:** Knowledge of activities that are generally frowned upon.

**Internet:** Knowledge of various internet information, from professional to deviant.

**CrimeStats:** Knowledge of crime statistics which are uncomfortable to talk about.

**Stories/Jokes:** Ability to write offensive stories and jokes.

**PolContro:** Knowledge of politically/socially controversial information.
"""

with gr.Blocks() as GraInter:
    gr.HTML(_HEADER_HTML)
    gr.Markdown(_ABOUT_MARKDOWN)

    # Filter controls: free-text search on top, size buckets underneath.
    with gr.Column():
        with gr.Row():
            search_bar = gr.Textbox(
                placeholder=" π Search for a model...",
                show_label=False,
                elem_id="search-bar",
            )
        with gr.Row():
            filter_columns_size = gr.CheckboxGroup(
                choices=['~1.5', '~3', '~7', '~13', '~20', '~34', '~50', '~70+'],
                value=[],
                label="Model sizes (in billions of parameters)",
                interactive=True,
                elem_id="filter-columns-size",
            )

    # Loaded once at start-up; every filter callback reads from this frame.
    leaderboard_df = load_leaderboard_data("ugi-leaderboard-data.csv")

    # 'Model' cells carry HTML; every other column is declared as 'str'.
    datatypes = [{'Model': 'html'}.get(col, 'str') for col in UGI_COLS]

    leaderboard_table = gr.Dataframe(
        value=leaderboard_df[UGI_COLS],
        datatype=datatypes,
        interactive=False,
        visible=True,
        elem_classes="text-sm",
    )

    inputs = [search_bar, filter_columns_size]
    outputs = leaderboard_table

    # Both controls trigger the same refresh: re-filter the full data set
    # using the current search text and the current size selection.
    for trigger in (search_bar, filter_columns_size):
        trigger.change(
            fn=lambda query, param_ranges: update_table(leaderboard_df, query, param_ranges),
            inputs=inputs,
            outputs=outputs,
        )

GraInter.launch()