Spaces:

g8a9
/

fair-asr-leaderboard

Running

App Files Files Community

g8a9 committed on Dec 23, 2024

Commit

86e679c

1 Parent(s): 5b47e88

enhance data processing and visualization: add support for common languages and improve handling of models with NaN values

Browse files

Files changed (4) hide show

app.py +107 -32
config.py +9 -0
parsing.py +30 -10
twists_banner.png +0 -0

app.py CHANGED Viewed

@@ -12,7 +12,7 @@ from config import (
     CITATION_BUTTON_TEXT,
     CITATION_BUTTON_LABEL,
 )
-from parsing import read_all_configs
 # Set up logging
 logging.basicConfig(
@@ -50,43 +50,66 @@ def format_dataframe(df, times_100=False):
     return df
-with gr.Blocks() as fm_interface:
     fm = SETUPS[0]
     setup = fm["majority_group"] + "_" + fm["minority_group"]
     results = read_all_configs(setup)
-    model_results = (
         results.pivot_table(
             index="Model", values="Gap", aggfunc=lambda x: 100 * x.abs().sum()
         )
         .reset_index()
         .sort_values("Gap")
     )
-    best_model = model_results.iloc[0]["Model"]
-    print("Best model:", best_model)
-    # model_results = format_dataframe(model_results)
-    # print(results.head())
-    gr.Markdown("### Sum of Absolute Gaps ⬇️")
-    gr.DataFrame(format_dataframe(model_results))
-    gr.Markdown("#### F-M gaps by language")
-    lang_results = results.pivot_table(
         index="Model",
         values="Gap",
         columns="Language",
     ).reset_index()
-    gr.DataFrame(format_dataframe(lang_results, times_100=True))
     # gr.Plot(fig1)
     results["Gap"] = results["Gap"] * 100
-    fig = px.bar(
-        results,
         x="Language",
         y="Gap",
         color="Model",
-        title="Gaps by Language and Model",
         labels={
             "Gap": "Sum of Absolute Gaps (%)",
             "Language": "Language",
@@ -95,29 +118,81 @@ with gr.Blocks() as fm_interface:
         barmode="group",
     )
     lang_order = (
-        lang_results.set_index("Model")
-        .loc[best_model]
-        .sort_values(ascending=False)
-        .index
     )
-    print(lang_order)
-    # [best_model].sort_values().index
-    fig.update_layout(xaxis={"categoryorder": "array", "categoryarray": lang_order})
-    gr.Plot(fig)
-    # gr.Plot(fig2)
 tabs = [fm_interface]
 titles = ["F-M Setup"]
 with gr.Blocks() as demo:
     gr.Markdown("# Fair ASR Leadeboard")
-    gr.Markdown(
-        """
-Datasets currently included:
-- **Mozilla Common Voice v17**
-"""
-    )
     gr.TabbedInterface(tabs, titles)
     gr.Textbox(

     CITATION_BUTTON_TEXT,
     CITATION_BUTTON_LABEL,
 )
+from parsing import read_all_configs, get_common_langs
 # Set up logging
 logging.basicConfig(
     return df
+def _build_models_with_nan_md(models_with_nan):
+    # Build the Markdown notice that names the models whose results are hidden.
+    # `models_with_nan` is iterated and every entry is interpolated into the
+    # text; callers pass the list of model names produced by
+    # `_populate_components`.
+    # Surround each name with asterisks so Markdown renders it in italics.
+    model_markups = [f"*{m}*" for m in models_with_nan]
+    return f"""
+We are currently hiding the results of {', '.join(model_markups)} because they don't support all languages.
+"""
+def build_components(show_common_langs):
+    # Event handler registered on `show_common_langs.input` in the main
+    # interface: recompute the data for the current checkbox value and return
+    # one component per entry of that listener's `outputs` list, in the same
+    # order (aggregated table, per-language table, bar plot, hidden-models notice).
+    aggregated_df, lang_df, barplot_fig, models_with_nan = _populate_components(
+        show_common_langs
+    )
+    models_with_nan_md = _build_models_with_nan_md(models_with_nan)
+    return (
+        gr.DataFrame(format_dataframe(aggregated_df)),
+        gr.DataFrame(format_dataframe(lang_df, times_100=True)),
+        gr.Plot(barplot_fig),
+        # Only show the notice when at least one model was actually hidden.
+        gr.Markdown(models_with_nan_md, visible=len(models_with_nan) > 0),
+    )
+def _populate_components(show_common_langs):
     fm = SETUPS[0]
     setup = fm["majority_group"] + "_" + fm["minority_group"]
     results = read_all_configs(setup)
+    if show_common_langs:
+        common_langs = get_common_langs()
+        results = results[results["Language"].isin(common_langs)]
+    models_with_nan = results[results.isna().any(axis=1)]["Model"].unique().tolist()
+    logger.info(f"Models with NaN values: {models_with_nan}")
+    results = results[~results["Model"].isin(models_with_nan)]
+    aggregated_df = (
         results.pivot_table(
             index="Model", values="Gap", aggfunc=lambda x: 100 * x.abs().sum()
         )
         .reset_index()
         .sort_values("Gap")
     )
+    best_model = aggregated_df.iloc[0]["Model"]
+    top_3_models = aggregated_df["Model"].head(3).tolist()
+    # main_df = gr.DataFrame(format_dataframe(model_results))
+    lang_df = results.pivot_table(
         index="Model",
         values="Gap",
         columns="Language",
     ).reset_index()
+    # lang_df = gr.DataFrame(format_dataframe(lang_results, times_100=True))
     # gr.Plot(fig1)
     results["Gap"] = results["Gap"] * 100
+    barplot_fig = px.bar(
+        results.loc[results["Model"].isin(top_3_models)],
         x="Language",
         y="Gap",
         color="Model",
+        title="Gaps by Language and Model (top 3, sorted by the best model)",
         labels={
             "Gap": "Sum of Absolute Gaps (%)",
             "Language": "Language",
         barmode="group",
     )
     lang_order = (
+        lang_df.set_index("Model").loc[best_model].sort_values(ascending=False).index
+    )
+    logger.info(f"Lang order: {lang_order}")
+    barplot_fig.update_layout(
+        xaxis={"categoryorder": "array", "categoryarray": lang_order}
+    )
+    return aggregated_df, lang_df, barplot_fig, models_with_nan
+with gr.Blocks() as fm_interface:
+    aggregated_df, lang_df, barplot_fig, model_with_nan = _populate_components(
+        show_common_langs=False
     )
+    model_with_nans_md = gr.Markdown(_build_models_with_nan_md(model_with_nan))
+    gr.Markdown("### Sum of Absolute Gaps ⬇️")
+    aggregated_df_comp = gr.DataFrame(format_dataframe(aggregated_df))
+    gr.Markdown("#### F-M gaps by language")
+    lang_df_comp = gr.DataFrame(format_dataframe(lang_df, times_100=True))
+    barplot_fig_comp = gr.Plot(barplot_fig)
+###################
+# LIST MAIN TABS
+###################
 tabs = [fm_interface]
 titles = ["F-M Setup"]
+banner = """
+<style>
+    .full-width-image {
+        width: 100%;
+        height: auto;
+        margin: 0;
+        padding: 0;
+    }
+</style>
+<div>
+    <img src="https://huggingface.co/spaces/g8a9/fair-asr-leaderboard/raw/main/twists_banner.png" alt="Twists Banner" class="full-width-image">
+</div>
+"""
+###################
+# MAIN INTERFACE
+###################
 with gr.Blocks() as demo:
+    gr.HTML(banner)
     gr.Markdown("# Fair ASR Leadeboard")
+    with gr.Row() as config_row:
+        show_common_langs = gr.CheckboxGroup(
+            choices=["Show only common languages"],
+            label="Main configuration",
+        )
+        include_datasets = gr.CheckboxGroup(
+            choices=["Mozilla CV 17"],
+            label="Include datasets",
+            value=["Mozilla CV 17"],
+            interactive=False,
+        )
+        show_common_langs.input(
+            build_components,
+            inputs=[show_common_langs],
+            outputs=[
+                aggregated_df_comp,
+                lang_df_comp,
+                barplot_fig_comp,
+                model_with_nans_md,
+            ],
+        )
     gr.TabbedInterface(tabs, titles)
     gr.Textbox(

config.py CHANGED Viewed

@@ -59,10 +59,19 @@ class SeamlessInfo:
     # fmt: on
 model2info = {
     "openai--whisper-large-v3": WhisperInfo,
     "openai--whisper-large-v3-turbo": WhisperInfo,
     "facebook--seamless-m4t-v2-large": SeamlessInfo,
 }

     # fmt: on
+class CanaryInfo:
+    # Language codes listed for this model. `get_common_langs` in parsing.py
+    # intersects the `langs` attribute of every `model2info` entry, so this
+    # list takes part in deciding which languages count as "common".
+    # fmt: off
+    langs = [
+        "en", "es", "de", "fr",
+    ]
+    # fmt: on
 model2info = {
     "openai--whisper-large-v3": WhisperInfo,
     "openai--whisper-large-v3-turbo": WhisperInfo,
     "facebook--seamless-m4t-v2-large": SeamlessInfo,
+    "nvidia--canary-1b": CanaryInfo,
 }

parsing.py CHANGED Viewed

@@ -3,6 +3,9 @@ from typing import List
 from os.path import join as opj
 import json
 from config import dataset2info, model2info, LOCAL_RESULTS_DIR
 def load_language_results(
@@ -10,16 +13,24 @@ def load_language_results(
 ):
     lang_gaps = dict()
     for lang in lang_ids:
-        with open(
-            opj(
-                LOCAL_RESULTS_DIR,
-                "evaluation",
-                dataset_id,
-                f"results_{model_id}_{dataset_id}_devtest_{lang}_gender_{setup}.json",
             )
-        ) as fp:
-            data = json.load(fp)
-            lang_gaps[lang] = data[f"{data['eval_metric']}_diff_mean"]
     return lang_gaps
@@ -50,7 +61,16 @@ def read_all_configs(setup: str):
             )
     results_df = pd.DataFrame(rows)
-    results_df = results_df.drop(columns=["Dataset"])
     # results_df = results_df.sort_values(by="Mean Gap", ascending=True)
     return results_df

 from os.path import join as opj
 import json
 from config import dataset2info, model2info, LOCAL_RESULTS_DIR
+import logging
+logger = logging.getLogger(__name__)
 def load_language_results(
 ):
     lang_gaps = dict()
     for lang in lang_ids:
+        try:
+            with open(
+                opj(
+                    LOCAL_RESULTS_DIR,
+                    "evaluation",
+                    dataset_id,
+                    f"results_{model_id}_{dataset_id}_devtest_{lang}_gender_{setup}.json",
+                )
+            ) as fp:
+                data = json.load(fp)
+                lang_gaps[lang] = data[f"{data['eval_metric']}_diff_mean"]
+        except FileNotFoundError:
+            logger.debug(
+                f"We could not find the result file for <model,dataset,lang>: {model_id}, {dataset_id}, {lang}"
             )
+            lang_gaps[lang] = None
     return lang_gaps
             )
     results_df = pd.DataFrame(rows)
+    # results_df = results_df.drop(columns=["Dataset"])
     # results_df = results_df.sort_values(by="Mean Gap", ascending=True)
     return results_df
+def get_common_langs():
+    """Return a list of langs that are support by all models"""
+    common_langs = set(model2info[list(model2info.keys())[0]].langs)
+    for model_id in model2info.keys():
+        common_langs = common_langs.intersection(model2info[model_id].langs)
+    return list(common_langs)

twists_banner.png ADDED Viewed