Spaces:
Running
Running
enhance data processing and visualization: add support for common languages and improve handling of models with NaN values
Browse files- app.py +107 -32
- config.py +9 -0
- parsing.py +30 -10
- twists_banner.png +0 -0
app.py
CHANGED
@@ -12,7 +12,7 @@ from config import (
|
|
12 |
CITATION_BUTTON_TEXT,
|
13 |
CITATION_BUTTON_LABEL,
|
14 |
)
|
15 |
-
from parsing import read_all_configs
|
16 |
|
17 |
# Set up logging
|
18 |
logging.basicConfig(
|
@@ -50,43 +50,66 @@ def format_dataframe(df, times_100=False):
|
|
50 |
return df
|
51 |
|
52 |
|
53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
fm = SETUPS[0]
|
55 |
setup = fm["majority_group"] + "_" + fm["minority_group"]
|
56 |
results = read_all_configs(setup)
|
57 |
|
58 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
results.pivot_table(
|
60 |
index="Model", values="Gap", aggfunc=lambda x: 100 * x.abs().sum()
|
61 |
)
|
62 |
.reset_index()
|
63 |
.sort_values("Gap")
|
64 |
)
|
65 |
-
best_model =
|
66 |
-
|
67 |
-
#
|
68 |
-
# print(results.head())
|
69 |
|
70 |
-
|
71 |
-
gr.DataFrame(format_dataframe(model_results))
|
72 |
-
|
73 |
-
gr.Markdown("#### F-M gaps by language")
|
74 |
-
|
75 |
-
lang_results = results.pivot_table(
|
76 |
index="Model",
|
77 |
values="Gap",
|
78 |
columns="Language",
|
79 |
).reset_index()
|
80 |
-
gr.DataFrame(format_dataframe(lang_results, times_100=True))
|
81 |
|
82 |
# gr.Plot(fig1)
|
83 |
results["Gap"] = results["Gap"] * 100
|
84 |
-
|
85 |
-
results,
|
86 |
x="Language",
|
87 |
y="Gap",
|
88 |
color="Model",
|
89 |
-
title="Gaps by Language and Model",
|
90 |
labels={
|
91 |
"Gap": "Sum of Absolute Gaps (%)",
|
92 |
"Language": "Language",
|
@@ -95,29 +118,81 @@ with gr.Blocks() as fm_interface:
|
|
95 |
barmode="group",
|
96 |
)
|
97 |
lang_order = (
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
102 |
)
|
103 |
-
|
104 |
|
105 |
-
|
106 |
-
|
107 |
-
gr.Plot(fig)
|
108 |
-
# gr.Plot(fig2)
|
109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
tabs = [fm_interface]
|
111 |
titles = ["F-M Setup"]
|
112 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
113 |
with gr.Blocks() as demo:
|
|
|
114 |
gr.Markdown("# Fair ASR Leadeboard")
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
""
|
120 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
121 |
gr.TabbedInterface(tabs, titles)
|
122 |
|
123 |
gr.Textbox(
|
|
|
12 |
CITATION_BUTTON_TEXT,
|
13 |
CITATION_BUTTON_LABEL,
|
14 |
)
|
15 |
+
from parsing import read_all_configs, get_common_langs
|
16 |
|
17 |
# Set up logging
|
18 |
logging.basicConfig(
|
|
|
50 |
return df
|
51 |
|
52 |
|
53 |
+
def _build_models_with_nan_md(models_with_nan):
|
54 |
+
model_markups = [f"*{m}*" for m in models_with_nan]
|
55 |
+
return f"""
|
56 |
+
We are currently hiding the results of {', '.join(model_markups)} because they don't support all languages.
|
57 |
+
"""
|
58 |
+
|
59 |
+
|
60 |
+
def build_components(show_common_langs):
    """Recompute the leaderboard data and wrap it in fresh Gradio components.

    Args:
        show_common_langs: Forwarded unchanged to ``_populate_components``.
            NOTE(review): when used as the checkbox-group handler this is
            presumably the list of ticked choices, so an empty list means
            "do not filter" -- confirm against the Gradio version in use.

    Returns:
        tuple: ``(aggregated table, per-language table, bar plot, notice)``
        as ``gr.DataFrame``, ``gr.DataFrame``, ``gr.Plot`` and
        ``gr.Markdown``. The notice is only visible when at least one model
        was reported as having NaN values.
    """
    agg, per_lang, fig, hidden_models = _populate_components(show_common_langs)
    notice_text = _build_models_with_nan_md(hidden_models)

    # Components are created in the same order as they are returned.
    agg_table = gr.DataFrame(format_dataframe(agg))
    per_lang_table = gr.DataFrame(format_dataframe(per_lang, times_100=True))
    plot = gr.Plot(fig)
    notice = gr.Markdown(notice_text, visible=len(hidden_models) > 0)

    return agg_table, per_lang_table, plot, notice
|
72 |
+
|
73 |
+
|
74 |
+
def _populate_components(show_common_langs):
|
75 |
fm = SETUPS[0]
|
76 |
setup = fm["majority_group"] + "_" + fm["minority_group"]
|
77 |
results = read_all_configs(setup)
|
78 |
|
79 |
+
if show_common_langs:
|
80 |
+
common_langs = get_common_langs()
|
81 |
+
results = results[results["Language"].isin(common_langs)]
|
82 |
+
|
83 |
+
models_with_nan = results[results.isna().any(axis=1)]["Model"].unique().tolist()
|
84 |
+
logger.info(f"Models with NaN values: {models_with_nan}")
|
85 |
+
results = results[~results["Model"].isin(models_with_nan)]
|
86 |
+
|
87 |
+
aggregated_df = (
|
88 |
results.pivot_table(
|
89 |
index="Model", values="Gap", aggfunc=lambda x: 100 * x.abs().sum()
|
90 |
)
|
91 |
.reset_index()
|
92 |
.sort_values("Gap")
|
93 |
)
|
94 |
+
best_model = aggregated_df.iloc[0]["Model"]
|
95 |
+
top_3_models = aggregated_df["Model"].head(3).tolist()
|
96 |
+
# main_df = gr.DataFrame(format_dataframe(model_results))
|
|
|
97 |
|
98 |
+
lang_df = results.pivot_table(
|
|
|
|
|
|
|
|
|
|
|
99 |
index="Model",
|
100 |
values="Gap",
|
101 |
columns="Language",
|
102 |
).reset_index()
|
103 |
+
# lang_df = gr.DataFrame(format_dataframe(lang_results, times_100=True))
|
104 |
|
105 |
# gr.Plot(fig1)
|
106 |
results["Gap"] = results["Gap"] * 100
|
107 |
+
barplot_fig = px.bar(
|
108 |
+
results.loc[results["Model"].isin(top_3_models)],
|
109 |
x="Language",
|
110 |
y="Gap",
|
111 |
color="Model",
|
112 |
+
title="Gaps by Language and Model (top 3, sorted by the best model)",
|
113 |
labels={
|
114 |
"Gap": "Sum of Absolute Gaps (%)",
|
115 |
"Language": "Language",
|
|
|
118 |
barmode="group",
|
119 |
)
|
120 |
lang_order = (
|
121 |
+
lang_df.set_index("Model").loc[best_model].sort_values(ascending=False).index
|
122 |
+
)
|
123 |
+
logger.info(f"Lang order: {lang_order}")
|
124 |
+
|
125 |
+
barplot_fig.update_layout(
|
126 |
+
xaxis={"categoryorder": "array", "categoryarray": lang_order}
|
127 |
+
)
|
128 |
+
|
129 |
+
return aggregated_df, lang_df, barplot_fig, models_with_nan
|
130 |
+
|
131 |
+
|
132 |
+
# Initial (unfiltered) rendering of the "F-M Setup" tab. The *_comp handles
# and model_with_nans_md are kept at module level so event handlers can list
# them as outputs.
with gr.Blocks() as fm_interface:
    aggregated_df, lang_df, barplot_fig, model_with_nan = _populate_components(
        show_common_langs=False
    )
    # Only show the notice when some model was actually dropped, using the
    # same visibility rule as build_components(); otherwise the sentence
    # would render with an empty list of models.
    model_with_nans_md = gr.Markdown(
        _build_models_with_nan_md(model_with_nan),
        visible=len(model_with_nan) > 0,
    )

    gr.Markdown("### Sum of Absolute Gaps ⬇️")
    aggregated_df_comp = gr.DataFrame(format_dataframe(aggregated_df))

    gr.Markdown("#### F-M gaps by language")
    lang_df_comp = gr.DataFrame(format_dataframe(lang_df, times_100=True))

    barplot_fig_comp = gr.Plot(barplot_fig)
|
145 |
+
|
146 |
+
###################
|
147 |
+
# LIST MAIN TABS
|
148 |
+
###################
|
149 |
tabs = [fm_interface]
|
150 |
titles = ["F-M Setup"]
|
151 |
|
152 |
+
banner = """
|
153 |
+
<style>
|
154 |
+
.full-width-image {
|
155 |
+
width: 100%;
|
156 |
+
height: auto;
|
157 |
+
margin: 0;
|
158 |
+
padding: 0;
|
159 |
+
}
|
160 |
+
</style>
|
161 |
+
<div>
|
162 |
+
<img src="https://huggingface.co/spaces/g8a9/fair-asr-leaderboard/raw/main/twists_banner.png" alt="Twists Banner" class="full-width-image">
|
163 |
+
</div>
|
164 |
+
"""
|
165 |
+
|
166 |
+
###################
|
167 |
+
# MAIN INTERFACE
|
168 |
+
###################
|
169 |
with gr.Blocks() as demo:
|
170 |
+
gr.HTML(banner)
|
171 |
gr.Markdown("# Fair ASR Leadeboard")
|
172 |
+
|
173 |
+
with gr.Row() as config_row:
|
174 |
+
show_common_langs = gr.CheckboxGroup(
|
175 |
+
choices=["Show only common languages"],
|
176 |
+
label="Main configuration",
|
177 |
+
)
|
178 |
+
include_datasets = gr.CheckboxGroup(
|
179 |
+
choices=["Mozilla CV 17"],
|
180 |
+
label="Include datasets",
|
181 |
+
value=["Mozilla CV 17"],
|
182 |
+
interactive=False,
|
183 |
+
)
|
184 |
+
|
185 |
+
show_common_langs.input(
|
186 |
+
build_components,
|
187 |
+
inputs=[show_common_langs],
|
188 |
+
outputs=[
|
189 |
+
aggregated_df_comp,
|
190 |
+
lang_df_comp,
|
191 |
+
barplot_fig_comp,
|
192 |
+
model_with_nans_md,
|
193 |
+
],
|
194 |
+
)
|
195 |
+
|
196 |
gr.TabbedInterface(tabs, titles)
|
197 |
|
198 |
gr.Textbox(
|
config.py
CHANGED
@@ -59,10 +59,19 @@ class SeamlessInfo:
|
|
59 |
# fmt: on
|
60 |
|
61 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
model2info = {
|
63 |
"openai--whisper-large-v3": WhisperInfo,
|
64 |
"openai--whisper-large-v3-turbo": WhisperInfo,
|
65 |
"facebook--seamless-m4t-v2-large": SeamlessInfo,
|
|
|
66 |
}
|
67 |
|
68 |
|
|
|
59 |
# fmt: on
|
60 |
|
61 |
|
62 |
+
class CanaryInfo:
|
63 |
+
# fmt: off
|
64 |
+
langs = [
|
65 |
+
"en", "es", "de", "fr",
|
66 |
+
]
|
67 |
+
# fmt: on
|
68 |
+
|
69 |
+
|
70 |
model2info = {
|
71 |
"openai--whisper-large-v3": WhisperInfo,
|
72 |
"openai--whisper-large-v3-turbo": WhisperInfo,
|
73 |
"facebook--seamless-m4t-v2-large": SeamlessInfo,
|
74 |
+
"nvidia--canary-1b": CanaryInfo,
|
75 |
}
|
76 |
|
77 |
|
parsing.py
CHANGED
@@ -3,6 +3,9 @@ from typing import List
|
|
3 |
from os.path import join as opj
|
4 |
import json
|
5 |
from config import dataset2info, model2info, LOCAL_RESULTS_DIR
|
|
|
|
|
|
|
6 |
|
7 |
|
8 |
def load_language_results(
|
@@ -10,16 +13,24 @@ def load_language_results(
|
|
10 |
):
|
11 |
lang_gaps = dict()
|
12 |
for lang in lang_ids:
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
)
|
20 |
-
|
21 |
-
|
22 |
-
lang_gaps[lang] = data[f"{data['eval_metric']}_diff_mean"]
|
23 |
return lang_gaps
|
24 |
|
25 |
|
@@ -50,7 +61,16 @@ def read_all_configs(setup: str):
|
|
50 |
)
|
51 |
|
52 |
results_df = pd.DataFrame(rows)
|
53 |
-
results_df = results_df.drop(columns=["Dataset"])
|
54 |
# results_df = results_df.sort_values(by="Mean Gap", ascending=True)
|
55 |
|
56 |
return results_df
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
from os.path import join as opj
|
4 |
import json
|
5 |
from config import dataset2info, model2info, LOCAL_RESULTS_DIR
|
6 |
+
import logging
|
7 |
+
|
8 |
+
logger = logging.getLogger(__name__)
|
9 |
|
10 |
|
11 |
def load_language_results(
|
|
|
13 |
):
|
14 |
lang_gaps = dict()
|
15 |
for lang in lang_ids:
|
16 |
+
|
17 |
+
try:
|
18 |
+
with open(
|
19 |
+
opj(
|
20 |
+
LOCAL_RESULTS_DIR,
|
21 |
+
"evaluation",
|
22 |
+
dataset_id,
|
23 |
+
f"results_{model_id}_{dataset_id}_devtest_{lang}_gender_{setup}.json",
|
24 |
+
)
|
25 |
+
) as fp:
|
26 |
+
data = json.load(fp)
|
27 |
+
lang_gaps[lang] = data[f"{data['eval_metric']}_diff_mean"]
|
28 |
+
except FileNotFoundError:
|
29 |
+
logger.debug(
|
30 |
+
f"We could not find the result file for <model,dataset,lang>: {model_id}, {dataset_id}, {lang}"
|
31 |
)
|
32 |
+
lang_gaps[lang] = None
|
33 |
+
|
|
|
34 |
return lang_gaps
|
35 |
|
36 |
|
|
|
61 |
)
|
62 |
|
63 |
results_df = pd.DataFrame(rows)
|
64 |
+
# results_df = results_df.drop(columns=["Dataset"])
|
65 |
# results_df = results_df.sort_values(by="Mean Gap", ascending=True)
|
66 |
|
67 |
return results_df
|
68 |
+
|
69 |
+
|
70 |
+
def get_common_langs():
    """Return the languages that are supported by all models.

    The result is the intersection of the ``langs`` attribute of every
    value registered in ``model2info``.

    Returns:
        list: Language codes shared by every model, sorted so the order is
        the same on every run. Empty when ``model2info`` has no entries.
    """
    lang_sets = [set(info.langs) for info in model2info.values()]
    if not lang_sets:
        # No model registered: avoid indexing the first key of an empty dict.
        return []

    return sorted(set.intersection(*lang_sets))
|
twists_banner.png
ADDED