g8a9 committed on
Commit
86e679c
·
1 Parent(s): 5b47e88

enhance data processing and visualization: add support for common languages and improve handling of models with NaN values

Browse files
Files changed (4) hide show
  1. app.py +107 -32
  2. config.py +9 -0
  3. parsing.py +30 -10
  4. twists_banner.png +0 -0
app.py CHANGED
@@ -12,7 +12,7 @@ from config import (
12
  CITATION_BUTTON_TEXT,
13
  CITATION_BUTTON_LABEL,
14
  )
15
- from parsing import read_all_configs
16
 
17
  # Set up logging
18
  logging.basicConfig(
@@ -50,43 +50,66 @@ def format_dataframe(df, times_100=False):
50
  return df
51
 
52
 
53
- with gr.Blocks() as fm_interface:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  fm = SETUPS[0]
55
  setup = fm["majority_group"] + "_" + fm["minority_group"]
56
  results = read_all_configs(setup)
57
 
58
- model_results = (
 
 
 
 
 
 
 
 
59
  results.pivot_table(
60
  index="Model", values="Gap", aggfunc=lambda x: 100 * x.abs().sum()
61
  )
62
  .reset_index()
63
  .sort_values("Gap")
64
  )
65
- best_model = model_results.iloc[0]["Model"]
66
- print("Best model:", best_model)
67
- # model_results = format_dataframe(model_results)
68
- # print(results.head())
69
 
70
- gr.Markdown("### Sum of Absolute Gaps ⬇️")
71
- gr.DataFrame(format_dataframe(model_results))
72
-
73
- gr.Markdown("#### F-M gaps by language")
74
-
75
- lang_results = results.pivot_table(
76
  index="Model",
77
  values="Gap",
78
  columns="Language",
79
  ).reset_index()
80
- gr.DataFrame(format_dataframe(lang_results, times_100=True))
81
 
82
  # gr.Plot(fig1)
83
  results["Gap"] = results["Gap"] * 100
84
- fig = px.bar(
85
- results,
86
  x="Language",
87
  y="Gap",
88
  color="Model",
89
- title="Gaps by Language and Model",
90
  labels={
91
  "Gap": "Sum of Absolute Gaps (%)",
92
  "Language": "Language",
@@ -95,29 +118,81 @@ with gr.Blocks() as fm_interface:
95
  barmode="group",
96
  )
97
  lang_order = (
98
- lang_results.set_index("Model")
99
- .loc[best_model]
100
- .sort_values(ascending=False)
101
- .index
 
 
 
 
 
 
 
 
 
 
102
  )
103
- print(lang_order)
104
 
105
- # [best_model].sort_values().index
106
- fig.update_layout(xaxis={"categoryorder": "array", "categoryarray": lang_order})
107
- gr.Plot(fig)
108
- # gr.Plot(fig2)
109
 
 
 
 
 
 
 
 
 
110
  tabs = [fm_interface]
111
  titles = ["F-M Setup"]
112
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  with gr.Blocks() as demo:
 
114
  gr.Markdown("# Fair ASR Leadeboard")
115
- gr.Markdown(
116
- """
117
- Datasets currently included:
118
- - **Mozilla Common Voice v17**
119
- """
120
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
  gr.TabbedInterface(tabs, titles)
122
 
123
  gr.Textbox(
 
12
  CITATION_BUTTON_TEXT,
13
  CITATION_BUTTON_LABEL,
14
  )
15
+ from parsing import read_all_configs, get_common_langs
16
 
17
  # Set up logging
18
  logging.basicConfig(
 
50
  return df
51
 
52
 
53
+ def _build_models_with_nan_md(models_with_nan):
54
+ model_markups = [f"*{m}*" for m in models_with_nan]
55
+ return f"""
56
+ We are currently hiding the results of {', '.join(model_markups)} because they don't support all languages.
57
+ """
58
+
59
+
60
def build_components(show_common_langs):
    """Rebuild the leaderboard widgets for the current language filter.

    Args:
        show_common_langs: Filter flag forwarded unchanged to
            ``_populate_components``.

    Returns:
        A 4-tuple ``(aggregated table, per-language table, bar plot,
        hidden-models notice)`` of freshly built Gradio components. The notice
        is only visible when at least one model was hidden.
    """
    agg_frame, per_lang_frame, figure, hidden_models = _populate_components(
        show_common_langs
    )
    notice_text = _build_models_with_nan_md(hidden_models)

    agg_table = gr.DataFrame(format_dataframe(agg_frame))
    per_lang_table = gr.DataFrame(format_dataframe(per_lang_frame, times_100=True))
    plot = gr.Plot(figure)
    notice = gr.Markdown(notice_text, visible=len(hidden_models) > 0)

    return agg_table, per_lang_table, plot, notice
72
+
73
+
74
+ def _populate_components(show_common_langs):
75
  fm = SETUPS[0]
76
  setup = fm["majority_group"] + "_" + fm["minority_group"]
77
  results = read_all_configs(setup)
78
 
79
+ if show_common_langs:
80
+ common_langs = get_common_langs()
81
+ results = results[results["Language"].isin(common_langs)]
82
+
83
+ models_with_nan = results[results.isna().any(axis=1)]["Model"].unique().tolist()
84
+ logger.info(f"Models with NaN values: {models_with_nan}")
85
+ results = results[~results["Model"].isin(models_with_nan)]
86
+
87
+ aggregated_df = (
88
  results.pivot_table(
89
  index="Model", values="Gap", aggfunc=lambda x: 100 * x.abs().sum()
90
  )
91
  .reset_index()
92
  .sort_values("Gap")
93
  )
94
+ best_model = aggregated_df.iloc[0]["Model"]
95
+ top_3_models = aggregated_df["Model"].head(3).tolist()
96
+ # main_df = gr.DataFrame(format_dataframe(model_results))
 
97
 
98
+ lang_df = results.pivot_table(
 
 
 
 
 
99
  index="Model",
100
  values="Gap",
101
  columns="Language",
102
  ).reset_index()
103
+ # lang_df = gr.DataFrame(format_dataframe(lang_results, times_100=True))
104
 
105
  # gr.Plot(fig1)
106
  results["Gap"] = results["Gap"] * 100
107
+ barplot_fig = px.bar(
108
+ results.loc[results["Model"].isin(top_3_models)],
109
  x="Language",
110
  y="Gap",
111
  color="Model",
112
+ title="Gaps by Language and Model (top 3, sorted by the best model)",
113
  labels={
114
  "Gap": "Sum of Absolute Gaps (%)",
115
  "Language": "Language",
 
118
  barmode="group",
119
  )
120
  lang_order = (
121
+ lang_df.set_index("Model").loc[best_model].sort_values(ascending=False).index
122
+ )
123
+ logger.info(f"Lang order: {lang_order}")
124
+
125
+ barplot_fig.update_layout(
126
+ xaxis={"categoryorder": "array", "categoryarray": lang_order}
127
+ )
128
+
129
+ return aggregated_df, lang_df, barplot_fig, models_with_nan
130
+
131
+
132
# Initial render of the F-M tab, built without the common-language filter.
with gr.Blocks() as fm_interface:
    aggregated_df, lang_df, barplot_fig, model_with_nan = _populate_components(
        show_common_langs=False
    )
    # Only show the notice when some model was actually dropped; this is the
    # same visibility rule build_components() applies on later refreshes.
    model_with_nans_md = gr.Markdown(
        _build_models_with_nan_md(model_with_nan),
        visible=len(model_with_nan) > 0,
    )

    gr.Markdown("### Sum of Absolute Gaps ⬇️")
    aggregated_df_comp = gr.DataFrame(format_dataframe(aggregated_df))

    gr.Markdown("#### F-M gaps by language")
    lang_df_comp = gr.DataFrame(format_dataframe(lang_df, times_100=True))

    barplot_fig_comp = gr.Plot(barplot_fig)
145
+
146
+ ###################
147
+ # LIST MAIN TABS
148
+ ###################
149
  tabs = [fm_interface]
150
  titles = ["F-M Setup"]
151
 
152
+ banner = """
153
+ <style>
154
+ .full-width-image {
155
+ width: 100%;
156
+ height: auto;
157
+ margin: 0;
158
+ padding: 0;
159
+ }
160
+ </style>
161
+ <div>
162
+ <img src="https://huggingface.co/spaces/g8a9/fair-asr-leaderboard/raw/main/twists_banner.png" alt="Twists Banner" class="full-width-image">
163
+ </div>
164
+ """
165
+
166
+ ###################
167
+ # MAIN INTERFACE
168
+ ###################
169
  with gr.Blocks() as demo:
170
+ gr.HTML(banner)
171
  gr.Markdown("# Fair ASR Leadeboard")
172
+
173
+ with gr.Row() as config_row:
174
+ show_common_langs = gr.CheckboxGroup(
175
+ choices=["Show only common languages"],
176
+ label="Main configuration",
177
+ )
178
+ include_datasets = gr.CheckboxGroup(
179
+ choices=["Mozilla CV 17"],
180
+ label="Include datasets",
181
+ value=["Mozilla CV 17"],
182
+ interactive=False,
183
+ )
184
+
185
+ show_common_langs.input(
186
+ build_components,
187
+ inputs=[show_common_langs],
188
+ outputs=[
189
+ aggregated_df_comp,
190
+ lang_df_comp,
191
+ barplot_fig_comp,
192
+ model_with_nans_md,
193
+ ],
194
+ )
195
+
196
  gr.TabbedInterface(tabs, titles)
197
 
198
  gr.Textbox(
config.py CHANGED
@@ -59,10 +59,19 @@ class SeamlessInfo:
59
  # fmt: on
60
 
61
 
 
 
 
 
 
 
 
 
62
  model2info = {
63
  "openai--whisper-large-v3": WhisperInfo,
64
  "openai--whisper-large-v3-turbo": WhisperInfo,
65
  "facebook--seamless-m4t-v2-large": SeamlessInfo,
 
66
  }
67
 
68
 
 
59
  # fmt: on
60
 
61
 
62
+ class CanaryInfo:
63
+ # fmt: off
64
+ langs = [
65
+ "en", "es", "de", "fr",
66
+ ]
67
+ # fmt: on
68
+
69
+
70
  model2info = {
71
  "openai--whisper-large-v3": WhisperInfo,
72
  "openai--whisper-large-v3-turbo": WhisperInfo,
73
  "facebook--seamless-m4t-v2-large": SeamlessInfo,
74
+ "nvidia--canary-1b": CanaryInfo,
75
  }
76
 
77
 
parsing.py CHANGED
@@ -3,6 +3,9 @@ from typing import List
3
  from os.path import join as opj
4
  import json
5
  from config import dataset2info, model2info, LOCAL_RESULTS_DIR
 
 
 
6
 
7
 
8
  def load_language_results(
@@ -10,16 +13,24 @@ def load_language_results(
10
  ):
11
  lang_gaps = dict()
12
  for lang in lang_ids:
13
- with open(
14
- opj(
15
- LOCAL_RESULTS_DIR,
16
- "evaluation",
17
- dataset_id,
18
- f"results_{model_id}_{dataset_id}_devtest_{lang}_gender_{setup}.json",
 
 
 
 
 
 
 
 
 
19
  )
20
- ) as fp:
21
- data = json.load(fp)
22
- lang_gaps[lang] = data[f"{data['eval_metric']}_diff_mean"]
23
  return lang_gaps
24
 
25
 
@@ -50,7 +61,16 @@ def read_all_configs(setup: str):
50
  )
51
 
52
  results_df = pd.DataFrame(rows)
53
- results_df = results_df.drop(columns=["Dataset"])
54
  # results_df = results_df.sort_values(by="Mean Gap", ascending=True)
55
 
56
  return results_df
 
 
 
 
 
 
 
 
 
 
3
  from os.path import join as opj
4
  import json
5
  from config import dataset2info, model2info, LOCAL_RESULTS_DIR
6
+ import logging
7
+
8
+ logger = logging.getLogger(__name__)
9
 
10
 
11
  def load_language_results(
 
13
  ):
14
  lang_gaps = dict()
15
  for lang in lang_ids:
16
+
17
+ try:
18
+ with open(
19
+ opj(
20
+ LOCAL_RESULTS_DIR,
21
+ "evaluation",
22
+ dataset_id,
23
+ f"results_{model_id}_{dataset_id}_devtest_{lang}_gender_{setup}.json",
24
+ )
25
+ ) as fp:
26
+ data = json.load(fp)
27
+ lang_gaps[lang] = data[f"{data['eval_metric']}_diff_mean"]
28
+ except FileNotFoundError:
29
+ logger.debug(
30
+ f"We could not find the result file for <model,dataset,lang>: {model_id}, {dataset_id}, {lang}"
31
  )
32
+ lang_gaps[lang] = None
33
+
 
34
  return lang_gaps
35
 
36
 
 
61
  )
62
 
63
  results_df = pd.DataFrame(rows)
64
+ # results_df = results_df.drop(columns=["Dataset"])
65
  # results_df = results_df.sort_values(by="Mean Gap", ascending=True)
66
 
67
  return results_df
68
+
69
+
70
def get_common_langs():
    """Return the languages that are supported by all models.

    Returns:
        A list of language codes present in the ``langs`` of every entry of
        ``model2info``. The codes keep the order in which the first registered
        model declares them, so the result is the same on every call and every
        run. An empty ``model2info`` yields an empty list.
    """
    infos = list(model2info.values())
    if not infos:
        # Nothing registered: there is no language shared by "all" models.
        return []

    first_info, *other_infos = infos
    other_lang_sets = [set(info.langs) for info in other_infos]

    # dict.fromkeys de-duplicates while keeping the declaration order.
    return [
        lang
        for lang in dict.fromkeys(first_info.langs)
        if all(lang in langs for langs in other_lang_sets)
    ]
twists_banner.png ADDED