fair-asr-leaderboard / parsing.py
g8a9's picture
refactor: streamline dataset and model handling with helper classes
fc63ec6
raw
history blame
2.17 kB
import pandas as pd
from typing import List
from os.path import join as opj
import json
import logging
from config import DatasetHelper, ModelHelper, LOCAL_RESULTS_DIR
logger = logging.getLogger(__name__)
def load_language_results(
    model_id: str, dataset_id: str, lang_ids: List[str], contrast_string: str
) -> dict:
    """Load the per-language gap value for one model/dataset pair.

    For each language in ``lang_ids`` the file
    ``results_<model_id>_<dataset_id>_devtest_<lang>_gender_<contrast_string>.json``
    is read from ``LOCAL_RESULTS_DIR/evaluation/<dataset_id>/``. The gap is the
    value stored under ``"<eval_metric>_diff_mean"``, where ``eval_metric`` is
    taken from the ``"eval_metric"`` entry of the same file.

    Args:
        model_id: Model identifier as it appears in the result file names.
        dataset_id: Dataset identifier; used both as sub-directory name and
            inside the file name.
        lang_ids: Languages to look up.
        contrast_string: Contrast suffix of the file name.

    Returns:
        A dict mapping every language in ``lang_ids`` to its gap value, or to
        ``None`` when the result file for that language does not exist.

    Raises:
        KeyError: If a result file exists but lacks ``"eval_metric"`` or the
            derived ``"<eval_metric>_diff_mean"`` key.
        json.JSONDecodeError: If a result file exists but is not valid JSON.
    """
    # The directory is the same for every language, so build it once.
    results_dir = opj(LOCAL_RESULTS_DIR, "evaluation", dataset_id)

    lang_gaps = {}
    for lang in lang_ids:
        result_path = opj(
            results_dir,
            f"results_{model_id}_{dataset_id}_devtest_{lang}_gender_{contrast_string}.json",
        )
        try:
            # Explicit UTF-8: do not depend on the platform's locale encoding.
            with open(result_path, encoding="utf-8") as fp:
                data = json.load(fp)
        except FileNotFoundError:
            # A missing file is expected (not every combination has been
            # evaluated); record it as None rather than failing.
            logger.debug(
                "We could not find the result file for <model,dataset,lang>: %s, %s, %s",
                model_id,
                dataset_id,
                lang,
            )
            lang_gaps[lang] = None
        else:
            lang_gaps[lang] = data[f"{data['eval_metric']}_diff_mean"]
    return lang_gaps
def read_all_configs(contrast_type: str) -> pd.DataFrame:
    """Collect the gap results of every dataset/model/language combination.

    Iterates over all dataset configs exposed by ``DatasetHelper`` and all
    sanitized model ids exposed by ``ModelHelper``, loads the per-language
    gaps with ``load_language_results`` and flattens them into one table.

    Args:
        contrast_type: Key into each dataset config's ``group_contrasts``
            mapping. The selected entry must provide ``"majority_group"`` and
            ``"minority_group"``, which are joined with ``_`` to form the
            contrast part of the result file names.

    Returns:
        A DataFrame with one row per (model, dataset, language) and the
        columns ``Model``, ``Dataset``, ``Language``, ``Type`` and ``Gap``.
        The columns are present even when no rows were produced.

    Raises:
        KeyError: If ``contrast_type`` is not defined for a dataset config.
    """
    # Fixed schema so that an empty result still has the expected columns.
    columns = ["Model", "Dataset", "Language", "Type", "Gap"]

    dataset_h = DatasetHelper()
    model_h = ModelHelper()

    rows = []
    for dataset_config in dataset_h.dataset_configs:
        # These values depend only on the dataset, not on the model, so they
        # are computed once per dataset instead of once per model.
        contrast_info = dataset_config.group_contrasts[contrast_type]
        contrast_string = (
            f"{contrast_info['majority_group']}_{contrast_info['minority_group']}"
        )
        dataset_id = dataset_config.sanitized_id()

        for model_id in model_h.sanitized_model_ids:
            lang_gaps = load_language_results(
                model_id,
                dataset_id,
                dataset_config.langs,
                contrast_string,
            )
            rows.extend(
                {
                    "Model": model_id,
                    "Dataset": dataset_id,
                    "Language": lang,
                    "Type": dataset_config.speaking_condition.capitalize(),
                    "Gap": gap,
                }
                for lang, gap in lang_gaps.items()
            )

    return pd.DataFrame(rows, columns=columns)