# Metric identifiers accepted by this module, one per line. The trailing
# comment on each entry names the task family that metric is meant for.
# NOTE(review): how the list is consumed is not visible here - confirm at
# the use sites whether element order matters before reordering.
SUPPORTED_METRICS = [
    "avg_mcauroc",  # for classification tasks
    "exact_match",  # for QA tasks
    "acc",  # for multichoice tasks
    "rouge_raw_r2_mid_f_without_bootstrap",  # for summarization tasks
    "rouge_raw_r2_mid_f",  # for summarization tasks, older metric version for back compatibility
    "word_perplexity",  # for language modeling tasks
]

# Key names grouped under "extra info release".
# NOTE(review): presumably the per-sample fields kept when extra information
# is released - verify against the code that reads this list.
EXTRA_INFO_RELEASE_KEYS = [
    "filtered_resps",
    "doc_id",
]