Spaces:
Running
Running
import pandas as pd | |
import json | |
from datetime import datetime | |
def process_csv_to_json(csv_path='src/record.csv',
                        output_path='src/detail_math_score.json'):
    """Convert the evaluation record CSV into a detailed, nested JSON file.

    The output has the shape
    ``{"time": ..., "results": {algorithm: {llm: {"META": ..., dataset: {...}}}}}``.
    For every (algorithm, LLM, dataset) combination only the first matching
    CSV row is used.

    Missing numeric cells (NaN) are written as ``0`` / ``0.0`` so that the
    output never contains a bare ``NaN`` token, which is not valid JSON.

    Args:
        csv_path: Path of the CSV file to read.
        output_path: Path of the JSON file to write.
    """
    df = pd.read_csv(csv_path)

    # Drop fully empty rows and normalise the column names that differ from
    # the keys used in the output (all other columns already match).
    df = df.dropna(how='all')
    df = df.rename(columns={
        'dataset': 'Dataset',
        'llm': 'LLM',
        'score\n(EM)': 'Score',
        'pass rate': 'Pass rate',
        'framework': 'Framework',
        'Nums': 'Samples',
    })

    def parse_float(value):
        """Return *value* as a float; NaN/None -> 0.0, '1,234.5' -> 1234.5."""
        if pd.isna(value):
            return 0.0
        if isinstance(value, str):
            # Numbers may be stored as strings with thousands separators.
            value = value.replace(',', '')
        return float(value)

    def parse_number(value):
        """Return *value* as an int (truncating), treating NaN/None as 0."""
        return int(parse_float(value))

    result = {
        "time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "results": {}
    }

    # Unique values in order of first appearance; non-string algorithm and
    # dataset labels are skipped.
    llms = df['LLM'].dropna().unique()
    algorithms = [a for a in df['Algorithm'].dropna().unique()
                  if isinstance(a, str)]
    datasets = [d for d in df['Dataset'].dropna().unique()
                if isinstance(d, str)]

    for algorithm in algorithms:
        per_algorithm = {}
        result['results'][algorithm] = per_algorithm
        algorithm_rows = df[df['Algorithm'] == algorithm]

        for llm in llms:
            llm_data = algorithm_rows[algorithm_rows['LLM'] == llm]
            if llm_data.empty:
                continue

            entry = {
                'META': {
                    'Algorithm': str(algorithm),
                    'LLM': str(llm),
                    'Eval Date': str(llm_data['Eval Date'].iloc[0])
                }
            }
            per_algorithm[llm] = entry

            for dataset in datasets:
                dataset_data = llm_data[llm_data['Dataset'] == dataset]
                if dataset_data.empty:
                    continue

                data_row = dataset_data.iloc[0]
                has_framework = ('Framework' in data_row
                                 and pd.notnull(data_row['Framework']))
                entry[dataset] = {
                    # Score is kept with two decimals.
                    'Score': round(parse_float(data_row['Score']), 2),
                    # Pass rate is stored as a percentage; convert it to a fraction.
                    'Pass rate': round(parse_float(data_row['Pass rate']) / 100, 4),
                    'Cost($)': parse_float(data_row['Cost($)']),
                    'Framework': str(data_row['Framework']) if has_framework else '',
                    'X-shot': str(data_row['X-shot']) if pd.notnull(data_row['X-shot']) else '',
                    'Samples': parse_number(data_row['Samples']),
                    'All tokens': parse_number(data_row['All tokens']),
                    'Total input tokens': parse_number(data_row['Total input tokens']),
                    'Average input tokens': parse_number(data_row['Average input tokens']),
                    'Total output tokens': parse_number(data_row['Total output tokens']),
                    'Average output tokens': parse_number(data_row['Average output tokens'])
                }

    # Sanity check: report any entry that lacks an expected field.
    required_fields = ['Score', 'Pass rate', 'Cost($)', 'Framework', 'X-shot', 'Samples', 'All tokens', 'Total input tokens', 'Average input tokens', 'Total output tokens', 'Average output tokens']
    for key, value in result['results'].items():
        for llm, llm_entry in value.items():
            meta = llm_entry.get('META', {})
            if 'LLM' not in meta or 'Eval Date' not in meta:
                print(f"Missing META fields in algorithm '{key}' for LLM '{llm}'")
            for dataset, data in llm_entry.items():
                if dataset == 'META':
                    continue
                missing_fields = [field for field in required_fields if field not in data]
                if missing_fields:
                    print(f"Missing fields {missing_fields} in dataset '{dataset}' for LLM '{llm}' in algorithm '{key}'")

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(result, f, indent=4, ensure_ascii=False)
def process_csv_to_overall_json(csv_path='src/record.csv',
                                output_path='src/overall_math_score.json',
                                datasets=('gsm8k', 'AQuA')):
    """Convert the evaluation record CSV into a flat "overall" JSON file.

    The output has the shape
    ``{"time": ..., "results": {algo_key: {"META": ..., dataset: {...}}}}``.
    ``algo_key`` is the plain algorithm name for ``gpt-3.5-turbo`` and
    ``"<algorithm>-<llm>"`` for every other LLM. Each entry holds one
    ``{"Score", "Cost($)"}`` record per name in *datasets*; a dataset with no
    row, or with an empty cell, gets ``0.0``.

    Args:
        csv_path: Path of the CSV file to read.
        output_path: Path of the JSON file to write.
        datasets: Dataset names to include for every algorithm/LLM entry.
    """
    df = pd.read_csv(csv_path)

    # Drop fully empty rows and normalise the column names that differ from
    # the keys used below.
    df = df.dropna(how='all')
    df = df.rename(columns={
        'dataset': 'Dataset',
        'llm': 'LLM',
        'score\n(EM)': 'Score',
    })

    def value_or_zero(value):
        """Return *value* as a float, treating a missing cell as 0.0."""
        return float(value) if pd.notnull(value) else 0.0

    result = {
        "time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "results": {}
    }

    # Unique values in order of first appearance; non-string algorithm
    # labels are skipped.
    llms = df['LLM'].dropna().unique()
    algorithms = [a for a in df['Algorithm'].dropna().unique()
                  if isinstance(a, str)]

    for llm in llms:
        llm_rows = df[df['LLM'] == llm]
        for algorithm in algorithms:
            # Entries for LLMs other than gpt-3.5-turbo get the LLM name as
            # a suffix so that keys stay unique.
            algo_key = algorithm if llm == 'gpt-3.5-turbo' else f"{algorithm}-{llm}"

            algo_data = llm_rows[llm_rows['Algorithm'] == algorithm]
            if algo_data.empty:
                print(f"No data found for algorithm '{algorithm}' and LLM '{llm}'")
                continue

            entry = {
                "META": {
                    "Algorithm": algorithm,
                    "LLM": llm,
                    "Eval Date": str(algo_data['Eval Date'].iloc[0])
                }
            }
            result['results'][algo_key] = entry

            for dataset in datasets:
                dataset_data = algo_data[algo_data['Dataset'] == dataset]
                if dataset_data.empty:
                    # Keep the key present with default values.
                    entry[dataset] = {"Score": 0.0, "Cost($)": 0.0}
                    continue
                first_row = dataset_data.iloc[0]
                entry[dataset] = {
                    "Score": value_or_zero(first_row['Score']),
                    "Cost($)": value_or_zero(first_row['Cost($)'])
                }

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(result, f, indent=4, ensure_ascii=False)
if __name__ == "__main__":
    # Produce both JSON flavours: the detailed file first, then the overall one.
    for build_json in (process_csv_to_json, process_csv_to_overall_json):
        build_json()