Spaces:
Running
Running
File size: 7,426 Bytes
c9a97c2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 |
import pandas as pd
import json
from datetime import datetime
def process_csv_to_json():
    """Convert src/record.csv into a nested per-algorithm/LLM/dataset report.

    Reads the benchmark records, normalizes column names, and writes
    src/detail_math_score.json with the structure:
        {"time": ..., "results": {algorithm: {llm: {"META": {...}, dataset: {...}}}}}
    Missing numeric fields are emitted as 0 / 0.0 so the output is valid
    strict JSON (no NaN tokens).
    """
    # Load the raw benchmark records.
    df = pd.read_csv('src/record.csv')
    # Clean-up: drop fully-empty rows, then normalize the column names.
    df = df.dropna(how='all')
    df = df.rename(columns={
        'dataset': 'Dataset',
        'llm': 'LLM',
        'score\n(EM)': 'Score',
        'pass rate': 'Pass rate',
        'Cost($)': 'Cost($)',
        'Eval Date': 'Eval Date',
        'framework': 'Framework',
        'X-shot': 'X-shot',
        'Nums': 'Samples',
        'All tokens': 'All tokens',
        'Total input tokens': 'Total input tokens',
        'Average input tokens': 'Average input tokens',
        'Total output tokens': 'Total output tokens',
        'Average output tokens': 'Average output tokens'
    })

    def parse_number(value):
        """Parse a numeric string that may contain thousands separators.

        Returns 0 for missing values. Goes through float first so inputs
        like "1,319.0" are tolerated before truncating to int.
        """
        if pd.isna(value):
            return 0
        return int(float(str(value).replace(',', '')))

    # Result skeleton, stamped with the generation time.
    result = {
        "time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "results": {}
    }
    # All distinct LLMs present in the records.
    llms = df['LLM'].dropna().unique()
    # Walk every algorithm / LLM / dataset combination.
    for algorithm in df['Algorithm'].dropna().unique():
        if not isinstance(algorithm, str):
            continue
        result['results'][algorithm] = {}
        for llm in llms:
            llm_data = df[(df['Algorithm'] == algorithm) & (df['LLM'] == llm)]
            if llm_data.empty:
                continue
            # Per-LLM entry starts with its META block.
            result['results'][algorithm][llm] = {
                'META': {
                    'Algorithm': str(algorithm),
                    'LLM': str(llm),
                    'Eval Date': str(llm_data['Eval Date'].iloc[0])
                }
            }
            for dataset in df['Dataset'].dropna().unique():
                if not isinstance(dataset, str):
                    continue
                dataset_data = llm_data[llm_data['Dataset'] == dataset]
                if not dataset_data.empty:
                    data_row = dataset_data.iloc[0]
                    result['results'][algorithm][llm][dataset] = {
                        # FIX: guard Score/Pass rate with pd.notnull like the other
                        # fields — a bare float(NaN) propagated NaN into the JSON
                        # output, which is not valid strict JSON.
                        'Score': round(float(data_row['Score']), 2) if pd.notnull(data_row['Score']) else 0.0,
                        # Pass rate is stored as a percentage; convert to a fraction.
                        'Pass rate': round(float(data_row['Pass rate']) / 100, 4) if pd.notnull(data_row['Pass rate']) else 0.0,
                        'Cost($)': float(data_row['Cost($)']) if pd.notnull(data_row['Cost($)']) else 0.0,
                        'Framework': str(data_row['Framework']) if 'Framework' in data_row and pd.notnull(data_row['Framework']) else '',
                        'X-shot': str(data_row['X-shot']) if pd.notnull(data_row['X-shot']) else '',
                        'Samples': parse_number(data_row['Samples']),
                        'All tokens': parse_number(data_row['All tokens']),
                        'Total input tokens': parse_number(data_row['Total input tokens']),
                        'Average input tokens': parse_number(data_row['Average input tokens']),
                        'Total output tokens': parse_number(data_row['Total output tokens']),
                        'Average output tokens': parse_number(data_row['Average output tokens'])
                    }
    # Sanity check: report any entries that ended up with missing fields.
    required_fields = ['Score', 'Pass rate', 'Cost($)', 'Framework', 'X-shot', 'Samples', 'All tokens', 'Total input tokens', 'Average input tokens', 'Total output tokens', 'Average output tokens']
    for key, value in result['results'].items():
        for llm, datasets in value.items():
            # Check the META block first.
            meta = datasets.get('META', {})
            if 'LLM' not in meta or 'Eval Date' not in meta:
                print(f"Missing META fields in algorithm '{key}' for LLM '{llm}'")
            for dataset, data in datasets.items():
                if dataset == 'META':
                    continue
                missing_fields = [field for field in required_fields if field not in data]
                if missing_fields:
                    print(f"Missing fields {missing_fields} in dataset '{dataset}' for LLM '{llm}' in algorithm '{key}'")
    # Persist the detailed report.
    with open('src/detail_math_score.json', 'w', encoding='utf-8') as f:
        json.dump(result, f, indent=4, ensure_ascii=False)
def process_csv_to_overall_json():
    """Build the condensed overall-score report from src/record.csv.

    Writes src/overall_math_score.json keyed by algorithm (suffixed with
    the LLM name for any model other than gpt-3.5-turbo), each entry
    holding a META block plus Score/Cost for the gsm8k and AQuA datasets.
    Missing dataset rows are emitted with zeroed defaults.
    """
    # Load and clean the raw records, normalizing column names.
    records = pd.read_csv('src/record.csv')
    records = records.dropna(how='all')
    records = records.rename(columns={
        'dataset': 'Dataset',
        'llm': 'LLM',
        'score\n(EM)': 'Score',
        'Cost($)': 'Cost($)',
        'Eval Date': 'Eval Date'
    })

    # Report skeleton stamped with the generation time.
    report = {
        "time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "results": {}
    }

    for llm in records['LLM'].dropna().unique():
        for algorithm in records['Algorithm'].dropna().unique():
            if not isinstance(algorithm, str):
                continue
            # gpt-3.5-turbo keeps the bare algorithm name; every other
            # model is suffixed so keys stay unique across LLMs.
            algo_key = f"{algorithm}-{llm}" if llm != 'gpt-3.5-turbo' else algorithm

            # Rows belonging to this algorithm/LLM pair.
            combo = records[(records['Algorithm'] == algorithm) & (records['LLM'] == llm)]
            if combo.empty:
                print(f"No data found for algorithm '{algorithm}' and LLM '{llm}'")
                continue

            report['results'][algo_key] = {
                "META": {
                    "Algorithm": algorithm,
                    "LLM": llm,
                    "Eval Date": str(combo['Eval Date'].iloc[0])
                }
            }

            for dataset in ('gsm8k', 'AQuA'):
                subset = combo[combo['Dataset'] == dataset]
                if subset.empty:
                    # No record for this dataset: keep the key with zeroed defaults.
                    report['results'][algo_key][dataset] = {
                        "Score": 0.0,
                        "Cost($)": 0.0
                    }
                    continue
                score = subset['Score'].iloc[0]
                cost = subset['Cost($)'].iloc[0]
                report['results'][algo_key][dataset] = {
                    "Score": float(score) if pd.notnull(score) else 0.0,
                    "Cost($)": float(cost) if pd.notnull(cost) else 0.0
                }

    # Persist the overall report.
    with open('src/overall_math_score.json', 'w', encoding='utf-8') as f:
        json.dump(report, f, indent=4, ensure_ascii=False)
if __name__ == "__main__":
    # Emit both report formats (detailed, then overall) from src/record.csv.
    for builder in (process_csv_to_json, process_csv_to_overall_json):
        builder()