File size: 7,426 Bytes
c9a97c2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
import pandas as pd
import json
from datetime import datetime

def process_csv_to_json(csv_path: str = 'src/record.csv',
                        output_path: str = 'src/detail_math_score.json') -> None:
    """Convert the evaluation-record CSV into a detailed, nested JSON report.

    The written document has the shape::

        {"time": "<YYYY-mm-dd HH:MM:SS>",
         "results": {algorithm: {llm: {"META": {...}, dataset: {...}}}}}

    Algorithms, LLMs and datasets appear in order of first occurrence in the
    CSV. When several rows share the same (algorithm, LLM, dataset) triple,
    only the first one is used.

    Args:
        csv_path: CSV file to read. Defaults to the location this script has
            always used.
        output_path: JSON file to write (UTF-8, 4-space indent).

    Returns:
        None. The report is written to ``output_path``; consistency warnings
        are printed to stdout.

    Raises:
        KeyError: if a required column (e.g. ``Algorithm``) is missing.
        ValueError: if a numeric cell cannot be parsed as a number.
    """
    # Drop fully empty rows, then normalise the raw headers to the names used
    # in the output. Columns that already carry their final name need no entry.
    df = pd.read_csv(csv_path).dropna(how='all')
    df = df.rename(columns={
        'dataset': 'Dataset',
        'llm': 'LLM',
        'score\n(EM)': 'Score',
        'pass rate': 'Pass rate',
        'framework': 'Framework',
        'Nums': 'Samples',
    })

    def to_float(value, default=0.0):
        """Parse a cell as float; missing cells give ``default``.

        Thousands separators (``"1,234.5"``) are stripped from string cells.
        """
        if pd.isna(value):
            return default
        if isinstance(value, str):
            value = value.replace(',', '')
        return float(value)

    def parse_number(value):
        """Parse a cell as an integer count (truncating); missing cells give 0."""
        return int(to_float(value, 0))

    result = {
        "time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "results": {}
    }

    # Loop-invariant lookups, computed once instead of once per inner iteration.
    llms = df['LLM'].dropna().unique()
    datasets = [d for d in df['Dataset'].dropna().unique() if isinstance(d, str)]
    has_framework = 'Framework' in df.columns
    count_fields = ('Samples', 'All tokens', 'Total input tokens',
                    'Average input tokens', 'Total output tokens',
                    'Average output tokens')

    for algorithm in df['Algorithm'].dropna().unique():
        if not isinstance(algorithm, str):
            continue

        algorithm_rows = df[df['Algorithm'] == algorithm]
        per_llm = result['results'][algorithm] = {}

        for llm in llms:
            llm_rows = algorithm_rows[algorithm_rows['LLM'] == llm]
            if llm_rows.empty:
                continue

            entry = per_llm[llm] = {
                'META': {
                    'Algorithm': str(algorithm),
                    'LLM': str(llm),
                    'Eval Date': str(llm_rows['Eval Date'].iloc[0])
                }
            }

            for dataset in datasets:
                matches = llm_rows[llm_rows['Dataset'] == dataset]
                if matches.empty:
                    continue

                row = matches.iloc[0]
                framework_present = has_framework and pd.notnull(row['Framework'])
                record = {
                    # Missing scores default to 0.0 (like Cost) so that the
                    # output never contains the non-standard JSON token NaN.
                    'Score': round(to_float(row['Score']), 2),
                    # Percentage -> fraction, kept to four decimal places.
                    'Pass rate': round(to_float(row['Pass rate']) / 100, 4),
                    'Cost($)': to_float(row['Cost($)']),
                    'Framework': str(row['Framework']) if framework_present else '',
                    'X-shot': str(row['X-shot']) if pd.notnull(row['X-shot']) else '',
                }
                for field in count_fields:
                    record[field] = parse_number(row[field])
                entry[dataset] = record

    # Sanity check: report any entry that lacks an expected field.
    required_fields = ['Score', 'Pass rate', 'Cost($)', 'Framework', 'X-shot', 'Samples', 'All tokens', 'Total input tokens', 'Average input tokens', 'Total output tokens', 'Average output tokens']

    for key, value in result['results'].items():
        for llm, llm_entry in value.items():
            meta = llm_entry.get('META', {})
            if 'LLM' not in meta or 'Eval Date' not in meta:
                print(f"Missing META fields in algorithm '{key}' for LLM '{llm}'")

            for dataset, data in llm_entry.items():
                if dataset == 'META':
                    continue
                missing_fields = [field for field in required_fields if field not in data]
                if missing_fields:
                    print(f"Missing fields {missing_fields} in dataset '{dataset}' for LLM '{llm}' in algorithm '{key}'")

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(result, f, indent=4, ensure_ascii=False)

def process_csv_to_overall_json(csv_path: str = 'src/record.csv',
                                output_path: str = 'src/overall_math_score.json',
                                datasets=('gsm8k', 'AQuA'),
                                base_llm: str = 'gpt-3.5-turbo') -> None:
    """Convert the evaluation-record CSV into a flat "overall" JSON report.

    The written document has the shape::

        {"time": "<YYYY-mm-dd HH:MM:SS>",
         "results": {algo_key: {"META": {...}, dataset: {"Score", "Cost($)"}}}}

    ``algo_key`` is the bare algorithm name for rows evaluated with
    ``base_llm`` and ``"<algorithm>-<llm>"`` for every other LLM. Every entry
    contains every requested dataset; datasets without a row (or with a
    missing cell) get ``0.0``. When several rows share the same
    (algorithm, LLM, dataset) triple, only the first one is used.

    Args:
        csv_path: CSV file to read. Defaults to the location this script has
            always used.
        output_path: JSON file to write (UTF-8, 4-space indent).
        datasets: Dataset names to include for every algorithm/LLM pair.
        base_llm: LLM whose results are keyed by the bare algorithm name.

    Returns:
        None. The report is written to ``output_path``; algorithm/LLM pairs
        with no rows are reported on stdout and skipped.

    Raises:
        KeyError: if a required column (e.g. ``Algorithm``) is missing.
        ValueError: if a Score or Cost($) cell cannot be parsed as a number.
    """
    # Drop fully empty rows and normalise the raw headers used below.
    df = pd.read_csv(csv_path).dropna(how='all')
    df = df.rename(columns={
        'dataset': 'Dataset',
        'llm': 'LLM',
        'score\n(EM)': 'Score',
    })

    def to_float(value):
        """Parse a cell as float; missing cells give 0.0.

        Thousands separators (``"1,234.5"``) are stripped from string cells.
        """
        if pd.isna(value):
            return 0.0
        if isinstance(value, str):
            value = value.replace(',', '')
        return float(value)

    result = {
        "time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "results": {}
    }

    # The algorithm list does not depend on the LLM, so compute it once.
    algorithms = [a for a in df['Algorithm'].dropna().unique() if isinstance(a, str)]

    for llm in df['LLM'].dropna().unique():
        for algorithm in algorithms:
            algo_key = algorithm if llm == base_llm else f"{algorithm}-{llm}"

            algo_data = df[(df['Algorithm'] == algorithm) & (df['LLM'] == llm)]
            if algo_data.empty:
                print(f"No data found for algorithm '{algorithm}' and LLM '{llm}'")
                continue

            entry = {
                "META": {
                    "Algorithm": algorithm,
                    "LLM": llm,
                    "Eval Date": str(algo_data['Eval Date'].iloc[0])
                }
            }

            for dataset in datasets:
                # algo_data is already restricted to this algorithm and LLM.
                dataset_data = algo_data[algo_data['Dataset'] == dataset]
                if dataset_data.empty:
                    # Keep the key present so consumers can rely on it.
                    entry[dataset] = {"Score": 0.0, "Cost($)": 0.0}
                    continue

                first_row = dataset_data.iloc[0]
                entry[dataset] = {
                    "Score": to_float(first_row['Score']),
                    "Cost($)": to_float(first_row['Cost($)'])
                }

            result['results'][algo_key] = entry

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(result, f, indent=4, ensure_ascii=False)

if __name__ == "__main__":
    # Script entry point: write the detailed report first, then the overall one.
    for build_report in (process_csv_to_json, process_csv_to_overall_json):
        build_report()