"""Helpers that build the leaderboard and evaluation-queue tables and preselect
the fixed evaluation questions."""

import os
import json

import pandas as pd

from src.display.utils import COLUMNS, EVAL_COLS, Tasks
from src.envs import EVAL_RESULTS_PATH, FIXED_QUESTIONS_FILE

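# Illustrative sketch of a per-model result file that get_leaderboard_df expects to
# find in eval_results_path. Only the top-level "config"/"results" split and the
# "model"/"model_name"/"average" keys are relied on below; the remaining field names
# and values are assumptions shown for context, not part of a guaranteed schema:
# {
#     "config": {"model_name": "org/some-model", "precision": "float16"},
#     "results": {"subject_a": 0.71, "subject_b": 0.64, "average": 0.675}
# }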
def get_leaderboard_df(eval_results_path, eval_requests_path, cols, benchmark_cols):
    """Collect all JSON result files under eval_results_path into a leaderboard DataFrame."""
    df = pd.DataFrame(columns=cols)

    if os.path.exists(eval_results_path):
        result_files = [
            os.path.join(eval_results_path, f)
            for f in os.listdir(eval_results_path)
            if f.endswith('.json')
        ]

        data_list = []
        for file in result_files:
            with open(file, 'r') as f:
                data = json.load(f)

            # Flatten the nested "config" and "results" sections into a single row.
            flattened_data = {}
            flattened_data.update(data.get('config', {}))
            flattened_data.update(data.get('results', {}))
            data_list.append(flattened_data)

        if data_list:
            df = pd.DataFrame(data_list)

    # Normalise the model column name across result files.
    if 'model' not in df.columns and 'model_name' in df.columns:
        df.rename(columns={'model_name': 'model'}, inplace=True)

    # Guarantee every expected column exists, even when no result file provided it.
    for col in cols:
        if col not in df.columns:
            df[col] = None

    # Rank models by their average score, best first.
    if 'average' in df.columns:
        df = df.sort_values(by=['average'], ascending=False)

    return df

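# Illustrative sketch of an evaluation request file read by get_evaluation_queue_df.
# Only the "status" key (with values "finished", "running" or "pending") is relied on
# below; the other fields are assumptions shown for context:
# {"model": "org/some-model", "status": "pending", "submitted_time": "2024-01-01T00:00:00Z"}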
def get_evaluation_queue_df(eval_requests_path, eval_cols):
    """Split the evaluation request files into finished, running and pending queues."""
    finished_df = pd.DataFrame(columns=eval_cols)
    running_df = pd.DataFrame(columns=eval_cols)
    pending_df = pd.DataFrame(columns=eval_cols)

    if os.path.exists(eval_requests_path):
        request_files = [
            os.path.join(eval_requests_path, f)
            for f in os.listdir(eval_requests_path)
            if f.endswith('.json')
        ]

        data_list = []
        for file in request_files:
            with open(file, 'r') as f:
                data = json.load(f)
            data_list.append(data)

        if data_list:
            df = pd.DataFrame(data_list)

            # Each request carries a "status" field describing its place in the queue.
            finished_df = df[df['status'] == 'finished']
            running_df = df[df['status'] == 'running']
            pending_df = df[df['status'] == 'pending']

    return finished_df, running_df, pending_df

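# The dataset CSV loaded below is expected to contain one row per question with a
# "Subject" column whose values match the benchmark names defined in Tasks; all other
# columns (question text, choices, answer, ...) are carried through unchanged. The
# column names besides "Subject" are assumptions about the dataset, not requirements
# imposed by this function.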
def preselect_fixed_questions(dataset_path, num_questions_per_subject=30, fixed_questions_file='fixed_questions.json'):
    """
    Preselect a fixed number of questions per subject and save them to a JSON file
    under EVAL_RESULTS_PATH.
    """
    if not os.path.exists(dataset_path):
        raise FileNotFoundError(f"Dataset file not found at {dataset_path}")

    dataset = pd.read_csv(dataset_path)

    fixed_questions = {}

    for task in Tasks:
        subject = task.value.benchmark
        subject_questions = dataset[dataset['Subject'] == subject]

        if len(subject_questions) < num_questions_per_subject:
            raise ValueError(
                f"Not enough questions for subject '{subject}'. "
                f"Required: {num_questions_per_subject}, Available: {len(subject_questions)}"
            )

        # A fixed random_state keeps the selection reproducible across runs.
        selected_questions = subject_questions.sample(n=num_questions_per_subject, random_state=42)
        fixed_questions[subject] = selected_questions.to_dict(orient='records')

    output_path = os.path.join(EVAL_RESULTS_PATH, fixed_questions_file)
    with open(output_path, 'w') as f:
        json.dump(fixed_questions, f, indent=4)

    print(f"Fixed questions preselected and saved to {output_path}")

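# For reference, the file written by preselect_fixed_questions has roughly this shape
# (keys are the Tasks benchmark names; the per-question fields simply mirror the
# columns of the input CSV, so the ones shown here are illustrative):
# {
#     "some_subject": [
#         {"Subject": "some_subject", "Question": "...", "Answer": "..."},
#         ...
#     ]
# }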
if __name__ == "__main__":
    # "your_dataset.csv" is a placeholder; point this at the real question dataset.
    DATASET_PATH = os.path.join(EVAL_RESULTS_PATH, "your_dataset.csv")

    preselect_fixed_questions(DATASET_PATH, num_questions_per_subject=30, fixed_questions_file=FIXED_QUESTIONS_FILE)