# src/about.py
from enum import Enum
from dataclasses import dataclass


# Metadata describing one benchmark subject
@dataclass
class TaskInfo:
    benchmark: str
    col_name: str
    metric: str
# Tasks enum listing the MMMLU subjects included in the leaderboard (some subjects are intentionally excluded)
class Tasks(Enum):
    Professional_Law = TaskInfo(benchmark='professional_law', col_name='Professional Law', metric='accuracy')
    Moral_Scenarios = TaskInfo(benchmark='moral_scenarios', col_name='Moral Scenarios', metric='accuracy')
    Miscellaneous = TaskInfo(benchmark='miscellaneous', col_name='Miscellaneous', metric='accuracy')
    High_School_Psychology = TaskInfo(benchmark='high_school_psychology', col_name='High School Psychology', metric='accuracy')
    High_School_Macroeconomics = TaskInfo(benchmark='high_school_macroeconomics', col_name='High School Macroeconomics', metric='accuracy')
    Elementary_Mathematics = TaskInfo(benchmark='elementary_mathematics', col_name='Elementary Mathematics', metric='accuracy')
    Prehistory = TaskInfo(benchmark='prehistory', col_name='Prehistory', metric='accuracy')
    Philosophy = TaskInfo(benchmark='philosophy', col_name='Philosophy', metric='accuracy')
    High_School_Biology = TaskInfo(benchmark='high_school_biology', col_name='High School Biology', metric='accuracy')
    Nutrition = TaskInfo(benchmark='nutrition', col_name='Nutrition', metric='accuracy')
    Professional_Accounting = TaskInfo(benchmark='professional_accounting', col_name='Professional Accounting', metric='accuracy')
    Professional_Medicine = TaskInfo(benchmark='professional_medicine', col_name='Professional Medicine', metric='accuracy')
    High_School_Mathematics = TaskInfo(benchmark='high_school_mathematics', col_name='High School Mathematics', metric='accuracy')
    Clinical_Knowledge = TaskInfo(benchmark='clinical_knowledge', col_name='Clinical Knowledge', metric='accuracy')
    Security_Studies = TaskInfo(benchmark='security_studies', col_name='Security Studies', metric='accuracy')
    High_School_Microeconomics = TaskInfo(benchmark='high_school_microeconomics', col_name='High School Microeconomics', metric='accuracy')
    High_School_World_History = TaskInfo(benchmark='high_school_world_history', col_name='High School World History', metric='accuracy')
    Conceptual_Physics = TaskInfo(benchmark='conceptual_physics', col_name='Conceptual Physics', metric='accuracy')
    Marketing = TaskInfo(benchmark='marketing', col_name='Marketing', metric='accuracy')
    High_School_Statistics = TaskInfo(benchmark='high_school_statistics', col_name='High School Statistics', metric='accuracy')
    High_School_US_History = TaskInfo(benchmark='high_school_us_history', col_name='High School US History', metric='accuracy')
    High_School_Chemistry = TaskInfo(benchmark='high_school_chemistry', col_name='High School Chemistry', metric='accuracy')
    Sociology = TaskInfo(benchmark='sociology', col_name='Sociology', metric='accuracy')
    High_School_Geography = TaskInfo(benchmark='high_school_geography', col_name='High School Geography', metric='accuracy')
    High_School_Government_and_Politics = TaskInfo(benchmark='high_school_government_and_politics', col_name='High School Government and Politics', metric='accuracy')
    College_Medicine = TaskInfo(benchmark='college_medicine', col_name='College Medicine', metric='accuracy')
    Virology = TaskInfo(benchmark='virology', col_name='Virology', metric='accuracy')
    High_School_European_History = TaskInfo(benchmark='high_school_european_history', col_name='High School European History', metric='accuracy')
    Logical_Fallacies = TaskInfo(benchmark='logical_fallacies', col_name='Logical Fallacies', metric='accuracy')
    Astronomy = TaskInfo(benchmark='astronomy', col_name='Astronomy', metric='accuracy')
    High_School_Physics = TaskInfo(benchmark='high_school_physics', col_name='High School Physics', metric='accuracy')
    Electrical_Engineering = TaskInfo(benchmark='electrical_engineering', col_name='Electrical Engineering', metric='accuracy')
    College_Biology = TaskInfo(benchmark='college_biology', col_name='College Biology', metric='accuracy')
    Anatomy = TaskInfo(benchmark='anatomy', col_name='Anatomy', metric='accuracy')
    Formal_Logic = TaskInfo(benchmark='formal_logic', col_name='Formal Logic', metric='accuracy')
    International_Law = TaskInfo(benchmark='international_law', col_name='International Law', metric='accuracy')
    Econometrics = TaskInfo(benchmark='econometrics', col_name='Econometrics', metric='accuracy')
    Machine_Learning = TaskInfo(benchmark='machine_learning', col_name='Machine Learning', metric='accuracy')
    Management = TaskInfo(benchmark='management', col_name='Management', metric='accuracy')
    College_Physics = TaskInfo(benchmark='college_physics', col_name='College Physics', metric='accuracy')
    US_Foreign_Policy = TaskInfo(benchmark='us_foreign_policy', col_name='US Foreign Policy', metric='accuracy')
    Business_Ethics = TaskInfo(benchmark='business_ethics', col_name='Business Ethics', metric='accuracy')
    College_Mathematics = TaskInfo(benchmark='college_mathematics', col_name='College Mathematics', metric='accuracy')
    College_Chemistry = TaskInfo(benchmark='college_chemistry', col_name='College Chemistry', metric='accuracy')
    College_Computer_Science = TaskInfo(benchmark='college_computer_science', col_name='College Computer Science', metric='accuracy')
    High_School_Computer_Science = TaskInfo(benchmark='high_school_computer_science', col_name='High School Computer Science', metric='accuracy')
    Computer_Security = TaskInfo(benchmark='computer_security', col_name='Computer Security', metric='accuracy')
    Global_Facts = TaskInfo(benchmark='global_facts', col_name='Global Facts', metric='accuracy')
    Medical_Genetics = TaskInfo(benchmark='medical_genetics', col_name='Medical Genetics', metric='accuracy')
    Abstract_Algebra = TaskInfo(benchmark='abstract_algebra', col_name='Abstract Algebra', metric='accuracy')
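
# Illustrative sketch (assumption): app.py is expected to derive its leaderboard
# columns from the Tasks enum, roughly as below. The names COLS and BENCHMARK_COLS
# are hypothetical examples and may differ from what app.py actually uses.
COLS = ["Model", "Average ⬆️"] + [task.value.col_name for task in Tasks]
BENCHMARK_COLS = [task.value.benchmark for task in Tasks]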
# Display constants expected by app.py
TITLE = """
<div align="center">
<a href="https://imgbb.com/">
<img src="https://i.ibb.co/k1gQsTw/Blue-and-White-Modern-Technology-Company-Logo-2.png" alt="Blue-and-White-Modern-Technology-Company-Logo-2" border="0" width="500" height="auto">
</a>
</div>
"""
INTRODUCTION_TEXT = """
<div style="background-color:#001f3f; padding: 20px; border-radius: 10px;">
<h1 style="color:#ffffff; font-family: Arial, sans-serif; text-align: center;">
Welcome to <span style="color:#f39c12;">ILMAAM</span>: Index for Language Models for Arabic Assessment on Multitasks!
</h1>
<p style="color:#d4d4d4; font-family: 'Verdana', sans-serif; font-size: 18px; text-align: center;">
This leaderboard showcases the performance of various Arabic LLMs on the
<strong style="color:#d4d4d4;">newly released MMMLU OpenAI Benchmark</strong> across different subjects.
</p>
</div>
"""
LLM_BENCHMARKS_TEXT = """
## About ILMAAM
ILMAAM is built on the Arabic portion of OpenAI's MMMLU (Multilingual Massive Multitask Language Understanding) benchmark, which evaluates models across a wide range of subjects.
## How to Interpret the Leaderboard
- **Model**: The name of the model evaluated.
- **Average ⬆️**: The average accuracy across all subjects.
- **Subject Columns**: The accuracy (%) for each individual subject.
## How to Submit Your Model
Go to the **Submit here!** tab and provide your model details to have it evaluated and appear on the leaderboard.
"""
EVALUATION_QUEUE_TEXT = """
Below are the lists of models that have been evaluated, are currently being evaluated, or are pending evaluation.
"""
CITATION_BUTTON_LABEL = "Citation"
CITATION_BUTTON_TEXT = """
If you use this leaderboard or the MMMLU dataset in your research, please cite:

@misc{ILMAAM,
  author    = {Nacar, Omer},
  title     = {ILMAAM: Index for Language Models for Arabic Assessment on Multitasks},
  year      = {2024},
  publisher = {Robotics and Internet-of-Things Lab, Prince Sultan University, Riyadh}
}

Acknowledgment: Thanks to Prince Sultan University and the RIOTU Lab for their support.
"""