Spaces: Sleeping
leukas committed
Commit: f7ac2a9
Parent(s): 80cbf2f
initial commit
Browse files:
- README.md +2 -2
- app.py +85 -0
- src/__pycache__/about.cpython-310.pyc +0 -0
- src/__pycache__/envs.cpython-310.pyc +0 -0
- src/about.py +47 -0
- src/display/__pycache__/css_html_js.cpython-310.pyc +0 -0
- src/display/__pycache__/utils.cpython-310.pyc +0 -0
- src/display/css_html_js.py +105 -0
- src/display/formatting.py +27 -0
- src/display/utils.py +110 -0
- src/envs.py +21 -0
README.md
CHANGED
@@ -1,6 +1,6 @@
 ---
 title: Cute Leaderboard
-emoji:
+emoji: π
 colorFrom: blue
 colorTo: blue
 sdk: gradio
@@ -9,4 +9,4 @@ app_file: app.py
 pinned: false
 ---
 
-
+
app.py
ADDED
@@ -0,0 +1,85 @@
import gradio as gr
from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import snapshot_download

from src.about import (
    INTRODUCTION_TEXT,
    LLM_BENCHMARKS_TEXT,
    TITLE,
)
from src.display.css_html_js import custom_css
from src.display.utils import (
    AutoEvalColumn,
    fields,
)
from src.envs import API, EVAL_RESULTS_PATH, REPO_ID, RESULTS_REPO, TOKEN


def restart_space():
    API.restart_space(repo_id=REPO_ID)


### Space initialisation
try:
    print(EVAL_RESULTS_PATH)
    snapshot_download(
        repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
    )
except Exception:
    restart_space()


LEADERBOARD_DF = pd.read_csv(f"{EVAL_RESULTS_PATH}/results.csv")
# multiply each score column by 100 (fractions -> percentages), skipping the first (model name) column
for col in LEADERBOARD_DF.columns[1:]:
    LEADERBOARD_DF[col] = LEADERBOARD_DF[col] * 100

# add an average column; numeric_only=True skips the non-numeric model column
LEADERBOARD_DF["Average"] = LEADERBOARD_DF.mean(axis=1, numeric_only=True)

# move the average to the front, right after the model column
LEADERBOARD_DF = LEADERBOARD_DF[["model", "Average"] + [col for col in LEADERBOARD_DF.columns if col not in ["model", "Average"]]]

# round to the nearest tenth
LEADERBOARD_DF = LEADERBOARD_DF.round(1)

# sort by average, best model first
LEADERBOARD_DF = LEADERBOARD_DF.sort_values(by="Average", ascending=False)


def init_leaderboard(dataframe):
    if dataframe is None or dataframe.empty:
        raise ValueError("Leaderboard DataFrame is empty or None.")
    return Leaderboard(
        value=dataframe,
        datatype=[c.type for c in fields(AutoEvalColumn)],
        select_columns=SelectColumns(
            default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
            cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
            label="Select Columns to Display:",
        ),
        search_columns=[AutoEvalColumn.model.name],
        hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
        bool_checkboxgroup_label="Hide models",
        interactive=False,
    )


demo = gr.Blocks(css=custom_css)
with demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
            leaderboard = init_leaderboard(LEADERBOARD_DF)

        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")


# restart the Space every 30 minutes so it re-downloads fresh results
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=1800)
scheduler.start()
demo.queue(default_concurrency_limit=40).launch()
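app.py assumes the downloaded results dataset contains a results.csv whose first column is the model name and whose remaining columns are per-task scores as fractions in [0, 1]. A minimal sketch of producing such a file (the model id and scores are hypothetical, and the column names are assumed to match the col_name values in src/about.py):

import pandas as pd

# hypothetical scores for a single model, for illustration only
row = {"model": "example-org/example-7b", "spell": 0.52, "spell_inverse": 0.31}
row.update({c: 0.5 for c in ["cont_char", "cont_word", "orth", "sem", "ins_char", "ins_word",
                             "del_char", "del_word", "sub_char", "sub_word", "swap_char", "swap_word"]})
pd.DataFrame([row]).to_csv("results.csv", index=False)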
src/__pycache__/about.cpython-310.pyc
ADDED
Binary file (1.45 kB)

src/__pycache__/envs.cpython-310.pyc
ADDED
Binary file (480 Bytes)
src/about.py
ADDED
@@ -0,0 +1,47 @@
from dataclasses import dataclass
from enum import Enum


@dataclass
class Task:
    benchmark: str
    metric: str
    col_name: str


# Select your tasks here
# ---------------------------------------------------
class Tasks(Enum):
    # task_key in the json file, metric_key in the json file, name to display in the leaderboard
    task0 = Task("Spelling", "exact_match", "spell")
    task1 = Task("Inverse Spelling", "exact_match", "spell_inverse")
    task2 = Task("Contains Char", "exact_match", "cont_char")
    task3 = Task("contains_word", "exact_match", "cont_word")
    task4 = Task("orth", "exact_match", "orth")
    task5 = Task("sem", "exact_match", "sem")
    task6 = Task("insert_char", "exact_match", "ins_char")
    task7 = Task("insert_word", "exact_match", "ins_word")
    task8 = Task("del_char", "exact_match", "del_char")
    task9 = Task("del_word", "exact_match", "del_word")
    task10 = Task("sub_char", "exact_match", "sub_char")
    task11 = Task("sub_word", "exact_match", "sub_word")
    task12 = Task("swap_char", "exact_match", "swap_char")
    task13 = Task("swap_word", "exact_match", "swap_word")


NUM_FEWSHOT = 0  # Change to your few-shot setting
# ---------------------------------------------------

# Your leaderboard name
TITLE = """<h1 align="center" id="space-title">CUTE Leaderboard</h1>"""

# What does your leaderboard evaluate?
INTRODUCTION_TEXT = """
This is the evaluation leaderboard for CUTE, an orthographic understanding benchmark.
"""

# Which evaluations are you running? How can people reproduce what you have?
LLM_BENCHMARKS_TEXT = f"""
## How it works
For more details, visit our repo: https://github.com/leukas/cute
"""
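Each Tasks member bundles the benchmark key, the metric, and the short column name shown in the table, so the display layer can iterate over the enum instead of hard-coding columns. A quick illustration (output abbreviated):

from src.about import Tasks

for task in Tasks:
    print(f"{task.value.benchmark} -> {task.value.col_name} ({task.value.metric})")
# Spelling -> spell (exact_match)
# Inverse Spelling -> spell_inverse (exact_match)
# ...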
src/display/__pycache__/css_html_js.cpython-310.pyc
ADDED
Binary file (1.9 kB)

src/display/__pycache__/utils.cpython-310.pyc
ADDED
Binary file (4.18 kB)
src/display/css_html_js.py
ADDED
@@ -0,0 +1,105 @@
custom_css = """

.markdown-text {
    font-size: 16px !important;
}

#models-to-add-text {
    font-size: 18px !important;
}

#citation-button span {
    font-size: 16px !important;
}

#citation-button textarea {
    font-size: 16px !important;
}

#citation-button > label > button {
    margin: 6px;
    transform: scale(1.3);
}

#leaderboard-table {
    margin-top: 15px;
}

#leaderboard-table-lite {
    margin-top: 15px;
}

#search-bar-table-box > div:first-child {
    background: none;
    border: none;
}

#search-bar {
    padding: 0px;
}

/* Limit the width of the first AutoEvalColumn so that names don't expand too much */
table td:first-child,
table th:first-child {
    max-width: 400px;
    overflow: auto;
    white-space: nowrap;
}

.tab-buttons button {
    font-size: 20px;
}

#scale-logo {
    border-style: none !important;
    box-shadow: none;
    display: block;
    margin-left: auto;
    margin-right: auto;
    max-width: 600px;
}

#scale-logo .download {
    display: none;
}

#filter_type {
    border: 0;
    padding-left: 0;
    padding-top: 0;
}

#filter_type label {
    display: flex;
}

#filter_type label > span {
    margin-top: var(--spacing-lg);
    margin-right: 0.5em;
}

#filter_type label > .wrap {
    width: 103px;
}

#filter_type label > .wrap .wrap-inner {
    padding: 2px;
}

#filter_type label > .wrap .wrap-inner input {
    width: 1px;
}

#filter-columns-type {
    border: 0;
    padding: 0.5;
}

#filter-columns-size {
    border: 0;
    padding: 0.5;
}

#box-filter > .form {
    border: 0;
}
"""

get_window_url_params = """
function(url_params) {
    const params = new URLSearchParams(window.location.search);
    url_params = Object.fromEntries(params);
    return url_params;
}
"""
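The get_window_url_params snippet is defined here but never wired up in app.py. If URL-parameter handling is wanted later, a sketch of how it could be attached, assuming a Gradio version whose event listeners accept a js argument (older versions used _js), with url_params as a hypothetical hidden component:

import gradio as gr

from src.display.css_html_js import get_window_url_params

with gr.Blocks() as demo:
    # hidden component to receive the parsed query parameters
    url_params = gr.JSON(visible=False)
    # the JS snippet runs client-side on page load; its return value
    # is passed through the identity function into the component
    demo.load(fn=lambda params: params, inputs=[url_params],
              outputs=[url_params], js=get_window_url_params)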
src/display/formatting.py
ADDED
@@ -0,0 +1,27 @@
def model_hyperlink(link, model_name):
    return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'


def make_clickable_model(model_name):
    # link the model name in the table to its Hugging Face Hub page
    link = f"https://huggingface.co/{model_name}"
    return model_hyperlink(link, model_name)


def styled_error(error):
    return f"<p style='color: red; font-size: 20px; text-align: center;'>{error}</p>"


def styled_warning(warn):
    return f"<p style='color: orange; font-size: 20px; text-align: center;'>{warn}</p>"


def styled_message(message):
    return f"<p style='color: green; font-size: 20px; text-align: center;'>{message}</p>"


def has_no_nan_values(df, columns):
    # True for each row with no missing values in the given columns
    return df[columns].notna().all(axis=1)


def has_nan_values(df, columns):
    # True for each row with at least one missing value in the given columns
    return df[columns].isna().any(axis=1)
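These helpers return HTML strings that render inside markdown-typed leaderboard cells. For instance (the model id is hypothetical):

from src.display.formatting import make_clickable_model

print(make_clickable_model("example-org/example-7b"))
# <a target="_blank" href="https://huggingface.co/example-org/example-7b" ...>example-org/example-7b</a>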
src/display/utils.py
ADDED
@@ -0,0 +1,110 @@
from dataclasses import dataclass, make_dataclass
from enum import Enum

import pandas as pd

from src.about import Tasks


def fields(raw_class):
    return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]


# These classes are for user-facing column names,
# to avoid having to change them all around the code
# when a modification is needed
@dataclass
class ColumnContent:
    name: str
    type: str
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False


## Leaderboard columns
auto_eval_column_dict = []
# Init
# auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("model", "markdown", True, never_hidden=True)])
# Scores
auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average", "number", True)])
for task in Tasks:
    auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
# Model information
# auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)])
auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])

# We use make_dataclass to dynamically fill the scores from Tasks
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)


## For the queue columns in the submission tab
@dataclass(frozen=True)
class EvalQueueColumn:  # Queue column
    model = ColumnContent("model", "markdown", True)
    revision = ColumnContent("revision", "str", True)
    private = ColumnContent("private", "bool", True)
    precision = ColumnContent("precision", "str", True)
    weight_type = ColumnContent("weight_type", "str", "Original")
    status = ColumnContent("status", "str", True)


## All the model information that we might need
@dataclass
class ModelDetails:
    name: str
    display_name: str = ""
    symbol: str = ""  # emoji


class ModelType(Enum):
    PT = ModelDetails(name="pretrained", symbol="🟢")
    FT = ModelDetails(name="fine-tuned", symbol="🔶")
    IFT = ModelDetails(name="instruction-tuned", symbol="⭕")
    RL = ModelDetails(name="RL-tuned", symbol="🟦")
    Unknown = ModelDetails(name="", symbol="?")

    def to_str(self, separator=" "):
        return f"{self.value.symbol}{separator}{self.value.name}"

    @staticmethod
    def from_str(type):
        if "fine-tuned" in type or "🔶" in type:
            return ModelType.FT
        if "pretrained" in type or "🟢" in type:
            return ModelType.PT
        if "RL-tuned" in type or "🟦" in type:
            return ModelType.RL
        if "instruction-tuned" in type or "⭕" in type:
            return ModelType.IFT
        return ModelType.Unknown


class WeightType(Enum):
    Adapter = ModelDetails("Adapter")
    Original = ModelDetails("Original")
    Delta = ModelDetails("Delta")


class Precision(Enum):
    float16 = ModelDetails("float16")
    bfloat16 = ModelDetails("bfloat16")
    Unknown = ModelDetails("?")

    @staticmethod
    def from_str(precision):
        if precision in ["torch.float16", "float16"]:
            return Precision.float16
        if precision in ["torch.bfloat16", "bfloat16"]:
            return Precision.bfloat16
        return Precision.Unknown


# Column selection
COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]

EVAL_COLS = [c.name for c in fields(EvalQueueColumn)]
EVAL_TYPES = [c.type for c in fields(EvalQueueColumn)]

BENCHMARK_COLS = [t.value.col_name for t in Tasks]
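Because AutoEvalColumn is built dynamically with make_dataclass, the easiest way to see the resulting schema is to iterate it the same way app.py does. A small illustration (output abbreviated):

from src.display.utils import AutoEvalColumn, fields

for c in fields(AutoEvalColumn):
    print(c.name, c.type, c.displayed_by_default)
# model markdown True
# Average number True
# spell number True
# ...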
src/envs.py
ADDED
@@ -0,0 +1,21 @@
import os

from huggingface_hub import HfApi

# Info to change for your repository
# ----------------------------------
TOKEN = os.environ.get("HF_TOKEN")  # A read/write token for your org

OWNER = "leukas"  # Change to your org - don't forget to create a results and request dataset, with the correct format!
# ----------------------------------

REPO_ID = f"{OWNER}/cute_leaderboard"
RESULTS_REPO = f"{OWNER}/cute_results"

# If you set up a cache later, just change HF_HOME
CACHE_PATH = os.getenv("HF_HOME", ".")

# Local caches
EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "results")

API = HfApi(token=TOKEN)