Spaces:
Sleeping
Sleeping
meg-huggingface
committed on
Commit
·
9bd728a
1
Parent(s):
9be7bfc
Trying toxigen download
Browse files
- main_backend.py +5 -0
- src/backend/run_eval_suite.py +0 -2
- src/display/utils.py +10 -10
main_backend.py
CHANGED
@@ -2,6 +2,7 @@ import logging
|
|
2 |
import pprint
|
3 |
|
4 |
from huggingface_hub import snapshot_download
|
|
|
5 |
|
6 |
logging.getLogger("openai").setLevel(logging.WARNING)
|
7 |
|
@@ -21,6 +22,10 @@ RUNNING_STATUS = "RUNNING"
|
|
21 |
FINISHED_STATUS = "FINISHED"
|
22 |
FAILED_STATUS = "FAILED"
|
23 |
|
|
|
|
|
|
|
|
|
24 |
print("Downloading snapshot from %s to %s" % (RESULTS_REPO, EVAL_RESULTS_PATH_BACKEND))
|
25 |
snapshot_download(repo_id=RESULTS_REPO, revision="main", local_dir=EVAL_RESULTS_PATH_BACKEND, repo_type="dataset", token=TOKEN, max_workers=60)
|
26 |
snapshot_download(repo_id=QUEUE_REPO, revision="main", local_dir=EVAL_REQUESTS_PATH_BACKEND, repo_type="dataset", token=TOKEN, max_workers=60)
|
|
|
2 |
import pprint
|
3 |
|
4 |
from huggingface_hub import snapshot_download
|
5 |
+
from datasets import load_dataset
|
6 |
|
7 |
logging.getLogger("openai").setLevel(logging.WARNING)
|
8 |
|
|
|
22 |
FINISHED_STATUS = "FINISHED"
|
23 |
FAILED_STATUS = "FAILED"
|
24 |
|
25 |
+
print("JUST trying toxigen access...")
|
26 |
+
load_dataset("skg/toxigen-data", token=TOKEN)
|
27 |
+
print("Done.")
|
28 |
+
|
29 |
print("Downloading snapshot from %s to %s" % (RESULTS_REPO, EVAL_RESULTS_PATH_BACKEND))
|
30 |
snapshot_download(repo_id=RESULTS_REPO, revision="main", local_dir=EVAL_RESULTS_PATH_BACKEND, repo_type="dataset", token=TOKEN, max_workers=60)
|
31 |
snapshot_download(repo_id=QUEUE_REPO, revision="main", local_dir=EVAL_REQUESTS_PATH_BACKEND, repo_type="dataset", token=TOKEN, max_workers=60)
|
src/backend/run_eval_suite.py
CHANGED
@@ -2,7 +2,6 @@ import json
|
|
2 |
import os
|
3 |
import logging
|
4 |
from datetime import datetime
|
5 |
-
import spaces
|
6 |
|
7 |
from lm_eval import tasks, evaluator, utils
|
8 |
|
@@ -11,7 +10,6 @@ from src.backend.manage_requests import EvalRequest
|
|
11 |
|
12 |
logging.getLogger("openai").setLevel(logging.WARNING)
|
13 |
|
14 |
-
@spaces.GPU
|
15 |
def run_evaluation(eval_request: EvalRequest, task_names, num_fewshot, batch_size, device, local_dir: str, results_repo: str, no_cache=True, limit=None):
|
16 |
if limit:
|
17 |
print(
|
|
|
2 |
import os
|
3 |
import logging
|
4 |
from datetime import datetime
|
|
|
5 |
|
6 |
from lm_eval import tasks, evaluator, utils
|
7 |
|
|
|
10 |
|
11 |
logging.getLogger("openai").setLevel(logging.WARNING)
|
12 |
|
|
|
13 |
def run_evaluation(eval_request: EvalRequest, task_names, num_fewshot, batch_size, device, local_dir: str, results_repo: str, no_cache=True, limit=None):
|
14 |
if limit:
|
15 |
print(
|
src/display/utils.py
CHANGED
@@ -31,15 +31,15 @@ auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average
|
|
31 |
for task in Tasks:
|
32 |
auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
|
33 |
# Model information
|
34 |
-
auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
|
35 |
-
auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
|
36 |
-
auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
|
37 |
-
auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
|
38 |
-
auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
|
39 |
-
auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
|
40 |
-
auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)])
|
41 |
-
auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
|
42 |
-
auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
|
43 |
# Dummy column for the search bar (hidden by the custom CSS)
|
44 |
auto_eval_column_dict.append(["dummy", ColumnContent, ColumnContent("model_name_for_query", "str", False, dummy=True)])
|
45 |
|
@@ -127,7 +127,7 @@ EVAL_TYPES = [c.type for c in fields(EvalQueueColumn)]
|
|
127 |
BENCHMARK_COLS = [t.value.col_name for t in Tasks]
|
128 |
|
129 |
NUMERIC_INTERVALS = {
|
130 |
-
"?": pd.Interval(-1, 0, closed="right"),
|
131 |
"~1.5": pd.Interval(0, 2, closed="right"),
|
132 |
"~3": pd.Interval(2, 4, closed="right"),
|
133 |
"~7": pd.Interval(4, 9, closed="right"),
|
|
|
31 |
for task in Tasks:
|
32 |
auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
|
33 |
# Model information
|
34 |
+
auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False, hidden=True)])
|
35 |
+
auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False, hidden=True)])
|
36 |
+
auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True, hidden=True)])
|
37 |
+
auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False, hidden=True)])
|
38 |
+
auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False, hidden=True)])
|
39 |
+
auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False, hidden=True)])
|
40 |
+
auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False, hidden=True)])
|
41 |
+
auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False, hidden=True)])
|
42 |
+
auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False, hidden=True)])
|
43 |
# Dummy column for the search bar (hidden by the custom CSS)
|
44 |
auto_eval_column_dict.append(["dummy", ColumnContent, ColumnContent("model_name_for_query", "str", False, dummy=True)])
|
45 |
|
|
|
127 |
BENCHMARK_COLS = [t.value.col_name for t in Tasks]
|
128 |
|
129 |
NUMERIC_INTERVALS = {
|
130 |
+
" ? ": pd.Interval(-1, 0, closed="right"),
|
131 |
"~1.5": pd.Interval(0, 2, closed="right"),
|
132 |
"~3": pd.Interval(2, 4, closed="right"),
|
133 |
"~7": pd.Interval(4, 9, closed="right"),
|