CPU, TOKEN, env variables #4
opened by meg (HF staff)

Files changed:
- app.py +2 -2
- main_backend.py +3 -3
- scripts/create_request_file.py +3 -5
- src/backend/manage_requests.py +3 -3
- src/display/utils.py +3 -0
- src/envs.py +1 -1
- src/submission/check_validity.py +1 -1
- src/submission/submit.py +1 -1
app.py CHANGED

@@ -26,7 +26,7 @@ from src.display.utils import (
     WeightType,
     Precision
 )
-from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
+from src.envs import API, DEVICE, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
 from src.populate import get_evaluation_queue_df, get_leaderboard_df
 from src.submission.submit import add_new_eval
 
@@ -309,7 +309,7 @@ with demo:
         choices=[i.value.name for i in Precision if i != Precision.Unknown],
         label="Precision",
         multiselect=False,
-        value="float16",
+        value="float16" if DEVICE != "cpu" else "float32",
         interactive=True,
     )
     weight_type = gr.Dropdown(
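The UI-side effect is small but useful on a CPU-only Space: the Precision dropdown now defaults to float32, since float16 inference on CPU is poorly supported or very slow. A minimal sketch of the pattern, assuming a DEVICE string like the one exported from src/envs.py (not the Space's full app):

# Minimal sketch, not the Space's full UI: the default precision follows the device.
import gradio as gr

DEVICE = "cpu"  # in the Space this is imported from src.envs

with gr.Blocks() as demo:
    precision = gr.Dropdown(
        choices=["float16", "bfloat16", "float32"],
        label="Precision",
        multiselect=False,
        value="float16" if DEVICE != "cpu" else "float32",  # float32 on CPU-only hardware
        interactive=True,
    )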
main_backend.py CHANGED

@@ -9,7 +9,7 @@ from src.backend.run_eval_suite import run_evaluation
 from src.backend.manage_requests import check_completed_evals, get_eval_requests, set_eval_request
 from src.backend.sort_queue import sort_models_by_priority
 
-from src.envs import QUEUE_REPO, EVAL_REQUESTS_PATH_BACKEND, RESULTS_REPO, EVAL_RESULTS_PATH_BACKEND, DEVICE, API, LIMIT
+from src.envs import QUEUE_REPO, EVAL_REQUESTS_PATH_BACKEND, RESULTS_REPO, EVAL_RESULTS_PATH_BACKEND, DEVICE, API, LIMIT, TOKEN
 from src.about import Tasks, NUM_FEWSHOT
 TASKS_HARNESS = [task.value.benchmark for task in Tasks]
 
@@ -21,8 +21,8 @@ RUNNING_STATUS = "RUNNING"
 FINISHED_STATUS = "FINISHED"
 FAILED_STATUS = "FAILED"
 
-snapshot_download(repo_id=RESULTS_REPO, revision="main", local_dir=EVAL_RESULTS_PATH_BACKEND, repo_type="dataset", max_workers=60)
-snapshot_download(repo_id=QUEUE_REPO, revision="main", local_dir=EVAL_REQUESTS_PATH_BACKEND, repo_type="dataset", max_workers=60)
+snapshot_download(repo_id=RESULTS_REPO, revision="main", local_dir=EVAL_RESULTS_PATH_BACKEND, repo_type="dataset", max_workers=60, token=TOKEN)
+snapshot_download(repo_id=QUEUE_REPO, revision="main", local_dir=EVAL_REQUESTS_PATH_BACKEND, repo_type="dataset", max_workers=60, token=TOKEN)
 
 def run_auto_eval():
     current_pending_status = [PENDING_STATUS]
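Passing the token explicitly matters because the request and result repos are usually private datasets in the org; without it, the backend's snapshot_download calls fail with a 401/404. A hedged sketch of the same call, using a hypothetical dataset name:

# Sketch with a hypothetical repo id; TOKEN is the Space secret read in src/envs.py.
import os
from huggingface_hub import snapshot_download

TOKEN = os.environ.get("TOKEN")

snapshot_download(
    repo_id="my-org/requests",    # hypothetical private dataset
    revision="main",
    local_dir="./eval-queue-bk",
    repo_type="dataset",
    max_workers=60,
    token=TOKEN,                  # required for private repos
)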
scripts/create_request_file.py CHANGED

@@ -7,11 +7,9 @@ from datetime import datetime, timezone
 import click
 from colorama import Fore
 from huggingface_hub import HfApi, snapshot_download
+from src.envs import TOKEN, EVAL_REQUESTS_PATH, QUEUE_REPO
 
-
-QUEUE_REPO = "open-llm-leaderboard/requests"
-
-precisions = ("float16", "bfloat16", "8bit (LLM.int8)", "4bit (QLoRA / FP4)", "GPTQ")
+precisions = ("float16", "bfloat16", "8bit (LLM.int8)", "4bit (QLoRA / FP4)", "GPTQ", "float32")
 model_types = ("pretrained", "fine-tuned", "RL-tuned", "instruction-tuned")
 weight_types = ("Original", "Delta", "Adapter")
 
@@ -36,7 +34,7 @@ def get_model_size(model_info, precision: str):
 def main():
     api = HfApi()
     current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
-    snapshot_download(repo_id=QUEUE_REPO, revision="main", local_dir=EVAL_REQUESTS_PATH, repo_type="dataset")
+    snapshot_download(repo_id=QUEUE_REPO, revision="main", local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", token=TOKEN)
 
     model_name = click.prompt("Enter model name")
     revision = click.prompt("Enter revision", default="main")
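Since the precision choices now come from this tuple (and QUEUE_REPO is taken from src/envs.py instead of being hardcoded), the interactive prompt can offer float32 directly. A small sketch of how such a prompt can be constrained to the tuple; the script's exact prompt wording is not shown in this diff:

# Sketch only: constrain the precision prompt to the values in the tuple above.
import click

precisions = ("float16", "bfloat16", "8bit (LLM.int8)", "4bit (QLoRA / FP4)", "GPTQ", "float32")

precision = click.prompt(
    "Enter precision",
    type=click.Choice(precisions),
    default="float32",
)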
src/backend/manage_requests.py CHANGED

@@ -26,7 +26,7 @@ class EvalRequest:
     def get_model_args(self):
         model_args = f"pretrained={self.model},revision={self.revision}"
 
-        if self.precision in ["float16", "bfloat16"]:
+        if self.precision in ["float16", "bfloat16", "float32"]:
             model_args += f",dtype={self.precision}"
         # Quantized models need some added config, the install of bits and bytes, etc
         #elif self.precision == "8bit":

@@ -71,7 +71,7 @@ def get_eval_requests(job_status: list, local_dir: str, hf_repo: str) -> list[EvalRequest]:
     Returns:
         `list[EvalRequest]`: a list of model info dicts.
     """
-    snapshot_download(repo_id=hf_repo, revision="main", local_dir=local_dir, repo_type="dataset", max_workers=60)
+    snapshot_download(repo_id=hf_repo, revision="main", local_dir=local_dir, repo_type="dataset", max_workers=60, token=TOKEN)
     json_files = glob.glob(f"{local_dir}/**/*.json", recursive=True)
 
     eval_requests = []

@@ -97,7 +97,7 @@ def check_completed_evals(
     local_dir_results: str,
 ):
     """Checks if the currently running evals are completed, if yes, update their status on the hub."""
-    snapshot_download(repo_id=hf_repo_results, revision="main", local_dir=local_dir_results, repo_type="dataset", max_workers=60)
+    snapshot_download(repo_id=hf_repo_results, revision="main", local_dir=local_dir_results, repo_type="dataset", max_workers=60, token=TOKEN)
 
     running_evals = get_eval_requests(checked_status, hf_repo=hf_repo, local_dir=local_dir)
 
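The get_model_args change means a float32 request now yields an explicit dtype in the model arguments instead of silently omitting it. A simplified, self-contained sketch of that behaviour (the real dataclass carries more fields):

# Simplified sketch of EvalRequest.get_model_args; the real class has more fields.
from dataclasses import dataclass

@dataclass
class EvalRequest:
    model: str
    revision: str
    precision: str

    def get_model_args(self) -> str:
        model_args = f"pretrained={self.model},revision={self.revision}"
        # float32 is now passed through as an explicit dtype, like float16/bfloat16
        if self.precision in ["float16", "bfloat16", "float32"]:
            model_args += f",dtype={self.precision}"
        return model_args

print(EvalRequest("my-org/my-model", "main", "float32").get_model_args())
# -> pretrained=my-org/my-model,revision=main,dtype=float32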
src/display/utils.py CHANGED

@@ -94,6 +94,7 @@ class WeightType(Enum):
 class Precision(Enum):
     float16 = ModelDetails("float16")
     bfloat16 = ModelDetails("bfloat16")
+    float32 = ModelDetails("float32")
     #qt_8bit = ModelDetails("8bit")
     #qt_4bit = ModelDetails("4bit")
     #qt_GPTQ = ModelDetails("GPTQ")

@@ -104,6 +105,8 @@ class Precision(Enum):
             return Precision.float16
         if precision in ["torch.bfloat16", "bfloat16"]:
             return Precision.bfloat16
+        if precision in ["float32"]:
+            return Precision.float32
         #if precision in ["8bit"]:
         #    return Precision.qt_8bit
         #if precision in ["4bit"]:
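With the new member, precision strings read back from request files resolve to a real enum value instead of falling through to Unknown. A toy version of the lookup, simplified in that the real enum wraps ModelDetails objects rather than bare strings:

# Toy sketch of the extended Precision lookup; the real enum wraps ModelDetails.
from enum import Enum

class Precision(Enum):
    float16 = "float16"
    bfloat16 = "bfloat16"
    float32 = "float32"
    Unknown = "?"

    @staticmethod
    def from_str(precision: str) -> "Precision":
        if precision in ["torch.float16", "float16"]:
            return Precision.float16
        if precision in ["torch.bfloat16", "bfloat16"]:
            return Precision.bfloat16
        if precision in ["float32"]:
            return Precision.float32
        return Precision.Unknown

print(Precision.from_str("float32"))  # Precision.float32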
src/envs.py CHANGED

@@ -7,7 +7,7 @@ from huggingface_hub import HfApi
 TOKEN = os.environ.get("TOKEN") # A read/write token for your org
 
 OWNER = "demo-leaderboard-backend" # Change to your org - don't forget to create a results and request file
-DEVICE = "cpu" # cuda:0 if you add compute
+DEVICE = "cpu" # "cuda:0" if you add compute
 LIMIT = 20 # !!!! Should be None for actual evaluations!!!
 # ----------------------------------
 
src/submission/check_validity.py CHANGED

@@ -8,7 +8,7 @@ import huggingface_hub
 from huggingface_hub import ModelCard
 from huggingface_hub.hf_api import ModelInfo
 from transformers import AutoConfig
-from transformers.models.auto.tokenization_auto import
+from transformers.models.auto.tokenization_auto import AutoTokenizer
 
 def check_model_card(repo_id: str) -> tuple[bool, str]:
     """Checks if the model card and license exist and have been filled"""
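The repaired AutoTokenizer import is what is_model_on_hub relies on when test_tokenizer=True. Its body is not part of this diff; a rough, hedged sketch of the shape such a check usually takes, not the file's exact implementation:

# Hedged sketch only; not the exact implementation in check_validity.py.
from transformers import AutoConfig, AutoTokenizer

def is_model_on_hub(model_name: str, revision: str, token: str | None = None,
                    test_tokenizer: bool = False) -> tuple[bool, str, object]:
    try:
        config = AutoConfig.from_pretrained(model_name, revision=revision, token=token)
        if test_tokenizer:
            # Also fails for repos whose tokenizer cannot be loaded, not just missing repos
            AutoTokenizer.from_pretrained(model_name, revision=revision, token=token)
        return True, "", config
    except Exception as e:
        return False, f"was not found or could not be loaded on the hub: {e}", None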
src/submission/submit.py CHANGED

@@ -50,7 +50,7 @@ def add_new_eval(
         return styled_error(f'Base model "{base_model}" {error}')
 
     if not weight_type == "Adapter":
-        model_on_hub, error, _ = is_model_on_hub(model_name=model, revision=revision, test_tokenizer=True)
+        model_on_hub, error, _ = is_model_on_hub(model_name=model, revision=revision, token=TOKEN, test_tokenizer=True)
         if not model_on_hub:
             return styled_error(f'Model "{model}" {error}')
 