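"""Enrich leaderboard rows with Hub metadata (license, likes, parameter count),
with model-type information read from eval-queue request files, and with flags
for models under discussion."""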
import glob
import json
import os
import re
from typing import List

import huggingface_hub
from huggingface_hub import HfApi
from tqdm import tqdm

from src.utils_display import AutoEvalColumn, model_hyperlink
from src.auto_leaderboard.model_metadata_type import ModelType, model_type_from_str, MODEL_TYPE_METADATA
from src.auto_leaderboard.model_metadata_flags import FLAGGED_MODELS

# Falls back to anonymous Hub access when H4_TOKEN is not set.
api = HfApi(token=os.environ.get("H4_TOKEN", None))


def get_model_infos_from_hub(leaderboard_data: List[dict]):
    for model_data in tqdm(leaderboard_data):
        model_name = model_data["model_name_for_query"]
        try:
            model_info = api.model_info(model_name)
        except huggingface_hub.utils._errors.RepositoryNotFoundError:
            print("Repo not found!", model_name)
            model_data[AutoEvalColumn.license.name] = None
            model_data[AutoEvalColumn.likes.name] = None
            model_data[AutoEvalColumn.params.name] = get_model_size(model_name, None)
            continue

        model_data[AutoEvalColumn.license.name] = get_model_license(model_info)
        model_data[AutoEvalColumn.likes.name] = get_model_likes(model_info)
        model_data[AutoEvalColumn.params.name] = get_model_size(model_name, model_info)


def get_model_license(model_info):
    try:
        return model_info.cardData["license"]
    except Exception:
        return None


def get_model_likes(model_info):
    return model_info.likes


size_pattern = re.compile(r"(\d\.)?\d+(b|m)")
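# Illustrative matches, derived from the pattern above: "llama-7b" -> "7b" -> 7.0,
# "bloom-560m" -> "560m" -> 0.56, "pythia-1.3b" -> "1.3b" -> 1.3.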


def get_model_size(model_name, model_info):
    # In billions
    try:
        return round(model_info.safetensors["total"] / 1e9, 3)
    except AttributeError:
        # No safetensors metadata (or model_info is None): fall back to parsing
        # the size out of the model name; a failed match raises AttributeError too.
        try:
            size_match = re.search(size_pattern, model_name.lower())
            size = size_match.group(0)
            return round(float(size[:-1]) if size[-1] == "b" else float(size[:-1]) / 1e3, 3)
        except AttributeError:
            return None


def get_model_type(leaderboard_data: List[dict]):
    for model_data in leaderboard_data:
        request_files = os.path.join(
            "eval-queue", model_data["model_name_for_query"] + "_eval_request_*" + ".json"
        )
        request_files = glob.glob(request_files)

        # Select the correct request file (by precision)
        request_file = ""
        if len(request_files) == 1:
            request_file = request_files[0]
        elif len(request_files) > 1:
            request_files = sorted(request_files, reverse=True)
            for tmp_request_file in request_files:
                with open(tmp_request_file, "r") as f:
                    req_content = json.load(f)
                    if req_content["status"] == "FINISHED" and req_content["precision"] == model_data["Precision"].split(".")[-1]:
                        request_file = tmp_request_file

        if request_file == "":
            model_data[AutoEvalColumn.model_type.name] = ""
            model_data[AutoEvalColumn.model_type_symbol.name] = ""
            continue

        with open(request_file, "r") as f:
            request = json.load(f)
        # `is_delta` only feeds the commented-out delta marker below.
        is_delta = request.get("weight_type", "Original") != "Original"
        try:
            model_type = model_type_from_str(request["model_type"])
            model_data[AutoEvalColumn.model_type.name] = model_type.value.name
            model_data[AutoEvalColumn.model_type_symbol.name] = model_type.value.symbol  # + ("🔺" if is_delta else "")
        except KeyError:
            # Request file has no "model_type": fall back to curated metadata, then Unknown.
            if model_data["model_name_for_query"] in MODEL_TYPE_METADATA:
                model_data[AutoEvalColumn.model_type.name] = MODEL_TYPE_METADATA[model_data["model_name_for_query"]].value.name
                model_data[AutoEvalColumn.model_type_symbol.name] = MODEL_TYPE_METADATA[model_data["model_name_for_query"]].value.symbol  # + ("🔺" if is_delta else "")
            else:
                model_data[AutoEvalColumn.model_type.name] = ModelType.Unknown.value.name
                model_data[AutoEvalColumn.model_type_symbol.name] = ModelType.Unknown.value.symbol
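# Illustrative example of the matching above (the exact file name is an assumption,
# since only the "*_eval_request_*.json" shape is known here): a file such as
# "eval-queue/org/model_eval_request_float16.json" containing
# {"status": "FINISHED", "precision": "float16", "model_type": ...}
# pairs with a row whose "Precision" column is "torch.float16".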


def flag_models(leaderboard_data: List[dict]):
    for model_data in leaderboard_data:
        if model_data["model_name_for_query"] in FLAGGED_MODELS:
            issue_num = FLAGGED_MODELS[model_data["model_name_for_query"]].split("/")[-1]
            issue_link = model_hyperlink(
                FLAGGED_MODELS[model_data["model_name_for_query"]],
                f"See discussion #{issue_num}",
            )
            model_data[AutoEvalColumn.model.name] = f"{model_data[AutoEvalColumn.model.name]} has been flagged! {issue_link}"
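# Illustrative (the URL is an assumption about FLAGGED_MODELS entries): a value
# ending in ".../discussions/202" yields the link text "See discussion #202".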


def apply_metadata(leaderboard_data: List[dict]):
    get_model_type(leaderboard_data)
    get_model_infos_from_hub(leaderboard_data)
    flag_models(leaderboard_data)
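

# Minimal usage sketch (not part of the original module): the row dict below is a
# hypothetical leaderboard entry; real rows are built elsewhere in the app and
# carry many more columns. Needs network access to the Hub (H4_TOKEN optional).
if __name__ == "__main__":
    rows = [{"model_name_for_query": "bigscience/bloom-560m", "Precision": "torch.float16"}]
    apply_metadata(rows)
    print(rows)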