File size: 3,120 Bytes
6d6662e d30410b de0f093 d30410b 6d6662e d30410b 6d6662e d30410b 6d6662e d30410b 6d6662e d30410b 6d6662e d30410b 313c3d8 e3b30bf d30410b 6d6662e d30410b 6d6662e d30410b 6d6662e d30410b 6d6662e d30410b 6d6662e d30410b 6d6662e d30410b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
import os
from constants import EVAL_REQUESTS_PATH
from pathlib import Path
from huggingface_hub import HfApi, Repository
# Runtime configuration, read once at import time from the environment.
# Each value is None when the corresponding variable is not set.
TOKEN_HUB = os.getenv("TOKEN_HUB")
QUEUE_REPO = os.getenv("QUEUE_REPO")
QUEUE_PATH = os.getenv("QUEUE_PATH")

# Shared Hub client used by the helpers in this module.
# NOTE(review): the endpoint host ends with a trailing dot ("huggingface.co.");
# confirm this is intentional and not a typo.
hf_api = HfApi(endpoint="https://huggingface.co.", token=TOKEN_HUB)

# Language code for Persian
PERSIAN_LANGUAGE_CODE = "fa"
def load_all_info_from_dataset_hub():
    """Collect queued model names and the latest results CSV from the local queue folder.

    The clone/pull of the queue dataset repository is currently disabled, so
    this function only reads what is already present under ``QUEUE_PATH``.

    Returns:
        A ``(eval_queue_repo, requested_models, csv_results)`` tuple.
        ``eval_queue_repo`` is always ``None`` because no repository object
        is created; ``requested_models`` is the list of file stems of the
        request files found in the requests folder; ``csv_results`` is the
        path returned by ``get_csv_with_results`` for ``QUEUE_PATH``.

    Raises:
        ValueError: If ``TOKEN_HUB`` or ``QUEUE_PATH`` is not set, or if no
            results CSV could be selected.
    """
    if TOKEN_HUB is None:
        raise ValueError("No Hugging Face token provided. Skipping evaluation requests and results.")
    if QUEUE_PATH is None:
        # Without this guard the path join below fails with an opaque TypeError.
        raise ValueError("No queue path provided. Set the QUEUE_PATH environment variable.")

    print("Pulling evaluation requests and results.")

    # NOTE: cloning/pulling QUEUE_REPO into QUEUE_PATH through
    # huggingface_hub.Repository is disabled, so there is no repository
    # handle to return.
    eval_queue_repo = None

    # QUEUE_PATH comes from the environment as a plain string; wrap it so the
    # "/" join works regardless of whether EVAL_REQUESTS_PATH is a str or Path.
    queue_root = Path(QUEUE_PATH)

    # Local directory where dataset repo is cloned + folder with eval requests
    requests_dir = queue_root / EVAL_REQUESTS_PATH
    requested_models = [p.stem for p in get_all_requested_models(requests_dir)]

    # Local directory where dataset repo is cloned
    csv_results = get_csv_with_results(queue_root)
    if csv_results is None:
        raise ValueError("CSV results file not found.")

    return eval_queue_repo, requested_models, csv_results
def upload_file(requested_model_name, path_or_fileobj):
    """Upload an evaluation-request file into the requests folder of the queue dataset repo.

    Args:
        requested_model_name: Model name, used only in the commit message.
        path_or_fileobj: Object exposing a ``name`` attribute (for example a
            ``pathlib.Path``); it is forwarded unchanged to
            ``HfApi.upload_file``.
    """
    # Keep only the final component: a ``name`` that is a full/absolute path
    # would otherwise replace the requests folder when joined.
    file_name = Path(path_or_fileobj.name).name

    # Repository paths always use forward slashes, whatever the local OS is.
    dest_repo_file = (Path(EVAL_REQUESTS_PATH) / file_name).as_posix()

    hf_api.upload_file(
        path_or_fileobj=path_or_fileobj,
        path_in_repo=dest_repo_file,
        repo_id=QUEUE_REPO,
        token=TOKEN_HUB,
        repo_type="dataset",
        commit_message=f"Add {requested_model_name} to eval queue",
    )
def get_all_requested_models(directory):
    """Return the ``*.txt`` files located directly inside *directory*.

    Args:
        directory: Folder to scan, as a string or ``pathlib.Path``.

    Returns:
        A list of ``pathlib.Path`` objects, in the order produced by the
        glob (not sorted). Sub-folders are not searched.
    """
    return list(Path(directory).glob("*.txt"))
def get_csv_with_results(directory):
    """Locate the single "latest" results CSV directly inside *directory*.

    A file qualifies when it matches ``*.csv`` and its stem (the file name
    without the ``.csv`` suffix) ends with ``"latest"``.

    Args:
        directory: Folder to scan, as a string or ``pathlib.Path``.

    Returns:
        The matching ``pathlib.Path`` when exactly one file qualifies,
        otherwise ``None`` (both for no match and for several matches).
    """
    candidates = [
        csv_path
        for csv_path in Path(directory).glob("*.csv")
        if csv_path.stem.endswith("latest")
    ]
    # Zero or several candidates are both treated as "not found".
    return candidates[0] if len(candidates) == 1 else None
def is_model_on_hub(model_name, revision="main") -> "tuple[bool, str | None]":
    """Check whether ``author/model_name`` exists on the Hub.

    Args:
        model_name: Model identifier in the form ``author/model_name``.
            Spaces are removed before the name is parsed and compared.
        revision: Currently unused; kept so existing callers keep working.

    Returns:
        ``(True, None)`` when exactly one listed model has an id equal to
        the cleaned name. Otherwise ``(False, message)`` where ``message``
        is a sentence fragment describing the problem (invalid name format,
        or model not found / lookup failure).
    """
    invalid_name = "is not a valid model name. Please use the format `author/model_name`."

    # Validate the shape of the name up front instead of relying on a
    # blanket ``except`` around the parsing.
    if not isinstance(model_name, str):
        return False, invalid_name
    model_name = model_name.replace(" ", "")
    parts = model_name.split("/")
    if len(parts) < 2:
        return False, invalid_name
    author, model_id = parts[0], parts[1]
    if not author or not model_id:
        return False, invalid_name

    try:
        models = list(hf_api.list_models(author=author, search=model_id))
        # The search can return several models; only an exact id match counts.
        matched = [m.modelId for m in models if m.modelId == model_name]
        if len(matched) != 1:
            return False, "was not found on the hub!"
        return True, None
    except Exception as e:
        # Best-effort lookup: any failure while querying is reported to the
        # caller as "not found" rather than propagated.
        print(f"Could not get the model from the hub.: {e}")
        return False, "was not found on hub!"
|