Spaces:
Runtime error
Runtime error
chaeeunlee
commited on
Commit
·
5402045
1
Parent(s):
447928c
fixed local HF_HOME
Browse files- .DS_Store +0 -0
- app.py +3 -0
- hub/.locks/datasets--chaeeunlee--test_requests/28df5f900b358436f0267334b3e3e9af33f917ba.lock +0 -0
- hub/.locks/datasets--chaeeunlee--test_requests/32897cd3e640101ba184f8c4ccd896981de3804a.lock +0 -0
- hub/.locks/datasets--chaeeunlee--test_results/28df5f900b358436f0267334b3e3e9af33f917ba.lock +0 -0
- hub/.locks/datasets--chaeeunlee--test_results/32897cd3e640101ba184f8c4ccd896981de3804a.lock +0 -0
- hub/.locks/models--EleutherAI--pythia-70m/0204ed10c186a4c7c68f55dff8f26087a45898d6.lock +0 -0
- hub/.locks/models--EleutherAI--pythia-70m/d7a9196e329eaf06d6e2802fed376e7459834236.lock +0 -0
- hub/.locks/models--EleutherAI--pythia-70m/df0253c0ab197de15c12fa7fbb7edcca9b6848a3.lock +0 -0
- hub/.locks/models--EleutherAI--pythia-70m/f1860edb10f80bcaf7b023fce47c68a23b724c23.lock +0 -0
- hub/.locks/models--EleutherAI--pythia-70m/f74dfbfab8f97770a87769c739fb080c21c8bacc.lock +0 -0
- hub/datasets--chaeeunlee--test_requests/blobs/28df5f900b358436f0267334b3e3e9af33f917ba +55 -0
- hub/datasets--chaeeunlee--test_requests/blobs/32897cd3e640101ba184f8c4ccd896981de3804a +3 -0
- hub/datasets--chaeeunlee--test_requests/refs/main +1 -0
- hub/datasets--chaeeunlee--test_results/blobs/28df5f900b358436f0267334b3e3e9af33f917ba +55 -0
- hub/datasets--chaeeunlee--test_results/blobs/32897cd3e640101ba184f8c4ccd896981de3804a +3 -0
- hub/datasets--chaeeunlee--test_results/refs/main +1 -0
- hub/models--EleutherAI--pythia-70m/.no_exist/a39f36b100fe8a5377810d56c3f4789b9c53ac42/added_tokens.json +0 -0
- hub/models--EleutherAI--pythia-70m/.no_exist/a39f36b100fe8a5377810d56c3f4789b9c53ac42/merges.txt +0 -0
- hub/models--EleutherAI--pythia-70m/.no_exist/a39f36b100fe8a5377810d56c3f4789b9c53ac42/vocab.json +0 -0
- hub/models--EleutherAI--pythia-70m/blobs/0204ed10c186a4c7c68f55dff8f26087a45898d6 +5 -0
- hub/models--EleutherAI--pythia-70m/blobs/d7a9196e329eaf06d6e2802fed376e7459834236 +24 -0
- hub/models--EleutherAI--pythia-70m/blobs/df0253c0ab197de15c12fa7fbb7edcca9b6848a3 +294 -0
- hub/models--EleutherAI--pythia-70m/blobs/f1860edb10f80bcaf7b023fce47c68a23b724c23 +9 -0
- hub/models--EleutherAI--pythia-70m/blobs/f74dfbfab8f97770a87769c739fb080c21c8bacc +0 -0
- hub/models--EleutherAI--pythia-70m/refs/main +1 -0
- hub/models--EleutherAI--pythia-70m/snapshots/a39f36b100fe8a5377810d56c3f4789b9c53ac42/README.md +1 -0
- hub/models--EleutherAI--pythia-70m/snapshots/a39f36b100fe8a5377810d56c3f4789b9c53ac42/config.json +1 -0
- hub/models--EleutherAI--pythia-70m/snapshots/a39f36b100fe8a5377810d56c3f4789b9c53ac42/special_tokens_map.json +1 -0
- hub/models--EleutherAI--pythia-70m/snapshots/a39f36b100fe8a5377810d56c3f4789b9c53ac42/tokenizer.json +1 -0
- hub/models--EleutherAI--pythia-70m/snapshots/a39f36b100fe8a5377810d56c3f4789b9c53ac42/tokenizer_config.json +1 -0
- hub/version.txt +1 -0
- manage_repos.ipynb +28 -20
- src/.DS_Store +0 -0
- src/backend/.DS_Store +0 -0
- src/backend/envs.py +4 -2
- src/backend/tasks/.DS_Store +0 -0
- src/backend/tasks/medmcqa/.DS_Store +0 -0
- src/backend/tasks/medmcqa/medmcqa.yaml +18 -0
- src/backend/tasks/medmcqa/utils_medmcqa.py +19 -0
- src/backend/tasks/medqa/.DS_Store +0 -0
- src/backend/tasks/medqa/medqa.yaml +16 -0
- src/backend/tasks/medqa/preprocess_medqa.py +8 -0
- src/display/__pycache__/utils.cpython-310.pyc +0 -0
- src/display/utils.py +5 -2
- src/populate.py +2 -1
- src/submission/__pycache__/submit.cpython-310.pyc +0 -0
.DS_Store
CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
|
|
app.py
CHANGED
@@ -124,9 +124,12 @@ def filter_models(df: pd.DataFrame, type_query: list, size_query: list, precisio
|
|
124 |
ui_snapshot_download(repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30)
|
125 |
ui_snapshot_download(repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30)
|
126 |
|
|
|
|
|
127 |
|
128 |
raw_data, original_df = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS) # k the problem is that the results are only saved in _bk dirs.
|
129 |
leaderboard_df = original_df.copy()
|
|
|
130 |
|
131 |
|
132 |
################################################################################################################################
|
|
|
124 |
ui_snapshot_download(repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30)
|
125 |
ui_snapshot_download(repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30)
|
126 |
|
127 |
+
print(f"COLS = {COLS}")
|
128 |
+
|
129 |
|
130 |
raw_data, original_df = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS) # k the problem is that the results are only saved in _bk dirs.
|
131 |
leaderboard_df = original_df.copy()
|
132 |
+
print(f"leaderboard_df = {leaderboard_df}")
|
133 |
|
134 |
|
135 |
################################################################################################################################
|
hub/.locks/datasets--chaeeunlee--test_requests/28df5f900b358436f0267334b3e3e9af33f917ba.lock
ADDED
File without changes
|
hub/.locks/datasets--chaeeunlee--test_requests/32897cd3e640101ba184f8c4ccd896981de3804a.lock
ADDED
File without changes
|
hub/.locks/datasets--chaeeunlee--test_results/28df5f900b358436f0267334b3e3e9af33f917ba.lock
ADDED
File without changes
|
hub/.locks/datasets--chaeeunlee--test_results/32897cd3e640101ba184f8c4ccd896981de3804a.lock
ADDED
File without changes
|
hub/.locks/models--EleutherAI--pythia-70m/0204ed10c186a4c7c68f55dff8f26087a45898d6.lock
ADDED
File without changes
|
hub/.locks/models--EleutherAI--pythia-70m/d7a9196e329eaf06d6e2802fed376e7459834236.lock
ADDED
File without changes
|
hub/.locks/models--EleutherAI--pythia-70m/df0253c0ab197de15c12fa7fbb7edcca9b6848a3.lock
ADDED
File without changes
|
hub/.locks/models--EleutherAI--pythia-70m/f1860edb10f80bcaf7b023fce47c68a23b724c23.lock
ADDED
File without changes
|
hub/.locks/models--EleutherAI--pythia-70m/f74dfbfab8f97770a87769c739fb080c21c8bacc.lock
ADDED
File without changes
|
hub/datasets--chaeeunlee--test_requests/blobs/28df5f900b358436f0267334b3e3e9af33f917ba
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.lz4 filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
19 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
20 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
21 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
22 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
23 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
26 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
27 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
30 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
31 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
33 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
34 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
35 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
36 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
37 |
+
# Audio files - uncompressed
|
38 |
+
*.pcm filter=lfs diff=lfs merge=lfs -text
|
39 |
+
*.sam filter=lfs diff=lfs merge=lfs -text
|
40 |
+
*.raw filter=lfs diff=lfs merge=lfs -text
|
41 |
+
# Audio files - compressed
|
42 |
+
*.aac filter=lfs diff=lfs merge=lfs -text
|
43 |
+
*.flac filter=lfs diff=lfs merge=lfs -text
|
44 |
+
*.mp3 filter=lfs diff=lfs merge=lfs -text
|
45 |
+
*.ogg filter=lfs diff=lfs merge=lfs -text
|
46 |
+
*.wav filter=lfs diff=lfs merge=lfs -text
|
47 |
+
# Image files - uncompressed
|
48 |
+
*.bmp filter=lfs diff=lfs merge=lfs -text
|
49 |
+
*.gif filter=lfs diff=lfs merge=lfs -text
|
50 |
+
*.png filter=lfs diff=lfs merge=lfs -text
|
51 |
+
*.tiff filter=lfs diff=lfs merge=lfs -text
|
52 |
+
# Image files - compressed
|
53 |
+
*.jpg filter=lfs diff=lfs merge=lfs -text
|
54 |
+
*.jpeg filter=lfs diff=lfs merge=lfs -text
|
55 |
+
*.webp filter=lfs diff=lfs merge=lfs -text
|
hub/datasets--chaeeunlee--test_requests/blobs/32897cd3e640101ba184f8c4ccd896981de3804a
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
license: mit
|
3 |
+
---
|
hub/datasets--chaeeunlee--test_requests/refs/main
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
7d6de7e1844e0f6535382e4509843534d6f043bb
|
hub/datasets--chaeeunlee--test_results/blobs/28df5f900b358436f0267334b3e3e9af33f917ba
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.lz4 filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
19 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
20 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
21 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
22 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
23 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
26 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
27 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
30 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
31 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
33 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
34 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
35 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
36 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
37 |
+
# Audio files - uncompressed
|
38 |
+
*.pcm filter=lfs diff=lfs merge=lfs -text
|
39 |
+
*.sam filter=lfs diff=lfs merge=lfs -text
|
40 |
+
*.raw filter=lfs diff=lfs merge=lfs -text
|
41 |
+
# Audio files - compressed
|
42 |
+
*.aac filter=lfs diff=lfs merge=lfs -text
|
43 |
+
*.flac filter=lfs diff=lfs merge=lfs -text
|
44 |
+
*.mp3 filter=lfs diff=lfs merge=lfs -text
|
45 |
+
*.ogg filter=lfs diff=lfs merge=lfs -text
|
46 |
+
*.wav filter=lfs diff=lfs merge=lfs -text
|
47 |
+
# Image files - uncompressed
|
48 |
+
*.bmp filter=lfs diff=lfs merge=lfs -text
|
49 |
+
*.gif filter=lfs diff=lfs merge=lfs -text
|
50 |
+
*.png filter=lfs diff=lfs merge=lfs -text
|
51 |
+
*.tiff filter=lfs diff=lfs merge=lfs -text
|
52 |
+
# Image files - compressed
|
53 |
+
*.jpg filter=lfs diff=lfs merge=lfs -text
|
54 |
+
*.jpeg filter=lfs diff=lfs merge=lfs -text
|
55 |
+
*.webp filter=lfs diff=lfs merge=lfs -text
|
hub/datasets--chaeeunlee--test_results/blobs/32897cd3e640101ba184f8c4ccd896981de3804a
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
license: mit
|
3 |
+
---
|
hub/datasets--chaeeunlee--test_results/refs/main
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
da979ab65e0398923653d6a75c5eddd5f7c7c098
|
hub/models--EleutherAI--pythia-70m/.no_exist/a39f36b100fe8a5377810d56c3f4789b9c53ac42/added_tokens.json
ADDED
File without changes
|
hub/models--EleutherAI--pythia-70m/.no_exist/a39f36b100fe8a5377810d56c3f4789b9c53ac42/merges.txt
ADDED
File without changes
|
hub/models--EleutherAI--pythia-70m/.no_exist/a39f36b100fe8a5377810d56c3f4789b9c53ac42/vocab.json
ADDED
File without changes
|
hub/models--EleutherAI--pythia-70m/blobs/0204ed10c186a4c7c68f55dff8f26087a45898d6
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": "<|endoftext|>",
|
3 |
+
"eos_token": "<|endoftext|>",
|
4 |
+
"unk_token": "<|endoftext|>"
|
5 |
+
}
|
hub/models--EleutherAI--pythia-70m/blobs/d7a9196e329eaf06d6e2802fed376e7459834236
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"GPTNeoXForCausalLM"
|
4 |
+
],
|
5 |
+
"bos_token_id": 0,
|
6 |
+
"eos_token_id": 0,
|
7 |
+
"hidden_act": "gelu",
|
8 |
+
"hidden_size": 512,
|
9 |
+
"initializer_range": 0.02,
|
10 |
+
"intermediate_size": 2048,
|
11 |
+
"layer_norm_eps": 1e-05,
|
12 |
+
"max_position_embeddings": 2048,
|
13 |
+
"model_type": "gpt_neox",
|
14 |
+
"num_attention_heads": 8,
|
15 |
+
"num_hidden_layers": 6,
|
16 |
+
"rotary_emb_base": 10000,
|
17 |
+
"rotary_pct": 0.25,
|
18 |
+
"tie_word_embeddings": false,
|
19 |
+
"torch_dtype": "float16",
|
20 |
+
"transformers_version": "4.24.0",
|
21 |
+
"use_cache": true,
|
22 |
+
"use_parallel_residual": true,
|
23 |
+
"vocab_size": 50304
|
24 |
+
}
|
hub/models--EleutherAI--pythia-70m/blobs/df0253c0ab197de15c12fa7fbb7edcca9b6848a3
ADDED
@@ -0,0 +1,294 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
language:
|
3 |
+
- en
|
4 |
+
tags:
|
5 |
+
- pytorch
|
6 |
+
- causal-lm
|
7 |
+
- pythia
|
8 |
+
license: apache-2.0
|
9 |
+
datasets:
|
10 |
+
- EleutherAI/pile
|
11 |
+
library_name: gpt-neox
|
12 |
+
---
|
13 |
+
|
14 |
+
The *Pythia Scaling Suite* is a collection of models developed to facilitate
|
15 |
+
interpretability research [(see paper)](https://arxiv.org/pdf/2304.01373.pdf).
|
16 |
+
It contains two sets of eight models of sizes
|
17 |
+
70M, 160M, 410M, 1B, 1.4B, 2.8B, 6.9B, and 12B. For each size, there are two
|
18 |
+
models: one trained on the Pile, and one trained on the Pile after the dataset
|
19 |
+
has been globally deduplicated. All 8 model sizes are trained on the exact
|
20 |
+
same data, in the exact same order. We also provide 154 intermediate
|
21 |
+
checkpoints per model, hosted on Hugging Face as branches.
|
22 |
+
|
23 |
+
The Pythia model suite was deliberately designed to promote scientific
|
24 |
+
research on large language models, especially interpretability research.
|
25 |
+
Despite not centering downstream performance as a design goal, we find the
|
26 |
+
models <a href="#evaluations">match or exceed</a> the performance of
|
27 |
+
similar and same-sized models, such as those in the OPT and GPT-Neo suites.
|
28 |
+
|
29 |
+
<details>
|
30 |
+
<summary style="font-weight:600">Details on previous early release and naming convention.</summary>
|
31 |
+
|
32 |
+
Previously, we released an early version of the Pythia suite to the public.
|
33 |
+
However, we decided to retrain the model suite to address a few hyperparameter
|
34 |
+
discrepancies. This model card <a href="#changelog">lists the changes</a>;
|
35 |
+
see appendix B in the Pythia paper for further discussion. We found no
|
36 |
+
difference in benchmark performance between the two Pythia versions.
|
37 |
+
The old models are
|
38 |
+
[still available](https://huggingface.co/models?other=pythia_v0), but we
|
39 |
+
suggest the retrained suite if you are just starting to use Pythia.<br>
|
40 |
+
**This is the current release.**
|
41 |
+
|
42 |
+
Please note that all models in the *Pythia* suite were renamed in January
|
43 |
+
2023. For clarity, a <a href="#naming-convention-and-parameter-count">table
|
44 |
+
comparing the old and new names</a> is provided in this model card, together
|
45 |
+
with exact parameter counts.
|
46 |
+
</details>
|
47 |
+
<br>
|
48 |
+
|
49 |
+
# Pythia-70M
|
50 |
+
|
51 |
+
## Model Details
|
52 |
+
|
53 |
+
- Developed by: [EleutherAI](http://eleuther.ai)
|
54 |
+
- Model type: Transformer-based Language Model
|
55 |
+
- Language: English
|
56 |
+
- Learn more: [Pythia's GitHub repository](https://github.com/EleutherAI/pythia)
|
57 |
+
for training procedure, config files, and details on how to use.
|
58 |
+
[See paper](https://arxiv.org/pdf/2304.01373.pdf) for more evals and implementation
|
59 |
+
details.
|
60 |
+
- Library: [GPT-NeoX](https://github.com/EleutherAI/gpt-neox)
|
61 |
+
- License: Apache 2.0
|
62 |
+
- Contact: to ask questions about this model, join the [EleutherAI
|
63 |
+
Discord](https://discord.gg/zBGx3azzUn), and post them in `#release-discussion`.
|
64 |
+
Please read the existing *Pythia* documentation before asking about it in the
|
65 |
+
EleutherAI Discord. For general correspondence: [contact@eleuther.
|
66 |
+
ai](mailto:contact@eleuther.ai).
|
67 |
+
|
68 |
+
<figure>
|
69 |
+
|
70 |
+
| Pythia model | Non-Embedding Params | Layers | Model Dim | Heads | Batch Size | Learning Rate | Equivalent Models |
|
71 |
+
| -----------: | -------------------: | :----: | :-------: | :---: | :--------: | :-------------------: | :--------------------: |
|
72 |
+
| 70M | 18,915,328 | 6 | 512 | 8 | 2M | 1.0 x 10<sup>-3</sup> | — |
|
73 |
+
| 160M | 85,056,000 | 12 | 768 | 12 | 2M | 6.0 x 10<sup>-4</sup> | GPT-Neo 125M, OPT-125M |
|
74 |
+
| 410M | 302,311,424 | 24 | 1024 | 16 | 2M | 3.0 x 10<sup>-4</sup> | OPT-350M |
|
75 |
+
| 1.0B | 805,736,448 | 16 | 2048 | 8 | 2M | 3.0 x 10<sup>-4</sup> | — |
|
76 |
+
| 1.4B | 1,208,602,624 | 24 | 2048 | 16 | 2M | 2.0 x 10<sup>-4</sup> | GPT-Neo 1.3B, OPT-1.3B |
|
77 |
+
| 2.8B | 2,517,652,480 | 32 | 2560 | 32 | 2M | 1.6 x 10<sup>-4</sup> | GPT-Neo 2.7B, OPT-2.7B |
|
78 |
+
| 6.9B | 6,444,163,072 | 32 | 4096 | 32 | 2M | 1.2 x 10<sup>-4</sup> | OPT-6.7B |
|
79 |
+
| 12B | 11,327,027,200 | 36 | 5120 | 40 | 2M | 1.2 x 10<sup>-4</sup> | — |
|
80 |
+
<figcaption>Engineering details for the <i>Pythia Suite</i>. Deduped and
|
81 |
+
non-deduped models of a given size have the same hyperparameters. “Equivalent”
|
82 |
+
models have <b>exactly</b> the same architecture, and the same number of
|
83 |
+
non-embedding parameters.</figcaption>
|
84 |
+
</figure>
|
85 |
+
|
86 |
+
## Uses and Limitations
|
87 |
+
|
88 |
+
### Intended Use
|
89 |
+
|
90 |
+
The primary intended use of Pythia is research on the behavior, functionality,
|
91 |
+
and limitations of large language models. This suite is intended to provide
|
92 |
+
a controlled setting for performing scientific experiments. We also provide
|
93 |
+
154 checkpoints per model: initial `step0`, 10 log-spaced checkpoints
|
94 |
+
`step{1,2,4...512}`, and 143 evenly-spaced checkpoints from `step1000` to
|
95 |
+
`step143000`. These checkpoints are hosted on Hugging Face as branches. Note
|
96 |
+
that branch `143000` corresponds exactly to the model checkpoint on the `main`
|
97 |
+
branch of each model.
|
98 |
+
|
99 |
+
You may also further fine-tune and adapt Pythia-70M for deployment,
|
100 |
+
as long as your use is in accordance with the Apache 2.0 license. Pythia
|
101 |
+
models work with the Hugging Face [Transformers
|
102 |
+
Library](https://huggingface.co/docs/transformers/index). If you decide to use
|
103 |
+
pre-trained Pythia-70M as a basis for your fine-tuned model, please
|
104 |
+
conduct your own risk and bias assessment.
|
105 |
+
|
106 |
+
### Out-of-scope use
|
107 |
+
|
108 |
+
The Pythia Suite is **not** intended for deployment. It is not in itself
|
109 |
+
a product and cannot be used for human-facing interactions. For example,
|
110 |
+
the model may generate harmful or offensive text. Please evaluate the risks
|
111 |
+
associated with your particular use case.
|
112 |
+
|
113 |
+
Pythia models are English-language only, and are not suitable for translation
|
114 |
+
or generating text in other languages.
|
115 |
+
|
116 |
+
Pythia-70M has not been fine-tuned for downstream contexts in which
|
117 |
+
language models are commonly deployed, such as writing genre prose,
|
118 |
+
or commercial chatbots. This means Pythia-70M will **not**
|
119 |
+
respond to a given prompt the way a product like ChatGPT does. This is because,
|
120 |
+
unlike this model, ChatGPT was fine-tuned using methods such as Reinforcement
|
121 |
+
Learning from Human Feedback (RLHF) to better “follow” human instructions.
|
122 |
+
|
123 |
+
### Limitations and biases
|
124 |
+
|
125 |
+
The core functionality of a large language model is to take a string of text
|
126 |
+
and predict the next token. The token used by the model need not produce the
|
127 |
+
most “accurate” text. Never rely on Pythia-70M to produce factually accurate
|
128 |
+
output.
|
129 |
+
|
130 |
+
This model was trained on [the Pile](https://pile.eleuther.ai/), a dataset
|
131 |
+
known to contain profanity and texts that are lewd or otherwise offensive.
|
132 |
+
See [Section 6 of the Pile paper](https://arxiv.org/abs/2101.00027) for a
|
133 |
+
discussion of documented biases with regards to gender, religion, and race.
|
134 |
+
Pythia-70M may produce socially unacceptable or undesirable text, *even if*
|
135 |
+
the prompt itself does not include anything explicitly offensive.
|
136 |
+
|
137 |
+
If you plan on using text generated through, for example, the Hosted Inference
|
138 |
+
API, we recommend having a human curate the outputs of this language model
|
139 |
+
before presenting it to other people. Please inform your audience that the
|
140 |
+
text was generated by Pythia-70M.
|
141 |
+
|
142 |
+
### Quickstart
|
143 |
+
|
144 |
+
Pythia models can be loaded and used via the following code, demonstrated here
|
145 |
+
for the third `pythia-70m-deduped` checkpoint:
|
146 |
+
|
147 |
+
```python
|
148 |
+
from transformers import GPTNeoXForCausalLM, AutoTokenizer
|
149 |
+
|
150 |
+
model = GPTNeoXForCausalLM.from_pretrained(
|
151 |
+
"EleutherAI/pythia-70m-deduped",
|
152 |
+
revision="step3000",
|
153 |
+
cache_dir="./pythia-70m-deduped/step3000",
|
154 |
+
)
|
155 |
+
|
156 |
+
tokenizer = AutoTokenizer.from_pretrained(
|
157 |
+
"EleutherAI/pythia-70m-deduped",
|
158 |
+
revision="step3000",
|
159 |
+
cache_dir="./pythia-70m-deduped/step3000",
|
160 |
+
)
|
161 |
+
|
162 |
+
inputs = tokenizer("Hello, I am", return_tensors="pt")
|
163 |
+
tokens = model.generate(**inputs)
|
164 |
+
tokenizer.decode(tokens[0])
|
165 |
+
```
|
166 |
+
|
167 |
+
Revision/branch `step143000` corresponds exactly to the model checkpoint on
|
168 |
+
the `main` branch of each model.<br>
|
169 |
+
For more information on how to use all Pythia models, see [documentation on
|
170 |
+
GitHub](https://github.com/EleutherAI/pythia).
|
171 |
+
|
172 |
+
## Training
|
173 |
+
|
174 |
+
### Training data
|
175 |
+
|
176 |
+
[The Pile](https://pile.eleuther.ai/) is an 825GiB general-purpose dataset in
|
177 |
+
English. It was created by EleutherAI specifically for training large language
|
178 |
+
models. It contains texts from 22 diverse sources, roughly broken down into
|
179 |
+
five categories: academic writing (e.g. arXiv), internet (e.g. CommonCrawl),
|
180 |
+
prose (e.g. Project Gutenberg), dialogue (e.g. YouTube subtitles), and
|
181 |
+
miscellaneous (e.g. GitHub, Enron Emails). See [the Pile
|
182 |
+
paper](https://arxiv.org/abs/2101.00027) for a breakdown of all data sources,
|
183 |
+
methodology, and a discussion of ethical implications. Consult [the
|
184 |
+
datasheet](https://arxiv.org/abs/2201.07311) for more detailed documentation
|
185 |
+
about the Pile and its component datasets. The Pile can be downloaded from
|
186 |
+
the [official website](https://pile.eleuther.ai/), or from a [community
|
187 |
+
mirror](https://the-eye.eu/public/AI/pile/).<br>
|
188 |
+
The Pile was **not** deduplicated before being used to train Pythia-70M.
|
189 |
+
|
190 |
+
### Training procedure
|
191 |
+
|
192 |
+
All models were trained on the exact same data, in the exact same order. Each
|
193 |
+
model saw 299,892,736,000 tokens during training, and 143 checkpoints for each
|
194 |
+
model are saved every 2,097,152,000 tokens, spaced evenly throughout training,
|
195 |
+
from `step1000` to `step143000` (which is the same as `main`). In addition, we
|
196 |
+
also provide frequent early checkpoints: `step0` and `step{1,2,4...512}`.
|
197 |
+
This corresponds to training for just under 1 epoch on the Pile for
|
198 |
+
non-deduplicated models, and about 1.5 epochs on the deduplicated Pile.
|
199 |
+
|
200 |
+
All *Pythia* models trained for 143000 steps at a batch size
|
201 |
+
of 2M (2,097,152 tokens).<br>
|
202 |
+
See [GitHub](https://github.com/EleutherAI/pythia) for more details on training
|
203 |
+
procedure, including [how to reproduce
|
204 |
+
it](https://github.com/EleutherAI/pythia/blob/main/README.md#reproducing-training).<br>
|
205 |
+
Pythia uses the same tokenizer as [GPT-NeoX-
|
206 |
+
20B](https://huggingface.co/EleutherAI/gpt-neox-20b).
|
207 |
+
|
208 |
+
## Evaluations
|
209 |
+
|
210 |
+
All 16 *Pythia* models were evaluated using the [LM Evaluation
|
211 |
+
Harness](https://github.com/EleutherAI/lm-evaluation-harness). You can access
|
212 |
+
the results by model and step at `results/json/*` in the [GitHub
|
213 |
+
repository](https://github.com/EleutherAI/pythia/tree/main/results/json/).<br>
|
214 |
+
Expand the sections below to see plots of evaluation results for all
|
215 |
+
Pythia and Pythia-deduped models compared with OPT and BLOOM.
|
216 |
+
|
217 |
+
<details>
|
218 |
+
<summary>LAMBADA – OpenAI</summary>
|
219 |
+
<img src="/EleutherAI/pythia-12b/resolve/main/eval_plots/lambada_openai_v1.png" style="width:auto"/>
|
220 |
+
</details>
|
221 |
+
|
222 |
+
<details>
|
223 |
+
<summary>Physical Interaction: Question Answering (PIQA)</summary>
|
224 |
+
<img src="/EleutherAI/pythia-12b/resolve/main/eval_plots/piqa_v1.png" style="width:auto"/>
|
225 |
+
</details>
|
226 |
+
|
227 |
+
<details>
|
228 |
+
<summary>WinoGrande</summary>
|
229 |
+
<img src="/EleutherAI/pythia-12b/resolve/main/eval_plots/winogrande_v1.png" style="width:auto"/>
|
230 |
+
</details>
|
231 |
+
|
232 |
+
<details>
|
233 |
+
<summary>AI2 Reasoning Challenge—Easy Set</summary>
|
234 |
+
<img src="/EleutherAI/pythia-12b/resolve/main/eval_plots/arc_easy_v1.png" style="width:auto"/>
|
235 |
+
</details>
|
236 |
+
|
237 |
+
<details>
|
238 |
+
<summary>SciQ</summary>
|
239 |
+
<img src="/EleutherAI/pythia-12b/resolve/main/eval_plots/sciq_v1.png" style="width:auto"/>
|
240 |
+
</details>
|
241 |
+
|
242 |
+
## Changelog
|
243 |
+
|
244 |
+
This section compares differences between previously released
|
245 |
+
[Pythia v0](https://huggingface.co/models?other=pythia_v0) and the current
|
246 |
+
models. See Appendix B of the Pythia paper for further discussion of these
|
247 |
+
changes and the motivation behind them. We found that retraining Pythia had no
|
248 |
+
impact on benchmark performance.
|
249 |
+
|
250 |
+
- All model sizes are now trained with uniform batch size of 2M tokens.
|
251 |
+
Previously, the models of size 160M, 410M, and 1.4B parameters were trained
|
252 |
+
with batch sizes of 4M tokens.
|
253 |
+
- We added checkpoints at initialization (step 0) and steps {1,2,4,8,16,32,64,
|
254 |
+
128,256,512} in addition to every 1000 training steps.
|
255 |
+
- Flash Attention was used in the new retrained suite.
|
256 |
+
- We remedied a minor inconsistency that existed in the original suite: all
|
257 |
+
models of size 2.8B parameters or smaller had a learning rate (LR) schedule
|
258 |
+
which decayed to a minimum LR of 10% of the starting LR, but the 6.9B and
|
259 |
+
12B models all used an LR schedule which decayed to a minimum LR of 0. In
|
260 |
+
the redone training runs, we rectified this inconsistency: all models now were
|
261 |
+
trained with LR decaying to a minimum of 0.1× their maximum LR.
|
262 |
+
|
263 |
+
### Naming convention and parameter count
|
264 |
+
|
265 |
+
*Pythia* models were renamed in January 2023. It is possible that the old
|
266 |
+
naming convention still persists in some documentation by accident. The
|
267 |
+
current naming convention (70M, 160M, etc.) is based on total parameter count.
|
268 |
+
|
269 |
+
<figure style="width:32em">
|
270 |
+
|
271 |
+
| current Pythia suffix | old suffix | total params | non-embedding params |
|
272 |
+
| --------------------: | ---------: | -------------: | -------------------: |
|
273 |
+
| 70M | 19M | 70,426,624 | 18,915,328 |
|
274 |
+
| 160M | 125M | 162,322,944 | 85,056,000 |
|
275 |
+
| 410M | 350M | 405,334,016 | 302,311,424 |
|
276 |
+
| 1B | 800M | 1,011,781,632 | 805,736,448 |
|
277 |
+
| 1.4B | 1.3B | 1,414,647,808 | 1,208,602,624 |
|
278 |
+
| 2.8B | 2.7B | 2,775,208,960 | 2,517,652,480 |
|
279 |
+
| 6.9B | 6.7B | 6,857,302,016 | 6,444,163,072 |
|
280 |
+
| 12B | 13B | 11,846,072,320 | 11,327,027,200 |
|
281 |
+
</figure>
|
282 |
+
# [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)
|
283 |
+
Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_EleutherAI__pythia-70m)
|
284 |
+
|
285 |
+
| Metric | Value |
|
286 |
+
|-----------------------|---------------------------|
|
287 |
+
| Avg. | 25.28 |
|
288 |
+
| ARC (25-shot) | 21.59 |
|
289 |
+
| HellaSwag (10-shot) | 27.29 |
|
290 |
+
| MMLU (5-shot) | 25.9 |
|
291 |
+
| TruthfulQA (0-shot) | 47.06 |
|
292 |
+
| Winogrande (5-shot) | 51.46 |
|
293 |
+
| GSM8K (5-shot) | 0.3 |
|
294 |
+
| DROP (3-shot) | 3.33 |
|
hub/models--EleutherAI--pythia-70m/blobs/f1860edb10f80bcaf7b023fce47c68a23b724c23
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_prefix_space": false,
|
3 |
+
"bos_token": "<|endoftext|>",
|
4 |
+
"eos_token": "<|endoftext|>",
|
5 |
+
"name_or_path": "EleutherAI/gpt-neox-20b",
|
6 |
+
"special_tokens_map_file": "/admin/home-hailey/.cache/huggingface/hub/models--EleutherAI--gpt-neox-20b/snapshots/4e49eadb5d14bd22f314ec3f45b69a87b88c7691/special_tokens_map.json",
|
7 |
+
"tokenizer_class": "GPTNeoXTokenizer",
|
8 |
+
"unk_token": "<|endoftext|>"
|
9 |
+
}
|
hub/models--EleutherAI--pythia-70m/blobs/f74dfbfab8f97770a87769c739fb080c21c8bacc
ADDED
The diff for this file is too large to render.
See raw diff
|
|
hub/models--EleutherAI--pythia-70m/refs/main
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
a39f36b100fe8a5377810d56c3f4789b9c53ac42
|
hub/models--EleutherAI--pythia-70m/snapshots/a39f36b100fe8a5377810d56c3f4789b9c53ac42/README.md
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
../../blobs/df0253c0ab197de15c12fa7fbb7edcca9b6848a3
|
hub/models--EleutherAI--pythia-70m/snapshots/a39f36b100fe8a5377810d56c3f4789b9c53ac42/config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
../../blobs/d7a9196e329eaf06d6e2802fed376e7459834236
|
hub/models--EleutherAI--pythia-70m/snapshots/a39f36b100fe8a5377810d56c3f4789b9c53ac42/special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
../../blobs/0204ed10c186a4c7c68f55dff8f26087a45898d6
|
hub/models--EleutherAI--pythia-70m/snapshots/a39f36b100fe8a5377810d56c3f4789b9c53ac42/tokenizer.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
../../blobs/f74dfbfab8f97770a87769c739fb080c21c8bacc
|
hub/models--EleutherAI--pythia-70m/snapshots/a39f36b100fe8a5377810d56c3f4789b9c53ac42/tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
../../blobs/f1860edb10f80bcaf7b023fce47c68a23b724c23
|
hub/version.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
1
|
manage_repos.ipynb
CHANGED
@@ -11,31 +11,16 @@
|
|
11 |
},
|
12 |
{
|
13 |
"cell_type": "code",
|
14 |
-
"execution_count":
|
15 |
"metadata": {},
|
16 |
"outputs": [
|
17 |
-
{
|
18 |
-
"name": "stdout",
|
19 |
-
"output_type": "stream",
|
20 |
-
"text": [
|
21 |
-
"CACHE_PATH = /Users/chaeeunlee/Documents/VSC_workspaces/test_leaderboard\n"
|
22 |
-
]
|
23 |
-
},
|
24 |
-
{
|
25 |
-
"name": "stderr",
|
26 |
-
"output_type": "stream",
|
27 |
-
"text": [
|
28 |
-
"/Users/chaeeunlee/anaconda3/envs/lb/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
29 |
-
" from .autonotebook import tqdm as notebook_tqdm\n"
|
30 |
-
]
|
31 |
-
},
|
32 |
{
|
33 |
"data": {
|
34 |
"text/plain": [
|
35 |
"'\\n( path_in_repo: str\\nrepo_id: str\\ntoken: typing.Optional[str] = None\\nrepo_type: typing.Optional[str] = Nonerevision: typing.Optional[str] = Nonecommit_message: typing.Optional[str] = Nonecommit_description: typing.Optional[str] = Nonecreate_pr: typing.Optional[bool] = Noneparent_commit: typing.Optional[str] = None )\\n'"
|
36 |
]
|
37 |
},
|
38 |
-
"execution_count":
|
39 |
"metadata": {},
|
40 |
"output_type": "execute_result"
|
41 |
}
|
@@ -69,14 +54,37 @@
|
|
69 |
"metadata": {},
|
70 |
"outputs": [],
|
71 |
"source": [
|
72 |
-
"res = API.delete_folder(path_in_repo='EleutherAI/
|
73 |
]
|
74 |
},
|
75 |
{
|
76 |
"cell_type": "code",
|
77 |
-
"execution_count":
|
78 |
"metadata": {},
|
79 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
"source": [
|
81 |
"# res = API.delete_folder(path_in_repo='EleutherAI/', repo_id=QUEUE_REPO, repo_type='dataset')\n",
|
82 |
"\n",
|
|
|
11 |
},
|
12 |
{
|
13 |
"cell_type": "code",
|
14 |
+
"execution_count": 6,
|
15 |
"metadata": {},
|
16 |
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
{
|
18 |
"data": {
|
19 |
"text/plain": [
|
20 |
"'\\n( path_in_repo: str\\nrepo_id: str\\ntoken: typing.Optional[str] = None\\nrepo_type: typing.Optional[str] = Nonerevision: typing.Optional[str] = Nonecommit_message: typing.Optional[str] = Nonecommit_description: typing.Optional[str] = Nonecreate_pr: typing.Optional[bool] = Noneparent_commit: typing.Optional[str] = None )\\n'"
|
21 |
]
|
22 |
},
|
23 |
+
"execution_count": 6,
|
24 |
"metadata": {},
|
25 |
"output_type": "execute_result"
|
26 |
}
|
|
|
54 |
"metadata": {},
|
55 |
"outputs": [],
|
56 |
"source": [
|
57 |
+
"res = API.delete_folder(path_in_repo='EleutherAI/', repo_id=RESULTS_REPO, repo_type='dataset')"
|
58 |
]
|
59 |
},
|
60 |
{
|
61 |
"cell_type": "code",
|
62 |
+
"execution_count": 5,
|
63 |
"metadata": {},
|
64 |
+
"outputs": [
|
65 |
+
{
|
66 |
+
"ename": "EntryNotFoundError",
|
67 |
+
"evalue": "404 Client Error. (Request ID: Root=1-65c41aaf-4be744ec4d8b25f96aac8d20;c17f9346-8c9c-44c6-af2a-d69065e58148)\n\nEntry Not Found for url: https://huggingface.co/api/datasets/chaeeunlee/test_requests/commit/main.\nA file with the name \"EleutherAI/pythia-70m_pubmedqa_eval_request_False_float32_Original.json\" does not exist",
|
68 |
+
"output_type": "error",
|
69 |
+
"traceback": [
|
70 |
+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
71 |
+
"\u001b[0;31mHTTPError\u001b[0m Traceback (most recent call last)",
|
72 |
+
"File \u001b[0;32m~/anaconda3/envs/lb/lib/python3.10/site-packages/huggingface_hub/utils/_errors.py:286\u001b[0m, in \u001b[0;36mhf_raise_for_status\u001b[0;34m(response, endpoint_name)\u001b[0m\n\u001b[1;32m 285\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 286\u001b[0m \u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mraise_for_status\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 287\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m HTTPError \u001b[38;5;28;01mas\u001b[39;00m e:\n",
|
73 |
+
"File \u001b[0;32m~/anaconda3/envs/lb/lib/python3.10/site-packages/requests/models.py:1021\u001b[0m, in \u001b[0;36mResponse.raise_for_status\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1020\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m http_error_msg:\n\u001b[0;32m-> 1021\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m HTTPError(http_error_msg, response\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m)\n",
|
74 |
+
"\u001b[0;31mHTTPError\u001b[0m: 404 Client Error: Not Found for url: https://huggingface.co/api/datasets/chaeeunlee/test_requests/commit/main",
|
75 |
+
"\nThe above exception was the direct cause of the following exception:\n",
|
76 |
+
"\u001b[0;31mEntryNotFoundError\u001b[0m Traceback (most recent call last)",
|
77 |
+
"Cell \u001b[0;32mIn[5], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# res = API.delete_folder(path_in_repo='EleutherAI/', repo_id=QUEUE_REPO, repo_type='dataset')\u001b[39;00m\n\u001b[0;32m----> 3\u001b[0m res \u001b[38;5;241m=\u001b[39m \u001b[43mAPI\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdelete_file\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpath_in_repo\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mEleutherAI/pythia-70m_pubmedqa_eval_request_False_float32_Original.json\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrepo_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mQUEUE_REPO\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrepo_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mdataset\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n",
|
78 |
+
"File \u001b[0;32m~/anaconda3/envs/lb/lib/python3.10/site-packages/huggingface_hub/utils/_validators.py:118\u001b[0m, in \u001b[0;36mvalidate_hf_hub_args.<locals>._inner_fn\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 115\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m check_use_auth_token:\n\u001b[1;32m 116\u001b[0m kwargs \u001b[38;5;241m=\u001b[39m smoothly_deprecate_use_auth_token(fn_name\u001b[38;5;241m=\u001b[39mfn\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m, has_token\u001b[38;5;241m=\u001b[39mhas_token, kwargs\u001b[38;5;241m=\u001b[39mkwargs)\n\u001b[0;32m--> 118\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
|
79 |
+
"File \u001b[0;32m~/anaconda3/envs/lb/lib/python3.10/site-packages/huggingface_hub/hf_api.py:4704\u001b[0m, in \u001b[0;36mHfApi.delete_file\u001b[0;34m(self, path_in_repo, repo_id, token, repo_type, revision, commit_message, commit_description, create_pr, parent_commit)\u001b[0m\n\u001b[1;32m 4698\u001b[0m commit_message \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 4699\u001b[0m commit_message \u001b[38;5;28;01mif\u001b[39;00m commit_message \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDelete \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpath_in_repo\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m with huggingface_hub\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 4700\u001b[0m )\n\u001b[1;32m 4702\u001b[0m operations \u001b[38;5;241m=\u001b[39m [CommitOperationDelete(path_in_repo\u001b[38;5;241m=\u001b[39mpath_in_repo)]\n\u001b[0;32m-> 4704\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcreate_commit\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 4705\u001b[0m \u001b[43m \u001b[49m\u001b[43mrepo_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrepo_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4706\u001b[0m \u001b[43m \u001b[49m\u001b[43mrepo_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrepo_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4707\u001b[0m \u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4708\u001b[0m \u001b[43m \u001b[49m\u001b[43moperations\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moperations\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4709\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4710\u001b[0m \u001b[43m \u001b[49m\u001b[43mcommit_message\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcommit_message\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4711\u001b[0m \u001b[43m \u001b[49m\u001b[43mcommit_description\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcommit_description\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4712\u001b[0m \u001b[43m \u001b[49m\u001b[43mcreate_pr\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcreate_pr\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4713\u001b[0m \u001b[43m \u001b[49m\u001b[43mparent_commit\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparent_commit\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4714\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
|
80 |
+
"File \u001b[0;32m~/anaconda3/envs/lb/lib/python3.10/site-packages/huggingface_hub/utils/_validators.py:118\u001b[0m, in \u001b[0;36mvalidate_hf_hub_args.<locals>._inner_fn\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 115\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m check_use_auth_token:\n\u001b[1;32m 116\u001b[0m kwargs \u001b[38;5;241m=\u001b[39m smoothly_deprecate_use_auth_token(fn_name\u001b[38;5;241m=\u001b[39mfn\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m, has_token\u001b[38;5;241m=\u001b[39mhas_token, kwargs\u001b[38;5;241m=\u001b[39mkwargs)\n\u001b[0;32m--> 118\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
|
81 |
+
"File \u001b[0;32m~/anaconda3/envs/lb/lib/python3.10/site-packages/huggingface_hub/hf_api.py:1208\u001b[0m, in \u001b[0;36mfuture_compatible.<locals>._inner\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1205\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrun_as_future(fn, \u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 1207\u001b[0m \u001b[38;5;66;03m# Otherwise, call the function normally\u001b[39;00m\n\u001b[0;32m-> 1208\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
|
82 |
+
"File \u001b[0;32m~/anaconda3/envs/lb/lib/python3.10/site-packages/huggingface_hub/hf_api.py:3600\u001b[0m, in \u001b[0;36mHfApi.create_commit\u001b[0;34m(self, repo_id, operations, commit_message, commit_description, token, repo_type, revision, create_pr, num_threads, parent_commit, run_as_future)\u001b[0m\n\u001b[1;32m 3598\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 3599\u001b[0m commit_resp \u001b[38;5;241m=\u001b[39m get_session()\u001b[38;5;241m.\u001b[39mpost(url\u001b[38;5;241m=\u001b[39mcommit_url, headers\u001b[38;5;241m=\u001b[39mheaders, data\u001b[38;5;241m=\u001b[39mdata, params\u001b[38;5;241m=\u001b[39mparams)\n\u001b[0;32m-> 3600\u001b[0m \u001b[43mhf_raise_for_status\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcommit_resp\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mendpoint_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcommit\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3601\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m RepositoryNotFoundError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 3602\u001b[0m e\u001b[38;5;241m.\u001b[39mappend_to_message(_CREATE_COMMIT_NO_REPO_ERROR_MESSAGE)\n",
|
83 |
+
"File \u001b[0;32m~/anaconda3/envs/lb/lib/python3.10/site-packages/huggingface_hub/utils/_errors.py:296\u001b[0m, in \u001b[0;36mhf_raise_for_status\u001b[0;34m(response, endpoint_name)\u001b[0m\n\u001b[1;32m 294\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m error_code \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mEntryNotFound\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 295\u001b[0m message \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse\u001b[38;5;241m.\u001b[39mstatus_code\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m Client Error.\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mEntry Not Found for url: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse\u001b[38;5;241m.\u001b[39murl\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m--> 296\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m EntryNotFoundError(message, response) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01me\u001b[39;00m\n\u001b[1;32m 298\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m error_code \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mGatedRepo\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 299\u001b[0m message \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 300\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse\u001b[38;5;241m.\u001b[39mstatus_code\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m Client Error.\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m 
\u001b[38;5;241m+\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCannot access gated repo for url \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse\u001b[38;5;241m.\u001b[39murl\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 301\u001b[0m )\n",
|
84 |
+
"\u001b[0;31mEntryNotFoundError\u001b[0m: 404 Client Error. (Request ID: Root=1-65c41aaf-4be744ec4d8b25f96aac8d20;c17f9346-8c9c-44c6-af2a-d69065e58148)\n\nEntry Not Found for url: https://huggingface.co/api/datasets/chaeeunlee/test_requests/commit/main.\nA file with the name \"EleutherAI/pythia-70m_pubmedqa_eval_request_False_float32_Original.json\" does not exist"
|
85 |
+
]
|
86 |
+
}
|
87 |
+
],
|
88 |
"source": [
|
89 |
"# res = API.delete_folder(path_in_repo='EleutherAI/', repo_id=QUEUE_REPO, repo_type='dataset')\n",
|
90 |
"\n",
|
src/.DS_Store
CHANGED
Binary files a/src/.DS_Store and b/src/.DS_Store differ
|
|
src/backend/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
src/backend/envs.py
CHANGED
@@ -19,8 +19,10 @@ class Task:
|
|
19 |
# how are these differentiated with Tasks in display/utils.py ?
|
20 |
class Tasks(Enum):
|
21 |
task0 = Task("pubmedqa", "acc", "PubMedQA", 0) # 64, as in the ATLAS paper
|
22 |
-
task1 = Task("hellaswag", "acc_norm", "HellaSwag", 0) # 64, as in the ATLAS paper
|
23 |
-
|
|
|
|
|
24 |
|
25 |
|
26 |
num_fewshots = {
|
|
|
19 |
# how are these differentiated with Tasks in display/utils.py ?
|
20 |
class Tasks(Enum):
|
21 |
task0 = Task("pubmedqa", "acc", "PubMedQA", 0) # 64, as in the ATLAS paper
|
22 |
+
# task1 = Task("hellaswag", "acc_norm", "HellaSwag", 0) # 64, as in the ATLAS paper
|
23 |
+
task1 = Task("medqa", "acc_norm", "MedQA", 0) # medqa_4options?
|
24 |
+
task2 = Task("medmcqa", "acc_norm", "MedMCQA", 0)
|
25 |
+
|
26 |
|
27 |
|
28 |
num_fewshots = {
|
src/backend/tasks/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
src/backend/tasks/medmcqa/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
src/backend/tasks/medmcqa/medmcqa.yaml
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
task: medmcqa
|
2 |
+
dataset_path: medmcqa
|
3 |
+
output_type: multiple_choice
|
4 |
+
training_split: train
|
5 |
+
validation_split: validation
|
6 |
+
test_split: validation
|
7 |
+
doc_to_text: !function utils_medmcqa.doc_to_text
|
8 |
+
doc_to_target: cop
|
9 |
+
doc_to_choice: [ 'A','B','C','D' ]
|
10 |
+
should_decontaminate: true
|
11 |
+
doc_to_decontamination_query: "{{question}}"
|
12 |
+
metric_list:
|
13 |
+
- metric: acc
|
14 |
+
aggregation: mean
|
15 |
+
higher_is_better: true
|
16 |
+
- metric: acc_norm
|
17 |
+
aggregation: mean
|
18 |
+
higher_is_better: true
|
src/backend/tasks/medmcqa/utils_medmcqa.py
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copied from Master
|
2 |
+
def doc_to_text(doc) -> str:
|
3 |
+
"""
|
4 |
+
Question: <question>
|
5 |
+
Choices:
|
6 |
+
A. <choice1>
|
7 |
+
B. <choice2>
|
8 |
+
C. <choice3>
|
9 |
+
D. <choice4>
|
10 |
+
Answer:
|
11 |
+
"""
|
12 |
+
choices = [doc["opa"], doc["opb"], doc["opc"], doc["opd"]]
|
13 |
+
option_choices = {'A': choices[0], 'B': choices[1], 'C': choices[2], 'D': choices[3]}
|
14 |
+
|
15 |
+
prompt = "Question: " + doc["question"] + "\nChoices:\n"
|
16 |
+
for choice, option in option_choices.items():
|
17 |
+
prompt += f"{choice.upper()}. {option}\n"
|
18 |
+
prompt += "Answer:"
|
19 |
+
return prompt
|
src/backend/tasks/medqa/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
src/backend/tasks/medqa/medqa.yaml
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
task: medqa_4options
|
2 |
+
dataset_path: GBaker/MedQA-USMLE-4-options-hf
|
3 |
+
output_type: multiple_choice
|
4 |
+
training_split: train
|
5 |
+
validation_split: validation
|
6 |
+
test_split: test
|
7 |
+
doc_to_text: !function preprocess_medqa.doc_to_text
|
8 |
+
doc_to_target: !function preprocess_medqa.doc_to_target
|
9 |
+
doc_to_choice: [ 'A', 'B', 'C', 'D' ]
|
10 |
+
metric_list:
|
11 |
+
- metric: acc
|
12 |
+
aggregation: mean
|
13 |
+
higher_is_better: true
|
14 |
+
- metric: acc_norm
|
15 |
+
aggregation: mean
|
16 |
+
higher_is_better: true
|
src/backend/tasks/medqa/preprocess_medqa.py
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
def doc_to_text(doc) -> str:
|
2 |
+
option_choices = {'A': doc["ending0"], 'B': doc["ending1"], 'C': doc["ending2"], 'D': doc["ending3"]}
|
3 |
+
answers = "".join((f"{k}. {v}\n") for k, v in option_choices.items())
|
4 |
+
return f"Question: {doc['sent1']}\n{answers}Answer:"
|
5 |
+
|
6 |
+
|
7 |
+
def doc_to_target(doc) -> int:
|
8 |
+
return doc["label"]
|
src/display/__pycache__/utils.cpython-310.pyc
CHANGED
Binary files a/src/display/__pycache__/utils.cpython-310.pyc and b/src/display/__pycache__/utils.cpython-310.pyc differ
|
|
src/display/utils.py
CHANGED
@@ -17,7 +17,10 @@ class Task:
|
|
17 |
|
18 |
class Tasks(Enum):
|
19 |
# arc = Task("arc:challenge", "acc_norm", "ARC")
|
20 |
-
hellaswag = Task("hellaswag", "acc_norm", "HellaSwag")
|
|
|
|
|
|
|
21 |
# mmlu = Task("hendrycksTest", "acc", "MMLU")
|
22 |
# truthfulqa = Task("truthfulqa:mc", "mc2", "TruthfulQA")
|
23 |
# winogrande = Task("winogrande", "acc", "Winogrande")
|
@@ -45,7 +48,7 @@ auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "ma
|
|
45 |
#Scores
|
46 |
auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Avg", "number", True)])
|
47 |
for task in Tasks:
|
48 |
-
auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number",
|
49 |
# Model information
|
50 |
auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
|
51 |
auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
|
|
|
17 |
|
18 |
class Tasks(Enum):
|
19 |
# arc = Task("arc:challenge", "acc_norm", "ARC")
|
20 |
+
# hellaswag = Task("hellaswag", "acc_norm", "HellaSwag")
|
21 |
+
|
22 |
+
medqa = Task("medqa", "acc_norm", "MedQA") # medqa_4options?
|
23 |
+
medmcqa = Task("medmcqa", "acc_norm", "MedMCQA")
|
24 |
# mmlu = Task("hendrycksTest", "acc", "MMLU")
|
25 |
# truthfulqa = Task("truthfulqa:mc", "mc2", "TruthfulQA")
|
26 |
# winogrande = Task("winogrande", "acc", "Winogrande")
|
|
|
48 |
#Scores
|
49 |
auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Avg", "number", True)])
|
50 |
for task in Tasks:
|
51 |
+
auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", False)]) # hidden was true by default
|
52 |
# Model information
|
53 |
auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
|
54 |
auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
|
src/populate.py
CHANGED
@@ -15,13 +15,14 @@ and cleaning the data based on specific criteria. Let's break down the function
|
|
15 |
|
16 |
'''
|
17 |
|
|
|
18 |
def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> tuple[list[EvalResult], pd.DataFrame]:
|
19 |
|
20 |
print(f"results_path = {results_path}")
|
21 |
|
22 |
raw_data = get_raw_eval_results(results_path, requests_path)
|
23 |
|
24 |
-
print(f"
|
25 |
|
26 |
all_data_json = [v.to_dict() for v in raw_data] # if v.is_complete()]
|
27 |
# all_data_json.append(baseline_row)
|
|
|
15 |
|
16 |
'''
|
17 |
|
18 |
+
## TO-DO: if raw_data is [], return dummy df with correct columns so that the UI shows the right columns
|
19 |
def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> tuple[list[EvalResult], pd.DataFrame]:
|
20 |
|
21 |
print(f"results_path = {results_path}")
|
22 |
|
23 |
raw_data = get_raw_eval_results(results_path, requests_path)
|
24 |
|
25 |
+
# print(f"@@raw_data = {raw_data}")
|
26 |
|
27 |
all_data_json = [v.to_dict() for v in raw_data] # if v.is_complete()]
|
28 |
# all_data_json.append(baseline_row)
|
src/submission/__pycache__/submit.cpython-310.pyc
CHANGED
Binary files a/src/submission/__pycache__/submit.cpython-310.pyc and b/src/submission/__pycache__/submit.cpython-310.pyc differ
|
|