Spaces: qwen2.5-1.5b (Build error)
data/Qwen2.5-0.5B-Instruct_results.csv ADDED
The diff for this file is too large to render. See raw diff.

data/Qwen2.5-1.5B-Instruct_results.csv ADDED
The diff for this file is too large to render. See raw diff.
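Both results files are too large for the inline diff viewer, so they are easiest to inspect locally. Below is a minimal sketch using pandas; it assumes only that the files are plain CSV, and it prints the actual column schema rather than guessing it:

    import pandas as pd

    # Load one of the newly committed results files.
    df = pd.read_csv("data/Qwen2.5-1.5B-Instruct_results.csv")

    print(df.shape)             # rows = evaluated samples, cols = result fields
    print(df.columns.tolist())  # discover the real schema before relying on it
    print(df.head())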
notebooks/07_Qwen2.5_models.ipynb CHANGED
@@ -19954,13 +19954,337 @@
19954     },
19955     {
19956     "cell_type": "code",
19957 -   "execution_count":
19958     "metadata": {},
19959 -   "outputs": [
19960     "source": [
19961     "%%time\n",
19962     "\n",
19963 -   "!./scripts/eval-mgtv-qwen2.
19964     ]
19965     }
19966     ],
19954     },
19955     {
19956     "cell_type": "code",
19957 +   "execution_count": 8,
19958     "metadata": {},
19959 +   "outputs": [
19960 +   {
19961 +   "name": "stdout",
19962 +   "output_type": "stream",
19963 +   "text": [
19964 +   "Current Directory:\n",
19965 +   "/home/inflaton/code/logical-reasoning\n",
19966 +   "Sat Sep 21 23:55:43 2024 \n",
19967 +   "+-----------------------------------------------------------------------------------------+\n",
19968 +   "| NVIDIA-SMI 560.35.02 Driver Version: 560.94 CUDA Version: 12.6 |\n",
19969 +   "|-----------------------------------------+------------------------+----------------------+\n",
19970 +   "| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |\n",
19971 +   "| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n",
19972 +   "| | | MIG M. |\n",
19973 +   "|=========================================+========================+======================|\n",
19974 +   "| 0 NVIDIA GeForce RTX 4090 On | 00000000:01:00.0 Off | Off |\n",
19975 +   "| 54% 59C P3 41W / 450W | 471MiB / 24564MiB | 2% Default |\n",
19976 +   "| | | N/A |\n",
19977 +   "+-----------------------------------------+------------------------+----------------------+\n",
19978 +   " \n",
19979 +   "+-----------------------------------------------------------------------------------------+\n",
19980 +   "| Processes: |\n",
19981 +   "| GPU GI CI PID Type Process name GPU Memory |\n",
19982 +   "| ID ID Usage |\n",
19983 +   "|=========================================================================================|\n",
19984 +   "| 0 N/A N/A 25 G /Xwayland N/A |\n",
19985 +   "+-----------------------------------------------------------------------------------------+\n",
19986 +   "Linux Gen-AI 5.15.133.1-microsoft-standard-WSL2 #1 SMP Thu Oct 5 21:02:42 UTC 2023 x86_64 x86_64 x86_64 GNU/Linux\n",
19987 +   "PRETTY_NAME=\"Ubuntu 22.04.2 LTS\"\n",
19988 +   "NAME=\"Ubuntu\"\n",
19989 +   "VERSION_ID=\"22.04\"\n",
19990 +   "VERSION=\"22.04.2 LTS (Jammy Jellyfish)\"\n",
19991 +   "VERSION_CODENAME=jammy\n",
19992 +   "ID=ubuntu\n",
19993 +   "ID_LIKE=debian\n",
19994 +   "HOME_URL=\"https://www.ubuntu.com/\"\n",
19995 +   "SUPPORT_URL=\"https://help.ubuntu.com/\"\n",
19996 +   "BUG_REPORT_URL=\"https://bugs.launchpad.net/ubuntu/\"\n",
19997 +   "PRIVACY_POLICY_URL=\"https://www.ubuntu.com/legal/terms-and-policies/privacy-policy\"\n",
19998 +   "UBUNTU_CODENAME=jammy\n",
19999 +   "Architecture: x86_64\n",
20000 +   " CPU op-mode(s): 32-bit, 64-bit\n",
20001 +   " Address sizes: 39 bits physical, 48 bits virtual\n",
20002 +   " Byte Order: Little Endian\n",
20003 +   "CPU(s): 32\n",
20004 +   " On-line CPU(s) list: 0-31\n",
20005 +   "Vendor ID: GenuineIntel\n",
20006 +   " Model name: 13th Gen Intel(R) Core(TM) i9-13900KF\n",
20007 +   " CPU family: 6\n",
20008 +   " Model: 183\n",
20009 +   " Thread(s) per core: 2\n",
20010 +   " Core(s) per socket: 16\n",
20011 +   " Socket(s): 1\n",
20012 +   " Stepping: 1\n",
20013 +   " BogoMIPS: 5990.39\n",
20014 +   " Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mc\n",
20015 +   " a cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscal\n",
20016 +   " l nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopo\n",
20017 +   " logy tsc_reliable nonstop_tsc cpuid pni pclmulqdq vmx s\n",
20018 +   " sse3 fma cx16 sse4_1 sse4_2 x2apic movbe popcnt tsc_dea\n",
20019 +   " dline_timer aes xsave avx f16c rdrand hypervisor lahf_l\n",
20020 +   " m abm 3dnowprefetch ssbd ibrs ibpb stibp ibrs_enhanced \n",
20021 +   " tpr_shadow vnmi ept vpid ept_ad fsgsbase tsc_adjust bmi\n",
20022 +   " 1 avx2 smep bmi2 erms invpcid rdseed adx smap clflushop\n",
20023 +   " t clwb sha_ni xsaveopt xsavec xgetbv1 xsaves avx_vnni u\n",
20024 +   " mip waitpkg gfni vaes vpclmulqdq rdpid movdiri movdir64\n",
20025 +   " b fsrm md_clear serialize flush_l1d arch_capabilities\n",
20026 +   "Virtualization features: \n",
20027 +   " Virtualization: VT-x\n",
20028 +   " Hypervisor vendor: Microsoft\n",
20029 +   " Virtualization type: full\n",
20030 +   "Caches (sum of all): \n",
20031 +   " L1d: 768 KiB (16 instances)\n",
20032 +   " L1i: 512 KiB (16 instances)\n",
20033 +   " L2: 32 MiB (16 instances)\n",
20034 +   " L3: 36 MiB (1 instance)\n",
20035 +   "Vulnerabilities: \n",
20036 +   " Gather data sampling: Not affected\n",
20037 +   " Itlb multihit: Not affected\n",
20038 +   " L1tf: Not affected\n",
20039 +   " Mds: Not affected\n",
20040 +   " Meltdown: Not affected\n",
20041 +   " Mmio stale data: Not affected\n",
20042 +   " Retbleed: Mitigation; Enhanced IBRS\n",
20043 +   " Spec rstack overflow: Not affected\n",
20044 +   " Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl\n",
20045 +   " and seccomp\n",
20046 +   " Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer\n",
20047 +   " sanitization\n",
20048 +   " Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB fillin\n",
20049 +   " g, PBRSB-eIBRS SW sequence\n",
20050 +   " Srbds: Not affected\n",
20051 +   " Tsx async abort: Not affected\n",
20052 +   "MemTotal: 49330024 kB\n",
20053 +   "Current Directory:\n",
20054 +   "/home/inflaton/code/logical-reasoning\n",
20055 +   "Evaluating Qwen/Qwen2.5-7B-Instruct with few-shot learning\n",
20056 +   "loading env vars from: /home/inflaton/code/logical-reasoning/.env\n",
20057 +   "Adding /home/inflaton/code/logical-reasoning to sys.path\n",
20058 +   "loading /home/inflaton/code/logical-reasoning/llm_toolkit/logical_reasoning_utils.py\n",
20059 +   "CUDA is available, we have found 1 GPU(s)\n",
20060 +   "NVIDIA GeForce RTX 4090\n",
20061 +   "CUDA version: 12.1\n",
20062 +   "Qwen/Qwen2.5-7B-Instruct None False datasets/mgtv data/Qwen2.5-7B-Instruct_results.csv 2048 1\n",
20063 +   "(0) GPU = NVIDIA GeForce RTX 4090. Max memory = 23.988 GB.\n",
20064 +   "0.0 GB of memory reserved.\n",
20065 +   "loading model: Qwen/Qwen2.5-7B-Instruct with adapter: None\n",
20066 +   "config.json: 100%|█████████████████████████████| 663/663 [00:00<00:00, 10.9MB/s]\n",
20067 +   "[INFO|configuration_utils.py:733] 2024-09-21 23:55:48,860 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/acbd96531cda22292a3ceaa67e984955d3965282/config.json\n",
20068 +   "[INFO|configuration_utils.py:800] 2024-09-21 23:55:48,861 >> Model config Qwen2Config {\n",
20069 +   " \"_name_or_path\": \"Qwen/Qwen2.5-7B-Instruct\",\n",
20070 +   " \"architectures\": [\n",
20071 +   " \"Qwen2ForCausalLM\"\n",
20072 +   " ],\n",
20073 +   " \"attention_dropout\": 0.0,\n",
20074 +   " \"bos_token_id\": 151643,\n",
20075 +   " \"eos_token_id\": 151645,\n",
20076 +   " \"hidden_act\": \"silu\",\n",
20077 +   " \"hidden_size\": 3584,\n",
20078 +   " \"initializer_range\": 0.02,\n",
20079 +   " \"intermediate_size\": 18944,\n",
20080 +   " \"max_position_embeddings\": 32768,\n",
20081 +   " \"max_window_layers\": 28,\n",
20082 +   " \"model_type\": \"qwen2\",\n",
20083 +   " \"num_attention_heads\": 28,\n",
20084 +   " \"num_hidden_layers\": 28,\n",
20085 +   " \"num_key_value_heads\": 4,\n",
20086 +   " \"rms_norm_eps\": 1e-06,\n",
20087 +   " \"rope_theta\": 1000000.0,\n",
20088 +   " \"sliding_window\": null,\n",
20089 +   " \"tie_word_embeddings\": false,\n",
20090 +   " \"torch_dtype\": \"bfloat16\",\n",
20091 +   " \"transformers_version\": \"4.43.3\",\n",
20092 +   " \"use_cache\": true,\n",
20093 +   " \"use_sliding_window\": false,\n",
20094 +   " \"vocab_size\": 152064\n",
20095 +   "}\n",
20096 +   "\n",
20097 +   "tokenizer_config.json: 100%|███████████████| 7.30k/7.30k [00:00<00:00, 53.0MB/s]\n",
20098 +   "vocab.json: 100%|██████████████████████████| 2.78M/2.78M [00:01<00:00, 1.76MB/s]\n",
20099 +   "merges.txt: 100%|██████████████████████████| 1.67M/1.67M [00:00<00:00, 23.9MB/s]\n",
20100 +   "tokenizer.json: 100%|██████████████████████| 7.03M/7.03M [00:00<00:00, 10.8MB/s]\n",
20101 +   "[INFO|tokenization_utils_base.py:2289] 2024-09-21 23:55:53,949 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/acbd96531cda22292a3ceaa67e984955d3965282/vocab.json\n",
20102 +   "[INFO|tokenization_utils_base.py:2289] 2024-09-21 23:55:53,950 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/acbd96531cda22292a3ceaa67e984955d3965282/merges.txt\n",
20103 +   "[INFO|tokenization_utils_base.py:2289] 2024-09-21 23:55:53,950 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/acbd96531cda22292a3ceaa67e984955d3965282/tokenizer.json\n",
20104 +   "[INFO|tokenization_utils_base.py:2289] 2024-09-21 23:55:53,950 >> loading file added_tokens.json from cache at None\n",
20105 +   "[INFO|tokenization_utils_base.py:2289] 2024-09-21 23:55:53,950 >> loading file special_tokens_map.json from cache at None\n",
20106 +   "[INFO|tokenization_utils_base.py:2289] 2024-09-21 23:55:53,950 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/acbd96531cda22292a3ceaa67e984955d3965282/tokenizer_config.json\n",
20107 +   "[INFO|tokenization_utils_base.py:2533] 2024-09-21 23:55:54,041 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
20108 +   "[INFO|configuration_utils.py:733] 2024-09-21 23:55:55,157 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/acbd96531cda22292a3ceaa67e984955d3965282/config.json\n",
20109 +   "[INFO|configuration_utils.py:800] 2024-09-21 23:55:55,157 >> Model config Qwen2Config {\n",
20110 +   " \"_name_or_path\": \"Qwen/Qwen2.5-7B-Instruct\",\n",
20111 +   " \"architectures\": [\n",
20112 +   " \"Qwen2ForCausalLM\"\n",
20113 +   " ],\n",
20114 +   " \"attention_dropout\": 0.0,\n",
20115 +   " \"bos_token_id\": 151643,\n",
20116 +   " \"eos_token_id\": 151645,\n",
20117 +   " \"hidden_act\": \"silu\",\n",
20118 +   " \"hidden_size\": 3584,\n",
20119 +   " \"initializer_range\": 0.02,\n",
20120 +   " \"intermediate_size\": 18944,\n",
20121 +   " \"max_position_embeddings\": 32768,\n",
20122 +   " \"max_window_layers\": 28,\n",
20123 +   " \"model_type\": \"qwen2\",\n",
20124 +   " \"num_attention_heads\": 28,\n",
20125 +   " \"num_hidden_layers\": 28,\n",
20126 +   " \"num_key_value_heads\": 4,\n",
20127 +   " \"rms_norm_eps\": 1e-06,\n",
20128 +   " \"rope_theta\": 1000000.0,\n",
20129 +   " \"sliding_window\": null,\n",
20130 +   " \"tie_word_embeddings\": false,\n",
20131 +   " \"torch_dtype\": \"bfloat16\",\n",
20132 +   " \"transformers_version\": \"4.43.3\",\n",
20133 +   " \"use_cache\": true,\n",
20134 +   " \"use_sliding_window\": false,\n",
20135 +   " \"vocab_size\": 152064\n",
20136 +   "}\n",
20137 +   "\n",
20138 +   "[INFO|tokenization_utils_base.py:2289] 2024-09-21 23:55:55,421 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/acbd96531cda22292a3ceaa67e984955d3965282/vocab.json\n",
20139 +   "[INFO|tokenization_utils_base.py:2289] 2024-09-21 23:55:55,421 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/acbd96531cda22292a3ceaa67e984955d3965282/merges.txt\n",
20140 +   "[INFO|tokenization_utils_base.py:2289] 2024-09-21 23:55:55,421 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/acbd96531cda22292a3ceaa67e984955d3965282/tokenizer.json\n",
20141 +   "[INFO|tokenization_utils_base.py:2289] 2024-09-21 23:55:55,422 >> loading file added_tokens.json from cache at None\n",
20142 +   "[INFO|tokenization_utils_base.py:2289] 2024-09-21 23:55:55,422 >> loading file special_tokens_map.json from cache at None\n",
20143 +   "[INFO|tokenization_utils_base.py:2289] 2024-09-21 23:55:55,422 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/acbd96531cda22292a3ceaa67e984955d3965282/tokenizer_config.json\n",
20144 +   "[INFO|tokenization_utils_base.py:2533] 2024-09-21 23:55:55,509 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
20145 +   "09/21/2024 23:55:55 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n",
20146 +   "[INFO|configuration_utils.py:733] 2024-09-21 23:55:55,814 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/acbd96531cda22292a3ceaa67e984955d3965282/config.json\n",
20147 +   "[INFO|configuration_utils.py:800] 2024-09-21 23:55:55,814 >> Model config Qwen2Config {\n",
20148 +   " \"_name_or_path\": \"Qwen/Qwen2.5-7B-Instruct\",\n",
20149 +   " \"architectures\": [\n",
20150 +   " \"Qwen2ForCausalLM\"\n",
20151 +   " ],\n",
20152 +   " \"attention_dropout\": 0.0,\n",
20153 +   " \"bos_token_id\": 151643,\n",
20154 +   " \"eos_token_id\": 151645,\n",
20155 +   " \"hidden_act\": \"silu\",\n",
20156 +   " \"hidden_size\": 3584,\n",
20157 +   " \"initializer_range\": 0.02,\n",
20158 +   " \"intermediate_size\": 18944,\n",
20159 +   " \"max_position_embeddings\": 32768,\n",
20160 +   " \"max_window_layers\": 28,\n",
20161 +   " \"model_type\": \"qwen2\",\n",
20162 +   " \"num_attention_heads\": 28,\n",
20163 +   " \"num_hidden_layers\": 28,\n",
20164 +   " \"num_key_value_heads\": 4,\n",
20165 +   " \"rms_norm_eps\": 1e-06,\n",
20166 +   " \"rope_theta\": 1000000.0,\n",
20167 +   " \"sliding_window\": null,\n",
20168 +   " \"tie_word_embeddings\": false,\n",
20169 +   " \"torch_dtype\": \"bfloat16\",\n",
20170 +   " \"transformers_version\": \"4.43.3\",\n",
20171 +   " \"use_cache\": true,\n",
20172 +   " \"use_sliding_window\": false,\n",
20173 +   " \"vocab_size\": 152064\n",
20174 +   "}\n",
20175 +   "\n",
20176 +   "09/21/2024 23:55:55 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n",
20177 +   "model.safetensors.index.json: 100%|████████| 27.8k/27.8k [00:00<00:00, 24.5MB/s]\n",
20178 +   "[INFO|modeling_utils.py:3634] 2024-09-21 23:55:56,890 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/acbd96531cda22292a3ceaa67e984955d3965282/model.safetensors.index.json\n",
20179 +   "Downloading shards: 0%| | 0/4 [00:00<?, ?it/s]\n",
20180 +   "model-00001-of-00004.safetensors: 0%| | 0.00/3.95G [00:00<?, ?B/s]\u001b[A\n",
20181 +   "model-00001-of-00004.safetensors: 0%| | 10.5M/3.95G [00:02<13:57, 4.70MB/s]\u001b[A\n",
20182 +   "model-00001-of-00004.safetensors: 1%| | 21.0M/3.95G [00:03<09:51, 6.63MB/s]\u001b[A\n",
20183 +   "model-00001-of-00004.safetensors: 1%| | 31.5M/3.95G [00:04<07:42, 8.47MB/s]\u001b[A\n",
20184 +   "model-00001-of-00004.safetensors: 1%| | 41.9M/3.95G [00:05<06:45, 9.63MB/s]\u001b[A\n",
20185 +   "model-00001-of-00004.safetensors: 1%| | 52.4M/3.95G [00:06<06:45, 9.59MB/s]\u001b[A\n",
20186 +   "model-00001-of-00004.safetensors: 2%| | 62.9M/3.95G [00:07<06:38, 9.73MB/s]\u001b[A\n",
20187 +   "model-00001-of-00004.safetensors: 2%| | 73.4M/3.95G [00:08<06:20, 10.2MB/s]\u001b[A\n",
20188 +   "model-00001-of-00004.safetensors: 2%| | 83.9M/3.95G [00:08<05:58, 10.8MB/s]\u001b[A\n",
20189 +   "model-00001-of-00004.safetensors: 2%| | 94.4M/3.95G [00:09<05:52, 10.9MB/s]\u001b[A\n",
20190 +   "model-00001-of-00004.safetensors: 3%|█ | 105M/3.95G [00:10<05:43, 11.2MB/s]\u001b[A\n",
20191 +   "model-00001-of-00004.safetensors: 3%|█ | 115M/3.95G [00:11<05:32, 11.5MB/s]\u001b[A\n",
20192 +   "model-00001-of-00004.safetensors: 3%|█ | 126M/3.95G [00:12<05:49, 10.9MB/s]\u001b[A\n",
20193 +   "model-00001-of-00004.safetensors: 3%|█ | 136M/3.95G [00:13<06:04, 10.4MB/s]\u001b[A\n",
20194 +   "model-00001-of-00004.safetensors: 4%|█ | 147M/3.95G [00:14<06:14, 10.2MB/s]\u001b[A\n",
20195 +   "model-00001-of-00004.safetensors: 4%|█ | 157M/3.95G [00:16<06:31, 9.67MB/s]\u001b[A\n",
20196 +   "model-00001-of-00004.safetensors: 4%|█ | 168M/3.95G [00:17<06:42, 9.38MB/s]\u001b[A\n",
20197 +   "model-00001-of-00004.safetensors: 5%|█ | 178M/3.95G [00:18<06:45, 9.29MB/s]\u001b[A\n",
20198 +   "model-00001-of-00004.safetensors: 5%|█ | 189M/3.95G [00:19<06:18, 9.92MB/s]\u001b[A\n",
20199 +   "model-00001-of-00004.safetensors: 5%|█ | 199M/3.95G [00:20<05:50, 10.7MB/s]\u001b[A\n",
20200 +   "model-00001-of-00004.safetensors: 5%|█ | 210M/3.95G [00:20<05:32, 11.2MB/s]\u001b[A\n",
20201 +   "model-00001-of-00004.safetensors: 6%|█ | 220M/3.95G [00:21<05:19, 11.7MB/s]\u001b[A\n",
20202 +   "model-00001-of-00004.safetensors: 6%|█ | 231M/3.95G [00:22<05:19, 11.6MB/s]\u001b[A\n",
20203 +   "model-00001-of-00004.safetensors: 6%|█ | 241M/3.95G [00:23<05:12, 11.8MB/s]\u001b[A\n",
20204 +   "model-00001-of-00004.safetensors: 6%|█ | 252M/3.95G [00:24<05:07, 12.0MB/s]\u001b[A\n",
20205 +   "model-00001-of-00004.safetensors: 7%|█ | 262M/3.95G [00:25<05:24, 11.3MB/s]\u001b[A\n",
20206 +   "model-00001-of-00004.safetensors: 7%|█ | 273M/3.95G [00:26<05:18, 11.5MB/s]\u001b[A\n",
20207 +   "model-00001-of-00004.safetensors: 7%|█ | 283M/3.95G [00:27<05:42, 10.7MB/s]\u001b[A\n",
20208 +   "model-00001-of-00004.safetensors: 7%|█ | 294M/3.95G [00:28<05:39, 10.8MB/s]\u001b[A\n",
20209 +   "model-00001-of-00004.safetensors: 8%|█ | 304M/3.95G [00:29<05:33, 10.9MB/s]\u001b[A^C\n",
20210 +   "Downloading shards: 0%| | 0/4 [02:41<?, ?it/s]\n",
20211 +   "Traceback (most recent call last):\n",
20212 +   " File \"/home/inflaton/code/logical-reasoning/llm_toolkit/eval_shots.py\", line 64, in <module>\n",
20213 +   " model, tokenizer = load_model(\n",
20214 +   " ^^^^^^^^^^^\n",
20215 +   " File \"/home/inflaton/code/logical-reasoning/llm_toolkit/llm_utils.py\", line 52, in load_model\n",
20216 +   " chat_model = ChatModel(args)\n",
20217 +   " ^^^^^^^^^^^^^^^\n",
20218 +   " File \"/home/inflaton/code/LLaMA-Factory/src/llamafactory/chat/chat_model.py\", line 52, in __init__\n",
20219 +   " self.engine: \"BaseEngine\" = HuggingfaceEngine(model_args, data_args, finetuning_args, generating_args)\n",
20220 +   " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
20221 +   " File \"/home/inflaton/code/LLaMA-Factory/src/llamafactory/chat/hf_engine.py\", line 59, in __init__\n",
20222 +   " self.model = load_model(\n",
20223 +   " ^^^^^^^^^^^\n",
20224 +   " File \"/home/inflaton/code/LLaMA-Factory/src/llamafactory/model/loader.py\", line 162, in load_model\n",
20225 +   " model = load_class.from_pretrained(**init_kwargs)\n",
20226 +   " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
20227 +   " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/transformers/models/auto/auto_factory.py\", line 564, in from_pretrained\n",
20228 +   " return model_class.from_pretrained(\n",
20229 +   " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
20230 +   " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/transformers/modeling_utils.py\", line 3671, in from_pretrained\n",
20231 +   " resolved_archive_file, sharded_metadata = get_checkpoint_shard_files(\n",
20232 +   " ^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
20233 +   " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/transformers/utils/hub.py\", line 1079, in get_checkpoint_shard_files\n",
20234 +   " cached_filename = cached_file(\n",
20235 +   " ^^^^^^^^^^^^\n",
20236 +   " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/transformers/utils/hub.py\", line 402, in cached_file\n",
20237 +   " resolved_file = hf_hub_download(\n",
20238 +   " ^^^^^^^^^^^^^^^^\n",
20239 +   " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py\", line 114, in _inner_fn\n",
20240 +   " return fn(*args, **kwargs)\n",
20241 +   " ^^^^^^^^^^^^^^^^^^^\n",
20242 +   " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/huggingface_hub/file_download.py\", line 1221, in hf_hub_download\n",
20243 +   " return _hf_hub_download_to_cache_dir(\n",
20244 +   " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
20245 +   " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/huggingface_hub/file_download.py\", line 1367, in _hf_hub_download_to_cache_dir\n",
20246 +   " _download_to_tmp_and_move(\n",
20247 +   " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/huggingface_hub/file_download.py\", line 1884, in _download_to_tmp_and_move\n",
20248 +   " http_get(\n",
20249 +   " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/huggingface_hub/file_download.py\", line 539, in http_get\n",
20250 +   " for chunk in r.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE):\n",
20251 +   " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/requests/models.py\", line 820, in generate\n",
20252 +   " yield from self.raw.stream(chunk_size, decode_content=True)\n",
20253 +   " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/urllib3/response.py\", line 1060, in stream\n",
20254 +   " data = self.read(amt=amt, decode_content=decode_content)\n",
20255 +   " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
20256 +   " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/urllib3/response.py\", line 949, in read\n",
20257 +   " data = self._raw_read(amt)\n",
20258 +   " ^^^^^^^^^^^^^^^^^^^\n",
20259 +   " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/urllib3/response.py\", line 873, in _raw_read\n",
20260 +   " data = self._fp_read(amt, read1=read1) if not fp_closed else b\"\"\n",
20261 +   " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
20262 +   " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/urllib3/response.py\", line 856, in _fp_read\n",
20263 +   " return self._fp.read(amt) if amt is not None else self._fp.read()\n",
20264 +   " ^^^^^^^^^^^^^^^^^^\n",
20265 +   " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/http/client.py\", line 473, in read\n",
20266 +   " s = self.fp.read(amt)\n",
20267 +   " ^^^^^^^^^^^^^^^^^\n",
20268 +   " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/socket.py\", line 706, in readinto\n",
20269 +   " return self._sock.recv_into(b)\n",
20270 +   " ^^^^^^^^^^^^^^^^^^^^^^^\n",
20271 +   " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/ssl.py\", line 1314, in recv_into\n",
20272 +   " return self.read(nbytes, buffer)\n",
20273 +   " ^^^^^^^^^^^^^^^^^^^^^^^^^\n",
20274 +   " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/ssl.py\", line 1166, in read\n",
20275 +   " return self._sslobj.read(len, buffer)\n",
20276 +   " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
20277 +   "KeyboardInterrupt\n",
20278 +   "model-00001-of-00004.safetensors: 8%|█ | 304M/3.95G [00:29<05:57, 10.2MB/s]\n",
20279 +   "CPU times: user 901 ms, sys: 326 ms, total: 1.23 s\n",
20280 +   "Wall time: 2min 55s\n"
20281 +   ]
20282 +   }
20283 +   ],
20284     "source": [
20285     "%%time\n",
20286     "\n",
20287 +   "!./scripts/eval-mgtv-qwen2.5_3b.sh"
20288     ]
20289     }
20290     ],
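The captured output above ends in a KeyboardInterrupt: the run was cancelled while the first Qwen/Qwen2.5-7B-Instruct weight shard was still at 8%, roughly three minutes in. One way to decouple the evaluation cell from multi-gigabyte downloads is to pre-fetch the weights into the local Hugging Face cache beforehand. A minimal sketch with huggingface_hub, where only the repo id is taken from the log above:

    from huggingface_hub import snapshot_download

    # Pre-fetch every file of the model repo into the local HF cache.
    # An interrupted download (like the 8% shard above) resumes where it
    # left off instead of restarting from zero.
    local_path = snapshot_download(repo_id="Qwen/Qwen2.5-7B-Instruct")
    print("weights cached at:", local_path)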
scripts/eval-mgtv-qwen2.5_3b.sh ADDED
@@ -0,0 +1,27 @@
 1 + #!/bin/sh
 2 +
 3 + BASEDIR=$(dirname "$0")
 4 + cd $BASEDIR/..
 5 + echo Current Directory:
 6 + pwd
 7 +
 8 + BASEDIR=`pwd`
 9 +
10 + nvidia-smi
11 + uname -a
12 + cat /etc/os-release
13 + lscpu
14 + grep MemTotal /proc/meminfo
15 +
16 + # $BASEDIR/scripts/eval-shots.sh Qwen Qwen2.5-3B-Instruct
17 +
18 + $BASEDIR/scripts/eval-shots.sh Qwen Qwen2.5-1.5B-Instruct
19 +
20 + $BASEDIR/scripts/eval-shots.sh Qwen Qwen2.5-0.5B-Instruct
21 +
22 + $BASEDIR/scripts/eval-epochs.sh Qwen Qwen2.5-3B-Instruct
23 +
24 + $BASEDIR/scripts/eval-epochs.sh Qwen Qwen2.5-1.5B-Instruct
25 +
26 + $BASEDIR/scripts/eval-epochs.sh Qwen Qwen2.5-0.5B-Instruct
27 +
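The new script is a thin driver: it prints machine information and then chains the pre-existing eval-shots.sh and eval-epochs.sh helpers, each of which takes an organization and a model name (the Qwen2.5-3B-Instruct few-shot run is left commented out). Note that `cd $BASEDIR/..` is unquoted and would misbehave if the checkout path contained spaces; `cd "$BASEDIR/.."` is the defensive form. For driving the same sweep from Python, for example inside the notebook, a hedged equivalent using subprocess could look like the sketch below; only the helper names and their <org> <model> argument convention are taken from the script above:

    import subprocess

    # (helper script, model) pairs mirroring scripts/eval-mgtv-qwen2.5_3b.sh,
    # including the skipped 3B few-shot run being absent.
    RUNS = [
        ("scripts/eval-shots.sh",  "Qwen2.5-1.5B-Instruct"),
        ("scripts/eval-shots.sh",  "Qwen2.5-0.5B-Instruct"),
        ("scripts/eval-epochs.sh", "Qwen2.5-3B-Instruct"),
        ("scripts/eval-epochs.sh", "Qwen2.5-1.5B-Instruct"),
        ("scripts/eval-epochs.sh", "Qwen2.5-0.5B-Instruct"),
    ]

    for helper, model in RUNS:
        # Each helper is invoked as: <helper> <org> <model>, from the repo root.
        subprocess.run([helper, "Qwen", model], check=True)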