dh-mc committed on
Commit
ceb9311
·
1 Parent(s): e06d133

qwen2.5-1.5b

data/Qwen2.5-0.5B-Instruct_results.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/Qwen2.5-1.5B-Instruct_results.csv ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/07_Qwen2.5_models.ipynb CHANGED
@@ -19954,13 +19954,337 @@
  },
  {
  "cell_type": "code",
- "execution_count": null,
+ "execution_count": 8,
  "metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Current Directory:\n",
+ "/home/inflaton/code/logical-reasoning\n",
+ "Sat Sep 21 23:55:43 2024 \n",
+ "+-----------------------------------------------------------------------------------------+\n",
+ "| NVIDIA-SMI 560.35.02 Driver Version: 560.94 CUDA Version: 12.6 |\n",
+ "|-----------------------------------------+------------------------+----------------------+\n",
+ "| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |\n",
+ "| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n",
+ "| | | MIG M. |\n",
+ "|=========================================+========================+======================|\n",
+ "| 0 NVIDIA GeForce RTX 4090 On | 00000000:01:00.0 Off | Off |\n",
+ "| 54% 59C P3 41W / 450W | 471MiB / 24564MiB | 2% Default |\n",
+ "| | | N/A |\n",
+ "+-----------------------------------------+------------------------+----------------------+\n",
+ " \n",
+ "+-----------------------------------------------------------------------------------------+\n",
+ "| Processes: |\n",
+ "| GPU GI CI PID Type Process name GPU Memory |\n",
+ "| ID ID Usage |\n",
+ "|=========================================================================================|\n",
+ "| 0 N/A N/A 25 G /Xwayland N/A |\n",
+ "+-----------------------------------------------------------------------------------------+\n",
+ "Linux Gen-AI 5.15.133.1-microsoft-standard-WSL2 #1 SMP Thu Oct 5 21:02:42 UTC 2023 x86_64 x86_64 x86_64 GNU/Linux\n",
+ "PRETTY_NAME=\"Ubuntu 22.04.2 LTS\"\n",
+ "NAME=\"Ubuntu\"\n",
+ "VERSION_ID=\"22.04\"\n",
+ "VERSION=\"22.04.2 LTS (Jammy Jellyfish)\"\n",
+ "VERSION_CODENAME=jammy\n",
+ "ID=ubuntu\n",
+ "ID_LIKE=debian\n",
+ "HOME_URL=\"https://www.ubuntu.com/\"\n",
+ "SUPPORT_URL=\"https://help.ubuntu.com/\"\n",
+ "BUG_REPORT_URL=\"https://bugs.launchpad.net/ubuntu/\"\n",
+ "PRIVACY_POLICY_URL=\"https://www.ubuntu.com/legal/terms-and-policies/privacy-policy\"\n",
+ "UBUNTU_CODENAME=jammy\n",
+ "Architecture: x86_64\n",
+ " CPU op-mode(s): 32-bit, 64-bit\n",
+ " Address sizes: 39 bits physical, 48 bits virtual\n",
+ " Byte Order: Little Endian\n",
+ "CPU(s): 32\n",
+ " On-line CPU(s) list: 0-31\n",
+ "Vendor ID: GenuineIntel\n",
+ " Model name: 13th Gen Intel(R) Core(TM) i9-13900KF\n",
+ " CPU family: 6\n",
+ " Model: 183\n",
+ " Thread(s) per core: 2\n",
+ " Core(s) per socket: 16\n",
+ " Socket(s): 1\n",
+ " Stepping: 1\n",
+ " BogoMIPS: 5990.39\n",
+ " Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mc\n",
+ " a cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscal\n",
+ " l nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopo\n",
+ " logy tsc_reliable nonstop_tsc cpuid pni pclmulqdq vmx s\n",
+ " sse3 fma cx16 sse4_1 sse4_2 x2apic movbe popcnt tsc_dea\n",
+ " dline_timer aes xsave avx f16c rdrand hypervisor lahf_l\n",
+ " m abm 3dnowprefetch ssbd ibrs ibpb stibp ibrs_enhanced \n",
+ " tpr_shadow vnmi ept vpid ept_ad fsgsbase tsc_adjust bmi\n",
+ " 1 avx2 smep bmi2 erms invpcid rdseed adx smap clflushop\n",
+ " t clwb sha_ni xsaveopt xsavec xgetbv1 xsaves avx_vnni u\n",
+ " mip waitpkg gfni vaes vpclmulqdq rdpid movdiri movdir64\n",
+ " b fsrm md_clear serialize flush_l1d arch_capabilities\n",
+ "Virtualization features: \n",
+ " Virtualization: VT-x\n",
+ " Hypervisor vendor: Microsoft\n",
+ " Virtualization type: full\n",
+ "Caches (sum of all): \n",
+ " L1d: 768 KiB (16 instances)\n",
+ " L1i: 512 KiB (16 instances)\n",
+ " L2: 32 MiB (16 instances)\n",
+ " L3: 36 MiB (1 instance)\n",
+ "Vulnerabilities: \n",
+ " Gather data sampling: Not affected\n",
+ " Itlb multihit: Not affected\n",
+ " L1tf: Not affected\n",
+ " Mds: Not affected\n",
+ " Meltdown: Not affected\n",
+ " Mmio stale data: Not affected\n",
+ " Retbleed: Mitigation; Enhanced IBRS\n",
+ " Spec rstack overflow: Not affected\n",
+ " Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl\n",
+ " and seccomp\n",
+ " Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer\n",
+ " sanitization\n",
+ " Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB fillin\n",
+ " g, PBRSB-eIBRS SW sequence\n",
+ " Srbds: Not affected\n",
+ " Tsx async abort: Not affected\n",
+ "MemTotal: 49330024 kB\n",
+ "Current Directory:\n",
+ "/home/inflaton/code/logical-reasoning\n",
+ "Evaluating Qwen/Qwen2.5-7B-Instruct with few-shot learning\n",
+ "loading env vars from: /home/inflaton/code/logical-reasoning/.env\n",
+ "Adding /home/inflaton/code/logical-reasoning to sys.path\n",
+ "loading /home/inflaton/code/logical-reasoning/llm_toolkit/logical_reasoning_utils.py\n",
+ "CUDA is available, we have found 1 GPU(s)\n",
+ "NVIDIA GeForce RTX 4090\n",
+ "CUDA version: 12.1\n",
+ "Qwen/Qwen2.5-7B-Instruct None False datasets/mgtv data/Qwen2.5-7B-Instruct_results.csv 2048 1\n",
+ "(0) GPU = NVIDIA GeForce RTX 4090. Max memory = 23.988 GB.\n",
+ "0.0 GB of memory reserved.\n",
+ "loading model: Qwen/Qwen2.5-7B-Instruct with adapter: None\n",
+ "config.json: 100%|█████████████████████████████| 663/663 [00:00<00:00, 10.9MB/s]\n",
+ "[INFO|configuration_utils.py:733] 2024-09-21 23:55:48,860 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/acbd96531cda22292a3ceaa67e984955d3965282/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-09-21 23:55:48,861 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2.5-7B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 3584,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 18944,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 28,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 4,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": null,\n",
+ " \"tie_word_embeddings\": false,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.43.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 152064\n",
+ "}\n",
+ "\n",
+ "tokenizer_config.json: 100%|███████████████| 7.30k/7.30k [00:00<00:00, 53.0MB/s]\n",
+ "vocab.json: 100%|██████████████████████████| 2.78M/2.78M [00:01<00:00, 1.76MB/s]\n",
+ "merges.txt: 100%|██████████████████████████| 1.67M/1.67M [00:00<00:00, 23.9MB/s]\n",
+ "tokenizer.json: 100%|██████████████████████| 7.03M/7.03M [00:00<00:00, 10.8MB/s]\n",
+ "[INFO|tokenization_utils_base.py:2289] 2024-09-21 23:55:53,949 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/acbd96531cda22292a3ceaa67e984955d3965282/vocab.json\n",
+ "[INFO|tokenization_utils_base.py:2289] 2024-09-21 23:55:53,950 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/acbd96531cda22292a3ceaa67e984955d3965282/merges.txt\n",
+ "[INFO|tokenization_utils_base.py:2289] 2024-09-21 23:55:53,950 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/acbd96531cda22292a3ceaa67e984955d3965282/tokenizer.json\n",
+ "[INFO|tokenization_utils_base.py:2289] 2024-09-21 23:55:53,950 >> loading file added_tokens.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2289] 2024-09-21 23:55:53,950 >> loading file special_tokens_map.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2289] 2024-09-21 23:55:53,950 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/acbd96531cda22292a3ceaa67e984955d3965282/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2533] 2024-09-21 23:55:54,041 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "[INFO|configuration_utils.py:733] 2024-09-21 23:55:55,157 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/acbd96531cda22292a3ceaa67e984955d3965282/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-09-21 23:55:55,157 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2.5-7B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 3584,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 18944,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 28,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 4,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": null,\n",
+ " \"tie_word_embeddings\": false,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.43.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 152064\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2289] 2024-09-21 23:55:55,421 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/acbd96531cda22292a3ceaa67e984955d3965282/vocab.json\n",
+ "[INFO|tokenization_utils_base.py:2289] 2024-09-21 23:55:55,421 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/acbd96531cda22292a3ceaa67e984955d3965282/merges.txt\n",
+ "[INFO|tokenization_utils_base.py:2289] 2024-09-21 23:55:55,421 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/acbd96531cda22292a3ceaa67e984955d3965282/tokenizer.json\n",
+ "[INFO|tokenization_utils_base.py:2289] 2024-09-21 23:55:55,422 >> loading file added_tokens.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2289] 2024-09-21 23:55:55,422 >> loading file special_tokens_map.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2289] 2024-09-21 23:55:55,422 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/acbd96531cda22292a3ceaa67e984955d3965282/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2533] 2024-09-21 23:55:55,509 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "09/21/2024 23:55:55 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n",
+ "[INFO|configuration_utils.py:733] 2024-09-21 23:55:55,814 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/acbd96531cda22292a3ceaa67e984955d3965282/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-09-21 23:55:55,814 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2.5-7B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 3584,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 18944,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 28,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 4,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": null,\n",
+ " \"tie_word_embeddings\": false,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.43.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 152064\n",
+ "}\n",
+ "\n",
+ "09/21/2024 23:55:55 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n",
+ "model.safetensors.index.json: 100%|████████| 27.8k/27.8k [00:00<00:00, 24.5MB/s]\n",
+ "[INFO|modeling_utils.py:3634] 2024-09-21 23:55:56,890 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/acbd96531cda22292a3ceaa67e984955d3965282/model.safetensors.index.json\n",
+ "Downloading shards: 0%| | 0/4 [00:00<?, ?it/s]\n",
+ "model-00001-of-00004.safetensors: 0%| | 0.00/3.95G [00:00<?, ?B/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 0%| | 10.5M/3.95G [00:02<13:57, 4.70MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 1%| | 21.0M/3.95G [00:03<09:51, 6.63MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 1%| | 31.5M/3.95G [00:04<07:42, 8.47MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 1%| | 41.9M/3.95G [00:05<06:45, 9.63MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 1%| | 52.4M/3.95G [00:06<06:45, 9.59MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 2%| | 62.9M/3.95G [00:07<06:38, 9.73MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 2%| | 73.4M/3.95G [00:08<06:20, 10.2MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 2%| | 83.9M/3.95G [00:08<05:58, 10.8MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 2%| | 94.4M/3.95G [00:09<05:52, 10.9MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 3%|▏ | 105M/3.95G [00:10<05:43, 11.2MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 3%|▏ | 115M/3.95G [00:11<05:32, 11.5MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 3%|▏ | 126M/3.95G [00:12<05:49, 10.9MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 3%|▏ | 136M/3.95G [00:13<06:04, 10.4MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 4%|▏ | 147M/3.95G [00:14<06:14, 10.2MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 4%|▏ | 157M/3.95G [00:16<06:31, 9.67MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 4%|▏ | 168M/3.95G [00:17<06:42, 9.38MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 5%|▏ | 178M/3.95G [00:18<06:45, 9.29MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 5%|▏ | 189M/3.95G [00:19<06:18, 9.92MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 5%|▎ | 199M/3.95G [00:20<05:50, 10.7MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 5%|▎ | 210M/3.95G [00:20<05:32, 11.2MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 6%|▎ | 220M/3.95G [00:21<05:19, 11.7MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 6%|▎ | 231M/3.95G [00:22<05:19, 11.6MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 6%|▎ | 241M/3.95G [00:23<05:12, 11.8MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 6%|▎ | 252M/3.95G [00:24<05:07, 12.0MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 7%|▎ | 262M/3.95G [00:25<05:24, 11.3MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 7%|▎ | 273M/3.95G [00:26<05:18, 11.5MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 7%|▎ | 283M/3.95G [00:27<05:42, 10.7MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 7%|▎ | 294M/3.95G [00:28<05:39, 10.8MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 8%|▍ | 304M/3.95G [00:29<05:33, 10.9MB/s]\u001b[A^C\n",
+ "Downloading shards: 0%| | 0/4 [02:41<?, ?it/s]\n",
+ "Traceback (most recent call last):\n",
+ " File \"/home/inflaton/code/logical-reasoning/llm_toolkit/eval_shots.py\", line 64, in <module>\n",
+ " model, tokenizer = load_model(\n",
+ " ^^^^^^^^^^^\n",
+ " File \"/home/inflaton/code/logical-reasoning/llm_toolkit/llm_utils.py\", line 52, in load_model\n",
+ " chat_model = ChatModel(args)\n",
+ " ^^^^^^^^^^^^^^^\n",
+ " File \"/home/inflaton/code/LLaMA-Factory/src/llamafactory/chat/chat_model.py\", line 52, in __init__\n",
+ " self.engine: \"BaseEngine\" = HuggingfaceEngine(model_args, data_args, finetuning_args, generating_args)\n",
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+ " File \"/home/inflaton/code/LLaMA-Factory/src/llamafactory/chat/hf_engine.py\", line 59, in __init__\n",
+ " self.model = load_model(\n",
+ " ^^^^^^^^^^^\n",
+ " File \"/home/inflaton/code/LLaMA-Factory/src/llamafactory/model/loader.py\", line 162, in load_model\n",
+ " model = load_class.from_pretrained(**init_kwargs)\n",
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+ " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/transformers/models/auto/auto_factory.py\", line 564, in from_pretrained\n",
+ " return model_class.from_pretrained(\n",
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+ " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/transformers/modeling_utils.py\", line 3671, in from_pretrained\n",
+ " resolved_archive_file, sharded_metadata = get_checkpoint_shard_files(\n",
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+ " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/transformers/utils/hub.py\", line 1079, in get_checkpoint_shard_files\n",
+ " cached_filename = cached_file(\n",
+ " ^^^^^^^^^^^^\n",
+ " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/transformers/utils/hub.py\", line 402, in cached_file\n",
+ " resolved_file = hf_hub_download(\n",
+ " ^^^^^^^^^^^^^^^^\n",
+ " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py\", line 114, in _inner_fn\n",
+ " return fn(*args, **kwargs)\n",
+ " ^^^^^^^^^^^^^^^^^^^\n",
+ " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/huggingface_hub/file_download.py\", line 1221, in hf_hub_download\n",
+ " return _hf_hub_download_to_cache_dir(\n",
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+ " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/huggingface_hub/file_download.py\", line 1367, in _hf_hub_download_to_cache_dir\n",
+ " _download_to_tmp_and_move(\n",
+ " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/huggingface_hub/file_download.py\", line 1884, in _download_to_tmp_and_move\n",
+ " http_get(\n",
+ " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/huggingface_hub/file_download.py\", line 539, in http_get\n",
+ " for chunk in r.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE):\n",
+ " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/requests/models.py\", line 820, in generate\n",
+ " yield from self.raw.stream(chunk_size, decode_content=True)\n",
+ " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/urllib3/response.py\", line 1060, in stream\n",
+ " data = self.read(amt=amt, decode_content=decode_content)\n",
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+ " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/urllib3/response.py\", line 949, in read\n",
+ " data = self._raw_read(amt)\n",
+ " ^^^^^^^^^^^^^^^^^^^\n",
+ " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/urllib3/response.py\", line 873, in _raw_read\n",
+ " data = self._fp_read(amt, read1=read1) if not fp_closed else b\"\"\n",
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+ " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/urllib3/response.py\", line 856, in _fp_read\n",
+ " return self._fp.read(amt) if amt is not None else self._fp.read()\n",
+ " ^^^^^^^^^^^^^^^^^^\n",
+ " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/http/client.py\", line 473, in read\n",
+ " s = self.fp.read(amt)\n",
+ " ^^^^^^^^^^^^^^^^^\n",
+ " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/socket.py\", line 706, in readinto\n",
+ " return self._sock.recv_into(b)\n",
+ " ^^^^^^^^^^^^^^^^^^^^^^^\n",
+ " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/ssl.py\", line 1314, in recv_into\n",
+ " return self.read(nbytes, buffer)\n",
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+ " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/ssl.py\", line 1166, in read\n",
+ " return self._sslobj.read(len, buffer)\n",
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+ "KeyboardInterrupt\n",
+ "model-00001-of-00004.safetensors: 8%|▍ | 304M/3.95G [00:29<05:57, 10.2MB/s]\n",
+ "CPU times: user 901 ms, sys: 326 ms, total: 1.23 s\n",
+ "Wall time: 2min 55s\n"
+ ]
+ }
+ ],
  "source": [
  "%%time\n",
  "\n",
- "!./scripts/eval-mgtv-qwen2.5.sh"
+ "!./scripts/eval-mgtv-qwen2.5_3b.sh"
  ]
  }
  ],
scripts/eval-mgtv-qwen2.5_3b.sh ADDED
@@ -0,0 +1,27 @@
+ #!/bin/sh
+
+ BASEDIR=$(dirname "$0")
+ cd $BASEDIR/..
+ echo Current Directory:
+ pwd
+
+ BASEDIR=`pwd`
+
+ nvidia-smi
+ uname -a
+ cat /etc/os-release
+ lscpu
+ grep MemTotal /proc/meminfo
+
+ # $BASEDIR/scripts/eval-shots.sh Qwen Qwen2.5-3B-Instruct
+
+ $BASEDIR/scripts/eval-shots.sh Qwen Qwen2.5-1.5B-Instruct
+
+ $BASEDIR/scripts/eval-shots.sh Qwen Qwen2.5-0.5B-Instruct
+
+ $BASEDIR/scripts/eval-epochs.sh Qwen Qwen2.5-3B-Instruct
+
+ $BASEDIR/scripts/eval-epochs.sh Qwen Qwen2.5-1.5B-Instruct
+
+ $BASEDIR/scripts/eval-epochs.sh Qwen Qwen2.5-0.5B-Instruct
+
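
For context, a minimal sketch (an illustration, not part of this commit) of the model-loading step that the interrupted run above performs through LLaMA-Factory's ChatModel, using the plain Hugging Face transformers API. The dtype matches the logged config ("torch_dtype": "bfloat16"); the smaller 0.5B checkpoint, the prompt, and max_new_tokens are illustrative assumptions:

# Sketch: load a Qwen2.5 Instruct checkpoint and run one chat turn.
# The 0.5B model is used here so the download is small; the log above
# interrupts the equivalent from_pretrained() call for the 7B shards.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "Qwen/Qwen2.5-0.5B-Instruct"  # illustrative; the run above uses Qwen2.5-7B-Instruct
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,  # matches the logged model config
    device_map="auto",
)

# Build a chat prompt with the Qwen2 template and generate a short reply.
messages = [{"role": "user", "content": "State one rule of propositional logic."}]
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
output_ids = model.generate(input_ids, max_new_tokens=64)
print(tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True))

The traceback above ends inside huggingface_hub's shard download because ChatModel ultimately wraps this same from_pretrained() call; re-running the script after the interrupt lets the download resume from the local cache.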