dh-mc committed on
Commit
ceb9311
·
1 Parent(s): e06d133

qwen2.5-1.5b

data/Qwen2.5-0.5B-Instruct_results.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/Qwen2.5-1.5B-Instruct_results.csv ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/07_Qwen2.5_models.ipynb CHANGED
@@ -19954,13 +19954,337 @@
  },
  {
  "cell_type": "code",
- "execution_count": null,
+ "execution_count": 8,
  "metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Current Directory:\n",
+ "/home/inflaton/code/logical-reasoning\n",
+ "Sat Sep 21 23:55:43 2024 \n",
+ "+-----------------------------------------------------------------------------------------+\n",
+ "| NVIDIA-SMI 560.35.02 Driver Version: 560.94 CUDA Version: 12.6 |\n",
+ "|-----------------------------------------+------------------------+----------------------+\n",
+ "| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |\n",
+ "| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n",
+ "| | | MIG M. |\n",
+ "|=========================================+========================+======================|\n",
+ "| 0 NVIDIA GeForce RTX 4090 On | 00000000:01:00.0 Off | Off |\n",
+ "| 54% 59C P3 41W / 450W | 471MiB / 24564MiB | 2% Default |\n",
+ "| | | N/A |\n",
+ "+-----------------------------------------+------------------------+----------------------+\n",
+ " \n",
+ "+-----------------------------------------------------------------------------------------+\n",
+ "| Processes: |\n",
+ "| GPU GI CI PID Type Process name GPU Memory |\n",
+ "| ID ID Usage |\n",
+ "|=========================================================================================|\n",
+ "| 0 N/A N/A 25 G /Xwayland N/A |\n",
+ "+-----------------------------------------------------------------------------------------+\n",
+ "Linux Gen-AI 5.15.133.1-microsoft-standard-WSL2 #1 SMP Thu Oct 5 21:02:42 UTC 2023 x86_64 x86_64 x86_64 GNU/Linux\n",
+ "PRETTY_NAME=\"Ubuntu 22.04.2 LTS\"\n",
+ "NAME=\"Ubuntu\"\n",
+ "VERSION_ID=\"22.04\"\n",
+ "VERSION=\"22.04.2 LTS (Jammy Jellyfish)\"\n",
+ "VERSION_CODENAME=jammy\n",
+ "ID=ubuntu\n",
+ "ID_LIKE=debian\n",
+ "HOME_URL=\"https://www.ubuntu.com/\"\n",
+ "SUPPORT_URL=\"https://help.ubuntu.com/\"\n",
+ "BUG_REPORT_URL=\"https://bugs.launchpad.net/ubuntu/\"\n",
+ "PRIVACY_POLICY_URL=\"https://www.ubuntu.com/legal/terms-and-policies/privacy-policy\"\n",
+ "UBUNTU_CODENAME=jammy\n",
+ "Architecture: x86_64\n",
+ " CPU op-mode(s): 32-bit, 64-bit\n",
+ " Address sizes: 39 bits physical, 48 bits virtual\n",
+ " Byte Order: Little Endian\n",
+ "CPU(s): 32\n",
+ " On-line CPU(s) list: 0-31\n",
+ "Vendor ID: GenuineIntel\n",
+ " Model name: 13th Gen Intel(R) Core(TM) i9-13900KF\n",
+ " CPU family: 6\n",
+ " Model: 183\n",
+ " Thread(s) per core: 2\n",
+ " Core(s) per socket: 16\n",
+ " Socket(s): 1\n",
+ " Stepping: 1\n",
+ " BogoMIPS: 5990.39\n",
+ " Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mc\n",
+ " a cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscal\n",
+ " l nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopo\n",
+ " logy tsc_reliable nonstop_tsc cpuid pni pclmulqdq vmx s\n",
+ " sse3 fma cx16 sse4_1 sse4_2 x2apic movbe popcnt tsc_dea\n",
+ " dline_timer aes xsave avx f16c rdrand hypervisor lahf_l\n",
+ " m abm 3dnowprefetch ssbd ibrs ibpb stibp ibrs_enhanced \n",
+ " tpr_shadow vnmi ept vpid ept_ad fsgsbase tsc_adjust bmi\n",
+ " 1 avx2 smep bmi2 erms invpcid rdseed adx smap clflushop\n",
+ " t clwb sha_ni xsaveopt xsavec xgetbv1 xsaves avx_vnni u\n",
+ " mip waitpkg gfni vaes vpclmulqdq rdpid movdiri movdir64\n",
+ " b fsrm md_clear serialize flush_l1d arch_capabilities\n",
+ "Virtualization features: \n",
+ " Virtualization: VT-x\n",
+ " Hypervisor vendor: Microsoft\n",
+ " Virtualization type: full\n",
+ "Caches (sum of all): \n",
+ " L1d: 768 KiB (16 instances)\n",
+ " L1i: 512 KiB (16 instances)\n",
+ " L2: 32 MiB (16 instances)\n",
+ " L3: 36 MiB (1 instance)\n",
+ "Vulnerabilities: \n",
+ " Gather data sampling: Not affected\n",
+ " Itlb multihit: Not affected\n",
+ " L1tf: Not affected\n",
+ " Mds: Not affected\n",
+ " Meltdown: Not affected\n",
+ " Mmio stale data: Not affected\n",
+ " Retbleed: Mitigation; Enhanced IBRS\n",
+ " Spec rstack overflow: Not affected\n",
+ " Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl\n",
+ " and seccomp\n",
+ " Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer\n",
+ " sanitization\n",
+ " Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB fillin\n",
+ " g, PBRSB-eIBRS SW sequence\n",
+ " Srbds: Not affected\n",
+ " Tsx async abort: Not affected\n",
+ "MemTotal: 49330024 kB\n",
+ "Current Directory:\n",
+ "/home/inflaton/code/logical-reasoning\n",
+ "Evaluating Qwen/Qwen2.5-7B-Instruct with few-shot learning\n",
+ "loading env vars from: /home/inflaton/code/logical-reasoning/.env\n",
+ "Adding /home/inflaton/code/logical-reasoning to sys.path\n",
+ "loading /home/inflaton/code/logical-reasoning/llm_toolkit/logical_reasoning_utils.py\n",
+ "CUDA is available, we have found 1 GPU(s)\n",
+ "NVIDIA GeForce RTX 4090\n",
+ "CUDA version: 12.1\n",
+ "Qwen/Qwen2.5-7B-Instruct None False datasets/mgtv data/Qwen2.5-7B-Instruct_results.csv 2048 1\n",
+ "(0) GPU = NVIDIA GeForce RTX 4090. Max memory = 23.988 GB.\n",
+ "0.0 GB of memory reserved.\n",
+ "loading model: Qwen/Qwen2.5-7B-Instruct with adapter: None\n",
+ "config.json: 100%|█████████████████████████████| 663/663 [00:00<00:00, 10.9MB/s]\n",
+ "[INFO|configuration_utils.py:733] 2024-09-21 23:55:48,860 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/acbd96531cda22292a3ceaa67e984955d3965282/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-09-21 23:55:48,861 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2.5-7B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 3584,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 18944,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 28,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 4,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": null,\n",
+ " \"tie_word_embeddings\": false,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.43.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 152064\n",
+ "}\n",
+ "\n",
+ "tokenizer_config.json: 100%|███████████████| 7.30k/7.30k [00:00<00:00, 53.0MB/s]\n",
+ "vocab.json: 100%|██████████████████████████| 2.78M/2.78M [00:01<00:00, 1.76MB/s]\n",
+ "merges.txt: 100%|██████████████████████████| 1.67M/1.67M [00:00<00:00, 23.9MB/s]\n",
+ "tokenizer.json: 100%|██████████████████████| 7.03M/7.03M [00:00<00:00, 10.8MB/s]\n",
+ "[INFO|tokenization_utils_base.py:2289] 2024-09-21 23:55:53,949 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/acbd96531cda22292a3ceaa67e984955d3965282/vocab.json\n",
+ "[INFO|tokenization_utils_base.py:2289] 2024-09-21 23:55:53,950 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/acbd96531cda22292a3ceaa67e984955d3965282/merges.txt\n",
+ "[INFO|tokenization_utils_base.py:2289] 2024-09-21 23:55:53,950 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/acbd96531cda22292a3ceaa67e984955d3965282/tokenizer.json\n",
+ "[INFO|tokenization_utils_base.py:2289] 2024-09-21 23:55:53,950 >> loading file added_tokens.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2289] 2024-09-21 23:55:53,950 >> loading file special_tokens_map.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2289] 2024-09-21 23:55:53,950 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/acbd96531cda22292a3ceaa67e984955d3965282/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2533] 2024-09-21 23:55:54,041 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "[INFO|configuration_utils.py:733] 2024-09-21 23:55:55,157 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/acbd96531cda22292a3ceaa67e984955d3965282/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-09-21 23:55:55,157 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2.5-7B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 3584,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 18944,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 28,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 4,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": null,\n",
+ " \"tie_word_embeddings\": false,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.43.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 152064\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2289] 2024-09-21 23:55:55,421 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/acbd96531cda22292a3ceaa67e984955d3965282/vocab.json\n",
+ "[INFO|tokenization_utils_base.py:2289] 2024-09-21 23:55:55,421 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/acbd96531cda22292a3ceaa67e984955d3965282/merges.txt\n",
+ "[INFO|tokenization_utils_base.py:2289] 2024-09-21 23:55:55,421 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/acbd96531cda22292a3ceaa67e984955d3965282/tokenizer.json\n",
+ "[INFO|tokenization_utils_base.py:2289] 2024-09-21 23:55:55,422 >> loading file added_tokens.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2289] 2024-09-21 23:55:55,422 >> loading file special_tokens_map.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2289] 2024-09-21 23:55:55,422 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/acbd96531cda22292a3ceaa67e984955d3965282/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2533] 2024-09-21 23:55:55,509 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "09/21/2024 23:55:55 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n",
+ "[INFO|configuration_utils.py:733] 2024-09-21 23:55:55,814 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/acbd96531cda22292a3ceaa67e984955d3965282/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-09-21 23:55:55,814 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2.5-7B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 3584,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 18944,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 28,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 4,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": null,\n",
+ " \"tie_word_embeddings\": false,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.43.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 152064\n",
+ "}\n",
+ "\n",
+ "09/21/2024 23:55:55 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n",
+ "model.safetensors.index.json: 100%|████████| 27.8k/27.8k [00:00<00:00, 24.5MB/s]\n",
+ "[INFO|modeling_utils.py:3634] 2024-09-21 23:55:56,890 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/acbd96531cda22292a3ceaa67e984955d3965282/model.safetensors.index.json\n",
+ "Downloading shards: 0%| | 0/4 [00:00<?, ?it/s]\n",
+ "model-00001-of-00004.safetensors: 0%| | 0.00/3.95G [00:00<?, ?B/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 0%| | 10.5M/3.95G [00:02<13:57, 4.70MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 1%| | 21.0M/3.95G [00:03<09:51, 6.63MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 1%| | 31.5M/3.95G [00:04<07:42, 8.47MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 1%| | 41.9M/3.95G [00:05<06:45, 9.63MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 1%| | 52.4M/3.95G [00:06<06:45, 9.59MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 2%| | 62.9M/3.95G [00:07<06:38, 9.73MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 2%| | 73.4M/3.95G [00:08<06:20, 10.2MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 2%| | 83.9M/3.95G [00:08<05:58, 10.8MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 2%| | 94.4M/3.95G [00:09<05:52, 10.9MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 3%|▏ | 105M/3.95G [00:10<05:43, 11.2MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 3%|▏ | 115M/3.95G [00:11<05:32, 11.5MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 3%|▏ | 126M/3.95G [00:12<05:49, 10.9MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 3%|▏ | 136M/3.95G [00:13<06:04, 10.4MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 4%|▏ | 147M/3.95G [00:14<06:14, 10.2MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 4%|▏ | 157M/3.95G [00:16<06:31, 9.67MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 4%|▏ | 168M/3.95G [00:17<06:42, 9.38MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 5%|▏ | 178M/3.95G [00:18<06:45, 9.29MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 5%|▏ | 189M/3.95G [00:19<06:18, 9.92MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 5%|▎ | 199M/3.95G [00:20<05:50, 10.7MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 5%|▎ | 210M/3.95G [00:20<05:32, 11.2MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 6%|▎ | 220M/3.95G [00:21<05:19, 11.7MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 6%|▎ | 231M/3.95G [00:22<05:19, 11.6MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 6%|▎ | 241M/3.95G [00:23<05:12, 11.8MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 6%|▎ | 252M/3.95G [00:24<05:07, 12.0MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 7%|▎ | 262M/3.95G [00:25<05:24, 11.3MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 7%|▎ | 273M/3.95G [00:26<05:18, 11.5MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 7%|▎ | 283M/3.95G [00:27<05:42, 10.7MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 7%|▎ | 294M/3.95G [00:28<05:39, 10.8MB/s]\u001b[A\n",
+ "model-00001-of-00004.safetensors: 8%|▍ | 304M/3.95G [00:29<05:33, 10.9MB/s]\u001b[A^C\n",
+ "Downloading shards: 0%| | 0/4 [02:41<?, ?it/s]\n",
+ "Traceback (most recent call last):\n",
+ " File \"/home/inflaton/code/logical-reasoning/llm_toolkit/eval_shots.py\", line 64, in <module>\n",
+ " model, tokenizer = load_model(\n",
+ " ^^^^^^^^^^^\n",
+ " File \"/home/inflaton/code/logical-reasoning/llm_toolkit/llm_utils.py\", line 52, in load_model\n",
+ " chat_model = ChatModel(args)\n",
+ " ^^^^^^^^^^^^^^^\n",
+ " File \"/home/inflaton/code/LLaMA-Factory/src/llamafactory/chat/chat_model.py\", line 52, in __init__\n",
+ " self.engine: \"BaseEngine\" = HuggingfaceEngine(model_args, data_args, finetuning_args, generating_args)\n",
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+ " File \"/home/inflaton/code/LLaMA-Factory/src/llamafactory/chat/hf_engine.py\", line 59, in __init__\n",
+ " self.model = load_model(\n",
+ " ^^^^^^^^^^^\n",
+ " File \"/home/inflaton/code/LLaMA-Factory/src/llamafactory/model/loader.py\", line 162, in load_model\n",
+ " model = load_class.from_pretrained(**init_kwargs)\n",
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+ " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/transformers/models/auto/auto_factory.py\", line 564, in from_pretrained\n",
+ " return model_class.from_pretrained(\n",
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+ " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/transformers/modeling_utils.py\", line 3671, in from_pretrained\n",
+ " resolved_archive_file, sharded_metadata = get_checkpoint_shard_files(\n",
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+ " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/transformers/utils/hub.py\", line 1079, in get_checkpoint_shard_files\n",
+ " cached_filename = cached_file(\n",
+ " ^^^^^^^^^^^^\n",
+ " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/transformers/utils/hub.py\", line 402, in cached_file\n",
+ " resolved_file = hf_hub_download(\n",
+ " ^^^^^^^^^^^^^^^^\n",
+ " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py\", line 114, in _inner_fn\n",
+ " return fn(*args, **kwargs)\n",
+ " ^^^^^^^^^^^^^^^^^^^\n",
+ " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/huggingface_hub/file_download.py\", line 1221, in hf_hub_download\n",
+ " return _hf_hub_download_to_cache_dir(\n",
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+ " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/huggingface_hub/file_download.py\", line 1367, in _hf_hub_download_to_cache_dir\n",
+ " _download_to_tmp_and_move(\n",
+ " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/huggingface_hub/file_download.py\", line 1884, in _download_to_tmp_and_move\n",
+ " http_get(\n",
+ " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/huggingface_hub/file_download.py\", line 539, in http_get\n",
+ " for chunk in r.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE):\n",
+ " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/requests/models.py\", line 820, in generate\n",
+ " yield from self.raw.stream(chunk_size, decode_content=True)\n",
+ " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/urllib3/response.py\", line 1060, in stream\n",
+ " data = self.read(amt=amt, decode_content=decode_content)\n",
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+ " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/urllib3/response.py\", line 949, in read\n",
+ " data = self._raw_read(amt)\n",
+ " ^^^^^^^^^^^^^^^^^^^\n",
+ " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/urllib3/response.py\", line 873, in _raw_read\n",
+ " data = self._fp_read(amt, read1=read1) if not fp_closed else b\"\"\n",
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+ " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/urllib3/response.py\", line 856, in _fp_read\n",
+ " return self._fp.read(amt) if amt is not None else self._fp.read()\n",
+ " ^^^^^^^^^^^^^^^^^^\n",
+ " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/http/client.py\", line 473, in read\n",
+ " s = self.fp.read(amt)\n",
+ " ^^^^^^^^^^^^^^^^^\n",
+ " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/socket.py\", line 706, in readinto\n",
+ " return self._sock.recv_into(b)\n",
+ " ^^^^^^^^^^^^^^^^^^^^^^^\n",
+ " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/ssl.py\", line 1314, in recv_into\n",
+ " return self.read(nbytes, buffer)\n",
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+ " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/ssl.py\", line 1166, in read\n",
+ " return self._sslobj.read(len, buffer)\n",
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+ "KeyboardInterrupt\n",
+ "model-00001-of-00004.safetensors: 8%|▍ | 304M/3.95G [00:29<05:57, 10.2MB/s]\n",
+ "CPU times: user 901 ms, sys: 326 ms, total: 1.23 s\n",
+ "Wall time: 2min 55s\n"
+ ]
+ }
+ ],
  "source": [
  "%%time\n",
  "\n",
- "!./scripts/eval-mgtv-qwen2.5.sh"
+ "!./scripts/eval-mgtv-qwen2.5_3b.sh"
  ]
  }
  ],
scripts/eval-mgtv-qwen2.5_3b.sh ADDED
@@ -0,0 +1,27 @@
+ #!/bin/sh
+
+ BASEDIR=$(dirname "$0")
+ cd $BASEDIR/..
+ echo Current Directory:
+ pwd
+
+ BASEDIR=`pwd`
+
+ nvidia-smi
+ uname -a
+ cat /etc/os-release
+ lscpu
+ grep MemTotal /proc/meminfo
+
+ # $BASEDIR/scripts/eval-shots.sh Qwen Qwen2.5-3B-Instruct
+
+ $BASEDIR/scripts/eval-shots.sh Qwen Qwen2.5-1.5B-Instruct
+
+ $BASEDIR/scripts/eval-shots.sh Qwen Qwen2.5-0.5B-Instruct
+
+ $BASEDIR/scripts/eval-epochs.sh Qwen Qwen2.5-3B-Instruct
+
+ $BASEDIR/scripts/eval-epochs.sh Qwen Qwen2.5-1.5B-Instruct
+
+ $BASEDIR/scripts/eval-epochs.sh Qwen Qwen2.5-0.5B-Instruct
+
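
For context, a minimal sketch (an illustration, not part of this commit) of the model-loading step that the interrupted run above performs through LLaMA-Factory's ChatModel, using the plain Hugging Face transformers API. The dtype matches the logged config ("torch_dtype": "bfloat16"); the smaller 0.5B checkpoint, the prompt, and max_new_tokens are illustrative assumptions:

# Sketch: load a Qwen2.5 Instruct checkpoint and run one chat turn.
# The 0.5B model is used here so the download is small; the log above
# interrupts the equivalent from_pretrained() call for the 7B shards.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "Qwen/Qwen2.5-0.5B-Instruct"  # illustrative; the run above uses Qwen2.5-7B-Instruct
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,  # matches the logged model config
    device_map="auto",
)

# Build a chat prompt with the Qwen2 template and generate a short reply.
messages = [{"role": "user", "content": "State one rule of propositional logic."}]
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
output_ids = model.generate(input_ids, max_new_tokens=64)
print(tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True))

The traceback above ends inside huggingface_hub's shard download because ChatModel ultimately wraps this same from_pretrained() call; re-running the script after the interrupt lets the download resume from the local cache.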