{ "cells": [ { "cell_type": "code", "execution_count": null, "id": "d67610d1-a6d9-420f-90d7-248d46b31697", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "loading file tokenizer.model from cache at /root/.cache/huggingface/hub/models--baichuan-inc--Baichuan-13B-Base/snapshots/0ef0739c7bdd34df954003ef76d80f3dabca2ff9/tokenizer.model\n", "loading file added_tokens.json from cache at None\n", "loading file special_tokens_map.json from cache at /root/.cache/huggingface/hub/models--baichuan-inc--Baichuan-13B-Base/snapshots/0ef0739c7bdd34df954003ef76d80f3dabca2ff9/special_tokens_map.json\n", "loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--baichuan-inc--Baichuan-13B-Base/snapshots/0ef0739c7bdd34df954003ef76d80f3dabca2ff9/tokenizer_config.json\n", "loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--baichuan-inc--Baichuan-13B-Base/snapshots/0ef0739c7bdd34df954003ef76d80f3dabca2ff9/config.json\n", "loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--baichuan-inc--Baichuan-13B-Base/snapshots/0ef0739c7bdd34df954003ef76d80f3dabca2ff9/config.json\n", "Model config BaichuanConfig {\n", " \"_from_model_config\": true,\n", " \"_name_or_path\": \"baichuan-inc/Baichuan-13B-Base\",\n", " \"architectures\": [\n", " \"BaichuanForCausalLM\"\n", " ],\n", " \"auto_map\": {\n", " \"AutoConfig\": \"baichuan-inc/Baichuan-13B-Base--configuration_baichuan.BaichuanConfig\",\n", " \"AutoModelForCausalLM\": \"baichuan-inc/Baichuan-13B-Base--modeling_baichuan.BaichuanForCausalLM\"\n", " },\n", " \"bos_token_id\": 1,\n", " \"eos_token_id\": 2,\n", " \"gradient_checkpointing\": [\n", " false\n", " ],\n", " \"hidden_act\": \"silu\",\n", " \"hidden_size\": 5120,\n", " \"initializer_range\": 0.02,\n", " \"intermediate_size\": 13696,\n", " \"model_max_length\": 4096,\n", " \"model_type\": \"baichuan\",\n", " \"num_attention_heads\": 40,\n", " \"num_hidden_layers\": 40,\n", " \"pad_token_id\": 0,\n", " \"rms_norm_eps\": 1e-06,\n", " \"tie_word_embeddings\": false,\n", " \"torch_dtype\": \"bfloat16\",\n", " \"transformers_version\": \"4.32.1\",\n", " \"use_cache\": true,\n", " \"vocab_size\": 64000\n", "}\n", "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "[2023-08-31 19:08:00,343] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "loading weights file pytorch_model.bin from cache at /root/.cache/huggingface/hub/models--baichuan-inc--Baichuan-13B-Base/snapshots/0ef0739c7bdd34df954003ef76d80f3dabca2ff9/pytorch_model.bin.index.json\n", "Generate config GenerationConfig {\n", " \"_from_model_config\": true,\n", " \"bos_token_id\": 1,\n", " \"eos_token_id\": 2,\n", " \"pad_token_id\": 0,\n", " \"transformers_version\": \"4.32.1\"\n", "}\n", "\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "87e88732661e46cebbc8cc1ab0f93a77", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Loading checkpoint shards: 0%| | 0/3 [00:00