PiaoYang committed on
Commit
7d6cc44
1 Parent(s): e99d347

Upload config

Browse files
Files changed (2) hide show
  1. config.json +31 -0
  2. configuration_chatglm.py +103 -0
config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "./chatglm-6b",
3
+ "architectures": [
4
+ "ChatGLMForConditionalGeneration"
5
+ ],
6
+ "auto_map": {
7
+ "AutoConfig": "configuration_chatglm.ChatGLMConfig",
8
+ "AutoModel": "modeling_chatglm.ChatGLMForConditionalGeneration",
9
+ "AutoModelForSeq2SeqLM": "modeling_chatglm.ChatGLMForConditionalGeneration"
10
+ },
11
+ "bos_token_id": 130004,
12
+ "eos_token_id": 130005,
13
+ "gmask_token_id": 130001,
14
+ "hidden_size": 4096,
15
+ "inner_hidden_size": 16384,
16
+ "layernorm_epsilon": 1e-05,
17
+ "mask_token_id": 130000,
18
+ "max_sequence_length": 2048,
19
+ "model_type": "chatglm",
20
+ "num_attention_heads": 32,
21
+ "num_layers": 28,
22
+ "pad_token_id": 3,
23
+ "position_encoding_2d": true,
24
+ "pre_seq_len": null,
25
+ "prefix_projection": false,
26
+ "quantization_bit": 0,
27
+ "torch_dtype": "float16",
28
+ "transformers_version": "4.33.0",
29
+ "use_cache": true,
30
+ "vocab_size": 130528
31
+ }
configuration_chatglm.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ ChatGLM model configuration """
2
+
3
+ from transformers.configuration_utils import PretrainedConfig
4
+ from transformers.utils import logging
5
+
6
+ logger = logging.get_logger(__name__)
7
+
8
+
9
class ChatGLMConfig(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of a [`~ChatGLMModel`].
    It is used to instantiate a ChatGLM model according to the specified arguments, defining the model
    architecture. Instantiating a configuration with the defaults will yield a similar configuration to that of
    the ChatGLM-6B [THUDM/ChatGLM-6B](https://huggingface.co/THUDM/chatglm-6b) architecture.

    Configuration objects inherit from [`PretrainedConfig`] and can be used
    to control the model outputs. Read the documentation from [`PretrainedConfig`]
    for more information.


    Args:
        vocab_size (`int`, *optional*, defaults to 150528):
            Vocabulary size of the ChatGLM-6B model. Defines the number of different tokens that can be
            represented by the `input_ids` passed when calling [`~ChatGLMModel`] or
            [`~TFChatGLMModel`].
        hidden_size (`int`, *optional*, defaults to 4096):
            Dimension of the encoder layers and the pooler layer.
        num_layers (`int`, *optional*, defaults to 28):
            Number of hidden layers in the Transformer encoder.
        num_attention_heads (`int`, *optional*, defaults to 32):
            Number of attention heads for each attention layer in the Transformer encoder.
        layernorm_epsilon (`float`, *optional*, defaults to 1e-5):
            The epsilon used by the layer normalization layers.
        use_cache (`bool`, *optional*, defaults to `False`):
            Whether the model should return the last key/values attentions (not used by all models).
        bos_token_id (`int`, *optional*, defaults to 150004):
            Id of the beginning-of-sequence token. Stored on the config and forwarded to
            [`PretrainedConfig`].
        eos_token_id (`int`, *optional*, defaults to 150005):
            Id of the end-of-sequence token. Stored on the config and forwarded to
            [`PretrainedConfig`].
        mask_token_id (`int`, *optional*, defaults to 150000):
            Id of the mask token.
        gmask_token_id (`int`, *optional*, defaults to 150001):
            Id of the gmask token.
        pad_token_id (`int`, *optional*, defaults to 0):
            Id of the padding token. Stored on the config and forwarded to [`PretrainedConfig`].
        max_sequence_length (`int`, *optional*, defaults to 2048):
            The maximum sequence length that this model might ever be used with.
            Typically set this to something large just in case (e.g., 512 or 1024 or 2048).
        inner_hidden_size (`int`, *optional*, defaults to 16384):
            Dimension of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder.
        position_encoding_2d (`bool`, *optional*, defaults to `True`):
            Stored on the config unchanged. Presumably selects the 2D position encoding scheme in the
            modeling code -- confirm against `modeling_chatglm.py`.
        quantization_bit (`int`, *optional*, defaults to 0):
            Stored on the config unchanged. Presumably the bit width used for weight quantization, with
            0 meaning no quantization -- confirm against `modeling_chatglm.py`.
        pre_seq_len (`int`, *optional*, defaults to `None`):
            Stored on the config unchanged. Presumably the prefix length used for prefix tuning, with
            `None` disabling it -- confirm against `modeling_chatglm.py`.
        prefix_projection (`bool`, *optional*, defaults to `False`):
            Stored on the config unchanged. Presumably controls whether the prefix is passed through a
            projection -- confirm against `modeling_chatglm.py`.
        kwargs:
            Any remaining keyword arguments are passed through to [`PretrainedConfig`].

    Example:

    ```python
    >>> from configuration_chatglm import ChatGLMConfig
    >>> from modeling_chatglm import ChatGLMModel

    >>> # Initializing a ChatGLM-6B THUDM/ChatGLM-6B style configuration
    >>> configuration = ChatGLMConfig()

    >>> # Initializing a model from the THUDM/ChatGLM-6B style configuration
    >>> model = ChatGLMModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```
    """
    model_type = "chatglm"

    def __init__(
        self,
        vocab_size=150528,
        hidden_size=4096,
        num_layers=28,
        num_attention_heads=32,
        layernorm_epsilon=1e-5,
        use_cache=False,
        bos_token_id=150004,
        eos_token_id=150005,
        mask_token_id=150000,
        gmask_token_id=150001,
        pad_token_id=0,
        max_sequence_length=2048,
        inner_hidden_size=16384,
        position_encoding_2d=True,
        quantization_bit=0,
        pre_seq_len=None,
        prefix_projection=False,
        **kwargs
    ):
        # Architecture hyper-parameters.
        self.num_layers = num_layers
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_attention_heads = num_attention_heads
        self.max_sequence_length = max_sequence_length
        self.layernorm_epsilon = layernorm_epsilon
        self.inner_hidden_size = inner_hidden_size
        self.use_cache = use_cache

        # Special token ids. bos/eos/pad are also handed to the base class
        # below; they are assigned here first, exactly as the original did.
        self.bos_token_id = bos_token_id
        self.eos_token_id = eos_token_id
        self.pad_token_id = pad_token_id
        self.mask_token_id = mask_token_id
        self.gmask_token_id = gmask_token_id

        # Remaining options are stored as-is for the modeling code to read.
        self.position_encoding_2d = position_encoding_2d
        self.quantization_bit = quantization_bit
        self.pre_seq_len = pre_seq_len
        self.prefix_projection = prefix_projection

        # The base initializer runs last and receives every extra keyword
        # argument the caller supplied.
        super().__init__(
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            **kwargs
        )