""" ChatGLM model configuration """ from transformers.configuration_utils import PretrainedConfig from transformers.utils import logging logger = logging.get_logger(__name__) class ChatGLMConfig(PretrainedConfig): r""" This is the configuration class to store the configuration of a [`~ChatGLMModel`]. It is used to instantiate an ChatGLM model according to the specified arguments, defining the model architecture. Instantiating a configuration with the defaults will yield a similar configuration to that of the ChatGLM-6B [THUDM/ChatGLM-6B](https://huggingface.co./THUDM/chatglm-6b) architecture. Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the documentation from [`PretrainedConfig`] for more information. Args: vocab_size (`int`, *optional*, defaults to 150528): Vocabulary size of the ChatGLM-6B model. Defines the number of different tokens that can be represented by the `inputs_ids` passed when calling [`~ChatGLMModel`] or [`~TFChatGLMModel`]. hidden_size (`int`, *optional*, defaults to 4096): Dimension of the encoder layers and the pooler layer. num_hidden_layers (`int`, *optional*, defaults to 28): Number of hidden layers in the Transformer encoder. num_attention_heads (`int`, *optional*, defaults to 32): Number of attention heads for each attention layer in the Transformer encoder. inner_hidden_size (`int`, *optional*, defaults to 16384): Dimension of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder. max_sequence_length (`int`, *optional*, defaults to 512): The maximum sequence length that this model might ever be used with. Typically set this to something large just in case (e.g., 512 or 1024 or 2048). layernorm_epsilon (`float`, *optional*, defaults to 1e-5): The epsilon used by the layer normalization layers. use_cache (`bool`, *optional*, defaults to `True`): Whether the model should return the last key/values attentions (not used by all models). Example: ```python >>> from configuration_chatglm import ChatGLMConfig >>> from modeling_chatglm import ChatGLMModel >>> # Initializing a ChatGLM-6B THUDM/ChatGLM-6B style configuration >>> configuration = ChatGLMConfig() >>> # Initializing a model from the THUDM/ChatGLM-6B style configuration >>> model = ChatGLMModel(configuration) >>> # Accessing the model configuration >>> configuration = model.config ``` """ model_type = "chatglm" def __init__( self, vocab_size=150528, hidden_size=4096, num_layers=28, num_attention_heads=32, layernorm_epsilon=1e-5, use_cache=False, bos_token_id=150004, eos_token_id=150005, mask_token_id=150000, gmask_token_id=150001, pad_token_id=0, max_sequence_length=2048, inner_hidden_size=16384, position_encoding_2d=True, quantization_bit=0, quantization_embeddings=False, pre_seq_len=None, prefix_projection=False, **kwargs ): self.num_layers = num_layers self.vocab_size = vocab_size self.hidden_size = hidden_size self.num_attention_heads = num_attention_heads self.max_sequence_length = max_sequence_length self.layernorm_epsilon = layernorm_epsilon self.inner_hidden_size = inner_hidden_size self.use_cache = use_cache self.bos_token_id = bos_token_id self.eos_token_id = eos_token_id self.pad_token_id = pad_token_id self.mask_token_id = mask_token_id self.gmask_token_id = gmask_token_id self.position_encoding_2d = position_encoding_2d self.quantization_bit = quantization_bit self.quantization_embeddings = quantization_embeddings self.pre_seq_len = pre_seq_len self.prefix_projection = prefix_projection super().__init__( pad_token_id=pad_token_id, bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs )