from transformers import PretrainedConfig


class MinGRULMConfig(PretrainedConfig):
    """Configuration class for a minGRU-based language model.

    Stores the hyperparameters needed to instantiate the model and plugs
    into the Transformers config machinery via ``PretrainedConfig``.
    """

    # Identifier used to register this config in the Transformers library.
    model_type = "mingru_lm"

    def __init__(
        self,
        vocab_size=50257,
        dim=512,
        depth=12,
        ff_mult=4,
        min_gru_expansion=1.5,
        enable_conv=False,
        initializer_range=0.02,
        pad_vocab_size_multiple=8,
        **kwargs,
    ):
        """Build the config.

        Args:
            vocab_size: Base vocabulary size; rounded up below so it is a
                multiple of ``pad_vocab_size_multiple``.
            dim: Embedding dimension.
            depth: Number of layers.
            ff_mult: Feed-forward multiplier.
            min_gru_expansion: Expansion factor for the minGRU.
            enable_conv: Whether convolution is enabled.
            initializer_range: Std-dev used for weight initialization.
            pad_vocab_size_multiple: Granularity to which ``vocab_size``
                is padded.
            **kwargs: Forwarded to ``PretrainedConfig.__init__``.
        """
        self.vocab_size = vocab_size
        self.dim = dim                                  # embedding dimension
        self.depth = depth                              # number of layers
        self.ff_mult = ff_mult                          # feed-forward multiplier
        self.min_gru_expansion = min_gru_expansion      # minGRU expansion factor
        self.enable_conv = enable_conv                  # convolution on/off
        self.initializer_range = initializer_range
        self.pad_vocab_size_multiple = pad_vocab_size_multiple

        # Round vocab_size up to the next multiple of pad_vocab_size_multiple
        # (a no-op when it is already aligned).
        remainder = self.vocab_size % self.pad_vocab_size_multiple
        if remainder != 0:
            self.vocab_size += self.pad_vocab_size_multiple - remainder

        super().__init__(**kwargs)