from transformers import PretrainedConfig


class MinGRULMConfig(PretrainedConfig):
    model_type = "mingru_lm"  # Model type for registration in the Transformers library

    def __init__(
        self,
        vocab_size=50257,
        dim=512,
        depth=12,
        ff_mult=4,
        min_gru_expansion=1.5,
        enable_conv=False,
        initializer_range=0.02,
        pad_vocab_size_multiple=8,
        **kwargs,
    ):
        self.vocab_size = vocab_size
        self.dim = dim  # Dimension of embeddings
        self.depth = depth  # Number of layers
        self.ff_mult = ff_mult  # Feed-forward multiplier
        self.min_gru_expansion = min_gru_expansion  # Expansion factor for minGRU
        self.enable_conv = enable_conv  # Whether convolution is enabled
        self.initializer_range = initializer_range  # Std dev for weight initialization
        self.pad_vocab_size_multiple = pad_vocab_size_multiple  # Pad vocab to this multiple
        # Round vocab size up to the nearest multiple of `pad_vocab_size_multiple`
        if self.vocab_size % self.pad_vocab_size_multiple != 0:
            self.vocab_size += (
                self.pad_vocab_size_multiple
                - self.vocab_size % self.pad_vocab_size_multiple
            )
        super().__init__(**kwargs)
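
As a quick sanity check, here is a minimal usage sketch, assuming the class above is defined in scope. It shows the vocab padding in action (the default GPT-2 vocabulary of 50,257 tokens is rounded up to 50,264, the next multiple of 8) and how the `model_type` string pairs with `AutoConfig.register`; in a full integration you would typically also register the matching model class with `AutoModel`.

```python
from transformers import AutoConfig

# Register the config so AutoConfig can resolve the "mingru_lm" model type
AutoConfig.register("mingru_lm", MinGRULMConfig)

config = MinGRULMConfig()
print(config.vocab_size)  # 50264: 50257 rounded up to the next multiple of 8
print(config.model_type)  # "mingru_lm"
```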