minGRU-LM / configuration_minGRULM.py
from transformers import PretrainedConfig


class MinGRULMConfig(PretrainedConfig):
    model_type = "mingru_lm"  # Model type for registration in the Transformers library

    def __init__(
        self,
        vocab_size=50257,
        dim=512,
        depth=12,
        ff_mult=4,
        min_gru_expansion=1.5,
        enable_conv=False,
        initializer_range=0.02,
        pad_vocab_size_multiple=8,
        **kwargs,
    ):
        self.vocab_size = vocab_size
        self.dim = dim  # Dimension of embeddings
        self.depth = depth  # Number of layers
        self.ff_mult = ff_mult  # Feed-forward multiplier
        self.min_gru_expansion = min_gru_expansion  # Expansion factor for minGRU
        self.enable_conv = enable_conv  # Whether convolution is enabled
        self.initializer_range = initializer_range  # Std-dev used for weight initialization
        self.pad_vocab_size_multiple = pad_vocab_size_multiple  # Round vocab size up to this multiple

        # Adjust vocab size to be a multiple of `pad_vocab_size_multiple`
        if self.vocab_size % self.pad_vocab_size_multiple != 0:
            self.vocab_size += (
                self.pad_vocab_size_multiple
                - self.vocab_size % self.pad_vocab_size_multiple
            )

        super().__init__(**kwargs)
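

# --- Usage sketch (not part of the config file above) ---
# A minimal example, assuming the class is importable from configuration_minGRULM.py.
# The default GPT-2-style vocab_size of 50257 is not a multiple of 8, so the
# constructor pads it up to 50264. Registering the config with AutoConfig is
# optional and only shown to illustrate how the "mingru_lm" model type can be
# resolved by the Transformers auto classes.

from transformers import AutoConfig

config = MinGRULMConfig(vocab_size=50257, dim=512, depth=12)
print(config.vocab_size)  # 50264 after padding to a multiple of 8

# Register the custom config so AutoConfig can map model_type "mingru_lm" to it.
AutoConfig.register("mingru_lm", MinGRULMConfig)

# Round-trip through the standard PretrainedConfig serialization helpers.
config.save_pretrained("minGRU-LM-config")  # writes config.json to this directory
reloaded = MinGRULMConfig.from_pretrained("minGRU-LM-config")
assert reloaded.vocab_size == config.vocab_size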