from transformers.models.t5.configuration_t5 import T5Config
class DecoderOnlyT5Config(T5Config):
    """T5 configuration variant that flags a decoder-only architecture.

    Only class-level flags are defined here, on top of ``T5Config``; no
    methods are added or overridden. How each flag is consumed is decided by
    the modeling code that reads this config, which is not part of this
    class.
    """

    # Marks the architecture as decoder-only.
    is_decoder_only = True

    # Whether to call attention and MLP in parallel.
    # https://github.com/google/flaxformer/blob/ea17eb012a1d340ddff017b7a534c2162aaec34c/flaxformer/architectures/t5/t5_architecture.py#L384
    parallel_layers = True

    # Relative attention bias is turned off for this variant.
    has_relative_attention_bias = False

    # Multi-query attention.
    # https://arxiv.org/abs/1911.02150
    multi_query_attention = True