{"vocab_size": 24, "query_size": 512, "key_size": 512, "value_size": 512, "num_hiddens": 512, "num_layers": 6, "dropout": 0.2, "lr": 0.0004, "training_steps": 300000, "batch_size": 4096, "label_smoothing": 0.1, "ffn_num_input": 512, "ffn_num_hiddens": 2048, "num_heads": 8, "norm_shape": [512], "device": "cpu"}