{
  "batch_size": 10,
  "block_size": 512,
  "max_iters": 5000,
  "eval_interval": 50,
  "learning_rate": 3e-5,
  "eval_iters": 100,
  "d_model": 384,
  "n_head": 12,
  "n_layer": 12,
  "dropout": 0.2,
  "norm_eps": 1e-5
}
{
  "batch_size": 10,
  "block_size": 512,
  "max_iters": 5000,
  "eval_interval": 50,
  "learning_rate": 3e-5,
  "eval_iters": 100,
  "d_model": 384,
  "n_head": 12,
  "n_layer": 12,
  "dropout": 0.2,
  "norm_eps": 1e-5
}