|
|
|
save_data: run |
|
|
|
src_vocab: run/vocab/gl-es/bpe.vocab.src |
|
tgt_vocab: run/vocab/gl-es/bpe.vocab.tgt |
|
overwrite: True |
|
|
|
|
|
data: |
|
europarl: |
|
path_tgt: corpora/europarl/partitions/es_train.txt |
|
path_src: corpora/europarl_translit/partitions/gl_train.txt |
|
transforms: [bpe, filtertoolong] |
|
weight: 120 |
|
opensub: |
|
path_tgt: corpora/opensub/partitions/es_train.txt |
|
path_src: corpora/opensub_translit/partitions/gl_train.txt |
|
transforms: [bpe, filtertoolong] |
|
weight: 180 |
|
dgt: |
|
path_tgt: corpora/dgt/partitions/es_train.txt |
|
path_src: corpora/dgt_translit/partitions/gl_train.txt |
|
transforms: [bpe, filtertoolong] |
|
weight: 18 |
|
cluvi: |
|
path_tgt: corpora/cluvi/partitions/es_train.txt |
|
path_src: corpora/cluvi/partitions/gl_train.txt |
|
transforms: [bpe, filtertoolong] |
|
weight: 40 |
|
opensub-es-gl: |
|
path_tgt: corpora/opensub-es-gl/partitions/es_train.txt |
|
path_src: corpora/opensub-es-gl/partitions/gl_train.txt |
|
transforms: [bpe, filtertoolong] |
|
weight: 25 |
|
ted2020: |
|
path_tgt: corpora/ted2020/partitions/es_train.txt |
|
path_src: corpora/ted2020/partitions/gl_train.txt |
|
transforms: [bpe, filtertoolong] |
|
weight: 10 |
|
corgaback: |
|
path_tgt: corpora/corgaback/partitions/es_train.txt |
|
path_src: corpora/corgaback/partitions/gl_train.txt |
|
transforms: [bpe, filtertoolong] |
|
weight: 13 |
|
ccmatrix: |
|
path_tgt: corpora/ccmatrix/es.txt |
|
path_src: corpora/ccmatrix/gl.txt |
|
transforms: [bpe, filtertoolong] |
|
weight: 180 |
|
resto: |
|
path_tgt: corpora/resto/es.txt |
|
path_src: corpora/resto/gl.txt |
|
transforms: [bpe, filtertoolong] |
|
weight: 120 |
|
opensub_2018: |
|
path_tgt: corpora/opensub_2018/es.txt |
|
path_src: corpora/opensub_2018/gl.txt |
|
transforms: [bpe, filtertoolong] |
|
weight: 25 |
|
|
|
|
|
valid: |
|
path_tgt: corpora/partitions/all-es_valid.txt |
|
path_src: corpora/partitions_translit/all-gl_valid.txt |
|
transforms: [bpe, filtertoolong] |
|
|
|
|
|
|
|
tgt_subword_model: ./bpe/es.code |
|
src_subword_model: ./bpe/gl.code |
|
tgt_subword_vocab: ./run/vocab/gl-es/bpe.vocab.tgt

src_subword_vocab: ./run/vocab/gl-es/bpe.vocab.src
|
src_subword_type: bpe |
|
tgt_subword_type: bpe
|
|
|
src_subword_nbest: 1 |
|
src_subword_alpha: 0.0 |
|
tgt_subword_nbest: 1 |
|
tgt_subword_alpha: 0.0 |
|
|
|
|
|
tgt_embeddings: ../embeddings/es.emb.txt |
|
src_embeddings: ../embeddings/gl.emb.txt |
|
|
|
|
|
embeddings_type: "word2vec" |
|
|
|
|
|
word_vec_size: 512 |
|
|
|
|
|
|
|
src_seq_length: 150 |
|
tgt_seq_length: 150 |
|
|
|
|
|
skip_empty_level: silent |
|
|
|
|
|
|
|
|
|
save_model: run/model |
|
keep_checkpoint: 50 |
|
save_checkpoint_steps: 10000 |
|
average_decay: 0.0005 |
|
seed: 1234 |
|
report_every: 1000 |
|
train_steps: 200000 |
|
valid_steps: 10000 |
|
|
|
|
|
queue_size: 10000 |
|
bucket_size: 32768 |
|
world_size: 1 |
|
gpu_ranks: [0] |
|
batch_type: "tokens" |
|
|
|
batch_size: 8192 |
|
valid_batch_size: 64 |
|
batch_size_multiple: 1 |
|
max_generator_batches: 2 |
|
accum_count: [4] |
|
accum_steps: [0] |
|
|
|
|
|
model_dtype: "fp16" |
|
optim: "adam" |
|
learning_rate: 2 |
|
|
|
warmup_steps: 8000 |
|
decay_method: "noam" |
|
adam_beta2: 0.998 |
|
max_grad_norm: 0 |
|
label_smoothing: 0.1 |
|
param_init: 0 |
|
param_init_glorot: true |
|
normalization: "tokens" |
|
|
|
|
|
encoder_type: transformer |
|
decoder_type: transformer |
|
position_encoding: true |
|
enc_layers: 6 |
|
dec_layers: 6 |
|
heads: 8 |
|
rnn_size: 512 |
|
transformer_ff: 2048 |
|
dropout_steps: [0] |
|
dropout: [0.1] |
|
attention_dropout: [0.1] |
|
share_decoder_embeddings: true |
|
share_embeddings: false |
|
|