File size: 3,237 Bytes
b2d9eff |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
# Training configuration for the ru_vep-1.0 run; every run artefact referenced
# below lives under run/ru_vep-1.0/.
# NOTE(review): the key names match OpenNMT-py training options - confirm the
# consuming toolkit and version before renaming or removing any key.
# Keys are kept in alphabetical order, as in the original dump.
accum_count: 100
accum_steps: 0
adam_beta2: 0.998
attention_dropout: 0.1
# batch_size is counted in the unit named by batch_type (tokens here).
batch_size: 1500
batch_type: tokens
bucket_size: 49304

# Training corpora, each keyed by an opaque hex id, plus the validation set.
# Every corpus takes path_src from a "target.txt" file and path_tgt from a
# "source.txt" file of the vep-ru dataset.
# NOTE(review): presumably intentional, because the vep-ru data is reused in
# the reverse (ru -> vep) direction for this run - verify against the data
# preparation step.
data:
  27e6308e0faf12cd1b4bed73708fad6f:
    path_src: dataset/vep-ru/et-synt/target.txt
    path_tgt: dataset/vep-ru/et-synt/source.txt
    src_prefix: et_s_Latn
    tgt_prefix: ''
    # Anchored once here; every other corpus and the validation set alias
    # this same transform list.
    transforms: &default_transforms
      - sentencepiece
      - filtertoolong
      - prefix
    weight: 13
  2caaf1b1e5e15d4a06b5b77c438ba0bc:
    path_src: dataset/vep-ru/fin-original/target.txt
    path_tgt: dataset/vep-ru/fin-original/source.txt
    src_prefix: fi_Latn
    tgt_prefix: ''
    transforms: *default_transforms
    weight: 13
  3def7622ce4622202126c8828c6fae3f:
    path_src: dataset/vep-ru/fin-synt/target.txt
    path_tgt: dataset/vep-ru/fin-synt/source.txt
    src_prefix: fi_s_Latn
    tgt_prefix: ''
    transforms: *default_transforms
    weight: 13
  517216e92cd45eb876c79dc3bafaad18:
    path_src: dataset/vep-ru/var/target.txt
    path_tgt: dataset/vep-ru/var/source.txt
    src_prefix: ''
    tgt_prefix: ''
    transforms: *default_transforms
    weight: 6
  815e8e066d03025e82529ef5c5ad232c:
    path_src: dataset/vep-ru/vep/target.txt
    path_tgt: dataset/vep-ru/vep/source.txt
    src_prefix: ''
    tgt_prefix: ''
    transforms: *default_transforms
    weight: 5
  8e6d221db9fc70bb3fd1104c24c8f25c:
    path_src: dataset/vep-ru/vep-dic/target.txt
    path_tgt: dataset/vep-ru/vep-dic/source.txt
    src_prefix: ''
    tgt_prefix: ''
    transforms: *default_transforms
    weight: 1
  f080464d79eb04a44e9947db1b54c17b:
    path_src: dataset/vep-ru/et-original/target.txt
    path_tgt: dataset/vep-ru/et-original/source.txt
    src_prefix: et_Latn
    tgt_prefix: ''
    transforms: *default_transforms
    weight: 13
  # Validation set: no prefixes or weight, same transform list as training.
  valid:
    path_src: run/ru_vep-1.0/src-val.txt
    path_tgt: run/ru_vep-1.0/tgt-val.txt
    transforms: *default_transforms

dec_layers: 20
decay_method: rsqrt
decoder_type: transformer
dropout: 0.1
dropout_steps: 0
early_stopping: 0
enc_layers: 20
encoder_type: transformer
# Two ranks listed here, matching world_size: 2 at the end of the file.
gpu_ranks:
  - 0
  - 1
heads: 8
# hidden_size, rnn_size and word_vec_size are all 512 in this file.
# NOTE(review): rnn_size looks like a legacy duplicate of hidden_size - keep
# the three values in sync until that is confirmed.
hidden_size: 512
keep_checkpoint: 15
label_smoothing: 0.1
learning_rate: 0.0015
max_generator_batches: 2
max_grad_norm: 0
max_relative_positions: 20
model_dtype: fp16
normalization: tokens
num_workers: 6
optim: pagedadamw8bit
param_init: 0
param_init_glorot: true
pos_ffn_activation_fn: gated-gelu
position_encoding: false
queue_size: 10000
reset_optim: all
rnn_size: 512
# Checkpoint interval equals valid_steps (both 100).
save_checkpoint_steps: 100
save_data: run/ru_vep-1.0/opennmt
save_model: run/ru_vep-1.0/opennmt/openmt.model
share_decoder_embeddings: true
share_embeddings: true
share_vocab: true
skip_empty_level: silent

# Source-side tokenisation and vocabulary. The subword model and vocabulary
# paths are identical to the target-side ones below.
src_onmttok_kwargs:
  lang: ru
  # Plain scalar `none` loads as the string "none", not as YAML null.
  mode: none
src_seq_length: 185
src_subword_alpha: 0.0
src_subword_model: run/ru_vep-1.0/sentencepiece.model
src_subword_nbest: 1
src_subword_type: sentencepiece
src_vocab: run/ru_vep-1.0/opennmt/openmt.vocab
src_vocab_size: 32000

# Target-side tokenisation and vocabulary (mirrors the source-side settings).
tgt_onmttok_kwargs:
  lang: vep
  # Plain scalar `none` loads as the string "none", not as YAML null.
  mode: none
tgt_seq_length: 185
tgt_subword_alpha: 0.0
tgt_subword_model: run/ru_vep-1.0/sentencepiece.model
tgt_subword_nbest: 1
tgt_subword_type: sentencepiece
tgt_vocab: run/ru_vep-1.0/opennmt/openmt.vocab
tgt_vocab_size: 32000

train_steps: 1500
transformer_ff: 6144
# Real boolean rather than the quoted string 'False': a non-empty string is
# truthy for any consumer that loads this YAML directly, which would silently
# switch the option on.
update_vocab: false
valid_batch_size: 64
valid_metrics:
  - BLEU
valid_steps: 100
warmup_steps: 200
word_vec_size: 512
world_size: 2
|