rewicks committed on
Commit 48c1bb5 · verified · 1 Parent(s): ba7f15d

Upload marian-config.yaml

Files changed (1)
  1. marian-config.yaml +121 -0
marian-config.yaml ADDED
@@ -0,0 +1,121 @@
+ # This file defines the SGD-related parameters for Marian trainings.
+ # This is the teacher configuration.
+
+ seed: 141414
+
+ # cost
+ cost-type: ce-sum
+ label-smoothing: 0.1
+
+ # optimizer config
+ optimizer: adam
+ learn-rate: 0.0005
+ lr-warmup: 4000
+ lr-decay-inv-sqrt: 4000
+ mini-batch-warmup: 4000
+ mini-batch-round-up: true
+ optimizer-params:
+ - 0.9
+ - 0.999
+ - 1e-08
+ - 0.01
+ clip-norm: 0
+ dynamic-gradient-scaling:
+ - 2
+ - log
+ exponential-smoothing: 1e-3
+
+ # alignment
+ guided-alignment-weight: 0
+
+ # batch-size related parameters
+ mini-batch-fit: true
+ mini-batch-fit-step: 5
+ maxi-batch: 1000
+ mini-batch: 1000
+ mini-batch-words: 500000
+ max-length: 256
+
+ # validation-related parameters
+ # Note: Valid-metrics is specified in code (cf. k_validMetricNames), since it has a relation with the model pathname.
+ # Note: Decoding parameters below are only for the validation decoding, decoding parameters in deployment are separate.
+ early-stopping: 40
+ valid-mini-batch: 32
+ beam-size: 4
+ normalize: 1.0
+ word-penalty: 0.0
+ valid-max-length: 1000
+ n-best: false
+
+ # general parameters
+ logical-epoch: 1Gt
+ after: 40e
+ valid-freq: 1Gt
+ save-freq: 1Gt
+ disp-freq: 100Mt
+ disp-label-counts: true
+ lr-report: true
+ sync-sgd: true
+ shuffle: batches
+ shuffle-in-ram: true
+ disp-first: 10
+
+ # multi-node sharding mode, irrelevant for single-node
+ sharding: local
+ sync-freq: 200u
+
+ fp16: false
+ # https://machinetranslation.visualstudio.com/Marian/_git/autogen?path=/configs/trainingConfigTeacherPoloniumV2Top15.yml&version=GBmain&_a=contents
+ # for fp16 stability
+ cost-scaling:
+ - 256.f
+ - 10000
+ - 1.f
+ - 256.f
+
+ # model structure
+ type: transformer
+
+ # Flo generates separate vocabs, so don't tie between source and target
+ tied-embeddings: true
+ tied-embeddings-all: true
+ tied-embeddings-src: false
+
+ # dimensions
+ dim-emb: 1024
+ enc-depth: 6
+ dec-depth: 6
+ transformer-dim-ffn: 8192
+ transformer-decoder-dim-ffn: 8192
+ transformer-depth-scaling: true
+ lemma-dim-emb: 0
+ max-length: 256
+
+ # architecture details
+ transformer-decoder-autoreg: self-attention
+ transformer-tied-layers: []
+
+ # further transformer details
+ transformer-ffn-activation: relu
+
+ transformer-heads: 8
+ transformer-postprocess-emb: d
+ transformer-postprocess: dan
+
+ transformer-dropout: 0.1
+ transformer-dropout-attention: 0
+ transformer-dropout-ffn: 0.1
+
+ # data munging
+ all-caps-every: 0
+ english-title-case-every: 0
+
+ log-time-zone: PST8PDT
+
+ quiet-translation: true
+ keep-best: true
+ overwrite: false
+ interpolate-env-vars: true
+ log: train.log
+ valid-log: valid.log
+ valid-translation-output: valid.trg.output
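
For reference, the optimizer block in this file (learn-rate: 0.0005, lr-warmup: 4000, lr-decay-inv-sqrt: 4000) describes the familiar linear-warmup-then-inverse-square-root schedule. The Python sketch below is only an illustration of that schedule under those assumptions, not Marian's actual implementation; details such as the warmup start value and unit handling may differ.

```python
# Rough sketch (not Marian's code) of the learning-rate curve implied by
# learn-rate: 0.0005, lr-warmup: 4000, lr-decay-inv-sqrt: 4000:
# linear warmup to the peak rate, then ~1/sqrt(step) decay.
import math

BASE_LR = 0.0005      # learn-rate
WARMUP = 4000         # lr-warmup (updates)
DECAY_START = 4000    # lr-decay-inv-sqrt (updates)

def effective_lr(step: int) -> float:
    """Approximate learning rate at a given update step."""
    warmup_factor = min(step / WARMUP, 1.0)                         # ramp up linearly
    decay_factor = math.sqrt(DECAY_START / max(step, DECAY_START))  # then inverse-sqrt decay
    return BASE_LR * warmup_factor * decay_factor

if __name__ == "__main__":
    for step in (100, 1000, 4000, 16000, 64000):
        print(f"update {step:6d}: lr = {effective_lr(step):.6f}")
```

In this approximation the rate peaks at 0.0005 around update 4000 and then falls off proportionally to 1/sqrt(step), which is the usual Transformer-style teacher schedule.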
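This file fixes training hyperparameters only; data, vocabularies, the model path, and devices are normally supplied at invocation time. The sketch below is a hypothetical launch, with all paths, vocabulary files, and the device list being placeholders that are not part of this commit; it only shows how run-specific options are typically layered on top of a shared config such as this one.

```python
# Hypothetical launch sketch: paths, vocab files and devices are placeholders.
import subprocess

cmd = [
    "marian",
    "--config", "marian-config.yaml",                     # the file added in this commit
    "--model", "model/model.npz",                         # placeholder output path
    "--train-sets", "data/train.src", "data/train.trg",   # placeholder training data
    "--vocabs", "vocab.src.spm", "vocab.trg.spm",         # placeholder vocabularies
    "--devices", "0", "1", "2", "3",                      # adjust to available GPUs
]
subprocess.run(cmd, check=True)
```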