dhladek committed on
Commit
77a3ac8
·
1 Parent(s): 0dd4dff

Upload folder using huggingface_hub

Browse files
config.json CHANGED
@@ -21,5 +21,5 @@
21
  "pad_token_id": 0,
22
  "relative_attention_num_buckets": 32,
23
  "tie_word_embeddings": false,
24
- "vocab_size": 32100
25
  }
 
21
  "pad_token_id": 0,
22
  "relative_attention_num_buckets": 32,
23
  "tie_word_embeddings": false,
24
+ "vocab_size": 64100
25
  }
hydra/config.yaml ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ mode: pt
2
+ device: gpu
3
+ precision: bf16
4
+ eval_only: false
5
+ predict_only: false
6
+ seed: 2137
7
+ model:
8
+ klass: hf_t5
9
+ name: /home/jovyan/bert-train/nanot5/base_slovak_model/
10
+ overwrite:
11
+ dropout_rate: 0.0
12
+ add_config:
13
+ is_bf16: false
14
+ checkpoint_path: ''
15
+ random_init: true
16
+ compile: false
17
+ data:
18
+ input_length: 512
19
+ mlm_probability: 0.15
20
+ mean_noise_span_length: 3.0
21
+ num_workers: 2
22
+ optim:
23
+ name: adamwscale
24
+ base_lr: 0.02
25
+ batch_size: 128
26
+ total_steps: 65536
27
+ epochs: -1
28
+ warmup_steps: 10000
29
+ lr_scheduler: cosine
30
+ weight_decay: 0.0
31
+ grad_clip: 1.0
32
+ grad_acc: 4
33
+ final_cosine: 1.0e-05
34
+ eval:
35
+ every_steps: 5000
36
+ steps: 500
37
+ checkpoint:
38
+ every_steps: 20000
39
+ logging:
40
+ neptune: false
41
+ neptune_creds:
42
+ project: null
43
+ api_token: null
44
+ tags: ''
45
+ every_steps: 100
46
+ grad_l2: true
47
+ weights_l2: true
hydra/hydra.yaml ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ hydra:
2
+ run:
3
+ dir: ./logs/${now:%Y-%m-%d}/${now:%H-%M-%S}-${logging.neptune_creds.tags}
4
+ sweep:
5
+ dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S}
6
+ subdir: ${hydra.job.num}
7
+ launcher:
8
+ _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
9
+ sweeper:
10
+ _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
11
+ max_batch_size: null
12
+ params: null
13
+ help:
14
+ app_name: ${hydra.job.name}
15
+ header: '${hydra.help.app_name} is powered by Hydra.
16
+
17
+ '
18
+ footer: 'Powered by Hydra (https://hydra.cc)
19
+
20
+ Use --hydra-help to view Hydra specific help
21
+
22
+ '
23
+ template: '${hydra.help.header}
24
+
25
+ == Configuration groups ==
26
+
27
+ Compose your configuration from those groups (group=option)
28
+
29
+
30
+ $APP_CONFIG_GROUPS
31
+
32
+
33
+ == Config ==
34
+
35
+ Override anything in the config (foo.bar=value)
36
+
37
+
38
+ $CONFIG
39
+
40
+
41
+ ${hydra.help.footer}
42
+
43
+ '
44
+ hydra_help:
45
+ template: 'Hydra (${hydra.runtime.version})
46
+
47
+ See https://hydra.cc for more info.
48
+
49
+
50
+ == Flags ==
51
+
52
+ $FLAGS_HELP
53
+
54
+
55
+ == Configuration groups ==
56
+
57
+ Compose your configuration from those groups (For example, append hydra/job_logging=disabled
58
+ to command line)
59
+
60
+
61
+ $HYDRA_CONFIG_GROUPS
62
+
63
+
64
+ Use ''--cfg hydra'' to Show the Hydra config.
65
+
66
+ '
67
+ hydra_help: ???
68
+ hydra_logging:
69
+ version: 1
70
+ formatters:
71
+ simple:
72
+ format: '[%(asctime)s][HYDRA] %(message)s'
73
+ handlers:
74
+ console:
75
+ class: logging.StreamHandler
76
+ formatter: simple
77
+ stream: ext://sys.stdout
78
+ root:
79
+ level: INFO
80
+ handlers:
81
+ - console
82
+ loggers:
83
+ logging_example:
84
+ level: DEBUG
85
+ disable_existing_loggers: false
86
+ job_logging:
87
+ version: 1
88
+ formatters:
89
+ simple:
90
+ format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
91
+ handlers:
92
+ console:
93
+ class: logging.StreamHandler
94
+ formatter: simple
95
+ stream: ext://sys.stdout
96
+ file:
97
+ class: logging.FileHandler
98
+ formatter: simple
99
+ filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
100
+ root:
101
+ level: INFO
102
+ handlers:
103
+ - console
104
+ - file
105
+ disable_existing_loggers: false
106
+ env: {}
107
+ mode: RUN
108
+ searchpath: []
109
+ callbacks: {}
110
+ output_subdir: .hydra
111
+ overrides:
112
+ hydra:
113
+ - hydra.mode=RUN
114
+ task:
115
+ - optim.name=adamwscale
116
+ - optim.lr_scheduler=cosine
117
+ - model.name=/home/jovyan/bert-train/nanot5/base_slovak_model/
118
+ - optim.grad_acc=4
119
+ - model.klass=hf_t5
120
+ - eval.every_steps=5000
121
+ - checkpoint.every_steps=20000
122
+ job:
123
+ name: main
124
+ chdir: true
125
+ override_dirname: checkpoint.every_steps=20000,eval.every_steps=5000,model.klass=hf_t5,model.name=/home/jovyan/bert-train/nanot5/base_slovak_model/,optim.grad_acc=4,optim.lr_scheduler=cosine,optim.name=adamwscale
126
+ id: ???
127
+ num: ???
128
+ config_name: default
129
+ env_set: {}
130
+ env_copy: []
131
+ config:
132
+ override_dirname:
133
+ kv_sep: '='
134
+ item_sep: ','
135
+ exclude_keys: []
136
+ runtime:
137
+ version: 1.3.2
138
+ version_base: '1.1'
139
+ cwd: /home/jovyan/nanoT5
140
+ config_sources:
141
+ - path: hydra.conf
142
+ schema: pkg
143
+ provider: hydra
144
+ - path: /home/jovyan/nanoT5/nanoT5/configs
145
+ schema: file
146
+ provider: main
147
+ - path: ''
148
+ schema: structured
149
+ provider: schema
150
+ output_dir: /home/jovyan/nanoT5/logs/2024-01-02/07-29-30-
151
+ choices:
152
+ local_env: default
153
+ task: pt
154
+ hydra/env: default
155
+ hydra/callbacks: null
156
+ hydra/job_logging: default
157
+ hydra/hydra_logging: default
158
+ hydra/hydra_help: default
159
+ hydra/help: default
160
+ hydra/sweeper: basic
161
+ hydra/launcher: basic
162
+ hydra/output: default
163
+ verbose: false
hydra/overrides.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ - optim.name=adamwscale
2
+ - optim.lr_scheduler=cosine
3
+ - model.name=/home/jovyan/bert-train/nanot5/base_slovak_model/
4
+ - optim.grad_acc=4
5
+ - model.klass=hf_t5
6
+ - eval.every_steps=5000
7
+ - checkpoint.every_steps=20000
main.log CHANGED
The diff for this file is too large to render. See raw diff
 
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac4258d6a76c6e7a0687bc7af68ce34538f3b67e4387678e96eb6a2d29114a9e
3
- size 990173032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8adcaa3befd91b9080f22d00466debbce77b0989eb3b038269f83aad1ee5e934
3
+ size 1186781032
optimizer.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d9f3304271a05b8894b45869b7bae03ae655aa7615410d7ee722ab02064ceea
3
+ size 2373662661
random_states_0.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a429eefea0f594d70075b0d57721b18d7e6e4d12263218ee30780a6e613c04cd
3
+ size 14663
scheduler.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b51bb232eab4bf0dd13cf507f602d257c3596e6828466eb03664de335c46223
3
+ size 819
spiece.model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:89ffef2172838c61ae45e7f6174233eec5c315c22646537d077c4e6b5dc76cd9
3
- size 812913
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:353edf8f042581ee554bb883dd4a19e0888d778553e3fd5ca9c97bb76434406b
3
+ size 1408434
spiece.vocab ADDED
The diff for this file is too large to render. See raw diff