Kyriection committed on
Commit
c811243
1 Parent(s): 9bd8059
Files changed (7) hide show
  1. config.json +28 -0
  2. merges.txt +619 -0
  3. model.safetensors +3 -0
  4. special_tokens_map.json +51 -0
  5. tokenizer.json +1588 -0
  6. tokenizer_config.json +58 -0
  7. vocab.json +1 -0
config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "seyonec/ChemBERTa_zinc250k_v2_40k",
3
+ "architectures": [
4
+ "RobertaForReactionEmbedding"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "eos_token_id": 2,
10
+ "gradient_checkpointing": false,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
+ "layer_norm_eps": 1e-12,
17
+ "max_position_embeddings": 514,
18
+ "model_type": "roberta",
19
+ "num_attention_heads": 12,
20
+ "num_hidden_layers": 6,
21
+ "pad_token_id": 1,
22
+ "position_embedding_type": "absolute",
23
+ "torch_dtype": "float32",
24
+ "transformers_version": "4.35.0",
25
+ "type_vocab_size": 1,
26
+ "use_cache": true,
27
+ "vocab_size": 52000
28
+ }
merges.txt ADDED
@@ -0,0 +1,619 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #version: 0.2
2
+ c c
3
+ C C
4
+ ( =
5
+ cc c
6
+ ] (
7
+ @ @
8
+ C c
9
+ N C
10
+ cc ccc
11
+ n c
12
+ CC C
13
+ ) [
14
+ N H
15
+ + ]
16
+ C O
17
+ cc cc
18
+ N c
19
+ C l
20
+ O C
21
+ CC N
22
+ ) (
23
+ CO c
24
+ ( -
25
+ ( [
26
+ CC CC
27
+ C N
28
+ - ]
29
+ ) (=
30
+ CC O
31
+ n H
32
+ n n
33
+ -] )
34
+ + ](
35
+ CC c
36
+ ) =
37
+ s c
38
+ C S
39
+ n cc
40
+ B r
41
+ C NC
42
+ n nc
43
+ N Cc
44
+ o c
45
+ 1 2
46
+ +] (=
47
+ CC CCC
48
+ CO C
49
+ C n
50
+ 2 1
51
+ CCC N
52
+ c n
53
+ O c
54
+ CC OC
55
+ CCO CC
56
+ +] [
57
+ c nc
58
+ CC S
59
+ ]( [
60
+ CCO c
61
+ ccc s
62
+ N CC
63
+ ccc nc
64
+ O CC
65
+ CCC O
66
+ ( /
67
+ @ ]
68
+ ccc o
69
+ CS c
70
+ @@ ]
71
+ c nn
72
+ CC n
73
+ CC NC
74
+ 3 2
75
+ cccc n
76
+ 2 3
77
+ n o
78
+ +] )
79
+ ) /
80
+ n oc
81
+ c sc
82
+ c s
83
+ cc ncc
84
+ ccc n
85
+ CC Cc
86
+ S c
87
+ cc nc
88
+ S CC
89
+ O Cc
90
+ S C
91
+ cc n
92
+ cc sc
93
+ N NC
94
+ @ ](
95
+ O CO
96
+ N S
97
+ nc nc
98
+ N CCc
99
+ C Nc
100
+ @@ ](
101
+ = [
102
+ O CCO
103
+ n cccc
104
+ N N
105
+ c ncc
106
+ CCCC CC
107
+ N CCC
108
+ o n
109
+ +]( [
110
+ CC CCN
111
+ nc n
112
+ CCC NC
113
+ ncc s
114
+ +] =
115
+ CS C
116
+ -] )[
117
+ S Cc
118
+ CCC n
119
+ s ccc
120
+ cnc n
121
+ CCS c
122
+ 3 4
123
+ CO CC
124
+ nn nn
125
+ n ccc
126
+ ( \
127
+ n cccn
128
+ CO Cc
129
+ ncc n
130
+ )( [
131
+ CCS C
132
+ cc nn
133
+ cc oc
134
+ CN S
135
+ CCC Oc
136
+ CO CCN
137
+ 4 3
138
+ @@] (=
139
+ F c
140
+ CCS CC
141
+ -]) =
142
+ @] (=
143
+ CS CC
144
+ CCC S
145
+ cncc n
146
+ nn n
147
+ / [
148
+ c oc
149
+ nnc n
150
+ c nnc
151
+ N CCN
152
+ N Nc
153
+ nn nc
154
+ CCCC O
155
+ nc nn
156
+ +] )[
157
+ CC l
158
+ Cl c
159
+ O CCCO
160
+ CC Nc
161
+ CS Cc
162
+ cnn n
163
+ o ccc
164
+ CCCC NC
165
+ NCC NC
166
+ O CCC
167
+ CCN S
168
+ o nc
169
+ CCC OC
170
+ )= [
171
+ ncc nc
172
+ CO CCn
173
+ O CCN
174
+ cn ccc
175
+ F C
176
+ (\ [
177
+ CCCC CCC
178
+ N O
179
+ CO CCNC
180
+ n s
181
+ cs cc
182
+ 1 3
183
+ csc n
184
+ n sc
185
+ NCC n
186
+ N CCOc
187
+ CCCC n
188
+ CCCC c
189
+ N n
190
+ NCC Cc
191
+ no nc
192
+ cc on
193
+ NCCC n
194
+ +] )(
195
+ sc nc
196
+ N CCS
197
+ N CCCN
198
+ nc sc
199
+ CN Cc
200
+ CCC Nc
201
+ N CCCC
202
+ Br c
203
+ s cc
204
+ s ccn
205
+ S CCC
206
+ CO CCO
207
+ CO CCOc
208
+ (= [
209
+ nnc s
210
+ oc nc
211
+ CCO Cc
212
+ 3 1
213
+ ns nc
214
+ nc oc
215
+ O CCc
216
+ O S
217
+ S CCc
218
+ CCCCC N
219
+ OCC NC
220
+ CCC Sc
221
+ CS CN
222
+ CO CCCNC
223
+ CO CCC
224
+ cnc nc
225
+ CCC l
226
+ CCO CCN
227
+ -]) /
228
+ c o
229
+ CS CCS
230
+ nn sc
231
+ \ [
232
+ CCCC Oc
233
+ CS CCO
234
+ N CCO
235
+ C Br
236
+ CCCC S
237
+ ) -
238
+ CO CCCN
239
+ NCC Nc
240
+ +] \
241
+ ccc nn
242
+ ]( /
243
+ OCC n
244
+ CO N
245
+ 4 5
246
+ CCC SCC
247
+ cs nn
248
+ O CCOc
249
+ @ ]([
250
+ +] /
251
+ S CCN
252
+ )( /
253
+ N CCCO
254
+ @@ ]([
255
+ O CCCC
256
+ CCC SC
257
+ O N
258
+ ncc o
259
+ (/ [
260
+ CO CCc
261
+ O COC
262
+ s nc
263
+ ccnc n
264
+ CS CCC
265
+ cc no
266
+ nc on
267
+ CCS Cc
268
+ 5 4
269
+ +] )([
270
+ N CCSc
271
+ nnc o
272
+ CCCCC NC
273
+ 2 4
274
+ CO CCOC
275
+ CS CCN
276
+ CCN CC
277
+ nn cc
278
+ CCC OCC
279
+ N CCOC
280
+ N NS
281
+ CCCC OC
282
+ CO NC
283
+ N OCc
284
+ NCCC OC
285
+ CCN Cc
286
+ CN CC
287
+ S CCS
288
+ s nnc
289
+ o cc
290
+ ) \
291
+ CO CCCC
292
+ CCCC l
293
+ OCC Cc
294
+ N CCOCC
295
+ N CCCCn
296
+ CO CCCn
297
+ S CCOc
298
+ nc ncc
299
+ CN CCc
300
+ O CCCNC
301
+ CCO CCC
302
+ CCCN S
303
+ CCOCC NC
304
+ cnc s
305
+ NCCC NC
306
+ CCCC Sc
307
+ CCO CCO
308
+ 4 2
309
+ O CCCN
310
+ CCCCC n
311
+ OCC Nc
312
+ CO CO
313
+ -] )(
314
+ O CCOC
315
+ O CCSc
316
+ O CCS
317
+ NC N
318
+ OC N
319
+ NCCC Sc
320
+ NCCN S
321
+ N OC
322
+ CCO CCCNC
323
+ cnc o
324
+ COCC Nc
325
+ CCC F
326
+ CCO NC
327
+ N CCCCC
328
+ +] =[
329
+ -]) =[
330
+ cn oc
331
+ O CCCn
332
+ ] /
333
+ CN n
334
+ CCO CCCC
335
+ CCOCC n
336
+ CO CCS
337
+ Cl C
338
+ CCCC Cc
339
+ S CCO
340
+ CN CCN
341
+ NCCC Oc
342
+ N OCC
343
+ NCCC S
344
+ OC n
345
+ CCCC Nc
346
+ CN N
347
+ S CCNC
348
+ Cl Cc
349
+ -] /
350
+ CCS CCC
351
+ O CCCOc
352
+ S CCn
353
+ CS CCNC
354
+ )/ [
355
+ CO CCCOc
356
+ CCOCC S
357
+ cc nnc
358
+ CCO CCOc
359
+ CS CCCNC
360
+ NCC CCN
361
+ s n
362
+ CO CCOCC
363
+ -] [
364
+ CCCCC O
365
+ S CCCS
366
+ cc s
367
+ -] =[
368
+ CCCCC S
369
+ cnn s
370
+ COCCN S
371
+ CO CCCNc
372
+ CS CCCCNC
373
+ CC Br
374
+ CS CCc
375
+ NCCC Nc
376
+ NCCC OCC
377
+ S CCCC
378
+ o ncc
379
+ CS CCCN
380
+ CS CCOC
381
+ CCC SCc
382
+ CO CCSc
383
+ CO n
384
+ (- [
385
+ NCCS Cc
386
+ CO CCCOC
387
+ CO CCOCCNC
388
+ CCCC OCC
389
+ CCCC CCNC
390
+ CS CCn
391
+ CCCCN S
392
+ N CCSCC
393
+ ] [
394
+ CCCCC Oc
395
+ I c
396
+ N CCSC
397
+ O CCCCC
398
+ S CN
399
+ CO P
400
+ CCO P
401
+ CS CCOc
402
+ cn sn
403
+ OCCC l
404
+ OCCC Sc
405
+ ns cc
406
+ CO COc
407
+ Br Cc
408
+ NCCCC l
409
+ O P
410
+ S Cn
411
+ S CCCc
412
+ ]( /[
413
+ CO S
414
+ CCCC CCN
415
+ CCO CCCN
416
+ +]( -
417
+ +]( /
418
+ CCCO Cc
419
+ C I
420
+ N Oc
421
+ N CCCCCC
422
+ O CCNS
423
+ S N
424
+ Br C
425
+ cn sc
426
+ OCC F
427
+ 3 5
428
+ O CCSC
429
+ S CCOC
430
+ S CCCO
431
+ S CCCOc
432
+ CO CCCCC
433
+ Cl CC
434
+ -] )([
435
+ CCCCC OC
436
+ CCOCC Nc
437
+ OCCC Nc
438
+ NCCCC Oc
439
+ N CCOCc
440
+ O CCOCC
441
+ O CCCS
442
+ O CCSCc
443
+ S CCCN
444
+ Cl CCc
445
+ CCCC CCO
446
+ CCCC CCCCCCC
447
+ CN CCC
448
+ CCCN CC
449
+ CCOCC Sc
450
+ NN N
451
+ NCCC SC
452
+ COCCN Cc
453
+ 4 1
454
+ 5 6
455
+ N SC
456
+ N CCCCc
457
+ O NC
458
+ c onc
459
+ CCC Br
460
+ +] /[
461
+ CCCC CCn
462
+ CCO CCOC
463
+ CCO CCOCC
464
+ CCO CCCn
465
+ CCO CCCNc
466
+ Br CC
467
+ CCCCC l
468
+ CCCCC Sc
469
+ CCS S
470
+ CCS CCOC
471
+ OCC CCN
472
+ NCCCN S
473
+ NCCCO Cc
474
+ O H
475
+ S CCSc
476
+ NC n
477
+ CCCC CCS
478
+ CCCC OCc
479
+ CCCC SC
480
+ CS CCSC
481
+ CCS CCN
482
+ OCC Br
483
+ OCCO CCOCCO
484
+ NCCCC SC
485
+ COCCO CCN
486
+ NCCO CCO
487
+ ( #
488
+ 1 4
489
+ I C
490
+ O n
491
+ O CCSCC
492
+ O CCCCn
493
+ O CCOCCS
494
+ S S
495
+ S SC
496
+ S CCCn
497
+ NC NS
498
+ CO CCCS
499
+ CO CCCCCNC
500
+ CCCC SCC
501
+ CCCC CCSc
502
+ CCO N
503
+ nn ccc
504
+ -]) \
505
+ +]( \[
506
+ CS CCCC
507
+ CS CCCNc
508
+ 12 3
509
+ CCOCC Cc
510
+ CCOCCO CCOCC
511
+ - [
512
+ 1 32
513
+ 5 3
514
+ C H
515
+ N P
516
+ O O
517
+ O COc
518
+ P H
519
+ S CCNS
520
+ s ncc
521
+ CCC NCc
522
+ +] )=
523
+ CO CCCc
524
+ CO NS
525
+ CO CCCCNC
526
+ CO CCCCS
527
+ CCCC CCCC
528
+ CCCC CCc
529
+ CCO CCOCc
530
+ CS CCCn
531
+ Br CCC
532
+ Br CCc
533
+ CCCCC Br
534
+ CCS CCOc
535
+ CCS CCCO
536
+ CCS CCn
537
+ OCC l
538
+ CCCO CCO
539
+ +]) /
540
+ SCC Nc
541
+ OCCO CCOc
542
+ CCCCCC Nc
543
+ NCCC F
544
+ NCCC l
545
+ CCCCO CCN
546
+ NCCCC Cc
547
+ OCCCC Sc
548
+ COCCS CCC
549
+ # [
550
+ 3 12
551
+ C F
552
+ F O
553
+ F CCC
554
+ N NCc
555
+ N Sc
556
+ O Nc
557
+ O CCCCNC
558
+ O CCCNS
559
+ O CCCCCS
560
+ S H
561
+ S Sc
562
+ S CCCCO
563
+ S CCCCCO
564
+ S CCCCCS
565
+ S SN
566
+ s s
567
+ s sc
568
+ s snc
569
+ NC Sc
570
+ nc no
571
+ CCC OCCNC
572
+ +] #
573
+ +] \[
574
+ CO CCCCN
575
+ CO CCSCC
576
+ CO CCCCOc
577
+ CO CCSCc
578
+ Cl CCCSc
579
+ CCCC CCOc
580
+ CCCC SCc
581
+ CCCC OCCNC
582
+ CN NC
583
+ CN CCO
584
+ -] /[
585
+ CCO S
586
+ CCO CO
587
+ CCO CCc
588
+ CCO COCC
589
+ +]( /[
590
+ CS CCCc
591
+ CS CCNS
592
+ CCCCC Nc
593
+ CCCCC OCC
594
+ CCCCC NS
595
+ CCS CCc
596
+ CCS CCSc
597
+ NCC Br
598
+ CCCO S
599
+ CCCO CCC
600
+ no cc
601
+ @@]( /
602
+ OCCO Cc
603
+ OCCO CCN
604
+ OCCO CCO
605
+ CCCCCC l
606
+ NCCC SCC
607
+ CCCCO CCCNC
608
+ OCCC OC
609
+ OCCC Br
610
+ OCCC SC
611
+ NO CCc
612
+ SCCC Br
613
+ SCCC SCC
614
+ COCCO Cc
615
+ NCCO CCc
616
+ COCCCN S
617
+ COCCOCC Sc
618
+ SCCCC CCSc
619
+ OCCOCCOCCO CCO
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6dacf3cc507e5f80d8a958ec7148de9e07b5424ad021f2adc6e2d914544efceb
3
+ size 338540976
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": true,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": true,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": true,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": true,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
tokenizer.json ADDED
@@ -0,0 +1,1588 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [
6
+ {
7
+ "id": 0,
8
+ "content": "<s>",
9
+ "single_word": false,
10
+ "lstrip": false,
11
+ "rstrip": false,
12
+ "normalized": true,
13
+ "special": true
14
+ },
15
+ {
16
+ "id": 1,
17
+ "content": "<pad>",
18
+ "single_word": false,
19
+ "lstrip": false,
20
+ "rstrip": false,
21
+ "normalized": true,
22
+ "special": true
23
+ },
24
+ {
25
+ "id": 2,
26
+ "content": "</s>",
27
+ "single_word": false,
28
+ "lstrip": false,
29
+ "rstrip": false,
30
+ "normalized": true,
31
+ "special": true
32
+ },
33
+ {
34
+ "id": 3,
35
+ "content": "<unk>",
36
+ "single_word": false,
37
+ "lstrip": false,
38
+ "rstrip": false,
39
+ "normalized": true,
40
+ "special": true
41
+ },
42
+ {
43
+ "id": 4,
44
+ "content": "<mask>",
45
+ "single_word": false,
46
+ "lstrip": true,
47
+ "rstrip": false,
48
+ "normalized": true,
49
+ "special": true
50
+ }
51
+ ],
52
+ "normalizer": null,
53
+ "pre_tokenizer": {
54
+ "type": "ByteLevel",
55
+ "add_prefix_space": false,
56
+ "trim_offsets": true,
57
+ "use_regex": true
58
+ },
59
+ "post_processor": {
60
+ "type": "RobertaProcessing",
61
+ "sep": [
62
+ "</s>",
63
+ 2
64
+ ],
65
+ "cls": [
66
+ "<s>",
67
+ 0
68
+ ],
69
+ "trim_offsets": true,
70
+ "add_prefix_space": false
71
+ },
72
+ "decoder": {
73
+ "type": "ByteLevel",
74
+ "add_prefix_space": true,
75
+ "trim_offsets": true,
76
+ "use_regex": true
77
+ },
78
+ "model": {
79
+ "type": "BPE",
80
+ "dropout": null,
81
+ "unk_token": null,
82
+ "continuing_subword_prefix": "",
83
+ "end_of_word_suffix": "",
84
+ "fuse_unk": false,
85
+ "byte_fallback": false,
86
+ "vocab": {
87
+ "<s>": 0,
88
+ "<pad>": 1,
89
+ "</s>": 2,
90
+ "<unk>": 3,
91
+ "<mask>": 4,
92
+ "!": 5,
93
+ "\"": 6,
94
+ "#": 7,
95
+ "$": 8,
96
+ "%": 9,
97
+ "&": 10,
98
+ "'": 11,
99
+ "(": 12,
100
+ ")": 13,
101
+ "*": 14,
102
+ "+": 15,
103
+ ",": 16,
104
+ "-": 17,
105
+ ".": 18,
106
+ "/": 19,
107
+ "0": 20,
108
+ "1": 21,
109
+ "2": 22,
110
+ "3": 23,
111
+ "4": 24,
112
+ "5": 25,
113
+ "6": 26,
114
+ "7": 27,
115
+ "8": 28,
116
+ "9": 29,
117
+ ":": 30,
118
+ ";": 31,
119
+ "<": 32,
120
+ "=": 33,
121
+ ">": 34,
122
+ "?": 35,
123
+ "@": 36,
124
+ "A": 37,
125
+ "B": 38,
126
+ "C": 39,
127
+ "D": 40,
128
+ "E": 41,
129
+ "F": 42,
130
+ "G": 43,
131
+ "H": 44,
132
+ "I": 45,
133
+ "J": 46,
134
+ "K": 47,
135
+ "L": 48,
136
+ "M": 49,
137
+ "N": 50,
138
+ "O": 51,
139
+ "P": 52,
140
+ "Q": 53,
141
+ "R": 54,
142
+ "S": 55,
143
+ "T": 56,
144
+ "U": 57,
145
+ "V": 58,
146
+ "W": 59,
147
+ "X": 60,
148
+ "Y": 61,
149
+ "Z": 62,
150
+ "[": 63,
151
+ "\\": 64,
152
+ "]": 65,
153
+ "^": 66,
154
+ "_": 67,
155
+ "`": 68,
156
+ "a": 69,
157
+ "b": 70,
158
+ "c": 71,
159
+ "d": 72,
160
+ "e": 73,
161
+ "f": 74,
162
+ "g": 75,
163
+ "h": 76,
164
+ "i": 77,
165
+ "j": 78,
166
+ "k": 79,
167
+ "l": 80,
168
+ "m": 81,
169
+ "n": 82,
170
+ "o": 83,
171
+ "p": 84,
172
+ "q": 85,
173
+ "r": 86,
174
+ "s": 87,
175
+ "t": 88,
176
+ "u": 89,
177
+ "v": 90,
178
+ "w": 91,
179
+ "x": 92,
180
+ "y": 93,
181
+ "z": 94,
182
+ "{": 95,
183
+ "|": 96,
184
+ "}": 97,
185
+ "~": 98,
186
+ "¡": 99,
187
+ "¢": 100,
188
+ "£": 101,
189
+ "¤": 102,
190
+ "¥": 103,
191
+ "¦": 104,
192
+ "§": 105,
193
+ "¨": 106,
194
+ "©": 107,
195
+ "ª": 108,
196
+ "«": 109,
197
+ "¬": 110,
198
+ "®": 111,
199
+ "¯": 112,
200
+ "°": 113,
201
+ "±": 114,
202
+ "²": 115,
203
+ "³": 116,
204
+ "´": 117,
205
+ "µ": 118,
206
+ "¶": 119,
207
+ "·": 120,
208
+ "¸": 121,
209
+ "¹": 122,
210
+ "º": 123,
211
+ "»": 124,
212
+ "¼": 125,
213
+ "½": 126,
214
+ "¾": 127,
215
+ "¿": 128,
216
+ "À": 129,
217
+ "Á": 130,
218
+ "Â": 131,
219
+ "Ã": 132,
220
+ "Ä": 133,
221
+ "Å": 134,
222
+ "Æ": 135,
223
+ "Ç": 136,
224
+ "È": 137,
225
+ "É": 138,
226
+ "Ê": 139,
227
+ "Ë": 140,
228
+ "Ì": 141,
229
+ "Í": 142,
230
+ "Î": 143,
231
+ "Ï": 144,
232
+ "Ð": 145,
233
+ "Ñ": 146,
234
+ "Ò": 147,
235
+ "Ó": 148,
236
+ "Ô": 149,
237
+ "Õ": 150,
238
+ "Ö": 151,
239
+ "×": 152,
240
+ "Ø": 153,
241
+ "Ù": 154,
242
+ "Ú": 155,
243
+ "Û": 156,
244
+ "Ü": 157,
245
+ "Ý": 158,
246
+ "Þ": 159,
247
+ "ß": 160,
248
+ "à": 161,
249
+ "á": 162,
250
+ "â": 163,
251
+ "ã": 164,
252
+ "ä": 165,
253
+ "å": 166,
254
+ "æ": 167,
255
+ "ç": 168,
256
+ "è": 169,
257
+ "é": 170,
258
+ "ê": 171,
259
+ "ë": 172,
260
+ "ì": 173,
261
+ "í": 174,
262
+ "î": 175,
263
+ "ï": 176,
264
+ "ð": 177,
265
+ "ñ": 178,
266
+ "ò": 179,
267
+ "ó": 180,
268
+ "ô": 181,
269
+ "õ": 182,
270
+ "ö": 183,
271
+ "÷": 184,
272
+ "ø": 185,
273
+ "ù": 186,
274
+ "ú": 187,
275
+ "û": 188,
276
+ "ü": 189,
277
+ "ý": 190,
278
+ "þ": 191,
279
+ "ÿ": 192,
280
+ "Ā": 193,
281
+ "ā": 194,
282
+ "Ă": 195,
283
+ "ă": 196,
284
+ "Ą": 197,
285
+ "ą": 198,
286
+ "Ć": 199,
287
+ "ć": 200,
288
+ "Ĉ": 201,
289
+ "ĉ": 202,
290
+ "Ċ": 203,
291
+ "ċ": 204,
292
+ "Č": 205,
293
+ "č": 206,
294
+ "Ď": 207,
295
+ "ď": 208,
296
+ "Đ": 209,
297
+ "đ": 210,
298
+ "Ē": 211,
299
+ "ē": 212,
300
+ "Ĕ": 213,
301
+ "ĕ": 214,
302
+ "Ė": 215,
303
+ "ė": 216,
304
+ "Ę": 217,
305
+ "ę": 218,
306
+ "Ě": 219,
307
+ "ě": 220,
308
+ "Ĝ": 221,
309
+ "ĝ": 222,
310
+ "Ğ": 223,
311
+ "ğ": 224,
312
+ "Ġ": 225,
313
+ "ġ": 226,
314
+ "Ģ": 227,
315
+ "ģ": 228,
316
+ "Ĥ": 229,
317
+ "ĥ": 230,
318
+ "Ħ": 231,
319
+ "ħ": 232,
320
+ "Ĩ": 233,
321
+ "ĩ": 234,
322
+ "Ī": 235,
323
+ "ī": 236,
324
+ "Ĭ": 237,
325
+ "ĭ": 238,
326
+ "Į": 239,
327
+ "į": 240,
328
+ "İ": 241,
329
+ "ı": 242,
330
+ "Ĳ": 243,
331
+ "ĳ": 244,
332
+ "Ĵ": 245,
333
+ "ĵ": 246,
334
+ "Ķ": 247,
335
+ "ķ": 248,
336
+ "ĸ": 249,
337
+ "Ĺ": 250,
338
+ "ĺ": 251,
339
+ "Ļ": 252,
340
+ "ļ": 253,
341
+ "Ľ": 254,
342
+ "ľ": 255,
343
+ "Ŀ": 256,
344
+ "ŀ": 257,
345
+ "Ł": 258,
346
+ "ł": 259,
347
+ "Ń": 260,
348
+ "cc": 261,
349
+ "CC": 262,
350
+ "(=": 263,
351
+ "ccc": 264,
352
+ "](": 265,
353
+ "@@": 266,
354
+ "Cc": 267,
355
+ "NC": 268,
356
+ "ccccc": 269,
357
+ "nc": 270,
358
+ "CCC": 271,
359
+ ")[": 272,
360
+ "NH": 273,
361
+ "+]": 274,
362
+ "CO": 275,
363
+ "cccc": 276,
364
+ "Nc": 277,
365
+ "Cl": 278,
366
+ "OC": 279,
367
+ "CCN": 280,
368
+ ")(": 281,
369
+ "COc": 282,
370
+ "(-": 283,
371
+ "([": 284,
372
+ "CCCC": 285,
373
+ "CN": 286,
374
+ "-]": 287,
375
+ ")(=": 288,
376
+ "CCO": 289,
377
+ "nH": 290,
378
+ "nn": 291,
379
+ "-])": 292,
380
+ "+](": 293,
381
+ "CCc": 294,
382
+ ")=": 295,
383
+ "sc": 296,
384
+ "CS": 297,
385
+ "ncc": 298,
386
+ "Br": 299,
387
+ "CNC": 300,
388
+ "nnc": 301,
389
+ "NCc": 302,
390
+ "oc": 303,
391
+ "12": 304,
392
+ "+](=": 305,
393
+ "CCCCC": 306,
394
+ "COC": 307,
395
+ "Cn": 308,
396
+ "21": 309,
397
+ "CCCN": 310,
398
+ "cn": 311,
399
+ "Oc": 312,
400
+ "CCOC": 313,
401
+ "CCOCC": 314,
402
+ "+][": 315,
403
+ "cnc": 316,
404
+ "CCS": 317,
405
+ "]([": 318,
406
+ "CCOc": 319,
407
+ "cccs": 320,
408
+ "NCC": 321,
409
+ "cccnc": 322,
410
+ "OCC": 323,
411
+ "CCCO": 324,
412
+ "(/": 325,
413
+ "@]": 326,
414
+ "ccco": 327,
415
+ "CSc": 328,
416
+ "@@]": 329,
417
+ "cnn": 330,
418
+ "CCn": 331,
419
+ "CCNC": 332,
420
+ "32": 333,
421
+ "ccccn": 334,
422
+ "23": 335,
423
+ "no": 336,
424
+ "+])": 337,
425
+ ")/": 338,
426
+ "noc": 339,
427
+ "csc": 340,
428
+ "cs": 341,
429
+ "ccncc": 342,
430
+ "cccn": 343,
431
+ "CCCc": 344,
432
+ "Sc": 345,
433
+ "ccnc": 346,
434
+ "SCC": 347,
435
+ "OCc": 348,
436
+ "SC": 349,
437
+ "ccn": 350,
438
+ "ccsc": 351,
439
+ "NNC": 352,
440
+ "@](": 353,
441
+ "OCO": 354,
442
+ "NS": 355,
443
+ "ncnc": 356,
444
+ "NCCc": 357,
445
+ "CNc": 358,
446
+ "@@](": 359,
447
+ "=[": 360,
448
+ "OCCO": 361,
449
+ "ncccc": 362,
450
+ "NN": 363,
451
+ "cncc": 364,
452
+ "CCCCCC": 365,
453
+ "NCCC": 366,
454
+ "on": 367,
455
+ "+]([": 368,
456
+ "CCCCN": 369,
457
+ "ncn": 370,
458
+ "CCCNC": 371,
459
+ "nccs": 372,
460
+ "+]=": 373,
461
+ "CSC": 374,
462
+ "-])[": 375,
463
+ "SCc": 376,
464
+ "CCCn": 377,
465
+ "sccc": 378,
466
+ "cncn": 379,
467
+ "CCSc": 380,
468
+ "34": 381,
469
+ "COCC": 382,
470
+ "nnnn": 383,
471
+ "nccc": 384,
472
+ "(\\": 385,
473
+ "ncccn": 386,
474
+ "COCc": 387,
475
+ "nccn": 388,
476
+ ")([": 389,
477
+ "CCSC": 390,
478
+ "ccnn": 391,
479
+ "ccoc": 392,
480
+ "CNS": 393,
481
+ "CCCOc": 394,
482
+ "COCCN": 395,
483
+ "43": 396,
484
+ "@@](=": 397,
485
+ "Fc": 398,
486
+ "CCSCC": 399,
487
+ "-])=": 400,
488
+ "@](=": 401,
489
+ "CSCC": 402,
490
+ "CCCS": 403,
491
+ "cnccn": 404,
492
+ "nnn": 405,
493
+ "/[": 406,
494
+ "coc": 407,
495
+ "nncn": 408,
496
+ "cnnc": 409,
497
+ "NCCN": 410,
498
+ "NNc": 411,
499
+ "nnnc": 412,
500
+ "CCCCO": 413,
501
+ "ncnn": 414,
502
+ "+])[": 415,
503
+ "CCl": 416,
504
+ "Clc": 417,
505
+ "OCCCO": 418,
506
+ "CCNc": 419,
507
+ "CSCc": 420,
508
+ "cnnn": 421,
509
+ "occc": 422,
510
+ "CCCCNC": 423,
511
+ "NCCNC": 424,
512
+ "OCCC": 425,
513
+ "CCNS": 426,
514
+ "onc": 427,
515
+ "CCCOC": 428,
516
+ ")=[": 429,
517
+ "nccnc": 430,
518
+ "COCCn": 431,
519
+ "OCCN": 432,
520
+ "cnccc": 433,
521
+ "FC": 434,
522
+ "(\\[": 435,
523
+ "CCCCCCC": 436,
524
+ "NO": 437,
525
+ "COCCNC": 438,
526
+ "ns": 439,
527
+ "cscc": 440,
528
+ "13": 441,
529
+ "cscn": 442,
530
+ "nsc": 443,
531
+ "NCCn": 444,
532
+ "NCCOc": 445,
533
+ "CCCCn": 446,
534
+ "CCCCc": 447,
535
+ "Nn": 448,
536
+ "NCCCc": 449,
537
+ "nonc": 450,
538
+ "ccon": 451,
539
+ "NCCCn": 452,
540
+ "+])(": 453,
541
+ "scnc": 454,
542
+ "NCCS": 455,
543
+ "NCCCN": 456,
544
+ "ncsc": 457,
545
+ "CNCc": 458,
546
+ "CCCNc": 459,
547
+ "NCCCC": 460,
548
+ "Brc": 461,
549
+ "scc": 462,
550
+ "sccn": 463,
551
+ "SCCC": 464,
552
+ "COCCO": 465,
553
+ "COCCOc": 466,
554
+ "(=[": 467,
555
+ "nncs": 468,
556
+ "ocnc": 469,
557
+ "CCOCc": 470,
558
+ "31": 471,
559
+ "nsnc": 472,
560
+ "ncoc": 473,
561
+ "OCCc": 474,
562
+ "OS": 475,
563
+ "SCCc": 476,
564
+ "CCCCCN": 477,
565
+ "OCCNC": 478,
566
+ "CCCSc": 479,
567
+ "CSCN": 480,
568
+ "COCCCNC": 481,
569
+ "COCCC": 482,
570
+ "cncnc": 483,
571
+ "CCCl": 484,
572
+ "CCOCCN": 485,
573
+ "-])/": 486,
574
+ "co": 487,
575
+ "CSCCS": 488,
576
+ "nnsc": 489,
577
+ "\\[": 490,
578
+ "CCCCOc": 491,
579
+ "CSCCO": 492,
580
+ "NCCO": 493,
581
+ "CBr": 494,
582
+ "CCCCS": 495,
583
+ ")-": 496,
584
+ "COCCCN": 497,
585
+ "NCCNc": 498,
586
+ "+]\\": 499,
587
+ "cccnn": 500,
588
+ "](/": 501,
589
+ "OCCn": 502,
590
+ "CON": 503,
591
+ "45": 504,
592
+ "CCCSCC": 505,
593
+ "csnn": 506,
594
+ "OCCOc": 507,
595
+ "@]([": 508,
596
+ "+]/": 509,
597
+ "SCCN": 510,
598
+ ")(/": 511,
599
+ "NCCCO": 512,
600
+ "@@]([": 513,
601
+ "OCCCC": 514,
602
+ "CCCSC": 515,
603
+ "ON": 516,
604
+ "ncco": 517,
605
+ "(/[": 518,
606
+ "COCCc": 519,
607
+ "OCOC": 520,
608
+ "snc": 521,
609
+ "ccncn": 522,
610
+ "CSCCC": 523,
611
+ "ccno": 524,
612
+ "ncon": 525,
613
+ "CCSCc": 526,
614
+ "54": 527,
615
+ "+])([": 528,
616
+ "NCCSc": 529,
617
+ "nnco": 530,
618
+ "CCCCCNC": 531,
619
+ "24": 532,
620
+ "COCCOC": 533,
621
+ "CSCCN": 534,
622
+ "CCNCC": 535,
623
+ "nncc": 536,
624
+ "CCCOCC": 537,
625
+ "NCCOC": 538,
626
+ "NNS": 539,
627
+ "CCCCOC": 540,
628
+ "CONC": 541,
629
+ "NOCc": 542,
630
+ "NCCCOC": 543,
631
+ "CCNCc": 544,
632
+ "CNCC": 545,
633
+ "SCCS": 546,
634
+ "snnc": 547,
635
+ "occ": 548,
636
+ ")\\": 549,
637
+ "COCCCC": 550,
638
+ "CCCCl": 551,
639
+ "OCCCc": 552,
640
+ "NCCOCC": 553,
641
+ "NCCCCn": 554,
642
+ "COCCCn": 555,
643
+ "SCCOc": 556,
644
+ "ncncc": 557,
645
+ "CNCCc": 558,
646
+ "OCCCNC": 559,
647
+ "CCOCCC": 560,
648
+ "CCCNS": 561,
649
+ "CCOCCNC": 562,
650
+ "cncs": 563,
651
+ "NCCCNC": 564,
652
+ "CCCCSc": 565,
653
+ "CCOCCO": 566,
654
+ "42": 567,
655
+ "OCCCN": 568,
656
+ "CCCCCn": 569,
657
+ "OCCNc": 570,
658
+ "COCO": 571,
659
+ "-])(": 572,
660
+ "OCCOC": 573,
661
+ "OCCSc": 574,
662
+ "OCCS": 575,
663
+ "NCN": 576,
664
+ "OCN": 577,
665
+ "NCCCSc": 578,
666
+ "NCCNS": 579,
667
+ "NOC": 580,
668
+ "CCOCCCNC": 581,
669
+ "cnco": 582,
670
+ "COCCNc": 583,
671
+ "CCCF": 584,
672
+ "CCONC": 585,
673
+ "NCCCCC": 586,
674
+ "+]=[": 587,
675
+ "-])=[": 588,
676
+ "cnoc": 589,
677
+ "OCCCn": 590,
678
+ "]/": 591,
679
+ "CNn": 592,
680
+ "CCOCCCC": 593,
681
+ "CCOCCn": 594,
682
+ "COCCS": 595,
683
+ "ClC": 596,
684
+ "CCCCCc": 597,
685
+ "SCCO": 598,
686
+ "CNCCN": 599,
687
+ "NCCCOc": 600,
688
+ "NOCC": 601,
689
+ "NCCCS": 602,
690
+ "OCn": 603,
691
+ "CCCCNc": 604,
692
+ "CNN": 605,
693
+ "SCCNC": 606,
694
+ "ClCc": 607,
695
+ "-]/": 608,
696
+ "CCSCCC": 609,
697
+ "OCCCOc": 610,
698
+ "SCCn": 611,
699
+ "CSCCNC": 612,
700
+ ")/[": 613,
701
+ "COCCCOc": 614,
702
+ "CCOCCS": 615,
703
+ "ccnnc": 616,
704
+ "CCOCCOc": 617,
705
+ "CSCCCNC": 618,
706
+ "NCCCCN": 619,
707
+ "sn": 620,
708
+ "COCCOCC": 621,
709
+ "-][": 622,
710
+ "CCCCCO": 623,
711
+ "SCCCS": 624,
712
+ "ccs": 625,
713
+ "-]=[": 626,
714
+ "CCCCCS": 627,
715
+ "cnns": 628,
716
+ "COCCNS": 629,
717
+ "COCCCNc": 630,
718
+ "CSCCCCNC": 631,
719
+ "CCBr": 632,
720
+ "CSCCc": 633,
721
+ "NCCCNc": 634,
722
+ "NCCCOCC": 635,
723
+ "SCCCC": 636,
724
+ "oncc": 637,
725
+ "CSCCCN": 638,
726
+ "CSCCOC": 639,
727
+ "CCCSCc": 640,
728
+ "COCCSc": 641,
729
+ "COn": 642,
730
+ "(-[": 643,
731
+ "NCCSCc": 644,
732
+ "COCCCOC": 645,
733
+ "COCCOCCNC": 646,
734
+ "CCCCOCC": 647,
735
+ "CCCCCCNC": 648,
736
+ "CSCCn": 649,
737
+ "CCCCNS": 650,
738
+ "NCCSCC": 651,
739
+ "][": 652,
740
+ "CCCCCOc": 653,
741
+ "Ic": 654,
742
+ "NCCSC": 655,
743
+ "OCCCCC": 656,
744
+ "SCN": 657,
745
+ "COP": 658,
746
+ "CCOP": 659,
747
+ "CSCCOc": 660,
748
+ "cnsn": 661,
749
+ "OCCCl": 662,
750
+ "OCCCSc": 663,
751
+ "nscc": 664,
752
+ "COCOc": 665,
753
+ "BrCc": 666,
754
+ "NCCCCl": 667,
755
+ "OP": 668,
756
+ "SCn": 669,
757
+ "SCCCc": 670,
758
+ "](/[": 671,
759
+ "COS": 672,
760
+ "CCCCCCN": 673,
761
+ "CCOCCCN": 674,
762
+ "+](-": 675,
763
+ "+](/": 676,
764
+ "CCCOCc": 677,
765
+ "CI": 678,
766
+ "NOc": 679,
767
+ "NCCCCCC": 680,
768
+ "OCCNS": 681,
769
+ "SN": 682,
770
+ "BrC": 683,
771
+ "cnsc": 684,
772
+ "OCCF": 685,
773
+ "35": 686,
774
+ "OCCSC": 687,
775
+ "SCCOC": 688,
776
+ "SCCCO": 689,
777
+ "SCCCOc": 690,
778
+ "COCCCCC": 691,
779
+ "ClCC": 692,
780
+ "-])([": 693,
781
+ "CCCCCOC": 694,
782
+ "CCOCCNc": 695,
783
+ "OCCCNc": 696,
784
+ "NCCCCOc": 697,
785
+ "NCCOCc": 698,
786
+ "OCCOCC": 699,
787
+ "OCCCS": 700,
788
+ "OCCSCc": 701,
789
+ "SCCCN": 702,
790
+ "ClCCc": 703,
791
+ "CCCCCCO": 704,
792
+ "CCCCCCCCCCC": 705,
793
+ "CNCCC": 706,
794
+ "CCCNCC": 707,
795
+ "CCOCCSc": 708,
796
+ "NNN": 709,
797
+ "NCCCSC": 710,
798
+ "COCCNCc": 711,
799
+ "41": 712,
800
+ "56": 713,
801
+ "NSC": 714,
802
+ "NCCCCc": 715,
803
+ "ONC": 716,
804
+ "conc": 717,
805
+ "CCCBr": 718,
806
+ "+]/[": 719,
807
+ "CCCCCCn": 720,
808
+ "CCOCCOC": 721,
809
+ "CCOCCOCC": 722,
810
+ "CCOCCCn": 723,
811
+ "CCOCCCNc": 724,
812
+ "BrCC": 725,
813
+ "CCCCCl": 726,
814
+ "CCCCCSc": 727,
815
+ "CCSS": 728,
816
+ "CCSCCOC": 729,
817
+ "OCCCCN": 730,
818
+ "NCCCNS": 731,
819
+ "NCCCOCc": 732,
820
+ "OH": 733,
821
+ "SCCSc": 734,
822
+ "NCn": 735,
823
+ "CCCCCCS": 736,
824
+ "CCCCOCc": 737,
825
+ "CCCCSC": 738,
826
+ "CSCCSC": 739,
827
+ "CCSCCN": 740,
828
+ "OCCBr": 741,
829
+ "OCCOCCOCCO": 742,
830
+ "NCCCCSC": 743,
831
+ "COCCOCCN": 744,
832
+ "NCCOCCO": 745,
833
+ "(#": 746,
834
+ "14": 747,
835
+ "IC": 748,
836
+ "On": 749,
837
+ "OCCSCC": 750,
838
+ "OCCCCn": 751,
839
+ "OCCOCCS": 752,
840
+ "SS": 753,
841
+ "SSC": 754,
842
+ "SCCCn": 755,
843
+ "NCNS": 756,
844
+ "COCCCS": 757,
845
+ "COCCCCCNC": 758,
846
+ "CCCCSCC": 759,
847
+ "CCCCCCSc": 760,
848
+ "CCON": 761,
849
+ "nnccc": 762,
850
+ "-])\\": 763,
851
+ "+](\\[": 764,
852
+ "CSCCCC": 765,
853
+ "CSCCCNc": 766,
854
+ "123": 767,
855
+ "CCOCCCc": 768,
856
+ "CCOCCOCCOCC": 769,
857
+ "-[": 770,
858
+ "132": 771,
859
+ "53": 772,
860
+ "CH": 773,
861
+ "NP": 774,
862
+ "OO": 775,
863
+ "OCOc": 776,
864
+ "PH": 777,
865
+ "SCCNS": 778,
866
+ "sncc": 779,
867
+ "CCCNCc": 780,
868
+ "+])=": 781,
869
+ "COCCCc": 782,
870
+ "CONS": 783,
871
+ "COCCCCNC": 784,
872
+ "COCCCCS": 785,
873
+ "CCCCCCCC": 786,
874
+ "CCCCCCc": 787,
875
+ "CCOCCOCc": 788,
876
+ "CSCCCn": 789,
877
+ "BrCCC": 790,
878
+ "BrCCc": 791,
879
+ "CCCCCBr": 792,
880
+ "CCSCCOc": 793,
881
+ "CCSCCCO": 794,
882
+ "CCSCCn": 795,
883
+ "OCCl": 796,
884
+ "CCCOCCO": 797,
885
+ "+])/": 798,
886
+ "SCCNc": 799,
887
+ "OCCOCCOc": 800,
888
+ "CCCCCCNc": 801,
889
+ "NCCCF": 802,
890
+ "NCCCl": 803,
891
+ "CCCCOCCN": 804,
892
+ "NCCCCCc": 805,
893
+ "OCCCCSc": 806,
894
+ "COCCSCCC": 807,
895
+ "#[": 808,
896
+ "312": 809,
897
+ "CF": 810,
898
+ "FO": 811,
899
+ "FCCC": 812,
900
+ "NNCc": 813,
901
+ "NSc": 814,
902
+ "ONc": 815,
903
+ "OCCCCNC": 816,
904
+ "OCCCNS": 817,
905
+ "OCCCCCS": 818,
906
+ "SH": 819,
907
+ "SSc": 820,
908
+ "SCCCCO": 821,
909
+ "SCCCCCO": 822,
910
+ "SCCCCCS": 823,
911
+ "SSN": 824,
912
+ "ss": 825,
913
+ "ssc": 826,
914
+ "ssnc": 827,
915
+ "NCSc": 828,
916
+ "ncno": 829,
917
+ "CCCOCCNC": 830,
918
+ "+]#": 831,
919
+ "+]\\[": 832,
920
+ "COCCCCN": 833,
921
+ "COCCSCC": 834,
922
+ "COCCCCOc": 835,
923
+ "COCCSCc": 836,
924
+ "ClCCCSc": 837,
925
+ "CCCCCCOc": 838,
926
+ "CCCCSCc": 839,
927
+ "CCCCOCCNC": 840,
928
+ "CNNC": 841,
929
+ "CNCCO": 842,
930
+ "-]/[": 843,
931
+ "CCOS": 844,
932
+ "CCOCO": 845,
933
+ "CCOCCc": 846,
934
+ "CCOCOCC": 847,
935
+ "+](/[": 848,
936
+ "CSCCCc": 849,
937
+ "CSCCNS": 850,
938
+ "CCCCCNc": 851,
939
+ "CCCCCOCC": 852,
940
+ "CCCCCNS": 853,
941
+ "CCSCCc": 854,
942
+ "CCSCCSc": 855,
943
+ "NCCBr": 856,
944
+ "CCCOS": 857,
945
+ "CCCOCCC": 858,
946
+ "nocc": 859,
947
+ "@@](/": 860,
948
+ "OCCOCc": 861,
949
+ "OCCOCCN": 862,
950
+ "OCCOCCO": 863,
951
+ "CCCCCCl": 864,
952
+ "NCCCSCC": 865,
953
+ "CCCCOCCCNC": 866,
954
+ "OCCCOC": 867,
955
+ "OCCCBr": 868,
956
+ "OCCCSC": 869,
957
+ "NOCCc": 870,
958
+ "SCCCBr": 871,
959
+ "SCCCSCC": 872,
960
+ "COCCOCc": 873,
961
+ "NCCOCCc": 874,
962
+ "COCCCNS": 875,
963
+ "COCCOCCSc": 876,
964
+ "SCCCCCCSc": 877,
965
+ "OCCOCCOCCOCCO": 878
966
+ },
967
+ "merges": [
968
+ "c c",
969
+ "C C",
970
+ "( =",
971
+ "cc c",
972
+ "] (",
973
+ "@ @",
974
+ "C c",
975
+ "N C",
976
+ "cc ccc",
977
+ "n c",
978
+ "CC C",
979
+ ") [",
980
+ "N H",
981
+ "+ ]",
982
+ "C O",
983
+ "cc cc",
984
+ "N c",
985
+ "C l",
986
+ "O C",
987
+ "CC N",
988
+ ") (",
989
+ "CO c",
990
+ "( -",
991
+ "( [",
992
+ "CC CC",
993
+ "C N",
994
+ "- ]",
995
+ ") (=",
996
+ "CC O",
997
+ "n H",
998
+ "n n",
999
+ "-] )",
1000
+ "+ ](",
1001
+ "CC c",
1002
+ ") =",
1003
+ "s c",
1004
+ "C S",
1005
+ "n cc",
1006
+ "B r",
1007
+ "C NC",
1008
+ "n nc",
1009
+ "N Cc",
1010
+ "o c",
1011
+ "1 2",
1012
+ "+] (=",
1013
+ "CC CCC",
1014
+ "CO C",
1015
+ "C n",
1016
+ "2 1",
1017
+ "CCC N",
1018
+ "c n",
1019
+ "O c",
1020
+ "CC OC",
1021
+ "CCO CC",
1022
+ "+] [",
1023
+ "c nc",
1024
+ "CC S",
1025
+ "]( [",
1026
+ "CCO c",
1027
+ "ccc s",
1028
+ "N CC",
1029
+ "ccc nc",
1030
+ "O CC",
1031
+ "CCC O",
1032
+ "( /",
1033
+ "@ ]",
1034
+ "ccc o",
1035
+ "CS c",
1036
+ "@@ ]",
1037
+ "c nn",
1038
+ "CC n",
1039
+ "CC NC",
1040
+ "3 2",
1041
+ "cccc n",
1042
+ "2 3",
1043
+ "n o",
1044
+ "+] )",
1045
+ ") /",
1046
+ "n oc",
1047
+ "c sc",
1048
+ "c s",
1049
+ "cc ncc",
1050
+ "ccc n",
1051
+ "CC Cc",
1052
+ "S c",
1053
+ "cc nc",
1054
+ "S CC",
1055
+ "O Cc",
1056
+ "S C",
1057
+ "cc n",
1058
+ "cc sc",
1059
+ "N NC",
1060
+ "@ ](",
1061
+ "O CO",
1062
+ "N S",
1063
+ "nc nc",
1064
+ "N CCc",
1065
+ "C Nc",
1066
+ "@@ ](",
1067
+ "= [",
1068
+ "O CCO",
1069
+ "n cccc",
1070
+ "N N",
1071
+ "c ncc",
1072
+ "CCCC CC",
1073
+ "N CCC",
1074
+ "o n",
1075
+ "+]( [",
1076
+ "CC CCN",
1077
+ "nc n",
1078
+ "CCC NC",
1079
+ "ncc s",
1080
+ "+] =",
1081
+ "CS C",
1082
+ "-] )[",
1083
+ "S Cc",
1084
+ "CCC n",
1085
+ "s ccc",
1086
+ "cnc n",
1087
+ "CCS c",
1088
+ "3 4",
1089
+ "CO CC",
1090
+ "nn nn",
1091
+ "n ccc",
1092
+ "( \\",
1093
+ "n cccn",
1094
+ "CO Cc",
1095
+ "ncc n",
1096
+ ")( [",
1097
+ "CCS C",
1098
+ "cc nn",
1099
+ "cc oc",
1100
+ "CN S",
1101
+ "CCC Oc",
1102
+ "CO CCN",
1103
+ "4 3",
1104
+ "@@] (=",
1105
+ "F c",
1106
+ "CCS CC",
1107
+ "-]) =",
1108
+ "@] (=",
1109
+ "CS CC",
1110
+ "CCC S",
1111
+ "cncc n",
1112
+ "nn n",
1113
+ "/ [",
1114
+ "c oc",
1115
+ "nnc n",
1116
+ "c nnc",
1117
+ "N CCN",
1118
+ "N Nc",
1119
+ "nn nc",
1120
+ "CCCC O",
1121
+ "nc nn",
1122
+ "+] )[",
1123
+ "CC l",
1124
+ "Cl c",
1125
+ "O CCCO",
1126
+ "CC Nc",
1127
+ "CS Cc",
1128
+ "cnn n",
1129
+ "o ccc",
1130
+ "CCCC NC",
1131
+ "NCC NC",
1132
+ "O CCC",
1133
+ "CCN S",
1134
+ "o nc",
1135
+ "CCC OC",
1136
+ ")= [",
1137
+ "ncc nc",
1138
+ "CO CCn",
1139
+ "O CCN",
1140
+ "cn ccc",
1141
+ "F C",
1142
+ "(\\ [",
1143
+ "CCCC CCC",
1144
+ "N O",
1145
+ "CO CCNC",
1146
+ "n s",
1147
+ "cs cc",
1148
+ "1 3",
1149
+ "csc n",
1150
+ "n sc",
1151
+ "NCC n",
1152
+ "N CCOc",
1153
+ "CCCC n",
1154
+ "CCCC c",
1155
+ "N n",
1156
+ "NCC Cc",
1157
+ "no nc",
1158
+ "cc on",
1159
+ "NCCC n",
1160
+ "+] )(",
1161
+ "sc nc",
1162
+ "N CCS",
1163
+ "N CCCN",
1164
+ "nc sc",
1165
+ "CN Cc",
1166
+ "CCC Nc",
1167
+ "N CCCC",
1168
+ "Br c",
1169
+ "s cc",
1170
+ "s ccn",
1171
+ "S CCC",
1172
+ "CO CCO",
1173
+ "CO CCOc",
1174
+ "(= [",
1175
+ "nnc s",
1176
+ "oc nc",
1177
+ "CCO Cc",
1178
+ "3 1",
1179
+ "ns nc",
1180
+ "nc oc",
1181
+ "O CCc",
1182
+ "O S",
1183
+ "S CCc",
1184
+ "CCCCC N",
1185
+ "OCC NC",
1186
+ "CCC Sc",
1187
+ "CS CN",
1188
+ "CO CCCNC",
1189
+ "CO CCC",
1190
+ "cnc nc",
1191
+ "CCC l",
1192
+ "CCO CCN",
1193
+ "-]) /",
1194
+ "c o",
1195
+ "CS CCS",
1196
+ "nn sc",
1197
+ "\\ [",
1198
+ "CCCC Oc",
1199
+ "CS CCO",
1200
+ "N CCO",
1201
+ "C Br",
1202
+ "CCCC S",
1203
+ ") -",
1204
+ "CO CCCN",
1205
+ "NCC Nc",
1206
+ "+] \\",
1207
+ "ccc nn",
1208
+ "]( /",
1209
+ "OCC n",
1210
+ "CO N",
1211
+ "4 5",
1212
+ "CCC SCC",
1213
+ "cs nn",
1214
+ "O CCOc",
1215
+ "@ ]([",
1216
+ "+] /",
1217
+ "S CCN",
1218
+ ")( /",
1219
+ "N CCCO",
1220
+ "@@ ]([",
1221
+ "O CCCC",
1222
+ "CCC SC",
1223
+ "O N",
1224
+ "ncc o",
1225
+ "(/ [",
1226
+ "CO CCc",
1227
+ "O COC",
1228
+ "s nc",
1229
+ "ccnc n",
1230
+ "CS CCC",
1231
+ "cc no",
1232
+ "nc on",
1233
+ "CCS Cc",
1234
+ "5 4",
1235
+ "+] )([",
1236
+ "N CCSc",
1237
+ "nnc o",
1238
+ "CCCCC NC",
1239
+ "2 4",
1240
+ "CO CCOC",
1241
+ "CS CCN",
1242
+ "CCN CC",
1243
+ "nn cc",
1244
+ "CCC OCC",
1245
+ "N CCOC",
1246
+ "N NS",
1247
+ "CCCC OC",
1248
+ "CO NC",
1249
+ "N OCc",
1250
+ "NCCC OC",
1251
+ "CCN Cc",
1252
+ "CN CC",
1253
+ "S CCS",
1254
+ "s nnc",
1255
+ "o cc",
1256
+ ") \\",
1257
+ "CO CCCC",
1258
+ "CCCC l",
1259
+ "OCC Cc",
1260
+ "N CCOCC",
1261
+ "N CCCCn",
1262
+ "CO CCCn",
1263
+ "S CCOc",
1264
+ "nc ncc",
1265
+ "CN CCc",
1266
+ "O CCCNC",
1267
+ "CCO CCC",
1268
+ "CCCN S",
1269
+ "CCOCC NC",
1270
+ "cnc s",
1271
+ "NCCC NC",
1272
+ "CCCC Sc",
1273
+ "CCO CCO",
1274
+ "4 2",
1275
+ "O CCCN",
1276
+ "CCCCC n",
1277
+ "OCC Nc",
1278
+ "CO CO",
1279
+ "-] )(",
1280
+ "O CCOC",
1281
+ "O CCSc",
1282
+ "O CCS",
1283
+ "NC N",
1284
+ "OC N",
1285
+ "NCCC Sc",
1286
+ "NCCN S",
1287
+ "N OC",
1288
+ "CCO CCCNC",
1289
+ "cnc o",
1290
+ "COCC Nc",
1291
+ "CCC F",
1292
+ "CCO NC",
1293
+ "N CCCCC",
1294
+ "+] =[",
1295
+ "-]) =[",
1296
+ "cn oc",
1297
+ "O CCCn",
1298
+ "] /",
1299
+ "CN n",
1300
+ "CCO CCCC",
1301
+ "CCOCC n",
1302
+ "CO CCS",
1303
+ "Cl C",
1304
+ "CCCC Cc",
1305
+ "S CCO",
1306
+ "CN CCN",
1307
+ "NCCC Oc",
1308
+ "N OCC",
1309
+ "NCCC S",
1310
+ "OC n",
1311
+ "CCCC Nc",
1312
+ "CN N",
1313
+ "S CCNC",
1314
+ "Cl Cc",
1315
+ "-] /",
1316
+ "CCS CCC",
1317
+ "O CCCOc",
1318
+ "S CCn",
1319
+ "CS CCNC",
1320
+ ")/ [",
1321
+ "CO CCCOc",
1322
+ "CCOCC S",
1323
+ "cc nnc",
1324
+ "CCO CCOc",
1325
+ "CS CCCNC",
1326
+ "NCC CCN",
1327
+ "s n",
1328
+ "CO CCOCC",
1329
+ "-] [",
1330
+ "CCCCC O",
1331
+ "S CCCS",
1332
+ "cc s",
1333
+ "-] =[",
1334
+ "CCCCC S",
1335
+ "cnn s",
1336
+ "COCCN S",
1337
+ "CO CCCNc",
1338
+ "CS CCCCNC",
1339
+ "CC Br",
1340
+ "CS CCc",
1341
+ "NCCC Nc",
1342
+ "NCCC OCC",
1343
+ "S CCCC",
1344
+ "o ncc",
1345
+ "CS CCCN",
1346
+ "CS CCOC",
1347
+ "CCC SCc",
1348
+ "CO CCSc",
1349
+ "CO n",
1350
+ "(- [",
1351
+ "NCCS Cc",
1352
+ "CO CCCOC",
1353
+ "CO CCOCCNC",
1354
+ "CCCC OCC",
1355
+ "CCCC CCNC",
1356
+ "CS CCn",
1357
+ "CCCCN S",
1358
+ "N CCSCC",
1359
+ "] [",
1360
+ "CCCCC Oc",
1361
+ "I c",
1362
+ "N CCSC",
1363
+ "O CCCCC",
1364
+ "S CN",
1365
+ "CO P",
1366
+ "CCO P",
1367
+ "CS CCOc",
1368
+ "cn sn",
1369
+ "OCCC l",
1370
+ "OCCC Sc",
1371
+ "ns cc",
1372
+ "CO COc",
1373
+ "Br Cc",
1374
+ "NCCCC l",
1375
+ "O P",
1376
+ "S Cn",
1377
+ "S CCCc",
1378
+ "]( /[",
1379
+ "CO S",
1380
+ "CCCC CCN",
1381
+ "CCO CCCN",
1382
+ "+]( -",
1383
+ "+]( /",
1384
+ "CCCO Cc",
1385
+ "C I",
1386
+ "N Oc",
1387
+ "N CCCCCC",
1388
+ "O CCNS",
1389
+ "S N",
1390
+ "Br C",
1391
+ "cn sc",
1392
+ "OCC F",
1393
+ "3 5",
1394
+ "O CCSC",
1395
+ "S CCOC",
1396
+ "S CCCO",
1397
+ "S CCCOc",
1398
+ "CO CCCCC",
1399
+ "Cl CC",
1400
+ "-] )([",
1401
+ "CCCCC OC",
1402
+ "CCOCC Nc",
1403
+ "OCCC Nc",
1404
+ "NCCCC Oc",
1405
+ "N CCOCc",
1406
+ "O CCOCC",
1407
+ "O CCCS",
1408
+ "O CCSCc",
1409
+ "S CCCN",
1410
+ "Cl CCc",
1411
+ "CCCC CCO",
1412
+ "CCCC CCCCCCC",
1413
+ "CN CCC",
1414
+ "CCCN CC",
1415
+ "CCOCC Sc",
1416
+ "NN N",
1417
+ "NCCC SC",
1418
+ "COCCN Cc",
1419
+ "4 1",
1420
+ "5 6",
1421
+ "N SC",
1422
+ "N CCCCc",
1423
+ "O NC",
1424
+ "c onc",
1425
+ "CCC Br",
1426
+ "+] /[",
1427
+ "CCCC CCn",
1428
+ "CCO CCOC",
1429
+ "CCO CCOCC",
1430
+ "CCO CCCn",
1431
+ "CCO CCCNc",
1432
+ "Br CC",
1433
+ "CCCCC l",
1434
+ "CCCCC Sc",
1435
+ "CCS S",
1436
+ "CCS CCOC",
1437
+ "OCC CCN",
1438
+ "NCCCN S",
1439
+ "NCCCO Cc",
1440
+ "O H",
1441
+ "S CCSc",
1442
+ "NC n",
1443
+ "CCCC CCS",
1444
+ "CCCC OCc",
1445
+ "CCCC SC",
1446
+ "CS CCSC",
1447
+ "CCS CCN",
1448
+ "OCC Br",
1449
+ "OCCO CCOCCO",
1450
+ "NCCCC SC",
1451
+ "COCCO CCN",
1452
+ "NCCO CCO",
1453
+ "( #",
1454
+ "1 4",
1455
+ "I C",
1456
+ "O n",
1457
+ "O CCSCC",
1458
+ "O CCCCn",
1459
+ "O CCOCCS",
1460
+ "S S",
1461
+ "S SC",
1462
+ "S CCCn",
1463
+ "NC NS",
1464
+ "CO CCCS",
1465
+ "CO CCCCCNC",
1466
+ "CCCC SCC",
1467
+ "CCCC CCSc",
1468
+ "CCO N",
1469
+ "nn ccc",
1470
+ "-]) \\",
1471
+ "+]( \\[",
1472
+ "CS CCCC",
1473
+ "CS CCCNc",
1474
+ "12 3",
1475
+ "CCOCC Cc",
1476
+ "CCOCCO CCOCC",
1477
+ "- [",
1478
+ "1 32",
1479
+ "5 3",
1480
+ "C H",
1481
+ "N P",
1482
+ "O O",
1483
+ "O COc",
1484
+ "P H",
1485
+ "S CCNS",
1486
+ "s ncc",
1487
+ "CCC NCc",
1488
+ "+] )=",
1489
+ "CO CCCc",
1490
+ "CO NS",
1491
+ "CO CCCCNC",
1492
+ "CO CCCCS",
1493
+ "CCCC CCCC",
1494
+ "CCCC CCc",
1495
+ "CCO CCOCc",
1496
+ "CS CCCn",
1497
+ "Br CCC",
1498
+ "Br CCc",
1499
+ "CCCCC Br",
1500
+ "CCS CCOc",
1501
+ "CCS CCCO",
1502
+ "CCS CCn",
1503
+ "OCC l",
1504
+ "CCCO CCO",
1505
+ "+]) /",
1506
+ "SCC Nc",
1507
+ "OCCO CCOc",
1508
+ "CCCCCC Nc",
1509
+ "NCCC F",
1510
+ "NCCC l",
1511
+ "CCCCO CCN",
1512
+ "NCCCC Cc",
1513
+ "OCCCC Sc",
1514
+ "COCCS CCC",
1515
+ "# [",
1516
+ "3 12",
1517
+ "C F",
1518
+ "F O",
1519
+ "F CCC",
1520
+ "N NCc",
1521
+ "N Sc",
1522
+ "O Nc",
1523
+ "O CCCCNC",
1524
+ "O CCCNS",
1525
+ "O CCCCCS",
1526
+ "S H",
1527
+ "S Sc",
1528
+ "S CCCCO",
1529
+ "S CCCCCO",
1530
+ "S CCCCCS",
1531
+ "S SN",
1532
+ "s s",
1533
+ "s sc",
1534
+ "s snc",
1535
+ "NC Sc",
1536
+ "nc no",
1537
+ "CCC OCCNC",
1538
+ "+] #",
1539
+ "+] \\[",
1540
+ "CO CCCCN",
1541
+ "CO CCSCC",
1542
+ "CO CCCCOc",
1543
+ "CO CCSCc",
1544
+ "Cl CCCSc",
1545
+ "CCCC CCOc",
1546
+ "CCCC SCc",
1547
+ "CCCC OCCNC",
1548
+ "CN NC",
1549
+ "CN CCO",
1550
+ "-] /[",
1551
+ "CCO S",
1552
+ "CCO CO",
1553
+ "CCO CCc",
1554
+ "CCO COCC",
1555
+ "+]( /[",
1556
+ "CS CCCc",
1557
+ "CS CCNS",
1558
+ "CCCCC Nc",
1559
+ "CCCCC OCC",
1560
+ "CCCCC NS",
1561
+ "CCS CCc",
1562
+ "CCS CCSc",
1563
+ "NCC Br",
1564
+ "CCCO S",
1565
+ "CCCO CCC",
1566
+ "no cc",
1567
+ "@@]( /",
1568
+ "OCCO Cc",
1569
+ "OCCO CCN",
1570
+ "OCCO CCO",
1571
+ "CCCCCC l",
1572
+ "NCCC SCC",
1573
+ "CCCCO CCCNC",
1574
+ "OCCC OC",
1575
+ "OCCC Br",
1576
+ "OCCC SC",
1577
+ "NO CCc",
1578
+ "SCCC Br",
1579
+ "SCCC SCC",
1580
+ "COCCO Cc",
1581
+ "NCCO CCc",
1582
+ "COCCCN S",
1583
+ "COCCOCC Sc",
1584
+ "SCCCC CCSc",
1585
+ "OCCOCCOCCO CCO"
1586
+ ]
1587
+ }
1588
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<s>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "<pad>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "</s>",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": "<unk>",
30
+ "lstrip": false,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "4": {
37
+ "content": "<mask>",
38
+ "lstrip": true,
39
+ "normalized": true,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ }
44
+ },
45
+ "bos_token": "<s>",
46
+ "clean_up_tokenization_spaces": true,
47
+ "cls_token": "<s>",
48
+ "eos_token": "</s>",
49
+ "errors": "replace",
50
+ "full_tokenizer_file": null,
51
+ "mask_token": "<mask>",
52
+ "model_max_length": 1000000000000000019884624838656,
53
+ "pad_token": "<pad>",
54
+ "sep_token": "</s>",
55
+ "tokenizer_class": "RobertaTokenizer",
56
+ "trim_offsets": true,
57
+ "unk_token": "<unk>"
58
+ }
vocab.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"<s>":0,"<pad>":1,"</s>":2,"<unk>":3,"<mask>":4,"!":5,"\"":6,"#":7,"$":8,"%":9,"&":10,"'":11,"(":12,")":13,"*":14,"+":15,",":16,"-":17,".":18,"/":19,"0":20,"1":21,"2":22,"3":23,"4":24,"5":25,"6":26,"7":27,"8":28,"9":29,":":30,";":31,"<":32,"=":33,">":34,"?":35,"@":36,"A":37,"B":38,"C":39,"D":40,"E":41,"F":42,"G":43,"H":44,"I":45,"J":46,"K":47,"L":48,"M":49,"N":50,"O":51,"P":52,"Q":53,"R":54,"S":55,"T":56,"U":57,"V":58,"W":59,"X":60,"Y":61,"Z":62,"[":63,"\\":64,"]":65,"^":66,"_":67,"`":68,"a":69,"b":70,"c":71,"d":72,"e":73,"f":74,"g":75,"h":76,"i":77,"j":78,"k":79,"l":80,"m":81,"n":82,"o":83,"p":84,"q":85,"r":86,"s":87,"t":88,"u":89,"v":90,"w":91,"x":92,"y":93,"z":94,"{":95,"|":96,"}":97,"~":98,"¡":99,"¢":100,"£":101,"¤":102,"¥":103,"¦":104,"§":105,"¨":106,"©":107,"ª":108,"«":109,"¬":110,"®":111,"¯":112,"°":113,"±":114,"²":115,"³":116,"´":117,"µ":118,"¶":119,"·":120,"¸":121,"¹":122,"º":123,"»":124,"¼":125,"½":126,"¾":127,"¿":128,"À":129,"Á":130,"Â":131,"Ã":132,"Ä":133,"Å":134,"Æ":135,"Ç":136,"È":137,"É":138,"Ê":139,"Ë":140,"Ì":141,"Í":142,"Î":143,"Ï":144,"Ð":145,"Ñ":146,"Ò":147,"Ó":148,"Ô":149,"Õ":150,"Ö":151,"×":152,"Ø":153,"Ù":154,"Ú":155,"Û":156,"Ü":157,"Ý":158,"Þ":159,"ß":160,"à":161,"á":162,"â":163,"ã":164,"ä":165,"å":166,"æ":167,"ç":168,"è":169,"é":170,"ê":171,"ë":172,"ì":173,"í":174,"î":175,"ï":176,"ð":177,"ñ":178,"ò":179,"ó":180,"ô":181,"õ":182,"ö":183,"÷":184,"ø":185,"ù":186,"ú":187,"û":188,"ü":189,"ý":190,"þ":191,"ÿ":192,"Ā":193,"ā":194,"Ă":195,"ă":196,"Ą":197,"ą":198,"Ć":199,"ć":200,"Ĉ":201,"ĉ":202,"Ċ":203,"ċ":204,"Č":205,"č":206,"Ď":207,"ď":208,"Đ":209,"đ":210,"Ē":211,"ē":212,"Ĕ":213,"ĕ":214,"Ė":215,"ė":216,"Ę":217,"ę":218,"Ě":219,"ě":220,"Ĝ":221,"ĝ":222,"Ğ":223,"ğ":224,"Ġ":225,"ġ":226,"Ģ":227,"ģ":228,"Ĥ":229,"ĥ":230,"Ħ":231,"ħ":232,"Ĩ":233,"ĩ":234,"Ī":235,"ī":236,"Ĭ":237,"ĭ":238,"Į":239,"į":240,"İ":241,"ı":242,"IJ":243,"ij":244,"Ĵ":245,"ĵ":246,"Ķ":247,"ķ":248,"ĸ":249,"Ĺ":250,"ĺ":251,"Ļ":252,"ļ":253,"Ľ":254,"ľ":255,"Ŀ":256,"ŀ":257,"Ł":258,"ł":259,"Ń":2
60,"cc":261,"CC":262,"(=":263,"ccc":264,"](":265,"@@":266,"Cc":267,"NC":268,"ccccc":269,"nc":270,"CCC":271,")[":272,"NH":273,"+]":274,"CO":275,"cccc":276,"Nc":277,"Cl":278,"OC":279,"CCN":280,")(":281,"COc":282,"(-":283,"([":284,"CCCC":285,"CN":286,"-]":287,")(=":288,"CCO":289,"nH":290,"nn":291,"-])":292,"+](":293,"CCc":294,")=":295,"sc":296,"CS":297,"ncc":298,"Br":299,"CNC":300,"nnc":301,"NCc":302,"oc":303,"12":304,"+](=":305,"CCCCC":306,"COC":307,"Cn":308,"21":309,"CCCN":310,"cn":311,"Oc":312,"CCOC":313,"CCOCC":314,"+][":315,"cnc":316,"CCS":317,"]([":318,"CCOc":319,"cccs":320,"NCC":321,"cccnc":322,"OCC":323,"CCCO":324,"(/":325,"@]":326,"ccco":327,"CSc":328,"@@]":329,"cnn":330,"CCn":331,"CCNC":332,"32":333,"ccccn":334,"23":335,"no":336,"+])":337,")/":338,"noc":339,"csc":340,"cs":341,"ccncc":342,"cccn":343,"CCCc":344,"Sc":345,"ccnc":346,"SCC":347,"OCc":348,"SC":349,"ccn":350,"ccsc":351,"NNC":352,"@](":353,"OCO":354,"NS":355,"ncnc":356,"NCCc":357,"CNc":358,"@@](":359,"=[":360,"OCCO":361,"ncccc":362,"NN":363,"cncc":364,"CCCCCC":365,"NCCC":366,"on":367,"+]([":368,"CCCCN":369,"ncn":370,"CCCNC":371,"nccs":372,"+]=":373,"CSC":374,"-])[":375,"SCc":376,"CCCn":377,"sccc":378,"cncn":379,"CCSc":380,"34":381,"COCC":382,"nnnn":383,"nccc":384,"(\\":385,"ncccn":386,"COCc":387,"nccn":388,")([":389,"CCSC":390,"ccnn":391,"ccoc":392,"CNS":393,"CCCOc":394,"COCCN":395,"43":396,"@@](=":397,"Fc":398,"CCSCC":399,"-])=":400,"@](=":401,"CSCC":402,"CCCS":403,"cnccn":404,"nnn":405,"/[":406,"coc":407,"nncn":408,"cnnc":409,"NCCN":410,"NNc":411,"nnnc":412,"CCCCO":413,"ncnn":414,"+])[":415,"CCl":416,"Clc":417,"OCCCO":418,"CCNc":419,"CSCc":420,"cnnn":421,"occc":422,"CCCCNC":423,"NCCNC":424,"OCCC":425,"CCNS":426,"onc":427,"CCCOC":428,")=[":429,"nccnc":430,"COCCn":431,"OCCN":432,"cnccc":433,"FC":434,"(\\[":435,"CCCCCCC":436,"NO":437,"COCCNC":438,"ns":439,"cscc":440,"13":441,"cscn":442,"nsc":443,"NCCn":444,"NCCOc":445,"CCCCn":446,"CCCCc":447,"Nn":448,"NCCCc":449,"nonc":450,"ccon":451,"NCCCn":452,"+])("
:453,"scnc":454,"NCCS":455,"NCCCN":456,"ncsc":457,"CNCc":458,"CCCNc":459,"NCCCC":460,"Brc":461,"scc":462,"sccn":463,"SCCC":464,"COCCO":465,"COCCOc":466,"(=[":467,"nncs":468,"ocnc":469,"CCOCc":470,"31":471,"nsnc":472,"ncoc":473,"OCCc":474,"OS":475,"SCCc":476,"CCCCCN":477,"OCCNC":478,"CCCSc":479,"CSCN":480,"COCCCNC":481,"COCCC":482,"cncnc":483,"CCCl":484,"CCOCCN":485,"-])/":486,"co":487,"CSCCS":488,"nnsc":489,"\\[":490,"CCCCOc":491,"CSCCO":492,"NCCO":493,"CBr":494,"CCCCS":495,")-":496,"COCCCN":497,"NCCNc":498,"+]\\":499,"cccnn":500,"](/":501,"OCCn":502,"CON":503,"45":504,"CCCSCC":505,"csnn":506,"OCCOc":507,"@]([":508,"+]/":509,"SCCN":510,")(/":511,"NCCCO":512,"@@]([":513,"OCCCC":514,"CCCSC":515,"ON":516,"ncco":517,"(/[":518,"COCCc":519,"OCOC":520,"snc":521,"ccncn":522,"CSCCC":523,"ccno":524,"ncon":525,"CCSCc":526,"54":527,"+])([":528,"NCCSc":529,"nnco":530,"CCCCCNC":531,"24":532,"COCCOC":533,"CSCCN":534,"CCNCC":535,"nncc":536,"CCCOCC":537,"NCCOC":538,"NNS":539,"CCCCOC":540,"CONC":541,"NOCc":542,"NCCCOC":543,"CCNCc":544,"CNCC":545,"SCCS":546,"snnc":547,"occ":548,")\\":549,"COCCCC":550,"CCCCl":551,"OCCCc":552,"NCCOCC":553,"NCCCCn":554,"COCCCn":555,"SCCOc":556,"ncncc":557,"CNCCc":558,"OCCCNC":559,"CCOCCC":560,"CCCNS":561,"CCOCCNC":562,"cncs":563,"NCCCNC":564,"CCCCSc":565,"CCOCCO":566,"42":567,"OCCCN":568,"CCCCCn":569,"OCCNc":570,"COCO":571,"-])(":572,"OCCOC":573,"OCCSc":574,"OCCS":575,"NCN":576,"OCN":577,"NCCCSc":578,"NCCNS":579,"NOC":580,"CCOCCCNC":581,"cnco":582,"COCCNc":583,"CCCF":584,"CCONC":585,"NCCCCC":586,"+]=[":587,"-])=[":588,"cnoc":589,"OCCCn":590,"]/":591,"CNn":592,"CCOCCCC":593,"CCOCCn":594,"COCCS":595,"ClC":596,"CCCCCc":597,"SCCO":598,"CNCCN":599,"NCCCOc":600,"NOCC":601,"NCCCS":602,"OCn":603,"CCCCNc":604,"CNN":605,"SCCNC":606,"ClCc":607,"-]/":608,"CCSCCC":609,"OCCCOc":610,"SCCn":611,"CSCCNC":612,")/[":613,"COCCCOc":614,"CCOCCS":615,"ccnnc":616,"CCOCCOc":617,"CSCCCNC":618,"NCCCCN":619,"sn":620,"COCCOCC":621,"-][":622,"CCCCCO":623,"SCCCS":624,"ccs":625,"-]=[":
626,"CCCCCS":627,"cnns":628,"COCCNS":629,"COCCCNc":630,"CSCCCCNC":631,"CCBr":632,"CSCCc":633,"NCCCNc":634,"NCCCOCC":635,"SCCCC":636,"oncc":637,"CSCCCN":638,"CSCCOC":639,"CCCSCc":640,"COCCSc":641,"COn":642,"(-[":643,"NCCSCc":644,"COCCCOC":645,"COCCOCCNC":646,"CCCCOCC":647,"CCCCCCNC":648,"CSCCn":649,"CCCCNS":650,"NCCSCC":651,"][":652,"CCCCCOc":653,"Ic":654,"NCCSC":655,"OCCCCC":656,"SCN":657,"COP":658,"CCOP":659,"CSCCOc":660,"cnsn":661,"OCCCl":662,"OCCCSc":663,"nscc":664,"COCOc":665,"BrCc":666,"NCCCCl":667,"OP":668,"SCn":669,"SCCCc":670,"](/[":671,"COS":672,"CCCCCCN":673,"CCOCCCN":674,"+](-":675,"+](/":676,"CCCOCc":677,"CI":678,"NOc":679,"NCCCCCC":680,"OCCNS":681,"SN":682,"BrC":683,"cnsc":684,"OCCF":685,"35":686,"OCCSC":687,"SCCOC":688,"SCCCO":689,"SCCCOc":690,"COCCCCC":691,"ClCC":692,"-])([":693,"CCCCCOC":694,"CCOCCNc":695,"OCCCNc":696,"NCCCCOc":697,"NCCOCc":698,"OCCOCC":699,"OCCCS":700,"OCCSCc":701,"SCCCN":702,"ClCCc":703,"CCCCCCO":704,"CCCCCCCCCCC":705,"CNCCC":706,"CCCNCC":707,"CCOCCSc":708,"NNN":709,"NCCCSC":710,"COCCNCc":711,"41":712,"56":713,"NSC":714,"NCCCCc":715,"ONC":716,"conc":717,"CCCBr":718,"+]/[":719,"CCCCCCn":720,"CCOCCOC":721,"CCOCCOCC":722,"CCOCCCn":723,"CCOCCCNc":724,"BrCC":725,"CCCCCl":726,"CCCCCSc":727,"CCSS":728,"CCSCCOC":729,"OCCCCN":730,"NCCCNS":731,"NCCCOCc":732,"OH":733,"SCCSc":734,"NCn":735,"CCCCCCS":736,"CCCCOCc":737,"CCCCSC":738,"CSCCSC":739,"CCSCCN":740,"OCCBr":741,"OCCOCCOCCO":742,"NCCCCSC":743,"COCCOCCN":744,"NCCOCCO":745,"(#":746,"14":747,"IC":748,"On":749,"OCCSCC":750,"OCCCCn":751,"OCCOCCS":752,"SS":753,"SSC":754,"SCCCn":755,"NCNS":756,"COCCCS":757,"COCCCCCNC":758,"CCCCSCC":759,"CCCCCCSc":760,"CCON":761,"nnccc":762,"-])\\":763,"+](\\[":764,"CSCCCC":765,"CSCCCNc":766,"123":767,"CCOCCCc":768,"CCOCCOCCOCC":769,"-[":770,"132":771,"53":772,"CH":773,"NP":774,"OO":775,"OCOc":776,"PH":777,"SCCNS":778,"sncc":779,"CCCNCc":780,"+])=":781,"COCCCc":782,"CONS":783,"COCCCCNC":784,"COCCCCS":785,"CCCCCCCC":786,"CCCCCCc":787,"CCOCCOCc":788,"CSCCCn":789,"B
rCCC":790,"BrCCc":791,"CCCCCBr":792,"CCSCCOc":793,"CCSCCCO":794,"CCSCCn":795,"OCCl":796,"CCCOCCO":797,"+])/":798,"SCCNc":799,"OCCOCCOc":800,"CCCCCCNc":801,"NCCCF":802,"NCCCl":803,"CCCCOCCN":804,"NCCCCCc":805,"OCCCCSc":806,"COCCSCCC":807,"#[":808,"312":809,"CF":810,"FO":811,"FCCC":812,"NNCc":813,"NSc":814,"ONc":815,"OCCCCNC":816,"OCCCNS":817,"OCCCCCS":818,"SH":819,"SSc":820,"SCCCCO":821,"SCCCCCO":822,"SCCCCCS":823,"SSN":824,"ss":825,"ssc":826,"ssnc":827,"NCSc":828,"ncno":829,"CCCOCCNC":830,"+]#":831,"+]\\[":832,"COCCCCN":833,"COCCSCC":834,"COCCCCOc":835,"COCCSCc":836,"ClCCCSc":837,"CCCCCCOc":838,"CCCCSCc":839,"CCCCOCCNC":840,"CNNC":841,"CNCCO":842,"-]/[":843,"CCOS":844,"CCOCO":845,"CCOCCc":846,"CCOCOCC":847,"+](/[":848,"CSCCCc":849,"CSCCNS":850,"CCCCCNc":851,"CCCCCOCC":852,"CCCCCNS":853,"CCSCCc":854,"CCSCCSc":855,"NCCBr":856,"CCCOS":857,"CCCOCCC":858,"nocc":859,"@@](/":860,"OCCOCc":861,"OCCOCCN":862,"OCCOCCO":863,"CCCCCCl":864,"NCCCSCC":865,"CCCCOCCCNC":866,"OCCCOC":867,"OCCCBr":868,"OCCCSC":869,"NOCCc":870,"SCCCBr":871,"SCCCSCC":872,"COCCOCc":873,"NCCOCCc":874,"COCCCNS":875,"COCCOCCSc":876,"SCCCCCCSc":877,"OCCOCCOCCOCCO":878}