dnagpt_smaller_dict / tokenizer.json
vojtam's picture
Upload tokenizer.json
d814c3a verified
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "<|endoftext|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
"pre_tokenizer": {
"type": "ByteLevel",
"add_prefix_space": false,
"trim_offsets": true,
"use_regex": false
},
"post_processor": null,
"decoder": null,
"model": {
"type": "BPE",
"dropout": null,
"unk_token": null,
"continuing_subword_prefix": null,
"end_of_word_suffix": null,
"fuse_unk": false,
"byte_fallback": false,
"ignore_merges": false,
"vocab": {
"<|endoftext|>": 0,
"A": 1,
"C": 2,
"G": 3,
"K": 4,
"M": 5,
"R": 6,
"T": 7,
"W": 8,
"Y": 9,
"Ċ": 10,
"TT": 11,
"AA": 12,
"TG": 13,
"AG": 14,
"CC": 15,
"TC": 16,
"AC": 17,
"GG": 18,
"ATT": 19,
"AT": 20,
"ATG": 21,
"GC": 22,
"TAA": 23,
"TCC": 24,
"ACC": 25,
"AAAA": 26,
"AGG": 27,
"AGC": 28,
"TTC": 29,
"ATC": 30,
"AAG": 31,
"TTTT": 32,
"TGC": 33,
"TGG": 34,
"AAC": 35,
"TTG": 36,
"TAG": 37,
"TAC": 38,
"CCC": 39,
"TATT": 40,
"TGGG": 41,
"AGAA": 42,
"TAT": 43,
"AGGG": 44,
"TTTC": 45,
"AGGC": 46,
"AGCC": 47,
"TGTG": 48,
"ATAA": 49,
"ATTC": 50,
"TTGG": 51,
"ACAC": 52,
"AAGG": 53,
"TCCC": 54,
"TCTC": 55,
"TATG": 56,
"TTTG": 57,
"TTCC": 58,
"AGAG": 59,
"ATGG": 60,
"AAAC": 61,
"AGTG": 62,
"ACCC": 63,
"TCTG": 64,
"AGAC": 65,
"ATTG": 66,
"ATCC": 67,
"ATGC": 68,
"ATAC": 69,
"TCAC": 70,
"AATG": 71,
"TGCC": 72,
"AAAG": 73,
"AATT": 74,
"TAAG": 75,
"TATC": 76,
"AAGC": 77,
"TAAC": 78,
"TGGC": 79,
"TTGC": 80,
"AACC": 81,
"GGG": 82,
"GGC": 83,
"TAGC": 84,
"ATAT": 85,
"TACC": 86,
"TTAC": 87,
"TAGG": 88,
"GCC": 89,
"ATATT": 90,
"AATC": 91,
"AGTC": 92,
"AAAAC": 93,
"AGTT": 94,
"TGAC": 95,
"TCTT": 96,
"AAAAG": 97,
"AAATT": 98,
"TGAA": 99,
"TTTTG": 100,
"TTTTC": 101,
"ATAG": 102,
"TGTC": 103,
"AATAA": 104,
"ATTTT": 105,
"TGAG": 106,
"AAATG": 107,
"ACAG": 108,
"ACTT": 109,
"TGTT": 110,
"AGGCC": 111,
"TGGGC": 112,
"ATTTC": 113,
"ACTC": 114,
"AAAT": 115,
"ACG": 116,
"ACTG": 117,
"TTTAA": 118,
"TGGCC": 119,
"TCAG": 120,
"ATTCC": 121,
"TATTC": 122,
"ATATG": 123,
"TCCCC": 124,
"TGTAA": 125,
"ATCTC": 126,
"ATGCC": 127,
"ACAA": 128,
"TTAA": 129,
"AAAAAAAA": 130,
"TTTCC": 131,
"AGAAG": 132,
"TGGGG": 133,
"ACCCC": 134,
"AGGGC": 135,
"TTTTTTTT": 136,
"AGATG": 137,
"TAAAA": 138,
"TTCTC": 139,
"ATTAC": 140,
"ATTGC": 141,
"GGCC": 142,
"ACCTC": 143,
"AAGCC": 144,
"GTG": 145,
"AGAAC": 146,
"ATAAAA": 147,
"TTATT": 148,
"AAATC": 149,
"ATGGC": 150,
"ACTGC": 151,
"AGAGC": 152,
"AATTC": 153,
"TATTG": 154,
"AAGGC": 155,
"AAAGC": 156,
"AAAGG": 157,
"AGAGG": 158,
"AGTTC": 159,
"TTCCC": 160,
"TTGCC": 161,
"TTGGC": 162,
"AGAAAA": 163,
"TTTAG": 164,
"GGGC": 165,
"AGTGC": 166,
"ACTCC": 167,
"ATATC": 168,
"ACATT": 169,
"AGGGG": 170,
"ATCCC": 171,
"ATAAC": 172,
"TCAGG": 173,
"TTTGC": 174,
"AGTAA": 175,
"TGAGG": 176,
"TCG": 177,
"AGATT": 178,
"ATGGG": 179,
"AGCCC": 180,
"TTGGG": 181,
"TATCC": 182,
"ACCAC": 183,
"TCTCC": 184,
"ATAAG": 185,
"AGATC": 186,
"TCTTC": 187,
"TGTAG": 188,
"AGACC": 189,
"TGTGG": 190,
"ATAGC": 191,
"AAACC": 192,
"TGTGC": 193,
"TGAAG": 194,
"AGTGG": 195,
"TTTAC": 196,
"GGGG": 197,
"AATGG": 198,
"TATGC": 199,
"AATAG": 200,
"TGCCC": 201,
"ATCAC": 202,
"AATAC": 203,
"AATGC": 204,
"TTTGG": 205,
"AAGGG": 206,
"TCATT": 207,
"TCTGC": 208,
"ACATG": 209,
"ACTTC": 210,
"ACAGC": 211,
"TGATG": 212,
"TGTTC": 213,
"TGATT": 214,
"AGTAG": 215,
"AATTTT": 216,
"AACCC": 217,
"AATCC": 218,
"TGAGC": 219,
"TCAGC": 220,
"TATGG": 221,
"AATTG": 222,
"TCCCAGC": 223,
"TCTTG": 224,
"ATACC": 225,
"TGTTG": 226,
"TTTATT": 227,
"CCCC": 228,
"TGAAC": 229,
"TACCC": 230,
"ATAGG": 231,
"TGTCC": 232,
"TTAAAA": 233,
"AGGCTGG": 234,
"AGTTG": 235,
"TCTTTT": 236,
"AGTCC": 237,
"AAGAC": 238,
"AAATAA": 239,
"AĊ": 240,
"ATTTTC": 241,
"TCTAA": 242,
"TCATC": 243,
"AGAAGC": 244,
"TTATG": 245,
"TTAAC": 246,
"AGTGGC": 247,
"TAGGC": 248,
"TCACC": 249,
"TGTTTT": 250,
"TAGCC": 251,
"TTTAT": 252,
"ATTGG": 253,
"TTAAG": 254,
"ACTAC": 255,
"TCAAG": 256,
"ATTTAA": 257,
"TCTGG": 258,
"ACAGG": 259,
"ACTAA": 260,
"TCAGCC": 261,
"TGAGGC": 262,
"TCATG": 263,
"AAAGAA": 264,
"AATAT": 265,
"TTATC": 266,
"ACATC": 267,
"ACAAC": 268,
"TGACC": 269,
"TGAAAA": 270,
"TTTTCC": 271,
"ACACC": 272,
"AATATT": 273,
"TCAAC": 274,
"ATATAA": 275,
"TGATC": 276,
"TAAGG": 277,
"TAAGC": 278,
"TTGGCC": 279,
"AGTAC": 280,
"ACTTG": 281,
"AAACAC": 282,
"TGTAC": 283,
"ACAAG": 284,
"AGTTTT": 285,
"TTAGC": 286,
"TAGGG": 287,
"ATTTTG": 288,
"ACAAAA": 289,
"TGTATT": 290,
"TAATTTT": 291,
"TAACC": 292,
"AGAGAA": 293,
"ACTGG": 294,
"TCTGCC": 295,
"ATTATT": 296,
"TCTAC": 297,
"TĊ": 298,
"TTTGTG": 299,
"TTAGG": 300,
"TCCTG": 301,
"TAATAA": 302,
"ACCATG": 303,
"AGCG": 304,
"TCTAG": 305,
"TCAA": 306,
"TTAG": 307,
"GCCC": 308,
"TTACC": 309,
"ACGG": 310,
"ATATTC": 311,
"AGAAGG": 312,
"AAAAGC": 313,
"TTCAC": 314,
"AAAACC": 315,
"AGGAGG": 316,
"AAAAGG": 317,
"ACACACAC": 318,
"TCAGAA": 319,
"TTCG": 320,
"AACG": 321,
"ATTTCC": 322,
"TGTCTC": 323,
"TCACTGC": 324,
"TGTAT": 325,
"ACTAG": 326,
"TGGGATTAC": 327,
"TTCCCC": 328,
"TGCG": 329,
"ATTTTTT": 330,
"ACGC": 331,
"TGTGTGTG": 332,
"AGTTTC": 333,
"AGGAAG": 334,
"TCTTTC": 335,
"AAATTC": 336,
"AATTTC": 337,
"AAAGTGC": 338,
"ACCG": 339,
"TCGC": 340,
"TCCG": 341,
"ACTTTT": 342,
"ATTTG": 343,
"AAGTGG": 344,
"ATATAC": 345,
"TCCTGCC": 346,
"TAAAAC": 347,
"AAAAATT": 348,
"AGCCCC": 349,
"AGTTTG": 350,
"AGCAGC": 351,
"TTGCCC": 352,
"AAATAC": 353,
"TGGGGC": 354,
"TTCTTC": 355,
"ATAGAA": 356,
"AAAATAA": 357,
"AGATGG": 358,
"TGTGGC": 359,
"AAGTG": 360,
"AGGAGAA": 361,
"TTTGCC": 362,
"AAATGG": 363,
"AAATGC": 364,
"TGAGAC": 365,
"ATCG": 366,
"AGGGGC": 367,
"TGTAATCCCAGC": 368,
"AGTCTC": 369,
"TGGGGG": 370,
"TGGGCC": 371,
"ACAGAA": 372,
"ATATTG": 373,
"TCCTCC": 374,
"TTTTGC": 375,
"TAAATG": 376,
"TTCTG": 377,
"AAAGAC": 378,
"AGGGCC": 379,
"ATTTAC": 380,
"ATACAC": 381,
"AACAC": 382,
"AAGAAG": 383,
"ATTTGC": 384,
"ACTTTGGG": 385,
"TGTGCC": 386,
"AGAGGC": 387,
"AGATAA": 388,
"TCCTC": 389,
"AGGTGG": 390,
"TTTCCC": 391,
"GTGG": 392,
"TTTTGG": 393,
"TATTCC": 394,
"AGCAC": 395,
"TTCTCC": 396,
"TATATT": 397,
"TTTATG": 398,
"AATGGC": 399,
"AACTC": 400,
"ATGTG": 401,
"AGAGGG": 402,
"TAAAC": 403,
"AAATTG": 404,
"AGACAC": 405,
"TTCTGC": 406,
"AGGTG": 407,
"TATAC": 408,
"AGTATT": 409,
"AAAGAG": 410,
"AGGCTGAGGC": 411,
"AGGAC": 412,
"TTTGGC": 413,
"ATTCCC": 414,
"TAAAAG": 415,
"TCAAAA": 416,
"TATATG": 417,
"TGAGAA": 418,
"ATATGC": 419,
"TGTTTC": 420,
"ATTTATT": 421,
"AGATTC": 422,
"TGCTGC": 423,
"TTGAAC": 424,
"AGTGCC": 425,
"ATATCC": 426,
"AAGAAAA": 427,
"ATATATAT": 428,
"TTTGGG": 429,
"AACAAC": 430,
"AATATG": 431,
"AGCTC": 432,
"TTTAAG": 433,
"ATCTTC": 434,
"AATATC": 435,
"AAATCC": 436,
"TACTC": 437,
"TCTGGC": 438,
"ATGGCC": 439,
"AATCTC": 440,
"ACTCCAGCC": 441,
"TCCAC": 442,
"AGTAGC": 443,
"TTATAA": 444,
"AAAGCC": 445,
"TTTATC": 446,
"ATCACC": 447,
"TGCAC": 448,
"AAAGGG": 449,
"TACG": 450,
"TCTTCC": 451,
"AATTTG": 452,
"AAATATT": 453,
"AGTAT": 454,
"ATGATG": 455,
"TCTCCC": 456,
"AGGAGC": 457,
"AAAGGC": 458,
"AAAGTG": 459,
"TAAATT": 460,
"AAGGCC": 461,
"TTATTC": 462,
"TTGCAGTG": 463,
"ATATGG": 464,
"TTGTTG": 465,
"AACTCC": 466,
"AGCCG": 467,
"GCG": 468,
"AATTCC": 469,
"ATCTCC": 470,
"ATTGCC": 471,
"TGGAAG": 472,
"ATGAC": 473,
"ACCTCC": 474,
"TTGGGG": 475,
"TCGG": 476,
"AATAAC": 477,
"TCCCAAAGTGC": 478,
"ATCCCC": 479,
"AAGAAC": 480,
"AATGCC": 481,
"AAGACC": 482,
"CCCAGC": 483,
"ATTTAG": 484,
"AATTGC": 485,
"AGAGCC": 486,
"TCACCC": 487,
"TGGGATTACAGGC": 488,
"TTTTTAA": 489,
"TGCTG": 490,
"AGCAGG": 491,
"ATGTAA": 492,
"ATAGAG": 493,
"TCATTC": 494,
"ATGTGG": 495,
"TTGGGC": 496,
"AAAAAC": 497,
"TCCCG": 498,
"TTTAAC": 499,
"TGAAAC": 500,
"AAGGGG": 501,
"TCTGTG": 502,
"AGGCTGGAGTGC": 503,
"ATTTGG": 504,
"AGCTGC": 505,
"AAACCC": 506,
"TAAAAATAC": 507,
"AGCTCC": 508,
"AGGAAC": 509,
"ATGAAG": 510,
"AGTTCC": 511,
"TTAAAC": 512,
"AGAACC": 513,
"TCTCG": 514,
"TTCAAC": 515,
"AATGTG": 516,
"AATAAG": 517,
"ATCATC": 518,
"ACCACC": 519,
"ATGGGG": 520,
"AGATGC": 521,
"AGGCCC": 522,
"AAGGGC": 523,
"TCTATT": 524,
"TTCTTG": 525,
"TTCTTTT": 526,
"AGTGGG": 527,
"TTCTGG": 528,
"AACCCC": 529,
"TAGAGAC": 530,
"TAGAC": 531,
"ATAGTG": 532,
"AGCTG": 533,
"AGCTGG": 534,
"TCTTTG": 535,
"TTCTT": 536,
"AGGTGC": 537,
"ATGTGC": 538,
"TGCCCC": 539,
"AGTGTG": 540,
"ATCTG": 541,
"TATAAAA": 542,
"ACAGAG": 543,
"AGCAAG": 544,
"TCTTGC": 545,
"ACTCTG": 546,
"TTCTAA": 547,
"ATAGGC": 548,
"ATGTTG": 549,
"ATACCC": 550,
"ATCTGC": 551,
"ATAGAC": 552,
"TTAGAA": 553,
"ATAAGC": 554,
"ATCTTG": 555,
"TTTTTTG": 556,
"TTCTAG": 557,
"TTAATT": 558,
"TCCTGC": 559,
"ATTTTTC": 560,
"AAGTAA": 561,
"TGGCCC": 562,
"TACAC": 563,
"AGGTC": 564,
"AAGCG": 565,
"AGACCC": 566,
"AACATGG": 567,
"TTCATG": 568,
"ATCATG": 569,
"AGGGGG": 570,
"TAAGCC": 571,
"AAGATG": 572,
"TCCCCC": 573,
"AGATAC": 574,
"AGGTTC": 575,
"ATTAAAA": 576,
"AGTATG": 577,
"ACCGC": 578,
"TGGTGG": 579,
"AGTCTG": 580,
"AATCAC": 581,
"ATAAGG": 582,
"ATTTAT": 583,
"TTGAC": 584,
"GGGCC": 585,
"AAGTC": 586,
"TTAGCC": 587,
"TTATTG": 588,
"AGCTAC": 589,
"AAAAAAAAAAAAAAAA": 590,
"TCCTGCCTCAGCC": 591,
"TCCTTC": 592,
"TATTTATT": 593,
"TGTTTG": 594,
"TGGAGC": 595,
"AAGAGG": 596,
"ATTATG": 597,
"TGGAC": 598,
"TTTTTTTTTTTTTTTT": 599,
"TTGAACC": 600,
"TCCACC": 601,
"AACAAAA": 602,
"AGATTG": 603,
"TTCATC": 604,
"TTGTG": 605,
"AAATATC": 606,
"TAAAAAA": 607,
"TGCTC": 608,
"CCCG": 609,
"ACTTTC": 610,
"ATGGGC": 611,
"AGTCAC": 612,
"TGGAGG": 613,
"AGTAAC": 614,
"AATCTG": 615,
"TACTAAAAATAC": 616,
"ATTAAC": 617,
"TACAAAA": 618,
"TGGTG": 619,
"ATGAAC": 620,
"TTTACC": 621,
"TTCAGC": 622,
"TAGAAC": 623,
"TGCAGC": 624,
"AATAGC": 625,
"AATGGG": 626,
"ATAACC": 627,
"ATGTC": 628,
"AAGAGC": 629,
"TTGTAA": 630,
"ATAGTT": 631,
"ATCTAA": 632,
"TTAAAG": 633,
"AAATAT": 634,
"AGGAAAA": 635,
"AGTAAG": 636,
"AGGCCG": 637,
"ATAAAAC": 638,
"AATCCC": 639,
"TGTATG": 640,
"TGAAGG": 641,
"TCCAGC": 642,
"TCCTGG": 643,
"ACCTGC": 644,
"TATTGC": 645,
"ACTGTG": 646,
"TTAATG": 647,
"TTCATT": 648,
"TGATTC": 649,
"TCCTGACCTC": 650,
"TAGAAG": 651,
"AGTCCC": 652,
"TGTGGG": 653,
"ACCCCC": 654,
"AGGCGG": 655,
"AGGTAA": 656,
"AGGATG": 657,
"TATATC": 658,
"TCACTGCAACC": 659,
"AGAAAAG": 660,
"TGGAAC": 661,
"AGTTTTG": 662,
"TCTGGG": 663,
"GCCCC": 664,
"AATACC": 665,
"AGTTGG": 666,
"GTCTC": 667,
"AAGATC": 668,
"ATAAATAA": 669,
"AGAAAAC": 670,
"TAGGCC": 671,
"ACTGCACTCCAGCC": 672,
"ATAGCC": 673,
"AGATCC": 674,
"ATGCCC": 675,
"TATAAC": 676,
"AGTTAC": 677,
"TGTTCC": 678,
"TTCTTTC": 679,
"AAGCCC": 680,
"TTTAGC": 681,
"ACTTCC": 682,
"AAATAG": 683,
"AGTATC": 684,
"TGAGCC": 685,
"TTCTAC": 686,
"TGGTGC": 687,
"TCAGGG": 688,
"TTGTAG": 689,
"TCCATG": 690,
"TGGGCAAC": 691,
"TATGCC": 692,
"TTCAAG": 693,
"AGCAAC": 694,
"ATAGGG": 695,
"TGAAGC": 696,
"ATTATC": 697,
"AAATAAAA": 698,
"TTGTTTT": 699,
"TAATAC": 700,
"ACATGG": 701,
"ACATTC": 702,
"TTGAGG": 703,
"TCACAC": 704,
"TGAGAG": 705,
"AGCATC": 706,
"AGACCAGCC": 707,
"TCTAT": 708,
"AGTTGC": 709,
"TGATCC": 710,
"GTGC": 711,
"AACATC": 712,
"TCACAGAG": 713,
"ATGAGCC": 714,
"ATGTTC": 715,
"TCAGGC": 716,
"ACAGCC": 717,
"TTATAC": 718,
"TGTCTG": 719,
"TGAATG": 720,
"TCCTTG": 721,
"ACAGGC": 722,
"AAAAAAG": 723,
"AGGCTGAGGCAGGAGAA": 724,
"TATGGC": 725,
"ATAAAAG": 726,
"TCAGAG": 727,
"AGCACC": 728,
"AAAAAAC": 729,
"TGATAA": 730,
"TGTAATCCCAGCACTTTGGG": 731,
"TGAGGG": 732,
"TCATGCC": 733,
"TTCAGG": 734,
"TCCATC": 735,
"TTTCTTTC": 736,
"ATTAAG": 737,
"AAACG": 738,
"TGGCTCAC": 739,
"TATCCC": 740,
"TTATTTT": 741,
"TCCCAAG": 742,
"TGATGG": 743,
"ATTTCTC": 744,
"TAGAAAA": 745,
"TCCAGG": 746,
"TGCTTC": 747,
"TCATTG": 748,
"TACCCC": 749,
"AATAGG": 750,
"TTGAAG": 751,
"TCAAGC": 752,
"AGCTAA": 753,
"TTTAGG": 754,
"AATTGG": 755,
"ACAAAC": 756,
"ACCAAC": 757,
"AGCTTC": 758,
"AATCTGC": 759,
"ACAGTG": 760,
"TACTAA": 761,
"TCCTGAG": 762,
"AGCATT": 763,
"TCAAAC": 764,
"ATCAAC": 765,
"ATGAAAA": 766,
"ACAGGG": 767,
"ACCTGG": 768,
"ACCTTG": 769,
"TTAAGG": 770,
"TGGGACTAC": 771,
"TTATCC": 772,
"TCTCTCTC": 773,
"AAGTGC": 774,
"ACTTTG": 775,
"ACACG": 776,
"ATGAGG": 777,
"ATGTAC": 778,
"AAACGGG": 779,
"ATTTTTG": 780,
"TCATCC": 781,
"ATGTCC": 782,
"GTGAGCC": 783,
"TTGAACCC": 784,
"TAATGC": 785,
"TGAATT": 786,
"ATTTTCC": 787,
"AGTAGG": 788,
"TCTTGG": 789,
"TCAGTG": 790,
"TAAATC": 791,
"ACAGAC": 792,
"GGGGC": 793,
"TCAAGG": 794,
"ATGATC": 795,
"TCCGCC": 796,
"ATTGGC": 797,
"ATAGTC": 798,
"TGAAAG": 799,
"AGGTAG": 800,
"TGCTGG": 801,
"AGGTTG": 802,
"ACATAA": 803,
"ACTAT": 804,
"TATTGG": 805,
"TTACAC": 806,
"AGGTGTG": 807,
"TGTTGG": 808,
"AACTTC": 809,
"TGAGTG": 810,
"TTAAATT": 811,
"TTAGAG": 812,
"TTGTATT": 813,
"TGCTCC": 814,
"TTTTTTTTTTTT": 815,
"ACATGC": 816,
"AAAAAAAAAAAA": 817,
"ACCATC": 818,
"TGACAC": 819,
"TAATAG": 820,
"TCATAA": 821,
"TTAAGC": 822,
"TGTATC": 823,
"AGAGAGAG": 824,
"AAGTTG": 825,
"ACCAGC": 826,
"TGCAGG": 827,
"ACAATG": 828,
"AAAGAAAA": 829,
"TCCTGGG": 830,
"ATGTAG": 831,
"AGGCG": 832,
"TAATTC": 833,
"TATGGG": 834,
"ATTTTAC": 835,
"ATCTAC": 836,
"AGAAAGAA": 837,
"TTTCACCATG": 838,
"TTTTAAAA": 839,
"AAGTTC": 840,
"ACTGGG": 841,
"TGTAAG": 842,
"ACTATT": 843,
"TGAAACCCC": 844,
"AGGCTGC": 845,
"TGATGC": 846,
"TTAACC": 847,
"TCATGG": 848,
"AGTACC": 849,
"AGATATT": 850,
"TGTCCC": 851,
"ACTGCC": 852,
"TAATTG": 853,
"AGGATGG": 854,
"AAGTAG": 855,
"TTATGC": 856,
"AGCATG": 857,
"AGGATC": 858,
"ACCTTC": 859,
"TGTCAC": 860,
"ATTCTCAGAA": 861,
"TCCCAGCTAC": 862,
"TTTGTGATG": 863,
"ACATCC": 864,
"TCCTAA": 865,
"TACATG": 866,
"TCGGCC": 867,
"TCAAAG": 868,
"TTAATC": 869,
"ATTAGG": 870,
"TTACCC": 871,
"AGACG": 872,
"TGGTAA": 873,
"AAATAAC": 874,
"GCGG": 875,
"TGTTGC": 876,
"ATTCG": 877,
"ACTCTC": 878,
"TGGCG": 879,
"TAATTTTTGTATT": 880,
"ACATAC": 881,
"TCCAAC": 882,
"ATTGGG": 883,
"TGGGTGAC": 884,
"ATTCTCCTGCCTCAGCC": 885,
"TGTCTT": 886,
"TACAAC": 887,
"ACCTAA": 888,
"AATTTTG": 889,
"ATCTGG": 890,
"TCAATG": 891,
"TCATGC": 892,
"TATTTTG": 893,
"TCTATC": 894,
"ATTTCG": 895,
"ACAAGG": 896,
"ATĊ": 897,
"TTATATT": 898,
"TGGGCG": 899,
"ATGAGC": 900,
"TCTTAC": 901,
"AGGTAC": 902,
"ACCTAC": 903,
"TCTATG": 904,
"ACTCCC": 905,
"ATCATT": 906,
"ATAATAA": 907,
"ACTATG": 908,
"AGATGGGG": 909,
"AGGAGGC": 910,
"AAGTCC": 911,
"TAGTAA": 912,
"ATTCAAC": 913,
"TTTAGTAGAGAC": 914,
"TTTCTTTT": 915,
"AAGTAC": 916,
"ACCTGCC": 917,
"TGTAAC": 918,
"ATATATG": 919,
"TGCATG": 920,
"TTAGTG": 921,
"ATATAAAA": 922,
"AATTTAA": 923,
"TGGAAAA": 924,
"TATTTTTT": 925,
"TTATGG": 926,
"AAATTCC": 927,
"TTAAATG": 928,
"TTAGGG": 929,
"TAAGGC": 930,
"AACATG": 931,
"ACATTG": 932,
"TGGGAGG": 933,
"TTGTTC": 934,
"TTGTGC": 935,
"TCTGTCTC": 936,
"ACACCC": 937,
"AGGCCCC": 938,
"AACAGC": 939,
"TGGATC": 940,
"TGGATG": 941,
"TGTTAC": 942,
"AAACAAAC": 943,
"ATAAATG": 944,
"TTAATAA": 945,
"ACAAAG": 946,
"AACTGC": 947,
"AACATT": 948,
"TTTGTTTT": 949,
"TGGTTC": 950,
"AGTTCG": 951,
"ACCAAG": 952,
"TATTTTC": 953,
"ACCAGG": 954,
"TCACG": 955,
"TCAACC": 956,
"TCAGAC": 957,
"TTGAGC": 958,
"ACTTGG": 959,
"ATATATT": 960,
"TATAAG": 961,
"ATGGTGGC": 962,
"TTTATTC": 963,
"AAAATAC": 964,
"AGCTGAG": 965,
"TCAGTC": 966,
"TTTGTAA": 967,
"TTAGAC": 968,
"ACTGGC": 969,
"AACTAC": 970,
"TGACCC": 971,
"ATCAGC": 972,
"AGATAG": 973,
"AAACTCC": 974,
"TGTTTTC": 975,
"ACCACG": 976,
"ATACG": 977,
"TCCAGCC": 978,
"TTGGAAACGGG": 979,
"TGGTAG": 980,
"AGAAATG": 981,
"TCTACC": 982,
"ACGCC": 983,
"AACTAA": 984,
"TATAGC": 985,
"AAAAATG": 986,
"AGATTTC": 987,
"TGAACC": 988,
"ACTTGC": 989,
"TATATAA": 990,
"TTCTTCC": 991,
"AAAGAAG": 992,
"ATTTTTAA": 993,
"TTTCG": 994,
"ATTTCTG": 995,
"ACTCAC": 996,
"TTAGTT": 997,
"ATATTTGG": 998,
"AGGAAGG": 999
},
"merges": [
"T T",
"A A",
"T G",
"A G",
"C C",
"T C",
"A C",
"G G",
"A TT",
"A T",
"A TG",
"G C",
"T AA",
"T CC",
"A CC",
"AA AA",
"AG G",
"AG C",
"TT C",
"A TC",
"AA G",
"TT TT",
"TG C",
"TG G",
"AA C",
"TT G",
"T AG",
"T AC",
"CC C",
"T ATT",
"TG GG",
"AG AA",
"T AT",
"AG GG",
"TT TC",
"AG GC",
"AG CC",
"TG TG",
"AT AA",
"ATT C",
"TT GG",
"AC AC",
"AA GG",
"TCC C",
"TC TC",
"T ATG",
"TT TG",
"TT CC",
"AG AG",
"ATG G",
"AA AC",
"AG TG",
"ACC C",
"TC TG",
"AG AC",
"ATT G",
"AT CC",
"ATG C",
"AT AC",
"TC AC",
"AA TG",
"TG CC",
"AA AG",
"AA TT",
"TAA G",
"T ATC",
"AA GC",
"TAA C",
"TG GC",
"TT GC",
"AA CC",
"GG G",
"GG C",
"T AGC",
"AT AT",
"T ACC",
"TT AC",
"T AGG",
"G CC",
"AT ATT",
"AA TC",
"AG TC",
"AAAA C",
"AG TT",
"TG AC",
"TC TT",
"AAAA G",
"AA ATT",
"TG AA",
"TTTT G",
"TT TTC",
"AT AG",
"TG TC",
"AA TAA",
"ATT TT",
"TG AG",
"AA ATG",
"AC AG",
"AC TT",
"TG TT",
"AGG CC",
"TGGG C",
"ATT TC",
"AC TC",
"AA AT",
"AC G",
"AC TG",
"TT TAA",
"TGG CC",
"TC AG",
"ATT CC",
"TATT C",
"AT ATG",
"TCC CC",
"TG TAA",
"ATC TC",
"ATG CC",
"AC AA",
"TT AA",
"AAAA AAAA",
"TT TCC",
"AG AAG",
"TGGG G",
"ACC CC",
"AGGG C",
"TTTT TTTT",
"AG ATG",
"TAA AA",
"TTC TC",
"ATT AC",
"ATT GC",
"GG CC",
"ACC TC",
"AAG CC",
"G TG",
"AG AAC",
"AT AAAA",
"TT ATT",
"AA ATC",
"ATG GC",
"AC TGC",
"AG AGC",
"AA TTC",
"TATT G",
"AAGG C",
"AA AGC",
"AA AGG",
"AG AGG",
"AG TTC",
"TT CCC",
"TTG CC",
"TTGG C",
"AG AAAA",
"TT TAG",
"GG GC",
"AG TGC",
"AC TCC",
"AT ATC",
"AC ATT",
"AGGG G",
"AT CCC",
"AT AAC",
"TC AGG",
"TT TGC",
"AG TAA",
"TG AGG",
"TC G",
"AG ATT",
"ATG GG",
"AG CCC",
"TTGG G",
"TAT CC",
"ACC AC",
"TC TCC",
"AT AAG",
"AG ATC",
"TC TTC",
"TG TAG",
"AG ACC",
"TG TGG",
"AT AGC",
"AA ACC",
"TG TGC",
"TG AAG",
"AG TGG",
"TT TAC",
"GG GG",
"AA TGG",
"TATG C",
"AA TAG",
"TG CCC",
"ATC AC",
"AA TAC",
"AA TGC",
"TT TGG",
"AAGG G",
"TC ATT",
"TC TGC",
"AC ATG",
"AC TTC",
"AC AGC",
"TG ATG",
"TG TTC",
"TG ATT",
"AG TAG",
"AA TTTT",
"AA CCC",
"AA TCC",
"TG AGC",
"TC AGC",
"TATG G",
"AA TTG",
"TCCC AGC",
"TC TTG",
"AT ACC",
"TG TTG",
"TT TATT",
"CC CC",
"TG AAC",
"T ACCC",
"AT AGG",
"TG TCC",
"TT AAAA",
"AGGC TGG",
"AG TTG",
"TC TTTT",
"AG TCC",
"AAG AC",
"AA ATAA",
"A Ċ",
"ATT TTC",
"TC TAA",
"TC ATC",
"AGAA GC",
"TT ATG",
"TT AAC",
"AGTG GC",
"TAG GC",
"TC ACC",
"TG TTTT",
"TAG CC",
"TT TAT",
"ATT GG",
"TT AAG",
"AC TAC",
"TC AAG",
"ATT TAA",
"TC TGG",
"AC AGG",
"AC TAA",
"TC AGCC",
"TG AGGC",
"TC ATG",
"AA AGAA",
"AA TAT",
"TT ATC",
"AC ATC",
"AC AAC",
"TG ACC",
"TG AAAA",
"TTTT CC",
"AC ACC",
"AA TATT",
"TC AAC",
"AT ATAA",
"TG ATC",
"TAA GG",
"TAA GC",
"TTGG CC",
"AG TAC",
"AC TTG",
"AA ACAC",
"TG TAC",
"AC AAG",
"AG TTTT",
"TT AGC",
"TAG GG",
"ATT TTG",
"AC AAAA",
"TG TATT",
"TAA TTTT",
"TAA CC",
"AG AGAA",
"AC TGG",
"TCTG CC",
"ATT ATT",
"TC TAC",
"T Ċ",
"TT TGTG",
"TT AGG",
"TCC TG",
"TAA TAA",
"ACC ATG",
"AGC G",
"TC TAG",
"TC AA",
"TT AG",
"G CCC",
"TT ACC",
"AC GG",
"AT ATTC",
"AGAA GG",
"AAAA GC",
"TTC AC",
"AAAA CC",
"AGG AGG",
"AAAA GG",
"ACAC ACAC",
"TC AGAA",
"TTC G",
"AAC G",
"ATT TCC",
"TG TCTC",
"TCAC TGC",
"TG TAT",
"AC TAG",
"TGGG ATTAC",
"TTCC CC",
"TGC G",
"ATT TTTT",
"AC GC",
"TGTG TGTG",
"AG TTTC",
"AGG AAG",
"TC TTTC",
"AA ATTC",
"AA TTTC",
"AAAG TGC",
"ACC G",
"TC GC",
"TCC G",
"AC TTTT",
"ATT TG",
"AAG TGG",
"AT ATAC",
"TCC TGCC",
"TAA AAC",
"AAAA ATT",
"AGCC CC",
"AG TTTG",
"AGC AGC",
"TTG CCC",
"AA ATAC",
"TGGG GC",
"TTC TTC",
"AT AGAA",
"AAAA TAA",
"AG ATGG",
"TGTG GC",
"AAG TG",
"AGG AGAA",
"TTTG CC",
"AA ATGG",
"AA ATGC",
"TG AGAC",
"ATC G",
"AGGG GC",
"TGTAA TCCCAGC",
"AG TCTC",
"TGGG GG",
"TGGG CC",
"AC AGAA",
"AT ATTG",
"TCC TCC",
"TTTT GC",
"TAA ATG",
"TTC TG",
"AA AGAC",
"AGGG CC",
"ATT TAC",
"AT ACAC",
"AAC AC",
"AAG AAG",
"ATT TGC",
"ACTT TGGG",
"TGTG CC",
"AG AGGC",
"AG ATAA",
"TCC TC",
"AGG TGG",
"TT TCCC",
"G TGG",
"TTTT GG",
"TATT CC",
"AGC AC",
"TTC TCC",
"TAT ATT",
"TT TATG",
"AATG GC",
"AAC TC",
"ATG TG",
"AG AGGG",
"TAA AC",
"AA ATTG",
"AG ACAC",
"TTC TGC",
"AGG TG",
"TAT AC",
"AG TATT",
"AA AGAG",
"AGGC TGAGGC",
"AGG AC",
"TTTG GC",
"ATT CCC",
"TAA AAG",
"TC AAAA",
"TAT ATG",
"TG AGAA",
"AT ATGC",
"TG TTTC",
"ATT TATT",
"AG ATTC",
"TGC TGC",
"TTG AAC",
"AGTG CC",
"AT ATCC",
"AAG AAAA",
"ATAT ATAT",
"TT TGGG",
"AAC AAC",
"AA TATG",
"AGC TC",
"TT TAAG",
"ATC TTC",
"AA TATC",
"AA ATCC",
"TAC TC",
"TCTG GC",
"ATGG CC",
"AA TCTC",
"ACTCC AGCC",
"TCC AC",
"AG TAGC",
"TT ATAA",
"AA AGCC",
"TT TATC",
"ATC ACC",
"TGC AC",
"AA AGGG",
"TAC G",
"TC TTCC",
"AA TTTG",
"AA ATATT",
"AG TAT",
"ATG ATG",
"TC TCCC",
"AGG AGC",
"AA AGGC",
"AA AGTG",
"TAA ATT",
"AAGG CC",
"TT ATTC",
"TTGC AGTG",
"AT ATGG",
"TTG TTG",
"AAC TCC",
"AGCC G",
"GC G",
"AA TTCC",
"ATC TCC",
"ATTG CC",
"TGG AAG",
"ATG AC",
"ACC TCC",
"TTGG GG",
"TC GG",
"AA TAAC",
"TCCC AAAGTGC",
"ATCC CC",
"AAG AAC",
"AATG CC",
"AAG ACC",
"CCC AGC",
"ATT TAG",
"AATT GC",
"AG AGCC",
"TC ACCC",
"TGGGATTAC AGGC",
"TTTT TAA",
"TGC TG",
"AGC AGG",
"ATG TAA",
"AT AGAG",
"TC ATTC",
"ATG TGG",
"TTGG GC",
"AAAA AC",
"TCCC G",
"TT TAAC",
"TG AAAC",
"AAGG GG",
"TC TGTG",
"AGGCTGG AGTGC",
"ATT TGG",
"AGC TGC",
"AA ACCC",
"TAAAA ATAC",
"AGC TCC",
"AGG AAC",
"ATG AAG",
"AG TTCC",
"TT AAAC",
"AGAA CC",
"TCTC G",
"TTC AAC",
"AA TGTG",
"AA TAAG",
"ATC ATC",
"ACC ACC",
"ATG GGG",
"AG ATGC",
"AGG CCC",
"AAGG GC",
"TC TATT",
"TTC TTG",
"TTC TTTT",
"AG TGGG",
"TTC TGG",
"AACC CC",
"TAG AGAC",
"TAG AC",
"AT AGTG",
"AGC TG",
"AGC TGG",
"TC TTTG",
"TTC TT",
"AGG TGC",
"ATG TGC",
"TGCC CC",
"AG TGTG",
"ATC TG",
"TAT AAAA",
"AC AGAG",
"AGC AAG",
"TC TTGC",
"AC TCTG",
"TTC TAA",
"AT AGGC",
"ATG TTG",
"AT ACCC",
"ATC TGC",
"AT AGAC",
"TT AGAA",
"ATAA GC",
"ATC TTG",
"TTTT TTG",
"TTC TAG",
"TT AATT",
"TCC TGC",
"ATT TTTC",
"AAG TAA",
"TGG CCC",
"TAC AC",
"AGG TC",
"AAGC G",
"AG ACCC",
"AAC ATGG",
"TTC ATG",
"ATC ATG",
"AGGG GG",
"TAAG CC",
"AAG ATG",
"TCC CCC",
"AG ATAC",
"AGG TTC",
"ATT AAAA",
"AG TATG",
"ACC GC",
"TGG TGG",
"AG TCTG",
"AA TCAC",
"ATAA GG",
"ATT TAT",
"TTG AC",
"GGG CC",
"AAG TC",
"TT AGCC",
"TT ATTG",
"AGC TAC",
"AAAAAAAA AAAAAAAA",
"TCCTGCC TCAGCC",
"TCC TTC",
"TATT TATT",
"TG TTTG",
"TGG AGC",
"AAG AGG",
"ATT ATG",
"TGG AC",
"TTTTTTTT TTTTTTTT",
"TTG AACC",
"TCC ACC",
"AAC AAAA",
"AG ATTG",
"TTC ATC",
"TTG TG",
"AAAT ATC",
"TAA AAAA",
"TGC TC",
"CCC G",
"AC TTTC",
"ATG GGC",
"AG TCAC",
"TGG AGG",
"AG TAAC",
"AA TCTG",
"TAC TAAAAATAC",
"ATT AAC",
"TAC AAAA",
"TGG TG",
"ATG AAC",
"TT TACC",
"TTC AGC",
"TAG AAC",
"TGC AGC",
"AA TAGC",
"AA TGGG",
"ATAA CC",
"ATG TC",
"AAG AGC",
"TTG TAA",
"AT AGTT",
"ATC TAA",
"TT AAAG",
"AA ATAT",
"AGG AAAA",
"AG TAAG",
"AGGCC G",
"AT AAAAC",
"AA TCCC",
"TG TATG",
"TG AAGG",
"TCC AGC",
"TCC TGG",
"ACC TGC",
"TATT GC",
"AC TGTG",
"TT AATG",
"TTC ATT",
"TG ATTC",
"TCCTG ACCTC",
"TAG AAG",
"AG TCCC",
"TG TGGG",
"ACC CCC",
"AGGC GG",
"AGG TAA",
"AGG ATG",
"TAT ATC",
"TCACTGC AACC",
"AG AAAAG",
"TGG AAC",
"AG TTTTG",
"TC TGGG",
"GCC CC",
"AA TACC",
"AG TTGG",
"G TCTC",
"AAG ATC",
"ATAA ATAA",
"AG AAAAC",
"TAGG CC",
"ACTGC ACTCCAGCC",
"AT AGCC",
"AG ATCC",
"ATG CCC",
"TAT AAC",
"AG TTAC",
"TG TTCC",
"TTC TTTC",
"AAG CCC",
"TT TAGC",
"AC TTCC",
"AA ATAG",
"AG TATC",
"TG AGCC",
"TTC TAC",
"TGG TGC",
"TC AGGG",
"TTG TAG",
"TCC ATG",
"TGGGC AAC",
"TATG CC",
"TTC AAG",
"AGC AAC",
"AT AGGG",
"TG AAGC",
"ATT ATC",
"AAAT AAAA",
"TTG TTTT",
"TAA TAC",
"AC ATGG",
"AC ATTC",
"TTG AGG",
"TC ACAC",
"TG AGAG",
"AGC ATC",
"AGACC AGCC",
"TC TAT",
"AG TTGC",
"TG ATCC",
"G TGC",
"AAC ATC",
"TCAC AGAG",
"ATG AGCC",
"ATG TTC",
"TC AGGC",
"AC AGCC",
"TT ATAC",
"TG TCTG",
"TG AATG",
"TCC TTG",
"AC AGGC",
"AAAA AAG",
"AGGCTGAGGC AGGAGAA",
"TATG GC",
"AT AAAAG",
"TC AGAG",
"AGC ACC",
"AAAA AAC",
"TG ATAA",
"TGTAATCCCAGC ACTTTGGG",
"TG AGGG",
"TC ATGCC",
"TTC AGG",
"TCC ATC",
"TTTC TTTC",
"ATT AAG",
"AAAC G",
"TGGC TCAC",
"TAT CCC",
"TT ATTTT",
"TCCC AAG",
"TG ATGG",
"ATT TCTC",
"TAG AAAA",
"TCC AGG",
"TGC TTC",
"TC ATTG",
"TACC CC",
"AA TAGG",
"TTG AAG",
"TC AAGC",
"AGC TAA",
"TT TAGG",
"AA TTGG",
"AC AAAC",
"ACC AAC",
"AGC TTC",
"AATC TGC",
"AC AGTG",
"TAC TAA",
"TCC TGAG",
"AGC ATT",
"TC AAAC",
"ATC AAC",
"ATG AAAA",
"AC AGGG",
"ACC TGG",
"ACC TTG",
"TT AAGG",
"TGGG ACTAC",
"TT ATCC",
"TCTC TCTC",
"AAG TGC",
"AC TTTG",
"ACAC G",
"ATG AGG",
"ATG TAC",
"AAAC GGG",
"ATT TTTG",
"TC ATCC",
"ATG TCC",
"GTG AGCC",
"TTG AACCC",
"TAA TGC",
"TG AATT",
"ATT TTCC",
"AG TAGG",
"TC TTGG",
"TC AGTG",
"TAA ATC",
"AC AGAC",
"GG GGC",
"TC AAGG",
"ATG ATC",
"TCC GCC",
"ATT GGC",
"AT AGTC",
"TG AAAG",
"AGG TAG",
"TGC TGG",
"AGG TTG",
"AC ATAA",
"AC TAT",
"TATT GG",
"TT ACAC",
"AGG TGTG",
"TG TTGG",
"AAC TTC",
"TG AGTG",
"TT AAATT",
"TT AGAG",
"TTG TATT",
"TGC TCC",
"TTTTTTTT TTTT",
"AC ATGC",
"AAAAAAAA AAAA",
"ACC ATC",
"TG ACAC",
"TAA TAG",
"TC ATAA",
"TT AAGC",
"TG TATC",
"AGAG AGAG",
"AAG TTG",
"ACC AGC",
"TGC AGG",
"AC AATG",
"AAAG AAAA",
"TCC TGGG",
"ATG TAG",
"AGGC G",
"TAA TTC",
"TATG GG",
"ATT TTAC",
"ATC TAC",
"AGAA AGAA",
"TTTC ACCATG",
"TTTT AAAA",
"AAG TTC",
"AC TGGG",
"TG TAAG",
"AC TATT",
"TGAA ACCCC",
"AGGC TGC",
"TG ATGC",
"TT AACC",
"TC ATGG",
"AG TACC",
"AG ATATT",
"TG TCCC",
"AC TGCC",
"TAA TTG",
"AGG ATGG",
"AAG TAG",
"TT ATGC",
"AGC ATG",
"AGG ATC",
"ACC TTC",
"TG TCAC",
"ATTC TCAGAA",
"TCCCAGC TAC",
"TTTGTG ATG",
"AC ATCC",
"TCC TAA",
"TAC ATG",
"TC GGCC",
"TC AAAG",
"TT AATC",
"ATT AGG",
"TT ACCC",
"AGAC G",
"TGG TAA",
"AAAT AAC",
"GC GG",
"TG TTGC",
"ATTC G",
"AC TCTC",
"TGGC G",
"TAATTTT TGTATT",
"AC ATAC",
"TCC AAC",
"ATT GGG",
"TGGG TGAC",
"ATTC TCCTGCCTCAGCC",
"TG TCTT",
"TAC AAC",
"ACC TAA",
"AA TTTTG",
"ATC TGG",
"TC AATG",
"TC ATGC",
"TATT TTG",
"TC TATC",
"ATTTC G",
"AC AAGG",
"AT Ċ",
"TT ATATT",
"TGGGC G",
"ATG AGC",
"TC TTAC",
"AGG TAC",
"ACC TAC",
"TC TATG",
"AC TCCC",
"ATC ATT",
"ATAA TAA",
"AC TATG",
"AGATG GGG",
"AGG AGGC",
"AAG TCC",
"TAG TAA",
"ATTC AAC",
"TTTAG TAGAGAC",
"TTTC TTTT",
"AAG TAC",
"ACC TGCC",
"TG TAAC",
"ATAT ATG",
"TGC ATG",
"TT AGTG",
"ATAT AAAA",
"AATT TAA",
"TGG AAAA",
"TATT TTTT",
"TT ATGG",
"AAATT CC",
"TT AAATG",
"TT AGGG",
"TAA GGC",
"AAC ATG",
"AC ATTG",
"TGGG AGG",
"TTG TTC",
"TTG TGC",
"TCTG TCTC",
"AC ACCC",
"AGGCC CC",
"AAC AGC",
"TGG ATC",
"TGG ATG",
"TG TTAC",
"AAAC AAAC",
"ATAA ATG",
"TT AATAA",
"AC AAAG",
"AAC TGC",
"AAC ATT",
"TTTG TTTT",
"TGG TTC",
"AGTTC G",
"ACC AAG",
"TATT TTC",
"ACC AGG",
"TCAC G",
"TC AACC",
"TC AGAC",
"TTG AGC",
"AC TTGG",
"ATAT ATT",
"TAT AAG",
"ATGG TGGC",
"TT TATTC",
"AAAA TAC",
"AGC TGAG",
"TC AGTC",
"TTTG TAA",
"TT AGAC",
"AC TGGC",
"AAC TAC",
"TG ACCC",
"ATC AGC",
"AG ATAG",
"AAAC TCC",
"TG TTTTC",
"ACC ACG",
"ATAC G",
"TCC AGCC",
"TTGG AAACGGG",
"TGG TAG",
"AGAA ATG",
"TC TACC",
"AC GCC",
"AAC TAA",
"TAT AGC",
"AAAA ATG",
"AG ATTTC",
"TG AACC",
"AC TTGC",
"TAT ATAA",
"TTC TTCC",
"AAAG AAG",
"ATTTT TAA",
"TTTC G",
"ATT TCTG",
"AC TCAC",
"TT AGTT",
"ATATT TGG",
"AGG AAGG"
]
}
}