Upload tokenizer
Browse files
- merges.txt +4 -4
- tokenizer.json +23 -23
- tokenizer_config.json +1 -1
- vocab.json +0 -0
merges.txt
CHANGED
@@ -2056,8 +2056,8 @@ d o
|
|
2056 |
Ġchan ged
|
2057 |
Ġcontro l
|
2058 |
Ġs ense
|
2059 |
-
it ch
|
2060 |
as ure
|
|
|
2061 |
Ġfe bruary
|
2062 |
Ġ3 0
|
2063 |
Ġdad dy
|
@@ -2569,9 +2569,9 @@ in c
|
|
2569 |
Ġm ir
|
2570 |
Ġf em
|
2571 |
Ġbec om
|
2572 |
-
Ġ q
|
2573 |
em pt
|
2574 |
a f
|
|
|
2575 |
ri age
|
2576 |
Ġbr own
|
2577 |
od e
|
@@ -3068,10 +3068,10 @@ ur ity
|
|
3068 |
Ġb ath
|
3069 |
Ġl ength
|
3070 |
Ġtr ade
|
3071 |
-
bo ard
|
3072 |
Ġkn ock
|
3073 |
ast ic
|
3074 |
Ġjo ined
|
|
|
3075 |
b a
|
3076 |
ter day
|
3077 |
Ġco ffee
|
@@ -4739,7 +4739,6 @@ u ly
|
|
4739 |
Ġcom ment
|
4740 |
Ġtr ave
|
4741 |
Ġdis play
|
4742 |
-
Ġsw itch
|
4743 |
Ġass um
|
4744 |
Ġadv ice
|
4745 |
Ġstep s
|
@@ -4749,6 +4748,7 @@ u ly
|
|
4749 |
Ġk im
|
4750 |
im um
|
4751 |
Ġrec om
|
|
|
4752 |
Ġsign al
|
4753 |
Ġill ust
|
4754 |
ipp ing
|
|
|
2056 |
Ġchan ged
|
2057 |
Ġcontro l
|
2058 |
Ġs ense
|
|
|
2059 |
as ure
|
2060 |
+
it ch
|
2061 |
Ġfe bruary
|
2062 |
Ġ3 0
|
2063 |
Ġdad dy
|
|
|
2569 |
Ġm ir
|
2570 |
Ġf em
|
2571 |
Ġbec om
|
|
|
2572 |
em pt
|
2573 |
a f
|
2574 |
+
Ġ q
|
2575 |
ri age
|
2576 |
Ġbr own
|
2577 |
od e
|
|
|
3068 |
Ġb ath
|
3069 |
Ġl ength
|
3070 |
Ġtr ade
|
|
|
3071 |
Ġkn ock
|
3072 |
ast ic
|
3073 |
Ġjo ined
|
3074 |
+
bo ard
|
3075 |
b a
|
3076 |
ter day
|
3077 |
Ġco ffee
|
|
|
4739 |
Ġcom ment
|
4740 |
Ġtr ave
|
4741 |
Ġdis play
|
|
|
4742 |
Ġass um
|
4743 |
Ġadv ice
|
4744 |
Ġstep s
|
|
|
4748 |
Ġk im
|
4749 |
im um
|
4750 |
Ġrec om
|
4751 |
+
Ġsw itch
|
4752 |
Ġsign al
|
4753 |
Ġill ust
|
4754 |
ipp ing
|
tokenizer.json
CHANGED
@@ -2403,8 +2403,8 @@
|
|
2403 |
"Ġchanged": 2315,
|
2404 |
"Ġcontrol": 2316,
|
2405 |
"Ġsense": 2317,
|
2406 |
-
"itch": 2318,
|
2407 |
-
"asure": 2319,
|
2408 |
"Ġfebruary": 2320,
|
2409 |
"Ġ30": 2321,
|
2410 |
"Ġdaddy": 2322,
|
@@ -2916,9 +2916,9 @@
|
|
2916 |
"Ġmir": 2828,
|
2917 |
"Ġfem": 2829,
|
2918 |
"Ġbecom": 2830,
|
2919 |
-
"Ġq": 2831,
|
2920 |
-
"empt": 2832,
|
2921 |
-
"af": 2833,
|
2922 |
"riage": 2834,
|
2923 |
"Ġbrown": 2835,
|
2924 |
"ode": 2836,
|
@@ -3415,10 +3415,10 @@
|
|
3415 |
"Ġbath": 3327,
|
3416 |
"Ġlength": 3328,
|
3417 |
"Ġtrade": 3329,
|
3418 |
-
"board": 3330,
|
3419 |
-
"Ġknock": 3331,
|
3420 |
-
"astic": 3332,
|
3421 |
-
"Ġjoined": 3333,
|
3422 |
"ba": 3334,
|
3423 |
"terday": 3335,
|
3424 |
"Ġcoffee": 3336,
|
@@ -5086,16 +5086,16 @@
|
|
5086 |
"Ġcomment": 4998,
|
5087 |
"Ġtrave": 4999,
|
5088 |
"Ġdisplay": 5000,
|
5089 |
-
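"Ġswitch": 5001,
|
5090 |
-
"Ġassum": 5002,
|
5091 |
-
"Ġadvice": 5003,
|
5092 |
-
"Ġsteps": 5004,
|
5093 |
-
"Ġdefeated": 5005,
|
5094 |
-
"Ġresources": 5006,
|
5095 |
-
"Ġrick": 5007,
|
5096 |
-
"Ġkim": 5008,
|
5097 |
-
"imum": 5009,
|
5098 |
-
"Ġrecom": 5010,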
|
5099 |
"Ġsignal": 5011,
|
5100 |
"Ġillust": 5012,
|
5101 |
"ipping": 5013,
|
@@ -10336,8 +10336,8 @@
|
|
10336 |
"Ġchan ged",
|
10337 |
"Ġcontro l",
|
10338 |
"Ġs ense",
|
10339 |
-
"it ch",
|
10340 |
"as ure",
|
|
|
10341 |
"Ġfe bruary",
|
10342 |
"Ġ3 0",
|
10343 |
"Ġdad dy",
|
@@ -10849,9 +10849,9 @@
|
|
10849 |
"Ġm ir",
|
10850 |
"Ġf em",
|
10851 |
"Ġbec om",
|
10852 |
-
"Ġ q",
|
10853 |
"em pt",
|
10854 |
"a f",
|
|
|
10855 |
"ri age",
|
10856 |
"Ġbr own",
|
10857 |
"od e",
|
@@ -11348,10 +11348,10 @@
|
|
11348 |
"Ġb ath",
|
11349 |
"Ġl ength",
|
11350 |
"Ġtr ade",
|
11351 |
-
"bo ard",
|
11352 |
"Ġkn ock",
|
11353 |
"ast ic",
|
11354 |
"Ġjo ined",
|
|
|
11355 |
"b a",
|
11356 |
"ter day",
|
11357 |
"Ġco ffee",
|
@@ -13019,7 +13019,6 @@
|
|
13019 |
"Ġcom ment",
|
13020 |
"Ġtr ave",
|
13021 |
"Ġdis play",
|
13022 |
-
"Ġsw itch",
|
13023 |
"Ġass um",
|
13024 |
"Ġadv ice",
|
13025 |
"Ġstep s",
|
@@ -13029,6 +13028,7 @@
|
|
13029 |
"Ġk im",
|
13030 |
"im um",
|
13031 |
"Ġrec om",
|
|
|
13032 |
"Ġsign al",
|
13033 |
"Ġill ust",
|
13034 |
"ipp ing",
|
|
|
2403 |
"Ġchanged": 2315,
|
2404 |
"Ġcontrol": 2316,
|
2405 |
"Ġsense": 2317,
|
2406 |
+
"asure": 2318,
|
2407 |
+
"itch": 2319,
|
2408 |
"Ġfebruary": 2320,
|
2409 |
"Ġ30": 2321,
|
2410 |
"Ġdaddy": 2322,
|
|
|
2916 |
"Ġmir": 2828,
|
2917 |
"Ġfem": 2829,
|
2918 |
"Ġbecom": 2830,
|
2919 |
+
"empt": 2831,
|
2920 |
+
"af": 2832,
|
2921 |
+
"Ġq": 2833,
|
2922 |
"riage": 2834,
|
2923 |
"Ġbrown": 2835,
|
2924 |
"ode": 2836,
|
|
|
3415 |
"Ġbath": 3327,
|
3416 |
"Ġlength": 3328,
|
3417 |
"Ġtrade": 3329,
|
3418 |
+
"Ġknock": 3330,
|
3419 |
+
"astic": 3331,
|
3420 |
+
"Ġjoined": 3332,
|
3421 |
+
"board": 3333,
|
3422 |
"ba": 3334,
|
3423 |
"terday": 3335,
|
3424 |
"Ġcoffee": 3336,
|
|
|
5086 |
"Ġcomment": 4998,
|
5087 |
"Ġtrave": 4999,
|
5088 |
"Ġdisplay": 5000,
|
5089 |
+
"Ġassum": 5001,
|
5090 |
+
"Ġadvice": 5002,
|
5091 |
+
"Ġsteps": 5003,
|
5092 |
+
"Ġdefeated": 5004,
|
5093 |
+
"Ġresources": 5005,
|
5094 |
+
"Ġrick": 5006,
|
5095 |
+
"Ġkim": 5007,
|
5096 |
+
"imum": 5008,
|
5097 |
+
"Ġrecom": 5009,
|
5098 |
+
"Ġswitch": 5010,
|
5099 |
"Ġsignal": 5011,
|
5100 |
"Ġillust": 5012,
|
5101 |
"ipping": 5013,
|
|
|
10336 |
"Ġchan ged",
|
10337 |
"Ġcontro l",
|
10338 |
"Ġs ense",
|
|
|
10339 |
"as ure",
|
10340 |
+
"it ch",
|
10341 |
"Ġfe bruary",
|
10342 |
"Ġ3 0",
|
10343 |
"Ġdad dy",
|
|
|
10849 |
"Ġm ir",
|
10850 |
"Ġf em",
|
10851 |
"Ġbec om",
|
|
|
10852 |
"em pt",
|
10853 |
"a f",
|
10854 |
+
"Ġ q",
|
10855 |
"ri age",
|
10856 |
"Ġbr own",
|
10857 |
"od e",
|
|
|
11348 |
"Ġb ath",
|
11349 |
"Ġl ength",
|
11350 |
"Ġtr ade",
|
|
|
11351 |
"Ġkn ock",
|
11352 |
"ast ic",
|
11353 |
"Ġjo ined",
|
11354 |
+
"bo ard",
|
11355 |
"b a",
|
11356 |
"ter day",
|
11357 |
"Ġco ffee",
|
|
|
13019 |
"Ġcom ment",
|
13020 |
"Ġtr ave",
|
13021 |
"Ġdis play",
|
|
|
13022 |
"Ġass um",
|
13023 |
"Ġadv ice",
|
13024 |
"Ġstep s",
|
|
|
13028 |
"Ġk im",
|
13029 |
"im um",
|
13030 |
"Ġrec om",
|
13031 |
+
"Ġsw itch",
|
13032 |
"Ġsign al",
|
13033 |
"Ġill ust",
|
13034 |
"ipp ing",
|
tokenizer_config.json
CHANGED
@@ -13,7 +13,7 @@
|
|
13 |
"single_word": false
|
14 |
},
|
15 |
"model_max_length": 1000000000000000019884624838656,
|
16 |
-
"name_or_path": "CamBabyTrainers/
|
17 |
"pad_token": "<pad>",
|
18 |
"sep_token": "</s>",
|
19 |
"special_tokens_map_file": null,
|
|
|
13 |
"single_word": false
|
14 |
},
|
15 |
"model_max_length": 1000000000000000019884624838656,
|
16 |
+
"name_or_path": "CamBabyTrainers/CamBabyTokenizer-8192",
|
17 |
"pad_token": "<pad>",
|
18 |
"sep_token": "</s>",
|
19 |
"special_tokens_map_file": null,
|
vocab.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|