goldfish-models committed on
Commit
7fb4b88
1 Parent(s): 49af655

Upload ctd_latn_5mb tokenizer.

Browse files
added_tokens.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"[XXXXX300]": 46610, "[XXXXX156]": 46466, "[XXXXX440]": 46750, "[XXXXX523]": 46833, "[XXXXX708]": 47018, "[XXXXX676]": 46986, "[XXXXX589]": 46899, "[XXXXX375]": 46685, "[XXXXX442]": 46752, "[XXXXX282]": 46592, "[XXXXX612]": 46922, "[XXXXX431]": 46741, "[XXXXX731]": 47041, "[XXXXX452]": 46762, "[XXXXX125]": 46435, "[XXXXX729]": 47039, "[XXXXX498]": 46808, "[XXXXX614]": 46924, "[XXXXX370]": 46680, "[XXXXX622]": 46932, "[XXXXX710]": 47020, "[XXXXX542]": 46852, "[XXXXX47]": 46357, "[XXXXX129]": 46439, "[XXXXX288]": 46598, "[CLS]": 46306, "[XXXXX757]": 47067, "[XXXXX728]": 47038, "[XXXXX263]": 46573, "[XXXXX745]": 47055, "[XXXXX433]": 46743, "[XXXXX349]": 46659, "[XXXXX557]": 46867, "[XXXXX499]": 46809, "[XXXXX453]": 46763, "[XXXXX193]": 46503, "[XXXXX290]": 46600, "[XXXXX287]": 46597, "[XXXXX333]": 46643, "[XXXXX399]": 46709, "[XXXXX230]": 46540, "[XXXXX709]": 47019, "[XXXXX405]": 46715, "[XXXXX560]": 46870, "[XXXXX662]": 46972, "[XXXXX8]": 46318, "[XXXXX739]": 47049, "[XXXXX252]": 46562, "[XXXXX345]": 46655, "[XXXXX315]": 46625, "[XXXXX158]": 46468, "[XXXXX764]": 47074, "[XXXXX617]": 46927, "[XXXXX338]": 46648, "[XXXXX218]": 46528, "[XXXXX176]": 46486, "[XXXXX696]": 47006, "[XXXXX595]": 46905, "[XXXXX3]": 46313, "[XXXXX390]": 46700, "[XXXXX396]": 46706, "[XXXXX423]": 46733, "[XXXXX608]": 46918, "[XXXXX355]": 46665, "[XXXXX351]": 46661, "[XXXXX6]": 46316, "[XXXXX751]": 47061, "[XXXXX28]": 46338, "[XXXXX776]": 47086, "[XXXXX666]": 46976, "[XXXXX647]": 46957, "[XXXXX49]": 46359, "[XXXXX524]": 46834, "[XXXXX793]": 47103, "[XXXXX274]": 46584, "[XXXXX207]": 46517, "[XXXXX407]": 46717, "[XXXXX520]": 46830, "[XXXXX712]": 47022, "[XXXXX149]": 46459, "[XXXXX587]": 46897, "[XXXXX651]": 46961, "[XXXXX415]": 46725, "[XXXXX83]": 46393, "[XXXXX512]": 46822, "[XXXXX789]": 47099, "[XXXXX462]": 46772, "[XXXXX62]": 46372, "[XXXXX445]": 46755, "[XXXXX727]": 47037, "[XXXXX401]": 46711, "[XXXXX444]": 46754, "[XXXXX152]": 46462, "[XXXXX76]": 46386, "[XXXXX371]": 46681, "[XXXXX419]": 
46729, "[XXXXX89]": 46399, "[XXXXX718]": 47028, "[XXXXX736]": 47046, "[XXXXX261]": 46571, "[XXXXX443]": 46753, "[XXXXX275]": 46585, "[XXXXX169]": 46479, "[XXXXX683]": 46993, "[XXXXX690]": 47000, "[XXXXX626]": 46936, "[XXXXX226]": 46536, "[XXXXX760]": 47070, "[XXXXX157]": 46467, "[XXXXX294]": 46604, "[XXXXX301]": 46611, "[XXXXX790]": 47100, "[XXXXX784]": 47094, "[XXXXX506]": 46816, "[XXXXX271]": 46581, "[XXXXX387]": 46697, "[XXXXX693]": 47003, "[XXXXX691]": 47001, "[XXXXX631]": 46941, "[XXXXX148]": 46458, "[XXXXX189]": 46499, "[XXXXX449]": 46759, "[XXXXX113]": 46423, "[XXXXX600]": 46910, "[XXXXX284]": 46594, "[XXXXX495]": 46805, "[XXXXX154]": 46464, "[XXXXX404]": 46714, "[XXXXX585]": 46895, "[XXXXX584]": 46894, "[XXXXX374]": 46684, "[XXXXX748]": 47058, "[XXXXX697]": 47007, "[XXXXX293]": 46603, "[XXXXX167]": 46477, "[XXXXX539]": 46849, "[XXXXX773]": 47083, "[XXXXX54]": 46364, "[XXXXX434]": 46744, "[XXXXX139]": 46449, "[XXXXX635]": 46945, "[XXXXX422]": 46732, "[XXXXX551]": 46861, "[XXXXX173]": 46483, "[XXXXX671]": 46981, "[XXXXX217]": 46527, "[XXXXX283]": 46593, "[XXXXX24]": 46334, "[XXXXX88]": 46398, "[XXXXX91]": 46401, "[XXXXX529]": 46839, "[XXXXX34]": 46344, "[XXXXX392]": 46702, "[XXXXX131]": 46441, "[XXXXX48]": 46358, "[XXXXX504]": 46814, "[XXXXX109]": 46419, "[XXXXX669]": 46979, "[XXXXX418]": 46728, "[XXXXX144]": 46454, "[XXXXX398]": 46708, "[XXXXX192]": 46502, "[XXXXX586]": 46896, "[XXXXX540]": 46850, "[XXXXX766]": 47076, "[XXXXX186]": 46496, "[XXXXX410]": 46720, "[XXXXX479]": 46789, "[XXXXX13]": 46323, "[XXXXX361]": 46671, "[XXXXX775]": 47085, "[XXXXX507]": 46817, "[XXXXX774]": 47084, "[XXXXX305]": 46615, "[XXXXX19]": 46329, "[XXXXX744]": 47054, "[XXXXX386]": 46696, "[XXXXX39]": 46349, "[XXXXX680]": 46990, "[XXXXX37]": 46347, "[XXXXX67]": 46377, "[XXXXX110]": 46420, "[XXXXX103]": 46413, "[XXXXX21]": 46331, "[XXXXX159]": 46469, "[XXXXX457]": 46767, "[XXXXX60]": 46370, "[XXXXX493]": 46803, "[XXXXX559]": 46869, "[XXXXX403]": 46713, "[XXXXX242]": 46552, 
"[XXXXX381]": 46691, "[XXXXX783]": 47093, "[XXXXX531]": 46841, "[XXXXX435]": 46745, "[XXXXX681]": 46991, "[XXXXX208]": 46518, "[XXXXX81]": 46391, "[XXXXX340]": 46650, "[XXXXX114]": 46424, "[XXXXX394]": 46704, "[XXXXX325]": 46635, "[XXXXX417]": 46727, "[XXXXX593]": 46903, "[XXXXX126]": 46436, "[XXXXX255]": 46565, "[XXXXX250]": 46560, "[XXXXX299]": 46609, "[XXXXX591]": 46901, "[XXXXX788]": 47098, "[XXXXX753]": 47063, "[XXXXX643]": 46953, "[XXXXX342]": 46652, "[XXXXX323]": 46633, "[XXXXX611]": 46921, "[XXXXX246]": 46556, "[XXXXX649]": 46959, "[XXXXX180]": 46490, "[XXXXX11]": 46321, "[XXXXX469]": 46779, "[XXXXX490]": 46800, "[XXXXX566]": 46876, "[XXXXX243]": 46553, "[XXXXX234]": 46544, "[XXXXX639]": 46949, "[XXXXX705]": 47015, "[XXXXX616]": 46926, "[XXXXX725]": 47035, "[XXXXX426]": 46736, "[XXXXX556]": 46866, "[XXXXX770]": 47080, "[XXXXX304]": 46614, "[XXXXX607]": 46917, "[XXXXX61]": 46371, "[XXXXX472]": 46782, "[XXXXX138]": 46448, "[XXXXX427]": 46737, "[XXXXX2]": 46312, "[XXXXX84]": 46394, "[XXXXX347]": 46657, "[XXXXX580]": 46890, "[XXXXX420]": 46730, "[XXXXX652]": 46962, "[XXXXX291]": 46601, "[XXXXX562]": 46872, "[XXXXX430]": 46740, "[XXXXX388]": 46698, "[XXXXX378]": 46688, "[XXXXX526]": 46836, "[XXXXX740]": 47050, "[XXXXX582]": 46892, "[XXXXX648]": 46958, "[XXXXX578]": 46888, "[XXXXX205]": 46515, "[XXXXX747]": 47057, "[XXXXX575]": 46885, "[XXXXX548]": 46858, "[XXXXX164]": 46474, "[XXXXX737]": 47047, "[XXXXX658]": 46968, "[XXXXX656]": 46966, "[XXXXX599]": 46909, "[XXXXX554]": 46864, "[XXXXX689]": 46999, "[XXXXX18]": 46328, "[XXXXX142]": 46452, "[XXXXX68]": 46378, "[XXXXX564]": 46874, "[XXXXX515]": 46825, "[XXXXX281]": 46591, "[XXXXX111]": 46421, "[XXXXX197]": 46507, "[XXXXX321]": 46631, "[XXXXX733]": 47043, "[XXXXX391]": 46701, "[XXXXX561]": 46871, "[XXXXX478]": 46788, "[XXXXX663]": 46973, "[XXXXX322]": 46632, "[XXXXX746]": 47056, "[XXXXX466]": 46776, "[XXXXX518]": 46828, "[XXXXX73]": 46383, "[XXXXX772]": 47082, "[XXXXX240]": 46550, "[XXXXX50]": 46360, "[XXXXX508]": 
46818, "[XXXXX777]": 47087, "[XXXXX424]": 46734, "[XXXXX93]": 46403, "[XXXXX5]": 46315, "[XXXXX196]": 46506, "[XXXXX553]": 46863, "[XXXXX756]": 47066, "[XXXXX245]": 46555, "[XXXXX202]": 46512, "[XXXXX363]": 46673, "[XXXXX781]": 47091, "[XXXXX33]": 46343, "[MASK]": 46309, "[XXXXX619]": 46929, "[XXXXX675]": 46985, "[XXXXX64]": 46374, "[XXXXX517]": 46827, "[XXXXX310]": 46620, "[XXXXX348]": 46658, "[XXXXX44]": 46354, "[XXXXX85]": 46395, "[XXXXX32]": 46342, "[XXXXX236]": 46546, "[XXXXX36]": 46346, "[XXXXX97]": 46407, "[XXXXX573]": 46883, "[XXXXX179]": 46489, "[XXXXX30]": 46340, "[XXXXX602]": 46912, "[XXXXX268]": 46578, "[XXXXX166]": 46476, "[XXXXX706]": 47016, "[XXXXX297]": 46607, "[XXXXX461]": 46771, "[XXXXX749]": 47059, "[XXXXX51]": 46361, "[XXXXX592]": 46902, "[XXXXX483]": 46793, "[XXXXX368]": 46678, "[XXXXX210]": 46520, "[XXXXX212]": 46522, "[XXXXX429]": 46739, "[XXXXX650]": 46960, "[XXXXX606]": 46916, "[XXXXX354]": 46664, "[XXXXX450]": 46760, "[XXXXX382]": 46692, "[XXXXX416]": 46726, "[XXXXX319]": 46629, "[XXXXX96]": 46406, "[XXXXX546]": 46856, "[XXXXX519]": 46829, "[XXXXX414]": 46724, "[XXXXX509]": 46819, "[XXXXX46]": 46356, "[XXXXX258]": 46568, "[XXXXX38]": 46348, "[XXXXX31]": 46341, "[XXXXX124]": 46434, "[XXXXX27]": 46337, "[XXXXX489]": 46799, "[XXXXX65]": 46375, "[XXXXX71]": 46381, "[XXXXX161]": 46471, "[XXXXX438]": 46748, "[XXXXX188]": 46498, "[XXXXX69]": 46379, "[XXXXX360]": 46670, "[XXXXX713]": 47023, "[XXXXX455]": 46765, "[XXXXX136]": 46446, "[XXXXX670]": 46980, "[XXXXX308]": 46618, "[XXXXX191]": 46501, "[XXXXX128]": 46438, "[XXXXX641]": 46951, "[XXXXX160]": 46470, "[XXXXX59]": 46369, "[XXXXX384]": 46694, "[XXXXX467]": 46777, "[XXXXX57]": 46367, "[XXXXX660]": 46970, "[XXXXX72]": 46382, "[XXXXX262]": 46572, "[XXXXX702]": 47012, "[XXXXX601]": 46911, "[XXXXX165]": 46475, "[XXXXX172]": 46482, "[XXXXX446]": 46756, "[XXXXX241]": 46551, "[XXXXX260]": 46570, "[XXXXX233]": 46543, "[XXXXX762]": 47072, "[XXXXX235]": 46545, "[XXXXX232]": 46542, "[XXXXX270]": 46580, 
"[XXXXX285]": 46595, "[XXXXX590]": 46900, "[XXXXX143]": 46453, "[XXXXX486]": 46796, "[XXXXX353]": 46663, "[XXXXX265]": 46575, "[XXXXX476]": 46786, "[XXXXX501]": 46811, "[XXXXX257]": 46567, "[XXXXX581]": 46891, "[XXXXX719]": 47029, "[XXXXX213]": 46523, "[XXXXX668]": 46978, "[XXXXX137]": 46447, "[XXXXX373]": 46683, "[XXXXX583]": 46893, "[XXXXX204]": 46514, "[XXXXX119]": 46429, "[XXXXX672]": 46982, "[XXXXX724]": 47034, "[XXXXX482]": 46792, "[XXXXX229]": 46539, "[XXXXX135]": 46445, "[XXXXX58]": 46368, "[XXXXX215]": 46525, "[XXXXX625]": 46935, "[XXXXX779]": 47089, "[XXXXX484]": 46794, "[XXXXX679]": 46989, "[XXXXX503]": 46813, "[XXXXX367]": 46677, "[XXXXX759]": 47069, "[XXXXX547]": 46857, "[XXXXX356]": 46666, "[XXXXX502]": 46812, "[XXXXX406]": 46716, "[XXXXX278]": 46588, "[XXXXX108]": 46418, "[XXXXX162]": 46472, "[XXXXX98]": 46408, "[XXXXX579]": 46889, "[XXXXX532]": 46842, "[XXXXX127]": 46437, "[XXXXX389]": 46699, "[XXXXX272]": 46582, "[SEP]": 46307, "[XXXXX528]": 46838, "[XXXXX550]": 46860, "[XXXXX155]": 46465, "[XXXXX75]": 46385, "[XXXXX629]": 46939, "[XXXXX665]": 46975, "[XXXXX346]": 46656, "[XXXXX150]": 46460, "[XXXXX609]": 46919, "[XXXXX428]": 46738, "[XXXXX352]": 46662, "[XXXXX441]": 46751, "[XXXXX505]": 46815, "[XXXXX730]": 47040, "[XXXXX74]": 46384, "[XXXXX209]": 46519, "[XXXXX618]": 46928, "[XXXXX545]": 46855, "[XXXXX397]": 46707, "[XXXXX525]": 46835, "[XXXXX94]": 46404, "[XXXXX510]": 46820, "[XXXXX318]": 46628, "[XXXXX412]": 46722, "[XXXXX701]": 47011, "[XXXXX359]": 46669, "[XXXXX530]": 46840, "[XXXXX102]": 46412, "[XXXXX500]": 46810, "[XXXXX659]": 46969, "[XXXXX448]": 46758, "[XXXXX35]": 46345, "[XXXXX572]": 46882, "[XXXXX630]": 46940, "[XXXXX100]": 46410, "[XXXXX634]": 46944, "[XXXXX303]": 46613, "[XXXXX653]": 46963, "[XXXXX481]": 46791, "[XXXXX95]": 46405, "[XXXXX624]": 46934, "[XXXXX29]": 46339, "[XXXXX41]": 46351, "[XXXXX522]": 46832, "[XXXXX620]": 46930, "[XXXXX715]": 47025, "[XXXXX769]": 47079, "[XXXXX692]": 47002, "[XXXXX567]": 46877, "[XXXXX52]": 
46362, "[XXXXX752]": 47062, "[XXXXX244]": 46554, "[XXXXX637]": 46947, "[XXXXX767]": 47077, "[XXXXX755]": 47065, "[XXXXX307]": 46617, "[XXXXX140]": 46450, "[XXXXX703]": 47013, "[XXXXX23]": 46333, "[XXXXX425]": 46735, "[XXXXX673]": 46983, "[XXXXX134]": 46444, "[XXXXX439]": 46749, "[XXXXX655]": 46965, "[XXXXX400]": 46710, "[XXXXX533]": 46843, "[XXXXX224]": 46534, "[XXXXX726]": 47036, "[XXXXX266]": 46576, "[XXXXX470]": 46780, "[XXXXX221]": 46531, "[XXXXX636]": 46946, "[XXXXX409]": 46719, "[XXXXX118]": 46428, "[XXXXX754]": 47064, "[XXXXX464]": 46774, "[XXXXX568]": 46878, "[XXXXX362]": 46672, "[XXXXX687]": 46997, "[XXXXX408]": 46718, "[XXXXX787]": 47097, "[XXXXX543]": 46853, "[XXXXX654]": 46964, "[XXXXX117]": 46427, "[XXXXX480]": 46790, "[XXXXX239]": 46549, "[XXXXX183]": 46493, "[XXXXX264]": 46574, "[XXXXX364]": 46674, "[XXXXX220]": 46530, "[XXXXX513]": 46823, "[XXXXX537]": 46847, "[XXXXX77]": 46387, "[XXXXX720]": 47030, "[XXXXX153]": 46463, "[XXXXX132]": 46442, "[XXXXX494]": 46804, "[XXXXX320]": 46630, "[XXXXX53]": 46363, "[XXXXX170]": 46480, "[XXXXX385]": 46695, "[XXXXX627]": 46937, "[XXXXX228]": 46538, "[XXXXX763]": 47073, "[XXXXX569]": 46879, "[XXXXX685]": 46995, "[XXXXX311]": 46621, "[XXXXX791]": 47101, "[XXXXX267]": 46577, "[XXXXX646]": 46956, "[XXXXX661]": 46971, "[XXXXX292]": 46602, "[XXXXX366]": 46676, "[XXXXX782]": 47092, "[XXXXX358]": 46668, "[XXXXX372]": 46682, "[XXXXX279]": 46589, "[XXXXX253]": 46563, "[XXXXX286]": 46596, "[XXXXX336]": 46646, "[XXXXX761]": 47071, "[XXXXX686]": 46996, "[XXXXX178]": 46488, "[XXXXX9]": 46319, "[XXXXX699]": 47009, "[XXXXX43]": 46353, "[XXXXX694]": 47004, "[XXXXX259]": 46569, "[XXXXX14]": 46324, "[XXXXX330]": 46640, "<pad>": 46308, "[XXXXX45]": 46355, "[XXXXX175]": 46485, "[XXXXX674]": 46984, "[XXXXX632]": 46942, "[XXXXX198]": 46508, "[XXXXX395]": 46705, "[XXXXX326]": 46636, "[XXXXX92]": 46402, "[XXXXX596]": 46906, "[XXXXX133]": 46443, "[XXXXX249]": 46559, "[XXXXX280]": 46590, "[XXXXX521]": 46831, "[XXXXX677]": 46987, 
"[XXXXX723]": 47033, "[XXXXX621]": 46931, "[XXXXX459]": 46769, "[XXXXX195]": 46505, "[XXXXX380]": 46690, "[XXXXX721]": 47031, "[XXXXX437]": 46747, "[XXXXX576]": 46886, "[XXXXX376]": 46686, "[XXXXX341]": 46651, "[XXXXX623]": 46933, "[XXXXX475]": 46785, "[XXXXX171]": 46481, "[XXXXX185]": 46495, "[XXXXX468]": 46778, "[XXXXX558]": 46868, "[XXXXX214]": 46524, "[XXXXX786]": 47096, "[XXXXX130]": 46440, "[XXXXX588]": 46898, "[XXXXX594]": 46904, "[XXXXX615]": 46925, "[XXXXX778]": 47088, "[XXXXX734]": 47044, "[XXXXX477]": 46787, "[XXXXX15]": 46325, "[XXXXX571]": 46881, "[XXXXX628]": 46938, "[XXXXX369]": 46679, "[XXXXX25]": 46335, "[XXXXX276]": 46586, "[XXXXX742]": 47052, "[XXXXX56]": 46366, "[XXXXX421]": 46731, "[XXXXX238]": 46548, "[XXXXX177]": 46487, "[XXXXX42]": 46352, "[XXXXX1]": 46311, "[XXXXX312]": 46622, "[XXXXX473]": 46783, "[XXXXX563]": 46873, "[XXXXX458]": 46768, "[XXXXX247]": 46557, "[XXXXX10]": 46320, "[XXXXX682]": 46992, "[XXXXX704]": 47014, "[XXXXX273]": 46583, "[XXXXX4]": 46314, "[XXXXX684]": 46994, "[XXXXX436]": 46746, "[XXXXX200]": 46510, "[XXXXX181]": 46491, "[XXXXX738]": 47048, "[XXXXX344]": 46654, "[XXXXX219]": 46529, "[XXXXX695]": 47005, "[XXXXX613]": 46923, "[XXXXX638]": 46948, "[XXXXX147]": 46457, "[XXXXX295]": 46605, "[XXXXX70]": 46380, "[XXXXX231]": 46541, "[XXXXX552]": 46862, "[XXXXX146]": 46456, "[XXXXX248]": 46558, "[XXXXX40]": 46350, "[XXXXX16]": 46326, "[XXXXX432]": 46742, "[XXXXX163]": 46473, "[XXXXX633]": 46943, "[XXXXX487]": 46797, "[XXXXX597]": 46907, "[XXXXX99]": 46409, "[XXXXX182]": 46492, "[XXXXX771]": 47081, "[XXXXX313]": 46623, "[XXXXX145]": 46455, "[XXXXX227]": 46537, "[XXXXX750]": 47060, "[XXXXX55]": 46365, "[XXXXX456]": 46766, "[XXXXX90]": 46400, "[XXXXX174]": 46484, "[XXXXX350]": 46660, "[XXXXX357]": 46667, "[XXXXX225]": 46535, "[XXXXX711]": 47021, "[XXXXX454]": 46764, "[XXXXX664]": 46974, "[XXXXX332]": 46642, "[XXXXX327]": 46637, "[XXXXX251]": 46561, "[XXXXX688]": 46998, "[XXXXX168]": 46478, "[XXXXX610]": 46920, "[XXXXX379]": 
46689, "[XXXXX447]": 46757, "[XXXXX463]": 46773, "[XXXXX316]": 46626, "[XXXXX223]": 46533, "[XXXXX667]": 46977, "[XXXXX544]": 46854, "[XXXXX698]": 47008, "[XXXXX780]": 47090, "[XXXXX413]": 46723, "[XXXXX716]": 47026, "[XXXXX335]": 46645, "[XXXXX678]": 46988, "[XXXXX184]": 46494, "[XXXXX768]": 47078, "[XXXXX78]": 46388, "[XXXXX26]": 46336, "[XXXXX122]": 46432, "[XXXXX104]": 46414, "[XXXXX277]": 46587, "[XXXXX411]": 46721, "[XXXXX640]": 46950, "[XXXXX785]": 47095, "[XXXXX296]": 46606, "[XXXXX343]": 46653, "[XXXXX324]": 46634, "[XXXXX460]": 46770, "[XXXXX211]": 46521, "[XXXXX538]": 46848, "[XXXXX141]": 46451, "[XXXXX121]": 46431, "[XXXXX534]": 46844, "[XXXXX402]": 46712, "[XXXXX605]": 46915, "[XXXXX199]": 46509, "[XXXXX491]": 46801, "[XXXXX216]": 46526, "[XXXXX106]": 46416, "[XXXXX598]": 46908, "[XXXXX80]": 46390, "[XXXXX741]": 47051, "[XXXXX101]": 46411, "[XXXXX151]": 46461, "[XXXXX87]": 46397, "[XXXXX642]": 46952, "[XXXXX492]": 46802, "[XXXXX707]": 47017, "[XXXXX471]": 46781, "[XXXXX541]": 46851, "[XXXXX714]": 47024, "[XXXXX700]": 47010, "[XXXXX22]": 46332, "[XXXXX201]": 46511, "[XXXXX393]": 46703, "[XXXXX577]": 46887, "[XXXXX339]": 46649, "[XXXXX735]": 47045, "[XXXXX604]": 46914, "[XXXXX451]": 46761, "[XXXXX302]": 46612, "[XXXXX644]": 46954, "[XXXXX120]": 46430, "[XXXXX222]": 46532, "[XXXXX514]": 46824, "[XXXXX496]": 46806, "[XXXXX555]": 46865, "[XXXXX645]": 46955, "[XXXXX82]": 46392, "[XXXXX570]": 46880, "[XXXXX758]": 47068, "[XXXXX66]": 46376, "[XXXXX112]": 46422, "[XXXXX194]": 46504, "[XXXXX535]": 46845, "[XXXXX377]": 46687, "[XXXXX722]": 47032, "[XXXXX511]": 46821, "[XXXXX474]": 46784, "[XXXXX657]": 46967, "[XXXXX717]": 47027, "[XXXXX86]": 46396, "[XXXXX536]": 46846, "[XXXXX237]": 46547, "[XXXXX206]": 46516, "[XXXXX254]": 46564, "[XXXXX334]": 46644, "[XXXXX365]": 46675, "[XXXXX17]": 46327, "[XXXXX497]": 46807, "[XXXXX203]": 46513, "[XXXXX792]": 47102, "[XXXXX7]": 46317, "[XXXXX256]": 46566, "[XXXXX309]": 46619, "[XXXXX328]": 46638, "[XXXXX465]": 46775, 
"[XXXXX574]": 46884, "[XXXXX527]": 46837, "[XXXXX383]": 46693, "[XXXXX314]": 46624, "[XXXXX107]": 46417, "[XXXXX20]": 46330, "[XXXXX105]": 46415, "[XXXXX63]": 46373, "[XXXXX123]": 46433, "[XXXXX289]": 46599, "[XXXXX329]": 46639, "[XXXXX187]": 46497, "[XXXXX190]": 46500, "[XXXXX331]": 46641, "[XXXXX549]": 46859, "[XXXXX317]": 46627, "[XXXXX306]": 46616, "[XXXXX0]": 46310, "[XXXXX565]": 46875, "[XXXXX337]": 46647, "[XXXXX743]": 47053, "[XXXXX488]": 46798, "[XXXXX12]": 46322, "[XXXXX516]": 46826, "[XXXXX298]": 46608, "[XXXXX115]": 46425, "[XXXXX485]": 46795, "[XXXXX269]": 46579, "[XXXXX732]": 47042, "[XXXXX765]": 47075, "[XXXXX603]": 46913, "[XXXXX116]": 46426, "[XXXXX79]": 46389}
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "<unk>", "sep_token": "[SEP]", "pad_token": "<pad>", "cls_token": "[CLS]", "mask_token": {"content": "[MASK]", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false}, "additional_special_tokens": ["[XXXXX0]", "[XXXXX1]", "[XXXXX2]", "[XXXXX3]", "[XXXXX4]", "[XXXXX5]", "[XXXXX6]", "[XXXXX7]", "[XXXXX8]", "[XXXXX9]", "[XXXXX10]", "[XXXXX11]", "[XXXXX12]", "[XXXXX13]", "[XXXXX14]", "[XXXXX15]", "[XXXXX16]", "[XXXXX17]", "[XXXXX18]", "[XXXXX19]", "[XXXXX20]", "[XXXXX21]", "[XXXXX22]", "[XXXXX23]", "[XXXXX24]", "[XXXXX25]", "[XXXXX26]", "[XXXXX27]", "[XXXXX28]", "[XXXXX29]", "[XXXXX30]", "[XXXXX31]", "[XXXXX32]", "[XXXXX33]", "[XXXXX34]", "[XXXXX35]", "[XXXXX36]", "[XXXXX37]", "[XXXXX38]", "[XXXXX39]", "[XXXXX40]", "[XXXXX41]", "[XXXXX42]", "[XXXXX43]", "[XXXXX44]", "[XXXXX45]", "[XXXXX46]", "[XXXXX47]", "[XXXXX48]", "[XXXXX49]", "[XXXXX50]", "[XXXXX51]", "[XXXXX52]", "[XXXXX53]", "[XXXXX54]", "[XXXXX55]", "[XXXXX56]", "[XXXXX57]", "[XXXXX58]", "[XXXXX59]", "[XXXXX60]", "[XXXXX61]", "[XXXXX62]", "[XXXXX63]", "[XXXXX64]", "[XXXXX65]", "[XXXXX66]", "[XXXXX67]", "[XXXXX68]", "[XXXXX69]", "[XXXXX70]", "[XXXXX71]", "[XXXXX72]", "[XXXXX73]", "[XXXXX74]", "[XXXXX75]", "[XXXXX76]", "[XXXXX77]", "[XXXXX78]", "[XXXXX79]", "[XXXXX80]", "[XXXXX81]", "[XXXXX82]", "[XXXXX83]", "[XXXXX84]", "[XXXXX85]", "[XXXXX86]", "[XXXXX87]", "[XXXXX88]", "[XXXXX89]", "[XXXXX90]", "[XXXXX91]", "[XXXXX92]", "[XXXXX93]", "[XXXXX94]", "[XXXXX95]", "[XXXXX96]", "[XXXXX97]", "[XXXXX98]", "[XXXXX99]", "[XXXXX100]", "[XXXXX101]", "[XXXXX102]", "[XXXXX103]", "[XXXXX104]", "[XXXXX105]", "[XXXXX106]", "[XXXXX107]", "[XXXXX108]", "[XXXXX109]", "[XXXXX110]", "[XXXXX111]", "[XXXXX112]", "[XXXXX113]", "[XXXXX114]", "[XXXXX115]", "[XXXXX116]", "[XXXXX117]", "[XXXXX118]", "[XXXXX119]", "[XXXXX120]", "[XXXXX121]", "[XXXXX122]", "[XXXXX123]", "[XXXXX124]", "[XXXXX125]", "[XXXXX126]", "[XXXXX127]", "[XXXXX128]", "[XXXXX129]", 
"[XXXXX130]", "[XXXXX131]", "[XXXXX132]", "[XXXXX133]", "[XXXXX134]", "[XXXXX135]", "[XXXXX136]", "[XXXXX137]", "[XXXXX138]", "[XXXXX139]", "[XXXXX140]", "[XXXXX141]", "[XXXXX142]", "[XXXXX143]", "[XXXXX144]", "[XXXXX145]", "[XXXXX146]", "[XXXXX147]", "[XXXXX148]", "[XXXXX149]", "[XXXXX150]", "[XXXXX151]", "[XXXXX152]", "[XXXXX153]", "[XXXXX154]", "[XXXXX155]", "[XXXXX156]", "[XXXXX157]", "[XXXXX158]", "[XXXXX159]", "[XXXXX160]", "[XXXXX161]", "[XXXXX162]", "[XXXXX163]", "[XXXXX164]", "[XXXXX165]", "[XXXXX166]", "[XXXXX167]", "[XXXXX168]", "[XXXXX169]", "[XXXXX170]", "[XXXXX171]", "[XXXXX172]", "[XXXXX173]", "[XXXXX174]", "[XXXXX175]", "[XXXXX176]", "[XXXXX177]", "[XXXXX178]", "[XXXXX179]", "[XXXXX180]", "[XXXXX181]", "[XXXXX182]", "[XXXXX183]", "[XXXXX184]", "[XXXXX185]", "[XXXXX186]", "[XXXXX187]", "[XXXXX188]", "[XXXXX189]", "[XXXXX190]", "[XXXXX191]", "[XXXXX192]", "[XXXXX193]", "[XXXXX194]", "[XXXXX195]", "[XXXXX196]", "[XXXXX197]", "[XXXXX198]", "[XXXXX199]", "[XXXXX200]", "[XXXXX201]", "[XXXXX202]", "[XXXXX203]", "[XXXXX204]", "[XXXXX205]", "[XXXXX206]", "[XXXXX207]", "[XXXXX208]", "[XXXXX209]", "[XXXXX210]", "[XXXXX211]", "[XXXXX212]", "[XXXXX213]", "[XXXXX214]", "[XXXXX215]", "[XXXXX216]", "[XXXXX217]", "[XXXXX218]", "[XXXXX219]", "[XXXXX220]", "[XXXXX221]", "[XXXXX222]", "[XXXXX223]", "[XXXXX224]", "[XXXXX225]", "[XXXXX226]", "[XXXXX227]", "[XXXXX228]", "[XXXXX229]", "[XXXXX230]", "[XXXXX231]", "[XXXXX232]", "[XXXXX233]", "[XXXXX234]", "[XXXXX235]", "[XXXXX236]", "[XXXXX237]", "[XXXXX238]", "[XXXXX239]", "[XXXXX240]", "[XXXXX241]", "[XXXXX242]", "[XXXXX243]", "[XXXXX244]", "[XXXXX245]", "[XXXXX246]", "[XXXXX247]", "[XXXXX248]", "[XXXXX249]", "[XXXXX250]", "[XXXXX251]", "[XXXXX252]", "[XXXXX253]", "[XXXXX254]", "[XXXXX255]", "[XXXXX256]", "[XXXXX257]", "[XXXXX258]", "[XXXXX259]", "[XXXXX260]", "[XXXXX261]", "[XXXXX262]", "[XXXXX263]", "[XXXXX264]", "[XXXXX265]", "[XXXXX266]", "[XXXXX267]", "[XXXXX268]", "[XXXXX269]", "[XXXXX270]", "[XXXXX271]", 
"[XXXXX272]", "[XXXXX273]", "[XXXXX274]", "[XXXXX275]", "[XXXXX276]", "[XXXXX277]", "[XXXXX278]", "[XXXXX279]", "[XXXXX280]", "[XXXXX281]", "[XXXXX282]", "[XXXXX283]", "[XXXXX284]", "[XXXXX285]", "[XXXXX286]", "[XXXXX287]", "[XXXXX288]", "[XXXXX289]", "[XXXXX290]", "[XXXXX291]", "[XXXXX292]", "[XXXXX293]", "[XXXXX294]", "[XXXXX295]", "[XXXXX296]", "[XXXXX297]", "[XXXXX298]", "[XXXXX299]", "[XXXXX300]", "[XXXXX301]", "[XXXXX302]", "[XXXXX303]", "[XXXXX304]", "[XXXXX305]", "[XXXXX306]", "[XXXXX307]", "[XXXXX308]", "[XXXXX309]", "[XXXXX310]", "[XXXXX311]", "[XXXXX312]", "[XXXXX313]", "[XXXXX314]", "[XXXXX315]", "[XXXXX316]", "[XXXXX317]", "[XXXXX318]", "[XXXXX319]", "[XXXXX320]", "[XXXXX321]", "[XXXXX322]", "[XXXXX323]", "[XXXXX324]", "[XXXXX325]", "[XXXXX326]", "[XXXXX327]", "[XXXXX328]", "[XXXXX329]", "[XXXXX330]", "[XXXXX331]", "[XXXXX332]", "[XXXXX333]", "[XXXXX334]", "[XXXXX335]", "[XXXXX336]", "[XXXXX337]", "[XXXXX338]", "[XXXXX339]", "[XXXXX340]", "[XXXXX341]", "[XXXXX342]", "[XXXXX343]", "[XXXXX344]", "[XXXXX345]", "[XXXXX346]", "[XXXXX347]", "[XXXXX348]", "[XXXXX349]", "[XXXXX350]", "[XXXXX351]", "[XXXXX352]", "[XXXXX353]", "[XXXXX354]", "[XXXXX355]", "[XXXXX356]", "[XXXXX357]", "[XXXXX358]", "[XXXXX359]", "[XXXXX360]", "[XXXXX361]", "[XXXXX362]", "[XXXXX363]", "[XXXXX364]", "[XXXXX365]", "[XXXXX366]", "[XXXXX367]", "[XXXXX368]", "[XXXXX369]", "[XXXXX370]", "[XXXXX371]", "[XXXXX372]", "[XXXXX373]", "[XXXXX374]", "[XXXXX375]", "[XXXXX376]", "[XXXXX377]", "[XXXXX378]", "[XXXXX379]", "[XXXXX380]", "[XXXXX381]", "[XXXXX382]", "[XXXXX383]", "[XXXXX384]", "[XXXXX385]", "[XXXXX386]", "[XXXXX387]", "[XXXXX388]", "[XXXXX389]", "[XXXXX390]", "[XXXXX391]", "[XXXXX392]", "[XXXXX393]", "[XXXXX394]", "[XXXXX395]", "[XXXXX396]", "[XXXXX397]", "[XXXXX398]", "[XXXXX399]", "[XXXXX400]", "[XXXXX401]", "[XXXXX402]", "[XXXXX403]", "[XXXXX404]", "[XXXXX405]", "[XXXXX406]", "[XXXXX407]", "[XXXXX408]", "[XXXXX409]", "[XXXXX410]", "[XXXXX411]", "[XXXXX412]", "[XXXXX413]", 
"[XXXXX414]", "[XXXXX415]", "[XXXXX416]", "[XXXXX417]", "[XXXXX418]", "[XXXXX419]", "[XXXXX420]", "[XXXXX421]", "[XXXXX422]", "[XXXXX423]", "[XXXXX424]", "[XXXXX425]", "[XXXXX426]", "[XXXXX427]", "[XXXXX428]", "[XXXXX429]", "[XXXXX430]", "[XXXXX431]", "[XXXXX432]", "[XXXXX433]", "[XXXXX434]", "[XXXXX435]", "[XXXXX436]", "[XXXXX437]", "[XXXXX438]", "[XXXXX439]", "[XXXXX440]", "[XXXXX441]", "[XXXXX442]", "[XXXXX443]", "[XXXXX444]", "[XXXXX445]", "[XXXXX446]", "[XXXXX447]", "[XXXXX448]", "[XXXXX449]", "[XXXXX450]", "[XXXXX451]", "[XXXXX452]", "[XXXXX453]", "[XXXXX454]", "[XXXXX455]", "[XXXXX456]", "[XXXXX457]", "[XXXXX458]", "[XXXXX459]", "[XXXXX460]", "[XXXXX461]", "[XXXXX462]", "[XXXXX463]", "[XXXXX464]", "[XXXXX465]", "[XXXXX466]", "[XXXXX467]", "[XXXXX468]", "[XXXXX469]", "[XXXXX470]", "[XXXXX471]", "[XXXXX472]", "[XXXXX473]", "[XXXXX474]", "[XXXXX475]", "[XXXXX476]", "[XXXXX477]", "[XXXXX478]", "[XXXXX479]", "[XXXXX480]", "[XXXXX481]", "[XXXXX482]", "[XXXXX483]", "[XXXXX484]", "[XXXXX485]", "[XXXXX486]", "[XXXXX487]", "[XXXXX488]", "[XXXXX489]", "[XXXXX490]", "[XXXXX491]", "[XXXXX492]", "[XXXXX493]", "[XXXXX494]", "[XXXXX495]", "[XXXXX496]", "[XXXXX497]", "[XXXXX498]", "[XXXXX499]", "[XXXXX500]", "[XXXXX501]", "[XXXXX502]", "[XXXXX503]", "[XXXXX504]", "[XXXXX505]", "[XXXXX506]", "[XXXXX507]", "[XXXXX508]", "[XXXXX509]", "[XXXXX510]", "[XXXXX511]", "[XXXXX512]", "[XXXXX513]", "[XXXXX514]", "[XXXXX515]", "[XXXXX516]", "[XXXXX517]", "[XXXXX518]", "[XXXXX519]", "[XXXXX520]", "[XXXXX521]", "[XXXXX522]", "[XXXXX523]", "[XXXXX524]", "[XXXXX525]", "[XXXXX526]", "[XXXXX527]", "[XXXXX528]", "[XXXXX529]", "[XXXXX530]", "[XXXXX531]", "[XXXXX532]", "[XXXXX533]", "[XXXXX534]", "[XXXXX535]", "[XXXXX536]", "[XXXXX537]", "[XXXXX538]", "[XXXXX539]", "[XXXXX540]", "[XXXXX541]", "[XXXXX542]", "[XXXXX543]", "[XXXXX544]", "[XXXXX545]", "[XXXXX546]", "[XXXXX547]", "[XXXXX548]", "[XXXXX549]", "[XXXXX550]", "[XXXXX551]", "[XXXXX552]", "[XXXXX553]", "[XXXXX554]", "[XXXXX555]", 
"[XXXXX556]", "[XXXXX557]", "[XXXXX558]", "[XXXXX559]", "[XXXXX560]", "[XXXXX561]", "[XXXXX562]", "[XXXXX563]", "[XXXXX564]", "[XXXXX565]", "[XXXXX566]", "[XXXXX567]", "[XXXXX568]", "[XXXXX569]", "[XXXXX570]", "[XXXXX571]", "[XXXXX572]", "[XXXXX573]", "[XXXXX574]", "[XXXXX575]", "[XXXXX576]", "[XXXXX577]", "[XXXXX578]", "[XXXXX579]", "[XXXXX580]", "[XXXXX581]", "[XXXXX582]", "[XXXXX583]", "[XXXXX584]", "[XXXXX585]", "[XXXXX586]", "[XXXXX587]", "[XXXXX588]", "[XXXXX589]", "[XXXXX590]", "[XXXXX591]", "[XXXXX592]", "[XXXXX593]", "[XXXXX594]", "[XXXXX595]", "[XXXXX596]", "[XXXXX597]", "[XXXXX598]", "[XXXXX599]", "[XXXXX600]", "[XXXXX601]", "[XXXXX602]", "[XXXXX603]", "[XXXXX604]", "[XXXXX605]", "[XXXXX606]", "[XXXXX607]", "[XXXXX608]", "[XXXXX609]", "[XXXXX610]", "[XXXXX611]", "[XXXXX612]", "[XXXXX613]", "[XXXXX614]", "[XXXXX615]", "[XXXXX616]", "[XXXXX617]", "[XXXXX618]", "[XXXXX619]", "[XXXXX620]", "[XXXXX621]", "[XXXXX622]", "[XXXXX623]", "[XXXXX624]", "[XXXXX625]", "[XXXXX626]", "[XXXXX627]", "[XXXXX628]", "[XXXXX629]", "[XXXXX630]", "[XXXXX631]", "[XXXXX632]", "[XXXXX633]", "[XXXXX634]", "[XXXXX635]", "[XXXXX636]", "[XXXXX637]", "[XXXXX638]", "[XXXXX639]", "[XXXXX640]", "[XXXXX641]", "[XXXXX642]", "[XXXXX643]", "[XXXXX644]", "[XXXXX645]", "[XXXXX646]", "[XXXXX647]", "[XXXXX648]", "[XXXXX649]", "[XXXXX650]", "[XXXXX651]", "[XXXXX652]", "[XXXXX653]", "[XXXXX654]", "[XXXXX655]", "[XXXXX656]", "[XXXXX657]", "[XXXXX658]", "[XXXXX659]", "[XXXXX660]", "[XXXXX661]", "[XXXXX662]", "[XXXXX663]", "[XXXXX664]", "[XXXXX665]", "[XXXXX666]", "[XXXXX667]", "[XXXXX668]", "[XXXXX669]", "[XXXXX670]", "[XXXXX671]", "[XXXXX672]", "[XXXXX673]", "[XXXXX674]", "[XXXXX675]", "[XXXXX676]", "[XXXXX677]", "[XXXXX678]", "[XXXXX679]", "[XXXXX680]", "[XXXXX681]", "[XXXXX682]", "[XXXXX683]", "[XXXXX684]", "[XXXXX685]", "[XXXXX686]", "[XXXXX687]", "[XXXXX688]", "[XXXXX689]", "[XXXXX690]", "[XXXXX691]", "[XXXXX692]", "[XXXXX693]", "[XXXXX694]", "[XXXXX695]", "[XXXXX696]", "[XXXXX697]", 
"[XXXXX698]", "[XXXXX699]", "[XXXXX700]", "[XXXXX701]", "[XXXXX702]", "[XXXXX703]", "[XXXXX704]", "[XXXXX705]", "[XXXXX706]", "[XXXXX707]", "[XXXXX708]", "[XXXXX709]", "[XXXXX710]", "[XXXXX711]", "[XXXXX712]", "[XXXXX713]", "[XXXXX714]", "[XXXXX715]", "[XXXXX716]", "[XXXXX717]", "[XXXXX718]", "[XXXXX719]", "[XXXXX720]", "[XXXXX721]", "[XXXXX722]", "[XXXXX723]", "[XXXXX724]", "[XXXXX725]", "[XXXXX726]", "[XXXXX727]", "[XXXXX728]", "[XXXXX729]", "[XXXXX730]", "[XXXXX731]", "[XXXXX732]", "[XXXXX733]", "[XXXXX734]", "[XXXXX735]", "[XXXXX736]", "[XXXXX737]", "[XXXXX738]", "[XXXXX739]", "[XXXXX740]", "[XXXXX741]", "[XXXXX742]", "[XXXXX743]", "[XXXXX744]", "[XXXXX745]", "[XXXXX746]", "[XXXXX747]", "[XXXXX748]", "[XXXXX749]", "[XXXXX750]", "[XXXXX751]", "[XXXXX752]", "[XXXXX753]", "[XXXXX754]", "[XXXXX755]", "[XXXXX756]", "[XXXXX757]", "[XXXXX758]", "[XXXXX759]", "[XXXXX760]", "[XXXXX761]", "[XXXXX762]", "[XXXXX763]", "[XXXXX764]", "[XXXXX765]", "[XXXXX766]", "[XXXXX767]", "[XXXXX768]", "[XXXXX769]", "[XXXXX770]", "[XXXXX771]", "[XXXXX772]", "[XXXXX773]", "[XXXXX774]", "[XXXXX775]", "[XXXXX776]", "[XXXXX777]", "[XXXXX778]", "[XXXXX779]", "[XXXXX780]", "[XXXXX781]", "[XXXXX782]", "[XXXXX783]", "[XXXXX784]", "[XXXXX785]", "[XXXXX786]", "[XXXXX787]", "[XXXXX788]", "[XXXXX789]", "[XXXXX790]", "[XXXXX791]", "[XXXXX792]", "[XXXXX793]"]}
spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d9b4a0fa8b0f910794b7b6bdca6c82c453ad2f9bd767285cfd8c25b983361b7
3
+ size 990686
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": false, "remove_space": true, "keep_accents": true, "bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "<unk>", "sep_token": "[SEP]", "pad_token": "<pad>", "cls_token": "[CLS]", "mask_token": {"content": "[MASK]", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false, "__type": "AddedToken"}, "sp_model_kwargs": {}, "name_or_path": "models/5mb/ctd_latn_5mb", "model_input_names": ["input_ids", "attention_mask"], "special_tokens_map_file": "models/5mb/ctd_latn_5mb/special_tokens_map.json", "tokenizer_class": "AlbertTokenizer"}