goldfish-models commited on
Commit
fdaed79
1 Parent(s): fdf4474

Upload rmc_latn_full tokenizer.

Browse files
added_tokens.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"[XXXXX42]": 32458, "[XXXXX85]": 32501, "[XXXXX315]": 32731, "[XXXXX243]": 32659, "[XXXXX198]": 32614, "[XXXXX329]": 32745, "[XXXXX141]": 32557, "[XXXXX11]": 32427, "[XXXXX235]": 32651, "[XXXXX49]": 32465, "[XXXXX182]": 32598, "[XXXXX193]": 32609, "[XXXXX161]": 32577, "[XXXXX326]": 32742, "[XXXXX146]": 32562, "[XXXXX286]": 32702, "[XXXXX285]": 32701, "[XXXXX209]": 32625, "[XXXXX87]": 32503, "[XXXXX203]": 32619, "[XXXXX308]": 32724, "[XXXXX313]": 32729, "[XXXXX250]": 32666, "[XXXXX181]": 32597, "[XXXXX296]": 32712, "[XXXXX259]": 32675, "[XXXXX174]": 32590, "[XXXXX1]": 32417, "[XXXXX349]": 32765, "[XXXXX137]": 32553, "[XXXXX149]": 32565, "[XXXXX39]": 32455, "[XXXXX36]": 32452, "[XXXXX70]": 32486, "[XXXXX144]": 32560, "[XXXXX3]": 32419, "[CLS]": 32412, "[XXXXX272]": 32688, "[XXXXX115]": 32531, "[XXXXX273]": 32689, "[XXXXX24]": 32440, "[XXXXX226]": 32642, "[XXXXX102]": 32518, "[XXXXX38]": 32454, "[XXXXX330]": 32746, "[XXXXX204]": 32620, "[XXXXX201]": 32617, "[XXXXX218]": 32634, "[XXXXX138]": 32554, "[XXXXX236]": 32652, "[XXXXX199]": 32615, "[XXXXX22]": 32438, "[XXXXX61]": 32477, "[XXXXX13]": 32429, "[XXXXX93]": 32509, "[XXXXX233]": 32649, "[XXXXX214]": 32630, "[XXXXX191]": 32607, "[XXXXX256]": 32672, "[XXXXX5]": 32421, "[XXXXX317]": 32733, "[XXXXX232]": 32648, "[XXXXX172]": 32588, "[XXXXX104]": 32520, "[XXXXX0]": 32416, "[XXXXX234]": 32650, "[XXXXX341]": 32757, "[XXXXX271]": 32687, "[XXXXX9]": 32425, "[XXXXX338]": 32754, "[XXXXX311]": 32727, "[XXXXX44]": 32460, "[XXXXX40]": 32456, "[XXXXX282]": 32698, "[XXXXX65]": 32481, "[XXXXX292]": 32708, "[XXXXX110]": 32526, "[XXXXX331]": 32747, "[XXXXX225]": 32641, "[XXXXX206]": 32622, "[XXXXX127]": 32543, "[XXXXX347]": 32763, "[XXXXX74]": 32490, "[XXXXX223]": 32639, "[XXXXX111]": 32527, "[XXXXX134]": 32550, "[XXXXX197]": 32613, "[XXXXX152]": 32568, "[XXXXX288]": 32704, "[XXXXX103]": 32519, "[XXXXX142]": 32558, "[XXXXX301]": 32717, "[XXXXX78]": 32494, "[XXXXX340]": 32756, "[XXXXX328]": 32744, "[XXXXX7]": 32423, "[XXXXX81]": 32497, "[XXXXX62]": 32478, "[XXXXX155]": 32571, "[XXXXX133]": 32549, "[XXXXX105]": 32521, "[XXXXX17]": 32433, "[XXXXX59]": 32475, "[XXXXX23]": 32439, "[XXXXX12]": 32428, "[XXXXX26]": 32442, "[XXXXX52]": 32468, "[XXXXX268]": 32684, "[XXXXX68]": 32484, "[XXXXX123]": 32539, "[XXXXX208]": 32624, "[XXXXX207]": 32623, "[XXXXX113]": 32529, "[XXXXX333]": 32749, "[XXXXX97]": 32513, "[XXXXX284]": 32700, "[XXXXX342]": 32758, "[XXXXX345]": 32761, "[XXXXX295]": 32711, "[XXXXX153]": 32569, "[XXXXX257]": 32673, "[XXXXX260]": 32676, "[XXXXX121]": 32537, "[XXXXX319]": 32735, "[XXXXX276]": 32692, "[XXXXX82]": 32498, "[XXXXX16]": 32432, "[SEP]": 32413, "[XXXXX98]": 32514, "[XXXXX66]": 32482, "[XXXXX280]": 32696, "[XXXXX41]": 32457, "[XXXXX215]": 32631, "[XXXXX293]": 32709, "[XXXXX124]": 32540, "[XXXXX129]": 32545, "[XXXXX58]": 32474, "[XXXXX50]": 32466, "[XXXXX281]": 32697, "[XXXXX109]": 32525, "[XXXXX176]": 32592, "[XXXXX241]": 32657, "[XXXXX55]": 32471, "[XXXXX99]": 32515, "[XXXXX20]": 32436, "[XXXXX30]": 32446, "[XXXXX183]": 32599, "[XXXXX189]": 32605, "[XXXXX8]": 32424, "[XXXXX217]": 32633, "[XXXXX140]": 32556, "[XXXXX322]": 32738, "[XXXXX135]": 32551, "[XXXXX54]": 32470, "[XXXXX255]": 32671, "[XXXXX73]": 32489, "[XXXXX195]": 32611, "[XXXXX303]": 32719, "[XXXXX48]": 32464, "[XXXXX324]": 32740, "[XXXXX247]": 32663, "[XXXXX148]": 32564, "[XXXXX157]": 32573, "[XXXXX242]": 32658, "[XXXXX304]": 32720, "[XXXXX169]": 32585, "[XXXXX19]": 32435, "[XXXXX114]": 32530, "[XXXXX287]": 32703, "[XXXXX212]": 32628, "[XXXXX279]": 32695, "[XXXXX194]": 32610, "[XXXXX300]": 32716, "[XXXXX187]": 32603, "[XXXXX297]": 32713, "[XXXXX64]": 32480, "[XXXXX53]": 32469, "[XXXXX202]": 32618, "[XXXXX239]": 32655, "[XXXXX316]": 32732, "[XXXXX45]": 32461, "[XXXXX240]": 32656, "[XXXXX220]": 32636, "[XXXXX246]": 32662, "[XXXXX139]": 32555, "[XXXXX245]": 32661, "[XXXXX171]": 32587, "[XXXXX339]": 32755, "[XXXXX283]": 32699, "[XXXXX244]": 32660, "[XXXXX230]": 32646, "[XXXXX180]": 32596, "[XXXXX86]": 32502, "[XXXXX200]": 32616, "[XXXXX2]": 32418, "[XXXXX274]": 32690, "[XXXXX95]": 32511, "[XXXXX14]": 32430, "[XXXXX76]": 32492, "[XXXXX132]": 32548, "[XXXXX130]": 32546, "[XXXXX270]": 32686, "[XXXXX350]": 32766, "[XXXXX323]": 32739, "[XXXXX32]": 32448, "[XXXXX131]": 32547, "[XXXXX21]": 32437, "[XXXXX254]": 32670, "[XXXXX249]": 32665, "[XXXXX332]": 32748, "[XXXXX277]": 32693, "[XXXXX46]": 32462, "[XXXXX4]": 32420, "[XXXXX221]": 32637, "[XXXXX310]": 32726, "[XXXXX178]": 32594, "[XXXXX186]": 32602, "[XXXXX253]": 32669, "[XXXXX184]": 32600, "[XXXXX264]": 32680, "[XXXXX278]": 32694, "[XXXXX75]": 32491, "[XXXXX344]": 32760, "[XXXXX227]": 32643, "[XXXXX346]": 32762, "[XXXXX77]": 32493, "[XXXXX147]": 32563, "[XXXXX164]": 32580, "[XXXXX167]": 32583, "[XXXXX125]": 32541, "[XXXXX37]": 32453, "[XXXXX289]": 32705, "[XXXXX188]": 32604, "[XXXXX219]": 32635, "[XXXXX224]": 32640, "[XXXXX177]": 32593, "[XXXXX185]": 32601, "[XXXXX343]": 32759, "[XXXXX314]": 32730, "[XXXXX63]": 32479, "[XXXXX80]": 32496, "[XXXXX263]": 32679, "[XXXXX96]": 32512, "[XXXXX119]": 32535, "[XXXXX205]": 32621, "[XXXXX159]": 32575, "[XXXXX18]": 32434, "[XXXXX305]": 32721, "[XXXXX154]": 32570, "[XXXXX28]": 32444, "[XXXXX56]": 32472, "[XXXXX248]": 32664, "[XXXXX71]": 32487, "[XXXXX192]": 32608, "[XXXXX116]": 32532, "[XXXXX262]": 32678, "[XXXXX291]": 32707, "[XXXXX89]": 32505, "[XXXXX231]": 32647, "[XXXXX258]": 32674, "[XXXXX33]": 32449, "[XXXXX60]": 32476, "[XXXXX100]": 32516, "[XXXXX51]": 32467, "[XXXXX91]": 32507, "[XXXXX88]": 32504, "[XXXXX35]": 32451, "[XXXXX238]": 32654, "[XXXXX163]": 32579, "[XXXXX325]": 32741, "[XXXXX31]": 32447, "[XXXXX106]": 32522, "[XXXXX69]": 32485, "[XXXXX307]": 32723, "[XXXXX112]": 32528, "[XXXXX190]": 32606, "[XXXXX47]": 32463, "[XXXXX251]": 32667, "[XXXXX117]": 32533, "[XXXXX210]": 32626, "[XXXXX294]": 32710, "[XXXXX320]": 32736, "[XXXXX57]": 32473, "[XXXXX175]": 32591, "[XXXXX122]": 32538, "[XXXXX72]": 32488, "[XXXXX266]": 32682, "[XXXXX335]": 32751, "[XXXXX290]": 32706, "[XXXXX145]": 32561, "[XXXXX348]": 32764, "[XXXXX120]": 32536, "[XXXXX170]": 32586, "[XXXXX337]": 32753, "[XXXXX318]": 32734, "[XXXXX229]": 32645, "[XXXXX213]": 32629, "[XXXXX151]": 32567, "[XXXXX143]": 32559, "[XXXXX160]": 32576, "[XXXXX166]": 32582, "[XXXXX92]": 32508, "[XXXXX299]": 32715, "[XXXXX269]": 32685, "<pad>": 32414, "[XXXXX27]": 32443, "[XXXXX211]": 32627, "[XXXXX309]": 32725, "[XXXXX351]": 32767, "[XXXXX126]": 32542, "[XXXXX237]": 32653, "[XXXXX267]": 32683, "[XXXXX34]": 32450, "[XXXXX29]": 32445, "[XXXXX136]": 32552, "[XXXXX252]": 32668, "[XXXXX158]": 32574, "[XXXXX79]": 32495, "[XXXXX216]": 32632, "[XXXXX43]": 32459, "[XXXXX168]": 32584, "[XXXXX156]": 32572, "[XXXXX173]": 32589, "[XXXXX327]": 32743, "[XXXXX302]": 32718, "[XXXXX67]": 32483, "[XXXXX306]": 32722, "[XXXXX321]": 32737, "[XXXXX118]": 32534, "[XXXXX10]": 32426, "[XXXXX165]": 32581, "[XXXXX94]": 32510, "[XXXXX84]": 32500, "[XXXXX275]": 32691, "[XXXXX228]": 32644, "[XXXXX108]": 32524, "[XXXXX128]": 32544, "[XXXXX179]": 32595, "[XXXXX162]": 32578, "[XXXXX261]": 32677, "[XXXXX83]": 32499, "[XXXXX312]": 32728, "[XXXXX90]": 32506, "[XXXXX334]": 32750, "[XXXXX150]": 32566, "[XXXXX265]": 32681, "[XXXXX6]": 32422, "[XXXXX107]": 32523, "[XXXXX196]": 32612, "[MASK]": 32415, "[XXXXX336]": 32752, "[XXXXX25]": 32441, "[XXXXX298]": 32714, "[XXXXX101]": 32517, "[XXXXX15]": 32431, "[XXXXX222]": 32638}
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "<unk>", "sep_token": "[SEP]", "pad_token": "<pad>", "cls_token": "[CLS]", "mask_token": {"content": "[MASK]", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false}, "additional_special_tokens": ["[XXXXX0]", "[XXXXX1]", "[XXXXX2]", "[XXXXX3]", "[XXXXX4]", "[XXXXX5]", "[XXXXX6]", "[XXXXX7]", "[XXXXX8]", "[XXXXX9]", "[XXXXX10]", "[XXXXX11]", "[XXXXX12]", "[XXXXX13]", "[XXXXX14]", "[XXXXX15]", "[XXXXX16]", "[XXXXX17]", "[XXXXX18]", "[XXXXX19]", "[XXXXX20]", "[XXXXX21]", "[XXXXX22]", "[XXXXX23]", "[XXXXX24]", "[XXXXX25]", "[XXXXX26]", "[XXXXX27]", "[XXXXX28]", "[XXXXX29]", "[XXXXX30]", "[XXXXX31]", "[XXXXX32]", "[XXXXX33]", "[XXXXX34]", "[XXXXX35]", "[XXXXX36]", "[XXXXX37]", "[XXXXX38]", "[XXXXX39]", "[XXXXX40]", "[XXXXX41]", "[XXXXX42]", "[XXXXX43]", "[XXXXX44]", "[XXXXX45]", "[XXXXX46]", "[XXXXX47]", "[XXXXX48]", "[XXXXX49]", "[XXXXX50]", "[XXXXX51]", "[XXXXX52]", "[XXXXX53]", "[XXXXX54]", "[XXXXX55]", "[XXXXX56]", "[XXXXX57]", "[XXXXX58]", "[XXXXX59]", "[XXXXX60]", "[XXXXX61]", "[XXXXX62]", "[XXXXX63]", "[XXXXX64]", "[XXXXX65]", "[XXXXX66]", "[XXXXX67]", "[XXXXX68]", "[XXXXX69]", "[XXXXX70]", "[XXXXX71]", "[XXXXX72]", "[XXXXX73]", "[XXXXX74]", "[XXXXX75]", "[XXXXX76]", "[XXXXX77]", "[XXXXX78]", "[XXXXX79]", "[XXXXX80]", "[XXXXX81]", "[XXXXX82]", "[XXXXX83]", "[XXXXX84]", "[XXXXX85]", "[XXXXX86]", "[XXXXX87]", "[XXXXX88]", "[XXXXX89]", "[XXXXX90]", "[XXXXX91]", "[XXXXX92]", "[XXXXX93]", "[XXXXX94]", "[XXXXX95]", "[XXXXX96]", "[XXXXX97]", "[XXXXX98]", "[XXXXX99]", "[XXXXX100]", "[XXXXX101]", "[XXXXX102]", "[XXXXX103]", "[XXXXX104]", "[XXXXX105]", "[XXXXX106]", "[XXXXX107]", "[XXXXX108]", "[XXXXX109]", "[XXXXX110]", "[XXXXX111]", "[XXXXX112]", "[XXXXX113]", "[XXXXX114]", "[XXXXX115]", "[XXXXX116]", "[XXXXX117]", "[XXXXX118]", "[XXXXX119]", "[XXXXX120]", "[XXXXX121]", "[XXXXX122]", "[XXXXX123]", "[XXXXX124]", "[XXXXX125]", "[XXXXX126]", "[XXXXX127]", "[XXXXX128]", "[XXXXX129]", "[XXXXX130]", "[XXXXX131]", "[XXXXX132]", "[XXXXX133]", "[XXXXX134]", "[XXXXX135]", "[XXXXX136]", "[XXXXX137]", "[XXXXX138]", "[XXXXX139]", "[XXXXX140]", "[XXXXX141]", "[XXXXX142]", "[XXXXX143]", "[XXXXX144]", "[XXXXX145]", "[XXXXX146]", "[XXXXX147]", "[XXXXX148]", "[XXXXX149]", "[XXXXX150]", "[XXXXX151]", "[XXXXX152]", "[XXXXX153]", "[XXXXX154]", "[XXXXX155]", "[XXXXX156]", "[XXXXX157]", "[XXXXX158]", "[XXXXX159]", "[XXXXX160]", "[XXXXX161]", "[XXXXX162]", "[XXXXX163]", "[XXXXX164]", "[XXXXX165]", "[XXXXX166]", "[XXXXX167]", "[XXXXX168]", "[XXXXX169]", "[XXXXX170]", "[XXXXX171]", "[XXXXX172]", "[XXXXX173]", "[XXXXX174]", "[XXXXX175]", "[XXXXX176]", "[XXXXX177]", "[XXXXX178]", "[XXXXX179]", "[XXXXX180]", "[XXXXX181]", "[XXXXX182]", "[XXXXX183]", "[XXXXX184]", "[XXXXX185]", "[XXXXX186]", "[XXXXX187]", "[XXXXX188]", "[XXXXX189]", "[XXXXX190]", "[XXXXX191]", "[XXXXX192]", "[XXXXX193]", "[XXXXX194]", "[XXXXX195]", "[XXXXX196]", "[XXXXX197]", "[XXXXX198]", "[XXXXX199]", "[XXXXX200]", "[XXXXX201]", "[XXXXX202]", "[XXXXX203]", "[XXXXX204]", "[XXXXX205]", "[XXXXX206]", "[XXXXX207]", "[XXXXX208]", "[XXXXX209]", "[XXXXX210]", "[XXXXX211]", "[XXXXX212]", "[XXXXX213]", "[XXXXX214]", "[XXXXX215]", "[XXXXX216]", "[XXXXX217]", "[XXXXX218]", "[XXXXX219]", "[XXXXX220]", "[XXXXX221]", "[XXXXX222]", "[XXXXX223]", "[XXXXX224]", "[XXXXX225]", "[XXXXX226]", "[XXXXX227]", "[XXXXX228]", "[XXXXX229]", "[XXXXX230]", "[XXXXX231]", "[XXXXX232]", "[XXXXX233]", "[XXXXX234]", "[XXXXX235]", "[XXXXX236]", "[XXXXX237]", "[XXXXX238]", "[XXXXX239]", "[XXXXX240]", "[XXXXX241]", "[XXXXX242]", "[XXXXX243]", "[XXXXX244]", "[XXXXX245]", "[XXXXX246]", "[XXXXX247]", "[XXXXX248]", "[XXXXX249]", "[XXXXX250]", "[XXXXX251]", "[XXXXX252]", "[XXXXX253]", "[XXXXX254]", "[XXXXX255]", "[XXXXX256]", "[XXXXX257]", "[XXXXX258]", "[XXXXX259]", "[XXXXX260]", "[XXXXX261]", "[XXXXX262]", "[XXXXX263]", "[XXXXX264]", "[XXXXX265]", "[XXXXX266]", "[XXXXX267]", "[XXXXX268]", "[XXXXX269]", "[XXXXX270]", "[XXXXX271]", "[XXXXX272]", "[XXXXX273]", "[XXXXX274]", "[XXXXX275]", "[XXXXX276]", "[XXXXX277]", "[XXXXX278]", "[XXXXX279]", "[XXXXX280]", "[XXXXX281]", "[XXXXX282]", "[XXXXX283]", "[XXXXX284]", "[XXXXX285]", "[XXXXX286]", "[XXXXX287]", "[XXXXX288]", "[XXXXX289]", "[XXXXX290]", "[XXXXX291]", "[XXXXX292]", "[XXXXX293]", "[XXXXX294]", "[XXXXX295]", "[XXXXX296]", "[XXXXX297]", "[XXXXX298]", "[XXXXX299]", "[XXXXX300]", "[XXXXX301]", "[XXXXX302]", "[XXXXX303]", "[XXXXX304]", "[XXXXX305]", "[XXXXX306]", "[XXXXX307]", "[XXXXX308]", "[XXXXX309]", "[XXXXX310]", "[XXXXX311]", "[XXXXX312]", "[XXXXX313]", "[XXXXX314]", "[XXXXX315]", "[XXXXX316]", "[XXXXX317]", "[XXXXX318]", "[XXXXX319]", "[XXXXX320]", "[XXXXX321]", "[XXXXX322]", "[XXXXX323]", "[XXXXX324]", "[XXXXX325]", "[XXXXX326]", "[XXXXX327]", "[XXXXX328]", "[XXXXX329]", "[XXXXX330]", "[XXXXX331]", "[XXXXX332]", "[XXXXX333]", "[XXXXX334]", "[XXXXX335]", "[XXXXX336]", "[XXXXX337]", "[XXXXX338]", "[XXXXX339]", "[XXXXX340]", "[XXXXX341]", "[XXXXX342]", "[XXXXX343]", "[XXXXX344]", "[XXXXX345]", "[XXXXX346]", "[XXXXX347]", "[XXXXX348]", "[XXXXX349]", "[XXXXX350]", "[XXXXX351]"]}
spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:703e0bcc3bab1b00c2b08916da3528a5518a232599fc31e122751a4a099c794f
3
+ size 823202
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": false, "remove_space": true, "keep_accents": true, "bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "<unk>", "sep_token": "[SEP]", "pad_token": "<pad>", "cls_token": "[CLS]", "mask_token": {"content": "[MASK]", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false, "__type": "AddedToken"}, "sp_model_kwargs": {}, "name_or_path": "models/full/rmc_latn_full", "model_input_names": ["input_ids", "attention_mask"], "special_tokens_map_file": "models/full/rmc_latn_full/special_tokens_map.json", "tokenizer_class": "AlbertTokenizer"}