Huertas97 committed on
Commit
eb15e8b
1 Parent(s): c7347bd

Update spaCy pipeline

Browse files
Files changed (30) hide show
  1. .gitattributes +3 -0
  2. .ipynb_checkpoints/XX-LeetSpeakNER-paraphrase-multilingual-mpnet-base-v2_test_IT-checkpoint.csv +6 -0
  3. README.md +56 -0
  4. XX-LeetSpeakNER-paraphrase-multilingual-mpnet-base-v2_test_DE.csv +6 -0
  5. XX-LeetSpeakNER-paraphrase-multilingual-mpnet-base-v2_test_EN.csv +6 -0
  6. XX-LeetSpeakNER-paraphrase-multilingual-mpnet-base-v2_test_ES.csv +6 -0
  7. XX-LeetSpeakNER-paraphrase-multilingual-mpnet-base-v2_test_FR.csv +6 -0
  8. XX-LeetSpeakNER-paraphrase-multilingual-mpnet-base-v2_test_IT.csv +6 -0
  9. config.cfg +150 -0
  10. meta.json +72 -0
  11. ner/cfg +13 -0
  12. ner/model +0 -0
  13. ner/moves +1 -0
  14. plots/.ipynb_checkpoints/TEST_XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2_News Commentary_confusion_matrix-checkpoint.pdf +0 -0
  15. plots/.ipynb_checkpoints/TEST_XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2_Overall_confusion_matrix-checkpoint.pdf +269 -1
  16. plots/.ipynb_checkpoints/TEST_XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2_ParaCrawl_confusion_matrix-checkpoint.pdf +0 -0
  17. plots/TEST_XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2_News Commentary_confusion_matrix.pdf +0 -0
  18. plots/TEST_XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2_Overall_confusion_matrix.pdf +0 -0
  19. plots/TEST_XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2_ParaCrawl_confusion_matrix.pdf +0 -0
  20. plots/TEST_XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2_TED2020_confusion_matrix.pdf +0 -0
  21. plots/TEST_XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2_WikiMatrix_confusion_matrix.pdf +0 -0
  22. tokenizer +3 -0
  23. transformer/cfg +3 -0
  24. transformer/model +3 -0
  25. vocab/key2row +1 -0
  26. vocab/lookups.bin +3 -0
  27. vocab/strings.json +3 -0
  28. vocab/vectors +0 -0
  29. vocab/vectors.cfg +3 -0
  30. xx_pipeline-any-py3-none-any.whl +3 -0
.gitattributes CHANGED
@@ -32,3 +32,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ xx_pipeline-any-py3-none-any.whl filter=lfs diff=lfs merge=lfs -text
36
+ vocab/strings.json filter=lfs diff=lfs merge=lfs -text
37
+ transformer/model filter=lfs diff=lfs merge=lfs -text
.ipynb_checkpoints/XX-LeetSpeakNER-paraphrase-multilingual-mpnet-base-v2_test_IT-checkpoint.csv ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ Model,Dataset,lang,P-INV_CAM,R-INV_CAM,F1-INV_CAM,TP-INV_CAM,FN-INV_CAM,FP-INV_CAM,P-LEETSPEAK,R-LEETSPEAK,F1-LEETSPEAK,TP-LEETSPEAK,FN-LEETSPEAK,FP-LEETSPEAK,P-MIX,R-MIX,F1-MIX,TP-MIX,FN-MIX,FP-MIX,P-O,R-O,F1-O,TP-O,FN-O,FP-O,P-PUNCT_CAM,R-PUNCT_CAM,F1-PUNCT_CAM,TP-PUNCT_CAM,FN-PUNCT_CAM,FP-PUNCT_CAM,F1-Macro,F1-Micro,F1-Weighted
2
+ XX-LeetSpeakNER-paraphrase-multilingual-mpnet-base-v2,overall,IT,0.9056304520222046,0.9217110573042776,0.9136,1142,119,97,0.9893648018648019,0.976279470960322,0.9827785817655571,6791,73,165,0.9748310810810811,0.8860740058344849,0.9283358803185072,5771,149,742,0.9983891261247522,0.9980942035200061,0.9982416430392413,104743,169,200,0.8497516556291391,0.9922667955534075,0.9154960981047937,4106,726,32,0.9476904406456198,0.990015267915566,0.8958518228704071
3
+ XX-LeetSpeakNER-paraphrase-multilingual-mpnet-base-v2,WikiMatrix,IT,0.9264305177111717,0.947075208913649,0.9366391184573003,340,27,19,0.9895201323772752,0.980327868852459,0.984902552841065,1794,19,36,0.9759181216134859,0.8724434876210979,0.92128445581131,1621,40,237,0.9989164506017569,0.9981053282808754,0.9985107247161673,25813,28,49,0.8376852505292872,0.9974789915966387,0.9106252397391639,1187,230,3,0.9503924183130014,0.9889385510788128,0.9003312569168942
4
+ XX-LeetSpeakNER-paraphrase-multilingual-mpnet-base-v2,TED2020,IT,0.9192708333333334,0.9514824797843666,0.9350993377483445,353,31,18,0.9943705220061413,0.9724724724724725,0.9832995951417004,1943,11,55,0.9665450121654501,0.902328222600795,0.9333333333333333,1589,55,172,0.9986299081035923,0.9983296585822142,0.9984797607711454,29884,41,50,0.8722466960352423,0.9858921161825727,0.925594078691079,1188,174,17,0.9551612211371205,0.9911537043862882,0.9087880971884397
5
+ XX-LeetSpeakNER-paraphrase-multilingual-mpnet-base-v2,OPUS_ParaCrawl,IT,0.8801571709233792,0.8818897637795275,0.8810226155358898,448,61,60,0.9857992073976222,0.9761281883584042,0.980939861978311,2985,43,73,0.9796623177283192,0.8870743571924948,0.9310722100656456,2553,53,325,0.9979772544324961,0.9979978533685601,0.9979875537942351,48351,98,97,0.8470993117010817,0.9936562860438293,0.9145435244161357,1723,311,11,0.9411131531580434,0.9900045915303924,0.8850525078969678
6
+ XX-LeetSpeakNER-paraphrase-multilingual-mpnet-base-v2,OPUS_News_Commentary,IT,1.0,1.0,1.0,1,0,0,1.0,1.0,1.0,27,0,0,1.0,0.5333333333333333,0.6956521739130436,8,0,7,1.0,1.0,1.0,373,0,0,0.5333333333333333,1.0,0.6956521739130436,8,7,0,0.8782608695652174,0.9834905660377359,0.8133333333333332
README.md ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - spacy
4
+ - token-classification
5
+ language:
6
+ - multilingual
7
+ model-index:
8
+ - name: xx_pipeline
9
+ results:
10
+ - task:
11
+ name: NER
12
+ type: token-classification
13
+ metrics:
14
+ - name: NER Precision
15
+ type: precision
16
+ value: 0.9200034895
17
+ - name: NER Recall
18
+ type: recall
19
+ value: 0.918641115
20
+ - name: NER F Score
21
+ type: f_score
22
+ value: 0.9193217975
23
+ ---
24
+ | Feature | Description |
25
+ | --- | --- |
26
+ | **Name** | `xx_pipeline` |
27
+ | **Version** | `0.0.0` |
28
+ | **spaCy** | `>=3.4.3,<3.5.0` |
29
+ | **Default Pipeline** | `transformer`, `ner` |
30
+ | **Components** | `transformer`, `ner` |
31
+ | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
32
+ | **Sources** | n/a |
33
+ | **License** | n/a |
34
+ | **Author** | n/a |
35
+
36
+ ### Label Scheme
37
+
38
+ <details>
39
+
40
+ <summary>View label scheme (4 labels for 1 component)</summary>
41
+
42
+ | Component | Labels |
43
+ | --- | --- |
44
+ | **`ner`** | `INV_CAMO`, `LEETSPEAK`, `MIX`, `PUNCT_CAMO` |
45
+
46
+ </details>
47
+
48
+ ### Accuracy
49
+
50
+ | Type | Score |
51
+ | --- | --- |
52
+ | `ENTS_F` | 91.93 |
53
+ | `ENTS_P` | 92.00 |
54
+ | `ENTS_R` | 91.86 |
55
+ | `TRANSFORMER_LOSS` | 3820.37 |
56
+ | `NER_LOSS` | 3200.42 |
XX-LeetSpeakNER-paraphrase-multilingual-mpnet-base-v2_test_DE.csv ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ Model,Dataset,lang,P-INV_CAM,R-INV_CAM,F1-INV_CAM,TP-INV_CAM,FN-INV_CAM,FP-INV_CAM,P-LEETSPEAK,R-LEETSPEAK,F1-LEETSPEAK,TP-LEETSPEAK,FN-LEETSPEAK,FP-LEETSPEAK,P-MIX,R-MIX,F1-MIX,TP-MIX,FN-MIX,FP-MIX,P-O,R-O,F1-O,TP-O,FN-O,FP-O,P-PUNCT_CAM,R-PUNCT_CAM,F1-PUNCT_CAM,TP-PUNCT_CAM,FN-PUNCT_CAM,FP-PUNCT_CAM,F1-Macro,F1-Micro,F1-Weighted
2
+ XX-LeetSpeakNER-paraphrase-multilingual-mpnet-base-v2,overall,DE,0.9593908629441624,0.9402985074626866,0.949748743718593,1134,48,72,0.9802671523983,0.9236270022883295,0.9511045655375553,6458,130,534,0.9136866059817945,0.9160691003911343,0.9148763020833333,5621,531,515,0.9988433462421604,0.9983215872988688,0.9985823986158043,116581,135,196,0.9015554242961212,0.9941380807642206,0.9455859576664946,4579,500,27,0.9519795935243561,0.9900970401644599,0.9058705176113871
3
+ XX-LeetSpeakNER-paraphrase-multilingual-mpnet-base-v2,WikiMatrix,DE,0.9575971731448764,0.9377162629757786,0.9475524475524476,271,12,18,0.9799291617473436,0.9211986681465039,0.9496567505720824,1660,34,142,0.9075682382133995,0.8760479041916168,0.8915295551492992,1463,149,207,0.9987062309586411,0.9982063154382013,0.9984562106229398,23930,31,43,0.8513097072419107,0.9919210053859964,0.9162520729684909,1105,193,9,0.940689407373052,0.985475596228508,0.8848545038575294
4
+ XX-LeetSpeakNER-paraphrase-multilingual-mpnet-base-v2,TED2020,DE,0.9610778443113772,0.9610778443113772,0.9610778443113772,321,13,13,0.9796027911969941,0.9138708062093139,0.9455958549222798,1825,38,172,0.8917618761274805,0.9362373737373737,0.9134585771481367,1483,180,101,0.9995092907615808,0.9984314238096794,0.9989700665369714,30553,15,48,0.928937728937729,0.9929522317932654,0.9598788796366389,1268,97,9,0.9557962445110808,0.990417120666052,0.9109428130752214
5
+ XX-LeetSpeakNER-paraphrase-multilingual-mpnet-base-v2,OPUS_ParaCrawl,DE,0.9597069597069597,0.9274336283185841,0.9432943294329433,524,22,41,0.9805858310626703,0.9314137819475897,0.955367512858802,2879,57,212,0.9301907160849227,0.9275206315034087,0.9288537549407114,2585,194,202,0.9985411140583554,0.9982928648379878,0.9984169740168249,60232,88,103,0.9117773019271949,0.9957904583723106,0.9519338251732619,2129,206,9,0.9555732792845086,0.9917725927215741,0.9142058342563086
6
+ XX-LeetSpeakNER-paraphrase-multilingual-mpnet-base-v2,OPUS_News_Commentary,DE,0.9473684210526315,1.0,0.972972972972973,18,1,0,0.9894736842105263,0.9215686274509803,0.9543147208121827,94,1,8,0.9183673469387755,0.9473684210526315,0.9326424870466321,90,8,5,0.9994643813604713,0.9989293361884368,0.9991967871485944,1866,1,2,0.9506172839506173,1.0,0.9746835443037974,77,4,0,0.966762102456836,0.9930555555555556,0.929550840124867
XX-LeetSpeakNER-paraphrase-multilingual-mpnet-base-v2_test_EN.csv ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ Model,Dataset,lang,P-INV_CAM,R-INV_CAM,F1-INV_CAM,TP-INV_CAM,FN-INV_CAM,FP-INV_CAM,P-LEETSPEAK,R-LEETSPEAK,F1-LEETSPEAK,TP-LEETSPEAK,FN-LEETSPEAK,FP-LEETSPEAK,P-MIX,R-MIX,F1-MIX,TP-MIX,FN-MIX,FP-MIX,P-O,R-O,F1-O,TP-O,FN-O,FP-O,P-PUNCT_CAM,R-PUNCT_CAM,F1-PUNCT_CAM,TP-PUNCT_CAM,FN-PUNCT_CAM,FP-PUNCT_CAM,F1-Macro,F1-Micro,F1-Weighted
2
+ XX-LeetSpeakNER-paraphrase-multilingual-mpnet-base-v2,overall,EN,0.9295154185022027,0.8991477272727273,0.9140794223826715,633,48,71,0.9817336965229931,0.862836220250669,0.9184530055463949,6127,114,974,0.8228949858088931,0.8135054246165357,0.8181732668610666,4349,936,997,0.9981404078937078,0.998669773426768,0.998405020491358,114865,214,153,0.7638535841382816,0.9849229760734185,0.8604151753758054,3005,929,46,0.9019051781314593,0.9829218106995885,0.8156754939669992
3
+ XX-LeetSpeakNER-paraphrase-multilingual-mpnet-base-v2,WikiMatrix,EN,0.9289099526066351,0.8949771689497716,0.9116279069767441,196,15,23,0.985838779956427,0.8614945264159923,0.9194818389636779,1810,26,291,0.8194186765615337,0.8312421580928482,0.8252880722516349,1325,292,269,0.9980703176257189,0.9986484399135002,0.9983592950759551,25861,50,35,0.8004846526655897,0.9880358923230309,0.8844265952699687,991,247,12,0.9078367417075961,0.9795540843150619,0.8267880489582179
4
+ XX-LeetSpeakNER-paraphrase-multilingual-mpnet-base-v2,TED2020,EN,0.9701492537313433,0.9701492537313433,0.9701492537313433,195,6,6,0.9912998976458547,0.8658918194009835,0.9243617275113338,1937,17,300,0.8032890575585073,0.8540685944855414,0.8279009126466753,1270,311,217,0.9993485727821865,0.9994669509594882,0.9994077583654131,33750,22,18,0.8263157894736842,0.9863874345549738,0.8992840095465393,942,198,13,0.924220732360261,0.9856654936866073,0.8528804595032835
5
+ XX-LeetSpeakNER-paraphrase-multilingual-mpnet-base-v2,OPUS_ParaCrawl,EN,0.8992248062015504,0.8467153284671532,0.8721804511278195,232,26,42,0.9699279966116052,0.8612260248213615,0.9123505976095618,2290,71,369,0.8416873449131513,0.7702089009990918,0.8043632914394118,1696,319,506,0.9973565657693882,0.9981555659059153,0.9977559058784092,53576,142,99,0.6821499668214996,0.9799809342230696,0.8043818466353678,1028,479,21,0.8782064185381142,0.9826759551612957,0.7780101082718855
6
+ XX-LeetSpeakNER-paraphrase-multilingual-mpnet-base-v2,OPUS_News_Commentary,EN,0.9090909090909091,1.0,0.9523809523809523,10,1,0,1.0,0.8653846153846154,0.9278350515463918,90,0,14,0.8055555555555556,0.9206349206349206,0.8592592592592592,58,14,5,1.0,0.9994044073853484,0.9997021149836164,1678,0,1,0.8979591836734694,1.0,0.9462365591397849,44,5,0,0.9370827874620009,0.9894736842105263,0.8670401820993752
XX-LeetSpeakNER-paraphrase-multilingual-mpnet-base-v2_test_ES.csv ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ Model,Dataset,lang,P-INV_CAM,R-INV_CAM,F1-INV_CAM,TP-INV_CAM,FN-INV_CAM,FP-INV_CAM,P-LEETSPEAK,R-LEETSPEAK,F1-LEETSPEAK,TP-LEETSPEAK,FN-LEETSPEAK,FP-LEETSPEAK,P-MIX,R-MIX,F1-MIX,TP-MIX,FN-MIX,FP-MIX,P-O,R-O,F1-O,TP-O,FN-O,FP-O,P-PUNCT_CAM,R-PUNCT_CAM,F1-PUNCT_CAM,TP-PUNCT_CAM,FN-PUNCT_CAM,FP-PUNCT_CAM,F1-Macro,F1-Micro,F1-Weighted
2
+ XX-LeetSpeakNER-paraphrase-multilingual-mpnet-base-v2,overall,ES,0.9283429302623161,0.9283429302623161,0.9283429302623161,1451,112,112,0.9885572726200307,0.9713689579228005,0.9798877455565949,8380,97,247,0.9657499363381716,0.8870307566366507,0.9247180737580005,7585,269,966,0.9987828034471007,0.9986647193824327,0.9987237579243505,143598,175,192,0.8546575992408667,0.9899248946693534,0.9173315226616873,5404,919,55,0.94980080603259,0.9906423001369129,0.90101092918534
3
+ XX-LeetSpeakNER-paraphrase-multilingual-mpnet-base-v2,WikiMatrix,ES,0.9293785310734464,0.9138888888888889,0.9215686274509804,329,25,31,0.9903948772678762,0.9631551634665283,0.9765851091817943,1856,18,71,0.9575087310826542,0.8496900826446281,0.9003831417624522,1645,73,291,0.998149316508938,0.9982752818441865,0.9982122952026415,23731,44,41,0.8016759776536313,0.9913644214162349,0.8864864864864865,1148,284,10,0.936647132016871,0.9847700065173396,0.8785711094177607
4
+ XX-LeetSpeakNER-paraphrase-multilingual-mpnet-base-v2,TED2020,ES,0.9333333333333333,0.9357326478149101,0.9345314505776636,364,26,25,0.9919393077287814,0.9712163416898792,0.981468449448745,2092,17,62,0.964984552008239,0.9141463414634147,0.9388777555110221,1874,68,176,0.9989075131878765,0.9987516384745022,0.99882956974984,32002,35,40,0.8821917808219178,0.9884881043745203,0.932319942091929,1288,172,15,0.9572054334758399,0.9916179028941958,0.9144024740191629
5
+ XX-LeetSpeakNER-paraphrase-multilingual-mpnet-base-v2,OPUS_ParaCrawl,ES,0.9172932330827067,0.9277566539923955,0.9224952741020794,488,44,38,0.9884856943475226,0.9755509641873278,0.9819757365684576,2833,33,71,0.9742457689477557,0.8871021775544389,0.9286340522532,2648,70,337,0.9987801158889905,0.9987801158889905,0.9987801158889905,55675,68,68,0.8574640287769785,0.9906493506493507,0.9192576524463727,1907,317,18,0.9502285662518201,0.9916982663108781,0.901475505326671
6
+ XX-LeetSpeakNER-paraphrase-multilingual-mpnet-base-v2,OPUS_News_Commentary,ES,0.9407665505226481,0.9375,0.9391304347826087,270,17,18,0.9821867321867321,0.9738124238733252,0.9779816513761468,1599,29,43,0.9596774193548387,0.9037974683544304,0.9308996088657105,1428,60,152,0.9994410979320624,0.9986039152421431,0.999022331196946,32188,18,45,0.8790389395194698,0.9888164026095061,0.9307017543859649,1061,146,12,0.9555471561214753,0.9926662320730117,0.9108026989948275
XX-LeetSpeakNER-paraphrase-multilingual-mpnet-base-v2_test_FR.csv ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ Model,Dataset,lang,P-INV_CAM,R-INV_CAM,F1-INV_CAM,TP-INV_CAM,FN-INV_CAM,FP-INV_CAM,P-LEETSPEAK,R-LEETSPEAK,F1-LEETSPEAK,TP-LEETSPEAK,FN-LEETSPEAK,FP-LEETSPEAK,P-MIX,R-MIX,F1-MIX,TP-MIX,FN-MIX,FP-MIX,P-O,R-O,F1-O,TP-O,FN-O,FP-O,P-PUNCT_CAM,R-PUNCT_CAM,F1-PUNCT_CAM,TP-PUNCT_CAM,FN-PUNCT_CAM,FP-PUNCT_CAM,F1-Macro,F1-Micro,F1-Weighted
2
+ XX-LeetSpeakNER-paraphrase-multilingual-mpnet-base-v2,overall,FR,0.9451152579582875,0.9399563318777293,0.9425287356321839,861,50,55,0.9893436293436293,0.8920763124912965,0.9381956649091974,6406,69,775,0.864655020292924,0.891395306530835,0.8778215693299893,4900,767,597,0.9990071704357418,0.9991961288085368,0.9991016406877965,126784,126,102,0.8773920783266578,0.991450842343475,0.9309408570416716,3943,551,34,0.9377176935201679,0.9891801712620365,0.8785837713017278
3
+ XX-LeetSpeakNER-paraphrase-multilingual-mpnet-base-v2,WikiMatrix,FR,0.9422222222222222,0.9592760180995475,0.9506726457399103,212,13,9,0.9867708959711365,0.882258064516129,0.9315923928470053,1641,22,219,0.8617724867724867,0.8374035989717223,0.8494132985658409,1303,209,253,0.9991585067319462,0.9986619260618572,0.9989101546815549,26122,22,35,0.7905236907730673,0.9979013641133263,0.8821892393320964,951,252,2,0.9225555462332817,0.9831528279181708,0.8484950471190542
4
+ XX-LeetSpeakNER-paraphrase-multilingual-mpnet-base-v2,TED2020,FR,0.9449152542372882,0.9291666666666667,0.9369747899159665,223,13,17,0.9913444893248702,0.8901554404145078,0.938028938028938,1718,15,212,0.8554216867469879,0.9070262597586941,0.8804684808818463,1278,216,131,0.9990731014027066,0.9993509905120994,0.9992120266365897,32336,30,21,0.9071661237785016,0.9937555753791257,0.9484887186036611,1114,114,7,0.9406345908134004,0.9895296435221416,0.8854332232254306
5
+ XX-LeetSpeakNER-paraphrase-multilingual-mpnet-base-v2,OPUS_ParaCrawl,FR,0.9472477064220184,0.9343891402714932,0.9407744874715261,413,23,29,0.9901728227719417,0.8974201474201474,0.9415176413726439,2922,29,334,0.871824480369515,0.9184914841849149,0.8945497630331753,2265,333,201,0.9990791896869244,0.9993507277888506,0.9992149402901701,66185,61,43,0.9097744360902256,0.9896401308615049,0.9480282057978584,1815,180,19,0.9448170075930747,0.9915662975237788,0.8928177435363803
6
+ XX-LeetSpeakNER-paraphrase-multilingual-mpnet-base-v2,OPUS_News_Commentary,FR,0.9285714285714286,1.0,0.962962962962963,13,1,0,1.0,0.868421052631579,0.9295774647887324,66,0,10,0.8412698412698413,0.9298245614035088,0.8833333333333334,53,10,4,1.0,0.9993730407523511,0.9996864220758859,1594,0,1,0.9402985074626866,1.0,0.9692307692307692,63,4,0,0.9489581904783367,0.9916851441241685,0.8955863439721619
XX-LeetSpeakNER-paraphrase-multilingual-mpnet-base-v2_test_IT.csv ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ Model,Dataset,lang,P-INV_CAM,R-INV_CAM,F1-INV_CAM,TP-INV_CAM,FN-INV_CAM,FP-INV_CAM,P-LEETSPEAK,R-LEETSPEAK,F1-LEETSPEAK,TP-LEETSPEAK,FN-LEETSPEAK,FP-LEETSPEAK,P-MIX,R-MIX,F1-MIX,TP-MIX,FN-MIX,FP-MIX,P-O,R-O,F1-O,TP-O,FN-O,FP-O,P-PUNCT_CAM,R-PUNCT_CAM,F1-PUNCT_CAM,TP-PUNCT_CAM,FN-PUNCT_CAM,FP-PUNCT_CAM,F1-Macro,F1-Micro,F1-Weighted
2
+ XX-LeetSpeakNER-paraphrase-multilingual-mpnet-base-v2,overall,IT,0.9056304520222046,0.9217110573042776,0.9136,1142,119,97,0.9893648018648019,0.976279470960322,0.9827785817655571,6791,73,165,0.9748310810810811,0.8860740058344849,0.9283358803185072,5771,149,742,0.9983891261247522,0.9980942035200061,0.9982416430392413,104743,169,200,0.8497516556291391,0.9922667955534075,0.9154960981047937,4106,726,32,0.9476904406456198,0.990015267915566,0.8958518228704071
3
+ XX-LeetSpeakNER-paraphrase-multilingual-mpnet-base-v2,WikiMatrix,IT,0.9264305177111717,0.947075208913649,0.9366391184573003,340,27,19,0.9895201323772752,0.980327868852459,0.984902552841065,1794,19,36,0.9759181216134859,0.8724434876210979,0.92128445581131,1621,40,237,0.9989164506017569,0.9981053282808754,0.9985107247161673,25813,28,49,0.8376852505292872,0.9974789915966387,0.9106252397391639,1187,230,3,0.9503924183130014,0.9889385510788128,0.9003312569168942
4
+ XX-LeetSpeakNER-paraphrase-multilingual-mpnet-base-v2,TED2020,IT,0.9192708333333334,0.9514824797843666,0.9350993377483445,353,31,18,0.9943705220061413,0.9724724724724725,0.9832995951417004,1943,11,55,0.9665450121654501,0.902328222600795,0.9333333333333333,1589,55,172,0.9986299081035923,0.9983296585822142,0.9984797607711454,29884,41,50,0.8722466960352423,0.9858921161825727,0.925594078691079,1188,174,17,0.9551612211371205,0.9911537043862882,0.9087880971884397
5
+ XX-LeetSpeakNER-paraphrase-multilingual-mpnet-base-v2,OPUS_ParaCrawl,IT,0.8801571709233792,0.8818897637795275,0.8810226155358898,448,61,60,0.9857992073976222,0.9761281883584042,0.980939861978311,2985,43,73,0.9796623177283192,0.8870743571924948,0.9310722100656456,2553,53,325,0.9979772544324961,0.9979978533685601,0.9979875537942351,48351,98,97,0.8470993117010817,0.9936562860438293,0.9145435244161357,1723,311,11,0.9411131531580434,0.9900045915303924,0.8850525078969678
6
+ XX-LeetSpeakNER-paraphrase-multilingual-mpnet-base-v2,OPUS_News_Commentary,IT,1.0,1.0,1.0,1,0,0,1.0,1.0,1.0,27,0,0,1.0,0.5333333333333333,0.6956521739130436,8,0,7,1.0,1.0,1.0,373,0,0,0.5333333333333333,1.0,0.6956521739130436,8,7,0,0.8782608695652174,0.9834905660377359,0.8133333333333332
config.cfg ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [paths]
2
+ train = "./Data/XX/11-18-22_17-51/NER_TRAIN_DATA.spacy"
3
+ dev = "./Data/XX/11-18-22_17-51/NER_DEV_DATA.spacy"
4
+ vectors = null
5
+ init_tok2vec = null
6
+
7
+ [system]
8
+ gpu_allocator = "pytorch"
9
+ seed = 0
10
+
11
+ [nlp]
12
+ lang = "xx"
13
+ pipeline = ["transformer","ner"]
14
+ batch_size = 128
15
+ disabled = []
16
+ before_creation = null
17
+ after_creation = null
18
+ after_pipeline_creation = null
19
+ tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
20
+
21
+ [components]
22
+
23
+ [components.ner]
24
+ factory = "ner"
25
+ incorrect_spans_key = null
26
+ moves = null
27
+ scorer = {"@scorers":"spacy.ner_scorer.v1"}
28
+ update_with_oracle_cut_size = 100
29
+
30
+ [components.ner.model]
31
+ @architectures = "spacy.TransitionBasedParser.v2"
32
+ state_type = "ner"
33
+ extra_state_tokens = false
34
+ hidden_width = 64
35
+ maxout_pieces = 2
36
+ use_upper = false
37
+ nO = null
38
+
39
+ [components.ner.model.tok2vec]
40
+ @architectures = "spacy-transformers.TransformerListener.v1"
41
+ grad_factor = 1.0
42
+ pooling = {"@layers":"reduce_mean.v1"}
43
+ upstream = "*"
44
+
45
+ [components.transformer]
46
+ factory = "transformer"
47
+ max_batch_items = 4096
48
+ set_extra_annotations = {"@annotation_setters":"spacy-transformers.null_annotation_setter.v1"}
49
+
50
+ [components.transformer.model]
51
+ @architectures = "spacy-transformers.TransformerModel.v3"
52
+ name = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
53
+ mixed_precision = false
54
+
55
+ [components.transformer.model.get_spans]
56
+ @span_getters = "spacy-transformers.strided_spans.v1"
57
+ window = 128
58
+ stride = 96
59
+
60
+ [components.transformer.model.grad_scaler_config]
61
+
62
+ [components.transformer.model.tokenizer_config]
63
+ use_fast = true
64
+
65
+ [components.transformer.model.transformer_config]
66
+
67
+ [corpora]
68
+
69
+ [corpora.dev]
70
+ @readers = "spacy.Corpus.v1"
71
+ path = ${paths.dev}
72
+ max_length = 0
73
+ gold_preproc = false
74
+ limit = 0
75
+ augmenter = null
76
+
77
+ [corpora.train]
78
+ @readers = "spacy.Corpus.v1"
79
+ path = ${paths.train}
80
+ max_length = 0
81
+ gold_preproc = false
82
+ limit = 0
83
+ augmenter = null
84
+
85
+ [training]
86
+ accumulate_gradient = 3
87
+ dev_corpus = "corpora.dev"
88
+ train_corpus = "corpora.train"
89
+ seed = ${system.seed}
90
+ gpu_allocator = ${system.gpu_allocator}
91
+ dropout = 0.1
92
+ patience = 1600
93
+ max_epochs = 0
94
+ max_steps = 20000
95
+ eval_frequency = 200
96
+ frozen_components = []
97
+ annotating_components = []
98
+ before_to_disk = null
99
+
100
+ [training.batcher]
101
+ @batchers = "spacy.batch_by_padded.v1"
102
+ discard_oversize = true
103
+ size = 2000
104
+ buffer = 256
105
+ get_length = null
106
+
107
+ [training.logger]
108
+ @loggers = "spacy.WandbLogger.v3"
109
+ project_name = "ASOC-LeetSpeakNER-full-XX-MultiNER"
110
+ remove_config_values = ["paths.train","paths.dev","corpora.train.path","corpora.dev.path"]
111
+ model_log_interval = null
112
+ log_dataset_dir = null
113
+ entity = null
114
+ run_name = null
115
+
116
+ [training.optimizer]
117
+ @optimizers = "Adam.v1"
118
+ beta1 = 0.9
119
+ beta2 = 0.999
120
+ L2_is_weight_decay = true
121
+ L2 = 0.01
122
+ grad_clip = 1.0
123
+ use_averages = false
124
+ eps = 0.00000001
125
+
126
+ [training.optimizer.learn_rate]
127
+ @schedules = "warmup_linear.v1"
128
+ warmup_steps = 250
129
+ total_steps = 20000
130
+ initial_rate = 0.00005
131
+
132
+ [training.score_weights]
133
+ ents_f = 1.0
134
+ ents_p = 0.0
135
+ ents_r = 0.0
136
+ ents_per_type = null
137
+
138
+ [pretraining]
139
+
140
+ [initialize]
141
+ vectors = ${paths.vectors}
142
+ init_tok2vec = ${paths.init_tok2vec}
143
+ vocab_data = null
144
+ lookups = null
145
+ before_init = null
146
+ after_init = null
147
+
148
+ [initialize.components]
149
+
150
+ [initialize.tokenizer]
meta.json ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "lang":"xx",
3
+ "name":"pipeline",
4
+ "version":"0.0.0",
5
+ "description":"",
6
+ "author":"",
7
+ "email":"",
8
+ "url":"",
9
+ "license":"",
10
+ "spacy_version":">=3.4.3,<3.5.0",
11
+ "spacy_git_version":"Unknown",
12
+ "vectors":{
13
+ "width":0,
14
+ "vectors":0,
15
+ "keys":0,
16
+ "name":null
17
+ },
18
+ "labels":{
19
+ "transformer":[
20
+
21
+ ],
22
+ "ner":[
23
+ "INV_CAMO",
24
+ "LEETSPEAK",
25
+ "MIX",
26
+ "PUNCT_CAMO"
27
+ ]
28
+ },
29
+ "pipeline":[
30
+ "transformer",
31
+ "ner"
32
+ ],
33
+ "components":[
34
+ "transformer",
35
+ "ner"
36
+ ],
37
+ "disabled":[
38
+
39
+ ],
40
+ "performance":{
41
+ "ents_f":0.9193217975,
42
+ "ents_p":0.9200034895,
43
+ "ents_r":0.918641115,
44
+ "ents_per_type":{
45
+ "LEETSPEAK":{
46
+ "p":0.9071114662,
47
+ "r":0.9820076986,
48
+ "f":0.9430749102
49
+ },
50
+ "MIX":{
51
+ "p":0.8874027994,
52
+ "r":0.865489625,
53
+ "f":0.8763092423
54
+ },
55
+ "PUNCT_CAMO":{
56
+ "p":0.9893371246,
57
+ "r":0.8676069508,
58
+ "f":0.9244820858
59
+ },
60
+ "INV_CAMO":{
61
+ "p":0.9334140436,
62
+ "r":0.9319097502,
63
+ "f":0.9326612903
64
+ }
65
+ },
66
+ "transformer_loss":3820.3726315295,
67
+ "ner_loss":3200.4167286961
68
+ },
69
+ "requirements":[
70
+ "spacy-transformers>=1.1.8,<1.2.0"
71
+ ]
72
+ }
ner/cfg ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "moves":null,
3
+ "update_with_oracle_cut_size":100,
4
+ "multitasks":[
5
+
6
+ ],
7
+ "min_action_freq":1,
8
+ "learn_tokens":false,
9
+ "beam_width":1,
10
+ "beam_density":0.0,
11
+ "beam_update_prob":0.0,
12
+ "incorrect_spans_key":null
13
+ }
ner/model ADDED
Binary file (226 kB). View file
 
ner/moves ADDED
@@ -0,0 +1 @@
 
 
1
+ ��moves�D{"0":{},"1":{"LEETSPEAK":278858,"MIX":249011,"PUNCT_CAMO":200301,"INV_CAMO":44687},"2":{"LEETSPEAK":278858,"MIX":249011,"PUNCT_CAMO":200301,"INV_CAMO":44687},"3":{"LEETSPEAK":278858,"MIX":249011,"PUNCT_CAMO":200301,"INV_CAMO":44687},"4":{"LEETSPEAK":278858,"MIX":249011,"PUNCT_CAMO":200301,"INV_CAMO":44687,"":1},"5":{"":1}}�cfg��neg_key�
plots/.ipynb_checkpoints/TEST_XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2_News Commentary_confusion_matrix-checkpoint.pdf ADDED
Binary file (25.9 kB). View file
 
plots/.ipynb_checkpoints/TEST_XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2_Overall_confusion_matrix-checkpoint.pdf ADDED
@@ -0,0 +1,269 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0
- V뷫�i��jv
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ %PDF-1.4
2
+ %����
3
+ 1 0 obj
4
+ <</Creator (Chromium)
5
+ /Producer (Skia/PDF m88)
6
+ /CreationDate (D:20221124144321+00'00')
7
+ /ModDate (D:20221124144321+00'00')>>
8
+ endobj
9
+ 3 0 obj
10
+ <</ca 1
11
+ /BM /Normal>>
12
+ endobj
13
+ 4 0 obj
14
+ <</CA 1
15
+ /ca 1
16
+ /LC 0
17
+ /LJ 0
18
+ /LW 1
19
+ /ML 4
20
+ /SA true
21
+ /BM /Normal>>
22
+ endobj
23
+ 5 0 obj
24
+ <</Type /XObject
25
+ /Subtype /Image
26
+ /Width 394
27
+ /Height 365
28
+ /ColorSpace /DeviceRGB
29
+ /BitsPerComponent 8
30
+ /Filter /FlateDecode
31
+ /Length 2961>> stream
32
+ x���1��E�A:[ +q""b^��8lE ѤH�8�r�
33
+ endstream
34
+ endobj
35
+ 7 0 obj
36
+ <</Type /Pattern
37
+ /PatternType 2
38
+ /Matrix [22.5 0 0 -258.75 459 326.37]
39
+ /Shading <</Function <</Domain [0 1]
40
+ /FunctionType 3
41
+ /Encode [0 1 0 1 0 1 0 1 0 1 0 1]
42
+ /Bounds [.166666672 .33333334 .5 .66666669 .83333331]
43
+ /Functions [<</C0 [.996 .965 .71]
44
+ /C1 [1 .867 .604]
45
+ /Domain [0 1]
46
+ /FunctionType 2
47
+ /N 1>> <</C0 [1 .867 .604]
48
+ /C1 [1 .761 .522]
49
+ /Domain [0 1]
50
+ /FunctionType 2
51
+ /N 1>> <</C0 [1 .761 .522]
52
+ /C1 [1 .651 .475]
53
+ /Domain [0 1]
54
+ /FunctionType 2
55
+ /N 1>> <</C0 [1 .651 .475]
56
+ /C1 [.98 .541 .463]
57
+ /Domain [0 1]
58
+ /FunctionType 2
59
+ /N 1>> <</C0 [.98 .541 .463]
60
+ /C1 [.945 .427 .478]
61
+ /Domain [0 1]
62
+ /FunctionType 2
63
+ /N 1>> <</C0 [.945 .427 .478]
64
+ /C1 [.882 .325 .514]
65
+ /Domain [0 1]
66
+ /FunctionType 2
67
+ /N 1>>]>>
68
+ /Extend [true true]
69
+ /Coords [0 1 0 0]
70
+ /ShadingType 2
71
+ /ColorSpace /DeviceRGB>>>>
72
+ endobj
73
+ 9 0 obj
74
+ <</Filter /FlateDecode
75
+ /Length 1134>> stream
76
+ x���Ko�8���</P�����ɭh7��E�-Pl�h���Iɖ-�"-m�Ж%��͛�ҟP�z#G_�#�A<<v?�x�w��& ������x�k�E���ߤE|�y'����nsg���4�SJ��[��_��>x!�E�M��CJ��� {:�Cߟa�;��|�# (�ow��P�Քm�U*i/��[��%w:0�ߪ���Kdm���+����قS���5&sR���H�Tv%2�$�%6����2�|��h��>l��2\J����nf KʮN ������.ȿ� �:��tZp��阆���<��v�[ˁ+��� T�al��y�6�l���>vo�������T`y W���G�a[��ka�A=������p�3�iL�q�C ���E�GD\&˖�.b�����<iK�j>5��DŽ�క����.��>���J�Z��ʐ�ePm�*��2���E�yJ�)4�A���ZP�f�T����7��$iG�O��kM��#�E
77
+ 0{O�A��5*�,~�}�̈́���ڒ�19ȴ��Y�P�c��Gl��|����Tԃ���������E �[52��:�&��� �\��ԏ����0�Ȫ�?C؋
78
+ �R��w���qU.�lW-f�XdCQ� G��}[/uh��w[l޶�Qo��� ,j9��r_�EB~�V=�QlDŽ��ͤ!��hA��ທZ s��%
79
+ endstream
80
+ endobj
81
+ 2 0 obj
82
+ <</Type /Page
83
+ /Resources <</ProcSet [/PDF /Text /ImageB /ImageC /ImageI]
84
+ /ExtGState <</G3 3 0 R
85
+ /G4 4 0 R>>
86
+ /Pattern <</P7 7 0 R>>
87
+ /XObject <</X5 5 0 R>>
88
+ /Font <</F6 6 0 R
89
+ /F8 8 0 R>>>>
90
+ /MediaBox [0 0 525.12 375.12]
91
+ /Contents 9 0 R
92
+ /StructParents 0
93
+ /Parent 10 0 R>>
94
+ endobj
95
+ 10 0 obj
96
+ <</Type /Pages
97
+ /Count 1
98
+ /Kids [2 0 R]>>
99
+ endobj
100
+ 11 0 obj
101
+ <</Type /Catalog
102
+ /Pages 10 0 R>>
103
+ endobj
104
+ 12 0 obj
105
+ <</Length1 15376
106
+ /Filter /FlateDecode
107
+ /Length 10844>> stream
108
+ x��{ @T׵�^{�33g�a����03��d@�#�P0|D@A>~@�h*�OM��51�j�����hLb���޼~n�ۦ��Mۘ��mS���4�M��gŤ��9�s�^{�u�^{}�BLd�0� �,뜸�!��b�?gW��B �-�7��
109
+ ������_^���U��?Bg����csߏ=!B�RB�bGk_/�#!���_�شs�/G�/������ֶ��>��u���D@t�����+XO��ܿ����a����M=�[�~x⫈o
110
+ ��Ѕ���ʯ;�!��?w�*�7�Z:E�'h_%�#���$y���&�ϓ��;G Bw����W��(+3!�o�i}�|����a� �|����S(����t%�B�����a���"ǰ y�Z����(
111
+ ���J��r�'Ȯ�d~�,W��z���
112
+ ���ejYHU+F���8�U�Y�
113
+ A��,��9\�R��.�%��v}�,rQo��K�tO�EH/�@��
114
+ .R�3�׆XJYk[�������v�gͪE{J�&R�� iJBZ����N��.�>zE&�Z�Qm���5u!֊c�Y���!�7��)
115
+ Y���Pu]��"�����ևh o�j��򖡩���[<�����CBJE�� y|�54�婛/�GE�p{�-fW��^��B�*ں\!1ق��@I�C�e��q�1����-�|��x�<e-���q���5+T�
116
+ ��J��]e�-�a8.Ouݫ$0y�b���b���R�9��*�l��mC���hCM��s�CJ=.p����� r(�:�έ�1DKj�*Wy*���E 7ptBJ�g�x�a4(r!]��UG�;�p��Y4�!m�/�B��.����d�2\e���~�~R��SI�6
117
+ Z�P����ɡ��KhIˢ����V�� }�
118
+ �ʁ����i%Vtjǫ
119
+ !Cj�����+@T�n
120
+ d���S0�0! ST?p��:��h��\m|}v�w��s'1���x��;���@5Q!��}Q��Y���^�k8\��1�5kpX.��%.+�.�'�� `l�c;�t~��zh}�e�� �⩻O�2<?�|�uF�N<��%5J����_ӿf��r����3�g���<e:%/��us��r���z�M;��#k���
121
+ o#֪�? ���4ҫTէt�Р{�{���vlp��خX*X`�����46RCt�S{XK��� ��d�2b;k�����-�z������>1�����i,A�ES����M���h����˵A ' ,�\�̌�v[
122
+
123
+ |NH�hD4�cu��L5�THT��ڮ����D�lH/k/n����V1k�¼ؐ����9�G�w=�]�t��s����B��fp���+f�3Rl�� S򔁯q)�h`2�G.(�G�03C�]Z�y����6Jk<m�~���6�&�K*K�r ���,��|�$�%�% $1 `�ِ���3)t$R
124
+ ���[d�KO��Tš��'f�\�q}���vj7is#�#Ф�/�ۨ��;z�J�7�Ya5 s���hT7�~Ϋ�����uepA���m/�B����f�^��&k�����= ^I.��n��Wv/Z�Q�
125
+ ow_ܳx�E�'_��lI*�R�l�����梁�{+���J\ؽt�mY��ܪu�v~/�&�J֠a�@�Мl�hHU�Q�(eh�1��(�� �M��~�F�J�'޴��V�>i�Tj��&й�<��}ih��U�.����M�d�ߺ|� |���1S=�j�jY@��Ue�@��t:�>�����3�@܁8:;K��V:��a�k�[\[�^Fk+�+i���4w�=s�Ӕ�\���;��̦"gu�����"(Z5T�UE�l*��_�H�.�D�E���R������c�ؔi����sJ;�r���<���&�}f���]�����w�~��|���9�M���ޯo�;�3�]ܴ`A��Y�r�U,^W<#�`CUm�\�gFi��=e��l�^�[���RT����e;�ɓ��{� ��[�_�~����{D ��{vХyu�Oq]^f]��W^7�D��"+kIk��23�mF�k�ձDt�J׹4x�_�z*��7�I3�{�{h�k�븋
126
+ Gcm���%G�� 7�v-�x����
127
+ t�w��>(��|p���r}���������A�w��{��Z|P�E�'�@���7��7}������q��A��ys��"��˻>:��>��K}m>~S�5���Zxs����wp�a�bUc���|a �}�G>����=��~�0WY�[ur|D������k>��i%'�֧>x2<�! 0_���NJ9\>��XCf(3�v��n��,[���DV� Ę�f�����N�9�:��fn���
128
+ H)��
129
+ ��T2�L���*Tݫ=�
130
+ ��-��пx!!N^6�ۜ���m�;X��۵�H�i�A�����&��N�}sb�pP�ݧ�w���+�w2p���!e��I��u���A� ez�7(���r�²-#��v�r�r��,�!��c�q�q�!8�Ēѩ�V��b�Ȏ�gD��x����UÌ�D�Hj��� ��eV7y������{F��Z�����k_^s5ÑU+�R�ͦ�?����0�{���/���j9���n}��w-�yIh���d��Ғ���hV&�
131
+ �;j�S���4h�1�c���x���'���4 � �d���NRHa�'��h�)�T�q���e����r1�.4ݲ �r Z�Q��,ʟK��U �\�X�miAƎ"kE�L��D��B���;�;f�[y��|W�~�7wc�I;gn`Ǣ�3����ASRT�1*:*� ����+�ޓ�����f�I��^�s�{��5TP(@�*I#�<�����T:�R �Nc�z.=h�1zڤ��:��
132
+ uT����J
133
+ �5�s��rѼ��>2�
134
+ U� *y��G�v�������x���Ȧ]����Q�a���T��kA$O�)�ᦁ�85!���F ��<�E�KS��2��2d�b�:�ɨ�]`W�;L`۴��-��hO|y��"�D���x��u��&u}^@�A��8'�Z��pL�U������uV?�6v��ˮ�g�<�������1��0��}?�H��9��}j��4c|�x"��&��� #%V�-t���ר� 71�&�@�g5�fh�¹��������B���$�,$_Urv.8���:Ei�d,׋ "��aP)ΰϠ))IA�'���3��<6�dȶX�"l��v{�b'�]2ZBϖ@ op%y����1�Ւ�h���v�Fm6SU�� T��3��%e��������"'��ꎪ�p���c�b���k��l� �̙k�l�z�Y���fv[ |�'��=s���QpOanMQ��u���S[����BSR�̒�@�/�i�� xV�����m�k�=wb���IT(,Y[�0��������.N���֫�9�V�ɭ)t=P��?�^sNCE] �$�W����4���wʰ3֧�z��ө ��qCY�Ę�S��t�qs��\�٥��
135
+ A�s��v�.�0��XJײ�������[�\��I^Xױ�����j���e���l:���sST����}{���
136
+ �j*+ ��N� h�1��ڃZ�T2�1�
137
+ p�yx�bv:��E-e�7W��.�\Ԣ8_�l��9
138
+ ���K�F%zF�^���I�T$��<3�|���&�NV�3�e�꽳�c���� k^��pi_�|Yt=��'ׅ�O��unoE*e=J. ��ƿ�$��$��\o��}�>-=#B�8$R���M.��}8="L��G�k�u�"�U��Q�n�F6n�/�o}���8�jq��I��tٞm�v�;��
139
+ Ss�u$>s��(��5���o���Vu���N���@�T3k�|�5?��<0��[P��n�d'�37��\�� 6��(�V�X��eO��������9'o�LqZ^���h��$X�����K�� M�&a�pZxN�U��'��Dc��t��� ZA+ŁISſ�K&kXW��nR?���c���~�g|������g�����'b�ifF����oގ�}/P�0;Qg0���hߟ��o�vy�N�gp�͇B��N���w����⣃��p�@����a�� iC|?�F2%�rrv��dA�
140
+ �Y(�f�L��4PU�h�-��T9c��(b�OF8��"����+,��O�Ԛ� 6��X�H�^&���P��;�pM�P0�rE�BGV�}մ���3+K����B�i^�5#��|�ZB��p�2mT檶�$�$� 9Ah�i��>n�.�N�aF�e�~ �o��=�X4��xT�5J7ɲW�%SA��o|�p�|\~S~Wֽ'Ý��A�A'3�QLh#���`qX�[���r�r���{ݤ޴�k�g-��r�B[,Pj��`X��f�߽~��F�Q3U��o��xW8�1A#�a���{k����g�w���L�V�N��Go �ïUև_��;�M���;�鳍�����l�ZL�$���^ ��މoo�O��N�%ӺF2`�[ e�F�a�Nm���[U���\����h�O&���Z�ͬ�Xmv��WQZ����b ���o��o4�E�I�nt�e�#]���������3������?(��!��*�N�Ձ�r��x�ī�.���j�g8��D�?+�3Rfz;���q=�9O����h�I�T_^�r�����W�,E���1�D��/q�WOէ�.�����=�=�J��@\Z��
141
+ ���z��x��B��?��Zo6c���>���>o_?�����#��b�����G���
142
+ endstream
143
+ endobj
144
+ 13 0 obj
145
+ <</Type /FontDescriptor
146
+ /FontName /LiberationSans
147
+ /Flags 4
148
+ /Ascent 905.27344
149
+ /Descent -211.91406
150
+ /StemV 45.898438
151
+ /CapHeight 687.98828
152
+ /ItalicAngle 0
153
+ /FontBBox [-203.125 -303.22266 1050.29297 910.15625]
154
+ /FontFile2 12 0 R>>
155
+ endobj
156
+ 14 0 obj
157
+ <</Type /Font
158
+ /FontDescriptor 13 0 R
159
+ /BaseFont /LiberationSans
160
+ /Subtype /CIDFontType2
161
+ /CIDToGIDMap /Identity
162
+ /CIDSystemInfo <</Registry (Adobe)
163
+ /Ordering (Identity)
164
+ /Supplement 0>>
165
+ /W [0 [365.23438 0 0 277.83203] 19 28 556.15234 36 [666.99219 0 722.16797 0 666.99219 0 0 0 277.83203 0 666.99219 556.15234 833.00781 722.16797 777.83203 666.99219 0 722.16797 666.99219 610.83984 722.16797 666.99219 0 666.99219] 66 68 556.15234 70 [500 556.15234 556.15234] 76 79 222.16797 85 [333.00781 0 277.83203 556.15234 500]]
166
+ /DW 0>>
167
+ endobj
168
+ 15 0 obj
169
+ <</Filter /FlateDecode
170
+ /Length 323>> stream
171
+ x�]��j�0��y���E���������n�M�m�����o��-4����o�M&Q�<5Z9��Q��h���0��@�pS�$�J%�F�CgH���29ݏ�((�>|trv��G9^�DoV�U�Fw�U빝�����1)K*���^:��
172
+ endstream
173
+ endobj
174
+ 6 0 obj
175
+ <</Type /Font
176
+ /Subtype /Type0
177
+ /BaseFont /LiberationSans
178
+ /Encoding /Identity-H
179
+ /DescendantFonts [14 0 R]
180
+ /ToUnicode 15 0 R>>
181
+ endobj
182
+ 16 0 obj
183
+ <</Length1 6608
184
+ /Filter /FlateDecode
185
+ /Length 4242>> stream
186
+ x��9 p[Uv缧'ɟHO�,d�=���,+�'��c˶$�qY��G����'�#'%|6��L�o�`KJX:�3��N��,a�;�)�0t��[�� n������yOr��v��t����{���sϽ�>�0
187
  c
188
+ CR~��0��>8� U�&Z�oxv%gtc���ޅi�,8��7����J�Дv��[-�O�>�c����,,@|
189
+ o������5Q$�J,B'�c�~��-3�B3ə�h�=p>���W� x>�E�<� ��9��q���4��aK�%���0��E�cE �b.P�����"j�8,�q��"cs�4���%<�>ϐ��T��jL�Y�MA���[,>��g��ͼ��W0? �@Zq�\����G���W];{7��\��߅��F�t3l��/�/�����m�W���2�2�5g��$��
190
+ ���t��۶n�������zZ[��M��6n��}����k\U���Ғb��.�XL�Ѱ*#=M��r�A��J��(�FdM����Tp)J��u��,�w��,FT1�FI7I�Iҝ�t_�D^l�g��D�=�$.��@����%ުšRYE��N�7g�#�����H�񐽹��V�u8�Y s�f$�KsXވ*��{7�1�_� +�%���z=6�=��
191
+ ��"y�14*�\)����W�LQT⼊�JvK6��d�%2���J�H���H��r�#���A��!�M��wn��4�Z�Q��|�K��-R���T���U���li���Ғ]^�2��G<I[R xj�s���L
192
+ U!*�� ��nB����]�U�F%�����_�!�Z`�4~��M���U�Rh�rF(�t~{�!e}��#!%��J�?�Qj��H�s�h3�ti�EΐZz�BoJҵ
193
+ ]G��VtV����g�u��U��t@��)�7���s�~p*������^���|�[�r�B����Ѐ��2����w���+�����R3N<� q}D�A���a^�)X���r5̳ �0�*dN!�봅�m�G�^c��J�&�����{�#\߯_�h�#�C�K�l�Iȅ���]���;g��!'�N��t9#�cN�YS�1�ь撁U��@��S5����IhZjZBWxr����$uk�`u!�e�� �F*�b�j׭od��� �΀YkMu#���j����ϾGz�0sa�W���g'i�B�����=/�ߝ�Ӟ}v�Y:nM��d੟<�vt���vz����U��O~5;�����΢�s���2V�]������'.i�k���n��#٘a������:o��쯭����[��|��������~�����Wx�\�&��r�r�2k�Z��B��|�|��+/ϗU�������73� �EwA���\W��o��7��
194
+ ��;ծ0�N8< �׻��y��+��7������s��A���y �]�P**U�+�d�5M��RI wI#[SmUւU"_��T�[��
195
+ Lˠ������j�|}w�m�m)un���ޤ��gcA�zg����S����[����,yy:Cn�����)�+(h�6�2ym��h��z7
196
+ �=c���u���:--����5B����{B��|�������d���e�Ŧ�u��ʊ5ъm�����m�L�`� 32Ŷ�eȗ!�u�Eʎ��*�P�-[�bێY/X����d�[Y+\.�m�6^�#cw�ɰdg�&+�>�l��+^�VS�ʻ�j4�Zrf_
197
+ ��:ؖ�9ȇ��%Hna�A�'oz� 1pB �c��E �]p�*KɊtK��5���گ�&^�'^�'�`��x�Hk/q]�F�������m�>�;
198
+ w�I�PNZ�W�ܔى�M���v�F�:�^��"���������
199
+ ���R��RW��''ʄgN�LNDN0�'�8���L���q��m�����J�Ǿ[*}�T�U�0� ̌��I�h�3��}�G��j��7�= a��r���:E5F�u��� ;9.mFa��P�Ü�ܚ�>]
200
+ :�Ꞡ)u�C�`��`�
201
+ -��Tljlʑ,H�}�Q�9V�1�Al��Wy1G�A"*����~��ݟ�T���(#k1�1�Zۿ"�P�I�)���O!c��
202
+ endstream
203
+ endobj
204
+ 17 0 obj
205
+ <</Type /FontDescriptor
206
+ /FontName /LiberationSans-BoldItalic
207
+ /Flags 68
208
+ /Ascent 905.27344
209
+ /Descent -211.91406
210
+ /StemV 137.207031
211
+ /CapHeight 687.98828
212
+ /ItalicAngle -12
213
+ /FontBBox [-208.98438 -303.22266 1128.41797 1029.78516]
214
+ /FontFile2 16 0 R>>
215
+ endobj
216
+ 18 0 obj
217
+ <</Type /Font
218
+ /FontDescriptor 17 0 R
219
+ /BaseFont /LiberationSans-BoldItalic
220
+ /Subtype /CIDFontType2
221
+ /CIDToGIDMap /Identity
222
+ /CIDSystemInfo <</Registry (Adobe)
223
+ /Ordering (Identity)
224
+ /Supplement 0>>
225
+ /W [0 [365.23438 0 0 277.83203] 50 [777.83203] 68 72 556.15234 79 [277.83203] 85 [389.16016 0 0 0 556.15234]]
226
+ /DW 0>>
227
+ endobj
228
+ 19 0 obj
229
+ <</Filter /FlateDecode
230
+ /Length 259>> stream
231
+ x�]��j�0���s��X���vA�b���}���6P��o��,[�@3'�sh�?�J:��V�LR ���,Gq����ݍ��f��a_.��4�k�ừ�;����7+�J5��<�1?��r�����酙W� �(;����ۏ^�7��"r���Z�jG�Ԍ��|5Pw��J��_�j��7�q�䧳�Ț@�"R�E*�H�<�c�*Q���TU�.E�k�s���W�%dv7�7k��l4lI���6A�/�*�
232
+ endstream
233
+ endobj
234
+ 8 0 obj
235
+ <</Type /Font
236
+ /Subtype /Type0
237
+ /BaseFont /LiberationSans-BoldItalic
238
+ /Encoding /Identity-H
239
+ /DescendantFonts [18 0 R]
240
+ /ToUnicode 19 0 R>>
241
+ endobj
242
+ xref
243
+ 0 20
244
+ 0000000000 65535 f
245
+ 0000000015 00000 n
246
+ 0000005387 00000 n
247
+ 0000000154 00000 n
248
+ 0000000191 00000 n
249
+ 0000000267 00000 n
250
+ 0000017875 00000 n
251
+ 0000003396 00000 n
252
+ 0000023255 00000 n
253
+ 0000004182 00000 n
254
+ 0000005668 00000 n
255
+ 0000005724 00000 n
256
+ 0000005773 00000 n
257
+ 0000016705 00000 n
258
+ 0000016944 00000 n
259
+ 0000017481 00000 n
260
+ 0000018014 00000 n
261
+ 0000022342 00000 n
262
+ 0000022599 00000 n
263
+ 0000022925 00000 n
264
+ trailer
265
+ <</Size 20
266
+ /Root 11 0 R
267
+ /Info 1 0 R>>
268
+ startxref
269
+ 23405
270
+ %%EOF
plots/.ipynb_checkpoints/TEST_XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2_ParaCrawl_confusion_matrix-checkpoint.pdf ADDED
Binary file (24 kB). View file
 
plots/TEST_XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2_News Commentary_confusion_matrix.pdf ADDED
Binary file (25.6 kB). View file
 
plots/TEST_XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2_Overall_confusion_matrix.pdf ADDED
Binary file (23.9 kB). View file
 
plots/TEST_XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2_ParaCrawl_confusion_matrix.pdf ADDED
Binary file (24 kB). View file
 
plots/TEST_XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2_TED2020_confusion_matrix.pdf ADDED
Binary file (23.4 kB). View file
 
plots/TEST_XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2_WikiMatrix_confusion_matrix.pdf ADDED
Binary file (25 kB). View file
 
tokenizer ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ ��prefix_search� ~^§|^%|^=|^—|^–|^\+(?![0-9])|^…|^……|^,|^:|^;|^\!|^\?|^¿|^؟|^¡|^\(|^\)|^\[|^\]|^\{|^\}|^<|^>|^_|^#|^\*|^&|^。|^?|^!|^,|^、|^;|^:|^~|^·|^।|^،|^۔|^؛|^٪|^\.\.+|^…|^\'|^"|^”|^“|^`|^‘|^´|^’|^‚|^,|^„|^»|^«|^「|^」|^『|^』|^(|^)|^〔|^〕|^【|^】|^《|^》|^〈|^〉|^\$|^£|^€|^¥|^฿|^US\$|^C\$|^A\$|^₽|^﷼|^₴|^₠|^₡|^₢|^₣|^₤|^₥|^₦|^₧|^₨|^₩|^₪|^₫|^€|^₭|^₮|^₯|^₰|^₱|^₲|^₳|^₴|^₵|^₶|^₷|^₸|^₹|^₺|^₻|^₼|^₽|^₾|^₿|^[\u00A6\u00A9\u00AE\u00B0\u0482\u058D\u058E\u060E\u060F\u06DE\u06E9\u06FD\u06FE\u07F6\u09FA\u0B70\u0BF3-\u0BF8\u0BFA\u0C7F\u0D4F\u0D79\u0F01-\u0F03\u0F13\u0F15-\u0F17\u0F1A-\u0F1F\u0F34\u0F36\u0F38\u0FBE-\u0FC5\u0FC7-\u0FCC\u0FCE\u0FCF\u0FD5-\u0FD8\u109E\u109F\u1390-\u1399\u1940\u19DE-\u19FF\u1B61-\u1B6A\u1B74-\u1B7C\u2100\u2101\u2103-\u2106\u2108\u2109\u2114\u2116\u2117\u211E-\u2123\u2125\u2127\u2129\u212E\u213A\u213B\u214A\u214C\u214D\u214F\u218A\u218B\u2195-\u2199\u219C-\u219F\u21A1\u21A2\u21A4\u21A5\u21A7-\u21AD\u21AF-\u21CD\u21D0\u21D1\u21D3\u21D5-\u21F3\u2300-\u2307\u230C-\u231F\u2322-\u2328\u232B-\u237B\u237D-\u239A\u23B4-\u23DB\u23E2-\u2426\u2440-\u244A\u249C-\u24E9\u2500-\u25B6\u25B8-\u25C0\u25C2-\u25F7\u2600-\u266E\u2670-\u2767\u2794-\u27BF\u2800-\u28FF\u2B00-\u2B2F\u2B45\u2B46\u2B4D-\u2B73\u2B76-\u2B95\u2B98-\u2BC8\u2BCA-\u2BFE\u2CE5-\u2CEA\u2E80-\u2E99\u2E9B-\u2EF3\u2F00-\u2FD5\u2FF0-\u2FFB\u3004\u3012\u3013\u3020\u3036\u3037\u303E\u303F\u3190\u3191\u3196-\u319F\u31C0-\u31E3\u3200-\u321E\u322A-\u3247\u3250\u3260-\u327F\u328A-\u32B0\u32C0-\u32FE\u3300-\u33FF\u4DC0-\u4DFF\uA490-\uA4C6\uA828-\uA82B\uA836\uA837\uA839\uAA77-\uAA79\uFDFD\uFFE4\uFFE8\uFFED\uFFEE\uFFFC\uFFFD\U00010137-\U0001013F\U00010179-\U00010189\U0001018C-\U0001018E\U00010190-\U0001019B\U000101A0\U000101D0-\U000101FC\U00010877\U00010878\U00010AC8\U0001173F\U00016B3C-\U00016B3F\U00016B45\U0001BC9C\U0001D000-\U0001D0F5\U0001D100-\U0001D126\U0001D129-\U0001D164\U0001D16A-\U0001D16C\U0001D183\U0001D184\U0001D18C-\U0001D1A9\U0001D1AE-\U0001D1E8\U0001D200-\U0001D241\U0001D245\U0001D300-\U0001D356\U0001D800-\U
0001D9FF\U0001DA37-\U0001DA3A\U0001DA6D-\U0001DA74\U0001DA76-\U0001DA83\U0001DA85\U0001DA86\U0001ECAC\U0001F000-\U0001F02B\U0001F030-\U0001F093\U0001F0A0-\U0001F0AE\U0001F0B1-\U0001F0BF\U0001F0C1-\U0001F0CF\U0001F0D1-\U0001F0F5\U0001F110-\U0001F16B\U0001F170-\U0001F1AC\U0001F1E6-\U0001F202\U0001F210-\U0001F23B\U0001F240-\U0001F248\U0001F250\U0001F251\U0001F260-\U0001F265\U0001F300-\U0001F3FA\U0001F400-\U0001F6D4\U0001F6E0-\U0001F6EC\U0001F6F0-\U0001F6F9\U0001F700-\U0001F773\U0001F780-\U0001F7D8\U0001F800-\U0001F80B\U0001F810-\U0001F847\U0001F850-\U0001F859\U0001F860-\U0001F887\U0001F890-\U0001F8AD\U0001F900-\U0001F90B\U0001F910-\U0001F93E\U0001F940-\U0001F970\U0001F973-\U0001F976\U0001F97A\U0001F97C-\U0001F9A2\U0001F9B0-\U0001F9B9\U0001F9C0-\U0001F9C2\U0001F9D0-\U0001F9FF\U0001FA60-\U0001FA6D]�suffix_search�2y…$|……$|,$|:$|;$|\!$|\?$|¿$|؟$|¡$|\($|\)$|\[$|\]$|\{$|\}$|<$|>$|_$|#$|\*$|&$|。$|?$|!$|,$|、$|;$|:$|~$|·$|।$|،$|۔$|؛$|٪$|\.\.+$|…$|\'$|"$|”$|“$|`$|‘$|´$|’$|‚$|,$|„$|»$|«$|「$|」$|『$|』$|($|)$|〔$|〕$|【$|】$|《$|》$|〈$|〉$|[\u00A6\u00A9\u00AE\u00B0\u0482\u058D\u058E\u060E\u060F\u06DE\u06E9\u06FD\u06FE\u07F6\u09FA\u0B70\u0BF3-\u0BF8\u0BFA\u0C7F\u0D4F\u0D79\u0F01-\u0F03\u0F13\u0F15-\u0F17\u0F1A-\u0F1F\u0F34\u0F36\u0F38\u0FBE-\u0FC5\u0FC7-\u0FCC\u0FCE\u0FCF\u0FD5-\u0FD8\u109E\u109F\u1390-\u1399\u1940\u19DE-\u19FF\u1B61-\u1B6A\u1B74-\u1B7C\u2100\u2101\u2103-\u2106\u2108\u2109\u2114\u2116\u2117\u211E-\u2123\u2125\u2127\u2129\u212E\u213A\u213B\u214A\u214C\u214D\u214F\u218A\u218B\u2195-\u2199\u219C-\u219F\u21A1\u21A2\u21A4\u21A5\u21A7-\u21AD\u21AF-\u21CD\u21D0\u21D1\u21D3\u21D5-\u21F3\u2300-\u2307\u230C-\u231F\u2322-\u2328\u232B-\u237B\u237D-\u239A\u23B4-\u23DB\u23E2-\u2426\u2440-\u244A\u249C-\u24E9\u2500-\u25B6\u25B8-\u25C0\u25C2-\u25F7\u2600-\u266E\u2670-\u2767\u2794-\u27BF\u2800-\u28FF\u2B00-\u2B2F\u2B45\u2B46\u2B4D-\u2B73\u2B76-\u2B95\u2B98-\u2BC8\u2BCA-\u2BFE\u2CE5-\u2CEA\u2E80-\u2E99\u2E9B-\u2EF3\u2F00-\u2FD5\u2FF0-\u2FFB\u3004\u3012\u3013\u3020\u3036\u3037\u303E\u303F\u3190
\u3191\u3196-\u319F\u31C0-\u31E3\u3200-\u321E\u322A-\u3247\u3250\u3260-\u327F\u328A-\u32B0\u32C0-\u32FE\u3300-\u33FF\u4DC0-\u4DFF\uA490-\uA4C6\uA828-\uA82B\uA836\uA837\uA839\uAA77-\uAA79\uFDFD\uFFE4\uFFE8\uFFED\uFFEE\uFFFC\uFFFD\U00010137-\U0001013F\U00010179-\U00010189\U0001018C-\U0001018E\U00010190-\U0001019B\U000101A0\U000101D0-\U000101FC\U00010877\U00010878\U00010AC8\U0001173F\U00016B3C-\U00016B3F\U00016B45\U0001BC9C\U0001D000-\U0001D0F5\U0001D100-\U0001D126\U0001D129-\U0001D164\U0001D16A-\U0001D16C\U0001D183\U0001D184\U0001D18C-\U0001D1A9\U0001D1AE-\U0001D1E8\U0001D200-\U0001D241\U0001D245\U0001D300-\U0001D356\U0001D800-\U0001D9FF\U0001DA37-\U0001DA3A\U0001DA6D-\U0001DA74\U0001DA76-\U0001DA83\U0001DA85\U0001DA86\U0001ECAC\U0001F000-\U0001F02B\U0001F030-\U0001F093\U0001F0A0-\U0001F0AE\U0001F0B1-\U0001F0BF\U0001F0C1-\U0001F0CF\U0001F0D1-\U0001F0F5\U0001F110-\U0001F16B\U0001F170-\U0001F1AC\U0001F1E6-\U0001F202\U0001F210-\U0001F23B\U0001F240-\U0001F248\U0001F250\U0001F251\U0001F260-\U0001F265\U0001F300-\U0001F3FA\U0001F400-\U0001F6D4\U0001F6E0-\U0001F6EC\U0001F6F0-\U0001F6F9\U0001F700-\U0001F773\U0001F780-\U0001F7D8\U0001F800-\U0001F80B\U0001F810-\U0001F847\U0001F850-\U0001F859\U0001F860-\U0001F887\U0001F890-\U0001F8AD\U0001F900-\U0001F90B\U0001F910-\U0001F93E\U0001F940-\U0001F970\U0001F973-\U0001F976\U0001F97A\U0001F97C-\U0001F9A2\U0001F9B0-\U0001F9B9\U0001F9C0-\U0001F9C2\U0001F9D0-\U0001F9FF\U0001FA60-\U0001FA6D]$|'s$|'S$|’s$|’S$|—$|–$|(?<=[0-9])\+$|(?<=°[FfCcKk])\.$|(?<=[0-9])(?:\$|£|€|¥|฿|US\$|C\$|A\$|₽|﷼|₴|₠|₡|₢|₣|₤|₥|₦|₧|₨|₩|₪|₫|€|₭|₮|₯|₰|₱|₲|₳|₴|₵|₶|₷|₸|₹|₺|₻|₼|₽|₾|₿)$|(?<=[0-9])(?:km|km²|km³|m|m²|m³|dm|dm²|dm³|cm|cm²|cm³|mm|mm²|mm³|ha|µm|nm|yd|in|ft|kg|g|mg|µg|t|lb|oz|m/s|km/h|kmh|mph|hPa|Pa|mbar|mb|MB|kb|KB|gb|GB|tb|TB|T|G|M|K|%|км|км²|км³|м|м²|м³|дм|дм²|дм³|см|см²|см³|мм|мм²|мм³|нм|кг|г|мг|м/с|км/ч|кПа|Па|мбар|Кб|КБ|кб|Мб|МБ|мб|Гб|ГБ|гб|Тб|ТБ|тбكم|كم²|كم³|م|م²|م³|سم|سم²|سم³|مم|مم²|مم³|كم|غرام|جرام|جم|كغ|ملغ|كوب|اكواب)$|(?<=[0-9a-z\uFF41-\uFF5A\u00DF-\u00F
6\u00F8-\u00FF\u0101\u0103\u0105\u0107\u0109\u010B\u010D\u010F\u0111\u0113\u0115\u0117\u0119\u011B\u011D\u011F\u0121\u0123\u0125\u0127\u0129\u012B\u012D\u012F\u0131\u0133\u0135\u0137\u0138\u013A\u013C\u013E\u0140\u0142\u0144\u0146\u0148\u0149\u014B\u014D\u014F\u0151\u0153\u0155\u0157\u0159\u015B\u015D\u015F\u0161\u0163\u0165\u0167\u0169\u016B\u016D\u016F\u0171\u0173\u0175\u0177\u017A\u017C\u017E\u017F\u0180\u0183\u0185\u0188\u018C\u018D\u0192\u0195\u0199-\u019B\u019E\u01A1\u01A3\u01A5\u01A8\u01AA\u01AB\u01AD\u01B0\u01B4\u01B6\u01B9\u01BA\u01BD-\u01BF\u01C6\u01C9\u01CC\u01CE\u01D0\u01D2\u01D4\u01D6\u01D8\u01DA\u01DC\u01DD\u01DF\u01E1\u01E3\u01E5\u01E7\u01E9\u01EB\u01ED\u01EF\u01F0\u01F3\u01F5\u01F9\u01FB\u01FD\u01FF\u0201\u0203\u0205\u0207\u0209\u020B\u020D\u020F\u0211\u0213\u0215\u0217\u0219\u021B\u021D\u021F\u0221\u0223\u0225\u0227\u0229\u022B\u022D\u022F\u0231\u0233-\u0239\u023C\u023F\u0240\u0242\u0247\u0249\u024B\u024D\u024F\u2C61\u2C65\u2C66\u2C68\u2C6A\u2C6C\u2C71\u2C73\u2C74\u2C76-\u2C7B\uA723\uA725\uA727\uA729\uA72B\uA72D\uA72F-\uA731\uA733\uA735\uA737\uA739\uA73B\uA73D\uA73F\uA741\uA743\uA745\uA747\uA749\uA74B\uA74D\uA74F\uA751\uA753\uA755\uA757\uA759\uA75B\uA75D\uA75F\uA761\uA763\uA765\uA767\uA769\uA76B\uA76D\uA76F\uA771-\uA778\uA77A\uA77C\uA77F\uA781\uA783\uA785\uA787\uA78C\uA78E\uA791\uA793-\uA795\uA797\uA799\uA79B\uA79D\uA79F\uA7A1\uA7A3\uA7A5\uA7A7\uA7A9\uA7AF\uA7B5\uA7B7\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB64\u0250-\u02AF\u1D00-\u1D25\u1D6B-\u1D77\u1D79-\u1D9A\u1E01\u1E03\u1E05\u1E07\u1E09\u1E0B\u1E0D\u1E0F\u1E11\u1E13\u1E15\u1E17\u1E19\u1E1B\u1E1D\u1E1F\u1E21\u1E23\u1E25\u1E27\u1E29\u1E2B\u1E2D\u1E2F\u1E31\u1E33\u1E35\u1E37\u1E39\u1E3B\u1E3D\u1E3F\u1E41\u1E43\u1E45\u1E47\u1E49\u1E4B\u1E4D\u1E4F\u1E51\u1E53\u1E55\u1E57\u1E59\u1E5B\u1E5D\u1E5F\u1E61\u1E63\u1E65\u1E67\u1E69\u1E6B\u1E6D\u1E6F\u1E71\u1E73\u1E75\u1E77\u1E79\u1E7B\u1E7D\u1E7F\u1E81\u1E83\u1E85\u1E87\u1E89\u1E8B\u1E8D\u1E8F\u1E91\u1E93\u1E95-\u1E9D\u1E9F\u1EA1\u1EA3\u1EA5\u1EA7\u1EA9\u1EAB\u1E
AD\u1EAF\u1EB1\u1EB3\u1EB5\u1EB7\u1EB9\u1EBB\u1EBD\u1EBF\u1EC1\u1EC3\u1EC5\u1EC7\u1EC9\u1ECB\u1ECD\u1ECF\u1ED1\u1ED3\u1ED5\u1ED7\u1ED9\u1EDB\u1EDD\u1EDF\u1EE1\u1EE3\u1EE5\u1EE7\u1EE9\u1EEB\u1EED\u1EEF\u1EF1\u1EF3\u1EF5\u1EF7\u1EF9\u1EFB\u1EFD\u1EFFёа-яәөүҗңһα-ωάέίόώήύа-щюяіїєґѓѕјљњќѐѝ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F%²\-\+…|……|,|:|;|\!|\?|¿|؟|¡|\(|\)|\[|\]|\{|\}|<|>|_|#|\*|&|。|?|!|,|、|;|:|~|·|।|،|۔|؛|٪(?:\'"”“`‘´’‚,„»«「」『』()〔〕【】《》〈〉)])\.$|(?<=[A-Z\uFF21-\uFF3A\u00C0-\u00D6\u00D8-\u00DE\u0100\u0102\u0104\u0106\u0108\u010A\u010C\u010E\u0110\u0112\u0114\u0116\u0118\u011A\u011C\u011E\u0120\u0122\u0124\u0126\u0128\u012A\u012C\u012E\u0130\u0132\u0134\u0136\u0139\u013B\u013D\u013F\u0141\u0143\u0145\u0147\u014A\u014C\u014E\u0150\u0152\u0154\u0156\u0158\u015A\u015C\u015E\u0160\u0162\u0164\u0166\u0168\u016A\u016C\u016E\u0170\u0172\u0174\u0176\u0178\u0179\u017B\u017D\u0181\u0182\u0184\u0186\u0187\u0189-\u018B\u018E-\u0191\u0193\u0194\u0196-\u0198\u019C\u019D\u019F\u01A0\u01A2\u01A4\u01A6\u01A7\u01A9\u01AC\u01AE\u01AF\u01B1-\u01B3\u01B5\u01B7\u01B8\u01BC\u01C4\u01C7\u01CA\u01CD\u01CF\u01D1\u01D3\u01D5\u01D7\u01D9\u01DB\u01DE\u01E0\u01E2\u01E4\u01E6\u01E8\u01EA\u01EC\u01EE\u01F1\u01F4\u01F6-\u01F8\u01FA\u01FC\u01FE\u0200\u0202\u0204\u0206\u0208\u020A
\u020C\u020E\u0210\u0212\u0214\u0216\u0218\u021A\u021C\u021E\u0220\u0222\u0224\u0226\u0228\u022A\u022C\u022E\u0230\u0232\u023A\u023B\u023D\u023E\u0241\u0243-\u0246\u0248\u024A\u024C\u024E\u2C60\u2C62-\u2C64\u2C67\u2C69\u2C6B\u2C6D-\u2C70\u2C72\u2C75\u2C7E\u2C7F\uA722\uA724\uA726\uA728\uA72A\uA72C\uA72E\uA732\uA734\uA736\uA738\uA73A\uA73C\uA73E\uA740\uA742\uA744\uA746\uA748\uA74A\uA74C\uA74E\uA750\uA752\uA754\uA756\uA758\uA75A\uA75C\uA75E\uA760\uA762\uA764\uA766\uA768\uA76A\uA76C\uA76E\uA779\uA77B\uA77D\uA77E\uA780\uA782\uA784\uA786\uA78B\uA78D\uA790\uA792\uA796\uA798\uA79A\uA79C\uA79E\uA7A0\uA7A2\uA7A4\uA7A6\uA7A8\uA7AA-\uA7AE\uA7B0-\uA7B4\uA7B6\uA7B8\u1E00\u1E02\u1E04\u1E06\u1E08\u1E0A\u1E0C\u1E0E\u1E10\u1E12\u1E14\u1E16\u1E18\u1E1A\u1E1C\u1E1E\u1E20\u1E22\u1E24\u1E26\u1E28\u1E2A\u1E2C\u1E2E\u1E30\u1E32\u1E34\u1E36\u1E38\u1E3A\u1E3C\u1E3E\u1E40\u1E42\u1E44\u1E46\u1E48\u1E4A\u1E4C\u1E4E\u1E50\u1E52\u1E54\u1E56\u1E58\u1E5A\u1E5C\u1E5E\u1E60\u1E62\u1E64\u1E66\u1E68\u1E6A\u1E6C\u1E6E\u1E70\u1E72\u1E74\u1E76\u1E78\u1E7A\u1E7C\u1E7E\u1E80\u1E82\u1E84\u1E86\u1E88\u1E8A\u1E8C\u1E8E\u1E90\u1E92\u1E94\u1E9E\u1EA0\u1EA2\u1EA4\u1EA6\u1EA8\u1EAA\u1EAC\u1EAE\u1EB0\u1EB2\u1EB4\u1EB6\u1EB8\u1EBA\u1EBC\u1EBE\u1EC0\u1EC2\u1EC4\u1EC6\u1EC8\u1ECA\u1ECC\u1ECE\u1ED0\u1ED2\u1ED4\u1ED6\u1ED8\u1EDA\u1EDC\u1EDE\u1EE0\u1EE2\u1EE4\u1EE6\u1EE8\u1EEA\u1EEC\u1EEE\u1EF0\u1EF2\u1EF4\u1EF6\u1EF8\u1EFA\u1EFC\u1EFEЁА-ЯӘӨҮҖҢҺΑ-ΩΆΈΊΌΏΉΎА-ЩЮЯІЇЄҐЃЅЈЉЊЌЀЍ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002
B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F][A-Z\uFF21-\uFF3A\u00C0-\u00D6\u00D8-\u00DE\u0100\u0102\u0104\u0106\u0108\u010A\u010C\u010E\u0110\u0112\u0114\u0116\u0118\u011A\u011C\u011E\u0120\u0122\u0124\u0126\u0128\u012A\u012C\u012E\u0130\u0132\u0134\u0136\u0139\u013B\u013D\u013F\u0141\u0143\u0145\u0147\u014A\u014C\u014E\u0150\u0152\u0154\u0156\u0158\u015A\u015C\u015E\u0160\u0162\u0164\u0166\u0168\u016A\u016C\u016E\u0170\u0172\u0174\u0176\u0178\u0179\u017B\u017D\u0181\u0182\u0184\u0186\u0187\u0189-\u018B\u018E-\u0191\u0193\u0194\u0196-\u0198\u019C\u019D\u019F\u01A0\u01A2\u01A4\u01A6\u01A7\u01A9\u01AC\u01AE\u01AF\u01B1-\u01B3\u01B5\u01B7\u01B8\u01BC\u01C4\u01C7\u01CA\u01CD\u01CF\u01D1\u01D3\u01D5\u01D7\u01D9\u01DB\u01DE\u01E0\u01E2\u01E4\u01E6\u01E8\u01EA\u01EC\u01EE\u01F1\u01F4\u01F6-\u01F8\u01FA\u01FC\u01FE\u0200\u0202\u0204\u0206\u0208\u020A\u020C\u020E\u0210\u0212\u0214\u0216\u0218\u021A\u021C\u021E\u0220\u0222\u0224\u0226\u0228\u022A\u022C\u022E\u0230\u0232\u023A\u023B\u023D\u023E\u0241\u0243-\u0246\u0248\u024A\u024C\u024E\u2C60\u2C62-\u2C64\u2C67\u2C69\u2C6B\u2C6D-\u2C70\u2C72\u2C75\u2C7E\u2C7F\uA722\uA724\uA726\uA728\uA72A\uA72C\uA72E\uA732\uA734\uA736\uA738\uA73A\uA73C\uA73E\uA740\uA742\uA744\uA746\uA748\uA74A\uA74C\uA74E\uA750\uA752\uA754\uA756\uA758\uA75A\uA75C\uA75E\uA760\uA762\uA764\uA766\uA768\uA76A\uA76C\uA76E\uA779\uA77B\uA77D\uA77E\uA780\uA782\uA784\uA786\uA78B\uA78D\uA790\uA792\uA796\uA798\uA79A\uA79C\uA79E\uA7A0\uA7A2\uA7A4\uA7A6\uA7A8\uA7AA-\uA7AE\uA7B0-\uA7B4\uA7B6\uA7B8\u1E00\u1E02\u1E04\u1E06\u1E08\u1E0A\u1E0C\u1E0E\u1E10\u1E12\u1E14\u1E16\u1E18\u1E1A\u1E1C\u1E1E\u1E20\u1E22\u1E24\u1E26\u1E28\u1E2A\u1E2C\u1E2E\u1E30\u1E32\u1E34\u1E36\u1E38\u1E3A\u1E3C\u1E3E\u1E40\u1E42\u1E44\u1E46\u1E48\u1E4A\u1E4C\u1E4E\u1E50\u1E52\u1E54\u1E56\u1E58\u1E5A\u1E5C\u1E5E\u1E60\u1E62\u1E64\u1E66\u
1E68\u1E6A\u1E6C\u1E6E\u1E70\u1E72\u1E74\u1E76\u1E78\u1E7A\u1E7C\u1E7E\u1E80\u1E82\u1E84\u1E86\u1E88\u1E8A\u1E8C\u1E8E\u1E90\u1E92\u1E94\u1E9E\u1EA0\u1EA2\u1EA4\u1EA6\u1EA8\u1EAA\u1EAC\u1EAE\u1EB0\u1EB2\u1EB4\u1EB6\u1EB8\u1EBA\u1EBC\u1EBE\u1EC0\u1EC2\u1EC4\u1EC6\u1EC8\u1ECA\u1ECC\u1ECE\u1ED0\u1ED2\u1ED4\u1ED6\u1ED8\u1EDA\u1EDC\u1EDE\u1EE0\u1EE2\u1EE4\u1EE6\u1EE8\u1EEA\u1EEC\u1EEE\u1EF0\u1EF2\u1EF4\u1EF6\u1EF8\u1EFA\u1EFC\u1EFEЁА-ЯӘӨҮҖҢҺΑ-ΩΆΈΊΌΏΉΎА-ЩЮЯІЇЄҐЃЅЈЉЊЌЀЍ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F])\.$�infix_finditer�>�\.\.+|…|[\u00A6\u00A9\u00AE\u00B0\u0482\u058D\u058E\u060E\u060F\u06DE\u06E9\u06FD\u06FE\u07F6\u09FA\u0B70\u0BF3-\u0BF8\u0BFA\u0C7F\u0D4F\u0D79\u0F01-\u0F03\u0F13\u0F15-\u0F17\u0F1A-\u0F1F\u0F34\u0F36\u0F38\u0FBE-\u0FC5\u0FC7-\u0FCC\u0FCE\u0FCF\u0FD5-\u0FD8\u109E\u109F\u1390-\u1399\u1940\u19DE-\u19FF\u1B61-\u1B6A\u1B74-\u1B7C\u2100\u2101\u2103-\u2106\u2108\u2109\u2114\u2116\u2117\u211E-\u2123\u2125\u2127\u2129\u212E\u213A\u213B\u214A\u214C\u214D\u214F\u218A\u218B\u2195-\u2199\u219C-\u219F\u21A1\u21A2\u21A4\u21A5\u21A7-\u21AD\u21AF-\u21CD\u21D0\u21D1\u21D3\u21D5-\u21F3\u2300-\u2307\u230C-\u231F\u2322-\u2328\u232B-\u237B\u237D-\u239A\u23B4-\u23DB\u23E2-\u2426\u2440-\u244A\u249C-\u24E9\u2500-\u25B6\u25B8-\u25C0\u25C2-\u25F7\u260
0-\u266E\u2670-\u2767\u2794-\u27BF\u2800-\u28FF\u2B00-\u2B2F\u2B45\u2B46\u2B4D-\u2B73\u2B76-\u2B95\u2B98-\u2BC8\u2BCA-\u2BFE\u2CE5-\u2CEA\u2E80-\u2E99\u2E9B-\u2EF3\u2F00-\u2FD5\u2FF0-\u2FFB\u3004\u3012\u3013\u3020\u3036\u3037\u303E\u303F\u3190\u3191\u3196-\u319F\u31C0-\u31E3\u3200-\u321E\u322A-\u3247\u3250\u3260-\u327F\u328A-\u32B0\u32C0-\u32FE\u3300-\u33FF\u4DC0-\u4DFF\uA490-\uA4C6\uA828-\uA82B\uA836\uA837\uA839\uAA77-\uAA79\uFDFD\uFFE4\uFFE8\uFFED\uFFEE\uFFFC\uFFFD\U00010137-\U0001013F\U00010179-\U00010189\U0001018C-\U0001018E\U00010190-\U0001019B\U000101A0\U000101D0-\U000101FC\U00010877\U00010878\U00010AC8\U0001173F\U00016B3C-\U00016B3F\U00016B45\U0001BC9C\U0001D000-\U0001D0F5\U0001D100-\U0001D126\U0001D129-\U0001D164\U0001D16A-\U0001D16C\U0001D183\U0001D184\U0001D18C-\U0001D1A9\U0001D1AE-\U0001D1E8\U0001D200-\U0001D241\U0001D245\U0001D300-\U0001D356\U0001D800-\U0001D9FF\U0001DA37-\U0001DA3A\U0001DA6D-\U0001DA74\U0001DA76-\U0001DA83\U0001DA85\U0001DA86\U0001ECAC\U0001F000-\U0001F02B\U0001F030-\U0001F093\U0001F0A0-\U0001F0AE\U0001F0B1-\U0001F0BF\U0001F0C1-\U0001F0CF\U0001F0D1-\U0001F0F5\U0001F110-\U0001F16B\U0001F170-\U0001F1AC\U0001F1E6-\U0001F202\U0001F210-\U0001F23B\U0001F240-\U0001F248\U0001F250\U0001F251\U0001F260-\U0001F265\U0001F300-\U0001F3FA\U0001F400-\U0001F6D4\U0001F6E0-\U0001F6EC\U0001F6F0-\U0001F6F9\U0001F700-\U0001F773\U0001F780-\U0001F7D8\U0001F800-\U0001F80B\U0001F810-\U0001F847\U0001F850-\U0001F859\U0001F860-\U0001F887\U0001F890-\U0001F8AD\U0001F900-\U0001F90B\U0001F910-\U0001F93E\U0001F940-\U0001F970\U0001F973-\U0001F976\U0001F97A\U0001F97C-\U0001F9A2\U0001F9B0-\U0001F9B9\U0001F9C0-\U0001F9C2\U0001F9D0-\U0001F9FF\U0001FA60-\U0001FA6D]|(?<=[0-9])[+\-\*^](?=[0-9-])|(?<=[a-z\uFF41-\uFF5A\u00DF-\u00F6\u00F8-\u00FF\u0101\u0103\u0105\u0107\u0109\u010B\u010D\u010F\u0111\u0113\u0115\u0117\u0119\u011B\u011D\u011F\u0121\u0123\u0125\u0127\u0129\u012B\u012D\u012F\u0131\u0133\u0135\u0137\u0138\u013A\u013C\u013E\u0140\u0142\u0144\u0146\u0148\u0149\u014B\u014D\
u014F\u0151\u0153\u0155\u0157\u0159\u015B\u015D\u015F\u0161\u0163\u0165\u0167\u0169\u016B\u016D\u016F\u0171\u0173\u0175\u0177\u017A\u017C\u017E\u017F\u0180\u0183\u0185\u0188\u018C\u018D\u0192\u0195\u0199-\u019B\u019E\u01A1\u01A3\u01A5\u01A8\u01AA\u01AB\u01AD\u01B0\u01B4\u01B6\u01B9\u01BA\u01BD-\u01BF\u01C6\u01C9\u01CC\u01CE\u01D0\u01D2\u01D4\u01D6\u01D8\u01DA\u01DC\u01DD\u01DF\u01E1\u01E3\u01E5\u01E7\u01E9\u01EB\u01ED\u01EF\u01F0\u01F3\u01F5\u01F9\u01FB\u01FD\u01FF\u0201\u0203\u0205\u0207\u0209\u020B\u020D\u020F\u0211\u0213\u0215\u0217\u0219\u021B\u021D\u021F\u0221\u0223\u0225\u0227\u0229\u022B\u022D\u022F\u0231\u0233-\u0239\u023C\u023F\u0240\u0242\u0247\u0249\u024B\u024D\u024F\u2C61\u2C65\u2C66\u2C68\u2C6A\u2C6C\u2C71\u2C73\u2C74\u2C76-\u2C7B\uA723\uA725\uA727\uA729\uA72B\uA72D\uA72F-\uA731\uA733\uA735\uA737\uA739\uA73B\uA73D\uA73F\uA741\uA743\uA745\uA747\uA749\uA74B\uA74D\uA74F\uA751\uA753\uA755\uA757\uA759\uA75B\uA75D\uA75F\uA761\uA763\uA765\uA767\uA769\uA76B\uA76D\uA76F\uA771-\uA778\uA77A\uA77C\uA77F\uA781\uA783\uA785\uA787\uA78C\uA78E\uA791\uA793-\uA795\uA797\uA799\uA79B\uA79D\uA79F\uA7A1\uA7A3\uA7A5\uA7A7\uA7A9\uA7AF\uA7B5\uA7B7\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB64\u0250-\u02AF\u1D00-\u1D25\u1D6B-\u1D77\u1D79-\u1D9A\u1E01\u1E03\u1E05\u1E07\u1E09\u1E0B\u1E0D\u1E0F\u1E11\u1E13\u1E15\u1E17\u1E19\u1E1B\u1E1D\u1E1F\u1E21\u1E23\u1E25\u1E27\u1E29\u1E2B\u1E2D\u1E2F\u1E31\u1E33\u1E35\u1E37\u1E39\u1E3B\u1E3D\u1E3F\u1E41\u1E43\u1E45\u1E47\u1E49\u1E4B\u1E4D\u1E4F\u1E51\u1E53\u1E55\u1E57\u1E59\u1E5B\u1E5D\u1E5F\u1E61\u1E63\u1E65\u1E67\u1E69\u1E6B\u1E6D\u1E6F\u1E71\u1E73\u1E75\u1E77\u1E79\u1E7B\u1E7D\u1E7F\u1E81\u1E83\u1E85\u1E87\u1E89\u1E8B\u1E8D\u1E8F\u1E91\u1E93\u1E95-\u1E9D\u1E9F\u1EA1\u1EA3\u1EA5\u1EA7\u1EA9\u1EAB\u1EAD\u1EAF\u1EB1\u1EB3\u1EB5\u1EB7\u1EB9\u1EBB\u1EBD\u1EBF\u1EC1\u1EC3\u1EC5\u1EC7\u1EC9\u1ECB\u1ECD\u1ECF\u1ED1\u1ED3\u1ED5\u1ED7\u1ED9\u1EDB\u1EDD\u1EDF\u1EE1\u1EE3\u1EE5\u1EE7\u1EE9\u1EEB\u1EED\u1EEF\u1EF1\u1EF3\u1EF5\u1EF7\u1EF9\u1EFB\u1EFD\u1EFFёа-яәөү
җңһα-ωάέίόώήύа-щюяіїєґѓѕјљњќѐѝ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F\'"”“`‘´’‚,„»«「」『』()〔〕【】《》〈〉])\.(?=[A-Z\uFF21-\uFF3A\u00C0-\u00D6\u00D8-\u00DE\u0100\u0102\u0104\u0106\u0108\u010A\u010C\u010E\u0110\u0112\u0114\u0116\u0118\u011A\u011C\u011E\u0120\u0122\u0124\u0126\u0128\u012A\u012C\u012E\u0130\u0132\u0134\u0136\u0139\u013B\u013D\u013F\u0141\u0143\u0145\u0147\u014A\u014C\u014E\u0150\u0152\u0154\u0156\u0158\u015A\u015C\u015E\u0160\u0162\u0164\u0166\u0168\u016A\u016C\u016E\u0170\u0172\u0174\u0176\u0178\u0179\u017B\u017D\u0181\u0182\u0184\u0186\u0187\u0189-\u018B\u018E-\u0191\u0193\u0194\u0196-\u0198\u019C\u019D\u019F\u01A0\u01A2\u01A4\u01A6\u01A7\u01A9\u01AC\u01AE\u01AF\u01B1-\u01B3\u01B5\u01B7\u01B8\u01BC\u01C4\u01C7\u01CA\u01CD\u01CF\u01D1\u01D3\u01D5\u01D7\u01D9\u01DB\u01DE\u01E0\u01E2\u01E4\u01E6\u01E8\u01EA\u01EC\u01EE\u01F1\u01F4\u01F6-\u01F8\u01FA\u01FC\u01FE\u0200\u0202\u0204\u0206\u0208\u020A\u020C\u020E\u0210\u0212\u0214\u0216\u0218\u021A\u021C\u021E\u0220\u0222\u0224\u0226\u0228\u022A\u022C\u022E\u0230\u0232\u023A\u023B\u023D\u023E\u0241\u0243-\u0246\u0248\u024A\u024C\u024E\u2C60\u2C62-\u2C64\u2C67\u2C69\u2C6B\u2C6D-\u2C70\u2C72\u2C75\u2C7E\u2C7F\uA722\uA724\uA726\uA728\uA72A\uA72C\uA72E\uA732\uA734\uA736\uA738\uA73A\uA73C\uA73E\uA7
40\uA742\uA744\uA746\uA748\uA74A\uA74C\uA74E\uA750\uA752\uA754\uA756\uA758\uA75A\uA75C\uA75E\uA760\uA762\uA764\uA766\uA768\uA76A\uA76C\uA76E\uA779\uA77B\uA77D\uA77E\uA780\uA782\uA784\uA786\uA78B\uA78D\uA790\uA792\uA796\uA798\uA79A\uA79C\uA79E\uA7A0\uA7A2\uA7A4\uA7A6\uA7A8\uA7AA-\uA7AE\uA7B0-\uA7B4\uA7B6\uA7B8\u1E00\u1E02\u1E04\u1E06\u1E08\u1E0A\u1E0C\u1E0E\u1E10\u1E12\u1E14\u1E16\u1E18\u1E1A\u1E1C\u1E1E\u1E20\u1E22\u1E24\u1E26\u1E28\u1E2A\u1E2C\u1E2E\u1E30\u1E32\u1E34\u1E36\u1E38\u1E3A\u1E3C\u1E3E\u1E40\u1E42\u1E44\u1E46\u1E48\u1E4A\u1E4C\u1E4E\u1E50\u1E52\u1E54\u1E56\u1E58\u1E5A\u1E5C\u1E5E\u1E60\u1E62\u1E64\u1E66\u1E68\u1E6A\u1E6C\u1E6E\u1E70\u1E72\u1E74\u1E76\u1E78\u1E7A\u1E7C\u1E7E\u1E80\u1E82\u1E84\u1E86\u1E88\u1E8A\u1E8C\u1E8E\u1E90\u1E92\u1E94\u1E9E\u1EA0\u1EA2\u1EA4\u1EA6\u1EA8\u1EAA\u1EAC\u1EAE\u1EB0\u1EB2\u1EB4\u1EB6\u1EB8\u1EBA\u1EBC\u1EBE\u1EC0\u1EC2\u1EC4\u1EC6\u1EC8\u1ECA\u1ECC\u1ECE\u1ED0\u1ED2\u1ED4\u1ED6\u1ED8\u1EDA\u1EDC\u1EDE\u1EE0\u1EE2\u1EE4\u1EE6\u1EE8\u1EEA\u1EEC\u1EEE\u1EF0\u1EF2\u1EF4\u1EF6\u1EF8\u1EFA\u1EFC\u1EFEЁА-ЯӘӨҮҖҢҺΑ-ΩΆΈΊΌΏΉΎА-ЩЮЯІЇЄҐЃЅЈЉЊЌЀЍ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F\'"”“`‘´’‚,„»«「」『』()〔〕【】《》〈〉])|(?<=[A-Za-z\uFF21-\uFF3A\uFF41-\uFF5A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u017F\u0180-\u01BF
\u01C4-\u024F\u2C60-\u2C7B\u2C7E\u2C7F\uA722-\uA76F\uA771-\uA787\uA78B-\uA78E\uA790-\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB64\u0250-\u02AF\u1D00-\u1D25\u1D6B-\u1D77\u1D79-\u1D9A\u1E00-\u1EFFёа-яЁА-ЯәөүҗңһӘӨҮҖҢҺα-ωάέίόώήύΑ-ΩΆΈΊΌΏΉΎа-щюяіїєґА-ЩЮЯІЇЄҐѓѕјљњќѐѝЃЅЈЉЊЌЀЍ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F]),(?=[A-Za-z\uFF21-\uFF3A\uFF41-\uFF5A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u017F\u0180-\u01BF\u01C4-\u024F\u2C60-\u2C7B\u2C7E\u2C7F\uA722-\uA76F\uA771-\uA787\uA78B-\uA78E\uA790-\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB64\u0250-\u02AF\u1D00-\u1D25\u1D6B-\u1D77\u1D79-\u1D9A\u1E00-\u1EFFёа-яЁА-ЯәөүҗңһӘӨҮҖҢҺα-ωάέίόώήύΑ-ΩΆΈΊΌΏΉΎа-щюяіїєґА-ЩЮЯІЇЄҐѓѕјљњќѐѝЃЅЈЉЊЌЀЍ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U
0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F])|(?<=[A-Za-z\uFF21-\uFF3A\uFF41-\uFF5A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u017F\u0180-\u01BF\u01C4-\u024F\u2C60-\u2C7B\u2C7E\u2C7F\uA722-\uA76F\uA771-\uA787\uA78B-\uA78E\uA790-\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB64\u0250-\u02AF\u1D00-\u1D25\u1D6B-\u1D77\u1D79-\u1D9A\u1E00-\u1EFFёа-яЁА-ЯәөүҗңһӘӨҮҖҢҺα-ωάέίόώήύΑ-ΩΆΈΊΌΏΉΎа-щюяіїєґА-ЩЮЯІЇЄҐѓѕјљњќѐѝЃЅЈЉЊЌЀЍ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F])(?:-|–|—|--|---|——|~)(?=[A-Za-z\uFF21-\uFF3A\uFF41-\uFF5A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u017F\u0180-\u01BF\u01C4-\u024F\u2C60-\u2C7B\u2C7E\u2C7F\uA722-\uA76F\uA771-\uA787\uA78B-\uA78E\uA790-\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB64\u0250-\u02AF\u1D00-\u1D25\u1D6B-\u1D77\u1D79-\u1D9A\u1E00-\u1EFFёа-яЁА-ЯәөүҗңһӘӨҮҖҢҺα-ωάέίόώήύΑ-ΩΆΈΊΌΏΉΎа-щюяіїєґА-ЩЮЯІЇЄҐѓѕјљњќѐѝЃЅЈЉЊЌЀЍ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\
u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F])|(?<=[A-Za-z\uFF21-\uFF3A\uFF41-\uFF5A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u017F\u0180-\u01BF\u01C4-\u024F\u2C60-\u2C7B\u2C7E\u2C7F\uA722-\uA76F\uA771-\uA787\uA78B-\uA78E\uA790-\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB64\u0250-\u02AF\u1D00-\u1D25\u1D6B-\u1D77\u1D79-\u1D9A\u1E00-\u1EFFёа-яЁА-ЯәөүҗңһӘӨҮҖҢҺα-ωάέίόώήύΑ-ΩΆΈΊΌΏΉΎа-щюяіїєґА-ЩЮЯІЇЄҐѓѕјљњќѐѝЃЅЈЉЊЌЀЍ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F0-9])[:<>=/](?=[A-Za-z\uFF21-\uFF3A\uFF41-\uFF5A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u017F\u0180-\u01BF\u01C4-\u024F\u2C60-\u2C7B\u2C7E\u2C7F\uA722-\uA76F\uA771-\uA787\uA78B-\uA78E\uA790-\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB64\u0250-\u02AF\u1D00-\u1D25\u1D6B-\u1D77\u1D79-\u1D9A\u1E00-\u1EFFёа-яЁА-ЯәөүҗңһӘӨҮҖҢҺα-ωάέίόώήύΑ-ΩΆΈΊΌΏΉΎа-щюяіїєґА-ЩЮЯІЇЄҐѓѕјљњќѐѝЃЅЈЉЊЌЀЍ\u1
200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F])�token_match��url_match�
2
+ ��A�
3
+ � ��A� �'��A�'�''��A�''�(*_*)��A�(*_*)�(-8��A�(-8�(-:��A�(-:�(-;��A�(-;�(-_-)��A�(-_-)�(._.)��A�(._.)�(:��A�(:�(;��A�(;�(=��A�(=�(>_<)��A�(>_<)�(^_^)��A�(^_^)�(o:��A�(o:�(¬_¬)��A�(¬_¬)�(ಠ_ಠ)��A�(ಠ_ಠ)�(╯°□°)╯︵┻━┻��A�(╯°□°)╯︵┻━┻�)-:��A�)-:�):��A�):�-_-��A�-_-�-__-��A�-__-�._.��A�._.�0.0��A�0.0�0.o��A�0.o�0_0��A�0_0�0_o��A�0_o�8)��A�8)�8-)��A�8-)�8-D��A�8-D�8D��A�8D�:'(��A�:'(�:')��A�:')�:'-(��A�:'-(�:'-)��A�:'-)�:(��A�:(�:((��A�:((�:(((��A�:(((�:()��A�:()�:)��A�:)�:))��A�:))�:)))��A�:)))�:*��A�:*�:-(��A�:-(�:-((��A�:-((�:-(((��A�:-(((�:-)��A�:-)�:-))��A�:-))�:-)))��A�:-)))�:-*��A�:-*�:-/��A�:-/�:-0��A�:-0�:-3��A�:-3�:->��A�:->�:-D��A�:-D�:-O��A�:-O�:-P��A�:-P�:-X��A�:-X�:-]��A�:-]�:-o��A�:-o�:-p��A�:-p�:-x��A�:-x�:-|��A�:-|�:-}��A�:-}�:/��A�:/�:0��A�:0�:1��A�:1�:3��A�:3�:>��A�:>�:D��A�:D�:O��A�:O�:P��A�:P�:X��A�:X�:]��A�:]�:o��A�:o�:o)��A�:o)�:p��A�:p�:x��A�:x�:|��A�:|�:}��A�:}�;)��A�;)�;-)��A�;-)�;-D��A�;-D�;D��A�;D�;_;��A�;_;�<.<��A�<.<�</3��A�</3�<3��A�<3�<33��A�<33�<333��A�<333�<space>��A�<space>�=(��A�=(�=)��A�=)�=/��A�=/�=3��A�=3�=D��A�=D�=[��A�=[�=]��A�=]�=|��A�=|�>.<��A�>.<�>.>��A�>.>�>:(��A�>:(�>:o��A�>:o�><(((*>��A�><(((*>�@_@��A�@_@�C++��A�C++�O.O��A�O.O�O.o��A�O.o�O_O��A�O_O�O_o��A�O_o�V.V��A�V.V�V_V��A�V_V�XD��A�XD�XDD��A�XDD�[-:��A�[-:�[:��A�[:�[=��A�[=�\")��A�\")�\n��A�\n�\t��A�\t�]=��A�]=�^_^��A�^_^�^__^��A�^__^�^___^��A�^___^�a.��A�a.�b.��A�b.�c.��A�c.�d.��A�d.�e.��A�e.�f.��A�f.�g.��A�g.�h.��A�h.�i.��A�i.�j.��A�j.�k.��A�k.�l.��A�l.�m.��A�m.�n.��A�n.�o.��A�o.�o.0��A�o.0�o.O��A�o.O�o.o��A�o.o�o_0��A�o_0�o_O��A�o_O�o_o��A�o_o�p.��A�p.�q.��A�q.�r.��A�r.�s.��A�s.�t.��A�t.�u.��A�u.�v.��A�v.�v.v��A�v.v�v_v��A�v_v�w.��A�w.�x.��A�x.�xD��A�xD�xDD��A�xDD�y.��A�y.�z.��A�z.� ��A� C� �¯\(ツ)/¯��A�¯\(ツ)/¯�°C.��A�°�A�C�A�.�°F.��A�°�A�F�A�.�°K.��A�°�A�K�A�.�°c.��A�°�A�c�A�.�°f.��A�°�A�f�A�.�°k.��A�°�A�k�A�.�ä.��A�ä.�ö.��A�ö.�ü.��A�ü.�ಠ_ಠ��A�ಠ_ಠ�ಠ︵ಠ��A�ಠ︵ಠ�—��A�—�faster_heuristics�
transformer/cfg ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "max_batch_items":4096
3
+ }
transformer/model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5982917e5a22ef833c1b3c4aa1d491facf793e9831fd35d4322d5e8283a0878
3
+ size 1134411414
vocab/key2row ADDED
@@ -0,0 +1 @@
 
 
1
+
vocab/lookups.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76be8b528d0075f7aae98d6fa57a6d3c83ae480a8469e668d7b0af968995ac71
3
+ size 1
vocab/strings.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f821bae84c6dc3934ba189b4ccad8870917ae2ede66c14c74413a11bcdcab0b2
3
+ size 16326872
vocab/vectors ADDED
Binary file (128 Bytes). View file
 
vocab/vectors.cfg ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "mode":"default"
3
+ }
xx_pipeline-any-py3-none-any.whl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d039e768a6467b34e112428a18f8877bfcb348e566fa191719079b3bb7c9099
3
+ size 1021523182