freemt committed on
Commit
de6562c
·
1 Parent(s): 66fcc65

Bump version to 0.1.0a2

Browse files
app_mlbee.py CHANGED
@@ -148,15 +148,16 @@ def main():
148
  app = Multipage()
149
 
150
  app.add_page("Home", "house", home)
151
- app.add_page("Settings", "gear", settings)
 
 
152
  app.add_page("Info", "info", info)
153
 
154
  app.run()
155
 
156
  if set_loglevel() <= 10:
157
  st.markdown(state.ns.count)
158
- loggu.debug(f" run: {state.ns.count}")
159
- logger.debug(f" run: {state.ns.count}")
160
  state.ns.count += 1
161
  state.ns.updated = False
162
 
 
148
  app = Multipage()
149
 
150
  app.add_page("Home", "house", home)
151
+ # app.add_page("Settings", "gear", settings)
152
+ # app.add_page("Setup", "gear", settings)
153
+ app.add_page("Config", "gear", settings)
154
  app.add_page("Info", "info", info)
155
 
156
  app.run()
157
 
158
  if set_loglevel() <= 10:
159
  st.markdown(state.ns.count)
160
+ logger.debug(" run: %s", state.ns.count)
 
161
  state.ns.count += 1
162
  state.ns.updated = False
163
 
install-sw.sh CHANGED
@@ -21,4 +21,3 @@ rm setup_12.x
21
 
22
  # apt update # already done in apt-get install -y nodejs
23
  apt install byobu -y > /dev/null 2>&1
24
-
 
21
 
22
  # apt update # already done in apt-get install -y nodejs
23
  apt install byobu -y > /dev/null 2>&1
 
install-sw1.sh ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # pip install pipx
2
+ # pipx install poetry
3
+ # pipx ensurepath
4
+ # source ~/.bashrc
5
+
6
+ # curl -sSL https://install.python-poetry.org | python3 -
7
+ # -C- continue -S show error -o output
8
+ curl -sSL -C- -o install-poetry.py https://install.python-poetry.org
9
+ python install-poetry.py
10
+ rm install-poetry.py
11
+ echo export PATH=~/.local/bin:$PATH > ~/.bashrc
12
+ source ~/.bashrc
13
+ # ~/.local/bin/poetry install
14
+
15
+ wget -c https://deb.nodesource.com/setup_12.x
16
+ bash setup_12.x
17
+ apt-get install -y nodejs
18
+ npm install -g npm@latest
19
+ npm install -g nodemon
20
+ rm setup_12.x
21
+
22
+ # apt update # already done in apt-get install -y nodejs
23
+ apt install byobu -y > /dev/null 2>&1
24
+ byobu-enable
25
+ byobu
package.json CHANGED
@@ -4,18 +4,18 @@
4
  },
5
  "scripts": {
6
  "start": "pyright && pytest && yarn style",
7
- "test": "nodemon -w tests -w mlbee -x pytest tests",
8
- "pyright": "nodemon -w mlbee -w .venv -e .py -x pyright mlbee tests",
9
- "pytest": "nodemon -w tests -w mlbee -e .py -x pytest tests mlbee",
10
- "style": "nodemon -w mlbee -w tests -x \"black tests mlbee && python -m flake8\"",
11
- "docstyle": "nodemon -w mlbee -w tests -x pydocstyle --convention=google tests mlbee",
12
- "pylint": "nodemon -w mlbee -e .py -x pylint mlbee",
13
- "test:mlbee": "nodemon -w tests -e .py -x pytest -k mlbee tests",
14
  "publish": "poetry build && poetry publish",
15
- "black": "black tests mlbee",
16
- "flake8": "flake8 tests mlbee",
17
- "pflake8": "pflake8 tests mlbee",
18
- "pep257": "pep257 tests mlbee",
19
  "final": "run-s docstyle black flake8 pytest"
20
  }
21
  }
 
4
  },
5
  "scripts": {
6
  "start": "pyright && pytest && yarn style",
7
+ "test": "nodemon -w tests -w st_mlbee -x pytest tests",
8
+ "pyright": "nodemon -w st_mlbee -w .venv -e .py -x pyright st_mlbee tests",
9
+ "pytest": "nodemon -w tests -w st_mlbee -e .py -x pytest tests st_mlbee",
10
+ "style": "nodemon -w st_mlbee -w tests -x \"black tests st_mlbee && python -m flake8\"",
11
+ "docstyle": "nodemon -w st_mlbee -w tests -x pydocstyle --convention=google tests st_mlbee",
12
+ "pylint": "nodemon -w st_mlbee -e .py -x pylint st_mlbee",
13
+ "test:st_mlbee": "nodemon -w tests -e .py -x pytest -k st_mlbee tests",
14
  "publish": "poetry build && poetry publish",
15
+ "black": "black tests st_mlbee",
16
+ "flake8": "flake8 tests st_mlbee",
17
+ "pflake8": "pflake8 tests st_mlbee",
18
+ "pep257": "pep257 tests st_mlbee",
19
  "final": "run-s docstyle black flake8 pytest"
20
  }
21
  }
poetry.lock CHANGED
@@ -375,6 +375,31 @@ python-versions = "*"
375
  [package.extras]
376
  devel = ["colorama", "jsonschema", "json-spec", "pylint", "pytest", "pytest-benchmark", "pytest-cache", "validictory"]
377
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
378
  [[package]]
379
  name = "filelock"
380
  version = "3.7.1"
@@ -894,6 +919,17 @@ category = "main"
894
  optional = false
895
  python-versions = ">=3.5"
896
 
 
 
 
 
 
 
 
 
 
 
 
897
  [[package]]
898
  name = "nbclient"
899
  version = "0.6.4"
@@ -1236,6 +1272,17 @@ python-versions = ">=3.7"
1236
  [package.dependencies]
1237
  numpy = ">=1.16.6"
1238
 
 
 
 
 
 
 
 
 
 
 
 
1239
  [[package]]
1240
  name = "pycodestyle"
1241
  version = "2.7.0"
@@ -1537,6 +1584,37 @@ python-versions = ">=3.7"
1537
  [package.dependencies]
1538
  numpy = ">=1.16.5"
1539
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1540
  [[package]]
1541
  name = "semver"
1542
  version = "2.13.0"
@@ -1558,6 +1636,17 @@ nativelib = ["pyobjc-framework-cocoa", "pywin32"]
1558
  objc = ["pyobjc-framework-cocoa"]
1559
  win32 = ["pywin32"]
1560
 
 
 
 
 
 
 
 
 
 
 
 
1561
  [[package]]
1562
  name = "sentencepiece"
1563
  version = "0.1.96"
@@ -1578,6 +1667,18 @@ python-versions = ">=3.8.3,<4.0.0"
1578
  environs = ">=9.5.0,<10.0.0"
1579
  logzero = ">=1.7.0,<2.0.0"
1580
 
 
 
 
 
 
 
 
 
 
 
 
 
1581
  [[package]]
1582
  name = "simplejson"
1583
  version = "3.17.6"
@@ -1969,6 +2070,19 @@ decorator = ">=3.4.0"
1969
  [package.extras]
1970
  test = ["pytest (>=2.2.3)", "flake8 (>=2.4.0)", "isort (>=4.2.2)"]
1971
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1972
  [[package]]
1973
  name = "watchdog"
1974
  version = "2.1.8"
@@ -2041,7 +2155,7 @@ testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-
2041
  [metadata]
2042
  lock-version = "1.1"
2043
  python-versions = "^3.8.3"
2044
- content-hash = "4d98009b5b4b96e19c21e205e88a6e856ef4321da82fbb7a58134b86f39b89a8"
2045
 
2046
  [metadata.files]
2047
  about-time = [
@@ -2303,6 +2417,13 @@ fastjsonschema = [
2303
  {file = "fastjsonschema-2.15.3-py3-none-any.whl", hash = "sha256:ddb0b1d8243e6e3abb822bd14e447a89f4ab7439342912d590444831fa00b6a0"},
2304
  {file = "fastjsonschema-2.15.3.tar.gz", hash = "sha256:0a572f0836962d844c1fc435e200b2e4f4677e4e6611a2e3bdd01ba697c275ec"},
2305
  ]
 
 
 
 
 
 
 
2306
  filelock = [
2307
  {file = "filelock-3.7.1-py3-none-any.whl", hash = "sha256:37def7b658813cda163b56fc564cdc75e86d338246458c4c28ae84cabefa2404"},
2308
  {file = "filelock-3.7.1.tar.gz", hash = "sha256:3a0fd85166ad9dbab54c9aec96737b744106dc5f15c0b09a6744a445299fcf04"},
@@ -2553,6 +2674,10 @@ more-itertools = [
2553
  {file = "more-itertools-8.13.0.tar.gz", hash = "sha256:a42901a0a5b169d925f6f217cd5a190e32ef54360905b9c39ee7db5313bfec0f"},
2554
  {file = "more_itertools-8.13.0-py3-none-any.whl", hash = "sha256:c5122bffc5f104d37c1626b8615b511f3427aa5389b94d61e5ef8236bfbc3ddb"},
2555
  ]
 
 
 
 
2556
  nbclient = [
2557
  {file = "nbclient-0.6.4-py3-none-any.whl", hash = "sha256:f251bba200a2b401a061dfd700a7a70b5772f664fb49d4a2d3e5536ec0e98c76"},
2558
  {file = "nbclient-0.6.4.tar.gz", hash = "sha256:cdef7757cead1735d2c70cc66095b072dced8a1e6d1c7639ef90cd3e04a11f2e"},
@@ -2831,6 +2956,10 @@ pyarrow = [
2831
  {file = "pyarrow-8.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:cb06cacc19f3b426681f2f6803cc06ff481e7fe5b3a533b406bc5b2138843d4f"},
2832
  {file = "pyarrow-8.0.0.tar.gz", hash = "sha256:4a18a211ed888f1ac0b0ebcb99e2d9a3e913a481120ee9b1fe33d3fedb945d4e"},
2833
  ]
 
 
 
 
2834
  pycodestyle = [
2835
  {file = "pycodestyle-2.7.0-py2.py3-none-any.whl", hash = "sha256:514f76d918fcc0b55c6680472f0a37970994e07bbb80725808c17089be302068"},
2836
  {file = "pycodestyle-2.7.0.tar.gz", hash = "sha256:c389c1d06bf7904078ca03399a4816f974a1d590090fecea0c63ec26ebaf1cef"},
@@ -3160,6 +3289,14 @@ scipy = [
3160
  {file = "scipy-1.6.1-cp39-cp39-win_amd64.whl", hash = "sha256:a5193a098ae9f29af283dcf0041f762601faf2e595c0db1da929875b7570353f"},
3161
  {file = "scipy-1.6.1.tar.gz", hash = "sha256:c4fceb864890b6168e79b0e714c585dbe2fd4222768ee90bc1aa0f8218691b11"},
3162
  ]
 
 
 
 
 
 
 
 
3163
  semver = [
3164
  {file = "semver-2.13.0-py2.py3-none-any.whl", hash = "sha256:ced8b23dceb22134307c1b8abfa523da14198793d9787ac838e70e29e77458d4"},
3165
  {file = "semver-2.13.0.tar.gz", hash = "sha256:fa0fe2722ee1c3f57eac478820c3a5ae2f624af8264cbdf9000c980ff7f75e3f"},
@@ -3168,6 +3305,10 @@ send2trash = [
3168
  {file = "Send2Trash-1.8.0-py3-none-any.whl", hash = "sha256:f20eaadfdb517eaca5ce077640cb261c7d2698385a6a0f072a4a5447fd49fa08"},
3169
  {file = "Send2Trash-1.8.0.tar.gz", hash = "sha256:d2c24762fd3759860a0aff155e45871447ea58d2be6bdd39b5c8f966a0c99c2d"},
3170
  ]
 
 
 
 
3171
  sentencepiece = [
3172
  {file = "sentencepiece-0.1.96-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc969e6694fb27fba7cee2953f350804faf03913f25ae1ee713a7b8a1bc08018"},
3173
  {file = "sentencepiece-0.1.96-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:36e9ff61e7b67c5b7ee96733613622620b4802fc8cf188a4dbc1f355b03dde02"},
@@ -3218,6 +3359,10 @@ set-loglevel = [
3218
  {file = "set_loglevel-0.1.2-py3-none-any.whl", hash = "sha256:fcfe76ccd3791511e2cb51ec11ded6cc2186a727e441a934c547a211d0fdf773"},
3219
  {file = "set_loglevel-0.1.2.tar.gz", hash = "sha256:4da23414a798cea918801b113e17af092f13ccda96345203b14042b6d6d3a896"},
3220
  ]
 
 
 
 
3221
  simplejson = [
3222
  {file = "simplejson-3.17.6-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:a89acae02b2975b1f8e4974cb8cdf9bf9f6c91162fb8dec50c259ce700f2770a"},
3223
  {file = "simplejson-3.17.6-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:82ff356ff91be0ab2293fc6d8d262451eb6ac4fd999244c4b5f863e049ba219c"},
@@ -3461,6 +3606,18 @@ urllib3 = [
3461
  validators = [
3462
  {file = "validators-0.20.0.tar.gz", hash = "sha256:24148ce4e64100a2d5e267233e23e7afeb55316b47d30faae7eb6e7292bc226a"},
3463
  ]
 
 
 
 
 
 
 
 
 
 
 
 
3464
  watchdog = [
3465
  {file = "watchdog-2.1.8-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:676263bee67b165f16b05abc52acc7a94feac5b5ab2449b491f1a97638a79277"},
3466
  {file = "watchdog-2.1.8-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:aa68d2d9a89d686fae99d28a6edf3b18595e78f5adf4f5c18fbfda549ac0f20c"},
 
375
  [package.extras]
376
  devel = ["colorama", "jsonschema", "json-spec", "pylint", "pytest", "pytest-benchmark", "pytest-cache", "validictory"]
377
 
378
+ [[package]]
379
+ name = "fastlid"
380
+ version = "0.1.7"
381
+ description = "Detect languages via a fasttext model"
382
+ category = "main"
383
+ optional = false
384
+ python-versions = ">=3.6,<4.0"
385
+
386
+ [package.dependencies]
387
+ fasttext = ">=0.9.2,<0.10.0"
388
+ logzero = ">=1.7.0,<2.0.0"
389
+ numpy = ">=1.20.3,<2.0.0"
390
+
391
+ [[package]]
392
+ name = "fasttext"
393
+ version = "0.9.2"
394
+ description = "fasttext Python bindings"
395
+ category = "main"
396
+ optional = false
397
+ python-versions = "*"
398
+
399
+ [package.dependencies]
400
+ numpy = "*"
401
+ pybind11 = ">=2.2"
402
+
403
  [[package]]
404
  name = "filelock"
405
  version = "3.7.1"
 
919
  optional = false
920
  python-versions = ">=3.5"
921
 
922
+ [[package]]
923
+ name = "morfessor"
924
+ version = "2.0.6"
925
+ description = "Morfessor"
926
+ category = "main"
927
+ optional = false
928
+ python-versions = "*"
929
+
930
+ [package.extras]
931
+ docs = ["sphinx", "sphinxcontrib-napoleon"]
932
+
933
  [[package]]
934
  name = "nbclient"
935
  version = "0.6.4"
 
1272
  [package.dependencies]
1273
  numpy = ">=1.16.6"
1274
 
1275
+ [[package]]
1276
+ name = "pybind11"
1277
+ version = "2.9.2"
1278
+ description = "Seamless operability between C++11 and Python"
1279
+ category = "main"
1280
+ optional = false
1281
+ python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7"
1282
+
1283
+ [package.extras]
1284
+ global = ["pybind11-global (==2.9.2)"]
1285
+
1286
  [[package]]
1287
  name = "pycodestyle"
1288
  version = "2.7.0"
 
1584
  [package.dependencies]
1585
  numpy = ">=1.16.5"
1586
 
1587
+ [[package]]
1588
+ name = "seg-text"
1589
+ version = "0.1.2"
1590
+ description = "pack_name descr "
1591
+ category = "main"
1592
+ optional = false
1593
+ python-versions = ">=3.8,<4.0"
1594
+
1595
+ [package.dependencies]
1596
+ fastlid = ">=0.1.7,<0.2.0"
1597
+ icecream = ">=2.1.1,<3.0.0"
1598
+ install = ">=1.3.5,<2.0.0"
1599
+ logzero = ">=1.7.0,<2.0.0"
1600
+ Morfessor = "2.0.6"
1601
+ numpy = ">=1.22.2,<2.0.0"
1602
+ sentence-splitter = ">=1.4,<2.0"
1603
+ tqdm = ">=4.62.3,<5.0.0"
1604
+ vtext = ">=0.2.0,<0.3.0"
1605
+
1606
+ [[package]]
1607
+ name = "semantic-version"
1608
+ version = "2.10.0"
1609
+ description = "A library implementing the 'SemVer' scheme."
1610
+ category = "main"
1611
+ optional = false
1612
+ python-versions = ">=2.7"
1613
+
1614
+ [package.extras]
1615
+ dev = ["Django (>=1.11)", "nose2", "tox", "check-manifest", "coverage", "flake8", "wheel", "zest.releaser", "readme-renderer (<25.0)", "colorama (<=0.4.1)"]
1616
+ doc = ["sphinx", "sphinx-rtd-theme"]
1617
+
1618
  [[package]]
1619
  name = "semver"
1620
  version = "2.13.0"
 
1636
  objc = ["pyobjc-framework-cocoa"]
1637
  win32 = ["pywin32"]
1638
 
1639
+ [[package]]
1640
+ name = "sentence-splitter"
1641
+ version = "1.4"
1642
+ description = "Text to sentence splitter using heuristic algorithm by Philipp Koehn and Josh Schroeder"
1643
+ category = "main"
1644
+ optional = false
1645
+ python-versions = ">=3.5"
1646
+
1647
+ [package.dependencies]
1648
+ regex = ">=2017.12.12"
1649
+
1650
  [[package]]
1651
  name = "sentencepiece"
1652
  version = "0.1.96"
 
1667
  environs = ">=9.5.0,<10.0.0"
1668
  logzero = ">=1.7.0,<2.0.0"
1669
 
1670
+ [[package]]
1671
+ name = "setuptools-rust"
1672
+ version = "1.3.0"
1673
+ description = "Setuptools Rust extension plugin"
1674
+ category = "main"
1675
+ optional = false
1676
+ python-versions = ">=3.7"
1677
+
1678
+ [package.dependencies]
1679
+ semantic-version = ">=2.8.2,<3"
1680
+ typing-extensions = ">=3.7.4.3"
1681
+
1682
  [[package]]
1683
  name = "simplejson"
1684
  version = "3.17.6"
 
2070
  [package.extras]
2071
  test = ["pytest (>=2.2.3)", "flake8 (>=2.4.0)", "isort (>=4.2.2)"]
2072
 
2073
+ [[package]]
2074
+ name = "vtext"
2075
+ version = "0.2.0"
2076
+ description = "Natural Language Processing in Rust with Python bidings"
2077
+ category = "main"
2078
+ optional = false
2079
+ python-versions = ">=3.6"
2080
+
2081
+ [package.dependencies]
2082
+ numpy = ">=1.15.0"
2083
+ scipy = ">=1.1.0"
2084
+ setuptools-rust = ">=0.10.2"
2085
+
2086
  [[package]]
2087
  name = "watchdog"
2088
  version = "2.1.8"
 
2155
  [metadata]
2156
  lock-version = "1.1"
2157
  python-versions = "^3.8.3"
2158
+ content-hash = "bbd84e571c3522fe9890a9ca643114d82a62f2c0669322bfb6b7920612063533"
2159
 
2160
  [metadata.files]
2161
  about-time = [
 
2417
  {file = "fastjsonschema-2.15.3-py3-none-any.whl", hash = "sha256:ddb0b1d8243e6e3abb822bd14e447a89f4ab7439342912d590444831fa00b6a0"},
2418
  {file = "fastjsonschema-2.15.3.tar.gz", hash = "sha256:0a572f0836962d844c1fc435e200b2e4f4677e4e6611a2e3bdd01ba697c275ec"},
2419
  ]
2420
+ fastlid = [
2421
+ {file = "fastlid-0.1.7-py3-none-any.whl", hash = "sha256:591dbee44ac501c9aa89abb97a13b11cf964c3b8c4add1bdf02b44d30463e18f"},
2422
+ {file = "fastlid-0.1.7.tar.gz", hash = "sha256:a6693ea05b9e070b4656ce9320704688c0c0c6f09bf873d0add5184e96bdb055"},
2423
+ ]
2424
+ fasttext = [
2425
+ {file = "fasttext-0.9.2.tar.gz", hash = "sha256:665556f1f6dcb4fcbe25fa8ebcd4f71b18fa96a090de09d88d97a60cbd29dcb5"},
2426
+ ]
2427
  filelock = [
2428
  {file = "filelock-3.7.1-py3-none-any.whl", hash = "sha256:37def7b658813cda163b56fc564cdc75e86d338246458c4c28ae84cabefa2404"},
2429
  {file = "filelock-3.7.1.tar.gz", hash = "sha256:3a0fd85166ad9dbab54c9aec96737b744106dc5f15c0b09a6744a445299fcf04"},
 
2674
  {file = "more-itertools-8.13.0.tar.gz", hash = "sha256:a42901a0a5b169d925f6f217cd5a190e32ef54360905b9c39ee7db5313bfec0f"},
2675
  {file = "more_itertools-8.13.0-py3-none-any.whl", hash = "sha256:c5122bffc5f104d37c1626b8615b511f3427aa5389b94d61e5ef8236bfbc3ddb"},
2676
  ]
2677
+ morfessor = [
2678
+ {file = "Morfessor-2.0.6-py3-none-any.whl", hash = "sha256:7215e37909ebd2bafeeec5fdf4e339a25e61aee4895ff99317b9fb44eddab562"},
2679
+ {file = "Morfessor-2.0.6.tar.gz", hash = "sha256:bb3beac234341724c5f640f65803071f62373a50dba854d5a398567f9aefbab2"},
2680
+ ]
2681
  nbclient = [
2682
  {file = "nbclient-0.6.4-py3-none-any.whl", hash = "sha256:f251bba200a2b401a061dfd700a7a70b5772f664fb49d4a2d3e5536ec0e98c76"},
2683
  {file = "nbclient-0.6.4.tar.gz", hash = "sha256:cdef7757cead1735d2c70cc66095b072dced8a1e6d1c7639ef90cd3e04a11f2e"},
 
2956
  {file = "pyarrow-8.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:cb06cacc19f3b426681f2f6803cc06ff481e7fe5b3a533b406bc5b2138843d4f"},
2957
  {file = "pyarrow-8.0.0.tar.gz", hash = "sha256:4a18a211ed888f1ac0b0ebcb99e2d9a3e913a481120ee9b1fe33d3fedb945d4e"},
2958
  ]
2959
+ pybind11 = [
2960
+ {file = "pybind11-2.9.2-py2.py3-none-any.whl", hash = "sha256:20f56674da31c96bca7569b91e60f2bd30d693f0728541412ec927574f7bc9df"},
2961
+ {file = "pybind11-2.9.2.tar.gz", hash = "sha256:e5541f8bccf9111d1a94f7897593b55c4cf1a28d5e8cfc8225a855651f011071"},
2962
+ ]
2963
  pycodestyle = [
2964
  {file = "pycodestyle-2.7.0-py2.py3-none-any.whl", hash = "sha256:514f76d918fcc0b55c6680472f0a37970994e07bbb80725808c17089be302068"},
2965
  {file = "pycodestyle-2.7.0.tar.gz", hash = "sha256:c389c1d06bf7904078ca03399a4816f974a1d590090fecea0c63ec26ebaf1cef"},
 
3289
  {file = "scipy-1.6.1-cp39-cp39-win_amd64.whl", hash = "sha256:a5193a098ae9f29af283dcf0041f762601faf2e595c0db1da929875b7570353f"},
3290
  {file = "scipy-1.6.1.tar.gz", hash = "sha256:c4fceb864890b6168e79b0e714c585dbe2fd4222768ee90bc1aa0f8218691b11"},
3291
  ]
3292
+ seg-text = [
3293
+ {file = "seg_text-0.1.2-py3-none-any.whl", hash = "sha256:9e67af219b81259d916a11708799ef52bb7d765f9d8010028dd8d48e053eda17"},
3294
+ {file = "seg_text-0.1.2.tar.gz", hash = "sha256:37332d6fa755659aba3d93faa1248242c56a5a84e1f9332990802226ad9c4ca8"},
3295
+ ]
3296
+ semantic-version = [
3297
+ {file = "semantic_version-2.10.0-py2.py3-none-any.whl", hash = "sha256:de78a3b8e0feda74cabc54aab2da702113e33ac9d9eb9d2389bcf1f58b7d9177"},
3298
+ {file = "semantic_version-2.10.0.tar.gz", hash = "sha256:bdabb6d336998cbb378d4b9db3a4b56a1e3235701dc05ea2690d9a997ed5041c"},
3299
+ ]
3300
  semver = [
3301
  {file = "semver-2.13.0-py2.py3-none-any.whl", hash = "sha256:ced8b23dceb22134307c1b8abfa523da14198793d9787ac838e70e29e77458d4"},
3302
  {file = "semver-2.13.0.tar.gz", hash = "sha256:fa0fe2722ee1c3f57eac478820c3a5ae2f624af8264cbdf9000c980ff7f75e3f"},
 
3305
  {file = "Send2Trash-1.8.0-py3-none-any.whl", hash = "sha256:f20eaadfdb517eaca5ce077640cb261c7d2698385a6a0f072a4a5447fd49fa08"},
3306
  {file = "Send2Trash-1.8.0.tar.gz", hash = "sha256:d2c24762fd3759860a0aff155e45871447ea58d2be6bdd39b5c8f966a0c99c2d"},
3307
  ]
3308
+ sentence-splitter = [
3309
+ {file = "sentence_splitter-1.4-py2.py3-none-any.whl", hash = "sha256:5645a3ad9c348e4287f4bc73bd573d92dccd4139042fddd51fff0591f1376763"},
3310
+ {file = "sentence_splitter-1.4.tar.gz", hash = "sha256:3d1d773d07cc733ca2955aa87d0fa1c0a7274c6bdeec1daac5c5e92efb512f63"},
3311
+ ]
3312
  sentencepiece = [
3313
  {file = "sentencepiece-0.1.96-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc969e6694fb27fba7cee2953f350804faf03913f25ae1ee713a7b8a1bc08018"},
3314
  {file = "sentencepiece-0.1.96-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:36e9ff61e7b67c5b7ee96733613622620b4802fc8cf188a4dbc1f355b03dde02"},
 
3359
  {file = "set_loglevel-0.1.2-py3-none-any.whl", hash = "sha256:fcfe76ccd3791511e2cb51ec11ded6cc2186a727e441a934c547a211d0fdf773"},
3360
  {file = "set_loglevel-0.1.2.tar.gz", hash = "sha256:4da23414a798cea918801b113e17af092f13ccda96345203b14042b6d6d3a896"},
3361
  ]
3362
+ setuptools-rust = [
3363
+ {file = "setuptools-rust-1.3.0.tar.gz", hash = "sha256:958c5bf4ab6483d59dab888538121871cc5006354a42fb0fbd50acf03caad1de"},
3364
+ {file = "setuptools_rust-1.3.0-py3-none-any.whl", hash = "sha256:7ead7398d6b6fe70a7743408dc2f7257dbcb8ca9b2d7a9f8b281c09bd86f36a5"},
3365
+ ]
3366
  simplejson = [
3367
  {file = "simplejson-3.17.6-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:a89acae02b2975b1f8e4974cb8cdf9bf9f6c91162fb8dec50c259ce700f2770a"},
3368
  {file = "simplejson-3.17.6-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:82ff356ff91be0ab2293fc6d8d262451eb6ac4fd999244c4b5f863e049ba219c"},
 
3606
  validators = [
3607
  {file = "validators-0.20.0.tar.gz", hash = "sha256:24148ce4e64100a2d5e267233e23e7afeb55316b47d30faae7eb6e7292bc226a"},
3608
  ]
3609
+ vtext = [
3610
+ {file = "vtext-0.2.0-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:1791aad4a999525a7c19ae25ffdeb491839e81e958995567151a3bf8012c32ff"},
3611
+ {file = "vtext-0.2.0-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:7ddde99b3153f7bf439b06f69f221c59945b1ce103368ce3a4957e7112ab904b"},
3612
+ {file = "vtext-0.2.0-cp36-cp36m-win_amd64.whl", hash = "sha256:c54d2b4496afa0d8687345b2b89bed7e9aa03b223f0dc58ac923348d0f879a2c"},
3613
+ {file = "vtext-0.2.0-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:397823cda22d04de43312e27cbe74be4318c20ec2ef38df9c66493580be06ec8"},
3614
+ {file = "vtext-0.2.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:be3d75845af06d92af9fb65dde8c37ea890f8ed00bb236884fe3b8e2c4b08e32"},
3615
+ {file = "vtext-0.2.0-cp37-cp37m-win_amd64.whl", hash = "sha256:1fa5b18b31637ce012fdfddb1c6a207989320bcf246d5f131695c9fc92b2a32c"},
3616
+ {file = "vtext-0.2.0-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:c7a7826a44b81e9d1779bc800a5ee133647c7943c52b434ae8415df18933f77f"},
3617
+ {file = "vtext-0.2.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:eb37f4b72cf754ff20323f11519da9d3864c7f0a428be847da2ed55a3665cc44"},
3618
+ {file = "vtext-0.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:02c1dbefd2b6fd3522a96a9bd8f8e85ae4722ee088e2d952bbec830b0e88727c"},
3619
+ {file = "vtext-0.2.0.tar.gz", hash = "sha256:0ce1b0bb7e1cc0adcf5c8064757adaa6ea7bf52e366a3d30d2eac0588145f0e6"},
3620
+ ]
3621
  watchdog = [
3622
  {file = "watchdog-2.1.8-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:676263bee67b165f16b05abc52acc7a94feac5b5ab2449b491f1a97638a79277"},
3623
  {file = "watchdog-2.1.8-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:aa68d2d9a89d686fae99d28a6edf3b18595e78f5adf4f5c18fbfda549ac0f20c"},
pyproject.toml CHANGED
@@ -1,7 +1,7 @@
1
  [tool.poetry]
2
- name = "mlbee"
3
- version = "0.1.0-alpha.1"
4
- description = "st-mlbee"
5
  authors = ["ffreemt"]
6
  license = "MIT"
7
  readme = "README.md"
@@ -30,6 +30,7 @@ cchardet = "^2.1.7"
30
  streamlit-aggrid = "^0.2.3"
31
  typer = "^0.4.1"
32
  XlsxWriter = "^3.0.3"
 
33
 
34
  [tool.poe.executor]
35
  type = "poetry"
@@ -47,7 +48,7 @@ release = ["test", "build", "publish"]
47
  lint = { cmd = "pylint st_mlbee" }
48
  isort = "isort tests st_mlbee"
49
  black = "black tests st_mlbee"
50
- formt = ["isort", "black"]
51
  docstyle = "pydocstyle --convention=google tests st_mlbee"
52
  prerelease = {cmd = "poetry version prerelease && sync-version"}
53
  tunnel = {cmd = "ssh -CN ip_or_hostname_defined_in_hosts -L 9091:127.0.0.1:9091"}
 
1
  [tool.poetry]
2
+ name = "st_mlbee"
3
+ version = "0.1.0-alpha.2"
4
+ description = "st-mlbee, mlbee powered by streamlit"
5
  authors = ["ffreemt"]
6
  license = "MIT"
7
  readme = "README.md"
 
30
  streamlit-aggrid = "^0.2.3"
31
  typer = "^0.4.1"
32
  XlsxWriter = "^3.0.3"
33
+ seg-text = "^0.1.2"
34
 
35
  [tool.poe.executor]
36
  type = "poetry"
 
48
  lint = { cmd = "pylint st_mlbee" }
49
  isort = "isort tests st_mlbee"
50
  black = "black tests st_mlbee"
51
+ format = ["isort", "black"]
52
  docstyle = "pydocstyle --convention=google tests st_mlbee"
53
  prerelease = {cmd = "poetry version prerelease && sync-version"}
54
  tunnel = {cmd = "ssh -CN ip_or_hostname_defined_in_hosts -L 9091:127.0.0.1:9091"}
pyrightconfig.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "include": ["tests", "mlbee"],
3
  "venvPath": ".venv",
4
  "reportTypeshedErrors": false,
5
  "reportMissingImports": true,
 
1
  {
2
+ "include": ["tests", "st_mlbee"],
3
  "venvPath": ".venv",
4
  "reportTypeshedErrors": false,
5
  "reportMissingImports": true,
requirements.txt CHANGED
@@ -21,7 +21,7 @@ chardet==4.0.0; python_version >= "2.7" and python_full_version < "3.0.0" or pyt
21
  charset-normalizer==2.0.12; python_full_version >= "3.6.0" and python_version >= "3.6"
22
  click==8.1.3; python_full_version >= "3.8.3" and python_version < "4.0" and python_version >= "3.7" and python_full_version < "4.0.0"
23
  cmat2aset==0.1.0a7; python_full_version >= "3.8.3" and python_version < "4.0"
24
- colorama==0.4.4; python_full_version >= "3.8.3" and sys_platform == "win32" and platform_system == "Windows" and python_version >= "3.7" and python_full_version < "4.0.0" and python_version < "4.0" and (python_version >= "3.7" and python_full_version < "3.0.0" and platform_system == "Windows" or platform_system == "Windows" and python_version >= "3.7" and python_full_version >= "3.5.0") and (python_version >= "3.8" and python_full_version < "3.0.0" and sys_platform == "win32" or sys_platform == "win32" and python_version >= "3.8" and python_full_version >= "3.5.0")
25
  commonmark==0.9.1; python_full_version >= "3.6.3" and python_full_version < "4.0.0" and python_version >= "3.6"
26
  cssselect==1.1.0; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0"
27
  debugpy==1.6.0; python_version >= "3.7"
@@ -31,6 +31,8 @@ entrypoints==0.4; python_version >= "3.7"
31
  environs==9.5.0; python_full_version >= "3.8.3" and python_full_version < "4.0.0" and python_version >= "3.6"
32
  executing==0.8.3; python_full_version >= "3.8.3" and python_version < "4.0" and python_version >= "3.8"
33
  fastjsonschema==2.15.3; python_version >= "3.7"
 
 
34
  filelock==3.7.1; python_full_version >= "3.8.3" and python_full_version < "4.0.0" and python_version >= "3.7"
35
  gitdb==4.0.9; python_version >= "3.7"
36
  gitpython==3.1.27; python_full_version >= "3.7.1" and python_version < "4.0" and python_version >= "3.7"
@@ -66,6 +68,7 @@ marshmallow==3.16.0; python_full_version >= "3.8.3" and python_full_version < "4
66
  matplotlib-inline==0.1.3; python_version >= "3.8"
67
  mistune==0.8.4; python_version >= "3.7"
68
  more-itertools==8.13.0; python_version >= "3.5"
 
69
  nbclient==0.6.4; python_full_version >= "3.7.0" and python_version >= "3.7"
70
  nbconvert==6.5.0; python_version >= "3.7"
71
  nbformat==5.4.0; python_full_version >= "3.7.0" and python_version >= "3.7"
@@ -89,6 +92,7 @@ ptyprocess==0.7.0; sys_platform != "win32" and python_version >= "3.8" and os_na
89
  pure-eval==0.2.2; python_version >= "3.8"
90
  py==1.11.0; python_version >= "3.7" and python_full_version < "3.0.0" and implementation_name == "pypy" or implementation_name == "pypy" and python_version >= "3.7" and python_full_version >= "3.5.0"
91
  pyarrow==8.0.0; python_full_version >= "3.7.1" and python_version < "4.0" and python_version >= "3.7"
 
92
  pycparser==2.21; python_version >= "3.7" and python_full_version < "3.0.0" and implementation_name == "pypy" or implementation_name == "pypy" and python_version >= "3.7" and python_full_version >= "3.4.0"
93
  pydeck==0.7.1; python_full_version >= "3.7.1" and python_version < "4.0" and python_version >= "3.7"
94
  pygments==2.12.0; python_full_version >= "3.8.3" and python_version < "4.0" and python_version >= "3.8" and python_full_version < "4.0.0"
@@ -105,16 +109,20 @@ pywinpty==2.0.5; os_name == "nt" and python_version >= "3.7"
105
  pyyaml==6.0; python_full_version >= "3.8.3" and python_full_version < "4.0.0" and python_version >= "3.6"
106
  pyzmq==23.1.0; python_version >= "3.7"
107
  readability-lxml==0.8.1
108
- regex==2022.6.2; python_full_version >= "3.8.3" and python_full_version < "4.0.0" and python_version >= "3.7"
109
  requests==2.27.1; python_full_version >= "3.8.3" and python_version >= "3.6" and python_version < "4.0" and python_full_version < "4.0.0"
110
  rfc3986==1.5.0; python_version >= "3.7"
111
  rich==12.4.4; python_full_version >= "3.7.1" and python_full_version < "4.0.0" and python_version >= "3.6" and python_version < "4.0"
112
  scikit-learn==1.1.1; python_full_version >= "3.8.3" and python_full_version < "4.0.0" and python_version >= "3.8" and python_version < "4.0"
113
  scipy==1.6.1; python_full_version >= "3.8.3" and python_full_version < "4.0.0" and python_version >= "3.8" and python_version < "4.0"
 
 
114
  semver==2.13.0; python_full_version >= "3.7.1" and python_version >= "3.6" and python_version < "4.0"
115
  send2trash==1.8.0; python_version >= "3.7"
 
116
  sentencepiece==0.1.96; python_full_version >= "3.8.3" and python_full_version < "4.0.0"
117
  set-loglevel==0.1.2; python_full_version >= "3.8.3" and python_full_version < "4.0.0"
 
118
  simplejson==3.17.6; python_full_version >= "3.7.1" and python_version < "4.0"
119
  six==1.16.0; python_full_version >= "3.8.3" and python_version >= "3.8" and python_version < "4.0" and (python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.3.0" and python_version >= "3.7")
120
  sklearn==0.0; python_full_version >= "3.8.3" and python_version < "4.0"
@@ -136,11 +144,12 @@ tqdm==4.64.0; (python_version >= "2.7" and python_full_version < "3.0.0") or (py
136
  traitlets==5.2.2.post1; python_full_version >= "3.7.0" and python_version >= "3.8"
137
  transformers==4.19.3; python_full_version >= "3.8.3" and python_full_version < "4.0.0"
138
  typer==0.4.1; python_version >= "3.6"
139
- typing-extensions==4.2.0; python_full_version >= "3.8.3" and python_full_version < "4.0.0" and python_version >= "3.7" and python_version < "3.9"
140
  tzdata==2022.1; platform_system == "Windows" and python_version >= "3.6" and (python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.6")
141
  tzlocal==4.2; python_full_version >= "3.7.1" and python_version < "4.0" and python_version >= "3.6"
142
  urllib3==1.26.9; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version < "4" and python_version >= "3.6"
143
  validators==0.20.0; python_full_version >= "3.7.1" and python_version < "4.0" and python_version >= "3.6"
 
144
  watchdog==2.1.8; platform_system != "Darwin" and python_version >= "3.6" and python_full_version >= "3.7.1" and python_version < "4.0"
145
  wcwidth==0.2.5; python_full_version >= "3.6.2" and python_version >= "3.8"
146
  webencodings==0.5.1; python_version >= "3.7"
@@ -148,4 +157,3 @@ widgetsnbextension==3.6.0; python_version >= "3.7"
148
  win32-setctime==1.1.0; sys_platform == "win32" and python_version >= "3.5"
149
  xlsxwriter==3.0.3; python_version >= "3.4"
150
  zipp==3.8.0; python_version < "3.9" and python_version >= "3.7"
151
- sentence-transformers
 
21
  charset-normalizer==2.0.12; python_full_version >= "3.6.0" and python_version >= "3.6"
22
  click==8.1.3; python_full_version >= "3.8.3" and python_version < "4.0" and python_version >= "3.7" and python_full_version < "4.0.0"
23
  cmat2aset==0.1.0a7; python_full_version >= "3.8.3" and python_version < "4.0"
24
+ colorama==0.4.4; python_full_version >= "3.8.3" and sys_platform == "win32" and platform_system == "Windows" and python_version >= "3.8" and python_full_version < "4.0.0" and python_version < "4.0" and (python_version >= "3.7" and python_full_version < "3.0.0" and platform_system == "Windows" or platform_system == "Windows" and python_version >= "3.7" and python_full_version >= "3.5.0") and (python_version >= "3.8" and python_full_version < "3.0.0" and python_version < "4.0" or python_version >= "3.8" and python_version < "4.0" and python_full_version >= "3.4.0") and (python_version >= "3.8" and python_full_version < "3.0.0" and sys_platform == "win32" or sys_platform == "win32" and python_version >= "3.8" and python_full_version >= "3.5.0")
25
  commonmark==0.9.1; python_full_version >= "3.6.3" and python_full_version < "4.0.0" and python_version >= "3.6"
26
  cssselect==1.1.0; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0"
27
  debugpy==1.6.0; python_version >= "3.7"
 
31
  environs==9.5.0; python_full_version >= "3.8.3" and python_full_version < "4.0.0" and python_version >= "3.6"
32
  executing==0.8.3; python_full_version >= "3.8.3" and python_version < "4.0" and python_version >= "3.8"
33
  fastjsonschema==2.15.3; python_version >= "3.7"
34
+ fastlid==0.1.7; python_version >= "3.8" and python_version < "4.0"
35
+ fasttext==0.9.2; python_version >= "3.8" and python_version < "4.0"
36
  filelock==3.7.1; python_full_version >= "3.8.3" and python_full_version < "4.0.0" and python_version >= "3.7"
37
  gitdb==4.0.9; python_version >= "3.7"
38
  gitpython==3.1.27; python_full_version >= "3.7.1" and python_version < "4.0" and python_version >= "3.7"
 
68
  matplotlib-inline==0.1.3; python_version >= "3.8"
69
  mistune==0.8.4; python_version >= "3.7"
70
  more-itertools==8.13.0; python_version >= "3.5"
71
+ morfessor==2.0.6; python_version >= "3.8" and python_version < "4.0"
72
  nbclient==0.6.4; python_full_version >= "3.7.0" and python_version >= "3.7"
73
  nbconvert==6.5.0; python_version >= "3.7"
74
  nbformat==5.4.0; python_full_version >= "3.7.0" and python_version >= "3.7"
 
92
  pure-eval==0.2.2; python_version >= "3.8"
93
  py==1.11.0; python_version >= "3.7" and python_full_version < "3.0.0" and implementation_name == "pypy" or implementation_name == "pypy" and python_version >= "3.7" and python_full_version >= "3.5.0"
94
  pyarrow==8.0.0; python_full_version >= "3.7.1" and python_version < "4.0" and python_version >= "3.7"
95
+ pybind11==2.9.2; python_version >= "3.8" and python_full_version < "3.0.0" and python_version < "4.0" or python_version >= "3.8" and python_version < "4.0" and python_full_version >= "3.5.0"
96
  pycparser==2.21; python_version >= "3.7" and python_full_version < "3.0.0" and implementation_name == "pypy" or implementation_name == "pypy" and python_version >= "3.7" and python_full_version >= "3.4.0"
97
  pydeck==0.7.1; python_full_version >= "3.7.1" and python_version < "4.0" and python_version >= "3.7"
98
  pygments==2.12.0; python_full_version >= "3.8.3" and python_version < "4.0" and python_version >= "3.8" and python_full_version < "4.0.0"
 
109
  pyyaml==6.0; python_full_version >= "3.8.3" and python_full_version < "4.0.0" and python_version >= "3.6"
110
  pyzmq==23.1.0; python_version >= "3.7"
111
  readability-lxml==0.8.1
112
+ regex==2022.6.2; python_full_version >= "3.8.3" and python_full_version < "4.0.0" and python_version >= "3.8" and python_version < "4.0"
113
  requests==2.27.1; python_full_version >= "3.8.3" and python_version >= "3.6" and python_version < "4.0" and python_full_version < "4.0.0"
114
  rfc3986==1.5.0; python_version >= "3.7"
115
  rich==12.4.4; python_full_version >= "3.7.1" and python_full_version < "4.0.0" and python_version >= "3.6" and python_version < "4.0"
116
  scikit-learn==1.1.1; python_full_version >= "3.8.3" and python_full_version < "4.0.0" and python_version >= "3.8" and python_version < "4.0"
117
  scipy==1.6.1; python_full_version >= "3.8.3" and python_full_version < "4.0.0" and python_version >= "3.8" and python_version < "4.0"
118
+ seg-text==0.1.2; python_version >= "3.8" and python_version < "4.0"
119
+ semantic-version==2.10.0; python_version >= "3.8" and python_version < "4.0"
120
  semver==2.13.0; python_full_version >= "3.7.1" and python_version >= "3.6" and python_version < "4.0"
121
  send2trash==1.8.0; python_version >= "3.7"
122
+ sentence-splitter==1.4; python_version >= "3.8" and python_version < "4.0"
123
  sentencepiece==0.1.96; python_full_version >= "3.8.3" and python_full_version < "4.0.0"
124
  set-loglevel==0.1.2; python_full_version >= "3.8.3" and python_full_version < "4.0.0"
125
+ setuptools-rust==1.3.0; python_version >= "3.8" and python_version < "4.0"
126
  simplejson==3.17.6; python_full_version >= "3.7.1" and python_version < "4.0"
127
  six==1.16.0; python_full_version >= "3.8.3" and python_version >= "3.8" and python_version < "4.0" and (python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.3.0" and python_version >= "3.7")
128
  sklearn==0.0; python_full_version >= "3.8.3" and python_version < "4.0"
 
144
  traitlets==5.2.2.post1; python_full_version >= "3.7.0" and python_version >= "3.8"
145
  transformers==4.19.3; python_full_version >= "3.8.3" and python_full_version < "4.0.0"
146
  typer==0.4.1; python_version >= "3.6"
147
+ typing-extensions==4.2.0; python_full_version >= "3.8.3" and python_full_version < "4.0.0" and python_version >= "3.8" and python_version < "3.9"
148
  tzdata==2022.1; platform_system == "Windows" and python_version >= "3.6" and (python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.6")
149
  tzlocal==4.2; python_full_version >= "3.7.1" and python_version < "4.0" and python_version >= "3.6"
150
  urllib3==1.26.9; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version < "4" and python_version >= "3.6"
151
  validators==0.20.0; python_full_version >= "3.7.1" and python_version < "4.0" and python_version >= "3.6"
152
+ vtext==0.2.0; python_version >= "3.8" and python_version < "4.0"
153
  watchdog==2.1.8; platform_system != "Darwin" and python_version >= "3.6" and python_full_version >= "3.7.1" and python_version < "4.0"
154
  wcwidth==0.2.5; python_full_version >= "3.6.2" and python_version >= "3.8"
155
  webencodings==0.5.1; python_version >= "3.7"
 
157
  win32-setctime==1.1.0; sys_platform == "win32" and python_version >= "3.5"
158
  xlsxwriter==3.0.3; python_version >= "3.4"
159
  zipp==3.8.0; python_version < "3.9" and python_version >= "3.7"
 
run-nodemon-streamlit-run-app_mlbee.sh CHANGED
@@ -1 +1 @@
1
- nodemon -w app_mlbee.py -x python -m streamlit run app_mlbee.py
 
1
+ nodemon -w app_mlbee.py -w st_mlbee -x python -m streamlit run app_mlbee.py
st_mlbee/__init__.py CHANGED
@@ -1,5 +1,5 @@
1
  """Init."""
2
- __version__ = "0.1.0a1"
3
  from .st_mlbee import st_mlbee
4
 
5
  __all__ = ("st_mlbee",)
 
1
  """Init."""
2
+ __version__ = "0.1.0a2"
3
  from .st_mlbee import st_mlbee
4
 
5
  __all__ = ("st_mlbee",)
st_mlbee/__main__.py CHANGED
@@ -8,14 +8,14 @@ import typer
8
  from logzero import logger
9
  from set_loglevel import set_loglevel
10
 
11
- from mlbee import __version__, mlbee
12
 
13
  logzero.loglevel(set_loglevel())
14
 
15
  app = typer.Typer(
16
- name="mlbee",
17
  add_completion=False,
18
- help="mlbee help",
19
  )
20
 
21
 
@@ -38,7 +38,6 @@ def main(
38
  ),
39
  ):
40
  """Define."""
41
- ...
42
 
43
 
44
  if __name__ == "__main__":
 
8
  from logzero import logger
9
  from set_loglevel import set_loglevel
10
 
11
+ from st_mlbee import __version__, st_mlbee
12
 
13
  logzero.loglevel(set_loglevel())
14
 
15
  app = typer.Typer(
16
+ name="st-mlbee",
17
  add_completion=False,
18
+ help="st-mlbee help",
19
  )
20
 
21
 
 
38
  ),
39
  ):
40
  """Define."""
 
41
 
42
 
43
  if __name__ == "__main__":
st_mlbee/fetch_upload.py CHANGED
@@ -1,4 +1,5 @@
1
  """Fetch upload and convert to list1/list2."""
 
2
  import streamlit as st
3
  from logzero import logger
4
  from streamlit import session_state as state
 
1
  """Fetch upload and convert to list1/list2."""
2
+ # pylint: disable=too-many-locals, too-many-branches, too-many-statements
3
  import streamlit as st
4
  from logzero import logger
5
  from streamlit import session_state as state
st_mlbee/fetch_urls.py CHANGED
@@ -1,11 +1,11 @@
1
  """Fetch text from urls and convert to state.ns.list1/list2."""
2
- # pylint: disable=invalid-name
3
  import streamlit as st
4
  from icecream import ic
5
  from logzero import logger
6
  from streamlit import session_state as state
7
 
8
- from mlbee.url2txt import url2txt
9
 
10
  ic.configureOutput(
11
  includeContext=True,
@@ -18,7 +18,8 @@ def fetch_urls():
18
  beetype = state.ns.beetype
19
  sourcecount = state.ns.sourcecount
20
  value = ""
21
- if beetype == "ezbee" or beetype == "mlbee":
 
22
  url1 = (
23
  "https://raw.githubusercontent.com/ffreemt/en-de-zh-txt/master/test_en.txt"
24
  )
@@ -26,7 +27,7 @@ def fetch_urls():
26
  "https://raw.githubusercontent.com/ffreemt/en-de-zh-txt/master/test_zh.txt"
27
  )
28
  value = f"{url1} {url2}"
29
- if beetype == "dzbee":
30
  url1 = "https://raw.githubusercontent.com/ffreemt/en-de-zh-txt/master/sternstunden04-de.txt"
31
  url2 = "https://raw.githubusercontent.com/ffreemt/en-de-zh-txt/master/sternstunden04-zh.txt"
32
  value = f"{url1} {url2}"
 
1
  """Fetch text from urls and convert to state.ns.list1/list2."""
2
+ # pylint: disable=invalid-name, too-many-statements
3
  import streamlit as st
4
  from icecream import ic
5
  from logzero import logger
6
  from streamlit import session_state as state
7
 
8
+ from st_mlbee.url2txt import url2txt
9
 
10
  ic.configureOutput(
11
  includeContext=True,
 
18
  beetype = state.ns.beetype
19
  sourcecount = state.ns.sourcecount
20
  value = ""
21
+ # if beetype == "ezbee" or beetype == "mlbee":
22
+ if beetype in ["ezbee", "mlbee"]:
23
  url1 = (
24
  "https://raw.githubusercontent.com/ffreemt/en-de-zh-txt/master/test_en.txt"
25
  )
 
27
  "https://raw.githubusercontent.com/ffreemt/en-de-zh-txt/master/test_zh.txt"
28
  )
29
  value = f"{url1} {url2}"
30
+ if beetype in ["dzbee"]:
31
  url1 = "https://raw.githubusercontent.com/ffreemt/en-de-zh-txt/master/sternstunden04-de.txt"
32
  url2 = "https://raw.githubusercontent.com/ffreemt/en-de-zh-txt/master/sternstunden04-zh.txt"
33
  value = f"{url1} {url2}"
st_mlbee/home.py CHANGED
@@ -3,7 +3,7 @@
3
  org ezbee_page.py.
4
  """
5
  # pylint: disable=invalid-name
6
- # pylint: disable=too-many-locals, too-many-return-statements, too-many-branches, too-many-statements
7
  import base64
8
  import platform
9
  import inspect
@@ -26,18 +26,20 @@ from aset2pairs import aset2pairs
26
  from icecream import ic
27
  from loguru import logger as loggu
28
  from logzero import logger
 
29
  from set_loglevel import set_loglevel
30
  from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode
31
 
32
  # from st_aggrid.grid_options_builder import GridOptionsBuilder
33
  from streamlit import session_state as state
34
 
35
- from mlbee.color_map import color_map
36
- from mlbee.fetch_paste import fetch_paste
37
- from mlbee.fetch_upload import fetch_upload
38
- from mlbee.fetch_urls import fetch_urls
39
- # from mlbee.t2s import t2s
40
- from mlbee import mlbee
 
41
 
42
 
43
  def home(): # noqa
@@ -75,6 +77,19 @@ def home(): # noqa
75
  st.warning(f"{state.ns.sourcetype}: Not implemented")
76
  return None
77
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  logger.debug("state.ns.updated: %s", state.ns.updated)
79
 
80
  # if not updated, quit: this does not quite work
@@ -132,6 +147,12 @@ def home(): # noqa
132
  time_max /= 12
133
  time_av /= 12
134
 
 
 
 
 
 
 
135
  # time0 = len12 * 0.4
136
  # time1 = len12 * 1
137
  # eta = pendulum.now() + pendulum.duration(seconds=len12 * 0.66)
@@ -145,14 +166,16 @@ def home(): # noqa
145
  dt_str = eta.to_datetime_string()
146
  timezone_name = eta.timezone_name
147
  _ = (
148
- f"Running in {uname.node} -- "
149
- f"Estimated time to complete: {in_words0} to {in_words1}; "
150
- f"ETA: {diff_for_humans} ({dt_str} {timezone_name}) "
 
151
  )
 
 
152
 
153
  # only show this for upload
154
  if state.ns.sourcetype in ["upload"]:
155
- st.info(_)
156
  _ = st.expander("to be aligned", expanded=False)
157
  with _:
158
  st.write(df)
@@ -162,9 +185,11 @@ def home(): # noqa
162
  # if state.ns.beetype in ["ezbee", "dzbee", "debee"]:
163
  if state.ns.beetype in ["mlbee"]:
164
  with about_time() as t:
165
- with st.spinner(" diggin..."):
 
166
  try:
167
- aset = globals()[state.ns.beetype](
 
168
  list1,
169
  list2,
170
  # eps=eps,
@@ -218,48 +243,49 @@ def home(): # noqa
218
  # st.markdown(df_a.astype(str).to_markdown())
219
  # st.markdown(df_a.astype(str).to_numpy().tolist())
220
 
221
- # insert seq no
222
- df_a.insert(0, "sn", range(len(df_a)))
223
-
224
- gb = GridOptionsBuilder.from_dataframe(df_a)
225
- gb.configure_pagination(paginationAutoPageSize=True)
226
- options = {
227
- "resizable": True,
228
- "autoHeight": True,
229
- "wrapText": True,
230
- "editable": True,
231
- }
232
- gb.configure_default_column(**options)
233
- gridOptions = gb.build()
234
-
235
- # st.write("editable aligned (double-click a cell to edit, drag column header to adjust widths)")
236
- _ = "editable aligned (double-click a cell to edit, drag column header to adjust widths)"
237
- with st.expander(_, expanded=False):
238
- ag_df = AgGrid(
239
- # df,
240
- df_a,
241
- gridOptions=gridOptions,
242
- key="outside",
243
- reload_data=True,
244
- editable=True,
245
- # width="100%", # width parameter is deprecated
246
- height=750,
247
- # fit_columns_on_grid_load=True,
248
- update_mode=GridUpdateMode.MODEL_CHANGED,
249
- )
250
-
251
- # ### prep download
 
 
 
252
 
253
  # taken from vizbee cb_save_xlsx
254
  # subset = list(df_a.columns[2:3]) # 3rd col
255
  subset = list(df_a.columns[2:]) # 3rd col
256
-
257
- # pop("sn"): remove sn column
258
- df_a.pop("sn")
259
  s_df = df_a.astype(str).style.applymap(color_map, subset=subset)
260
 
261
  if set_loglevel() <= 10:
262
  logger.debug(" showing styled aligned")
 
263
  with st.expander("styled aligned"):
264
  # st.dataframe(s_df) # can't handle styleddf
265
  st.table(s_df)
@@ -279,7 +305,12 @@ def home(): # noqa
279
  if state.ns.src_filename:
280
  filename = f"{state.ns.src_filename}-"
281
 
282
- dl_xlsx = f'<a href="data:application/octet-stream;base64,{b64.decode()}" download="{filename}aligned_paras.xlsx">Download aligned paras xlsx</a>'
 
 
 
 
 
283
 
284
  _ = """
285
  output = io.BytesIO()
 
3
  org ezbee_page.py.
4
  """
5
  # pylint: disable=invalid-name
6
+ # pylint: disable=too-many-locals, too-many-return-statements, too-many-branches, too-many-statements, abstract-class-instantiated
7
  import base64
8
  import platform
9
  import inspect
 
26
  from icecream import ic
27
  from loguru import logger as loggu
28
  from logzero import logger
29
+ from seg_text import seg_text
30
  from set_loglevel import set_loglevel
31
  from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode
32
 
33
  # from st_aggrid.grid_options_builder import GridOptionsBuilder
34
  from streamlit import session_state as state
35
 
36
+ from st_mlbee.color_map import color_map
37
+ from st_mlbee.fetch_paste import fetch_paste
38
+ from st_mlbee.fetch_upload import fetch_upload
39
+ from st_mlbee.fetch_urls import fetch_urls
40
+
41
+ # from st_mlbee.t2s import t2s
42
+ from st_mlbee import st_mlbee
43
 
44
 
45
  def home(): # noqa
 
77
  st.warning(f"{state.ns.sourcetype}: Not implemented")
78
  return None
79
 
80
+ # state.ns.list1 state.ns.list2 defined in fetch_x
81
+ if state.ns.sentali: # split to sents
82
+ try:
83
+ state.ns.list1 = seg_text(state.ns.list1)
84
+ except Exception as exc:
85
+ logger.exception(exc)
86
+ raise
87
+ try:
88
+ state.ns.list2 = seg_text(state.ns.list2)
89
+ except Exception as exc:
90
+ logger.exception(exc)
91
+ raise
92
+
93
  logger.debug("state.ns.updated: %s", state.ns.updated)
94
 
95
  # if not updated, quit: this does not quite work
 
147
  time_max /= 12
148
  time_av /= 12
149
 
150
+ # reduce for sent align
151
+ if state.ns.sentali:
152
+ time_min /= 1.4
153
+ time_max /= 1.4
154
+ time_av /= 1.4
155
+
156
  # time0 = len12 * 0.4
157
  # time1 = len12 * 1
158
  # eta = pendulum.now() + pendulum.duration(seconds=len12 * 0.66)
 
166
  dt_str = eta.to_datetime_string()
167
  timezone_name = eta.timezone_name
168
  _ = (
169
+ f"running in {uname.node} -- "
170
+ f" processing {len1} + {len2} = {len12} blocks; "
171
+ f"estimated time to complete: {in_words0} to {in_words1}; "
172
+ f"eta: {diff_for_humans} ({dt_str} {timezone_name}) "
173
  )
174
+ eta_msg = _
175
+ # st.info(_)
176
 
177
  # only show this for upload
178
  if state.ns.sourcetype in ["upload"]:
 
179
  _ = st.expander("to be aligned", expanded=False)
180
  with _:
181
  st.write(df)
 
185
  # if state.ns.beetype in ["ezbee", "dzbee", "debee"]:
186
  if state.ns.beetype in ["mlbee"]:
187
  with about_time() as t:
188
+ # diggin...
189
+ with st.spinner(f"{eta_msg}"):
190
  try:
191
+ # aset = globals()[state.ns.beetype](
192
+ aset = st_mlbee(
193
  list1,
194
  list2,
195
  # eps=eps,
 
243
  # st.markdown(df_a.astype(str).to_markdown())
244
  # st.markdown(df_a.astype(str).to_numpy().tolist())
245
 
246
+ # insert seq no
247
+ df_a.insert(0, "sn", range(len(df_a)))
248
+
249
+ gb = GridOptionsBuilder.from_dataframe(df_a)
250
+ gb.configure_pagination(paginationAutoPageSize=True)
251
+ options = {
252
+ "resizable": True,
253
+ "autoHeight": True,
254
+ "wrapText": True,
255
+ "editable": True,
256
+ }
257
+ gb.configure_default_column(**options)
258
+ gridOptions = gb.build()
259
+
260
+ # st.write("editable aligned (double-click a cell to edit, drag column header to adjust widths)")
261
+ _ = "editable aligned (double-click a cell to edit, drag column header to adjust widths)"
262
+ with st.expander(_, expanded=False):
263
+ ag_df = AgGrid(
264
+ # df,
265
+ df_a,
266
+ gridOptions=gridOptions,
267
+ key="outside",
268
+ reload_data=True,
269
+ editable=True,
270
+ # width="100%", # width parameter is deprecated
271
+ height=750,
272
+ # fit_columns_on_grid_load=True,
273
+ update_mode=GridUpdateMode.MODEL_CHANGED,
274
+ )
275
+
276
+ # pop("sn"): remove sn column
277
+ df_a.pop("sn")
278
+
279
+ # ### prep download ### #
280
 
281
  # taken from vizbee cb_save_xlsx
282
  # subset = list(df_a.columns[2:3]) # 3rd col
283
  subset = list(df_a.columns[2:]) # 3rd col
 
 
 
284
  s_df = df_a.astype(str).style.applymap(color_map, subset=subset)
285
 
286
  if set_loglevel() <= 10:
287
  logger.debug(" showing styled aligned")
288
+
289
  with st.expander("styled aligned"):
290
  # st.dataframe(s_df) # can't handle styleddf
291
  st.table(s_df)
 
305
  if state.ns.src_filename:
306
  filename = f"{state.ns.src_filename}-"
307
 
308
+ if state.ns.sentali:
309
+ extra = "aligned_sents"
310
+ else:
311
+ extra = "aligned_paras"
312
+
313
+ dl_xlsx = f'<a href="data:application/octet-stream;base64,{b64.decode()}" download="{filename}-{extra}.xlsx">Download aligned paras xlsx</a>'
314
 
315
  _ = """
316
  output = io.BytesIO()
st_mlbee/info.py CHANGED
@@ -1,16 +1,16 @@
1
- """Present info about mlbee."""
2
  from textwrap import dedent
3
 
4
  import streamlit as st
5
 
6
- from mlbee import __version__
7
 
8
- from mlbee.utils import msg
9
 
10
 
11
  def info():
12
  """Prep info page."""
13
 
14
- st.subheader(f"mlbee {__version__}")
15
 
16
  st.markdown(msg, unsafe_allow_html=True)
 
1
+ """Present info about st-mlbee."""
2
  from textwrap import dedent
3
 
4
  import streamlit as st
5
 
6
+ from st_mlbee import __version__
7
 
8
+ from st_mlbee.utils import msg
9
 
10
 
11
  def info():
12
  """Prep info page."""
13
 
14
+ st.subheader(f"st-mlbee {__version__}")
15
 
16
  st.markdown(msg, unsafe_allow_html=True)
st_mlbee/multipage.py CHANGED
@@ -31,13 +31,14 @@ class Multipage:
31
  """Dropdown to select the page to run."""
32
  # Dropdown to select the page to run
33
  st.markdown(
 
34
  """
35
  <style>
36
  section[data-testid="stSidebar"] > div:first-of-type {
37
  background-color: var(--secondary-background-color);
38
  background: var(--secondary-background-color);
39
- width: 274px;
40
- padding: 2rem 0;
41
  box-shadow: -2rem 0px 2rem 2rem rgba(0,0,0,0.16);
42
  }
43
  section[aria-expanded="true"] > div:nth-of-type(2) {
 
31
  """Dropdown to select the page to run."""
32
  # Dropdown to select the page to run
33
  st.markdown(
34
+ # 285/280/275/273 OK 270/272 NOK for Config
35
  """
36
  <style>
37
  section[data-testid="stSidebar"] > div:first-of-type {
38
  background-color: var(--secondary-background-color);
39
  background: var(--secondary-background-color);
40
+ width: 273px;
41
+ padding: 1rem 0;
42
  box-shadow: -2rem 0px 2rem 2rem rgba(0,0,0,0.16);
43
  }
44
  section[aria-expanded="true"] > div:nth-of-type(2) {
st_mlbee/settings.py CHANGED
@@ -61,7 +61,7 @@ def settings():
61
  logger.error("sourcecount index error: %s, setting to 0", e)
62
  index = 0
63
  sourcecount = st.radio(
64
- "Source Count",
65
  sourcecount_list,
66
  index=index,
67
  format_func=lambda x: f"{x:<3} |",
@@ -78,12 +78,12 @@ def settings():
78
  logger.error("sentali sindex error: %s, setting to 0", e)
79
  index = 0
80
  sentali = st.radio(
81
- "Split to Sents",
82
  sentali_list,
83
  index=index,
84
- format_func=lambda x: f"{str(x):<4} |",
85
- help="None: leave it as it is; yes: attempt to split to sents in a sensible manner.",
86
- disabled=True,
87
  )
88
  state.ns.sentali = sentali
89
 
 
61
  logger.error("sourcecount index error: %s, setting to 0", e)
62
  index = 0
63
  sourcecount = st.radio(
64
+ "Source count",
65
  sourcecount_list,
66
  index=index,
67
  format_func=lambda x: f"{x:<3} |",
 
78
  logger.error("sentali sindex error: %s, setting to 0", e)
79
  index = 0
80
  sentali = st.radio(
81
+ "Split to sents",
82
  sentali_list,
83
  index=index,
84
+ format_func=lambda x: f'{str(x) if x else "no":<4}|',
85
+ help="no: leave it as it is; yes: attempt to split to sents in a sensible manner.",
86
+ # disabled=True,
87
  )
88
  state.ns.sentali = sentali
89
 
st_mlbee/url2txt.py CHANGED
@@ -1,4 +1,5 @@
1
  """Fetch text from url."""
 
2
  from typing import Optional
3
  from urllib.parse import urlparse
4
 
 
1
  """Fetch text from url."""
2
+ # pylint: disable=too-many-branches
3
  from typing import Optional
4
  from urllib.parse import urlparse
5
 
st_mlbee/utils.py CHANGED
@@ -1,4 +1,5 @@
1
  """Prep front cover for sidebar (based on st-bumblebee-st_app.py)."""
 
2
  import base64
3
  from io import BytesIO
4
  from textwrap import dedent
@@ -9,7 +10,7 @@ import streamlit as st
9
  from logzero import logger
10
  from set_loglevel import set_loglevel
11
 
12
- from mlbee import __version__
13
 
14
  logzero.loglevel(set_loglevel())
15
 
@@ -36,6 +37,7 @@ msg = dedent(
36
  sents, paras of docus).
37
  Extremely long blocks will likely have a negative impact
38
  on aligning.
 
39
  """
40
  ).strip()
41
 
 
1
  """Prep front cover for sidebar (based on st-bumblebee-st_app.py)."""
2
+ # pylint: disable=abstract-class-instantiated
3
  import base64
4
  from io import BytesIO
5
  from textwrap import dedent
 
10
  from logzero import logger
11
  from set_loglevel import set_loglevel
12
 
13
+ from st_mlbee import __version__
14
 
15
  logzero.loglevel(set_loglevel())
16
 
 
37
  sents, paras of docus).
38
  Extremely long blocks will likely have a negative impact
39
  on aligning.
40
+ On a powerful computer such as an instance on huggingface spaces, the running time can be reduced by a factor of 10-20.
41
  """
42
  ).strip()
43