dragonSwing
committed on
Commit
·
949e803
1
Parent(s):
4747a28
Update README
Browse files
README.md
CHANGED
@@ -5,8 +5,7 @@ tags:
|
|
5 |
- capitalization
|
6 |
- punctuation
|
7 |
- token-classification
|
8 |
-
|
9 |
-
license: mit
|
10 |
datasets:
|
11 |
- oscar-corpus/OSCAR-2109
|
12 |
metrics:
|
@@ -32,19 +31,16 @@ import os
|
|
32 |
import shutil
|
33 |
import sys
|
34 |
from huggingface_hub import snapshot_download
|
35 |
-
|
36 |
cache_dir = "./capu"
|
37 |
def download_files(repo_id, cache_dir=None, ignore_regex=None):
|
38 |
download_dir = snapshot_download(repo_id=repo_id, cache_dir=cache_dir, ignore_regex=ignore_regex)
|
39 |
if cache_dir is None or download_dir == cache_dir:
|
40 |
return download_dir
|
41 |
-
|
42 |
file_names = os.listdir(download_dir)
|
43 |
for file_name in file_names:
|
44 |
shutil.move(os.path.join(download_dir, file_name), cache_dir)
|
45 |
os.rmdir(download_dir)
|
46 |
return cache_dir
|
47 |
-
|
48 |
download_files(repo_id="dragonSwing/vibert-capu", cache_dir=cache_dir, ignore_regex=["*.json", "*.bin"])
|
49 |
sys.path.append(cache_dir)
|
50 |
```
|
@@ -66,14 +62,14 @@ model("theo đó thủ tướng dự kiến tiếp bộ trưởng nông nghiệp
|
|
66 |
-----------------------------------------------
|
67 |
## 📡 Training data
|
68 |
Here is the number of text samples we used for fine-tuning the model:
|
69 |
-
| Language | Number of text samples|
|
70 |
| -------- | ----------------- |
|
71 |
-
| Vietnamese | 5,600,000
|
72 |
-----------------------------------------------
|
73 |
## 🎯 Accuracy
|
74 |
Below is a breakdown of the performance of the model by each label on 120,000 held-out text samples:
|
75 |
-
| label | precision | recall | f1-score | support|
|
76 |
-
| --------- |
|
77 |
| **Upper** | 0.88 | 0.89 | 0.89 | 56497 |
|
78 |
| **Complex-Upper** | 0.92 | 0.83 | 0.88 | 480 |
|
79 |
| **.** | 0.81 | 0.82 | 0.82 | 18139 |
|
|
|
5 |
- capitalization
|
6 |
- punctuation
|
7 |
- token-classification
|
8 |
+
license: cc-by-sa-4.0
|
|
|
9 |
datasets:
|
10 |
- oscar-corpus/OSCAR-2109
|
11 |
metrics:
|
|
|
31 |
import shutil
|
32 |
import sys
|
33 |
from huggingface_hub import snapshot_download
|
|
|
34 |
cache_dir = "./capu"
|
35 |
def download_files(repo_id, cache_dir=None, ignore_regex=None):
|
36 |
download_dir = snapshot_download(repo_id=repo_id, cache_dir=cache_dir, ignore_regex=ignore_regex)
|
37 |
if cache_dir is None or download_dir == cache_dir:
|
38 |
return download_dir
|
|
|
39 |
file_names = os.listdir(download_dir)
|
40 |
for file_name in file_names:
|
41 |
shutil.move(os.path.join(download_dir, file_name), cache_dir)
|
42 |
os.rmdir(download_dir)
|
43 |
return cache_dir
|
|
|
44 |
download_files(repo_id="dragonSwing/vibert-capu", cache_dir=cache_dir, ignore_regex=["*.json", "*.bin"])
|
45 |
sys.path.append(cache_dir)
|
46 |
```
|
|
|
62 |
-----------------------------------------------
|
63 |
## 📡 Training data
|
64 |
Here is the number of text samples we used for fine-tuning the model:
|
65 |
+
| Language | Number of text samples |
|
66 |
| -------- | ----------------- |
|
67 |
+
| Vietnamese | 5,600,000 |
|
68 |
-----------------------------------------------
|
69 |
## 🎯 Accuracy
|
70 |
Below is a breakdown of the performance of the model by each label on 120,000 held-out text samples:
|
71 |
+
| label | precision | recall | f1-score | support |
|
72 |
+
| --------- | ------------- | -------- | ---------- | -------- |
|
73 |
| **Upper** | 0.88 | 0.89 | 0.89 | 56497 |
|
74 |
| **Complex-Upper** | 0.92 | 0.83 | 0.88 | 480 |
|
75 |
| **.** | 0.81 | 0.82 | 0.82 | 18139 |
|