a100 committed on
Commit
661399b
1 Parent(s): eb558b7

feat: added vectors w/ description

Browse files
.gitattributes CHANGED
@@ -1,6 +1,7 @@
1
  *.7z filter=lfs diff=lfs merge=lfs -text
2
  *.arrow filter=lfs diff=lfs merge=lfs -text
3
  *.bin filter=lfs diff=lfs merge=lfs -text
 
4
  *.bz2 filter=lfs diff=lfs merge=lfs -text
5
  *.ftz filter=lfs diff=lfs merge=lfs -text
6
  *.gz filter=lfs diff=lfs merge=lfs -text
@@ -9,14 +10,10 @@
9
  *.lfs.* filter=lfs diff=lfs merge=lfs -text
10
  *.model filter=lfs diff=lfs merge=lfs -text
11
  *.msgpack filter=lfs diff=lfs merge=lfs -text
12
- *.npy filter=lfs diff=lfs merge=lfs -text
13
- *.npz filter=lfs diff=lfs merge=lfs -text
14
  *.onnx filter=lfs diff=lfs merge=lfs -text
15
  *.ot filter=lfs diff=lfs merge=lfs -text
16
  *.parquet filter=lfs diff=lfs merge=lfs -text
17
  *.pb filter=lfs diff=lfs merge=lfs -text
18
- *.pickle filter=lfs diff=lfs merge=lfs -text
19
- *.pkl filter=lfs diff=lfs merge=lfs -text
20
  *.pt filter=lfs diff=lfs merge=lfs -text
21
  *.pth filter=lfs diff=lfs merge=lfs -text
22
  *.rar filter=lfs diff=lfs merge=lfs -text
@@ -29,3 +26,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
29
  *.zip filter=lfs diff=lfs merge=lfs -text
30
  *.zstandard filter=lfs diff=lfs merge=lfs -text
31
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
1
  *.7z filter=lfs diff=lfs merge=lfs -text
2
  *.arrow filter=lfs diff=lfs merge=lfs -text
3
  *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bin.* filter=lfs diff=lfs merge=lfs -text
5
  *.bz2 filter=lfs diff=lfs merge=lfs -text
6
  *.ftz filter=lfs diff=lfs merge=lfs -text
7
  *.gz filter=lfs diff=lfs merge=lfs -text
 
10
  *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
  *.model filter=lfs diff=lfs merge=lfs -text
12
  *.msgpack filter=lfs diff=lfs merge=lfs -text
 
 
13
  *.onnx filter=lfs diff=lfs merge=lfs -text
14
  *.ot filter=lfs diff=lfs merge=lfs -text
15
  *.parquet filter=lfs diff=lfs merge=lfs -text
16
  *.pb filter=lfs diff=lfs merge=lfs -text
 
 
17
  *.pt filter=lfs diff=lfs merge=lfs -text
18
  *.pth filter=lfs diff=lfs merge=lfs -text
19
  *.rar filter=lfs diff=lfs merge=lfs -text
 
26
  *.zip filter=lfs diff=lfs merge=lfs -text
27
  *.zstandard filter=lfs diff=lfs merge=lfs -text
28
  *tfevents* filter=lfs diff=lfs merge=lfs -text
29
+ *.bin filter=lfs diff=lfs merge=lfs -text
30
+ *.floret filter=lfs diff=lfs merge=lfs -text
31
+ *.vec filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,3 +1,47 @@
1
  ---
 
 
 
 
 
 
 
 
2
  license: cc-by-sa-4.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ tags:
3
+ - spacy
4
+ - floret
5
+ - fasttext
6
+ - feature-extraction
7
+ - token-classification
8
+ language:
9
+ - hu
10
  license: cc-by-sa-4.0
11
+ model-index:
12
+ - name: hu_vectors_web_md
13
+   results:
14
+   - task:
15
+       name: Analogical questions
16
+       type: token-classification
17
+     metrics:
18
+     - name: Accuracy
19
+       type: accuracy
20
+       value: 0.1094
21
+     - name: MRR
22
+       type: mrr
23
+       value: 0.2107
24
+
25
  ---
26
+ Hungarian word vectors for HuSpaCy.
27
+
28
+ The model is trained on the Hungarian Webcorpus 2.0 using floret with the following hyperparameters: `floret cbow -dim 100 -mode floret -bucket 200000 -minn 4 -maxn 6 -minCount 100 -neg 10 -hashCount 2 -lr 0.1 -thread 30 -epoch 5`
29
+
30
+ Vectors are published in both fastText and floret formats.
31
+
32
+ | Feature | Description |
33
+ | --- | --- |
34
+ | **Name** | `hu_vectors_web_md` |
35
+ | **Version** | `1.0` |
36
+ | **Vectors** | 200000 keys (100 dimensions) |
37
+ | **Sources** | [Hungarian Webcorpus 2.0](https://hlt.bme.hu/en/resources/webcorpus2) (Dávid Márk Nemeskey (SZTAKI-HLT)) |
38
+ | **License** | `cc-by-sa-4.0` |
39
+ | **Author** | [SzegedAI, MILAB](https://github.com/huspacy/huspacy) |
40
+
41
+
42
+ ### Accuracy
43
+
44
+ | Type | Score |
45
+ | --- | --- |
46
+ | `ACC` | 10.94 |
47
+ | `MRR` | 0.2107 |
floret/floret_vectors.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4db158aaf6311ececb55120a6db6715bff747d2763602859104030252ccb6fa
3
+ size 1308880517
floret/floret_vectors.floret ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de67b4b0955ce922c0eec1f89eeeb481bae1b980103b7b3acab4c8af55a6f3f3
3
+ size 150872955
floret/floret_vectors.vec ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c36a7373ef3baef1983e02caffff655215b4d2db05347b0933e3fd4f4ce083cc
3
+ size 1181547298