Stanislav Kalinin
committed on
Commit
•
ce41b2a
1
Parent(s):
2c7c0ba
feat: Update school notebooks models
Browse files
- ocr/ocr_config.json +20 -44
- ocr/ocr_model.ckpt +2 -2
- ocr/ocr_model.onnx +3 -0
- pipeline_config.json +10 -12
- segm/segm_config.json +47 -24
- segm/segm_model.ckpt +2 -2
- segm/segm_model.onnx +3 -0
ocr/ocr_config.json
CHANGED
@@ -1,76 +1,52 @@
|
|
1 |
{
|
2 |
"alphabet": " !\"'()*+,-./0123456789:;<=>?ABCDEFGHIJKLMNOPRSTVWY[\\]_abcdefghiklmnoprstuvwxyz|}ЁАБВГДЕЖЗИКЛМНОПРСТУФХЦЧШЩЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё’№",
|
3 |
-
"save_dir": "
|
4 |
-
"num_epochs":
|
5 |
-
"pretrain_path": "",
|
6 |
"image": {
|
7 |
-
"width":
|
8 |
-
"height":
|
9 |
},
|
10 |
"train": {
|
11 |
"datasets": [
|
12 |
{
|
13 |
-
"csv_path": "
|
14 |
-
"prob":
|
15 |
},
|
16 |
{
|
17 |
-
"csv_path": "
|
18 |
-
"prob":
|
19 |
-
},
|
20 |
-
{
|
21 |
-
"csv_path": "/home/jovyan/skalinin/OCR-model/data/segmentation_15102021/train.csv",
|
22 |
-
"prob": 0.45
|
23 |
-
},
|
24 |
-
{
|
25 |
-
"csv_path": "/home/jovyan/skalinin/OCR-model/data/segmentation_26012022/train.csv",
|
26 |
-
"prob": 0.4
|
27 |
}
|
28 |
],
|
29 |
"epoch_size": 100000,
|
30 |
-
"batch_size":
|
31 |
},
|
32 |
"val": {
|
33 |
"datasets": [
|
34 |
{
|
35 |
-
"csv_path": "
|
36 |
-
"prob":
|
37 |
-
},
|
38 |
-
{
|
39 |
-
"csv_path": "/home/jovyan/skalinin/OCR-model/data/segmentation_11032022_ru/val.csv",
|
40 |
-
"prob": 1
|
41 |
},
|
42 |
{
|
43 |
-
"csv_path": "
|
44 |
-
"prob":
|
45 |
-
},
|
46 |
-
{
|
47 |
-
"csv_path": "/home/jovyan/skalinin/OCR-model/data/segmentation_26012022/val.csv",
|
48 |
-
"prob": 1
|
49 |
}
|
50 |
],
|
51 |
"epoch_size": null,
|
52 |
-
"batch_size":
|
53 |
},
|
54 |
"test": {
|
55 |
"datasets": [
|
56 |
{
|
57 |
-
"csv_path": "
|
58 |
-
"prob":
|
59 |
-
},
|
60 |
-
{
|
61 |
-
"csv_path": "/home/jovyan/skalinin/OCR-model/data/segmentation_11032022_ru/test.csv",
|
62 |
-
"prob": 1
|
63 |
-
},
|
64 |
-
{
|
65 |
-
"csv_path": "/home/jovyan/skalinin/OCR-model/data/segmentation_15102021/test.csv",
|
66 |
-
"prob": 1
|
67 |
},
|
68 |
{
|
69 |
-
"csv_path": "
|
70 |
-
"prob":
|
71 |
}
|
72 |
],
|
73 |
"epoch_size": null,
|
74 |
-
"batch_size":
|
75 |
}
|
76 |
}
|
|
|
1 |
{
|
2 |
"alphabet": " !\"'()*+,-./0123456789:;<=>?ABCDEFGHIJKLMNOPRSTVWY[\\]_abcdefghiklmnoprstuvwxyz|}ЁАБВГДЕЖЗИКЛМНОПРСТУФХЦЧШЩЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё’№",
|
3 |
+
"save_dir": "data/experiments/tetradi_64_512_exp2/",
|
4 |
+
"num_epochs": 300,
|
5 |
+
"pretrain_path": "data/pretrain/model-187-0.6814.ckpt",
|
6 |
"image": {
|
7 |
+
"width": 512,
|
8 |
+
"height": 64
|
9 |
},
|
10 |
"train": {
|
11 |
"datasets": [
|
12 |
{
|
13 |
+
"csv_path": "data/school_notebooks_EN/train/train.csv",
|
14 |
+
"prob": 14595
|
15 |
},
|
16 |
{
|
17 |
+
"csv_path": "data/school_notebooks_RU/train/train.csv",
|
18 |
+
"prob": 268350
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
}
|
20 |
],
|
21 |
"epoch_size": 100000,
|
22 |
+
"batch_size": 512
|
23 |
},
|
24 |
"val": {
|
25 |
"datasets": [
|
26 |
{
|
27 |
+
"csv_path": "data/school_notebooks_EN/val/val.csv",
|
28 |
+
"prob": 2129
|
|
|
|
|
|
|
|
|
29 |
},
|
30 |
{
|
31 |
+
"csv_path": "data/school_notebooks_RU/val/val.csv",
|
32 |
+
"prob": 27853
|
|
|
|
|
|
|
|
|
33 |
}
|
34 |
],
|
35 |
"epoch_size": null,
|
36 |
+
"batch_size": 512
|
37 |
},
|
38 |
"test": {
|
39 |
"datasets": [
|
40 |
{
|
41 |
+
"csv_path": "data/school_notebooks_EN/test/test.csv",
|
42 |
+
"prob": 1979
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
},
|
44 |
{
|
45 |
+
"csv_path": "data/school_notebooks_RU/test/test.csv",
|
46 |
+
"prob": 28109
|
47 |
}
|
48 |
],
|
49 |
"epoch_size": null,
|
50 |
+
"batch_size": 512
|
51 |
}
|
52 |
}
|
ocr/ocr_model.ckpt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:74192c3c23532f8d655c806d09aeeccf88b7e9fb769a21579f724702b99771ec
|
3 |
+
size 50289625
|
ocr/ocr_model.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cc5ada98e82336c3e229b78aff6758cda6c92e6ca2cae5d9118b3e447994fd6e
|
3 |
+
size 50175647
|
pipeline_config.json
CHANGED
@@ -5,7 +5,9 @@
|
|
5 |
"SegmPrediction": {
|
6 |
"model_path": "segm/segm_model.ckpt",
|
7 |
"config_path": "segm/segm_config.json",
|
8 |
-
"
|
|
|
|
|
9 |
},
|
10 |
"RestoreImageAngle": {
|
11 |
"restoring_class_names": ["text_line"]
|
@@ -15,24 +17,20 @@
|
|
15 |
"model_path": "ocr/ocr_model.ckpt",
|
16 |
"config_path": "ocr/ocr_config.json",
|
17 |
"lm_path": "ocr/kenlm_corpus.arpa",
|
18 |
-
"
|
19 |
-
"
|
|
|
|
|
|
|
20 |
},
|
21 |
"LineFinder": {
|
22 |
"line_classes": ["text_line"],
|
23 |
-
"text_classes": ["
|
24 |
},
|
25 |
"PrepareJSON": {}
|
26 |
},
|
27 |
"classes": {
|
28 |
-
"
|
29 |
-
"contour_posptrocess": {
|
30 |
-
"BboxFromContour": {},
|
31 |
-
"UpscaleBbox": {"upscale_bbox": [1.4, 2.3]},
|
32 |
-
"CropByBbox": {}
|
33 |
-
}
|
34 |
-
},
|
35 |
-
"shrinked_comments": {
|
36 |
"contour_posptrocess": {
|
37 |
"BboxFromContour": {},
|
38 |
"UpscaleBbox": {"upscale_bbox": [1.4, 2.3]},
|
|
|
5 |
"SegmPrediction": {
|
6 |
"model_path": "segm/segm_model.ckpt",
|
7 |
"config_path": "segm/segm_config.json",
|
8 |
+
"num_threads": 8,
|
9 |
+
"device": "cuda",
|
10 |
+
"runtime": "Pytorch"
|
11 |
},
|
12 |
"RestoreImageAngle": {
|
13 |
"restoring_class_names": ["text_line"]
|
|
|
17 |
"model_path": "ocr/ocr_model.ckpt",
|
18 |
"config_path": "ocr/ocr_config.json",
|
19 |
"lm_path": "ocr/kenlm_corpus.arpa",
|
20 |
+
"num_threads": 8,
|
21 |
+
"classes_to_ocr": ["shrinked_text"],
|
22 |
+
"device": "cuda",
|
23 |
+
"batch_size": 128,
|
24 |
+
"runtime": "Pytorch"
|
25 |
},
|
26 |
"LineFinder": {
|
27 |
"line_classes": ["text_line"],
|
28 |
+
"text_classes": ["shrinked_text"]
|
29 |
},
|
30 |
"PrepareJSON": {}
|
31 |
},
|
32 |
"classes": {
|
33 |
+
"shrinked_text": {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
"contour_posptrocess": {
|
35 |
"BboxFromContour": {},
|
36 |
"UpscaleBbox": {"upscale_bbox": [1.4, 2.3]},
|
segm/segm_config.json
CHANGED
@@ -1,14 +1,14 @@
|
|
1 |
{
|
2 |
-
"save_dir": "/
|
3 |
-
"num_epochs":
|
4 |
-
"pretrain_path": "
|
5 |
"image": {
|
6 |
-
"width":
|
7 |
-
"height":
|
8 |
},
|
9 |
"classes": {
|
10 |
-
"
|
11 |
-
"annotation_classes": ["pupil_text"],
|
12 |
"polygon2mask": {
|
13 |
"ShrinkMaskMaker": {"shrink_ratio": 0.5}
|
14 |
},
|
@@ -17,10 +17,10 @@
|
|
17 |
"min_area": 10
|
18 |
}
|
19 |
},
|
20 |
-
"
|
21 |
-
"annotation_classes": ["
|
22 |
"polygon2mask": {
|
23 |
-
"
|
24 |
},
|
25 |
"postprocess": {
|
26 |
"threshold": 0.8,
|
@@ -30,7 +30,7 @@
|
|
30 |
"text_line": {
|
31 |
"annotation_classes": ["text_line"],
|
32 |
"polygon2mask": {
|
33 |
-
"PolylineToMask": {"thickness": 2
|
34 |
},
|
35 |
"postprocess": {
|
36 |
"threshold": 0.8,
|
@@ -41,32 +41,55 @@
|
|
41 |
"train": {
|
42 |
"datasets": [
|
43 |
{
|
44 |
-
"json_path": "
|
45 |
-
"image_root": "
|
46 |
-
"processed_data_path": "/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
}
|
48 |
],
|
49 |
-
"
|
|
|
50 |
},
|
51 |
"val": {
|
52 |
"datasets": [
|
53 |
{
|
54 |
-
"json_path": "
|
55 |
-
"image_root": "
|
56 |
-
"processed_data_path": "/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
}
|
58 |
],
|
59 |
-
"
|
|
|
60 |
},
|
61 |
"test": {
|
62 |
"datasets": [
|
63 |
{
|
64 |
-
"json_path": "
|
65 |
-
"image_root": "
|
66 |
-
"processed_data_path": "/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
}
|
68 |
],
|
69 |
-
"
|
|
|
70 |
}
|
71 |
}
|
72 |
-
|
|
|
1 |
{
|
2 |
+
"save_dir": "data/experiments/tetradi_with_lines/",
|
3 |
+
"num_epochs": 1500,
|
4 |
+
"pretrain_path": "data/experiments/base_model_all_datasets/model-960-0.1979.ckpt",
|
5 |
"image": {
|
6 |
+
"width": 896,
|
7 |
+
"height": 896
|
8 |
},
|
9 |
"classes": {
|
10 |
+
"shrinked_text": {
|
11 |
+
"annotation_classes": ["pupil_text", "teacher_comment"],
|
12 |
"polygon2mask": {
|
13 |
"ShrinkMaskMaker": {"shrink_ratio": 0.5}
|
14 |
},
|
|
|
17 |
"min_area": 10
|
18 |
}
|
19 |
},
|
20 |
+
"bordered_text": {
|
21 |
+
"annotation_classes": ["pupil_text", "teacher_comment"],
|
22 |
"polygon2mask": {
|
23 |
+
"BorderMaskMaker": {"shrink_ratio": 0.5}
|
24 |
},
|
25 |
"postprocess": {
|
26 |
"threshold": 0.8,
|
|
|
30 |
"text_line": {
|
31 |
"annotation_classes": ["text_line"],
|
32 |
"polygon2mask": {
|
33 |
+
"PolylineToMask": {"thickness": 2}
|
34 |
},
|
35 |
"postprocess": {
|
36 |
"threshold": 0.8,
|
|
|
41 |
"train": {
|
42 |
"datasets": [
|
43 |
{
|
44 |
+
"json_path": "data/segm/school_notebooks_EN/annotations_train.json",
|
45 |
+
"image_root": "data/segm/school_notebooks_EN/images/",
|
46 |
+
"processed_data_path": "data/tetradi_with_lines/school_notebooks_EN/train/annotations_train.csv",
|
47 |
+
"prob": 70
|
48 |
+
},
|
49 |
+
{
|
50 |
+
"json_path": "data/segm/school_notebooks_RU/annotations_train.json",
|
51 |
+
"image_root": "data/segm/school_notebooks_RU/images/",
|
52 |
+
"processed_data_path": "data/tetradi_with_lines/school_notebooks_RU/train/annotations_train.csv",
|
53 |
+
"prob": 1557
|
54 |
}
|
55 |
],
|
56 |
+
"epoch_size": 2000,
|
57 |
+
"batch_size": 20
|
58 |
},
|
59 |
"val": {
|
60 |
"datasets": [
|
61 |
{
|
62 |
+
"json_path": "data/segm/school_notebooks_EN/annotations_val.json",
|
63 |
+
"image_root": "data/segm/school_notebooks_EN/images/",
|
64 |
+
"processed_data_path": "data/tetradi_with_lines/school_notebooks_EN/val/annotations_val.csv",
|
65 |
+
"prob": 10
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"json_path": "data/segm/school_notebooks_RU/annotations_val.json",
|
69 |
+
"image_root": "data/segm/school_notebooks_RU/images/",
|
70 |
+
"processed_data_path": "data/tetradi_with_lines/school_notebooks_RU/val/annotations_val.csv",
|
71 |
+
"prob": 150
|
72 |
}
|
73 |
],
|
74 |
+
"epoch_size": null,
|
75 |
+
"batch_size": 20
|
76 |
},
|
77 |
"test": {
|
78 |
"datasets": [
|
79 |
{
|
80 |
+
"json_path": "data/segm/school_notebooks_EN/annotations_test.json",
|
81 |
+
"image_root": "data/segm/school_notebooks_EN/images/",
|
82 |
+
"processed_data_path": "data/tetradi_with_lines/school_notebooks_EN/test/annotations_test.csv",
|
83 |
+
"prob": 10
|
84 |
+
},
|
85 |
+
{
|
86 |
+
"json_path": "data/segm/school_notebooks_RU/annotations_test.json",
|
87 |
+
"image_root": "data/segm/school_notebooks_RU/images/",
|
88 |
+
"processed_data_path": "data/tetradi_with_lines/school_notebooks_RU/test/annotations_test.csv",
|
89 |
+
"prob": 150
|
90 |
}
|
91 |
],
|
92 |
+
"epoch_size": null,
|
93 |
+
"batch_size": 20
|
94 |
}
|
95 |
}
|
|
segm/segm_model.ckpt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:12331d25804fd5b291707cbb288d384b81f7a6460e514b8695cbc3264491cde9
|
3 |
+
size 115714321
|
segm/segm_model.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:511b05e81ff544d236b7635b8a4367096cb679991c5d922d04c8fa7524f36127
|
3 |
+
size 115229451
|