File size: 654 Bytes
861182c
 
 
 
e6ad839
 
861182c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
from pathlib import Path
from typing import TypedDict, Union, TypeAlias, Tuple

# Constants
PROJECT_NAME = 'hydra-classifier'

# Data locations. These are relative paths, so they resolve against the
# current working directory of the running process.
DATA_DIR = Path('./data')
RAW_DATA_DIR = DATA_DIR / 'raw'
PROCESSED_DATA_DIR = DATA_DIR / 'processed'
METADATA_FILEPATH = DATA_DIR / 'metadata.csv'

# Presumably training hyper-parameters -- confirm against the training code.
BATCH_SIZE = 8
EPOCHS = 1
# Presumably the identifier of the pretrained checkpoint to load -- confirm.
BERT_BASE = 'bert-base-uncased'
# Presumably counted in tokens -- TODO confirm against the tokenizer call.
MAX_SEQUENCE_LENGTH = 512
# Original (misspelled) name, kept as an alias so existing imports keep working.
MAX_SEQUENCE_LENGHT = MAX_SEQUENCE_LENGTH
# Relative path as well; resolved against the current working directory.
MODEL_DIR = Path('./model')

# Types
# A path value supplied either as a plain string or as a pathlib.Path.
FilePath: TypeAlias = Union[str, Path]


class PageMetadata(TypedDict):
    """Typed dictionary describing a single page.

    All five keys are required (the TypedDict uses the default totality).
    NOTE(review): presumably one record of the metadata file -- confirm
    against the code that reads or writes it.
    """

    # Page number; whether it is 0- or 1-based is not established here.
    page_number: int
    # Path to the page's file; presumably relative to a data directory -- TODO confirm.
    file_relpath: FilePath
    # Page dimensions; presumably in pixels -- TODO confirm.
    width: int
    height: int
    # Label assigned to the page, stored as a string.
    label: str


# Pair of ints giving an image size.
# NOTE(review): element order (width/height vs height/width) is not fixed here -- confirm with callers.
ImageSize: TypeAlias = Tuple[int, int]
# Triple of ints giving a model input shape.
# NOTE(review): axis order (e.g. channel position) is not fixed here -- confirm with callers.
ImageInputShape: TypeAlias = Tuple[int, int, int]