File size: 1,589 Bytes
243b1c6
 
 
 
 
b3cff30
 
 
 
 
 
 
 
 
 
 
 
 
b4c8256
 
 
 
 
 
b3cff30
 
 
 
 
 
 
 
 
 
 
b4c8256
 
 
 
 
 
b3cff30
 
 
 
 
 
 
b4c8256
 
 
 
 
 
b3cff30
 
 
 
 
 
b4c8256
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import os
import msgpack
import numpy as np
from scipy.sparse import vstack, save_npz, load_npz

class File:
    """Base class for a store backed by a single filesystem path.

    ``save`` and ``load`` are placeholders that subclasses are expected to
    override; the name/extension hooks return generic defaults.
    """

    def __init__(self, path):
        """Remember the path this object reads from and writes to.

        Args:
            path: Filesystem location of the backing file.
        """
        self.path = path

    def exists(self) -> bool:
        """Return True when something exists at ``self.path``."""
        return os.path.exists(self.path)

    def save(self, data):
        """Persist ``data`` to ``self.path``.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    def load(self):
        """Read back the content stored at ``self.path``.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    def get_file_name(self) -> str:
        """Return the default base name, ``'file'``."""
        return 'file'

    def get_extension_file(self) -> str:
        """Return the default extension, an empty string."""
        # This used to be `raise ''`, which blows up with TypeError because a
        # str is not an exception; returning the empty default matches the
        # declared `-> str` contract and the sibling get_file_name().
        return ''

class MsgpackFile(File):
    """File whose content is a single msgpack-encoded object."""

    def save(self, data):
        """Encode ``data`` with msgpack and write the bytes to ``self.path``.

        The target is opened (and therefore truncated) before encoding
        starts, and ``use_bin_type=True`` is passed to the packer.
        """
        with open(self.path, "wb") as sink:
            sink.write(msgpack.packb(data, use_bin_type=True))

    def load(self):
        """Read ``self.path`` in full and return the decoded object.

        Decoding uses ``raw=False``.
        """
        with open(self.path, "rb") as source:
            payload = source.read()
        decoded = msgpack.unpackb(payload, raw=False)
        return decoded

    def get_file_name(self) -> str:
        """Return the base name used for this kind of file."""
        return 'texts'

    def get_extension_file(self) -> str:
        """Return the extension used for this kind of file."""
        return 'msgpack'

class DenseArrayNumpyFile(File):
    """File holding a sequence of arrays in one compressed ``.npz`` archive."""

    def save(self, data):
        """Write every item of ``data`` into a compressed archive.

        Args:
            data: Iterable of array-likes; each one is passed to
                ``np.savez_compressed`` as a separate positional array.
        """
        # NOTE(review): when given a string path, NumPy appends ".npz" if it
        # is missing, so self.path presumably already carries that suffix;
        # otherwise exists()/load() would look at a different name. Confirm
        # against the code that builds the path.
        np.savez_compressed(self.path, *data)

    def load(self):
        """Return the stored arrays as a list, in archive order.

        Returns:
            list: One array per entry of the archive at ``self.path``.
        """
        # The archive object keeps the underlying file open until it is
        # closed, so read everything inside a ``with`` block instead of
        # leaking the handle. Indexing by ``archive.files`` yields the same
        # arrays, in the same order, as iterating the archive's values.
        with np.load(self.path) as archive:
            return [archive[name] for name in archive.files]

    def get_file_name(self) -> str:
        """Return the base name used for this kind of file."""
        return 'dense'

    def get_extension_file(self) -> str:
        """Return the extension used for this kind of file."""
        return 'npz'

class SparseArrayScipyFile(File):
    """File holding sparse matrices stacked into one SciPy ``.npz`` matrix."""

    def save(self, sparse_matrices):
        """Stack the given matrices vertically and store the result.

        Args:
            sparse_matrices: Sequence of sparse matrices handed to
                ``scipy.sparse.vstack``; only the stacked matrix is written.
        """
        stacked = vstack(sparse_matrices)
        save_npz(self.path, stacked)

    def load(self):
        """Return the matrix stored at ``self.path``.

        This is the single stacked matrix written by ``save``, not the
        original sequence of matrices.
        """
        matrix = load_npz(self.path)
        return matrix

    def get_file_name(self) -> str:
        """Return the base name used for this kind of file."""
        return 'sparse'

    def get_extension_file(self) -> str:
        """Return the extension used for this kind of file."""
        return 'npz'