import os

import msgpack
import numpy as np
from scipy.sparse import vstack, save_npz, load_npz


class File:
    """Base class for a file-backed store located at a fixed path.

    Subclasses implement :meth:`save`, :meth:`load` and
    :meth:`get_extension_file`; the base versions raise
    ``NotImplementedError``.
    """

    def __init__(self, path):
        # Location on disk that save()/load()/exists() operate on.
        self.path = path

    def exists(self):
        """Return True if something exists at ``self.path``."""
        return os.path.exists(self.path)

    def save(self, data):
        """Persist *data* to ``self.path``. Must be overridden."""
        raise NotImplementedError

    def load(self):
        """Read back what :meth:`save` wrote. Must be overridden."""
        raise NotImplementedError

    def get_file_name(self) -> str:
        """Return the base name associated with this kind of file."""
        return 'file'

    def get_extension_file(self) -> str:
        """Return the file extension (without dot). Must be overridden.

        Raises:
            NotImplementedError: always, in the base class.
        """
        # The previous ``raise ''`` was not a valid raise target and
        # produced an accidental TypeError; signal "abstract" the same way
        # save() and load() do.
        raise NotImplementedError


class MsgpackFile(File):
    """Store an object as a single msgpack document."""

    def save(self, data):
        """Serialize *data* with msgpack and write it to ``self.path``."""
        # use_bin_type=True keeps bytes and str distinguishable on disk.
        packed = msgpack.packb(data, use_bin_type=True)
        with open(self.path, "wb") as file:
            file.write(packed)

    def load(self):
        """Read ``self.path`` and return the unpacked msgpack object."""
        with open(self.path, "rb") as file:
            byte_data = file.read()
        # raw=False decodes msgpack strings to ``str`` instead of bytes.
        return msgpack.unpackb(byte_data, raw=False)

    def get_file_name(self) -> str:
        return 'texts'

    def get_extension_file(self) -> str:
        return 'msgpack'


class DenseArrayNumpyFile(File):
    """Store a sequence of arrays in one compressed ``.npz`` archive."""

    def save(self, data):
        """Write each item of *data* as a positional array of the archive.

        NOTE: when ``self.path`` is a string that does not end in
        ``.npz``, NumPy appends that extension, so the file written may
        not be exactly ``self.path``.
        """
        np.savez_compressed(self.path, *data)

    def load(self):
        """Return the archive's arrays as a list, in archive order."""
        # The archive object holds an open file handle; the context manager
        # closes it once every array has been materialized.
        with np.load(self.path) as archive:
            return [archive[name] for name in archive.files]

    def get_file_name(self) -> str:
        return 'dense'

    def get_extension_file(self) -> str:
        return 'npz'


class SparseArrayScipyFile(File):
    """Store sparse matrices stacked vertically in one ``.npz`` file."""

    def save(self, sparse_matrices):
        """Vertically stack *sparse_matrices* and save the result.

        NOTE: as with NumPy, SciPy appends ``.npz`` to a string path that
        lacks it.
        """
        combined_sparse_matrix = vstack(sparse_matrices)
        save_npz(self.path, combined_sparse_matrix)

    def load(self):
        """Return the single stacked sparse matrix stored at ``self.path``.

        The result is the combined matrix written by :meth:`save`, not the
        original list of matrices.
        """
        return load_npz(self.path)

    def get_file_name(self) -> str:
        return 'sparse'

    def get_extension_file(self) -> str:
        return 'npz'