# Hosting-page residue kept for provenance (not part of the module's code):
# LINC-BIT's picture
# Upload 1912 files
# b84549f verified
import torch
from torch import nn
from abc import ABC, abstractmethod
from utils.dl.common.model import get_model_device, get_model_latency, get_model_size
from utils.common.log import logger
class FM_to_MD_Util(ABC):
    """Build a master DNN (MD) out of a foundation model (FM).

    The MD is a narrower FM: its width is smaller while its depth is the same.
    The MD is pre-trained by knowledge distillation. While deriving the MD, an
    index relationship between the FM and the MD is also constructed, which
    enables lightweight knowledge feedback from the MD back to the FM.

    NOTE: the index is built between the master DNN's weight channels and the
    A/B matrices of LoRA.
    """

    @abstractmethod
    def init_md_from_fm_by_reducing_width(self, fm: nn.Module, reducing_width_ratio: int) -> nn.Module:
        """Create the master DNN from ``fm``; concrete subclasses must override this.

        Args:
            fm: The foundation model to derive the master DNN from.
            reducing_width_ratio: Factor by which the width is reduced.

        Returns:
            The master DNN.

        Raises:
            NotImplementedError: Always, in this base declaration.
        """
        raise NotImplementedError

    def init_md_from_fm_by_reducing_width_with_perf_test(self, fm: nn.Module, reducing_width_ratio: int,
                                                         samples: torch.Tensor) -> nn.Module:
        """Create the master DNN and log a size/latency comparison against the FM.

        The foundation model is measured first, then the master DNN is built via
        ``init_md_from_fm_by_reducing_width`` and measured the same way. Both
        results, and their ratios, are written to the logger.

        Args:
            fm: The foundation model to derive the master DNN from.
            reducing_width_ratio: Factor by which the width is reduced; it is
                formatted with ``:d`` in the log message, so it must be an int.
            samples: Example input; only its size is read. The leading
                dimension is replaced by 1 for the latency measurement.

        Returns:
            The master DNN produced by ``init_md_from_fm_by_reducing_width``.
        """
        # The FM is measured before the MD is created, so the numbers describe
        # the model as it was handed in.
        fm_size = get_model_size(fm, True)

        # Single-sample input shape: first dimension forced to 1, the remaining
        # dimensions copied from `samples`.
        sample_dims = list(samples.size())
        input_shape = (1, *sample_dims[1:])

        def measure_latency(model: nn.Module):
            # The positional 20 / 20 / False are forwarded unchanged to the
            # project helper; presumably repeat and warm-up counts -- confirm
            # against get_model_latency's signature.
            return get_model_latency(model, input_shape, 20,
                                     get_model_device(model), 20, False)

        fm_latency = measure_latency(fm)

        master_dnn = self.init_md_from_fm_by_reducing_width(fm, reducing_width_ratio)
        master_dnn_size = get_model_size(master_dnn, True)
        logger.debug(f'inited master DNN: {master_dnn}')

        master_dnn_latency = measure_latency(master_dnn)

        logger.info(f'init master DNN (w/o FBS yet) by reducing foundation model\'s width (by {reducing_width_ratio:d}x)')
        logger.info(f'foundation model ({fm_size:.3f}MB, {fm_latency:.4f}s/sample) -> '
                    f'master DNN ({master_dnn_size:.3f}MB, {master_dnn_latency:.4f}s/sample)\n'
                    f'(model size: ↓ {(fm_size / master_dnn_size):.2f}x, '
                    f'latency: ↓ {(fm_latency / master_dnn_latency):.2f}x)')

        return master_dnn