import torch
from torch import nn
from abc import ABC, abstractmethod

from utils.dl.common.model import get_model_device, get_model_latency, get_model_size
from utils.common.log import logger


class FM_to_MD_Util(ABC):
""" |
|
Foundation Model (FM) to Master DNN (MD), where MD is a narrower FM (with smaller width but the same depth). |
|
|
|
MD is pre-trained by knowledge distillation; |
|
Moreover, we construct the index relationship between FM and MD in this process, |
|
enabling the lightweight knowledge feedback from MD to FM. |
|
|
|
NOTE: 索引建立在master DNN权重通道和LoRA的AB之间 |
|
""" |
|
|
|
    @abstractmethod
    def init_md_from_fm_by_reducing_width(self, fm: nn.Module, reducing_width_ratio: int) -> nn.Module:
        """Reduce the width of each FM layer by `reducing_width_ratio`x and return the resulting master DNN."""
        raise NotImplementedError

    def init_md_from_fm_by_reducing_width_with_perf_test(self, fm: nn.Module, reducing_width_ratio: int,
                                                         samples: torch.Tensor) -> nn.Module:
        # measure the foundation model's size and per-sample latency as the baseline
        fm_size = get_model_size(fm, True)
        fm_latency = get_model_latency(fm, (1, *list(samples.size())[1:]), 20,
                                       get_model_device(fm), 20, False)

        # build the master DNN (narrower FM) and measure it for comparison
        master_dnn = self.init_md_from_fm_by_reducing_width(fm, reducing_width_ratio)
        master_dnn_size = get_model_size(master_dnn, True)
        logger.debug(f'inited master DNN: {master_dnn}')

        master_dnn_latency = get_model_latency(master_dnn, (1, *list(samples.size())[1:]), 20,
                                               get_model_device(master_dnn), 20, False)

        logger.info(f'init master DNN (w/o FBS yet) by reducing foundation model\'s width (by {reducing_width_ratio:d}x)')
        logger.info(f'foundation model ({fm_size:.3f}MB, {fm_latency:.4f}s/sample) -> '
                    f'master DNN ({master_dnn_size:.3f}MB, {master_dnn_latency:.4f}s/sample)\n'
                    f'(model size: ↓ {(fm_size / master_dnn_size):.2f}x, '
                    f'latency: ↓ {(fm_latency / master_dnn_latency):.2f}x)')

        return master_dnn
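

# Illustrative sketch (an assumption, not part of the original module): a minimal
# concrete subclass showing how `init_md_from_fm_by_reducing_width` might be
# implemented for a toy MLP built only from `nn.Linear` layers. Hidden widths are
# divided by `reducing_width_ratio` and the surviving channels' weights are copied
# from the FM, so the MD keeps the FM's depth but only a fraction of its width.
# Real foundation models (e.g. transformers) need architecture-specific slicing,
# and the FM<->MD index relationship mentioned in the class docstring is omitted here.
class _ToyMLP_FM_to_MD_Util(FM_to_MD_Util):
    def init_md_from_fm_by_reducing_width(self, fm: nn.Module, reducing_width_ratio: int) -> nn.Module:
        import copy

        md = copy.deepcopy(fm)
        linears = [m for m in md.modules() if isinstance(m, nn.Linear)]

        kept_in = None  # number of input channels the next layer should keep (None = keep all)
        for i, layer in enumerate(linears):
            is_last = (i == len(linears) - 1)
            in_f = layer.in_features if kept_in is None else kept_in
            out_f = layer.out_features if is_last else max(1, layer.out_features // reducing_width_ratio)

            # keep the first `out_f` output channels and the first `in_f` input channels of the FM weights
            layer.weight = nn.Parameter(layer.weight.data[:out_f, :in_f].clone())
            if layer.bias is not None:
                layer.bias = nn.Parameter(layer.bias.data[:out_f].clone())
            layer.in_features, layer.out_features = in_f, out_f

            kept_in = out_f

        return md


# Hypothetical usage: build an MD at half width from a small MLP and report
# size/latency via the inherited perf-test helper.
#
#   fm = nn.Sequential(nn.Linear(128, 256), nn.ReLU(),
#                      nn.Linear(256, 256), nn.ReLU(),
#                      nn.Linear(256, 10))
#   md = _ToyMLP_FM_to_MD_Util().init_md_from_fm_by_reducing_width_with_perf_test(
#       fm, 2, torch.randn(8, 128))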