Spaces:

LINC-BIT
/

EdgeTA

Running

App Files Files Community

EdgeTA / methods /elasticdnn /pipeline /offline /fm_to_md /base.py

LINC-BIT

Upload 1912 files

b84549f verified 11 months ago

raw

history blame contribute delete

2.31 kB

	import torch
	from torch import nn
	from abc import ABC, abstractmethod

	from utils.dl.common.model import get_model_device, get_model_latency, get_model_size
	from utils.common.log import logger


	class FM_to_MD_Util(ABC):
	    """Interface for turning a Foundation Model (FM) into a Master DNN (MD).

	    The MD is a narrower FM: it has a smaller width but the same depth.

	    The MD is pre-trained by knowledge distillation. The index relationship
	    between FM and MD is constructed in the same process, which enables
	    lightweight knowledge feedback from the MD back to the FM.

	    NOTE: the index is built between the master DNN's weight channels and
	    the A/B matrices of LoRA.
	    """

	    @abstractmethod
	    def init_md_from_fm_by_reducing_width(self, fm: nn.Module, reducing_width_ratio: int) -> nn.Module:
	        """Create the master DNN from ``fm`` by reducing its width.

	        Subclasses must override this; the base implementation only raises.

	        Args:
	            fm: The foundation model to shrink.
	            reducing_width_ratio: Width reduction factor (logged by the
	                perf-test wrapper as "by <ratio>x").

	        Returns:
	            The newly initialised master DNN.

	        Raises:
	            NotImplementedError: Always, in this base class.
	        """
	        raise NotImplementedError

	    def init_md_from_fm_by_reducing_width_with_perf_test(self, fm: nn.Module, reducing_width_ratio: int,
	                                                         samples: torch.Tensor) -> nn.Module:
	        """Build the master DNN and log a size/latency comparison against ``fm``.

	        The foundation model is measured first, then the master DNN is created
	        through ``init_md_from_fm_by_reducing_width`` and measured the same way.

	        Args:
	            fm: The foundation model to shrink.
	            reducing_width_ratio: Width reduction factor forwarded to
	                ``init_md_from_fm_by_reducing_width``; it is formatted with
	                ``:d`` in the log, so it has to be an integer.
	            samples: Example input; only its size is read. The leading
	                dimension is replaced by 1 to form the latency-probe shape.

	        Returns:
	            The master DNN produced by ``init_md_from_fm_by_reducing_width``.
	        """
	        # Same per-sample shape as `samples`, but with a leading dimension of 1.
	        probe_shape = (1, *samples.size()[1:])

	        # Measure the foundation model before the master DNN is derived from it.
	        # NOTE(review): the trailing 20 / 20 / False arguments are passed through
	        # unchanged; their meaning is defined by get_model_latency, which is not
	        # visible here.
	        fm_size = get_model_size(fm, True)
	        fm_device = get_model_device(fm)
	        fm_latency = get_model_latency(fm, probe_shape, 20, fm_device, 20, False)

	        master_dnn = self.init_md_from_fm_by_reducing_width(fm, reducing_width_ratio)
	        master_dnn_size = get_model_size(master_dnn, True)
	        logger.debug(f'inited master DNN: {master_dnn}')

	        master_dnn_device = get_model_device(master_dnn)
	        master_dnn_latency = get_model_latency(master_dnn, probe_shape, 20, master_dnn_device, 20, False)

	        logger.info(f'init master DNN (w/o FBS yet) by reducing foundation model\'s width (by {reducing_width_ratio:d}x)')

	        # Reduction factors: how many times smaller / faster the master DNN is.
	        size_reduction = fm_size / master_dnn_size
	        latency_reduction = fm_latency / master_dnn_latency
	        logger.info(
	            f'foundation model ({fm_size:.3f}MB, {fm_latency:.4f}s/sample) -> '
	            f'master DNN ({master_dnn_size:.3f}MB, {master_dnn_latency:.4f}s/sample)\n'
	            f'(model size: ↓ {size_reduction:.2f}x, '
	            f'latency: ↓ {latency_reduction:.2f}x)'
	        )

	        return master_dnn