amd-shark
/

sdxl-quant-int8

Inference Endpoints

Model card Files Files and versions Community

sdxl-quant-int8 / test_quant_linear.py

nickfraser's picture

Update quant params structure (#2)

6b62ce4 verified 4 months ago

history blame contribute delete

1.04 kB

	import torch
	import torch.nn as nn
	from math_model import QuantLinear

	# Smoke test for QuantLinear: build a float nn.Linear, derive a set of
	# quantization parameters from it, then run the quantized layer twice on the
	# same input (default call and qop=True) and print how much the outputs differ.

	# Seed first so the random input, the layer initialisation and the random
	# smoothquant multiplier below are reproducible from run to run.
	torch.manual_seed(0)

	batch_size = 1
	out_ch = 128
	in_ch = 64

	# Random input drawn uniformly from [-1, 1).
	inp = 2 * torch.rand((batch_size, in_ch)) - 1.
	# Float reference layer; its weights and bias are copied into QuantLinear later.
	float_linear = nn.Linear(in_ch, out_ch, bias=True)

	# NOTE: the order of the random draws (input -> Linear init -> smoothquant_mul)
	# determines the generated values, so keep this statement after the layer.
	smoothquant_mul = torch.rand((in_ch,))

	# Largest absolute weight in each output row; shared by the scale and the
	# zero-point computations.
	weight = float_linear.weight
	row_abs_max = torch.max(torch.abs(weight), dim=1).values

	weight_scale = row_abs_max / 128.
	# Row mean expressed in units of the weight scale, rounded and limited to
	# the signed 8-bit range.
	row_mean = torch.mean(weight, dim=1)
	weight_zp = torch.clamp(torch.round(row_mean * (128 / row_abs_max)), -128, 127)

	# A single scale for the whole input tensor.
	input_scale = torch.max(torch.abs(inp)) / 128.
	input_zp = torch.zeros((1,))

	# The *_shape / *_dtype entries are passed alongside each tensor;
	# presumably QuantLinear uses them to reshape/cast -- confirm in math_model.
	quant_params = {
	    'smoothquant_mul': smoothquant_mul,
	    'smoothquant_mul_shape': (1, in_ch),
	    'weight_scale': weight_scale,
	    'weight_scale_shape': (out_ch, 1),
	    'weight_zp': weight_zp,
	    'weight_zp_shape': (out_ch, 1),
	    'weight_zp_dtype': 'torch.int8',
	    'input_scale': input_scale,
	    'input_scale_shape': tuple(),
	    'input_zp': input_zp,
	    'input_zp_shape': tuple(),
	    'input_zp_dtype': 'torch.int8',
	}

	print(quant_params)

	ql = QuantLinear(in_ch, out_ch, quant_params)
	# Give the quantized layer exactly the same weights/bias as the float one.
	ql.linear.load_state_dict(float_linear.state_dict())

	# Same input through both call modes of QuantLinear.
	# NOTE(review): names suggest quantize-dequantize vs. quantized-op paths --
	# verify against math_model.QuantLinear.
	o_qdq = ql(inp)
	o_qop = ql(inp, qop=True)

	print(o_qdq.shape)
	print(o_qop.shape)
	# Element-wise difference between the two results.
	print(o_qdq - o_qop)