import json

import torch
import torch.nn as nn

from vita.model.vita_tts.decoder.ticodec.models import Encoder
from vita.model.vita_tts.decoder.ticodec.models import Generator
from vita.model.vita_tts.decoder.ticodec.models import Quantizer


class AttrDict(dict):
    """Dict whose keys are also readable as attributes (h.key == h['key'])."""

    def __init__(self, *args, **kwargs):
        super(AttrDict, self).__init__(*args, **kwargs)
        # Alias the attribute namespace to the dict itself so json configs
        # loaded into this class can be accessed either way.
        self.__dict__ = self


class VQVAE(nn.Module):
    """Codec wrapper bundling a Quantizer + Generator (and optionally an
    Encoder) restored from a pretrained TiCodec checkpoint.

    Args:
        config_path: Path to the JSON hyperparameter file for the codec.
        ckpt_path: Path to a torch checkpoint containing 'generator',
            'quantizer' and (optionally) 'encoder' state dicts.
        with_encoder: When True, also build and restore the Encoder so
            `encode()` can be used.
    """

    def __init__(self, config_path, ckpt_path, with_encoder=False):
        super(VQVAE, self).__init__()
        # map_location='cpu' so loading works on CPU-only hosts even when the
        # checkpoint was saved from GPU; modules can be moved to GPU afterwards.
        ckpt = torch.load(ckpt_path, map_location='cpu')
        with open(config_path) as f:
            data = f.read()
        json_config = json.loads(data)
        self.h = AttrDict(json_config)
        self.quantizer = Quantizer(self.h)
        self.generator = Generator(self.h)
        self.generator.load_state_dict(ckpt['generator'])
        self.quantizer.load_state_dict(ckpt['quantizer'])
        if with_encoder:
            self.encoder = Encoder(self.h)
            self.encoder.load_state_dict(ckpt['encoder'])

    def forward(self, x, global_style_token):
        """Decode codebook indices back to a waveform.

        Args:
            x: Local codebook indices; assumes shape (B, T, Nq) — TODO confirm.
            global_style_token: Global style token indices.

        Returns:
            The Generator output (synthesized audio).
        """
        quant_emb = self.quantizer.embed(x)
        global_style_quantized_emb = \
            self.quantizer.embed_gst(global_style_token).squeeze(-1)
        return self.generator(quant_emb, global_style_quantized_emb)

    def encode(self, x):
        """Encode audio into local and global codebook indices.

        Args:
            x: Input audio; (B, T) or (B, T, 1) — the trailing singleton
               channel dim is squeezed away before encoding.

        Returns:
            Tuple of (local_tokens, global_style_tokens) where local tokens
            are stacked as (B, T', Nq) and global tokens as (B, 1, Ng)
            — shapes presumed from the stacking below; verify against caller.
        """
        batch_size = x.size(0)
        # Accept (B, T, 1) inputs by dropping the channel dimension.
        if len(x.shape) == 3 and x.shape[-1] == 1:
            x = x.squeeze(-1)
        c, global_features = self.encoder(x.unsqueeze(1))
        q, loss_q, local_token, g, global_style_token = self.quantizer(
            c, global_features)
        # Flatten each codebook's indices to (B, -1), then stack codebooks last.
        local_token = [code.reshape(batch_size, -1) for code in local_token]
        global_style_token = torch.stack(global_style_token, -1).unsqueeze(1)
        return torch.stack(local_token, -1), global_style_token