# Gradio demo: intent classification with a pre-trained SeqClassifier checkpoint.
import gradio as gr
from typing import Dict
import torch
from torch.utils.data import DataLoader
import json
import pickle
from pathlib import Path

from dataset import SeqClsDataset
from utils import Vocab
from model import SeqClassifier

# Hyperparameters (must match the values used during training)
max_len = 128
hidden_size = 256
num_layers = 2
dropout = 0.1
bidirectional = True
lr = 1e-3
batch_size = 64
num_epoch = 5

TRAIN = "train"
DEV = "eval"
TEST = "test"
SPLITS = [TRAIN, DEV, TEST]

device = "cpu"
data_dir = Path("./data/intent/")
ckpt_dir = Path("./ckpt/intent/")
cache_dir = Path("./cache/intent/")

# Before executing, place intent2idx.json, embeddings.pt, vocab.pkl, and utils.py in /content
with open(cache_dir / "vocab.pkl", "rb") as f:
    vocab: Vocab = pickle.load(f)

intent_idx_path = cache_dir / "intent2idx.json"
intent2idx: Dict[str, int] = json.loads(intent_idx_path.read_text())

data_paths = {split: data_dir / f"{split}.json" for split in SPLITS}
data = {split: json.loads(path.read_text()) for split, path in data_paths.items()}
datasets: Dict[str, SeqClsDataset] = {
    split: SeqClsDataset(split_data, vocab, intent2idx, max_len)
    for split, split_data in data.items()
}
# Only needed for offline evaluation; the Gradio demo itself does not use it.
test_loader = DataLoader(datasets['test'], batch_size=batch_size, shuffle=False)

embeddings = torch.load(cache_dir / "embeddings.pt")
embeddings = embeddings.to(device)  # Tensor.to() is not in-place

# Load the best model after training:
# initialize a new model with the same architecture as at training time.
best_model = SeqClassifier(
    embeddings=embeddings,
    hidden_size=hidden_size,
    num_layers=num_layers,
    dropout=dropout,
    bidirectional=bidirectional,
    num_class=len(intent2idx)
).to(device)

# Define the path to the checkpoint file
ckpt_path = ckpt_dir / "model_checkpoint.pth"
# Load the model's and optimizer's state_dicts from the checkpoint
checkpoint = torch.load(ckpt_path, map_location=torch.device('cpu'))
# Load the model's weights (load_state_dict returns key-matching info,
# not the model, so it cannot be chained with .to(device))
best_model.load_state_dict(checkpoint['model_state_dict'])
# The optimizer state is only needed to resume training, not for inference:
# import torch.optim as optim
# weight_decay = 1e-5
# optimizer = optim.Adam(best_model.parameters(), lr=lr, weight_decay=weight_decay)
# optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
# Retrieve the epoch number at which the checkpoint was saved
epoch = checkpoint['epoch']
# Set the best model to evaluation mode
best_model.eval()

# Build the reverse mapping from class index back to intent label
dic_idx2label = {idx: intent for intent, idx in intent2idx.items()}

def Tidx2label(idx: int) -> str:
    return dic_idx2label[idx]

# Turn a sentence into a tensor of vocabulary indices for the embedding layer
def collate_fn(text: str) -> torch.Tensor:
    # Split the input sentence into whitespace-separated tokens
    tokens = text.split()
    # Use vocab to map the tokens to integer indices, padded/truncated to max_len
    encoded = vocab.encode_batch([tokens], to_len=max_len)
    # Convert the nested index list into a PyTorch tensor of shape (1, max_len)
    return torch.tensor(encoded)

def classify(text):
    encoded_text = collate_fn(text).to(device)
    with torch.no_grad():
        output = best_model(encoded_text[0])
    predicted_class = torch.argmax(output).item()
    return Tidx2label(predicted_class)

demo = gr.Interface(
    fn=classify,
    inputs=gr.Textbox(placeholder="Enter some text..."),
    outputs="label",
    interpretation="default",
    examples=[
        ["Take me to church"],
        ["tell me what to call you"],
        ["could you be a person"]
    ]
)
demo.launch()
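
# Optional smoke test (a sketch, not part of the original pipeline): with
# prevent_thread_lock=True, launch() returns immediately instead of blocking,
# so classify() can be exercised from the same script before serving requests.
#
#     demo.launch(prevent_thread_lock=True)
#     print(classify("tell me what to call you"))
#
# When running in Colab (as the /content hint above suggests), passing
# share=True to demo.launch() additionally creates a temporary public URL.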