import torch from torch.utils.data import Dataset from torch.nn.utils.rnn import pad_sequence import pickle, pandas as pd class IEMOCAPRobertaCometDataset(Dataset): def __init__(self, split): ''' label index mapping = {'hap':0, 'sad':1, 'neu':2, 'ang':3, 'exc':4, 'fru':5} ''' self.speakers, self.labels, \ self.roberta1, self.roberta2, self.roberta3, self.roberta4,\ self.sentences, self.trainIds, self.testIds, self.validIds \ = pickle.load(open('iemocap/iemocap_features_roberta.pkl', 'rb'), encoding='latin1') self.xIntent, self.xAttr, self.xNeed, self.xWant, self.xEffect, self.xReact, self.oWant, self.oEffect, self.oReact \ = pickle.load(open('iemocap/iemocap_features_comet.pkl', 'rb'), encoding='latin1') if split == 'train': self.keys = [x for x in self.trainIds] elif split == 'test': self.keys = [x for x in self.testIds] elif split == 'valid': self.keys = [x for x in self.validIds] self.len = len(self.keys) def __getitem__(self, index): vid = self.keys[index] return torch.FloatTensor(self.roberta1[vid]),\ torch.FloatTensor(self.roberta2[vid]),\ torch.FloatTensor(self.roberta3[vid]),\ torch.FloatTensor(self.roberta4[vid]),\ torch.FloatTensor(self.xIntent[vid]),\ torch.FloatTensor(self.xAttr[vid]),\ torch.FloatTensor(self.xNeed[vid]),\ torch.FloatTensor(self.xWant[vid]),\ torch.FloatTensor(self.xEffect[vid]),\ torch.FloatTensor(self.xReact[vid]),\ torch.FloatTensor(self.oWant[vid]),\ torch.FloatTensor(self.oEffect[vid]),\ torch.FloatTensor(self.oReact[vid]),\ torch.FloatTensor([[1,0] if x=='M' else [0,1] for x in self.speakers[vid]]),\ torch.FloatTensor([1]*len(self.labels[vid])),\ torch.LongTensor(self.labels[vid]),\ vid def __len__(self): return self.len def collate_fn(self, data): dat = pd.DataFrame(data) return [pad_sequence(dat[i]) if i<14 else pad_sequence(dat[i], True) if i<16 else dat[i].tolist() for i in dat] class MELDRobertaCometDataset(Dataset): def __init__(self, split, classify='emotion'): ''' label index mapping = ''' self.speakers, self.emotion_labels, self.sentiment_labels, \ self.roberta1, self.roberta2, self.roberta3, self.roberta4, \ self.sentences, self.trainIds, self.testIds, self.validIds \ = pickle.load(open('meld/meld_features_roberta.pkl', 'rb'), encoding='latin1') self.xIntent, self.xAttr, self.xNeed, self.xWant, self.xEffect, self.xReact, self.oWant, self.oEffect, self.oReact \ = pickle.load(open('meld/meld_features_comet.pkl', 'rb'), encoding='latin1') if split == 'train': self.keys = [x for x in self.trainIds] elif split == 'test': self.keys = [x for x in self.testIds] elif split == 'valid': self.keys = [x for x in self.validIds] if classify == 'emotion': self.labels = self.emotion_labels else: self.labels = self.sentiment_labels self.len = len(self.keys) def __getitem__(self, index): vid = self.keys[index] return torch.FloatTensor(self.roberta1[vid]),\ torch.FloatTensor(self.roberta2[vid]),\ torch.FloatTensor(self.roberta3[vid]),\ torch.FloatTensor(self.roberta4[vid]),\ torch.FloatTensor(self.xIntent[vid]),\ torch.FloatTensor(self.xAttr[vid]),\ torch.FloatTensor(self.xNeed[vid]),\ torch.FloatTensor(self.xWant[vid]),\ torch.FloatTensor(self.xEffect[vid]),\ torch.FloatTensor(self.xReact[vid]),\ torch.FloatTensor(self.oWant[vid]),\ torch.FloatTensor(self.oEffect[vid]),\ torch.FloatTensor(self.oReact[vid]),\ torch.FloatTensor(self.speakers[vid]),\ torch.FloatTensor([1]*len(self.labels[vid])),\ torch.LongTensor(self.labels[vid]),\ vid def __len__(self): return self.len def collate_fn(self, data): dat = pd.DataFrame(data) return [pad_sequence(dat[i]) if i<14 else pad_sequence(dat[i], True) if i<16 else dat[i].tolist() for i in dat] class RobertaCometDataset(Dataset): def __init__(self, split, path_roberta="epik/epik_features_roberta.pkl", path_comet="epik/epik_features_comet.pkl"): self.speakers, self.labels, \ self.roberta1, self.roberta2, self.roberta3, self.roberta4, \ self.sentences, self.trainIds, self.testIds, self.validIds \ = pickle.load(open(path_roberta, 'rb'), encoding='latin1') self.xIntent, self.xAttr, self.xNeed, self.xWant, self.xEffect, self.xReact, self.oWant, self.oEffect, self.oReact \ = pickle.load(open(path_comet, 'rb'), encoding='latin1') if split == 'train': self.keys = [x for x in self.trainIds] elif split == 'test': self.keys = [x for x in self.testIds] elif split == 'valid': self.keys = [x for x in self.validIds] self.len = len(self.keys) def __getitem__(self, index): vid = self.keys[index] return torch.FloatTensor(self.roberta1[vid]),\ torch.FloatTensor(self.roberta2[vid]),\ torch.FloatTensor(self.roberta3[vid]),\ torch.FloatTensor(self.roberta4[vid]),\ torch.FloatTensor(self.xIntent[vid]),\ torch.FloatTensor(self.xAttr[vid]),\ torch.FloatTensor(self.xNeed[vid]),\ torch.FloatTensor(self.xWant[vid]),\ torch.FloatTensor(self.xEffect[vid]),\ torch.FloatTensor(self.xReact[vid]),\ torch.FloatTensor(self.oWant[vid]),\ torch.FloatTensor(self.oEffect[vid]),\ torch.FloatTensor(self.oReact[vid]),\ torch.FloatTensor([[1,0] if x=='0' else [0,1] for x in self.speakers[vid]]),\ torch.FloatTensor([1]*len(self.labels[vid])),\ torch.LongTensor(self.labels[vid]),\ vid def __len__(self): return self.len def collate_fn(self, data): dat = pd.DataFrame(data) return [pad_sequence(dat[i]) if i<14 else pad_sequence(dat[i], True) if i<16 else dat[i].tolist() for i in dat] class DailyDialogueRobertaCometDataset(Dataset): def __init__(self, split): self.speakers, self.labels, \ self.roberta1, self.roberta2, self.roberta3, self.roberta4, \ self.sentences, self.trainIds, self.testIds, self.validIds \ = pickle.load(open('dailydialog/dailydialog_features_roberta.pkl', 'rb'), encoding='latin1') self.xIntent, self.xAttr, self.xNeed, self.xWant, self.xEffect, self.xReact, self.oWant, self.oEffect, self.oReact \ = pickle.load(open('dailydialog/dailydialog_features_comet.pkl', 'rb'), encoding='latin1') if split == 'train': self.keys = [x for x in self.trainIds] elif split == 'test': self.keys = [x for x in self.testIds] elif split == 'valid': self.keys = [x for x in self.validIds] self.len = len(self.keys) def __getitem__(self, index): vid = self.keys[index] return torch.FloatTensor(self.roberta1[vid]),\ torch.FloatTensor(self.roberta2[vid]),\ torch.FloatTensor(self.roberta3[vid]),\ torch.FloatTensor(self.roberta4[vid]),\ torch.FloatTensor(self.xIntent[vid]),\ torch.FloatTensor(self.xAttr[vid]),\ torch.FloatTensor(self.xNeed[vid]),\ torch.FloatTensor(self.xWant[vid]),\ torch.FloatTensor(self.xEffect[vid]),\ torch.FloatTensor(self.xReact[vid]),\ torch.FloatTensor(self.oWant[vid]),\ torch.FloatTensor(self.oEffect[vid]),\ torch.FloatTensor(self.oReact[vid]),\ torch.FloatTensor([[1,0] if x=='0' else [0,1] for x in self.speakers[vid]]),\ torch.FloatTensor([1]*len(self.labels[vid])),\ torch.LongTensor(self.labels[vid]),\ vid def __len__(self): return self.len def collate_fn(self, data): dat = pd.DataFrame(data) return [pad_sequence(dat[i]) if i<14 else pad_sequence(dat[i], True) if i<16 else dat[i].tolist() for i in dat] class EmoryNLPRobertaCometDataset(Dataset): def __init__(self, split, classify='emotion'): ''' label index mapping = {'Joyful': 0, 'Mad': 1, 'Peaceful': 2, 'Neutral': 3, 'Sad': 4, 'Powerful': 5, 'Scared': 6} ''' self.speakers, self.emotion_labels, \ self.roberta1, self.roberta2, self.roberta3, self.roberta4, \ self.sentences, self.trainId, self.testId, self.validId \ = pickle.load(open('emorynlp/emorynlp_features_roberta.pkl', 'rb'), encoding='latin1') sentiment_labels = {} for item in self.emotion_labels: array = [] # 0 negative, 1 neutral, 2 positive for e in self.emotion_labels[item]: if e in [1, 4, 6]: array.append(0) elif e == 3: array.append(1) elif e in [0, 2, 5]: array.append(2) sentiment_labels[item] = array self.xIntent, self.xAttr, self.xNeed, self.xWant, self.xEffect, self.xReact, self.oWant, self.oEffect, self.oReact \ = pickle.load(open('emorynlp/emorynlp_features_comet.pkl', 'rb'), encoding='latin1') if split == 'train': self.keys = [x for x in self.trainId] elif split == 'test': self.keys = [x for x in self.testId] elif split == 'valid': self.keys = [x for x in self.validId] if classify == 'emotion': self.labels = self.emotion_labels elif classify == 'sentiment': self.labels = sentiment_labels self.len = len(self.keys) def __getitem__(self, index): vid = self.keys[index] return torch.FloatTensor(self.roberta1[vid]),\ torch.FloatTensor(self.roberta2[vid]),\ torch.FloatTensor(self.roberta3[vid]),\ torch.FloatTensor(self.roberta4[vid]),\ torch.FloatTensor(self.xIntent[vid]),\ torch.FloatTensor(self.xAttr[vid]),\ torch.FloatTensor(self.xNeed[vid]),\ torch.FloatTensor(self.xWant[vid]),\ torch.FloatTensor(self.xEffect[vid]),\ torch.FloatTensor(self.xReact[vid]),\ torch.FloatTensor(self.oWant[vid]),\ torch.FloatTensor(self.oEffect[vid]),\ torch.FloatTensor(self.oReact[vid]),\ torch.FloatTensor([[1,0] if x=='0' else [0,1] for x in self.speakers[vid]]),\ torch.FloatTensor([1]*len(self.labels[vid])),\ torch.LongTensor(self.labels[vid]),\ vid def __len__(self): return self.len def collate_fn(self, data): dat = pd.DataFrame(data) return [pad_sequence(dat[i]) if i<14 else pad_sequence(dat[i], True) if i<16 else dat[i].tolist() for i in dat]