# Source: Epik/Model/COSMIC/erc_training/dataloader.py
# Commit a446b0b ("Pushed COSMIC code") by Minh Q. Le
import torch
from torch.utils.data import Dataset
from torch.nn.utils.rnn import pad_sequence
import pickle, pandas as pd
class IEMOCAPRobertaCometDataset(Dataset):
    """IEMOCAP conversations backed by pickled RoBERTa and COMET features.

    Each item is one conversation, identified by a key taken from the
    train/test/valid id collection stored in the RoBERTa pickle.
    """

    def __init__(self, split):
        '''
        label index mapping = {'hap':0, 'sad':1, 'neu':2, 'ang':3, 'exc':4, 'fru':5}

        Args:
            split: which id collection to expose: 'train', 'test' or 'valid'.

        Raises:
            ValueError: if ``split`` is not one of the three known names.
        '''
        # Fail fast, before the (potentially large) pickles are read.
        if split not in ('train', 'test', 'valid'):
            raise ValueError(
                "split must be 'train', 'test' or 'valid', got %r" % (split,))

        # NOTE(security): pickle.load can execute arbitrary code; these files
        # must come from a trusted source.
        with open('iemocap/iemocap_features_roberta.pkl', 'rb') as handle:
            (self.speakers, self.labels,
             self.roberta1, self.roberta2, self.roberta3, self.roberta4,
             self.sentences, self.trainIds, self.testIds, self.validIds
             ) = pickle.load(handle, encoding='latin1')

        with open('iemocap/iemocap_features_comet.pkl', 'rb') as handle:
            (self.xIntent, self.xAttr, self.xNeed, self.xWant, self.xEffect,
             self.xReact, self.oWant, self.oEffect, self.oReact
             ) = pickle.load(handle, encoding='latin1')

        ids_by_split = {
            'train': self.trainIds,
            'test': self.testIds,
            'valid': self.validIds,
        }
        self.keys = list(ids_by_split[split])
        self.len = len(self.keys)

    def __getitem__(self, index):
        """Return the 17-tuple of tensors (plus the key) for one conversation.

        Layout: 4 RoBERTa features, 9 COMET features, speaker one-hots,
        an all-ones utterance mask, the labels, and the conversation key.
        """
        vid = self.keys[index]
        labels = self.labels[vid]
        feature_tables = (
            self.roberta1, self.roberta2, self.roberta3, self.roberta4,
            self.xIntent, self.xAttr, self.xNeed, self.xWant, self.xEffect,
            self.xReact, self.oWant, self.oEffect, self.oReact,
        )
        features = tuple(
            torch.FloatTensor(table[vid]) for table in feature_tables)
        # Speaker 'M' -> [1, 0]; any other speaker tag -> [0, 1].
        speakers = torch.FloatTensor(
            [[1, 0] if x == 'M' else [0, 1] for x in self.speakers[vid]])
        # One mask entry per labelled utterance; padding added later is 0.
        mask = torch.FloatTensor([1] * len(labels))
        return features + (speakers, mask, torch.LongTensor(labels), vid)

    def __len__(self):
        """Return the number of conversations in the selected split."""
        return self.len

    def collate_fn(self, data):
        """Pad a list of ``__getitem__`` tuples into a batch.

        Fields 0-13 (features and speakers) are padded with the default
        ``pad_sequence`` layout (sequence dimension first); fields 14-15
        (mask and labels) are padded with ``batch_first=True``; any
        remaining field (the keys) is returned as a plain list.
        """
        # Transpose samples -> per-field columns. Plain lists are passed to
        # pad_sequence, which is documented to take a list of tensors.
        columns = [list(column) for column in zip(*data)]
        batch = []
        for i, column in enumerate(columns):
            if i < 14:
                batch.append(pad_sequence(column))
            elif i < 16:
                batch.append(pad_sequence(column, True))
            else:
                batch.append(column)
        return batch
class MELDRobertaCometDataset(Dataset):
    """MELD conversations backed by pickled RoBERTa and COMET features.

    The RoBERTa pickle carries both emotion and sentiment labels;
    ``classify`` selects which of the two is exposed as ``self.labels``.
    """

    def __init__(self, split, classify='emotion'):
        '''
        label index mapping = (not recorded in the original source)

        Args:
            split: which id collection to expose: 'train', 'test' or 'valid'.
            classify: 'emotion' selects the emotion labels; any other value
                selects the sentiment labels.

        Raises:
            ValueError: if ``split`` is not one of the three known names.
        '''
        # Fail fast, before the (potentially large) pickles are read.
        if split not in ('train', 'test', 'valid'):
            raise ValueError(
                "split must be 'train', 'test' or 'valid', got %r" % (split,))

        # NOTE(security): pickle.load can execute arbitrary code; these files
        # must come from a trusted source.
        with open('meld/meld_features_roberta.pkl', 'rb') as handle:
            (self.speakers, self.emotion_labels, self.sentiment_labels,
             self.roberta1, self.roberta2, self.roberta3, self.roberta4,
             self.sentences, self.trainIds, self.testIds, self.validIds
             ) = pickle.load(handle, encoding='latin1')

        with open('meld/meld_features_comet.pkl', 'rb') as handle:
            (self.xIntent, self.xAttr, self.xNeed, self.xWant, self.xEffect,
             self.xReact, self.oWant, self.oEffect, self.oReact
             ) = pickle.load(handle, encoding='latin1')

        ids_by_split = {
            'train': self.trainIds,
            'test': self.testIds,
            'valid': self.validIds,
        }
        self.keys = list(ids_by_split[split])

        if classify == 'emotion':
            self.labels = self.emotion_labels
        else:
            self.labels = self.sentiment_labels

        self.len = len(self.keys)

    def __getitem__(self, index):
        """Return the 17-tuple of tensors (plus the key) for one conversation.

        Layout: 4 RoBERTa features, 9 COMET features, speaker vectors,
        an all-ones utterance mask, the labels, and the conversation key.
        """
        vid = self.keys[index]
        labels = self.labels[vid]
        feature_tables = (
            self.roberta1, self.roberta2, self.roberta3, self.roberta4,
            self.xIntent, self.xAttr, self.xNeed, self.xWant, self.xEffect,
            self.xReact, self.oWant, self.oEffect, self.oReact,
        )
        features = tuple(
            torch.FloatTensor(table[vid]) for table in feature_tables)
        # Speaker entries are converted as stored (no one-hot built here).
        speakers = torch.FloatTensor(self.speakers[vid])
        # One mask entry per labelled utterance; padding added later is 0.
        mask = torch.FloatTensor([1] * len(labels))
        return features + (speakers, mask, torch.LongTensor(labels), vid)

    def __len__(self):
        """Return the number of conversations in the selected split."""
        return self.len

    def collate_fn(self, data):
        """Pad a list of ``__getitem__`` tuples into a batch.

        Fields 0-13 (features and speakers) are padded with the default
        ``pad_sequence`` layout (sequence dimension first); fields 14-15
        (mask and labels) are padded with ``batch_first=True``; any
        remaining field (the keys) is returned as a plain list.
        """
        # Transpose samples -> per-field columns. Plain lists are passed to
        # pad_sequence, which is documented to take a list of tensors.
        columns = [list(column) for column in zip(*data)]
        batch = []
        for i, column in enumerate(columns):
            if i < 14:
                batch.append(pad_sequence(column))
            elif i < 16:
                batch.append(pad_sequence(column, True))
            else:
                batch.append(column)
        return batch
class RobertaCometDataset(Dataset):
    """Conversations backed by pickled RoBERTa and COMET features.

    Unlike the corpus-specific datasets in this file, the two pickle
    locations are constructor parameters (defaulting to the EPIK files).
    """

    def __init__(self, split, path_roberta="epik/epik_features_roberta.pkl", path_comet="epik/epik_features_comet.pkl"):
        """Load both feature pickles and select the ids of one split.

        Args:
            split: which id collection to expose: 'train', 'test' or 'valid'.
            path_roberta: path of the RoBERTa feature pickle.
            path_comet: path of the COMET feature pickle.

        Raises:
            ValueError: if ``split`` is not one of the three known names.
        """
        # Fail fast, before the (potentially large) pickles are read.
        if split not in ('train', 'test', 'valid'):
            raise ValueError(
                "split must be 'train', 'test' or 'valid', got %r" % (split,))

        # NOTE(security): pickle.load can execute arbitrary code; only pass
        # paths to files from a trusted source.
        with open(path_roberta, 'rb') as handle:
            (self.speakers, self.labels,
             self.roberta1, self.roberta2, self.roberta3, self.roberta4,
             self.sentences, self.trainIds, self.testIds, self.validIds
             ) = pickle.load(handle, encoding='latin1')

        with open(path_comet, 'rb') as handle:
            (self.xIntent, self.xAttr, self.xNeed, self.xWant, self.xEffect,
             self.xReact, self.oWant, self.oEffect, self.oReact
             ) = pickle.load(handle, encoding='latin1')

        ids_by_split = {
            'train': self.trainIds,
            'test': self.testIds,
            'valid': self.validIds,
        }
        self.keys = list(ids_by_split[split])
        self.len = len(self.keys)

    def __getitem__(self, index):
        """Return the 17-tuple of tensors (plus the key) for one conversation.

        Layout: 4 RoBERTa features, 9 COMET features, speaker one-hots,
        an all-ones utterance mask, the labels, and the conversation key.
        """
        vid = self.keys[index]
        labels = self.labels[vid]
        feature_tables = (
            self.roberta1, self.roberta2, self.roberta3, self.roberta4,
            self.xIntent, self.xAttr, self.xNeed, self.xWant, self.xEffect,
            self.xReact, self.oWant, self.oEffect, self.oReact,
        )
        features = tuple(
            torch.FloatTensor(table[vid]) for table in feature_tables)
        # Speaker '0' (a string) -> [1, 0]; any other tag -> [0, 1].
        speakers = torch.FloatTensor(
            [[1, 0] if x == '0' else [0, 1] for x in self.speakers[vid]])
        # One mask entry per labelled utterance; padding added later is 0.
        mask = torch.FloatTensor([1] * len(labels))
        return features + (speakers, mask, torch.LongTensor(labels), vid)

    def __len__(self):
        """Return the number of conversations in the selected split."""
        return self.len

    def collate_fn(self, data):
        """Pad a list of ``__getitem__`` tuples into a batch.

        Fields 0-13 (features and speakers) are padded with the default
        ``pad_sequence`` layout (sequence dimension first); fields 14-15
        (mask and labels) are padded with ``batch_first=True``; any
        remaining field (the keys) is returned as a plain list.
        """
        # Transpose samples -> per-field columns. Plain lists are passed to
        # pad_sequence, which is documented to take a list of tensors.
        columns = [list(column) for column in zip(*data)]
        batch = []
        for i, column in enumerate(columns):
            if i < 14:
                batch.append(pad_sequence(column))
            elif i < 16:
                batch.append(pad_sequence(column, True))
            else:
                batch.append(column)
        return batch
class DailyDialogueRobertaCometDataset(Dataset):
    """DailyDialog conversations backed by pickled RoBERTa and COMET features.

    Each item is one conversation, identified by a key taken from the
    train/test/valid id collection stored in the RoBERTa pickle.
    """

    def __init__(self, split):
        """Load both feature pickles and select the ids of one split.

        Args:
            split: which id collection to expose: 'train', 'test' or 'valid'.

        Raises:
            ValueError: if ``split`` is not one of the three known names.
        """
        # Fail fast, before the (potentially large) pickles are read.
        if split not in ('train', 'test', 'valid'):
            raise ValueError(
                "split must be 'train', 'test' or 'valid', got %r" % (split,))

        # NOTE(security): pickle.load can execute arbitrary code; these files
        # must come from a trusted source.
        with open('dailydialog/dailydialog_features_roberta.pkl', 'rb') as handle:
            (self.speakers, self.labels,
             self.roberta1, self.roberta2, self.roberta3, self.roberta4,
             self.sentences, self.trainIds, self.testIds, self.validIds
             ) = pickle.load(handle, encoding='latin1')

        with open('dailydialog/dailydialog_features_comet.pkl', 'rb') as handle:
            (self.xIntent, self.xAttr, self.xNeed, self.xWant, self.xEffect,
             self.xReact, self.oWant, self.oEffect, self.oReact
             ) = pickle.load(handle, encoding='latin1')

        ids_by_split = {
            'train': self.trainIds,
            'test': self.testIds,
            'valid': self.validIds,
        }
        self.keys = list(ids_by_split[split])
        self.len = len(self.keys)

    def __getitem__(self, index):
        """Return the 17-tuple of tensors (plus the key) for one conversation.

        Layout: 4 RoBERTa features, 9 COMET features, speaker one-hots,
        an all-ones utterance mask, the labels, and the conversation key.
        """
        vid = self.keys[index]
        labels = self.labels[vid]
        feature_tables = (
            self.roberta1, self.roberta2, self.roberta3, self.roberta4,
            self.xIntent, self.xAttr, self.xNeed, self.xWant, self.xEffect,
            self.xReact, self.oWant, self.oEffect, self.oReact,
        )
        features = tuple(
            torch.FloatTensor(table[vid]) for table in feature_tables)
        # Speaker '0' (a string) -> [1, 0]; any other tag -> [0, 1].
        speakers = torch.FloatTensor(
            [[1, 0] if x == '0' else [0, 1] for x in self.speakers[vid]])
        # One mask entry per labelled utterance; padding added later is 0.
        mask = torch.FloatTensor([1] * len(labels))
        return features + (speakers, mask, torch.LongTensor(labels), vid)

    def __len__(self):
        """Return the number of conversations in the selected split."""
        return self.len

    def collate_fn(self, data):
        """Pad a list of ``__getitem__`` tuples into a batch.

        Fields 0-13 (features and speakers) are padded with the default
        ``pad_sequence`` layout (sequence dimension first); fields 14-15
        (mask and labels) are padded with ``batch_first=True``; any
        remaining field (the keys) is returned as a plain list.
        """
        # Transpose samples -> per-field columns. Plain lists are passed to
        # pad_sequence, which is documented to take a list of tensors.
        columns = [list(column) for column in zip(*data)]
        batch = []
        for i, column in enumerate(columns):
            if i < 14:
                batch.append(pad_sequence(column))
            elif i < 16:
                batch.append(pad_sequence(column, True))
            else:
                batch.append(column)
        return batch
class EmoryNLPRobertaCometDataset(Dataset):
    """EmoryNLP conversations backed by pickled RoBERTa and COMET features.

    The pickle stores emotion labels only; sentiment labels are derived
    from them when ``classify='sentiment'`` is requested.
    """

    # Emotion id -> sentiment id (0 negative, 1 neutral, 2 positive).
    _EMOTION_TO_SENTIMENT = {1: 0, 4: 0, 6: 0, 3: 1, 0: 2, 2: 2, 5: 2}

    def __init__(self, split, classify='emotion'):
        '''
        label index mapping = {'Joyful': 0, 'Mad': 1, 'Peaceful': 2, 'Neutral': 3, 'Sad': 4, 'Powerful': 5, 'Scared': 6}

        Args:
            split: which id collection to expose: 'train', 'test' or 'valid'.
            classify: 'emotion' for the stored labels, 'sentiment' for the
                derived 3-way labels.

        Raises:
            ValueError: if ``split`` or ``classify`` is not a known name.
        '''
        # Fail fast, before the (potentially large) pickles are read.
        if split not in ('train', 'test', 'valid'):
            raise ValueError(
                "split must be 'train', 'test' or 'valid', got %r" % (split,))
        if classify not in ('emotion', 'sentiment'):
            raise ValueError(
                "classify must be 'emotion' or 'sentiment', got %r"
                % (classify,))

        # NOTE(security): pickle.load can execute arbitrary code; these files
        # must come from a trusted source.
        with open('emorynlp/emorynlp_features_roberta.pkl', 'rb') as handle:
            (self.speakers, self.emotion_labels,
             self.roberta1, self.roberta2, self.roberta3, self.roberta4,
             self.sentences, self.trainId, self.testId, self.validId
             ) = pickle.load(handle, encoding='latin1')

        with open('emorynlp/emorynlp_features_comet.pkl', 'rb') as handle:
            (self.xIntent, self.xAttr, self.xNeed, self.xWant, self.xEffect,
             self.xReact, self.oWant, self.oEffect, self.oReact
             ) = pickle.load(handle, encoding='latin1')

        ids_by_split = {
            'train': self.trainId,
            'test': self.testId,
            'valid': self.validId,
        }
        self.keys = list(ids_by_split[split])

        if classify == 'emotion':
            self.labels = self.emotion_labels
        else:
            to_sentiment = self._EMOTION_TO_SENTIMENT
            # Emotion ids outside the table are skipped, which matches the
            # historical behaviour of this loader.
            self.labels = {
                item: [to_sentiment[e] for e in emotions if e in to_sentiment]
                for item, emotions in self.emotion_labels.items()
            }

        self.len = len(self.keys)

    def __getitem__(self, index):
        """Return the 17-tuple of tensors (plus the key) for one conversation.

        Layout: 4 RoBERTa features, 9 COMET features, speaker one-hots,
        an all-ones utterance mask, the labels, and the conversation key.
        """
        vid = self.keys[index]
        labels = self.labels[vid]
        feature_tables = (
            self.roberta1, self.roberta2, self.roberta3, self.roberta4,
            self.xIntent, self.xAttr, self.xNeed, self.xWant, self.xEffect,
            self.xReact, self.oWant, self.oEffect, self.oReact,
        )
        features = tuple(
            torch.FloatTensor(table[vid]) for table in feature_tables)
        # Speaker '0' (a string) -> [1, 0]; any other tag -> [0, 1].
        speakers = torch.FloatTensor(
            [[1, 0] if x == '0' else [0, 1] for x in self.speakers[vid]])
        # One mask entry per labelled utterance; padding added later is 0.
        mask = torch.FloatTensor([1] * len(labels))
        return features + (speakers, mask, torch.LongTensor(labels), vid)

    def __len__(self):
        """Return the number of conversations in the selected split."""
        return self.len

    def collate_fn(self, data):
        """Pad a list of ``__getitem__`` tuples into a batch.

        Fields 0-13 (features and speakers) are padded with the default
        ``pad_sequence`` layout (sequence dimension first); fields 14-15
        (mask and labels) are padded with ``batch_first=True``; any
        remaining field (the keys) is returned as a plain list.
        """
        # Transpose samples -> per-field columns. Plain lists are passed to
        # pad_sequence, which is documented to take a list of tensors.
        columns = [list(column) for column in zip(*data)]
        batch = []
        for i, column in enumerate(columns):
            if i < 14:
                batch.append(pad_sequence(column))
            elif i < 16:
                batch.append(pad_sequence(column, True))
            else:
                batch.append(column)
        return batch