# DepNLP-MentalRiskEs/src/roberta_regressor.py
"""
Defines wrapper classes around a pretrained RoBERTa model to do regression on text data,
plus training and evaluation helpers.
Based on: https://www.kaggle.com/code/sumantindurkhya/bert-for-regression
"""
from typing import Union

import numpy as np
import torch
import torch.nn.functional as F
from torch import nn
from tqdm import trange
from transformers import RobertaModel, RobertaPreTrainedModel


class RobertaRegressor(RobertaPreTrainedModel):
    """RoBERTa encoder with a small MLP head for regression on text."""

    def __init__(self, config, num_outputs=1, dropout=0.1, freeze_bert=False):
        super().__init__(config)
        self.num_outputs = num_outputs
        self.roberta = RobertaModel(config)
        if freeze_bert:
            # freeze the RoBERTa parameters so only the regression head is trained
            for param in self.roberta.parameters():
                param.requires_grad = False
        # regression head: hidden_size -> 128 -> num_outputs
        self.classifier = nn.Linear(config.hidden_size, 128)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)
        self.tanh = nn.Tanh()
        self.regressor = nn.Linear(128, num_outputs)

    def forward(self, input_ids, attention_mask):
        # encode the text and take the pooled [CLS] representation
        base_out = self.roberta(input_ids=input_ids, attention_mask=attention_mask)
        pooled = base_out.pooler_output
        # regression head: linear projection, dropout and activations, final linear layer
        out = self.classifier(pooled)
        out = self.dropout(out)
        out = self.relu(out)
        out = self.tanh(out)
        out = self.dropout(out)
        out = self.regressor(out)
        return out

    def predict(self, text: str, tokenizer, device, numpy: bool = True) -> Union[np.ndarray, torch.Tensor]:
        """Predict the regression output(s) for a single text."""
        encoding = tokenizer(text, truncation=True, return_tensors='pt')
        input_ids = encoding['input_ids'].to(device)
        attention_mask = encoding['attention_mask'].to(device)
        with torch.no_grad():
            output = self(input_ids, attention_mask).squeeze()
        if numpy:
            return output.cpu().numpy()
        return output
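

# A minimal usage sketch for RobertaRegressor. The base checkpoint name, the example
# text and the device handling are assumptions for illustration; they are not defined
# by this module:
#
#   from transformers import AutoTokenizer
#
#   device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#   tokenizer = AutoTokenizer.from_pretrained("PlanTL-GOB-ES/roberta-base-bne")
#   model = RobertaRegressor.from_pretrained(
#       "PlanTL-GOB-ES/roberta-base-bne", num_outputs=1
#   ).to(device)
#   score = model.predict("Hoy me siento muy cansado", tokenizer, device)  # numpy scalar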


class RobertaSeqMultiRegressor(RobertaPreTrainedModel):
    """
    A wrapper around a pretrained RoBERTa model for multi-output regression on text data,
    i.e. predicting several values from a single text input.
    The problem is formulated sequentially: the model predicts each output conditioned
    on the outputs it has already produced, similar to how an RNN consumes its own
    previous state. This approach is well suited to correlated outputs, such as
    probability distributions whose components must sum to 1, or next-word prediction,
    where each prediction depends on the ones before it.
    The architecture matches RobertaRegressor except for the head, which is a chain of
    small regressors: head i receives the pooled RoBERTa representation concatenated
    with the i previously predicted values.
    """

    def __init__(self, config, num_outputs=1, dropout=0.1, freeze_bert=False):
        super().__init__(config)
        self.num_outputs = num_outputs
        self.roberta = RobertaModel(config)
        if freeze_bert:
            # freeze the RoBERTa parameters so only the regression heads are trained
            for param in self.roberta.parameters():
                param.requires_grad = False
        # one head per output; head i also receives the i previous predictions as input
        for i in range(num_outputs):
            setattr(self, f"regressor_{i}", nn.Linear(config.hidden_size + i, 128))
        # shared projection that turns each head's 128-dim features into a scalar
        self.out_proj = nn.Linear(128, 1)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)
        self.tanh = nn.Tanh()

    def forward(self, input_ids, attention_mask):
        # encode the text and take the pooled [CLS] representation
        base_out = self.roberta(input_ids=input_ids, attention_mask=attention_mask)
        pooled = base_out.pooler_output
        outputs = []
        for i in range(self.num_outputs):
            # condition head i on the pooled features and all previous predictions
            head_input = torch.cat([pooled] + outputs, dim=-1)
            out = getattr(self, f"regressor_{i}")(head_input)
            out = self.dropout(out)
            out = self.relu(out)
            out = self.tanh(out)
            outputs.append(self.out_proj(out))
        # shape: (batch_size, num_outputs)
        return torch.cat(outputs, dim=-1)
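

# A minimal shape-check sketch for RobertaSeqMultiRegressor with randomly initialised
# regression heads (the checkpoint name and example text are assumptions, reusing the
# tokenizer from the sketch above):
#
#   model = RobertaSeqMultiRegressor.from_pretrained(
#       "PlanTL-GOB-ES/roberta-base-bne", num_outputs=4
#   )
#   enc = tokenizer("algún texto de ejemplo", truncation=True, return_tensors="pt")
#   preds = model(enc["input_ids"], enc["attention_mask"])  # tensor of shape (1, 4)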


def sum_diff_loss(output, target):
    """Sum of absolute differences between predictions and targets (L1-style penalty)."""
    return torch.sum(torch.abs(output - target))


def evaluate(model, criterion, dataloader, device):
    """Compute the average loss of the model over a dataloader."""
    model.eval()
    total_loss, count = 0.0, 0
    with torch.no_grad():
        for input_ids, attention_mask, target in dataloader:
            input_ids, attention_mask, target = input_ids.to(device), attention_mask.to(device), target.to(device)
            output = model(input_ids, attention_mask)
            total_loss += criterion(output.squeeze(), target.type_as(output)).item()
            count += 1
    return total_loss / count


def predict(model, dataloader, device):
    """Run the model over a dataloader and return (predictions, targets) as flat lists."""
    model.eval()
    predicted_label, actual_label = [], []
    with torch.no_grad():
        for input_ids, attention_mask, target in dataloader:
            input_ids, attention_mask = input_ids.to(device), attention_mask.to(device)
            output = model(input_ids, attention_mask)
            predicted_label += output.squeeze(-1).cpu().tolist()
            actual_label += target.tolist()
    return predicted_label, actual_label


def train(model, criterion, optimizer, train_loader, val_loader, epochs, device):
    """Train the model and report the training and validation loss after each epoch."""
    for epoch in trange(epochs, desc="Epoch"):
        model.train()
        train_loss = 0.0
        for input_ids, attention_mask, target in train_loader:
            optimizer.zero_grad()
            input_ids, attention_mask, target = input_ids.to(device), attention_mask.to(device), target.to(device)
            output = model(input_ids=input_ids, attention_mask=attention_mask)
            loss = criterion(output.squeeze(), target.type_as(output))
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        print(f"Training loss is {train_loss / len(train_loader)}")
        val_loss = evaluate(model=model, criterion=criterion, dataloader=val_loader, device=device)
        print(f"Epoch {epoch} complete! Validation Loss : {val_loss}")
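

# A minimal training-setup sketch. `train_loader` and `val_loader` are assumed to be
# DataLoaders yielding (input_ids, attention_mask, target) batches, and the learning
# rate and epoch count are placeholder assumptions:
#
#   criterion = nn.MSELoss()
#   optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)
#   train(model, criterion, optimizer, train_loader, val_loader, epochs=3, device=device)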


def multi_reg_loss(loss='mse', sum_diff_penalty: float = 0.0):
    """
    Build a loss function that penalizes the difference between the sums of the
    predicted and actual values for multi-output regression.
    This guides the model towards predictions where
    sum(y_hat1, y_hat2, ...) = sum(y1, y2, ...),
    e.g. in task d, sum(label1, label2, label3, label4) = 1
    since it is a probability distribution.

    Parameters
    ----------
    loss : str, optional
        The base loss function to be used, by default 'mse'.
        Available options: 'mse' and 'cross_entropy',
        for mean squared error and cross-entropy loss respectively.
    sum_diff_penalty : float, optional
        Weight applied to the squared difference between the sums of the predicted
        and actual values, by default 0.0 (no penalty).
    """
    if loss == 'mse':
        loss_func = F.mse_loss
    elif loss == 'cross_entropy':
        loss_func = F.cross_entropy
    else:
        raise ValueError("Invalid loss function. Available options: 'mse' and 'cross_entropy'")

    def reg_loss(pred, target):
        # base loss (MSE or cross entropy)
        base = loss_func(pred, target)
        # penalize the per-sample difference between the sums of predicted and actual values
        sum_diff = torch.square(pred.sum(dim=-1) - target.sum(dim=-1)).mean()
        return base + sum_diff_penalty * sum_diff
    return reg_loss
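

if __name__ == "__main__":
    # Tiny sanity check of multi_reg_loss on hand-made tensors (illustrative only;
    # the values are made up and are not part of the MentalRiskEs pipeline).
    criterion = multi_reg_loss(loss='mse', sum_diff_penalty=0.5)
    pred = torch.tensor([[0.4, 0.3, 0.2, 0.0]])    # predicted distribution sums to 0.9
    target = torch.tensor([[0.4, 0.3, 0.2, 0.1]])  # true distribution sums to 1.0
    # total loss = MSE + 0.5 * (0.9 - 1.0)^2
    print(criterion(pred, target).item())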