# intent_classifier/model.py
import torch
import torch.nn as nn

device = "cpu"

class SeqClassifier(nn.Module):
    def __init__(
        self,
        embeddings: torch.Tensor,
        hidden_size: int,
        num_layers: int,
        dropout: float,
        bidirectional: bool,
        num_class: int,
    ) -> None:
        super().__init__()
        # Model hyperparameters
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.dropout = dropout
        self.bidirectional = bidirectional
        self.num_class = num_class

        # Word embedding layer initialized from pretrained vectors
        # (freeze=False lets them be fine-tuned during training)
        self.embed = nn.Embedding.from_pretrained(embeddings, freeze=False)

        # GRU encoder
        self.rnn = nn.GRU(
            input_size=embeddings.size(1),
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout=dropout,
            bidirectional=bidirectional,
            batch_first=True,
        )

        # Dropout applied to the GRU outputs
        self.dropout_layer = nn.Dropout(p=dropout)

        # Fully connected layer mapping the encoder output to class logits
        self.fc = nn.Linear(self.encoder_output_size, num_class)

    @property
    def encoder_output_size(self) -> int:
        # Output dimension of the GRU: doubled when bidirectional, since the
        # forward and backward hidden states are concatenated
        if self.bidirectional:
            return self.hidden_size * 2
        return self.hidden_size

    def forward(self, batch: torch.Tensor) -> torch.Tensor:
        # Map token ids into the word embedding space
        embedded = self.embed(batch)
        # Pass through the GRU layer
        rnn_output, _ = self.rnn(embedded)
        rnn_output = self.dropout_layer(rnn_output)

        # Concatenate the forward hidden state at the last timestep with the
        # backward hidden state at the first timestep. When the GRU is
        # unidirectional, the backward slice is empty and this reduces to the
        # forward state alone, matching encoder_output_size.
        if not self.training:
            # Inference path: expects a single unbatched sequence, so
            # rnn_output has shape (seq_len, num_directions * hidden_size)
            last_hidden_state_forward = rnn_output[-1, :self.hidden_size]
            last_hidden_state_backward = rnn_output[0, self.hidden_size:]
            combined_hidden_state = torch.cat((last_hidden_state_forward, last_hidden_state_backward), dim=0)
        else:
            # Training path: expects a batch, so rnn_output has shape
            # (batch, seq_len, num_directions * hidden_size)
            last_hidden_state_forward = rnn_output[:, -1, :self.hidden_size]
            last_hidden_state_backward = rnn_output[:, 0, self.hidden_size:]
            combined_hidden_state = torch.cat((last_hidden_state_forward, last_hidden_state_backward), dim=1)

        # Pass through the fully connected layer to get class logits
        logits = self.fc(combined_hidden_state)
        return logits
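

# A minimal usage sketch (not part of the original module): the vocabulary
# size, embedding dimension, and class count below are illustrative
# assumptions, and the random tensor stands in for real pretrained
# embeddings. It exercises both paths forward() supports: batched input in
# training mode and a single unbatched sequence in eval mode.
if __name__ == "__main__":
    vocab_size, embed_dim = 5000, 300  # hypothetical values, not from the original
    pretrained = torch.randn(vocab_size, embed_dim)  # stand-in for pretrained embeddings

    model = SeqClassifier(
        embeddings=pretrained,
        hidden_size=128,
        num_layers=2,
        dropout=0.1,
        bidirectional=True,
        num_class=150,
    ).to(device)

    # Training path: a batch of 4 padded sequences of length 16
    model.train()
    batch = torch.randint(0, vocab_size, (4, 16))
    print(model(batch).shape)  # torch.Size([4, 150])

    # Inference path: a single unbatched sequence of length 16
    model.eval()
    with torch.no_grad():
        single = torch.randint(0, vocab_size, (16,))
        print(model(single).shape)  # torch.Size([150])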