slot_tagging / model.py
xjlulu's picture
"good run"
fba58f1
import torch
import torch.nn as nn
import torch.nn.functional as F
class SeqClassifier(nn.Module):
    """GRU-based sequence encoder with a linear output head.

    The constructor builds a trainable embedding layer initialised from
    pretrained weights, a batch-first (optionally bidirectional) GRU, a
    dropout layer, and a linear layer that maps the GRU output features to
    ``num_class`` scores. This class defines no ``forward``; subclasses are
    expected to supply one.
    """

    def __init__(
        self,
        embeddings: torch.Tensor,
        hidden_size: int,
        num_layers: int,
        dropout: float,
        bidirectional: bool,
        num_class: int,
    ) -> None:
        """Create the embedding, recurrent, dropout and output layers.

        Args:
            embeddings: Pretrained embedding matrix; ``embeddings.size(1)``
                is used as the GRU input size.
            hidden_size: Number of hidden units per GRU direction.
            num_layers: Number of stacked GRU layers.
            dropout: Dropout probability used by ``dropout_layer`` and,
                when ``num_layers > 1``, between the stacked GRU layers.
            bidirectional: Whether the GRU runs in both directions.
            num_class: Number of output classes of the linear head.
        """
        super().__init__()
        # freeze=False keeps the pretrained vectors trainable.
        self.embed = nn.Embedding.from_pretrained(embeddings, freeze=False)
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.dropout = dropout
        self.bidirectional = bidirectional
        self.num_class = num_class

        # Model architecture
        self.rnn = nn.GRU(
            input_size=embeddings.size(1),
            hidden_size=hidden_size,
            num_layers=num_layers,
            # nn.GRU only applies dropout between stacked layers; a non-zero
            # value with a single layer does nothing and raises a UserWarning,
            # so pass 0.0 in that case.
            dropout=dropout if num_layers > 1 else 0.0,
            bidirectional=bidirectional,
            batch_first=True,
        )
        self.dropout_layer = nn.Dropout(p=self.dropout)
        self.fc = nn.Linear(self.encoder_output_size, num_class)

    @property
    def encoder_output_size(self) -> int:
        """Return the feature size of the GRU output.

        A bidirectional GRU concatenates both directions, doubling
        ``hidden_size``.
        """
        if self.bidirectional:
            return self.hidden_size * 2
        return self.hidden_size
class SeqTagger(SeqClassifier):
    """Per-token tagger built on the ``SeqClassifier`` layers.

    The constructor is inherited unchanged from ``SeqClassifier`` (same
    parameters, same order). ``forward`` scores every position of every
    sequence and returns the scores flattened over batch and time.
    """

    def forward(self, batch: torch.Tensor) -> torch.Tensor:
        """Return log-probabilities over tags for every token position.

        The same computation is used in training and evaluation mode; only
        the dropout layer itself behaves differently between the two.

        Args:
            batch: Token-index tensor of shape (batch_size, max_len).

        Returns:
            Tensor of shape (batch_size * max_len, num_class) holding
            log-softmax scores; rows are ordered sequence by sequence.
        """
        # Map each token index to its embedding vector.
        embedded = self.embed(batch)
        # Run the GRU along each sequence.
        # dim: batch_size x max_len x encoder_output_size
        encoded, _ = self.rnn(embedded)
        encoded = self.dropout_layer(encoded)
        # Flatten batch and time so the linear layer scores each token.
        # dim: batch_size*max_len x encoder_output_size
        flat = encoded.reshape(-1, encoded.shape[2])
        # Pass through the fully connected layer.
        logits = self.fc(flat)
        return F.log_softmax(logits, dim=1)  # dim: batch_size*max_len x num_tags