I get the error below (full traceback at the end of the post) while trying to build a multiclass text classification network using an LSTM (RNN) in PyTorch. The training part of the code runs fine, but the error is thrown during the validation step. Below are the network's forward/init_hidden methods and the training code. I'd appreciate any help.
I started from existing code that predicts sentiment with an RNN, replaced the final sigmoid with a softmax, and changed the loss function from BCELoss to NLLLoss().
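For reference, my understanding is that nn.NLLLoss expects log-probabilities of shape (batch_size, num_classes) plus a 1-D tensor of integer class labels. A minimal standalone check of that (the batch size of 50 is just an illustrative value, not my real setting):

import torch
import torch.nn as nn

# 44 classes, as in my model; 50 is an arbitrary example batch size
log_probs = nn.LogSoftmax(dim=1)(torch.randn(50, 44))  # (batch_size, num_classes)
targets = torch.randint(0, 44, (50,))                  # integer class indices
print(nn.NLLLoss()(log_probs, targets).item())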
def forward(self, x, hidden):
    """
    Perform a forward pass of our model on some input and hidden state.
    """
    batch_size = x.size(0)
    embeds = self.embedding(x)
    lstm_out, hidden = self.lstm(embeds, hidden)

    # stack up lstm outputs
    lstm_out = lstm_out.contiguous().view(-1, self.hidden_dim)

    # dropout and fully-connected layer
    out = self.dropout(lstm_out)
    out = self.fc(out)

    # softmax function
    soft_out = self.sof(out)

    # reshape to be batch_size first
    soft_out = soft_out.view(batch_size, -1)
    # soft_out = soft_out[:, -1] # get last batch of labels

    # return final softmax output and hidden state
    return soft_out, hidden
def init_hidden(self, batch_size):
    ''' Initializes hidden state '''
    # Create two new tensors with sizes n_layers x batch_size x hidden_dim,
    # initialized to zero, for hidden state and cell state of LSTM
    weight = next(self.parameters()).data

    if (train_on_gpu):
        hidden = (weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().cuda(),
                  weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().cuda())
    else:
        hidden = (weight.new(self.n_layers, batch_size, self.hidden_dim).zero_(),
                  weight.new(self.n_layers, batch_size, self.hidden_dim).zero_())

    return hidden
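To make the shapes concrete, here is a standalone sketch of how I understand the hidden state from init_hidden lines up with nn.LSTM (it assumes batch_first=True, since forward treats the first dimension of x as the batch; the sequence length of 200 and batch size of 50 are only illustrative):

import torch
import torch.nn as nn

n_layers, hidden_dim, embedding_dim = 2, 256, 100
batch_size, seq_len = 50, 200  # illustrative values

lstm = nn.LSTM(embedding_dim, hidden_dim, n_layers, batch_first=True)
h0 = (torch.zeros(n_layers, batch_size, hidden_dim),   # hidden state
      torch.zeros(n_layers, batch_size, hidden_dim))   # cell state
embeds = torch.randn(batch_size, seq_len, embedding_dim)

lstm_out, hn = lstm(embeds, h0)
print(lstm_out.shape)  # torch.Size([50, 200, 256])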
# Instantiate the model w/ hyperparams
vocab_size = len(vocab_to_int) + 1
output_size = 44
embedding_dim = 100
hidden_dim = 256
n_layers = 2

net = ClassificationRNN(vocab_size, output_size, embedding_dim, hidden_dim, n_layers)
print(net)

# loss and optimization functions
lr = 0.001

criterion = nn.NLLLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=lr)

# training params
epochs = 4  # 3-4 is approx where I noticed the validation loss stop decreasing
counter = 0
print_every = 100
clip = 5  # gradient clipping

# move model to GPU, if available
if (train_on_gpu):
    net.cuda()

net.train()
# train for some number of epochs
for e in range(epochs):
    # initialize hidden state
    h = net.init_hidden(batch_size)

    # batch loop
    for inputs, labels in train_loader:
        counter += 1

        if (train_on_gpu):
            inputs, labels = inputs.cuda(), labels.cuda()

        # Creating new variables for the hidden state, otherwise
        # we'd backprop through the entire training history
        h = tuple([each.data for each in h])

        # zero accumulated gradients
        net.zero_grad()

        # get the output from the model
        output, h = net(inputs, h)

        # print('output:', output.squeeze())
        # print('labels:', labels.float())

        # calculate the loss and perform backprop
        loss = criterion(output, labels)
        loss.backward()

        # `clip_grad_norm_` helps prevent the exploding gradient problem in RNNs / LSTMs.
        nn.utils.clip_grad_norm_(net.parameters(), clip)
        optimizer.step()

        # loss stats
        if counter % print_every == 0:
            # Get validation loss
            val_h = net.init_hidden(batch_size)
            val_losses = []
            net.eval()

            for inputs, labels in valid_loader:
                # Creating new variables for the hidden state, otherwise
                # we'd backprop through the entire training history
                val_h = tuple([each.data for each in val_h])

                if (train_on_gpu):
                    inputs, labels = inputs.cuda(), labels.cuda()

                output, val_h = net(inputs, val_h)
                val_loss = criterion(output, labels)
                val_losses.append(val_loss.item())

            net.train()

            print("Epoch: {}/{}...".format(e + 1, epochs),
                  "Step: {}...".format(counter),
                  "Loss: {:.6f}...".format(loss.item()),
                  "Val Loss: {:.6f}".format(np.mean(val_losses)))
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-41-805ed880b453> in <module>()
58 inputs, labels = inputs.cuda(), labels.cuda()
59
---> 60 output, val_h = net(inputs, val_h)
61
62 val_loss = criterion(output, labels)
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
487 result = self._slow_forward(*input, **kwargs)
488 else:
--> 489 result = self.forward(*input, **kwargs)
490 for hook in self._forward_hooks.values():
491 hook_result = hook(self, input, result)
<ipython-input-38-dbfb8d384231> in forward(self, x, hidden)
34 batch_size = x.size(0)
35 embeds = self.embedding(x)
---> 36 lstm_out,hidden= self.lstm(embeds,hidden)
37
38 # stack up lstm outputs
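In case it helps narrow this down, this is the single-batch check I can run using only the objects already defined above (net, valid_loader and train_on_gpu); nothing new is assumed beyond the posted code:

inputs, labels = next(iter(valid_loader))
print('valid batch:', inputs.shape, labels.shape)

# size the hidden state to this specific batch rather than to batch_size
h = net.init_hidden(inputs.size(0))
if (train_on_gpu):
    inputs = inputs.cuda()

output, h = net(inputs, h)
print('output:', output.shape)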