I'm trying to populate a GRU/LSTM with manually defined parameters in PyTorch.
I have numpy arrays for parameters with shapes as defined in their documentation (https://pytorch.org/docs/stable/nn.html#torch.nn.GRU).
It seems to work but I'm not sure whether the returned values are correct.
Is this the right way to fill a GRU/LSTM with numpy parameters?
# Build the recurrent layer whose weights are overwritten below.
# Every option is passed by keyword so the configuration is readable at a glance.
gru = nn.GRU(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    bias=True,
    batch_first=False,
    dropout=dropout,
    bidirectional=bidirectional,
)
def set_nn_wih(layer, parameter_name, w, l0=True):
    """Copy a flat weight vector into an input-to-hidden weight of ``layer``.

    The values are written in row-major order: row ``r`` of the parameter
    receives ``w[r * cols:(r + 1) * cols]``, where ``cols`` is the
    parameter's second dimension. The target layout is read from the
    parameter itself, so the function does not depend on module-level size
    constants and is not tied to the GRU's three gates (an LSTM weight with
    ``4 * hidden_size`` rows is filled the same way).

    Args:
        layer: Module that owns the parameter (e.g. an ``nn.GRU``).
        parameter_name: Attribute name of the parameter, e.g.
            ``"weight_ih_l0"`` or ``"weight_ih_l1_reverse"``.
        w: Torch tensor holding exactly as many elements as the parameter.
        l0: Accepted for backward compatibility and ignored; the row width
            it used to select is taken from the parameter's shape.

    Raises:
        ValueError: If ``w`` does not have the parameter's element count.
    """
    param = getattr(layer, parameter_name)
    if w.numel() != param.numel():
        # Fail loudly instead of silently truncating or partially filling.
        raise ValueError(
            "{} expects {} values (shape {}), got {}".format(
                parameter_name, param.numel(), tuple(param.shape), w.numel()
            )
        )
    # Writing through .data keeps the copy out of the autograd graph.
    param.data.copy_(w.reshape(param.shape))
def set_nn_whh(layer, parameter_name, w):
    """Copy a flat weight vector into a hidden-to-hidden weight of ``layer``.

    The values are written in row-major order: row ``r`` of the parameter
    receives ``w[r * cols:(r + 1) * cols]``, where ``cols`` is the
    parameter's second dimension. The target layout is read from the
    parameter itself, so the function does not depend on a module-level
    ``hidden_size`` and is not tied to the GRU's three gates (an LSTM weight
    with ``4 * hidden_size`` rows is filled the same way).

    Args:
        layer: Module that owns the parameter (e.g. an ``nn.GRU``).
        parameter_name: Attribute name of the parameter, e.g.
            ``"weight_hh_l0"`` or ``"weight_hh_l0_reverse"``.
        w: Torch tensor holding exactly as many elements as the parameter.

    Raises:
        ValueError: If ``w`` does not have the parameter's element count.
    """
    param = getattr(layer, parameter_name)
    if w.numel() != param.numel():
        # Fail loudly instead of silently truncating or partially filling.
        raise ValueError(
            "{} expects {} values (shape {}), got {}".format(
                parameter_name, param.numel(), tuple(param.shape), w.numel()
            )
        )
    # Writing through .data keeps the copy out of the autograd graph.
    param.data.copy_(w.reshape(param.shape))
# Overwrite the GRU weights for every (direction, layer) pair from the packed
# numpy arrays, then run the layer once on the prepared input and state.
l0 = True
for i in range(num_directions):
    # Direction 0 writes the plain parameters; any other index writes the
    # "_reverse" ones.
    if i == 0:
        ih_template, hh_template = "weight_ih_l{}", "weight_hh_l{}"
    else:
        ih_template, hh_template = "weight_ih_l{}_reverse", "weight_hh_l{}_reverse"

    for j in range(num_layers):
        l0 = j == 0
        if l0:
            # First layer: columns come from w0, split after 3*input_size.
            packed = w0[i]
            split = 3 * input_size
        else:
            # Deeper layers: columns come from w, split after
            # num_directions*3*hidden_size.
            packed = w[j - 1, i]
            split = num_directions * 3 * hidden_size

        wih = packed[:, :split]
        whh = packed[:, split:]  # check

        set_nn_wih(gru, ih_template.format(j), torch.from_numpy(wih.flatten()), l0)
        set_nn_whh(gru, hh_template.format(j), torch.from_numpy(whh.flatten()))

y, hn = gru(x_t, h_t)
The numpy arrays are defined as follows:
# Fixed seed so the random test weights are reproducible.
rng = np.random.RandomState(313)

# First-layer weights: one (hidden_size, 3*(input_size + hidden_size)) matrix
# per direction. Sampled before `w` so the random stream order is unchanged.
w0_shape = (num_directions, hidden_size, 3 * (input_size + hidden_size))
w0 = rng.randn(*w0_shape).astype(np.float32)

# Weights for the remaining layers; at least one slot is allocated even when
# num_layers == 1.
w_shape = (
    max(1, num_layers - 1),
    num_directions,
    hidden_size,
    3 * (num_directions * hidden_size + hidden_size),
)
w = rng.randn(*w_shape).astype(np.float32)
model.load_state_dict(dict, strict=False)
(strict=False is crucial). – Actuality