Get positive and negative part of gradient for loss function in PyTorch

I want to implement non-negative matrix factorization using PyTorch. Here is my initial implementation:

import torch

def nmf(X, k, lr, epochs):
    # X: input matrix of size (m, n)
    # k: number of latent factors
    # lr: learning rate
    # epochs: number of training epochs
    m, n = X.shape
    W = torch.rand(m, k, requires_grad=True)  # initialize W randomly
    H = torch.rand(k, n, requires_grad=True)  # initialize H randomly
    # training loop
    for i in range(epochs):
        # compute reconstruction error
        loss = torch.norm(X - torch.matmul(W, H), p='fro')
        # compute gradients
        loss.backward()
        # update parameters using additive update rule
        with torch.no_grad():
            W -= lr * W.grad
            H -= lr * H.grad
            W.grad.zero_()
            H.grad.zero_()
        if i % 10 == 0:
            print(f"Epoch {i}: loss = {loss.item()}")
    return W.detach(), H.detach()
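
For context, I call it roughly like this (the matrix size and hyperparameters below are just placeholders):

X = torch.rand(100, 50)  # placeholder non-negative data matrix
W, H = nmf(X, k=10, lr=1e-2, epochs=100)
print(W.shape, H.shape)  # torch.Size([100, 10]) torch.Size([10, 50])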

Lee and Seung, in this paper, proposed using adaptive learning rates to avoid subtraction and thus the creation of negative elements. Here is the stats.SE thread where I got the idea. But I don't know how to implement the multiplicative update rule for W and H in PyTorch, since it requires separating the positive and negative parts of their gradients. Yes, I can implement that manually, but I want to leverage torch autograd for it.

[image: multiplicative update rule]
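
For reference, this is how I understand the rule (assuming the objective is the squared Frobenius norm $f(W,H) = \tfrac{1}{2}\lVert X - WH\rVert_F^2$): the gradient of $f$ splits into two elementwise non-negative terms,

$$
\nabla_W f = \underbrace{WHH^\top}_{[\nabla_W f]^+} - \underbrace{XH^\top}_{[\nabla_W f]^-},
\qquad
\nabla_H f = \underbrace{W^\top WH}_{[\nabla_H f]^+} - \underbrace{W^\top X}_{[\nabla_H f]^-},
$$

and the multiplicative updates multiply each factor by the elementwise ratio of the negative part to the positive part:

$$
W \leftarrow W \odot \frac{XH^\top}{WHH^\top},
\qquad
H \leftarrow H \odot \frac{W^\top X}{W^\top WH}.
$$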

Any idea how to do this? Thanks in advance.

Albion answered 15/3, 2023 at 9:14 Comment(0)

In the multiplicative update rule, the positive and negative parts of the gradient are separated, and each update is computed from the ratio of these two parts.

Note: a small value eps is added to the denominators to avoid division by zero.

import torch

def nmf(X, k, lr, epochs):
    # X: input matrix of size (m, n)
    # k: number of latent factors
    # lr: learning rate
    # epochs: number of training epochs
    m, n = X.shape
    W = torch.rand(m, k, requires_grad=True)  # initialize W randomly
    H = torch.rand(k, n, requires_grad=True)  # initialize H randomly
    eps = 1e-9  # small value to avoid division by zero
    # training loop
    for i in range(epochs):
        # compute reconstruction error
        loss = torch.norm(X - torch.matmul(W, H), p='fro')
        # compute gradients
        W_pos = torch.relu(W)  # separate positive and negative parts of W
        W_neg = torch.relu(-W)
        H_pos = torch.relu(H)  # separate positive and negative parts of H
        H_neg = torch.relu(-H)
        grad_W_pos = torch.matmul((torch.matmul(W_pos, H_pos) - X), H_pos.t())
        grad_W_neg = torch.matmul((torch.matmul(W_neg, H_pos) - X), H_pos.t())
        grad_H_pos = torch.matmul(W_pos.t(), (torch.matmul(W_pos, H_pos) - X))
        grad_H_neg = torch.matmul(W_pos.t(), (torch.matmul(W_pos, H_neg) - X))
        # update parameters using multiplicative update rule
        # (in-place updates of leaf tensors must run outside autograd tracking)
        with torch.no_grad():
            W *= torch.sqrt((grad_W_pos + eps) / (grad_W_neg + eps))
            H *= torch.sqrt((grad_H_pos + eps) / (grad_H_neg + eps))
        if i % 10 == 0:
            print(f"Epoch {i}: loss = {loss.item()}")
    return W.detach(), H.detach()

However, implementing adaptive learning rates for NMF in PyTorch can be more involved and may require additional code.
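
For comparison, here is a minimal sketch (not part of the code above) of how the positive and negative gradient parts can be obtained from torch autograd itself, assuming the squared Frobenius objective 0.5*||X - WH||^2: back-propagate its two terms separately, so the gradient of 0.5*||WH||^2 gives the positive part (W H H^T and W^T W H) and the gradient of <X, WH> gives the negative part (X H^T and W^T X), then apply the Lee-Seung multiplicative update. The function name nmf_autograd and its defaults are just illustrative.

import torch

def nmf_autograd(X, k, epochs, eps=1e-9):
    # Sketch: obtain the Lee-Seung gradient parts via autograd on the two loss terms
    m, n = X.shape
    W = torch.rand(m, k, requires_grad=True)
    H = torch.rand(k, n, requires_grad=True)
    for i in range(epochs):
        WH = torch.matmul(W, H)
        # 0.5*||X - WH||^2 = 0.5*||WH||^2 - <X, WH> + const
        loss_pos = 0.5 * (WH * WH).sum()  # gradient: W H H^T (w.r.t. W), W^T W H (w.r.t. H)
        loss_neg = (X * WH).sum()         # gradient: X H^T   (w.r.t. W), W^T X   (w.r.t. H)
        grad_W_pos, grad_H_pos = torch.autograd.grad(loss_pos, (W, H), retain_graph=True)
        grad_W_neg, grad_H_neg = torch.autograd.grad(loss_neg, (W, H))
        with torch.no_grad():
            # multiplicative update: multiply by (negative part) / (positive part)
            W *= grad_W_neg / (grad_W_pos + eps)
            H *= grad_H_neg / (grad_H_pos + eps)
        if i % 10 == 0:
            with torch.no_grad():
                print(f"Epoch {i}: loss = {torch.norm(X - torch.matmul(W, H), p='fro').item()}")
    return W.detach(), H.detach()

Because both gradient parts are non-negative whenever X, W, and H are non-negative, this update keeps W and H non-negative without an explicit learning rate or loss.backward() call.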

Concrete answered 22/3, 2023 at 5:30 Comment(4)
Can you explain how applying relu helps to get the positive and negative parts of the gradients? Actually, I am doing something similar, hence interested to know the solution! @RajeshKonthamLavinia
When relu is applied to a tensor, it sets negative values to zero while leaving positive values unchanged. In the code, relu is applied to W and -W (and to H and -H) to obtain the positive and negative parts of the factor matrices, which is what allows the gradient terms to be split into positive and negative components.Concrete
Sorry for the late response, @RajeshKontham. Using relu to separate the gradient parts seems promising. But I don't see how you backpropagate the gradients. Shouldn't there be a loss.backward()? I am new to PyTorch. After giving it a try I get: Epoch 0: loss = 11363.654656173509, Epoch 10: loss = nan, Epoch 20: loss = nan. It seems like something stops working after the first iteration.Albion
Backpropagation is not needed in this case. NaN after the first epoch suggests a numerical instability or a division by zero during the computation. One issue is the use of torch.sqrt in the update rule, which produces NaN when its argument is negative. A possible fix is to use torch.clamp to keep the factor matrices W and H non-negative, which prevents the negative values that cause the numerical issues. You can also try reducing the learning rate lr to improve stability.Concrete
