1) Use feature scaling/normalization for faster convergence of gradient descent.
2) Decrease the learning rate (alpha) if the cost diverges or oscillates.
3) Exit the loop early according to a tolerance parameter, i.e. once the updates stop changing theta appreciably (see the short sketch below).
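A minimal sketch of points 1 and 3 (the variable names here are illustrative and not part of the code below): min-max scaling is used in the univariate example, z-score standardization in the multivariate one, and the early exit is just a check on the size of the update.
import numpy as np
feat = np.array([41.9, 43.4, 43.9, 44.5, 47.3])                # any raw feature column
feat_minmax = (feat - feat.min()) / (feat.max() - feat.min())  # point 1: rescale to [0, 1]
feat_zscore = (feat - feat.mean()) / feat.std()                # point 1: zero mean, unit variance
# point 3: inside the descent loop, stop once the step no longer moves theta, e.g.
#   if np.all(np.abs(alpha * gradient) <= 1e-6): break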
# adopted from https://mcmap.net/q/296489/-gradient-descent-using-python-and-numpy
import numpy as np
import matplotlib.pyplot as plt
# m denotes the number of examples here, not the number of features
def gradientDescent(x, y, theta, alpha, m, numIterations):
    theta_prev = theta.copy()
    cost_history = []
    acc = 0.0
    for it in range(numIterations):
        Y_pred = np.dot(x, theta)
        loss = Y_pred - y
        # squared-error cost 1/(2m) * sum(loss**2); the division by 2 is for convenience,
        # since the 2 cancels against the exponent when differentiating
        squared_error_cost = (1 / (2 * m)) * np.sum(loss ** 2)
        # gradient of the cost w.r.t. theta: differentiating gives (1/m) * X.T.dot(X*theta - y)
        gradient = (1 / m) * x.T.dot(loss)
        # to MINIMIZE the MSE, step against the gradient
        theta = theta - alpha * gradient
        cost_history.append(squared_error_cost)
        # "accuracy" as 1 minus the mean relative error (entries with y == 0 are skipped)
        acc = 1 - sum(abs(Y_pred[j] - y[j]) / y[j] for j in range(m) if y[j] != 0) / m
        if it % 100 == 0:
            print(f'at {it}-th step accuracy is {acc}')
        # check for tolerance: stop once the parameters stop changing appreciably
        if np.all(np.abs(theta - theta_prev) <= 1e-6):
            break
        theta_prev = theta.copy()
    return cost_history, theta, acc
X = np.array([41.9,43.4,43.9,44.5,47.3,47.5,47.9,50.2,52.8,53.2,56.7,57.0,63.5,65.3,71.1,77.0,77.8])
y = np.array([251.3,251.3,248.3,267.5,273.0,276.5,270.3,274.9,285.0,290.0,297.0,302.5,304.5,309.3,321.7,330.7,349.0])
# NORMALIZATION .....
X = (X-X.min())/(X.max()-X.min())
y = (y-y.min())/(y.max()-y.min())
plt.plot(X,y)
plt.show()
# Add a bias feature (constant 1) to the input matrix.....
n = np.max(X.shape)
X_bias = np.vstack([np.ones(n), X]).T
m, n = np.shape(X_bias)
##print(m) # 17
numIterations= 1000
alpha = 0.1
theta = np.ones(n)
cost_hist, theta, acc = gradientDescent(X_bias, y, theta, alpha, m, numIterations)
print("theta: ", theta)
print("Accuracy: ", acc)
yhat= theta@X_bias.T
print('yhat', yhat)
plt.plot(X, y, 'bo-', X_bias[:,1], yhat, 'k-')
plt.legend(['Data', 'Fitted line'])
plt.show()
# plot the cost history
x = [i for i in range(1, len(cost_hist)+1)]
plt.plot(x, cost_hist)
plt.xlabel('Iteration')
plt.ylabel('Cost')
plt.title('Cost History during Gradient Descent')
plt.show()
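As a quick sanity check (not part of the original answer), the gradient-descent result can be compared against the closed-form least-squares solution on the same normalized, bias-augmented data:
# closed-form least squares; the theta found by gradient descent should be close to this
theta_exact, *_ = np.linalg.lstsq(X_bias, y, rcond=None)
print("closed-form theta:", theta_exact)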
P.S. For the multivariate case, see the example below.
import numpy as np
def normalize_features(X):
    """
    Normalize features to have zero mean and unit variance.
    """
    mean = np.mean(X, axis=0)
    std = np.std(X, axis=0)
    X_normalized = (X - mean) / std
    return X_normalized, mean, std

def add_bias_feature(X):
    """
    Add a bias feature (constant 1) to the input matrix.
    """
    return np.column_stack((np.ones(len(X)), X))

def compute_cost(X, y, theta):
    """
    Compute the cost function for linear regression.
    """
    m = len(y)
    h = X.dot(theta)
    cost = (1 / (2 * m)) * np.sum((h - y) ** 2)
    return cost

def gradient_descent(X, y, theta, alpha, num_iterations):
    """
    Perform gradient descent to minimize the cost function.
    """
    m = len(y)
    cost_history = []
    for _ in range(num_iterations):
        h = X.dot(theta)
        error = h - y
        gradient = (1 / m) * X.T.dot(error)
        theta = theta - alpha * gradient
        cost = compute_cost(X, y, theta)
        cost_history.append(cost)
    return theta, cost_history
# Sample data generation
np.random.seed(42)
X = 2 * np.random.rand(100, 3) # 100 samples with 3 features
y = 4 + X.dot(np.array([3, 1.5, 2])) + np.random.randn(100)
# Normalize features and add bias feature
X_normalized, mean, std = normalize_features(X)
X_bias = add_bias_feature(X_normalized)
# Initial parameters
theta_initial = np.zeros(X_bias.shape[1])
# Hyperparameters
alpha = 0.01
num_iterations = 1000
# Run gradient descent
theta_final, cost_history = gradient_descent(X_bias, y, theta_initial, alpha, num_iterations)
# Print the final parameters and cost
print("Final Parameters:", theta_final)
print("Final Cost:", cost_history[-1])
# Plot the cost history
import matplotlib.pyplot as plt
plt.plot(cost_history)
plt.xlabel('Iteration')
plt.ylabel('Cost')
plt.title('Cost History during Gradient Descent')
plt.show()
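Since the fit was done on standardized features, theta_final lives in the normalized feature space. To compare it with the generating parameters above (intercept 4 and weights [3, 1.5, 2]), it can be mapped back to the original scale; a small sketch, not part of the original code:
# map theta from the standardized feature space back to the original scale:
# h(x) = theta0 + sum_j theta_j * (x_j - mean_j) / std_j
w_original = theta_final[1:] / std
b_original = theta_final[0] - np.sum(theta_final[1:] * mean / std)
print("Weights on the original scale:", w_original)    # should be close to [3, 1.5, 2]
print("Intercept on the original scale:", b_original)  # should be close to 4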
Comment: the raw X = np.array([41.9,43.4,43.9,44.5,47.3,47.5,47.9,50.2,52.8,53.2,56.7,57.0,63.5,65.3,71.1,77.0,77.8])[:, np.newaxis] and y also work with this implementation of gradient descent, using maxIteration = 1000000 and lr = 0.0001, in order to get w = [[152.9851407], [2.44436124]]. – Weakfish
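For reference, a sketch of the run the comment describes, reusing the helper functions above on the raw univariate data (X_raw and y_raw are illustrative names; the exact numbers depend on the initialization, but the commenter reports w close to [152.985, 2.444] for these settings):
X_raw = np.array([41.9, 43.4, 43.9, 44.5, 47.3, 47.5, 47.9, 50.2, 52.8, 53.2,
                  56.7, 57.0, 63.5, 65.3, 71.1, 77.0, 77.8])
y_raw = np.array([251.3, 251.3, 248.3, 267.5, 273.0, 276.5, 270.3, 274.9, 285.0,
                  290.0, 297.0, 302.5, 304.5, 309.3, 321.7, 330.7, 349.0])
X_raw_bias = add_bias_feature(X_raw[:, np.newaxis])   # [1, x] columns, no scaling
w, _ = gradient_descent(X_raw_bias, y_raw, np.zeros(2),
                        alpha=0.0001, num_iterations=1_000_000)
print("w:", w)   # the commenter reports roughly [152.985, 2.444]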