I'm trying to implement Gaussian fitting using TensorFlow Probability's Nelder-Mead optimizer, tfp.optimizer.nelder_mead_minimize().
It does not converge, while scipy.optimize.minimize() provides a good result in less than 1 second of computation time.
I am probably doing something wrong, but I can't figure out what. Can someone help me with this?
I am using :
python 3.7.3
tensorflow-probability 0.8
tensorflow 2.0
Here's my code :
import tensorflow as tf
import tensorflow_probability as tfp
import numpy as np
import matplotlib as plt
# Define the gaussian model : y = 1/(sigma * sqrt(2 pi)) * exp(- (x-mu)²/(2*sigma²))
pi = np.pi
def model(x, theta):
    """Evaluate the Gaussian density at ``x``.

    ``theta[0]`` is used as the mean (mu) and ``theta[1]`` as the standard
    deviation (sigma):

        y = 1 / (sigma * sqrt(2 pi)) * exp(-(x - mu)^2 / (2 sigma^2))
    """
    mu = theta[0]
    sigma = theta[1]
    # Normalisation factor in front of the exponential.
    scale = 1 / (sigma * tf.sqrt(2 * pi))
    return scale * tf.exp(-(x - mu) ** 2 / (2 * sigma ** 2))
# Define the loss (least mean square)
def loss_function(theta, y, x, callback=False, n_iterations=1):
    """Return the mean squared error between ``y`` and ``model(x, theta)``.

    Args:
        theta: Model parameters, forwarded unchanged to ``model``.
        y: Target values the model output is compared against.
        x: Sample positions at which ``model`` is evaluated.
        callback: When true, count this call in the global ``n_epochs_cb``
            and print a progress line whenever the counter is a multiple of
            one tenth of ``n_iterations``.
        n_iterations: Reference count for the print interval (the interval
            is ``n_iterations / 10``).

    Returns:
        The value of ``tf.losses.mean_squared_error(y, model(x, theta))``.
    """
    global n_epochs_cb
    loss = tf.losses.mean_squared_error(y, model(x, theta))
    if callback:
        # Same condition as ``n_epochs_cb % (n_iterations / 10.) == 0`` but
        # without dividing first: an interval such as 0.1 (the default
        # n_iterations=1) is not exactly representable as a float, so the
        # float remainder is never exactly 0 and nothing would be printed.
        if (10 * n_epochs_cb) % n_iterations == 0:
            print('{0:4d} {1:} {2: 3.6f}'.format(n_epochs_cb, theta, loss))
        n_epochs_cb = n_epochs_cb + 1
    return loss
# Generate some data
# Reference parameters [mu, sigma] used to synthesise the data.
theta_true = [3, 2]
# No dtype is given here, so X uses NumPy's default float dtype, while the
# initial vertex below is explicitly float32.
X = np.arange(-10, 10, 0.5)
# Noiseless samples of the model at the reference parameters.
Y = model(X, theta_true)
# fig, ax = plt.subplots(1, 1, figsize = [20, 10])
# ax.scatter(X, Y, label='data', alpha=0.5)
# initialize parameters
# Starting point: two values drawn by tf.random.uniform([2], 0, 10).
theta = tf.constant(tf.random.uniform([2], 0, 10), dtype=tf.float32, name='theta')
n_iterations = 100
# Global call counter read and incremented by loss_function when callback=True.
n_epochs_cb = 1
# minimization
print('{0:4s} {1:9s} {2:9s}'.format('Iter', 'theta', 'loss'))
# NOTE(review): loss_function is declared as (theta, y, x, ...), so the
# positional call below binds X to `y` and Y to `x`. The revised listing
# further down passes (theta, Y, X) instead.
optim_results = tfp.optimizer.nelder_mead_minimize(lambda theta: loss_function(theta, X, Y, True, n_iterations),
initial_vertex=theta,
func_tolerance=1e-8,
position_tolerance=1e-8,
max_iterations=n_iterations)
# Summary of the optimizer result.
print("theta_true", theta_true)
print("theta_est", optim_results.position.numpy())
print("convergenced:", optim_results.converged.numpy())
print("number of function evaluation", optim_results.num_objective_evaluations.numpy())
print("number of iterations", optim_results.num_iterations.numpy())
print("objective value", optim_results.objective_value.numpy())
The optimization stops at around 50 iterations and returns:
Iter theta loss
10 [0.1448533 6.7525005] 33.408031
20 [-0.2385819 28.76061 ] 33.382130
30 [ -4.1879644 260.84622 ] 33.375771
40 [ -34.722183 2053.5083 ] 33.375099
50 [ -418.6432 24589.836 ] 33.375008
theta_true [3, 2]
theta_est [ -488.44122 28687.352 ]
convergenced: True
number of function evaluation 55
number of iterations 35
objective value 33.375008
I ran the same problem using minimize() from scipy.optimize with the 'Nelder-Mead' method, and it gives:
Iter theta loss
10 [4.61612335 4.40795762] 0.007583
20 [3.19502416 2.09290338] 0.001023
30 [3.01845636 1.99504269] 0.000091
40 [2.99843397 2.00065615] 0.000010
Optimization terminated successfully.
Current function value: 0.000010
Iterations: 44
Function evaluations: 96
computation time 0.046 seconds
I would expect the same performance from TensorFlow Probability's Nelder-Mead optimizer as from scipy.optimize's Nelder-Mead optimizer.
What am I doing wrong?
Edit: I found the mistake in how the loss function was used (X and Y were swapped in the call, and X is now created as float32). The following code now converges:
import tensorflow as tf
import tensorflow_probability as tfp
import numpy as np
import matplotlib as plt
# Define the gaussian model : y = 1/(sigma * sqrt(2 pi)) * exp(- (x-mu)²/(2*sigma²))
pi = np.pi
def model(x, theta):
    """Evaluate the Gaussian density at ``x``.

    ``theta[0]`` is used as the mean (mu) and ``theta[1]`` as the standard
    deviation (sigma):

        y = 1 / (sigma * sqrt(2 pi)) * exp(-(x - mu)^2 / (2 sigma^2))
    """
    mu = theta[0]
    sigma = theta[1]
    # Normalisation factor in front of the exponential.
    scale = 1 / (sigma * tf.sqrt(2 * pi))
    return scale * tf.exp(-(x - mu) ** 2 / (2 * sigma ** 2))
# Define the loss (least mean square)
def loss_function(theta, y, x, callback=False, n_iterations=1):
    """Return the mean squared error between ``y`` and ``model(x, theta)``.

    Args:
        theta: Model parameters, forwarded unchanged to ``model``.
        y: Target values the model output is compared against.
        x: Sample positions at which ``model`` is evaluated.
        callback: When true, count this call in the global ``n_epochs_cb``
            and print a progress line whenever the counter is a multiple of
            one tenth of ``n_iterations``.
        n_iterations: Reference count for the print interval (the interval
            is ``n_iterations / 10``).

    Returns:
        The value of ``tf.losses.mean_squared_error(y, model(x, theta))``.
    """
    global n_epochs_cb
    loss = tf.losses.mean_squared_error(y, model(x, theta))
    if callback:
        # Same condition as ``n_epochs_cb % (n_iterations / 10.) == 0`` but
        # without dividing first: an interval such as 0.1 (the default
        # n_iterations=1) is not exactly representable as a float, so the
        # float remainder is never exactly 0 and nothing would be printed.
        if (10 * n_epochs_cb) % n_iterations == 0:
            print('{0:4d} {1:} {2: 3.6f}'.format(n_epochs_cb, theta, loss))
        n_epochs_cb = n_epochs_cb + 1
    return loss
# Generate some data
# Reference parameters [mu, sigma] and noiseless float32 samples of the model.
theta_true = [3, 2]
X = np.arange(-10, 10, 0.5, dtype=np.float32)
Y = model(X, theta_true)
# fig, ax = plt.subplots(1, 1, figsize = [20, 10])
# ax.scatter(X, Y, label='data', alpha=0.5)

# Random starting vertex: two float32 values drawn by tf.random.uniform.
theta = tf.constant(tf.random.uniform([2], 0, 10), dtype=tf.float32, name='theta')
print("theta_true", theta_true)
print("theta_init", theta.numpy())

n_iterations = 100
n_epochs_cb = 1  # global call counter updated inside loss_function


def objective(candidate):
    """Loss of ``candidate`` against (X, Y), with progress printing enabled."""
    return loss_function(candidate, Y, X, True, n_iterations)


# Run the Nelder-Mead search from the random starting vertex.
print('{0:4s} {1:9s} {2:9s}'.format('Iter', 'theta', 'loss'))
optim_results = tfp.optimizer.nelder_mead_minimize(
    objective,
    initial_vertex=theta,
    func_tolerance=1e-8,
    position_tolerance=1e-8,
    max_iterations=n_iterations,
)

# Report the optimizer result, one labelled line per field.
report = (
    ("theta_est", optim_results.position),
    ("convergenced:", optim_results.converged),
    ("number of function evaluation", optim_results.num_objective_evaluations),
    ("number of iterations", optim_results.num_iterations),
    ("objective value", optim_results.objective_value),
)
for label, value in report:
    print(label, value.numpy())
scipy.optimize.minimize() stops after 10 iterations on the good estimates, while tfp.optimizer.nelder_mead_minimize() diverges, just as it does with random initialization – Medora