Non-linear optimization with Tensorflow-probability

I'm trying to implement a Gaussian fit using TensorFlow Probability's Nelder-Mead optimizer, tfp.optimizer.nelder_mead_minimize(). It does not converge, while scipy.optimize.minimize() gives a good result in less than 1 second of computation time. I am probably doing something wrong, but I can't figure out what. Can someone help me with this?

I am using:

python                    3.7.3
tensorflow-probability    0.8
tensorflow                2.0

Here's my code:

import tensorflow as tf
import tensorflow_probability as tfp
import numpy as np
import matplotlib.pyplot as plt

# Define the gaussian model : y = 1/(sigma * sqrt(2 pi)) * exp(- (x-mu)²/(2*sigma²))
pi = np.pi
def model(x, theta):
    y = 1/(theta[1]*tf.sqrt(2*pi)) * tf.exp(-(x-theta[0])**2 /(2*theta[1]**2))
    return y


# Define the loss (mean squared error)
def loss_function(theta, y, x, callback=False, n_iterations=1):
    global n_epochs_cb
    loss = tf.losses.mean_squared_error(y, model(x, theta))
    if callback:
        if n_epochs_cb % (n_iterations/10.) == 0:
            print('{0:4d}   {1:}   {2: 3.6f}'.format(n_epochs_cb, theta, loss))
    n_epochs_cb = n_epochs_cb + 1
    return loss


# Generate some data
theta_true = [3, 2]
X = np.arange(-10, 10, 0.5) 
Y = model(X, theta_true) 
# fig, ax = plt.subplots(1, 1, figsize = [20, 10])
# ax.scatter(X, Y, label='data', alpha=0.5)

# initialize parameters 
theta = tf.constant(tf.random.uniform([2], 0, 10), dtype=tf.float32, name='theta')
n_iterations = 100
n_epochs_cb = 1

# minimization
print('{0:4s}   {1:9s}               {2:9s}'.format('Iter', 'theta', 'loss'))
optim_results = tfp.optimizer.nelder_mead_minimize(lambda theta: loss_function(theta, X, Y, True, n_iterations),
                                                   initial_vertex=theta,
                                                   func_tolerance=1e-8,
                                                   position_tolerance=1e-8,
                                                   max_iterations=n_iterations)   

print("theta_true", theta_true)
print("theta_est", optim_results.position.numpy())
print("convergenced:", optim_results.converged.numpy())
print("number of function evaluation", optim_results.num_objective_evaluations.numpy())
print("number of iterations", optim_results.num_iterations.numpy())
print("objective value", optim_results.objective_value.numpy())

The optimization stops after around 50 iterations and returns:

Iter   theta                   loss     
  10   [0.1448533 6.7525005]    33.408031
  20   [-0.2385819 28.76061  ]    33.382130
  30   [ -4.1879644 260.84622  ]    33.375771
  40   [ -34.722183 2053.5083  ]    33.375099
  50   [ -418.6432 24589.836 ]    33.375008
theta_true [3, 2]
theta_est [ -488.44122 28687.352  ]
converged: True
number of function evaluations 55
number of iterations 35
objective value 33.375008

I ran the same problem using minimize() from scipy.optimize with the 'Nelder-Mead' method, and it gives:

Iter   theta                                          loss     
  10   [4.61612335 4.40795762]    0.007583
  20   [3.19502416 2.09290338]    0.001023
  30   [3.01845636 1.99504269]    0.000091
  40   [2.99843397 2.00065615]    0.000010
Optimization terminated successfully.
         Current function value: 0.000010
         Iterations: 44
         Function evaluations: 96
computation time 0.046 seconds
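
The SciPy call itself isn't shown above; here is a minimal sketch of how that run might look, assuming the same Gaussian model and a plain NumPy mean-squared-error objective (function and variable names are illustrative):

import numpy as np
from scipy.optimize import minimize

def model_np(x, theta):
    # Gaussian with mean theta[0] and standard deviation theta[1]
    return 1/(theta[1]*np.sqrt(2*np.pi)) * np.exp(-(x - theta[0])**2 / (2*theta[1]**2))

def loss_np(theta, y, x):
    # Mean squared error between data and model prediction
    return np.mean((y - model_np(x, theta))**2)

theta_true = [3, 2]
X = np.arange(-10, 10, 0.5)
Y = model_np(X, theta_true)

theta0 = np.random.uniform(0, 10, size=2)
res = minimize(loss_np, theta0, args=(Y, X), method='Nelder-Mead',
               options={'xatol': 1e-8, 'fatol': 1e-8, 'maxiter': 100})
print(res.x, res.fun, res.nit)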

I would expect similar performance from TensorFlow Probability's Nelder-Mead optimizer and SciPy's Nelder-Mead optimizer.

What am I doing wrong?

Edit: I found the mistake: the y and x arguments were swapped in the call to loss_function inside the lambda passed to the optimizer (X is also now generated as float32). The following code now converges:

import tensorflow as tf
import tensorflow_probability as tfp
import numpy as np
import matplotlib.pyplot as plt

# Define the gaussian model : y = 1/(sigma * sqrt(2 pi)) * exp(- (x-mu)²/(2*sigma²))
pi = np.pi
def model(x, theta):
    y =  1/(theta[1]*tf.sqrt(2*pi)) * tf.exp(-(x-theta[0])**2 /(2*theta[1]**2))
    return y


# Define the loss (mean squared error)
def loss_function(theta, y, x, callback=False, n_iterations=1):
    global n_epochs_cb
    loss = tf.losses.mean_squared_error(y, model(x, theta))
    if callback:
        if n_epochs_cb % (n_iterations/10.) == 0:
            print('{0:4d}   {1:}   {2: 3.6f}'.format(n_epochs_cb, theta, loss))
    n_epochs_cb = n_epochs_cb + 1
    return loss

# Generate some data
theta_true = [3, 2]
X = np.arange(-10, 10, 0.5, dtype=np.float32) 
Y = model(X, theta_true) 
# fig, ax = plt.subplots(1, 1, figsize = [20, 10])
# ax.scatter(X, Y, label='data', alpha=0.5)

# initialize parameters 
theta = tf.constant(tf.random.uniform([2], 0, 10), dtype=tf.float32, name='theta')

print("theta_true", theta_true)
print("theta_init", theta.numpy())

n_iterations = 100
n_epochs_cb = 1

# minimization
print('{0:4s}   {1:9s}               {2:9s}'.format('Iter', 'theta', 'loss'))
optim_results = tfp.optimizer.nelder_mead_minimize(lambda theta: loss_function(theta, Y, X, True, n_iterations),
                                                   initial_vertex=theta,
                                                   func_tolerance=1e-8,
                                                   position_tolerance=1e-8,
                                                   max_iterations=n_iterations)   


print("theta_est", optim_results.position.numpy())
print("convergenced:", optim_results.converged.numpy())
print("number of function evaluation", optim_results.num_objective_evaluations.numpy())
print("number of iterations", optim_results.num_iterations.numpy())
print("objective value", optim_results.objective_value.numpy())
Medora answered 18/10/2019 at 8:57 · Comments (4):
Do you start with the same initial parameters in both cases? Try starting with the true parameters in order to exclude any errors in your code. – Triphthong
Thanks for your comment. I do start with the same initial parameters in both cases. I also tried starting with the true parameters: scipy.optimize.minimize() stops after 10 iterations on a good estimate, while tfp.optimizer.nelder_mead_minimize() diverges just as it does with random initialization. – Medora
If TensorFlow diverges even from the true parameters, then most probably your implementation of the loss function is wrong. You will have to debug it one way or another. Maybe display the outputs of the loss function on a 2D grid around the true parameters (it's only two parameters anyway) and verify that there is a minimum at the right place; a sketch of such a check follows these comments. – Triphthong
Yes, you are right. I found the error and posted the correct implementation. Thanks! – Medora
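
Following the debugging suggestion above, here is a minimal sketch (not from the original post; grid ranges and variable names are illustrative) of how the corrected loss could be evaluated on a 2D grid of (mu, sigma) values around the true parameters to confirm the minimum sits in the right place:

import numpy as np
import tensorflow as tf

# Gaussian model and mean-squared-error loss, as in the question
pi = np.pi
def model(x, theta):
    return 1/(theta[1]*tf.sqrt(2*pi)) * tf.exp(-(x - theta[0])**2 / (2*theta[1]**2))

def loss(theta, y, x):
    return tf.reduce_mean(tf.square(y - model(x, theta)))

# Synthetic data generated from the true parameters (mu=3, sigma=2)
X = np.arange(-10, 10, 0.5, dtype=np.float32)
Y = model(X, [3.0, 2.0])

# Evaluate the loss on a grid of (mu, sigma) values around (3, 2)
mus = np.linspace(1.0, 5.0, 41, dtype=np.float32)
sigmas = np.linspace(0.5, 4.0, 36, dtype=np.float32)
grid = np.array([[loss([m, s], Y, X).numpy() for s in sigmas] for m in mus])

# The smallest loss should appear at (mu, sigma) close to (3, 2)
i, j = np.unravel_index(np.argmin(grid), grid.shape)
print("minimum at mu =", mus[i], ", sigma =", sigmas[j], ", loss =", grid[i, j])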
