I was wondering if someone knew about a python package that implements MLE to estimate parameters of a Dirichlet distribution.
Python package: MLE for Dirichlet distribution
Asked Answered
Eric Suh has a package here.
$ pip install git+https://github.com/ericsuh/dirichlet.git
Then:
import numpy
import dirichlet
# True concentration (shape) parameters of the Dirichlet distribution.
a0 = numpy.array([100, 299, 100])
# Draw 1000 i.i.d. samples; each row is a probability vector summing to 1.
D0 = numpy.random.dirichlet(a0, 1000)
# Maximum-likelihood estimate of the concentration parameters from the samples;
# should be close to a0 for a sample this large.
dirichlet.mle(D0)
Late by all measures, but in case this can still help anyone out there...
from sklearn.base import BaseEstimator, RegressorMixin
from scipy.special import polygamma
import pandas as pd
import numpy as np
class DirichletDummyRegressor(BaseEstimator, RegressorMixin):
    '''
    Implements a static Dirichlet regressor compliant with the Sklearn API
    providing the MLE estimates through the `fit` method.

    Parameters
    ----------
    `eps: float = 0.00001`
        An optional error tolerance parameter for the numerical solver.
        Defaults to `0.00001`.
    `maxiter: int = 100`
        An optional fail-safe parameter limiting the number of iterations the
        numerical solver can perform. Defaults to `100`.

    Raises
    ------
    `AssertionError`
        If the error tolerance is non-positive.
    `AssertionError`
        If the maximum number of iterations is non-positive.
    '''
    def __init__(self, eps: float = 0.00001, maxiter: int = 100) -> None:
        assert eps > 0.0, 'Error tolerance must be positive'
        assert maxiter > 0, 'Maximum number of iterations must be positive'
        self.eps = eps
        self.maxiter = maxiter
        # Fitted attributes, populated by `fit`. NOTE: `ndim` and `niter`
        # are the names read by `predict` and `__str__`; the original code
        # initialized `niter_` here but assigned `niter`/`ndim_` in `fit`,
        # leaving both readers broken.
        self.ndim = None
        self.niter = None
        self.status = None
        self.shape = None
        self.mean_array = None

    def __str__(self) -> str:
        # Safe to call before `fit`: all fitted attributes default to None.
        return str({
            'shape': None if self.shape is None else np.round(self.shape, 4),
            'niter': self.niter, 'status': self.status
        })

    def fit(self, y: pd.DataFrame):
        ''' \
        Calibrates the regressor by finding the maximum likelihood estimates
        by way of Newton's method.

        Parameters
        ----------
        `y: pd.DataFrame`
            A response matrix (a plain `np.ndarray` is also accepted). Each
            row must be a probability vector summing to 100%.

        Raises
        ------
        `AssertionError`
            If one or more response values are negative.
        `AssertionError`
            If one or more response arrays does not sum to 100%.
        '''
        # Normalize the input so the reductions below yield scalars even for
        # a DataFrame (DataFrame.min() returns a Series, which would make the
        # bounds assertion ambiguous).
        y = np.asarray(y)
        assert y.min() >= 0.0, 'The response matrix is out of bounds'
        # Two-sided tolerance: the original one-sided comparison accepted any
        # row summing to less than 1 + eps (e.g. 0.5).
        assert np.all(np.abs(y.sum(axis=1) - 1.0) <= self.eps), 'The row-wise sum of the response must equal 100%'  # noqa: E501
        ndim = y.shape[1]
        niter = 0
        check = False
        # Sufficient statistic of the Dirichlet log-likelihood.
        logy_mean = np.log(y).mean(axis=0)
        # Method of moments seeds for the Newton iterations.
        mean = y.mean(axis=0)
        phi = (mean * (1.0 - mean)).sum() / ((y - mean) ** 2.0).sum(axis=1).mean()  # noqa: E501
        a = mean * phi
        while not (check or niter > self.maxiter):
            niter += 1
            # Gradient and Hessian of the log-likelihood w.r.t. the shape `a`.
            g = polygamma(n=0, x=a.sum()) * np.ones(ndim) + logy_mean - polygamma(n=0, x=a)  # noqa: E501
            h = polygamma(n=1, x=a.sum()) * np.ones((ndim, ndim)) - np.diag(polygamma(n=1, x=a))  # noqa: E501
            if np.linalg.det(a=h) == 0.0:
                # Singular Hessian: stop; `status` stays 0 (not converged).
                break
            delta = -np.linalg.inv(a=h) @ g
            check = np.linalg.norm(x=delta) <= self.eps
            a += delta
        self.ndim = ndim
        self.ndim_ = ndim  # sklearn-style alias kept for backward compat
        self.shape = a
        self.mean_array = self.shape / self.shape.sum()
        self.niter = niter
        self.niter_ = niter  # alias kept for backward compat
        self.status = int(check)
        return self

    def predict(self, X: pd.DataFrame | None = None) -> np.ndarray:
        ''' \
        Predicts the mean response array (decision rule consistent with the use
        of quadratic loss function).

        Parameters
        ----------
        `X: pd.DataFrame | None = None`
            An optional dummy feature matrix to provide compliance with the
            Sklearn API.

        Returns
        -------
        `np.ndarray`
            An array of prediction. When `X` is given, the mean array is
            broadcast to one identical row per sample of `X`.
        '''
        assert self.status == 1, 'Regressor must be calibrated prior to predicting'  # noqa: E501
        if X is not None:
            # Works now that `fit` assigns `self.ndim` (was `ndim_`).
            return self.mean_array * np.ones((len(X), self.ndim))
        return self.mean_array
if __name__ == '__main__':
    # Demo: simulate Dirichlet draws with known shape parameters and check
    # that the fitted MLE shape is close to the truth.
    n_samples = 10_000
    n_dims = 10
    true_shape = np.random.uniform(low=0.0, high=10.0, size=n_dims)
    samples = np.random.dirichlet(alpha=true_shape, size=n_samples)
    model = DirichletDummyRegressor()
    model.fit(y=samples)
    print({'shape': true_shape.round(4)})
    print(model)
Remark: I am calling this regressor "dummy" because it always predicts the same thing regardless of any predictive feature matrix it is fed. That amounts to evaluating the MLE for an i.i.d. set of samples drawn from a Dirichlet distribution with fixed shape parameters (i.e., $\boldsymbol{X} \sim \mathcal{D}\!\left(\boldsymbol{\alpha} = \left[\alpha_1, \ldots, \alpha_D\right]\right)$).
© 2022 - 2024 — McMap. All rights reserved.