I am trying to fit a multivariate linear regression model with statsmodels.api. I get the error "MissingDataError: exog contains inf or nans". I have checked for NaNs and infs and found none. How is this possible? Why am I getting this error?
CODE
import statsmodels.api as sm
from sklearn.linear_model import LinearRegression
import pandas as pd
import numpy as np
df = pd.read_csv('clean_df.csv')
x_multi = df.drop('price', axis=1) #feature variables.
x_multi_cons = sm.add_constant(x_multi) #add a constant (intercept) column named 'const'.
I checked all the exog variables for NaN values and believed I had found none (note, however, that the output below reports 8 missing values for n_hos_beds):
x_multi_cons.isna().sum()
const 0
crime_rate 0
resid_area 0
air_qual 0
room_num 0
age 0
teachers 0
poor_prop 0
n_hos_beds 8
n_hot_rooms 0
rainfall 0
parks 0
avg_dist 0
airport_YES 0
waterbody_Lake 0
waterbody_Lake and River 0
waterbody_River 0
dtype: int64
I also checked the exog variables for inf values and found none.
np.isinf(x_multi_cons).sum()
const 0
crime_rate 0
resid_area 0
air_qual 0
room_num 0
age 0
teachers 0
poor_prop 0
n_hos_beds 0
n_hot_rooms 0
rainfall 0
parks 0
avg_dist 0
airport_YES 0
waterbody_Lake 0
waterbody_Lake and River 0
waterbody_River 0
dtype: int64
Here I am fitting the model.
y_multi = df['price'] # Dependent variable.
lm_multi = sm.OLS(y_multi, x_multi_cons).fit()
But I still get the error "MissingDataError: exog contains inf or nans". How is this possible?
ERROR:
MissingDataError Traceback (most recent call last)
<ipython-input-67-ca6d2e9ba2c0> in <module>
----> 1 lm_multi = sm.OLS(y_multi, x_multi_cons).fit()
~/anaconda3/envs/python3/lib/python3.6/site-packages/statsmodels/regression/linear_model.py in __init__(self, endog, exog, missing, hasconst, **kwargs)
871 **kwargs):
872 super(OLS, self).__init__(endog, exog, missing=missing,
--> 873 hasconst=hasconst, **kwargs)
874 if "weights" in self._init_keys:
875 self._init_keys.remove("weights")
~/anaconda3/envs/python3/lib/python3.6/site-packages/statsmodels/regression/linear_model.py in __init__(self, endog, exog, weights, missing, hasconst, **kwargs)
702 weights = weights.squeeze()
703 super(WLS, self).__init__(endog, exog, missing=missing,
--> 704 weights=weights, hasconst=hasconst, **kwargs)
705 nobs = self.exog.shape[0]
706 weights = self.weights
~/anaconda3/envs/python3/lib/python3.6/site-packages/statsmodels/regression/linear_model.py in __init__(self, endog, exog, **kwargs)
188 """
189 def __init__(self, endog, exog, **kwargs):
--> 190 super(RegressionModel, self).__init__(endog, exog, **kwargs)
191 self._data_attr.extend(['pinv_wexog', 'weights'])
192
~/anaconda3/envs/python3/lib/python3.6/site-packages/statsmodels/base/model.py in __init__(self, endog, exog, **kwargs)
235
236 def __init__(self, endog, exog=None, **kwargs):
--> 237 super(LikelihoodModel, self).__init__(endog, exog, **kwargs)
238 self.initialize()
239
~/anaconda3/envs/python3/lib/python3.6/site-packages/statsmodels/base/model.py in __init__(self, endog, exog, **kwargs)
76 hasconst = kwargs.pop('hasconst', None)
77 self.data = self._handle_data(endog, exog, missing, hasconst,
---> 78 **kwargs)
79 self.k_constant = self.data.k_constant
80 self.exog = self.data.exog
~/anaconda3/envs/python3/lib/python3.6/site-packages/statsmodels/base/model.py in _handle_data(self, endog, exog, missing, hasconst, **kwargs)
99
100 def _handle_data(self, endog, exog, missing, hasconst, **kwargs):
--> 101 data = handle_data(endog, exog, missing, hasconst, **kwargs)
102 # kwargs arrays could have changed, easier to just attach here
103 for key in kwargs:
~/anaconda3/envs/python3/lib/python3.6/site-packages/statsmodels/base/data.py in handle_data(endog, exog, missing, hasconst, **kwargs)
671 klass = handle_data_class_factory(endog, exog)
672 return klass(endog, exog=exog, missing=missing, hasconst=hasconst,
--> 673 **kwargs)
~/anaconda3/envs/python3/lib/python3.6/site-packages/statsmodels/base/data.py in __init__(self, endog, exog, missing, hasconst, **kwargs)
85 self.const_idx = None
86 self.k_constant = 0
---> 87 self._handle_constant(hasconst)
88 self._check_integrity()
89 self._cache = {}
~/anaconda3/envs/python3/lib/python3.6/site-packages/statsmodels/base/data.py in _handle_constant(self, hasconst)
131 exog_max = np.max(self.exog, axis=0)
132 if not np.isfinite(exog_max).all():
--> 133 raise MissingDataError('exog contains inf or nans')
134 exog_min = np.min(self.exog, axis=0)
135 const_idx = np.where(exog_max == exog_min)[0].squeeze()
MissingDataError: exog contains inf or nans