I am getting the above error while running a `for` loop that builds multiple models. The first two models, which use similar data sets, build fine, but building the third model raises this error. The error is thrown when I call `sm.Logit()` from the statsmodels package in Python:
y = y_mort.convert_objects(convert_numeric=True)
#Building Logistic model_LSVC
print("Shape of y:", y.shape, " &&Shape of X_selected_lsvc:", X.shape)
print("y values:",y.head())
logit = sm.Logit(y,X,missing='drop')
The error that appears:
Shape of y: (9018,) &&Shape of X_selected_lsvc: (9018, 59)
y values: 0 0
1 1
2 0
3 0
4 0
Name: mort, dtype: int64
ValueError Traceback (most recent call last)
<ipython-input-8-fec746e2ee99> in <module>()
160 print("Shape of y:", y.shape, " &&Shape of X_selected_lsvc:", X.shape)
161 print("y values:",y.head())
--> 162 logit = sm.Logit(y,X,missing='drop')
163 # fit the model
164 est = logit.fit(method='cg')
D:\Anaconda3\lib\site-packages\statsmodels\discrete\discrete_model.py in __init__(self, endog, exog, **kwargs)
399
400 def __init__(self, endog, exog, **kwargs):
--> 401 super(BinaryModel, self).__init__(endog, exog, **kwargs)
402 if (self.__class__.__name__ != 'MNLogit' and
403 not np.all((self.endog >= 0) & (self.endog <= 1))):
D:\Anaconda3\lib\site-packages\statsmodels\discrete\discrete_model.py in __init__(self, endog, exog, **kwargs)
152 """
153 def __init__(self, endog, exog, **kwargs):
--> 154 super(DiscreteModel, self).__init__(endog, exog, **kwargs)
155 self.raise_on_perfect_prediction = True
156
D:\Anaconda3\lib\site-packages\statsmodels\base\model.py in __init__(self, endog, exog, **kwargs)
184
185 def __init__(self, endog, exog=None, **kwargs):
--> 186 super(LikelihoodModel, self).__init__(endog, exog, **kwargs)
187 self.initialize()
188
D:\Anaconda3\lib\site-packages\statsmodels\base\model.py in __init__(self, endog, exog, **kwargs)
58 hasconst = kwargs.pop('hasconst', None)
59 self.data = self._handle_data(endog, exog, missing, hasconst,
---> 60 **kwargs)
61 self.k_constant = self.data.k_constant
62 self.exog = self.data.exog
D:\Anaconda3\lib\site-packages\statsmodels\base\model.py in _handle_data(self, endog, exog, missing, hasconst, **kwargs)
82
83 def _handle_data(self, endog, exog, missing, hasconst, **kwargs):
---> 84 data = handle_data(endog, exog, missing, hasconst, **kwargs)
85 # kwargs arrays could have changed, easier to just attach here
86 for key in kwargs:
D:\Anaconda3\lib\site-packages\statsmodels\base\data.py in handle_data(endog, exog, missing, hasconst, **kwargs)
564 klass = handle_data_class_factory(endog, exog)
565 return klass(endog, exog=exog, missing=missing, hasconst=hasconst,
--> 566 **kwargs)
D:\Anaconda3\lib\site-packages\statsmodels\base\data.py in __init__(self, endog, exog, missing, hasconst, **kwargs)
74 # this has side-effects, attaches k_constant and const_idx
75 self._handle_constant(hasconst)
---> 76 self._check_integrity()
77 self._cache = resettable_cache()
78
D:\Anaconda3\lib\site-packages\statsmodels\base\data.py in _check_integrity(self)
450 (hasattr(endog, 'index') and hasattr(exog, 'index')) and
451 not self.orig_endog.index.equals(self.orig_exog.index)):
--> 452 raise ValueError("The indices for endog and exog are not aligned")
453 super(PandasData, self)._check_integrity()
454
ValueError: The indices for endog and exog are not aligned
The y vector and X matrix have shapes (9018,) and (9018, 59), respectively, so there does not appear to be any mismatch in the number of rows between the dependent and independent variables. Any idea?
The `index`es don't match up, that's my guess. I don't think it's a shape/number-of-rows issue. You can convert a pandas object to a numpy array with `asarray`: `Logit(np.asarray(y), np.asarray(X), ...)`
(I guess `as_matrix` converts to a numpy matrix, not an array. IIRC, numpy matrix is not supported by statsmodels, and its use is strongly discouraged.) – Whitefly