BUG/TST: GEE fix predict (closes statsmodels#1919) and fix conf_int for cov_params_default

Add GEE to the generic tests, and adjust those tests (missing _results, summary2).
IFDAAS · Aug 21, 2014 · 9fe87d8 · 9fe87d8
1 parent 06c5245
commit 9fe87d8
Show file tree

Hide file tree

Showing 2 changed files with 105 additions and 7 deletions.
diff --git a/statsmodels/base/tests/test_generic_methods.py b/statsmodels/base/tests/test_generic_methods.py
@@ -89,8 +89,14 @@ def test_ftest_pvalues(self):
         string_use_t = 'P>|z|' if use_t is False else 'P>|t|'
         summ = str(res.summary())
         assert_(string_use_t in summ)
-        summ = str(res.summary2())
-        assert_(string_use_t in summ)
+
+        # try except for models that don't have summary2
+        try:
+            summ2 = str(res.summary2())
+        except AttributeError:
+            summ2 = None
+        if summ2 is not None:
+            assert_(string_use_t in summ2)
 
 
     # TODO The following is not (yet) guaranteed across models
@@ -99,7 +105,11 @@ def test_fitted(self):
         # ignore wrapper for isinstance check
         from statsmodels.genmod.generalized_linear_model import GLMResults
         from statsmodels.discrete.discrete_model import DiscreteResults
-        results = self.results._results
+        # FIXME: workaround because GEE results have no wrapper (_results)
+        if hasattr(self.results, '_results'):
+            results = self.results._results
+        else:
+            results = self.results
         if (isinstance(results, GLMResults) or
             isinstance(results, DiscreteResults)):
             raise SkipTest
@@ -117,7 +127,13 @@ def test_predict_types(self):
         # ignore wrapper for isinstance check
         from statsmodels.genmod.generalized_linear_model import GLMResults
         from statsmodels.discrete.discrete_model import DiscreteResults
-        results = self.results._results
+
+        # FIXME: workaround because GEE results have no wrapper (_results)
+        if hasattr(self.results, '_results'):
+            results = self.results._results
+        else:
+            results = self.results
+
         if (isinstance(results, GLMResults) or
             isinstance(results, DiscreteResults)):
             # SMOKE test only  TODO
@@ -241,5 +257,81 @@ def setup(self):
         y = x.sum(1) + np.random.randn(x.shape[0])
         self.results = sm.GLM(y, self.exog).fit()
 
+
+class TestGenericGEEPoisson(CheckGenericMixin):
+
+    def setup(self):
+        # refit for each test, because the tests modify the results instance
+        x = self.exog
+        np.random.seed(987689)
+        y_count = np.random.poisson(np.exp(x.sum(1) - x.mean()))
+        groups = np.random.randint(0, 4, size=x.shape[0])
+        # use start_params to speed up test, difficult convergence not tested
+        start_params = np.array([0., 1., 1., 1.])
+
+        # no sm. import
+        # vi = sm.dependence_structures.Independence()
+        from statsmodels.genmod.dependence_structures import Independence
+        vi = Independence()
+        family = sm.families.Poisson()
+        self.results = sm.GEE(y_count, self.exog, groups, family=family,
+                                cov_struct=vi).fit(start_params=start_params)
+
+        # patch the results for missing df_resid, see issue #1918
+        self.results.df_resid = x.shape[0]
+
+
+class TestGenericGEEPoissonNaive(CheckGenericMixin):
+
+    def setup(self):
+        # refit for each test, because the tests modify the results instance
+        x = self.exog
+        np.random.seed(987689)
+        #y_count = np.random.poisson(np.exp(x.sum(1) - x.mean()))
+        y_count = np.random.poisson(np.exp(x.sum(1) - x.sum(1).mean(0)))
+        groups = np.random.randint(0, 4, size=x.shape[0])
+        # use start_params to speed up test, difficult convergence not tested
+        start_params = np.array([0., 1., 1., 1.])
+
+        # no sm. import
+        # vi = sm.dependence_structures.Independence()
+        from statsmodels.genmod.dependence_structures import Independence
+        vi = Independence()
+        family = sm.families.Poisson()
+        self.results = sm.GEE(y_count, self.exog, groups, family=family,
+                                cov_struct=vi).fit(start_params=start_params,
+                                                   covariance_type='naive')
+
+        # patch the results for missing df_resid, see issue #1918
+        self.results.df_resid = x.shape[0]
+
+
+
+class TestGenericGEEPoissonBC(CheckGenericMixin):
+
+    def setup(self):
+        # refit for each test, because the tests modify the results instance
+        x = self.exog
+        np.random.seed(987689)
+        #y_count = np.random.poisson(np.exp(x.sum(1) - x.mean()))
+        y_count = np.random.poisson(np.exp(x.sum(1) - x.sum(1).mean(0)))
+        groups = np.random.randint(0, 4, size=x.shape[0])
+        # use start_params to speed up test, difficult convergence not tested
+        start_params = np.array([0., 1., 1., 1.])
+        # params_est = np.array([-0.0063238 ,  0.99463752,  1.02790201,  0.98080081])
+
+        # no sm. import
+        # vi = sm.dependence_structures.Independence()
+        from statsmodels.genmod.dependence_structures import Independence
+        vi = Independence()
+        family = sm.families.Poisson()
+        mod = sm.GEE(y_count, self.exog, groups, family=family, cov_struct=vi)
+        self.results = mod.fit(start_params=start_params,
+                               covariance_type='bias_reduced')
+
+        # patch the results for missing df_resid, see issue #1918
+        self.results.df_resid = x.shape[0]
+
+
 if __name__ == '__main__':
     pass
diff --git a/statsmodels/genmod/generalized_estimating_equations.py b/statsmodels/genmod/generalized_estimating_equations.py
@@ -796,7 +796,7 @@ def predict(self, params, exog=None, offset=None, linear=False):
         fitted = offset + np.dot(exog, params)
 
         if not linear:
-            fitted = self.family.link(fitted)
+            fitted = self.family.link.inverse(fitted)
 
         return fitted
 
@@ -1182,7 +1182,7 @@ def fittedvalues(self):
                                                      self.params))
 
     def conf_int(self, alpha=.05, cols=None,
-                 covariance_type="robust"):
+                 covariance_type=None):
         """
         Returns confidence intervals for the fitted parameters.
 
@@ -1202,7 +1202,13 @@ def conf_int(self, alpha=.05, cols=None,
         -----
         The confidence interval is based on the Gaussian distribution.
         """
-        bse = self.standard_errors(covariance_type=covariance_type)
+        # super doesn't allow to specify covariance_type and method is not
+        # implemented,
+        # FIXME: remove this method here
+        if covariance_type is None:
+            bse = self.bse
+        else:
+            bse = self.standard_errors(covariance_type=covariance_type)
         params = self.params
         dist = stats.norm
         q = dist.ppf(1 - alpha / 2)