diff --git a/coverage.xml b/coverage.xml index c3551824..ee737b53 100644 --- a/coverage.xml +++ b/coverage.xml @@ -1,5 +1,5 @@ - + @@ -28,12 +28,12 @@ - + - - + + @@ -491,9 +491,9 @@ - + - + @@ -509,25 +509,21 @@ - - - - + + + - - + - - @@ -536,12 +532,12 @@ + - - - - - + + + + @@ -549,139 +545,144 @@ + + - + - - - - - - - - - - + + + + + + - + + + + - + + - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + - + - + - + + + - - - - + + + + + + - - - - - - - + + + + + + - + - - - - - - - - - + + + + - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - + + + + + + + - - - - - - - - - + + + + + + + + + + + + + + + + + @@ -1123,7 +1124,7 @@ - + @@ -1138,76 +1139,81 @@ - - - - - + + + + - - - - - - - - - - - - + + + + + + + + + + + + - - - - - - - - - - - - + + + + + - + - + - + + - + + - - + + - + + + - - - + + + + + - - - - - + + - - + + + + + + + + + + + + @@ -1324,7 +1330,7 @@ - + @@ -1466,98 +1472,98 @@ - - - + + + - - - - + + + + - + - - - + + + - + - - + + - + - + - - - - + + + + - + - - + + - + - - + + - - - - - + + + + + - - - - - + + + + + - - - - + + + + - - - + + + - - + + - - - - - - + + + + + + - + - - - + + + @@ -1566,16 +1572,16 @@ - + - - + + - - + + @@ -1583,134 +1589,134 @@ - + - - - + + + - + - - + + - - + + - + - - - + + + - - - - + + + + - - + + - - - + + + - - - + + + - - - - + + + + - - + + - - + + - + - + - - + + - + - + - - + + - + - + - + - - + + - + - + - + - + - + - - + + - + - - + + - - - - + + + + @@ -1718,10 +1724,10 @@ - - - - + + + + @@ -1729,130 +1735,130 @@ - + - + - - + + - - - - - - + + + + + + - - - - - - - - + + + + + + + + - + - + - - - - + + + + - + - - + + - + - + - + - - - - - + + + + + - + - + - - - - + + + + - - - - + + + + - + - + - + - + - - + + - - - - - - - + + + + + + + - + - + - - + + - + - + - - + + - + - + @@ -1860,59 +1866,59 @@ - - - - + + + + - - - + + + - - - - - + + + + + - + - + - + - + - - + + - + - + - + - - - - 
- + + + + + - - + + @@ -1921,176 +1927,176 @@ - - + + - + - + - - - + + + - + - + - - - - + + + + - + - + - + - + - - + + - + - + - - - - - - - - - - - - + + + + + + + + + + + + - + - - + + - - + + - + - + - + - - + + - - - - - - - - - + + + + + + + + + - + - + - + - + - + - - - - + + + + - + - - + + - - + + - - + + - - + + - + - - - - + + + + - - + + - - - - - + + + + + - + - - - - - + + + + + @@ -2102,100 +2108,100 @@ - - - + + + - + - - - + + + - + - - - - + + + + - + - + - + - + - - + + - - + + - + - + - + - + - - + + - + - + - - - + + + - - + + - - + + - + - - + + - - + + - - + + @@ -2204,23 +2210,24 @@ - + - + - - - + + + - - + + - + + @@ -2384,173 +2391,224 @@ - + - + - - + + - - - - + + + + - + - + - + - + + - - - - - + + + - - - - - - + + + - - + + + + - - - - - - - - - + + + + + + + + - - - - - - - - - + + + + + + + + + - - - - - + + + + + - + - + - + - - - - + + + + - + + - - - - - + + - + + - + - - - - - - - - - - + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - + + + + + + + + + + - - - - - + + + + + + + + + + - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -3035,7 +3093,7 @@ - + @@ -3045,7 +3103,7 @@ - + @@ -3148,7 +3206,7 @@ - + @@ -3330,196 +3388,213 @@ - + + - - - - - + + + + + - - - - - + + + + + - - - + + + + - + - - + + + - - - + + + - - - - + + + + - - - + + + - - - - + + - - - - - - - + + + + + + + - - - - + + + + + - - + + - - - - - - - + + + + + + - - - - - + + + + + - - + + + + - - - - - - - + + + + + + + - - - - - - + + + + + + - - - + + + - - - + + - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + - - + + - - - - - - - - - - + + + + + + - - - + + + - - + + + + + - - - + - - - - - - - + + + + + + + - - + + - - + - + + - + - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/pyfixest/estimation/FixestMulti_.py b/pyfixest/estimation/FixestMulti_.py index f4c84621..2fd7314f 100644 --- 
a/pyfixest/estimation/FixestMulti_.py +++ b/pyfixest/estimation/FixestMulti_.py @@ -27,13 +27,14 @@ def __init__( use_compression: bool, reps: Optional[int], seed: Optional[int], + separation_check: Optional[list[str]] = None, ) -> None: """ Initialize a class for multiple fixed effect estimations. Parameters ---------- - data : panda.DataFrame + data : pandas.DataFrame The input DataFrame for the object. copy_data : bool Whether to copy the data or not. @@ -56,6 +57,9 @@ def __init__( seed : Optional[int] Option to provide a random seed. Default is None. Only relevant for wild cluster bootstrap for use_compression=True. + separation_check: list[str], optional + Only used in "fepois". Methods to identify and drop separated observations. + Either "fe" or "ir". Executes both by default. Returns ------- @@ -69,6 +73,7 @@ def __init__( self._use_compression = use_compression self._reps = reps if use_compression else None self._seed = seed if use_compression else None + self._separation_check = separation_check data = _polars_to_pandas(data) @@ -170,6 +175,7 @@ def _estimate_all_models( collin_tol: float = 1e-6, iwls_maxiter: int = 25, iwls_tol: float = 1e-08, + separation_check: Optional[list[str]] = None, ) -> None: """ Estimate multiple regression models. @@ -190,6 +196,9 @@ def _estimate_all_models( iwls_tol : float, optional The tolerance level for the IWLS algorithm. Default is 1e-8. Only relevant for non-linear estimation strategies. + separation_check: list[str], optional + Only used in "fepois". Methods to identify and drop separated observations. + Either "fe" or "ir". Executes both by default. 
Returns ------- @@ -284,6 +293,7 @@ def _estimate_all_models( store_data=_store_data, copy_data=_copy_data, lean=_lean, + separation_check=separation_check, # solver=_solver ) FIT.prepare_model_matrix() diff --git a/pyfixest/estimation/estimation.py b/pyfixest/estimation/estimation.py index 11593254..bab7d046 100644 --- a/pyfixest/estimation/estimation.py +++ b/pyfixest/estimation/estimation.py @@ -1,4 +1,4 @@ -from typing import Optional, Union +from typing import List, Optional, Union import pandas as pd @@ -358,6 +358,7 @@ class for multiple models specified via `fml`. use_compression=use_compression, reps=reps, seed=seed, + separation_check=None, ) fixest = FixestMulti( @@ -397,6 +398,7 @@ def fepois( iwls_tol: float = 1e-08, iwls_maxiter: int = 25, collin_tol: float = 1e-10, + separation_check: Optional[list[str]] = ["fe"], drop_intercept: bool = False, i_ref1=None, copy_data: bool = True, @@ -446,6 +448,10 @@ def fepois( collin_tol : float, optional Tolerance for collinearity check, by default 1e-10. + separation_check: list[str], optional + Methods to identify and drop separated observations. + Either "fe" or "ir". Executes "fe" by default. + drop_intercept : bool, optional Whether to drop the intercept from the model, by default False. 
@@ -530,6 +536,7 @@ def fepois( use_compression=False, reps=None, seed=None, + separation_check=separation_check, ) fixest = FixestMulti( @@ -557,6 +564,7 @@ def fepois( iwls_tol=iwls_tol, iwls_maxiter=iwls_maxiter, collin_tol=collin_tol, + separation_check=separation_check, ) if fixest._is_multiple_estimation: @@ -581,6 +589,7 @@ def _estimation_input_checks( use_compression: bool, reps: Optional[int], seed: Optional[int], + separation_check: List[str] = None, ): if not isinstance(fml, str): raise TypeError("fml must be a string") @@ -664,3 +673,14 @@ def _estimation_input_checks( if seed is not None and not isinstance(seed, int): raise TypeError("The function argument `seed` must be of type int.") + + if separation_check is not None: + if not isinstance(separation_check, list): + raise TypeError( + "The function argument `separation_check` must be of type list." + ) + + if not all(x in ["fe", "ir"] for x in separation_check): + raise ValueError( + "The function argument `separation_check` must be a list of strings containing 'fe' and/or 'ir'." 
class _SeparationMethod(Protocol):
    """Call signature shared by every separation check used by the dispatcher."""

    def __call__(
        self,
        fml: str,
        data: pd.DataFrame,
        Y: pd.DataFrame,
        X: pd.DataFrame,
        fe: pd.DataFrame,
    ) -> set[int]:
        """
        Check for separation.

        Parameters
        ----------
        fml : str
            The formula used for estimation.
        data : pd.DataFrame
            The data used for estimation.
        Y : pd.DataFrame
            Dependent variable.
        X : pd.DataFrame
            Independent variables.
        fe : pd.DataFrame
            Fixed effects.

        Returns
        -------
        set
            Set of indices of separated observations.
        """
        ...


def _check_for_separation(
    fml: str,
    data: pd.DataFrame,
    Y: pd.DataFrame,
    X: pd.DataFrame,
    fe: pd.DataFrame,
    methods: Optional[list[str]] = None,
) -> list[int]:
    """
    Check for separation.

    Check for separation of Poisson Regression. For details, see the ppmlhdfe
    documentation on separation checks. Every requested method is run and the
    union of the observations they flag is returned.

    Parameters
    ----------
    fml : str
        The formula used for estimation.
    data : pd.DataFrame
        The data used for estimation.
    Y : pd.DataFrame
        Dependent variable.
    X : pd.DataFrame
        Independent variables.
    fe : pd.DataFrame
        Fixed effects.
    methods: list[str], optional
        Methods used to check for separation. One of fixed effects ("fe") or
        iterative rectifier ("ir"). Executes all methods when None.

    Returns
    -------
    list
        List of indices of observations that are removed due to separation.

    Raises
    ------
    ValueError
        If `methods` contains a name other than "fe" or "ir".

    Warns
    -----
    UserWarning
        If at least one separated observation was found.
    """
    valid_methods: dict[str, _SeparationMethod] = {
        "fe": _check_for_separation_fe,
        "ir": _check_for_separation_ir,
    }

    # The documented default is "run everything"; iterating None would raise.
    if methods is None:
        methods = list(valid_methods)

    invalid_methods = [method for method in methods if method not in valid_methods]
    if invalid_methods:
        raise ValueError(
            f"Invalid separation method. Expecting {list(valid_methods)}. Received {invalid_methods}"
        )

    separation_na: set[int] = set()
    # dict.fromkeys drops repeated names (order preserved) so that no check
    # is executed twice for e.g. ["fe", "fe"].
    for method in dict.fromkeys(methods):
        separation_na |= valid_methods[method](fml=fml, data=data, Y=Y, X=X, fe=fe)

    if separation_na:
        warnings.warn(
            f"{len(separation_na)} observations removed because of separation."
        )

    return list(separation_na)
+ data : pd.DataFrame + The data used for estimation. + Y : pd.DataFrame + Dependent variable. + X : pd.DataFrame + Independent variables. + fe : pd.DataFrame + Fixed effects. + + Returns + ------- + set + Set of indices of separated observations. + """ + separation_na: set[int] = set() + if fe is not None and not (Y > 0).all(axis=0).all(): Y_help = (Y > 0).astype(int).squeeze() # loop over all elements of fe @@ -442,7 +546,108 @@ def _check_for_separation(Y: pd.DataFrame, fe: pd.DataFrame) -> list[int]: dropset = set(fe[x][fe_in_droplist].index) separation_na = separation_na.union(dropset) - return list(separation_na) + return separation_na + + +def _check_for_separation_ir( + fml: str, + data: pd.DataFrame, + Y: pd.DataFrame, + X: pd.DataFrame, + fe: pd.DataFrame, + tol: float = 1e-4, + maxiter: int = 100, +) -> set[int]: + """ + Check for separation using the "iterative rectifier" algorithm + proposed by Correia et al. (2021). For details see http://arxiv.org/abs/1903.01633. + + Parameters + ---------- + fml : str + The formula used for estimation. + data : pd.DataFrame + The data used for estimation. + Y : pd.DataFrame + Dependent variable. + X : pd.DataFrame + Independent variables. + fe : pd.DataFrame + Fixed effects. + tol : float + Tolerance to detect separated observation. Defaults to 1e-4. + maxiter : int + Maximum number of iterations. Defaults to 100. + + Returns + ------- + set + Set of indices of separated observations. 
+ """ + # lazy load to avoid circular import + fixest_module = import_module("pyfixest.estimation") + feols = getattr(fixest_module, "feols") + # initialize + separation_na: set[int] = set() + tmp_suffix = "_separationTmp" + # build formula + name_dependent, rest = fml.split("~") + name_dependent_separation = "U" + if name_dependent_separation in data.columns: + name_dependent_separation += tmp_suffix + + fml_separation = f"{name_dependent_separation} ~ {rest}" + + dependent: pd.Series = data[name_dependent] + is_interior = dependent > 0 + if is_interior.all(): + # no boundary sample, can exit + return separation_na + + # initialize variables + tmp: pd.DataFrame = pd.DataFrame(index=data.index) + tmp["U"] = (dependent == 0).astype(float).rename("U") + # weights + N0 = (dependent > 0).sum() + K = N0 / tol**2 + tmp["omega"] = pd.Series( + np.where(dependent > 0, K, 1), name="omega", index=data.index + ) + # combine data + # TODO: avoid create new object? + tmp = data.join(tmp, how="left", validate="one_to_one", rsuffix=tmp_suffix) + # TODO: need to ensure that join doesn't create duplicated columns + # assert not tmp.columns.duplicated().any() + + iteration = 0 + has_converged = False + while iteration < maxiter: + iteration += 1 + # regress U on X + # TODO: check acceleration in ppmlhdfe's implementation: https://github.com/sergiocorreia/ppmlhdfe/blob/master/src/ppmlhdfe_separation_relu.mata#L135 + fitted = feols(fml_separation, data=tmp, weights="omega") + tmp["Uhat"] = pd.Series(fitted.predict(), index=fitted._data.index, name="Uhat") + Uhat = tmp["Uhat"] + # update when within tolerance of zero + # need to be more strict below zero to avoid false positives + within_zero = (Uhat > -0.1 * tol) & (Uhat < tol) + Uhat.where(~(is_interior | within_zero.fillna(True)), 0, inplace=True) + if (Uhat >= 0).all(): + # all separated observations have been identified + has_converged = True + break + tmp.loc[~is_interior, "U"] = np.fmax( + Uhat[~is_interior], 0 + ) # rectified 
linear unit (ReLU) + + if has_converged: + separation_na = set(dependent[Uhat > 0].index) + else: + warnings.warn( + "iterative rectivier separation check: maximum number of iterations reached before convergence" + ) + + return separation_na def _fepois_input_checks(drop_singletons: bool, tol: float, maxiter: int): diff --git a/tests/data/pplmhdfe_separation_examples/01.csv b/tests/data/pplmhdfe_separation_examples/01.csv new file mode 100644 index 00000000..0639065c --- /dev/null +++ b/tests/data/pplmhdfe_separation_examples/01.csv @@ -0,0 +1,101 @@ +y,x1,x2,id1,id2,separated +0.0000000000,-0.9303550124,1,1,4,1 +0.0000000000,0.1835959703,1,2,1,1 +0.0000000000,-0.6371972561,0,2,6,0 +0.0000000000,-0.4237562418,0,2,7,0 +0.1527670026,-1.1799178123,0,8,4,0 +0.1553160399,0.8860545158,0,1,7,0 +0.1734523475,1.0502026081,0,8,3,0 +0.2217264324,-0.2490162849,0,9,1,0 +0.2260344625,0.9635434151,0,7,6,0 +0.2283350676,0.5023207068,0,3,5,0 +0.2368061543,0.9141282439,0,10,1,0 +0.2410950512,-1.3616287708,0,4,5,0 +0.2541858852,0.5753656030,0,3,3,0 +0.2637400925,-0.6333113909,0,3,10,0 +0.2677916288,1.0411013365,0,6,9,0 +0.2768439949,-1.1694648266,0,8,10,0 +0.2934476137,-0.7940499187,0,4,6,0 +0.3290584087,0.5041465163,0,2,4,0 +0.3606268466,-3.0584282875,0,3,8,0 +0.4013363719,0.6099517941,0,7,5,0 +0.4354907870,0.9624704719,0,6,1,0 +0.4908127189,-0.7442333698,0,5,2,0 +0.4976674914,-0.5138924718,0,6,4,0 +0.5012444854,-1.3591595888,0,9,7,0 +0.5456602573,0.0567612983,0,5,5,0 +0.5634447336,1.2903038263,0,7,8,0 +0.5983847380,-0.6872945428,0,6,5,0 +0.6183075905,0.7253564000,0,1,5,0 +0.6413634419,1.6118478775,0,4,3,0 +0.6482065916,1.2488127947,0,7,1,0 +0.6522977948,-0.4748489261,0,6,6,0 +0.6631931663,0.4219789803,0,4,8,0 +0.6953295469,-1.0251801014,0,10,6,0 +0.6986964941,-0.3038678169,0,9,9,0 +0.8503285050,1.8723217249,0,8,2,0 +0.9026033878,-1.0245078802,0,10,10,0 +0.9204394221,0.4229967892,0,6,10,0 +0.9228412509,0.4940861166,0,1,1,0 +0.9359286427,1.3081433773,0,9,2,0 
+0.9685080647,1.2934249640,0,2,10,0 +0.9945486188,-0.5332730412,0,5,1,0 +1.0105472803,-0.1284428090,0,9,3,0 +1.0721468925,-1.5399883986,0,6,8,0 +1.1205748320,0.6894677877,0,8,5,0 +1.1252909899,-1.2204582691,0,1,10,0 +1.1561176777,-0.9787744284,0,9,8,0 +1.1946246624,-0.0799055845,0,10,8,0 +1.2046658993,-0.8231971860,0,6,7,0 +1.2189750671,0.5437637568,0,3,4,0 +1.2277959585,1.3177309036,0,9,10,0 +1.2413842678,0.6673717499,0,8,9,0 +1.2569460869,-0.0167010967,0,6,3,0 +1.2587834597,-0.4196293950,0,1,8,0 +1.2782599926,-0.6420007348,0,8,1,0 +1.2911227942,1.1136496067,0,9,4,0 +1.2973045111,-0.3824758530,0,7,9,0 +1.3675237894,1.2361305952,0,5,9,0 +1.3778325319,-1.0304020643,0,5,4,0 +1.3857760429,0.3235974312,0,2,3,0 +1.3960508108,-0.4157371819,0,2,5,0 +1.4190907478,0.9920675159,0,1,2,0 +1.4420653582,-0.9114651084,0,4,1,0 +1.5038720369,-1.0453398228,0,3,2,0 +1.5394419432,-0.1935533732,0,4,4,0 +1.5747014284,0.0698969364,0,9,6,0 +1.6199581623,1.3169367313,0,4,2,0 +1.6392902136,-0.3978092670,0,7,10,0 +1.6421631575,-0.7466211319,0,5,8,0 +1.6952790022,-0.0158907417,0,5,6,0 +1.7640979290,1.0598815680,0,7,4,0 +1.9505974054,0.0092241317,0,10,7,0 +2.0685675144,0.1434842199,0,8,8,0 +2.1190843582,0.6173521280,0,3,1,0 +2.1889939308,-1.9780639410,0,3,7,0 +2.2176725864,-1.5379956961,0,7,3,0 +2.2831020355,0.5082080960,0,2,2,0 +2.3055832386,1.0296376944,0,7,2,0 +2.3692295551,2.1091823578,0,10,2,0 +2.7510018349,0.2632481158,0,2,9,0 +2.7675759792,-0.0022486539,0,8,6,0 +2.7777233124,-1.3771806955,0,10,4,0 +2.7846245766,0.1415781677,0,10,5,0 +2.7860391140,-2.2442505360,0,4,9,0 +2.9671635628,0.2927849889,0,5,3,0 +2.9819300175,-0.8325243592,0,4,10,0 +3.1186814308,-0.4090226293,0,2,8,0 +3.2802021503,-0.4062994719,0,4,7,0 +3.4179122448,0.0959109142,0,8,7,0 +3.6803083420,2.3073217869,0,1,9,0 +3.7194297314,0.3930145800,0,3,9,0 +3.7777581215,0.2952103019,0,6,2,0 +4.1290211678,-1.4121559858,0,5,7,0 +4.2730326653,-0.5140260458,0,10,9,0 +4.2883334160,-1.0160779953,0,7,7,0 
def test_separation():
    """Test separation detection with the "fe" and "ir" checks."""
    from pathlib import Path

    # Two observations belong to a fixed-effect level ("a") whose outcomes
    # are all zero; the "fe" check must flag exactly these.
    example1 = pd.DataFrame.from_dict(
        {
            "Y": [0, 0, 0, 1, 2, 3],
            "fe1": ["a", "a", "b", "b", "b", "c"],
            "fe2": ["c", "c", "d", "d", "d", "e"],
            "X": np.random.normal(0, 1, 6),
        }
    )

    with pytest.warns(
        UserWarning, match="2 observations removed because of separation."
    ):
        fepois("Y ~ X | fe1", data=example1, vcov="hetero", separation_check=["fe"])  # noqa: F841

    example2 = pd.DataFrame.from_dict(
        {
            "Y": [0, 0, 0, 1, 2, 3],
            "X1": [2, -1, 0, 0, 5, 6],
            "X2": [5, 10, 0, 0, -10, -12],
        }
    )

    with pytest.warns(
        UserWarning, match="2 observations removed because of separation."
    ):
        fepois("Y ~ X1 | X2", data=example2, vcov="hetero", separation_check=["ir"])  # noqa: F841

    # ppmlhdfe test data sets: resolve the fixture relative to this test file
    # so the test does not depend on the working directory.
    data_01 = pd.read_csv(
        Path(__file__).parent / "data" / "pplmhdfe_separation_examples" / "01.csv"
    )
    # The "separated" column marks the observations the check should drop.
    n_separated = int(data_01["separated"].sum())

    with pytest.warns(
        UserWarning,
        match=f"{n_separated} observations removed because of separation.",
    ):
        fepois("y ~ x1 + x2 | id1 + id2", data=data_01, separation_check=["ir"])