Skip to content

Commit

Permalink
Ignore covariates in binary WGR transform for linear response (#311)
Browse files Browse the repository at this point in the history
* Ignore covars for linear response in binary glowgr

Signed-off-by: Karen Feng <[email protected]>

* Clarify docs

Signed-off-by: Karen Feng <[email protected]>

* Use warnings lib

Signed-off-by: Karen Feng <[email protected]>
  • Loading branch information
karenfeng committed Dec 2, 2020
1 parent 1f3fecd commit 8af47ee
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 10 deletions.
18 changes: 9 additions & 9 deletions python/glow/wgr/linear_model/logistic_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import pyspark.sql.functions as f
from typeguard import typechecked
from typing import Any, Dict, List
import warnings
from glow.logging import record_hls_event


Expand Down Expand Up @@ -153,7 +154,7 @@ def reduce_block_matrix(self, blockdf: DataFrame, labeldf: pd.DataFrame,
validation routine.
covdf : Pandas DataFrame containing covariates to be included in every model in the stacking
ensemble. The covariates should not include an explicit intercept term, as one will be
added automatically.
added automatically. Covariates will be ignored for a linear response.
response : String specifying what transformation to apply ("linear" or "sigmoid")
Returns:
Expand All @@ -163,6 +164,9 @@ def reduce_block_matrix(self, blockdf: DataFrame, labeldf: pd.DataFrame,
transform_key_pattern = ['sample_block', 'label']

if response == 'linear':
if not covdf.empty:
warnings.warn('Ignoring covariates for linear response')
covdf = pd.DataFrame({})
transform_udf = pandas_udf(
lambda key, pdf: apply_model(key, transform_key_pattern, pdf, labeldf,
sample_blocks, self.alphas, covdf),
Expand Down Expand Up @@ -208,7 +212,7 @@ def transform(self,
validation routine.
covdf : Pandas DataFrame containing covariates to be included in every model in the stacking
ensemble (optional). The covariates should not include an explicit intercept term, as one will be
added automatically.
added automatically. Covariates will be ignored for a linear response.
response : String specifying the desired output. Can be 'linear' to specify the direct output of the linear
WGR model (default) or 'sigmoid' to specify predicted label probabilities.
Expand Down Expand Up @@ -247,7 +251,7 @@ def transform_loco(self,
validation routine.
covdf : Pandas DataFrame containing covariates to be included in every model in the stacking
ensemble (optional). The covariates should not include an explicit intercept term, as one will be
added automatically.
added automatically. Covariates will be ignored for a linear response.
response : String specifying the desired output. Can be 'linear' to specify the direct output of the linear
WGR model (default) or 'sigmoid' to specify predicted label probabilities.
chromosomes : List of chromosomes for which to generate a prediction (optional). If not provided, the
Expand Down Expand Up @@ -286,7 +290,7 @@ def fit_transform(self,
sample_blocks : Dict containing a mapping of sample_block ID to a list of corresponding sample IDs
covdf : Pandas DataFrame containing covariates to be included in every model in the stacking
ensemble (optional). The covariates should not include an explicit intercept term, as one will be
added automatically.
added automatically. Covariates will be ignored during the transformation step for a linear response.
response : String specifying the desired output. Can be 'linear' to specify the direct output of the linear
WGR model (default) or 'sigmoid' to specify predicted label probabilities.
Expand All @@ -295,8 +299,4 @@ def fit_transform(self,
rows are indexed by sample ID and the columns by label. The column types are float64.
"""
modeldf, cvdf = self.fit(blockdf, labeldf, sample_blocks, covdf)
if response == 'linear':
return self.transform(blockdf, labeldf, sample_blocks, modeldf, cvdf, pd.DataFrame({}),
response)
else:
return self.transform(blockdf, labeldf, sample_blocks, modeldf, cvdf, covdf, response)
return self.transform(blockdf, labeldf, sample_blocks, modeldf, cvdf, covdf, response)
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ def test_logistic_regression_transform(spark):

logreg = LogisticRegression(alpha_values)
modeldf, cvdf = logreg.fit(lvl1df, labeldf, sample_blocks, covdf)
wgr_cov_df = logreg.transform(lvl1df, labeldf, sample_blocks, modeldf, cvdf)
wgr_cov_df = logreg.transform(lvl1df, labeldf, sample_blocks, modeldf, cvdf, covdf)
wgr_cov_glow = wgr_cov_df[test_label].to_numpy()

assert (np.allclose(np.array(test_values['wgr_cov']), wgr_cov_glow))
Expand Down

0 comments on commit 8af47ee

Please sign in to comment.