forked from ModelOriented/DALEX
-
Notifications
You must be signed in to change notification settings - Fork 0
/
model_parts.Rd
81 lines (69 loc) · 3.56 KB
/
model_parts.Rd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/model_parts.R
\name{model_parts}
\alias{model_parts}
\alias{variable_importance}
\alias{feature_importance}
\title{Dataset Level Variable Importance as Change in Loss Function after Variable Permutations}
\usage{
model_parts(
explainer,
loss_function = loss_default(explainer$model_info$type),
...,
type = "variable_importance",
N = n_sample,
n_sample = 1000
)
}
\arguments{
\item{explainer}{a model to be explained, preprocessed by the \code{explain} function}
\item{loss_function}{a function that will be used to assess variable importance. By default it is 1-AUC for classification, cross entropy for multilabel classification and RMSE for regression. Custom, user-made loss function should accept two obligatory parameters (observed, predicted), where \code{observed} states for actual values of the target, while \code{predicted} for predicted values. If attribute "loss_accuracy" is associated with function object, then it will be plotted as name of the loss function.}
\item{...}{other parameters}
\item{type}{character, type of transformation that should be applied for dropout loss. \code{variable_importance} and \code{raw} results raw drop lossess, \code{ratio} returns \code{drop_loss/drop_loss_full_model} while \code{difference} returns \code{drop_loss - drop_loss_full_model}}
\item{N}{number of observations that should be sampled for calculation of variable importance. If \code{NULL} then variable importance will be calculated on whole dataset (no sampling).}
\item{n_sample}{alias for \code{N} held for backwards compatibility. number of observations that should be sampled for calculation of variable importance.}
}
\value{
An object of the class \code{feature_importance}.
It's a data frame with calculated average response.
}
\description{
From DALEX version 1.0 this function calls the \code{\link[ingredients]{feature_importance}}
Find information how to use this function here: \url{https://ema.drwhy.ai/featureImportance.html}.
}
\examples{
\donttest{
# regression
library("ranger")
apartments_ranger_model <- ranger(m2.price~., data = apartments, num.trees = 50)
explainer_ranger <- explain(apartments_ranger_model, data = apartments[,-1],
y = apartments$m2.price, label = "Ranger Apartments")
model_parts_ranger_aps <- model_parts(explainer_ranger, type = "raw")
head(model_parts_ranger_aps, 8)
plot(model_parts_ranger_aps)
# binary classification
titanic_glm_model <- glm(survived~., data = titanic_imputed, family = "binomial")
explainer_glm_titanic <- explain(titanic_glm_model, data = titanic_imputed[,-8],
y = titanic_imputed$survived)
logit <- function(x) exp(x)/(1+exp(x))
custom_loss <- function(observed, predicted){
sum((observed - logit(predicted))^2)
}
attr(custom_loss, "loss_name") <- "Logit residuals"
model_parts_glm_titanic <- model_parts(explainer_glm_titanic, type = "raw",
loss_function = custom_loss)
head(model_parts_glm_titanic, 8)
plot(model_parts_glm_titanic)
# multilabel classification
HR_ranger_model_HR <- ranger(status~., data = HR, num.trees = 50,
probability = TRUE)
explainer_ranger_HR <- explain(HR_ranger_model_HR, data = HR[,-6],
y = HR$status, label = "Ranger HR")
model_parts_ranger_HR <- model_parts(explainer_ranger_HR, type = "raw")
head(model_parts_ranger_HR, 8)
plot(model_parts_ranger_HR)
}
}
\references{
Explanatory Model Analysis. Explore, Explain and Examine Predictive Models. \url{https://ema.drwhy.ai/}
}