forked from pik-piam/mip
-
Notifications
You must be signed in to change notification settings - Fork 0
/
plotPercentiles.R
136 lines (128 loc) · 6.12 KB
/
plotPercentiles.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#' Comparison line plots with percentiles
#'
#' Line plots show the median (50th percentile) of the user-selected variable(s) obtained from different scenario runs.
#' If available in the data, ribbons additionally show the 33rd - 67th percentile region in a darker color and the
#' 5th - 95th percentile region in a lighter color. Note: percentiles are not computed by this function, they must be
#' provided in the data set. The median is mandatory; a ribbon is skipped when one of its two bounding percentiles is
#' not present in the data.
#'
#' @author Tonn Rüter
#' @param df The \code{quitte}-style data frame must contain the columns \code{scenario}, \code{region},
#' \code{variable}, \code{unit}, \code{period} and \code{value}. All percentiles of the quantity of interest must be
#' given as individual variables whose last "|"-separated element ends in " Percentile" (e.g. for atmospheric CO2
#' concentrations "Atmospheric Concentrations|CO2|50.0th Percentile",
#' "Atmospheric Concentrations|CO2|33.0th Percentile", ...). Variables without such a suffix are ignored
#' @param scenarios Character vector containing names of the desired scenarios. If \code{NULL}, all scenarios present in
#' the data will be displayed
#' @param variables Character vector containing names of the desired variables. If \code{NULL}, all variables present in
#' the data will be displayed. When selecting particular variables for display only use the "Any|Variable"-prefix and
#' omit the "X-th Percentile"-suffix (e.g. for atmospheric CO2 concentrations write "Atmospheric Concentrations|CO2")
#' @return A \code{ggplot} object. With a single scenario the plot is faceted by variable (independent y-axes), with
#' a single variable it is faceted by scenario (shared y-axis), otherwise a variable-by-scenario facet grid is used.
#' An error is raised if a requested scenario or variable is not available, or if no median ("50.0th Percentile")
#' data is left for the requested selection
#' @examples
#' \dontrun{
#' # Plot atmospheric CO2 concentrations for all scenarios available in the data
#' p <- plotPercentiles(
#'   data,
#'   # Use variable name without "X-th Percentile"-suffix
#'   variables = c("AR6 climate diagnostics|Atmospheric Concentrations|CO2|MAGICCv7.5.3")
#' )
#' # Plot all available variables for selected scenarios
#' p <- plotPercentiles(data, scenarios = c("d_delfrag", "d_another"))
#' }
#' @section Example Plot:
#' \if{html}{\figure{plotPercentiles1.png}{Atmospheric CO2 concentrations for all scenarios available in the data}}
#' @importFrom dplyr filter mutate vars select
#' @importFrom reshape2 melt
#' @importFrom stringr str_extract
#' @importFrom tidyr pivot_wider all_of
#' @importFrom ggplot2 ggplot aes geom_line geom_ribbon facet_wrap facet_grid theme ylab element_blank element_text
#' @export
plotPercentiles <- function(df, scenarios = NULL, variables = NULL) {
  # Dropped "model" from necessary columns
  necessaryCols <- c("scenario", "region", "variable", "unit", "period", "value")
  # Column holding the median after pivoting; this is the only percentile the plot cannot do without
  medianCol <- "50.0th Percentile"
  ribbonFill <- "#68788a"

  # In the quitte data frame all percentiles are given as individual variables. Manipulate input data frame such that
  # all percentiles of a given quantity are transformed to individual columns
  data <- df %>%
    # Drop NA values
    as.quitte(na.rm = TRUE) %>%
    # Only use necessary columns. The .data-prefix in col names was deprecated in tidyselect since v1.2.0.
    select(all_of(necessaryCols)) %>%
    # Variable names in the quitte data frame follow the format "Any|Variable|5.0th Percentile". The regular
    # expressions below divide the variable name into the prefix and the percentile specifier. "percentile" must be
    # derived first, as the second assignment strips the suffix from "variable"
    mutate(
      "percentile" = stringr::str_extract(.data$variable, "[^\\|]+?$"),
      "variable" = gsub("\\|[^\\|]+$", "", .data$variable)
    ) %>%
    # Remove all non-percentile variables
    filter(grepl(" Percentile$", .data$percentile)) %>%
    # Pivot data such that percentiles are transformed to individual columns
    pivot_wider(
      names_from = "percentile",
      values_from = "value"
    )

  # Check which scenarios/variables are available
  uniqueScenarios <- unique(data$scenario)
  uniqueVariables <- unique(data$variable)

  # Check which function parameters have been provided and default to unique values from the data frame in case none
  # have. If scenarios/variables have been provided by user, check whether they are available in the data
  if (!is.null(scenarios)) {
    diffScenarios <- setdiff(scenarios, uniqueScenarios)
    if (length(diffScenarios) > 0) {
      stop(paste0("Missing scenarios: ", paste0(diffScenarios, collapse = ", "), "\n"))
    }
    theseScenarios <- scenarios
  } else {
    theseScenarios <- uniqueScenarios
  }
  if (!is.null(variables)) {
    diffVariables <- setdiff(variables, uniqueVariables)
    if (length(diffVariables) > 0) {
      stop(paste0("Missing variables: ", paste0(diffVariables, collapse = ", "), "\n"))
    }
    theseVariables <- variables
  } else {
    theseVariables <- uniqueVariables
  }

  data <- data %>%
    filter(.data$scenario %in% theseScenarios, .data$variable %in% theseVariables) %>%
    droplevels()
  # Scenarios that do not provide any of the selected variables must not influence the facet layout
  theseScenarios <- intersect(theseScenarios, unique(data$scenario))

  # Fail early with a readable message instead of an obscure error once the plot is rendered
  if (nrow(data) == 0 || !(medianCol %in% colnames(data))) {
    stop(paste0("No '", medianCol, "' data available for the requested scenarios/variables\n"))
  }

  # Adds a percentile ribbon to the plot, but only if both of its bounds are present in the data. The column names
  # are function arguments, hence they are bound in this call's environment when aes() captures the expressions
  addRibbon <- function(plot, lower, upper, alpha) {
    if (!all(c(lower, upper) %in% colnames(data))) {
      return(plot)
    }
    plot +
      geom_ribbon(aes(ymin = .data[[lower]], ymax = .data[[upper]]), fill = ribbonFill, alpha = alpha)
  }

  # Set up the plot. Every facet layout used below separates the data by scenario and variable, so one set of layers
  # on the full (filtered) data is sufficient; no layer per scenario/variable combination is needed
  p <- ggplot(data, aes(x = .data$period)) +
    geom_line(aes(y = .data[[medianCol]]))
  p <- addRibbon(p, "33.0th Percentile", "67.0th Percentile", alpha = 0.5)
  p <- addRibbon(p, "5.0th Percentile", "95.0th Percentile", alpha = 0.2)

  # Variables may come with different units; collapse them into a single axis label
  unitLabel <- paste(unique(as.character(data$unit)), collapse = ", ")

  # Depending on the function parameters, plots need to be arranged
  if (length(theseScenarios) == 1) {
    # Plots all parameters for a given scenario. Y-axes need to be independent
    p <- p +
      facet_wrap(vars(.data$variable), scales = "free_y", ncol = 1) +
      ylab(unitLabel)
  } else if (length(theseVariables) == 1) {
    # Plots a given parameter for all scenarios. Lock y-axes to improve comparison
    p <- p +
      facet_wrap(vars(.data$scenario)) +
      ylab(paste0(theseVariables, " (", unitLabel, ")"))
  } else {
    # Using facet grid when multiple variables in multiple scenarios are compared
    p <- p +
      facet_grid(.data$variable ~ .data$scenario, scales = "free_y") +
      ylab(unitLabel)
  }

  p + theme(axis.title.x = element_blank(), text = element_text(size = 17))
}