1 |
#' Kaplan Meier (KM) plot function for anchored and unanchored cases |
|
2 |
#' |
|
3 |
#' It is wrapper function of \code{basic_kmplot}. The argument setting is similar to \code{maic_anchored} and |
|
4 |
#' \code{maic_unanchored}, and it is used in those two functions. |
|
5 |
#' |
|
6 |
#' @param weights_object an object returned by \code{estimate_weight} |
|
7 |
#' @param tte_ipd a data frame of individual patient data (IPD) of internal trial, contain at least `"USUBJID"`, |
|
8 |
#' `"EVENT"`, `"TIME"` columns and a column indicating treatment assignment |
|
9 |
#' @param tte_pseudo_ipd a data frame of pseudo IPD by digitized KM curves of external trial (for time-to-event |
|
10 |
#' endpoint), contain at least `"EVENT"`, `"TIME"` |
|
11 |
#' @param trt_ipd a string, name of the interested investigation arm in internal trial \code{dat_igd} (real IPD) |
|
12 |
#' @param trt_agd a string, name of the interested investigation arm in external trial \code{dat_pseudo} (pseudo IPD) |
|
13 |
#' @param trt_common a string, name of the common comparator in internal and external trial, by default is NULL, |
|
14 |
#' indicating unanchored case |
|
15 |
#' @param trt_var_ipd a string, column name in \code{tte_ipd} that contains the treatment assignment |
|
16 |
#' @param trt_var_agd a string, column name in \code{tte_pseudo_ipd} that contains the treatment assignment |
|
17 |
#' @param normalize_weights logical, default is \code{FALSE}. If \code{TRUE}, |
|
18 |
#' \code{scaled_weights} (normalized weights) in \code{weights_object$data} will be used. |
|
19 |
#' @param km_conf_type a string, pass to \code{conf.type} of \code{survfit} |
|
20 |
#' @param km_layout a string, only applicable for unanchored case (\code{trt_common = NULL}), indicated the desired |
|
21 |
#' layout of output KM curve. |
|
22 |
#' @param ... other arguments in \code{basic_kmplot} |
|
23 |
#' |
|
24 |
#' @return In unanchored case, a KM plot with risk set table. In anchored case, depending on \code{km_layout}, |
|
25 |
#' \itemize{ |
|
26 |
#' \item if "by_trial", 2 by 1 plot, first all KM curves (incl. weighted) in IPD trial, and then KM curves in AgD |
|
27 |
#' trial, with risk set table. |
|
28 |
#' \item if "by_arm", 2 by 1 plot, first KM curves of \code{trt_agd} and \code{trt_ipd} (with and without weights), |
|
29 |
#' and then KM curves of \code{trt_common} in AgD trial and IPD trial (with and without weights). Risk set table is |
|
30 |
#' appended. |
|
31 |
#' \item if "all", 2 by 2 plot, all plots in "by_trial" and "by_arm" without risk set table appended. |
|
32 |
#' } |
|
33 |
#' @example inst/examples/kmplot_unanchored_ex.R |
|
34 |
#' @example inst/examples/kmplot_anchored_ex.R |
|
35 |
#' @export |
|
36 | ||
37 |
kmplot <- function(weights_object, |
|
38 |
tte_ipd, |
|
39 |
tte_pseudo_ipd, |
|
40 |
trt_ipd, |
|
41 |
trt_agd, |
|
42 |
trt_common = NULL, |
|
43 |
normalize_weights = FALSE, |
|
44 |
trt_var_ipd = "ARM", |
|
45 |
trt_var_agd = "ARM", |
|
46 |
km_conf_type = "log-log", |
|
47 |
km_layout = c("all", "by_trial", "by_arm"), |
|
48 |
...) { |
|
49 | 8x |
names(tte_ipd) <- toupper(names(tte_ipd)) |
50 | 8x |
names(tte_pseudo_ipd) <- toupper(names(tte_pseudo_ipd)) |
51 | 8x |
trt_var_ipd <- toupper(trt_var_ipd) |
52 | 8x |
trt_var_agd <- toupper(trt_var_agd) |
53 | ||
54 |
# pre check |
|
55 | 8x |
if (!"maicplus_estimate_weights" %in% class(weights_object)) { |
56 | ! |
stop("weights_object should be an object returned by estimate_weights") |
57 |
} |
|
58 | 8x |
if (!all(c("USUBJID", "TIME", "EVENT", trt_var_ipd) %in% names(tte_ipd))) { |
59 | ! |
stop("tte_ipd needs to include at least USUBJID, TIME, EVENT, ", trt_var_ipd) |
60 |
} |
|
61 | 8x |
if (!all(c("TIME", "EVENT", trt_var_agd) %in% names(tte_pseudo_ipd))) { |
62 | ! |
stop("tte_pseudo_ipd needs to include at least TIME, EVENT, ", trt_var_agd) |
63 |
} |
|
64 | 8x |
km_layout <- match.arg(km_layout, choices = c("all", "by_trial", "by_arm"), several.ok = FALSE) |
65 | ||
66 |
# preparing data |
|
67 | 8x |
is_anchored <- !is.null(trt_common) |
68 | 8x |
tte_ipd <- tte_ipd[tte_ipd[[trt_var_ipd]] %in% c(trt_ipd, trt_common), , drop = FALSE] |
69 | 8x |
tte_pseudo_ipd <- tte_pseudo_ipd[tte_pseudo_ipd[[trt_var_agd]] %in% c(trt_agd, trt_common), , drop = FALSE] |
70 | ||
71 | 8x |
if (normalize_weights) { |
72 | 2x |
tte_ipd$weights <- weights_object$data$scaled_weights[match(weights_object$data$USUBJID, tte_ipd$USUBJID)] |
73 |
} else { |
|
74 | 6x |
tte_ipd$weights <- weights_object$data$weights[match(weights_object$data$USUBJID, tte_ipd$USUBJID)] |
75 |
} |
|
76 | 8x |
tte_pseudo_ipd$weights <- 1 |
77 | ||
78 |
# generate plot |
|
79 | 8x |
if (!is_anchored) { |
80 |
## unanchored case |
|
81 | ! |
kmobj_B <- survfit(as.formula(paste("Surv(TIME, EVENT) ~", trt_var_agd)), |
82 | ! |
data = tte_pseudo_ipd, |
83 | ! |
conf.type = km_conf_type |
84 |
) |
|
85 | ! |
kmobj_A <- survfit(as.formula(paste("Surv(TIME, EVENT) ~", trt_var_ipd)), |
86 | ! |
data = tte_ipd, |
87 | ! |
conf.type = km_conf_type |
88 |
) |
|
89 | ! |
kmobj_A_adj <- survfit(as.formula(paste("Surv(TIME, EVENT) ~", trt_var_ipd)), |
90 | ! |
data = tte_ipd, |
91 | ! |
conf.type = km_conf_type, |
92 | ! |
weights = weights |
93 |
) |
|
94 | ||
95 | ! |
kmdat <- do.call( |
96 | ! |
rbind, |
97 | ! |
c( |
98 | ! |
survfit_makeup(kmobj_B, trt_agd), |
99 | ! |
survfit_makeup(kmobj_A, trt_ipd), |
100 | ! |
survfit_makeup(kmobj_A_adj, paste(trt_ipd, "(weighted)")) |
101 |
) |
|
102 |
) |
|
103 | ! |
kmdat$treatment <- factor(kmdat$treatment, levels = unique(kmdat$treatment)) |
104 | ||
105 | ! |
basic_kmplot(kmdat, |
106 | ! |
show_risk_set = TRUE, |
107 | ! |
main_title = "Kaplan-Meier Curves", |
108 | ! |
subplot_heights = NULL, |
109 | ! |
suppress_plot_layout = FALSE, |
110 |
... |
|
111 |
) |
|
112 |
} else { |
|
113 |
# anchored case |
|
114 |
# - agd trial km data |
|
115 | 8x |
kmobj_C_S1 <- survfit(as.formula(paste("Surv(TIME, EVENT) ~", trt_var_agd)), |
116 | 8x |
data = tte_pseudo_ipd, |
117 | 8x |
conf.type = km_conf_type, |
118 | 8x |
subset = eval(parse(text = paste0("(tte_pseudo_ipd$", trt_var_agd, " == '", trt_common, "')"))) |
119 |
) |
|
120 | 8x |
kmobj_B_S1 <- survfit(as.formula(paste("Surv(TIME, EVENT) ~", trt_var_agd)), |
121 | 8x |
data = tte_pseudo_ipd, |
122 | 8x |
conf.type = km_conf_type, |
123 | 8x |
subset = eval(parse(text = paste0("(tte_pseudo_ipd$", trt_var_agd, " == '", trt_agd, "')"))) |
124 |
) |
|
125 |
# - ipd trial km data |
|
126 | 8x |
kmobj_C_S2 <- survfit(as.formula(paste("Surv(TIME, EVENT) ~", trt_var_ipd)), |
127 | 8x |
data = tte_ipd, |
128 | 8x |
conf.type = km_conf_type, |
129 | 8x |
subset = eval(parse(text = paste0("(tte_ipd$", trt_var_ipd, " == '", trt_common, "')"))) |
130 |
) |
|
131 | 8x |
kmobj_A_S2 <- survfit(as.formula(paste("Surv(TIME, EVENT) ~", trt_var_ipd)), |
132 | 8x |
data = tte_ipd, |
133 | 8x |
conf.type = km_conf_type, |
134 | 8x |
subset = eval(parse(text = paste0("(tte_ipd$", trt_var_ipd, " == '", trt_ipd, "')"))) |
135 |
) |
|
136 |
# - ipd trial km data with weights |
|
137 | 8x |
kmobj_Cadj_S2 <- survfit(as.formula(paste("Surv(TIME, EVENT) ~", trt_var_ipd)), |
138 | 8x |
data = tte_ipd, |
139 | 8x |
conf.type = km_conf_type, |
140 | 8x |
weights = weights, |
141 | 8x |
subset = eval(parse(text = paste0("(tte_ipd$", trt_var_ipd, " == '", trt_common, "')"))) |
142 |
) |
|
143 | 8x |
kmobj_Aadj_S2 <- survfit(as.formula(paste("Surv(TIME, EVENT) ~", trt_var_ipd)), |
144 | 8x |
data = tte_ipd, |
145 | 8x |
conf.type = km_conf_type, |
146 | 8x |
weights = weights, |
147 | 8x |
subset = eval(parse(text = paste0("(tte_ipd$", trt_var_ipd, " == '", trt_ipd, "')"))) |
148 |
) |
|
149 |
# - plotdat for layout by trial |
|
150 | 8x |
kmdat_s2 <- do.call( |
151 | 8x |
rbind, |
152 | 8x |
c( |
153 | 8x |
survfit_makeup(kmobj_C_S2, trt_common), |
154 | 8x |
survfit_makeup(kmobj_A_S2, trt_ipd), |
155 | 8x |
survfit_makeup(kmobj_Aadj_S2, paste(trt_ipd, "(weighted)")), |
156 | 8x |
survfit_makeup(kmobj_Cadj_S2, paste(trt_common, "(weighted)")) |
157 |
) |
|
158 |
) |
|
159 | 8x |
kmdat_s2$treatment <- factor(kmdat_s2$treatment, levels = unique(kmdat_s2$treatment)) |
160 | 8x |
kmdat_s1 <- do.call( |
161 | 8x |
rbind, |
162 | 8x |
c( |
163 | 8x |
survfit_makeup(kmobj_C_S1, trt_common), |
164 | 8x |
survfit_makeup(kmobj_B_S1, trt_agd) |
165 |
) |
|
166 |
) |
|
167 | 8x |
kmdat_s1$treatment <- factor(kmdat_s1$treatment, levels = unique(kmdat_s1$treatment)) |
168 |
# - plotdat for layout by arm |
|
169 | 8x |
kmdat_a2 <- do.call( |
170 | 8x |
rbind, |
171 | 8x |
c( |
172 | 8x |
survfit_makeup(kmobj_B_S1, trt_agd), |
173 | 8x |
survfit_makeup(kmobj_A_S2, trt_ipd), |
174 | 8x |
survfit_makeup(kmobj_Aadj_S2, paste(trt_ipd, "(weighted)")) |
175 |
) |
|
176 |
) |
|
177 | 8x |
kmdat_a2$treatment <- factor(kmdat_a2$treatment, levels = unique(kmdat_a2$treatment)) |
178 | 8x |
kmdat_a1 <- do.call( |
179 | 8x |
rbind, |
180 | 8x |
c( |
181 | 8x |
survfit_makeup(kmobj_C_S1, paste(trt_common, "(AgD)")), |
182 | 8x |
survfit_makeup(kmobj_C_S2, paste(trt_common, "(IPD)")), |
183 | 8x |
survfit_makeup(kmobj_Cadj_S2, paste(trt_common, "(IPD,weighted)")) |
184 |
) |
|
185 |
) |
|
186 | 8x |
kmdat_a1$treatment <- factor(kmdat_a1$treatment, levels = unique(kmdat_a1$treatment)) |
187 | ||
188 | ||
189 |
# make plot depending on the layout |
|
190 | 8x |
if (km_layout == "by_trial") { |
191 |
# 1 by 2 plot, each plot is per trial |
|
192 | 2x |
subplot_heights <- c(7, 0.7 + 2 * 0.7, 0.8) |
193 | 2x |
layout_mat <- matrix(1:4, ncol = 2) |
194 | 2x |
layout(layout_mat, heights = subplot_heights) |
195 | ||
196 | 2x |
basic_kmplot(kmdat_s2, |
197 | 2x |
main_title = paste0("Kaplan-Meier Curves \n(", trt_ipd, " vs ", trt_common, ") in the IPD trial"), |
198 | 2x |
suppress_plot_layout = TRUE, ... |
199 |
) |
|
200 | ||
201 | 2x |
basic_kmplot(kmdat_s1, |
202 | 2x |
main_title = paste0("Kaplan-Meier Curves \n(", trt_agd, " vs ", trt_common, ") in the AgD trial"), |
203 | 2x |
suppress_plot_layout = TRUE, ... |
204 |
) |
|
205 | 6x |
} else if (km_layout == "by_arm") { |
206 |
# 1 by 2 plot, by 1 is for investigational arm, the other is for common comparator |
|
207 | 2x |
subplot_heights <- c(7, 0.7 + 2 * 0.7, 0.8) |
208 | 2x |
layout_mat <- matrix(1:4, ncol = 2) |
209 | 2x |
layout(layout_mat, heights = subplot_heights) |
210 | ||
211 | 2x |
basic_kmplot(kmdat_a2, |
212 | 2x |
main_title = paste0("Kaplan-Meier Curves \n(", trt_ipd, " vs ", trt_agd, ")"), |
213 | 2x |
suppress_plot_layout = TRUE, ... |
214 |
) |
|
215 | ||
216 | 2x |
basic_kmplot(kmdat_a1, |
217 | 2x |
main_title = paste0("Kaplan-Meier Curves of Common Comparator \n", trt_common, "(IPD vs AgD Trial)"), |
218 | 2x |
suppress_plot_layout = TRUE, ... |
219 |
) |
|
220 |
} else { |
|
221 |
# 2 by 2 plot, combine by trial and by arm |
|
222 | 4x |
layout_mat <- matrix(1:4, ncol = 2, byrow = TRUE) |
223 | 4x |
layout(layout_mat) |
224 | ||
225 | 4x |
basic_kmplot(kmdat_s2, |
226 | 4x |
main_title = paste0("Kaplan-Meier Curves \n(", trt_ipd, " vs ", trt_common, ") in the IPD trial"), |
227 | 4x |
show_risk_set = FALSE, |
228 | 4x |
suppress_plot_layout = TRUE, ... |
229 |
) |
|
230 | ||
231 | 4x |
basic_kmplot(kmdat_s1, |
232 | 4x |
main_title = paste0("Kaplan-Meier Curves \n(", trt_agd, " vs ", trt_common, ") in the AgD trial"), |
233 | 4x |
show_risk_set = FALSE, |
234 | 4x |
suppress_plot_layout = TRUE, ... |
235 |
) |
|
236 | ||
237 | 4x |
basic_kmplot(kmdat_a2, |
238 | 4x |
main_title = paste0("Kaplan-Meier Curves \n(", trt_ipd, " vs ", trt_agd, ")"), |
239 | 4x |
show_risk_set = FALSE, |
240 | 4x |
suppress_plot_layout = TRUE, ... |
241 |
) |
|
242 | ||
243 | 4x |
basic_kmplot(kmdat_a1, |
244 | 4x |
main_title = paste0("Kaplan-Meier Curves of Common Comparator \n", trt_common, "(IPD vs AgD Trial)"), |
245 | 4x |
show_risk_set = FALSE, |
246 | 4x |
suppress_plot_layout = TRUE, ... |
247 |
) |
|
248 |
} |
|
249 |
} |
|
250 | 8x |
invisible(NULL) |
251 |
} |
|
252 | ||
253 | ||
254 |
#' Basic Kaplan Meier (KM) plot function |
|
255 |
#' |
|
256 |
#' This function can generate a basic KM plot with or without risk set table appended at the bottom. In a single plot, |
|
257 |
#' it can include up to 4 KM curves. This depends on number of levels in 'treatment' column in the input data.frame |
|
258 |
#' \code{kmdat} |
|
259 |
#' |
|
260 |
#' @param kmdat a `data.frame`, must consist `treatment`, `time` (unit in days), `n.risk`, `censor`, `surv`, similar to |
|
261 |
#' an output from \code{maicplus:::survfit_makeup} |
|
262 |
#' @param endpoint_name a string, name of time to event endpoint, to be show in the last line of title |
|
263 |
#' @param time_scale a string, time unit of median survival time, taking a value of 'years', 'months', 'weeks' or 'days' |
|
264 |
#' @param time_grid a numeric vector in the unit of \code{time_scale}, risk set table and x axis of the km plot will be |
|
265 |
#' defined based on this time grid |
|
266 |
#' @param show_risk_set logical, show risk set table or not, TRUE by default |
|
267 |
#' @param main_title a string, main title of the KM plot |
|
268 |
#' @param subplot_heights a numeric vector, heights argument to \code{graphic::layout()},NULL by default which means |
|
269 |
#' user will use the default setting |
|
270 |
#' @param suppress_plot_layout logical, suppress the layout setting in this function so that user can specify layout |
|
271 |
#' outside of the function, FALSE by default |
|
272 |
#' @param use_colors a character vector of length up to 4, colors to the KM curves, it will be passed to `col` of |
|
273 |
#' \code{lines()} |
|
274 |
#' @param use_line_types a numeric vector of length up to 4, line type to the KM curves, it will be passed to `lty` of |
|
275 |
#' \code{lines()} |
|
276 |
#' @param use_pch_cex a scalar between 0 and 1, point size to indicate censored individuals on the KM curves, it will be |
|
277 |
#' passed to `cex` of \code{points()} |
|
278 |
#' @param use_pch_alpha a scalar between 0 and 255, degree of color transparency of points to indicate censored |
|
279 |
#' individuals on the KM curves, it will be passed to `cex` of \code{points()} |
|
280 |
#' |
|
281 |
#' @example inst/examples/basic_kmplot_ex.R |
|
282 |
#' |
|
283 |
#' @return a KM plot with or without risk set table appended at the bottom, with up to 4 KM curves |
|
284 |
#' @export |
|
285 | ||
286 |
basic_kmplot <- function(kmdat, |
|
287 |
endpoint_name = "Time to Event Endpoint", |
|
288 |
time_scale = NULL, |
|
289 |
time_grid = NULL, |
|
290 |
show_risk_set = TRUE, |
|
291 |
main_title = "Kaplan-Meier Curves", |
|
292 |
subplot_heights = NULL, |
|
293 |
suppress_plot_layout = FALSE, |
|
294 |
use_colors = NULL, |
|
295 |
use_line_types = NULL, |
|
296 |
use_pch_cex = 0.65, |
|
297 |
use_pch_alpha = 100) { |
|
298 | 24x |
original_par <- par("bty", "tcl", "mgp", "cex.lab", "cex.axis", "cex.main", "mar") |
299 | 24x |
on.exit(par(original_par)) |
300 |
# precheck |
|
301 | 24x |
if (!length(subplot_heights) %in% c(0, (1 + show_risk_set))) { |
302 | ! |
stop("length of subplot_heights should be ", (1 + show_risk_set)) |
303 |
} |
|
304 | 24x |
if (!is.factor(kmdat$treatment)) { |
305 | ! |
stop("kmdat$treatment needs to be a factor, its levels will be used in legend and title, first level is comparator") |
306 |
} |
|
307 | ! |
if (nlevels(kmdat$treatment) > 4) stop("kmdat$treatment cannot have more than 4 levels") |
308 | ||
309 |
# set up x axis (time) |
|
310 | 24x |
if (is.null(time_grid)) { |
311 | 4x |
max_t <- max(kmdat$time) |
312 | 4x |
t_range <- c(0, get_time_as(max_t, time_scale) * 1.07) |
313 | 4x |
time_grid <- pretty(t_range) |
314 |
} else { |
|
315 | 20x |
t_range <- c(0, max(time_grid)) |
316 |
} |
|
317 | ||
318 | ||
319 | ||
320 |
# plat layout in par |
|
321 | 24x |
if (!suppress_plot_layout) { |
322 | ! |
nr_subplot <- (1 + show_risk_set) |
323 | ! |
if (is.null(subplot_heights)) subplot_heights <- c(7, 0.7 + nlevels(kmdat$treatment) * 0.7, 0.8) |
324 | ! |
layout_mat <- matrix(1:nr_subplot, ncol = 1) |
325 | ! |
layout(layout_mat, heights = subplot_heights) |
326 |
} |
|
327 | ||
328 |
# plot cosmetic setup |
|
329 | 24x |
if (is.null(use_line_types)) { |
330 | 8x |
use_lty <- c(1, 1, 2, 2) |
331 | 8x |
use_lwd <- c(1.5, 1.5, 1.2, 1.2) |
332 |
} else { |
|
333 | 16x |
use_lty <- use_line_types |
334 | 16x |
use_lwd <- c(1.5, 1.5, 1.2, 1.2) |
335 |
} |
|
336 | ||
337 | 24x |
if (is.null(use_colors)) { |
338 | 20x |
use_col <- c("#5450E4", "#00857C", "#6ECEB2", "#7B68EE") |
339 |
} else { |
|
340 | 4x |
use_col <- use_colors |
341 |
} |
|
342 | 24x |
use_col2 <- col2rgb(use_col) # preparing semi-transparent colors |
343 | 24x |
use_col2 <- rgb(use_col2[1, ], use_col2[2, ], use_col2[3, ], alpha = use_pch_alpha, maxColorValue = 255) |
344 | ||
345 |
## : first subplot: KM curve ------------------- |
|
346 |
# base plot |
|
347 | 24x |
par(bty = "n", tcl = -0.15, mgp = c(1.8, 0.4, 0), cex.lab = 0.85, cex.axis = 0.8, cex.main = 0.9, mar = c(3, 4, 5, 1)) |
348 | 24x |
plot(0, 0, |
349 | 24x |
type = "n", xlab = paste0("Time in ", time_scale), ylab = "Survival Probability", |
350 | 24x |
ylim = c(0, 1), xlim = t_range, yaxt = "n", xaxt = "n", |
351 | 24x |
main = paste0(main_title, "\nEndpoint:", endpoint_name) |
352 |
) |
|
353 | 24x |
axis(2, las = 1) |
354 | 24x |
if (!is.null(time_grid)) { |
355 | 24x |
axis(1, at = time_grid) |
356 |
} else { |
|
357 | ! |
axis(1) |
358 |
} |
|
359 | ||
360 |
# add km line |
|
361 | 24x |
for (ii in 1:nlevels(kmdat$treatment)) { |
362 | 72x |
tmpkmdat <- kmdat[as.numeric(kmdat$treatment) == ii, , drop = FALSE] |
363 | 72x |
lines( |
364 | 72x |
y = tmpkmdat$surv, |
365 | 72x |
x = get_time_as(tmpkmdat$time, time_scale), |
366 | 72x |
col = use_col[ii], |
367 | 72x |
lty = use_lty[ii], |
368 | 72x |
lwd = use_lwd[ii], |
369 | 72x |
type = "s" |
370 |
) |
|
371 | 72x |
tmpid <- (tmpkmdat$censor != 0) # cannot just ==1, anticipating weighted case |
372 | 72x |
points( |
373 | 72x |
y = tmpkmdat$surv[tmpid], |
374 | 72x |
x = get_time_as(tmpkmdat$time[tmpid], time_scale), |
375 | 72x |
col = use_col2[ii], |
376 | 72x |
pch = 3, |
377 | 72x |
cex = use_pch_cex |
378 |
) |
|
379 |
} |
|
380 | ||
381 |
## : second subplot: risk set table (if applicable) ------------------- |
|
382 | 24x |
if (show_risk_set) { |
383 |
# add legend, with treatment index |
|
384 | 8x |
legend("topright", |
385 | 8x |
bty = "n", |
386 | 8x |
cex = 0.8, |
387 | 8x |
lty = use_lty[1:nlevels(kmdat$treatment)], |
388 | 8x |
lwd = use_lwd[1:nlevels(kmdat$treatment)], |
389 | 8x |
col = use_col[1:nlevels(kmdat$treatment)], |
390 | 8x |
legend = paste0("(T", 1:nlevels(kmdat$treatment), ") ", levels(kmdat$treatment)) |
391 |
) |
|
392 | ||
393 |
# add risk set table |
|
394 | 8x |
par(bty = "n", tcl = -0.15, mgp = c(1.8, 0.4, 0), mar = c(1, 4, 0, 1)) |
395 | 8x |
plot(0, 0, |
396 | 8x |
type = "n", xlab = "", ylab = "", main = NULL, |
397 | 8x |
ylim = c(nlevels(kmdat$treatment) + 1.2, -0.5), |
398 | 8x |
xlim = t_range, |
399 | 8x |
yaxt = "n", xaxt = "n" |
400 |
) |
|
401 | 8x |
axis(2, |
402 | 8x |
at = 1:nlevels(kmdat$treatment), labels = paste0("T", 1:nlevels(kmdat$treatment)), |
403 | 8x |
line = NA, lty = "blank", las = 1 |
404 |
) |
|
405 | ||
406 | 8x |
for (ii in 1:nlevels(kmdat$treatment)) { |
407 | 24x |
tmpkmdat <- kmdat[as.numeric(kmdat$treatment) == ii, , drop = FALSE] |
408 | 24x |
tmptime <- get_time_as(tmpkmdat$time, time_scale) |
409 | 24x |
tmpnr <- sapply(time_grid, function(kk) { |
410 | 216x |
tmpid <- which(tmptime > kk) |
411 | 216x |
if (length(tmpid) == 0) { |
412 | 58x |
if (min(tmpkmdat$n.risk) == 0) { |
413 | ! |
tmpid <- which.min(tmpkmdat$n.risk)[1] |
414 |
} else { |
|
415 | 58x |
tmpid <- NULL |
416 |
} |
|
417 |
} |
|
418 | 216x |
tout <- ifelse(is.null(tmpid), "n/a", round(tmpkmdat$n.risk[tmpid], 1)) |
419 |
}) |
|
420 | 24x |
text(0, 0, labels = "Number at risk", pos = 4, cex = 0.8, offset = -0.8) |
421 | 24x |
text( |
422 | 24x |
y = rep(ii, length(time_grid)), |
423 | 24x |
x = time_grid, |
424 | 24x |
labels = tmpnr, |
425 | 24x |
col = use_col[ii], |
426 | 24x |
cex = 0.75 |
427 |
) |
|
428 | 24x |
text(0, nlevels(kmdat$treatment) + 1, |
429 | 24x |
pos = 4, cex = 0.7, offset = -0.8, col = "gray30", |
430 | 24x |
labels = "Note: Number at risk for adjusted/weighted treament arm is the sum of individual weight at risk." |
431 |
) |
|
432 |
} |
|
433 |
} else { |
|
434 |
# add simple |
|
435 | 16x |
legend("topright", |
436 | 16x |
bty = "n", |
437 | 16x |
cex = 0.8, |
438 | 16x |
lty = use_lty[1:nlevels(kmdat$treatment)], |
439 | 16x |
lwd = use_lwd[1:nlevels(kmdat$treatment)], |
440 | 16x |
col = use_col[1:nlevels(kmdat$treatment)], |
441 | 16x |
legend = levels(kmdat$treatment) |
442 |
) |
|
443 |
} |
|
444 | 24x |
invisible(NULL) |
445 |
} |
|
446 | ||
447 | ||
448 |
#' Diagnosis plot of proportional hazard assumption for anchored and unanchored |
|
449 |
#' |
|
450 |
#' @param weights_object an object returned by \code{estimate_weight} |
|
451 |
#' @param tte_ipd a data frame of individual patient data (IPD) of internal trial, contain at least "USUBJID", "EVENT", |
|
452 |
#' "TIME" columns and a column indicating treatment assignment |
|
453 |
#' @param tte_pseudo_ipd a data frame of pseudo IPD by digitized KM curves of external trial (for time-to-event |
|
454 |
#' endpoint), contain at least "EVENT", "TIME" |
|
455 |
#' @param trt_ipd a string, name of the interested investigation arm in internal trial \code{tte_ipd} (real IPD) |
|
456 |
#' @param trt_agd a string, name of the interested investigation arm in external trial |
|
457 |
#' \code{tte_pseudo_ipd} (pseudo IPD) |
|
458 |
#' @param trt_common a string, name of the common comparator in internal and external trial, by default is NULL, |
|
459 |
#' indicating unanchored case |
|
460 |
#' @param trt_var_ipd a string, column name in \code{tte_ipd} that contains the treatment assignment |
|
461 |
#' @param trt_var_agd a string, column name in \code{tte_pseudo_ipd} that contains the treatment assignment |
|
462 |
#' @param endpoint_name a string, name of time to event endpoint, to be show in the last line of title |
|
463 |
#' @param time_scale a string, time unit of median survival time, taking a value of 'years', 'months', 'weeks' or 'days' |
|
464 |
#' @param zph_transform a string, pass to \code{survival::cox.zph}, default is "log" |
|
465 |
#' @param zph_log_hazard a logical, if TRUE (default), y axis of the time dependent hazard function is log-hazard, |
|
466 |
#' otherwise, hazard. |
|
467 |
#' |
|
468 |
#' @return a 3 by 2 plot, include log-cumulative hazard plot, time dependent hazard function and unscaled Schoenfeld |
|
469 |
#' residual plot, before and after matching |
|
470 |
#' |
|
471 |
#' @example inst/examples/ph_diagplot_unanchored_ex.R |
|
472 |
#' @example inst/examples/ph_diagplot_anchored_ex.R |
|
473 |
#' @export |
|
474 |
ph_diagplot <- function(weights_object, |
|
475 |
tte_ipd, |
|
476 |
tte_pseudo_ipd, |
|
477 |
trt_ipd, |
|
478 |
trt_agd, |
|
479 |
trt_common = NULL, |
|
480 |
trt_var_ipd = "ARM", |
|
481 |
trt_var_agd = "ARM", |
|
482 |
endpoint_name = "Time to Event Endpoint", |
|
483 |
time_scale, |
|
484 |
zph_transform = "log", |
|
485 |
zph_log_hazard = TRUE) { |
|
486 | 4x |
names(tte_ipd) <- toupper(names(tte_ipd)) |
487 | 4x |
names(tte_pseudo_ipd) <- toupper(names(tte_pseudo_ipd)) |
488 | 4x |
trt_var_ipd <- toupper(trt_var_ipd) |
489 | 4x |
trt_var_agd <- toupper(trt_var_agd) |
490 | ||
491 |
# pre check |
|
492 | 4x |
if (!"maicplus_estimate_weights" %in% class(weights_object)) { |
493 | ! |
stop("weights_object should be an object returned by estimate_weights") |
494 |
} |
|
495 | 4x |
if (!all(c("USUBJID", "TIME", "EVENT", trt_var_ipd) %in% names(tte_ipd))) { |
496 | ! |
stop("tte_ipd needs to include at least USUBJID, TIME, EVENT, ", trt_var_ipd) |
497 |
} |
|
498 | 4x |
if (!all(c("TIME", "EVENT", trt_var_agd) %in% names(tte_pseudo_ipd))) { |
499 | ! |
stop("tte_ipd needs to include at least TIME, EVENT, ", trt_var_agd) |
500 |
} |
|
501 | ||
502 |
# preparing analysis data |
|
503 | 4x |
is_anchored <- ifelse(is.null(trt_common), FALSE, TRUE) |
504 | 4x |
tte_ipd <- tte_ipd[tte_ipd[[trt_var_ipd]] %in% c(trt_ipd, trt_common), , drop = TRUE] |
505 | 4x |
tte_pseudo_ipd <- tte_pseudo_ipd[tte_pseudo_ipd[[trt_var_agd]] %in% c(trt_agd, trt_common), , drop = TRUE] |
506 | 4x |
tte_ipd$weights <- weights_object$data$weights[match(weights_object$data$USUBJID, tte_ipd$USUBJID)] |
507 | 4x |
tte_pseudo_ipd$weights <- 1 |
508 | 4x |
tte_ipd$TIME2 <- get_time_as(tte_ipd$TIME, as = time_scale) # for cox.zph |
509 | 4x |
tte_pseudo_ipd$TIME2 <- get_time_as(tte_pseudo_ipd$TIME, as = time_scale) # for cox.zph |
510 | 4x |
if (!"USUBJID" %in% names(tte_pseudo_ipd)) tte_pseudo_ipd$USUBJID <- paste0("ID", seq_len(nrow(tte_pseudo_ipd))) |
511 | ! |
if (trt_var_ipd != "ARM") tte_ipd$ARM <- tte_ipd[[trt_var_ipd]] |
512 | ! |
if (trt_var_agd != "ARM") tte_pseudo_ipd$ARM <- tte_pseudo_ipd[[trt_var_agd]] |
513 | ||
514 |
# prepare plot data |
|
515 | 4x |
retain_cols <- c("USUBJID", "TIME", "TIME2", "EVENT", "ARM", "weights") |
516 | 4x |
if (!is_anchored) { |
517 |
# unanchored case |
|
518 | 1x |
tte_dat <- rbind( |
519 | 1x |
tte_ipd[, retain_cols, drop = FALSE], |
520 | 1x |
tte_pseudo_ipd[, retain_cols, drop = FALSE] |
521 |
) |
|
522 |
} else { |
|
523 | 3x |
tte_dat <- tte_ipd[, retain_cols, drop = FALSE] |
524 |
} |
|
525 | 4x |
kmobj <- survival::survfit(Surv(TIME, EVENT) ~ ARM, tte_dat, conf.type = "log-log") |
526 | 4x |
kmobj_adj <- survival::survfit(Surv(TIME, EVENT) ~ ARM, tte_dat, conf.type = "log-log", weights = weights) |
527 | 4x |
coxobj <- survival::coxph(Surv(TIME, EVENT) ~ ARM, data = tte_dat) |
528 | 4x |
coxobj2 <- survival::coxph(Surv(TIME2, EVENT) ~ ARM, data = tte_dat) |
529 | 4x |
zphobj <- survival::cox.zph(coxobj2, transform = zph_transform, global = FALSE) |
530 | 4x |
coxobj_adj <- survival::coxph(Surv(TIME, EVENT) ~ ARM, data = tte_dat, weights = weights) |
531 | 4x |
coxobj_adj2 <- survival::coxph(Surv(TIME2, EVENT) ~ ARM, data = tte_dat, weights = weights) |
532 | 4x |
zphobj_adj <- survival::cox.zph(coxobj_adj2, transform = zph_transform, global = FALSE) |
533 | ||
534 |
# making the plot |
|
535 | 4x |
original_par <- par(mfrow = c(3, 2), cex.lab = 0.85, cex.axis = 0.8, cex.main = 0.9) |
536 | 4x |
on.exit(par(original_par)) |
537 |
# log-cum-hazard plot |
|
538 | 4x |
ph_diagplot_lch(kmobj, |
539 | 4x |
time_scale = time_scale, |
540 | 4x |
log_time = TRUE, |
541 | 4x |
endpoint_name = endpoint_name, |
542 | 4x |
subtitle = "(Before Matching)" |
543 |
) |
|
544 | ||
545 | 4x |
ph_diagplot_lch(kmobj_adj, |
546 | 4x |
time_scale = time_scale, |
547 | 4x |
log_time = TRUE, |
548 | 4x |
endpoint_name = endpoint_name, |
549 | 4x |
subtitle = "(After Matching)" |
550 |
) |
|
551 |
# time dependent hazard plot |
|
552 | 4x |
plot(zphobj, |
553 | 4x |
main = paste0( |
554 | 4x |
"Time-dependent Hazard function (scaled Schoenfeld residual)\n", |
555 | 4x |
"Endpoint:", endpoint_name, "\n(Before Matching)" |
556 |
), |
|
557 | 4x |
resid = FALSE, se = TRUE, df = 4, nsmo = 40, |
558 |
# xlim = range(0,zphobj$time), |
|
559 | 4x |
ylab = ifelse(zph_log_hazard, "Log Hazard", "Hazard"), |
560 | 4x |
xlab = paste("Time in", time_scale), |
561 | 4x |
lty = 1:2, lwd = 2, pch = 16, cex = 0.8, |
562 | 4x |
col = rgb(0, 0, 128, alpha = 120, maxColorValue = 255), |
563 | 4x |
hr = (!zph_log_hazard), yaxt = "n" |
564 |
) |
|
565 | 4x |
axis(2, las = 1) |
566 | 4x |
pv <- as.data.frame(zphobj$table)$p |
567 | 4x |
pv <- ifelse(round(pv, 4) < 0.0001, "<0.0001", format(round(pv, 4), nsmall = 4)) |
568 | 4x |
legend("bottomright", |
569 | 4x |
cex = 0.75, bty = "n", text.col = "dodgerblue3", |
570 | 4x |
legend = c(paste0("p-value: ", pv), paste0("time-transform: ", zph_transform)), |
571 | 4x |
title = "PH test (survival::cox.zph)" |
572 |
) |
|
573 | ||
574 | 4x |
plot(zphobj_adj, |
575 | 4x |
main = paste0( |
576 | 4x |
"Time-dependent Hazard function (scaled Schoenfeld residual)\n", |
577 | 4x |
"Endpoint:", endpoint_name, "\n(After Matching)" |
578 |
), |
|
579 | 4x |
resid = FALSE, se = TRUE, df = 4, nsmo = 40, |
580 |
# xlim = range(0,zphobj$time), |
|
581 | 4x |
ylab = ifelse(zph_log_hazard, "Log Hazard", "Hazard"), |
582 | 4x |
xlab = paste("Time in", time_scale), |
583 | 4x |
lty = 1:2, lwd = 2, pch = 16, cex = 0.8, |
584 | 4x |
col = rgb(0, 0, 128, alpha = 120, maxColorValue = 255), |
585 | 4x |
hr = (!zph_log_hazard), yaxt = "n" |
586 |
) |
|
587 | 4x |
axis(2, las = 1) |
588 | 4x |
pv <- as.data.frame(zphobj_adj$table)$p |
589 | 4x |
pv <- ifelse(round(pv, 4) < 0.0001, "<0.0001", format(round(pv, 4), nsmall = 4)) |
590 | 4x |
legend("bottomright", |
591 | 4x |
cex = 0.75, bty = "n", text.col = "dodgerblue3", |
592 | 4x |
legend = c(paste0("p-value: ", pv), paste0("time-transform: ", zph_transform)), |
593 | 4x |
title = "PH test (survival::cox.zph)" |
594 |
) |
|
595 | ||
596 |
# unscaled schoenfeld residual |
|
597 | 4x |
ph_diagplot_schoenfeld(coxobj, |
598 | 4x |
time_scale = time_scale, |
599 | 4x |
log_time = FALSE, |
600 | 4x |
endpoint_name = endpoint_name, |
601 | 4x |
subtitle = "(Before Matching)" |
602 |
) |
|
603 | ||
604 | 4x |
ph_diagplot_schoenfeld(coxobj_adj, |
605 | 4x |
time_scale = time_scale, |
606 | 4x |
log_time = FALSE, |
607 | 4x |
endpoint_name = endpoint_name, |
608 | 4x |
subtitle = "(After Matching)" |
609 |
) |
|
610 |
} |
|
611 | ||
612 |
#' PH Diagnosis Plot of Log Cumulative Hazard Rate versus time or log-time |
|
613 |
#' |
|
614 |
#' This plot is also known as log negative log survival rate. |
|
615 |
#' |
|
616 |
#' a diagnosis plot for proportional hazard assumption, versus log-time (default) or time |
|
617 |
#' |
|
618 |
#' @param km_fit returned object from \code{survival::survfit} |
|
619 |
#' @param time_scale a character string, 'years', 'months', 'weeks' or 'days', time unit of median survival time |
|
620 |
#' @param log_time logical, TRUE (default) or FALSE |
|
621 |
#' @param endpoint_name a character string, name of the endpoint |
|
622 |
#' @param subtitle a character string, subtitle of the plot |
|
623 |
#' @param exclude_censor logical, should censored data point be plotted |
|
624 |
#' @examples |
|
625 |
#' library(survival) |
|
626 |
#' data(adtte_sat) |
|
627 |
#' data(pseudo_ipd_sat) |
|
628 |
#' combined_data <- rbind(adtte_sat[, c("TIME", "EVENT", "ARM")], pseudo_ipd_sat) |
|
629 |
#' kmobj <- survfit(Surv(TIME, EVENT) ~ ARM, combined_data, conf.type = "log-log") |
|
630 |
#' ph_diagplot_lch(kmobj, |
|
631 |
#' time_scale = "month", log_time = TRUE, |
|
632 |
#' endpoint_name = "OS", subtitle = "(Before Matching)" |
|
633 |
#' ) |
|
634 |
#' @return a plot of log cumulative hazard rate |
|
635 |
#' @export |
|
636 | ||
637 |
ph_diagplot_lch <- function(km_fit, |
|
638 |
time_scale, |
|
639 |
log_time = TRUE, |
|
640 |
endpoint_name = "", |
|
641 |
subtitle = "", |
|
642 |
exclude_censor = TRUE) { |
|
643 | 10x |
time_scale <- match.arg(arg = time_scale, choices = c("days", "weeks", "months", "years")) |
644 | ||
645 | 10x |
clldat <- survfit_makeup(km_fit) |
646 | ||
647 | 10x |
if (exclude_censor) { |
648 | 10x |
clldat <- lapply(clldat, function(xxt) xxt[xxt$censor == 0, , drop = FALSE]) |
649 |
} |
|
650 | ||
651 | 10x |
all.times <- get_time_as(do.call(rbind, clldat)$time, time_scale) |
652 | 10x |
if (log_time) all.times <- log(all.times) |
653 | 10x |
t_range <- range(all.times) |
654 | 10x |
y_range <- range(log(do.call(rbind, clldat)$cumhaz)) |
655 | ||
656 | 10x |
original_par <- par("mar", "bty", "tcl", "mgp") |
657 | 10x |
par(mar = c(4, 4, 4, 1), bty = "n", tcl = -0.15, mgp = c(1.5, 0.3, 0)) |
658 | 10x |
on.exit(par(original_par)) |
659 | 10x |
plot(0, 0, |
660 | 10x |
type = "n", xlab = paste0(ifelse(log_time, "Log-", ""), "Time in ", time_scale), |
661 | 10x |
ylab = "Log-Cumulative Hazard Rate", |
662 | 10x |
ylim = y_range, xlim = t_range, yaxt = "n", |
663 | 10x |
main = paste0( |
664 | 10x |
"Log Cumulative Hazard versus Log Time\nEndpoint: ", endpoint_name, |
665 | 10x |
ifelse(subtitle == "", "", "\n"), subtitle |
666 |
) |
|
667 |
) |
|
668 | 10x |
axis(2, las = 1) |
669 | ||
670 | 10x |
trts <- names(clldat) |
671 | 10x |
cols <- c("dodgerblue3", "firebrick3") |
672 | 10x |
pchs <- c(1, 4) |
673 | 10x |
for (i in seq_along(clldat)) { |
674 | 20x |
use_x <- get_time_as(clldat[[i]]$time, time_scale) |
675 | 20x |
if (log_time) use_x <- log(use_x) |
676 | ||
677 | 20x |
lines( |
678 | 20x |
y = log(clldat[[i]]$cumhaz), |
679 | 20x |
x = use_x, col = cols[i], |
680 | 20x |
type = "s", |
681 |
) |
|
682 | 20x |
points( |
683 | 20x |
y = log(clldat[[i]]$cumhaz), |
684 | 20x |
x = use_x, |
685 | 20x |
col = cols[i], pch = pchs[i], cex = 0.7 |
686 |
) |
|
687 |
} |
|
688 | 10x |
legend("bottomright", |
689 | 10x |
bty = "n", lty = c(1, 1, 2), cex = 0.8, |
690 | 10x |
col = cols, pch = pchs, legend = paste0("Treatment: ", trts) |
691 |
) |
|
692 |
} |
|
693 | ||
694 | ||
695 |
#' PH Diagnosis Plot of Schoenfeld residuals for a Cox model fit |
|
696 |
#' |
|
697 |
#' @param coxobj object returned from \code{\link[survival]{coxph}} |
|
698 |
#' @param time_scale a character string, 'years', 'months', 'weeks' or 'days', time unit of median survival time |
|
699 |
#' @param log_time logical, TRUE (default) or FALSE |
|
700 |
#' @param endpoint_name a character string, name of the endpoint |
|
701 |
#' @param subtitle a character string, subtitle of the plot |
|
702 |
#' @examples |
|
703 |
#' library(survival) |
|
704 |
#' data(adtte_sat) |
|
705 |
#' data(pseudo_ipd_sat) |
|
706 |
#' combined_data <- rbind(adtte_sat[, c("TIME", "EVENT", "ARM")], pseudo_ipd_sat) |
|
707 |
#' unweighted_cox <- coxph(Surv(TIME, EVENT == 1) ~ ARM, data = combined_data) |
|
708 |
#' ph_diagplot_schoenfeld(unweighted_cox, |
|
709 |
#' time_scale = "month", log_time = TRUE, |
|
710 |
#' endpoint_name = "OS", subtitle = "(Before Matching)" |
|
711 |
#' ) |
|
712 |
#' @return a plot of Schoenfeld residuals |
|
713 |
#' @export |
|
714 | ||
715 |
ph_diagplot_schoenfeld <- function(coxobj, |
|
716 |
time_scale = "months", |
|
717 |
log_time = TRUE, |
|
718 |
endpoint_name = "", |
|
719 |
subtitle = "") { |
|
720 |
# pre-check |
|
721 | 10x |
time_scale <- match.arg(arg = time_scale, choices = c("days", "weeks", "months", "years")) |
722 | ||
723 |
# prepare data |
|
724 | 10x |
schresid <- residuals(coxobj, type = "schoenfeld") |
725 | 10x |
plot_x <- get_time_as(as.numeric(names(schresid)), time_scale) |
726 | 2x |
if (log_time) plot_x <- log(plot_x) |
727 | ||
728 |
# loewss fit |
|
729 | 10x |
fit0 <- predict(loess(schresid ~ plot_x), se = TRUE) |
730 | 10x |
uppband <- fit0$fit + qt(0.975, fit0$df) * fit0$se |
731 | 10x |
lowband <- fit0$fit - qt(0.975, fit0$df) * fit0$se |
732 | 10x |
use_yrange <- range(schresid, uppband, lowband) |
733 | ||
734 |
# making the plot |
|
735 | 10x |
original_par <- par(bty = "n", mar = c(4, 4, 4, 1), tcl = -0.15, mgp = c(1.5, 0.3, 0)) |
736 | 10x |
on.exit(par(original_par)) |
737 | 10x |
plot(schresid ~ plot_x, |
738 | 10x |
type = "n", |
739 | 10x |
yaxt = "n", ylim = use_yrange, |
740 | 10x |
ylab = "Unscaled Schoenfeld Residual", |
741 | 10x |
xlab = paste0(ifelse(log_time, "Log-", ""), "Time in ", time_scale), |
742 | 10x |
main = paste0( |
743 | 10x |
"Unscaled Schoenfeld Residual\nEndpoint: ", endpoint_name, |
744 | 10x |
ifelse(subtitle == "", "", "\n"), subtitle |
745 |
) |
|
746 |
) |
|
747 | 10x |
axis(2, las = 1) |
748 | 10x |
lines(fit0$fit ~ plot_x, lty = 2, lwd = 1, col = rgb(0, 0, 128, 150, maxColorValue = 255)) |
749 |
# lines(uppband ~ plot_x, lty =2, lwd=1, col = rgb(0,0,128,120,maxColorValue = 255)) |
|
750 |
# lines(lowband ~ plot_x, lty =2, lwd=1, col = rgb(0,0,128,120,maxColorValue = 255)) |
|
751 | 10x |
polygon( |
752 | 10x |
x = c(plot_x, rev(plot_x)), |
753 | 10x |
y = c(uppband, rev(lowband)), |
754 | 10x |
col = rgb(0, 0, 128, 60, maxColorValue = 255), |
755 | 10x |
border = NA |
756 |
) |
|
757 | 10x |
abline(h = 0, lty = 1, lwd = 1, col = "deeppink") |
758 | 10x |
points(schresid ~ plot_x, |
759 | 10x |
pch = 16, |
760 | 10x |
cex = 0.85, |
761 | 10x |
col = rgb(169, 169, 169, 120, maxColorValue = 255) |
762 |
) |
|
763 |
} |
1 |
# Functions for matching step: estimation of individual weights |
|
2 | ||
3 |
# functions to be exported --------------------------------------- |
|
4 | ||
5 |
#' Derive individual weights in the matching step of MAIC |
|
6 |
#' |
|
7 |
#' Assuming data is properly processed, this function takes individual patient data (IPD) with centered covariates |
|
8 |
#' (effect modifiers and/or prognostic variables) as input, and generates weights for each individual in IPD trial to |
|
9 |
#' match the covariates in aggregate data. |
|
10 |
#' |
|
11 |
#' @param data a numeric matrix, centered covariates of IPD, no missing value in any cell is allowed |
|
12 |
#' @param centered_colnames a character or numeric vector (column indicators) of centered covariates |
|
13 |
#' @param start_val a scalar, the starting value for all coefficients of the propensity score regression |
|
14 |
#' @param method a string, name of the optimization algorithm (see 'method' argument of \code{base::optim()}) The |
|
15 |
#' default is `"BFGS"`, other options are `"Nelder-Mead"`, `"CG"`, `"L-BFGS-B"`, `"SANN"`, and `"Brent"` |
|
16 |
#' @param n_boot_iteration an integer, number of bootstrap iterations. By default is NULL which means bootstrapping |
|
17 |
#' procedure will not be triggered, and hence the element `"boot"` of output list object will be NULL. |
|
18 |
#' @param set_seed_boot a scalar, the random seed for conducting the bootstrapping, only relevant if |
|
19 |
#' \code{n_boot_iteration} is not NULL. By default, use seed 1234 |
|
20 |
#' @param boot_strata a character vector of column names in \code{data} that defines the strata for bootstrapping. |
|
21 |
#' This ensures that samples are drawn proportionally from each defined stratum. If \code{NULL}, |
|
22 |
#' no stratification during bootstrapping process. By default, it is "ARM" |
|
23 |
#' @param ... Additional `control` parameters passed to [stats::optim]. |
|
24 |
#' |
|
25 |
#' @return a list with the following 4 elements, |
|
26 |
#' \describe{ |
|
27 |
#' \item{data}{a data.frame, includes the input \code{data} with appended column 'weights' and 'scaled_weights'. |
|
28 |
#' Scaled weights has a summation to be the number of rows in \code{data} that has no missing value in any of the |
|
29 |
#' effect modifiers} |
|
30 |
#' \item{centered_colnames}{column names of centered effect modifiers in \code{data}} |
|
31 |
#' \item{nr_missing}{number of rows in \code{data} that has at least 1 missing value in specified centered effect |
|
32 |
#' modifiers} |
|
33 |
#' \item{ess}{effective sample size, square of sum divided by sum of squares} |
|
34 |
#' \item{opt}{R object returned by \code{base::optim()}, for assess convergence and other details} |
|
35 |
#' \item{boot_strata}{'strata' from a boot::boot object} |
|
36 |
#' \item{boot_seed}{column names in \code{data} of the stratification factors} |
|
37 |
#' \item{boot}{a n by 2 by k array or NA, where n equals to number of rows in \code{data}, and k equals |
|
38 |
#' \code{n_boot_iteration}. The 2 columns in the second dimension include a column of numeric indexes of the rows |
|
39 |
#' in \code{data} that are selected at a bootstrapping iteration and a column of weights. \code{boot} is NA when |
|
40 |
#' argument \code{n_boot_iteration} is set as NULL |
|
41 |
#' } |
|
42 |
#' } |
|
43 |
#' @importFrom boot boot |
|
44 |
#' @examples |
|
45 |
#' data(centered_ipd_sat) |
|
46 |
#' centered_colnames <- grep("_CENTERED", colnames(centered_ipd_sat), value = TRUE) |
|
47 |
#' weighted_data <- estimate_weights(data = centered_ipd_sat, centered_colnames = centered_colnames) |
|
48 |
#' \donttest{ |
|
49 |
#' # To later estimate bootstrap confidence intervals, we calculate the weights |
|
50 |
#' # for the bootstrap samples: |
|
51 |
#' weighted_data_boot <- estimate_weights( |
|
52 |
#' data = centered_ipd_sat, centered_colnames = centered_colnames, n_boot_iteration = 100 |
|
53 |
#' ) |
|
54 |
#' } |
|
55 |
#' @export |
|
56 | ||
57 |
estimate_weights <- function(data, |
|
58 |
centered_colnames = NULL, |
|
59 |
start_val = 0, |
|
60 |
method = "BFGS", |
|
61 |
n_boot_iteration = NULL, |
|
62 |
set_seed_boot = 1234, |
|
63 |
boot_strata = "ARM", |
|
64 |
...) { |
|
65 |
# pre check |
|
66 | 18x |
ch1 <- is.data.frame(data) |
67 | 18x |
if (!ch1) { |
68 | 1x |
stop("'data' is not a data.frame") |
69 |
} |
|
70 | ||
71 | 17x |
ch2 <- (!is.null(centered_colnames)) |
72 | 17x |
if (ch2 && is.numeric(centered_colnames)) { |
73 | ! |
ch2b <- any(centered_colnames < 1 | centered_colnames > ncol(data)) |
74 | ! |
if (ch2b) { |
75 | ! |
stop("specified centered_colnames are out of bound") |
76 |
} |
|
77 | 17x |
} else if (ch2 && is.character(centered_colnames)) { |
78 | 17x |
ch2b <- !all(centered_colnames %in% names(data)) |
79 | 17x |
if (ch2b) { |
80 | 1x |
stop("1 or more specified centered_colnames are not found in 'data'") |
81 |
} |
|
82 |
} else { |
|
83 | ! |
stop("'centered_colnames' should be either a numeric or character vector") |
84 |
} |
|
85 | ||
86 | 16x |
ch3 <- sapply(centered_colnames, function(ii) { |
87 | 97x |
!is.numeric(data[[ii]]) |
88 |
}) |
|
89 | 16x |
if (any(ch3)) { |
90 | 1x |
stop(paste0( |
91 | 1x |
"following columns of 'data' are not numeric for the calculation:", |
92 | 1x |
paste(which(ch3), collapse = ",") |
93 |
)) |
|
94 |
} |
|
95 | ||
96 | 15x |
if (!is.null(boot_strata)) { |
97 | 15x |
ch4 <- boot_strata %in% names(data) |
98 | 15x |
if (!all(ch4)) { |
99 | 1x |
stop("Some variables in boot_strata are not in data: ", toString(boot_strata[!ch4])) |
100 |
} |
|
101 |
} |
|
102 | ||
103 |
# prepare data for optimization |
|
104 | ! |
if (is.null(centered_colnames)) centered_colnames <- seq_len(ncol(data)) |
105 | 14x |
EM <- data[, centered_colnames, drop = FALSE] |
106 | 14x |
ind <- apply(EM, 1, function(xx) any(is.na(xx))) |
107 | 14x |
nr_missing <- sum(ind) |
108 | 14x |
rows_with_missing <- which(ind) |
109 | 14x |
EM <- as.matrix(EM[!ind, , drop = FALSE]) |
110 | ||
111 |
# estimate weights |
|
112 | 14x |
opt1 <- optimise_weights(matrix = EM, par = rep(start_val, ncol(EM)), method = method, ...) |
113 | 14x |
alpha <- opt1$alpha |
114 | 14x |
wt <- opt1$wt |
115 | 14x |
wt_rs <- (wt / sum(wt)) * nrow(EM) |
116 | ||
117 |
# bootstrapping |
|
118 | 14x |
outboot <- if (is.null(n_boot_iteration)) { |
119 | 8x |
boot_seed <- NULL |
120 | 8x |
boot_strata_out <- NULL |
121 | 8x |
NULL |
122 |
} else { |
|
123 |
# Make sure to leave '.Random.seed' as-is on exit |
|
124 | 6x |
old_seed <- globalenv()$.Random.seed |
125 | 6x |
on.exit(suspendInterrupts(set_random_seed(old_seed))) |
126 | 6x |
set.seed(set_seed_boot) |
127 | ||
128 | 6x |
if (!is.null(boot_strata)) { |
129 | 6x |
use_strata <- interaction(data[!ind, boot_strata]) |
130 |
} else { |
|
131 | ! |
use_strata <- rep(1, nrow(EM)) |
132 |
} |
|
133 | 6x |
boot_statistic <- function(d, w) optimise_weights(d[w, ], par = alpha, method = method, ...)$wt[, 1] |
134 | 6x |
boot_out <- boot::boot(EM, statistic = boot_statistic, R = n_boot_iteration, strata = use_strata) |
135 | ||
136 | 6x |
boot_array <- array(dim = list(nrow(EM), 2, n_boot_iteration)) |
137 | 6x |
dimnames(boot_array) <- list(sampled_patient = NULL, c("rowid", "weight"), bootstrap_iteration = NULL) |
138 | 6x |
boot_array[, 1, ] <- t(boot.array(boot_out, TRUE)) |
139 | 6x |
boot_array[, 2, ] <- t(boot_out$t) |
140 | 6x |
boot_seed <- boot_out$seed |
141 | 6x |
boot_strata_out <- boot_out$strata |
142 | 6x |
boot_array |
143 |
} |
|
144 | ||
145 |
# append weights to data |
|
146 | 14x |
data$weights <- NA |
147 | 14x |
data$weights[!ind] <- wt |
148 | ||
149 | 14x |
data$scaled_weights <- NA |
150 | 14x |
data$scaled_weights[!ind] <- wt_rs |
151 | ||
152 | ! |
if (is.numeric(centered_colnames)) centered_colnames <- names(data)[centered_colnames] |
153 | ||
154 |
# Output |
|
155 | 14x |
outdata <- list( |
156 | 14x |
data = data, |
157 | 14x |
centered_colnames = centered_colnames, |
158 | 14x |
nr_missing = nr_missing, |
159 | 14x |
ess = sum(wt)^2 / sum(wt^2), |
160 | 14x |
opt = opt1$opt, |
161 | 14x |
boot = outboot, |
162 | 14x |
boot_seed = boot_seed, |
163 | 14x |
boot_strata = boot_strata_out, |
164 | 14x |
rows_with_missing = rows_with_missing |
165 |
) |
|
166 | ||
167 | 14x |
class(outdata) <- c("maicplus_estimate_weights", "list") |
168 | 14x |
outdata |
169 |
} |
|
170 | ||
171 | ||
172 |
#' Estimate weights using `optim` |
|
173 |
#' |
|
174 |
#' @param matrix Matrix of data to be used for estimating weights |
|
175 |
#' @param par Vector of starting values for the parameters with length equal to the number of columns in `matrix` |
|
176 |
#' @param method Method parameter passed to [stats::optim] |
|
177 |
#' @param ... Additional `control` parameters passed to [stats::optim] |
|
178 |
#' |
|
179 |
#' @return List containing estimated `alpha` values and `wt` weights for all rows of matrix |
|
180 |
#' @noRd |
|
181 |
optimise_weights <- function(matrix, |
|
182 |
par = rep(0, ncol(matrix)), |
|
183 |
method = "BFGS", |
|
184 |
maxit = 300, |
|
185 |
trace = 0, |
|
186 |
...) { |
|
187 | 572x |
if (!all(is.numeric(par) || is.finite(par), length(par) == ncol(matrix))) { |
188 | ! |
stop("par must be a numeric vector with finite values of length equal to the number of columns in 'matrix'") |
189 |
} |
|
190 | 572x |
opt1 <- optim( |
191 | 572x |
par = par, |
192 | 572x |
fn = function(alpha, X) sum(exp(X %*% alpha)), |
193 | 572x |
gr = function(alpha, X) colSums(sweep(X, 1, exp(X %*% alpha), "*")), |
194 | 572x |
X = matrix, |
195 | 572x |
method = method, |
196 | 572x |
control = list(maxit = maxit, trace = trace, ...) |
197 |
) |
|
198 | 572x |
if (opt1$convergence != 0) { |
199 | 1x |
warning( |
200 | 1x |
"optim() did not converge. ", |
201 | 1x |
opt1$message, |
202 | 1x |
"\nSee ?optim for more information on convergence code: ", opt1$convergence |
203 |
) |
|
204 |
} |
|
205 | 572x |
list( |
206 | 572x |
opt = opt1, |
207 | 572x |
alpha = opt1$par, |
208 | 572x |
wt = exp(matrix %*% opt1$par) |
209 |
) |
|
210 |
} |
|
211 | ||
212 |
#' Calculate Statistics for Weight Plot Legend |
|
213 |
#' |
|
214 |
#' Calculates ESS reduction and median weights which is used to create legend for weights plot |
|
215 |
#' |
|
216 |
#' @param weighted_data object returned after calculating weights using [estimate_weights] |
|
217 |
#' |
|
218 |
#' @return list of ESS, ESS reduction, median value of scaled and unscaled weights, and missing count |
|
219 |
#' @examples |
|
220 |
#' data("weighted_sat") |
|
221 |
#' calculate_weights_legend(weighted_sat) |
|
222 |
#' @export |
|
223 |
#' @keywords internal |
|
224 | ||
225 |
calculate_weights_legend <- function(weighted_data) { |
|
226 | 4x |
if (!inherits(weighted_data, "maicplus_estimate_weights")) { |
227 | ! |
stop("weighted_data must be class `maicplus_estimate_weights` generated by estimate_weights()") |
228 |
} |
|
229 | 4x |
ess <- weighted_data$ess |
230 | 4x |
wt <- weighted_data$data$weights |
231 | 4x |
wt_scaled <- weighted_data$data$scaled_weights |
232 | ||
233 |
# calculate sample size and exclude NA from wt |
|
234 | 4x |
nr_na <- sum(is.na(wt)) |
235 | 4x |
n <- length(wt) - nr_na |
236 | 4x |
wt <- na.omit(wt) |
237 | 4x |
wt_scaled <- na.omit(wt_scaled) |
238 | ||
239 |
# calculate ess reduction and median weights |
|
240 | 4x |
ess_reduction <- (1 - (ess / n)) * 100 |
241 | 4x |
wt_median <- median(wt) |
242 | 4x |
wt_scaled_median <- median(wt_scaled) |
243 | ||
244 | 4x |
list( |
245 | 4x |
ess = round(ess, 2), |
246 | 4x |
ess_reduction = round(ess_reduction, 2), |
247 | 4x |
wt_median = round(wt_median, 4), |
248 | 4x |
wt_scaled_median = round(wt_scaled_median, 4), |
249 | 4x |
nr_na = nr_na |
250 |
) |
|
251 |
} |
|
252 | ||
253 |
#' Plot MAIC weights in a histogram with key statistics in legend |
|
254 |
#' |
|
255 |
#' Generates a base R histogram of weights. Default is to plot either unscaled or scaled weights and not both. |
|
256 |
#' |
|
257 |
#' @param weighted_data object returned after calculating weights using [estimate_weights] |
|
258 |
#' @param bin_col a string, color for the bins of histogram |
|
259 |
#' @param vline_col a string, color for the vertical line in the histogram |
|
260 |
#' @param main_title title of the plot |
|
261 |
#' @param scaled_weights an indicator for using scaled weights instead of regular weights |
|
262 |
#' |
|
263 |
#' @return a plot of unscaled or scaled weights |
|
264 |
#' @examples |
|
265 |
#' plot_weights_base(weighted_sat, |
|
266 |
#' bin_col = "#6ECEB2", |
|
267 |
#' vline_col = "#688CE8", |
|
268 |
#' main_title = c("Scaled Individual Weights", "Unscaled Individual Weights"), |
|
269 |
#' scaled_weights = TRUE |
|
270 |
#' ) |
|
271 |
#' @export |
|
272 | ||
273 |
plot_weights_base <- function(weighted_data, |
|
274 |
bin_col, vline_col, main_title, |
|
275 |
scaled_weights) { |
|
276 | 2x |
weights_stat <- calculate_weights_legend(weighted_data) |
277 | ||
278 | 2x |
if (scaled_weights) { |
279 | 2x |
wt <- weighted_data$data$scaled_weights |
280 | 2x |
median_wt <- weights_stat$wt_scaled_median |
281 |
} else { |
|
282 | ! |
wt <- weighted_data$data$weights |
283 | ! |
median_wt <- weights_stat$wt_median |
284 |
} |
|
285 | ||
286 |
# prepare legend |
|
287 | 2x |
plot_legend <- c( |
288 | 2x |
paste0("Median = ", median_wt), |
289 | 2x |
paste0("ESS = ", weights_stat$ess), |
290 | 2x |
paste0("Reduction% = ", weights_stat$ess_reduct) |
291 |
) |
|
292 | 2x |
plot_lty <- c(2, NA, NA) |
293 | ||
294 | 2x |
if (weights_stat$nr_na > 0) { |
295 | ! |
plot_legend <- c(plot_legend, paste0("#Missing Weights = ", weights_stat$nr_na)) |
296 | ! |
plot_lty <- c(plot_lty, NA) |
297 |
} |
|
298 | ||
299 |
# plot |
|
300 | 2x |
original_par <- par(mgp = c(2.3, 0.5, 0), cex.axis = 0.9, cex.lab = 0.95, bty = "n") |
301 | 2x |
on.exit(par(original_par)) |
302 | 2x |
hist(wt, border = "white", col = bin_col, main = main_title, breaks = 20, yaxt = "n", xlab = "") |
303 | 2x |
axis(2, las = 1) |
304 | 2x |
abline(v = median(wt), lty = 2, col = vline_col, lwd = 2) |
305 | 2x |
legend("topright", bty = "n", lty = plot_lty, cex = 0.8, legend = plot_legend) |
306 |
} |
|
307 | ||
308 |
#' Plot MAIC weights in a histogram with key statistics in legend using `ggplot2` |
|
309 |
#' |
|
310 |
#' Generates a `ggplot` histogram of weights. Default is to plot both unscaled and scaled weights on a same graph. |
|
311 |
#' |
|
312 |
#' @param weighted_data object returned after calculating weights using [estimate_weights] |
|
313 |
#' @param bin_col a string, color for the bins of histogram |
|
314 |
#' @param vline_col a string, color for the vertical line in the histogram |
|
315 |
#' @param main_title Name of scaled weights plot and unscaled weights plot, respectively. |
|
316 |
#' @param bins number of bin parameter to use |
|
317 |
#' |
|
318 |
#' @return a plot of unscaled and scaled weights |
|
319 |
#' @examples |
|
320 |
#' if (requireNamespace("ggplot2")) { |
|
321 |
#' plot_weights_ggplot(weighted_sat, |
|
322 |
#' bin_col = "#6ECEB2", |
|
323 |
#' vline_col = "#688CE8", |
|
324 |
#' main_title = c("Scaled Individual Weights", "Unscaled Individual Weights"), |
|
325 |
#' bins = 50 |
|
326 |
#' ) |
|
327 |
#' } |
|
328 |
#' @export |
|
329 | ||
330 |
plot_weights_ggplot <- function(weighted_data, bin_col, vline_col, |
|
331 |
main_title, |
|
332 |
bins) { |
|
333 |
# check if ggplot2 package is installed |
|
334 | 1x |
if (!requireNamespace("ggplot2", quietly = TRUE)) { |
335 | ! |
stop("ggplot2 package is needed to run this function") |
336 |
} |
|
337 | ||
338 | 1x |
weights_stat <- calculate_weights_legend(weighted_data) |
339 | ||
340 |
# prepare dataset to use in ggplot |
|
341 | 1x |
wt_data0 <- weighted_data$data[, c("weights", "scaled_weights")] |
342 | 1x |
colnames(wt_data0) <- main_title |
343 | 1x |
wt_data <- stack(wt_data0) |
344 | 1x |
wt_data$median <- ifelse(wt_data$ind == main_title[1], |
345 | 1x |
weights_stat$wt_median, weights_stat$wt_scaled_median |
346 |
) |
|
347 | ||
348 |
# create legend data |
|
349 | 1x |
lab <- with(weights_stat, { |
350 | 1x |
lab <- c(paste0("Median = ", wt_median), paste0("Median = ", wt_scaled_median)) |
351 | 1x |
lab <- paste0(lab, "\nESS = ", ess, "\nReduction% = ", ess_reduction) |
352 | ! |
if (nr_na > 0) lab <- paste0(lab, "\n#Missing Weights = ", nr_na) |
353 | 1x |
lab |
354 |
}) |
|
355 | 1x |
legend_data <- data.frame(ind = main_title, lab = lab) |
356 | ||
357 | 1x |
hist_plot <- ggplot2::ggplot(wt_data) + |
358 | 1x |
ggplot2::geom_histogram(ggplot2::aes_string(x = "values"), bins = bins, color = bin_col, fill = bin_col) + |
359 | 1x |
ggplot2::geom_vline(ggplot2::aes_string(xintercept = "median"), |
360 | 1x |
color = vline_col, |
361 | 1x |
linetype = "dashed" |
362 |
) + |
|
363 | 1x |
ggplot2::theme_bw() + |
364 | 1x |
ggplot2::facet_wrap(~ind, ncol = 1) + |
365 | 1x |
ggplot2::geom_text( |
366 | 1x |
data = legend_data, |
367 | 1x |
ggplot2::aes_string(label = "lab"), x = Inf, y = Inf, hjust = 1, vjust = 1, size = 3 |
368 |
) + |
|
369 | 1x |
ggplot2::theme( |
370 | 1x |
axis.title = ggplot2::element_text(size = 12), |
371 | 1x |
axis.text = ggplot2::element_text(size = 12) |
372 |
) + |
|
373 | 1x |
ggplot2::ylab("Frequency") + |
374 | 1x |
ggplot2::xlab("Weight") |
375 | ||
376 | 1x |
return(hist_plot) |
377 |
} |
|
378 | ||
379 | ||
380 |
#' Plot method for Estimate Weights objects |
|
381 |
#' |
|
382 |
#' The plot function displays individuals weights with key summary in top right legend that includes |
|
383 |
#' median weight, effective sample size (ESS), and reduction percentage (what percent ESS is reduced from the |
|
384 |
#' original sample size). There are two options of plotting: base R plot and `ggplot`. The default |
|
385 |
#' for base R plot is to plot unscaled and scaled separately. The default |
|
386 |
#' for `ggplot` is to plot unscaled and scaled weights on a same plot. |
|
387 |
#' |
|
388 |
#' @param x object from [estimate_weights] |
|
389 |
#' @param ggplot indicator to print base weights plot or `ggplot` weights plot |
|
390 |
#' @param bin_col a string, color for the bins of histogram |
|
391 |
#' @param vline_col a string, color for the vertical line in the histogram |
|
392 |
#' @param main_title title of the plot. For ggplot, name of scaled weights plot and unscaled weights plot, respectively. |
|
393 |
#' @param scaled_weights (base plot only) an indicator for using scaled weights instead of regular weights |
|
394 |
#' @param bins (`ggplot` only) number of bin parameter to use |
|
395 |
#' |
|
396 |
#' @examples |
|
397 |
#' plot(weighted_sat) |
|
398 |
#' |
|
399 |
#' if (requireNamespace("ggplot2")) { |
|
400 |
#' plot(weighted_sat, ggplot = TRUE) |
|
401 |
#' } |
|
402 |
#' @describeIn estimate_weights Plot method for estimate_weights objects |
|
403 |
#' @export |
|
404 | ||
405 |
plot.maicplus_estimate_weights <- function(x, ggplot = FALSE, |
|
406 |
bin_col = "#6ECEB2", vline_col = "#688CE8", |
|
407 |
main_title = NULL, |
|
408 |
scaled_weights = TRUE, |
|
409 |
bins = 50, ...) { |
|
410 | 1x |
if (ggplot) { |
411 | ! |
if (is.null(main_title)) main_title <- c("Scaled Individual Weights", "Unscaled Individual Weights") |
412 | ! |
plot_weights_ggplot(x, bin_col, vline_col, main_title, bins) |
413 |
} else { |
|
414 | 1x |
if (is.null(main_title)) { |
415 | 1x |
main_title <- ifelse(scaled_weights, "Scaled Individual Weights", "Unscaled Individual Weights") |
416 |
} |
|
417 | 1x |
plot_weights_base(x, bin_col, vline_col, main_title, scaled_weights) |
418 |
} |
|
419 |
} |
|
420 | ||
421 | ||
422 |
#' Check to see if weights are optimized correctly |
|
423 |
#' |
|
424 |
#' This function checks to see if the optimization is done properly by checking the covariate averages |
|
425 |
#' before and after adjustment. |
|
426 |
#' |
|
427 |
#' @param weighted_data object returned after calculating weights using \code{\link{estimate_weights}} |
|
428 |
#' @param processed_agd a data frame, object returned after using \code{\link{process_agd}} or |
|
429 |
#' aggregated data following the same naming convention |
|
430 |
#' |
|
431 |
#' @examples |
|
432 |
#' data(weighted_sat) |
|
433 |
#' data(agd) |
|
434 |
#' check_weights(weighted_sat, process_agd(agd)) |
|
435 |
#' |
|
436 |
#' @import DescTools |
|
437 |
#' |
|
438 |
#' @return data.frame of weighted and unweighted covariate averages of the IPD, |
|
439 |
#' average of aggregate data, and sum of inner products of covariate \eqn{x_i} and the weights (\eqn{exp(x_i\beta)}) |
|
440 |
#' @export |
|
441 | ||
442 |
check_weights <- function(weighted_data, processed_agd) { |
|
443 | 1x |
ipd_with_weights <- weighted_data$data |
444 | 1x |
match_cov <- weighted_data$centered_colnames |
445 | ||
446 |
# if algorithm is correct, all centered columns should have a weighted summation to a very small number around zero |
|
447 | 1x |
num_check <- ipd_with_weights$weights %*% as.matrix(ipd_with_weights[, match_cov, drop = FALSE]) |
448 | 1x |
num_check <- round(num_check, 4) |
449 | ||
450 |
# for reporting |
|
451 | 1x |
outdata <- data.frame( |
452 | 1x |
covariate = gsub("_CENTERED$", "", match_cov), |
453 | 1x |
match_stat = NA, |
454 | 1x |
internal_trial = NA, |
455 | 1x |
internal_trial_after_weighted = NA, |
456 | 1x |
external_trial = NA, |
457 | 1x |
sum_centered_IPD_with_weights = as.vector(num_check) |
458 |
) |
|
459 | 1x |
attr(outdata, "footer") <- list() |
460 |
# find item that was matched by mean |
|
461 | 1x |
ind_mean <- lapply(outdata$covariate, grep, x = names(processed_agd), value = TRUE) |
462 | 1x |
ind_mean <- sapply(ind_mean, function(ii) any(grepl("_MEAN$", ii))) |
463 | 1x |
outdata$match_stat <- ifelse(grepl("_MEDIAN$", outdata$covariate), "Median", |
464 | 1x |
ifelse(grepl("_SQUARED$", outdata$covariate), "SD", |
465 | 1x |
ifelse(ind_mean, "Mean", "Prop") |
466 |
) |
|
467 |
) |
|
468 | 1x |
outdata$covariate <- gsub("_MEDIAN|_SQUARED", "", outdata$covariate) |
469 |
# fill in corresponding agd data |
|
470 | 1x |
outdata$external_trial <- unlist(processed_agd[paste(outdata$covariate, toupper(outdata$match_stat), sep = "_")]) |
471 | ||
472 |
# fill in stat from unweighted and weighted IPD |
|
473 | 1x |
for (ii in seq_len(nrow(outdata))) { |
474 | 6x |
covname <- outdata$covariate[ii] |
475 | 6x |
if (outdata$match_stat[ii] %in% c("Mean", "Prop")) { |
476 | 4x |
outdata$internal_trial[ii] <- mean(ipd_with_weights[[covname]], na.rm = TRUE) |
477 | 4x |
outdata$internal_trial_after_weighted[ii] <- weighted.mean( |
478 | 4x |
ipd_with_weights[[covname]], |
479 | 4x |
w = ipd_with_weights$weights, na.rm = TRUE |
480 |
) |
|
481 | 2x |
} else if (outdata$match_stat[ii] == "Median") { |
482 | 1x |
outdata$internal_trial[ii] <- quantile(ipd_with_weights[[covname]], |
483 | 1x |
probs = 0.5, |
484 | 1x |
na.rm = TRUE, |
485 | 1x |
type = 2, |
486 | 1x |
names = FALSE |
487 | 1x |
) # SAS default |
488 | 1x |
outdata$internal_trial_after_weighted[ii] <- DescTools::Quantile(ipd_with_weights[[covname]], |
489 | 1x |
weights = ipd_with_weights$weights, |
490 | 1x |
probs = 0.5, |
491 | 1x |
na.rm = TRUE, |
492 | 1x |
type = 5, |
493 | 1x |
names = FALSE |
494 |
) |
|
495 |
# no IPD equals to reported AgD median |
|
496 | 1x |
msg_ind <- !any(ipd_with_weights[[covname]] == outdata$external_trial[ii], na.rm = TRUE) |
497 | 1x |
if (msg_ind) { |
498 | ! |
msg_txt <- paste0( |
499 | ! |
"For covariate ", covname, ", it was matched to AgD median, but there is no IPD identical to AgD median,", |
500 | ! |
"hence median after weighted will not equal to AgD median exactly." |
501 |
) |
|
502 | ! |
attr(outdata, "footer") <- c(attr(outdata, "footer"), msg_txt) |
503 |
} |
|
504 | 1x |
} else if (outdata$match_stat[ii] == "SD") { |
505 | 1x |
outdata$internal_trial[ii] <- sd(ipd_with_weights[[covname]], na.rm = TRUE) |
506 | 1x |
wm_squared <- weighted.mean(ipd_with_weights[[covname]]^2, w = ipd_with_weights$weights, na.rm = TRUE) |
507 | 1x |
ms_agd <- processed_agd[[paste0(outdata$covariate[ii], "_MEAN")]]^2 |
508 | 1x |
outdata$internal_trial_after_weighted[ii] <- sqrt(wm_squared - ms_agd) |
509 |
} |
|
510 |
} |
|
511 | ||
512 |
# output |
|
513 | 1x |
class(outdata) <- c("maicplus_check_weights", "data.frame") |
514 | 1x |
outdata |
515 |
} |
|
516 | ||
517 | ||
518 |
#' Print method for Check Weights objects |
|
519 |
#' |
|
520 |
#' @param x object from [check_weights] |
|
521 |
#' @param mean_digits number of digits for rounding mean columns in the output |
|
522 |
#' @param prop_digits number of digits for rounding proportion columns in the output |
|
523 |
#' @param sd_digits number of digits for rounding mean columns in the output |
|
524 |
#' @param digits minimal number of significant digits, see [print.default]. |
|
525 |
#' @param ... further arguments to [print.data.frame] |
|
526 |
#' @describeIn check_weights Print method for check_weights objects |
|
527 |
#' @export |
|
528 | ||
529 |
print.maicplus_check_weights <- function(x, |
|
530 |
mean_digits = 2, |
|
531 |
prop_digits = 2, |
|
532 |
sd_digits = 3, |
|
533 |
digits = getOption("digits"), ...) { |
|
534 | ! |
round_digits <- c("Mean" = mean_digits, "Prop" = prop_digits, "SD" = sd_digits)[x$match_stat] |
535 | ! |
round_digits[is.na(round_digits)] <- digits |
536 | ||
537 | ! |
x$external_trial <- round(x$external_trial, round_digits) |
538 | ! |
x$internal_trial <- round(x$internal_trial, round_digits) |
539 | ! |
x$internal_trial_after_weighted <- round(x$internal_trial_after_weighted, round_digits) |
540 | ||
541 | ! |
print.data.frame(x, ...) |
542 | ! |
footer <- unlist(attr(x, "footer")) |
543 | ! |
if (length(footer)) { |
544 | ! |
cat("\n") |
545 | ! |
for (f in seq_along(footer)) { |
546 | ! |
cat(paste0("[", f, "] ", footer[f])) |
547 |
} |
|
548 |
} |
|
549 |
} |
|
550 | ||
551 |
#' Note on Expected Sample Size Reduction |
|
552 |
#' |
|
553 |
#' @param width Number of characters to break string into new lines (`\n`). |
|
554 |
#' |
|
555 |
#' @return A character string |
|
556 |
#' @keywords internal |
|
557 |
ess_footnote_text <- function(width = 0.9 * getOption("width")) { |
|
558 | 1x |
text <- "An ESS reduction up to ~60% is not unexpected based on the 2021 survey of NICE's technology appraisals |
559 | 1x |
(https://onlinelibrary.wiley.com/doi/full/10.1002/jrsm.1511), whereas a reduction of >75% is less common |
560 | 1x |
and it may be considered suboptimal." |
561 | 1x |
paste0(strwrap(text, width = width), collapse = "\n") |
562 |
} |
1 |
#' Helper function to retrieve median survival time from a `survival::survfit` object |
|
2 |
#' |
|
3 |
#' Extract and display median survival time with confidence interval |
|
4 |
#' |
|
5 |
#' @param km_fit returned object from \code{survival::survfit} |
|
6 |
#' @param legend a character string, name used in 'type' column in returned data frame |
|
7 |
#' @param time_scale a character string, 'years', 'months', 'weeks' or 'days', time unit of median survival time |
|
8 |
#' |
|
9 |
#' @examples |
|
10 |
#' data(adtte_sat) |
|
11 |
#' data(pseudo_ipd_sat) |
|
12 |
#' library(survival) |
|
13 |
#' combined_data <- rbind(adtte_sat[, c("TIME", "EVENT", "ARM")], pseudo_ipd_sat) |
|
14 |
#' kmobj <- survfit(Surv(TIME, EVENT) ~ ARM, combined_data, conf.type = "log-log") |
|
15 |
#' |
|
16 |
#' # Derive median survival time |
|
17 |
#' medSurv <- medSurv_makeup(kmobj, legend = "before matching", time_scale = "day") |
|
18 |
#' medSurv |
|
19 |
#' @return a data frame with a index column 'type', median survival time and confidence interval |
|
20 |
#' @export |
|
21 | ||
22 |
medSurv_makeup <- function(km_fit, legend = "before matching", time_scale) { |
|
23 | 10x |
time_scale <- match.arg(time_scale, choices = c("years", "months", "weeks", "days")) |
24 | ||
25 |
# km_fit is the returned object from survival::survfit |
|
26 | 10x |
km_fit <- summary(km_fit)$table |
27 | 10x |
km_fit[, 5:ncol(km_fit)] <- get_time_as(km_fit[, 5:ncol(km_fit)], time_scale) |
28 | ||
29 | 10x |
toyadd <- data.frame( |
30 | 10x |
treatment = gsub("ARM=", "", rownames(km_fit)), |
31 | 10x |
type = rep(legend, 2) |
32 |
) |
|
33 | ||
34 | 10x |
km_fit <- cbind(toyadd, km_fit) |
35 | 10x |
rownames(km_fit) <- NULL |
36 | ||
37 | 10x |
km_fit |
38 |
} |
|
39 | ||
40 | ||
41 |
#' Helper function to select set of variables used for Kaplan-Meier plot |
|
42 |
#' |
|
43 |
#' @param km_fit returned object from \code{survival::survfit} |
|
44 |
#' @param single_trt_name name of treatment if no strata are specified in `km_fit` |
|
45 |
#' |
|
46 |
#' @examples |
|
47 |
#' library(survival) |
|
48 |
#' data(adtte_sat) |
|
49 |
#' data(pseudo_ipd_sat) |
|
50 |
#' combined_data <- rbind(adtte_sat[, c("TIME", "EVENT", "ARM")], pseudo_ipd_sat) |
|
51 |
#' kmobj <- survfit(Surv(TIME, EVENT) ~ ARM, combined_data, conf.type = "log-log") |
|
52 |
#' survfit_makeup(kmobj) |
|
53 |
#' @return a list of data frames of variables from [survival::survfit()]. Data frame is divided by treatment. |
|
54 |
#' @export |
|
55 | ||
56 |
survfit_makeup <- function(km_fit, single_trt_name = "treatment") { |
|
57 |
# in case km_fit is only for single arm |
|
58 | 116x |
if ("strata" %in% names(km_fit)) { |
59 | 20x |
use_trt <- mapply(rep, 1:2, each = km_fit$strata) |
60 | 20x |
if (is.list(use_trt)) use_trt <- unlist(use_trt) |
61 | ! |
if (is.matrix(use_trt)) use_trt <- as.vector(use_trt) |
62 | 20x |
is_single <- FALSE |
63 |
} else { |
|
64 | 96x |
use_trt <- rep(single_trt_name, length(km_fit$time)) |
65 | 96x |
is_single <- TRUE |
66 |
} |
|
67 | ||
68 | 116x |
kmdat <- data.frame( |
69 | 116x |
time = km_fit$time, |
70 | 116x |
treatment = use_trt, |
71 | 116x |
n.risk = km_fit$n.risk, |
72 | 116x |
n.event = km_fit$n.event, |
73 | 116x |
censor = km_fit$n.censor, |
74 | 116x |
surv = km_fit$surv, |
75 | 116x |
lower = km_fit$lower, |
76 | 116x |
upper = km_fit$upper, |
77 | 116x |
cumhaz = km_fit$cumhaz |
78 |
) |
|
79 | 20x |
if (!is_single) kmdat$treatment <- sapply(strsplit(names(km_fit$strata), "="), "[[", 2)[kmdat$treatment] |
80 | 116x |
split(kmdat, f = kmdat$treatment) |
81 |
} |
1 |
#' Kaplan-Meier (KM) plot function for anchored and unanchored cases using ggplot |
|
2 |
#' |
|
3 |
#' This is wrapper function of \code{basic_kmplot2}. |
|
4 |
#' The argument setting is similar to \code{maic_anchored} and \code{maic_unanchored}, |
|
5 |
#' and it is used in those two functions. |
|
6 |
#' |
|
7 |
#' @param weights_object an object returned by \code{estimate_weight} |
|
8 |
#' @param tte_ipd a data frame of individual patient data (IPD) of internal trial, contain at least `"USUBJID"`, |
|
9 |
#' `"EVENT"`, `"TIME"` columns and a column indicating treatment assignment |
|
10 |
#' @param tte_pseudo_ipd a data frame of pseudo IPD by digitized KM curves of external trial (for time-to-event |
|
11 |
#' endpoint), contain at least `"EVENT"`, `"TIME"` |
|
12 |
#' @param trt_ipd a string, name of the interested investigation arm in internal trial \code{dat_igd} (real IPD) |
|
13 |
#' @param trt_agd a string, name of the interested investigation arm in external trial \code{dat_pseudo} (pseudo IPD) |
|
14 |
#' @param trt_common a string, name of the common comparator in internal and external trial, by default is NULL, |
|
15 |
#' indicating unanchored case |
|
16 |
#' @param trt_var_ipd a string, column name in \code{tte_ipd} that contains the treatment assignment |
|
17 |
#' @param trt_var_agd a string, column name in \code{tte_pseudo_ipd} that contains the treatment assignment |
|
18 |
#' @param normalize_weights logical, default is \code{FALSE}. If \code{TRUE}, |
|
19 |
#' \code{scaled_weights} (normalized weights) in \code{weights_object$data} will be used. |
|
20 |
#' @param km_conf_type a string, pass to \code{conf.type} of \code{survfit} |
|
21 |
#' @param km_layout a string, only applicable for unanchored case (\code{trt_common = NULL}), indicated the |
|
22 |
#' desired layout of output KM curve. |
|
23 |
#' @param time_scale a string, time unit of median survival time, taking a value of 'years', 'months', |
|
24 |
#' weeks' or 'days' |
|
25 |
#' @param ... other arguments in \code{basic_kmplot2} |
|
26 |
#' |
|
27 |
#' @return In unanchored case, a KM plot with risk set table. In anchored case, depending on \code{km_layout}, |
|
28 |
#' \itemize{ |
|
29 |
#' \item if "by_trial", 2 by 1 plot, first all KM curves (incl. weighted) in IPD trial, and then KM curves in AgD |
|
30 |
#' trial, with risk set table. |
|
31 |
#' \item if "by_arm", 2 by 1 plot, first KM curves of \code{trt_agd} and \code{trt_ipd} (with and without weights), |
|
32 |
#' and then KM curves of \code{trt_common} in AgD trial and IPD trial (with and without weights). Risk set table is |
|
33 |
#' appended. |
|
34 |
#' \item if "all", 2 by 2 plot, all plots in "by_trial" and "by_arm" without risk set table appended. |
|
35 |
#' } |
|
36 |
#' @example inst/examples/kmplot2_unanchored_ex.R |
|
37 |
#' @example inst/examples/kmplot2_anchored_ex.R |
|
38 |
#' @export |
|
39 | ||
40 |
kmplot2 <- function(weights_object, |
|
41 |
tte_ipd, |
|
42 |
tte_pseudo_ipd, |
|
43 |
trt_ipd, |
|
44 |
trt_agd, |
|
45 |
trt_common = NULL, |
|
46 |
normalize_weights = FALSE, |
|
47 |
trt_var_ipd = "ARM", |
|
48 |
trt_var_agd = "ARM", |
|
49 |
km_conf_type = "log-log", |
|
50 |
km_layout = c("all", "by_trial", "by_arm"), |
|
51 |
time_scale, |
|
52 |
...) { |
|
53 | ! |
if (!requireNamespace("survminer", quietly = TRUE)) stop("survminer package is required for this function") |
54 | ||
55 | 1x |
names(tte_ipd) <- toupper(names(tte_ipd)) |
56 | 1x |
names(tte_pseudo_ipd) <- toupper(names(tte_pseudo_ipd)) |
57 | 1x |
trt_var_ipd <- toupper(trt_var_ipd) |
58 | 1x |
trt_var_agd <- toupper(trt_var_agd) |
59 | ||
60 |
# pre check |
|
61 | 1x |
if (!"maicplus_estimate_weights" %in% class(weights_object)) { |
62 | ! |
stop("weights_object should be an object returned by estimate_weights") |
63 |
} |
|
64 | 1x |
if (!all(c("USUBJID", "TIME", "EVENT", trt_var_ipd) %in% names(tte_ipd))) { |
65 | ! |
stop(paste("tte_ipd needs to include at least USUBJID, TIME, EVENT,", trt_var_ipd)) |
66 |
} |
|
67 | 1x |
if (!all(c("TIME", "EVENT", trt_var_agd) %in% names(tte_pseudo_ipd))) { |
68 | ! |
stop(paste("tte_pseudo_ipd needs to include at least TIME, EVENT,", trt_var_agd)) |
69 |
} |
|
70 | 1x |
km_layout <- match.arg(km_layout, choices = c("all", "by_trial", "by_arm"), several.ok = FALSE) |
71 | ||
72 |
# preparing data |
|
73 | 1x |
is_anchored <- !is.null(trt_common) |
74 | 1x |
tte_ipd <- tte_ipd[tte_ipd[[trt_var_ipd]] %in% c(trt_ipd, trt_common), , drop = FALSE] |
75 | 1x |
tte_pseudo_ipd <- tte_pseudo_ipd[tte_pseudo_ipd[[trt_var_agd]] %in% c(trt_agd, trt_common), , drop = FALSE] |
76 | 1x |
if (normalize_weights) { |
77 | ! |
tte_ipd$weights <- weights_object$data$scaled_weights[match(weights_object$data$USUBJID, tte_ipd$USUBJID)] |
78 |
} else { |
|
79 | 1x |
tte_ipd$weights <- weights_object$data$weights[match(weights_object$data$USUBJID, tte_ipd$USUBJID)] |
80 |
} |
|
81 | 1x |
tte_pseudo_ipd$weights <- 1 |
82 | ||
83 | 1x |
tte_ipd$TIME <- get_time_as(tte_ipd$TIME, as = time_scale) |
84 | 1x |
tte_pseudo_ipd$TIME <- get_time_as(tte_pseudo_ipd$TIME, as = time_scale) |
85 | 1x |
my_survfit <- function(data, weighted = FALSE) { |
86 | 6x |
if (weighted) { |
87 | 2x |
survfit(Surv(TIME, EVENT) ~ 1, data = data, conf.type = km_conf_type, weights = data$weights) |
88 |
} else { |
|
89 | 4x |
survfit(Surv(TIME, EVENT) ~ 1, data = data, conf.type = km_conf_type) |
90 |
} |
|
91 |
} |
|
92 | ||
93 | 1x |
if (!is_anchored) { |
94 | ! |
kmlist <- list( |
95 | ! |
kmobj_B = my_survfit(data = tte_pseudo_ipd), |
96 | ! |
kmobj_A = my_survfit(data = tte_ipd), |
97 | ! |
kmobj_A_adj = my_survfit(data = tte_ipd, weighted = TRUE) |
98 |
) |
|
99 | ! |
kmlist_name <- c(trt_agd, trt_ipd, paste0(trt_ipd, " (weighted)")) |
100 | ! |
basic_kmplot2(kmlist, kmlist_name, ...) |
101 | 1x |
} else if (is_anchored) { |
102 | 1x |
all_km <- list( |
103 | 1x |
kmobj_A = my_survfit(data = tte_ipd[tte_ipd[, trt_var_ipd] == trt_ipd, ]), |
104 | 1x |
kmobj_B = my_survfit(data = tte_pseudo_ipd[tte_pseudo_ipd[, trt_var_agd] == trt_agd, ]), |
105 | 1x |
kmobj_A_adj = my_survfit(data = tte_ipd[tte_ipd[, trt_var_ipd] == trt_ipd, ], weighted = TRUE), |
106 | 1x |
kmobj_C = my_survfit(data = tte_ipd[tte_ipd[, trt_var_ipd] == trt_common, ]), |
107 | 1x |
kmobj_C_adj = my_survfit(data = tte_ipd[tte_ipd[, trt_var_ipd] == trt_common, ], weighted = TRUE), |
108 | 1x |
kmobj_C_agd = my_survfit(data = tte_pseudo_ipd[tte_pseudo_ipd[, trt_var_agd] == trt_common, ]) |
109 |
) |
|
110 | ||
111 | 1x |
kmlist_combined <- list() |
112 | 1x |
if (km_layout %in% c("by_trial", "all")) { |
113 | 1x |
kmlist_1_2 <- list( |
114 | 1x |
setNames( |
115 | 1x |
all_km[c(4, 1, 3, 5)], |
116 | 1x |
c(trt_common, trt_ipd, paste0(trt_ipd, " (weighted)"), paste0(trt_common, " (weighted)")) |
117 |
), |
|
118 | 1x |
setNames(all_km[c(6, 2)], c(trt_common, trt_agd)) |
119 |
) |
|
120 | 1x |
names(kmlist_1_2) <- c( |
121 | 1x |
paste0("Kaplan-Meier Curves \n(", trt_ipd, " vs ", trt_common, ") in the IPD trial"), |
122 | 1x |
paste0("Kaplan-Meier Curves \n(", trt_agd, " vs ", trt_common, ") in the AgD trial") |
123 |
) |
|
124 | 1x |
kmlist_combined <- c(kmlist_combined, kmlist_1_2) |
125 |
} |
|
126 | 1x |
if (km_layout %in% c("by_arm", "all")) { |
127 | ! |
kmlist_3_4 <- list( |
128 | ! |
setNames(all_km[c(2, 1, 3)], c(trt_agd, trt_ipd, paste0(trt_ipd, " (weighted)"))), |
129 | ! |
setNames(all_km[c(6, 4, 5)], paste(trt_common, c("(AgD)", "(IPD)", "(IPD,weighted)"))) |
130 |
) |
|
131 | ! |
names(kmlist_3_4) <- c( |
132 | ! |
paste0("Kaplan-Meier Curves \n(", trt_ipd, " vs ", trt_agd, ")"), |
133 | ! |
paste0("Kaplan-Meier Curves of Common Comparator \n", trt_common, "(IPD vs AgD Trial)") |
134 |
) |
|
135 | ! |
kmlist_combined <- c(kmlist_combined, kmlist_3_4) |
136 |
} |
|
137 | 1x |
if (km_layout == "all") { |
138 | ! |
kmlist_combined <- kmlist_combined[c(1, 3, 2, 4)] |
139 |
} |
|
140 | ||
141 | 1x |
splots <- mapply( |
142 | 1x |
FUN = basic_kmplot2, |
143 | 1x |
kmlist = kmlist_combined, |
144 | 1x |
kmlist_name = lapply(kmlist_combined, names), |
145 | 1x |
main_title = names(kmlist_combined), |
146 | 1x |
MoreArgs = list(...), |
147 | 1x |
SIMPLIFY = FALSE |
148 |
) |
|
149 | 1x |
survminer::arrange_ggsurvplots(splots, nrow = 1 + (km_layout == "all")) |
150 |
} |
|
151 |
} |
|
152 | ||
153 |
#' Basic Kaplan Meier (KM) plot function using ggplot |
|
154 |
#' |
|
155 |
#' This function generates a basic KM plot using ggplot. |
|
156 |
#' |
|
157 |
#' @param kmlist a list of \code{survfit} object |
|
158 |
#' @param kmlist_name a vector indicating the treatment names of each \code{survfit} object |
|
159 |
#' @param endpoint_name a string, name of time to event endpoint, to be show in the |
|
160 |
#' last line of title |
|
161 |
#' @param show_risk_set logical, show risk set table or not, TRUE by default |
|
162 |
#' @param main_title a string, main title of the KM plot |
|
163 |
#' @param break_x_by bin parameter for \code{survminer} |
|
164 |
#' @param censor indicator to include censor information |
|
165 |
#' @param xlab label name for x-axis of the plot |
|
166 |
#' @param xlim x limit for the x-axis of the plot |
|
167 |
#' @param use_colors a character vector of length up to 4, colors to the KM curves, |
|
168 |
#' it will be passed to 'col' of \code{lines()} |
|
169 |
#' @param use_line_types a numeric vector of length up to 4, line type to the KM curves, |
|
170 |
#' it will be passed to \code{lty} of \code{lines()} |
|
171 |
#' @example inst/examples/basic_kmplot2_ex.R |
|
172 |
#' @export |
|
173 | ||
174 |
basic_kmplot2 <- function(kmlist, |
|
175 |
kmlist_name, |
|
176 |
endpoint_name = "Time to Event Endpoint", |
|
177 |
show_risk_set = TRUE, |
|
178 |
main_title = "Kaplan-Meier Curves", |
|
179 |
break_x_by = NULL, |
|
180 |
censor = TRUE, |
|
181 |
xlab = "Time", |
|
182 |
xlim = NULL, |
|
183 |
use_colors = NULL, |
|
184 |
use_line_types = NULL) { |
|
185 | ! |
if (!requireNamespace("survminer", quietly = TRUE)) stop("survminer package is required for this function") |
186 | ||
187 | 2x |
if (is.null(use_line_types)) { |
188 | 2x |
use_line_types <- c(1, 1, 2, 2) |
189 |
} |
|
190 | ||
191 | 2x |
if (is.null(use_colors)) { |
192 | 2x |
use_colors <- c("#5450E4", "#00857C", "#6ECEB2", "#7B68EE") |
193 |
} |
|
194 | ||
195 |
# Produce the Kaplan-Meier plot |
|
196 | 2x |
survminer_plot <- survminer::ggsurvplot(kmlist, |
197 | 2x |
linetype = use_line_types, |
198 | 2x |
palette = use_colors, |
199 | 2x |
size = 0.2, |
200 | 2x |
combine = TRUE, |
201 | 2x |
risk.table = show_risk_set, |
202 | 2x |
risk.table.y.text.col = TRUE, |
203 | 2x |
risk.table.y.text = FALSE, |
204 | 2x |
break.x.by = break_x_by, |
205 | 2x |
censor = censor, |
206 | 2x |
censor.size = 2, |
207 | 2x |
xlab = xlab, |
208 | 2x |
ylab = endpoint_name, |
209 | 2x |
legend.title = "Treatment", |
210 | 2x |
legend = c(0.85, 0.82), |
211 | 2x |
title = paste0(main_title, "\nEndpoint: ", endpoint_name), |
212 | 2x |
legend.labs = kmlist_name, |
213 | 2x |
tables.theme = survminer::theme_cleantable(), |
214 | 2x |
ggtheme = ggplot2::theme_classic(base_size = 10), |
215 | 2x |
fontsize = 3, |
216 | 2x |
conf.int = FALSE, |
217 | 2x |
xlim = xlim |
218 |
) |
|
219 | 2x |
survminer_plot |
220 |
} |
1 |
#' Unanchored MAIC for binary and time-to-event endpoint |
|
2 |
#' |
|
3 |
#' This is a wrapper function to provide adjusted effect estimates and relevant statistics in unanchored case (i.e. |
|
4 |
#' there is no common comparator arm in the internal and external trial). |
|
5 |
#' |
|
6 |
#' @param weights_object an object returned by \code{estimate_weight} |
|
7 |
#' @param ipd a data frame that meet format requirements in 'Details', individual patient data (IPD) of internal trial |
|
8 |
#' @param pseudo_ipd a data frame, pseudo IPD from digitized KM curve of external trial (for time-to-event endpoint) or |
|
9 |
#' from contingency table (for binary endpoint) |
|
10 |
#' @param trt_ipd a string, name of the interested investigation arm in internal trial \code{dat_igd} (real IPD) |
|
11 |
#' @param trt_agd a string, name of the interested investigation arm in external trial \code{pseudo_ipd} (pseudo IPD) |
|
12 |
#' @param trt_var_ipd a string, column name in \code{ipd} that contains the treatment assignment |
|
13 |
#' @param trt_var_agd a string, column name in \code{ipd} that contains the treatment assignment |
|
14 |
#' @param normalize_weights logical, default is \code{FALSE}. If \code{TRUE}, |
|
15 |
#' \code{scaled_weights} (normalized weights) in \code{weights_object$data} will be used. |
|
16 |
#' @param endpoint_type a string, one out of the following "binary", "tte" (time to event) |
|
17 |
#' @param eff_measure a string, "RD" (risk difference), "OR" (odds ratio), "RR" (relative risk) for a binary endpoint; |
|
18 |
#' "HR" for a time-to-event endpoint. By default is \code{NULL}, "OR" is used for binary case, otherwise "HR" is used. |
|
19 |
#' @param boot_ci_type a string, one of `c("norm","basic", "stud", "perc", "bca")` to select the type of bootstrap |
|
20 |
#' confidence interval. See [boot::boot.ci] for more details. |
|
21 |
#' @param endpoint_name a string, name of time to event endpoint, to be show in the last line of title |
|
22 |
#' @param time_scale a string, time unit of median survival time, taking a value of 'years', 'months', 'weeks' or |
|
23 |
#' 'days'. NOTE: it is assumed that values in TIME column of \code{ipd} and \code{pseudo_ipd} is in the unit of days |
|
24 |
#' @param km_conf_type a string, pass to \code{conf.type} of \code{survfit} |
|
25 |
#' @param binary_robust_cov_type a string to pass to argument `type` of [sandwich::vcovHC], see possible options in the |
|
26 |
#' documentation of that function. Default is `"HC3"` |
|
27 |
#' |
|
28 |
#' @details For time-to-event analysis, it is required that input \code{ipd} and \code{pseudo_ipd} to have the following |
|
29 |
#' columns. This function is not sensitive to upper or lower case of letters in column names. |
|
30 |
#' \itemize{ |
|
31 |
#' \item USUBJID - character, unique subject ID |
|
32 |
#' \item ARM - character or factor, treatment indicator, column name does not have to be 'ARM'. User specify in |
|
33 |
#' \code{trt_var_ipd} and \code{trt_var_agd} |
|
34 |
#' \item EVENT - numeric, 1 for censored/death, 0 for otherwise |
|
35 |
#' \item TIME - numeric column, observation time of the \code{EVENT}; unit in days |
|
36 |
#' } |
|
37 |
#' |
|
38 |
#' @importFrom survival survfit Surv coxph |
|
39 |
#' @importFrom lmtest coeftest coefci |
|
40 |
#' @importFrom sandwich vcovHC |
|
41 |
#' @importFrom boot boot boot.ci |
|
42 |
#' @return A list, contains 'descriptive' and 'inferential' |
|
43 |
#' @example inst/examples/maic_unanchored_ex.R |
|
44 |
#' @example inst/examples/maic_unanchored_binary_ex.R |
|
45 |
#' @export |
|
46 | ||
47 |
maic_unanchored <- function(weights_object, |
|
48 |
ipd, |
|
49 |
pseudo_ipd, |
|
50 |
trt_ipd, |
|
51 |
trt_agd, |
|
52 |
trt_var_ipd = "ARM", |
|
53 |
trt_var_agd = "ARM", |
|
54 |
normalize_weights = FALSE, |
|
55 |
endpoint_type = "tte", |
|
56 |
endpoint_name = "Time to Event Endpoint", |
|
57 |
eff_measure = c("HR", "OR", "RR", "RD"), |
|
58 |
boot_ci_type = c("norm", "basic", "stud", "perc", "bca"), |
|
59 |
# time to event specific args |
|
60 |
time_scale = "months", |
|
61 |
km_conf_type = "log-log", |
|
62 |
# binary specific args |
|
63 |
binary_robust_cov_type = "HC3") { |
|
64 |
# ==> Initial Setup ------------------------------------------ |
|
65 |
# ~~~ Create the hull for the output from this function |
|
66 | 4x |
res <- list( |
67 | 4x |
descriptive = list(), |
68 | 4x |
inferential = list() |
69 |
) |
|
70 | ||
71 | 4x |
res_AB_unadj <- res_AB <- list( |
72 | 4x |
est = NA, |
73 | 4x |
se = NA, |
74 | 4x |
ci_l = NA, |
75 | 4x |
ci_u = NA, |
76 | 4x |
pval = NA |
77 |
) |
|
78 | ||
79 |
# ~~~ Initial colname process and precheck on effect measure |
|
80 | 4x |
names(ipd) <- toupper(names(ipd)) |
81 | 4x |
names(pseudo_ipd) <- toupper(names(pseudo_ipd)) |
82 | 4x |
trt_var_ipd <- toupper(trt_var_ipd) |
83 | 4x |
trt_var_agd <- toupper(trt_var_agd) |
84 | ||
85 | ! |
if (length(eff_measure) > 1) eff_measure <- NULL |
86 | ! |
if (is.null(eff_measure)) eff_measure <- list(binary = "OR", tte = "HR")[[endpoint_type]] |
87 | ||
88 |
# ~~~ Setup ARM column and make related pre-checks |
|
89 | ! |
if (!trt_var_ipd %in% names(ipd)) stop("cannot find arm indicator column trt_var_ipd in ipd") |
90 | ! |
if (!trt_var_agd %in% names(pseudo_ipd)) stop("cannot find arm indicator column trt_var_agd in pseudo_ipd") |
91 | ! |
if (trt_var_ipd != "ARM") ipd$ARM <- ipd[[trt_var_ipd]] |
92 | ! |
if (trt_var_agd != "ARM") pseudo_ipd$ARM <- pseudo_ipd[[trt_var_agd]] |
93 | 4x |
ipd$ARM <- as.character(ipd$ARM) # just to avoid potential error when merging |
94 | 4x |
pseudo_ipd$ARM <- as.character(pseudo_ipd$ARM) # just to avoid potential error when merging |
95 | ! |
if (!trt_ipd %in% ipd$ARM) stop("trt_ipd does not exist in ipd$ARM") |
96 | ! |
if (!trt_agd %in% pseudo_ipd$ARM) stop("trt_agd does not exist in pseudo_ipd$ARM") |
97 | ||
98 |
# ~~~ More pre-checks |
|
99 | 4x |
endpoint_type <- match.arg(endpoint_type, c("binary", "tte")) |
100 | 4x |
if (!"maicplus_estimate_weights" %in% class(weights_object)) { |
101 | ! |
stop("weights_object should be an object returned by estimate_weights") |
102 |
} |
|
103 | 4x |
if (any(duplicated(ipd$USUBJID))) { |
104 | ! |
warning( |
105 | ! |
"check your ipd, it has duplicated usubjid, this indicates, ", |
106 | ! |
"it might contain multiple endpoints for each subject" |
107 |
) |
|
108 |
} |
|
109 | 4x |
if (!all(ipd$USUBJID %in% weights_object$data$USUBJID)) { |
110 | ! |
stop( |
111 | ! |
"These pts in ipd cannot be found in weights_object ", |
112 | ! |
toString(setdiff(ipd$USUBJID, weights_object$USUBJID)) |
113 |
) |
|
114 |
} |
|
115 | 4x |
time_scale <- match.arg(arg = time_scale, choices = c("days", "weeks", "months", "years")) |
116 | 4x |
if (endpoint_type == "binary") { # for binary effect measure |
117 | ||
118 | ! |
if (any(!c("USUBJID", "RESPONSE") %in% names(ipd))) stop("ipd should have 'USUBJID', 'RESPONSE' columns at minimum") |
119 | 2x |
eff_measure <- match.arg(eff_measure, choices = c("OR", "RD", "RR"), several.ok = FALSE) |
120 | 2x |
binary_robust_cov_type <- match.arg( |
121 | 2x |
binary_robust_cov_type, |
122 | 2x |
choices = c("HC3", "const", "HC", "HC0", "HC1", "HC2", "HC4", "HC4m", "HC5") |
123 |
) |
|
124 | 2x |
} else if (endpoint_type == "tte") { # for time to event effect measure |
125 | ||
126 | 2x |
if (!all(c("USUBJID", "TIME", "EVENT", trt_var_ipd) %in% names(ipd))) { |
127 | ! |
stop("ipd needs to include at least USUBJID, TIME, EVENT, ", trt_var_ipd) |
128 |
} |
|
129 | 2x |
if (!all(c("TIME", "EVENT", trt_var_agd) %in% names(pseudo_ipd))) { |
130 | ! |
stop("pseudo_ipd needs to include at least TIME, EVENT, ", trt_var_agd) |
131 |
} |
|
132 | 2x |
eff_measure <- match.arg(eff_measure, choices = c("HR"), several.ok = FALSE) |
133 |
} |
|
134 | 4x |
boot_ci_type <- match.arg(boot_ci_type) |
135 | ||
136 |
# ==> IPD and AgD data preparation ------------------------------------------ |
|
137 |
# : subset ipd, retain only ipd from interested trts |
|
138 | 4x |
ipd <- ipd[ipd$ARM == trt_ipd, , drop = TRUE] |
139 | 4x |
pseudo_ipd <- pseudo_ipd[pseudo_ipd$ARM == trt_agd, , drop = TRUE] |
140 | ||
141 |
# : assign weights to real and pseudo ipd |
|
142 | 4x |
if (normalize_weights) { |
143 | ! |
ipd$weights <- weights_object$data$scaled_weights[match(weights_object$data$USUBJID, ipd$USUBJID)] |
144 |
} else { |
|
145 | 4x |
ipd$weights <- weights_object$data$weights[match(weights_object$data$USUBJID, ipd$USUBJID)] |
146 |
} |
|
147 | 4x |
pseudo_ipd$weights <- 1 |
148 | ||
149 |
# : necessary formatting for pseudo ipd |
|
150 | 2x |
if (!"USUBJID" %in% names(pseudo_ipd)) pseudo_ipd$USUBJID <- paste0("ID", seq_len(nrow(pseudo_ipd))) |
151 | 4x |
if ("RESPONSE" %in% names(pseudo_ipd) && is.logical(pseudo_ipd$RESPONSE)) { |
152 | 2x |
pseudo_ipd$RESPONSE <- as.numeric(pseudo_ipd$RESPONSE) |
153 |
} |
|
154 | ||
155 |
# : give warning when individual pts in IPD has no weights |
|
156 | 4x |
if (any(is.na(ipd$weights))) { |
157 | ! |
ipd <- ipd[!is.na(ipd$weights), , drop = FALSE] |
158 | ! |
warning( |
159 | ! |
paste( |
160 | ! |
"these usubjid in ipd have no weight in weights_object, and hence excluded from analysis:", |
161 | ! |
paste(ipd$USUBJID[is.na(ipd$weights)], collapse = ",") |
162 |
) |
|
163 |
) |
|
164 | ! |
if (nrow(ipd) == 0) stop("there is no pts with weight in IPD!!") |
165 |
} |
|
166 | ||
167 |
# : retain necessary columns |
|
168 | 4x |
if (endpoint_type == "tte") { |
169 | 2x |
retain_cols <- c("USUBJID", "ARM", "TIME", "EVENT", "weights") |
170 |
} else { |
|
171 | 2x |
retain_cols <- c("USUBJID", "ARM", "RESPONSE", "weights") |
172 |
} |
|
173 | 4x |
ipd <- ipd[, retain_cols, drop = FALSE] |
174 | 4x |
pseudo_ipd <- pseudo_ipd[, retain_cols, drop = FALSE] |
175 | ||
176 |
# : merge real and pseudo ipds |
|
177 | 4x |
dat <- rbind(ipd, pseudo_ipd) |
178 | 4x |
dat$ARM <- factor(dat$ARM, levels = c(trt_agd, trt_ipd)) |
179 | ||
180 |
# ==> Inferential output ------------------------------------------ |
|
181 | ||
182 | 4x |
result <- if (endpoint_type == "tte") { |
183 | 2x |
maic_unanchored_tte( |
184 | 2x |
res, res_AB, res_AB_unadj, dat, ipd, pseudo_ipd, km_conf_type, time_scale, |
185 | 2x |
weights_object, endpoint_name, normalize_weights, boot_ci_type, trt_ipd, trt_agd |
186 |
) |
|
187 | 4x |
} else if (endpoint_type == "binary") { |
188 | 2x |
maic_unanchored_binary( |
189 | 2x |
res, res_AB, res_AB_unadj, dat, ipd, pseudo_ipd, binary_robust_cov_type, |
190 | 2x |
weights_object, endpoint_name, normalize_weights, eff_measure, boot_ci_type, trt_ipd, trt_agd |
191 |
) |
|
192 |
} else { |
|
193 | ! |
stop("Endpoint type ", endpoint_type, " currently unsupported.") |
194 |
} |
|
195 | ||
196 |
# output |
|
197 | 4x |
result |
198 |
} |
|
199 | ||
200 |
# MAIC inference functions for TTE outcome type ------------ |
|
201 | ||
202 |
maic_unanchored_tte <- function(res, |
|
203 |
res_AB, |
|
204 |
res_AB_unadj, |
|
205 |
dat, |
|
206 |
ipd, |
|
207 |
pseudo_ipd, |
|
208 |
km_conf_type, |
|
209 |
time_scale, |
|
210 |
weights_object, |
|
211 |
endpoint_name, |
|
212 |
normalize_weights, |
|
213 |
boot_ci_type, |
|
214 |
trt_ipd, |
|
215 |
trt_agd) { |
|
216 |
# ~~~ Descriptive table before and after matching |
|
217 |
# : derive km w and w/o weights |
|
218 | 2x |
kmobj_dat <- survfit(Surv(TIME, EVENT) ~ ARM, dat, conf.type = km_conf_type) |
219 | 2x |
kmobj_dat_adj <- survfit(Surv(TIME, EVENT) ~ ARM, dat, weights = dat$weights, conf.type = km_conf_type) |
220 | 2x |
res$descriptive[["survfit_before"]] <- survfit_makeup(kmobj_dat) |
221 | 2x |
res$descriptive[["survfit_after"]] <- survfit_makeup(kmobj_dat_adj) |
222 |
# : derive median survival time |
|
223 | 2x |
medSurv_dat <- medSurv_makeup(kmobj_dat, legend = "Before matching", time_scale = time_scale) |
224 | 2x |
medSurv_dat_adj <- medSurv_makeup(kmobj_dat_adj, legend = "After matching", time_scale = time_scale) |
225 | 2x |
medSurv_out <- rbind(medSurv_dat, medSurv_dat_adj) |
226 | 2x |
medSurv_out <- cbind(trt_ind = c("B", "A")[match(medSurv_out$treatment, levels(dat$ARM))], medSurv_out) |
227 | ||
228 | 2x |
res$descriptive[["summary"]] <- medSurv_out |
229 | ||
230 |
# ~~~ Analysis table (Cox model) before and after matching |
|
231 |
# : fit PH Cox regression model |
|
232 | 2x |
coxobj_dat <- coxph(Surv(TIME, EVENT) ~ ARM, dat) |
233 | 2x |
coxobj_dat_adj <- coxph(Surv(TIME, EVENT) ~ ARM, dat, weights = weights, robust = TRUE) |
234 | ||
235 |
# : derive adjusted estimate for ipd exp arm vs agd exp arm |
|
236 | 2x |
res_AB$est <- summary(coxobj_dat_adj)$conf.int[1] |
237 | 2x |
mu <- summary(coxobj_dat_adj)$coef[1] |
238 | 2x |
sig <- summary(coxobj_dat_adj)$coef[4] |
239 | 2x |
res_AB$se <- sqrt((exp(sig^2) - 1) * exp(2 * mu + sig^2)) # log normal parametrization |
240 | 2x |
res_AB$ci_l <- summary(coxobj_dat_adj)$conf.int[3] |
241 | 2x |
res_AB$ci_u <- summary(coxobj_dat_adj)$conf.int[4] |
242 | 2x |
res_AB$pval <- summary(coxobj_dat_adj)$coef[6] |
243 | ||
244 |
# : derive unadjusted estimate |
|
245 | 2x |
res_AB_unadj$est <- summary(coxobj_dat)$conf.int[1] |
246 | 2x |
mu <- summary(coxobj_dat)$coef[1] |
247 | 2x |
sig <- summary(coxobj_dat)$coef[3] |
248 | 2x |
res_AB_unadj$se <- sqrt((exp(sig^2) - 1) * exp(2 * mu + sig^2)) # log normal parametrization |
249 | 2x |
res_AB_unadj$ci_l <- summary(coxobj_dat)$conf.int[3] |
250 | 2x |
res_AB_unadj$ci_u <- summary(coxobj_dat)$conf.int[4] |
251 | 2x |
res_AB_unadj$pval <- summary(coxobj_dat)$coef[5] |
252 | ||
253 |
# : get bootstrapped estimates if applicable |
|
254 | 2x |
if (!is.null(weights_object$boot)) { |
255 | 1x |
keep_rows <- setdiff(seq_len(nrow(weights_object$data)), weights_object$rows_with_missing) |
256 | 1x |
boot_ipd_id <- weights_object$data[keep_rows, "USUBJID", drop = FALSE] |
257 | ||
258 | 1x |
boot_ipd <- merge(boot_ipd_id, ipd, by = "USUBJID", all.x = TRUE) |
259 | ! |
if (nrow(boot_ipd) != nrow(boot_ipd_id)) stop("ipd has multiple observations for some patients") |
260 | 1x |
boot_ipd <- boot_ipd[match(boot_ipd$USUBJID, boot_ipd_id$USUBJID), ] |
261 | ||
262 | 1x |
stat_fun <- function(data, index, w_obj, pseudo_ipd, normalize) { |
263 | 501x |
boot_ipd <- data[index, ] |
264 | 501x |
r <- dynGet("r", ifnotfound = NA) # Get bootstrap iteration |
265 | 501x |
if (!is.na(r)) { |
266 | ! |
if (!all(index == w_obj$boot[, 1, r])) stop("Bootstrap and weight indices don't match") |
267 | 500x |
boot_ipd$weights <- w_obj$boot[, 2, r] |
268 | ! |
if (normalize) boot_ipd$weights <- boot_ipd$weights / mean(boot_ipd$weights, na.rm = TRUE) |
269 |
} |
|
270 | 501x |
boot_dat <- rbind(boot_ipd, pseudo_ipd) |
271 | 501x |
boot_dat$ARM <- factor(boot_dat$ARM, levels = c(trt_agd, trt_ipd)) |
272 | 501x |
boot_coxobj_dat_adj <- coxph(Surv(TIME, EVENT) ~ ARM, boot_dat, weights = weights) |
273 | 501x |
c(est = coef(boot_coxobj_dat_adj)[1], var = vcov(boot_coxobj_dat_adj)[1, 1]) |
274 |
} |
|
275 | ||
276 |
# Revert seed to how it was for weight bootstrap sampling |
|
277 | 1x |
old_seed <- globalenv()$.Random.seed |
278 | 1x |
on.exit(suspendInterrupts(set_random_seed(old_seed))) |
279 | 1x |
set_random_seed(weights_object$boot_seed) |
280 | 1x |
R <- dim(weights_object$boot)[3] |
281 | ||
282 | 1x |
boot_res <- boot( |
283 | 1x |
boot_ipd, |
284 | 1x |
stat_fun, |
285 | 1x |
R = R, |
286 | 1x |
w_obj = weights_object, |
287 | 1x |
pseudo_ipd = pseudo_ipd, |
288 | 1x |
normalize = normalize_weights, |
289 | 1x |
strata = weights_object$boot_strata |
290 |
) |
|
291 | 1x |
boot_ci <- boot.ci(boot_res, type = boot_ci_type, w_obj = weights_object, pseudo_ipd = pseudo_ipd) |
292 | ||
293 | 1x |
l_u_index <- switch(boot_ci_type, |
294 | 1x |
"norm" = list(2, 3, "normal"), |
295 | 1x |
"basic" = list(4, 5, "basic"), |
296 | 1x |
"stud" = list(4, 5, "student"), |
297 | 1x |
"perc" = list(4, 5, "percent"), |
298 | 1x |
"bca" = list(4, 5, "bca") |
299 |
) |
|
300 | ||
301 | 1x |
transform_estimate <- exp |
302 | 1x |
boot_res_AB <- list( |
303 | 1x |
est = as.vector(transform_estimate(boot_res$t0[1])), |
304 | 1x |
se = NA, |
305 | 1x |
ci_l = transform_estimate(boot_ci[[l_u_index[[3]]]][l_u_index[[1]]]), |
306 | 1x |
ci_u = transform_estimate(boot_ci[[l_u_index[[3]]]][l_u_index[[2]]]), |
307 | 1x |
pval = NA |
308 |
) |
|
309 |
} else { |
|
310 | 1x |
boot_res_AB <- NULL |
311 | 1x |
boot_res <- NULL |
312 |
} |
|
313 | ||
314 |
# : report all raw fitted obj |
|
315 | 2x |
res$inferential[["fit"]] <- list( |
316 | 2x |
km_before = kmobj_dat, |
317 | 2x |
km_after = kmobj_dat_adj, |
318 | 2x |
model_before = coxobj_dat, |
319 | 2x |
model_after = coxobj_dat_adj, |
320 | 2x |
res_AB = res_AB, |
321 | 2x |
res_AB_unadj = res_AB_unadj, |
322 | 2x |
boot_res = boot_res, |
323 | 2x |
boot_res_AB = boot_res_AB |
324 |
) |
|
325 | ||
326 |
# : compile HR result |
|
327 | 2x |
res$inferential[["summary"]] <- data.frame( |
328 | 2x |
case = c("AB", "adjusted_AB"), |
329 | 2x |
HR = c(res_AB_unadj$est, res_AB$est), |
330 | 2x |
LCL = c(res_AB_unadj$ci_l, res_AB$ci_l), |
331 | 2x |
UCL = c(res_AB_unadj$ci_u, res_AB$ci_u), |
332 | 2x |
pval = c(res_AB_unadj$pval, res_AB$pval) |
333 |
) |
|
334 | ||
335 |
# output |
|
336 | 2x |
res |
337 |
} |
|
338 | ||
339 |
# MAIC inference functions for Binary outcome type ------------ |
|
340 | ||
341 |
maic_unanchored_binary <- function(res, |
|
342 |
res_AB, |
|
343 |
res_AB_unadj, |
|
344 |
dat, |
|
345 |
ipd, |
|
346 |
pseudo_ipd, |
|
347 |
binary_robust_cov_type, |
|
348 |
weights_object, |
|
349 |
endpoint_name, |
|
350 |
normalize_weights, |
|
351 |
eff_measure, |
|
352 |
boot_ci_type, |
|
353 |
trt_ipd, |
|
354 |
trt_agd) { |
|
355 |
# ~~~ Analysis table |
|
356 |
# : set up proper link |
|
357 | 2x |
glm_link <- switch(eff_measure, |
358 | 2x |
"RD" = poisson(link = "identity"), |
359 | 2x |
"RR" = poisson(link = "log"), |
360 | 2x |
"OR" = binomial(link = "logit") |
361 |
) |
|
362 | 2x |
transform_estimate <- switch(eff_measure, |
363 | 2x |
"RD" = function(x) x * 100, |
364 | 2x |
"RR" = exp, |
365 | 2x |
"OR" = exp |
366 |
) |
|
367 | ||
368 |
# : fit glm for binary outcome and robust estimate with weights |
|
369 | 2x |
binobj_dat <- glm(RESPONSE ~ ARM, dat, family = glm_link) |
370 | 2x |
binobj_dat_adj <- glm(RESPONSE ~ ARM, dat, weights = weights, family = glm_link) |> suppressWarnings() |
371 | ||
372 | 2x |
bin_robust_cov <- sandwich::vcovHC(binobj_dat_adj, type = binary_robust_cov_type) |
373 | 2x |
bin_robust_coef <- lmtest::coeftest(binobj_dat_adj, vcov. = bin_robust_cov) |
374 | 2x |
bin_robust_ci <- lmtest::coefci(binobj_dat_adj, vcov. = bin_robust_cov) |
375 | ||
376 |
# : make general summary |
|
377 | 2x |
glmDesc_dat <- glm_makeup(binobj_dat, legend = "Before matching", weighted = FALSE) |
378 | 2x |
glmDesc_dat_adj <- glm_makeup(binobj_dat_adj, legend = "After matching", weighted = TRUE) |
379 | 2x |
glmDesc <- rbind(glmDesc_dat, glmDesc_dat_adj) |
380 | 2x |
glmDesc <- cbind(trt_ind = c("B", "A")[match(glmDesc$treatment, levels(dat$ARM))], glmDesc) |
381 | 2x |
rownames(glmDesc) <- NULL |
382 | 2x |
res$descriptive[["summary"]] <- glmDesc |
383 | ||
384 |
# : derive adjusted estimate |
|
385 | 2x |
res_AB$est <- bin_robust_coef[2, "Estimate"] |
386 | 2x |
res_AB$se <- bin_robust_coef[2, "Std. Error"] |
387 | 2x |
res_AB$ci_l <- bin_robust_ci[2, "2.5 %"] |
388 | 2x |
res_AB$ci_u <- bin_robust_ci[2, "97.5 %"] |
389 | 2x |
res_AB$pval <- bin_robust_coef[2, "Pr(>|z|)"] |
390 | ||
391 |
# : derive unadjusted estimate |
|
392 | 2x |
binobj_dat_summary <- summary(binobj_dat) |
393 | 2x |
res_AB_unadj$est <- binobj_dat_summary$coefficients[2, "Estimate"] |
394 | 2x |
res_AB_unadj$se <- binobj_dat_summary$coefficients[2, "Std. Error"] |
395 | 2x |
res_AB_unadj$ci_l <- confint.default(binobj_dat)[2, "2.5 %"] |
396 | 2x |
res_AB_unadj$ci_u <- confint.default(binobj_dat)[2, "97.5 %"] |
397 | 2x |
res_AB_unadj$pval <- binobj_dat_summary$coefficients[2, "Pr(>|z|)"] |
398 | ||
399 |
# : transform |
|
400 | 2x |
if (eff_measure %in% c("RR", "OR")) { |
401 | 2x |
res_AB <- transform_ratio(res_AB) |
402 | 2x |
res_AB_unadj <- transform_ratio(res_AB_unadj) |
403 | ! |
} else if (eff_measure == "RD") { |
404 | ! |
res_AB <- transform_absolute(res_AB) |
405 | ! |
res_AB_unadj <- transform_absolute(res_AB_unadj) |
406 |
} |
|
407 | ||
408 |
# : get bootstrapped estimates if applicable |
|
409 | 2x |
if (!is.null(weights_object$boot)) { |
410 | 1x |
keep_rows <- setdiff(seq_len(nrow(weights_object$data)), weights_object$rows_with_missing) |
411 | 1x |
boot_ipd_id <- weights_object$data[keep_rows, "USUBJID", drop = FALSE] |
412 | ||
413 | 1x |
boot_ipd <- merge(boot_ipd_id, ipd, by = "USUBJID", all.x = TRUE) |
414 | ! |
if (nrow(boot_ipd) != nrow(boot_ipd_id)) stop("ipd has multiple observations for some patients") |
415 | 1x |
boot_ipd <- boot_ipd[match(boot_ipd$USUBJID, boot_ipd_id$USUBJID), ] |
416 | ||
417 | 1x |
stat_fun <- function(data, index, w_obj, pseudo_ipd, normalize) { |
418 | 21x |
boot_ipd <- data[index, ] |
419 | 21x |
r <- dynGet("r", ifnotfound = NA) # Get bootstrap iteration |
420 | 21x |
if (!is.na(r)) { |
421 | ! |
if (!all(index == w_obj$boot[, 1, r])) stop("Bootstrap and weight indices don't match") |
422 | 20x |
boot_ipd$weights <- w_obj$boot[, 2, r] |
423 | ! |
if (normalize) boot_ipd$weights <- boot_ipd$weights / mean(boot_ipd$weights, na.rm = TRUE) |
424 |
} |
|
425 | 21x |
boot_dat <- rbind(boot_ipd, pseudo_ipd) |
426 | 21x |
boot_dat$ARM <- factor(boot_dat$ARM, levels = c(trt_agd, trt_ipd)) |
427 | 21x |
boot_binobj_dat_adj <- glm(RESPONSE ~ ARM, boot_dat, weights = weights, family = glm_link) |> suppressWarnings() |
428 | 21x |
c(est = coef(boot_binobj_dat_adj)[2], var = vcov(boot_binobj_dat_adj)[2, 2]) |
429 |
} |
|
430 | ||
431 |
# Revert seed to how it was for weight bootstrap sampling |
|
432 | 1x |
old_seed <- globalenv()$.Random.seed |
433 | 1x |
on.exit(suspendInterrupts(set_random_seed(old_seed))) |
434 | 1x |
set_random_seed(weights_object$boot_seed) |
435 | 1x |
R <- dim(weights_object$boot)[3] |
436 | 1x |
boot_res <- boot( |
437 | 1x |
boot_ipd, |
438 | 1x |
stat_fun, |
439 | 1x |
R = R, |
440 | 1x |
w_obj = weights_object, |
441 | 1x |
pseudo_ipd = pseudo_ipd, |
442 | 1x |
normalize = normalize_weights, |
443 | 1x |
strata = weights_object$boot_strata |
444 |
) |
|
445 | 1x |
boot_ci <- boot.ci(boot_res, type = boot_ci_type, w_obj = weights_object, pseudo_ipd = pseudo_ipd) |
446 | ||
447 | 1x |
l_u_index <- switch(boot_ci_type, |
448 | 1x |
"norm" = list(2, 3, "normal"), |
449 | 1x |
"basic" = list(4, 5, "basic"), |
450 | 1x |
"stud" = list(4, 5, "student"), |
451 | 1x |
"perc" = list(4, 5, "percent"), |
452 | 1x |
"bca" = list(4, 5, "bca") |
453 |
) |
|
454 | ||
455 | 1x |
boot_res_AB <- list( |
456 | 1x |
est = as.vector(transform_estimate(boot_res$t0[1])), |
457 | 1x |
se = NA, |
458 | 1x |
ci_l = transform_estimate(boot_ci[[l_u_index[[3]]]][l_u_index[[1]]]), |
459 | 1x |
ci_u = transform_estimate(boot_ci[[l_u_index[[3]]]][l_u_index[[2]]]), |
460 | 1x |
pval = NA |
461 |
) |
|
462 |
} else { |
|
463 | 1x |
boot_res_AB <- NULL |
464 | 1x |
boot_res <- NULL |
465 |
} |
|
466 | ||
467 |
# : report all raw fitted obj |
|
468 | 2x |
res$inferential[["fit"]] <- list( |
469 | 2x |
model_before = binobj_dat, |
470 | 2x |
model_after = binobj_dat_adj, |
471 | 2x |
res_AB = res_AB, |
472 | 2x |
res_AB_unadj = res_AB_unadj, |
473 | 2x |
boot_res = boot_res, |
474 | 2x |
boot_res_AB = boot_res_AB |
475 |
) |
|
476 | ||
477 |
# : compile binary effect estimate result |
|
478 | 2x |
res$inferential[["summary"]] <- data.frame( |
479 | 2x |
case = c("AB", "adjusted_AB"), |
480 | 2x |
EST = c( |
481 | 2x |
res_AB_unadj$est, |
482 | 2x |
res_AB$est |
483 |
), |
|
484 | 2x |
LCL = c( |
485 | 2x |
res_AB_unadj$ci_l, |
486 | 2x |
res_AB$ci_l |
487 |
), |
|
488 | 2x |
UCL = c( |
489 | 2x |
res_AB_unadj$ci_u, |
490 | 2x |
res_AB$ci_u |
491 |
), |
|
492 | 2x |
pval = c( |
493 | 2x |
res_AB_unadj$pval, |
494 | 2x |
res_AB$pval |
495 |
) |
|
496 |
) |
|
497 | 2x |
names(res$inferential[["summary"]])[2] <- eff_measure |
498 | ||
499 |
# : output |
|
500 | 2x |
res |
501 |
} |
1 |
# Functions for pre-processing data before conduct MAIC |
|
2 | ||
3 |
# Functions to be exported --------------------------------------- |
|
4 | ||
5 |
#' Pre-process aggregate data |
|
6 |
#' |
|
7 |
#' This function checks the format of the aggregate data. |
|
8 |
#' Data is required to have three columns: STUDY, ARM, and N. |
|
9 |
#' Column names that do not have legal suffixes (MEAN, MEDIAN, SD, COUNT, or PROP) are dropped. |
|
10 |
#' If a variable is a count variable, it is converted to proportions by dividing the sample size (N). |
|
11 |
#' Note, when the count is specified, proportion is always calculated based on the count, that is, |
|
12 |
#' specified proportion will be ignored if applicable. |
|
13 |
#' If the aggregated data comes from multiple sources (i.e. different analysis population) and |
|
14 |
#' sample size differs for each variable, one option is to specify proportion directly instead of count by using suffix |
|
15 |
#' _PROP. |
|
16 |
#' |
|
17 |
#' @param raw_agd raw aggregate data should contain STUDY, ARM, and N. Variable names should be followed |
|
18 |
#' by legal suffixes (i.e. MEAN, MEDIAN, SD, COUNT, or PROP). |
|
19 |
#' |
|
20 |
#' @examples |
|
21 |
#' data(agd) |
|
22 |
#' agd <- process_agd(agd) |
|
23 |
#' |
|
24 |
#' @return pre-processed aggregate level data |
|
25 |
#' @export |
|
26 | ||
27 |
process_agd <- function(raw_agd) { |
|
28 | 6x |
raw_agd <- as.data.frame(raw_agd) |
29 |
# make all column names to be capital letters to avoid different style |
|
30 | 6x |
names(raw_agd) <- toupper(names(raw_agd)) |
31 | ||
32 |
# define column name patterns[-] |
|
33 | 6x |
must_exist <- c("STUDY", "ARM", "N") |
34 | 6x |
legal_suffix <- c("MEAN", "MEDIAN", "SD", "COUNT", "PROP") |
35 | ||
36 |
# swap "TREATMENT" column to "ARM", if applicable |
|
37 | 6x |
if ("TREATMENT" %in% names(raw_agd) && (!"ARM" %in% names(raw_agd))) { |
38 | 1x |
raw_agd$ARM <- raw_agd$TREATMENT |
39 | 1x |
raw_agd <- raw_agd[, names(raw_agd) != "TREATMENT"] |
40 | 1x |
warning("'TREATMENT' column is renamed as 'ARM'") |
41 |
} |
|
42 | ||
43 |
# check: must exist |
|
44 | 6x |
if (!all(must_exist %in% names(raw_agd))) { |
45 | ! |
stop("At least 1 of the must-exists columns (STUDY, ARM, N) cannot be found in raw_agd!") |
46 |
} |
|
47 | ||
48 |
# check: legal suffix |
|
49 | 6x |
other_colnames <- setdiff(names(raw_agd), must_exist) |
50 | 6x |
ind1 <- grepl("_", other_colnames, fixed = TRUE) |
51 | 6x |
ind2 <- sapply(other_colnames, function(xx) { |
52 | 42x |
tmp <- unlist(strsplit(xx, split = "_")) |
53 | 42x |
tmp[length(tmp)] # this deployment is robust to the cases that there are multiple _ in the column name |
54 |
}) |
|
55 | 6x |
ind2 <- (ind2 %in% legal_suffix) |
56 | ||
57 | 6x |
use_cols <- other_colnames[ind1 & ind2] |
58 | 6x |
use_agd <- raw_agd[, c(must_exist, use_cols), drop = FALSE] |
59 | 6x |
if (!all(other_colnames %in% use_cols)) { |
60 | ! |
warning(paste0( |
61 | ! |
"following columns are ignored since it does not follow the naming conventions:", |
62 | ! |
paste(setdiff(other_colnames, use_cols), collapse = ",") |
63 |
)) |
|
64 |
} |
|
65 | ||
66 |
# If the aggregate data is divided by different arms, calculate pooled arm statistics using |
|
67 |
# complete_agd function; complete statistics is specified by ARM=="Total" |
|
68 | 6x |
if (!"total" %in% tolower(use_agd$ARM)) { |
69 | ! |
use_agd <- complete_agd(use_agd) |
70 |
} |
|
71 | ||
72 |
# calculate percentage columns |
|
73 | 6x |
ind <- grepl("_COUNT$", names(use_agd)) |
74 | 6x |
if (any(ind)) { |
75 | 6x |
for (i in which(ind)) { |
76 | 18x |
tmp_prop <- use_agd[[i]] / use_agd$N |
77 |
# in case some count are not specified, but proportion are specified, copy over those proportions |
|
78 |
# this also means, in case count is specified, proportion is ignored even it is specified |
|
79 | 18x |
prop_name_i <- gsub("_COUNT$", "_PROP", names(use_agd)[i]) |
80 | 18x |
if (prop_name_i %in% names(use_agd)) { |
81 | ! |
tmp_prop[is.na(tmp_prop)] <- use_agd[is.na(tmp_prop), prop_name_i] |
82 | ! |
names(use_agd)[names(use_agd) == prop_name_i] <- paste0(prop_name_i, "_redundant") |
83 |
} |
|
84 | 18x |
use_agd[[i]] <- tmp_prop |
85 |
} |
|
86 | 6x |
names(use_agd) <- gsub("_COUNT$", "_PROP", names(use_agd)) |
87 |
} |
|
88 | 6x |
use_agd <- use_agd[, !grepl("_redundant$", names(use_agd))] |
89 | ||
90 |
# output |
|
91 | 6x |
with(use_agd, use_agd[tolower(ARM) == "total", , drop = FALSE]) |
92 |
} |
|
93 | ||
94 | ||
95 |
#' Create dummy variables from categorical variables in an individual patient data (ipd) |
|
96 |
#' |
|
97 |
#' This is a convenient function to convert categorical variables into dummy binary variables. |
|
98 |
#' This would be especially useful if the variable has more than two factors. |
|
99 |
#' Note that the original variable is kept after a variable is dummized. |
|
100 |
#' |
|
101 |
#' @param raw_ipd ipd data that contains variable to dummize |
|
102 |
#' @param dummize_cols vector of column names to binarize |
|
103 |
#' @param dummize_ref_level vector of reference level of the variables to binarize |
|
104 |
#' |
|
105 |
#' @examples |
|
106 |
#' data(adsl_twt) |
|
107 |
#' dummize_ipd(adsl_twt, dummize_cols = c("SEX"), dummize_ref_level = c("Male")) |
|
108 |
#' |
|
109 |
#' @return ipd with dummized columns |
|
110 |
#' @export |
|
111 | ||
112 |
dummize_ipd <- function(raw_ipd, dummize_cols, dummize_ref_level) { |
|
113 | 2x |
for (i in seq_along(dummize_cols)) { |
114 | 3x |
yy <- raw_ipd[[dummize_cols[i]]] |
115 | 3x |
yy_levels <- na.omit(unique(yy)) |
116 | 3x |
yy <- factor(as.character(yy), levels = c(dummize_ref_level[i], setdiff(yy_levels, dummize_ref_level[i]))) |
117 | 3x |
new_yy <- sapply(levels(yy)[-1], function(j) { |
118 | 3x |
as.numeric(yy == j) |
119 |
}) |
|
120 | 3x |
new_yy <- as.data.frame(new_yy) |
121 | 3x |
names(new_yy) <- toupper(paste(dummize_cols[i], levels(yy)[-1], sep = "_")) |
122 | 3x |
raw_ipd <- cbind(raw_ipd, new_yy) |
123 |
} |
|
124 | 2x |
raw_ipd |
125 |
} |
|
126 | ||
127 | ||
128 |
#' Center individual patient data (IPD) variables using aggregate data averages |
|
129 |
#' |
|
130 |
#' This function subtracts IPD variables (prognostic variables and/or effect modifiers) |
|
131 |
#' by the aggregate data averages. This centering is needed in order to calculate weights. |
|
132 |
#' IPD and aggregate data variable names should match. |
|
133 |
#' |
|
134 |
#' @param ipd IPD variable names should match the aggregate data names without the suffix. |
|
135 |
#' This would involve either changing the aggregate data name or the ipd name. |
|
136 |
#' For instance, if we binarize SEX variable with MALE as a reference using [dummize_ipd], |
|
137 |
#' function names the new variable as SEX_MALE. |
|
138 |
#' In this case, SEX_MALE should also be available in the aggregate data. |
|
139 |
#' @param agd pre-processed aggregate data which contain STUDY, ARM, and N. Variable names |
|
140 |
#' should be followed by legal suffixes (i.e. MEAN, MEDIAN, SD, or PROP). Note that COUNT |
|
141 |
#' suffix is no longer accepted. |
|
142 |
#' @examples |
|
143 |
#' data(adsl_sat) |
|
144 |
#' data(agd) |
|
145 |
#' agd <- process_agd(agd) |
|
146 |
#' ipd_centered <- center_ipd(ipd = adsl_sat, agd = agd) |
|
147 |
#' @return centered ipd using aggregate level data averages |
|
148 |
#' @export |
|
149 | ||
150 |
center_ipd <- function(ipd, agd) { |
|
151 |
# regularized column name patterns |
|
152 | 9x |
must_exist <- c("STUDY", "ARM", "N") |
153 | 9x |
legal_suffix <- c("MEAN", "MEDIAN", "SD", "PROP") |
154 | 9x |
suffix_pat <- paste(paste0("_", legal_suffix, "$"), collapse = "|") |
155 | ||
156 | 9x |
for (i in seq_len(nrow(agd))) { # study i |
157 | 9x |
study_id <- agd$STUDY[i] |
158 | 9x |
use_agd <- agd[i, !names(agd) %in% must_exist, drop = FALSE] |
159 | 9x |
param_id <- gsub(suffix_pat, "", names(use_agd)) |
160 | ||
161 | 9x |
for (j in seq_len(ncol(use_agd))) { # effect modifier j |
162 | ! |
if (is.na(use_agd[[j]])) next |
163 | ||
164 | 63x |
ipd_param <- param_id[j] |
165 | ||
166 | 63x |
if (grepl("_MEAN$|_PROP$", names(use_agd)[j])) { |
167 | 36x |
ipd[[paste0(ipd_param, "_", "CENTERED")]] <- ipd[[ipd_param]] - use_agd[[j]] |
168 | 27x |
} else if (grepl("_MEDIAN$", names(use_agd)[j])) { |
169 | 18x |
ipd[[paste0(ipd_param, "_MEDIAN_", "CENTERED")]] <- ipd[[ipd_param]] > use_agd[[j]] |
170 | 18x |
ipd[[paste0(ipd_param, "_MEDIAN_", "CENTERED")]] <- ipd[[paste0(ipd_param, "_MEDIAN_", "CENTERED")]] - 0.5 |
171 | 9x |
} else if (grepl("_SD$", names(use_agd)[j])) { |
172 | 9x |
ipd[[paste0(ipd_param, "_SQUARED_", "CENTERED")]] <- ipd[[ipd_param]]^2 |
173 | 9x |
tmp_aim <- use_agd[[j]]^2 + (use_agd[[paste0(ipd_param, "_MEAN")]]^2) |
174 | 9x |
ipd[[paste0(ipd_param, "_SQUARED_", "CENTERED")]] <- ipd[[paste0(ipd_param, "_SQUARED_", "CENTERED")]] - tmp_aim |
175 |
} |
|
176 |
} # end of j |
|
177 |
} # end of i |
|
178 | ||
179 |
# output |
|
180 | 9x |
ipd |
181 |
} |
|
182 | ||
183 | ||
184 |
#' Calculate pooled arm statistics in Aggregated Data (AgD) based on arm-specific statistics |
|
185 |
#' |
|
186 |
#' This is a convenient function to pool arm statistics. This function is called |
|
187 |
#' within process_agd and when the ARM is not equal to "Total". Note pooled |
|
188 |
#' median can't be calculated and it is only an approximation. |
|
189 |
#' |
|
190 |
#' @param use_agd aggregated data that is processed within process_agd |
|
191 |
#' @noRd |
|
192 |
#' @return Complete N, count, mean, sd, and median for the pooled arm |
|
193 | ||
194 |
complete_agd <- function(use_agd) { |
|
195 | 1x |
use_agd <- as.data.frame(use_agd) |
196 | 1x |
use_agd <- with(use_agd, { |
197 | 1x |
use_agd[tolower(ARM) != "total", , drop = FALSE] |
198 |
}) |
|
199 | ||
200 | ! |
if (nrow(use_agd) < 2) stop("error in call complete_agd: need to have at least 2 rows that ARM!='total' ") |
201 | ||
202 | 1x |
rowId <- nrow(use_agd) + 1 |
203 | 1x |
use_agd[rowId, ] <- NA |
204 | 1x |
use_agd$STUDY[rowId] <- use_agd$STUDY[1] |
205 | 1x |
use_agd$ARM[rowId] <- "total" |
206 | ||
207 |
# complete N and count |
|
208 | 1x |
NN <- use_agd[["N"]][rowId] <- sum(use_agd[["N"]], na.rm = TRUE) |
209 | 1x |
nn <- use_agd[["N"]][-rowId] |
210 | 1x |
for (i in grep("_COUNT$", names(use_agd), value = TRUE)) { |
211 | 1x |
use_agd[[i]][rowId] <- sum(use_agd[[i]][-rowId], na.rm = TRUE) |
212 |
} |
|
213 | ||
214 |
# complete MEAN |
|
215 | 1x |
for (i in grep("_MEAN$", names(use_agd), value = TRUE)) { |
216 | 1x |
use_agd[[i]][rowId] <- sum(use_agd[[i]][-rowId] * nn) / NN |
217 |
} |
|
218 | ||
219 |
# complete SD |
|
220 | 1x |
for (i in grep("_SD$", names(use_agd), value = TRUE)) { |
221 | 1x |
use_agd[[i]][rowId] <- sqrt(sum(use_agd[[i]][-rowId]^2 * (nn - 1)) / (NN - 1)) |
222 |
} |
|
223 | ||
224 |
# complete MEDIAN, approximately!! |
|
225 | 1x |
for (i in grep("_MEDIAN$", names(use_agd), value = TRUE)) { |
226 | 1x |
use_agd[[i]][rowId] <- mean(use_agd[[i]][-rowId]) |
227 |
} |
|
228 | ||
229 |
# output |
|
230 | 1x |
rownames(use_agd) <- NULL |
231 | 1x |
use_agd |
232 |
} |
|
233 | ||
234 | ||
235 |
#' helper function: transform TTE ADaM data to suitable input for survival R package |
|
236 |
#' |
|
237 |
#' @param dd data frame, ADTTE read via `haven::read_sas` |
|
238 |
#' @param time_scale a character string, 'years', 'months', 'weeks' or 'days', time unit of median survival time |
|
239 |
#' @param trt values to include in treatment column |
|
240 |
#' |
|
241 |
#' @return a data frame that can be used as input to `survival::Surv` |
|
242 |
#' @keywords internal |
|
243 | ||
244 |
ext_tte_transfer <- function(dd, time_scale = "months", trt = NULL) { |
|
245 | ! |
time_scale <- match.arg(time_scale, choices = c("years", "months", "weeks", "days")) |
246 | ! |
time_units <- get_time_conversion(time_scale) |
247 | ||
248 | ! |
if ("CENSOR" %in% names(dd)) { |
249 | ! |
dd <- dd[!is.na(dd$CENSOR), ] |
250 | ! |
dd$status <- as.numeric(dd$CENSOR == 0) |
251 |
} |
|
252 | ! |
if ("EVENT" %in% names(dd)) { |
253 | ! |
dd$status <- as.numeric(as.character(dd$EVENT)) |
254 |
} |
|
255 | ! |
if ("TIME" %in% names(dd)) { |
256 | ! |
dd$AVAL <- as.numeric(as.character(dd$TIME)) |
257 |
} |
|
258 | ||
259 | ! |
dd$time <- dd$AVAL * time_units |
260 | ! |
if (!is.null(trt)) dd$treatment <- trt |
261 | ! |
as.data.frame(dd) |
262 |
} |
1 |
# Create an environment for settings |
|
2 |
settings_env <- new.env() |
|
3 | ||
4 |
#' Get and Set Time Conversion Factors |
|
5 |
#' |
|
6 |
#' @param default The default time scale, commonly whichever has factor = 1 |
|
7 |
#' @param days Factor to divide data time units to get time in days |
|
8 |
#' @param weeks Factor to divide data time units to get time in weeks |
|
9 |
#' @param months Factor to divide data time units to get time in months |
|
10 |
#' @param years Factor to divide data time units to get time in years |
|
11 |
#' |
|
12 |
#' @return No value returned. Conversion factors are stored internally and used within functions. |
|
13 |
#' @export |
|
14 |
#' @rdname time_conversion |
|
15 |
#' |
|
16 |
#' @examples |
|
17 |
#' # The default time scale is days: |
|
18 |
#' set_time_conversion(default = "days", days = 1, weeks = 7, months = 365.25 / 12, years = 365.25) |
|
19 |
#' |
|
20 |
#' # Set the default time scale to years |
|
21 |
#' set_time_conversion( |
|
22 |
#' default = "years", |
|
23 |
#' days = 1 / 365.25, |
|
24 |
#' weeks = 1 / 52.17857, |
|
25 |
#' months = 1 / 12, |
|
26 |
#' years = 1 |
|
27 |
#' ) |
|
28 |
#' |
|
29 |
set_time_conversion <- function(default = "days", days = 1, weeks = 7, months = 365.25 / 12, years = 365.25) { |
|
30 | 3x |
if (!default %in% c("days", "weeks", "months", "years")) { |
31 | ! |
stop("default must be one of \"days\", \"weeks\", \"months\", \"years\")") |
32 |
} |
|
33 | 3x |
factors <- c(days = days, weeks = weeks, months = months, years = years) |
34 | 3x |
check_factors <- vapply(factors, function(x) isFALSE(!is.finite(x) || x == 0), logical(1L)) |
35 | 3x |
if (!all(check_factors)) { |
36 | 1x |
stop( |
37 | 1x |
"Conversion factors must be finite non-zero numerical values: ", |
38 | 1x |
paste0(names(factors)[!check_factors], " = ", factors[!check_factors], collapse = ", ") |
39 |
) |
|
40 |
} |
|
41 | 2x |
settings_env$time_conversion <- factors |
42 | 2x |
settings_env$default_time_scale <- default |
43 |
} |
|
44 | ||
45 | ||
46 |
#' @param factor Time factor to get. |
|
47 |
#' @rdname time_conversion |
|
48 |
#' @export |
|
49 |
#' |
|
50 |
#' @examples |
|
51 |
#' # Get time scale factors: |
|
52 |
#' get_time_conversion("years") |
|
53 |
#' get_time_conversion("weeks") |
|
54 |
get_time_conversion <- function(factor = c("days", "weeks", "months", "years")) { |
|
55 | 238x |
factor <- match.arg(factor, several.ok = TRUE) |
56 | 238x |
if (!exists("time_conversion", settings_env)) { |
57 | ! |
warning("No time conversion factors previously set. Setting defaults.") |
58 | ! |
set_time_conversion() |
59 |
} |
|
60 | 238x |
settings_env$time_conversion[factor] |
61 |
} |
|
62 | ||
63 | ||
64 |
#' Convert Time Values Using Scaling Factors |
|
65 |
#' |
|
66 |
#' @param times Numeric time values |
|
67 |
#' @param as A time scale to convert to. One of "days", "weeks", "months", "years" |
|
68 |
#' |
|
69 |
#' @return Returns a numeric vector calculated from `times / get_time_conversion(factor = as)` |
|
70 |
#' @export |
|
71 |
#' @examples |
|
72 |
#' get_time_as(50, as = "months") |
|
73 |
get_time_as <- function(times, as = NULL) { |
|
74 | 42x |
if (is.null(as)) as <- settings_env$default_time_scale |
75 | 1x |
if (!is.numeric(times)) stop("times arguments must be numeric") |
76 | 236x |
as <- match.arg(as, c("days", "weeks", "months", "years")) |
77 | 236x |
times / get_time_conversion(as) |
78 |
} |
1 |
#' Anchored MAIC for binary and time-to-event endpoint |
|
2 |
#' |
|
3 |
#' This is a wrapper function to provide adjusted effect estimates and relevant statistics in anchored case (i.e. there |
|
4 |
#' is a common comparator arm in the internal and external trial). |
|
5 |
#' |
|
6 |
#' @param weights_object an object returned by \code{estimate_weight} |
|
7 |
#' @param ipd a data frame that meet format requirements in 'Details', individual patient data (IPD) of internal trial |
|
8 |
#' @param pseudo_ipd a data frame, pseudo IPD from digitized KM curve of external trial (for time-to-event endpoint) or |
|
9 |
#' from contingency table (for binary endpoint) |
|
10 |
#' @param trt_ipd a string, name of the interested investigation arm in internal trial \code{ipd} (internal IPD) |
|
11 |
#' @param trt_agd a string, name of the interested investigation arm in external trial \code{pseudo_ipd} (pseudo IPD) |
|
12 |
#' @param trt_common a string, name of the common comparator in internal and external trial |
|
13 |
#' @param trt_var_ipd a string, column name in \code{ipd} that contains the treatment assignment |
|
14 |
#' @param trt_var_agd a string, column name in \code{ipd} that contains the treatment assignment |
|
15 |
#' @param normalize_weights logical, default is \code{FALSE}. If \code{TRUE}, |
|
16 |
#' \code{scaled_weights} (normalized weights) in \code{weights_object$data} will be used. |
|
17 |
#' @param endpoint_type a string, one out of the following "binary", "tte" (time to event) |
|
18 |
#' @param endpoint_name a string, name of time to event endpoint, to be show in the last line of title |
|
19 |
#' @param time_scale a string, time unit of median survival time, taking a value of 'years', 'months', 'weeks' or |
|
20 |
#' 'days'. NOTE: it is assumed that values in TIME column of \code{ipd} and \code{pseudo_ipd} is in the unit of days |
|
21 |
#' @param km_conf_type a string, pass to \code{conf.type} of \code{survfit} |
|
22 |
#' @param eff_measure a string, "RD" (risk difference), "OR" (odds ratio), "RR" (relative risk) for a binary endpoint; |
|
23 |
#' "HR" for a time-to-event endpoint. By default is \code{NULL}, "OR" is used for binary case, otherwise "HR" is used. |
|
24 |
#' @param boot_ci_type a string, one of `c("norm","basic", "stud", "perc", "bca")` to select the type of bootstrap |
|
25 |
#' confidence interval. See [boot::boot.ci] for more details. |
|
26 |
#' @param binary_robust_cov_type a string to pass to argument `type` of [sandwich::vcovHC], see possible options in the |
|
27 |
#' documentation of that function. Default is `"HC3"` |
|
28 |
#' |
|
29 |
#' @details It is required that input \code{ipd} and \code{pseudo_ipd} to have the following |
|
30 |
#' columns. This function is not sensitive to upper or lower case of letters in column names. |
|
31 |
#' \itemize{ |
|
32 |
#' \item USUBJID - character, unique subject ID |
|
33 |
#' \item ARM - character or factor, treatment indicator, column name does not have to be 'ARM'. User specify in |
|
34 |
#' \code{trt_var_ipd} and \code{trt_var_agd} |
|
35 |
#' } |
|
36 |
#' For time-to-event analysis, the follow columns are required: |
|
37 |
#' \itemize{ |
|
38 |
#' \item EVENT - numeric, `1` for censored/death, `0` otherwise |
|
39 |
#' \item TIME - numeric column, observation time of the \code{EVENT}; unit in days |
|
40 |
#' } |
|
41 |
#' For binary outcomes: |
|
42 |
#' \itemize{ |
|
43 |
#' \item RESPONSE - numeric, `1` for event occurred, `0` otherwise |
|
44 |
#' } |
|
45 |
#' |
|
46 |
#' @importFrom survival survfit Surv coxph |
|
47 |
#' @importFrom lmtest coeftest coefci |
|
48 |
#' @importFrom sandwich vcovHC |
|
49 |
#' @importFrom boot boot boot.ci |
|
50 |
#' @return A list, contains 'descriptive' and 'inferential' |
|
51 |
#' @example inst/examples/maic_anchored_ex.R |
|
52 |
#' @example inst/examples/maic_anchored_binary_ex.R |
|
53 |
#' @export |
|
54 | ||
55 |
maic_anchored <- function(weights_object, |
|
56 |
ipd, |
|
57 |
pseudo_ipd, |
|
58 |
trt_ipd, |
|
59 |
trt_agd, |
|
60 |
trt_common, |
|
61 |
trt_var_ipd = "ARM", |
|
62 |
trt_var_agd = "ARM", |
|
63 |
normalize_weights = FALSE, |
|
64 |
endpoint_type = "tte", |
|
65 |
endpoint_name = "Time to Event Endpoint", |
|
66 |
eff_measure = c("HR", "OR", "RR", "RD"), |
|
67 |
boot_ci_type = c("norm", "basic", "stud", "perc", "bca"), |
|
68 |
# time to event specific args |
|
69 |
time_scale = "months", |
|
70 |
km_conf_type = "log-log", |
|
71 |
# binary specific args |
|
72 |
binary_robust_cov_type = "HC3") { |
|
73 |
# ==> Initial Setup ------------------------------------------ |
|
74 |
# ~~~ Create the hull for the output from this function |
|
75 | 4x |
res <- list( |
76 | 4x |
descriptive = list(), |
77 | 4x |
inferential = list() |
78 |
) |
|
79 | ||
80 | 4x |
res_AB <- list( |
81 | 4x |
est = NA, |
82 | 4x |
se = NA, |
83 | 4x |
ci_l = NA, |
84 | 4x |
ci_u = NA, |
85 | 4x |
pval = NA |
86 |
) |
|
87 | ||
88 |
# ~~~ Initial colname process and precheck on effect measure |
|
89 | 4x |
names(ipd) <- toupper(names(ipd)) |
90 | 4x |
names(pseudo_ipd) <- toupper(names(pseudo_ipd)) |
91 | 4x |
trt_var_ipd <- toupper(trt_var_ipd) |
92 | 4x |
trt_var_agd <- toupper(trt_var_agd) |
93 | ! |
if (length(eff_measure) > 1) eff_measure <- NULL |
94 | ! |
if (is.null(eff_measure)) eff_measure <- list(binary = "OR", tte = "HR")[[endpoint_type]] |
95 | ||
96 |
# ~~~ Setup ARM column and make related pre-checks |
|
97 | ! |
if (!trt_var_ipd %in% names(ipd)) stop("cannot find arm indicator column trt_var_ipd in ipd") |
98 | ! |
if (!trt_var_agd %in% names(pseudo_ipd)) stop("cannot find arm indicator column trt_var_agd in pseudo_ipd") |
99 | ! |
if (trt_var_ipd != "ARM") ipd$ARM <- ipd[[trt_var_ipd]] |
100 | ! |
if (trt_var_agd != "ARM") pseudo_ipd$ARM <- pseudo_ipd[[trt_var_agd]] |
101 | 4x |
ipd$ARM <- as.character(ipd$ARM) # just to avoid potential error when merging |
102 | ||
103 |
# ~~~ More pre-checks |
|
104 | 4x |
pseudo_ipd$ARM <- as.character(pseudo_ipd$ARM) # just to avoid potential error when merging |
105 | ! |
if (!trt_ipd %in% ipd$ARM) stop("trt_ipd does not exist in ipd$ARM") |
106 | ! |
if (!trt_agd %in% pseudo_ipd$ARM) stop("trt_agd does not exist in pseudo_ipd$ARM") |
107 | ! |
if (!trt_common %in% ipd$ARM) stop("trt_common does not exist in ipd$ARM") |
108 | ! |
if (!trt_common %in% pseudo_ipd$ARM) stop("trt_common does not exist in pseudo_ipd$ARM") |
109 | 4x |
ipd_arms <- unique(ipd$ARM) |
110 | 4x |
pseudo_ipd_arms <- unique(pseudo_ipd$ARM) |
111 | 4x |
if (!length(ipd_arms) >= 2) { |
112 | ! |
stop("In anchored case, there should be at least two arms in ipd, but you have: ", toString(ipd_arms)) |
113 |
} |
|
114 | 4x |
if (!length(pseudo_ipd_arms) >= 2) { |
115 | ! |
stop("In anchored case, there should be at least two arms in pseudo_ipd, but you have: ", toString(pseudo_ipd_arms)) |
116 |
} |
|
117 | 4x |
endpoint_type <- match.arg(endpoint_type, c("binary", "tte")) |
118 | 4x |
if (!"maicplus_estimate_weights" %in% class(weights_object)) { |
119 | ! |
stop("weights_object should be an object returned by estimate_weights") |
120 |
} |
|
121 | 4x |
if (any(duplicated(ipd$USUBJID))) { |
122 | ! |
warning( |
123 | ! |
"check your ipd, it has duplicated usubjid, this indicates, ", |
124 | ! |
"it might contain multiple endpoints for each subject" |
125 |
) |
|
126 |
} |
|
127 | 4x |
if (!all(ipd$USUBJID %in% weights_object$data$USUBJID)) { |
128 | ! |
stop( |
129 | ! |
"These patients in ipd cannot be found in weights_object ", |
130 | ! |
toString(setdiff(ipd$USUBJID, weights_object$USUBJID)) |
131 |
) |
|
132 |
} |
|
133 | 4x |
time_scale <- match.arg(arg = time_scale, choices = c("days", "weeks", "months", "years")) |
134 | 4x |
if (endpoint_type == "binary") { # for binary effect measure |
135 | ||
136 | ! |
if (any(!c("USUBJID", "RESPONSE") %in% names(ipd))) stop("ipd should have 'USUBJID', 'RESPONSE' columns at minimum") |
137 | 2x |
eff_measure <- match.arg(eff_measure, choices = c("OR", "RD", "RR"), several.ok = FALSE) |
138 | 2x |
} else if (endpoint_type == "tte") { # for time to event effect measure |
139 | ||
140 | 2x |
if (!all(c("USUBJID", "TIME", "EVENT", trt_var_ipd) %in% names(ipd))) { |
141 | ! |
stop("ipd needs to include at least USUBJID, TIME, EVENT, ", toString(trt_var_ipd)) |
142 |
} |
|
143 | 2x |
if (!all(c("TIME", "EVENT", trt_var_agd) %in% names(pseudo_ipd))) { |
144 | ! |
stop("pseudo_ipd needs to include at least TIME, EVENT, ", toString(trt_var_agd)) |
145 |
} |
|
146 | 2x |
eff_measure <- match.arg(eff_measure, choices = c("HR"), several.ok = FALSE) |
147 |
} |
|
148 | 4x |
boot_ci_type <- match.arg(boot_ci_type) |
149 | ||
150 |
# ==> IPD and AgD data preparation ------------------------------------------ |
|
151 |
# : subset ipd, retain only ipd from interested trts |
|
152 | 4x |
ipd <- ipd[ipd$ARM %in% c(trt_ipd, trt_common), , drop = TRUE] |
153 | 4x |
pseudo_ipd <- pseudo_ipd[pseudo_ipd$ARM %in% c(trt_agd, trt_common), , drop = TRUE] |
154 | ||
155 |
# : assign weights to real and pseudo ipd |
|
156 | 4x |
if (normalize_weights) { |
157 | ! |
ipd$weights <- weights_object$data$scaled_weights[match(weights_object$data$USUBJID, ipd$USUBJID)] |
158 |
} else { |
|
159 | 4x |
ipd$weights <- weights_object$data$weights[match(weights_object$data$USUBJID, ipd$USUBJID)] |
160 |
} |
|
161 | 4x |
pseudo_ipd$weights <- 1 |
162 | 2x |
if (!"USUBJID" %in% names(pseudo_ipd)) pseudo_ipd$USUBJID <- paste0("ID", seq_len(nrow(pseudo_ipd))) |
163 | ||
164 |
# : give warning when individual pts in IPD has no weights |
|
165 | 4x |
if (any(is.na(ipd$weights))) { |
166 | ! |
ipd <- ipd[!is.na(ipd$weights), , drop = FALSE] |
167 | ! |
warning( |
168 | ! |
"These USUBJID in IPD have no weight in weights_object and hence excluded from analysis: ", |
169 | ! |
toString(ipd$USUBJID[is.na(ipd$weights)]) |
170 |
) |
|
171 | ! |
if (nrow(ipd) == 0) stop("There are no patients with valid weights in IPD!") |
172 |
} |
|
173 | ||
174 |
# : retain necessary columns |
|
175 | 4x |
outcome_cols <- if (endpoint_type == "tte") c("TIME", "EVENT") else "RESPONSE" |
176 | 4x |
retain_cols <- c("USUBJID", "ARM", outcome_cols, "weights") |
177 | ||
178 | 4x |
ipd <- ipd[, retain_cols, drop = FALSE] |
179 | 4x |
pseudo_ipd <- pseudo_ipd[, retain_cols, drop = FALSE] |
180 | ||
181 |
# : merge real and pseudo ipds, only used if apply contrast method, |
|
182 |
# since contrast method is not implemented in v0.1, this R obj is not used |
|
183 |
# just a placeholder |
|
184 | 4x |
dat <- rbind(ipd, pseudo_ipd) |
185 | ||
186 |
# : setup ARM column as a factor, |
|
187 |
# * these line cannot be move prior to "dat <- rbind(ipd, pseudo_ipd)" |
|
188 | 4x |
ipd$ARM <- factor(ipd$ARM, levels = c(trt_common, trt_ipd)) |
189 | 4x |
pseudo_ipd$ARM <- factor(pseudo_ipd$ARM, levels = c(trt_common, trt_agd)) |
190 | 4x |
dat$ARM <- factor(dat$ARM, levels = c(trt_common, trt_agd, trt_ipd)) |
191 | ||
192 |
# ==> Inferential output ------------------------------------------ |
|
193 | 4x |
result <- if (endpoint_type == "tte") { |
194 | 2x |
maic_anchored_tte( |
195 | 2x |
res, |
196 | 2x |
res_BC = NULL, |
197 | 2x |
dat, |
198 | 2x |
ipd, |
199 | 2x |
pseudo_ipd, |
200 | 2x |
km_conf_type, |
201 | 2x |
time_scale, |
202 | 2x |
weights_object, |
203 | 2x |
endpoint_name, |
204 | 2x |
normalize_weights, |
205 | 2x |
trt_ipd, |
206 | 2x |
trt_agd, |
207 | 2x |
boot_ci_type |
208 |
) |
|
209 | 4x |
} else if (endpoint_type == "binary") { |
210 | 2x |
maic_anchored_binary( |
211 | 2x |
res, |
212 | 2x |
res_BC = NULL, |
213 | 2x |
dat, |
214 | 2x |
ipd, |
215 | 2x |
pseudo_ipd, |
216 | 2x |
binary_robust_cov_type, |
217 | 2x |
weights_object, |
218 | 2x |
endpoint_name, |
219 | 2x |
normalize_weights, |
220 | 2x |
eff_measure, |
221 | 2x |
trt_ipd, |
222 | 2x |
trt_agd, |
223 | 2x |
boot_ci_type |
224 |
) |
|
225 |
} else { |
|
226 | ! |
stop("Endpoint type ", endpoint_type, " currently unsupported.") |
227 |
} |
|
228 | ||
229 | 4x |
result |
230 |
} |
|
231 | ||
232 | ||
233 |
# MAIC inference functions for TTE outcome type ------------ |
|
234 |
maic_anchored_tte <- function(res, |
|
235 |
res_BC = NULL, |
|
236 |
dat, |
|
237 |
ipd, |
|
238 |
pseudo_ipd, |
|
239 |
km_conf_type, |
|
240 |
time_scale, |
|
241 |
weights_object, |
|
242 |
endpoint_name, |
|
243 |
normalize_weights, |
|
244 |
trt_ipd, |
|
245 |
trt_agd, |
|
246 |
boot_ci_type) { |
|
247 |
# Analysis table (Cox model) before and after matching, incl Median Survival Time |
|
248 |
# derive km w and w/o weights |
|
249 | 2x |
kmobj_ipd <- survfit(Surv(TIME, EVENT) ~ ARM, ipd, conf.type = km_conf_type) |
250 | 2x |
kmobj_ipd_adj <- survfit(Surv(TIME, EVENT) ~ ARM, ipd, weights = ipd$weights, conf.type = km_conf_type) |
251 | 2x |
kmobj_agd <- survfit(Surv(TIME, EVENT) ~ ARM, pseudo_ipd, conf.type = km_conf_type) |
252 | ||
253 | 2x |
res$descriptive[["survfit_ipd_before"]] <- survfit_makeup(kmobj_ipd) |
254 | 2x |
res$descriptive[["survfit_ipd_after"]] <- survfit_makeup(kmobj_ipd_adj) |
255 | 2x |
res$descriptive[["survfit_pseudo"]] <- survfit_makeup(kmobj_agd) |
256 |
# derive median survival time |
|
257 | 2x |
medSurv_ipd <- medSurv_makeup(kmobj_ipd, legend = "IPD, before matching", time_scale = time_scale) |
258 | 2x |
medSurv_ipd_adj <- medSurv_makeup(kmobj_ipd_adj, legend = "IPD, after matching", time_scale = time_scale) |
259 | 2x |
medSurv_agd <- medSurv_makeup(kmobj_agd, legend = "AgD, external", time_scale = time_scale) |
260 | 2x |
medSurv_out <- rbind(medSurv_ipd, medSurv_ipd_adj, medSurv_agd) |
261 | 2x |
medSurv_out <- cbind(medSurv_out[, 1:6], |
262 | 2x |
`events%` = medSurv_out$events * 100 / medSurv_out$n.max, |
263 | 2x |
medSurv_out[7:ncol(medSurv_out)] |
264 |
) |
|
265 | 2x |
medSurv_out <- cbind(trt_ind = c("C", "B", "A")[match(medSurv_out$treatment, levels(dat$ARM))], medSurv_out) |
266 | ||
267 | 2x |
res$descriptive[["summary"]] <- medSurv_out |
268 | ||
269 |
# ~~~ Analysis table (Cox model) before and after matching |
|
270 |
# fit PH Cox regression model |
|
271 | 2x |
coxobj_ipd <- coxph(Surv(TIME, EVENT) ~ ARM, ipd) # robust = TRUE or not makes a diff |
272 | 2x |
coxobj_ipd_adj <- coxph(Surv(TIME, EVENT) ~ ARM, ipd, weights = weights, robust = TRUE) |
273 | 2x |
coxobj_agd <- coxph(Surv(TIME, EVENT) ~ ARM, pseudo_ipd) |
274 | ||
275 |
# derive ipd exp arm vs agd exp arm via bucher |
|
276 | 2x |
res_AC_unadj <- as.list(summary(coxobj_ipd)$coef)[c(1, 3)] # est, se |
277 | 2x |
res_AC <- as.list(summary(coxobj_ipd_adj)$coef)[c(1, 4)] # est, robust se |
278 | 2x |
if (is.null(res_BC)) res_BC <- as.list(summary(coxobj_agd)$coef)[c(1, 3)] # est, se |
279 | ||
280 | 2x |
names(res_AC_unadj) <- names(res_AC) <- names(res_BC) <- c("est", "se") |
281 | ||
282 | 2x |
coxobj_ipd_summary <- summary(coxobj_ipd) |
283 | 2x |
res_AC_unadj$ci_l <- coxobj_ipd_summary$conf.int[3] |
284 | 2x |
res_AC_unadj$ci_u <- coxobj_ipd_summary$conf.int[4] |
285 | 2x |
res_AC_unadj$pval <- as.vector(coxobj_ipd_summary$waldtest[3]) |
286 | ||
287 | 2x |
coxobj_ipd_adj_summary <- summary(coxobj_ipd_adj) |
288 | 2x |
res_AC$ci_l <- coxobj_ipd_adj_summary$conf.int[3] |
289 | 2x |
res_AC$ci_u <- coxobj_ipd_adj_summary$conf.int[4] |
290 | 2x |
res_AC$pval <- as.vector(coxobj_ipd_adj_summary$waldtest[3]) |
291 | ||
292 | 2x |
coxobj_agd_summary <- summary(coxobj_agd) |
293 | 2x |
res_BC$ci_l <- coxobj_agd_summary$conf.int[3] |
294 | 2x |
res_BC$ci_u <- coxobj_agd_summary$conf.int[4] |
295 | 2x |
res_BC$pval <- as.vector(coxobj_agd_summary$waldtest[3]) |
296 | ||
297 | 2x |
res_AB <- bucher(res_AC, res_BC, conf_lv = 0.95) |
298 | 2x |
res_AB_unadj <- bucher(res_AC_unadj, res_BC, conf_lv = 0.95) |
299 | ||
300 |
# : get bootstrapped estimates if applicable |
|
301 | 2x |
if (!is.null(weights_object$boot)) { |
302 | 1x |
keep_rows <- setdiff(seq_len(nrow(weights_object$data)), weights_object$rows_with_missing) |
303 | 1x |
boot_ipd_id <- weights_object$data[keep_rows, "USUBJID", drop = FALSE] |
304 | ||
305 | 1x |
boot_ipd <- merge(boot_ipd_id, ipd, by = "USUBJID", all.x = TRUE) |
306 | ! |
if (nrow(boot_ipd) != nrow(boot_ipd_id)) stop("ipd has multiple observations for some patients") |
307 | 1x |
boot_ipd <- boot_ipd[match(boot_ipd$USUBJID, boot_ipd_id$USUBJID), ] |
308 | ||
309 | 1x |
stat_fun <- function(data, index, w_obj, normalize) { |
310 | 6x |
r <- dynGet("r", ifnotfound = NA) # Get bootstrap iteration |
311 | 6x |
if (!is.na(r)) { |
312 | ! |
if (!all(index == w_obj$boot[, 1, r])) stop("Bootstrap and weight indices don't match") |
313 | 5x |
boot_ipd <- data[w_obj$boot[, 1, r], ] |
314 | 5x |
boot_ipd$weights <- w_obj$boot[, 2, r] |
315 | ||
316 | 5x |
if (normalize) { |
317 | ! |
boot_ipd$weights <- ave( |
318 | ! |
boot_ipd$weights, |
319 | ! |
boot_ipd$ARM, |
320 | ! |
FUN = function(w) w / mean(w, na.rm = TRUE) |
321 |
) |
|
322 |
} |
|
323 |
} |
|
324 | 6x |
boot_coxobj_dat_adj <- coxph(Surv(TIME, EVENT) ~ ARM, boot_ipd, weights = boot_ipd$weights, robust = TRUE) |
325 | 6x |
boot_res_AC <- list(est = coef(boot_coxobj_dat_adj)[1], se = sqrt(vcov(boot_coxobj_dat_adj)[1, 1])) |
326 |
# temp method to source in variance of BC in AgD via monte carlo, may be removed in future |
|
327 | 6x |
res_BC_mc <- res_BC |
328 | 6x |
res_BC_mc$est <- rnorm(1, mean = res_BC$est, sd = res_BC$se) |
329 | 6x |
boot_res_AB <- bucher(boot_res_AC, res_BC_mc, conf_lv = 0.95) |
330 | 6x |
c( |
331 | 6x |
est_AB = boot_res_AB$est, |
332 | 6x |
var_AB = boot_res_AB$se^2, |
333 | 6x |
se_AB = boot_res_AB$se, |
334 | 6x |
est_AC = boot_res_AC$est, |
335 | 6x |
se_AC = boot_res_AC$se, |
336 | 6x |
var_AC = boot_res_AC$se^2 |
337 |
) |
|
338 |
} |
|
339 | ||
340 |
# Revert seed to how it was for weight bootstrap sampling |
|
341 | 1x |
old_seed <- globalenv()$.Random.seed |
342 | 1x |
on.exit(suspendInterrupts(set_random_seed(old_seed))) |
343 | 1x |
set_random_seed(weights_object$boot_seed) |
344 | ||
345 | 1x |
R <- dim(weights_object$boot)[3] |
346 | 1x |
boot_res <- boot( |
347 | 1x |
boot_ipd, |
348 | 1x |
stat_fun, |
349 | 1x |
R = R, |
350 | 1x |
w_obj = weights_object, |
351 | 1x |
normalize = normalize_weights, |
352 | 1x |
strata = weights_object$boot_strata |
353 |
) |
|
354 | 1x |
boot_ci <- boot.ci(boot_res, type = boot_ci_type, w_obj = weights_object, normalize = normalize_weights) |
355 | ||
356 | 1x |
l_u_index <- switch(boot_ci_type, |
357 | 1x |
"norm" = list(2, 3, "normal"), |
358 | 1x |
"basic" = list(4, 5, "basic"), |
359 | 1x |
"stud" = list(4, 5, "student"), |
360 | 1x |
"perc" = list(4, 5, "percent"), |
361 | 1x |
"bca" = list(4, 5, "bca") |
362 |
) |
|
363 | ||
364 |
# boot results for A v B, method 1 (maybe retired in future version) |
|
365 | 1x |
boot_res_AB <- list( |
366 | 1x |
est = as.vector(exp(boot_res$t0[1])), |
367 | 1x |
se = NA, |
368 | 1x |
ci_l = exp(boot_ci[[l_u_index[[3]]]][l_u_index[[1]]]), |
369 | 1x |
ci_u = exp(boot_ci[[l_u_index[[3]]]][l_u_index[[2]]]), |
370 | 1x |
pval = NA |
371 |
) |
|
372 | ||
373 |
# boot results for A v C |
|
374 | 1x |
boot_ci_ac <- boot.ci(boot_res, type = boot_ci_type, w_obj = weights_object, index = c(4, 6)) |
375 | 1x |
boot_res_AC <- list( |
376 | 1x |
est = as.vector(exp(boot_res$t0[4])), |
377 | 1x |
se = NA, |
378 | 1x |
ci_l = exp(boot_ci_ac[[l_u_index[[3]]]][l_u_index[[1]]]), |
379 | 1x |
ci_u = exp(boot_ci_ac[[l_u_index[[3]]]][l_u_index[[2]]]), |
380 | 1x |
pval = NA |
381 |
) |
|
382 | ||
383 |
# boot results for A v B, method 2 |
|
384 | 1x |
boot_res_AC2 <- list( |
385 | 1x |
est = as.vector(boot_res$t0[4]), |
386 | 1x |
se = NA, |
387 | 1x |
ci_l = boot_ci_ac[[l_u_index[[3]]]][l_u_index[[1]]], |
388 | 1x |
ci_u = boot_ci_ac[[l_u_index[[3]]]][l_u_index[[2]]], |
389 | 1x |
pval = NA |
390 |
) |
|
391 | 1x |
boot_res_AC2$se <- find_SE_from_CI(boot_res_AC2$ci_l, boot_res_AC2$ci_u, 0.95, log = FALSE) |
392 | 1x |
boot_res_AB2 <- bucher(boot_res_AC2, res_BC, conf_lv = 0.95) |
393 | 1x |
boot_res_AB2 <- list( |
394 | 1x |
est = exp(boot_res_AB2$est), |
395 | 1x |
se = NA, |
396 | 1x |
ci_l = exp(boot_res_AB2$ci_l), |
397 | 1x |
ci_u = exp(boot_res_AB2$ci_u), |
398 | 1x |
pval = NA |
399 |
) |
|
400 |
} else { |
|
401 | 1x |
boot_res <- NULL |
402 | 1x |
boot_res_AB <- NULL |
403 | 1x |
boot_res_AB2 <- NULL |
404 | 1x |
boot_res_AC <- NULL |
405 |
} |
|
406 | ||
407 |
# transform |
|
408 | 2x |
res_AB$est <- exp(res_AB$est) |
409 | 2x |
res_AB$ci_l <- exp(res_AB$ci_l) |
410 | 2x |
res_AB$ci_u <- exp(res_AB$ci_u) |
411 | 2x |
res_AB_unadj$est <- exp(res_AB_unadj$est) |
412 | 2x |
res_AB_unadj$ci_l <- exp(res_AB_unadj$ci_l) |
413 | 2x |
res_AB_unadj$ci_u <- exp(res_AB_unadj$ci_u) |
414 | ||
415 | 2x |
res_AC$est <- exp(res_AC$est) |
416 | 2x |
res_AC_unadj$est <- exp(res_AC_unadj$est) |
417 | 2x |
res_BC$est <- exp(res_BC$est) |
418 | ||
419 |
# : report all raw fitted obj |
|
420 | 2x |
res$inferential[["fit"]] <- list( |
421 | 2x |
km_before_ipd = kmobj_ipd, |
422 | 2x |
km_after_ipd = kmobj_ipd_adj, |
423 | 2x |
km_agd = kmobj_agd, |
424 | 2x |
model_before_ipd = coxobj_ipd, |
425 | 2x |
model_after_ipd = coxobj_ipd_adj, |
426 | 2x |
model_agd = coxobj_agd, |
427 | 2x |
res_AC = res_AC, |
428 | 2x |
res_AC_unadj = res_AC_unadj, |
429 | 2x |
res_BC = res_BC, |
430 | 2x |
res_AB = res_AB, |
431 | 2x |
res_AB_unadj = res_AB_unadj, |
432 | 2x |
boot_res = boot_res, |
433 | 2x |
boot_res_AC = boot_res_AC, |
434 | 2x |
boot_res_AB_mc = boot_res_AB, |
435 | 2x |
boot_res_AB = boot_res_AB2 |
436 |
) |
|
437 | ||
438 |
# : compile HR result |
|
439 | 2x |
res$inferential[["summary"]] <- data.frame( |
440 | 2x |
case = c("AC", "adjusted_AC", "BC", "AB", "adjusted_AB"), |
441 | 2x |
HR = c( |
442 | 2x |
summary(coxobj_ipd)$conf.int[1], |
443 | 2x |
summary(coxobj_ipd_adj)$conf.int[1], |
444 | 2x |
summary(coxobj_agd)$conf.int[1], |
445 | 2x |
res_AB_unadj$est, res_AB$est |
446 |
), |
|
447 | 2x |
LCL = c( |
448 | 2x |
summary(coxobj_ipd)$conf.int[3], |
449 | 2x |
summary(coxobj_ipd_adj)$conf.int[3], |
450 | 2x |
summary(coxobj_agd)$conf.int[3], |
451 | 2x |
res_AB_unadj$ci_l, res_AB$ci_l |
452 |
), |
|
453 | 2x |
UCL = c( |
454 | 2x |
summary(coxobj_ipd)$conf.int[4], |
455 | 2x |
summary(coxobj_ipd_adj)$conf.int[4], |
456 | 2x |
summary(coxobj_agd)$conf.int[4], |
457 | 2x |
res_AB_unadj$ci_u, res_AB$ci_u |
458 |
), |
|
459 | 2x |
pval = c( |
460 | 2x |
summary(coxobj_ipd)$waldtest[3], |
461 | 2x |
summary(coxobj_ipd_adj)$waldtest[3], |
462 | 2x |
summary(coxobj_agd)$waldtest[3], |
463 | 2x |
res_AB_unadj$pval, res_AB$pval |
464 |
) |
|
465 |
) |
|
466 | ||
467 |
# output |
|
468 | 2x |
res |
469 |
} |
|
470 | ||
471 |
# MAIC inference functions for Binary outcome type ------------ |
|
472 |
maic_anchored_binary <- function(res, |
|
473 |
res_BC = NULL, |
|
474 |
dat, |
|
475 |
ipd, |
|
476 |
pseudo_ipd, |
|
477 |
binary_robust_cov_type, |
|
478 |
weights_object, |
|
479 |
endpoint_name, |
|
480 |
normalize_weights, |
|
481 |
eff_measure, |
|
482 |
trt_ipd, |
|
483 |
trt_agd, |
|
484 |
boot_ci_type) { |
|
485 |
# ~~~ Analysis table |
|
486 |
# : set up proper link |
|
487 | 2x |
glm_link <- switch(eff_measure, |
488 | 2x |
"RD" = poisson(link = "identity"), |
489 | 2x |
"RR" = poisson(link = "log"), |
490 | 2x |
"OR" = binomial(link = "logit") |
491 |
) |
|
492 | 2x |
res_template <- list( |
493 | 2x |
est = NA, |
494 | 2x |
se = NA, |
495 | 2x |
ci_l = NA, |
496 | 2x |
ci_u = NA, |
497 | 2x |
pval = NA |
498 |
) |
|
499 | ||
500 |
# : fit glm for binary outcome and robust estimate with weights |
|
501 | 2x |
binobj_ipd <- glm(RESPONSE ~ ARM, ipd, family = glm_link) |
502 | 2x |
binobj_ipd_adj <- suppressWarnings(glm(RESPONSE ~ ARM, ipd, weights = weights, family = glm_link)) |
503 | 2x |
binobj_agd <- glm(RESPONSE ~ ARM, pseudo_ipd, family = glm_link) |
504 | ||
505 | 2x |
bin_robust_cov <- sandwich::vcovHC(binobj_ipd_adj, type = binary_robust_cov_type) |
506 | 2x |
bin_robust_coef <- lmtest::coeftest(binobj_ipd_adj, vcov. = bin_robust_cov) |
507 | 2x |
bin_robust_ci <- lmtest::coefci(binobj_ipd_adj, vcov. = bin_robust_cov) |
508 | ||
509 |
# : make general summary |
|
510 | 2x |
glmDesc_ipd <- glm_makeup(binobj_ipd, legend = "IPD, before matching", weighted = FALSE) |
511 | 2x |
glmDesc_ipd_adj <- glm_makeup(binobj_ipd_adj, legend = "IPD, after matching", weighted = TRUE) |
512 | 2x |
glmDesc_agd <- glm_makeup(binobj_agd, legend = "AgD, external", weighted = FALSE) |
513 | 2x |
glmDesc <- rbind(glmDesc_ipd, glmDesc_ipd_adj, glmDesc_agd) |
514 | 2x |
glmDesc <- cbind(trt_ind = c("C", "B", "A")[match(glmDesc$treatment, levels(dat$ARM))], glmDesc) |
515 | 2x |
rownames(glmDesc) <- NULL |
516 | 2x |
res$descriptive[["summary"]] <- glmDesc |
517 | ||
518 |
# derive ipd exp arm vs agd exp arm via bucher |
|
519 | 2x |
res_AC <- res_template |
520 | 2x |
res_AC$est <- bin_robust_coef[2, "Estimate"] |
521 | 2x |
res_AC$se <- bin_robust_coef[2, "Std. Error"] |
522 | 2x |
res_AC$ci_l <- bin_robust_ci[2, "2.5 %"] |
523 | 2x |
res_AC$ci_u <- bin_robust_ci[2, "97.5 %"] |
524 | 2x |
res_AC$pval <- bin_robust_coef[2, "Pr(>|z|)"] |
525 | ||
526 |
# unadjusted AC |
|
527 | 2x |
res_AC_unadj <- res_template |
528 | 2x |
res_AC_unadj$est <- summary(binobj_ipd)$coefficients[2, "Estimate"] |
529 | 2x |
res_AC_unadj$se <- summary(binobj_ipd)$coefficients[2, "Std. Error"] |
530 | 2x |
res_AC_unadj$ci_l <- confint.default(binobj_ipd)[2, "2.5 %"] |
531 | 2x |
res_AC_unadj$ci_u <- confint.default(binobj_ipd)[2, "97.5 %"] |
532 | 2x |
res_AC_unadj$pval <- summary(binobj_ipd)$coefficients[2, "Pr(>|z|)"] |
533 | ||
534 |
# BC |
|
535 | 2x |
if (is.null(res_BC)) { |
536 | 2x |
res_BC <- res_template |
537 | 2x |
res_BC$est <- summary(binobj_agd)$coefficients[2, "Estimate"] |
538 | 2x |
res_BC$se <- summary(binobj_agd)$coefficients[2, "Std. Error"] |
539 | 2x |
res_BC$ci_l <- confint.default(binobj_agd)[2, "2.5 %"] |
540 | 2x |
res_BC$ci_u <- confint.default(binobj_agd)[2, "97.5 %"] |
541 | 2x |
res_BC$pval <- summary(binobj_agd)$coefficients[2, "Pr(>|z|)"] |
542 |
} |
|
543 | ||
544 |
# derive AB |
|
545 | 2x |
res_AB <- bucher(res_AC, res_BC, conf_lv = 0.95) |
546 | 2x |
res_AB_unadj <- bucher(res_AC_unadj, res_BC, conf_lv = 0.95) |
547 | ||
548 |
# : get bootstrapped estimates if applicable |
|
549 | 2x |
if (!is.null(weights_object$boot)) { |
550 | 1x |
keep_rows <- setdiff(seq_len(nrow(weights_object$data)), weights_object$rows_with_missing) |
551 | 1x |
boot_ipd_id <- weights_object$data[keep_rows, "USUBJID", drop = FALSE] |
552 | ||
553 | 1x |
boot_ipd <- merge(boot_ipd_id, ipd, by = "USUBJID", all.x = TRUE) |
554 | ! |
if (nrow(boot_ipd) != nrow(boot_ipd_id)) stop("ipd has multiple observations for some patients") |
555 | 1x |
boot_ipd <- boot_ipd[match(boot_ipd$USUBJID, boot_ipd_id$USUBJID), ] |
556 | ||
557 | 1x |
stat_fun <- function(data, index, w_obj, eff_measure, normalize) { |
558 | 21x |
r <- dynGet("r", ifnotfound = NA) # Get bootstrap iteration |
559 | 21x |
if (!is.na(r)) { |
560 | ! |
if (!all(index == w_obj$boot[, 1, r])) stop("Bootstrap and weight indices don't match") |
561 | 20x |
boot_ipd <- data[w_obj$boot[, 1, r], ] |
562 | 20x |
boot_ipd$weights <- w_obj$boot[, 2, r] |
563 | ||
564 | 20x |
if (normalize) { |
565 | ! |
boot_ipd$weights <- ave( |
566 | ! |
boot_ipd$weights, |
567 | ! |
boot_ipd$ARM, |
568 | ! |
FUN = function(w) w / mean(w, na.rm = TRUE) |
569 |
) |
|
570 |
} |
|
571 |
} |
|
572 | ||
573 | 21x |
boot_binobj_dat_adj <- suppressWarnings( |
574 | 21x |
glm(RESPONSE ~ ARM, boot_ipd, weights = boot_ipd$weights, family = glm_link) |
575 |
) |
|
576 | 21x |
boot_AC_est <- coef(boot_binobj_dat_adj)[2] |
577 | 21x |
boot_AC_var <- vcov(boot_binobj_dat_adj)[2, 2] |
578 | ||
579 | 21x |
boot_res_AC <- list(est = boot_AC_est, se = sqrt(boot_AC_var)) |
580 |
# temp method to source in variance of BC in AgD via monte carlo, may be removed in future |
|
581 | 21x |
res_BC_mc <- res_BC |
582 | 21x |
res_BC_mc$est <- rnorm(1, mean = res_BC$est, sd = res_BC$se) |
583 | ||
584 | 21x |
boot_res_AB <- bucher(boot_res_AC, res_BC_mc, conf_lv = 0.95) |
585 | ||
586 | 21x |
c( |
587 | 21x |
est_AB = boot_res_AB$est, |
588 | 21x |
var_AB = boot_res_AB$se^2, |
589 | 21x |
se_AB = boot_res_AB$se, |
590 | 21x |
est_AC = boot_res_AC$est, |
591 | 21x |
se_AC = boot_res_AC$se, |
592 | 21x |
var_AC = boot_res_AC$se^2 |
593 |
) |
|
594 |
} |
|
595 | ||
596 |
# Revert seed to how it was for weight bootstrap sampling |
|
597 | 1x |
old_seed <- globalenv()$.Random.seed |
598 | 1x |
on.exit(suspendInterrupts(set_random_seed(old_seed))) |
599 | 1x |
set_random_seed(weights_object$boot_seed) |
600 | ||
601 | 1x |
R <- dim(weights_object$boot)[3] |
602 | 1x |
boot_res <- boot( |
603 | 1x |
boot_ipd, |
604 | 1x |
stat_fun, |
605 | 1x |
R = R, |
606 | 1x |
w_obj = weights_object, |
607 | 1x |
eff_measure = eff_measure, |
608 | 1x |
normalize = normalize_weights, |
609 | 1x |
strata = weights_object$boot_strata |
610 |
) |
|
611 | 1x |
boot_ci <- boot.ci(boot_res, type = boot_ci_type, w_obj = weights_object) |
612 | ||
613 | 1x |
l_u_index <- switch(boot_ci_type, |
614 | 1x |
"norm" = list(2, 3, "normal"), |
615 | 1x |
"basic" = list(4, 5, "basic"), |
616 | 1x |
"stud" = list(4, 5, "student"), |
617 | 1x |
"perc" = list(4, 5, "percent"), |
618 | 1x |
"bca" = list(4, 5, "bca") |
619 |
) |
|
620 | ||
621 | 1x |
transform_estimate <- switch(eff_measure, |
622 | 1x |
"RD" = function(x) x * 100, |
623 | 1x |
"RR" = exp, |
624 | 1x |
"OR" = exp |
625 |
) |
|
626 | ||
627 |
# boot results for A v B, method 1 (maybe retired in future version) |
|
628 | 1x |
boot_res_AB <- list( |
629 | 1x |
est = as.vector(transform_estimate(boot_res$t0[1])), |
630 | 1x |
se = NA, |
631 | 1x |
ci_l = transform_estimate(boot_ci[[l_u_index[[3]]]][l_u_index[[1]]]), |
632 | 1x |
ci_u = transform_estimate(boot_ci[[l_u_index[[3]]]][l_u_index[[2]]]), |
633 | 1x |
pval = NA |
634 |
) |
|
635 | ||
636 |
# boot results for A v C |
|
637 | 1x |
boot_ci_ac <- boot.ci(boot_res, type = boot_ci_type, w_obj = weights_object, index = c(4, 6)) |
638 | 1x |
boot_res_AC <- list( |
639 | 1x |
est = as.vector(transform_estimate(boot_res$t0[4])), |
640 | 1x |
se = NA, |
641 | 1x |
ci_l = transform_estimate(boot_ci_ac[[l_u_index[[3]]]][l_u_index[[1]]]), |
642 | 1x |
ci_u = transform_estimate(boot_ci_ac[[l_u_index[[3]]]][l_u_index[[2]]]), |
643 | 1x |
pval = NA |
644 |
) |
|
645 | ||
646 |
# boot results for A v B, method 2 |
|
647 | 1x |
boot_res_AC2 <- list( |
648 | 1x |
est = as.vector(boot_res$t0[4]), |
649 | 1x |
se = NA, |
650 | 1x |
ci_l = boot_ci_ac[[l_u_index[[3]]]][l_u_index[[1]]], |
651 | 1x |
ci_u = boot_ci_ac[[l_u_index[[3]]]][l_u_index[[2]]], |
652 | 1x |
pval = NA |
653 |
) |
|
654 | 1x |
boot_res_AC2$se <- find_SE_from_CI(boot_res_AC2$ci_l, boot_res_AC2$ci_u, 0.95, log = FALSE) |
655 | 1x |
boot_res_AB2 <- bucher(boot_res_AC2, res_BC, conf_lv = 0.95) |
656 | 1x |
boot_res_AB2 <- list( |
657 | 1x |
est = transform_estimate(boot_res_AB2$est), |
658 | 1x |
se = NA, |
659 | 1x |
ci_l = transform_estimate(boot_res_AB2$ci_l), |
660 | 1x |
ci_u = transform_estimate(boot_res_AB2$ci_u), |
661 | 1x |
pval = NA |
662 |
) |
|
663 |
} else { |
|
664 | 1x |
boot_res_AC <- NULL |
665 | 1x |
boot_res_AB <- NULL |
666 | 1x |
boot_res_AB2 <- NULL |
667 | 1x |
boot_res <- NULL |
668 |
} |
|
669 | ||
670 |
# transform effect measures |
|
671 | 2x |
if (eff_measure %in% c("RR", "OR")) { |
672 | 2x |
res_AB <- transform_ratio(res_AB) |
673 | 2x |
res_AB_unadj <- transform_ratio(res_AB_unadj) |
674 | 2x |
res_AC <- transform_ratio(res_AC) |
675 | 2x |
res_AC_unadj <- transform_ratio(res_AC_unadj) |
676 | 2x |
res_BC <- transform_ratio(res_BC) |
677 | ! |
} else if (eff_measure == "RD") { |
678 | ! |
res_AB <- transform_absolute(res_AB) |
679 | ! |
res_AB_unadj <- transform_absolute(res_AB_unadj) |
680 | ! |
res_AC <- transform_absolute(res_AC) |
681 | ! |
res_AC_unadj <- transform_absolute(res_AC_unadj) |
682 | ! |
res_BC <- transform_absolute(res_BC) |
683 |
} |
|
684 | ||
685 | ||
686 |
# report all raw fitted obj |
|
687 | 2x |
res$inferential[["fit"]] <- list( |
688 | 2x |
model_before_ipd = binobj_ipd, |
689 | 2x |
model_after_ipd = binobj_ipd_adj, |
690 | 2x |
model_agd = binobj_agd, |
691 | 2x |
res_AC = res_AC, |
692 | 2x |
res_AC_unadj = res_AC_unadj, |
693 | 2x |
res_BC = res_BC, |
694 | 2x |
res_AB = res_AB, |
695 | 2x |
res_AB_unadj = res_AB_unadj, |
696 | 2x |
boot_res = boot_res, |
697 | 2x |
boot_res_AC = boot_res_AC, |
698 | 2x |
boot_res_AB_mc = boot_res_AB, |
699 | 2x |
boot_res_AB = boot_res_AB2 |
700 |
) |
|
701 | ||
702 |
# compile binary effect estimate result |
|
703 | 2x |
res$inferential[["summary"]] <- data.frame( |
704 | 2x |
case = c("AC", "adjusted_AC", "BC", "AB", "adjusted_AB"), |
705 | 2x |
EST = c( |
706 | 2x |
res_AC$est, |
707 | 2x |
res_AC_unadj$est, |
708 | 2x |
res_BC$est, |
709 | 2x |
res_AB$est, |
710 | 2x |
res_AB_unadj$est |
711 |
), |
|
712 | 2x |
LCL = c( |
713 | 2x |
res_AC$ci_l, |
714 | 2x |
res_AC_unadj$ci_l, |
715 | 2x |
res_BC$ci_l, |
716 | 2x |
res_AB$ci_l, |
717 | 2x |
res_AB_unadj$ci_l |
718 |
), |
|
719 | 2x |
UCL = c( |
720 | 2x |
res_AC$ci_u, |
721 | 2x |
res_AC_unadj$ci_u, |
722 | 2x |
res_BC$ci_u, |
723 | 2x |
res_AB$ci_u, |
724 | 2x |
res_AB_unadj$ci_u |
725 |
), |
|
726 | 2x |
pval = c( |
727 | 2x |
res_AC$pval, |
728 | 2x |
res_AC_unadj$pval, |
729 | 2x |
res_BC$pval, |
730 | 2x |
res_AB$pval, |
731 | 2x |
res_AB_unadj$pval |
732 |
) |
|
733 |
) |
|
734 | 2x |
names(res$inferential[["summary"]])[2] <- eff_measure |
735 | ||
736 |
# output |
|
737 | 2x |
res |
738 |
} |
1 |
# Restore the RNG back to a previous state using the global .Random.seed |
|
2 |
set_random_seed <- function(old_seed) { |
|
3 | 16x |
if (is.null(old_seed)) { |
4 | 8x |
rm(".Random.seed", envir = globalenv(), inherits = FALSE) |
5 |
} else { |
|
6 | 8x |
assign(".Random.seed", value = old_seed, envir = globalenv(), inherits = FALSE) |
7 |
} |
|
8 |
} |
|
9 | ||
10 |
construct_boot_data <- function(weighted_data, i = 1) { |
|
11 | ! |
if (is.null(weighted_data$boot)) stop("Must contain bootstrap results from estimate_weights()") |
12 | ! |
i <- as.integer(i) |
13 | ! |
R <- dim(weighted_data$boot)[3] |
14 | ! |
if (i < 1 || i > R) stop("i must be integer between 1 and ", R) |
15 | ||
16 | ! |
boot_data <- weighted_data$boot[, , i] |
17 | ! |
weighted_data$data <- weighted_data$data[boot_data[, 1], ] |
18 | ! |
weighted_data$data$weights <- boot_data[, 2] |
19 | ! |
weighted_data$data$scaled_weights <- boot_data[, 2] / sum(boot_data[, 2]) |
20 | ! |
weighted_data |
21 |
} |
|
22 | ||
23 | ||
24 |
transform_ratio <- function(object) { |
|
25 | 15x |
result <- object |
26 | 15x |
result$est <- exp(object$est) |
27 |
# log normal parameterization for SE |
|
28 | 15x |
result$se <- sqrt((exp(object$se^2) - 1) * exp(2 * object$est + object$se^2)) |
29 | 15x |
result$ci_l <- exp(object$ci_l) |
30 | 15x |
result$ci_u <- exp(object$ci_u) |
31 | 15x |
result |
32 |
} |
|
33 | ||
34 |
transform_absolute <- function(object) { |
|
35 | 1x |
result <- object |
36 | 1x |
result$est <- object$est * 100 |
37 | 1x |
result$se <- object$se * 100 |
38 | 1x |
result$ci_l <- object$ci_l * 100 |
39 | 1x |
result$ci_u <- object$ci_u * 100 |
40 | 1x |
result |
41 |
} |
1 |
#' Create pseudo IPD given aggregated binary data |
|
2 |
#' |
|
3 |
#' @param binary_agd a data.frame that take different formats depending on \code{format} |
|
4 |
#' @param format a string, "stacked" or "unstacked" |
|
5 |
#' |
|
6 |
#' @return a data.frame of pseudo binary IPD, with columns USUBJID, ARM, RESPONSE |
|
7 |
#' @example inst/examples/get_pseudo_ipd_binary_ex.R |
|
8 |
#' @export |
|
9 | ||
10 |
get_pseudo_ipd_binary <- function(binary_agd, format = c("stacked", "unstacked")) { |
|
11 |
# pre check |
|
12 | 4x |
if (format == "stacked") { |
13 | 3x |
if (!is.data.frame(binary_agd)) { |
14 | ! |
stop("stacked binary_agd should be data.frame with columns 'ARM', 'RESPONSE', 'COUNT'") |
15 |
} |
|
16 | 3x |
names(binary_agd) <- toupper(names(binary_agd)) |
17 | 3x |
if (!all(c("ARM", "RESPONSE", "COUNT") %in% names(binary_agd))) { |
18 | ! |
stop("stacked binary_agd should be data.frame with columns 'ARM', 'Response', 'Count'") |
19 |
} |
|
20 | 3x |
if (!is.logical(binary_agd$RESPONSE) && !all(toupper(binary_agd$RESPONSE) %in% c("YES", "NO"))) { |
21 | ! |
stop("'RESPONSE' column in stacked binary_agd should be either logical vector or character vector of 'Yes'/'No'") |
22 |
} |
|
23 | 3x |
if (nrow(binary_agd) %% 2 != 0) { |
24 | ! |
stop("nrow(binary_agd) is not even number, you may miss to provide 1 level of binary response to certain arm") |
25 |
} |
|
26 | 1x |
} else if (format == "unstacked") { |
27 | 1x |
if (!(is.data.frame(binary_agd) || is.matrix(binary_agd))) { |
28 | ! |
stop("unstacked binary_agd should be either a 1x2 or 2x2 data frame or matrix") |
29 |
} |
|
30 | 1x |
if (ncol(binary_agd) != 2 || !nrow(binary_agd) %in% c(1, 2)) { |
31 | ! |
stop("unstacked binary_agd should be either a 1x2 or 2x2 data frame or matrix") |
32 |
} |
|
33 | 1x |
bin_res <- toupper(colnames(binary_agd)) |
34 | 1x |
bin_res <- sort(bin_res) |
35 | 1x |
if (!(identical(bin_res, c("FALSE", "TRUE")) || identical(bin_res, c("NO", "YES")))) { |
36 | ! |
stop("column names of unstacked binary_agd should be either TRUE/FALSE or Yes/No") |
37 |
} |
|
38 |
} |
|
39 | ||
40 |
# pre process binary_agd, depending on format |
|
41 | 4x |
use_binary_agd <- switch(format, |
42 | 4x |
"stacked" = { |
43 | 3x |
names(binary_agd) <- toupper(names(binary_agd)) |
44 | 3x |
if (!is.logical(binary_agd$RESPONSE)) { |
45 | 3x |
binary_agd$RESPONSE <- toupper(binary_agd$RESPONSE) |
46 | 3x |
binary_agd$RESPONSE <- binary_agd$RESPONSE == "YES" |
47 |
} |
|
48 | 3x |
binary_agd |
49 |
}, |
|
50 | 4x |
"unstacked" = { |
51 | 1x |
trt_names <- rownames(binary_agd) |
52 | 1x |
bin_res <- toupper(colnames(binary_agd)) |
53 | 1x |
if ("YES" %in% bin_res) { |
54 | 1x |
bin_res <- ifelse(bin_res == "YES", "TRUE", "FALSE") |
55 | 1x |
colnames(binary_agd) <- bin_res |
56 |
} |
|
57 | 1x |
tmpout <- utils::stack(binary_agd) |
58 | 1x |
tmpout <- cbind(ARM = rep(trt_names, each = 2), tmpout) |
59 | 1x |
names(tmpout) <- c("ARM", "COUNT", "RESPONSE") |
60 | 1x |
rownames(tmpout) <- NULL |
61 | 1x |
tmpout$RESPONSE <- as.logical(tmpout$RESPONSE) |
62 | 1x |
tmpout |
63 |
} |
|
64 |
) |
|
65 | ||
66 |
# create pseudo binary IPD |
|
67 | 4x |
use_binary_agd$ARM <- factor(use_binary_agd$ARM, levels = unique(use_binary_agd$ARM)) |
68 | 4x |
n_per_arm <- tapply(use_binary_agd$COUNT, use_binary_agd$ARM, sum) |
69 | 4x |
n_yes_per_arm <- use_binary_agd$COUNT[use_binary_agd$RESPONSE] # use_binary_agd is already ordered as per factor ARM |
70 | ||
71 | 4x |
tmpipd <- data.frame( |
72 | 4x |
USUBJID = NA, |
73 | 4x |
ARM = unlist( |
74 | 4x |
mapply(rep, x = levels(use_binary_agd$ARM), each = n_per_arm, SIMPLIFY = FALSE, USE.NAMES = FALSE) |
75 |
), |
|
76 | 4x |
RESPONSE = unlist( |
77 | 4x |
lapply(seq_along(n_per_arm), function(ii) { |
78 | 5x |
c(rep(TRUE, n_yes_per_arm[ii]), rep(FALSE, n_per_arm[ii] - n_yes_per_arm[ii])) |
79 |
}) |
|
80 |
) |
|
81 |
) |
|
82 | 4x |
tmpipd$USUBJID <- paste0("pseudo_binary_subj_", seq_len(nrow(tmpipd))) |
83 | ||
84 |
# output |
|
85 | 4x |
tmpipd |
86 |
} |
|
87 | ||
88 | ||
89 |
#' Helper function to summarize outputs from glm fit |
|
90 |
#' |
|
91 |
#' @param binobj returned object from \code{stats::glm} |
|
92 |
#' @param legend label to indicate the binary fit |
|
93 |
#' @param weighted logical flag indicating whether weights have been applied in the glm fit |
|
94 |
#' @examples |
|
95 |
#' data(adrs_sat) |
|
96 |
#' pseudo_adrs <- get_pseudo_ipd_binary( |
|
97 |
#' binary_agd = data.frame( |
|
98 |
#' ARM = rep("B", 2), |
|
99 |
#' RESPONSE = c("YES", "NO"), |
|
100 |
#' COUNT = c(280, 120) |
|
101 |
#' ), |
|
102 |
#' format = "stacked" |
|
103 |
#' ) |
|
104 |
#' pseudo_adrs$RESPONSE <- as.numeric(pseudo_adrs$RESPONSE) |
|
105 |
#' combined_data <- rbind(adrs_sat[, c("USUBJID", "ARM", "RESPONSE")], pseudo_adrs) |
|
106 |
#' combined_data$ARM <- as.factor(combined_data$ARM) |
|
107 |
#' binobj_dat <- stats::glm(RESPONSE ~ ARM, combined_data, family = binomial("logit")) |
|
108 |
#' glm_makeup(binobj_dat) |
|
109 |
#' @export |
|
110 |
glm_makeup <- function(binobj, legend = "before matching", weighted = FALSE) { |
|
111 | 10x |
arm <- levels(binobj$data$ARM) |
112 | 10x |
if (!weighted) { |
113 | 6x |
n <- tapply(binobj$data$USUBJID, binobj$data$ARM, length) |
114 | 6x |
n_event <- tapply(binobj$data$RESPONSE, binobj$data$ARM, sum) |
115 |
} else { |
|
116 | 4x |
n <- tapply(binobj$data$weights, binobj$data$ARM, length) |
117 | 4x |
n_event <- tapply(binobj$data$weights * binobj$data$RESPONSE, binobj$data$ARM, sum) |
118 |
} |
|
119 | 10x |
data.frame( |
120 | 10x |
treatment = arm, |
121 | 10x |
type = legend, |
122 | 10x |
n = n, |
123 | 10x |
events = n_event, |
124 | 10x |
events_pct = n_event * 100 / n |
125 |
) |
|
126 |
} |
1 |
#' Bucher method for combining treatment effects |
|
2 |
#' |
|
3 |
#' Given two treatment effects of A vs. C and B vs. C |
|
4 |
#' derive the treatment effects of A vs. B using the Bucher method. |
|
5 |
#' Two-sided confidence interval and Z-test p-value are also calculated. |
|
6 |
#' Treatment effects and standard errors should be in log scale |
|
7 |
#' for hazard ratio, odds ratio, and risk ratio. |
|
8 |
#' Treatment effects and standard errors should be in natural scale |
|
9 |
#' for risk difference and mean difference. |
|
10 |
#' |
|
11 |
#' @param trt a list of two scalars for the study with the |
|
12 |
#' experimental arm. `'est'` is the point estimate and `'se'` |
|
13 |
#' is the standard error of the treatment effect. |
|
14 |
#' For time-to-event data, `'est'` and `'se'` should be point estimate and |
|
15 |
#' standard error of the log hazard ratio. |
|
16 |
#' For binary data, `'est'` and `'se'` should be point estimate and |
|
17 |
#' standard error of the log odds ratio, log risk ratio, or risk |
|
18 |
#' difference. |
|
19 |
#' For continuous data, `'est'` and `'se'` should be point estimate and |
|
20 |
#' standard error of the mean difference. |
|
21 |
#' @param com same as \code{trt}, but for the study with the |
|
22 |
#' control arm |
|
23 |
#' @param conf_lv a numerical scalar, prescribe confidence level to derive |
|
24 |
#' two-sided confidence interval for the treatment effect |
|
25 |
#' |
|
26 |
#' @return a list with 5 elements, |
|
27 |
#' \describe{ |
|
28 |
#' \item{est}{a scalar, point estimate of the treatment effect} |
|
29 |
#' \item{se}{a scalar, standard error of the treatment effect} |
|
30 |
#' \item{ci_l}{a scalar, lower confidence limit of a two-sided CI |
|
31 |
#' with prescribed nominal level by \code{conf_lv}} |
|
32 |
#' \item{ci_u}{a scalar, upper confidence limit of a two-sided CI |
|
33 |
#' with prescribed nominal level by \code{conf_lv}} |
|
34 |
#' \item{pval}{p-value of Z-test, with null hypothesis that |
|
35 |
#' \code{est} is zero} |
|
36 |
#' } |
|
37 |
#' @export |
|
38 |
#' @examples |
|
39 |
#' trt <- list(est = log(1.1), se = 0.2) |
|
40 |
#' com <- list(est = log(1.3), se = 0.18) |
|
41 |
#' result <- bucher(trt, com, conf_lv = 0.9) |
|
42 |
#' print(result, ci_digits = 3, pval_digits = 3) |
|
43 |
bucher <- function(trt, com, conf_lv = 0.95) { |
|
44 | 1x |
if (!isTRUE(is.finite(trt$est))) stop("trt$est is not valid: ", trt$est) |
45 | 1x |
if (!isTRUE(is.finite(trt$se))) stop("trt$se is not valid: ", trt$se) |
46 | 1x |
if (!isTRUE(is.finite(com$est))) stop("com$est is not valid: ", com$est) |
47 | 1x |
if (!isTRUE(is.finite(com$se))) stop("com$se is not valid: ", com$se) |
48 | 1x |
if (conf_lv < 0 || 1 < conf_lv) stop("conf_lv must be in (0, 1): ", conf_lv) |
49 | ||
50 | 40x |
est <- trt$est - com$est |
51 | 40x |
se <- sqrt(trt$se^2 + com$se^2) |
52 | 40x |
ci_l <- est - stats::qnorm(0.5 + conf_lv / 2) * se |
53 | 40x |
ci_u <- est + stats::qnorm(0.5 + conf_lv / 2) * se |
54 | 40x |
if (est > 0) { |
55 | 8x |
pval <- 2 * (1 - stats::pnorm(est, 0, se)) |
56 |
} else { |
|
57 | 32x |
pval <- 2 * stats::pnorm(est, 0, se) |
58 |
} |
|
59 | ||
60 | 40x |
outdata <- list( |
61 | 40x |
est = est, |
62 | 40x |
se = se, |
63 | 40x |
ci_l = ci_l, |
64 | 40x |
ci_u = ci_u, |
65 | 40x |
pval = pval |
66 |
) |
|
67 | ||
68 | 40x |
class(outdata) <- c("maicplus_bucher", "list") |
69 | 40x |
outdata |
70 |
} |
|
71 | ||
72 | ||
73 |
#' Calculate standard error from the reported confidence interval. |
|
74 |
#' |
|
75 |
#' Comparator studies often only report confidence interval of the |
|
76 |
#' treatment effects. This function calculates standard error of the |
|
77 |
#' treatment effect given the reported confidence interval. |
|
78 |
#' For relative treatment effect (i.e. hazard ratio, odds ratio, and |
|
79 |
#' risk ratio), the function would log the confidence interval. |
|
80 |
#' For risk difference and mean difference, |
|
81 |
#' we do not log the confidence interval. |
|
82 |
#' The option to log the confidence interval is controlled |
|
83 |
#' by `'log'` parameter. |
|
84 |
#' |
|
85 |
#' @param CI_lower Reported lower percentile value of the |
|
86 |
#' treatment effect |
|
87 |
#' @param CI_upper Reported upper percentile value of the |
|
88 |
#' treatment effect |
|
89 |
#' @param CI_perc Percentage of confidence interval reported |
|
90 |
#' @param log Whether the confidence interval should be logged. |
|
91 |
#' For relative treatment effect, log should be applied because |
|
92 |
#' estimated log treatment effect is approximately normally distributed. |
|
93 |
#' @return Standard error of log relative treatment effect if `'log'` |
|
94 |
#' is true and standard error of the treatment effect if `'log'` |
|
95 |
#' is false |
|
96 |
#' @examples |
|
97 |
#' find_SE_from_CI(CI_lower = 0.55, CI_upper = 0.90, CI_perc = 0.95) |
|
98 |
#' @export |
|
99 | ||
100 |
find_SE_from_CI <- function(CI_lower = NULL, CI_upper = NULL, |
|
101 |
CI_perc = 0.95, log = TRUE) { |
|
102 | 3x |
if (CI_perc > 1 || CI_perc < 0) { |
103 | ! |
stop("CI_perc has to be between 0 and 1") |
104 |
} |
|
105 | ||
106 | 3x |
if (is.null(CI_lower) || is.null(CI_upper)) { |
107 | ! |
stop("Both CI_lower and CI_upper need to be specified") |
108 |
} |
|
109 | ||
110 | 3x |
if (!is.numeric(CI_lower) || !is.numeric(CI_upper)) { |
111 | ! |
stop("Both CI_lower and CI_upper need to be specified") |
112 |
} |
|
113 | ||
114 | 3x |
alpha <- 1 - CI_perc |
115 | 3x |
se <- ifelse(log, |
116 | 3x |
(log(CI_upper) - log(CI_lower)) / (2 * qnorm(1 - alpha / 2)), |
117 | 3x |
(CI_upper - CI_lower) / (2 * qnorm(1 - alpha / 2)) |
118 |
) |
|
119 | 3x |
return(se) |
120 |
} |
|
121 | ||
122 |
#' Print method for `maicplus_bucher` object |
|
123 |
#' |
|
124 |
#' @param x `maicplus_bucher` object |
|
125 |
#' @param ci_digits an integer, number of decimal places for point |
|
126 |
#' estimate and derived confidence limits |
|
127 |
#' @param pval_digits an integer, number of decimal places to display |
|
128 |
#' Z-test p-value |
|
129 |
#' @param exponentiate whether the treatment effect and confidence |
|
130 |
#' interval should be exponentiated. This applies to relative |
|
131 |
#' treatment effects. Default is set to false. |
|
132 |
#' @param ... not used |
|
133 |
#' @describeIn bucher Print method for `maicplus_bucher` objects |
|
134 |
#' @export |
|
135 | ||
136 |
print.maicplus_bucher <- function(x, ci_digits = 2, pval_digits = 3, |
|
137 |
exponentiate = FALSE, ...) { |
|
138 | 1x |
output <- reformat(x, ci_digits, pval_digits, |
139 | 1x |
show_pval = TRUE, exponentiate |
140 |
) |
|
141 | 1x |
print(output) |
142 |
} |
|
143 | ||
144 |
#' Reformat `maicplus_bucher` alike object |
|
145 |
#' |
|
146 |
#' @param x a list, structured like a `maicplus_bucher` object |
|
147 |
#' @param ci_digits an integer, number of decimal places for point |
|
148 |
#' estimate and derived confidence limits |
|
149 |
#' @param pval_digits an integer, number of decimal places to display |
|
150 |
#' Z-test p-value |
|
151 |
#' @param show_pval a logical value, default is TRUE. If FALSE, p-value will not |
|
152 |
#' be output as the second element of the character vector |
|
153 |
#' @param exponentiate whether the treatment effect and confidence |
|
154 |
#' interval should be exponentiated. This applies to relative |
|
155 |
#' treatment effects. Default is set to false. |
|
156 |
#' @keywords internal |
|
157 | ||
158 |
reformat <- function(x, ci_digits = 2, pval_digits = 3, |
|
159 |
show_pval = TRUE, exponentiate = FALSE) { |
|
160 | 1x |
transform_this <- function(x) { |
161 | 3x |
ifelse(exponentiate, exp(x), x) |
162 |
} |
|
163 | ||
164 | 1x |
a <- format(round(transform_this(x$est), ci_digits), |
165 | 1x |
nsmall = ci_digits |
166 |
) |
|
167 | 1x |
b <- format(round(transform_this(x$ci_l), ci_digits), |
168 | 1x |
nsmall = ci_digits |
169 |
) |
|
170 | 1x |
c <- format(round(transform_this(x$ci_u), ci_digits), |
171 | 1x |
nsmall = ci_digits |
172 |
) |
|
173 | 1x |
res <- paste0(a, "[", b, "; ", c, "]") |
174 | ||
175 | 1x |
disp_pval <- round(x$pval, pval_digits) |
176 | 1x |
disp_pval <- |
177 | 1x |
ifelse(disp_pval == 0, |
178 | 1x |
paste0("<", 1 / (10^pval_digits)), |
179 | 1x |
format(disp_pval, nsmall = pval_digits) |
180 |
) |
|
181 | ||
182 | 1x |
if (show_pval) { |
183 | 1x |
output <- c(res, disp_pval) |
184 | 1x |
names(output) <- c("result", "pvalue") |
185 |
} else { |
|
186 | ! |
output <- res |
187 | ! |
names(output) <- "result" |
188 |
} |
|
189 | ||
190 | 1x |
output |
191 |
} |
1 |
.onLoad <- function(libname, pkgname) { |
|
2 | ! |
set_time_conversion() |
3 |
} |