| 1 |
# Functions for pre-processing data before conduct MAIC |
|
| 2 | ||
| 3 |
# Functions to be exported --------------------------------------- |
|
| 4 | ||
| 5 |
#' Pre-process aggregate data |
|
| 6 |
#' |
|
| 7 |
#' This function checks the format of the aggregate data. |
|
| 8 |
#' Data is required to have three columns: STUDY, ARM, and N. |
|
| 9 |
#' Column names that do not have legal suffixes (MEAN, MEDIAN, SD, COUNT, or PROP) are dropped. |
|
| 10 |
#' If a variable is a count variable, it is converted to proportions by dividing the sample size (N). |
|
| 11 |
#' Note, when the count is specified, proportion is always calculated based on the count, that is, |
|
| 12 |
#' specified proportion will be ignored if applicable. |
|
| 13 |
#' If the aggregated data comes from multiple sources (i.e. different analysis population) and |
|
| 14 |
#' sample size differs for each variable, one option is to specify proportion directly instead of count by using suffix |
|
| 15 |
#' _PROP. |
|
| 16 |
#' |
|
| 17 |
#' @param raw_agd raw aggregate data should contain STUDY, ARM, and N. Variable names should be followed |
|
| 18 |
#' by legal suffixes (i.e. MEAN, MEDIAN, SD, COUNT, or PROP). |
|
| 19 |
#' |
|
| 20 |
#' @examples |
|
| 21 |
#' data(agd) |
|
| 22 |
#' agd <- process_agd(agd) |
|
| 23 |
#' |
|
| 24 |
#' @return pre-processed aggregate level data |
|
| 25 |
#' @export |
|
| 26 | ||
| 27 |
process_agd <- function(raw_agd) {
|
|
| 28 | 6x |
raw_agd <- as.data.frame(raw_agd) |
| 29 |
# make all column names to be capital letters to avoid different style |
|
| 30 | 6x |
names(raw_agd) <- toupper(names(raw_agd)) |
| 31 | ||
| 32 |
# define column name patterns[-] |
|
| 33 | 6x |
must_exist <- c("STUDY", "ARM", "N")
|
| 34 | 6x |
legal_suffix <- c("MEAN", "MEDIAN", "SD", "COUNT", "PROP")
|
| 35 | ||
| 36 |
# swap "TREATMENT" column to "ARM", if applicable |
|
| 37 | 6x |
if ("TREATMENT" %in% names(raw_agd) && (!"ARM" %in% names(raw_agd))) {
|
| 38 | 1x |
raw_agd$ARM <- raw_agd$TREATMENT |
| 39 | 1x |
raw_agd <- raw_agd[, names(raw_agd) != "TREATMENT"] |
| 40 | 1x |
warning("'TREATMENT' column is renamed as 'ARM'")
|
| 41 |
} |
|
| 42 | ||
| 43 |
# check: must exist |
|
| 44 | 6x |
if (!all(must_exist %in% names(raw_agd))) {
|
| 45 | ! |
stop("At least 1 of the must-exists columns (STUDY, ARM, N) cannot be found in raw_agd!")
|
| 46 |
} |
|
| 47 | ||
| 48 |
# check: legal suffix |
|
| 49 | 6x |
other_colnames <- setdiff(names(raw_agd), must_exist) |
| 50 | 6x |
ind1 <- grepl("_", other_colnames, fixed = TRUE)
|
| 51 | 6x |
ind2 <- sapply(other_colnames, function(xx) {
|
| 52 | 42x |
tmp <- unlist(strsplit(xx, split = "_")) |
| 53 | 42x |
tmp[length(tmp)] # this deployment is robust to the cases that there are multiple _ in the column name |
| 54 |
}) |
|
| 55 | 6x |
ind2 <- (ind2 %in% legal_suffix) |
| 56 | ||
| 57 | 6x |
use_cols <- other_colnames[ind1 & ind2] |
| 58 | 6x |
use_agd <- raw_agd[, c(must_exist, use_cols), drop = FALSE] |
| 59 | 6x |
if (!all(other_colnames %in% use_cols)) {
|
| 60 | ! |
warning(paste0( |
| 61 | ! |
"following columns are ignored since it does not follow the naming conventions:", |
| 62 | ! |
paste(setdiff(other_colnames, use_cols), collapse = ",") |
| 63 |
)) |
|
| 64 |
} |
|
| 65 | ||
| 66 |
# If the aggregate data is divided by different arms, calculate pooled arm statistics using |
|
| 67 |
# complete_agd function; complete statistics is specified by ARM=="Total" |
|
| 68 | 6x |
if (!"total" %in% tolower(use_agd$ARM)) {
|
| 69 | ! |
use_agd <- complete_agd(use_agd) |
| 70 |
} |
|
| 71 | ||
| 72 |
# calculate percentage columns |
|
| 73 | 6x |
ind <- grepl("_COUNT$", names(use_agd))
|
| 74 | 6x |
if (any(ind)) {
|
| 75 | 6x |
for (i in which(ind)) {
|
| 76 | 18x |
tmp_prop <- use_agd[[i]] / use_agd$N |
| 77 |
# in case some count are not specified, but proportion are specified, copy over those proportions |
|
| 78 |
# this also means, in case count is specified, proportion is ignored even it is specified |
|
| 79 | 18x |
prop_name_i <- gsub("_COUNT$", "_PROP", names(use_agd)[i])
|
| 80 | 18x |
if (prop_name_i %in% names(use_agd)) {
|
| 81 | ! |
tmp_prop[is.na(tmp_prop)] <- use_agd[is.na(tmp_prop), prop_name_i] |
| 82 | ! |
names(use_agd)[names(use_agd) == prop_name_i] <- paste0(prop_name_i, "_redundant") |
| 83 |
} |
|
| 84 | 18x |
use_agd[[i]] <- tmp_prop |
| 85 |
} |
|
| 86 | 6x |
names(use_agd) <- gsub("_COUNT$", "_PROP", names(use_agd))
|
| 87 |
} |
|
| 88 | 6x |
use_agd <- use_agd[, !grepl("_redundant$", names(use_agd))]
|
| 89 | ||
| 90 |
# output |
|
| 91 | 6x |
with(use_agd, use_agd[tolower(ARM) == "total", , drop = FALSE]) |
| 92 |
} |
|
| 93 | ||
| 94 | ||
| 95 |
#' Create dummy variables from categorical variables in an individual patient data (ipd) |
|
| 96 |
#' |
|
| 97 |
#' This is a convenient function to convert categorical variables into dummy binary variables. |
|
| 98 |
#' This would be especially useful if the variable has more than two factors. |
|
| 99 |
#' Note that the original variable is kept after a variable is dummized. |
|
| 100 |
#' |
|
| 101 |
#' @param raw_ipd ipd data that contains variable to dummize |
|
| 102 |
#' @param dummize_cols vector of column names to binarize |
|
| 103 |
#' @param dummize_ref_level vector of reference level of the variables to binarize |
|
| 104 |
#' |
|
| 105 |
#' @examples |
|
| 106 |
#' data(adsl_twt) |
|
| 107 |
#' dummize_ipd(adsl_twt, dummize_cols = c("SEX"), dummize_ref_level = c("Male"))
|
|
| 108 |
#' |
|
| 109 |
#' @return ipd with dummized columns |
|
| 110 |
#' @export |
|
| 111 | ||
| 112 |
dummize_ipd <- function(raw_ipd, dummize_cols, dummize_ref_level) {
|
|
| 113 | 2x |
for (i in seq_along(dummize_cols)) {
|
| 114 | 3x |
yy <- raw_ipd[[dummize_cols[i]]] |
| 115 | 3x |
yy_levels <- na.omit(unique(yy)) |
| 116 | 3x |
yy <- factor(as.character(yy), levels = c(dummize_ref_level[i], setdiff(yy_levels, dummize_ref_level[i]))) |
| 117 | 3x |
new_yy <- sapply(levels(yy)[-1], function(j) {
|
| 118 | 3x |
as.numeric(yy == j) |
| 119 |
}) |
|
| 120 | 3x |
new_yy <- as.data.frame(new_yy) |
| 121 | 3x |
names(new_yy) <- toupper(paste(dummize_cols[i], levels(yy)[-1], sep = "_")) |
| 122 | 3x |
raw_ipd <- cbind(raw_ipd, new_yy) |
| 123 |
} |
|
| 124 | 2x |
raw_ipd |
| 125 |
} |
|
| 126 | ||
| 127 | ||
| 128 |
#' Center individual patient data (IPD) variables using aggregate data averages |
|
| 129 |
#' |
|
| 130 |
#' This function subtracts IPD variables (prognostic variables and/or effect modifiers) |
|
| 131 |
#' by the aggregate data averages. This centering is needed in order to calculate weights. |
|
| 132 |
#' IPD and aggregate data variable names should match. |
|
| 133 |
#' |
|
| 134 |
#' @param ipd IPD variable names should match the aggregate data names without the suffix. |
|
| 135 |
#' This would involve either changing the aggregate data name or the ipd name. |
|
| 136 |
#' For instance, if we binarize SEX variable with MALE as a reference using [dummize_ipd], |
|
| 137 |
#' function names the new variable as SEX_MALE. |
|
| 138 |
#' In this case, SEX_MALE should also be available in the aggregate data. |
|
| 139 |
#' @param agd pre-processed aggregate data which contain STUDY, ARM, and N. Variable names |
|
| 140 |
#' should be followed by legal suffixes (i.e. MEAN, MEDIAN, SD, or PROP). Note that COUNT |
|
| 141 |
#' suffix is no longer accepted. |
|
| 142 |
#' @examples |
|
| 143 |
#' data(adsl_sat) |
|
| 144 |
#' data(agd) |
|
| 145 |
#' agd <- process_agd(agd) |
|
| 146 |
#' ipd_centered <- center_ipd(ipd = adsl_sat, agd = agd) |
|
| 147 |
#' @return centered ipd using aggregate level data averages |
|
| 148 |
#' @export |
|
| 149 | ||
| 150 |
center_ipd <- function(ipd, agd) {
|
|
| 151 |
# regularized column name patterns |
|
| 152 | 10x |
must_exist <- c("STUDY", "ARM", "N")
|
| 153 | 10x |
legal_suffix <- c("MEAN", "MEDIAN", "SD", "PROP")
|
| 154 | 10x |
suffix_pat <- paste(paste0("_", legal_suffix, "$"), collapse = "|")
|
| 155 | ||
| 156 | 10x |
for (i in seq_len(nrow(agd))) { # study i
|
| 157 | 10x |
study_id <- agd$STUDY[i] |
| 158 | 10x |
use_agd <- agd[i, !names(agd) %in% must_exist, drop = FALSE] |
| 159 | 10x |
param_id <- gsub(suffix_pat, "", names(use_agd)) |
| 160 | ||
| 161 | 10x |
for (j in seq_len(ncol(use_agd))) { # effect modifier j
|
| 162 | ! |
if (is.na(use_agd[[j]])) next |
| 163 | ||
| 164 | 69x |
ipd_param <- param_id[j] |
| 165 | ||
| 166 | 69x |
if (grepl("_MEAN$|_PROP$", names(use_agd)[j])) {
|
| 167 | 40x |
ipd[[paste0(ipd_param, "_", "CENTERED")]] <- ipd[[ipd_param]] - use_agd[[j]] |
| 168 | 29x |
} else if (grepl("_MEDIAN$", names(use_agd)[j])) {
|
| 169 | 19x |
ipd[[paste0(ipd_param, "_MEDIAN_", "CENTERED")]] <- ipd[[ipd_param]] > use_agd[[j]] |
| 170 | 19x |
ipd[[paste0(ipd_param, "_MEDIAN_", "CENTERED")]] <- ipd[[paste0(ipd_param, "_MEDIAN_", "CENTERED")]] - 0.5 |
| 171 | 10x |
} else if (grepl("_SD$", names(use_agd)[j])) {
|
| 172 | 10x |
ipd[[paste0(ipd_param, "_SQUARED_", "CENTERED")]] <- ipd[[ipd_param]]^2 |
| 173 | 10x |
tmp_aim <- use_agd[[j]]^2 + (use_agd[[paste0(ipd_param, "_MEAN")]]^2) |
| 174 | 10x |
ipd[[paste0(ipd_param, "_SQUARED_", "CENTERED")]] <- ipd[[paste0(ipd_param, "_SQUARED_", "CENTERED")]] - tmp_aim |
| 175 |
} |
|
| 176 |
} # end of j |
|
| 177 |
} # end of i |
|
| 178 | ||
| 179 |
# output |
|
| 180 | 10x |
ipd |
| 181 |
} |
|
| 182 | ||
| 183 | ||
| 184 |
#' Calculate pooled arm statistics in Aggregated Data (AgD) based on arm-specific statistics |
|
| 185 |
#' |
|
| 186 |
#' This is a convenient function to pool arm statistics. This function is called |
|
| 187 |
#' within process_agd and when the ARM is not equal to "Total". Note pooled |
|
| 188 |
#' median can't be calculated and it is only an approximation. |
|
| 189 |
#' |
|
| 190 |
#' @param use_agd aggregated data that is processed within process_agd |
|
| 191 |
#' @noRd |
|
| 192 |
#' @return Complete N, count, mean, sd, and median for the pooled arm |
|
| 193 | ||
| 194 |
complete_agd <- function(use_agd) {
|
|
| 195 | 1x |
use_agd <- as.data.frame(use_agd) |
| 196 | 1x |
use_agd <- with(use_agd, {
|
| 197 | 1x |
use_agd[tolower(ARM) != "total", , drop = FALSE] |
| 198 |
}) |
|
| 199 | ||
| 200 | ! |
if (nrow(use_agd) < 2) stop("error in call complete_agd: need to have at least 2 rows that ARM!='total' ")
|
| 201 | ||
| 202 | 1x |
rowId <- nrow(use_agd) + 1 |
| 203 | 1x |
use_agd[rowId, ] <- NA |
| 204 | 1x |
use_agd$STUDY[rowId] <- use_agd$STUDY[1] |
| 205 | 1x |
use_agd$ARM[rowId] <- "total" |
| 206 | ||
| 207 |
# complete N and count |
|
| 208 | 1x |
NN <- use_agd[["N"]][rowId] <- sum(use_agd[["N"]], na.rm = TRUE) |
| 209 | 1x |
nn <- use_agd[["N"]][-rowId] |
| 210 | 1x |
for (i in grep("_COUNT$", names(use_agd), value = TRUE)) {
|
| 211 | 1x |
use_agd[[i]][rowId] <- sum(use_agd[[i]][-rowId], na.rm = TRUE) |
| 212 |
} |
|
| 213 | ||
| 214 |
# complete MEAN |
|
| 215 | 1x |
for (i in grep("_MEAN$", names(use_agd), value = TRUE)) {
|
| 216 | 1x |
use_agd[[i]][rowId] <- sum(use_agd[[i]][-rowId] * nn) / NN |
| 217 |
} |
|
| 218 | ||
| 219 |
# complete SD |
|
| 220 | 1x |
for (i in grep("_SD$", names(use_agd), value = TRUE)) {
|
| 221 | 1x |
use_agd[[i]][rowId] <- sqrt(sum(use_agd[[i]][-rowId]^2 * (nn - 1)) / (NN - 1)) |
| 222 |
} |
|
| 223 | ||
| 224 |
# complete MEDIAN, approximately!! |
|
| 225 | 1x |
for (i in grep("_MEDIAN$", names(use_agd), value = TRUE)) {
|
| 226 | 1x |
use_agd[[i]][rowId] <- mean(use_agd[[i]][-rowId]) |
| 227 |
} |
|
| 228 | ||
| 229 |
# output |
|
| 230 | 1x |
rownames(use_agd) <- NULL |
| 231 | 1x |
use_agd |
| 232 |
} |
|
| 233 | ||
| 234 | ||
| 235 |
#' helper function: transform TTE ADaM data to suitable input for survival R package |
|
| 236 |
#' |
|
| 237 |
#' @param dd data frame, ADTTE read via `haven::read_sas` |
|
| 238 |
#' @param time_scale a character string, 'years', 'months', 'weeks' or 'days', time unit of median survival time |
|
| 239 |
#' @param trt values to include in treatment column |
|
| 240 |
#' |
|
| 241 |
#' @return a data frame that can be used as input to `survival::Surv` |
|
| 242 |
#' @keywords internal |
|
| 243 | ||
| 244 |
ext_tte_transfer <- function(dd, time_scale = "months", trt = NULL) {
|
|
| 245 | ! |
time_scale <- match.arg(time_scale, choices = c("years", "months", "weeks", "days"))
|
| 246 | ! |
time_units <- get_time_conversion(time_scale) |
| 247 | ||
| 248 | ! |
if ("CENSOR" %in% names(dd)) {
|
| 249 | ! |
dd <- dd[!is.na(dd$CENSOR), ] |
| 250 | ! |
dd$status <- as.numeric(dd$CENSOR == 0) |
| 251 |
} |
|
| 252 | ! |
if ("EVENT" %in% names(dd)) {
|
| 253 | ! |
dd$status <- as.numeric(as.character(dd$EVENT)) |
| 254 |
} |
|
| 255 | ! |
if ("TIME" %in% names(dd)) {
|
| 256 | ! |
dd$AVAL <- as.numeric(as.character(dd$TIME)) |
| 257 |
} |
|
| 258 | ||
| 259 | ! |
dd$time <- dd$AVAL * time_units |
| 260 | ! |
if (!is.null(trt)) dd$treatment <- trt |
| 261 | ! |
as.data.frame(dd) |
| 262 |
} |
| 1 |
#' Unanchored MAIC for binary and time-to-event endpoint |
|
| 2 |
#' |
|
| 3 |
#' This is a wrapper function to provide adjusted effect estimates and relevant statistics in unanchored case (i.e. |
|
| 4 |
#' there is no common comparator arm in the internal and external trial). |
|
| 5 |
#' |
|
| 6 |
#' @param weights_object an object returned by \code{estimate_weight}
|
|
| 7 |
#' @param ipd a data frame that meet format requirements in 'Details', individual patient data (IPD) of internal trial |
|
| 8 |
#' @param pseudo_ipd a data frame, pseudo IPD from digitized KM curve of external trial (for time-to-event endpoint) or |
|
| 9 |
#' from contingency table (for binary endpoint) |
|
| 10 |
#' @param trt_ipd a string, name of the interested investigation arm in internal trial \code{dat_igd} (real IPD)
|
|
| 11 |
#' @param trt_agd a string, name of the interested investigation arm in external trial \code{pseudo_ipd} (pseudo IPD)
|
|
| 12 |
#' @param trt_var_ipd a string, column name in \code{ipd} that contains the treatment assignment
|
|
| 13 |
#' @param trt_var_agd a string, column name in \code{ipd} that contains the treatment assignment
|
|
| 14 |
#' @param normalize_weights logical, default is \code{FALSE}. If \code{TRUE},
|
|
| 15 |
#' \code{scaled_weights} (normalized weights) in \code{weights_object$data} will be used.
|
|
| 16 |
#' @param endpoint_type a string, one out of the following "binary", "tte" (time to event) |
|
| 17 |
#' @param eff_measure a string, "RD" (risk difference), "OR" (odds ratio), "RR" (relative risk) for a binary endpoint; |
|
| 18 |
#' "HR" for a time-to-event endpoint. By default is \code{NULL}, "OR" is used for binary case, otherwise "HR" is used.
|
|
| 19 |
#' @param boot_ci_type a string, one of `c("norm","basic", "stud", "perc", "bca")` to select the type of bootstrap
|
|
| 20 |
#' confidence interval. See [boot::boot.ci] for more details. |
|
| 21 |
#' @param endpoint_name a string, name of time to event endpoint, to be show in the last line of title |
|
| 22 |
#' @param time_scale a string, time unit of median survival time, taking a value of 'years', 'months', 'weeks' or |
|
| 23 |
#' 'days'. NOTE: it is assumed that values in TIME column of \code{ipd} and \code{pseudo_ipd} is in the unit of days
|
|
| 24 |
#' @param km_conf_type a string, pass to \code{conf.type} of \code{survfit}
|
|
| 25 |
#' @param binary_robust_cov_type a string to pass to argument `type` of [sandwich::vcovHC], see possible options in the |
|
| 26 |
#' documentation of that function. Default is `"HC3"` |
|
| 27 |
#' |
|
| 28 |
#' @details For time-to-event analysis, it is required that input \code{ipd} and \code{pseudo_ipd} to have the following
|
|
| 29 |
#' columns. This function is not sensitive to upper or lower case of letters in column names. |
|
| 30 |
#' \itemize{
|
|
| 31 |
#' \item USUBJID - character, unique subject ID |
|
| 32 |
#' \item ARM - character or factor, treatment indicator, column name does not have to be 'ARM'. User specify in |
|
| 33 |
#' \code{trt_var_ipd} and \code{trt_var_agd}
|
|
| 34 |
#' \item EVENT - numeric, 1 for censored/death, 0 for otherwise |
|
| 35 |
#' \item TIME - numeric column, observation time of the \code{EVENT}; unit in days
|
|
| 36 |
#' } |
|
| 37 |
#' |
|
| 38 |
#' @importFrom survival survfit Surv coxph |
|
| 39 |
#' @importFrom lmtest coeftest coefci |
|
| 40 |
#' @importFrom sandwich vcovHC |
|
| 41 |
#' @importFrom boot boot boot.ci |
|
| 42 |
#' @return A list, contains 'descriptive' and 'inferential' |
|
| 43 |
#' @example inst/examples/maic_unanchored_ex.R |
|
| 44 |
#' @example inst/examples/maic_unanchored_binary_ex.R |
|
| 45 |
#' @export |
|
| 46 | ||
| 47 |
maic_unanchored <- function(weights_object, |
|
| 48 |
ipd, |
|
| 49 |
pseudo_ipd, |
|
| 50 |
trt_ipd, |
|
| 51 |
trt_agd, |
|
| 52 |
trt_var_ipd = "ARM", |
|
| 53 |
trt_var_agd = "ARM", |
|
| 54 |
normalize_weights = FALSE, |
|
| 55 |
endpoint_type = "tte", |
|
| 56 |
endpoint_name = "Time to Event Endpoint", |
|
| 57 |
eff_measure = c("HR", "OR", "RR", "RD"),
|
|
| 58 |
boot_ci_type = c("norm", "basic", "stud", "perc", "bca"),
|
|
| 59 |
# time to event specific args |
|
| 60 |
time_scale = "months", |
|
| 61 |
km_conf_type = "log-log", |
|
| 62 |
# binary specific args |
|
| 63 |
binary_robust_cov_type = "HC3") {
|
|
| 64 |
# ==> Initial Setup ------------------------------------------ |
|
| 65 |
# ~~~ Create the hull for the output from this function |
|
| 66 | 6x |
res <- list( |
| 67 | 6x |
descriptive = list(), |
| 68 | 6x |
inferential = list() |
| 69 |
) |
|
| 70 | ||
| 71 | 6x |
res_AB_unadj <- res_AB <- list( |
| 72 | 6x |
est = NA, |
| 73 | 6x |
se = NA, |
| 74 | 6x |
ci_l = NA, |
| 75 | 6x |
ci_u = NA, |
| 76 | 6x |
pval = NA |
| 77 |
) |
|
| 78 | ||
| 79 |
# ~~~ Initial colname process and precheck on effect measure |
|
| 80 | 6x |
names(ipd) <- toupper(names(ipd)) |
| 81 | 6x |
names(pseudo_ipd) <- toupper(names(pseudo_ipd)) |
| 82 | 6x |
trt_var_ipd <- toupper(trt_var_ipd) |
| 83 | 6x |
trt_var_agd <- toupper(trt_var_agd) |
| 84 | ||
| 85 | ! |
if (length(eff_measure) > 1) eff_measure <- NULL |
| 86 | ! |
if (is.null(eff_measure)) eff_measure <- list(binary = "OR", tte = "HR")[[endpoint_type]] |
| 87 | ||
| 88 |
# ~~~ Setup ARM column and make related pre-checks |
|
| 89 | ! |
if (!trt_var_ipd %in% names(ipd)) stop("cannot find arm indicator column trt_var_ipd in ipd")
|
| 90 | ! |
if (!trt_var_agd %in% names(pseudo_ipd)) stop("cannot find arm indicator column trt_var_agd in pseudo_ipd")
|
| 91 | ! |
if (trt_var_ipd != "ARM") ipd$ARM <- ipd[[trt_var_ipd]] |
| 92 | ! |
if (trt_var_agd != "ARM") pseudo_ipd$ARM <- pseudo_ipd[[trt_var_agd]] |
| 93 | 6x |
ipd$ARM <- as.character(ipd$ARM) # just to avoid potential error when merging |
| 94 | 6x |
pseudo_ipd$ARM <- as.character(pseudo_ipd$ARM) # just to avoid potential error when merging |
| 95 | ! |
if (!trt_ipd %in% ipd$ARM) stop("trt_ipd does not exist in ipd$ARM")
|
| 96 | ! |
if (!trt_agd %in% pseudo_ipd$ARM) stop("trt_agd does not exist in pseudo_ipd$ARM")
|
| 97 | ||
| 98 |
# ~~~ More pre-checks |
|
| 99 | 6x |
endpoint_type <- match.arg(endpoint_type, c("binary", "tte"))
|
| 100 | 6x |
if (!"maicplus_estimate_weights" %in% class(weights_object)) {
|
| 101 | ! |
stop("weights_object should be an object returned by estimate_weights")
|
| 102 |
} |
|
| 103 | 6x |
if (any(duplicated(ipd$USUBJID))) {
|
| 104 | ! |
warning( |
| 105 | ! |
"check your ipd, it has duplicated usubjid, this indicates, ", |
| 106 | ! |
"it might contain multiple endpoints for each subject" |
| 107 |
) |
|
| 108 |
} |
|
| 109 | 6x |
if (!all(ipd$USUBJID %in% weights_object$data$USUBJID)) {
|
| 110 | ! |
stop( |
| 111 | ! |
"These pts in ipd cannot be found in weights_object ", |
| 112 | ! |
toString(setdiff(ipd$USUBJID, weights_object$USUBJID)) |
| 113 |
) |
|
| 114 |
} |
|
| 115 | 6x |
time_scale <- match.arg(arg = time_scale, choices = c("days", "weeks", "months", "years"))
|
| 116 | 6x |
if (endpoint_type == "binary") { # for binary effect measure
|
| 117 | ||
| 118 | ! |
if (any(!c("USUBJID", "RESPONSE") %in% names(ipd))) stop("ipd should have 'USUBJID', 'RESPONSE' columns at minimum")
|
| 119 | 4x |
eff_measure <- match.arg(eff_measure, choices = c("OR", "RD", "RR"), several.ok = FALSE)
|
| 120 | 4x |
binary_robust_cov_type <- match.arg( |
| 121 | 4x |
binary_robust_cov_type, |
| 122 | 4x |
choices = c("HC3", "const", "HC", "HC0", "HC1", "HC2", "HC4", "HC4m", "HC5")
|
| 123 |
) |
|
| 124 | 2x |
} else if (endpoint_type == "tte") { # for time to event effect measure
|
| 125 | ||
| 126 | 2x |
if (!all(c("USUBJID", "TIME", "EVENT", trt_var_ipd) %in% names(ipd))) {
|
| 127 | ! |
stop("ipd needs to include at least USUBJID, TIME, EVENT, ", trt_var_ipd)
|
| 128 |
} |
|
| 129 | 2x |
if (!all(c("TIME", "EVENT", trt_var_agd) %in% names(pseudo_ipd))) {
|
| 130 | ! |
stop("pseudo_ipd needs to include at least TIME, EVENT, ", trt_var_agd)
|
| 131 |
} |
|
| 132 | 2x |
eff_measure <- match.arg(eff_measure, choices = c("HR"), several.ok = FALSE)
|
| 133 |
} |
|
| 134 | 6x |
boot_ci_type <- match.arg(boot_ci_type) |
| 135 | ||
| 136 |
# ==> IPD and AgD data preparation ------------------------------------------ |
|
| 137 |
# : subset ipd, retain only ipd from interested trts |
|
| 138 | 6x |
ipd <- ipd[ipd$ARM == trt_ipd, , drop = TRUE] |
| 139 | 6x |
pseudo_ipd <- pseudo_ipd[pseudo_ipd$ARM == trt_agd, , drop = TRUE] |
| 140 | ||
| 141 |
# : assign weights to real and pseudo ipd |
|
| 142 | 6x |
if (normalize_weights) {
|
| 143 | ! |
ipd$weights <- weights_object$data$scaled_weights[match(ipd$USUBJID, weights_object$data$USUBJID)] |
| 144 |
} else {
|
|
| 145 | 6x |
ipd$weights <- weights_object$data$weights[match(ipd$USUBJID, weights_object$data$USUBJID)] |
| 146 |
} |
|
| 147 | 6x |
pseudo_ipd$weights <- 1 |
| 148 | ||
| 149 |
# : necessary formatting for pseudo ipd |
|
| 150 | 2x |
if (!"USUBJID" %in% names(pseudo_ipd)) pseudo_ipd$USUBJID <- paste0("ID", seq_len(nrow(pseudo_ipd)))
|
| 151 | 6x |
if ("RESPONSE" %in% names(pseudo_ipd) && is.logical(pseudo_ipd$RESPONSE)) {
|
| 152 | 4x |
pseudo_ipd$RESPONSE <- as.numeric(pseudo_ipd$RESPONSE) |
| 153 |
} |
|
| 154 | ||
| 155 |
# : give warning when individual pts in IPD has no weights |
|
| 156 | 6x |
if (any(is.na(ipd$weights))) {
|
| 157 | ! |
ipd <- ipd[!is.na(ipd$weights), , drop = FALSE] |
| 158 | ! |
warning( |
| 159 | ! |
paste( |
| 160 | ! |
"these usubjid in ipd have no weight in weights_object, and hence excluded from analysis:", |
| 161 | ! |
paste(ipd$USUBJID[is.na(ipd$weights)], collapse = ",") |
| 162 |
) |
|
| 163 |
) |
|
| 164 | ! |
if (nrow(ipd) == 0) stop("there is no pts with weight in IPD!!")
|
| 165 |
} |
|
| 166 | ||
| 167 |
# : retain necessary columns |
|
| 168 | 6x |
if (endpoint_type == "tte") {
|
| 169 | 2x |
retain_cols <- c("USUBJID", "ARM", "TIME", "EVENT", "weights")
|
| 170 |
} else {
|
|
| 171 | 4x |
retain_cols <- c("USUBJID", "ARM", "RESPONSE", "weights")
|
| 172 |
} |
|
| 173 | 6x |
ipd <- ipd[, retain_cols, drop = FALSE] |
| 174 | 6x |
pseudo_ipd <- pseudo_ipd[, retain_cols, drop = FALSE] |
| 175 | ||
| 176 |
# : merge real and pseudo ipds |
|
| 177 | 6x |
dat <- rbind(ipd, pseudo_ipd) |
| 178 | 6x |
dat$ARM <- factor(dat$ARM, levels = c(trt_agd, trt_ipd)) |
| 179 | ||
| 180 |
# ==> Inferential output ------------------------------------------ |
|
| 181 | ||
| 182 | 6x |
result <- if (endpoint_type == "tte") {
|
| 183 | 2x |
maic_unanchored_tte( |
| 184 | 2x |
res, res_AB, res_AB_unadj, dat, ipd, pseudo_ipd, km_conf_type, time_scale, |
| 185 | 2x |
weights_object, endpoint_name, normalize_weights, boot_ci_type, trt_ipd, trt_agd |
| 186 |
) |
|
| 187 | 6x |
} else if (endpoint_type == "binary") {
|
| 188 | 4x |
maic_unanchored_binary( |
| 189 | 4x |
res, res_AB, res_AB_unadj, dat, ipd, pseudo_ipd, binary_robust_cov_type, |
| 190 | 4x |
weights_object, endpoint_name, normalize_weights, eff_measure, boot_ci_type, trt_ipd, trt_agd |
| 191 |
) |
|
| 192 |
} else {
|
|
| 193 | ! |
stop("Endpoint type ", endpoint_type, " currently unsupported.")
|
| 194 |
} |
|
| 195 | ||
| 196 |
# output |
|
| 197 | 6x |
result |
| 198 |
} |
|
| 199 | ||
| 200 |
# MAIC inference functions for TTE outcome type ------------ |
|
| 201 | ||
| 202 |
maic_unanchored_tte <- function(res, |
|
| 203 |
res_AB, |
|
| 204 |
res_AB_unadj, |
|
| 205 |
dat, |
|
| 206 |
ipd, |
|
| 207 |
pseudo_ipd, |
|
| 208 |
km_conf_type, |
|
| 209 |
time_scale, |
|
| 210 |
weights_object, |
|
| 211 |
endpoint_name, |
|
| 212 |
normalize_weights, |
|
| 213 |
boot_ci_type, |
|
| 214 |
trt_ipd, |
|
| 215 |
trt_agd) {
|
|
| 216 |
# ~~~ Descriptive table before and after matching |
|
| 217 |
# : derive km w and w/o weights |
|
| 218 | 2x |
kmobj_dat <- survfit(Surv(TIME, EVENT) ~ ARM, dat, conf.type = km_conf_type) |
| 219 | 2x |
kmobj_dat_adj <- survfit(Surv(TIME, EVENT) ~ ARM, dat, weights = dat$weights, conf.type = km_conf_type) |
| 220 | 2x |
res$descriptive[["survfit_before"]] <- survfit_makeup(kmobj_dat) |
| 221 | 2x |
res$descriptive[["survfit_after"]] <- survfit_makeup(kmobj_dat_adj) |
| 222 |
# : derive median survival time |
|
| 223 | 2x |
medSurv_dat <- medSurv_makeup(kmobj_dat, legend = "Before matching", time_scale = time_scale) |
| 224 | 2x |
medSurv_dat_adj <- medSurv_makeup(kmobj_dat_adj, legend = "After matching", time_scale = time_scale) |
| 225 | 2x |
medSurv_out <- rbind(medSurv_dat, medSurv_dat_adj) |
| 226 | 2x |
medSurv_out <- cbind(trt_ind = c("B", "A")[match(medSurv_out$treatment, levels(dat$ARM))], medSurv_out)
|
| 227 | ||
| 228 | 2x |
res$descriptive[["summary"]] <- medSurv_out |
| 229 | ||
| 230 |
# ~~~ Analysis table (Cox model) before and after matching |
|
| 231 |
# : fit PH Cox regression model |
|
| 232 | 2x |
coxobj_dat <- coxph(Surv(TIME, EVENT) ~ ARM, dat) |
| 233 | 2x |
coxobj_dat_adj <- coxph(Surv(TIME, EVENT) ~ ARM, dat, weights = weights, robust = TRUE) |
| 234 | ||
| 235 |
# : derive adjusted estimate for ipd exp arm vs agd exp arm |
|
| 236 | 2x |
res_AB$est <- summary(coxobj_dat_adj)$conf.int[1] |
| 237 | 2x |
mu <- summary(coxobj_dat_adj)$coef[1] |
| 238 | 2x |
sig <- summary(coxobj_dat_adj)$coef[4] |
| 239 | 2x |
res_AB$se <- sqrt((exp(sig^2) - 1) * exp(2 * mu + sig^2)) # log normal parametrization |
| 240 | 2x |
res_AB$ci_l <- summary(coxobj_dat_adj)$conf.int[3] |
| 241 | 2x |
res_AB$ci_u <- summary(coxobj_dat_adj)$conf.int[4] |
| 242 | 2x |
res_AB$pval <- summary(coxobj_dat_adj)$coef[6] |
| 243 | ||
| 244 |
# : derive unadjusted estimate |
|
| 245 | 2x |
res_AB_unadj$est <- summary(coxobj_dat)$conf.int[1] |
| 246 | 2x |
mu <- summary(coxobj_dat)$coef[1] |
| 247 | 2x |
sig <- summary(coxobj_dat)$coef[3] |
| 248 | 2x |
res_AB_unadj$se <- sqrt((exp(sig^2) - 1) * exp(2 * mu + sig^2)) # log normal parametrization |
| 249 | 2x |
res_AB_unadj$ci_l <- summary(coxobj_dat)$conf.int[3] |
| 250 | 2x |
res_AB_unadj$ci_u <- summary(coxobj_dat)$conf.int[4] |
| 251 | 2x |
res_AB_unadj$pval <- summary(coxobj_dat)$coef[5] |
| 252 | ||
| 253 |
# : get bootstrapped estimates if applicable |
|
| 254 | 2x |
if (!is.null(weights_object$boot)) {
|
| 255 | 1x |
keep_rows <- setdiff(seq_len(nrow(weights_object$data)), weights_object$rows_with_missing) |
| 256 | 1x |
boot_ipd_id <- weights_object$data[keep_rows, "USUBJID", drop = FALSE] |
| 257 | ||
| 258 | 1x |
boot_ipd <- merge(boot_ipd_id, ipd, by = "USUBJID", all.x = TRUE) |
| 259 | ! |
if (nrow(boot_ipd) != nrow(boot_ipd_id)) stop("ipd has multiple observations for some patients")
|
| 260 | 1x |
boot_ipd <- boot_ipd[match(boot_ipd$USUBJID, boot_ipd_id$USUBJID), ] |
| 261 | ||
| 262 | 1x |
stat_fun <- function(data, index, w_obj, pseudo_ipd, normalize) {
|
| 263 | 501x |
boot_ipd <- data[index, ] |
| 264 | 501x |
r <- dynGet("r", ifnotfound = NA) # Get bootstrap iteration
|
| 265 | 501x |
if (!is.na(r)) {
|
| 266 | ! |
if (!all(index == w_obj$boot[, 1, r])) stop("Bootstrap and weight indices don't match")
|
| 267 | 500x |
boot_ipd$weights <- w_obj$boot[, 2, r] |
| 268 | ! |
if (normalize) boot_ipd$weights <- boot_ipd$weights / mean(boot_ipd$weights, na.rm = TRUE) |
| 269 |
} |
|
| 270 | 501x |
boot_dat <- rbind(boot_ipd, pseudo_ipd) |
| 271 | 501x |
boot_dat$ARM <- factor(boot_dat$ARM, levels = c(trt_agd, trt_ipd)) |
| 272 | 501x |
boot_coxobj_dat_adj <- coxph(Surv(TIME, EVENT) ~ ARM, boot_dat, weights = weights) |
| 273 | 501x |
c(est = coef(boot_coxobj_dat_adj)[1], var = vcov(boot_coxobj_dat_adj)[1, 1]) |
| 274 |
} |
|
| 275 | ||
| 276 |
# Revert seed to how it was for weight bootstrap sampling |
|
| 277 | 1x |
old_seed <- globalenv()$.Random.seed |
| 278 | 1x |
on.exit(suspendInterrupts(set_random_seed(old_seed))) |
| 279 | 1x |
set_random_seed(weights_object$boot_seed) |
| 280 | 1x |
R <- dim(weights_object$boot)[3] |
| 281 | ||
| 282 | 1x |
boot_res <- boot( |
| 283 | 1x |
boot_ipd, |
| 284 | 1x |
stat_fun, |
| 285 | 1x |
R = R, |
| 286 | 1x |
w_obj = weights_object, |
| 287 | 1x |
pseudo_ipd = pseudo_ipd, |
| 288 | 1x |
normalize = normalize_weights, |
| 289 | 1x |
strata = weights_object$boot_strata |
| 290 |
) |
|
| 291 | 1x |
boot_ci <- boot.ci(boot_res, type = boot_ci_type, w_obj = weights_object, pseudo_ipd = pseudo_ipd) |
| 292 | ||
| 293 | 1x |
l_u_index <- switch(boot_ci_type, |
| 294 | 1x |
"norm" = list(2, 3, "normal"), |
| 295 | 1x |
"basic" = list(4, 5, "basic"), |
| 296 | 1x |
"stud" = list(4, 5, "student"), |
| 297 | 1x |
"perc" = list(4, 5, "percent"), |
| 298 | 1x |
"bca" = list(4, 5, "bca") |
| 299 |
) |
|
| 300 | ||
| 301 | 1x |
transform_estimate <- exp |
| 302 | 1x |
boot_res_AB <- list( |
| 303 | 1x |
est = as.vector(transform_estimate(boot_res$t0[1])), |
| 304 | 1x |
se = NA, |
| 305 | 1x |
ci_l = transform_estimate(boot_ci[[l_u_index[[3]]]][l_u_index[[1]]]), |
| 306 | 1x |
ci_u = transform_estimate(boot_ci[[l_u_index[[3]]]][l_u_index[[2]]]), |
| 307 | 1x |
pval = NA |
| 308 |
) |
|
| 309 |
} else {
|
|
| 310 | 1x |
boot_res_AB <- NULL |
| 311 | 1x |
boot_res <- NULL |
| 312 |
} |
|
| 313 | ||
| 314 |
# : report all raw fitted obj |
|
| 315 | 2x |
res$inferential[["fit"]] <- list( |
| 316 | 2x |
km_before = kmobj_dat, |
| 317 | 2x |
km_after = kmobj_dat_adj, |
| 318 | 2x |
model_before = coxobj_dat, |
| 319 | 2x |
model_after = coxobj_dat_adj, |
| 320 | 2x |
res_AB = res_AB, |
| 321 | 2x |
res_AB_unadj = res_AB_unadj, |
| 322 | 2x |
boot_res = boot_res, |
| 323 | 2x |
boot_res_AB = boot_res_AB |
| 324 |
) |
|
| 325 | ||
| 326 |
# : compile HR result |
|
| 327 | 2x |
res$inferential[["summary"]] <- data.frame( |
| 328 | 2x |
case = c("AB", "adjusted_AB"),
|
| 329 | 2x |
HR = c(res_AB_unadj$est, res_AB$est), |
| 330 | 2x |
LCL = c(res_AB_unadj$ci_l, res_AB$ci_l), |
| 331 | 2x |
UCL = c(res_AB_unadj$ci_u, res_AB$ci_u), |
| 332 | 2x |
pval = c(res_AB_unadj$pval, res_AB$pval) |
| 333 |
) |
|
| 334 | ||
| 335 |
# output |
|
| 336 | 2x |
res |
| 337 |
} |
|
| 338 | ||
| 339 |
# MAIC inference functions for Binary outcome type ------------ |
|
| 340 | ||
| 341 |
maic_unanchored_binary <- function(res, |
|
| 342 |
res_AB, |
|
| 343 |
res_AB_unadj, |
|
| 344 |
dat, |
|
| 345 |
ipd, |
|
| 346 |
pseudo_ipd, |
|
| 347 |
binary_robust_cov_type, |
|
| 348 |
weights_object, |
|
| 349 |
endpoint_name, |
|
| 350 |
normalize_weights, |
|
| 351 |
eff_measure, |
|
| 352 |
boot_ci_type, |
|
| 353 |
trt_ipd, |
|
| 354 |
trt_agd) {
|
|
| 355 |
# ~~~ Analysis table |
|
| 356 |
# : set up proper link |
|
| 357 | 4x |
glm_link <- switch(eff_measure, |
| 358 | 4x |
"RD" = "identity", |
| 359 | 4x |
"RR" = "log", |
| 360 | 4x |
"OR" = "logit" |
| 361 |
) |
|
| 362 | 4x |
transform_estimate <- switch(eff_measure, |
| 363 | 4x |
"RD" = function(x) x * 100, |
| 364 | 4x |
"RR" = exp, |
| 365 | 4x |
"OR" = exp |
| 366 |
) |
|
| 367 | ||
| 368 |
# : fit glm for binary outcome and robust estimate with weights |
|
| 369 | 4x |
binobj_dat <- glm(RESPONSE ~ ARM, dat, family = binomial(link = glm_link)) |
| 370 | 4x |
binobj_dat_adj <- suppressWarnings(glm(RESPONSE ~ ARM, dat, weights = weights, family = binomial(link = glm_link))) |
| 371 | ||
| 372 | 4x |
bin_robust_cov <- sandwich::vcovHC(binobj_dat_adj, type = binary_robust_cov_type) |
| 373 | 4x |
bin_robust_coef <- lmtest::coeftest(binobj_dat_adj, vcov. = bin_robust_cov) |
| 374 | 4x |
bin_robust_ci <- lmtest::coefci(binobj_dat_adj, vcov. = bin_robust_cov) |
| 375 | ||
| 376 |
# : make general summary |
|
| 377 | 4x |
glmDesc_dat <- glm_makeup(binobj_dat, legend = "Before matching", weighted = FALSE) |
| 378 | 4x |
glmDesc_dat_adj <- glm_makeup(binobj_dat_adj, legend = "After matching", weighted = TRUE) |
| 379 | 4x |
glmDesc <- rbind(glmDesc_dat, glmDesc_dat_adj) |
| 380 | 4x |
glmDesc <- cbind(trt_ind = c("B", "A")[match(glmDesc$treatment, levels(dat$ARM))], glmDesc)
|
| 381 | 4x |
rownames(glmDesc) <- NULL |
| 382 | 4x |
res$descriptive[["summary"]] <- glmDesc |
| 383 | ||
| 384 |
# : derive adjusted estimate |
|
| 385 | 4x |
res_AB$est <- bin_robust_coef[2, "Estimate"] |
| 386 | 4x |
res_AB$se <- bin_robust_coef[2, "Std. Error"] |
| 387 | 4x |
res_AB$ci_l <- bin_robust_ci[2, "2.5 %"] |
| 388 | 4x |
res_AB$ci_u <- bin_robust_ci[2, "97.5 %"] |
| 389 | 4x |
res_AB$pval <- bin_robust_coef[2, "Pr(>|z|)"] |
| 390 | ||
| 391 |
# : derive unadjusted estimate |
|
| 392 | 4x |
binobj_dat_summary <- summary(binobj_dat) |
| 393 | 4x |
res_AB_unadj$est <- binobj_dat_summary$coefficients[2, "Estimate"] |
| 394 | 4x |
res_AB_unadj$se <- binobj_dat_summary$coefficients[2, "Std. Error"] |
| 395 | 4x |
res_AB_unadj$ci_l <- confint.default(binobj_dat)[2, "2.5 %"] |
| 396 | 4x |
res_AB_unadj$ci_u <- confint.default(binobj_dat)[2, "97.5 %"] |
| 397 | 4x |
res_AB_unadj$pval <- binobj_dat_summary$coefficients[2, "Pr(>|z|)"] |
| 398 | ||
| 399 |
# : transform |
|
| 400 | 4x |
if (eff_measure %in% c("RR", "OR")) {
|
| 401 | 3x |
res_AB <- transform_ratio(res_AB) |
| 402 | 3x |
res_AB_unadj <- transform_ratio(res_AB_unadj) |
| 403 | 1x |
} else if (eff_measure == "RD") {
|
| 404 | 1x |
res_AB <- transform_absolute(res_AB) |
| 405 | 1x |
res_AB_unadj <- transform_absolute(res_AB_unadj) |
| 406 |
} |
|
| 407 | ||
| 408 |
# : get bootstrapped estimates if applicable |
|
| 409 | 4x |
if (!is.null(weights_object$boot)) {
|
| 410 | 1x |
keep_rows <- setdiff(seq_len(nrow(weights_object$data)), weights_object$rows_with_missing) |
| 411 | 1x |
boot_ipd_id <- weights_object$data[keep_rows, "USUBJID", drop = FALSE] |
| 412 | ||
| 413 | 1x |
boot_ipd <- merge(boot_ipd_id, ipd, by = "USUBJID", all.x = TRUE) |
| 414 | ! |
if (nrow(boot_ipd) != nrow(boot_ipd_id)) stop("ipd has multiple observations for some patients")
|
| 415 | 1x |
boot_ipd <- boot_ipd[match(boot_ipd$USUBJID, boot_ipd_id$USUBJID), ] |
| 416 | ||
| 417 | 1x |
stat_fun <- function(data, index, w_obj, pseudo_ipd, normalize) {
|
| 418 | 21x |
boot_ipd <- data[index, ] |
| 419 | 21x |
r <- dynGet("r", ifnotfound = NA) # Get bootstrap iteration
|
| 420 | 21x |
if (!is.na(r)) {
|
| 421 | ! |
if (!all(index == w_obj$boot[, 1, r])) stop("Bootstrap and weight indices don't match")
|
| 422 | 20x |
boot_ipd$weights <- w_obj$boot[, 2, r] |
| 423 | ! |
if (normalize) boot_ipd$weights <- boot_ipd$weights / mean(boot_ipd$weights, na.rm = TRUE) |
| 424 |
} |
|
| 425 | 21x |
boot_dat <- rbind(boot_ipd, pseudo_ipd) |
| 426 | 21x |
boot_dat$ARM <- factor(boot_dat$ARM, levels = c(trt_agd, trt_ipd)) |
| 427 | 21x |
boot_binobj_dat_adj <- suppressWarnings( |
| 428 | 21x |
glm(RESPONSE ~ ARM, boot_dat, weights = weights, family = binomial(link = glm_link)) |
| 429 |
) |
|
| 430 | 21x |
c(est = coef(boot_binobj_dat_adj)[2], var = vcov(boot_binobj_dat_adj)[2, 2]) |
| 431 |
} |
|
| 432 | ||
| 433 |
# Revert seed to how it was for weight bootstrap sampling |
|
| 434 | 1x |
old_seed <- globalenv()$.Random.seed |
| 435 | 1x |
on.exit(suspendInterrupts(set_random_seed(old_seed))) |
| 436 | 1x |
set_random_seed(weights_object$boot_seed) |
| 437 | 1x |
R <- dim(weights_object$boot)[3] |
| 438 | 1x |
boot_res <- boot( |
| 439 | 1x |
boot_ipd, |
| 440 | 1x |
stat_fun, |
| 441 | 1x |
R = R, |
| 442 | 1x |
w_obj = weights_object, |
| 443 | 1x |
pseudo_ipd = pseudo_ipd, |
| 444 | 1x |
normalize = normalize_weights, |
| 445 | 1x |
strata = weights_object$boot_strata |
| 446 |
) |
|
| 447 | 1x |
boot_ci <- boot.ci(boot_res, type = boot_ci_type, w_obj = weights_object, pseudo_ipd = pseudo_ipd) |
| 448 | ||
| 449 | 1x |
l_u_index <- switch(boot_ci_type, |
| 450 | 1x |
"norm" = list(2, 3, "normal"), |
| 451 | 1x |
"basic" = list(4, 5, "basic"), |
| 452 | 1x |
"stud" = list(4, 5, "student"), |
| 453 | 1x |
"perc" = list(4, 5, "percent"), |
| 454 | 1x |
"bca" = list(4, 5, "bca") |
| 455 |
) |
|
| 456 | ||
| 457 | 1x |
boot_res_AB <- list( |
| 458 | 1x |
est = as.vector(transform_estimate(boot_res$t0[1])), |
| 459 | 1x |
se = NA, |
| 460 | 1x |
ci_l = transform_estimate(boot_ci[[l_u_index[[3]]]][l_u_index[[1]]]), |
| 461 | 1x |
ci_u = transform_estimate(boot_ci[[l_u_index[[3]]]][l_u_index[[2]]]), |
| 462 | 1x |
pval = NA |
| 463 |
) |
|
| 464 |
} else {
|
|
| 465 | 3x |
boot_res_AB <- NULL |
| 466 | 3x |
boot_res <- NULL |
| 467 |
} |
|
| 468 | ||
| 469 |
# : report all raw fitted obj |
|
| 470 | 4x |
res$inferential[["fit"]] <- list( |
| 471 | 4x |
model_before = binobj_dat, |
| 472 | 4x |
model_after = binobj_dat_adj, |
| 473 | 4x |
res_AB = res_AB, |
| 474 | 4x |
res_AB_unadj = res_AB_unadj, |
| 475 | 4x |
boot_res = boot_res, |
| 476 | 4x |
boot_res_AB = boot_res_AB |
| 477 |
) |
|
| 478 | ||
| 479 |
# : compile binary effect estimate result |
|
| 480 | 4x |
res$inferential[["summary"]] <- data.frame( |
| 481 | 4x |
case = c("AB", "adjusted_AB"),
|
| 482 | 4x |
EST = c( |
| 483 | 4x |
res_AB_unadj$est, |
| 484 | 4x |
res_AB$est |
| 485 |
), |
|
| 486 | 4x |
LCL = c( |
| 487 | 4x |
res_AB_unadj$ci_l, |
| 488 | 4x |
res_AB$ci_l |
| 489 |
), |
|
| 490 | 4x |
UCL = c( |
| 491 | 4x |
res_AB_unadj$ci_u, |
| 492 | 4x |
res_AB$ci_u |
| 493 |
), |
|
| 494 | 4x |
pval = c( |
| 495 | 4x |
res_AB_unadj$pval, |
| 496 | 4x |
res_AB$pval |
| 497 |
) |
|
| 498 |
) |
|
| 499 | 4x |
names(res$inferential[["summary"]])[2] <- eff_measure |
| 500 | ||
| 501 |
# : output |
|
| 502 | 4x |
res |
| 503 |
} |
| 1 |
#' Anchored MAIC for binary and time-to-event endpoint |
|
| 2 |
#' |
|
| 3 |
#' This is a wrapper function to provide adjusted effect estimates and relevant statistics in anchored case (i.e. there |
|
| 4 |
#' is a common comparator arm in the internal and external trial). |
|
| 5 |
#' |
|
| 6 |
#' @param weights_object an object returned by \code{estimate_weight}
|
|
| 7 |
#' @param ipd a data frame that meet format requirements in 'Details', individual patient data (IPD) of internal trial |
|
| 8 |
#' @param pseudo_ipd a data frame, pseudo IPD from digitized KM curve of external trial (for time-to-event endpoint) or |
|
| 9 |
#' from contingency table (for binary endpoint) |
|
| 10 |
#' @param trt_ipd a string, name of the interested investigation arm in internal trial \code{ipd} (internal IPD)
|
|
| 11 |
#' @param trt_agd a string, name of the interested investigation arm in external trial \code{pseudo_ipd} (pseudo IPD)
|
|
| 12 |
#' @param trt_common a string, name of the common comparator in internal and external trial |
|
| 13 |
#' @param trt_var_ipd a string, column name in \code{ipd} that contains the treatment assignment
|
|
| 14 |
#' @param trt_var_agd a string, column name in \code{ipd} that contains the treatment assignment
|
|
| 15 |
#' @param normalize_weights logical, default is \code{FALSE}. If \code{TRUE},
|
|
| 16 |
#' \code{scaled_weights} (normalized weights) in \code{weights_object$data} will be used.
|
|
| 17 |
#' @param endpoint_type a string, one out of the following "binary", "tte" (time to event) |
|
| 18 |
#' @param endpoint_name a string, name of time to event endpoint, to be show in the last line of title |
|
| 19 |
#' @param time_scale a string, time unit of median survival time, taking a value of 'years', 'months', 'weeks' or |
|
| 20 |
#' 'days'. NOTE: it is assumed that values in TIME column of \code{ipd} and \code{pseudo_ipd} is in the unit of days
|
|
| 21 |
#' @param km_conf_type a string, pass to \code{conf.type} of \code{survfit}
|
|
| 22 |
#' @param eff_measure a string, "RD" (risk difference), "OR" (odds ratio), "RR" (relative risk) for a binary endpoint; |
|
| 23 |
#' "HR" for a time-to-event endpoint. By default is \code{NULL}, "OR" is used for binary case, otherwise "HR" is used.
|
|
| 24 |
#' @param boot_ci_type a string, one of `c("norm","basic", "stud", "perc", "bca")` to select the type of bootstrap
|
|
| 25 |
#' confidence interval. See [boot::boot.ci] for more details. |
|
| 26 |
#' @param binary_robust_cov_type a string to pass to argument `type` of [sandwich::vcovHC], see possible options in the |
|
| 27 |
#' documentation of that function. Default is `"HC3"` |
|
| 28 |
#' |
|
| 29 |
#' @details It is required that input \code{ipd} and \code{pseudo_ipd} to have the following
|
|
| 30 |
#' columns. This function is not sensitive to upper or lower case of letters in column names. |
|
| 31 |
#' \itemize{
|
|
| 32 |
#' \item USUBJID - character, unique subject ID |
|
| 33 |
#' \item ARM - character or factor, treatment indicator, column name does not have to be 'ARM'. User specify in |
|
| 34 |
#' \code{trt_var_ipd} and \code{trt_var_agd}
|
|
| 35 |
#' } |
|
| 36 |
#' For time-to-event analysis, the follow columns are required: |
|
| 37 |
#' \itemize{
|
|
| 38 |
#' \item EVENT - numeric, `1` for censored/death, `0` otherwise |
|
| 39 |
#' \item TIME - numeric column, observation time of the \code{EVENT}; unit in days
|
|
| 40 |
#' } |
|
| 41 |
#' For binary outcomes: |
|
| 42 |
#' \itemize{
|
|
| 43 |
#' \item RESPONSE - numeric, `1` for event occurred, `0` otherwise |
|
| 44 |
#' } |
|
| 45 |
#' |
|
| 46 |
#' @importFrom survival survfit Surv coxph |
|
| 47 |
#' @importFrom lmtest coeftest coefci |
|
| 48 |
#' @importFrom sandwich vcovHC |
|
| 49 |
#' @importFrom boot boot boot.ci |
|
| 50 |
#' @return A list, contains 'descriptive' and 'inferential' |
|
| 51 |
#' @example inst/examples/maic_anchored_ex.R |
|
| 52 |
#' @example inst/examples/maic_anchored_binary_ex.R |
|
| 53 |
#' @export |
|
| 54 | ||
| 55 |
maic_anchored <- function(weights_object, |
|
| 56 |
ipd, |
|
| 57 |
pseudo_ipd, |
|
| 58 |
trt_ipd, |
|
| 59 |
trt_agd, |
|
| 60 |
trt_common, |
|
| 61 |
trt_var_ipd = "ARM", |
|
| 62 |
trt_var_agd = "ARM", |
|
| 63 |
normalize_weights = FALSE, |
|
| 64 |
endpoint_type = "tte", |
|
| 65 |
endpoint_name = "Time to Event Endpoint", |
|
| 66 |
eff_measure = c("HR", "OR", "RR", "RD"),
|
|
| 67 |
boot_ci_type = c("norm", "basic", "stud", "perc", "bca"),
|
|
| 68 |
# time to event specific args |
|
| 69 |
time_scale = "months", |
|
| 70 |
km_conf_type = "log-log", |
|
| 71 |
# binary specific args |
|
| 72 |
binary_robust_cov_type = "HC3") {
|
|
| 73 |
# ==> Initial Setup ------------------------------------------ |
|
| 74 |
# ~~~ Create the hull for the output from this function |
|
| 75 | 8x |
res <- list( |
| 76 | 8x |
descriptive = list(), |
| 77 | 8x |
inferential = list() |
| 78 |
) |
|
| 79 | ||
| 80 | 8x |
res_AB <- list( |
| 81 | 8x |
est = NA, |
| 82 | 8x |
se = NA, |
| 83 | 8x |
ci_l = NA, |
| 84 | 8x |
ci_u = NA, |
| 85 | 8x |
pval = NA |
| 86 |
) |
|
| 87 | ||
| 88 |
# ~~~ Initial colname process and precheck on effect measure |
|
| 89 | 8x |
names(ipd) <- toupper(names(ipd)) |
| 90 | 8x |
names(pseudo_ipd) <- toupper(names(pseudo_ipd)) |
| 91 | 8x |
trt_var_ipd <- toupper(trt_var_ipd) |
| 92 | 8x |
trt_var_agd <- toupper(trt_var_agd) |
| 93 | ! |
if (length(eff_measure) > 1) eff_measure <- NULL |
| 94 | ! |
if (is.null(eff_measure)) eff_measure <- list(binary = "OR", tte = "HR")[[endpoint_type]] |
| 95 | ||
| 96 |
# ~~~ Setup ARM column and make related pre-checks |
|
| 97 | ! |
if (!trt_var_ipd %in% names(ipd)) stop("cannot find arm indicator column trt_var_ipd in ipd")
|
| 98 | ! |
if (!trt_var_agd %in% names(pseudo_ipd)) stop("cannot find arm indicator column trt_var_agd in pseudo_ipd")
|
| 99 | ! |
if (trt_var_ipd != "ARM") ipd$ARM <- ipd[[trt_var_ipd]] |
| 100 | ! |
if (trt_var_agd != "ARM") pseudo_ipd$ARM <- pseudo_ipd[[trt_var_agd]] |
| 101 | 8x |
ipd$ARM <- as.character(ipd$ARM) # just to avoid potential error when merging |
| 102 | ||
| 103 |
# ~~~ More pre-checks |
|
| 104 | 8x |
pseudo_ipd$ARM <- as.character(pseudo_ipd$ARM) # just to avoid potential error when merging |
| 105 | ! |
if (!trt_ipd %in% ipd$ARM) stop("trt_ipd does not exist in ipd$ARM")
|
| 106 | ! |
if (!trt_agd %in% pseudo_ipd$ARM) stop("trt_agd does not exist in pseudo_ipd$ARM")
|
| 107 | ! |
if (!trt_common %in% ipd$ARM) stop("trt_common does not exist in ipd$ARM")
|
| 108 | ! |
if (!trt_common %in% pseudo_ipd$ARM) stop("trt_common does not exist in pseudo_ipd$ARM")
|
| 109 | 8x |
ipd_arms <- unique(ipd$ARM) |
| 110 | 8x |
pseudo_ipd_arms <- unique(pseudo_ipd$ARM) |
| 111 | 8x |
if (!length(ipd_arms) >= 2) {
|
| 112 | ! |
stop("In anchored case, there should be at least two arms in ipd, but you have: ", toString(ipd_arms))
|
| 113 |
} |
|
| 114 | 8x |
if (!length(pseudo_ipd_arms) >= 2) {
|
| 115 | ! |
stop("In anchored case, there should be at least two arms in pseudo_ipd, but you have: ", toString(pseudo_ipd_arms))
|
| 116 |
} |
|
| 117 | 8x |
endpoint_type <- match.arg(endpoint_type, c("binary", "tte"))
|
| 118 | 8x |
if (!"maicplus_estimate_weights" %in% class(weights_object)) {
|
| 119 | ! |
stop("weights_object should be an object returned by estimate_weights")
|
| 120 |
} |
|
| 121 | 8x |
if (any(duplicated(ipd$USUBJID))) {
|
| 122 | ! |
warning( |
| 123 | ! |
"check your ipd, it has duplicated usubjid, this indicates, ", |
| 124 | ! |
"it might contain multiple endpoints for each subject" |
| 125 |
) |
|
| 126 |
} |
|
| 127 | 8x |
if (!all(ipd$USUBJID %in% weights_object$data$USUBJID)) {
|
| 128 | ! |
stop( |
| 129 | ! |
"These patients in ipd cannot be found in weights_object ", |
| 130 | ! |
toString(setdiff(ipd$USUBJID, weights_object$USUBJID)) |
| 131 |
) |
|
| 132 |
} |
|
| 133 | 8x |
time_scale <- match.arg(arg = time_scale, choices = c("days", "weeks", "months", "years"))
|
| 134 | 8x |
if (endpoint_type == "binary") { # for binary effect measure
|
| 135 | ||
| 136 | ! |
if (any(!c("USUBJID", "RESPONSE") %in% names(ipd))) stop("ipd should have 'USUBJID', 'RESPONSE' columns at minimum")
|
| 137 | 5x |
eff_measure <- match.arg(eff_measure, choices = c("OR", "RD", "RR"), several.ok = FALSE)
|
| 138 | 3x |
} else if (endpoint_type == "tte") { # for time to event effect measure
|
| 139 | ||
| 140 | 3x |
if (!all(c("USUBJID", "TIME", "EVENT", trt_var_ipd) %in% names(ipd))) {
|
| 141 | ! |
stop("ipd needs to include at least USUBJID, TIME, EVENT, ", toString(trt_var_ipd))
|
| 142 |
} |
|
| 143 | 3x |
if (!all(c("TIME", "EVENT", trt_var_agd) %in% names(pseudo_ipd))) {
|
| 144 | ! |
stop("pseudo_ipd needs to include at least TIME, EVENT, ", toString(trt_var_agd))
|
| 145 |
} |
|
| 146 | 3x |
eff_measure <- match.arg(eff_measure, choices = c("HR"), several.ok = FALSE)
|
| 147 |
} |
|
| 148 | 8x |
boot_ci_type <- match.arg(boot_ci_type) |
| 149 | ||
| 150 |
# ==> IPD and AgD data preparation ------------------------------------------ |
|
| 151 |
# : subset ipd, retain only ipd from interested trts |
|
| 152 | 8x |
ipd <- ipd[ipd$ARM %in% c(trt_ipd, trt_common), , drop = TRUE] |
| 153 | 8x |
pseudo_ipd <- pseudo_ipd[pseudo_ipd$ARM %in% c(trt_agd, trt_common), , drop = TRUE] |
| 154 | ||
| 155 |
# : assign weights to real and pseudo ipd |
|
| 156 | 8x |
if (normalize_weights) {
|
| 157 | ! |
ipd$weights <- weights_object$data$scaled_weights[match(ipd$USUBJID, weights_object$data$USUBJID)] |
| 158 |
} else {
|
|
| 159 | 8x |
ipd$weights <- weights_object$data$weights[match(ipd$USUBJID, weights_object$data$USUBJID)] |
| 160 |
} |
|
| 161 | 8x |
pseudo_ipd$weights <- 1 |
| 162 | 3x |
if (!"USUBJID" %in% names(pseudo_ipd)) pseudo_ipd$USUBJID <- paste0("ID", seq_len(nrow(pseudo_ipd)))
|
| 163 | ||
| 164 |
# : give warning when individual pts in IPD has no weights |
|
| 165 | 8x |
if (any(is.na(ipd$weights))) {
|
| 166 | ! |
ipd <- ipd[!is.na(ipd$weights), , drop = FALSE] |
| 167 | ! |
warning( |
| 168 | ! |
"These USUBJID in IPD have no weight in weights_object and hence excluded from analysis: ", |
| 169 | ! |
toString(ipd$USUBJID[is.na(ipd$weights)]) |
| 170 |
) |
|
| 171 | ! |
if (nrow(ipd) == 0) stop("There are no patients with valid weights in IPD!")
|
| 172 |
} |
|
| 173 | ||
| 174 |
# : retain necessary columns |
|
| 175 | 8x |
outcome_cols <- if (endpoint_type == "tte") c("TIME", "EVENT") else "RESPONSE"
|
| 176 | 8x |
retain_cols <- c("USUBJID", "ARM", outcome_cols, "weights")
|
| 177 | ||
| 178 | 8x |
ipd <- ipd[, retain_cols, drop = FALSE] |
| 179 | 8x |
pseudo_ipd <- pseudo_ipd[, retain_cols, drop = FALSE] |
| 180 | ||
| 181 |
# : merge real and pseudo ipds, only used if apply contrast method, |
|
| 182 |
# since contrast method is not implemented in v0.1, this R obj is not used |
|
| 183 |
# just a placeholder |
|
| 184 | 8x |
dat <- rbind(ipd, pseudo_ipd) |
| 185 | ||
| 186 |
# : setup ARM column as a factor, |
|
| 187 |
# * these line cannot be move prior to "dat <- rbind(ipd, pseudo_ipd)" |
|
| 188 | 8x |
ipd$ARM <- factor(ipd$ARM, levels = c(trt_common, trt_ipd)) |
| 189 | 8x |
pseudo_ipd$ARM <- factor(pseudo_ipd$ARM, levels = c(trt_common, trt_agd)) |
| 190 | 8x |
dat$ARM <- factor(dat$ARM, levels = c(trt_common, trt_agd, trt_ipd)) |
| 191 | ||
| 192 |
# ==> Inferential output ------------------------------------------ |
|
| 193 | 8x |
result <- if (endpoint_type == "tte") {
|
| 194 | 3x |
maic_anchored_tte( |
| 195 | 3x |
res, |
| 196 | 3x |
res_BC = NULL, |
| 197 | 3x |
dat, |
| 198 | 3x |
ipd, |
| 199 | 3x |
pseudo_ipd, |
| 200 | 3x |
km_conf_type, |
| 201 | 3x |
time_scale, |
| 202 | 3x |
weights_object, |
| 203 | 3x |
endpoint_name, |
| 204 | 3x |
normalize_weights, |
| 205 | 3x |
trt_ipd, |
| 206 | 3x |
trt_agd, |
| 207 | 3x |
boot_ci_type |
| 208 |
) |
|
| 209 | 8x |
} else if (endpoint_type == "binary") {
|
| 210 | 5x |
maic_anchored_binary( |
| 211 | 5x |
res, |
| 212 | 5x |
res_BC = NULL, |
| 213 | 5x |
dat, |
| 214 | 5x |
ipd, |
| 215 | 5x |
pseudo_ipd, |
| 216 | 5x |
binary_robust_cov_type, |
| 217 | 5x |
weights_object, |
| 218 | 5x |
endpoint_name, |
| 219 | 5x |
normalize_weights, |
| 220 | 5x |
eff_measure, |
| 221 | 5x |
trt_ipd, |
| 222 | 5x |
trt_agd, |
| 223 | 5x |
boot_ci_type |
| 224 |
) |
|
| 225 |
} else {
|
|
| 226 | ! |
stop("Endpoint type ", endpoint_type, " currently unsupported.")
|
| 227 |
} |
|
| 228 | ||
| 229 | 8x |
result |
| 230 |
} |
|
| 231 | ||
| 232 | ||
| 233 |
# MAIC inference functions for TTE outcome type ------------ |
|
| 234 |
maic_anchored_tte <- function(res, |
|
| 235 |
res_BC = NULL, |
|
| 236 |
dat, |
|
| 237 |
ipd, |
|
| 238 |
pseudo_ipd, |
|
| 239 |
km_conf_type, |
|
| 240 |
time_scale, |
|
| 241 |
weights_object, |
|
| 242 |
endpoint_name, |
|
| 243 |
normalize_weights, |
|
| 244 |
trt_ipd, |
|
| 245 |
trt_agd, |
|
| 246 |
boot_ci_type) {
|
|
| 247 |
# Analysis table (Cox model) before and after matching, incl Median Survival Time |
|
| 248 |
# derive km w and w/o weights |
|
| 249 | 3x |
kmobj_ipd <- survfit(Surv(TIME, EVENT) ~ ARM, ipd, conf.type = km_conf_type) |
| 250 | 3x |
kmobj_ipd_adj <- survfit(Surv(TIME, EVENT) ~ ARM, ipd, weights = ipd$weights, conf.type = km_conf_type) |
| 251 | 3x |
kmobj_agd <- survfit(Surv(TIME, EVENT) ~ ARM, pseudo_ipd, conf.type = km_conf_type) |
| 252 | ||
| 253 | 3x |
res$descriptive[["survfit_ipd_before"]] <- survfit_makeup(kmobj_ipd) |
| 254 | 3x |
res$descriptive[["survfit_ipd_after"]] <- survfit_makeup(kmobj_ipd_adj) |
| 255 | 3x |
res$descriptive[["survfit_pseudo"]] <- survfit_makeup(kmobj_agd) |
| 256 |
# derive median survival time |
|
| 257 | 3x |
medSurv_ipd <- medSurv_makeup(kmobj_ipd, legend = "IPD, before matching", time_scale = time_scale) |
| 258 | 3x |
medSurv_ipd_adj <- medSurv_makeup(kmobj_ipd_adj, legend = "IPD, after matching", time_scale = time_scale) |
| 259 | 3x |
medSurv_agd <- medSurv_makeup(kmobj_agd, legend = "AgD, external", time_scale = time_scale) |
| 260 | 3x |
medSurv_out <- rbind(medSurv_ipd, medSurv_ipd_adj, medSurv_agd) |
| 261 | 3x |
medSurv_out <- cbind(medSurv_out[, 1:6], |
| 262 | 3x |
`events%` = medSurv_out$events * 100 / medSurv_out$n.max, |
| 263 | 3x |
medSurv_out[7:ncol(medSurv_out)] |
| 264 |
) |
|
| 265 | 3x |
medSurv_out <- cbind(trt_ind = c("C", "B", "A")[match(medSurv_out$treatment, levels(dat$ARM))], medSurv_out)
|
| 266 | ||
| 267 | 3x |
res$descriptive[["summary"]] <- medSurv_out |
| 268 | ||
| 269 |
# ~~~ Analysis table (Cox model) before and after matching |
|
| 270 |
# fit PH Cox regression model |
|
| 271 | 3x |
coxobj_ipd <- coxph(Surv(TIME, EVENT) ~ ARM, ipd) # robust = TRUE or not makes a diff |
| 272 | 3x |
coxobj_ipd_adj <- coxph(Surv(TIME, EVENT) ~ ARM, ipd, weights = weights, robust = TRUE) |
| 273 | 3x |
coxobj_agd <- coxph(Surv(TIME, EVENT) ~ ARM, pseudo_ipd) |
| 274 | ||
| 275 |
# derive ipd exp arm vs agd exp arm via bucher |
|
| 276 | 3x |
res_AC_unadj <- as.list(summary(coxobj_ipd)$coef)[c(1, 3)] # est, se |
| 277 | 3x |
res_AC <- as.list(summary(coxobj_ipd_adj)$coef)[c(1, 4)] # est, robust se |
| 278 | 3x |
if (is.null(res_BC)) res_BC <- as.list(summary(coxobj_agd)$coef)[c(1, 3)] # est, se |
| 279 | ||
| 280 | 3x |
names(res_AC_unadj) <- names(res_AC) <- names(res_BC) <- c("est", "se")
|
| 281 | ||
| 282 | 3x |
coxobj_ipd_summary <- summary(coxobj_ipd) |
| 283 | 3x |
res_AC_unadj$ci_l <- coxobj_ipd_summary$conf.int[3] |
| 284 | 3x |
res_AC_unadj$ci_u <- coxobj_ipd_summary$conf.int[4] |
| 285 | 3x |
res_AC_unadj$pval <- as.vector(coxobj_ipd_summary$waldtest[3]) |
| 286 | ||
| 287 | 3x |
coxobj_ipd_adj_summary <- summary(coxobj_ipd_adj) |
| 288 | 3x |
res_AC$ci_l <- coxobj_ipd_adj_summary$conf.int[3] |
| 289 | 3x |
res_AC$ci_u <- coxobj_ipd_adj_summary$conf.int[4] |
| 290 | 3x |
res_AC$pval <- as.vector(coxobj_ipd_adj_summary$waldtest[3]) |
| 291 | ||
| 292 | 3x |
coxobj_agd_summary <- summary(coxobj_agd) |
| 293 | 3x |
res_BC$ci_l <- coxobj_agd_summary$conf.int[3] |
| 294 | 3x |
res_BC$ci_u <- coxobj_agd_summary$conf.int[4] |
| 295 | 3x |
res_BC$pval <- as.vector(coxobj_agd_summary$waldtest[3]) |
| 296 | ||
| 297 | 3x |
res_AB <- bucher(res_AC, res_BC, conf_lv = 0.95) |
| 298 | 3x |
res_AB_unadj <- bucher(res_AC_unadj, res_BC, conf_lv = 0.95) |
| 299 | ||
| 300 |
# : get bootstrapped estimates if applicable |
|
| 301 | 3x |
if (!is.null(weights_object$boot)) {
|
| 302 | 1x |
keep_rows <- setdiff(seq_len(nrow(weights_object$data)), weights_object$rows_with_missing) |
| 303 | 1x |
boot_ipd_id <- weights_object$data[keep_rows, "USUBJID", drop = FALSE] |
| 304 | ||
| 305 | 1x |
boot_ipd <- merge(boot_ipd_id, ipd, by = "USUBJID", all.x = TRUE) |
| 306 | ! |
if (nrow(boot_ipd) != nrow(boot_ipd_id)) stop("ipd has multiple observations for some patients")
|
| 307 | 1x |
boot_ipd <- boot_ipd[match(boot_ipd$USUBJID, boot_ipd_id$USUBJID), ] |
| 308 | ||
| 309 | 1x |
stat_fun <- function(data, index, w_obj, normalize) {
|
| 310 | 6x |
r <- dynGet("r", ifnotfound = NA) # Get bootstrap iteration
|
| 311 | 6x |
if (!is.na(r)) {
|
| 312 | ! |
if (!all(index == w_obj$boot[, 1, r])) stop("Bootstrap and weight indices don't match")
|
| 313 | 5x |
boot_ipd <- data[w_obj$boot[, 1, r], ] |
| 314 | 5x |
boot_ipd$weights <- w_obj$boot[, 2, r] |
| 315 | ||
| 316 | 5x |
if (normalize) {
|
| 317 | ! |
boot_ipd$weights <- ave( |
| 318 | ! |
boot_ipd$weights, |
| 319 | ! |
boot_ipd$ARM, |
| 320 | ! |
FUN = function(w) w / mean(w, na.rm = TRUE) |
| 321 |
) |
|
| 322 |
} |
|
| 323 |
} |
|
| 324 | 6x |
boot_coxobj_dat_adj <- coxph(Surv(TIME, EVENT) ~ ARM, boot_ipd, weights = boot_ipd$weights, robust = TRUE) |
| 325 | 6x |
boot_res_AC <- list(est = coef(boot_coxobj_dat_adj)[1], se = sqrt(vcov(boot_coxobj_dat_adj)[1, 1])) |
| 326 |
# temp method to source in variance of BC in AgD via monte carlo, may be removed in future |
|
| 327 | 6x |
res_BC_mc <- res_BC |
| 328 | 6x |
res_BC_mc$est <- rnorm(1, mean = res_BC$est, sd = res_BC$se) |
| 329 | 6x |
boot_res_AB <- bucher(boot_res_AC, res_BC_mc, conf_lv = 0.95) |
| 330 | 6x |
c( |
| 331 | 6x |
est_AB = boot_res_AB$est, |
| 332 | 6x |
var_AB = boot_res_AB$se^2, |
| 333 | 6x |
se_AB = boot_res_AB$se, |
| 334 | 6x |
est_AC = boot_res_AC$est, |
| 335 | 6x |
se_AC = boot_res_AC$se, |
| 336 | 6x |
var_AC = boot_res_AC$se^2 |
| 337 |
) |
|
| 338 |
} |
|
| 339 | ||
| 340 |
# Revert seed to how it was for weight bootstrap sampling |
|
| 341 | 1x |
old_seed <- globalenv()$.Random.seed |
| 342 | 1x |
on.exit(suspendInterrupts(set_random_seed(old_seed))) |
| 343 | 1x |
set_random_seed(weights_object$boot_seed) |
| 344 | ||
| 345 | 1x |
R <- dim(weights_object$boot)[3] |
| 346 | 1x |
boot_res <- boot( |
| 347 | 1x |
boot_ipd, |
| 348 | 1x |
stat_fun, |
| 349 | 1x |
R = R, |
| 350 | 1x |
w_obj = weights_object, |
| 351 | 1x |
normalize = normalize_weights, |
| 352 | 1x |
strata = weights_object$boot_strata |
| 353 |
) |
|
| 354 | 1x |
boot_ci <- boot.ci(boot_res, type = boot_ci_type, w_obj = weights_object, normalize = normalize_weights) |
| 355 | ||
| 356 | 1x |
l_u_index <- switch(boot_ci_type, |
| 357 | 1x |
"norm" = list(2, 3, "normal"), |
| 358 | 1x |
"basic" = list(4, 5, "basic"), |
| 359 | 1x |
"stud" = list(4, 5, "student"), |
| 360 | 1x |
"perc" = list(4, 5, "percent"), |
| 361 | 1x |
"bca" = list(4, 5, "bca") |
| 362 |
) |
|
| 363 | ||
| 364 |
# boot results for A v B, method 1 (maybe retired in future version) |
|
| 365 | 1x |
boot_res_AB <- list( |
| 366 | 1x |
est = as.vector(exp(boot_res$t0[1])), |
| 367 | 1x |
se = NA, |
| 368 | 1x |
ci_l = exp(boot_ci[[l_u_index[[3]]]][l_u_index[[1]]]), |
| 369 | 1x |
ci_u = exp(boot_ci[[l_u_index[[3]]]][l_u_index[[2]]]), |
| 370 | 1x |
pval = NA |
| 371 |
) |
|
| 372 | ||
| 373 |
# boot results for A v C |
|
| 374 | 1x |
boot_ci_ac <- boot.ci(boot_res, type = boot_ci_type, w_obj = weights_object, index = c(4, 6)) |
| 375 | 1x |
boot_res_AC <- list( |
| 376 | 1x |
est = as.vector(exp(boot_res$t0[4])), |
| 377 | 1x |
se = NA, |
| 378 | 1x |
ci_l = exp(boot_ci_ac[[l_u_index[[3]]]][l_u_index[[1]]]), |
| 379 | 1x |
ci_u = exp(boot_ci_ac[[l_u_index[[3]]]][l_u_index[[2]]]), |
| 380 | 1x |
pval = NA |
| 381 |
) |
|
| 382 | ||
| 383 |
# boot results for A v B, method 2 |
|
| 384 | 1x |
boot_res_AC2 <- list( |
| 385 | 1x |
est = as.vector(boot_res$t0[4]), |
| 386 | 1x |
se = NA, |
| 387 | 1x |
ci_l = boot_ci_ac[[l_u_index[[3]]]][l_u_index[[1]]], |
| 388 | 1x |
ci_u = boot_ci_ac[[l_u_index[[3]]]][l_u_index[[2]]], |
| 389 | 1x |
pval = NA |
| 390 |
) |
|
| 391 | 1x |
boot_res_AC2$se <- find_SE_from_CI(boot_res_AC2$ci_l, boot_res_AC2$ci_u, 0.95, log = FALSE) |
| 392 | 1x |
boot_res_AB2 <- bucher(boot_res_AC2, res_BC, conf_lv = 0.95) |
| 393 | 1x |
boot_res_AB2 <- list( |
| 394 | 1x |
est = exp(boot_res_AB2$est), |
| 395 | 1x |
se = NA, |
| 396 | 1x |
ci_l = exp(boot_res_AB2$ci_l), |
| 397 | 1x |
ci_u = exp(boot_res_AB2$ci_u), |
| 398 | 1x |
pval = NA |
| 399 |
) |
|
| 400 |
} else {
|
|
| 401 | 2x |
boot_res <- NULL |
| 402 | 2x |
boot_res_AB <- NULL |
| 403 | 2x |
boot_res_AB2 <- NULL |
| 404 | 2x |
boot_res_AC <- NULL |
| 405 |
} |
|
| 406 | ||
| 407 |
# transform |
|
| 408 | 3x |
res_AB$est <- exp(res_AB$est) |
| 409 | 3x |
res_AB$ci_l <- exp(res_AB$ci_l) |
| 410 | 3x |
res_AB$ci_u <- exp(res_AB$ci_u) |
| 411 | 3x |
res_AB_unadj$est <- exp(res_AB_unadj$est) |
| 412 | 3x |
res_AB_unadj$ci_l <- exp(res_AB_unadj$ci_l) |
| 413 | 3x |
res_AB_unadj$ci_u <- exp(res_AB_unadj$ci_u) |
| 414 | ||
| 415 | 3x |
res_AC$est <- exp(res_AC$est) |
| 416 | 3x |
res_AC_unadj$est <- exp(res_AC_unadj$est) |
| 417 | 3x |
res_BC$est <- exp(res_BC$est) |
| 418 | ||
| 419 |
# : report all raw fitted obj |
|
| 420 | 3x |
res$inferential[["fit"]] <- list( |
| 421 | 3x |
km_before_ipd = kmobj_ipd, |
| 422 | 3x |
km_after_ipd = kmobj_ipd_adj, |
| 423 | 3x |
km_agd = kmobj_agd, |
| 424 | 3x |
model_before_ipd = coxobj_ipd, |
| 425 | 3x |
model_after_ipd = coxobj_ipd_adj, |
| 426 | 3x |
model_agd = coxobj_agd, |
| 427 | 3x |
res_AC = res_AC, |
| 428 | 3x |
res_AC_unadj = res_AC_unadj, |
| 429 | 3x |
res_BC = res_BC, |
| 430 | 3x |
res_AB = res_AB, |
| 431 | 3x |
res_AB_unadj = res_AB_unadj, |
| 432 | 3x |
boot_res = boot_res, |
| 433 | 3x |
boot_res_AC = boot_res_AC, |
| 434 | 3x |
boot_res_AB_mc = boot_res_AB, |
| 435 | 3x |
boot_res_AB = boot_res_AB2 |
| 436 |
) |
|
| 437 | ||
| 438 |
# : compile HR result |
|
| 439 | 3x |
res$inferential[["summary"]] <- data.frame( |
| 440 | 3x |
case = c("AC", "adjusted_AC", "BC", "AB", "adjusted_AB"),
|
| 441 | 3x |
HR = c( |
| 442 | 3x |
summary(coxobj_ipd)$conf.int[1], |
| 443 | 3x |
summary(coxobj_ipd_adj)$conf.int[1], |
| 444 | 3x |
summary(coxobj_agd)$conf.int[1], |
| 445 | 3x |
res_AB_unadj$est, res_AB$est |
| 446 |
), |
|
| 447 | 3x |
LCL = c( |
| 448 | 3x |
summary(coxobj_ipd)$conf.int[3], |
| 449 | 3x |
summary(coxobj_ipd_adj)$conf.int[3], |
| 450 | 3x |
summary(coxobj_agd)$conf.int[3], |
| 451 | 3x |
res_AB_unadj$ci_l, res_AB$ci_l |
| 452 |
), |
|
| 453 | 3x |
UCL = c( |
| 454 | 3x |
summary(coxobj_ipd)$conf.int[4], |
| 455 | 3x |
summary(coxobj_ipd_adj)$conf.int[4], |
| 456 | 3x |
summary(coxobj_agd)$conf.int[4], |
| 457 | 3x |
res_AB_unadj$ci_u, res_AB$ci_u |
| 458 |
), |
|
| 459 | 3x |
pval = c( |
| 460 | 3x |
summary(coxobj_ipd)$waldtest[3], |
| 461 | 3x |
summary(coxobj_ipd_adj)$waldtest[3], |
| 462 | 3x |
summary(coxobj_agd)$waldtest[3], |
| 463 | 3x |
res_AB_unadj$pval, res_AB$pval |
| 464 |
) |
|
| 465 |
) |
|
| 466 | ||
| 467 |
# output |
|
| 468 | 3x |
res |
| 469 |
} |
|
| 470 | ||
| 471 |
# MAIC inference functions for Binary outcome type ------------ |
|
| 472 |
maic_anchored_binary <- function(res, |
|
| 473 |
res_BC = NULL, |
|
| 474 |
dat, |
|
| 475 |
ipd, |
|
| 476 |
pseudo_ipd, |
|
| 477 |
binary_robust_cov_type, |
|
| 478 |
weights_object, |
|
| 479 |
endpoint_name, |
|
| 480 |
normalize_weights, |
|
| 481 |
eff_measure, |
|
| 482 |
trt_ipd, |
|
| 483 |
trt_agd, |
|
| 484 |
boot_ci_type) {
|
|
| 485 |
# ~~~ Analysis table |
|
| 486 |
# : set up proper link |
|
| 487 | 5x |
glm_link <- switch(eff_measure, |
| 488 | 5x |
"RD" = "identity", |
| 489 | 5x |
"RR" = "log", |
| 490 | 5x |
"OR" = "logit" |
| 491 |
) |
|
| 492 | 5x |
res_template <- list( |
| 493 | 5x |
est = NA, |
| 494 | 5x |
se = NA, |
| 495 | 5x |
ci_l = NA, |
| 496 | 5x |
ci_u = NA, |
| 497 | 5x |
pval = NA |
| 498 |
) |
|
| 499 | ||
| 500 |
# : fit glm for binary outcome and robust estimate with weights |
|
| 501 | 5x |
binobj_ipd <- glm(RESPONSE ~ ARM, ipd, family = binomial(link = glm_link)) |
| 502 | 5x |
binobj_ipd_adj <- suppressWarnings(glm(RESPONSE ~ ARM, ipd, weights = weights, family = binomial(link = glm_link))) |
| 503 | 5x |
binobj_agd <- glm(RESPONSE ~ ARM, pseudo_ipd, family = binomial(link = glm_link)) |
| 504 | ||
| 505 | 5x |
bin_robust_cov <- sandwich::vcovHC(binobj_ipd_adj, type = binary_robust_cov_type) |
| 506 | 5x |
bin_robust_coef <- lmtest::coeftest(binobj_ipd_adj, vcov. = bin_robust_cov) |
| 507 | 5x |
bin_robust_ci <- lmtest::coefci(binobj_ipd_adj, vcov. = bin_robust_cov) |
| 508 | ||
| 509 |
# : make general summary |
|
| 510 | 5x |
glmDesc_ipd <- glm_makeup(binobj_ipd, legend = "IPD, before matching", weighted = FALSE) |
| 511 | 5x |
glmDesc_ipd_adj <- glm_makeup(binobj_ipd_adj, legend = "IPD, after matching", weighted = TRUE) |
| 512 | 5x |
glmDesc_agd <- glm_makeup(binobj_agd, legend = "AgD, external", weighted = FALSE) |
| 513 | 5x |
glmDesc <- rbind(glmDesc_ipd, glmDesc_ipd_adj, glmDesc_agd) |
| 514 | 5x |
glmDesc <- cbind(trt_ind = c("C", "B", "A")[match(glmDesc$treatment, levels(dat$ARM))], glmDesc)
|
| 515 | 5x |
rownames(glmDesc) <- NULL |
| 516 | 5x |
res$descriptive[["summary"]] <- glmDesc |
| 517 | ||
| 518 |
# derive ipd exp arm vs agd exp arm via bucher |
|
| 519 | 5x |
res_AC <- res_template |
| 520 | 5x |
res_AC$est <- bin_robust_coef[2, "Estimate"] |
| 521 | 5x |
res_AC$se <- bin_robust_coef[2, "Std. Error"] |
| 522 | 5x |
res_AC$ci_l <- bin_robust_ci[2, "2.5 %"] |
| 523 | 5x |
res_AC$ci_u <- bin_robust_ci[2, "97.5 %"] |
| 524 | 5x |
res_AC$pval <- bin_robust_coef[2, "Pr(>|z|)"] |
| 525 | ||
| 526 |
# unadjusted AC |
|
| 527 | 5x |
res_AC_unadj <- res_template |
| 528 | 5x |
res_AC_unadj$est <- summary(binobj_ipd)$coefficients[2, "Estimate"] |
| 529 | 5x |
res_AC_unadj$se <- summary(binobj_ipd)$coefficients[2, "Std. Error"] |
| 530 | 5x |
res_AC_unadj$ci_l <- confint.default(binobj_ipd)[2, "2.5 %"] |
| 531 | 5x |
res_AC_unadj$ci_u <- confint.default(binobj_ipd)[2, "97.5 %"] |
| 532 | 5x |
res_AC_unadj$pval <- summary(binobj_ipd)$coefficients[2, "Pr(>|z|)"] |
| 533 | ||
| 534 |
# BC |
|
| 535 | 5x |
if (is.null(res_BC)) {
|
| 536 | 5x |
res_BC <- res_template |
| 537 | 5x |
res_BC$est <- summary(binobj_agd)$coefficients[2, "Estimate"] |
| 538 | 5x |
res_BC$se <- summary(binobj_agd)$coefficients[2, "Std. Error"] |
| 539 | 5x |
res_BC$ci_l <- confint.default(binobj_agd)[2, "2.5 %"] |
| 540 | 5x |
res_BC$ci_u <- confint.default(binobj_agd)[2, "97.5 %"] |
| 541 | 5x |
res_BC$pval <- summary(binobj_agd)$coefficients[2, "Pr(>|z|)"] |
| 542 |
} |
|
| 543 | ||
| 544 |
# derive AB |
|
| 545 | 5x |
res_AB <- bucher(res_AC, res_BC, conf_lv = 0.95) |
| 546 | 5x |
res_AB_unadj <- bucher(res_AC_unadj, res_BC, conf_lv = 0.95) |
| 547 | ||
| 548 |
# : get bootstrapped estimates if applicable |
|
| 549 | 5x |
if (!is.null(weights_object$boot)) {
|
| 550 | 1x |
keep_rows <- setdiff(seq_len(nrow(weights_object$data)), weights_object$rows_with_missing) |
| 551 | 1x |
boot_ipd_id <- weights_object$data[keep_rows, "USUBJID", drop = FALSE] |
| 552 | ||
| 553 | 1x |
boot_ipd <- merge(boot_ipd_id, ipd, by = "USUBJID", all.x = TRUE) |
| 554 | ! |
if (nrow(boot_ipd) != nrow(boot_ipd_id)) stop("ipd has multiple observations for some patients")
|
| 555 | 1x |
boot_ipd <- boot_ipd[match(boot_ipd$USUBJID, boot_ipd_id$USUBJID), ] |
| 556 | ||
| 557 | 1x |
stat_fun <- function(data, index, w_obj, eff_measure, normalize) {
|
| 558 | 21x |
r <- dynGet("r", ifnotfound = NA) # Get bootstrap iteration
|
| 559 | 21x |
if (!is.na(r)) {
|
| 560 | ! |
if (!all(index == w_obj$boot[, 1, r])) stop("Bootstrap and weight indices don't match")
|
| 561 | 20x |
boot_ipd <- data[w_obj$boot[, 1, r], ] |
| 562 | 20x |
boot_ipd$weights <- w_obj$boot[, 2, r] |
| 563 | ||
| 564 | 20x |
if (normalize) {
|
| 565 | ! |
boot_ipd$weights <- ave( |
| 566 | ! |
boot_ipd$weights, |
| 567 | ! |
boot_ipd$ARM, |
| 568 | ! |
FUN = function(w) w / mean(w, na.rm = TRUE) |
| 569 |
) |
|
| 570 |
} |
|
| 571 |
} |
|
| 572 | ||
| 573 | 21x |
boot_binobj_dat_adj <- suppressWarnings( |
| 574 | 21x |
glm(RESPONSE ~ ARM, boot_ipd, weights = boot_ipd$weights, family = binomial(link = glm_link)) |
| 575 |
) |
|
| 576 | 21x |
boot_AC_est <- coef(boot_binobj_dat_adj)[2] |
| 577 | 21x |
boot_AC_var <- vcov(boot_binobj_dat_adj)[2, 2] |
| 578 | ||
| 579 | 21x |
boot_res_AC <- list(est = boot_AC_est, se = sqrt(boot_AC_var)) |
| 580 |
# temp method to source in variance of BC in AgD via monte carlo, may be removed in future |
|
| 581 | 21x |
res_BC_mc <- res_BC |
| 582 | 21x |
res_BC_mc$est <- rnorm(1, mean = res_BC$est, sd = res_BC$se) |
| 583 | ||
| 584 | 21x |
boot_res_AB <- bucher(boot_res_AC, res_BC_mc, conf_lv = 0.95) |
| 585 | ||
| 586 | 21x |
c( |
| 587 | 21x |
est_AB = boot_res_AB$est, |
| 588 | 21x |
var_AB = boot_res_AB$se^2, |
| 589 | 21x |
se_AB = boot_res_AB$se, |
| 590 | 21x |
est_AC = boot_res_AC$est, |
| 591 | 21x |
se_AC = boot_res_AC$se, |
| 592 | 21x |
var_AC = boot_res_AC$se^2 |
| 593 |
) |
|
| 594 |
} |
|
| 595 | ||
| 596 |
# Revert seed to how it was for weight bootstrap sampling |
|
| 597 | 1x |
old_seed <- globalenv()$.Random.seed |
| 598 | 1x |
on.exit(suspendInterrupts(set_random_seed(old_seed))) |
| 599 | 1x |
set_random_seed(weights_object$boot_seed) |
| 600 | ||
| 601 | 1x |
R <- dim(weights_object$boot)[3] |
| 602 | 1x |
boot_res <- boot( |
| 603 | 1x |
boot_ipd, |
| 604 | 1x |
stat_fun, |
| 605 | 1x |
R = R, |
| 606 | 1x |
w_obj = weights_object, |
| 607 | 1x |
eff_measure = eff_measure, |
| 608 | 1x |
normalize = normalize_weights, |
| 609 | 1x |
strata = weights_object$boot_strata |
| 610 |
) |
|
| 611 | 1x |
boot_ci <- boot.ci(boot_res, type = boot_ci_type, w_obj = weights_object) |
| 612 | ||
| 613 | 1x |
l_u_index <- switch(boot_ci_type, |
| 614 | 1x |
"norm" = list(2, 3, "normal"), |
| 615 | 1x |
"basic" = list(4, 5, "basic"), |
| 616 | 1x |
"stud" = list(4, 5, "student"), |
| 617 | 1x |
"perc" = list(4, 5, "percent"), |
| 618 | 1x |
"bca" = list(4, 5, "bca") |
| 619 |
) |
|
| 620 | ||
| 621 | 1x |
transform_estimate <- switch(eff_measure, |
| 622 | 1x |
"RD" = function(x) x * 100, |
| 623 | 1x |
"RR" = exp, |
| 624 | 1x |
"OR" = exp |
| 625 |
) |
|
| 626 | ||
| 627 |
# boot results for A v B, method 1 (maybe retired in future version) |
|
| 628 | 1x |
boot_res_AB <- list( |
| 629 | 1x |
est = as.vector(transform_estimate(boot_res$t0[1])), |
| 630 | 1x |
se = NA, |
| 631 | 1x |
ci_l = transform_estimate(boot_ci[[l_u_index[[3]]]][l_u_index[[1]]]), |
| 632 | 1x |
ci_u = transform_estimate(boot_ci[[l_u_index[[3]]]][l_u_index[[2]]]), |
| 633 | 1x |
pval = NA |
| 634 |
) |
|
| 635 | ||
| 636 |
# boot results for A v C |
|
| 637 | 1x |
boot_ci_ac <- boot.ci(boot_res, type = boot_ci_type, w_obj = weights_object, index = c(4, 6)) |
| 638 | 1x |
boot_res_AC <- list( |
| 639 | 1x |
est = as.vector(transform_estimate(boot_res$t0[4])), |
| 640 | 1x |
se = NA, |
| 641 | 1x |
ci_l = transform_estimate(boot_ci_ac[[l_u_index[[3]]]][l_u_index[[1]]]), |
| 642 | 1x |
ci_u = transform_estimate(boot_ci_ac[[l_u_index[[3]]]][l_u_index[[2]]]), |
| 643 | 1x |
pval = NA |
| 644 |
) |
|
| 645 | ||
| 646 |
# boot results for A v B, method 2 |
|
| 647 | 1x |
boot_res_AC2 <- list( |
| 648 | 1x |
est = as.vector(boot_res$t0[4]), |
| 649 | 1x |
se = NA, |
| 650 | 1x |
ci_l = boot_ci_ac[[l_u_index[[3]]]][l_u_index[[1]]], |
| 651 | 1x |
ci_u = boot_ci_ac[[l_u_index[[3]]]][l_u_index[[2]]], |
| 652 | 1x |
pval = NA |
| 653 |
) |
|
| 654 | 1x |
boot_res_AC2$se <- find_SE_from_CI(boot_res_AC2$ci_l, boot_res_AC2$ci_u, 0.95, log = FALSE) |
| 655 | 1x |
boot_res_AB2 <- bucher(boot_res_AC2, res_BC, conf_lv = 0.95) |
| 656 | 1x |
boot_res_AB2 <- list( |
| 657 | 1x |
est = transform_estimate(boot_res_AB2$est), |
| 658 | 1x |
se = NA, |
| 659 | 1x |
ci_l = transform_estimate(boot_res_AB2$ci_l), |
| 660 | 1x |
ci_u = transform_estimate(boot_res_AB2$ci_u), |
| 661 | 1x |
pval = NA |
| 662 |
) |
|
| 663 |
} else {
|
|
| 664 | 4x |
boot_res_AC <- NULL |
| 665 | 4x |
boot_res_AB <- NULL |
| 666 | 4x |
boot_res_AB2 <- NULL |
| 667 | 4x |
boot_res <- NULL |
| 668 |
} |
|
| 669 | ||
| 670 |
# transform effect measures |
|
| 671 | 5x |
if (eff_measure %in% c("RR", "OR")) {
|
| 672 | 4x |
res_AB <- transform_ratio(res_AB) |
| 673 | 4x |
res_AB_unadj <- transform_ratio(res_AB_unadj) |
| 674 | 4x |
res_AC <- transform_ratio(res_AC) |
| 675 | 4x |
res_AC_unadj <- transform_ratio(res_AC_unadj) |
| 676 | 4x |
res_BC <- transform_ratio(res_BC) |
| 677 | 1x |
} else if (eff_measure == "RD") {
|
| 678 | 1x |
res_AB <- transform_absolute(res_AB) |
| 679 | 1x |
res_AB_unadj <- transform_absolute(res_AB_unadj) |
| 680 | 1x |
res_AC <- transform_absolute(res_AC) |
| 681 | 1x |
res_AC_unadj <- transform_absolute(res_AC_unadj) |
| 682 | 1x |
res_BC <- transform_absolute(res_BC) |
| 683 |
} |
|
| 684 | ||
| 685 | ||
| 686 |
# report all raw fitted obj |
|
| 687 | 5x |
res$inferential[["fit"]] <- list( |
| 688 | 5x |
model_before_ipd = binobj_ipd, |
| 689 | 5x |
model_after_ipd = binobj_ipd_adj, |
| 690 | 5x |
model_agd = binobj_agd, |
| 691 | 5x |
res_AC = res_AC, |
| 692 | 5x |
res_AC_unadj = res_AC_unadj, |
| 693 | 5x |
res_BC = res_BC, |
| 694 | 5x |
res_AB = res_AB, |
| 695 | 5x |
res_AB_unadj = res_AB_unadj, |
| 696 | 5x |
boot_res = boot_res, |
| 697 | 5x |
boot_res_AC = boot_res_AC, |
| 698 | 5x |
boot_res_AB_mc = boot_res_AB, |
| 699 | 5x |
boot_res_AB = boot_res_AB2 |
| 700 |
) |
|
| 701 | ||
| 702 |
# compile binary effect estimate result |
|
| 703 | 5x |
res$inferential[["summary"]] <- data.frame( |
| 704 | 5x |
case = c("AC", "adjusted_AC", "BC", "AB", "adjusted_AB"),
|
| 705 | 5x |
EST = c( |
| 706 | 5x |
res_AC_unadj$est, |
| 707 | 5x |
res_AC$est, |
| 708 | 5x |
res_BC$est, |
| 709 | 5x |
res_AB_unadj$est, |
| 710 | 5x |
res_AB$est |
| 711 |
), |
|
| 712 | 5x |
LCL = c( |
| 713 | 5x |
res_AC_unadj$ci_l, |
| 714 | 5x |
res_AC$ci_l, |
| 715 | 5x |
res_BC$ci_l, |
| 716 | 5x |
res_AB_unadj$ci_l, |
| 717 | 5x |
res_AB$ci_l |
| 718 |
), |
|
| 719 | 5x |
UCL = c( |
| 720 | 5x |
res_AC_unadj$ci_u, |
| 721 | 5x |
res_AC$ci_u, |
| 722 | 5x |
res_BC$ci_u, |
| 723 | 5x |
res_AB_unadj$ci_u, |
| 724 | 5x |
res_AB$ci_u |
| 725 |
), |
|
| 726 | 5x |
pval = c( |
| 727 | 5x |
res_AC_unadj$pval, |
| 728 | 5x |
res_AC$pval, |
| 729 | 5x |
res_BC$pval, |
| 730 | 5x |
res_AB_unadj$pval, |
| 731 | 5x |
res_AB$pval |
| 732 |
) |
|
| 733 |
) |
|
| 734 | 5x |
names(res$inferential[["summary"]])[2] <- eff_measure |
| 735 | ||
| 736 |
# output |
|
| 737 | 5x |
res |
| 738 |
} |
| 1 |
#' Kaplan Meier (KM) plot function for anchored and unanchored cases |
|
| 2 |
#' |
|
| 3 |
#' It is wrapper function of \code{basic_kmplot}. The argument setting is similar to \code{maic_anchored} and
|
|
| 4 |
#' \code{maic_unanchored}, and it is used in those two functions.
|
|
| 5 |
#' |
|
| 6 |
#' @param weights_object an object returned by \code{estimate_weight}
|
|
| 7 |
#' @param tte_ipd a data frame of individual patient data (IPD) of internal trial, contain at least `"USUBJID"`, |
|
| 8 |
#' `"EVENT"`, `"TIME"` columns and a column indicating treatment assignment |
|
| 9 |
#' @param tte_pseudo_ipd a data frame of pseudo IPD by digitized KM curves of external trial (for time-to-event |
|
| 10 |
#' endpoint), contain at least `"EVENT"`, `"TIME"` |
|
| 11 |
#' @param trt_ipd a string, name of the interested investigation arm in internal trial \code{dat_igd} (real IPD)
|
|
| 12 |
#' @param trt_agd a string, name of the interested investigation arm in external trial \code{dat_pseudo} (pseudo IPD)
|
|
| 13 |
#' @param trt_common a string, name of the common comparator in internal and external trial, by default is NULL, |
|
| 14 |
#' indicating unanchored case |
|
| 15 |
#' @param trt_var_ipd a string, column name in \code{tte_ipd} that contains the treatment assignment
|
|
| 16 |
#' @param trt_var_agd a string, column name in \code{tte_pseudo_ipd} that contains the treatment assignment
|
|
| 17 |
#' @param normalize_weights logical, default is \code{FALSE}. If \code{TRUE},
|
|
| 18 |
#' \code{scaled_weights} (normalized weights) in \code{weights_object$data} will be used.
|
|
| 19 |
#' @param km_conf_type a string, pass to \code{conf.type} of \code{survfit}
|
|
| 20 |
#' @param km_layout a string, only applicable for unanchored case (\code{trt_common = NULL}), indicated the desired
|
|
| 21 |
#' layout of output KM curve. |
|
| 22 |
#' @param ... other arguments in \code{basic_kmplot}
|
|
| 23 |
#' |
|
| 24 |
#' @return In unanchored case, a KM plot with risk set table. In anchored case, depending on \code{km_layout},
|
|
| 25 |
#' \itemize{
|
|
| 26 |
#' \item if "by_trial", 2 by 1 plot, first all KM curves (incl. weighted) in IPD trial, and then KM curves in AgD |
|
| 27 |
#' trial, with risk set table. |
|
| 28 |
#' \item if "by_arm", 2 by 1 plot, first KM curves of \code{trt_agd} and \code{trt_ipd} (with and without weights),
|
|
| 29 |
#' and then KM curves of \code{trt_common} in AgD trial and IPD trial (with and without weights). Risk set table is
|
|
| 30 |
#' appended. |
|
| 31 |
#' \item if "all", 2 by 2 plot, all plots in "by_trial" and "by_arm" without risk set table appended. |
|
| 32 |
#' } |
|
| 33 |
#' @example inst/examples/kmplot_unanchored_ex.R |
|
| 34 |
#' @example inst/examples/kmplot_anchored_ex.R |
|
| 35 |
#' @export |
|
| 36 | ||
| 37 |
kmplot <- function(weights_object, |
|
| 38 |
tte_ipd, |
|
| 39 |
tte_pseudo_ipd, |
|
| 40 |
trt_ipd, |
|
| 41 |
trt_agd, |
|
| 42 |
trt_common = NULL, |
|
| 43 |
normalize_weights = FALSE, |
|
| 44 |
trt_var_ipd = "ARM", |
|
| 45 |
trt_var_agd = "ARM", |
|
| 46 |
km_conf_type = "log-log", |
|
| 47 |
km_layout = c("all", "by_trial", "by_arm"),
|
|
| 48 |
...) {
|
|
| 49 | 8x |
names(tte_ipd) <- toupper(names(tte_ipd)) |
| 50 | 8x |
names(tte_pseudo_ipd) <- toupper(names(tte_pseudo_ipd)) |
| 51 | 8x |
trt_var_ipd <- toupper(trt_var_ipd) |
| 52 | 8x |
trt_var_agd <- toupper(trt_var_agd) |
| 53 | ||
| 54 |
# pre check |
|
| 55 | 8x |
if (!"maicplus_estimate_weights" %in% class(weights_object)) {
|
| 56 | ! |
stop("weights_object should be an object returned by estimate_weights")
|
| 57 |
} |
|
| 58 | 8x |
if (!all(c("USUBJID", "TIME", "EVENT", trt_var_ipd) %in% names(tte_ipd))) {
|
| 59 | ! |
stop("tte_ipd needs to include at least USUBJID, TIME, EVENT, ", trt_var_ipd)
|
| 60 |
} |
|
| 61 | 8x |
if (!all(c("TIME", "EVENT", trt_var_agd) %in% names(tte_pseudo_ipd))) {
|
| 62 | ! |
stop("tte_pseudo_ipd needs to include at least TIME, EVENT, ", trt_var_agd)
|
| 63 |
} |
|
| 64 | 8x |
km_layout <- match.arg(km_layout, choices = c("all", "by_trial", "by_arm"), several.ok = FALSE)
|
| 65 | ||
| 66 |
# preparing data |
|
| 67 | 8x |
is_anchored <- !is.null(trt_common) |
| 68 | 8x |
tte_ipd <- tte_ipd[tte_ipd[[trt_var_ipd]] %in% c(trt_ipd, trt_common), , drop = FALSE] |
| 69 | 8x |
tte_pseudo_ipd <- tte_pseudo_ipd[tte_pseudo_ipd[[trt_var_agd]] %in% c(trt_agd, trt_common), , drop = FALSE] |
| 70 | ||
| 71 | 8x |
if (normalize_weights) {
|
| 72 | 2x |
tte_ipd$weights <- weights_object$data$scaled_weights[match(weights_object$data$USUBJID, tte_ipd$USUBJID)] |
| 73 |
} else {
|
|
| 74 | 6x |
tte_ipd$weights <- weights_object$data$weights[match(weights_object$data$USUBJID, tte_ipd$USUBJID)] |
| 75 |
} |
|
| 76 | 8x |
tte_pseudo_ipd$weights <- 1 |
| 77 | ||
| 78 |
# generate plot |
|
| 79 | 8x |
if (!is_anchored) {
|
| 80 |
## unanchored case |
|
| 81 | ! |
kmobj_B <- survfit(as.formula(paste("Surv(TIME, EVENT) ~", trt_var_agd)),
|
| 82 | ! |
data = tte_pseudo_ipd, |
| 83 | ! |
conf.type = km_conf_type |
| 84 |
) |
|
| 85 | ! |
kmobj_A <- survfit(as.formula(paste("Surv(TIME, EVENT) ~", trt_var_ipd)),
|
| 86 | ! |
data = tte_ipd, |
| 87 | ! |
conf.type = km_conf_type |
| 88 |
) |
|
| 89 | ! |
kmobj_A_adj <- survfit(as.formula(paste("Surv(TIME, EVENT) ~", trt_var_ipd)),
|
| 90 | ! |
data = tte_ipd, |
| 91 | ! |
conf.type = km_conf_type, |
| 92 | ! |
weights = weights |
| 93 |
) |
|
| 94 | ||
| 95 | ! |
kmdat <- do.call( |
| 96 | ! |
rbind, |
| 97 | ! |
c( |
| 98 | ! |
survfit_makeup(kmobj_B, trt_agd), |
| 99 | ! |
survfit_makeup(kmobj_A, trt_ipd), |
| 100 | ! |
survfit_makeup(kmobj_A_adj, paste(trt_ipd, "(weighted)")) |
| 101 |
) |
|
| 102 |
) |
|
| 103 | ! |
kmdat$treatment <- factor(kmdat$treatment, levels = unique(kmdat$treatment)) |
| 104 | ||
| 105 | ! |
basic_kmplot(kmdat, |
| 106 | ! |
show_risk_set = TRUE, |
| 107 | ! |
main_title = "Kaplan-Meier Curves", |
| 108 | ! |
subplot_heights = NULL, |
| 109 | ! |
suppress_plot_layout = FALSE, |
| 110 |
... |
|
| 111 |
) |
|
| 112 |
} else {
|
|
| 113 |
# anchored case |
|
| 114 |
# - agd trial km data |
|
| 115 | 8x |
kmobj_C_S1 <- survfit(as.formula(paste("Surv(TIME, EVENT) ~", trt_var_agd)),
|
| 116 | 8x |
data = tte_pseudo_ipd, |
| 117 | 8x |
conf.type = km_conf_type, |
| 118 | 8x |
subset = eval(parse(text = paste0("(tte_pseudo_ipd$", trt_var_agd, " == '", trt_common, "')")))
|
| 119 |
) |
|
| 120 | 8x |
kmobj_B_S1 <- survfit(as.formula(paste("Surv(TIME, EVENT) ~", trt_var_agd)),
|
| 121 | 8x |
data = tte_pseudo_ipd, |
| 122 | 8x |
conf.type = km_conf_type, |
| 123 | 8x |
subset = eval(parse(text = paste0("(tte_pseudo_ipd$", trt_var_agd, " == '", trt_agd, "')")))
|
| 124 |
) |
|
| 125 |
# - ipd trial km data |
|
| 126 | 8x |
kmobj_C_S2 <- survfit(as.formula(paste("Surv(TIME, EVENT) ~", trt_var_ipd)),
|
| 127 | 8x |
data = tte_ipd, |
| 128 | 8x |
conf.type = km_conf_type, |
| 129 | 8x |
subset = eval(parse(text = paste0("(tte_ipd$", trt_var_ipd, " == '", trt_common, "')")))
|
| 130 |
) |
|
| 131 | 8x |
kmobj_A_S2 <- survfit(as.formula(paste("Surv(TIME, EVENT) ~", trt_var_ipd)),
|
| 132 | 8x |
data = tte_ipd, |
| 133 | 8x |
conf.type = km_conf_type, |
| 134 | 8x |
subset = eval(parse(text = paste0("(tte_ipd$", trt_var_ipd, " == '", trt_ipd, "')")))
|
| 135 |
) |
|
| 136 |
# - ipd trial km data with weights |
|
| 137 | 8x |
kmobj_Cadj_S2 <- survfit(as.formula(paste("Surv(TIME, EVENT) ~", trt_var_ipd)),
|
| 138 | 8x |
data = tte_ipd, |
| 139 | 8x |
conf.type = km_conf_type, |
| 140 | 8x |
weights = weights, |
| 141 | 8x |
subset = eval(parse(text = paste0("(tte_ipd$", trt_var_ipd, " == '", trt_common, "')")))
|
| 142 |
) |
|
| 143 | 8x |
kmobj_Aadj_S2 <- survfit(as.formula(paste("Surv(TIME, EVENT) ~", trt_var_ipd)),
|
| 144 | 8x |
data = tte_ipd, |
| 145 | 8x |
conf.type = km_conf_type, |
| 146 | 8x |
weights = weights, |
| 147 | 8x |
subset = eval(parse(text = paste0("(tte_ipd$", trt_var_ipd, " == '", trt_ipd, "')")))
|
| 148 |
) |
|
| 149 |
# - plotdat for layout by trial |
|
| 150 | 8x |
kmdat_s2 <- do.call( |
| 151 | 8x |
rbind, |
| 152 | 8x |
c( |
| 153 | 8x |
survfit_makeup(kmobj_C_S2, trt_common), |
| 154 | 8x |
survfit_makeup(kmobj_A_S2, trt_ipd), |
| 155 | 8x |
survfit_makeup(kmobj_Aadj_S2, paste(trt_ipd, "(weighted)")), |
| 156 | 8x |
survfit_makeup(kmobj_Cadj_S2, paste(trt_common, "(weighted)")) |
| 157 |
) |
|
| 158 |
) |
|
| 159 | 8x |
kmdat_s2$treatment <- factor(kmdat_s2$treatment, levels = unique(kmdat_s2$treatment)) |
| 160 | 8x |
kmdat_s1 <- do.call( |
| 161 | 8x |
rbind, |
| 162 | 8x |
c( |
| 163 | 8x |
survfit_makeup(kmobj_C_S1, trt_common), |
| 164 | 8x |
survfit_makeup(kmobj_B_S1, trt_agd) |
| 165 |
) |
|
| 166 |
) |
|
| 167 | 8x |
kmdat_s1$treatment <- factor(kmdat_s1$treatment, levels = unique(kmdat_s1$treatment)) |
| 168 |
# - plotdat for layout by arm |
|
| 169 | 8x |
kmdat_a2 <- do.call( |
| 170 | 8x |
rbind, |
| 171 | 8x |
c( |
| 172 | 8x |
survfit_makeup(kmobj_B_S1, trt_agd), |
| 173 | 8x |
survfit_makeup(kmobj_A_S2, trt_ipd), |
| 174 | 8x |
survfit_makeup(kmobj_Aadj_S2, paste(trt_ipd, "(weighted)")) |
| 175 |
) |
|
| 176 |
) |
|
| 177 | 8x |
kmdat_a2$treatment <- factor(kmdat_a2$treatment, levels = unique(kmdat_a2$treatment)) |
| 178 | 8x |
kmdat_a1 <- do.call( |
| 179 | 8x |
rbind, |
| 180 | 8x |
c( |
| 181 | 8x |
survfit_makeup(kmobj_C_S1, paste(trt_common, "(AgD)")), |
| 182 | 8x |
survfit_makeup(kmobj_C_S2, paste(trt_common, "(IPD)")), |
| 183 | 8x |
survfit_makeup(kmobj_Cadj_S2, paste(trt_common, "(IPD,weighted)")) |
| 184 |
) |
|
| 185 |
) |
|
| 186 | 8x |
kmdat_a1$treatment <- factor(kmdat_a1$treatment, levels = unique(kmdat_a1$treatment)) |
| 187 | ||
| 188 | ||
| 189 |
# make plot depending on the layout |
|
| 190 | 8x |
if (km_layout == "by_trial") {
|
| 191 |
# 1 by 2 plot, each plot is per trial |
|
| 192 | 2x |
subplot_heights <- c(7, 0.7 + 2 * 0.7, 0.8) |
| 193 | 2x |
layout_mat <- matrix(1:4, ncol = 2) |
| 194 | 2x |
layout(layout_mat, heights = subplot_heights) |
| 195 | ||
| 196 | 2x |
basic_kmplot(kmdat_s2, |
| 197 | 2x |
main_title = paste0("Kaplan-Meier Curves \n(", trt_ipd, " vs ", trt_common, ") in the IPD trial"),
|
| 198 | 2x |
suppress_plot_layout = TRUE, ... |
| 199 |
) |
|
| 200 | ||
| 201 | 2x |
basic_kmplot(kmdat_s1, |
| 202 | 2x |
main_title = paste0("Kaplan-Meier Curves \n(", trt_agd, " vs ", trt_common, ") in the AgD trial"),
|
| 203 | 2x |
suppress_plot_layout = TRUE, ... |
| 204 |
) |
|
| 205 | 6x |
} else if (km_layout == "by_arm") {
|
| 206 |
# 1 by 2 plot, by 1 is for investigational arm, the other is for common comparator |
|
| 207 | 2x |
subplot_heights <- c(7, 0.7 + 2 * 0.7, 0.8) |
| 208 | 2x |
layout_mat <- matrix(1:4, ncol = 2) |
| 209 | 2x |
layout(layout_mat, heights = subplot_heights) |
| 210 | ||
| 211 | 2x |
basic_kmplot(kmdat_a2, |
| 212 | 2x |
main_title = paste0("Kaplan-Meier Curves \n(", trt_ipd, " vs ", trt_agd, ")"),
|
| 213 | 2x |
suppress_plot_layout = TRUE, ... |
| 214 |
) |
|
| 215 | ||
| 216 | 2x |
basic_kmplot(kmdat_a1, |
| 217 | 2x |
main_title = paste0("Kaplan-Meier Curves of Common Comparator \n", trt_common, "(IPD vs AgD Trial)"),
|
| 218 | 2x |
suppress_plot_layout = TRUE, ... |
| 219 |
) |
|
| 220 |
} else {
|
|
| 221 |
# 2 by 2 plot, combine by trial and by arm |
|
| 222 | 4x |
layout_mat <- matrix(1:4, ncol = 2, byrow = TRUE) |
| 223 | 4x |
layout(layout_mat) |
| 224 | ||
| 225 | 4x |
basic_kmplot(kmdat_s2, |
| 226 | 4x |
main_title = paste0("Kaplan-Meier Curves \n(", trt_ipd, " vs ", trt_common, ") in the IPD trial"),
|
| 227 | 4x |
show_risk_set = FALSE, |
| 228 | 4x |
suppress_plot_layout = TRUE, ... |
| 229 |
) |
|
| 230 | ||
| 231 | 4x |
basic_kmplot(kmdat_s1, |
| 232 | 4x |
main_title = paste0("Kaplan-Meier Curves \n(", trt_agd, " vs ", trt_common, ") in the AgD trial"),
|
| 233 | 4x |
show_risk_set = FALSE, |
| 234 | 4x |
suppress_plot_layout = TRUE, ... |
| 235 |
) |
|
| 236 | ||
| 237 | 4x |
basic_kmplot(kmdat_a2, |
| 238 | 4x |
main_title = paste0("Kaplan-Meier Curves \n(", trt_ipd, " vs ", trt_agd, ")"),
|
| 239 | 4x |
show_risk_set = FALSE, |
| 240 | 4x |
suppress_plot_layout = TRUE, ... |
| 241 |
) |
|
| 242 | ||
| 243 | 4x |
basic_kmplot(kmdat_a1, |
| 244 | 4x |
main_title = paste0("Kaplan-Meier Curves of Common Comparator \n", trt_common, "(IPD vs AgD Trial)"),
|
| 245 | 4x |
show_risk_set = FALSE, |
| 246 | 4x |
suppress_plot_layout = TRUE, ... |
| 247 |
) |
|
| 248 |
} |
|
| 249 |
} |
|
| 250 | 8x |
invisible(NULL) |
| 251 |
} |
|
| 252 | ||
| 253 | ||
| 254 |
#' Basic Kaplan Meier (KM) plot function |
|
| 255 |
#' |
|
| 256 |
#' This function can generate a basic KM plot with or without risk set table appended at the bottom. In a single plot, |
|
| 257 |
#' it can include up to 4 KM curves. This depends on number of levels in 'treatment' column in the input data.frame |
|
| 258 |
#' \code{kmdat}
|
|
| 259 |
#' |
|
| 260 |
#' @param kmdat a `data.frame`, must consist `treatment`, `time` (unit in days), `n.risk`, `censor`, `surv`, similar to |
|
| 261 |
#' an output from \code{maicplus:::survfit_makeup}
|
|
| 262 |
#' @param endpoint_name a string, name of time to event endpoint, to be show in the last line of title |
|
| 263 |
#' @param time_scale a string, time unit of median survival time, taking a value of 'years', 'months', 'weeks' or 'days' |
|
| 264 |
#' @param time_grid a numeric vector in the unit of \code{time_scale}, risk set table and x axis of the km plot will be
|
|
| 265 |
#' defined based on this time grid |
|
| 266 |
#' @param show_risk_set logical, show risk set table or not, TRUE by default |
|
| 267 |
#' @param main_title a string, main title of the KM plot |
|
| 268 |
#' @param subplot_heights a numeric vector, heights argument to \code{graphic::layout()},NULL by default which means
|
|
| 269 |
#' user will use the default setting |
|
| 270 |
#' @param suppress_plot_layout logical, suppress the layout setting in this function so that user can specify layout |
|
| 271 |
#' outside of the function, FALSE by default |
|
| 272 |
#' @param use_colors a character vector of length up to 4, colors to the KM curves, it will be passed to `col` of |
|
| 273 |
#' \code{lines()}
|
|
| 274 |
#' @param use_line_types a numeric vector of length up to 4, line type to the KM curves, it will be passed to `lty` of |
|
| 275 |
#' \code{lines()}
|
|
| 276 |
#' @param use_pch_cex a scalar between 0 and 1, point size to indicate censored individuals on the KM curves, it will be |
|
| 277 |
#' passed to `cex` of \code{points()}
|
|
| 278 |
#' @param use_pch_alpha a scalar between 0 and 255, degree of color transparency of points to indicate censored |
|
| 279 |
#' individuals on the KM curves, it will be passed to `cex` of \code{points()}
|
|
| 280 |
#' |
|
| 281 |
#' @example inst/examples/basic_kmplot_ex.R |
|
| 282 |
#' |
|
| 283 |
#' @return a KM plot with or without risk set table appended at the bottom, with up to 4 KM curves |
|
| 284 |
#' @export |
|
| 285 | ||
| 286 |
basic_kmplot <- function(kmdat, |
|
| 287 |
endpoint_name = "Time to Event Endpoint", |
|
| 288 |
time_scale = NULL, |
|
| 289 |
time_grid = NULL, |
|
| 290 |
show_risk_set = TRUE, |
|
| 291 |
main_title = "Kaplan-Meier Curves", |
|
| 292 |
subplot_heights = NULL, |
|
| 293 |
suppress_plot_layout = FALSE, |
|
| 294 |
use_colors = NULL, |
|
| 295 |
use_line_types = NULL, |
|
| 296 |
use_pch_cex = 0.65, |
|
| 297 |
use_pch_alpha = 100) {
|
|
| 298 | 24x |
original_par <- par("bty", "tcl", "mgp", "cex.lab", "cex.axis", "cex.main", "mar")
|
| 299 | 24x |
on.exit(par(original_par)) |
| 300 |
# precheck |
|
| 301 | 24x |
if (!length(subplot_heights) %in% c(0, (1 + show_risk_set))) {
|
| 302 | ! |
stop("length of subplot_heights should be ", (1 + show_risk_set))
|
| 303 |
} |
|
| 304 | 24x |
if (!is.factor(kmdat$treatment)) {
|
| 305 | ! |
stop("kmdat$treatment needs to be a factor, its levels will be used in legend and title, first level is comparator")
|
| 306 |
} |
|
| 307 | ! |
if (nlevels(kmdat$treatment) > 4) stop("kmdat$treatment cannot have more than 4 levels")
|
| 308 | ||
| 309 |
# set up x axis (time) |
|
| 310 | 24x |
if (is.null(time_grid)) {
|
| 311 | 4x |
max_t <- max(kmdat$time) |
| 312 | 4x |
t_range <- c(0, get_time_as(max_t, time_scale) * 1.07) |
| 313 | 4x |
time_grid <- pretty(t_range) |
| 314 |
} else {
|
|
| 315 | 20x |
t_range <- c(0, max(time_grid)) |
| 316 |
} |
|
| 317 | ||
| 318 | ||
| 319 | ||
| 320 |
# plat layout in par |
|
| 321 | 24x |
if (!suppress_plot_layout) {
|
| 322 | ! |
nr_subplot <- (1 + show_risk_set) |
| 323 | ! |
if (is.null(subplot_heights)) subplot_heights <- c(7, 0.7 + nlevels(kmdat$treatment) * 0.7, 0.8) |
| 324 | ! |
layout_mat <- matrix(1:nr_subplot, ncol = 1) |
| 325 | ! |
layout(layout_mat, heights = subplot_heights) |
| 326 |
} |
|
| 327 | ||
| 328 |
# plot cosmetic setup |
|
| 329 | 24x |
if (is.null(use_line_types)) {
|
| 330 | 8x |
use_lty <- c(1, 1, 2, 2) |
| 331 | 8x |
use_lwd <- c(1.5, 1.5, 1.2, 1.2) |
| 332 |
} else {
|
|
| 333 | 16x |
use_lty <- use_line_types |
| 334 | 16x |
use_lwd <- c(1.5, 1.5, 1.2, 1.2) |
| 335 |
} |
|
| 336 | ||
| 337 | 24x |
if (is.null(use_colors)) {
|
| 338 | 20x |
use_col <- c("#5450E4", "#00857C", "#6ECEB2", "#7B68EE")
|
| 339 |
} else {
|
|
| 340 | 4x |
use_col <- use_colors |
| 341 |
} |
|
| 342 | 24x |
use_col2 <- col2rgb(use_col) # preparing semi-transparent colors |
| 343 | 24x |
use_col2 <- rgb(use_col2[1, ], use_col2[2, ], use_col2[3, ], alpha = use_pch_alpha, maxColorValue = 255) |
| 344 | ||
| 345 |
## : first subplot: KM curve ------------------- |
|
| 346 |
# base plot |
|
| 347 | 24x |
par(bty = "n", tcl = -0.15, mgp = c(1.8, 0.4, 0), cex.lab = 0.85, cex.axis = 0.8, cex.main = 0.9, mar = c(3, 4, 5, 1)) |
| 348 | 24x |
plot(0, 0, |
| 349 | 24x |
type = "n", xlab = paste0("Time in ", time_scale), ylab = "Survival Probability",
|
| 350 | 24x |
ylim = c(0, 1), xlim = t_range, yaxt = "n", xaxt = "n", |
| 351 | 24x |
main = paste0(main_title, "\nEndpoint:", endpoint_name) |
| 352 |
) |
|
| 353 | 24x |
axis(2, las = 1) |
| 354 | 24x |
if (!is.null(time_grid)) {
|
| 355 | 24x |
axis(1, at = time_grid) |
| 356 |
} else {
|
|
| 357 | ! |
axis(1) |
| 358 |
} |
|
| 359 | ||
| 360 |
# add km line |
|
| 361 | 24x |
for (ii in 1:nlevels(kmdat$treatment)) {
|
| 362 | 72x |
tmpkmdat <- kmdat[as.numeric(kmdat$treatment) == ii, , drop = FALSE] |
| 363 | 72x |
lines( |
| 364 | 72x |
y = tmpkmdat$surv, |
| 365 | 72x |
x = get_time_as(tmpkmdat$time, time_scale), |
| 366 | 72x |
col = use_col[ii], |
| 367 | 72x |
lty = use_lty[ii], |
| 368 | 72x |
lwd = use_lwd[ii], |
| 369 | 72x |
type = "s" |
| 370 |
) |
|
| 371 | 72x |
tmpid <- (tmpkmdat$censor != 0) # cannot just ==1, anticipating weighted case |
| 372 | 72x |
points( |
| 373 | 72x |
y = tmpkmdat$surv[tmpid], |
| 374 | 72x |
x = get_time_as(tmpkmdat$time[tmpid], time_scale), |
| 375 | 72x |
col = use_col2[ii], |
| 376 | 72x |
pch = 3, |
| 377 | 72x |
cex = use_pch_cex |
| 378 |
) |
|
| 379 |
} |
|
| 380 | ||
| 381 |
## : second subplot: risk set table (if applicable) ------------------- |
|
| 382 | 24x |
if (show_risk_set) {
|
| 383 |
# add legend, with treatment index |
|
| 384 | 8x |
legend("topright",
|
| 385 | 8x |
bty = "n", |
| 386 | 8x |
cex = 0.8, |
| 387 | 8x |
lty = use_lty[1:nlevels(kmdat$treatment)], |
| 388 | 8x |
lwd = use_lwd[1:nlevels(kmdat$treatment)], |
| 389 | 8x |
col = use_col[1:nlevels(kmdat$treatment)], |
| 390 | 8x |
legend = paste0("(T", 1:nlevels(kmdat$treatment), ") ", levels(kmdat$treatment))
|
| 391 |
) |
|
| 392 | ||
| 393 |
# add risk set table |
|
| 394 | 8x |
par(bty = "n", tcl = -0.15, mgp = c(1.8, 0.4, 0), mar = c(1, 4, 0, 1)) |
| 395 | 8x |
plot(0, 0, |
| 396 | 8x |
type = "n", xlab = "", ylab = "", main = NULL, |
| 397 | 8x |
ylim = c(nlevels(kmdat$treatment) + 1.2, -0.5), |
| 398 | 8x |
xlim = t_range, |
| 399 | 8x |
yaxt = "n", xaxt = "n" |
| 400 |
) |
|
| 401 | 8x |
axis(2, |
| 402 | 8x |
at = 1:nlevels(kmdat$treatment), labels = paste0("T", 1:nlevels(kmdat$treatment)),
|
| 403 | 8x |
line = NA, lty = "blank", las = 1 |
| 404 |
) |
|
| 405 | ||
| 406 | 8x |
for (ii in 1:nlevels(kmdat$treatment)) {
|
| 407 | 24x |
tmpkmdat <- kmdat[as.numeric(kmdat$treatment) == ii, , drop = FALSE] |
| 408 | 24x |
tmptime <- get_time_as(tmpkmdat$time, time_scale) |
| 409 | 24x |
tmpnr <- sapply(time_grid, function(kk) {
|
| 410 | 216x |
tmpid <- which(tmptime > kk) |
| 411 | 216x |
if (length(tmpid) == 0) {
|
| 412 | 58x |
if (min(tmpkmdat$n.risk) == 0) {
|
| 413 | ! |
tmpid <- which.min(tmpkmdat$n.risk)[1] |
| 414 |
} else {
|
|
| 415 | 58x |
tmpid <- NULL |
| 416 |
} |
|
| 417 |
} |
|
| 418 | 216x |
tout <- ifelse(is.null(tmpid), "n/a", round(tmpkmdat$n.risk[tmpid], 1)) |
| 419 |
}) |
|
| 420 | 24x |
text(0, 0, labels = "Number at risk", pos = 4, cex = 0.8, offset = -0.8) |
| 421 | 24x |
text( |
| 422 | 24x |
y = rep(ii, length(time_grid)), |
| 423 | 24x |
x = time_grid, |
| 424 | 24x |
labels = tmpnr, |
| 425 | 24x |
col = use_col[ii], |
| 426 | 24x |
cex = 0.75 |
| 427 |
) |
|
| 428 | 24x |
text(0, nlevels(kmdat$treatment) + 1, |
| 429 | 24x |
pos = 4, cex = 0.7, offset = -0.8, col = "gray30", |
| 430 | 24x |
labels = "Note: Number at risk for adjusted/weighted treament arm is the sum of individual weight at risk." |
| 431 |
) |
|
| 432 |
} |
|
| 433 |
} else {
|
|
| 434 |
# add simple |
|
| 435 | 16x |
legend("topright",
|
| 436 | 16x |
bty = "n", |
| 437 | 16x |
cex = 0.8, |
| 438 | 16x |
lty = use_lty[1:nlevels(kmdat$treatment)], |
| 439 | 16x |
lwd = use_lwd[1:nlevels(kmdat$treatment)], |
| 440 | 16x |
col = use_col[1:nlevels(kmdat$treatment)], |
| 441 | 16x |
legend = levels(kmdat$treatment) |
| 442 |
) |
|
| 443 |
} |
|
| 444 | 24x |
invisible(NULL) |
| 445 |
} |
|
| 446 | ||
| 447 | ||
| 448 |
#' Diagnosis plot of proportional hazard assumption for anchored and unanchored |
|
| 449 |
#' |
|
| 450 |
#' @param weights_object an object returned by \code{estimate_weight}
|
|
| 451 |
#' @param tte_ipd a data frame of individual patient data (IPD) of internal trial, contain at least "USUBJID", "EVENT", |
|
| 452 |
#' "TIME" columns and a column indicating treatment assignment |
|
| 453 |
#' @param tte_pseudo_ipd a data frame of pseudo IPD by digitized KM curves of external trial (for time-to-event |
|
| 454 |
#' endpoint), contain at least "EVENT", "TIME" |
|
| 455 |
#' @param trt_ipd a string, name of the interested investigation arm in internal trial \code{tte_ipd} (real IPD)
|
|
| 456 |
#' @param trt_agd a string, name of the interested investigation arm in external trial |
|
| 457 |
#' \code{tte_pseudo_ipd} (pseudo IPD)
|
|
| 458 |
#' @param trt_common a string, name of the common comparator in internal and external trial, by default is NULL, |
|
| 459 |
#' indicating unanchored case |
|
| 460 |
#' @param trt_var_ipd a string, column name in \code{tte_ipd} that contains the treatment assignment
|
|
| 461 |
#' @param trt_var_agd a string, column name in \code{tte_pseudo_ipd} that contains the treatment assignment
|
|
| 462 |
#' @param endpoint_name a string, name of time to event endpoint, to be show in the last line of title |
|
| 463 |
#' @param time_scale a string, time unit of median survival time, taking a value of 'years', 'months', 'weeks' or 'days' |
|
| 464 |
#' @param zph_transform a string, pass to \code{survival::cox.zph}, default is "log"
|
|
| 465 |
#' @param zph_log_hazard a logical, if TRUE (default), y axis of the time dependent hazard function is log-hazard, |
|
| 466 |
#' otherwise, hazard. |
|
| 467 |
#' |
|
| 468 |
#' @return a 3 by 2 plot, include log-cumulative hazard plot, time dependent hazard function and unscaled Schoenfeld |
|
| 469 |
#' residual plot, before and after matching |
|
| 470 |
#' |
|
| 471 |
#' @example inst/examples/ph_diagplot_unanchored_ex.R |
|
| 472 |
#' @example inst/examples/ph_diagplot_anchored_ex.R |
|
| 473 |
#' @export |
|
| 474 |
ph_diagplot <- function(weights_object, |
|
| 475 |
tte_ipd, |
|
| 476 |
tte_pseudo_ipd, |
|
| 477 |
trt_ipd, |
|
| 478 |
trt_agd, |
|
| 479 |
trt_common = NULL, |
|
| 480 |
trt_var_ipd = "ARM", |
|
| 481 |
trt_var_agd = "ARM", |
|
| 482 |
endpoint_name = "Time to Event Endpoint", |
|
| 483 |
time_scale, |
|
| 484 |
zph_transform = "log", |
|
| 485 |
zph_log_hazard = TRUE) {
|
|
| 486 | 4x |
names(tte_ipd) <- toupper(names(tte_ipd)) |
| 487 | 4x |
names(tte_pseudo_ipd) <- toupper(names(tte_pseudo_ipd)) |
| 488 | 4x |
trt_var_ipd <- toupper(trt_var_ipd) |
| 489 | 4x |
trt_var_agd <- toupper(trt_var_agd) |
| 490 | ||
| 491 |
# pre check |
|
| 492 | 4x |
if (!"maicplus_estimate_weights" %in% class(weights_object)) {
|
| 493 | ! |
stop("weights_object should be an object returned by estimate_weights")
|
| 494 |
} |
|
| 495 | 4x |
if (!all(c("USUBJID", "TIME", "EVENT", trt_var_ipd) %in% names(tte_ipd))) {
|
| 496 | ! |
stop("tte_ipd needs to include at least USUBJID, TIME, EVENT, ", trt_var_ipd)
|
| 497 |
} |
|
| 498 | 4x |
if (!all(c("TIME", "EVENT", trt_var_agd) %in% names(tte_pseudo_ipd))) {
|
| 499 | ! |
stop("tte_ipd needs to include at least TIME, EVENT, ", trt_var_agd)
|
| 500 |
} |
|
| 501 | ||
| 502 |
# preparing analysis data |
|
| 503 | 4x |
is_anchored <- ifelse(is.null(trt_common), FALSE, TRUE) |
| 504 | 4x |
tte_ipd <- tte_ipd[tte_ipd[[trt_var_ipd]] %in% c(trt_ipd, trt_common), , drop = TRUE] |
| 505 | 4x |
tte_pseudo_ipd <- tte_pseudo_ipd[tte_pseudo_ipd[[trt_var_agd]] %in% c(trt_agd, trt_common), , drop = TRUE] |
| 506 | 4x |
tte_ipd$weights <- weights_object$data$weights[match(weights_object$data$USUBJID, tte_ipd$USUBJID)] |
| 507 | 4x |
tte_pseudo_ipd$weights <- 1 |
| 508 | 4x |
tte_ipd$TIME2 <- get_time_as(tte_ipd$TIME, as = time_scale) # for cox.zph |
| 509 | 4x |
tte_pseudo_ipd$TIME2 <- get_time_as(tte_pseudo_ipd$TIME, as = time_scale) # for cox.zph |
| 510 | 4x |
if (!"USUBJID" %in% names(tte_pseudo_ipd)) tte_pseudo_ipd$USUBJID <- paste0("ID", seq_len(nrow(tte_pseudo_ipd)))
|
| 511 | ! |
if (trt_var_ipd != "ARM") tte_ipd$ARM <- tte_ipd[[trt_var_ipd]] |
| 512 | ! |
if (trt_var_agd != "ARM") tte_pseudo_ipd$ARM <- tte_pseudo_ipd[[trt_var_agd]] |
| 513 | ||
| 514 |
# prepare plot data |
|
| 515 | 4x |
retain_cols <- c("USUBJID", "TIME", "TIME2", "EVENT", "ARM", "weights")
|
| 516 | 4x |
if (!is_anchored) {
|
| 517 |
# unanchored case |
|
| 518 | 1x |
tte_dat <- rbind( |
| 519 | 1x |
tte_ipd[, retain_cols, drop = FALSE], |
| 520 | 1x |
tte_pseudo_ipd[, retain_cols, drop = FALSE] |
| 521 |
) |
|
| 522 |
} else {
|
|
| 523 | 3x |
tte_dat <- tte_ipd[, retain_cols, drop = FALSE] |
| 524 |
} |
|
| 525 | 4x |
kmobj <- survival::survfit(Surv(TIME, EVENT) ~ ARM, tte_dat, conf.type = "log-log") |
| 526 | 4x |
kmobj_adj <- survival::survfit(Surv(TIME, EVENT) ~ ARM, tte_dat, conf.type = "log-log", weights = weights) |
| 527 | 4x |
coxobj <- survival::coxph(Surv(TIME, EVENT) ~ ARM, data = tte_dat) |
| 528 | 4x |
coxobj2 <- survival::coxph(Surv(TIME2, EVENT) ~ ARM, data = tte_dat) |
| 529 | 4x |
zphobj <- survival::cox.zph(coxobj2, transform = zph_transform, global = FALSE) |
| 530 | 4x |
coxobj_adj <- survival::coxph(Surv(TIME, EVENT) ~ ARM, data = tte_dat, weights = weights) |
| 531 | 4x |
coxobj_adj2 <- survival::coxph(Surv(TIME2, EVENT) ~ ARM, data = tte_dat, weights = weights) |
| 532 | 4x |
zphobj_adj <- survival::cox.zph(coxobj_adj2, transform = zph_transform, global = FALSE) |
| 533 | ||
| 534 |
# making the plot |
|
| 535 | 4x |
original_par <- par(mfrow = c(3, 2), cex.lab = 0.85, cex.axis = 0.8, cex.main = 0.9) |
| 536 | 4x |
on.exit(par(original_par)) |
| 537 |
# log-cum-hazard plot |
|
| 538 | 4x |
ph_diagplot_lch(kmobj, |
| 539 | 4x |
time_scale = time_scale, |
| 540 | 4x |
log_time = TRUE, |
| 541 | 4x |
endpoint_name = endpoint_name, |
| 542 | 4x |
subtitle = "(Before Matching)" |
| 543 |
) |
|
| 544 | ||
| 545 | 4x |
ph_diagplot_lch(kmobj_adj, |
| 546 | 4x |
time_scale = time_scale, |
| 547 | 4x |
log_time = TRUE, |
| 548 | 4x |
endpoint_name = endpoint_name, |
| 549 | 4x |
subtitle = "(After Matching)" |
| 550 |
) |
|
| 551 |
# time dependent hazard plot |
|
| 552 | 4x |
plot(zphobj, |
| 553 | 4x |
main = paste0( |
| 554 | 4x |
"Time-dependent Hazard function (scaled Schoenfeld residual)\n", |
| 555 | 4x |
"Endpoint:", endpoint_name, "\n(Before Matching)" |
| 556 |
), |
|
| 557 | 4x |
resid = FALSE, se = TRUE, df = 4, nsmo = 40, |
| 558 |
# xlim = range(0,zphobj$time), |
|
| 559 | 4x |
ylab = ifelse(zph_log_hazard, "Log Hazard", "Hazard"), |
| 560 | 4x |
xlab = paste("Time in", time_scale),
|
| 561 | 4x |
lty = 1:2, lwd = 2, pch = 16, cex = 0.8, |
| 562 | 4x |
col = rgb(0, 0, 128, alpha = 120, maxColorValue = 255), |
| 563 | 4x |
hr = (!zph_log_hazard), yaxt = "n" |
| 564 |
) |
|
| 565 | 4x |
axis(2, las = 1) |
| 566 | 4x |
pv <- as.data.frame(zphobj$table)$p |
| 567 | 4x |
pv <- ifelse(round(pv, 4) < 0.0001, "<0.0001", format(round(pv, 4), nsmall = 4)) |
| 568 | 4x |
legend("bottomright",
|
| 569 | 4x |
cex = 0.75, bty = "n", text.col = "dodgerblue3", |
| 570 | 4x |
legend = c(paste0("p-value: ", pv), paste0("time-transform: ", zph_transform)),
|
| 571 | 4x |
title = "PH test (survival::cox.zph)" |
| 572 |
) |
|
| 573 | ||
| 574 | 4x |
plot(zphobj_adj, |
| 575 | 4x |
main = paste0( |
| 576 | 4x |
"Time-dependent Hazard function (scaled Schoenfeld residual)\n", |
| 577 | 4x |
"Endpoint:", endpoint_name, "\n(After Matching)" |
| 578 |
), |
|
| 579 | 4x |
resid = FALSE, se = TRUE, df = 4, nsmo = 40, |
| 580 |
# xlim = range(0,zphobj$time), |
|
| 581 | 4x |
ylab = ifelse(zph_log_hazard, "Log Hazard", "Hazard"), |
| 582 | 4x |
xlab = paste("Time in", time_scale),
|
| 583 | 4x |
lty = 1:2, lwd = 2, pch = 16, cex = 0.8, |
| 584 | 4x |
col = rgb(0, 0, 128, alpha = 120, maxColorValue = 255), |
| 585 | 4x |
hr = (!zph_log_hazard), yaxt = "n" |
| 586 |
) |
|
| 587 | 4x |
axis(2, las = 1) |
| 588 | 4x |
pv <- as.data.frame(zphobj_adj$table)$p |
| 589 | 4x |
pv <- ifelse(round(pv, 4) < 0.0001, "<0.0001", format(round(pv, 4), nsmall = 4)) |
| 590 | 4x |
legend("bottomright",
|
| 591 | 4x |
cex = 0.75, bty = "n", text.col = "dodgerblue3", |
| 592 | 4x |
legend = c(paste0("p-value: ", pv), paste0("time-transform: ", zph_transform)),
|
| 593 | 4x |
title = "PH test (survival::cox.zph)" |
| 594 |
) |
|
| 595 | ||
| 596 |
# unscaled schoenfeld residual |
|
| 597 | 4x |
ph_diagplot_schoenfeld(coxobj, |
| 598 | 4x |
time_scale = time_scale, |
| 599 | 4x |
log_time = FALSE, |
| 600 | 4x |
endpoint_name = endpoint_name, |
| 601 | 4x |
subtitle = "(Before Matching)" |
| 602 |
) |
|
| 603 | ||
| 604 | 4x |
ph_diagplot_schoenfeld(coxobj_adj, |
| 605 | 4x |
time_scale = time_scale, |
| 606 | 4x |
log_time = FALSE, |
| 607 | 4x |
endpoint_name = endpoint_name, |
| 608 | 4x |
subtitle = "(After Matching)" |
| 609 |
) |
|
| 610 |
} |
|
| 611 | ||
| 612 |
#' PH Diagnosis Plot of Log Cumulative Hazard Rate versus time or log-time |
|
| 613 |
#' |
|
| 614 |
#' This plot is also known as log negative log survival rate. |
|
| 615 |
#' |
|
| 616 |
#' a diagnosis plot for proportional hazard assumption, versus log-time (default) or time |
|
| 617 |
#' |
|
| 618 |
#' @param km_fit returned object from \code{survival::survfit}
|
|
| 619 |
#' @param time_scale a character string, 'years', 'months', 'weeks' or 'days', time unit of median survival time |
|
| 620 |
#' @param log_time logical, TRUE (default) or FALSE |
|
| 621 |
#' @param endpoint_name a character string, name of the endpoint |
|
| 622 |
#' @param subtitle a character string, subtitle of the plot |
|
| 623 |
#' @param exclude_censor logical, should censored data point be plotted |
|
| 624 |
#' @examples |
|
| 625 |
#' library(survival) |
|
| 626 |
#' data(adtte_sat) |
|
| 627 |
#' data(pseudo_ipd_sat) |
|
| 628 |
#' combined_data <- rbind(adtte_sat[, c("TIME", "EVENT", "ARM")], pseudo_ipd_sat)
|
|
| 629 |
#' kmobj <- survfit(Surv(TIME, EVENT) ~ ARM, combined_data, conf.type = "log-log") |
|
| 630 |
#' ph_diagplot_lch(kmobj, |
|
| 631 |
#' time_scale = "month", log_time = TRUE, |
|
| 632 |
#' endpoint_name = "OS", subtitle = "(Before Matching)" |
|
| 633 |
#' ) |
|
| 634 |
#' @return a plot of log cumulative hazard rate |
|
| 635 |
#' @export |
|
| 636 | ||
| 637 |
ph_diagplot_lch <- function(km_fit, |
|
| 638 |
time_scale, |
|
| 639 |
log_time = TRUE, |
|
| 640 |
endpoint_name = "", |
|
| 641 |
subtitle = "", |
|
| 642 |
exclude_censor = TRUE) {
|
|
| 643 | 10x |
time_scale <- match.arg(arg = time_scale, choices = c("days", "weeks", "months", "years"))
|
| 644 | ||
| 645 | 10x |
clldat <- survfit_makeup(km_fit) |
| 646 | ||
| 647 | 10x |
if (exclude_censor) {
|
| 648 | 10x |
clldat <- lapply(clldat, function(xxt) xxt[xxt$censor == 0, , drop = FALSE]) |
| 649 |
} |
|
| 650 | ||
| 651 | 10x |
all.times <- get_time_as(do.call(rbind, clldat)$time, time_scale) |
| 652 | 10x |
if (log_time) all.times <- log(all.times) |
| 653 | 10x |
t_range <- range(all.times) |
| 654 | 10x |
y_range <- range(log(do.call(rbind, clldat)$cumhaz)) |
| 655 | ||
| 656 | 10x |
original_par <- par("mar", "bty", "tcl", "mgp")
|
| 657 | 10x |
par(mar = c(4, 4, 4, 1), bty = "n", tcl = -0.15, mgp = c(1.5, 0.3, 0)) |
| 658 | 10x |
on.exit(par(original_par)) |
| 659 | 10x |
plot(0, 0, |
| 660 | 10x |
type = "n", xlab = paste0(ifelse(log_time, "Log-", ""), "Time in ", time_scale), |
| 661 | 10x |
ylab = "Log-Cumulative Hazard Rate", |
| 662 | 10x |
ylim = y_range, xlim = t_range, yaxt = "n", |
| 663 | 10x |
main = paste0( |
| 664 | 10x |
"Log Cumulative Hazard versus Log Time\nEndpoint: ", endpoint_name, |
| 665 | 10x |
ifelse(subtitle == "", "", "\n"), subtitle |
| 666 |
) |
|
| 667 |
) |
|
| 668 | 10x |
axis(2, las = 1) |
| 669 | ||
| 670 | 10x |
trts <- names(clldat) |
| 671 | 10x |
cols <- c("dodgerblue3", "firebrick3")
|
| 672 | 10x |
pchs <- c(1, 4) |
| 673 | 10x |
for (i in seq_along(clldat)) {
|
| 674 | 20x |
use_x <- get_time_as(clldat[[i]]$time, time_scale) |
| 675 | 20x |
if (log_time) use_x <- log(use_x) |
| 676 | ||
| 677 | 20x |
lines( |
| 678 | 20x |
y = log(clldat[[i]]$cumhaz), |
| 679 | 20x |
x = use_x, col = cols[i], |
| 680 | 20x |
type = "s", |
| 681 |
) |
|
| 682 | 20x |
points( |
| 683 | 20x |
y = log(clldat[[i]]$cumhaz), |
| 684 | 20x |
x = use_x, |
| 685 | 20x |
col = cols[i], pch = pchs[i], cex = 0.7 |
| 686 |
) |
|
| 687 |
} |
|
| 688 | 10x |
legend("bottomright",
|
| 689 | 10x |
bty = "n", lty = c(1, 1, 2), cex = 0.8, |
| 690 | 10x |
col = cols, pch = pchs, legend = paste0("Treatment: ", trts)
|
| 691 |
) |
|
| 692 |
} |
|
| 693 | ||
| 694 | ||
| 695 |
#' PH Diagnosis Plot of Schoenfeld residuals for a Cox model fit |
|
| 696 |
#' |
|
| 697 |
#' @param coxobj object returned from \code{\link[survival]{coxph}}
|
|
| 698 |
#' @param time_scale a character string, 'years', 'months', 'weeks' or 'days', time unit of median survival time |
|
| 699 |
#' @param log_time logical, TRUE (default) or FALSE |
|
| 700 |
#' @param endpoint_name a character string, name of the endpoint |
|
| 701 |
#' @param subtitle a character string, subtitle of the plot |
|
| 702 |
#' @examples |
|
| 703 |
#' library(survival) |
|
| 704 |
#' data(adtte_sat) |
|
| 705 |
#' data(pseudo_ipd_sat) |
|
| 706 |
#' combined_data <- rbind(adtte_sat[, c("TIME", "EVENT", "ARM")], pseudo_ipd_sat)
|
|
| 707 |
#' unweighted_cox <- coxph(Surv(TIME, EVENT == 1) ~ ARM, data = combined_data) |
|
| 708 |
#' ph_diagplot_schoenfeld(unweighted_cox, |
|
| 709 |
#' time_scale = "month", log_time = TRUE, |
|
| 710 |
#' endpoint_name = "OS", subtitle = "(Before Matching)" |
|
| 711 |
#' ) |
|
| 712 |
#' @return a plot of Schoenfeld residuals |
|
| 713 |
#' @export |
|
| 714 | ||
| 715 |
ph_diagplot_schoenfeld <- function(coxobj, |
|
| 716 |
time_scale = "months", |
|
| 717 |
log_time = TRUE, |
|
| 718 |
endpoint_name = "", |
|
| 719 |
subtitle = "") {
|
|
| 720 |
# pre-check |
|
| 721 | 10x |
time_scale <- match.arg(arg = time_scale, choices = c("days", "weeks", "months", "years"))
|
| 722 | ||
| 723 |
# prepare data |
|
| 724 | 10x |
schresid <- residuals(coxobj, type = "schoenfeld") |
| 725 | 10x |
plot_x <- get_time_as(as.numeric(names(schresid)), time_scale) |
| 726 | 2x |
if (log_time) plot_x <- log(plot_x) |
| 727 | ||
| 728 |
# loewss fit |
|
| 729 | 10x |
fit0 <- predict(loess(schresid ~ plot_x), se = TRUE) |
| 730 | 10x |
uppband <- fit0$fit + qt(0.975, fit0$df) * fit0$se |
| 731 | 10x |
lowband <- fit0$fit - qt(0.975, fit0$df) * fit0$se |
| 732 | 10x |
use_yrange <- range(schresid, uppband, lowband) |
| 733 | ||
| 734 |
# making the plot |
|
| 735 | 10x |
original_par <- par(bty = "n", mar = c(4, 4, 4, 1), tcl = -0.15, mgp = c(1.5, 0.3, 0)) |
| 736 | 10x |
on.exit(par(original_par)) |
| 737 | 10x |
plot(schresid ~ plot_x, |
| 738 | 10x |
type = "n", |
| 739 | 10x |
yaxt = "n", ylim = use_yrange, |
| 740 | 10x |
ylab = "Unscaled Schoenfeld Residual", |
| 741 | 10x |
xlab = paste0(ifelse(log_time, "Log-", ""), "Time in ", time_scale), |
| 742 | 10x |
main = paste0( |
| 743 | 10x |
"Unscaled Schoenfeld Residual\nEndpoint: ", endpoint_name, |
| 744 | 10x |
ifelse(subtitle == "", "", "\n"), subtitle |
| 745 |
) |
|
| 746 |
) |
|
| 747 | 10x |
axis(2, las = 1) |
| 748 | 10x |
lines(fit0$fit ~ plot_x, lty = 2, lwd = 1, col = rgb(0, 0, 128, 150, maxColorValue = 255)) |
| 749 |
# lines(uppband ~ plot_x, lty =2, lwd=1, col = rgb(0,0,128,120,maxColorValue = 255)) |
|
| 750 |
# lines(lowband ~ plot_x, lty =2, lwd=1, col = rgb(0,0,128,120,maxColorValue = 255)) |
|
| 751 | 10x |
polygon( |
| 752 | 10x |
x = c(plot_x, rev(plot_x)), |
| 753 | 10x |
y = c(uppband, rev(lowband)), |
| 754 | 10x |
col = rgb(0, 0, 128, 60, maxColorValue = 255), |
| 755 | 10x |
border = NA |
| 756 |
) |
|
| 757 | 10x |
abline(h = 0, lty = 1, lwd = 1, col = "deeppink") |
| 758 | 10x |
points(schresid ~ plot_x, |
| 759 | 10x |
pch = 16, |
| 760 | 10x |
cex = 0.85, |
| 761 | 10x |
col = rgb(169, 169, 169, 120, maxColorValue = 255) |
| 762 |
) |
|
| 763 |
} |
| 1 |
# Functions for matching step: estimation of individual weights |
|
| 2 | ||
| 3 |
# functions to be exported --------------------------------------- |
|
| 4 | ||
| 5 |
#' Derive individual weights in the matching step of MAIC |
|
| 6 |
#' |
|
| 7 |
#' Assuming data is properly processed, this function takes individual patient data (IPD) with centered covariates |
|
| 8 |
#' (effect modifiers and/or prognostic variables) as input, and generates weights for each individual in IPD trial to |
|
| 9 |
#' match the covariates in aggregate data. |
|
| 10 |
#' |
|
| 11 |
#' @param data a numeric matrix, centered covariates of IPD, no missing value in any cell is allowed |
|
| 12 |
#' @param centered_colnames a character or numeric vector (column indicators) of centered covariates |
|
| 13 |
#' @param start_val a scalar, the starting value for all coefficients of the propensity score regression |
|
| 14 |
#' @param method a string, name of the optimization algorithm (see 'method' argument of \code{base::optim()}) The
|
|
| 15 |
#' default is `"BFGS"`, other options are `"Nelder-Mead"`, `"CG"`, `"L-BFGS-B"`, `"SANN"`, and `"Brent"` |
|
| 16 |
#' @param n_boot_iteration an integer, number of bootstrap iterations. By default is NULL which means bootstrapping |
|
| 17 |
#' procedure will not be triggered, and hence the element `"boot"` of output list object will be NULL. |
|
| 18 |
#' @param set_seed_boot a scalar, the random seed for conducting the bootstrapping, only relevant if |
|
| 19 |
#' \code{n_boot_iteration} is not NULL. By default, use seed 1234
|
|
| 20 |
#' @param boot_strata a character vector of column names in \code{data} that defines the strata for bootstrapping.
|
|
| 21 |
#' This ensures that samples are drawn proportionally from each defined stratum. If \code{NULL},
|
|
| 22 |
#' no stratification during bootstrapping process. By default, it is "ARM" |
|
| 23 |
#' @param ... Additional `control` parameters passed to [stats::optim]. |
|
| 24 |
#' |
|
| 25 |
#' @return a list with the following 4 elements, |
|
| 26 |
#' \describe{
|
|
| 27 |
#' \item{data}{a data.frame, includes the input \code{data} with appended column 'weights' and 'scaled_weights'.
|
|
| 28 |
#' Scaled weights has a summation to be the number of rows in \code{data} that has no missing value in any of the
|
|
| 29 |
#' effect modifiers} |
|
| 30 |
#' \item{centered_colnames}{column names of centered effect modifiers in \code{data}}
|
|
| 31 |
#' \item{nr_missing}{number of rows in \code{data} that has at least 1 missing value in specified centered effect
|
|
| 32 |
#' modifiers} |
|
| 33 |
#' \item{ess}{effective sample size, square of sum divided by sum of squares}
|
|
| 34 |
#' \item{opt}{R object returned by \code{base::optim()}, for assess convergence and other details}
|
|
| 35 |
#' \item{boot_strata}{'strata' from a boot::boot object}
|
|
| 36 |
#' \item{boot_seed}{column names in \code{data} of the stratification factors}
|
|
| 37 |
#' \item{boot}{a n by 2 by k array or NA, where n equals to number of rows in \code{data}, and k equals
|
|
| 38 |
#' \code{n_boot_iteration}. The 2 columns in the second dimension include a column of numeric indexes of the rows
|
|
| 39 |
#' in \code{data} that are selected at a bootstrapping iteration and a column of weights. \code{boot} is NA when
|
|
| 40 |
#' argument \code{n_boot_iteration} is set as NULL
|
|
| 41 |
#' } |
|
| 42 |
#' } |
|
| 43 |
#' @importFrom boot boot |
|
| 44 |
#' @examples |
|
| 45 |
#' data(centered_ipd_sat) |
|
| 46 |
#' centered_colnames <- grep("_CENTERED", colnames(centered_ipd_sat), value = TRUE)
|
|
| 47 |
#' weighted_data <- estimate_weights(data = centered_ipd_sat, centered_colnames = centered_colnames) |
|
| 48 |
#' \donttest{
|
|
| 49 |
#' # To later estimate bootstrap confidence intervals, we calculate the weights |
|
| 50 |
#' # for the bootstrap samples: |
|
| 51 |
#' weighted_data_boot <- estimate_weights( |
|
| 52 |
#' data = centered_ipd_sat, centered_colnames = centered_colnames, n_boot_iteration = 100 |
|
| 53 |
#' ) |
|
| 54 |
#' } |
|
| 55 |
#' @export |
|
| 56 | ||
| 57 |
estimate_weights <- function(data, |
|
| 58 |
centered_colnames = NULL, |
|
| 59 |
start_val = 0, |
|
| 60 |
method = "BFGS", |
|
| 61 |
n_boot_iteration = NULL, |
|
| 62 |
set_seed_boot = 1234, |
|
| 63 |
boot_strata = "ARM", |
|
| 64 |
...) {
|
|
| 65 |
# pre check |
|
| 66 | 21x |
ch1 <- is.data.frame(data) |
| 67 | 21x |
if (!ch1) {
|
| 68 | 1x |
stop("'data' is not a data.frame")
|
| 69 |
} |
|
| 70 | ||
| 71 | 20x |
ch2 <- (!is.null(centered_colnames)) |
| 72 | 20x |
if (ch2 && is.numeric(centered_colnames)) {
|
| 73 | ! |
ch2b <- any(centered_colnames < 1 | centered_colnames > ncol(data)) |
| 74 | ! |
if (ch2b) {
|
| 75 | ! |
stop("specified centered_colnames are out of bound")
|
| 76 |
} |
|
| 77 | 20x |
} else if (ch2 && is.character(centered_colnames)) {
|
| 78 | 20x |
ch2b <- !all(centered_colnames %in% names(data)) |
| 79 | 20x |
if (ch2b) {
|
| 80 | 1x |
stop("1 or more specified centered_colnames are not found in 'data'")
|
| 81 |
} |
|
| 82 |
} else {
|
|
| 83 | ! |
stop("'centered_colnames' should be either a numeric or character vector")
|
| 84 |
} |
|
| 85 | ||
| 86 | 19x |
ch3 <- sapply(centered_colnames, function(ii) {
|
| 87 | 115x |
!is.numeric(data[[ii]]) |
| 88 |
}) |
|
| 89 | 19x |
if (any(ch3)) {
|
| 90 | 1x |
stop(paste0( |
| 91 | 1x |
"following columns of 'data' are not numeric for the calculation:", |
| 92 | 1x |
paste(which(ch3), collapse = ",") |
| 93 |
)) |
|
| 94 |
} |
|
| 95 | ||
| 96 | 18x |
if (!is.null(boot_strata)) {
|
| 97 | 18x |
ch4 <- boot_strata %in% names(data) |
| 98 | 18x |
if (!all(ch4)) {
|
| 99 | 1x |
stop("Some variables in boot_strata are not in data: ", toString(boot_strata[!ch4]))
|
| 100 |
} |
|
| 101 |
} |
|
| 102 | ||
| 103 |
# prepare data for optimization |
|
| 104 | ! |
if (is.null(centered_colnames)) centered_colnames <- seq_len(ncol(data)) |
| 105 | 17x |
EM <- data[, centered_colnames, drop = FALSE] |
| 106 | 17x |
ind <- apply(EM, 1, function(xx) any(is.na(xx))) |
| 107 | 17x |
nr_missing <- sum(ind) |
| 108 | 17x |
rows_with_missing <- which(ind) |
| 109 | 17x |
EM <- as.matrix(EM[!ind, , drop = FALSE]) |
| 110 | ||
| 111 |
# estimate weights |
|
| 112 | 17x |
opt1 <- optimise_weights(matrix = EM, par = rep(start_val, ncol(EM)), method = method, ...) |
| 113 | 17x |
alpha <- opt1$alpha |
| 114 | 17x |
wt <- opt1$wt |
| 115 | 17x |
wt_rs <- (wt / sum(wt)) * nrow(EM) |
| 116 | ||
| 117 |
# bootstrapping |
|
| 118 | 17x |
outboot <- if (is.null(n_boot_iteration)) {
|
| 119 | 11x |
boot_seed <- NULL |
| 120 | 11x |
boot_strata_out <- NULL |
| 121 | 11x |
NULL |
| 122 |
} else {
|
|
| 123 |
# Make sure to leave '.Random.seed' as-is on exit |
|
| 124 | 6x |
old_seed <- globalenv()$.Random.seed |
| 125 | 6x |
on.exit(suspendInterrupts(set_random_seed(old_seed))) |
| 126 | 6x |
set.seed(set_seed_boot) |
| 127 | ||
| 128 | 6x |
if (!is.null(boot_strata)) {
|
| 129 | 6x |
use_strata <- interaction(data[!ind, boot_strata]) |
| 130 |
} else {
|
|
| 131 | ! |
use_strata <- rep(1, nrow(EM)) |
| 132 |
} |
|
| 133 | 6x |
boot_statistic <- function(d, w) optimise_weights(d[w, ], par = alpha, method = method, ...)$wt[, 1] |
| 134 | 6x |
boot_out <- boot::boot(EM, statistic = boot_statistic, R = n_boot_iteration, strata = use_strata) |
| 135 | ||
| 136 | 6x |
boot_array <- array(dim = list(nrow(EM), 2, n_boot_iteration)) |
| 137 | 6x |
dimnames(boot_array) <- list(sampled_patient = NULL, c("rowid", "weight"), bootstrap_iteration = NULL)
|
| 138 | 6x |
boot_array[, 1, ] <- t(boot.array(boot_out, TRUE)) |
| 139 | 6x |
boot_array[, 2, ] <- t(boot_out$t) |
| 140 | 6x |
boot_seed <- boot_out$seed |
| 141 | 6x |
boot_strata_out <- boot_out$strata |
| 142 | 6x |
boot_array |
| 143 |
} |
|
| 144 | ||
| 145 |
# append weights to data |
|
| 146 | 17x |
data$weights <- NA |
| 147 | 17x |
data$weights[!ind] <- wt |
| 148 | ||
| 149 | 17x |
data$scaled_weights <- NA |
| 150 | 17x |
data$scaled_weights[!ind] <- wt_rs |
| 151 | ||
| 152 | ! |
if (is.numeric(centered_colnames)) centered_colnames <- names(data)[centered_colnames] |
| 153 | ||
| 154 |
# Output |
|
| 155 | 17x |
outdata <- list( |
| 156 | 17x |
data = data, |
| 157 | 17x |
centered_colnames = centered_colnames, |
| 158 | 17x |
nr_missing = nr_missing, |
| 159 | 17x |
ess = sum(wt)^2 / sum(wt^2), |
| 160 | 17x |
opt = opt1$opt, |
| 161 | 17x |
boot = outboot, |
| 162 | 17x |
boot_seed = boot_seed, |
| 163 | 17x |
boot_strata = boot_strata_out, |
| 164 | 17x |
rows_with_missing = rows_with_missing |
| 165 |
) |
|
| 166 | ||
| 167 | 17x |
class(outdata) <- c("maicplus_estimate_weights", "list")
|
| 168 | 17x |
outdata |
| 169 |
} |
|
| 170 | ||
| 171 | ||
| 172 |
#' Estimate weights using `optim` |
|
| 173 |
#' |
|
| 174 |
#' @param matrix Matrix of data to be used for estimating weights |
|
| 175 |
#' @param par Vector of starting values for the parameters with length equal to the number of columns in `matrix` |
|
| 176 |
#' @param method Method parameter passed to [stats::optim] |
|
| 177 |
#' @param ... Additional `control` parameters passed to [stats::optim] |
|
| 178 |
#' |
|
| 179 |
#' @return List containing estimated `alpha` values and `wt` weights for all rows of matrix |
|
| 180 |
#' @noRd |
|
| 181 |
optimise_weights <- function(matrix, |
|
| 182 |
par = rep(0, ncol(matrix)), |
|
| 183 |
method = "BFGS", |
|
| 184 |
maxit = 300, |
|
| 185 |
trace = 0, |
|
| 186 |
...) {
|
|
| 187 | 575x |
if (!all(is.numeric(par) || is.finite(par), length(par) == ncol(matrix))) {
|
| 188 | ! |
stop("par must be a numeric vector with finite values of length equal to the number of columns in 'matrix'")
|
| 189 |
} |
|
| 190 | 575x |
opt1 <- optim( |
| 191 | 575x |
par = par, |
| 192 | 575x |
fn = function(alpha, X) sum(exp(X %*% alpha)), |
| 193 | 575x |
gr = function(alpha, X) colSums(sweep(X, 1, exp(X %*% alpha), "*")), |
| 194 | 575x |
X = matrix, |
| 195 | 575x |
method = method, |
| 196 | 575x |
control = list(maxit = maxit, trace = trace, ...) |
| 197 |
) |
|
| 198 | 575x |
if (opt1$convergence != 0) {
|
| 199 | 1x |
warning( |
| 200 | 1x |
"optim() did not converge. ", |
| 201 | 1x |
opt1$message, |
| 202 | 1x |
"\nSee ?optim for more information on convergence code: ", opt1$convergence |
| 203 |
) |
|
| 204 |
} |
|
| 205 | 575x |
list( |
| 206 | 575x |
opt = opt1, |
| 207 | 575x |
alpha = opt1$par, |
| 208 | 575x |
wt = exp(matrix %*% opt1$par) |
| 209 |
) |
|
| 210 |
} |
|
| 211 | ||
| 212 |
#' Calculate Statistics for Weight Plot Legend |
|
| 213 |
#' |
|
| 214 |
#' Calculates ESS reduction and median weights which is used to create legend for weights plot |
|
| 215 |
#' |
|
| 216 |
#' @param weighted_data object returned after calculating weights using [estimate_weights] |
|
| 217 |
#' |
|
| 218 |
#' @return list of ESS, ESS reduction, median value of scaled and unscaled weights, and missing count |
|
| 219 |
#' @examples |
|
| 220 |
#' data("weighted_sat")
|
|
| 221 |
#' calculate_weights_legend(weighted_sat) |
|
| 222 |
#' @export |
|
| 223 |
#' @keywords internal |
|
| 224 | ||
| 225 |
calculate_weights_legend <- function(weighted_data) {
|
|
| 226 | 4x |
if (!inherits(weighted_data, "maicplus_estimate_weights")) {
|
| 227 | ! |
stop("weighted_data must be class `maicplus_estimate_weights` generated by estimate_weights()")
|
| 228 |
} |
|
| 229 | 4x |
ess <- weighted_data$ess |
| 230 | 4x |
wt <- weighted_data$data$weights |
| 231 | 4x |
wt_scaled <- weighted_data$data$scaled_weights |
| 232 | ||
| 233 |
# calculate sample size and exclude NA from wt |
|
| 234 | 4x |
nr_na <- sum(is.na(wt)) |
| 235 | 4x |
n <- length(wt) - nr_na |
| 236 | 4x |
wt <- na.omit(wt) |
| 237 | 4x |
wt_scaled <- na.omit(wt_scaled) |
| 238 | ||
| 239 |
# calculate ess reduction and median weights |
|
| 240 | 4x |
ess_reduction <- (1 - (ess / n)) * 100 |
| 241 | 4x |
wt_median <- median(wt) |
| 242 | 4x |
wt_scaled_median <- median(wt_scaled) |
| 243 | ||
| 244 | 4x |
list( |
| 245 | 4x |
ess = round(ess, 2), |
| 246 | 4x |
ess_reduction = round(ess_reduction, 2), |
| 247 | 4x |
wt_median = round(wt_median, 4), |
| 248 | 4x |
wt_scaled_median = round(wt_scaled_median, 4), |
| 249 | 4x |
nr_na = nr_na |
| 250 |
) |
|
| 251 |
} |
|
| 252 | ||
| 253 |
#' Plot MAIC weights in a histogram with key statistics in legend |
|
| 254 |
#' |
|
| 255 |
#' Generates a base R histogram of weights. Default is to plot either unscaled or scaled weights and not both. |
|
| 256 |
#' |
|
| 257 |
#' @param weighted_data object returned after calculating weights using [estimate_weights] |
|
| 258 |
#' @param bin_col a string, color for the bins of histogram |
|
| 259 |
#' @param vline_col a string, color for the vertical line in the histogram |
|
| 260 |
#' @param main_title title of the plot |
|
| 261 |
#' @param scaled_weights an indicator for using scaled weights instead of regular weights |
|
| 262 |
#' |
|
| 263 |
#' @return a plot of unscaled or scaled weights |
|
| 264 |
#' @examples |
|
| 265 |
#' plot_weights_base(weighted_sat, |
|
| 266 |
#' bin_col = "#6ECEB2", |
|
| 267 |
#' vline_col = "#688CE8", |
|
| 268 |
#' main_title = c("Scaled Individual Weights", "Unscaled Individual Weights"),
|
|
| 269 |
#' scaled_weights = TRUE |
|
| 270 |
#' ) |
|
| 271 |
#' @export |
|
| 272 | ||
| 273 |
plot_weights_base <- function(weighted_data, |
|
| 274 |
bin_col, vline_col, main_title, |
|
| 275 |
scaled_weights) {
|
|
| 276 | 2x |
weights_stat <- calculate_weights_legend(weighted_data) |
| 277 | ||
| 278 | 2x |
if (scaled_weights) {
|
| 279 | 2x |
wt <- weighted_data$data$scaled_weights |
| 280 | 2x |
median_wt <- weights_stat$wt_scaled_median |
| 281 |
} else {
|
|
| 282 | ! |
wt <- weighted_data$data$weights |
| 283 | ! |
median_wt <- weights_stat$wt_median |
| 284 |
} |
|
| 285 | ||
| 286 |
# prepare legend |
|
| 287 | 2x |
plot_legend <- c( |
| 288 | 2x |
paste0("Median = ", median_wt),
|
| 289 | 2x |
paste0("ESS = ", weights_stat$ess),
|
| 290 | 2x |
paste0("Reduction% = ", weights_stat$ess_reduct)
|
| 291 |
) |
|
| 292 | 2x |
plot_lty <- c(2, NA, NA) |
| 293 | ||
| 294 | 2x |
if (weights_stat$nr_na > 0) {
|
| 295 | ! |
plot_legend <- c(plot_legend, paste0("#Missing Weights = ", weights_stat$nr_na))
|
| 296 | ! |
plot_lty <- c(plot_lty, NA) |
| 297 |
} |
|
| 298 | ||
| 299 |
# plot |
|
| 300 | 2x |
original_par <- par(mgp = c(2.3, 0.5, 0), cex.axis = 0.9, cex.lab = 0.95, bty = "n") |
| 301 | 2x |
on.exit(par(original_par)) |
| 302 | 2x |
hist(wt, border = "white", col = bin_col, main = main_title, breaks = 20, yaxt = "n", xlab = "") |
| 303 | 2x |
axis(2, las = 1) |
| 304 | 2x |
abline(v = median(wt), lty = 2, col = vline_col, lwd = 2) |
| 305 | 2x |
legend("topright", bty = "n", lty = plot_lty, cex = 0.8, legend = plot_legend)
|
| 306 |
} |
|
| 307 | ||
| 308 |
#' Plot MAIC weights in a histogram with key statistics in legend using `ggplot2` |
|
| 309 |
#' |
|
| 310 |
#' Generates a `ggplot` histogram of weights. Default is to plot both unscaled and scaled weights on a same graph. |
|
| 311 |
#' |
|
| 312 |
#' @param weighted_data object returned after calculating weights using [estimate_weights] |
|
| 313 |
#' @param bin_col a string, color for the bins of histogram |
|
| 314 |
#' @param vline_col a string, color for the vertical line in the histogram |
|
| 315 |
#' @param main_title Name of scaled weights plot and unscaled weights plot, respectively. |
|
| 316 |
#' @param bins number of bin parameter to use |
|
| 317 |
#' |
|
| 318 |
#' @return a plot of unscaled and scaled weights |
|
| 319 |
#' @examples |
|
| 320 |
#' if (requireNamespace("ggplot2")) {
|
|
| 321 |
#' plot_weights_ggplot(weighted_sat, |
|
| 322 |
#' bin_col = "#6ECEB2", |
|
| 323 |
#' vline_col = "#688CE8", |
|
| 324 |
#' main_title = c("Scaled Individual Weights", "Unscaled Individual Weights"),
|
|
| 325 |
#' bins = 50 |
|
| 326 |
#' ) |
|
| 327 |
#' } |
|
| 328 |
#' @export |
|
| 329 | ||
| 330 |
plot_weights_ggplot <- function(weighted_data, bin_col, vline_col, |
|
| 331 |
main_title, |
|
| 332 |
bins) {
|
|
| 333 |
# check if ggplot2 package is installed |
|
| 334 | 1x |
if (!requireNamespace("ggplot2", quietly = TRUE)) {
|
| 335 | ! |
stop("ggplot2 package is needed to run this function")
|
| 336 |
} |
|
| 337 | ||
| 338 | 1x |
weights_stat <- calculate_weights_legend(weighted_data) |
| 339 | ||
| 340 |
# prepare dataset to use in ggplot |
|
| 341 | 1x |
wt_data0 <- weighted_data$data[, c("weights", "scaled_weights")]
|
| 342 | 1x |
colnames(wt_data0) <- main_title |
| 343 | 1x |
wt_data <- stack(wt_data0) |
| 344 | 1x |
wt_data$median <- ifelse(wt_data$ind == main_title[1], |
| 345 | 1x |
weights_stat$wt_median, weights_stat$wt_scaled_median |
| 346 |
) |
|
| 347 | ||
| 348 |
# create legend data |
|
| 349 | 1x |
lab <- with(weights_stat, {
|
| 350 | 1x |
lab <- c(paste0("Median = ", wt_median), paste0("Median = ", wt_scaled_median))
|
| 351 | 1x |
lab <- paste0(lab, "\nESS = ", ess, "\nReduction% = ", ess_reduction) |
| 352 | ! |
if (nr_na > 0) lab <- paste0(lab, "\n#Missing Weights = ", nr_na) |
| 353 | 1x |
lab |
| 354 |
}) |
|
| 355 | 1x |
legend_data <- data.frame(ind = main_title, lab = lab) |
| 356 | ||
| 357 | 1x |
values <- median <- NULL # dummy assignment for undefined variable check |
| 358 | 1x |
hist_plot <- ggplot2::ggplot(wt_data) + |
| 359 | 1x |
ggplot2::geom_histogram(ggplot2::aes(x = values), bins = bins, color = bin_col, fill = bin_col) + |
| 360 | 1x |
ggplot2::geom_vline(ggplot2::aes(xintercept = median), |
| 361 | 1x |
color = vline_col, |
| 362 | 1x |
linetype = "dashed" |
| 363 |
) + |
|
| 364 | 1x |
ggplot2::theme_bw() + |
| 365 | 1x |
ggplot2::facet_wrap(~ind, ncol = 1) + |
| 366 | 1x |
ggplot2::geom_text( |
| 367 | 1x |
data = legend_data, |
| 368 | 1x |
ggplot2::aes(label = lab), x = Inf, y = Inf, hjust = 1, vjust = 1, size = 3 |
| 369 |
) + |
|
| 370 | 1x |
ggplot2::theme( |
| 371 | 1x |
axis.title = ggplot2::element_text(size = 12), |
| 372 | 1x |
axis.text = ggplot2::element_text(size = 12) |
| 373 |
) + |
|
| 374 | 1x |
ggplot2::ylab("Frequency") +
|
| 375 | 1x |
ggplot2::xlab("Weight")
|
| 376 | ||
| 377 | 1x |
hist_plot |
| 378 |
} |
|
| 379 | ||
| 380 | ||
| 381 |
#' Plot method for Estimate Weights objects |
|
| 382 |
#' |
|
| 383 |
#' The plot function displays individuals weights with key summary in top right legend that includes |
|
| 384 |
#' median weight, effective sample size (ESS), and reduction percentage (what percent ESS is reduced from the |
|
| 385 |
#' original sample size). There are two options of plotting: base R plot and `ggplot`. The default |
|
| 386 |
#' for base R plot is to plot unscaled and scaled separately. The default |
|
| 387 |
#' for `ggplot` is to plot unscaled and scaled weights on a same plot. |
|
| 388 |
#' |
|
| 389 |
#' @param x object from [estimate_weights] |
|
| 390 |
#' @param ggplot indicator to print base weights plot or `ggplot` weights plot |
|
| 391 |
#' @param bin_col a string, color for the bins of histogram |
|
| 392 |
#' @param vline_col a string, color for the vertical line in the histogram |
|
| 393 |
#' @param main_title title of the plot. For ggplot, name of scaled weights plot and unscaled weights plot, respectively. |
|
| 394 |
#' @param scaled_weights (base plot only) an indicator for using scaled weights instead of regular weights |
|
| 395 |
#' @param bins (`ggplot` only) number of bin parameter to use |
|
| 396 |
#' |
|
| 397 |
#' @examples |
|
| 398 |
#' plot(weighted_sat) |
|
| 399 |
#' |
|
| 400 |
#' if (requireNamespace("ggplot2")) {
|
|
| 401 |
#' plot(weighted_sat, ggplot = TRUE) |
|
| 402 |
#' } |
|
| 403 |
#' @describeIn estimate_weights Plot method for estimate_weights objects |
|
| 404 |
#' @export |
|
| 405 | ||
| 406 |
plot.maicplus_estimate_weights <- function(x, ggplot = FALSE, |
|
| 407 |
bin_col = "#6ECEB2", vline_col = "#688CE8", |
|
| 408 |
main_title = NULL, |
|
| 409 |
scaled_weights = TRUE, |
|
| 410 |
bins = 50, ...) {
|
|
| 411 | 1x |
if (ggplot) {
|
| 412 | ! |
if (is.null(main_title)) main_title <- c("Scaled Individual Weights", "Unscaled Individual Weights")
|
| 413 | ! |
plot_weights_ggplot(x, bin_col, vline_col, main_title, bins) |
| 414 |
} else {
|
|
| 415 | 1x |
if (is.null(main_title)) {
|
| 416 | 1x |
main_title <- ifelse(scaled_weights, "Scaled Individual Weights", "Unscaled Individual Weights") |
| 417 |
} |
|
| 418 | 1x |
plot_weights_base(x, bin_col, vline_col, main_title, scaled_weights) |
| 419 |
} |
|
| 420 |
} |
|
| 421 | ||
| 422 | ||
| 423 |
#' Check to see if weights are optimized correctly |
|
| 424 |
#' |
|
| 425 |
#' This function checks to see if the optimization is done properly |
|
| 426 |
#' by checking the covariate averages before and after adjustment. |
|
| 427 |
#' In case of ties when calculating median, |
|
| 428 |
#' we return the mean of the two numbers. For more details, |
|
| 429 |
#' see `ties` parameter in [matrixStats::weightedMedian]. |
|
| 430 |
#' |
|
| 431 |
#' @param weighted_data object returned after calculating weights using \code{\link{estimate_weights}}
|
|
| 432 |
#' @param processed_agd a data frame, object returned after using \code{\link{process_agd}} or
|
|
| 433 |
#' aggregated data following the same naming convention |
|
| 434 |
#' |
|
| 435 |
#' @examples |
|
| 436 |
#' data(weighted_sat) |
|
| 437 |
#' data(agd) |
|
| 438 |
#' check_weights(weighted_sat, process_agd(agd)) |
|
| 439 |
#' @importFrom matrixStats weightedMedian |
|
| 440 |
#' |
|
| 441 |
#' @return data.frame of weighted and unweighted covariate averages of the IPD, |
|
| 442 |
#' average of aggregate data, and sum of inner products of covariate \eqn{x_i} and the weights (\eqn{exp(x_i\beta)})
|
|
| 443 |
#' @export |
|
| 444 | ||
| 445 |
check_weights <- function(weighted_data, processed_agd) {
|
|
| 446 | 2x |
ipd_with_weights <- weighted_data$data |
| 447 | 2x |
match_cov <- weighted_data$centered_colnames |
| 448 | ||
| 449 |
# if algorithm is correct, all centered columns should have a weighted summation to a very small number around zero |
|
| 450 | 2x |
num_check <- ipd_with_weights$weights %*% as.matrix(ipd_with_weights[, match_cov, drop = FALSE]) |
| 451 | 2x |
num_check <- round(num_check, 4) |
| 452 | ||
| 453 |
# for reporting |
|
| 454 | 2x |
outdata <- data.frame( |
| 455 | 2x |
covariate = gsub("_CENTERED$", "", match_cov),
|
| 456 | 2x |
match_stat = NA, |
| 457 | 2x |
internal_trial = NA, |
| 458 | 2x |
internal_trial_after_weighted = NA, |
| 459 | 2x |
external_trial = NA, |
| 460 | 2x |
sum_centered_IPD_with_weights = as.vector(num_check) |
| 461 |
) |
|
| 462 | 2x |
attr(outdata, "footer") <- list() |
| 463 |
# find item that was matched by mean |
|
| 464 | 2x |
ind_mean <- lapply(outdata$covariate, grep, x = names(processed_agd), value = TRUE) |
| 465 | 2x |
ind_mean <- sapply(ind_mean, function(ii) any(grepl("_MEAN$", ii)))
|
| 466 | 2x |
outdata$match_stat <- ifelse(grepl("_MEDIAN$", outdata$covariate), "Median",
|
| 467 | 2x |
ifelse(grepl("_SQUARED$", outdata$covariate), "SD",
|
| 468 | 2x |
ifelse(ind_mean, "Mean", "Prop") |
| 469 |
) |
|
| 470 |
) |
|
| 471 | 2x |
outdata$covariate <- gsub("_MEDIAN|_SQUARED", "", outdata$covariate)
|
| 472 |
# fill in corresponding agd data |
|
| 473 | 2x |
outdata$external_trial <- unlist(processed_agd[paste(outdata$covariate, toupper(outdata$match_stat), sep = "_")]) |
| 474 | ||
| 475 |
# fill in stat from unweighted and weighted IPD |
|
| 476 | 2x |
for (ii in seq_len(nrow(outdata))) {
|
| 477 | 12x |
covname <- outdata$covariate[ii] |
| 478 | 12x |
if (outdata$match_stat[ii] %in% c("Mean", "Prop")) {
|
| 479 | 8x |
outdata$internal_trial[ii] <- mean(ipd_with_weights[[covname]], na.rm = TRUE) |
| 480 | 8x |
outdata$internal_trial_after_weighted[ii] <- weighted.mean( |
| 481 | 8x |
ipd_with_weights[[covname]], |
| 482 | 8x |
w = ipd_with_weights$weights, na.rm = TRUE |
| 483 |
) |
|
| 484 | 4x |
} else if (outdata$match_stat[ii] == "Median") {
|
| 485 | 2x |
outdata$internal_trial[ii] <- quantile(ipd_with_weights[[covname]], |
| 486 | 2x |
probs = 0.5, |
| 487 | 2x |
na.rm = TRUE, |
| 488 | 2x |
type = 2, |
| 489 | 2x |
names = FALSE |
| 490 | 2x |
) # SAS default |
| 491 | 2x |
outdata$internal_trial_after_weighted[ii] <- weightedMedian( |
| 492 | 2x |
x = ipd_with_weights[[covname]], |
| 493 | 2x |
w = ipd_with_weights$weights, |
| 494 | 2x |
interpolate = FALSE, |
| 495 | 2x |
ties = "mean", |
| 496 | 2x |
na.rm = TRUE |
| 497 |
) |
|
| 498 |
# no IPD equals to reported AgD median |
|
| 499 | 2x |
msg_ind <- !any(ipd_with_weights[[covname]] == outdata$external_trial[ii], na.rm = TRUE) |
| 500 | 2x |
if (msg_ind) {
|
| 501 | ! |
msg_txt <- paste0( |
| 502 | ! |
"For covariate ", covname, ", it was matched to AgD median, but there is no IPD identical to AgD median,", |
| 503 | ! |
"hence median after weighted will not equal to AgD median exactly." |
| 504 |
) |
|
| 505 | ! |
attr(outdata, "footer") <- c(attr(outdata, "footer"), msg_txt) |
| 506 |
} |
|
| 507 | 2x |
} else if (outdata$match_stat[ii] == "SD") {
|
| 508 | 2x |
outdata$internal_trial[ii] <- sd(ipd_with_weights[[covname]], na.rm = TRUE) |
| 509 | 2x |
wm_squared <- weighted.mean(ipd_with_weights[[covname]]^2, w = ipd_with_weights$weights, na.rm = TRUE) |
| 510 | 2x |
ms_agd <- processed_agd[[paste0(outdata$covariate[ii], "_MEAN")]]^2 |
| 511 | 2x |
outdata$internal_trial_after_weighted[ii] <- sqrt(wm_squared - ms_agd) |
| 512 |
} |
|
| 513 |
} |
|
| 514 | ||
| 515 |
# output |
|
| 516 | 2x |
class(outdata) <- c("maicplus_check_weights", "data.frame")
|
| 517 | 2x |
outdata |
| 518 |
} |
|
| 519 | ||
| 520 | ||
| 521 |
#' Print method for Check Weights objects |
|
| 522 |
#' |
|
| 523 |
#' @param x object from [check_weights] |
|
| 524 |
#' @param mean_digits number of digits for rounding mean columns in the output |
|
| 525 |
#' @param prop_digits number of digits for rounding proportion columns in the output |
|
| 526 |
#' @param sd_digits number of digits for rounding mean columns in the output |
|
| 527 |
#' @param digits minimal number of significant digits, see [print.default]. |
|
| 528 |
#' @param ... further arguments to [print.data.frame] |
|
| 529 |
#' @describeIn check_weights Print method for check_weights objects |
|
| 530 |
#' @export |
|
| 531 | ||
| 532 |
print.maicplus_check_weights <- function(x, |
|
| 533 |
mean_digits = 2, |
|
| 534 |
prop_digits = 2, |
|
| 535 |
sd_digits = 3, |
|
| 536 |
digits = getOption("digits"), ...) {
|
|
| 537 | 1x |
round_digits <- c("Mean" = mean_digits, "Prop" = prop_digits, "SD" = sd_digits)[x$match_stat]
|
| 538 | 1x |
round_digits[is.na(round_digits)] <- digits |
| 539 | ||
| 540 | 1x |
x$external_trial <- round(x$external_trial, round_digits) |
| 541 | 1x |
x$internal_trial <- round(x$internal_trial, round_digits) |
| 542 | 1x |
x$internal_trial_after_weighted <- round(x$internal_trial_after_weighted, round_digits) |
| 543 | ||
| 544 | 1x |
print.data.frame(x, ...) |
| 545 | 1x |
footer <- unlist(attr(x, "footer")) |
| 546 | 1x |
if (length(footer)) {
|
| 547 | ! |
cat("\n")
|
| 548 | ! |
for (f in seq_along(footer)) {
|
| 549 | ! |
cat(paste0("[", f, "] ", footer[f]))
|
| 550 |
} |
|
| 551 |
} |
|
| 552 |
} |
|
| 553 | ||
| 554 |
#' Note on Expected Sample Size Reduction |
|
| 555 |
#' |
|
| 556 |
#' @param width Number of characters to break string into new lines (`\n`). |
|
| 557 |
#' |
|
| 558 |
#' @return A character string |
|
| 559 |
#' @keywords internal |
|
| 560 |
ess_footnote_text <- function(width = 0.9 * getOption("width")) {
|
|
| 561 | 1x |
text <- "An ESS reduction up to ~60% is not unexpected based on the 2021 survey of NICE's technology appraisals |
| 562 | 1x |
(https://onlinelibrary.wiley.com/doi/full/10.1002/jrsm.1511), whereas a reduction of >75% is less common |
| 563 | 1x |
and it may be considered suboptimal." |
| 564 | 1x |
paste0(strwrap(text, width = width), collapse = "\n") |
| 565 |
} |
| 1 |
# Restore the RNG back to a previous state using the global .Random.seed |
|
| 2 |
set_random_seed <- function(old_seed) {
|
|
| 3 | 16x |
if (is.null(old_seed)) {
|
| 4 | 8x |
rm(".Random.seed", envir = globalenv(), inherits = FALSE)
|
| 5 |
} else {
|
|
| 6 | 8x |
assign(".Random.seed", value = old_seed, envir = globalenv(), inherits = FALSE)
|
| 7 |
} |
|
| 8 |
} |
|
| 9 | ||
| 10 |
construct_boot_data <- function(weighted_data, i = 1) {
|
|
| 11 | ! |
if (is.null(weighted_data$boot)) stop("Must contain bootstrap results from estimate_weights()")
|
| 12 | ! |
i <- as.integer(i) |
| 13 | ! |
R <- dim(weighted_data$boot)[3] |
| 14 | ! |
if (i < 1 || i > R) stop("i must be integer between 1 and ", R)
|
| 15 | ||
| 16 | ! |
boot_data <- weighted_data$boot[, , i] |
| 17 | ! |
weighted_data$data <- weighted_data$data[boot_data[, 1], ] |
| 18 | ! |
weighted_data$data$weights <- boot_data[, 2] |
| 19 | ! |
weighted_data$data$scaled_weights <- boot_data[, 2] / sum(boot_data[, 2]) |
| 20 | ! |
weighted_data |
| 21 |
} |
|
| 22 | ||
| 23 | ||
| 24 |
transform_ratio <- function(object) {
|
|
| 25 | 27x |
result <- object |
| 26 | 27x |
result$est <- exp(object$est) |
| 27 |
# log normal parameterization for SE |
|
| 28 | 27x |
result$se <- sqrt((exp(object$se^2) - 1) * exp(2 * object$est + object$se^2)) |
| 29 | 27x |
result$ci_l <- exp(object$ci_l) |
| 30 | 27x |
result$ci_u <- exp(object$ci_u) |
| 31 | 27x |
result |
| 32 |
} |
|
| 33 | ||
| 34 |
transform_absolute <- function(object) {
|
|
| 35 | 8x |
result <- object |
| 36 | 8x |
result$est <- object$est * 100 |
| 37 | 8x |
result$se <- object$se * 100 |
| 38 | 8x |
result$ci_l <- object$ci_l * 100 |
| 39 | 8x |
result$ci_u <- object$ci_u * 100 |
| 40 | 8x |
result |
| 41 |
} |
| 1 |
#' Kaplan-Meier (KM) plot function for anchored and unanchored cases using ggplot |
|
| 2 |
#' |
|
| 3 |
#' This is wrapper function of \code{basic_kmplot2}.
|
|
| 4 |
#' The argument setting is similar to \code{maic_anchored} and \code{maic_unanchored},
|
|
| 5 |
#' and it is used in those two functions. |
|
| 6 |
#' |
|
| 7 |
#' @param weights_object an object returned by \code{estimate_weight}
|
|
| 8 |
#' @param tte_ipd a data frame of individual patient data (IPD) of internal trial, contain at least `"USUBJID"`, |
|
| 9 |
#' `"EVENT"`, `"TIME"` columns and a column indicating treatment assignment |
|
| 10 |
#' @param tte_pseudo_ipd a data frame of pseudo IPD by digitized KM curves of external trial (for time-to-event |
|
| 11 |
#' endpoint), contain at least `"EVENT"`, `"TIME"` |
|
| 12 |
#' @param trt_ipd a string, name of the interested investigation arm in internal trial \code{dat_igd} (real IPD)
|
|
| 13 |
#' @param trt_agd a string, name of the interested investigation arm in external trial \code{dat_pseudo} (pseudo IPD)
|
|
| 14 |
#' @param trt_common a string, name of the common comparator in internal and external trial, by default is NULL, |
|
| 15 |
#' indicating unanchored case |
|
| 16 |
#' @param trt_var_ipd a string, column name in \code{tte_ipd} that contains the treatment assignment
|
|
| 17 |
#' @param trt_var_agd a string, column name in \code{tte_pseudo_ipd} that contains the treatment assignment
|
|
| 18 |
#' @param normalize_weights logical, default is \code{FALSE}. If \code{TRUE},
|
|
| 19 |
#' \code{scaled_weights} (normalized weights) in \code{weights_object$data} will be used.
|
|
| 20 |
#' @param km_conf_type a string, pass to \code{conf.type} of \code{survfit}
|
|
| 21 |
#' @param km_layout a string, only applicable for unanchored case (\code{trt_common = NULL}), indicated the
|
|
| 22 |
#' desired layout of output KM curve. |
|
| 23 |
#' @param time_scale a string, time unit of median survival time, taking a value of 'years', 'months', |
|
| 24 |
#' weeks' or 'days' |
|
| 25 |
#' @param ... other arguments in \code{basic_kmplot2}
|
|
| 26 |
#' |
|
| 27 |
#' @return In unanchored case, a KM plot with risk set table. In anchored case, depending on \code{km_layout},
|
|
| 28 |
#' \itemize{
|
|
| 29 |
#' \item if "by_trial", 2 by 1 plot, first all KM curves (incl. weighted) in IPD trial, and then KM curves in AgD |
|
| 30 |
#' trial, with risk set table. |
|
| 31 |
#' \item if "by_arm", 2 by 1 plot, first KM curves of \code{trt_agd} and \code{trt_ipd} (with and without weights),
|
|
| 32 |
#' and then KM curves of \code{trt_common} in AgD trial and IPD trial (with and without weights). Risk set table is
|
|
| 33 |
#' appended. |
|
| 34 |
#' \item if "all", 2 by 2 plot, all plots in "by_trial" and "by_arm" without risk set table appended. |
|
| 35 |
#' } |
|
| 36 |
#' @example inst/examples/kmplot2_unanchored_ex.R |
|
| 37 |
#' @example inst/examples/kmplot2_anchored_ex.R |
|
| 38 |
#' @export |
|
| 39 | ||
| 40 |
kmplot2 <- function(weights_object, |
|
| 41 |
tte_ipd, |
|
| 42 |
tte_pseudo_ipd, |
|
| 43 |
trt_ipd, |
|
| 44 |
trt_agd, |
|
| 45 |
trt_common = NULL, |
|
| 46 |
normalize_weights = FALSE, |
|
| 47 |
trt_var_ipd = "ARM", |
|
| 48 |
trt_var_agd = "ARM", |
|
| 49 |
km_conf_type = "log-log", |
|
| 50 |
km_layout = c("all", "by_trial", "by_arm"),
|
|
| 51 |
time_scale, |
|
| 52 |
...) {
|
|
| 53 | ! |
if (!requireNamespace("survminer", quietly = TRUE)) stop("survminer package is required for this function")
|
| 54 | ||
| 55 | 1x |
names(tte_ipd) <- toupper(names(tte_ipd)) |
| 56 | 1x |
names(tte_pseudo_ipd) <- toupper(names(tte_pseudo_ipd)) |
| 57 | 1x |
trt_var_ipd <- toupper(trt_var_ipd) |
| 58 | 1x |
trt_var_agd <- toupper(trt_var_agd) |
| 59 | ||
| 60 |
# pre check |
|
| 61 | 1x |
if (!"maicplus_estimate_weights" %in% class(weights_object)) {
|
| 62 | ! |
stop("weights_object should be an object returned by estimate_weights")
|
| 63 |
} |
|
| 64 | 1x |
if (!all(c("USUBJID", "TIME", "EVENT", trt_var_ipd) %in% names(tte_ipd))) {
|
| 65 | ! |
stop(paste("tte_ipd needs to include at least USUBJID, TIME, EVENT,", trt_var_ipd))
|
| 66 |
} |
|
| 67 | 1x |
if (!all(c("TIME", "EVENT", trt_var_agd) %in% names(tte_pseudo_ipd))) {
|
| 68 | ! |
stop(paste("tte_pseudo_ipd needs to include at least TIME, EVENT,", trt_var_agd))
|
| 69 |
} |
|
| 70 | 1x |
km_layout <- match.arg(km_layout, choices = c("all", "by_trial", "by_arm"), several.ok = FALSE)
|
| 71 | ||
| 72 |
# preparing data |
|
| 73 | 1x |
is_anchored <- !is.null(trt_common) |
| 74 | 1x |
tte_ipd <- tte_ipd[tte_ipd[[trt_var_ipd]] %in% c(trt_ipd, trt_common), , drop = FALSE] |
| 75 | 1x |
tte_pseudo_ipd <- tte_pseudo_ipd[tte_pseudo_ipd[[trt_var_agd]] %in% c(trt_agd, trt_common), , drop = FALSE] |
| 76 | 1x |
if (normalize_weights) {
|
| 77 | ! |
tte_ipd$weights <- weights_object$data$scaled_weights[match(weights_object$data$USUBJID, tte_ipd$USUBJID)] |
| 78 |
} else {
|
|
| 79 | 1x |
tte_ipd$weights <- weights_object$data$weights[match(weights_object$data$USUBJID, tte_ipd$USUBJID)] |
| 80 |
} |
|
| 81 | 1x |
tte_pseudo_ipd$weights <- 1 |
| 82 | ||
| 83 | 1x |
tte_ipd$TIME <- get_time_as(tte_ipd$TIME, as = time_scale) |
| 84 | 1x |
tte_pseudo_ipd$TIME <- get_time_as(tte_pseudo_ipd$TIME, as = time_scale) |
| 85 | 1x |
my_survfit <- function(data, weighted = FALSE) {
|
| 86 | 6x |
if (weighted) {
|
| 87 | 2x |
survfit(Surv(TIME, EVENT) ~ 1, data = data, conf.type = km_conf_type, weights = data$weights) |
| 88 |
} else {
|
|
| 89 | 4x |
survfit(Surv(TIME, EVENT) ~ 1, data = data, conf.type = km_conf_type) |
| 90 |
} |
|
| 91 |
} |
|
| 92 | ||
| 93 | 1x |
if (!is_anchored) {
|
| 94 | ! |
kmlist <- list( |
| 95 | ! |
kmobj_B = my_survfit(data = tte_pseudo_ipd), |
| 96 | ! |
kmobj_A = my_survfit(data = tte_ipd), |
| 97 | ! |
kmobj_A_adj = my_survfit(data = tte_ipd, weighted = TRUE) |
| 98 |
) |
|
| 99 | ! |
kmlist_name <- c(trt_agd, trt_ipd, paste0(trt_ipd, " (weighted)")) |
| 100 | ! |
basic_kmplot2(kmlist, kmlist_name, ...) |
| 101 | 1x |
} else if (is_anchored) {
|
| 102 | 1x |
all_km <- list( |
| 103 | 1x |
kmobj_A = my_survfit(data = tte_ipd[tte_ipd[, trt_var_ipd] == trt_ipd, ]), |
| 104 | 1x |
kmobj_B = my_survfit(data = tte_pseudo_ipd[tte_pseudo_ipd[, trt_var_agd] == trt_agd, ]), |
| 105 | 1x |
kmobj_A_adj = my_survfit(data = tte_ipd[tte_ipd[, trt_var_ipd] == trt_ipd, ], weighted = TRUE), |
| 106 | 1x |
kmobj_C = my_survfit(data = tte_ipd[tte_ipd[, trt_var_ipd] == trt_common, ]), |
| 107 | 1x |
kmobj_C_adj = my_survfit(data = tte_ipd[tte_ipd[, trt_var_ipd] == trt_common, ], weighted = TRUE), |
| 108 | 1x |
kmobj_C_agd = my_survfit(data = tte_pseudo_ipd[tte_pseudo_ipd[, trt_var_agd] == trt_common, ]) |
| 109 |
) |
|
| 110 | ||
| 111 | 1x |
kmlist_combined <- list() |
| 112 | 1x |
if (km_layout %in% c("by_trial", "all")) {
|
| 113 | 1x |
kmlist_1_2 <- list( |
| 114 | 1x |
setNames( |
| 115 | 1x |
all_km[c(4, 1, 3, 5)], |
| 116 | 1x |
c(trt_common, trt_ipd, paste0(trt_ipd, " (weighted)"), paste0(trt_common, " (weighted)")) |
| 117 |
), |
|
| 118 | 1x |
setNames(all_km[c(6, 2)], c(trt_common, trt_agd)) |
| 119 |
) |
|
| 120 | 1x |
names(kmlist_1_2) <- c( |
| 121 | 1x |
paste0("Kaplan-Meier Curves \n(", trt_ipd, " vs ", trt_common, ") in the IPD trial"),
|
| 122 | 1x |
paste0("Kaplan-Meier Curves \n(", trt_agd, " vs ", trt_common, ") in the AgD trial")
|
| 123 |
) |
|
| 124 | 1x |
kmlist_combined <- c(kmlist_combined, kmlist_1_2) |
| 125 |
} |
|
| 126 | 1x |
if (km_layout %in% c("by_arm", "all")) {
|
| 127 | ! |
kmlist_3_4 <- list( |
| 128 | ! |
setNames(all_km[c(2, 1, 3)], c(trt_agd, trt_ipd, paste0(trt_ipd, " (weighted)"))), |
| 129 | ! |
setNames(all_km[c(6, 4, 5)], paste(trt_common, c("(AgD)", "(IPD)", "(IPD,weighted)")))
|
| 130 |
) |
|
| 131 | ! |
names(kmlist_3_4) <- c( |
| 132 | ! |
paste0("Kaplan-Meier Curves \n(", trt_ipd, " vs ", trt_agd, ")"),
|
| 133 | ! |
paste0("Kaplan-Meier Curves of Common Comparator \n", trt_common, "(IPD vs AgD Trial)")
|
| 134 |
) |
|
| 135 | ! |
kmlist_combined <- c(kmlist_combined, kmlist_3_4) |
| 136 |
} |
|
| 137 | 1x |
if (km_layout == "all") {
|
| 138 | ! |
kmlist_combined <- kmlist_combined[c(1, 3, 2, 4)] |
| 139 |
} |
|
| 140 | ||
| 141 | 1x |
splots <- mapply( |
| 142 | 1x |
FUN = basic_kmplot2, |
| 143 | 1x |
kmlist = kmlist_combined, |
| 144 | 1x |
kmlist_name = lapply(kmlist_combined, names), |
| 145 | 1x |
main_title = names(kmlist_combined), |
| 146 | 1x |
MoreArgs = list(...), |
| 147 | 1x |
SIMPLIFY = FALSE |
| 148 |
) |
|
| 149 | 1x |
survminer::arrange_ggsurvplots(splots, nrow = 1 + (km_layout == "all")) |
| 150 |
} |
|
| 151 |
} |
|
| 152 | ||
| 153 |
#' Basic Kaplan Meier (KM) plot function using ggplot |
|
| 154 |
#' |
|
| 155 |
#' This function generates a basic KM plot using ggplot. |
|
| 156 |
#' |
|
| 157 |
#' @param kmlist a list of \code{survfit} object
|
|
| 158 |
#' @param kmlist_name a vector indicating the treatment names of each \code{survfit} object
|
|
| 159 |
#' @param endpoint_name a string, name of time to event endpoint, to be show in the |
|
| 160 |
#' last line of title |
|
| 161 |
#' @param show_risk_set logical, show risk set table or not, TRUE by default |
|
| 162 |
#' @param main_title a string, main title of the KM plot |
|
| 163 |
#' @param break_x_by bin parameter for \code{survminer}
|
|
| 164 |
#' @param censor indicator to include censor information |
|
| 165 |
#' @param xlab label name for x-axis of the plot |
|
| 166 |
#' @param xlim x limit for the x-axis of the plot |
|
| 167 |
#' @param use_colors a character vector of length up to 4, colors to the KM curves, |
|
| 168 |
#' it will be passed to 'col' of \code{lines()}
|
|
| 169 |
#' @param use_line_types a numeric vector of length up to 4, line type to the KM curves, |
|
| 170 |
#' it will be passed to \code{lty} of \code{lines()}
|
|
| 171 |
#' @example inst/examples/basic_kmplot2_ex.R |
|
| 172 |
#' @returns A Kaplan-Meier plot object created with `survminer::ggsurvplot()`. |
|
| 173 |
#' @export |
|
| 174 | ||
| 175 |
basic_kmplot2 <- function(kmlist, |
|
| 176 |
kmlist_name, |
|
| 177 |
endpoint_name = "Time to Event Endpoint", |
|
| 178 |
show_risk_set = TRUE, |
|
| 179 |
main_title = "Kaplan-Meier Curves", |
|
| 180 |
break_x_by = NULL, |
|
| 181 |
censor = TRUE, |
|
| 182 |
xlab = "Time", |
|
| 183 |
xlim = NULL, |
|
| 184 |
use_colors = NULL, |
|
| 185 |
use_line_types = NULL) {
|
|
| 186 | ! |
if (!requireNamespace("survminer", quietly = TRUE)) stop("survminer package is required for this function")
|
| 187 | ||
| 188 | 2x |
if (is.null(use_line_types)) {
|
| 189 | 2x |
use_line_types <- c(1, 1, 2, 2) |
| 190 |
} |
|
| 191 | ||
| 192 | 2x |
if (is.null(use_colors)) {
|
| 193 | 2x |
use_colors <- c("#5450E4", "#00857C", "#6ECEB2", "#7B68EE")
|
| 194 |
} |
|
| 195 | ||
| 196 |
# Produce the Kaplan-Meier plot |
|
| 197 | 2x |
survminer_plot <- survminer::ggsurvplot(kmlist, |
| 198 | 2x |
linetype = use_line_types, |
| 199 | 2x |
palette = use_colors, |
| 200 | 2x |
size = 0.2, |
| 201 | 2x |
combine = TRUE, |
| 202 | 2x |
risk.table = show_risk_set, |
| 203 | 2x |
risk.table.y.text.col = TRUE, |
| 204 | 2x |
risk.table.y.text = FALSE, |
| 205 | 2x |
break.x.by = break_x_by, |
| 206 | 2x |
censor = censor, |
| 207 | 2x |
censor.size = 2, |
| 208 | 2x |
xlab = xlab, |
| 209 | 2x |
ylab = endpoint_name, |
| 210 | 2x |
legend.title = "Treatment", |
| 211 | 2x |
legend = c(0.85, 0.82), |
| 212 | 2x |
title = paste0(main_title, "\nEndpoint: ", endpoint_name), |
| 213 | 2x |
legend.labs = kmlist_name, |
| 214 | 2x |
tables.theme = survminer::theme_cleantable(), |
| 215 | 2x |
ggtheme = ggplot2::theme_classic(base_size = 10), |
| 216 | 2x |
fontsize = 3, |
| 217 | 2x |
conf.int = FALSE, |
| 218 | 2x |
xlim = xlim |
| 219 |
) |
|
| 220 | 2x |
survminer_plot |
| 221 |
} |
| 1 |
#' Bucher method for combining treatment effects |
|
| 2 |
#' |
|
| 3 |
#' Given two treatment effects of A vs. C and B vs. C |
|
| 4 |
#' derive the treatment effects of A vs. B using the Bucher method. |
|
| 5 |
#' Two-sided confidence interval and Z-test p-value are also calculated. |
|
| 6 |
#' Treatment effects and standard errors should be in log scale |
|
| 7 |
#' for hazard ratio, odds ratio, and risk ratio. |
|
| 8 |
#' Treatment effects and standard errors should be in natural scale |
|
| 9 |
#' for risk difference and mean difference. |
|
| 10 |
#' |
|
| 11 |
#' @param trt a list of two scalars for the study with the |
|
| 12 |
#' experimental arm. `'est'` is the point estimate and `'se'` |
|
| 13 |
#' is the standard error of the treatment effect. |
|
| 14 |
#' For time-to-event data, `'est'` and `'se'` should be point estimate and |
|
| 15 |
#' standard error of the log hazard ratio. |
|
| 16 |
#' For binary data, `'est'` and `'se'` should be point estimate and |
|
| 17 |
#' standard error of the log odds ratio, log risk ratio, or risk |
|
| 18 |
#' difference. |
|
| 19 |
#' For continuous data, `'est'` and `'se'` should be point estimate and |
|
| 20 |
#' standard error of the mean difference. |
|
| 21 |
#' @param com same as \code{trt}, but for the study with the
|
|
| 22 |
#' control arm |
|
| 23 |
#' @param conf_lv a numerical scalar, prescribe confidence level to derive |
|
| 24 |
#' two-sided confidence interval for the treatment effect |
|
| 25 |
#' |
|
| 26 |
#' @return a list with 5 elements, |
|
| 27 |
#' \describe{
|
|
| 28 |
#' \item{est}{a scalar, point estimate of the treatment effect}
|
|
| 29 |
#' \item{se}{a scalar, standard error of the treatment effect}
|
|
| 30 |
#' \item{ci_l}{a scalar, lower confidence limit of a two-sided CI
|
|
| 31 |
#' with prescribed nominal level by \code{conf_lv}}
|
|
| 32 |
#' \item{ci_u}{a scalar, upper confidence limit of a two-sided CI
|
|
| 33 |
#' with prescribed nominal level by \code{conf_lv}}
|
|
| 34 |
#' \item{pval}{p-value of Z-test, with null hypothesis that
|
|
| 35 |
#' \code{est} is zero}
|
|
| 36 |
#' } |
|
| 37 |
#' @export |
|
| 38 |
#' @examples |
|
| 39 |
#' trt <- list(est = log(1.1), se = 0.2) |
|
| 40 |
#' com <- list(est = log(1.3), se = 0.18) |
|
| 41 |
#' result <- bucher(trt, com, conf_lv = 0.9) |
|
| 42 |
#' print(result, ci_digits = 3, pval_digits = 3) |
|
| 43 |
bucher <- function(trt, com, conf_lv = 0.95) {
|
|
| 44 | 1x |
if (!isTRUE(is.finite(trt$est))) stop("trt$est is not valid: ", trt$est)
|
| 45 | 1x |
if (!isTRUE(is.finite(trt$se))) stop("trt$se is not valid: ", trt$se)
|
| 46 | 1x |
if (!isTRUE(is.finite(com$est))) stop("com$est is not valid: ", com$est)
|
| 47 | 1x |
if (!isTRUE(is.finite(com$se))) stop("com$se is not valid: ", com$se)
|
| 48 | 1x |
if (conf_lv < 0 || 1 < conf_lv) stop("conf_lv must be in (0, 1): ", conf_lv)
|
| 49 | ||
| 50 | 48x |
est <- trt$est - com$est |
| 51 | 48x |
se <- sqrt(trt$se^2 + com$se^2) |
| 52 | 48x |
ci_l <- est - stats::qnorm(0.5 + conf_lv / 2) * se |
| 53 | 48x |
ci_u <- est + stats::qnorm(0.5 + conf_lv / 2) * se |
| 54 | 48x |
if (est > 0) {
|
| 55 | 8x |
pval <- 2 * (1 - stats::pnorm(est, 0, se)) |
| 56 |
} else {
|
|
| 57 | 40x |
pval <- 2 * stats::pnorm(est, 0, se) |
| 58 |
} |
|
| 59 | ||
| 60 | 48x |
outdata <- list( |
| 61 | 48x |
est = est, |
| 62 | 48x |
se = se, |
| 63 | 48x |
ci_l = ci_l, |
| 64 | 48x |
ci_u = ci_u, |
| 65 | 48x |
pval = pval |
| 66 |
) |
|
| 67 | ||
| 68 | 48x |
class(outdata) <- c("maicplus_bucher", "list")
|
| 69 | 48x |
outdata |
| 70 |
} |
|
| 71 | ||
| 72 | ||
| 73 |
#' Calculate standard error from the reported confidence interval. |
|
| 74 |
#' |
|
| 75 |
#' Comparator studies often only report confidence interval of the |
|
| 76 |
#' treatment effects. This function calculates standard error of the |
|
| 77 |
#' treatment effect given the reported confidence interval. |
|
| 78 |
#' For relative treatment effect (i.e. hazard ratio, odds ratio, and |
|
| 79 |
#' risk ratio), the function would log the confidence interval. |
|
| 80 |
#' For risk difference and mean difference, |
|
| 81 |
#' we do not log the confidence interval. |
|
| 82 |
#' The option to log the confidence interval is controlled |
|
| 83 |
#' by `'log'` parameter. |
|
| 84 |
#' |
|
| 85 |
#' @param CI_lower Reported lower percentile value of the |
|
| 86 |
#' treatment effect |
|
| 87 |
#' @param CI_upper Reported upper percentile value of the |
|
| 88 |
#' treatment effect |
|
| 89 |
#' @param CI_perc Percentage of confidence interval reported |
|
| 90 |
#' @param log Whether the confidence interval should be logged. |
|
| 91 |
#' For relative treatment effect, log should be applied because |
|
| 92 |
#' estimated log treatment effect is approximately normally distributed. |
|
| 93 |
#' @return Standard error of log relative treatment effect if `'log'` |
|
| 94 |
#' is true and standard error of the treatment effect if `'log'` |
|
| 95 |
#' is false |
|
| 96 |
#' @examples |
|
| 97 |
#' find_SE_from_CI(CI_lower = 0.55, CI_upper = 0.90, CI_perc = 0.95) |
|
| 98 |
#' @export |
|
| 99 | ||
| 100 |
find_SE_from_CI <- function(CI_lower = NULL, CI_upper = NULL, |
|
| 101 |
CI_perc = 0.95, log = TRUE) {
|
|
| 102 | 3x |
if (CI_perc > 1 || CI_perc < 0) {
|
| 103 | ! |
stop("CI_perc has to be between 0 and 1")
|
| 104 |
} |
|
| 105 | ||
| 106 | 3x |
if (is.null(CI_lower) || is.null(CI_upper)) {
|
| 107 | ! |
stop("Both CI_lower and CI_upper need to be specified")
|
| 108 |
} |
|
| 109 | ||
| 110 | 3x |
if (!is.numeric(CI_lower) || !is.numeric(CI_upper)) {
|
| 111 | ! |
stop("Both CI_lower and CI_upper need to be specified")
|
| 112 |
} |
|
| 113 | ||
| 114 | 3x |
alpha <- 1 - CI_perc |
| 115 | 3x |
se <- ifelse(log, |
| 116 | 3x |
(log(CI_upper) - log(CI_lower)) / (2 * qnorm(1 - alpha / 2)), |
| 117 | 3x |
(CI_upper - CI_lower) / (2 * qnorm(1 - alpha / 2)) |
| 118 |
) |
|
| 119 | 3x |
return(se) |
| 120 |
} |
|
| 121 | ||
| 122 |
#' Print method for `maicplus_bucher` object |
|
| 123 |
#' |
|
| 124 |
#' @param x `maicplus_bucher` object |
|
| 125 |
#' @param ci_digits an integer, number of decimal places for point |
|
| 126 |
#' estimate and derived confidence limits |
|
| 127 |
#' @param pval_digits an integer, number of decimal places to display |
|
| 128 |
#' Z-test p-value |
|
| 129 |
#' @param exponentiate whether the treatment effect and confidence |
|
| 130 |
#' interval should be exponentiated. This applies to relative |
|
| 131 |
#' treatment effects. Default is set to false. |
|
| 132 |
#' @param ... not used |
|
| 133 |
#' @describeIn bucher Print method for `maicplus_bucher` objects |
|
| 134 |
#' @export |
|
| 135 | ||
| 136 |
print.maicplus_bucher <- function(x, ci_digits = 2, pval_digits = 3, |
|
| 137 |
exponentiate = FALSE, ...) {
|
|
| 138 | 1x |
output <- reformat(x, ci_digits, pval_digits, |
| 139 | 1x |
show_pval = TRUE, exponentiate |
| 140 |
) |
|
| 141 | 1x |
print(output) |
| 142 |
} |
|
| 143 | ||
| 144 |
#' Reformat `maicplus_bucher` alike object |
|
| 145 |
#' |
|
| 146 |
#' @param x a list, structured like a `maicplus_bucher` object |
|
| 147 |
#' @param ci_digits an integer, number of decimal places for point |
|
| 148 |
#' estimate and derived confidence limits |
|
| 149 |
#' @param pval_digits an integer, number of decimal places to display |
|
| 150 |
#' Z-test p-value |
|
| 151 |
#' @param show_pval a logical value, default is TRUE. If FALSE, p-value will not |
|
| 152 |
#' be output as the second element of the character vector |
|
| 153 |
#' @param exponentiate whether the treatment effect and confidence |
|
| 154 |
#' interval should be exponentiated. This applies to relative |
|
| 155 |
#' treatment effects. Default is set to false. |
|
| 156 |
#' @keywords internal |
|
| 157 | ||
| 158 |
reformat <- function(x, ci_digits = 2, pval_digits = 3, |
|
| 159 |
show_pval = TRUE, exponentiate = FALSE) {
|
|
| 160 | 1x |
transform_this <- function(x) {
|
| 161 | 3x |
ifelse(exponentiate, exp(x), x) |
| 162 |
} |
|
| 163 | ||
| 164 | 1x |
a <- format(round(transform_this(x$est), ci_digits), |
| 165 | 1x |
nsmall = ci_digits |
| 166 |
) |
|
| 167 | 1x |
b <- format(round(transform_this(x$ci_l), ci_digits), |
| 168 | 1x |
nsmall = ci_digits |
| 169 |
) |
|
| 170 | 1x |
c <- format(round(transform_this(x$ci_u), ci_digits), |
| 171 | 1x |
nsmall = ci_digits |
| 172 |
) |
|
| 173 | 1x |
res <- paste0(a, "[", b, "; ", c, "]") |
| 174 | ||
| 175 | 1x |
disp_pval <- round(x$pval, pval_digits) |
| 176 | 1x |
disp_pval <- |
| 177 | 1x |
ifelse(disp_pval == 0, |
| 178 | 1x |
paste0("<", 1 / (10^pval_digits)),
|
| 179 | 1x |
format(disp_pval, nsmall = pval_digits) |
| 180 |
) |
|
| 181 | ||
| 182 | 1x |
if (show_pval) {
|
| 183 | 1x |
output <- c(res, disp_pval) |
| 184 | 1x |
names(output) <- c("result", "pvalue")
|
| 185 |
} else {
|
|
| 186 | ! |
output <- res |
| 187 | ! |
names(output) <- "result" |
| 188 |
} |
|
| 189 | ||
| 190 | 1x |
output |
| 191 |
} |
| 1 |
#' Forest Plot for One or More MAIC Objects |
|
| 2 |
#' |
|
| 3 |
#' This function compiles effect estimates (and their confidence intervals) from one or more |
|
| 4 |
#' "MAIC objects" – typically the output from [maic_anchored()] or [maic_unanchored()] functions – |
|
| 5 |
#' and creates a forest plot alongside a summary table of those effect estimates. |
|
| 6 |
#' |
|
| 7 |
#' @param ... One or more MAIC objects. Each object must contain an `inferential$summary` data frame |
|
| 8 |
#' with the columns `"HR"`, `"OR"`, or `"RR"` (one of these must be present), along with `"LCL"`, |
|
| 9 |
#' `"UCL"`, `"pval"`, and a `case` identifier column. |
|
| 10 |
#' @param xlim A numeric vector of length two, specifying the limits of the effect-size axis |
|
| 11 |
#' in the resulting forest plot. Defaults to `c(0, 1.5)`. |
|
| 12 |
#' @param reference_line A numeric value specifying where to draw the "no-effect" reference line |
|
| 13 |
#' on the forest plot. Defaults to `1`. |
|
| 14 |
#' |
|
| 15 |
#' @details |
|
| 16 |
#' This function extracts the effect estimates (e.g., HR, OR, or RR) and their confidence intervals |
|
| 17 |
#' from each provided MAIC object. It then stacks all estimates into a single data frame for plotting. |
|
| 18 |
#' A forest plot is generated using **ggplot2** with vertical error bars displaying the confidence intervals. |
|
| 19 |
#' The `reference_line` is drawn as a dashed line to indicate the null value (usually 1, meaning no difference). |
|
| 20 |
#' |
|
| 21 |
#' Below the forest plot, a table is constructed showing the point estimate and 95% confidence interval |
|
| 22 |
#' for each row, along with its p-value. If the p-value is less than 0.001, it is displayed as `"< 0.001"`, |
|
| 23 | ||
| 24 |
#' otherwise it is displayed to three decimal places. |
|
| 25 |
#' |
|
| 26 |
#' @return A [patchwork][patchwork::patchwork] object that combines: |
|
| 27 |
#' \itemize{
|
|
| 28 |
#' \item A forest plot of the provided effect estimates and their 95\% confidence intervals. |
|
| 29 |
#' \item A corresponding table listing each estimate in numeric form, along with the p-value. |
|
| 30 |
#' } |
|
| 31 |
#' Printing or plotting this returned object will display both the forest plot and the summary table. |
|
| 32 |
#' @example inst/examples/maic_forest_plot_ex.R |
|
| 33 |
#' @export |
|
| 34 | ||
| 35 | ||
| 36 |
maic_forest_plot <- function(..., |
|
| 37 |
xlim = c(0, 1.5), |
|
| 38 |
reference_line = 1) {
|
|
| 39 | 2x |
if (!requireNamespace("ggplot2", quietly = TRUE)) {
|
| 40 | ! |
stop("ggplot2 package is required for this function")
|
| 41 |
} |
|
| 42 | 2x |
if (!requireNamespace("patchwork", quietly = TRUE)) {
|
| 43 | ! |
stop("patchwork package is required for this function")
|
| 44 |
} |
|
| 45 | ||
| 46 |
# 1) Gather all objects |
|
| 47 | 2x |
objs_list <- list(...) |
| 48 | 2x |
if (length(objs_list) == 0) {
|
| 49 | ! |
stop("No MAIC objects were provided. Pass at least one object with $inferential$summary.")
|
| 50 |
} |
|
| 51 | ||
| 52 | 2x |
change_case_name <- |
| 53 | 2x |
function(data0_case_col, A_name, B_name, C_name) {
|
| 54 | 2x |
case_renamed <- data0_case_col |
| 55 | 2x |
for (i in seq_along(data0_case_col)) {
|
| 56 | 10x |
if (data0_case_col[i] == "AC") {
|
| 57 | 2x |
case_renamed[i] <- paste0(A_name, " vs. ", C_name) |
| 58 | 8x |
} else if (data0_case_col[i] == "adjusted_AC") {
|
| 59 | 2x |
case_renamed[i] <- paste0("Adjusted ", A_name, " vs. ", C_name)
|
| 60 | 6x |
} else if (data0_case_col[i] == "BC") {
|
| 61 | 2x |
case_renamed[i] <- paste0(B_name, " vs. ", C_name) |
| 62 | 4x |
} else if (data0_case_col[i] == "AB") {
|
| 63 | 2x |
case_renamed[i] <- paste0(A_name, " vs. ", B_name) |
| 64 | 2x |
} else if (data0_case_col[i] == "adjusted_AB") {
|
| 65 | 2x |
case_renamed[i] <- paste0("Adjusted ", A_name, " vs. ", B_name)
|
| 66 |
} |
|
| 67 |
} |
|
| 68 | 2x |
case_renamed |
| 69 |
} |
|
| 70 | ||
| 71 |
# 2) Extract and combine inferential summaries and descriptive summaries |
|
| 72 | 2x |
df_list <- |
| 73 | 2x |
lapply(objs_list, function(x) {
|
| 74 | 2x |
if (!("inferential" %in% names(x)) || !("summary" %in% names(x$inferential))) {
|
| 75 | ! |
stop("One of the objects doesn't have 'inferential$summary'. Check your inputs.")
|
| 76 |
} |
|
| 77 | 2x |
inferential_df <- x$inferential$summary |
| 78 | 2x |
if (!("descriptive" %in% names(x)) || !("summary" %in% names(x$descriptive))) {
|
| 79 | ! |
stop("One of the objects doesn't have 'descriptive$summary'. Check your inputs.")
|
| 80 |
} |
|
| 81 | 2x |
descriptive_df <- x$descriptive$summary |
| 82 | 2x |
inferential_fit_obj <- x$inferential$fit |
| 83 | 2x |
safely_extract_name <- function(df, trt_char) {
|
| 84 | 6x |
if (trt_char %in% df$trt_ind) {
|
| 85 | 6x |
df[df$trt_ind == trt_char, ]$treatment[1] |
| 86 |
} else {
|
|
| 87 | 2x |
NA_character_ |
| 88 |
} |
|
| 89 |
} |
|
| 90 | 2x |
C_name <- safely_extract_name(descriptive_df, "C") |
| 91 | 2x |
A_name <- safely_extract_name(descriptive_df, "A") |
| 92 | 2x |
B_name <- safely_extract_name(descriptive_df, "B") |
| 93 | 2x |
effect_measure_col_name <- NULL |
| 94 | 2x |
if ("HR" %in% names(inferential_df)) {
|
| 95 | 2x |
effect_measure_col_name <- "HR" |
| 96 | ! |
} else if ("OR" %in% names(inferential_df)) {
|
| 97 | ! |
effect_measure_col_name <- "OR" |
| 98 | ! |
} else if ("RR" %in% names(inferential_df)) {
|
| 99 | ! |
effect_measure_col_name <- "RR" |
| 100 |
} |
|
| 101 |
# consider the bootstrap result if exists |
|
| 102 | 2x |
if (!is.null(effect_measure_col_name) && "boot_res_AB" %in% names(inferential_fit_obj)) {
|
| 103 | 2x |
boot_results <- inferential_fit_obj$boot_res_AB |
| 104 | 2x |
adjusted_AB_row_index <- which(inferential_df$case == "adjusted_AB") |
| 105 | ||
| 106 |
# If the "adjusted_AB" row exists and bootstrap results are valid |
|
| 107 | 2x |
if ( |
| 108 | 2x |
length(adjusted_AB_row_index) > 0 && |
| 109 | 2x |
!is.null(boot_results$est) && |
| 110 | 2x |
!is.null(boot_results$ci_l) && !is.null(boot_results$ci_u) |
| 111 |
) {
|
|
| 112 |
# Update the values for the 'adjusted_AB' row |
|
| 113 | ! |
inferential_df[[effect_measure_col_name]][adjusted_AB_row_index] <- boot_results$est |
| 114 | ! |
inferential_df$LCL[adjusted_AB_row_index] <- boot_results$ci_l |
| 115 | ! |
inferential_df$UCL[adjusted_AB_row_index] <- boot_results$ci_u |
| 116 |
} |
|
| 117 |
} |
|
| 118 | 2x |
inferential_df$case <- |
| 119 | 2x |
change_case_name(inferential_df$case, A_name, B_name, C_name) |
| 120 | 2x |
inferential_df |
| 121 |
}) |
|
| 122 | ||
| 123 | 2x |
forest_data <- do.call(rbind, df_list) |
| 124 | 2x |
rownames(forest_data) <- NULL |
| 125 | ||
| 126 | 2x |
if ("HR" %in% names(forest_data)) {
|
| 127 | 2x |
effect_col <- "HR" |
| 128 | ! |
} else if ("OR" %in% names(forest_data)) {
|
| 129 | ! |
effect_col <- "OR" |
| 130 | ! |
} else if ("RR" %in% names(forest_data)) {
|
| 131 | ! |
effect_col <- "RR" |
| 132 |
} else {
|
|
| 133 | ! |
stop("No recognized effect measure (HR, OR, or RR) in the summary data.")
|
| 134 |
} |
|
| 135 | ||
| 136 |
# Convert to numeric if needed |
|
| 137 | 2x |
forest_data$effect_est <- as.numeric(forest_data[[effect_col]]) |
| 138 | 2x |
forest_data$LCL <- as.numeric(forest_data$LCL) |
| 139 | 2x |
forest_data$UCL <- as.numeric(forest_data$UCL) |
| 140 | 2x |
forest_data$pval <- as.numeric(forest_data$pval) |
| 141 | 2x |
forest_data$row_index <- seq_len(nrow(forest_data)) |
| 142 | ||
| 143 |
# 2c) Make group_id a factor in reversed order so row 1 is at the TOP |
|
| 144 | 2x |
forest_data$group_id <- factor(forest_data$row_index, |
| 145 | 2x |
levels = forest_data$row_index |
| 146 |
) |
|
| 147 |
# 3) Create the forest plot |
|
| 148 | 2x |
col_grid <- rgb(235, 235, 235, 100, maxColorValue = 255) |
| 149 | ||
| 150 | 2x |
group_id <- effect_est <- LCL <- UCL <- case <- NULL |
| 151 | ||
| 152 | 2x |
forest <- ggplot2::ggplot( |
| 153 | 2x |
data = forest_data, |
| 154 | 2x |
ggplot2::aes( |
| 155 | 2x |
x = group_id, |
| 156 | 2x |
y = effect_est, |
| 157 | 2x |
ymin = LCL, |
| 158 | 2x |
ymax = UCL |
| 159 |
) |
|
| 160 |
) + |
|
| 161 | 2x |
ggplot2::geom_pointrange(ggplot2::aes(color = case)) + |
| 162 | 2x |
ggplot2::geom_errorbar( |
| 163 | 2x |
ggplot2::aes( |
| 164 | 2x |
ymin = LCL, |
| 165 | 2x |
ymax = UCL, |
| 166 | 2x |
color = case |
| 167 |
), |
|
| 168 | 2x |
width = 0, |
| 169 | 2x |
linewidth = 1 |
| 170 |
) + |
|
| 171 | 2x |
ggplot2::geom_hline( |
| 172 | 2x |
yintercept = reference_line, |
| 173 | 2x |
colour = "red", |
| 174 | 2x |
linetype = "dashed", |
| 175 | 2x |
alpha = 0.5 |
| 176 |
) + |
|
| 177 | 2x |
ggplot2::coord_flip() + |
| 178 | 2x |
ggplot2::scale_y_continuous(limits = xlim) + |
| 179 | 2x |
ggplot2::scale_x_discrete( |
| 180 | 2x |
labels = rev(forest_data$case), |
| 181 | 2x |
limits = rev(levels(forest_data$group_id)) |
| 182 |
) + |
|
| 183 | 2x |
ggplot2::xlab("Experimental vs. Comparator Treatment") +
|
| 184 | 2x |
ggplot2::ylab(paste0(effect_col, " (95% CI)")) + |
| 185 | 2x |
ggplot2::theme_classic() + |
| 186 | 2x |
ggplot2::theme( |
| 187 | 2x |
panel.background = ggplot2::element_blank(), |
| 188 | 2x |
strip.background = ggplot2::element_rect(colour = NA, fill = NA), |
| 189 | 2x |
panel.grid.major.y = ggplot2::element_line(colour = col_grid, linewidth = 0.5), |
| 190 | 2x |
panel.border = ggplot2::element_rect(fill = NA, color = "black"), |
| 191 | 2x |
legend.position = "none", |
| 192 | 2x |
axis.text = ggplot2::element_text(face = "bold"), |
| 193 | 2x |
axis.title = ggplot2::element_text(face = "bold"), |
| 194 | 2x |
plot.title = ggplot2::element_text( |
| 195 | 2x |
face = "bold", |
| 196 | 2x |
hjust = 0.5, |
| 197 | 2x |
size = 13 |
| 198 |
) |
|
| 199 |
) |
|
| 200 | ||
| 201 |
# 4) Build a table showing [HR (LCL, UCL)] and p-value |
|
| 202 | 2x |
dat_table <- forest_data |
| 203 | 2x |
dat_table$pval_str <- |
| 204 | 2x |
ifelse(dat_table$pval < 0.001, |
| 205 | 2x |
"< 0.001", |
| 206 | 2x |
sprintf("%.3f", dat_table$pval)
|
| 207 |
) |
|
| 208 | 2x |
dat_table$effect_est_ci_str <- paste0( |
| 209 | 2x |
sprintf("%.2f", dat_table$effect_est),
|
| 210 |
" [", |
|
| 211 | 2x |
sprintf("%.2f", dat_table$LCL),
|
| 212 |
", ", |
|
| 213 | 2x |
sprintf("%.2f", dat_table$UCL),
|
| 214 |
"]" |
|
| 215 |
) |
|
| 216 | 2x |
dat_table <- |
| 217 | 2x |
dat_table[, c("group_id", "case", "effect_est_ci_str", "pval_str")]
|
| 218 | ||
| 219 | 2x |
df_effect <- data.frame( |
| 220 | 2x |
group_id = dat_table$group_id, |
| 221 | 2x |
case = dat_table$case, |
| 222 | 2x |
stat = "effect_est_ci_str", |
| 223 | 2x |
value = dat_table$effect_est_ci_str, |
| 224 | 2x |
stringsAsFactors = FALSE |
| 225 |
) |
|
| 226 | ||
| 227 | 2x |
df_pval <- data.frame( |
| 228 | 2x |
group_id = dat_table$group_id, |
| 229 | 2x |
case = dat_table$case, |
| 230 | 2x |
stat = "pval_str", |
| 231 | 2x |
value = dat_table$pval_str, |
| 232 | 2x |
stringsAsFactors = FALSE |
| 233 |
) |
|
| 234 | ||
| 235 | 2x |
dat_table_long <- rbind(df_effect, df_pval) |
| 236 | ||
| 237 | 2x |
dat_table_long$stat <- |
| 238 | 2x |
factor(dat_table_long$stat, |
| 239 | 2x |
levels = c("effect_est_ci_str", "pval_str")
|
| 240 |
) |
|
| 241 | ||
| 242 |
# 5) Table plot |
|
| 243 | 2x |
stat <- value <- NULL |
| 244 | 2x |
table_base <- |
| 245 | 2x |
ggplot2::ggplot( |
| 246 | 2x |
dat_table_long, |
| 247 | 2x |
ggplot2::aes(x = stat, y = group_id, label = value) |
| 248 |
) + |
|
| 249 | 2x |
ggplot2::geom_text(size = 3) + |
| 250 | 2x |
ggplot2::scale_x_discrete( |
| 251 | 2x |
position = "top", |
| 252 | 2x |
labels = c(paste0(effect_col, " (95% CI)"), "P value") |
| 253 |
) + |
|
| 254 | 2x |
ggplot2::scale_y_discrete( |
| 255 | 2x |
labels = forest_data$case, |
| 256 | 2x |
limits = rev(levels(dat_table_long$group_id)) |
| 257 |
) + |
|
| 258 | 2x |
ggplot2::labs(x = NULL, y = NULL) + |
| 259 | 2x |
ggplot2::theme_classic() + |
| 260 | 2x |
ggplot2::theme( |
| 261 | 2x |
strip.background = ggplot2::element_blank(), |
| 262 | 2x |
panel.grid.major = ggplot2::element_blank(), |
| 263 | 2x |
panel.border = ggplot2::element_blank(), |
| 264 | 2x |
axis.line = ggplot2::element_blank(), |
| 265 | 2x |
axis.text.y = ggplot2::element_blank(), |
| 266 | 2x |
axis.text.x = ggplot2::element_text(size = 12), |
| 267 | 2x |
axis.ticks = ggplot2::element_blank(), |
| 268 | 2x |
axis.title = ggplot2::element_text(face = "bold") |
| 269 |
) |
|
| 270 | ||
| 271 |
# 6) Combine forest & table |
|
| 272 |
# final_plot <- forest + table_base + patchwork::plot_layout(widths = c(10, 4)) |
|
| 273 | 2x |
final_plot <- |
| 274 | 2x |
patchwork::wrap_plots(forest, table_base) + patchwork::plot_layout(widths = c(10, 4)) |
| 275 | ||
| 276 | 2x |
return(final_plot) |
| 277 |
} |
| 1 |
#' Create pseudo IPD given aggregated binary data |
|
| 2 |
#' |
|
| 3 |
#' @param binary_agd a data.frame that take different formats depending on \code{format}
|
|
| 4 |
#' @param format a string, "stacked" or "unstacked" |
|
| 5 |
#' |
|
| 6 |
#' @return a data.frame of pseudo binary IPD, with columns USUBJID, ARM, RESPONSE |
|
| 7 |
#' @example inst/examples/get_pseudo_ipd_binary_ex.R |
|
| 8 |
#' @export |
|
| 9 | ||
| 10 |
get_pseudo_ipd_binary <- function(binary_agd, format = c("stacked", "unstacked")) {
|
|
| 11 |
# pre check |
|
| 12 | 5x |
if (format == "stacked") {
|
| 13 | 4x |
if (!is.data.frame(binary_agd)) {
|
| 14 | ! |
stop("stacked binary_agd should be data.frame with columns 'ARM', 'RESPONSE', 'COUNT'")
|
| 15 |
} |
|
| 16 | 4x |
names(binary_agd) <- toupper(names(binary_agd)) |
| 17 | 4x |
if (!all(c("ARM", "RESPONSE", "COUNT") %in% names(binary_agd))) {
|
| 18 | ! |
stop("stacked binary_agd should be data.frame with columns 'ARM', 'Response', 'Count'")
|
| 19 |
} |
|
| 20 | 4x |
if (!is.logical(binary_agd$RESPONSE) && !all(toupper(binary_agd$RESPONSE) %in% c("YES", "NO"))) {
|
| 21 | ! |
stop("'RESPONSE' column in stacked binary_agd should be either logical vector or character vector of 'Yes'/'No'")
|
| 22 |
} |
|
| 23 | 4x |
if (nrow(binary_agd) %% 2 != 0) {
|
| 24 | ! |
stop("nrow(binary_agd) is not even number, you may miss to provide 1 level of binary response to certain arm")
|
| 25 |
} |
|
| 26 | 1x |
} else if (format == "unstacked") {
|
| 27 | 1x |
if (!(is.data.frame(binary_agd) || is.matrix(binary_agd))) {
|
| 28 | ! |
stop("unstacked binary_agd should be either a 1x2 or 2x2 data frame or matrix")
|
| 29 |
} |
|
| 30 | 1x |
if (ncol(binary_agd) != 2 || !nrow(binary_agd) %in% c(1, 2)) {
|
| 31 | ! |
stop("unstacked binary_agd should be either a 1x2 or 2x2 data frame or matrix")
|
| 32 |
} |
|
| 33 | 1x |
bin_res <- toupper(colnames(binary_agd)) |
| 34 | 1x |
bin_res <- sort(bin_res) |
| 35 | 1x |
if (!(identical(bin_res, c("FALSE", "TRUE")) || identical(bin_res, c("NO", "YES")))) {
|
| 36 | ! |
stop("column names of unstacked binary_agd should be either TRUE/FALSE or Yes/No")
|
| 37 |
} |
|
| 38 |
} |
|
| 39 | ||
| 40 |
# pre process binary_agd, depending on format |
|
| 41 | 5x |
use_binary_agd <- switch(format, |
| 42 | 5x |
"stacked" = {
|
| 43 | 4x |
names(binary_agd) <- toupper(names(binary_agd)) |
| 44 | 4x |
if (!is.logical(binary_agd$RESPONSE)) {
|
| 45 | 4x |
binary_agd$RESPONSE <- toupper(binary_agd$RESPONSE) |
| 46 | 4x |
binary_agd$RESPONSE <- binary_agd$RESPONSE == "YES" |
| 47 |
} |
|
| 48 | 4x |
binary_agd |
| 49 |
}, |
|
| 50 | 5x |
"unstacked" = {
|
| 51 | 1x |
trt_names <- rownames(binary_agd) |
| 52 | 1x |
bin_res <- toupper(colnames(binary_agd)) |
| 53 | 1x |
if ("YES" %in% bin_res) {
|
| 54 | 1x |
bin_res <- ifelse(bin_res == "YES", "TRUE", "FALSE") |
| 55 | 1x |
colnames(binary_agd) <- bin_res |
| 56 |
} |
|
| 57 | 1x |
tmpout <- utils::stack(binary_agd) |
| 58 | 1x |
tmpout <- cbind(ARM = rep(trt_names, each = 2), tmpout) |
| 59 | 1x |
names(tmpout) <- c("ARM", "COUNT", "RESPONSE")
|
| 60 | 1x |
rownames(tmpout) <- NULL |
| 61 | 1x |
tmpout$RESPONSE <- as.logical(tmpout$RESPONSE) |
| 62 | 1x |
tmpout |
| 63 |
} |
|
| 64 |
) |
|
| 65 | ||
| 66 |
# create pseudo binary IPD |
|
| 67 | 5x |
use_binary_agd$ARM <- factor(use_binary_agd$ARM, levels = unique(use_binary_agd$ARM)) |
| 68 | 5x |
n_per_arm <- tapply(use_binary_agd$COUNT, use_binary_agd$ARM, sum) |
| 69 | 5x |
n_yes_per_arm <- use_binary_agd$COUNT[use_binary_agd$RESPONSE] # use_binary_agd is already ordered as per factor ARM |
| 70 | ||
| 71 | 5x |
tmpipd <- data.frame( |
| 72 | 5x |
USUBJID = NA, |
| 73 | 5x |
ARM = unlist( |
| 74 | 5x |
mapply(rep, x = levels(use_binary_agd$ARM), each = n_per_arm, SIMPLIFY = FALSE, USE.NAMES = FALSE) |
| 75 |
), |
|
| 76 | 5x |
RESPONSE = unlist( |
| 77 | 5x |
lapply(seq_along(n_per_arm), function(ii) {
|
| 78 | 7x |
c(rep(TRUE, n_yes_per_arm[ii]), rep(FALSE, n_per_arm[ii] - n_yes_per_arm[ii])) |
| 79 |
}) |
|
| 80 |
) |
|
| 81 |
) |
|
| 82 | 5x |
tmpipd$USUBJID <- paste0("pseudo_binary_subj_", seq_len(nrow(tmpipd)))
|
| 83 | ||
| 84 |
# output |
|
| 85 | 5x |
tmpipd |
| 86 |
} |
|
| 87 | ||
| 88 | ||
| 89 |
#' Helper function to summarize outputs from glm fit |
|
| 90 |
#' |
|
| 91 |
#' @param binobj returned object from \code{stats::glm}
|
|
| 92 |
#' @param legend label to indicate the binary fit |
|
| 93 |
#' @param weighted logical flag indicating whether weights have been applied in the glm fit |
|
| 94 |
#' @returns A `data.frame` containing a summary of the number of events and subjects in a logistic |
|
| 95 |
#' regression model. |
|
| 96 |
#' @examples |
|
| 97 |
#' data(adrs_sat) |
|
| 98 |
#' pseudo_adrs <- get_pseudo_ipd_binary( |
|
| 99 |
#' binary_agd = data.frame( |
|
| 100 |
#' ARM = rep("B", 2),
|
|
| 101 |
#' RESPONSE = c("YES", "NO"),
|
|
| 102 |
#' COUNT = c(280, 120) |
|
| 103 |
#' ), |
|
| 104 |
#' format = "stacked" |
|
| 105 |
#' ) |
|
| 106 |
#' pseudo_adrs$RESPONSE <- as.numeric(pseudo_adrs$RESPONSE) |
|
| 107 |
#' combined_data <- rbind(adrs_sat[, c("USUBJID", "ARM", "RESPONSE")], pseudo_adrs)
|
|
| 108 |
#' combined_data$ARM <- as.factor(combined_data$ARM) |
|
| 109 |
#' binobj_dat <- stats::glm(RESPONSE ~ ARM, combined_data, family = binomial("logit"))
|
|
| 110 |
#' glm_makeup(binobj_dat) |
|
| 111 |
#' @export |
|
| 112 |
glm_makeup <- function(binobj, legend = "before matching", weighted = FALSE) {
|
|
| 113 | 23x |
arm <- levels(binobj$data$ARM) |
| 114 | 23x |
if (!weighted) {
|
| 115 | 14x |
n <- tapply(binobj$data$USUBJID, binobj$data$ARM, length) |
| 116 | 14x |
n_event <- tapply(binobj$data$RESPONSE, binobj$data$ARM, sum) |
| 117 |
} else {
|
|
| 118 | 9x |
n <- tapply(binobj$data$weights, binobj$data$ARM, sum) |
| 119 | 9x |
n_event <- tapply(binobj$data$weights * binobj$data$RESPONSE, binobj$data$ARM, sum) |
| 120 |
} |
|
| 121 | 23x |
data.frame( |
| 122 | 23x |
treatment = arm, |
| 123 | 23x |
type = legend, |
| 124 | 23x |
n = n, |
| 125 | 23x |
events = n_event, |
| 126 | 23x |
events_pct = n_event * 100 / n |
| 127 |
) |
|
| 128 |
} |
| 1 |
# Create an environment for settings |
|
| 2 |
settings_env <- new.env() |
|
| 3 | ||
| 4 |
#' Get and Set Time Conversion Factors |
|
| 5 |
#' |
|
| 6 |
#' @param default The default time scale, commonly whichever has factor = 1 |
|
| 7 |
#' @param days Factor to divide data time units to get time in days |
|
| 8 |
#' @param weeks Factor to divide data time units to get time in weeks |
|
| 9 |
#' @param months Factor to divide data time units to get time in months |
|
| 10 |
#' @param years Factor to divide data time units to get time in years |
|
| 11 |
#' |
|
| 12 |
#' @return No value returned. Conversion factors are stored internally and used within functions. |
|
| 13 |
#' @export |
|
| 14 |
#' @rdname time_conversion |
|
| 15 |
#' |
|
| 16 |
#' @examples |
|
| 17 |
#' # The default time scale is days: |
|
| 18 |
#' set_time_conversion(default = "days", days = 1, weeks = 7, months = 365.25 / 12, years = 365.25) |
|
| 19 |
#' |
|
| 20 |
#' # Set the default time scale to years |
|
| 21 |
#' set_time_conversion( |
|
| 22 |
#' default = "years", |
|
| 23 |
#' days = 1 / 365.25, |
|
| 24 |
#' weeks = 1 / 52.17857, |
|
| 25 |
#' months = 1 / 12, |
|
| 26 |
#' years = 1 |
|
| 27 |
#' ) |
|
| 28 |
#' |
|
| 29 |
set_time_conversion <- function(default = "days", days = 1, weeks = 7, months = 365.25 / 12, years = 365.25) {
|
|
| 30 | 3x |
if (!default %in% c("days", "weeks", "months", "years")) {
|
| 31 | ! |
stop("default must be one of \"days\", \"weeks\", \"months\", \"years\")")
|
| 32 |
} |
|
| 33 | 3x |
factors <- c(days = days, weeks = weeks, months = months, years = years) |
| 34 | 3x |
check_factors <- vapply(factors, function(x) isFALSE(!is.finite(x) || x == 0), logical(1L)) |
| 35 | 3x |
if (!all(check_factors)) {
|
| 36 | 1x |
stop( |
| 37 | 1x |
"Conversion factors must be finite non-zero numerical values: ", |
| 38 | 1x |
paste0(names(factors)[!check_factors], " = ", factors[!check_factors], collapse = ", ") |
| 39 |
) |
|
| 40 |
} |
|
| 41 | 2x |
settings_env$time_conversion <- factors |
| 42 | 2x |
settings_env$default_time_scale <- default |
| 43 |
} |
|
| 44 | ||
| 45 | ||
| 46 |
#' @param factor Time factor to get. |
|
| 47 |
#' @rdname time_conversion |
|
| 48 |
#' @export |
|
| 49 |
#' |
|
| 50 |
#' @examples |
|
| 51 |
#' # Get time scale factors: |
|
| 52 |
#' get_time_conversion("years")
|
|
| 53 |
#' get_time_conversion("weeks")
|
|
| 54 |
get_time_conversion <- function(factor = c("days", "weeks", "months", "years")) {
|
|
| 55 | 241x |
factor <- match.arg(factor, several.ok = TRUE) |
| 56 | 241x |
if (!exists("time_conversion", settings_env)) {
|
| 57 | ! |
warning("No time conversion factors previously set. Setting defaults.")
|
| 58 | ! |
set_time_conversion() |
| 59 |
} |
|
| 60 | 241x |
settings_env$time_conversion[factor] |
| 61 |
} |
|
| 62 | ||
| 63 | ||
| 64 |
#' Convert Time Values Using Scaling Factors |
|
| 65 |
#' |
|
| 66 |
#' @param times Numeric time values |
|
| 67 |
#' @param as A time scale to convert to. One of "days", "weeks", "months", "years" |
|
| 68 |
#' |
|
| 69 |
#' @return Returns a numeric vector calculated from `times / get_time_conversion(factor = as)` |
|
| 70 |
#' @export |
|
| 71 |
#' @examples |
|
| 72 |
#' get_time_as(50, as = "months") |
|
| 73 |
get_time_as <- function(times, as = NULL) {
|
|
| 74 | 42x |
if (is.null(as)) as <- settings_env$default_time_scale |
| 75 | 1x |
if (!is.numeric(times)) stop("times arguments must be numeric")
|
| 76 | 239x |
as <- match.arg(as, c("days", "weeks", "months", "years"))
|
| 77 | 239x |
times / get_time_conversion(as) |
| 78 |
} |
| 1 |
#' Helper function to retrieve median survival time from a `survival::survfit` object |
|
| 2 |
#' |
|
| 3 |
#' Extract and display median survival time with confidence interval |
|
| 4 |
#' |
|
| 5 |
#' @param km_fit returned object from \code{survival::survfit}
|
|
| 6 |
#' @param legend a character string, name used in 'type' column in returned data frame |
|
| 7 |
#' @param time_scale a character string, 'years', 'months', 'weeks' or 'days', time unit of median survival time |
|
| 8 |
#' |
|
| 9 |
#' @examples |
|
| 10 |
#' data(adtte_sat) |
|
| 11 |
#' data(pseudo_ipd_sat) |
|
| 12 |
#' library(survival) |
|
| 13 |
#' combined_data <- rbind(adtte_sat[, c("TIME", "EVENT", "ARM")], pseudo_ipd_sat)
|
|
| 14 |
#' kmobj <- survfit(Surv(TIME, EVENT) ~ ARM, combined_data, conf.type = "log-log") |
|
| 15 |
#' |
|
| 16 |
#' # Derive median survival time |
|
| 17 |
#' medSurv <- medSurv_makeup(kmobj, legend = "before matching", time_scale = "day") |
|
| 18 |
#' medSurv |
|
| 19 |
#' @return a data frame with a index column 'type', median survival time and confidence interval |
|
| 20 |
#' @export |
|
| 21 | ||
| 22 |
medSurv_makeup <- function(km_fit, legend = "before matching", time_scale) {
|
|
| 23 | 13x |
time_scale <- match.arg(time_scale, choices = c("years", "months", "weeks", "days"))
|
| 24 | ||
| 25 |
# km_fit is the returned object from survival::survfit |
|
| 26 | 13x |
km_fit <- summary(km_fit)$table |
| 27 | 13x |
km_fit[, 5:ncol(km_fit)] <- get_time_as(km_fit[, 5:ncol(km_fit)], time_scale) |
| 28 | ||
| 29 | 13x |
toyadd <- data.frame( |
| 30 | 13x |
treatment = gsub("ARM=", "", rownames(km_fit)),
|
| 31 | 13x |
type = rep(legend, 2) |
| 32 |
) |
|
| 33 | ||
| 34 | 13x |
km_fit <- cbind(toyadd, km_fit) |
| 35 | 13x |
rownames(km_fit) <- NULL |
| 36 | ||
| 37 | 13x |
km_fit |
| 38 |
} |
|
| 39 | ||
| 40 | ||
| 41 |
#' Helper function to select set of variables used for Kaplan-Meier plot |
|
| 42 |
#' |
|
| 43 |
#' @param km_fit returned object from \code{survival::survfit}
|
|
| 44 |
#' @param single_trt_name name of treatment if no strata are specified in `km_fit` |
|
| 45 |
#' |
|
| 46 |
#' @examples |
|
| 47 |
#' library(survival) |
|
| 48 |
#' data(adtte_sat) |
|
| 49 |
#' data(pseudo_ipd_sat) |
|
| 50 |
#' combined_data <- rbind(adtte_sat[, c("TIME", "EVENT", "ARM")], pseudo_ipd_sat)
|
|
| 51 |
#' kmobj <- survfit(Surv(TIME, EVENT) ~ ARM, combined_data, conf.type = "log-log") |
|
| 52 |
#' survfit_makeup(kmobj) |
|
| 53 |
#' @return a list of data frames of variables from [survival::survfit()]. Data frame is divided by treatment. |
|
| 54 |
#' @export |
|
| 55 | ||
| 56 |
survfit_makeup <- function(km_fit, single_trt_name = "treatment") {
|
|
| 57 |
# in case km_fit is only for single arm |
|
| 58 | 119x |
if ("strata" %in% names(km_fit)) {
|
| 59 | 23x |
use_trt <- mapply(rep, 1:2, each = km_fit$strata) |
| 60 | 23x |
if (is.list(use_trt)) use_trt <- unlist(use_trt) |
| 61 | ! |
if (is.matrix(use_trt)) use_trt <- as.vector(use_trt) |
| 62 | 23x |
is_single <- FALSE |
| 63 |
} else {
|
|
| 64 | 96x |
use_trt <- rep(single_trt_name, length(km_fit$time)) |
| 65 | 96x |
is_single <- TRUE |
| 66 |
} |
|
| 67 | ||
| 68 | 119x |
kmdat <- data.frame( |
| 69 | 119x |
time = km_fit$time, |
| 70 | 119x |
treatment = use_trt, |
| 71 | 119x |
n.risk = km_fit$n.risk, |
| 72 | 119x |
n.event = km_fit$n.event, |
| 73 | 119x |
censor = km_fit$n.censor, |
| 74 | 119x |
surv = km_fit$surv, |
| 75 | 119x |
lower = km_fit$lower, |
| 76 | 119x |
upper = km_fit$upper, |
| 77 | 119x |
cumhaz = km_fit$cumhaz |
| 78 |
) |
|
| 79 | 23x |
if (!is_single) kmdat$treatment <- sapply(strsplit(names(km_fit$strata), "="), "[[", 2)[kmdat$treatment] |
| 80 | 119x |
split(kmdat, f = kmdat$treatment) |
| 81 |
} |
| 1 |
.onLoad <- function(libname, pkgname) {
|
|
| 2 | ! |
set_time_conversion() |
| 3 |
} |