-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathBMT-cure-short.R
94 lines (85 loc) · 3.9 KB
/
BMT-cure-short.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#' This file contains the code supporting Section 2 of the paper
#' "Cure models in survival analysis: Issues with identifiability and
#' competing risks" by Hein Putter and Per Kragh Andersen.
#'
#' It uses the BMT data as used in the book by Andersen & Ravn (2023),
#' available at "https://multi-state-book.github.io/companion/data/bmt.csv".
# Packages used in this analysis
library(survival)
library(prodlim)
library(smcure)
library(intsurv)
# Read in the data
bmt <- read.csv("https://multi-state-book.github.io/companion/data/bmt.csv")
# Have a look at the events of interest, here relapse and death
head(bmt)
table(bmt$death)
table(bmt$rel)
table(bmt$rel, bmt$death)
# The only covariates that we will use are all (selecting AML, all=0) and age
table(bmt$all)
summary(bmt$age)
hist(bmt$age)
#' A cure model would focus on relapse and fit a cure model where subjects that
#' died are treated as censored observations. We favour a competing risks model
#' where death without prior relapse is considered as a competing risk.
#'
#' First prepare the data for a competing risks analysis by defining time and
#' status variables.
bmt$relcr <- bmt$rel
bmt$relcr[bmt$death == 1 & bmt$rel==0] <- 2
table(bmt$relcr)
bmt$timecr <- bmt$timedeath
bmt$timecr[bmt$relcr == 1] <- bmt$timerel[bmt$relcr == 1]
#' We will use age very crudely as above or below 30 years. Define this variable,
#' select the AML patients, retain only the relevant variables and look at the
#' first six lines.
bmt$Age <- cut(bmt$age, c(0, 30, 100))
aml <- subset(bmt, all==0)
table(aml$Age, aml$relcr)
#' Start with the incorrect naive Kaplan-Meiers for relapse.
kmfit <- prodlim(Hist(timecr, rel) ~ Age, data = aml)
plot(kmfit,
xlim = c(0, 144), xlab = "Months since transplantation", ylim = c(0.6, 1),
axis1.at = seq(0, 144, 24), axis2.at = seq(0.6, 1, 0.1),
atrisk.at = seq(0, 144, 24), atrisk.title = "",
legend.x = "topright", legend.cex = 1.25)
#' There are the plateaus of the KM's.
summary(kmfit, times=120)
#' Fit a cure model with `smcure`.
curefit <- smcure(formula = Surv(timecr, rel) ~ Age,
cureform = ~ Age, data = aml,
model = "ph", Var = TRUE)
#' I tried the same thing now using another package that can fit
#' cure models, called `intsurv`. Adapting some code from the help
#' from `intsurv`, I was able to obtain much more plausible
#' estimates, at least of the cure probability model.
mm <- model.matrix(~ Age, data = aml)[, -1, drop=FALSE] # without intercept
set.seed(2024) # To get reproducible results for the variance (using bootstrap)
fit1 <- cox_cure.fit(mm, mm, aml$timecr, aml$rel, bootstrap = 100)
summary(fit1)
#' Checking whether the KM plateaus agree with the cure probabilities.
b <- coef(fit1)$cure
1 - plogis(b[1]) # very close to the KM plateau of AML
1 - plogis(b[1] + b[2]) # very close to the KM plateau of ALL
#' Let's stick with the competing risks analysis and plot the non-parametric
#' estimates of the cumulative incidence curves, using prodlim.
ajfit <- prodlim(Hist(timecr, relcr) ~ Age, data = aml)
plot(ajfit, # relapse
xlim = c(0, 144), xlab = "Months since transplantation", ylim = c(0, 0.4),
axis1.at = seq(0, 144, 24), axis2.at = seq(0, 0.4, 0.1),
atrisk.at = seq(0, 144, 24), atrisk.title = "",
legend.x = "topright", legend.cex = 1.25)
title(main = "Relapse")
plot(ajfit, cause=2, # death in remission
xlim = c(0, 144), xlab = "Months since transplantation", ylim = c(0, 0.4),
axis1.at = seq(0, 144, 24), axis2.at = seq(0, 0.4, 0.1),
atrisk.at = seq(0, 144, 24), atrisk.title = "",
legend.x = "bottomright", legend.cex = 1.25)
title(main = "Death in remission")
#' The following fits proportional cause-specific hazards models in the AML
#' data with age (categorized older or younger than 30) as sole covariate.
# Cause 1, relapse
coxph(Surv(timecr, relcr==1) ~ Age, data = aml)
# Cause 2, non-relapse mortality
coxph(Surv(timecr, relcr==2) ~ Age, data = aml)