我的目标是使用 survfit 对象估计中位生存时间,并给出 90% 置信水平下该中位数的置信上限和下限。
# Load the customer churn dataset and keep only the rows whose
# `months_active` is strictly positive.
churn_dat <- read_csv(
  "https://raw.githubusercontent.com/square/pysurvival/master/pysurvival/datasets/churn.csv"
) %>%
  filter(months_active > 0)
#' Draw bootstrap resamples of the churn data for one company size
#'
#' Filters the global `churn_dat` to the rows whose `company_size` equals
#' `size`, then resamples those rows with replacement `n_sims` times. Every
#' resample has as many rows as the filtered data.
#'
#' @param size Value of `company_size` to keep (e.g. "10-50").
#' @param n_sims Number of bootstrap resamples to draw (non-negative).
#' @return A tibble with one row per resampled observation and the columns
#'   `experiment` (resample id), `index` (row drawn from the filtered data),
#'   `time_star` (the drawn `months_active`) and `event_star` (the drawn
#'   `churned`).
boot <- function(size, n_sims) {
  stopifnot(is.numeric(n_sims), length(n_sims) == 1L, n_sims >= 0)

  # 1. Keep only the rows for the requested company size.
  df <- churn_dat %>% filter(company_size == size)
  n <- nrow(df)

  # 2. Run the bootstrap: draw all n * n_sims row indices in one call.
  # seq_len() / sample.int() stay well defined when n or n_sims is 0, whereas
  # 1:0 silently expands to c(1, 0) and yields columns of mismatched length.
  index <- sample.int(n, size = n * n_sims, replace = TRUE)

  tibble(
    experiment = rep(seq_len(n_sims), each = n),
    index = index,
    time_star = df$months_active[index],
    event_star = df$churned[index]
  )
}
#' Plot the distribution of bootstrap median survival times
#'
#' Fits one survival curve per bootstrap resample (grouped by `experiment`),
#' extracts the median of each curve with `surv_median()`, and draws a
#' histogram of those medians.
#'
#' @param experiments Data frame with the columns `experiment`, `time_star`
#'   and `event_star`, e.g. the output of `boot()`.
#' @return A ggplot object: histogram of the per-resample medians.
plot_boot_data <- function(experiments) {
  # One survival curve per bootstrap resample.
  fit <- survfit(Surv(time_star, event_star) ~ experiment, data = experiments)

  # Median survival time of each resample; the column keeps the name `med`
  # so the x-axis label is unchanged.
  medians <- data.frame(med = surv_median(fit)$median)

  # `fill` is intentionally not mapped: a continuous fill on a single-group
  # histogram is dropped by the binning stat and only triggers a warning.
  ggplot(medians, aes(x = med)) +
    geom_histogram(binwidth = 0.8) +
    theme_bw()
}
# Bootstrap the "10-50" company-size segment with 10 resamples, then plot the
# histogram of the resampled medians.
df_10to50 <- boot(size = "10-50", n_sims = 10)
plot_boot_data(experiments = df_10to50)
我找到了一个类似的函数 surv_median() 可以完成这一操作,但它使用的置信水平是 95%。
如何在置信水平设为 90% 的情况下得到同样的结果?