R/pretty_output_functions.R
run_pretty_km_output.Rd
This function takes a dataset, along with variable names for time and event status for the KM fit, and optionally a strata variable.
run_pretty_km_output(strata_in = NA, model_data, time_in, event_in, event_level = NULL, time_est = NULL, group_name = NULL, title_name = NULL, conf_level = 0.95, surv_est_prefix = "Time", surv_est_digits = 2, median_est_digits = 1, p_digits = 4, output_type = NULL, sig_alpha = 0.05, background = "yellow", ...)
strata_in | name of strata variable, or NA (default) if no strata desired |
---|---|
model_data | dataset that contains the strata_in, time_in, and event_in variables |
time_in | name of time variable component of outcome measure |
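event_in | name of event status variable. If event_level is NULL, this must be a 0/1 or FALSE/TRUE variable, where 0 or FALSE is the censored level |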
event_level | event level for event status variable. |
time_est | numerical vector of time estimates. If NULL (default) no time estimates are calculated |
group_name | strata variable name. If NULL and strata exists then the variable label is used (if one exists), otherwise the variable name |
title_name | title to use |
conf_level | the confidence level required (default is 0.95). |
surv_est_prefix | prefix to use in survival estimate names. Default is Time (i.e. Time:5, Time:10,...) |
surv_est_digits | number of digits to round survival estimates for specified times |
median_est_digits | number of digits to round Median Survival Estimates |
p_digits | number of digits to round p values for Log-Rank p value |
output_type | output type, either NULL (default), "latex", or "html" (making special characters latex friendly) |
sig_alpha | the defined significance level. Default = 0.05 |
background | background color of significant values, or no highlighting if NULL. Default is "yellow" |
... | other params to pass to pretty_pvalues (e.g. bold/italic) |
A tibble with: Name
(if provided), Group
(if strata variable in fit), Level
(if strata variable in fit), Time:X
(Survival estimates for each time provided), Median Estimate
. If there is no strata variable the tibble has one row; otherwise the number of rows equals the number of strata levels.
# Basic survival model examples set.seed(542542522) ybin <- sample(0:1, 100, replace = TRUE) ybin2 <- sample(0:1, 100, replace = TRUE) ybin3 <- sample(c('Dead','Alive'), 100, replace = TRUE) y <- rexp(100,.1) x1 <- factor(sample(LETTERS[1:2],100,replace = TRUE)) x2 <- factor(sample(letters[1:4],100,replace = TRUE)) my_data <- data.frame(y, ybin, ybin2, ybin3, x1, x2) Hmisc::label(my_data$x1) <- "X1 Variable" # Single runs run_pretty_km_output(strata_in = 'x1', model_data = my_data, time_in = 'y', event_in = 'ybin', time_est = NULL)#> # A tibble: 2 x 6 #> Group Level N `N Events` `Median Estimate` `Log-Rank P` #> <chr> <chr> <int> <dbl> <chr> <chr> #> 1 X1 Variable A 50 24 16.2 (9.6, N.E.) 0.7536 #> 2 X1 Variable B 50 24 23.0 (8.6, N.E.) ""run_pretty_km_output(strata_in = 'x1', model_data = my_data, time_in = 'y', event_in = 'ybin', time_est = c(5,10))#> # A tibble: 2 x 8 #> Group Level N `N Events` `Median Estimat… `Time:5` `Time:10` `Log-Rank P` #> <chr> <chr> <int> <dbl> <chr> <chr> <chr> <chr> #> 1 X1 Va… A 50 24 16.2 (9.6, N.E.) 0.76 (0… 0.62 (0.… 0.7536 #> 2 X1 Va… B 50 24 23.0 (8.6, N.E.) 0.78 (0… 0.61 (0.… ""run_pretty_km_output(strata_in = 'x2', model_data = my_data, time_in = 'y', event_in = 'ybin3', event_level = 'Dead', time_est = c(5,10))#> # A tibble: 4 x 8 #> Group Level N `N Events` `Median Estimate` `Time:5` `Time:10` `Log-Rank P` #> <chr> <chr> <int> <dbl> <chr> <chr> <chr> <chr> #> 1 x2 a 24 12 16.6 (9.4, N.E.) 0.82 (0… 0.66 (0.… 0.9215 #> 2 x2 b 33 17 16.1 (7.2, N.E.) 0.71 (0… 0.60 (0.… "" #> 3 x2 c 20 8 N.E. (4.6, N.E.) 0.64 (0… 0.51 (0.… "" #> 4 x2 d 23 10 15.3 (11.8, N.E.) 
0.91 (0… 0.77 (0.… ""# Multiple runs for different variables library(dplyr) vars_to_run = c(NA, 'x1', 'x2') purrr::map_dfr(vars_to_run, run_pretty_km_output, model_data = my_data, time_in = 'y', event_in = 'ybin', event_level = '0', time_est = NULL) %>% select(Group, Level, everything())#> # A tibble: 7 x 6 #> Group Level N `N Events` `Median Estimate` `Log-Rank P` #> <chr> <chr> <int> <dbl> <chr> <chr> #> 1 Overall <NA> 100 52 15.0 (11.8, 22.0) <NA> #> 2 X1 Variable A 50 26 11.8 (9.8, 25.0) 0.6549 #> 3 X1 Variable B 50 26 16.5 (12.6, 28.4) "" #> 4 x2 a 24 9 22.0 (16.5, N.E.) 0.5486 #> 5 x2 b 33 18 11.4 (8.3, N.E.) "" #> 6 x2 c 20 11 13.6 (4.2, N.E.) "" #> 7 x2 d 23 14 13.8 (8.7, N.E.) ""km_info <- purrr::map_dfr(vars_to_run, run_pretty_km_output, model_data = my_data, time_in = 'y', event_in = 'ybin3', event_level = 'Dead', time_est = c(5,10), surv_est_prefix = 'Year', title_name = 'Overall Survival') %>% select(Group, Level, everything()) km_info2 <- purrr::map_dfr(vars_to_run, run_pretty_km_output, model_data = my_data, time_in = 'y', event_in = 'ybin2', time_est = c(5,10), surv_est_prefix = 'Year', title_name = 'Cancer Specific Survival') %>% select(Group, Level, everything()) options(knitr.kable.NA = '') kableExtra::kable(bind_rows(km_info, km_info2), escape = F, longtable = F, booktabs = TRUE, linesep = '', caption = 'Survival Percentage Estimates at 5 and 10 Years') %>% kableExtra::collapse_rows(c(1:2), row_group_label_position = 'stack', headers_to_remove = 1:2)#> <table> #> <caption>Survival Percentage Estimates at 5 and 10 Years</caption> #> <thead> #> <tr> #> <th style="text-align:left;"> Group </th> #> <th style="text-align:left;"> Level </th> #> <th style="text-align:left;"> Name </th> #> <th style="text-align:right;"> N </th> #> <th style="text-align:right;"> N Events </th> #> <th style="text-align:left;"> Median Estimate </th> #> <th style="text-align:left;"> Year:5 </th> #> <th style="text-align:left;"> Year:10 </th> #> <th style="text-align:left;"> 
Log-Rank P </th> #> </tr> #> </thead> #> <tbody> #> <tr> #> <td style="text-align:left;"> Overall </td> #> <td style="text-align:left;"> </td> #> <td style="text-align:left;"> Overall Survival </td> #> <td style="text-align:right;"> 100 </td> #> <td style="text-align:right;"> 47 </td> #> <td style="text-align:left;"> 16.5 (11.8, 36.6) </td> #> <td style="text-align:left;"> 0.77 (0.69, 0.86) </td> #> <td style="text-align:left;"> 0.64 (0.54, 0.75) </td> #> <td style="text-align:left;"> </td> #> </tr> #> <tr> #> <td style="text-align:left;vertical-align: middle !important;" rowspan="2"> X1 Variable </td> #> <td style="text-align:left;"> A </td> #> <td style="text-align:left;"> Overall Survival </td> #> <td style="text-align:right;"> 50 </td> #> <td style="text-align:right;"> 23 </td> #> <td style="text-align:left;"> 15.3 (10.6, N.E.) </td> #> <td style="text-align:left;"> 0.77 (0.66, 0.91) </td> #> <td style="text-align:left;"> 0.65 (0.52, 0.82) </td> #> <td style="text-align:left;"> 0.9024 </td> #> </tr> #> <tr> #> #> <td style="text-align:left;"> B </td> #> <td style="text-align:left;"> Overall Survival </td> #> <td style="text-align:right;"> 50 </td> #> <td style="text-align:right;"> 24 </td> #> <td style="text-align:left;"> 16.6 (7.2, N.E.) </td> #> <td style="text-align:left;"> 0.77 (0.65, 0.90) </td> #> <td style="text-align:left;"> 0.63 (0.50, 0.79) </td> #> <td style="text-align:left;"> </td> #> </tr> #> <tr> #> <td style="text-align:left;vertical-align: middle !important;" rowspan="4"> x2 </td> #> <td style="text-align:left;"> a </td> #> <td style="text-align:left;"> Overall Survival </td> #> <td style="text-align:right;"> 24 </td> #> <td style="text-align:right;"> 12 </td> #> <td style="text-align:left;"> 16.6 (9.4, N.E.) 
</td> #> <td style="text-align:left;"> 0.82 (0.68, 1.00) </td> #> <td style="text-align:left;"> 0.66 (0.49, 0.91) </td> #> <td style="text-align:left;"> 0.9215 </td> #> </tr> #> <tr> #> #> <td style="text-align:left;"> b </td> #> <td style="text-align:left;"> Overall Survival </td> #> <td style="text-align:right;"> 33 </td> #> <td style="text-align:right;"> 17 </td> #> <td style="text-align:left;"> 16.1 (7.2, N.E.) </td> #> <td style="text-align:left;"> 0.71 (0.56, 0.89) </td> #> <td style="text-align:left;"> 0.60 (0.45, 0.81) </td> #> <td style="text-align:left;"> </td> #> </tr> #> <tr> #> #> <td style="text-align:left;"> c </td> #> <td style="text-align:left;"> Overall Survival </td> #> <td style="text-align:right;"> 20 </td> #> <td style="text-align:right;"> 8 </td> #> <td style="text-align:left;"> N.E. (4.6, N.E.) </td> #> <td style="text-align:left;"> 0.64 (0.45, 0.92) </td> #> <td style="text-align:left;"> 0.51 (0.31, 0.83) </td> #> <td style="text-align:left;"> </td> #> </tr> #> <tr> #> #> <td style="text-align:left;"> d </td> #> <td style="text-align:left;"> Overall Survival </td> #> <td style="text-align:right;"> 23 </td> #> <td style="text-align:right;"> 10 </td> #> <td style="text-align:left;"> 15.3 (11.8, N.E.) 
</td> #> <td style="text-align:left;"> 0.91 (0.80, 1.00) </td> #> <td style="text-align:left;"> 0.77 (0.60, 1.00) </td> #> <td style="text-align:left;"> </td> #> </tr> #> <tr> #> <td style="text-align:left;"> Overall </td> #> <td style="text-align:left;"> </td> #> <td style="text-align:left;"> Cancer Specific Survival </td> #> <td style="text-align:right;"> 100 </td> #> <td style="text-align:right;"> 59 </td> #> <td style="text-align:left;"> 12.6 (9.6, 16.6) </td> #> <td style="text-align:left;"> 0.77 (0.69, 0.86) </td> #> <td style="text-align:left;"> 0.59 (0.49, 0.70) </td> #> <td style="text-align:left;"> </td> #> </tr> #> <tr> #> <td style="text-align:left;vertical-align: middle !important;" rowspan="2"> X1 Variable </td> #> <td style="text-align:left;"> A </td> #> <td style="text-align:left;"> Cancer Specific Survival </td> #> <td style="text-align:right;"> 50 </td> #> <td style="text-align:right;"> 28 </td> #> <td style="text-align:left;"> 10.8 (8.7, 19.7) </td> #> <td style="text-align:left;"> 0.75 (0.63, 0.88) </td> #> <td style="text-align:left;"> 0.56 (0.43, 0.73) </td> #> <td style="text-align:left;"> 0.9272 </td> #> </tr> #> <tr> #> #> <td style="text-align:left;"> B </td> #> <td style="text-align:left;"> Cancer Specific Survival </td> #> <td style="text-align:right;"> 50 </td> #> <td style="text-align:right;"> 31 </td> #> <td style="text-align:left;"> 13.6 (7.7, 25.5) </td> #> <td style="text-align:left;"> 0.79 (0.68, 0.92) </td> #> <td style="text-align:left;"> 0.62 (0.49, 0.78) </td> #> <td style="text-align:left;"> </td> #> </tr> #> <tr> #> <td style="text-align:left;vertical-align: middle !important;" rowspan="4"> x2 </td> #> <td style="text-align:left;"> a </td> #> <td style="text-align:left;"> Cancer Specific Survival </td> #> <td style="text-align:right;"> 24 </td> #> <td style="text-align:right;"> 16 </td> #> <td style="text-align:left;"> 15.0 (9.4, N.E.) 
</td> #> <td style="text-align:left;"> 0.82 (0.67, 1.00) </td> #> <td style="text-align:left;"> 0.62 (0.44, 0.87) </td> #> <td style="text-align:left;"> 0.9464 </td> #> </tr> #> <tr> #> #> <td style="text-align:left;"> b </td> #> <td style="text-align:left;"> Cancer Specific Survival </td> #> <td style="text-align:right;"> 33 </td> #> <td style="text-align:right;"> 18 </td> #> <td style="text-align:left;"> 16.2 (7.6, N.E.) </td> #> <td style="text-align:left;"> 0.72 (0.57, 0.89) </td> #> <td style="text-align:left;"> 0.56 (0.40, 0.78) </td> #> <td style="text-align:left;"> </td> #> </tr> #> <tr> #> #> <td style="text-align:left;"> c </td> #> <td style="text-align:left;"> Cancer Specific Survival </td> #> <td style="text-align:right;"> 20 </td> #> <td style="text-align:right;"> 10 </td> #> <td style="text-align:left;"> 13.6 (5.5, N.E.) </td> #> <td style="text-align:left;"> 0.79 (0.63, 1.00) </td> #> <td style="text-align:left;"> 0.63 (0.43, 0.93) </td> #> <td style="text-align:left;"> </td> #> </tr> #> <tr> #> #> <td style="text-align:left;"> d </td> #> <td style="text-align:left;"> Cancer Specific Survival </td> #> <td style="text-align:right;"> 23 </td> #> <td style="text-align:right;"> 15 </td> #> <td style="text-align:left;"> 10.8 (6.7, N.E.) 
</td> #> <td style="text-align:left;"> 0.77 (0.62, 0.97) </td> #> <td style="text-align:left;"> 0.55 (0.37, 0.83) </td> #> <td style="text-align:left;"> </td> #> </tr> #> </tbody> #> </table># Real World Example data(Bladder_Cancer) vars_to_run = c(NA, 'Gender', 'Clinical_Stage_Grouped', 'PT0N0', 'Any_Downstaging') purrr::map_dfr(vars_to_run, run_pretty_km_output, model_data = Bladder_Cancer, time_in = 'Survival_Months', event_in = 'Vital_Status', event_level = 'Dead', time_est = c(24,60), surv_est_prefix = 'Month', p_digits=5) %>% select(Group, Level, everything())#> # A tibble: 10 x 8 #> Group Level N `N Events` `Median Estimat… `Month:24` `Month:60` #> <chr> <chr> <int> <dbl> <chr> <chr> <chr> #> 1 Over… <NA> 166 59 70.3 (48.7, N.E… 0.69 (0.6… 0.58 (0.4… #> 2 Gend… Fema… 42 12 N.E. (75.6, N.E… 0.74 (0.6… 0.70 (0.5… #> 3 Gend… Male 124 47 68.9 (39.0, N.E… 0.66 (0.5… 0.54 (0.4… #> 4 Clin… Stag… 118 34 N.E. (61.5, N.E… 0.73 (0.6… 0.64 (0.5… #> 5 Clin… Stag… 32 19 35.7 (14.3, N.E… 0.55 (0.4… 0.42 (0.2… #> 6 Clin… Stag… 16 6 47.0 (11.5, N.E… 0.67 (0.4… 0.50 (0.2… #> 7 Down… No C… 131 57 48.7 (30.3, N.E… 0.62 (0.5… 0.49 (0.4… #> 8 Down… Comp… 35 2 N.E. 0.94 (0.8… 0.94 (0.8… #> 9 Any … No D… 97 49 32.5 (21.6, 70.… 0.56 (0.4… 0.41 (0.3… #> 10 Any … Down… 69 10 N.E. 0.86 (0.7… 0.83 (0.7… #> # … with 1 more variable: `Log-Rank P` <chr>