我想通过下表的 R Markdown 使用 datasummary 导出带有数字和因子变量的 PDF 文件,如下所示:
---
title: "R Notebook"
output:
html_document:
df_print: paged
html_notebook: default
pdf_document: default
---
Table 1 example:
```{r, warning=FALSE, message=FALSE, echo=FALSE}
library(tidyverse)
library(modelsummary)
library(kableExtra)
tmp <- mtcars[, c("mpg", "hp")]
tmp$class <- 0
tmp$class[15:32] <- 1
tmp$class <- as.factor(tmp$class)
tmp$region <- "A"
tmp$region[15:20] <- "B"
tmp$region[21:32] <- "C"
tmp$region <- as.factor(tmp$region)
## change position of varianbles
tmp <- tmp[,c("mpg","class","region","hp")]
# create a list with individual variables
# remove missing and rescale
tmp_scaled <- tmp
tmp_scaled$mpg <- scale(tmp_scaled$mpg)
tmp_scaled$hp <- scale(tmp_scaled$hp)
tmp_scaled_list <- lapply(tmp_scaled, na.omit)
tmp_scaled_list[2] <- list(NULL)
tmp_scaled_list[3] <- list(NULL)
N_alt <- function(x) paste0(N(x), ' (', round((as.numeric(N(x))/32)*100,digits=1), ')')
# create a table with `datasummary`
emptycol = function(x) " "
datasummary(mpg + class + region + hp ~ Heading("N (%)") * N_alt + Mean + SD + Heading("Boxplot") * emptycol + Heading("Histogram") * emptycol, data = tmp) %>%
column_spec(column = 6, image = spec_boxplot(tmp_scaled_list[c(1,4)])) %>%
column_spec(column = 7, image = spec_hist(tmp_scaled_list[c(1,4)]))
```
我现在面临3个问题:
1-如果我尝试编织到 PDF,我会收到以下错误消息:
! Package siunitx Error: Invalid token 'N' in numerical input.
Error: LaTeX failed to compile test_table1.tex. See https://yihui.org/tinytex/r/#debugging for debugging tips. See test_table1.log for more info.
关于这可能是什么的任何想法?
2-箱线图和直方图不正确。它们被重复是因为只有 2 个数值变量。如何确保为每个数值变量显示正确的箱线图和直方图,而因子变量不显示任何内容?
3-您知道如何将因子变量移动到数字变量下并为“类别”创建标题以包含因子变量的级别,例如:
Category N(%) Mean SD Boxplot Histogram
mpg
class 0
1
region A
B
C
hp
非常感谢!
--编辑:
关于第 3 个问题,我只漏掉了 1 分。我的代码是:
library(modelsummary)
library(kableExtra)
tmp <- mtcars[, c("mpg", "hp")]
tmp$class <- 0
tmp$class[15:32] <- 1
tmp$class <- as.factor(tmp$class)
tmp$region <- 1
tmp$region[15:20] <- 2
tmp$region[21:32] <- 3
tmp$region <- as.factor(tmp$region)
tmp$class <- 0
tmp$region <- 0
## change position of varianbles
tmp <- tmp[,c("mpg","class","region","hp")]
# create a list with individual variables
# remove missing and rescale
tmp_scaled <- tmp
tmp_scaled$mpg <- scale(tmp_scaled$mpg)
tmp_scaled$hp <- scale(tmp_scaled$hp)
tmp_scaled_list <- lapply(tmp_scaled, na.omit)
tmp_scaled_list[2] <- list(NULL)
tmp_scaled_list[3] <- list(NULL)
N_alt = function(x) {
if (x %in% c(tmp$class)) {
paste0('[14 (43.8); 18 (56.3)]')
} else if (x %in% c(tmp$region)) {
paste0('[14 (43.8); 6 (18.8); 12 (37.5)]')
} else {
paste0('[32 (100)]')
}
}
Mean_alt = function(x) {
if (x %in% c(tmp$class, tmp$region)) {
paste0("")
} else {
mean(x)
}
}
# create a table with `datasummary`
emptycol = function(x) " "
datasummary(mpg + (`class [0,1]`= class) + (`region [A,B,C]`= region) + hp ~ Heading("N (%)") * N_alt + Heading("Mean") * Mean_alt + Heading("Boxplot") * emptycol + Heading("Histogram") * emptycol, data = tmp) %>%
column_spec(column = 4, image = spec_boxplot(tmp_scaled_list)) %>%
column_spec(column = 5, image = spec_hist(tmp_scaled_list))
我的N_alt
功能无法正常工作。有谁知道我在这里想念什么?