0

我想通过下面的 R Markdown,使用 datasummary 生成一个同时包含数值变量和因子变量的表格,并将其导出为 PDF 文件,代码如下:

---
title: "R Notebook"
output:
  html_document:
    df_print: paged
  html_notebook: default
  pdf_document: default
---

Table 1 example:

```{r, warning=FALSE, message=FALSE, echo=FALSE}
library(tidyverse)
library(modelsummary)
library(kableExtra)

# Start from two numeric columns of the built-in mtcars data
tmp <- mtcars[, c("mpg", "hp")]

# Two-level factor: rows 1-14 are 0, rows 15-32 are 1
tmp$class <- as.factor(rep(c(0, 1), times = c(14, 18)))

# Three-level factor: rows 1-14 are "A", rows 15-20 are "B", rows 21-32 are "C"
tmp$region <- as.factor(rep(c("A", "B", "C"), times = c(14, 6, 12)))

# Place the factor columns between the two numeric columns
tmp <- tmp[, c("mpg", "class", "region", "hp")]

# Standardize the numeric columns on a copy so that `tmp` keeps the raw values
tmp_scaled <- tmp
tmp_scaled$hp <- scale(tmp$hp)
tmp_scaled$mpg <- scale(tmp$mpg)

# One list entry per column with missing values dropped; the entries for the
# factor columns (positions 2 and 3) are then set to NULL, keeping their slots
tmp_scaled_list <- lapply(tmp_scaled, na.omit)
tmp_scaled_list[2:3] <- list(NULL)

# N_alt: format a table cell as "n (pct)", where pct is n expressed as a
# percentage of `total`, rounded to one decimal place.
#   x     - the values belonging to the current table cell
#   total - denominator for the percentage; defaults to the number of rows of
#           the global `tmp` rather than a hard-coded 32, so the percentage
#           follows the data if the sample changes
# `N()` is converted with `as.numeric()`, so it presumably returns the count as
# text (modelsummary's `N`) — confirm.
N_alt <- function(x, total = nrow(tmp)) {
  n_label <- N(x)
  pct <- round((as.numeric(n_label) / total) * 100, digits = 1)
  paste0(n_label, " (", pct, ")")
}

# Placeholder statistic for `datasummary`: ignores its input and returns a
# single blank, so the column exists but shows no text
emptycol <- function(x) {
  " "
}
# Summary table: N (%), mean and SD per row plus two blank columns, which are
# then filled with inline plots built from list entries 1 and 4 (mpg and hp)
datasummary(
  mpg + class + region + hp ~
    Heading("N (%)") * N_alt +
    Mean +
    SD +
    Heading("Boxplot") * emptycol +
    Heading("Histogram") * emptycol,
  data = tmp
) %>%
  column_spec(
    column = 6,
    image = spec_boxplot(tmp_scaled_list[c(1, 4)])
  ) %>%
  column_spec(
    column = 7,
    image = spec_hist(tmp_scaled_list[c(1, 4)])
  )

```

这是我编织到 HTML 时看到的当前输出: 在此处输入图像描述

我现在面临3个问题:

1-如果我尝试 knit(编译)到 PDF,我会收到以下错误消息:

! Package siunitx Error: Invalid token 'N' in numerical input.

Error: LaTeX failed to compile test_table1.tex. See https://yihui.org/tinytex/r/#debugging for debugging tips. See test_table1.log for more info.

关于这可能是什么的任何想法?

2-箱线图和直方图不正确。它们被重复是因为只有 2 个数值变量。如何确保为每个数值变量显示正确的箱线图和直方图,而因子变量不显示任何内容?

3-您知道如何将因子变量移动到数字变量下并为“类别”创建标题以包含因子变量的级别,例如:

         Category   N(%)   Mean   SD   Boxplot   Histogram
mpg
class    0
         1
region   A
         B
         C
hp

非常感谢!

--编辑:

关于第 3 个问题,我只差最后一点没有解决。我的代码是:

library(modelsummary)
library(kableExtra)

# Start from two numeric columns of the built-in mtcars data
tmp <- mtcars[, c("mpg", "hp")]

# The grouping columns are first built as factors
# (class: rows 1-14 = 0, 15-32 = 1; region: rows 1-14 = 1, 15-20 = 2, 21-32 = 3) ...
tmp$class <- as.factor(rep(c(0, 1), times = c(14, 18)))
tmp$region <- as.factor(rep(c(1, 2, 3), times = c(14, 6, 12)))

# ... and then both are replaced by the constant 0, so from here on `class`
# and `region` are plain numeric columns holding identical values
tmp$region <- 0
tmp$class <- 0

# Place `class` and `region` between the two original numeric columns
tmp <- tmp[, c("mpg", "class", "region", "hp")]

# Standardize mpg and hp on a copy so that `tmp` keeps the raw values
tmp_scaled <- tmp
tmp_scaled$hp <- scale(tmp$hp)
tmp_scaled$mpg <- scale(tmp$mpg)

# One list entry per column with missing values dropped; the `class` and
# `region` entries (positions 2 and 3) are then set to NULL, keeping their slots
tmp_scaled_list <- lapply(tmp_scaled, na.omit)
tmp_scaled_list[2:3] <- list(NULL)

# N_alt: returns one of three hard-coded "[count (percent); ...]" labels,
# chosen by testing whether the values of `x` occur in the global columns
# `tmp$class` / `tmp$region`; anything else gets the "[32 (100)]" label.
# NOTE(review): `x` is presumably a whole column, in which case `x %in% ...`
# is a logical vector, whereas `if` expects a single TRUE/FALSE.
# NOTE(review): earlier in this script `tmp$class` and `tmp$region` are both
# overwritten with 0, so the two membership tests give the same answer and the
# `region` branch can never be reached before the `class` branch — confirm.
N_alt = function(x) {
  if (x %in% c(tmp$class)) {
    paste0('[14 (43.8); 18 (56.3)]') 
  } else if (x %in% c(tmp$region)) {
    paste0('[14 (43.8); 6 (18.8); 12 (37.5)]')  
  } else {
    paste0('[32 (100)]')
  }
}

# Mean_alt: returns an empty string when the values of `x` occur in the global
# `tmp$class` / `tmp$region` columns, and `mean(x)` otherwise.
# NOTE(review): `x %in% ...` is a logical vector when `x` has more than one
# element, whereas `if` expects a single TRUE/FALSE — confirm intended test.
# NOTE(review): the two branches return different types (character vs numeric).
Mean_alt = function(x) {
  if (x %in% c(tmp$class, tmp$region)) {
    paste0("")
  } else {
    mean(x)  
  }
}

# Placeholder statistic for `datasummary`: ignores its input and returns a
# single blank, so the column exists but shows no text
emptycol <- function(x) {
  " "
}
# Summary table with relabelled `class` / `region` rows: N (%) and mean per
# row plus two blank columns, which are then filled with inline plots taken
# from `tmp_scaled_list`
datasummary(
  mpg +
    (`class [0,1]` = class) +
    (`region [A,B,C]` = region) +
    hp ~
    Heading("N (%)") * N_alt +
    Heading("Mean") * Mean_alt +
    Heading("Boxplot") * emptycol +
    Heading("Histogram") * emptycol,
  data = tmp
) %>%
  column_spec(
    column = 4,
    image = spec_boxplot(tmp_scaled_list)
  ) %>%
  column_spec(
    column = 5,
    image = spec_hist(tmp_scaled_list)
  )

这给了我: 在此处输入图像描述

我的 N_alt 函数无法正常工作。有谁知道我在这里遗漏了什么吗?

4

1 回答 1

1

解决问题 2 的代码:

---
title: "R Notebook"
output:
  html_document:
    df_print: paged
  html_notebook: default
  pdf_document: default
---

Table 1 example:

```{r, warning=FALSE, message=FALSE, echo=FALSE}
library(magrittr)
library(tidyverse)
library(modelsummary)
library(kableExtra)

# Start from two numeric columns of the built-in mtcars data
tmp <- mtcars[, c("mpg", "hp")]

# Two-level factor: rows 1-14 are 0, rows 15-32 are 1
tmp$class <- as.factor(rep(c(0, 1), times = c(14, 18)))

# Three-level factor: rows 1-14 are "A", rows 15-20 are "B", rows 21-32 are "C"
tmp$region <- as.factor(rep(c("A", "B", "C"), times = c(14, 6, 12)))

# Place the factor columns between the two numeric columns
tmp <- tmp[, c("mpg", "class", "region", "hp")]

# Standardize the numeric columns on a copy so that `tmp` keeps the raw values
tmp_scaled <- tmp
tmp_scaled$hp <- scale(tmp$hp)
tmp_scaled$mpg <- scale(tmp$mpg)

# Padding columns: one extra copy of `class` and two of `region`, giving the
# list seven slots (mpg, 2 for class, 3 for region, hp) — intended to line up
# with one slot per table row
tmp_scaled$region3 <- tmp$region
tmp_scaled$region2 <- tmp$region
tmp_scaled$class2 <- tmp$class

tmp_scaled <- tmp_scaled[, c(
  "mpg",
  "class", "class2",
  "region", "region2", "region3",
  "hp"
)]

# One list entry per column with missing values dropped; every factor slot
# (positions 2 to 6) is then set to NULL so only mpg and hp carry data
tmp_scaled_list <- lapply(tmp_scaled, na.omit)
tmp_scaled_list[2:6] <- list(NULL)

# N_alt: format a table cell as "n (pct)", where pct is n expressed as a
# percentage of `total`, rounded to one decimal place.
#   x     - the values belonging to the current table cell
#   total - denominator for the percentage; defaults to the number of rows of
#           the global `tmp` rather than a hard-coded 32, so the percentage
#           follows the data if the sample changes
# `N()` is converted with `as.numeric()`, so it presumably returns the count as
# text (modelsummary's `N`) — confirm.
N_alt <- function(x, total = nrow(tmp)) {
  n_label <- N(x)
  pct <- round((as.numeric(n_label) / total) * 100, digits = 1)
  paste0(n_label, " (", pct, ")")
}

# Placeholder statistic for `datasummary`: ignores its input and returns a
# single blank, so the column exists but shows no text
emptycol <- function(x) {
  " "
}
# Summary table: N (%), mean and SD per row plus two blank columns, which are
# then filled with inline plots taken from the padded `tmp_scaled_list`
datasummary(
  mpg + class + region + hp ~
    Heading("N (%)") * N_alt +
    Mean +
    SD +
    Heading("Boxplot") * emptycol +
    Heading("Histogram") * emptycol,
  data = tmp
) %>%
  column_spec(
    column = 6,
    image = spec_boxplot(tmp_scaled_list)
  ) %>%
  column_spec(
    column = 7,
    image = spec_hist(tmp_scaled_list)
  )


```

现在输出在箱线图/直方图方面是正确的:

在此处输入图像描述

于 2021-11-01T18:27:10.050 回答