0

我正在努力提高我在 R 中的模型拟合过程的效率。目前,我为 15 个变量预先生成了 1500 次模拟(sim)的全部数据。这些数据存储在一个三维数组中:每一层是一次模拟,每一行是一个“人”,每一列是 15 个变量之一(例如,300 x 15 x 1500)。然后,我将数组的某一层多次传入 mplusObject,拟合不同的 LPA 模型(一类、二类等)。对于其中的每一个模型,都有许多结果需要报告和保存。考虑到数据是预先生成的,并且数组的一层不依赖于另一层,我已经花了一段时间试图弄清楚如何使用并行处理来加快速度。我将在下面展示我目前的代码,但它不起作用,所以我想知道我是否需要换一个不同的包。谢谢!

# Set-up for the simulation study.
#   num_sims : number of simulated data sets (one array layer per simulation).
#   inp      : persons x variables x simulations array (300 x 15 x num_sims).
#   results  : one row per simulation, one column per reported statistic.
num_sims <- 1500

# Really there's actual data here, not random values, but the data generation
# process is a whole other thing.
inp <- array(seq_len(300 * 15 * num_sims), dim = c(300, 15, num_sims))

# A results table for values to be written to, filled with NAs:
# num_sims simulations, 129 results.
results <- matrix(NA_real_, nrow = num_sims, ncol = 129)

# Fit the sequence of LPA models (1-class, 2-class, ...) to every simulated
# data set and collect one row of results per simulation.
#
# How results are gathered: foreach returns the value of the LAST expression
# of each iteration, and assignments made to `results` inside the body are not
# carried back to the calling session. Each iteration therefore fills a local
# vector `res_row` and returns it; `.combine = rbind` stacks the rows (in
# simulation order) into the final `results` matrix.
#
# Within one iteration the models are fitted sequentially, so later models can
# be compared with earlier ones for the same data set; the iterations
# themselves are independent of each other.
#
# NOTE(review): %dopar% only runs in parallel once a backend has been
# registered; no registration is visible in this snippet -- confirm it is done
# before this loop.
var_names <- sprintf("x%02d", 1:15)   # "x01" ... "x15"
n_results <- ncol(results)            # width of one result row

results <- foreach(i = seq_len(num_sims), .combine = rbind,
                   .packages = c("mclust", "MplusAutomation")) %dopar% {
  working <- inp[, , i]
  res_row <- rep(NA_real_, n_results)

  # Design information stored in the first row of the layer:
  #   17 = number of groups, 18-20 = sample sizes 1-3, 21 = dist2, 22 = dist3.
  # NOTE(review): this needs at least 22 columns per layer; the placeholder
  # `inp` shown with this snippet only has 15 -- confirm against the real data.
  res_row[1:6] <- working[1, 17:22]

  df <- as.data.frame(working[, 1:15])
  # The Mplus syntax refers to x01-x15; name the columns accordingly when the
  # array slice carries no column names of its own.
  if (is.null(colnames(working))) {
    names(df) <- var_names
  }

  # Per-simulation file prefix so that workers running at the same time do not
  # overwrite each other's Mplus data/input/output files.
  file_prefix <- sprintf("sim%04d_", i)
  data_file <- paste0(file_prefix, "df.dat")

  # ---- 1-class model ----
  lpa1_15 <- mplusObject(
    TITLE = "1-Class LPA;",
    VARIABLE = "USEVARIABLES = x01-x15;
     CLASSES=c(1);",
    ANALYSIS = "ESTIMATOR = MLR;
     TYPE=MIXTURE;",
    MODEL = "
     %OVERALL%
     x01-x15;
     [x01-x15];
     %c#1%
     x01-x15;
     [x01-x15];",
    usevariables = var_names,
    rdata = df)

  lpa1_15_fit <- mplusModeler(lpa1_15, data_file,
                              modelout = paste0(file_prefix, "lpa1_15.inp"),
                              killOnFail = FALSE, run = 1L)

  # Only record fit statistics when the model produced a log-likelihood.
  s1 <- lpa1_15_fit$results$summaries
  if (!is.null(s1$LL)) {
    res_row[7]  <- -2 * s1$LL
    res_row[8]  <- s1$BIC
    res_row[9]  <- s1$aBIC
    res_row[10] <- s1$AIC
    res_row[11] <- s1$AICC
  }

  # ---- 2-class model ----
  lpa2_15 <- mplusObject(
    TITLE = "2-Class LPA;",
    VARIABLE = "USEVARIABLES = x01-x15;
     CLASSES=c(2);",
    ANALYSIS = "ESTIMATOR = MLR;
     TYPE=MIXTURE;",
    MODEL = "
     %OVERALL%
     x01-x15;
     [x01-x15];
     %c#1%
     x01-x15;
    [x01-x15];
     %c#2%
     x01-x15;
     [x01-x15];",
    OUTPUT = "TECH11;",
    usevariables = var_names,
    rdata = df)

  lpa2_15_fit <- mplusModeler(lpa2_15, data_file,
                              modelout = paste0(file_prefix, "lpa2_15.inp"),
                              killOnFail = FALSE, run = 1L)

  s2 <- lpa2_15_fit$results$summaries
  if (!is.null(s2$LL)) {
    res_row[12] <- -2 * s2$LL
    res_row[13] <- s2$BIC
    res_row[14] <- s2$aBIC
    res_row[15] <- s2$AIC
    res_row[16] <- s2$AICC
    res_row[17] <- s2$Entropy
    # TECH11 (VLMR / LMR) statistics are only present when Mplus reported them.
    if (!is.null(s2$T11_VLMR_2xLLDiff)) {
      res_row[18] <- s2$T11_VLMR_2xLLDiff
      res_row[19] <- s2$T11_VLMR_PValue
      res_row[20] <- s2$T11_LMR_Value
      res_row[21] <- s2$T11_LMR_PValue
    }
  }

  # ... and so on...
  # (remaining models: fit each one the same way and store its statistics in
  # the next free positions of `res_row`)

  # Value of the iteration: the completed result row for simulation i.
  res_row
}

我从运行中得到的结果是:

[[1]]
[1] 0.491

[[2]]
[1] 0.7037

我尝试过使用 parallel 包、foreach 配合 %dopar%,以及 parLapply,但都无法让它们正常工作。最接近成功的一次是使用 foreach 函数,但它只为每次迭代返回一个值,并且没有任何结果被保存到结果表中。我可以提供我尝试这些方法时的代码,但它们都没有真正起作用,所以目前我怀疑这件事是否可行(如果可行,哪种方法最适合这种设置)。

我还应该指出,数据的各层可以按任意顺序运行(例如,先 [,,1],再 [,,5],再 [,,3] 都可以),但是一旦某一层被调用,就应该对它完整地运行整个函数(或者无论最终应该如何组织的整套流程),因为有几个检验会将当前模型与同一数据集的前一个模型进行比较(3 类 vs 2 类),因此从这个意义上说,同一层数据内部的模型确实必须按顺序运行。

感谢您提供的任何帮助或建议!

4

0 回答 0