1

我想识别这样的网络:同一网络中的所有人都通过友谊提名直接或间接相连,而不同网络的学生之间没有任何连接。

我正在使用 Add Health 数据。每个学生最多提名 10 位朋友。例如,样本数据可能如下所示:

ID  FID_1   FID_2   FID_3   FID_4   FID_5   FID_6   FID_7   FID_8   FID_9   FID_10
1   2           6   7          9    10        NA     NA     NA        NA    NA
2   5           9   12        45    13        90     87     6         NA    NA
3   1           2   4          7    8          9     10     14        16    18
100   110       120   122      125   169     178    190    200       500  520
500    100      110   122      125   169     178    190    200       500  520
700    800      789    900     NA     NA       NA     NA    NA        NA   NA
1000   789     2000     820    900    NA       NA     NA    NA        NA   NA

数据中大约有 85,000 人。谁能告诉我如何获得网络 ID?我希望得到的数据如下所示:

ID   network_ID           ID  network_ID
1     1                   700   3  
2     1                   789   3
3     1                   800   3
4     1                   820   3
5     1                   900   3
6     1                  1000   3
7     1                  2000   3
8     1
9     1
10    1
12    1
13    1
14    1
16    1
18    1
45    1
90    1
87    1
100   2
110   2
120   2
122   2
125   2
169   2
178   2
190   2
200   2
500   2
520   2

因此,直接或间接连接到 ID 1 的每个人都属于网络 1。2 是 1 的朋友。因此,直接或间接连接到 2 的每个人也都在 1 的网络中,依此类推。700 未连接到 1 或 1 的朋友或 1 的朋友的朋友,依此类推。因此 700 位于不同的网络中,即网络 3。

任何帮助都感激不尽...

4

1 回答 1

2

更新

library(igraph)
library(dplyr)
library(data.table)

# Assign a Network_ID to every student: two students share a network when a
# chain of friendship nominations (in either direction) links them.
# Expects `df` with an `ID` column plus nomination columns (FID_1, FID_2, ...).
# NOTE: setDT() converts `df` to a data.table by reference.
setDT(df) %>%
    # Long format: one row per (nominator ID, nominee ID2) pair; FID keeps the
    # name of the nomination column the friend came from.
    melt(id.vars = "ID", variable.name = "FID", value.name = "ID2") %>%
    # Unused nomination slots are NA and must not become edges.
    na.omit() %>%
    # graph_from_data_frame() reads the first two columns as edge endpoints.
    setcolorder(c("ID", "ID2", "FID")) %>%
    # List the vertices explicitly so that students who nominated nobody and
    # were nominated by nobody still get their own (singleton) network instead
    # of silently disappearing. Edge endpoints come first, in the order igraph
    # would discover them itself, so connected students keep the same
    # numbering as without `vertices`. The dot is only used inside a nested
    # call, so magrittr still passes the edge table as the first argument.
    graph_from_data_frame(
        vertices = data.frame(
            name = unique(c(.$ID, .$ID2, df$ID[!is.na(df$ID)]))
        )
    ) %>%
    # Weak connectivity: the direction of a nomination is ignored.
    components(mode = "weak") %>%
    membership() %>%
    # Named membership vector -> two-column data frame (values, ind).
    stack() %>%
    setNames(c("Network_ID", "ID")) %>%
    # Put ID before Network_ID.
    rev() %>%
    # stack() returns the IDs as a factor; convert them back to integers.
    type.convert(as.is = TRUE) %>%
    arrange(Network_ID, ID)

     ID Network_ID
1     1          1
2     2          1
3     3          1
4     4          1
5     5          1
6     6          1
7     7          1
8     8          1
9     9          1
10   10          1
11   12          1
12   13          1
13   14          1
14   16          1
15   18          1
16   45          1
17   87          1
18   90          1
19  100          2
20  110          2
21  120          2
22  122          2
23  125          2
24  169          2
25  178          2
26  190          2
27  200          2
28  500          2
29  520          2
30  700          3
31  789          3
32  800          3
33  820          3
34  900          3
35 1000          3
36 2000          3

数据

> dput(df)
# Sample data as printed by dput(): a 7-row data.frame with an integer `ID`
# column and ten integer nomination columns FID_1..FID_10; NA marks an unused
# nomination slot. The expression is not assigned here -- presumably it should
# be assigned to `df` before running the pipeline (confirm).
structure(list(ID = c(1L, 2L, 3L, 100L, 500L, 700L, 1000L), FID_1 = c(2L,
5L, 1L, 110L, 100L, 800L, 789L), FID_2 = c(6L, 9L, 2L, 120L,
110L, 789L, 2000L), FID_3 = c(7L, 12L, 4L, 122L, 122L, 900L,
820L), FID_4 = c(9L, 45L, 7L, 125L, 125L, NA, 900L), FID_5 = c(10L,
13L, 8L, 169L, 169L, NA, NA), FID_6 = c(NA, 90L, 9L, 178L, 178L,
NA, NA), FID_7 = c(NA, 87L, 10L, 190L, 190L, NA, NA), FID_8 = c(NA,
6L, 14L, 200L, 200L, NA, NA), FID_9 = c(NA, NA, 16L, 500L, 500L,
NA, NA), FID_10 = c(NA, NA, 18L, 520L, 520L, NA, NA)), class = "data.frame", row.names = c(NA,
-7L))

你在寻找这样的东西吗?

# igraph provides graph_from_data_frame() and E(); without it this snippet
# fails when run on its own.
library(igraph)
library(data.table)
library(dplyr)

# Draw the friendship-nomination graph, labelling each edge with the name of
# the nomination column (FID_1, FID_2, ...) it came from.
# NOTE: setDT() converts `df` to a data.table by reference.
setDT(df) %>%
    # Long format: one row per (nominator ID, nominee ID2) pair.
    melt(id.vars = "ID", variable.name = "FID", value.name = "ID2") %>%
    # Unused nomination slots are NA and must not become edges.
    na.omit() %>%
    # The first two columns are the edge endpoints; FID becomes an edge
    # attribute.
    setcolorder(c("ID", "ID2", "FID")) %>%
    graph_from_data_frame() %>%
    # The dot is only used inside a nested call, so magrittr still passes the
    # graph as the first argument of plot().
    plot(edge.label = E(.)$FID)

在此处输入图像描述


数据

# Smaller sample data: a 3-row data.frame with an integer `ID` column and ten
# integer nomination columns FID_1..FID_10; NA marks an unused nomination
# slot. The expression is not assigned here -- presumably it should be
# assigned to `df` before running the plotting snippet (confirm).
structure(list(ID = 1:3, FID_1 = c(2L, 5L, 1L), FID_2 = c(6L,
9L, 2L), FID_3 = c(7L, 12L, 4L), FID_4 = c(9L, 45L, 7L), FID_5 = c(10L,
12L, 8L), FID_6 = c(NA, 90L, 9L), FID_7 = c(NA, 87L, 10L), FID_8 = c(NA,
6L, 14L), FID_9 = c(NA, NA, 16L), FID_10 = c(NA, NA, 18L)), class = "data.frame", row.names = c(NA,
-3L))
于 2021-06-16T21:12:35.333 回答