对 R 中数据框的嵌套列表进行过滤和重新分类
我有一个由 n
个元素组成的列表,每个元素都包含一个数据框。让我们以 start_list
为例:
# Example input: a named list with one tibble-classed data frame per ENSG id.
# Every column is stored as character, including `expr`, whose strings keep
# their leading sign/space padding.
start_list <- list(
  ENSG0000014 = structure(
    list(
      name = c("E-1122O", "E-11EM3", "E-11EMC", "E-1442O", "E-1132O"),
      ENSG = rep("ENSG0000014", 5L),
      expr = c(
        " 9.940670e-02", " 1.289670e-01", "-7.394904e-03",
        " 9.940670e-02", " 9.940670e-02"
      ),
      `1_43222779_A_G_b37` = c("1", "1", "2", "1", "0"),
      `1_43222856_A_G_b37` = c("0", "0", "0", "1", "1"),
      `1_43223126_C_T_b37` = c("0", "1", "0", "1", "2"),
      `1_43223317_T_C_b37` = c("1", "0", "0", "2", "1")
    ),
    row.names = c(NA, -5L),
    class = c("tbl_df", "tbl", "data.frame")
  ),
  ENSG0000015 = structure(
    list(
      name = c("E-1122O", "E-11EM3", "E-11EMC", "E-1442O", "E-1132O"),
      ENSG = rep("ENSG0000015", 5L),
      expr = c(
        " 9.940670e-02", " 1.289670e-01", "-7.394904e-03",
        " 9.940670e-02", " 1.289670e-01"
      ),
      `1_43222779_A_G_b37` = c("0", "1", "0", "1", "2"),
      `1_43222856_A_G_b37` = c("1", "1", "2", "1", "0")
    ),
    row.names = c(NA, -5L),
    class = c("tbl_df", "tbl", "data.frame")
  )
)
此外,还有一个名为 set_id
的数据框,其中列出了 start_list 的 name
列中的个体,并按 TRUE/FALSE 逻辑值将它们划分到五个集合中:
# Set membership table: one row per individual (IID) and one logical column
# per set (set_1 ... set_5).
set_id <- structure(
  list(
    IID = c("E-1122O", "E-11EM3", "E-11EMC", "E-1442O", "E-1132O"),
    set_1 = c(TRUE, FALSE, TRUE, TRUE, TRUE),
    set_2 = c(TRUE, TRUE, FALSE, FALSE, TRUE),
    set_3 = c(FALSE, TRUE, TRUE, FALSE, TRUE),
    set_4 = c(TRUE, FALSE, TRUE, TRUE, FALSE),
    set_5 = c(TRUE, FALSE, FALSE, TRUE, TRUE)
  ),
  row.names = c(NA, -5L),
  class = "data.frame"
)
我需要根据这些个体分组来过滤 start_list,
在每个集合中只保留取值为 FALSE 的 IID,
同时删除 start_list 中各数据框的第二列和第三列(ENSG 和 expr),
并创建一个新列表 list_prime_out:
# Expected result: for every element of start_list and every set_* column of
# set_id, keep the rows whose `name` is FALSE in that set and drop the ENSG and
# expr columns. The values below are exactly the rows of start_list selected
# that way, so each tibble has consistent column lengths, names match
# start_list (e.g. "E-1442O"), and ENSG0000015 only carries the two remaining
# columns it actually has.
list_prime_out <- list(
  ENSG0000014 = list(
    set_1 = structure(
      list(
        name = "E-11EM3",
        `1_43222779_A_G_b37` = "1",
        `1_43222856_A_G_b37` = "0",
        `1_43223126_C_T_b37` = "1",
        `1_43223317_T_C_b37` = "0"
      ),
      row.names = c(NA, -1L), class = c("tbl_df", "tbl", "data.frame")
    ),
    set_2 = structure(
      list(
        name = c("E-11EMC", "E-1442O"),
        `1_43222779_A_G_b37` = c("2", "1"),
        `1_43222856_A_G_b37` = c("0", "1"),
        `1_43223126_C_T_b37` = c("0", "1"),
        `1_43223317_T_C_b37` = c("0", "2")
      ),
      row.names = c(NA, -2L), class = c("tbl_df", "tbl", "data.frame")
    ),
    set_3 = structure(
      list(
        name = c("E-1122O", "E-1442O"),
        `1_43222779_A_G_b37` = c("1", "1"),
        `1_43222856_A_G_b37` = c("0", "1"),
        `1_43223126_C_T_b37` = c("0", "1"),
        `1_43223317_T_C_b37` = c("1", "2")
      ),
      row.names = c(NA, -2L), class = c("tbl_df", "tbl", "data.frame")
    ),
    set_4 = structure(
      list(
        name = c("E-11EM3", "E-1132O"),
        `1_43222779_A_G_b37` = c("1", "0"),
        `1_43222856_A_G_b37` = c("0", "1"),
        `1_43223126_C_T_b37` = c("1", "2"),
        `1_43223317_T_C_b37` = c("0", "1")
      ),
      row.names = c(NA, -2L), class = c("tbl_df", "tbl", "data.frame")
    ),
    set_5 = structure(
      list(
        name = c("E-11EM3", "E-11EMC"),
        `1_43222779_A_G_b37` = c("1", "2"),
        `1_43222856_A_G_b37` = c("0", "0"),
        `1_43223126_C_T_b37` = c("1", "0"),
        `1_43223317_T_C_b37` = c("0", "0")
      ),
      row.names = c(NA, -2L), class = c("tbl_df", "tbl", "data.frame")
    )
  ),
  ENSG0000015 = list(
    set_1 = structure(
      list(
        name = "E-11EM3",
        `1_43222779_A_G_b37` = "1",
        `1_43222856_A_G_b37` = "1"
      ),
      row.names = c(NA, -1L), class = c("tbl_df", "tbl", "data.frame")
    ),
    set_2 = structure(
      list(
        name = c("E-11EMC", "E-1442O"),
        `1_43222779_A_G_b37` = c("0", "1"),
        `1_43222856_A_G_b37` = c("2", "1")
      ),
      row.names = c(NA, -2L), class = c("tbl_df", "tbl", "data.frame")
    ),
    set_3 = structure(
      list(
        name = c("E-1122O", "E-1442O"),
        `1_43222779_A_G_b37` = c("0", "1"),
        `1_43222856_A_G_b37` = c("1", "1")
      ),
      row.names = c(NA, -2L), class = c("tbl_df", "tbl", "data.frame")
    ),
    set_4 = structure(
      list(
        name = c("E-11EM3", "E-1132O"),
        `1_43222779_A_G_b37` = c("1", "2"),
        `1_43222856_A_G_b37` = c("1", "0")
      ),
      row.names = c(NA, -2L), class = c("tbl_df", "tbl", "data.frame")
    ),
    set_5 = structure(
      list(
        name = c("E-11EM3", "E-11EMC"),
        `1_43222779_A_G_b37` = c("1", "0"),
        `1_43222856_A_G_b37` = c("1", "2")
      ),
      row.names = c(NA, -2L), class = c("tbl_df", "tbl", "data.frame")
    )
  )
)
str(list_prime_out)
#> List of 2
#>  $ ENSG0000014:List of 5
#>   ..$ set_1: tibble [1 × 5] (S3: tbl_df/tbl/data.frame)
#>   .. ..$ name              : chr "E-11EM3"
#>   .. ..$ 1_43222779_A_G_b37: chr "1"
#>   .. ..$ 1_43222856_A_G_b37: chr "0"
#>   .. ..$ 1_43223126_C_T_b37: chr "1"
#>   .. ..$ 1_43223317_T_C_b37: chr "0"
#>   ..$ set_2: tibble [2 × 5] (S3: tbl_df/tbl/data.frame)
#>   .. ..$ name              : chr [1:2] "E-11EMC" "E-1442O"
#>   .. ..$ 1_43222779_A_G_b37: chr [1:2] "2" "1"
#>   .. ..$ 1_43222856_A_G_b37: chr [1:2] "0" "1"
#>   .. ..$ 1_43223126_C_T_b37: chr [1:2] "0" "1"
#>   .. ..$ 1_43223317_T_C_b37: chr [1:2] "0" "2"
#>   ..$ set_3: tibble [2 × 5] (S3: tbl_df/tbl/data.frame)
#>   .. ..$ name              : chr [1:2] "E-1122O" "E-1442O"
#>   .. ..$ 1_43222779_A_G_b37: chr [1:2] "1" "1"
#>   .. ..$ 1_43222856_A_G_b37: chr [1:2] "0" "1"
#>   .. ..$ 1_43223126_C_T_b37: chr [1:2] "0" "1"
#>   .. ..$ 1_43223317_T_C_b37: chr [1:2] "1" "2"
#>   ..$ set_4: tibble [2 × 5] (S3: tbl_df/tbl/data.frame)
#>   .. ..$ name              : chr [1:2] "E-11EM3" "E-1132O"
#>   .. ..$ 1_43222779_A_G_b37: chr [1:2] "1" "0"
#>   .. ..$ 1_43222856_A_G_b37: chr [1:2] "0" "1"
#>   .. ..$ 1_43223126_C_T_b37: chr [1:2] "1" "2"
#>   .. ..$ 1_43223317_T_C_b37: chr [1:2] "0" "1"
#>   ..$ set_5: tibble [2 × 5] (S3: tbl_df/tbl/data.frame)
#>   .. ..$ name              : chr [1:2] "E-11EM3" "E-11EMC"
#>   .. ..$ 1_43222779_A_G_b37: chr [1:2] "1" "2"
#>   .. ..$ 1_43222856_A_G_b37: chr [1:2] "0" "0"
#>   .. ..$ 1_43223126_C_T_b37: chr [1:2] "1" "0"
#>   .. ..$ 1_43223317_T_C_b37: chr [1:2] "0" "0"
#>  $ ENSG0000015:List of 5
#>   ..$ set_1: tibble [1 × 3] (S3: tbl_df/tbl/data.frame)
#>   .. ..$ name              : chr "E-11EM3"
#>   .. ..$ 1_43222779_A_G_b37: chr "1"
#>   .. ..$ 1_43222856_A_G_b37: chr "1"
#>   ..$ set_2: tibble [2 × 3] (S3: tbl_df/tbl/data.frame)
#>   .. ..$ name              : chr [1:2] "E-11EMC" "E-1442O"
#>   .. ..$ 1_43222779_A_G_b37: chr [1:2] "0" "1"
#>   .. ..$ 1_43222856_A_G_b37: chr [1:2] "2" "1"
#>   ..$ set_3: tibble [2 × 3] (S3: tbl_df/tbl/data.frame)
#>   .. ..$ name              : chr [1:2] "E-1122O" "E-1442O"
#>   .. ..$ 1_43222779_A_G_b37: chr [1:2] "0" "1"
#>   .. ..$ 1_43222856_A_G_b37: chr [1:2] "1" "1"
#>   ..$ set_4: tibble [2 × 3] (S3: tbl_df/tbl/data.frame)
#>   .. ..$ name              : chr [1:2] "E-11EM3" "E-1132O"
#>   .. ..$ 1_43222779_A_G_b37: chr [1:2] "1" "2"
#>   .. ..$ 1_43222856_A_G_b37: chr [1:2] "1" "0"
#>   ..$ set_5: tibble [2 × 3] (S3: tbl_df/tbl/data.frame)
#>   .. ..$ name              : chr [1:2] "E-11EM3" "E-11EMC"
#>   .. ..$ 1_43222779_A_G_b37: chr [1:2] "1" "0"
#>   .. ..$ 1_43222856_A_G_b37: chr [1:2] "1" "2"
非常感谢您的帮助。
I have a list of n
elements, each including a data frame. Let's take start_list
as an example:
# Example input: a named list with one tibble-classed data frame per ENSG id.
# Every column is stored as character, including `expr`, whose strings keep
# their leading sign/space padding.
start_list <- list(
  ENSG0000014 = structure(
    list(
      name = c("E-1122O", "E-11EM3", "E-11EMC", "E-1442O", "E-1132O"),
      ENSG = rep("ENSG0000014", 5L),
      expr = c(
        " 9.940670e-02", " 1.289670e-01", "-7.394904e-03",
        " 9.940670e-02", " 9.940670e-02"
      ),
      `1_43222779_A_G_b37` = c("1", "1", "2", "1", "0"),
      `1_43222856_A_G_b37` = c("0", "0", "0", "1", "1"),
      `1_43223126_C_T_b37` = c("0", "1", "0", "1", "2"),
      `1_43223317_T_C_b37` = c("1", "0", "0", "2", "1")
    ),
    row.names = c(NA, -5L),
    class = c("tbl_df", "tbl", "data.frame")
  ),
  ENSG0000015 = structure(
    list(
      name = c("E-1122O", "E-11EM3", "E-11EMC", "E-1442O", "E-1132O"),
      ENSG = rep("ENSG0000015", 5L),
      expr = c(
        " 9.940670e-02", " 1.289670e-01", "-7.394904e-03",
        " 9.940670e-02", " 1.289670e-01"
      ),
      `1_43222779_A_G_b37` = c("0", "1", "0", "1", "2"),
      `1_43222856_A_G_b37` = c("1", "1", "2", "1", "0")
    ),
    row.names = c(NA, -5L),
    class = c("tbl_df", "tbl", "data.frame")
  )
)
Also, there is a data frame named set_id
which contains a list of individuals from name
column of start_list
that are categorized into five sets by TRUE/FALSE logical values:
# Set membership table: one row per individual (IID) and one logical column
# per set (set_1 ... set_5).
set_id <- structure(
  list(
    IID = c("E-1122O", "E-11EM3", "E-11EMC", "E-1442O", "E-1132O"),
    set_1 = c(TRUE, FALSE, TRUE, TRUE, TRUE),
    set_2 = c(TRUE, TRUE, FALSE, FALSE, TRUE),
    set_3 = c(FALSE, TRUE, TRUE, FALSE, TRUE),
    set_4 = c(TRUE, FALSE, TRUE, TRUE, FALSE),
    set_5 = c(TRUE, FALSE, FALSE, TRUE, TRUE)
  ),
  row.names = c(NA, -5L),
  class = "data.frame"
)
I need to filter start_list
based on these groups of individuals, keeping, for each set, only the IIDs
whose value is FALSE, and also remove the second and third columns (ENSG and expr) from each data frame in start_list,
and create a new list, 'list_prime_out':
# Expected result: for every element of start_list and every set_* column of
# set_id, keep the rows whose `name` is FALSE in that set and drop the ENSG and
# expr columns. The values below are exactly the rows of start_list selected
# that way, so each tibble has consistent column lengths, names match
# start_list (e.g. "E-1442O"), and ENSG0000015 only carries the two remaining
# columns it actually has.
list_prime_out <- list(
  ENSG0000014 = list(
    set_1 = structure(
      list(
        name = "E-11EM3",
        `1_43222779_A_G_b37` = "1",
        `1_43222856_A_G_b37` = "0",
        `1_43223126_C_T_b37` = "1",
        `1_43223317_T_C_b37` = "0"
      ),
      row.names = c(NA, -1L), class = c("tbl_df", "tbl", "data.frame")
    ),
    set_2 = structure(
      list(
        name = c("E-11EMC", "E-1442O"),
        `1_43222779_A_G_b37` = c("2", "1"),
        `1_43222856_A_G_b37` = c("0", "1"),
        `1_43223126_C_T_b37` = c("0", "1"),
        `1_43223317_T_C_b37` = c("0", "2")
      ),
      row.names = c(NA, -2L), class = c("tbl_df", "tbl", "data.frame")
    ),
    set_3 = structure(
      list(
        name = c("E-1122O", "E-1442O"),
        `1_43222779_A_G_b37` = c("1", "1"),
        `1_43222856_A_G_b37` = c("0", "1"),
        `1_43223126_C_T_b37` = c("0", "1"),
        `1_43223317_T_C_b37` = c("1", "2")
      ),
      row.names = c(NA, -2L), class = c("tbl_df", "tbl", "data.frame")
    ),
    set_4 = structure(
      list(
        name = c("E-11EM3", "E-1132O"),
        `1_43222779_A_G_b37` = c("1", "0"),
        `1_43222856_A_G_b37` = c("0", "1"),
        `1_43223126_C_T_b37` = c("1", "2"),
        `1_43223317_T_C_b37` = c("0", "1")
      ),
      row.names = c(NA, -2L), class = c("tbl_df", "tbl", "data.frame")
    ),
    set_5 = structure(
      list(
        name = c("E-11EM3", "E-11EMC"),
        `1_43222779_A_G_b37` = c("1", "2"),
        `1_43222856_A_G_b37` = c("0", "0"),
        `1_43223126_C_T_b37` = c("1", "0"),
        `1_43223317_T_C_b37` = c("0", "0")
      ),
      row.names = c(NA, -2L), class = c("tbl_df", "tbl", "data.frame")
    )
  ),
  ENSG0000015 = list(
    set_1 = structure(
      list(
        name = "E-11EM3",
        `1_43222779_A_G_b37` = "1",
        `1_43222856_A_G_b37` = "1"
      ),
      row.names = c(NA, -1L), class = c("tbl_df", "tbl", "data.frame")
    ),
    set_2 = structure(
      list(
        name = c("E-11EMC", "E-1442O"),
        `1_43222779_A_G_b37` = c("0", "1"),
        `1_43222856_A_G_b37` = c("2", "1")
      ),
      row.names = c(NA, -2L), class = c("tbl_df", "tbl", "data.frame")
    ),
    set_3 = structure(
      list(
        name = c("E-1122O", "E-1442O"),
        `1_43222779_A_G_b37` = c("0", "1"),
        `1_43222856_A_G_b37` = c("1", "1")
      ),
      row.names = c(NA, -2L), class = c("tbl_df", "tbl", "data.frame")
    ),
    set_4 = structure(
      list(
        name = c("E-11EM3", "E-1132O"),
        `1_43222779_A_G_b37` = c("1", "2"),
        `1_43222856_A_G_b37` = c("1", "0")
      ),
      row.names = c(NA, -2L), class = c("tbl_df", "tbl", "data.frame")
    ),
    set_5 = structure(
      list(
        name = c("E-11EM3", "E-11EMC"),
        `1_43222779_A_G_b37` = c("1", "0"),
        `1_43222856_A_G_b37` = c("1", "2")
      ),
      row.names = c(NA, -2L), class = c("tbl_df", "tbl", "data.frame")
    )
  )
)
str(list_prime_out)
#> List of 2
#>  $ ENSG0000014:List of 5
#>   ..$ set_1: tibble [1 × 5] (S3: tbl_df/tbl/data.frame)
#>   .. ..$ name              : chr "E-11EM3"
#>   .. ..$ 1_43222779_A_G_b37: chr "1"
#>   .. ..$ 1_43222856_A_G_b37: chr "0"
#>   .. ..$ 1_43223126_C_T_b37: chr "1"
#>   .. ..$ 1_43223317_T_C_b37: chr "0"
#>   ..$ set_2: tibble [2 × 5] (S3: tbl_df/tbl/data.frame)
#>   .. ..$ name              : chr [1:2] "E-11EMC" "E-1442O"
#>   .. ..$ 1_43222779_A_G_b37: chr [1:2] "2" "1"
#>   .. ..$ 1_43222856_A_G_b37: chr [1:2] "0" "1"
#>   .. ..$ 1_43223126_C_T_b37: chr [1:2] "0" "1"
#>   .. ..$ 1_43223317_T_C_b37: chr [1:2] "0" "2"
#>   ..$ set_3: tibble [2 × 5] (S3: tbl_df/tbl/data.frame)
#>   .. ..$ name              : chr [1:2] "E-1122O" "E-1442O"
#>   .. ..$ 1_43222779_A_G_b37: chr [1:2] "1" "1"
#>   .. ..$ 1_43222856_A_G_b37: chr [1:2] "0" "1"
#>   .. ..$ 1_43223126_C_T_b37: chr [1:2] "0" "1"
#>   .. ..$ 1_43223317_T_C_b37: chr [1:2] "1" "2"
#>   ..$ set_4: tibble [2 × 5] (S3: tbl_df/tbl/data.frame)
#>   .. ..$ name              : chr [1:2] "E-11EM3" "E-1132O"
#>   .. ..$ 1_43222779_A_G_b37: chr [1:2] "1" "0"
#>   .. ..$ 1_43222856_A_G_b37: chr [1:2] "0" "1"
#>   .. ..$ 1_43223126_C_T_b37: chr [1:2] "1" "2"
#>   .. ..$ 1_43223317_T_C_b37: chr [1:2] "0" "1"
#>   ..$ set_5: tibble [2 × 5] (S3: tbl_df/tbl/data.frame)
#>   .. ..$ name              : chr [1:2] "E-11EM3" "E-11EMC"
#>   .. ..$ 1_43222779_A_G_b37: chr [1:2] "1" "2"
#>   .. ..$ 1_43222856_A_G_b37: chr [1:2] "0" "0"
#>   .. ..$ 1_43223126_C_T_b37: chr [1:2] "1" "0"
#>   .. ..$ 1_43223317_T_C_b37: chr [1:2] "0" "0"
#>  $ ENSG0000015:List of 5
#>   ..$ set_1: tibble [1 × 3] (S3: tbl_df/tbl/data.frame)
#>   .. ..$ name              : chr "E-11EM3"
#>   .. ..$ 1_43222779_A_G_b37: chr "1"
#>   .. ..$ 1_43222856_A_G_b37: chr "1"
#>   ..$ set_2: tibble [2 × 3] (S3: tbl_df/tbl/data.frame)
#>   .. ..$ name              : chr [1:2] "E-11EMC" "E-1442O"
#>   .. ..$ 1_43222779_A_G_b37: chr [1:2] "0" "1"
#>   .. ..$ 1_43222856_A_G_b37: chr [1:2] "2" "1"
#>   ..$ set_3: tibble [2 × 3] (S3: tbl_df/tbl/data.frame)
#>   .. ..$ name              : chr [1:2] "E-1122O" "E-1442O"
#>   .. ..$ 1_43222779_A_G_b37: chr [1:2] "0" "1"
#>   .. ..$ 1_43222856_A_G_b37: chr [1:2] "1" "1"
#>   ..$ set_4: tibble [2 × 3] (S3: tbl_df/tbl/data.frame)
#>   .. ..$ name              : chr [1:2] "E-11EM3" "E-1132O"
#>   .. ..$ 1_43222779_A_G_b37: chr [1:2] "1" "2"
#>   .. ..$ 1_43222856_A_G_b37: chr [1:2] "1" "0"
#>   ..$ set_5: tibble [2 × 3] (S3: tbl_df/tbl/data.frame)
#>   .. ..$ name              : chr [1:2] "E-11EM3" "E-11EMC"
#>   .. ..$ 1_43222779_A_G_b37: chr [1:2] "1" "0"
#>   .. ..$ 1_43222856_A_G_b37: chr [1:2] "1" "2"
I'd appreciate your help.
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。
绑定邮箱获取回复消息
由于您还没有绑定你的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论
评论(1)
这是使用 {dplyr} 和 {purrr} 的解决方案:
输出:
由 reprex 包于 2022 年 3 月 2 日创建 (v2.0.1)
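Here's a solution using {dplyr} and {purrr}:
library(dplyr)
library(purrr)

list_prime_out <- map(start_list, function(df) {
  # set_id[-1] drops IID, leaving one named logical vector per set_* column
  map(set_id[-1], function(in_set) {
    df %>%
      filter(name %in% set_id$IID[!in_set]) %>%
      select(-ENSG, -expr)
  })
})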
Output:
Created on 2022-03-02 by the reprex package (v2.0.1)