如何在数据上并行化明确的循环。表(通过参考)
问题语句
我有此函数 访问三个数据表:snp_pairs
,zygosity
和ae
。
我正在迭代snp_pairs
,并用两个新列对其进行更新。
数据表zygosity
和ae
仅读取而根本没有修改。
我想知道在共享这三个数据表的同时是否有一种平行的方式,即不要将它们复制到线程(我认为是什么Eg Parallel :: mclapply() /代码>会做)。
函数daeqtl_mapping()
daeqtl_mapping_ <- function(snp_pairs, zygosity, ae) {
# Ensure that the data tables are all keyed.
# `snp_pairs` keys: first two columns
data.table::setkeyv(snp_pairs, colnames(snp_pairs)[1:2])
# `zygosity` keys: first column
data.table::setkeyv(zygosity, colnames(zygosity)[1])
# `ae` keys: first column
data.table::setkeyv(ae, colnames(ae)[1])
n <- nrow(snp_pairs)
for(i in seq_len(n)) {
candidate_snp <- snp_pairs[[i, 'candidate_snp']]
dae_snp <- snp_pairs[[i, 'dae_snp']]
if(dae_snp %is_not_in% snp_pairs) {
datatable::set(snp_pairs, i = i, j = 'pvalue', value = NA)
datatable::set(snp_pairs, i = i, j = 'case', value = 0L)
break
}
if(dae_snp %is_not_in% ae) {
datatable::set(snp_pairs, i = i, j = 'pvalue', value = NA)
datatable::set(snp_pairs, i = i, j = 'case', value = 0L)
break
}
# `csnp_hom`: lgl indicating homozygous samples for the candidate snp
csnp_hom <- is_hom(dt = zygosity, snp = candidate_snp)
# `csnp_het`: lgl indicating heterozygous samples for the candidate snp
csnp_het <- is_het(dt = zygosity, snp = candidate_snp)
# `dsnp_het`: lgl indicating heterozygous samples for the dae snp
dsnp_het <- is_het(dt = zygosity, snp = dae_snp)
# `ae_hom`: dbl with allelic ratios of homozygous samples (candidate snp)
ae_hom <- as.numeric(ae[dae_snp][, 1 := NULL][, dsnp_het & csnp_hom, with = FALSE])
# `ae_het`: dbl with allelic ratios of heterozygous samples (candidate snp)
ae_het <- as.numeric(ae[dae_snp][, 1 := NULL][, dsnp_het & csnp_het, with = FALSE])
df <- daeqtl_test(ae_hom = ae_hom, ae_het = ae_het)
for (col in names(df)) data.table::set(snp_pairs, i = i, j = col, value = df[[col]])
}
}
mwe
请先安装daeqtlr,其中远程>: install_github(“ maialab/daeqtlr”)
。
library(daeqtlr)
# Download the RDS files from this google drive folder
# https://drive.google.com/drive/folders/1lefDRPW2W6jBMnwUQIy4VGD9ssks4R-E?usp=sharing
snp_pairs <- data.table::setDT(readRDS(file = '~/dwl/snp_pairs.rds'))
zygosity <- data.table::setDT(readRDS(file = '~/dwl/zygosity.rds'))
ae <- data.table::setDT(readRDS(file = '~/dwl/ae.rds'))
daeqtlr:::daeqtl_mapping_(snp_pairs, zygosity, ae)
snp_pairs
#> dae_snp candidate_snp chromosome dae_snp_position
#> 1: rs10035235 rs1001420 5 40798542
#> 2: rs10035235 rs1001684 5 40798542
#> 3: rs10035235 rs1002424 5 40798542
#> 4: rs10035235 rs1002922 5 40798542
#> 5: rs10035235 rs10043093 5 40798542
#> ---
#> 1482: rs10035418 rs9324855 5 141437664
#> 1483: rs10035418 rs9324858 5 141437664
#> 1484: rs10035418 rs9324859 5 141437664
#> 1485: rs10035418 rs9686896 5 141437664
#> 1486: rs10035418 rs998794 5 141437664
#> candidate_snnp_position pvalue case
#> 1: 41000241 0.1155707 4
#> 2: 40810324 0.4341583 4
#> 3: 40767295 0.2739109 4
#> 4: 40386453 0.9556968 4
#> 5: 40324118 0.3911479 4
#> ---
#> 1482: 141693599 0.9098420 4
#> 1483: 141884896 0.8291068 4
#> 1484: 141899785 0.7482426 4
#> 1485: 141731340 0.9556116 4
#> 1486: 141187135 0.6369524 4
在2022-04-26创建的 reprex package (v2.0.1)
Problem statement
I have this function daeqtl_mapping_()
that takes in three data tables: snp_pairs
, zygosity
and ae
.
I am iterating over each row of snp_pairs
, and updating it with two new columns.
The data tables zygosity
and ae
are only read and not modified at all.
I'd like to know if there is a way of parallelizing the for-loop while sharing these three data tables, i.e. not copy them over to threads (which is, I think, what e.g. parallel::mclapply()
would do).
Function daeqtl_mapping()
daeqtl_mapping_ <- function(snp_pairs, zygosity, ae) {
# Ensure that the data tables are all keyed.
# `snp_pairs` keys: first two columns
data.table::setkeyv(snp_pairs, colnames(snp_pairs)[1:2])
# `zygosity` keys: first column
data.table::setkeyv(zygosity, colnames(zygosity)[1])
# `ae` keys: first column
data.table::setkeyv(ae, colnames(ae)[1])
n <- nrow(snp_pairs)
for(i in seq_len(n)) {
candidate_snp <- snp_pairs[[i, 'candidate_snp']]
dae_snp <- snp_pairs[[i, 'dae_snp']]
if(dae_snp %is_not_in% snp_pairs) {
datatable::set(snp_pairs, i = i, j = 'pvalue', value = NA)
datatable::set(snp_pairs, i = i, j = 'case', value = 0L)
break
}
if(dae_snp %is_not_in% ae) {
datatable::set(snp_pairs, i = i, j = 'pvalue', value = NA)
datatable::set(snp_pairs, i = i, j = 'case', value = 0L)
break
}
# `csnp_hom`: lgl indicating homozygous samples for the candidate snp
csnp_hom <- is_hom(dt = zygosity, snp = candidate_snp)
# `csnp_het`: lgl indicating heterozygous samples for the candidate snp
csnp_het <- is_het(dt = zygosity, snp = candidate_snp)
# `dsnp_het`: lgl indicating heterozygous samples for the dae snp
dsnp_het <- is_het(dt = zygosity, snp = dae_snp)
# `ae_hom`: dbl with allelic ratios of homozygous samples (candidate snp)
ae_hom <- as.numeric(ae[dae_snp][, 1 := NULL][, dsnp_het & csnp_hom, with = FALSE])
# `ae_het`: dbl with allelic ratios of heterozygous samples (candidate snp)
ae_het <- as.numeric(ae[dae_snp][, 1 := NULL][, dsnp_het & csnp_het, with = FALSE])
df <- daeqtl_test(ae_hom = ae_hom, ae_het = ae_het)
for (col in names(df)) data.table::set(snp_pairs, i = i, j = col, value = df[[col]])
}
}
MWE
Please first install daeqtlr with remotes::install_github("maialab/daeqtlr")
.
library(daeqtlr)
# Download the RDS files from this google drive folder
# https://drive.google.com/drive/folders/1lefDRPW2W6jBMnwUQIy4VGD9ssks4R-E?usp=sharing
snp_pairs <- data.table::setDT(readRDS(file = '~/dwl/snp_pairs.rds'))
zygosity <- data.table::setDT(readRDS(file = '~/dwl/zygosity.rds'))
ae <- data.table::setDT(readRDS(file = '~/dwl/ae.rds'))
daeqtlr:::daeqtl_mapping_(snp_pairs, zygosity, ae)
snp_pairs
#> dae_snp candidate_snp chromosome dae_snp_position
#> 1: rs10035235 rs1001420 5 40798542
#> 2: rs10035235 rs1001684 5 40798542
#> 3: rs10035235 rs1002424 5 40798542
#> 4: rs10035235 rs1002922 5 40798542
#> 5: rs10035235 rs10043093 5 40798542
#> ---
#> 1482: rs10035418 rs9324855 5 141437664
#> 1483: rs10035418 rs9324858 5 141437664
#> 1484: rs10035418 rs9324859 5 141437664
#> 1485: rs10035418 rs9686896 5 141437664
#> 1486: rs10035418 rs998794 5 141437664
#> candidate_snnp_position pvalue case
#> 1: 41000241 0.1155707 4
#> 2: 40810324 0.4341583 4
#> 3: 40767295 0.2739109 4
#> 4: 40386453 0.9556968 4
#> 5: 40324118 0.3911479 4
#> ---
#> 1482: 141693599 0.9098420 4
#> 1483: 141884896 0.8291068 4
#> 1484: 141899785 0.7482426 4
#> 1485: 141731340 0.9556116 4
#> 1486: 141187135 0.6369524 4
Created on 2022-04-26 by the reprex package (v2.0.1)
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

绑定邮箱获取回复消息
由于您还没有绑定你的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论