如何使rscript为合作者功能
我正在帮助开发研究人员的管道,其中一个步骤需要解析大型Excel电子表格,以根据某些关键列和参数范围来子集和导出文件。
我已经编写了代码,并通过 gitlab 托管了存储库,并可以在组织内克隆。
但是,我的合作者无法运行代码,我已经通过团队进行了会话,以查看他们是否正确克隆了存储库,具有正确的输入文件以及他们是否正确输入了命令和参数。
使用rscript
该命令在我的 biolinux 终端上绝对正确地运行,但是由于未知原因,我的协作者一直收到似乎与终端而不是r的奇数错误消息:
colargs中的错误:无效的参数类型 呼叫:... col_spec_standardise-> as.col_spec-> my_cols-> 执行停止
有关弄清楚到底发生了什么建议的任何建议?
编辑:以下是我正在使用的代码
#!/usr/bin/env Rscript
args = commandArgs(trailingOnly = T)
# args = list("allDB_03282022.txt", "ATL", 2018, 1, 12, "SURRG_ATL_2018_JAN_DEC.csv")
# Import magrittr expressions
`%>%` <- magrittr::`%>%`
`%<>%` <- magrittr::`%<>%`
# Define function my_cols
my_cols <- function(..., .default = readr::col_guess()) {
dots <- rlang::enexprs(...)
colargs <- purrr::flatten_chr(unname(
purrr::imap(dots, ~ {
colnames <- rlang::syms(.x)
colnames <- colnames[colnames != rlang::sym("c")]
coltypes <- purrr::rep_along(colnames, .y)
purrr::set_names(coltypes, colnames)
})
))
readr::cols(!!!colargs, .default = .default)
}
surrg_all <- readr::read_tsv(file = args[1], col_names = TRUE, na = "NA",
col_types = my_cols(D = c(TEST_DATE, SPECIMEN_COLLECTION_DATE, DATE_WGSID_ASSIGNED, DATE_RESULT_RELEASED),
# f = c(GC_FACILITY_CODE, SEQUENCING_LAB, GCWGS_DL_FROM_ARLN, SHIPPED_ARLN,
# ARLN_PHL, JURISDICTION_PHL, PATIENT_GENDER, SPECIMEN_TYPE,
# BETALAC_RESULT_CODE, SPECIMEN_QUALITY, YR_COLLECT, MTH_COLLECT)
))
# Change select variable types to factor
surrg_all %<>%
dplyr::mutate(dplyr::across(c(GC_FACILITY_CODE, YR_COLLECT, MTH_COLLECT), as.factor))
# remove rows with NA in any of the variables GC_FACILITY_CODE, YR_COLLECT or MTH_COLLECT
surrg_all %<>% tidyr::drop_na(GC_FACILITY_CODE, YR_COLLECT, MTH_COLLECT)
# Use forcats::fct_collapse() to assign levels to shared GC_FACILITY_CODE factors
surrg_all %<>% dplyr::mutate(GC_FACILITY_CODE2 = forcats::fct_collapse(surrg_all$GC_FACILITY_CODE,
ALB = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^ALB")))),
ANC = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^ANC")))),
ATL = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^ATL")))),
BAL = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^BAL")))),
BHM = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^BHM")))),
BOS = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^BOS")))),
BUF = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^BUF")))),
CAM = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^CAM")))),
CHI = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^CHI")))),
CLE = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^CLE")))),
COL = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^COL")))),
DAL = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^DAL")))),
DEN = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^DEN")))),
GRB = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^GRB")))),
HON = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^HON")))),
IND = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^IND")))),
JAC = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^JAC")))),
KAZ = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^KAZ")))),
KCY = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^KCY")))),
LA = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^LA")))),
LVG = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^LVG")))),
MIA = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^MIA")))),
MIL = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^MIL")))),
MIN = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^MIN")))),
NOR = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^NOR")))),
NYC = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^NYC")))),
ORA = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^ORA")))),
PHI = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^PHI")))),
PHX = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^PHX")))),
PON = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^PON")))),
POR = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^POR")))),
SDG = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^SDG")))),
SEA = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^SEA")))),
SFO = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^SFO")))),
TRP = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^TRP")))),
WDC = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^WDC"))))
), .after = GC_FACILITY_CODE) # %>% View()
# Export output file
surrg_all %>%
dplyr::filter(GC_FACILITY_CODE2 == args[2], YR_COLLECT == args[3], MTH_COLLECT %in% args[[4]]:args[[5]]) %>%
dplyr::select(!GC_FACILITY_CODE2) %>%
readr::write_csv(file = args[[6]] , quote = "none", na = "")
I'm helping in developing a pipeline for researchers and one of the steps require parsing of a large Excel spreadsheet in order to subset and export a file based on some key columns and parameter ranges.
I've written the code and have the repo hosted via GitLab and available for cloning within my organization.
However my collaborators have been unable to get the code to run and I've gone over a session via Teams to see whether they were cloning the repo correctly, had the correct input file and whether they were entering the command and arguments correctly.
Using Rscript
the command is running absolutely correctly on my Biolinux terminal however for unknown reasons my collaborators keep getting odd error messages that seem to be related to the terminal and not R:
Error in !colargs: invalid argument type
Calls: ... col_spec_standardise-> as.col_spec->my_cols->
Execution halted
Any suggestions on figuring out what exactly is going on?
Edit: Below is the code I'm using
#!/usr/bin/env Rscript
args = commandArgs(trailingOnly = T)
# args = list("allDB_03282022.txt", "ATL", 2018, 1, 12, "SURRG_ATL_2018_JAN_DEC.csv")
# Import magrittr expressions
`%>%` <- magrittr::`%>%`
`%<>%` <- magrittr::`%<>%`
# Define function my_cols
my_cols <- function(..., .default = readr::col_guess()) {
dots <- rlang::enexprs(...)
colargs <- purrr::flatten_chr(unname(
purrr::imap(dots, ~ {
colnames <- rlang::syms(.x)
colnames <- colnames[colnames != rlang::sym("c")]
coltypes <- purrr::rep_along(colnames, .y)
purrr::set_names(coltypes, colnames)
})
))
readr::cols(!!!colargs, .default = .default)
}
surrg_all <- readr::read_tsv(file = args[1], col_names = TRUE, na = "NA",
col_types = my_cols(D = c(TEST_DATE, SPECIMEN_COLLECTION_DATE, DATE_WGSID_ASSIGNED, DATE_RESULT_RELEASED),
# f = c(GC_FACILITY_CODE, SEQUENCING_LAB, GCWGS_DL_FROM_ARLN, SHIPPED_ARLN,
# ARLN_PHL, JURISDICTION_PHL, PATIENT_GENDER, SPECIMEN_TYPE,
# BETALAC_RESULT_CODE, SPECIMEN_QUALITY, YR_COLLECT, MTH_COLLECT)
))
# Change select variable types to factor
surrg_all %<>%
dplyr::mutate(dplyr::across(c(GC_FACILITY_CODE, YR_COLLECT, MTH_COLLECT), as.factor))
# remove rows with NA in any of the variables GC_FACILITY_CODE, YR_COLLECT or MTH_COLLECT
surrg_all %<>% tidyr::drop_na(GC_FACILITY_CODE, YR_COLLECT, MTH_COLLECT)
# Use forcats::fct_collapse() to assign levels to shared GC_FACILITY_CODE factors
surrg_all %<>% dplyr::mutate(GC_FACILITY_CODE2 = forcats::fct_collapse(surrg_all$GC_FACILITY_CODE,
ALB = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^ALB")))),
ANC = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^ANC")))),
ATL = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^ATL")))),
BAL = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^BAL")))),
BHM = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^BHM")))),
BOS = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^BOS")))),
BUF = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^BUF")))),
CAM = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^CAM")))),
CHI = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^CHI")))),
CLE = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^CLE")))),
COL = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^COL")))),
DAL = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^DAL")))),
DEN = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^DEN")))),
GRB = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^GRB")))),
HON = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^HON")))),
IND = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^IND")))),
JAC = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^JAC")))),
KAZ = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^KAZ")))),
KCY = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^KCY")))),
LA = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^LA")))),
LVG = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^LVG")))),
MIA = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^MIA")))),
MIL = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^MIL")))),
MIN = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^MIN")))),
NOR = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^NOR")))),
NYC = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^NYC")))),
ORA = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^ORA")))),
PHI = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^PHI")))),
PHX = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^PHX")))),
PON = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^PON")))),
POR = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^POR")))),
SDG = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^SDG")))),
SEA = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^SEA")))),
SFO = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^SFO")))),
TRP = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^TRP")))),
WDC = as.character(unique(subset(surrg_all$GC_FACILITY_CODE, stringr::str_detect(surrg_all$GC_FACILITY_CODE, "^WDC"))))
), .after = GC_FACILITY_CODE) # %>% View()
# Export output file
surrg_all %>%
dplyr::filter(GC_FACILITY_CODE2 == args[2], YR_COLLECT == args[3], MTH_COLLECT %in% args[[4]]:args[[5]]) %>%
dplyr::select(!GC_FACILITY_CODE2) %>%
readr::write_csv(file = args[[6]] , quote = "none", na = "")
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

绑定邮箱获取回复消息
由于您还没有绑定你的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论