如何在多列上执行IFELSE?

发布于 2025-02-13 07:22:17 字数 1844 浏览 2 评论 0原文

我有一个示例数据框:

# Example data: seven rows and seven columns. Row 1 is NA in
# variable1-variable3; variable4 and variable7 are integer sequences.
dat <- data.frame(
  variable1 = c(NA, 2, 3, 4, 5, 6, 99),
  variable2 = c(NA, 2, 99, 4, 5, 6, 7),
  variable3 = c(NA, 2, 3, 4, 5, 6, 7),
  variable4 = 5:11,
  variable5 = c(1, 2, 3, 4, 5, 6, 999),
  variable6 = c(1, 2, 3, 4, 999, 6, 7),
  variable7 = 1:7
)

  variable1 variable2 variable3 variable4 variable5 variable6 variable7 
1        NA        NA        NA         5         1         1         1 
2         2         2         2         6         2         2         2 
3         3        99         3         7         3         3         3 
4         4         4         4         8         4         4         4
5         5         5         5         9         5       999         5
6         6         6         6        10         6         6         6
7        99         7         7        11       999         7         7 

我想执行一个 ifelse:如果 variable1、variable2 和 variable3 都是 NA,则取 variable4、variable5、variable6;否则取 variable1、variable2、variable3,并把结果写入新列 variable8、variable9、variable10。

因此,新的数据框应该看起来像这样

  variable1 variable2 variable3 variable4 variable5 variable6 variable7 variable8 variable9 variable10
1        NA        NA        NA         5         1         1         1         5         1         1
2         2         2         2         6         2         2         2         2         2         2 
3         3        99         3         7         3         3         3         3        99         3
4         4         4         4         8         4         4         4         4         4         4 
5         5         5         5         9         5       999         5         5         5         5
6         6         6         6        10         6         6         6         6         6         6
7        99         7         7        11       999         7         7         99        7         7

我更喜欢Dplyr解决方案;)

I have a sample data frame:

# Example data: seven rows and seven columns. Row 1 is NA in
# variable1-variable3; variable4 and variable7 are integer sequences.
dat <- data.frame(
  variable1 = c(NA, 2, 3, 4, 5, 6, 99),
  variable2 = c(NA, 2, 99, 4, 5, 6, 7),
  variable3 = c(NA, 2, 3, 4, 5, 6, 7),
  variable4 = 5:11,
  variable5 = c(1, 2, 3, 4, 5, 6, 999),
  variable6 = c(1, 2, 3, 4, 999, 6, 7),
  variable7 = 1:7
)

  variable1 variable2 variable3 variable4 variable5 variable6 variable7 
1        NA        NA        NA         5         1         1         1 
2         2         2         2         6         2         2         2 
3         3        99         3         7         3         3         3 
4         4         4         4         8         4         4         4
5         5         5         5         9         5       999         5
6         6         6         6        10         6         6         6
7        99         7         7        11       999         7         7 

I want to perform an ifelse saying if variable1 and variable2 and variable3 are NA,
take variable4, variable5, variable6,
otherwise take variable1, variable2, variable3
to the new columns
variable8, variable9, variable10.

so the new data frame should look like this

  variable1 variable2 variable3 variable4 variable5 variable6 variable7 variable8 variable9 variable10
1        NA        NA        NA         5         1         1         1         5         1         1
2         2         2         2         6         2         2         2         2         2         2 
3         3        99         3         7         3         3         3         3        99         3
4         4         4         4         8         4         4         4         4         4         4 
5         5         5         5         9         5       999         5         5         5         5
6         6         6         6        10         6         6         6         6         6         6
7        99         7         7        11       999         7         7         99        7         7

I prefer a dplyr solution ;)

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(2)

说不完的你爱 2025-02-20 07:22:17

dplyr::case_when 非常适合这种情况:

library(dplyr)
dat %>%
  # Coerce the first seven columns to double up front, so both branches of
  # each case_when() below return the same type.
  mutate(across(all_of(1:7), as.numeric)) %>%
  # Per row: when variable1-variable3 are all NA, take the matching column
  # from variable4-variable6; otherwise keep the value from variable1-variable3.
  mutate(
    variable8 = case_when(
      is.na(variable1) & is.na(variable2) & is.na(variable3) ~ variable4,
      TRUE ~ variable1
    ),
    variable9 = case_when(
      is.na(variable1) & is.na(variable2) & is.na(variable3) ~ variable5,
      TRUE ~ variable2
    ),
    variable10 = case_when(
      is.na(variable1) & is.na(variable2) & is.na(variable3) ~ variable6,
      TRUE ~ variable3
    )
  )

  variable1 variable2 variable3 variable4 variable5
1        NA        NA        NA         5         1
2         2         2         2         6         2
3         3        99         3         7         3
4         4         4         4         8         4
5         5         5         5         9         5
6         6         6         6        10         6
7        99         7         7        11       999
  variable6 variable7 variable8 variable9 variable10
1         1         1         5         1          1
2         2         2         2         2          2
3         3         3         3        99          3
4         4         4         4         4          4
5       999         5         5         5          5
6         6         6         6         6          6
7         7         7        99         7          7

dplyr::case_when is perfect for this:

library(dplyr)
dat %>%
  # Coerce the first seven columns to double up front, so both branches of
  # each case_when() below return the same type.
  mutate(across(all_of(1:7), as.numeric)) %>%
  # Per row: when variable1-variable3 are all NA, take the matching column
  # from variable4-variable6; otherwise keep the value from variable1-variable3.
  mutate(
    variable8 = case_when(
      is.na(variable1) & is.na(variable2) & is.na(variable3) ~ variable4,
      TRUE ~ variable1
    ),
    variable9 = case_when(
      is.na(variable1) & is.na(variable2) & is.na(variable3) ~ variable5,
      TRUE ~ variable2
    ),
    variable10 = case_when(
      is.na(variable1) & is.na(variable2) & is.na(variable3) ~ variable6,
      TRUE ~ variable3
    )
  )

  variable1 variable2 variable3 variable4 variable5
1        NA        NA        NA         5         1
2         2         2         2         6         2
3         3        99         3         7         3
4         4         4         4         8         4
5         5         5         5         9         5
6         6         6         6        10         6
7        99         7         7        11       999
  variable6 variable7 variable8 variable9 variable10
1         1         1         5         1          1
2         2         2         2         2          2
3         3         3         3        99          3
4         4         4         4         4          4
5       999         5         5         5          5
6         6         6         6         6          6
7         7         7        99         7          7
衣神在巴黎 2025-02-20 07:22:17

另一个可能的解决方案:

library(tidyverse)

dat %>%
  mutate(pmap_dfr(., function(...) {
    # Called once per row; c(...) collects that row's values in column order.
    row_vals <- c(...)
    # Use columns 4-6 only when columns 1-3 are all NA; otherwise keep 1-3.
    picked <- if (all(is.na(row_vals[1:3]))) {
      row_vals[4:6]
    } else {
      row_vals[1:3]
    }
    # Rename to variable8..variable10 so mutate() adds them as new columns.
    set_names(picked, str_c("variable", 8:10))
  }))

#>   variable1 variable2 variable3 variable4 variable5 variable6 variable7
#> 1        NA        NA        NA         5         1         1         1
#> 2         2         2         2         6         2         2         2
#> 3         3        99         3         7         3         3         3
#> 4         4         4         4         8         4         4         4
#> 5         5         5         5         9         5       999         5
#> 6         6         6         6        10         6         6         6
#> 7        99         7         7        11       999         7         7
#>   variable8 variable9 variable10
#> 1         5         1          1
#> 2         2         2          2
#> 3         3        99          3
#> 4         4         4          4
#> 5         5         5          5
#> 6         6         6          6
#> 7        99         7          7

Another possible solution:

library(tidyverse)

dat %>%
  mutate(pmap_dfr(., function(...) {
    # Called once per row; c(...) collects that row's values in column order.
    row_vals <- c(...)
    # Use columns 4-6 only when columns 1-3 are all NA; otherwise keep 1-3.
    picked <- if (all(is.na(row_vals[1:3]))) {
      row_vals[4:6]
    } else {
      row_vals[1:3]
    }
    # Rename to variable8..variable10 so mutate() adds them as new columns.
    set_names(picked, str_c("variable", 8:10))
  }))

#>   variable1 variable2 variable3 variable4 variable5 variable6 variable7
#> 1        NA        NA        NA         5         1         1         1
#> 2         2         2         2         6         2         2         2
#> 3         3        99         3         7         3         3         3
#> 4         4         4         4         8         4         4         4
#> 5         5         5         5         9         5       999         5
#> 6         6         6         6        10         6         6         6
#> 7        99         7         7        11       999         7         7
#>   variable8 variable9 variable10
#> 1         5         1          1
#> 2         2         2          2
#> 3         3        99          3
#> 4         4         4          4
#> 5         5         5          5
#> 6         6         6          6
#> 7        99         7          7
~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文