根据给定条件将一列的值复制到下一列

发布于 2025-02-12 19:09:25 字数 697 浏览 2 评论 0原文

我的二进制数据如下:

# Example data: an ID column plus five columns Q0..Q4 holding 0, 1 or NA.
ID <- LETTERS[1:6]
Q0 <- rep(0, 6)
Q1 <- c(0, 1, 0, 0, NA, 1)
Q2 <- c(0, NA, 1, 0, NA, 1)
Q3 <- c(0, NA, NA, 1, NA, 1)
Q4 <- c(0, NA, NA, 1, NA, 1)

# Assemble the columns, naming each one explicitly after its vector.
dta <- data.frame(ID = ID, Q0 = Q0, Q1 = Q1, Q2 = Q2, Q3 = Q3, Q4 = Q4)

如果某一行在其中一列的值为 1,则该行所有后续列也应为 1。如果值为 0 或 NA,则下一列应保持原样。

换句话说,如何根据处于相对位置的某一列的值,有条件地更改多个列的值?

上述数据框架的预期输出是:

ID    Q0    Q1    Q2    Q3    Q4
A     0     0     0     0     0
B     0     1     1     1     1
C     0     0     1     1     1
D     0     0     0     1     1
E     0     NA    NA    NA    NA
F     0     1     1     1     1

我该怎么做?也许可以使用 apply 或 for 循环?

I have binary data as below:

# Example data: an ID column plus five columns Q0..Q4 holding 0, 1 or NA.
ID <- LETTERS[1:6]
Q0 <- rep(0, 6)
Q1 <- c(0, 1, 0, 0, NA, 1)
Q2 <- c(0, NA, 1, 0, NA, 1)
Q3 <- c(0, NA, NA, 1, NA, 1)
Q4 <- c(0, NA, NA, 1, NA, 1)

# Assemble the columns, naming each one explicitly after its vector.
dta <- data.frame(ID = ID, Q0 = Q0, Q1 = Q1, Q2 = Q2, Q3 = Q3, Q4 = Q4)

If there is 1 for a row in one of the columns, all the subsequent columns should be 1 as well. If there is 0 or NA, the next column should stay as is.

Stated differently, how can I change the value of multiple columns based conditionally on the value of a column in a relative position?

The intended output for the above data frame is:

ID    Q0    Q1    Q2    Q3    Q4
A     0     0     0     0     0
B     0     1     1     1     1
C     0     0     1     1     1
D     0     0     0     1     1
E     0     NA    NA    NA    NA
F     0     1     1     1     1

How can I do this? Perhaps using apply or a for loop?

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(8)

墨小墨 2025-02-19 19:09:26

另一个dplyr + purrr选项可能是:

# Row-wise fill: pmap_dfr() calls the lambda once per row with every column
# except ID, and c(...) collects that row's values into one vector.
# match(1, c(...)) is the position of the first 1 (NA when the row has none);
# `[<-` then sets every position after it to 1. When match() yields NA the
# whole index is NA, so the row comes back unchanged (rows A and E below).
dta %>%
 mutate(pmap_dfr(across(-ID), ~ `[<-`(c(...), seq_along(c(...)) > match(1, c(...)), 1)))

  ID Q0 Q1 Q2 Q3 Q4
1  A  0  0  0  0  0
2  B  0  1  1  1  1
3  C  0  0  1  1  1
4  D  0  0  0  1  1
5  E  0 NA NA NA NA
6  F  0  1  1  1  1

Yet another dplyr + purrr option could be:

# Row-wise fill: pmap_dfr() calls the lambda once per row with every column
# except ID, and c(...) collects that row's values into one vector.
# match(1, c(...)) is the position of the first 1 (NA when the row has none);
# `[<-` then sets every position after it to 1. When match() yields NA the
# whole index is NA, so the row comes back unchanged (rows A and E below).
dta %>%
 mutate(pmap_dfr(across(-ID), ~ `[<-`(c(...), seq_along(c(...)) > match(1, c(...)), 1)))

  ID Q0 Q1 Q2 Q3 Q4
1  A  0  0  0  0  0
2  B  0  1  1  1  1
3  C  0  0  1  1  1
4  D  0  0  0  1  1
5  E  0 NA NA NA NA
6  F  0  1  1  1  1
捶死心动 2025-02-19 19:09:26

通过循环保持简单:

# Propagate 1s from left to right: whenever the previous column holds a 1,
# force the current column to 1 as well. Column 1 is ID and column 2 is the
# first value column, so the scan starts at column 3.
# seq_len(...)[-(1:2)] is empty when there are fewer than 3 columns, whereas
# 3:ncol(dta) would count down (3, 2) and index columns that do not exist.
for (i in seq_len(ncol(dta))[-(1:2)]) {
  # which() drops NA comparisons, so an NA in the previous column leaves the
  # current value untouched.
  hit <- which(dta[[i - 1]] == 1)
  dta[[i]][hit] <- 1
}
    #   ID Q0 Q1 Q2 Q3 Q4
    # 1  A  0  0  0  0  0
    # 2  B  0  1  1  1  1
    # 3  C  0  0  1  1  1
    # 4  D  0  0  0  1  1
    # 5  E  0 NA NA NA NA
    # 6  F  0  1  1  1  1

受 Yuriy 的启发,使用 dplyr + data.table:

library(dplyr)
library(data.table)
# Convert dta to a data.table so the `:=` assignment syntax below is available.
setDT(dta)

# For each ID, overwrite every column except the first with
# cumany(.SD == 1): a running "has a 1 appeared yet" flag across that group's
# non-ID columns (.SD).
# NOTE(review): cumany() returns logicals; this relies on data.table coercing
# them back into the numeric columns — confirm on your data.table version.
# NOTE(review): assumes one row per ID; verify behaviour with duplicate IDs.
dta[, (names(dta)[-1]) := as.list(cumany(.SD == 1)), by = ID]

Keeping things simple with a loop:

# Propagate 1s from left to right: whenever the previous column holds a 1,
# force the current column to 1 as well. Column 1 is ID and column 2 is the
# first value column, so the scan starts at column 3.
# seq_len(...)[-(1:2)] is empty when there are fewer than 3 columns, whereas
# 3:ncol(dta) would count down (3, 2) and index columns that do not exist.
for (i in seq_len(ncol(dta))[-(1:2)]) {
  # which() drops NA comparisons, so an NA in the previous column leaves the
  # current value untouched.
  hit <- which(dta[[i - 1]] == 1)
  dta[[i]][hit] <- 1
}
    #   ID Q0 Q1 Q2 Q3 Q4
    # 1  A  0  0  0  0  0
    # 2  B  0  1  1  1  1
    # 3  C  0  0  1  1  1
    # 4  D  0  0  0  1  1
    # 5  E  0 NA NA NA NA
    # 6  F  0  1  1  1  1

With dplyr + data.table inspired by Yuriy:

library(dplyr)
library(data.table)
# Convert dta to a data.table so the `:=` assignment syntax below is available.
setDT(dta)

# For each ID, overwrite every column except the first with
# cumany(.SD == 1): a running "has a 1 appeared yet" flag across that group's
# non-ID columns (.SD).
# NOTE(review): cumany() returns logicals; this relies on data.table coercing
# them back into the numeric columns — confirm on your data.table version.
# NOTE(review): assumes one row per ID; verify behaviour with duplicate IDs.
dta[, (names(dta)[-1]) := as.list(cumany(.SD == 1)), by = ID]
小草泠泠 2025-02-19 19:09:26

使用 na.locf 的一种方案

library(zoo)
# i1 flags the rows whose largest non-NA value across the non-ID columns is
# not 0, i.e. rows that contain at least one non-zero entry.
i1 <- do.call(pmax, c(dta[-1], na.rm = TRUE))!= 0
# For the flagged rows only: transpose so each row becomes a column, carry the
# last observed value forward over NAs with na.locf(), and transpose back.
# Rows that are not flagged are skipped, so their NAs are left untouched.
dta[-1][i1,] <- t(na.locf(as.data.frame(t(dta[-1][i1,]))))

-Output

> dta
  ID Q0 Q1 Q2 Q3 Q4
1  A  0  0  0  0  0
2  B  0  1  1  1  1
3  C  0  0  1  1  1
4  D  0  0  0  1  1
5  E  0 NA NA NA NA
6  F  0  1  1  1  1

An option with na.locf

library(zoo)
# i1 flags the rows whose largest non-NA value across the non-ID columns is
# not 0, i.e. rows that contain at least one non-zero entry.
i1 <- do.call(pmax, c(dta[-1], na.rm = TRUE))!= 0
# For the flagged rows only: transpose so each row becomes a column, carry the
# last observed value forward over NAs with na.locf(), and transpose back.
# Rows that are not flagged are skipped, so their NAs are left untouched.
dta[-1][i1,] <- t(na.locf(as.data.frame(t(dta[-1][i1,]))))

-output

> dta
  ID Q0 Q1 Q2 Q3 Q4
1  A  0  0  0  0  0
2  B  0  1  1  1  1
3  C  0  0  1  1  1
4  D  0  0  0  1  1
5  E  0 NA NA NA NA
6  F  0  1  1  1  1
王权女流氓 2025-02-19 19:09:26

我又找到了一种使用 pivot(长宽表转换)的方法:

library(tidyr)
library(dplyr)

# Reshape to long form (one row per ID/column pair), forward-fill a copy of
# the values within each ID, use the filled copy wherever it is not 0, and
# reshape back to wide form.
dta %>%
  pivot_longer(-ID) %>%
  group_by(ID) %>%
  # value2 is a working copy so the original NAs in `value` survive fill().
  mutate(value2 = value) %>%
  fill(value2) %>%
  # While the last observed value is 0, keep the original entry (so an NA
  # after a 0 stays NA); otherwise take the carried-forward value.
  mutate(value = ifelse(value2 == 0, value, value2)) %>%
  # Drop the grouping so the final result is a plain, ungrouped tibble.
  ungroup() %>%
  select(-value2) %>%
  pivot_wider(names_from = name, values_from = value)
  ID       Q0    Q1    Q2    Q3    Q4
  <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 A         0     0     0     0     0
2 B         0     1     1     1     1
3 C         0     0     1     1     1
4 D         0     0     0     1     1
5 E         0    NA    NA    NA    NA
6 F         0     1     1     1     1

I found one more with pivoting:

library(tidyr)
library(dplyr)

# Reshape to long form (one row per ID/column pair), forward-fill a copy of
# the values within each ID, use the filled copy wherever it is not 0, and
# reshape back to wide form.
dta %>%
  pivot_longer(-ID) %>%
  group_by(ID) %>%
  # value2 is a working copy so the original NAs in `value` survive fill().
  mutate(value2 = value) %>%
  fill(value2) %>%
  # While the last observed value is 0, keep the original entry (so an NA
  # after a 0 stays NA); otherwise take the carried-forward value.
  mutate(value = ifelse(value2 == 0, value, value2)) %>%
  # Drop the grouping so the final result is a plain, ungrouped tibble.
  ungroup() %>%
  select(-value2) %>%
  pivot_wider(names_from = name, values_from = value)
  ID       Q0    Q1    Q2    Q3    Q4
  <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 A         0     0     0     0     0
2 B         0     1     1     1     1
3 C         0     0     1     1     1
4 D         0     0     0     1     1
5 E         0    NA    NA    NA    NA
6 F         0     1     1     1     1
十六岁半 2025-02-19 19:09:26

另一个可能的解决方案:

library(dplyr)

# Row-wise fill inside mutate(): for every row of the non-ID columns, if the
# row's maximum is 1, set everything from the first 1 to the end of the row
# to 1; otherwise return the row unchanged. t() restores the row/column
# orientation that apply() flips, and as_tibble lets mutate() splice the
# columns back in by name.
dta %>%
  mutate(t(apply(.[-1], 1, \(x) {
    # Guard all-NA rows: max(x, na.rm = TRUE) on them would warn and
    # return -Inf. TRUE is spelled out because T can be reassigned.
    if (any(!is.na(x)) && max(x, na.rm = TRUE) == 1) {
      # which.max() gives the position of the first maximum, i.e. the first 1.
      x[which.max(x):length(x)] <- 1
    }
    x
  })) %>% as_tibble)

#>   ID Q0 Q1 Q2 Q3 Q4
#> 1  A  0  0  0  0  0
#> 2  B  0  1  1  1  1
#> 3  C  0  0  1  1  1
#> 4  D  0  0  0  1  1
#> 5  E  0 NA NA NA NA
#> 6  F  0  1  1  1  1

Another possible solution:

library(dplyr)

# Row-wise fill inside mutate(): for every row of the non-ID columns, if the
# row's maximum is 1, set everything from the first 1 to the end of the row
# to 1; otherwise return the row unchanged. t() restores the row/column
# orientation that apply() flips, and as_tibble lets mutate() splice the
# columns back in by name.
dta %>%
  mutate(t(apply(.[-1], 1, \(x) {
    # Guard all-NA rows: max(x, na.rm = TRUE) on them would warn and
    # return -Inf. TRUE is spelled out because T can be reassigned.
    if (any(!is.na(x)) && max(x, na.rm = TRUE) == 1) {
      # which.max() gives the position of the first maximum, i.e. the first 1.
      x[which.max(x):length(x)] <- 1
    }
    x
  })) %>% as_tibble)

#>   ID Q0 Q1 Q2 Q3 Q4
#> 1  A  0  0  0  0  0
#> 2  B  0  1  1  1  1
#> 3  C  0  0  1  1  1
#> 4  D  0  0  0  1  1
#> 5  E  0 NA NA NA NA
#> 6  F  0  1  1  1  1
止于盛夏 2025-02-19 19:09:26
# Rebuild the example data: an ID column plus five 0/1/NA columns Q0..Q4.
ID <- c("A", "B", "C", "D", "E", "F")
Q0 <- c(0, 0, 0, 0, 0, 0)
Q1 <- c(0, 1, 0, 0, NA, 1) 
Q2 <- c(0, NA, 1, 0, NA, 1) 
Q3 <- c(0, NA, NA, 1, NA, 1) 
Q4 <- c(0, NA, NA, 1, NA, 1)

df <- data.frame(ID, Q0, Q1, Q2, Q3, Q4)

# For each row, cumany(x == 1) is a running "a 1 has been seen" flag and the
# unary + turns the logical result into numbers. apply() returns the rows as
# columns, hence the t() before writing back into the non-ID columns.
# NOTE(review): positions after an NA appear to stay NA until a 1 shows up
# (see row E in the output) — confirm cumany()'s NA handling if a 0 can
# follow an NA in real data.
df[-1] <- t(apply(df[-1], 1, function(x) +(dplyr::cumany(x == 1))))
df
#>   ID Q0 Q1 Q2 Q3 Q4
#> 1  A  0  0  0  0  0
#> 2  B  0  1  1  1  1
#> 3  C  0  0  1  1  1
#> 4  D  0  0  0  1  1
#> 5  E  0 NA NA NA NA
#> 6  F  0  1  1  1  1

# Rebuild the example data: an ID column plus five 0/1/NA columns Q0..Q4.
ID <- c("A", "B", "C", "D", "E", "F")
Q0 <- c(0, 0, 0, 0, 0, 0)
Q1 <- c(0, 1, 0, 0, NA, 1) 
Q2 <- c(0, NA, 1, 0, NA, 1) 
Q3 <- c(0, NA, NA, 1, NA, 1) 
Q4 <- c(0, NA, NA, 1, NA, 1)

df <- data.frame(ID, Q0, Q1, Q2, Q3, Q4)

# For each row, cumany(x == 1) is a running "a 1 has been seen" flag and the
# unary + turns the logical result into numbers. apply() returns the rows as
# columns, hence the t() before writing back into the non-ID columns.
# NOTE(review): positions after an NA appear to stay NA until a 1 shows up
# (see row E in the output) — confirm cumany()'s NA handling if a 0 can
# follow an NA in real data.
df[-1] <- t(apply(df[-1], 1, function(x) +(dplyr::cumany(x == 1))))
df
#>   ID Q0 Q1 Q2 Q3 Q4
#> 1  A  0  0  0  0  0
#> 2  B  0  1  1  1  1
#> 3  C  0  0  1  1  1
#> 4  D  0  0  0  1  1
#> 5  E  0 NA NA NA NA
#> 6  F  0  1  1  1  1

Created on 2022-07-04 by the reprex package (v2.0.1)

萌吟 2025-02-19 19:09:26

您可以在同一个 mutate 中创建所有变量

# Cascade the 1s in a single mutate(): the arguments are evaluated in order,
# so each column sees the already-updated column to its left. which() skips
# NA comparisons, leaving those entries as they were.
dta %>%
  mutate(
    Q2 = replace(Q2, which(Q1 == 1), 1),
    Q3 = replace(Q3, which(Q2 == 1), 1),
    Q4 = replace(Q4, which(Q3 == 1), 1)
  )

  ID Q0 Q1 Q2 Q3 Q4
1  A  0  0  0  0  0
2  B  0  1  1  1  1
3  C  0  0  1  1  1
4  D  0  0  0  1  1
5  E  0 NA NA NA NA
6  F  0  1  1  1  1

但我不知道是否可以用更程序化的方式来实现这一点

You can create all the variables in the same mutate

# Cascade the 1s in a single mutate(): the arguments are evaluated in order,
# so each column sees the already-updated column to its left. which() skips
# NA comparisons, leaving those entries as they were.
dta %>%
  mutate(
    Q2 = replace(Q2, which(Q1 == 1), 1),
    Q3 = replace(Q3, which(Q2 == 1), 1),
    Q4 = replace(Q4, which(Q3 == 1), 1)
  )

  ID Q0 Q1 Q2 Q3 Q4
1  A  0  0  0  0  0
2  B  0  1  1  1  1
3  C  0  0  1  1  1
4  D  0  0  0  1  1
5  E  0 NA NA NA NA
6  F  0  1  1  1  1

But I don't know if it's possible to do this more programmatically

听不够的曲调 2025-02-19 19:09:26

这是使用 apply 的 base R 方法。

# Row-wise fill with base R only. For each row of the non-ID columns:
#   1. treat NA as 0 and take a running sum, so `seen_one` turns TRUE from
#      the first positive entry onwards;
#   2. encode that flag as 0/1 integers;
#   3. restore NA where the input was NA and no 1 had been seen yet.
# apply() returns the rows as columns, hence the t() before assigning back.
dta[-1] <- t(apply(dta[-1], 1, function(row) {
  seen_one <- cumsum(ifelse(is.na(row), 0, row)) > 0
  filled <- as.integer(seen_one)
  filled[is.na(row) & !seen_one] <- NA
  filled
}))
dta
#>   ID Q0 Q1 Q2 Q3 Q4
#> 1  A  0  0  0  0  0
#> 2  B  0  1  1  1  1
#> 3  C  0  0  1  1  1
#> 4  D  0  0  0  1  1
#> 5  E  0 NA NA NA NA
#> 6  F  0  1  1  1  1

Here is a base R way with apply.

# Row-wise fill with base R only. For each row of the non-ID columns:
#   1. treat NA as 0 and take a running sum, so `seen_one` turns TRUE from
#      the first positive entry onwards;
#   2. encode that flag as 0/1 integers;
#   3. restore NA where the input was NA and no 1 had been seen yet.
# apply() returns the rows as columns, hence the t() before assigning back.
dta[-1] <- t(apply(dta[-1], 1, function(row) {
  seen_one <- cumsum(ifelse(is.na(row), 0, row)) > 0
  filled <- as.integer(seen_one)
  filled[is.na(row) & !seen_one] <- NA
  filled
}))
dta
#>   ID Q0 Q1 Q2 Q3 Q4
#> 1  A  0  0  0  0  0
#> 2  B  0  1  1  1  1
#> 3  C  0  0  1  1  1
#> 4  D  0  0  0  1  1
#> 5  E  0 NA NA NA NA
#> 6  F  0  1  1  1  1

Created on 2022-07-04 by the reprex package (v2.0.1)

~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文