如何在R中的某个标记后剪切列表中的数据框？

发布于 2025-01-12 01:52:12 字数 2912 浏览 4 评论 0原文

我想在某个标记之后截断我的数据框。也就是说，当 V1 中第一次连续出现 3 次或更多次 TRUE（= 标记）之后，我想截断列表中的每个数据框，并取其后的 4 行作为列表中的新数据框。

library(dplyr)
library(purrr)   # map()
library(tibble)  # as_tibble(), add_column()

set.seed(94756)

# Draw one 14 x 5 sample tibble with columns V1..V5.
# Column names are set explicitly so as_tibble() does not have to fall back on
# its deprecated automatic naming of unnamed matrices.
make_sample <- function() {
  m <- matrix(sample(seq(-1, 100, 0.11), 70, replace = TRUE), ncol = 5)
  colnames(m) <- paste0("V", seq_len(ncol(m)))
  as_tibble(m)
}

# The three draws happen in the same order as before, so the seed yields the
# same values.
mat1 <- make_sample()

mat2 <- make_sample()
mat2[3, 1] <- NA
mat2[6, 1] <- NA

mat3 <- make_sample()
mat3[4, 1] <- NA

data <- list(mat1, mat2, mat3)

# Flag rows whose V1 lies in [20, 80]; an NA in V1 gives an NA flag.
data1 <- map(
  data,
  ~ add_column(.x, V1_logical = between(.x$V1, 20, 80), .after = "V1")
)

# Keep only the flag column of every data frame (still a one-column tibble).
# Selecting by name instead of position 2 survives column reordering.
r_pre <- lapply(data1, function(df) df["V1_logical"])

也许为列表中的每个数据帧添加一个 ID 列会很有帮助

# Stack the per-data-frame flag columns into one table; `ID` records which
# list element each row came from, then split the table back apart by that ID.
# rbindlist() is namespace-qualified because the script above only attaches
# dplyr, so an unqualified call would not be found.
r_pre1 <- data.table::rbindlist(r_pre, idcol = "ID")
r_pre1 <- split(r_pre1, r_pre1$ID)

所以结果应该是这样的：

# Expected output, written row by row (the way the tibbles print).
# as.data.frame() names the columns of an unnamed matrix V1..V5.
mat1re <- as.data.frame(rbind(
  c(93.16, 56.75, -0.01, 20.89, 66.54),
  c(47.18, 57.85, 14.95, 32.55, 56.75),
  c(12.86, 18.69, 46.08, 91.73, 92.94),
  c(38.71,  3.18, 96.46, 58.73, 77.54)
))

mat2re <- as.data.frame(rbind(
  c(87.99, 89.42, 89.86, 47.18, 18.69),
  c(53.23, 81.28, 78.75, 22.98, 77.21),
  c(40.36, 36.84, 76.77, 34.64, 58.29),
  c( 0.65, 73.58, 61.81, 25.18, 94.04)
))

mat3re <- as.data.frame(rbind(
  c(81.50, 33.21, 72.15, 47.07, 80.07),
  c(43.55, 70.83, -0.45, 47.95, 67.75),
  c(54.55, 21.66, 11.65, 88.10, 14.84),
  c( 9.45, 88.10, 15.06, 81.50, 10.33)
))

# One expected data frame per input data frame, in the same order.
result <- list(mat1re, mat2re, mat3re)

我已经尝试过的：

# Recode the logical flag as numeric 0/1 (NA stays NA). Assigning 1 into a
# logical vector already coerced the whole column to numeric, so one explicit
# conversion replaces the two masked assignments.
data2 <- lapply(data1, function(x) {
  x$V1_logical <- as.numeric(x$V1_logical)
  x
})

# The former second pass (FALSE -> 0) changed nothing after that coercion;
# `data3` is kept so later code that refers to it still works.
data3 <- data2

# Run-length id of the 0/1 flag, inserted right after it. The helpers are
# namespace-qualified because only dplyr is attached by the script above.
data4 <- purrr::map(
  data3,
  ~ tibble::add_column(
    .x,
    ind = data.table::rleid(.x$V1_logical),
    .after = "V1_logical"
  )
)

因此，在 data4 中需要找到标记：$V1_logical = 1 且 $ind 为连续出现 3 次或更多次的同一数字（例如 5、5、5），然后把标记之前的数据（包括标记本身）切掉；换句话说，就是从标记之后开始新的数据框。

下面的代码也很接近，但当数据中包含 NA 时，它不会把开头部分（包括标记）切掉……请看这里列表中的第二个元素：开头部分和标记都没有被删除。

# Attempt under discussion (kept as posted): for each data frame, flag V1
# values in [20, 80], number the runs of equal flags with rleid(), give rows of
# TRUE-runs of length >= 3 their within-run row number (rn, NA elsewhere), then
# keep 4 rows starting one past which.max(rn).
# NOTE(review): between() yields NA where V1 is NA, so an NA row interrupts a
# run of TRUEs; this looks like the reason the second data frame is not cut.
# NOTE(review): `replace_na = TRUE` is not an NA option of max(); it is passed
# through `...` and treated as one more value (TRUE == 1) in the maximum.
matrix_final <- map(data, ~ .x %>% 
                      mutate(V1_logical = between(V1, 20, 80), ind = rleid(V1_logical), .after = "V1") %>% 
                      group_by(ind) %>% 
                      mutate(rn = if(n() >=3 && first(V1_logical)) row_number() else NA_integer_) %>% 
                      ungroup  %>% 
                      slice(seq(max(which.max(rn) + 1, 1, replace_na = TRUE), length.out = 4)) %>% 
                      # V1_logical is not dropped here, so it stays in the output.
                      select(-ind, -rn) %>%
                      mutate(across(everything(), round, digits = 2)))

# Show the second data frame, the one whose V1 has NA in rows 3 and 6.
print(matrix_final[[2]])

提前致谢！

原文

I would like to cut my data frames after a certain marker. That is, after the first time TRUE shows up 3 or more times in a row (= the marker) in V1, I would like to cut each data frame in the list and keep the next 4 rows as the new data frame in the list.

library(dplyr)
library(purrr)   # map()
library(tibble)  # as_tibble(), add_column()

set.seed(94756)

# Draw one 14 x 5 sample tibble with columns V1..V5.
# Column names are set explicitly so as_tibble() does not have to fall back on
# its deprecated automatic naming of unnamed matrices.
make_sample <- function() {
  m <- matrix(sample(seq(-1, 100, 0.11), 70, replace = TRUE), ncol = 5)
  colnames(m) <- paste0("V", seq_len(ncol(m)))
  as_tibble(m)
}

# The three draws happen in the same order as before, so the seed yields the
# same values.
mat1 <- make_sample()

mat2 <- make_sample()
mat2[3, 1] <- NA
mat2[6, 1] <- NA

mat3 <- make_sample()
mat3[4, 1] <- NA

data <- list(mat1, mat2, mat3)

# Flag rows whose V1 lies in [20, 80]; an NA in V1 gives an NA flag.
data1 <- map(
  data,
  ~ add_column(.x, V1_logical = between(.x$V1, 20, 80), .after = "V1")
)

# Keep only the flag column of every data frame (still a one-column tibble).
# Selecting by name instead of position 2 survives column reordering.
r_pre <- lapply(data1, function(df) df["V1_logical"])

Maybe it is helpful to add an ID column for each dataframe within the list

# Stack the per-data-frame flag columns into one table; `ID` records which
# list element each row came from, then split the table back apart by that ID.
# rbindlist() is namespace-qualified because the script above only attaches
# dplyr, so an unqualified call would not be found.
r_pre1 <- data.table::rbindlist(r_pre, idcol = "ID")
r_pre1 <- split(r_pre1, r_pre1$ID)

So the result should be like:

# Expected output, written row by row (the way the tibbles print).
# as.data.frame() names the columns of an unnamed matrix V1..V5.
mat1re <- as.data.frame(rbind(
  c(93.16, 56.75, -0.01, 20.89, 66.54),
  c(47.18, 57.85, 14.95, 32.55, 56.75),
  c(12.86, 18.69, 46.08, 91.73, 92.94),
  c(38.71,  3.18, 96.46, 58.73, 77.54)
))

mat2re <- as.data.frame(rbind(
  c(87.99, 89.42, 89.86, 47.18, 18.69),
  c(53.23, 81.28, 78.75, 22.98, 77.21),
  c(40.36, 36.84, 76.77, 34.64, 58.29),
  c( 0.65, 73.58, 61.81, 25.18, 94.04)
))

mat3re <- as.data.frame(rbind(
  c(81.50, 33.21, 72.15, 47.07, 80.07),
  c(43.55, 70.83, -0.45, 47.95, 67.75),
  c(54.55, 21.66, 11.65, 88.10, 14.84),
  c( 9.45, 88.10, 15.06, 81.50, 10.33)
))

# One expected data frame per input data frame, in the same order.
result <- list(mat1re, mat2re, mat3re)

What I've tried already:

# Recode the logical flag as numeric 0/1 (NA stays NA). Assigning 1 into a
# logical vector already coerced the whole column to numeric, so one explicit
# conversion replaces the two masked assignments.
data2 <- lapply(data1, function(x) {
  x$V1_logical <- as.numeric(x$V1_logical)
  x
})

# The former second pass (FALSE -> 0) changed nothing after that coercion;
# `data3` is kept so later code that refers to it still works.
data3 <- data2

# Run-length id of the 0/1 flag, inserted right after it. The helpers are
# namespace-qualified because only dplyr is attached by the script above.
data4 <- purrr::map(
  data3,
  ~ tibble::add_column(
    .x,
    ind = data.table::rleid(.x$V1_logical),
    .after = "V1_logical"
  )
)

So in data4 the task is to find the marker: $V1_logical = 1 and $ind equal to a number that shows up >= 3 times consecutively (e.g. 5, 5, 5), and then cut away everything before it, including the marker itself; in other words, start the new data frames right after the marker.

The following code is also close, but it doesn't cut out the beginning (including the marker) when NAs are present in the data. Have a look at the second list element here: the beginning and the marker are not cut out.

# Attempt under discussion (kept as posted): for each data frame, flag V1
# values in [20, 80], number the runs of equal flags with rleid(), give rows of
# TRUE-runs of length >= 3 their within-run row number (rn, NA elsewhere), then
# keep 4 rows starting one past which.max(rn).
# NOTE(review): between() yields NA where V1 is NA, so an NA row interrupts a
# run of TRUEs; this looks like the reason the second data frame is not cut.
# NOTE(review): `replace_na = TRUE` is not an NA option of max(); it is passed
# through `...` and treated as one more value (TRUE == 1) in the maximum.
matrix_final <- map(data, ~ .x %>% 
                      mutate(V1_logical = between(V1, 20, 80), ind = rleid(V1_logical), .after = "V1") %>% 
                      group_by(ind) %>% 
                      mutate(rn = if(n() >=3 && first(V1_logical)) row_number() else NA_integer_) %>% 
                      ungroup  %>% 
                      slice(seq(max(which.max(rn) + 1, 1, replace_na = TRUE), length.out = 4)) %>% 
                      # V1_logical is not dropped here, so it stays in the output.
                      select(-ind, -rn) %>%
                      mutate(across(everything(), round, digits = 2)))

# Show the second data frame, the one whose V1 has NA in rows 3 and 6.
print(matrix_final[[2]])

Thanks in advance!

分享到QQ

分享到微博

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

需要登录才能够评论，你可以免费注册一个本站的账号。

楠木可依 2025-01-19 01:52:12

我们可以使用 map 循环遍历 list，使用 between 在“V1”上创建逻辑列，使用 rleid 创建分组列（返回一个序列列，当相邻元素的值发生变化时该序列会递增），并根据条件对行进行切片

library(dplyr)
library(purrr)
library(data.table)
library(tidyr)

# For every data frame in `data`, keep the 4 rows that follow the FIRST run of
# three or more consecutive in-range V1 values (the "marker"). If a data frame
# has no marker, its first 4 rows are returned. Values are rounded to 2 digits.
#
# between() and first() are written as dplyr:: because attaching data.table
# after dplyr masks both names.
map(data, function(df) {
  df %>%
    # An NA in V1 counts as "out of range" so it cannot be part of a marker.
    mutate(
      V1_logical = replace_na(dplyr::between(V1, 20, 80), FALSE),
      ind = rleid(V1_logical),
      .after = "V1"
    ) %>%
    group_by(ind) %>%
    # TRUE only on the last row of a run of >= 3 consecutive TRUE flags.
    mutate(
      marker_end = n() >= 3 & dplyr::first(V1_logical) & row_number() == n()
    ) %>%
    ungroup() %>%
    # which.max() on within-run row numbers would pick the longest run; taking
    # the first marker end picks the first run. No marker -> start at row 1.
    slice(seq(coalesce(which(marker_end)[1], 0L) + 1L, length.out = 4)) %>%
    select(-ind, -marker_end, -V1_logical) %>%
    # Lambda form: passing `digits` through across()'s `...` is deprecated.
    mutate(across(everything(), ~ round(.x, digits = 2)))
})

输出（按条件对行进行切片后的结果）

[[1]]
# A tibble: 4 × 5
     V1    V2      V3    V4    V5
  <dbl> <dbl>   <dbl> <dbl> <dbl>
1  93.2 56.8  -0.0100  20.9  66.5
2  47.2 57.8  15.0     32.6  56.8
3  12.9 18.7  46.1     91.7  92.9
4  38.7  3.18 96.5     58.7  77.5

[[2]]
# A tibble: 4 × 5
     V1    V2    V3    V4    V5
  <dbl> <dbl> <dbl> <dbl> <dbl>
1 88.0   89.4  89.9  47.2  18.7
2 53.2   81.3  78.8  23.0  77.2
3 40.4   36.8  76.8  34.6  58.3
4  0.65  73.6  61.8  25.2  94.0

[[3]]
# A tibble: 4 × 5
     V1    V2    V3    V4    V5
  <dbl> <dbl> <dbl> <dbl> <dbl>
1 81.5   33.2 72.2   47.1  80.1
2 43.6   70.8 -0.45  48.0  67.8
3 54.6   21.7 11.6   88.1  14.8
4  9.45  88.1 15.1   81.5  10.3

We may loop over the list with map, create the logical column on 'V1' with between, create a grouping column with rleid (returns a sequence column that increments when there is a change in value in adjacent elements) and slice the rows based on the condition

library(dplyr)
library(purrr)
library(data.table)
library(tidyr)

# For every data frame in `data`, keep the 4 rows that follow the FIRST run of
# three or more consecutive in-range V1 values (the "marker"). If a data frame
# has no marker, its first 4 rows are returned. Values are rounded to 2 digits.
#
# between() and first() are written as dplyr:: because attaching data.table
# after dplyr masks both names.
map(data, function(df) {
  df %>%
    # An NA in V1 counts as "out of range" so it cannot be part of a marker.
    mutate(
      V1_logical = replace_na(dplyr::between(V1, 20, 80), FALSE),
      ind = rleid(V1_logical),
      .after = "V1"
    ) %>%
    group_by(ind) %>%
    # TRUE only on the last row of a run of >= 3 consecutive TRUE flags.
    mutate(
      marker_end = n() >= 3 & dplyr::first(V1_logical) & row_number() == n()
    ) %>%
    ungroup() %>%
    # which.max() on within-run row numbers would pick the longest run; taking
    # the first marker end picks the first run. No marker -> start at row 1.
    slice(seq(coalesce(which(marker_end)[1], 0L) + 1L, length.out = 4)) %>%
    select(-ind, -marker_end, -V1_logical) %>%
    # Lambda form: passing `digits` through across()'s `...` is deprecated.
    mutate(across(everything(), ~ round(.x, digits = 2)))
})

-output

[[1]]
# A tibble: 4 × 5
     V1    V2      V3    V4    V5
  <dbl> <dbl>   <dbl> <dbl> <dbl>
1  93.2 56.8  -0.0100  20.9  66.5
2  47.2 57.8  15.0     32.6  56.8
3  12.9 18.7  46.1     91.7  92.9
4  38.7  3.18 96.5     58.7  77.5

[[2]]
# A tibble: 4 × 5
     V1    V2    V3    V4    V5
  <dbl> <dbl> <dbl> <dbl> <dbl>
1 88.0   89.4  89.9  47.2  18.7
2 53.2   81.3  78.8  23.0  77.2
3 40.4   36.8  76.8  34.6  58.3
4  0.65  73.6  61.8  25.2  94.0

[[3]]
# A tibble: 4 × 5
     V1    V2    V3    V4    V5
  <dbl> <dbl> <dbl> <dbl> <dbl>
1 81.5   33.2 72.2   47.1  80.1
2 43.6   70.8 -0.45  48.0  67.8
3 54.6   21.7 11.6   88.1  14.8
4  9.45  88.1 15.1   81.5  10.3

回复收藏 0 原文

~没有更多了~