仅当字符串的第三部分存在时，才将其提取到新列

发布于 2025-01-26 13:27:21 字数 3716 浏览 2 评论 0原文

我的数据如下：

数据

library(dplyr)
# Example input 1: one row per type, one integer count column per interval.
# The interval column names carry no trailing label. `Sum_bin` holds doubles
# and `strata` is a list column of numeric break points.
dat_in_one <- structure(
  list(
    rn = c("Type_A", "Type_B"),
    `[0,25)` = c(5L, 0L),
    `[25,50)` = c(0L, 0L),
    `[25,100)` = c(38L, 3L),
    `[50,100)` = c(0L, 0L),
    `[100,250)` = c(43L, 5L),
    `[100,500)` = c(0L, 0L),
    `[250,500)` = c(27L, 12L),
    `[500,1000)` = c(44L, 0L),
    `[1000,1500)` = c(0L, 0L),
    `[1500,3000)` = c(0L, 0L),
    `[500,1000000]` = c(0L, 53L),
    `[1000,1000000]` = c(20L, 0L),
    `[3000,1000000]` = c(0L, 0L),
    Sum_bin = c(177, 73),
    strata = list(
      c(0, 25, 100, 250, 500, 1000, 1e+06),
      c(0, 25, 100, 250, 500, 1e+06)
    )
  ),
  row.names = c(NA, -2L),
  class = c("data.table", "data.frame")
)

library(dplyr)
# Example input 2: same counts as dat_in_one, but every interval column name
# is followed by a space and a word (East / South / West / North).
dat_in_two <- structure(
  list(
    rn = c("Type_A", "Type_B"),
    `[0,25) East` = c(5L, 0L),
    `[25,50) South` = c(0L, 0L),
    `[25,100) West` = c(38L, 3L),
    `[50,100) North` = c(0L, 0L),
    `[100,250) East` = c(43L, 5L),
    `[100,500) South` = c(0L, 0L),
    `[250,500) South` = c(27L, 12L),
    `[500,1000) South` = c(44L, 0L),
    `[1000,1500) South` = c(0L, 0L),
    `[1500,3000) South` = c(0L, 0L),
    `[500,1000000] East` = c(0L, 53L),
    `[1000,1000000] South` = c(20L, 0L),
    `[3000,1000000] South` = c(0L, 0L),
    Sum_bin = c(177, 73),
    strata = list(
      c(0, 25, 100, 250, 500, 1000, 1e+06),
      c(0, 25, 100, 250, 500, 1e+06)
    )
  ),
  row.names = c(NA, -2L),
  class = c("data.table", "data.frame")
)

问题

我想调整下面这段代码（它从 strata 列中提取数字，并据此创建两个新列）：

# Reshape every column except `rn` and `strata` to long form, then split the
# former column names (now in `name`) into `lower` and `upper` using the two
# digit groups around the comma. convert = TRUE lets the new columns be
# type-converted (the printed result shows them as <int>).
dat_in_one %>%
  pivot_longer(cols = -c(rn, strata)) %>%
  extract(
    col = name,
    into = c("lower", "upper"),
    regex = "(\\d+),(\\d+)",
    convert = TRUE
  )


# A tibble: 28 x 5
   rn     strata    lower upper value
   <chr>  <list>    <int> <int> <dbl>
 1 Type_A <dbl [7]>     0    25     5
 2 Type_A <dbl [7]>    25    50     0
 3 Type_A <dbl [7]>    25   100    38
 4 Type_A <dbl [7]>    50   100     0
 5 Type_A <dbl [7]>   100   250    43
 6 Type_A <dbl [7]>   100   500     0
# ... with 22 more rows

它目前会忽略 dat_in_two 中的 East、South 等词。我想调整这段代码，使其同时适用于 dat_in_one 和 dat_in_two，并且对于 dat_in_two 会创建第三列。我尝试了下面的写法，但完全不起作用。

# Asker's attempt, kept verbatim. The pattern demands a second comma after the
# upper bound, but the dat_in_two column names continue with ")" or "]" and a
# space (e.g. `[0,25) East`), so the pattern does not match those names.
dat_in_two %>%
  pivot_longer(-c(rn, strata)) %>%
  extract(name, c('lower', 'upper', 'the_rest'), '(\\d+),(\\d+),(\\w+)', convert = TRUE)

dat_in_one的期望结果

# A tibble: 28 x 5
   rn     strata    lower upper value
   <chr>  <list>    <int> <int> <dbl>
 1 Type_A <dbl [7]>     0    25     5
 2 Type_A <dbl [7]>    25    50     0
 3 Type_A <dbl [7]>    25   100    38
 4 Type_A <dbl [7]>    50   100     0
 5 Type_A <dbl [7]>   100   250    43
 6 Type_A <dbl [7]>   100   500     0
# ... with 22 more rows

dat_in_two的期望结果

# A tibble: 28 x 6
   rn     strata    lower upper rest  value
   <chr>  <list>    <int> <int> <chr> <dbl>
 1 Type_A <dbl [7]>     0    25 East      5
 2 Type_A <dbl [7]>    25    50 South     0
 3 Type_A <dbl [7]>    25   100 West     38
 4 Type_A <dbl [7]>    50   100 North     0
 5 Type_A <dbl [7]>   100   250 East     43
 6 Type_A <dbl [7]>   100   500 South     0
# ... with 22 more rows

原文

I have data as follows:

Data

library(dplyr)
# Example input 1: one row per type, one integer count column per interval.
# The interval column names carry no trailing label. `Sum_bin` holds doubles
# and `strata` is a list column of numeric break points.
dat_in_one <- structure(
  list(
    rn = c("Type_A", "Type_B"),
    `[0,25)` = c(5L, 0L),
    `[25,50)` = c(0L, 0L),
    `[25,100)` = c(38L, 3L),
    `[50,100)` = c(0L, 0L),
    `[100,250)` = c(43L, 5L),
    `[100,500)` = c(0L, 0L),
    `[250,500)` = c(27L, 12L),
    `[500,1000)` = c(44L, 0L),
    `[1000,1500)` = c(0L, 0L),
    `[1500,3000)` = c(0L, 0L),
    `[500,1000000]` = c(0L, 53L),
    `[1000,1000000]` = c(20L, 0L),
    `[3000,1000000]` = c(0L, 0L),
    Sum_bin = c(177, 73),
    strata = list(
      c(0, 25, 100, 250, 500, 1000, 1e+06),
      c(0, 25, 100, 250, 500, 1e+06)
    )
  ),
  row.names = c(NA, -2L),
  class = c("data.table", "data.frame")
)

library(dplyr)
# Example input 2: same counts as dat_in_one, but every interval column name
# is followed by a space and a word (East / South / West / North).
dat_in_two <- structure(
  list(
    rn = c("Type_A", "Type_B"),
    `[0,25) East` = c(5L, 0L),
    `[25,50) South` = c(0L, 0L),
    `[25,100) West` = c(38L, 3L),
    `[50,100) North` = c(0L, 0L),
    `[100,250) East` = c(43L, 5L),
    `[100,500) South` = c(0L, 0L),
    `[250,500) South` = c(27L, 12L),
    `[500,1000) South` = c(44L, 0L),
    `[1000,1500) South` = c(0L, 0L),
    `[1500,3000) South` = c(0L, 0L),
    `[500,1000000] East` = c(0L, 53L),
    `[1000,1000000] South` = c(20L, 0L),
    `[3000,1000000] South` = c(0L, 0L),
    Sum_bin = c(177, 73),
    strata = list(
      c(0, 25, 100, 250, 500, 1000, 1e+06),
      c(0, 25, 100, 250, 500, 1e+06)
    )
  ),
  row.names = c(NA, -2L),
  class = c("data.table", "data.frame")
)

Issue

I would like to adapt this piece of code below (which extracts the numbers from the strata column and creates two new columns from it):

# Reshape every column except `rn` and `strata` to long form, then split the
# former column names (now in `name`) into `lower` and `upper` using the two
# digit groups around the comma. convert = TRUE lets the new columns be
# type-converted (the printed result shows them as <int>).
dat_in_one %>%
  pivot_longer(cols = -c(rn, strata)) %>%
  extract(
    col = name,
    into = c("lower", "upper"),
    regex = "(\\d+),(\\d+)",
    convert = TRUE
  )


# A tibble: 28 x 5
   rn     strata    lower upper value
   <chr>  <list>    <int> <int> <dbl>
 1 Type_A <dbl [7]>     0    25     5
 2 Type_A <dbl [7]>    25    50     0
 3 Type_A <dbl [7]>    25   100    38
 4 Type_A <dbl [7]>    50   100     0
 5 Type_A <dbl [7]>   100   250    43
 6 Type_A <dbl [7]>   100   500     0
# ... with 22 more rows

It currently ignores the terms East, South, etc. in dat_in_two. I would like to adapt this code so that it works on both dat_in_one and dat_in_two, where for dat_in_two it creates a third column. I tried the following, but it does not work at all.

# Asker's attempt, kept verbatim. The pattern demands a second comma after the
# upper bound, but the dat_in_two column names continue with ")" or "]" and a
# space (e.g. `[0,25) East`), so the pattern does not match those names.
dat_in_two %>%
  pivot_longer(-c(rn, strata)) %>%
  extract(name, c('lower', 'upper', 'the_rest'), '(\\d+),(\\d+),(\\w+)', convert = TRUE)

Desired outcome for dat_in_one

# A tibble: 28 x 5
   rn     strata    lower upper value
   <chr>  <list>    <int> <int> <dbl>
 1 Type_A <dbl [7]>     0    25     5
 2 Type_A <dbl [7]>    25    50     0
 3 Type_A <dbl [7]>    25   100    38
 4 Type_A <dbl [7]>    50   100     0
 5 Type_A <dbl [7]>   100   250    43
 6 Type_A <dbl [7]>   100   500     0
# ... with 22 more rows

Desired outcome for dat_in_two

# A tibble: 28 x 6
   rn     strata    lower upper rest  value
   <chr>  <list>    <int> <int> <chr> <dbl>
 1 Type_A <dbl [7]>     0    25 East      5
 2 Type_A <dbl [7]>    25    50 South     0
 3 Type_A <dbl [7]>    25   100 West     38
 4 Type_A <dbl [7]>    50   100 North     0
 5 Type_A <dbl [7]>   100   250 East     43
 6 Type_A <dbl [7]>   100   500 South     0
# ... with 22 more rows

分享到QQ

分享到微博

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

需要登录才能够评论，你可以免费注册一个本站的账号。

流星番茄 2025-02-02 13:27:21

这样如何：

# Example input 1: one row per type, one integer count column per interval.
# The interval column names carry no trailing label. `Sum_bin` holds doubles
# and `strata` is a list column of numeric break points.
dat_in_one <- structure(
  list(
    rn = c("Type_A", "Type_B"),
    `[0,25)` = c(5L, 0L),
    `[25,50)` = c(0L, 0L),
    `[25,100)` = c(38L, 3L),
    `[50,100)` = c(0L, 0L),
    `[100,250)` = c(43L, 5L),
    `[100,500)` = c(0L, 0L),
    `[250,500)` = c(27L, 12L),
    `[500,1000)` = c(44L, 0L),
    `[1000,1500)` = c(0L, 0L),
    `[1500,3000)` = c(0L, 0L),
    `[500,1000000]` = c(0L, 53L),
    `[1000,1000000]` = c(20L, 0L),
    `[3000,1000000]` = c(0L, 0L),
    Sum_bin = c(177, 73),
    strata = list(
      c(0, 25, 100, 250, 500, 1000, 1e+06),
      c(0, 25, 100, 250, 500, 1e+06)
    )
  ),
  row.names = c(NA, -2L),
  class = c("data.table", "data.frame")
)

# Example input 2: same counts as dat_in_one, but every interval column name
# is followed by a space and a word (East / South / West / North).
dat_in_two <- structure(
  list(
    rn = c("Type_A", "Type_B"),
    `[0,25) East` = c(5L, 0L),
    `[25,50) South` = c(0L, 0L),
    `[25,100) West` = c(38L, 3L),
    `[50,100) North` = c(0L, 0L),
    `[100,250) East` = c(43L, 5L),
    `[100,500) South` = c(0L, 0L),
    `[250,500) South` = c(27L, 12L),
    `[500,1000) South` = c(44L, 0L),
    `[1000,1500) South` = c(0L, 0L),
    `[1500,3000) South` = c(0L, 0L),
    `[500,1000000] East` = c(0L, 53L),
    `[1000,1000000] South` = c(20L, 0L),
    `[3000,1000000] South` = c(0L, 0L),
    Sum_bin = c(177, 73),
    strata = list(
      c(0, 25, 100, 250, 500, 1000, 1e+06),
      c(0, 25, 100, 250, 500, 1e+06)
    )
  ),
  row.names = c(NA, -2L),
  class = c("data.table", "data.frame")
)


library(dplyr)
library(tidyr)

# Predicate for column selection: TRUE when `x` has no non-missing element
# (which includes the zero-length case).
is_all_na <- function(x) {
  !any(!is.na(x))
}

# Long-format reshape, then split the old column names into `lower`, `upper`
# and `rest`. The pattern skips the closing "]" or ")" and any whitespace
# after the upper bound; the third group (`\\w*`) may match nothing, so the
# same pattern also fits names that have no trailing word. Columns that end
# up entirely NA are removed in the last step.
dat_in_two %>%
  pivot_longer(cols = -c(rn, strata)) %>%
  extract(
    col = name,
    into = c("lower", "upper", "rest"),
    regex = "(\\d+),(\\d+)[\\]\\)]\\s*(\\w*)",
    convert = TRUE
  ) %>%
  select(-where(is_all_na))
#> # A tibble: 28 × 6
#>    rn     strata    lower upper rest  value
#>    <chr>  <list>    <int> <int> <chr> <dbl>
#>  1 Type_A <dbl [7]>     0    25 East      5
#>  2 Type_A <dbl [7]>    25    50 South     0
#>  3 Type_A <dbl [7]>    25   100 West     38
#>  4 Type_A <dbl [7]>    50   100 North     0
#>  5 Type_A <dbl [7]>   100   250 East     43
#>  6 Type_A <dbl [7]>   100   500 South     0
#>  7 Type_A <dbl [7]>   250   500 South    27
#>  8 Type_A <dbl [7]>   500  1000 South    44
#>  9 Type_A <dbl [7]>  1000  1500 South     0
#> 10 Type_A <dbl [7]>  1500  3000 South     0
#> # … with 18 more rows

# Same pipeline applied to the names without a trailing word: the optional
# third group matches nothing here, and the final step removes every column
# that is entirely NA (the printed result has no `rest` column).
dat_in_one %>%
  pivot_longer(cols = -c(rn, strata)) %>%
  extract(
    col = name,
    into = c("lower", "upper", "rest"),
    regex = "(\\d+),(\\d+)[\\]\\)]\\s*(\\w*)",
    convert = TRUE
  ) %>%
  select(-where(is_all_na))
#> # A tibble: 28 × 5
#>    rn     strata    lower upper value
#>    <chr>  <list>    <int> <int> <dbl>
#>  1 Type_A <dbl [7]>     0    25     5
#>  2 Type_A <dbl [7]>    25    50     0
#>  3 Type_A <dbl [7]>    25   100    38
#>  4 Type_A <dbl [7]>    50   100     0
#>  5 Type_A <dbl [7]>   100   250    43
#>  6 Type_A <dbl [7]>   100   500     0
#>  7 Type_A <dbl [7]>   250   500    27
#>  8 Type_A <dbl [7]>   500  1000    44
#>  9 Type_A <dbl [7]>  1000  1500     0
#> 10 Type_A <dbl [7]>  1500  3000     0
#> # … with 18 more rows

^{由 reprex package (v2.0.1) 创建于 2022-05-04}

How about this:

# Example input 1: one row per type, one integer count column per interval.
# The interval column names carry no trailing label. `Sum_bin` holds doubles
# and `strata` is a list column of numeric break points.
dat_in_one <- structure(
  list(
    rn = c("Type_A", "Type_B"),
    `[0,25)` = c(5L, 0L),
    `[25,50)` = c(0L, 0L),
    `[25,100)` = c(38L, 3L),
    `[50,100)` = c(0L, 0L),
    `[100,250)` = c(43L, 5L),
    `[100,500)` = c(0L, 0L),
    `[250,500)` = c(27L, 12L),
    `[500,1000)` = c(44L, 0L),
    `[1000,1500)` = c(0L, 0L),
    `[1500,3000)` = c(0L, 0L),
    `[500,1000000]` = c(0L, 53L),
    `[1000,1000000]` = c(20L, 0L),
    `[3000,1000000]` = c(0L, 0L),
    Sum_bin = c(177, 73),
    strata = list(
      c(0, 25, 100, 250, 500, 1000, 1e+06),
      c(0, 25, 100, 250, 500, 1e+06)
    )
  ),
  row.names = c(NA, -2L),
  class = c("data.table", "data.frame")
)

# Example input 2: same counts as dat_in_one, but every interval column name
# is followed by a space and a word (East / South / West / North).
dat_in_two <- structure(
  list(
    rn = c("Type_A", "Type_B"),
    `[0,25) East` = c(5L, 0L),
    `[25,50) South` = c(0L, 0L),
    `[25,100) West` = c(38L, 3L),
    `[50,100) North` = c(0L, 0L),
    `[100,250) East` = c(43L, 5L),
    `[100,500) South` = c(0L, 0L),
    `[250,500) South` = c(27L, 12L),
    `[500,1000) South` = c(44L, 0L),
    `[1000,1500) South` = c(0L, 0L),
    `[1500,3000) South` = c(0L, 0L),
    `[500,1000000] East` = c(0L, 53L),
    `[1000,1000000] South` = c(20L, 0L),
    `[3000,1000000] South` = c(0L, 0L),
    Sum_bin = c(177, 73),
    strata = list(
      c(0, 25, 100, 250, 500, 1000, 1e+06),
      c(0, 25, 100, 250, 500, 1e+06)
    )
  ),
  row.names = c(NA, -2L),
  class = c("data.table", "data.frame")
)


library(dplyr)
library(tidyr)

# Predicate for column selection: TRUE when `x` has no non-missing element
# (which includes the zero-length case).
is_all_na <- function(x) {
  !any(!is.na(x))
}

# Long-format reshape, then split the old column names into `lower`, `upper`
# and `rest`. The pattern skips the closing "]" or ")" and any whitespace
# after the upper bound; the third group (`\\w*`) may match nothing, so the
# same pattern also fits names that have no trailing word. Columns that end
# up entirely NA are removed in the last step.
dat_in_two %>%
  pivot_longer(cols = -c(rn, strata)) %>%
  extract(
    col = name,
    into = c("lower", "upper", "rest"),
    regex = "(\\d+),(\\d+)[\\]\\)]\\s*(\\w*)",
    convert = TRUE
  ) %>%
  select(-where(is_all_na))
#> # A tibble: 28 × 6
#>    rn     strata    lower upper rest  value
#>    <chr>  <list>    <int> <int> <chr> <dbl>
#>  1 Type_A <dbl [7]>     0    25 East      5
#>  2 Type_A <dbl [7]>    25    50 South     0
#>  3 Type_A <dbl [7]>    25   100 West     38
#>  4 Type_A <dbl [7]>    50   100 North     0
#>  5 Type_A <dbl [7]>   100   250 East     43
#>  6 Type_A <dbl [7]>   100   500 South     0
#>  7 Type_A <dbl [7]>   250   500 South    27
#>  8 Type_A <dbl [7]>   500  1000 South    44
#>  9 Type_A <dbl [7]>  1000  1500 South     0
#> 10 Type_A <dbl [7]>  1500  3000 South     0
#> # … with 18 more rows

# Same pipeline applied to the names without a trailing word: the optional
# third group matches nothing here, and the final step removes every column
# that is entirely NA (the printed result has no `rest` column).
dat_in_one %>%
  pivot_longer(cols = -c(rn, strata)) %>%
  extract(
    col = name,
    into = c("lower", "upper", "rest"),
    regex = "(\\d+),(\\d+)[\\]\\)]\\s*(\\w*)",
    convert = TRUE
  ) %>%
  select(-where(is_all_na))
#> # A tibble: 28 × 5
#>    rn     strata    lower upper value
#>    <chr>  <list>    <int> <int> <dbl>
#>  1 Type_A <dbl [7]>     0    25     5
#>  2 Type_A <dbl [7]>    25    50     0
#>  3 Type_A <dbl [7]>    25   100    38
#>  4 Type_A <dbl [7]>    50   100     0
#>  5 Type_A <dbl [7]>   100   250    43
#>  6 Type_A <dbl [7]>   100   500     0
#>  7 Type_A <dbl [7]>   250   500    27
#>  8 Type_A <dbl [7]>   500  1000    44
#>  9 Type_A <dbl [7]>  1000  1500     0
#> 10 Type_A <dbl [7]>  1500  3000     0
#> # … with 18 more rows

^{Created on 2022-05-04 by the reprex package (v2.0.1)}

回复收藏 0 原文

~没有更多了~