如何在 R 中为 dplyr::distinct() 命令使用外部变量列表?

发布于 2025-01-09 20:59:49 字数 3961 浏览 1 评论 0原文

如何在 R 中为 dplyr::distinct() 命令使用外部变量列表?

例如,我想使用下面这个外部变量列表,作为对 mtcars 数据集执行 distinct 命令的依据:

## builds the external list of variable names
# ---- NOTE: one-column data frame converted straight into a tibble;
# ---- NOTE: the single column carries the same name as the object itself
external_list_of_vars_df <- as_tibble(
  data.frame(external_list_of_vars_df = c("gear", "carb", "am"))
)
# ---- NOTE: auto-prints the tibble
external_list_of_vars_df
> external_list_of_vars_df
# A tibble: 3 × 1
  external_list_of_vars_df
  <chr>                   
1 gear                    
2 carb                    
3 am   

我可以使用长方法,这需要手动输入感兴趣的变量,以完成此任务:

> mtcars_distinct_df_long
# A tibble: 13 × 11
     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
 1  21       6 160     110  3.9   2.62  16.5     0     1     4     4
 2  22.8     4 108      93  3.85  2.32  18.6     1     1     4     1
 3  21.4     6 258     110  3.08  3.22  19.4     1     0     3     1
 4  18.7     8 360     175  3.15  3.44  17.0     0     0     3     2
 5  14.3     8 360     245  3.21  3.57  15.8     0     0     3     4
 6  24.4     4 147.     62  3.69  3.19  20       1     0     4     2
 7  19.2     6 168.    123  3.92  3.44  18.3     1     0     4     4
 8  16.4     8 276.    180  3.07  4.07  17.4     0     0     3     3
 9  30.4     4  75.7    52  4.93  1.62  18.5     1     1     4     2
10  26       4 120.     91  4.43  2.14  16.7     0     1     5     2
11  15.8     8 351     264  4.22  3.17  14.5     0     1     5     4
12  19.7     6 145     175  3.62  2.77  15.5     0     1     5     6
13  15       8 301     335  3.54  3.57  14.6     0     1     5     8

当我尝试使用快捷方式时,它不起作用:

## my short way to create mtcars_distinct_df_external, by passing the external list of variables
# ---- NOTE: creates object
# ---- NOTE: attempts to hand the column of variable names to distinct() through vars()
mtcars_distinct_df_external <- 
  as_tibble(
    mtcars %>% 
      distinct(vars(external_list_of_vars_df$external_list_of_vars_df), .keep_all = TRUE)
  )
# ---- NOTE: displays data
mtcars_distinct_df_external
> mtcars_distinct_df_external
# A tibble: 1 × 12
    mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb `vars(external_list_of_vars_df$external_li…`
  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <quos>                                      
1    21     6   160   110   3.9  2.62  16.5     0     1     4     4 external_list_of_vars_df$external_list_of_v…
> # ---- NOTE: does not work

这个任务可以实现吗?如果可以,该怎么做?

提前致谢。



这是我用来生成示例的代码:

# how to use external list of vars for dplyr::distinct() command

## loads package(s)
# ---- NOTE: installs tidyverse only when it is missing, then always attaches it;
# ---- NOTE: require() alone leaves the package unattached right after a fresh install
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
library(tidyverse)

## data for example
mtcars

## creates external_list_of_vars_df
# ---- NOTE: creates object
external_list_of_vars_df <-
  data.frame(
    external_list_of_vars_df =
      c("gear", "carb", "am")
  )
# ---- NOTE: turns object into tibble
external_list_of_vars_df <-
  as_tibble(external_list_of_vars_df)
# ---- NOTE: displays data
external_list_of_vars_df

## long way to create mtcars_distinct_df, by inputting variables manually
# ---- NOTE: creates object
mtcars_distinct_df_long <-
  as_tibble(
    mtcars %>%
      distinct(gear, carb, am, .keep_all = TRUE)
  )
# ---- NOTE: displays data
mtcars_distinct_df_long

## my short way to create mtcars_distinct_df_external, by passing the external list of variables
# ---- NOTE: creates object
# ---- NOTE: attempts to hand the column of variable names to distinct() through vars()
mtcars_distinct_df_external <-
  as_tibble(
    mtcars %>%
      distinct(vars(external_list_of_vars_df$external_list_of_vars_df), .keep_all = TRUE)
  )
# ---- NOTE: displays data
mtcars_distinct_df_external
# ---- NOTE: does not work (prints 1 row with an extra <quos> column instead of the 13 distinct rows)

How does one use an external list of variables for dplyr::distinct() command in r?

For example, I want to use an external list of the following variables as the basis of a distinct command for the mtcars dataset:

## builds the external list of variable names
# ---- NOTE: one-column data frame converted straight into a tibble;
# ---- NOTE: the single column carries the same name as the object itself
external_list_of_vars_df <- as_tibble(
  data.frame(external_list_of_vars_df = c("gear", "carb", "am"))
)
# ---- NOTE: auto-prints the tibble
external_list_of_vars_df
> external_list_of_vars_df
# A tibble: 3 × 1
  external_list_of_vars_df
  <chr>                   
1 gear                    
2 carb                    
3 am   

I can use the long way, which requires inputting the variables of interest manually, to accomplish this task:

> mtcars_distinct_df_long
# A tibble: 13 × 11
     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
 1  21       6 160     110  3.9   2.62  16.5     0     1     4     4
 2  22.8     4 108      93  3.85  2.32  18.6     1     1     4     1
 3  21.4     6 258     110  3.08  3.22  19.4     1     0     3     1
 4  18.7     8 360     175  3.15  3.44  17.0     0     0     3     2
 5  14.3     8 360     245  3.21  3.57  15.8     0     0     3     4
 6  24.4     4 147.     62  3.69  3.19  20       1     0     4     2
 7  19.2     6 168.    123  3.92  3.44  18.3     1     0     4     4
 8  16.4     8 276.    180  3.07  4.07  17.4     0     0     3     3
 9  30.4     4  75.7    52  4.93  1.62  18.5     1     1     4     2
10  26       4 120.     91  4.43  2.14  16.7     0     1     5     2
11  15.8     8 351     264  4.22  3.17  14.5     0     1     5     4
12  19.7     6 145     175  3.62  2.77  15.5     0     1     5     6
13  15       8 301     335  3.54  3.57  14.6     0     1     5     8

When I try to use the shortcut, it does not work:

## my short way to create mtcars_distinct_df_external, by passing the external list of variables
# ---- NOTE: creates object
# ---- NOTE: attempts to hand the column of variable names to distinct() through vars()
mtcars_distinct_df_external <- 
  as_tibble(
    mtcars %>% 
      distinct(vars(external_list_of_vars_df$external_list_of_vars_df), .keep_all = TRUE)
  )
# ---- NOTE: displays data
mtcars_distinct_df_external
> mtcars_distinct_df_external
# A tibble: 1 × 12
    mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb `vars(external_list_of_vars_df$external_li…`
  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <quos>                                      
1    21     6   160   110   3.9  2.62  16.5     0     1     4     4 external_list_of_vars_df$external_list_of_v…
> # ---- NOTE: does not work

Is this task possible? If so, how?

Thanks ahead of time.



Here is the code I used to generate the example:

# how to use external list of vars for dplyr::distinct() command

## loads package(s)
# ---- NOTE: installs tidyverse only when it is missing, then always attaches it;
# ---- NOTE: require() alone leaves the package unattached right after a fresh install
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
library(tidyverse)

## data for example
mtcars

## creates external_list_of_vars_df
# ---- NOTE: creates object
external_list_of_vars_df <-
  data.frame(
    external_list_of_vars_df =
      c("gear", "carb", "am")
  )
# ---- NOTE: turns object into tibble
external_list_of_vars_df <-
  as_tibble(external_list_of_vars_df)
# ---- NOTE: displays data
external_list_of_vars_df

## long way to create mtcars_distinct_df, by inputting variables manually
# ---- NOTE: creates object
mtcars_distinct_df_long <-
  as_tibble(
    mtcars %>%
      distinct(gear, carb, am, .keep_all = TRUE)
  )
# ---- NOTE: displays data
mtcars_distinct_df_long

## my short way to create mtcars_distinct_df_external, by passing the external list of variables
# ---- NOTE: creates object
# ---- NOTE: attempts to hand the column of variable names to distinct() through vars()
mtcars_distinct_df_external <-
  as_tibble(
    mtcars %>%
      distinct(vars(external_list_of_vars_df$external_list_of_vars_df), .keep_all = TRUE)
  )
# ---- NOTE: displays data
mtcars_distinct_df_external
# ---- NOTE: does not work (prints 1 row with an extra <quos> column instead of the 13 distinct rows)

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(2)

笛声青案梦长安 2025-01-16 20:59:49

有两种方法可以在 dplyr 动词内使用变量名的外部向量。

  1. 使用 across(all_of())
library(dplyr)

# Column names to deduplicate on, held outside the dplyr call
external_list_of_vars <- c("gear", "carb", "am")

# all_of() selects the columns named in the character vector
distinct(
  mtcars,
  across(all_of(external_list_of_vars)),
  .keep_all = TRUE
)
  2. 使用整洁求值(tidy evaluation)——具体来说,是 unquote-splice 运算符 !!!:
# syms() turns the names into symbols; !!! splices them in as separate arguments
distinct(mtcars, !!!syms(external_list_of_vars), .keep_all = TRUE)

我不清楚您的名称向量是否已经在数据框中,或者这是否只是您尝试解决问题的一部分。如果是前者,您可以将我的代码中的 external_list_of_vars 替换为 external_list_of_vars_df$external_list_of_vars_df

There are two ways to use an external vector of variable names inside dplyr verbs.

  1. Using across(all_of()):
library(dplyr)

# Column names to deduplicate on, held outside the dplyr call
external_list_of_vars <- c("gear", "carb", "am")

# all_of() selects the columns named in the character vector
distinct(
  mtcars,
  across(all_of(external_list_of_vars)),
  .keep_all = TRUE
)
  2. Using tidy evaluation — specifically, the unquote-splice operator !!!:
# syms() turns the names into symbols; !!! splices them in as separate arguments
distinct(mtcars, !!!syms(external_list_of_vars), .keep_all = TRUE)

It wasn’t clear to me if your names vector is already inside a dataframe, or if that was just part of your attempt to solve the problem. If the former, you can replace external_list_of_vars in my code with external_list_of_vars_df$external_list_of_vars_df.

荒人说梦 2025-01-16 20:59:49
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union

# Convert to a tibble first, then deduplicate on the three hand-typed columns
# while retaining every other column (.keep_all = TRUE)
mtcars_tbl <- as_tibble(mtcars)
distinct(mtcars_tbl, gear, carb, am, .keep_all = TRUE)
#> # A tibble: 13 x 11
#>      mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
#>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#>  1  21       6 160     110  3.9   2.62  16.5     0     1     4     4
#>  2  22.8     4 108      93  3.85  2.32  18.6     1     1     4     1
#>  3  21.4     6 258     110  3.08  3.22  19.4     1     0     3     1
#>  4  18.7     8 360     175  3.15  3.44  17.0     0     0     3     2
#>  5  14.3     8 360     245  3.21  3.57  15.8     0     0     3     4
#>  6  24.4     4 147.     62  3.69  3.19  20       1     0     4     2
#>  7  19.2     6 168.    123  3.92  3.44  18.3     1     0     4     4
#>  8  16.4     8 276.    180  3.07  4.07  17.4     0     0     3     3
#>  9  30.4     4  75.7    52  4.93  1.62  18.5     1     1     4     2
#> 10  26       4 120.     91  4.43  2.14  16.7     0     1     5     2
#> 11  15.8     8 351     264  4.22  3.17  14.5     0     1     5     4
#> 12  19.7     6 145     175  3.62  2.77  15.5     0     1     5     6
#> 13  15       8 301     335  3.54  3.57  14.6     0     1     5     8

reprex 软件包 (v2.0.1) 创建于 2022 年 2 月 25 日

library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union

# Convert to a tibble first, then deduplicate on the three hand-typed columns
# while retaining every other column (.keep_all = TRUE)
mtcars_tbl <- as_tibble(mtcars)
distinct(mtcars_tbl, gear, carb, am, .keep_all = TRUE)
#> # A tibble: 13 x 11
#>      mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
#>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#>  1  21       6 160     110  3.9   2.62  16.5     0     1     4     4
#>  2  22.8     4 108      93  3.85  2.32  18.6     1     1     4     1
#>  3  21.4     6 258     110  3.08  3.22  19.4     1     0     3     1
#>  4  18.7     8 360     175  3.15  3.44  17.0     0     0     3     2
#>  5  14.3     8 360     245  3.21  3.57  15.8     0     0     3     4
#>  6  24.4     4 147.     62  3.69  3.19  20       1     0     4     2
#>  7  19.2     6 168.    123  3.92  3.44  18.3     1     0     4     4
#>  8  16.4     8 276.    180  3.07  4.07  17.4     0     0     3     3
#>  9  30.4     4  75.7    52  4.93  1.62  18.5     1     1     4     2
#> 10  26       4 120.     91  4.43  2.14  16.7     0     1     5     2
#> 11  15.8     8 351     264  4.22  3.17  14.5     0     1     5     4
#> 12  19.7     6 145     175  3.62  2.77  15.5     0     1     5     6
#> 13  15       8 301     335  3.54  3.57  14.6     0     1     5     8

Created on 2022-02-25 by the reprex package (v2.0.1)

~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文