根据来自R的另一个DF2的信息添加列到数据框中

发布于 2025-01-21 17:26:19 字数 1833 浏览 0 评论 0原文

让我们说我有2个类似的数据帧:

Model <- c("H5", "H5", "H5","H4","H3")
Code <- c("001001", "001002","001003","001004","001005")
City <-  c("Mexico", "London", "NY", "Otawa", "Liverpool")

df1 <- data.frame(Model,Length,Code)


Model   Code       City
H5      001001     Mexico  
H5      001002     London
H5      001003     NY
H4      001004     Otawa
H3      001005     Liverpool

X <- c("030299", "010121","030448","030324","010245","001001", "001002","001003","001004","001005")
Y <- c("030344", "010222","030448","030001","010245","221001", "221044","221044","221004"," 001005")
Var1 <- c("H5", "H5", "H4","H4","H4","H5", "H5", "H5","H4","H3")
Var2 <- c("H4", "H2", "H4","H3","H4","H3", "H3", "H3","H3","H3")

  df2 <- data.frame(X,Y,Var1,Var2)

  X            Y     VAR1   VAR2
030299      030344    H5     H4
010121      010222    H5     H2
030448      030448    H4     H4
030324      030001    H4     H3
010245      010245    H4     H4
001001      221001    H5     H3
001002      221044    H5     H3
001003      221044    H5     H3
001004      221004    H4     H3
001005      001005    H3     H3

想编码以下:

例如,如果我选择h3作为函数中的参数,我想从DF1中的“代码”列中获取所有值,请考虑其相应的值“模型”列并根据DF2信息从“代码”列转换这些值。例如,如果我们从DF1选择第一行,并将H3设置为参数:

  H5      001001    Mexico 

函数必须从DF2中进行相应的行:

   X            Y     VAR1   VAR2
 001001      221001    H5     H3

给我这样的输出:

   X            Y    VAR2  City   
 001001      221001   H3   Mexico   

最终输出应像这样:

  X            Y     VAR2   City 

001001      221001    H3   Mexico  
001002      221044    H3   London  
001003      221044    H3   NY
001004      221004    H3   Otawa  
001005      221056    H3   Liverpool 

lets, say I have 2 dataframes like this:

Model <- c("H5", "H5", "H5","H4","H3")
Code <- c("001001", "001002","001003","001004","001005")
City <-  c("Mexico", "London", "NY", "Otawa", "Liverpool")

df1 <- data.frame(Model,Length,Code)


Model   Code       City
H5      001001     Mexico  
H5      001002     London
H5      001003     NY
H4      001004     Otawa
H3      001005     Liverpool

And

X <- c("030299", "010121","030448","030324","010245","001001", "001002","001003","001004","001005")
Y <- c("030344", "010222","030448","030001","010245","221001", "221044","221044","221004"," 001005")
Var1 <- c("H5", "H5", "H4","H4","H4","H5", "H5", "H5","H4","H3")
Var2 <- c("H4", "H2", "H4","H3","H4","H3", "H3", "H3","H3","H3")

  df2 <- data.frame(X,Y,Var1,Var2)

  X            Y     VAR1   VAR2
030299      030344    H5     H4
010121      010222    H5     H2
030448      030448    H4     H4
030324      030001    H4     H3
010245      010245    H4     H4
001001      221001    H5     H3
001002      221044    H5     H3
001003      221044    H5     H3
001004      221004    H4     H3
001005      001005    H3     H3

I want to code following:

For example if I select H3 as an argument in function, I want to take all values from 'Code' column in df1, take into account its corresponding value in 'Model' column and convert these value from 'Code' column based on df2 information. For example if we select the first row from df1 and set H3 as argument:

  H5      001001    Mexico 

function must take corresponding row from df2:

   X            Y     VAR1   VAR2
 001001      221001    H5     H3

and give me the output like this:

   X            Y    VAR2  City   
 001001      221001   H3   Mexico   

The final output should be like this:

  X            Y     VAR2   City 

001001      221001    H3   Mexico  
001002      221044    H3   London  
001003      221044    H3   NY
001004      221004    H3   Otawa  
001005      221056    H3   Liverpool 

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(4

秉烛思 2025-01-28 17:26:19

也许一开始,这重现了您的示例的结果。

df2 %>% 
  left_join(df1, by = c( "Var1" = "Model", "X" = "Code")) %>% 
  filter(Var2 == "H3", !is.na(City)) %>% 
  select(-Var1)

       X       Y Var2      City
1 001001  221001   H3    Mexico
2 001002  221044   H3    London
3 001003  221044   H3        NY
4 001004  221004   H3     Otawa
5 001005  001005   H3 Liverpool

Maybe something to begin with, this reproduces the result of your example.

df2 %>% 
  left_join(df1, by = c( "Var1" = "Model", "X" = "Code")) %>% 
  filter(Var2 == "H3", !is.na(City)) %>% 
  select(-Var1)

       X       Y Var2      City
1 001001  221001   H3    Mexico
2 001002  221044   H3    London
3 001003  221044   H3        NY
4 001004  221004   H3     Otawa
5 001005  001005   H3 Liverpool
清醇 2025-01-28 17:26:19

遵循您的逻辑,我尝试使用base R:创建一个自定义功能:
它需要3个参数:df1,df2,x x是您要计算的行数。因此,您可以按照示例中的解释选择所有列或一列。

my_function <- function(df1, df2, x){
select_row <- df1[x,]
cbind(df2[X==select_row[,2],c(1:2, 4)],select_row[3])
}

my_function(df1, df2, 1:5)
        X       Y Var2      City
6  001001  221001   H3    Mexico
7  001002  221044   H3    London
8  001003  221044   H3        NY
9  001004  221004   H3     Otawa
10 001005  001005   H3 Liverpool

Following your logic I tried to create a custom function with base R:
It takes 3 arguments: df1, df2, x x is the number of rows you want to calculate. So you can select all columns or just one as you explained in your example.

my_function <- function(df1, df2, x){
select_row <- df1[x,]
cbind(df2[X==select_row[,2],c(1:2, 4)],select_row[3])
}

my_function(df1, df2, 1:5)
        X       Y Var2      City
6  001001  221001   H3    Mexico
7  001002  221044   H3    London
8  001003  221044   H3        NY
9  001004  221004   H3     Otawa
10 001005  001005   H3 Liverpool
嘿哥们儿 2025-01-28 17:26:19

像这样?

library(data.table)
setDT(df1);setDT(df2)
df2[df1, on = .(Var1 = Model, X = Code)]
#         X       Y Var1 Var2      City
# 1: 001001  221001   H5   H3    Mexico
# 2: 001002  221044   H5   H3    London
# 3: 001003  221044   H5   H3        NY
# 4: 001004  221004   H4   H3     Otawa
# 5: 001005  001005   H3   H3 Liverpool

Like this?

library(data.table)
setDT(df1);setDT(df2)
df2[df1, on = .(Var1 = Model, X = Code)]
#         X       Y Var1 Var2      City
# 1: 001001  221001   H5   H3    Mexico
# 2: 001002  221044   H5   H3    London
# 3: 001003  221044   H5   H3        NY
# 4: 001004  221004   H4   H3     Otawa
# 5: 001005  001005   H3   H3 Liverpool
听,心雨的声音 2025-01-28 17:26:19

另一种方法:

数据

library(tidyverse)

Model <- c("H5", "H5", "H5", "H4", "H3")
Code <- c("001001", "001002", "001003", "001004", "001005")
City <- c("Mexico", "London", "NY", "Otawa", "Liverpool")

df1 <- data.frame(Model, Code, City)

X <- c("030299", "010121", "030448", "030324", "010245", "001001", "001002", "001003", "001004", "001005")
Y <- c("030344", "010222", "030448", "030001", "010245", "221001", "221044", "221044", "221004", " 001005")
Var1 <- c("H5", "H5", "H4", "H4", "H4", "H5", "H5", "H5", "H4", "H3")
Var2 <- c("H4", "H2", "H4", "H3", "H4", "H3", "H3", "H3", "H3", "H3")

df2 <- data.frame(X, Y, Var1, Var2)

函数

my_fun <- function(row, var2) {
  df1_data <- df1 %>% slice(row)
  df2 %>%
    filter(Var2 == var2 & X == df1_data$Code) %>%
    mutate(df1_data$City)
}

1:nrow(df1) %>%
  map_dfr(~ my_fun(.x, "H3"))
#>        X       Y Var1 Var2 df1_data$City
#> 1 001001  221001   H5   H3        Mexico
#> 2 001002  221044   H5   H3        London
#> 3 001003  221044   H5   H3            NY
#> 4 001004  221004   H4   H3         Otawa
#> 5 001005  001005   H3   H3     Liverpool

在2022-04-14创建的 reprex package (v2.0.1.1(v2.0.11) )

An alternative approach:

Data

library(tidyverse)

Model <- c("H5", "H5", "H5", "H4", "H3")
Code <- c("001001", "001002", "001003", "001004", "001005")
City <- c("Mexico", "London", "NY", "Otawa", "Liverpool")

df1 <- data.frame(Model, Code, City)

X <- c("030299", "010121", "030448", "030324", "010245", "001001", "001002", "001003", "001004", "001005")
Y <- c("030344", "010222", "030448", "030001", "010245", "221001", "221044", "221044", "221004", " 001005")
Var1 <- c("H5", "H5", "H4", "H4", "H4", "H5", "H5", "H5", "H4", "H3")
Var2 <- c("H4", "H2", "H4", "H3", "H4", "H3", "H3", "H3", "H3", "H3")

df2 <- data.frame(X, Y, Var1, Var2)

Function

my_fun <- function(row, var2) {
  df1_data <- df1 %>% slice(row)
  df2 %>%
    filter(Var2 == var2 & X == df1_data$Code) %>%
    mutate(df1_data$City)
}

1:nrow(df1) %>%
  map_dfr(~ my_fun(.x, "H3"))
#>        X       Y Var1 Var2 df1_data$City
#> 1 001001  221001   H5   H3        Mexico
#> 2 001002  221044   H5   H3        London
#> 3 001003  221044   H5   H3            NY
#> 4 001004  221004   H4   H3         Otawa
#> 5 001005  001005   H3   H3     Liverpool

Created on 2022-04-14 by the reprex package (v2.0.1)

~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文