按空格拆分列

发布于 2025-01-10 18:23:28 字数 716 浏览 0 评论 0原文

DATA <- data.frame("V1" = c("SCORE : 9.931 5.092",
                            "SCORE : 6.00007 15.1248",
                            "SCORE : 1.0002 12.987532",
                            "SCORE : 3.1 3.98532"))
WANT <- data.frame("VAR1" = c(9.931, 6.00007, 1.0002, 3.1),
                   'VAR2' = c(5.092, 15.1248, 12.987532, 3.98532))

我手头的学生考试成绩数据是按 'DATA' 中所示的形式录入的,我希望将其拆分成 'WANT' 数据框所示的样子:第一个数字放在 'VAR1' 中,第二个数字放在 'VAR2' 中,并忽略空格

我的尝试:

DATA[, c("VAR1", "VAR2") := trimws(V1, whitespace = ".*:\\s+")]

产生: 输入图片此处描述

DATA <- data.frame("V1" = c("SCORE : 9.931 5.092",
                            "SCORE : 6.00007 15.1248",
                            "SCORE : 1.0002 12.987532",
                            "SCORE : 3.1 3.98532"))
WANT <- data.frame("VAR1" = c(9.931, 6.00007, 1.0002, 3.1),
                   'VAR2' = c(5.092, 15.1248, 12.987532, 3.98532))

What I have is student test score data that was entered like shown in 'DATA' but I wish to split it so that what I have is like what is shown in the 'WANT' frame where the first number is in 'VAR1' and the second number is in 'VAR2' ignoring the spaces

MY ATTEMPT:

DATA[, c("VAR1", "VAR2") := trimws(V1, whitespace = ".*:\\s+")]

PRODUCE:
enter image description here

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(1)

全部不再 2025-01-17 18:23:29

我们可以使用 trimws 删除前缀子字符串,再用 read.table(sep 保持默认值,即以空白字符分隔)读取该列,从而在 base R 中得到一个两列的 data.frame

read.table(text = trimws(DATA$V1, whitespace = ".*:\\s+"), 
   header = FALSE, col.names = c("VAR1", "VAR2"))
     VAR1     VAR2
1 9.93100  5.09200
2 6.00007 15.12480
3 1.00020 12.98753
4 3.10000  3.98532

或者可以使用 tidyr 中的 extract

library(tidyr)
extract(DATA, V1, into = c("VAR1", "VAR2"),
    ".*:\\s+([0-9.]+)\\s+([0-9.]+)", convert = TRUE)
     VAR1     VAR2
1 9.93100  5.09200
2 6.00007 15.12480
3 1.00020 12.98753
4 3.10000  3.98532

如果我们想要 data.table,可以用同样的方法:先删除前缀子字符串,再用 fread 读取

library(data.table)
fread(text = setDT(DATA)[, trimws(V1, whitespace = ".*:\\s+")], 
   col.names = c("VAR1", "VAR2"))
      VAR1     VAR2
     <num>    <num>
1: 9.93100  5.09200
2: 6.00007 15.12480
3: 1.00020 12.98753
4: 3.10000  3.98532

或者使用 fread 中的 select 选项

fread(text = DATA$V1, select = c(3, 4), col.names = c("VAR1", "VAR2"))
     VAR1     VAR2
     <num>    <num>
1: 9.93100  5.09200
2: 6.00007 15.12480
3: 1.00020 12.98753
4: 3.10000  3.98532

或者先读取为四列,再取其中需要的两列

fread(text = DATA$V1)[, .(VAR1 = V3, VAR2 = V4)]
     VAR1     VAR2
     <num>    <num>
1: 9.93100  5.09200
2: 6.00007 15.12480
3: 1.00020 12.98753
4: 3.10000  3.98532

或者可以使用 tstrsplit

setDT(DATA)[, c("VAR1", "VAR2") := tstrsplit(trimws(V1, 
       whitespace = ".*:\\s+"), " ")]
DATA <- type.convert(DATA, as.is = TRUE)
DATA
                         V1    VAR1     VAR2
                     <char>   <num>    <num>
1:      SCORE : 9.931 5.092 9.93100  5.09200
2:  SCORE : 6.00007 15.1248 6.00007 15.12480
3: SCORE : 1.0002 12.987532 1.00020 12.98753
4:      SCORE : 3.1 3.98532 3.10000  3.98532

We can remove the prefix substring with trimws and read the column using read.table, with sep left at its default (whitespace), to return a two-column data.frame in base R

read.table(text = trimws(DATA$V1, whitespace = ".*:\\s+"), 
   header = FALSE, col.names = c("VAR1", "VAR2"))
     VAR1     VAR2
1 9.93100  5.09200
2 6.00007 15.12480
3 1.00020 12.98753
4 3.10000  3.98532

Or may use extract from tidyr

library(tidyr)
extract(DATA, V1, into = c("VAR1", "VAR2"),
    ".*:\\s+([0-9.]+)\\s+([0-9.]+)", convert = TRUE)
     VAR1     VAR2
1 9.93100  5.09200
2 6.00007 15.12480
3 1.00020 12.98753
4 3.10000  3.98532

If we want a data.table, the same approach works: remove the prefix substring, then read the result with fread

library(data.table)
fread(text = setDT(DATA)[, trimws(V1, whitespace = ".*:\\s+")], 
   col.names = c("VAR1", "VAR2"))
      VAR1     VAR2
     <num>    <num>
1: 9.93100  5.09200
2: 6.00007 15.12480
3: 1.00020 12.98753
4: 3.10000  3.98532

Or use the select option in fread

fread(text = DATA$V1, select = c(3, 4), col.names = c("VAR1", "VAR2"))
     VAR1     VAR2
     <num>    <num>
1: 9.93100  5.09200
2: 6.00007 15.12480
3: 1.00020 12.98753
4: 3.10000  3.98532

Or read as four columns, and subset

fread(text = DATA$V1)[, .(VAR1 = V3, VAR2 = V4)]
     VAR1     VAR2
     <num>    <num>
1: 9.93100  5.09200
2: 6.00007 15.12480
3: 1.00020 12.98753
4: 3.10000  3.98532

Or can use tstrsplit

setDT(DATA)[, c("VAR1", "VAR2") := tstrsplit(trimws(V1, 
       whitespace = ".*:\\s+"), " ")]
DATA <- type.convert(DATA, as.is = TRUE)
DATA
                         V1    VAR1     VAR2
                     <char>   <num>    <num>
1:      SCORE : 9.931 5.092 9.93100  5.09200
2:  SCORE : 6.00007 15.1248 6.00007 15.12480
3: SCORE : 1.0002 12.987532 1.00020 12.98753
4:      SCORE : 3.1 3.98532 3.10000  3.98532
~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文