无法删除 NA 值,且 cut 函数无法正常工作

发布于 2025-02-11 05:09:31 字数 8357 浏览 0 评论 0原文

我已经从数据中删除了所有 NA,只剩下 PRICE 列中的一个,无论怎么尝试都无法删除。我尝试了 na.omit(new_baltimore$PRICE),结果出现如下错误

Error in `$<-.data.frame`(`*tmp*`, PRICE, value = c(47, 113, 165, 104.3,  : 
  replacement has 204 rows, data has 205

我还尝试了 tidyr 包中的 drop_na(new_baltimore, PRICE),但这些方法都不起作用。PRICE 最初是字符向量,但我已将其转换为数值型,class(new_baltimore$PRICE) 返回 numeric,没有问题。我怀疑是这个 NA 导致我在用 cut 函数对数据分组时出了问题,因为

GROUP1 <- cut(1:nrow(new_baltimore), breaks=quantile(new_baltimore$AGE, probs = seq(0, 1, 1/5)), include.lowest=TRUE)

返回的结果中充满了 NA,如下所示

[1] [0,18]    [0,18]    [0,18]    [0,18]    [0,18]    [0,18]    [0,18]   
  [8] [0,18]    [0,18]    [0,18]    [0,18]    [0,18]    [0,18]    [0,18]   
 [15] [0,18]    [0,18]    [0,18]    [0,18]    (18,22.6] (18,22.6] (18,22.6]
 [22] (18,22.6] (22.6,28] (22.6,28] (22.6,28] (22.6,28] (22.6,28] (22.6,28]
 [29] (28,45]   (28,45]   (28,45]   (28,45]   (28,45]   (28,45]   (28,45]  
 [36] (28,45]   (28,45]   (28,45]   (28,45]   (28,45]   (28,45]   (28,45]  
 [43] (28,45]   (28,45]   (28,45]   (45,148]  (45,148]  (45,148]  (45,148] 
 [50] (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148] 
 [57] (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148] 
 [64] (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148] 
 [71] (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148] 
 [78] (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148] 
 [85] (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148] 
 [92] (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148] 
 [99] (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148] 
[106] (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148] 
[113] (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148] 
[120] (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148] 
[127] (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148] 
[134] (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148] 
[141] (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148] 
[148] (45,148]  <NA>      <NA>      <NA>      <NA>      <NA>      <NA>     
[155] <NA>      <NA>      <NA>      <NA>      <NA>      <NA>      <NA>     
[162] <NA>      <NA>      <NA>      <NA>      <NA>      <NA>      <NA>     
[169] <NA>      <NA>      <NA>      <NA>      <NA>      <NA>      <NA>     
[176] <NA>      <NA>      <NA>      <NA>      <NA>      <NA>      <NA>     
[183] <NA>      <NA>      <NA>      <NA>      <NA>      <NA>      <NA>     
[190] <NA>      <NA>      <NA>      <NA>      <NA>      <NA>      <NA>     
[197] <NA>      <NA>      <NA>      <NA>      <NA>      <NA>      <NA>     
[204] <NA>      <NA>     
Levels: [0,18] (18,22.6] (22.6,28] (28,45] (45,148]

可复现的数据就在下面。其中只包含 PRICE、AGE 两列以及另外 1 列,因为该网站不允许我发布更多内容。感谢任何帮助!

baltimore_struct <- structure(list(new_baltimore.PRICE = c(47, 113, 165, 104.3, 62.5, 
70, 127.5, 64.5, 145, 63.5, 58.9, 65, 48, 3.5, 12.8, 17.5, 36, 
41.9, 53.5, 24.5, 24.5, 55.5, 60, 51, 46, 46, 44, 54.9, 42.5, 
44, 44.9, 37.9, 33, 43.9, 49.6, 52, 37.5, 50, 35.9, 42.9, 107, 
112, 44.9, 55, 102, 35.5, 62.9, 39, 110, 8, 62, NA, 85.9, 57, 
110, 67.7, 89.5, 70, 74, 13, 48, 24, 53.5, 34.5, 53, 87.5, 33.5, 
24, 9.6, 30, 41, 30, 38.9, 20.7, 49.9, 18.6, 39, 34, 16, 18.9, 
15.2, 41.5, 53, 22, 24.9, 6.7, 32.5, 30, 59, 29.5, 26, 16.5, 
39, 48.9, 33.5, 46, 54, 57.9, 37.9, 32, 31, 34, 29, 32.5, 51.9, 
31, 41.8, 48, 28, 35, 46.5, 51.9, 35.4, 16, 35, 35, 36.5, 35.9, 
45, 40, 35, 38, 37, 23, 25.5, 39.5, 21.5, 9, 67.5, 13.4, 12.5, 
28.5, 23, 33.5, 9, 11, 30.9, 31.65, 33, 33.4, 47, 40, 46, 45.5, 
57, 29.9, 30, 34, 51, 64.5, 57.5, 85.5, 61, 38, 56.5, 60.4, 51.5, 
54, 69, 56, 27.9, 37.5, 32.9, 22, 29.9, 39.9, 32.6, 38.5, 21.5, 
25.9, 27.5, 22.9, 31.5, 8.5, 5.5, 33, 57, 47, 43.5, 43.9, 68.5, 
44.25, 61, 40, 44.5, 57, 35, 35.1, 64.5, 40, 42.6, 50, 58, 58, 
55, 43, 54, 39, 45, 42, 38.9, 43.215, 26.5, 30, 29.5), new_baltimore.AGE = c(148L, 
9L, 23L, 5L, 19L, 20L, 20L, 22L, 4L, 23L, 20L, 30L, 18L, 75L, 
60L, 65L, 14L, 45L, 14L, 22L, 35L, 5L, 60L, 14L, 19L, 11L, 16L, 
19L, 17L, 24L, 22L, 27L, 3L, 21L, 20L, 4L, 40L, 23L, 35L, 25L, 
17L, 26L, 15L, 29L, 24L, 30L, 19L, 50L, 18L, 74L, 22L, 80L, 24L, 
20L, 7L, 47L, 50L, 45L, 48L, 50L, 48L, 55L, 27L, 20L, 33L, 40L, 
25L, 25L, 40L, 30L, 40L, 22L, 25L, 29L, 49L, 35L, 55L, 30L, 15L, 
40L, 35L, 70L, 25L, 25L, 30L, 30L, 50L, 25L, 70L, 55L, 40L, 70L, 
20L, 20L, 25L, 18L, 20L, 2L, 8L, 25L, 18L, 30L, 35L, 21L, 20L, 
24L, 13L, 25L, 18L, 30L, 20L, 22L, 28L, 50L, 20L, 38L, 17L, 22L, 
27L, 25L, 25L, 25L, 30L, 60L, 22L, 30L, 28L, 45L, 100L, 60L, 
50L, 35L, 50L, 24L, 50L, 33L, 40L, 50L, 25L, 48L, 10L, 45L, 20L, 
25L, 25L, 22L, 21L, 29L, 18L, 2L, 19L, 49L, 10L, 25L, 16L, 17L, 
27L, 34L, 2L, 24L, 23L, 40L, 25L, 45L, 26L, 37L, 15L, 22L, 75L, 
28L, 31L, 100L, 15L, 80L, 75L, 23L, 15L, 21L, 2L, 25L, 23L, 0L, 
4L, 40L, 55L, 21L, 45L, 50L, 5L, 50L, 22L, 22L, 6L, 18L, 18L, 
23L, 3L, 1L, 47L, 21L, 29L, 0L, 29L, 24L, 22L), new_baltimore.SQFT = c(1.0451592, 
2.6867559168, 2.8446910848, 2.4266274048, 2.0475830016, 3.6622378368, 
2.0327185152, 2.378317824, 4.0988821248, 1.8469124352, 1.1222687232, 
1.0210044096, 1.189158912, 2.7675815616, 1.328513472, 1.2746297088, 
1.0999719936, 1.6778289024, 0.9959205888, 0.8324112384, 1.3359457152, 
3.41418672, 1.8580608, 2.1200473728, 2.3095695744, 1.783738368, 
1.0758172032, 2.41547904, 1.337803776, 1.0795333248, 2.1442021632, 
2.2073762304, 1.449287424, 0.9290304, 2.118189312, 1.5570549504, 
2.053157184, 1.3266554112, 1.4269906944, 1.48644864, 2.1404860416, 
2.3170018176, 1.0981139328, 1.1965911552, 1.040514048, 1.6834030848, 
3.55354128, 1.6425257472, 1.7670158208, 3.047219712, 1.4084100864, 
2.3300082432, 2.041544304, 1.170578304, 2.1850795008, 1.6276612608, 
4.4231137344, 1.909157472, 3.2999159808, 0.780385536, 1.2709135872, 
1.3452360192, 1.189158912, 1.189158912, 1.67225472, 1.430706816, 
0.9364626432, 0.8324112384, 0.8324112384, 1.8580608, 1.1965911552, 
1.11483648, 1.6871192064, 1.3266554112, 2.41547904, 1.1166945408, 
1.932383232, 1.0943978112, 0.8063983872, 1.635093504, 1.059094656, 
4.138830432, 4.3032688128, 0.9513271296, 0.891869184, 2.898574848, 
2.452640256, 1.263481344, 2.5529755392, 1.6592482944, 1.6759708416, 
1.3786811136, 0.9717657984, 1.3526682624, 0.6466051584, 0.88257888, 
1.1018300544, 1.1965911552, 1.1445654528, 0.6243084288, 0.9364626432, 
1.449287424, 0.6243084288, 1.0702430208, 1.0925397504, 0.9513271296, 
1.0702430208, 0.8621402112, 0.6243084288, 1.449287424, 1.43999712, 
0.9141659136, 1.449287424, 1.2783458304, 0.9513271296, 0.5351215104, 
0.9364626432, 1.0702430208, 1.128771936, 0.9076627008, 1.3935456, 
1.337803776, 1.34709408, 2.0940345216, 0.9513271296, 0.724643712, 
0.780385536, 1.0145011968, 3.985540416, 0.83612736, 0.97548192, 
0.9364626432, 1.170578304, 0.8324112384, 0.7971080832, 0.7023469824, 
1.003352832, 1.2486168576, 0.9513271296, 1.3415198976, 1.1371332096, 
1.226320128, 0.891869184, 1.4139842688, 2.2445374464, 0.9513271296, 
0.9513271296, 0.9178820352, 2.155350528, 1.6425257472, 2.257543872, 
3.3389352576, 2.006705664, 1.0237915008, 1.95096384, 2.2222407168, 
1.337803776, 2.60128512, 1.0628107776, 2.0382926976, 0.9513271296, 
1.5663452544, 0.9215981568, 1.2486168576, 1.11483648, 1.3712488704, 
0.8324112384, 1.0702430208, 0.8026822656, 0.7543726848, 1.0330818048, 
1.0479462912, 0.9624754944, 1.0702430208, 1.588641984, 1.6276612608, 
0.9968496192, 1.040514048, 1.189158912, 1.11483648, 3.8155278528, 
1.189158912, 2.0773119744, 0.9810561024, 1.2486168576, 1.0237915008, 
1.6703966592, 1.7540093952, 1.0925397504, 0.8695724544, 1.0702430208, 
2.536252992, 2.1404860416, 1.6425257472, 1.2411846144, 1.077675264, 
1.0702430208, 0.9271723392, 1.2040233984, 1.0340108352, 1.820899584, 
1.0702430208, 1.1297009664, 0, 0.9884883456)), class = "data.frame", row.names = c(NA, 
-205L))

I have removed all NA's from my data, except for one in the PRICE column, which I can't remove no matter what I try. I tried na.omit(new_baltimore$PRICE) , which gives me the error

Error in `$<-.data.frame`(`*tmp*`, PRICE, value = c(47, 113, 165, 104.3,  : 
  replacement has 204 rows, data has 205

and drop_na(new_baltimore, PRICE) from the tidyr package, and they do not work. PRICE was a character vector originally, but I have turned it to numerical, and class(new_baltimore$PRICE) returns numerical, no problem. I suspect the NA is causing me issues with subsetting the data with the function cut, as

GROUP1 <- cut(1:nrow(new_baltimore), breaks=quantile(new_baltimore$AGE, probs = seq(0, 1, 1/5)), include.lowest=TRUE)

returns a level full of NA's, like this

[1] [0,18]    [0,18]    [0,18]    [0,18]    [0,18]    [0,18]    [0,18]   
  [8] [0,18]    [0,18]    [0,18]    [0,18]    [0,18]    [0,18]    [0,18]   
 [15] [0,18]    [0,18]    [0,18]    [0,18]    (18,22.6] (18,22.6] (18,22.6]
 [22] (18,22.6] (22.6,28] (22.6,28] (22.6,28] (22.6,28] (22.6,28] (22.6,28]
 [29] (28,45]   (28,45]   (28,45]   (28,45]   (28,45]   (28,45]   (28,45]  
 [36] (28,45]   (28,45]   (28,45]   (28,45]   (28,45]   (28,45]   (28,45]  
 [43] (28,45]   (28,45]   (28,45]   (45,148]  (45,148]  (45,148]  (45,148] 
 [50] (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148] 
 [57] (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148] 
 [64] (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148] 
 [71] (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148] 
 [78] (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148] 
 [85] (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148] 
 [92] (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148] 
 [99] (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148] 
[106] (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148] 
[113] (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148] 
[120] (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148] 
[127] (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148] 
[134] (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148] 
[141] (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148]  (45,148] 
[148] (45,148]  <NA>      <NA>      <NA>      <NA>      <NA>      <NA>     
[155] <NA>      <NA>      <NA>      <NA>      <NA>      <NA>      <NA>     
[162] <NA>      <NA>      <NA>      <NA>      <NA>      <NA>      <NA>     
[169] <NA>      <NA>      <NA>      <NA>      <NA>      <NA>      <NA>     
[176] <NA>      <NA>      <NA>      <NA>      <NA>      <NA>      <NA>     
[183] <NA>      <NA>      <NA>      <NA>      <NA>      <NA>      <NA>     
[190] <NA>      <NA>      <NA>      <NA>      <NA>      <NA>      <NA>     
[197] <NA>      <NA>      <NA>      <NA>      <NA>      <NA>      <NA>     
[204] <NA>      <NA>     
Levels: [0,18] (18,22.6] (22.6,28] (28,45] (45,148]

The reproducible data is right below. Columns PRICE, AGE plus 1 more column, as the site doesn't let me post more. Any help is appreciated!

baltimore_struct <- structure(list(new_baltimore.PRICE = c(47, 113, 165, 104.3, 62.5, 
70, 127.5, 64.5, 145, 63.5, 58.9, 65, 48, 3.5, 12.8, 17.5, 36, 
41.9, 53.5, 24.5, 24.5, 55.5, 60, 51, 46, 46, 44, 54.9, 42.5, 
44, 44.9, 37.9, 33, 43.9, 49.6, 52, 37.5, 50, 35.9, 42.9, 107, 
112, 44.9, 55, 102, 35.5, 62.9, 39, 110, 8, 62, NA, 85.9, 57, 
110, 67.7, 89.5, 70, 74, 13, 48, 24, 53.5, 34.5, 53, 87.5, 33.5, 
24, 9.6, 30, 41, 30, 38.9, 20.7, 49.9, 18.6, 39, 34, 16, 18.9, 
15.2, 41.5, 53, 22, 24.9, 6.7, 32.5, 30, 59, 29.5, 26, 16.5, 
39, 48.9, 33.5, 46, 54, 57.9, 37.9, 32, 31, 34, 29, 32.5, 51.9, 
31, 41.8, 48, 28, 35, 46.5, 51.9, 35.4, 16, 35, 35, 36.5, 35.9, 
45, 40, 35, 38, 37, 23, 25.5, 39.5, 21.5, 9, 67.5, 13.4, 12.5, 
28.5, 23, 33.5, 9, 11, 30.9, 31.65, 33, 33.4, 47, 40, 46, 45.5, 
57, 29.9, 30, 34, 51, 64.5, 57.5, 85.5, 61, 38, 56.5, 60.4, 51.5, 
54, 69, 56, 27.9, 37.5, 32.9, 22, 29.9, 39.9, 32.6, 38.5, 21.5, 
25.9, 27.5, 22.9, 31.5, 8.5, 5.5, 33, 57, 47, 43.5, 43.9, 68.5, 
44.25, 61, 40, 44.5, 57, 35, 35.1, 64.5, 40, 42.6, 50, 58, 58, 
55, 43, 54, 39, 45, 42, 38.9, 43.215, 26.5, 30, 29.5), new_baltimore.AGE = c(148L, 
9L, 23L, 5L, 19L, 20L, 20L, 22L, 4L, 23L, 20L, 30L, 18L, 75L, 
60L, 65L, 14L, 45L, 14L, 22L, 35L, 5L, 60L, 14L, 19L, 11L, 16L, 
19L, 17L, 24L, 22L, 27L, 3L, 21L, 20L, 4L, 40L, 23L, 35L, 25L, 
17L, 26L, 15L, 29L, 24L, 30L, 19L, 50L, 18L, 74L, 22L, 80L, 24L, 
20L, 7L, 47L, 50L, 45L, 48L, 50L, 48L, 55L, 27L, 20L, 33L, 40L, 
25L, 25L, 40L, 30L, 40L, 22L, 25L, 29L, 49L, 35L, 55L, 30L, 15L, 
40L, 35L, 70L, 25L, 25L, 30L, 30L, 50L, 25L, 70L, 55L, 40L, 70L, 
20L, 20L, 25L, 18L, 20L, 2L, 8L, 25L, 18L, 30L, 35L, 21L, 20L, 
24L, 13L, 25L, 18L, 30L, 20L, 22L, 28L, 50L, 20L, 38L, 17L, 22L, 
27L, 25L, 25L, 25L, 30L, 60L, 22L, 30L, 28L, 45L, 100L, 60L, 
50L, 35L, 50L, 24L, 50L, 33L, 40L, 50L, 25L, 48L, 10L, 45L, 20L, 
25L, 25L, 22L, 21L, 29L, 18L, 2L, 19L, 49L, 10L, 25L, 16L, 17L, 
27L, 34L, 2L, 24L, 23L, 40L, 25L, 45L, 26L, 37L, 15L, 22L, 75L, 
28L, 31L, 100L, 15L, 80L, 75L, 23L, 15L, 21L, 2L, 25L, 23L, 0L, 
4L, 40L, 55L, 21L, 45L, 50L, 5L, 50L, 22L, 22L, 6L, 18L, 18L, 
23L, 3L, 1L, 47L, 21L, 29L, 0L, 29L, 24L, 22L), new_baltimore.SQFT = c(1.0451592, 
2.6867559168, 2.8446910848, 2.4266274048, 2.0475830016, 3.6622378368, 
2.0327185152, 2.378317824, 4.0988821248, 1.8469124352, 1.1222687232, 
1.0210044096, 1.189158912, 2.7675815616, 1.328513472, 1.2746297088, 
1.0999719936, 1.6778289024, 0.9959205888, 0.8324112384, 1.3359457152, 
3.41418672, 1.8580608, 2.1200473728, 2.3095695744, 1.783738368, 
1.0758172032, 2.41547904, 1.337803776, 1.0795333248, 2.1442021632, 
2.2073762304, 1.449287424, 0.9290304, 2.118189312, 1.5570549504, 
2.053157184, 1.3266554112, 1.4269906944, 1.48644864, 2.1404860416, 
2.3170018176, 1.0981139328, 1.1965911552, 1.040514048, 1.6834030848, 
3.55354128, 1.6425257472, 1.7670158208, 3.047219712, 1.4084100864, 
2.3300082432, 2.041544304, 1.170578304, 2.1850795008, 1.6276612608, 
4.4231137344, 1.909157472, 3.2999159808, 0.780385536, 1.2709135872, 
1.3452360192, 1.189158912, 1.189158912, 1.67225472, 1.430706816, 
0.9364626432, 0.8324112384, 0.8324112384, 1.8580608, 1.1965911552, 
1.11483648, 1.6871192064, 1.3266554112, 2.41547904, 1.1166945408, 
1.932383232, 1.0943978112, 0.8063983872, 1.635093504, 1.059094656, 
4.138830432, 4.3032688128, 0.9513271296, 0.891869184, 2.898574848, 
2.452640256, 1.263481344, 2.5529755392, 1.6592482944, 1.6759708416, 
1.3786811136, 0.9717657984, 1.3526682624, 0.6466051584, 0.88257888, 
1.1018300544, 1.1965911552, 1.1445654528, 0.6243084288, 0.9364626432, 
1.449287424, 0.6243084288, 1.0702430208, 1.0925397504, 0.9513271296, 
1.0702430208, 0.8621402112, 0.6243084288, 1.449287424, 1.43999712, 
0.9141659136, 1.449287424, 1.2783458304, 0.9513271296, 0.5351215104, 
0.9364626432, 1.0702430208, 1.128771936, 0.9076627008, 1.3935456, 
1.337803776, 1.34709408, 2.0940345216, 0.9513271296, 0.724643712, 
0.780385536, 1.0145011968, 3.985540416, 0.83612736, 0.97548192, 
0.9364626432, 1.170578304, 0.8324112384, 0.7971080832, 0.7023469824, 
1.003352832, 1.2486168576, 0.9513271296, 1.3415198976, 1.1371332096, 
1.226320128, 0.891869184, 1.4139842688, 2.2445374464, 0.9513271296, 
0.9513271296, 0.9178820352, 2.155350528, 1.6425257472, 2.257543872, 
3.3389352576, 2.006705664, 1.0237915008, 1.95096384, 2.2222407168, 
1.337803776, 2.60128512, 1.0628107776, 2.0382926976, 0.9513271296, 
1.5663452544, 0.9215981568, 1.2486168576, 1.11483648, 1.3712488704, 
0.8324112384, 1.0702430208, 0.8026822656, 0.7543726848, 1.0330818048, 
1.0479462912, 0.9624754944, 1.0702430208, 1.588641984, 1.6276612608, 
0.9968496192, 1.040514048, 1.189158912, 1.11483648, 3.8155278528, 
1.189158912, 2.0773119744, 0.9810561024, 1.2486168576, 1.0237915008, 
1.6703966592, 1.7540093952, 1.0925397504, 0.8695724544, 1.0702430208, 
2.536252992, 2.1404860416, 1.6425257472, 1.2411846144, 1.077675264, 
1.0702430208, 0.9271723392, 1.2040233984, 1.0340108352, 1.820899584, 
1.0702430208, 1.1297009664, 0, 0.9884883456)), class = "data.frame", row.names = c(NA, 
-205L))

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(1)

源来凯始玺欢你 2025-02-18 05:09:31

您可以试试下面的方法。(为了清晰起见,我去掉了变量名中的前缀。)

## keep only the rows in which no column is missing
baltimore_struct <- baltimore_struct[complete.cases(baltimore_struct), ]

## bin the AGE values themselves (not the row numbers) at their own quintiles
baltimore_struct$GROUP1 <- with(
  baltimore_struct,
  cut(
    AGE,
    breaks = quantile(AGE, probs = seq(0, 1, 1 / 5)),
    include.lowest = TRUE
  )
)

## result    
head(baltimore_struct)
#   PRICE AGE     SQFT    GROUP1
# 1  47.0 148 1.045159  (45,148]
# 2 113.0   9 2.686756    [0,18]
# 3 165.0  23 2.844691 (22.2,28]
# 4 104.3   5 2.426627    [0,18]
# 5  62.5  19 2.047583 (18,22.2]
# 6  70.0  20 3.662238 (18,22.2]

数据:

baltimore_struct <- structure(list(PRICE = c(47, 113, 165, 104.3, 62.5, 70, 127.5, 
64.5, 145, 63.5, 58.9, 65, 48, 3.5, 12.8, 17.5, 36, 41.9, 53.5, 
24.5, 24.5, 55.5, 60, 51, 46, 46, 44, 54.9, 42.5, 44, 44.9, 37.9, 
33, 43.9, 49.6, 52, 37.5, 50, 35.9, 42.9, 107, 112, 44.9, 55, 
102, 35.5, 62.9, 39, 110, 8, 62, NA, 85.9, 57, 110, 67.7, 89.5, 
70, 74, 13, 48, 24, 53.5, 34.5, 53, 87.5, 33.5, 24, 9.6, 30, 
41, 30, 38.9, 20.7, 49.9, 18.6, 39, 34, 16, 18.9, 15.2, 41.5, 
53, 22, 24.9, 6.7, 32.5, 30, 59, 29.5, 26, 16.5, 39, 48.9, 33.5, 
46, 54, 57.9, 37.9, 32, 31, 34, 29, 32.5, 51.9, 31, 41.8, 48, 
28, 35, 46.5, 51.9, 35.4, 16, 35, 35, 36.5, 35.9, 45, 40, 35, 
38, 37, 23, 25.5, 39.5, 21.5, 9, 67.5, 13.4, 12.5, 28.5, 23, 
33.5, 9, 11, 30.9, 31.65, 33, 33.4, 47, 40, 46, 45.5, 57, 29.9, 
30, 34, 51, 64.5, 57.5, 85.5, 61, 38, 56.5, 60.4, 51.5, 54, 69, 
56, 27.9, 37.5, 32.9, 22, 29.9, 39.9, 32.6, 38.5, 21.5, 25.9, 
27.5, 22.9, 31.5, 8.5, 5.5, 33, 57, 47, 43.5, 43.9, 68.5, 44.25, 
61, 40, 44.5, 57, 35, 35.1, 64.5, 40, 42.6, 50, 58, 58, 55, 43, 
54, 39, 45, 42, 38.9, 43.215, 26.5, 30, 29.5), AGE = c(148L, 
9L, 23L, 5L, 19L, 20L, 20L, 22L, 4L, 23L, 20L, 30L, 18L, 75L, 
60L, 65L, 14L, 45L, 14L, 22L, 35L, 5L, 60L, 14L, 19L, 11L, 16L, 
19L, 17L, 24L, 22L, 27L, 3L, 21L, 20L, 4L, 40L, 23L, 35L, 25L, 
17L, 26L, 15L, 29L, 24L, 30L, 19L, 50L, 18L, 74L, 22L, 80L, 24L, 
20L, 7L, 47L, 50L, 45L, 48L, 50L, 48L, 55L, 27L, 20L, 33L, 40L, 
25L, 25L, 40L, 30L, 40L, 22L, 25L, 29L, 49L, 35L, 55L, 30L, 15L, 
40L, 35L, 70L, 25L, 25L, 30L, 30L, 50L, 25L, 70L, 55L, 40L, 70L, 
20L, 20L, 25L, 18L, 20L, 2L, 8L, 25L, 18L, 30L, 35L, 21L, 20L, 
24L, 13L, 25L, 18L, 30L, 20L, 22L, 28L, 50L, 20L, 38L, 17L, 22L, 
27L, 25L, 25L, 25L, 30L, 60L, 22L, 30L, 28L, 45L, 100L, 60L, 
50L, 35L, 50L, 24L, 50L, 33L, 40L, 50L, 25L, 48L, 10L, 45L, 20L, 
25L, 25L, 22L, 21L, 29L, 18L, 2L, 19L, 49L, 10L, 25L, 16L, 17L, 
27L, 34L, 2L, 24L, 23L, 40L, 25L, 45L, 26L, 37L, 15L, 22L, 75L, 
28L, 31L, 100L, 15L, 80L, 75L, 23L, 15L, 21L, 2L, 25L, 23L, 0L, 
4L, 40L, 55L, 21L, 45L, 50L, 5L, 50L, 22L, 22L, 6L, 18L, 18L, 
23L, 3L, 1L, 47L, 21L, 29L, 0L, 29L, 24L, 22L), SQFT = c(1.0451592, 
2.6867559168, 2.8446910848, 2.4266274048, 2.0475830016, 3.6622378368, 
2.0327185152, 2.378317824, 4.0988821248, 1.8469124352, 1.1222687232, 
1.0210044096, 1.189158912, 2.7675815616, 1.328513472, 1.2746297088, 
1.0999719936, 1.6778289024, 0.9959205888, 0.8324112384, 1.3359457152, 
3.41418672, 1.8580608, 2.1200473728, 2.3095695744, 1.783738368, 
1.0758172032, 2.41547904, 1.337803776, 1.0795333248, 2.1442021632, 
2.2073762304, 1.449287424, 0.9290304, 2.118189312, 1.5570549504, 
2.053157184, 1.3266554112, 1.4269906944, 1.48644864, 2.1404860416, 
2.3170018176, 1.0981139328, 1.1965911552, 1.040514048, 1.6834030848, 
3.55354128, 1.6425257472, 1.7670158208, 3.047219712, 1.4084100864, 
2.3300082432, 2.041544304, 1.170578304, 2.1850795008, 1.6276612608, 
4.4231137344, 1.909157472, 3.2999159808, 0.780385536, 1.2709135872, 
1.3452360192, 1.189158912, 1.189158912, 1.67225472, 1.430706816, 
0.9364626432, 0.8324112384, 0.8324112384, 1.8580608, 1.1965911552, 
1.11483648, 1.6871192064, 1.3266554112, 2.41547904, 1.1166945408, 
1.932383232, 1.0943978112, 0.8063983872, 1.635093504, 1.059094656, 
4.138830432, 4.3032688128, 0.9513271296, 0.891869184, 2.898574848, 
2.452640256, 1.263481344, 2.5529755392, 1.6592482944, 1.6759708416, 
1.3786811136, 0.9717657984, 1.3526682624, 0.6466051584, 0.88257888, 
1.1018300544, 1.1965911552, 1.1445654528, 0.6243084288, 0.9364626432, 
1.449287424, 0.6243084288, 1.0702430208, 1.0925397504, 0.9513271296, 
1.0702430208, 0.8621402112, 0.6243084288, 1.449287424, 1.43999712, 
0.9141659136, 1.449287424, 1.2783458304, 0.9513271296, 0.5351215104, 
0.9364626432, 1.0702430208, 1.128771936, 0.9076627008, 1.3935456, 
1.337803776, 1.34709408, 2.0940345216, 0.9513271296, 0.724643712, 
0.780385536, 1.0145011968, 3.985540416, 0.83612736, 0.97548192, 
0.9364626432, 1.170578304, 0.8324112384, 0.7971080832, 0.7023469824, 
1.003352832, 1.2486168576, 0.9513271296, 1.3415198976, 1.1371332096, 
1.226320128, 0.891869184, 1.4139842688, 2.2445374464, 0.9513271296, 
0.9513271296, 0.9178820352, 2.155350528, 1.6425257472, 2.257543872, 
3.3389352576, 2.006705664, 1.0237915008, 1.95096384, 2.2222407168, 
1.337803776, 2.60128512, 1.0628107776, 2.0382926976, 0.9513271296, 
1.5663452544, 0.9215981568, 1.2486168576, 1.11483648, 1.3712488704, 
0.8324112384, 1.0702430208, 0.8026822656, 0.7543726848, 1.0330818048, 
1.0479462912, 0.9624754944, 1.0702430208, 1.588641984, 1.6276612608, 
0.9968496192, 1.040514048, 1.189158912, 1.11483648, 3.8155278528, 
1.189158912, 2.0773119744, 0.9810561024, 1.2486168576, 1.0237915008, 
1.6703966592, 1.7540093952, 1.0925397504, 0.8695724544, 1.0702430208, 
2.536252992, 2.1404860416, 1.6425257472, 1.2411846144, 1.077675264, 
1.0702430208, 0.9271723392, 1.2040233984, 1.0340108352, 1.820899584, 
1.0702430208, 1.1297009664, 0, 0.9884883456)), class = "data.frame", row.names = c(NA, 
-205L))

You might want to try this. (I removed the prefixes from variable names for sake of clarity.)

## keep only the rows in which no column is missing
baltimore_struct <- baltimore_struct[complete.cases(baltimore_struct), ]

## bin the AGE values themselves (not the row numbers) at their own quintiles
baltimore_struct$GROUP1 <- with(
  baltimore_struct,
  cut(
    AGE,
    breaks = quantile(AGE, probs = seq(0, 1, 1 / 5)),
    include.lowest = TRUE
  )
)

## result    
head(baltimore_struct)
#   PRICE AGE     SQFT    GROUP1
# 1  47.0 148 1.045159  (45,148]
# 2 113.0   9 2.686756    [0,18]
# 3 165.0  23 2.844691 (22.2,28]
# 4 104.3   5 2.426627    [0,18]
# 5  62.5  19 2.047583 (18,22.2]
# 6  70.0  20 3.662238 (18,22.2]

Data:

baltimore_struct <- structure(list(PRICE = c(47, 113, 165, 104.3, 62.5, 70, 127.5, 
64.5, 145, 63.5, 58.9, 65, 48, 3.5, 12.8, 17.5, 36, 41.9, 53.5, 
24.5, 24.5, 55.5, 60, 51, 46, 46, 44, 54.9, 42.5, 44, 44.9, 37.9, 
33, 43.9, 49.6, 52, 37.5, 50, 35.9, 42.9, 107, 112, 44.9, 55, 
102, 35.5, 62.9, 39, 110, 8, 62, NA, 85.9, 57, 110, 67.7, 89.5, 
70, 74, 13, 48, 24, 53.5, 34.5, 53, 87.5, 33.5, 24, 9.6, 30, 
41, 30, 38.9, 20.7, 49.9, 18.6, 39, 34, 16, 18.9, 15.2, 41.5, 
53, 22, 24.9, 6.7, 32.5, 30, 59, 29.5, 26, 16.5, 39, 48.9, 33.5, 
46, 54, 57.9, 37.9, 32, 31, 34, 29, 32.5, 51.9, 31, 41.8, 48, 
28, 35, 46.5, 51.9, 35.4, 16, 35, 35, 36.5, 35.9, 45, 40, 35, 
38, 37, 23, 25.5, 39.5, 21.5, 9, 67.5, 13.4, 12.5, 28.5, 23, 
33.5, 9, 11, 30.9, 31.65, 33, 33.4, 47, 40, 46, 45.5, 57, 29.9, 
30, 34, 51, 64.5, 57.5, 85.5, 61, 38, 56.5, 60.4, 51.5, 54, 69, 
56, 27.9, 37.5, 32.9, 22, 29.9, 39.9, 32.6, 38.5, 21.5, 25.9, 
27.5, 22.9, 31.5, 8.5, 5.5, 33, 57, 47, 43.5, 43.9, 68.5, 44.25, 
61, 40, 44.5, 57, 35, 35.1, 64.5, 40, 42.6, 50, 58, 58, 55, 43, 
54, 39, 45, 42, 38.9, 43.215, 26.5, 30, 29.5), AGE = c(148L, 
9L, 23L, 5L, 19L, 20L, 20L, 22L, 4L, 23L, 20L, 30L, 18L, 75L, 
60L, 65L, 14L, 45L, 14L, 22L, 35L, 5L, 60L, 14L, 19L, 11L, 16L, 
19L, 17L, 24L, 22L, 27L, 3L, 21L, 20L, 4L, 40L, 23L, 35L, 25L, 
17L, 26L, 15L, 29L, 24L, 30L, 19L, 50L, 18L, 74L, 22L, 80L, 24L, 
20L, 7L, 47L, 50L, 45L, 48L, 50L, 48L, 55L, 27L, 20L, 33L, 40L, 
25L, 25L, 40L, 30L, 40L, 22L, 25L, 29L, 49L, 35L, 55L, 30L, 15L, 
40L, 35L, 70L, 25L, 25L, 30L, 30L, 50L, 25L, 70L, 55L, 40L, 70L, 
20L, 20L, 25L, 18L, 20L, 2L, 8L, 25L, 18L, 30L, 35L, 21L, 20L, 
24L, 13L, 25L, 18L, 30L, 20L, 22L, 28L, 50L, 20L, 38L, 17L, 22L, 
27L, 25L, 25L, 25L, 30L, 60L, 22L, 30L, 28L, 45L, 100L, 60L, 
50L, 35L, 50L, 24L, 50L, 33L, 40L, 50L, 25L, 48L, 10L, 45L, 20L, 
25L, 25L, 22L, 21L, 29L, 18L, 2L, 19L, 49L, 10L, 25L, 16L, 17L, 
27L, 34L, 2L, 24L, 23L, 40L, 25L, 45L, 26L, 37L, 15L, 22L, 75L, 
28L, 31L, 100L, 15L, 80L, 75L, 23L, 15L, 21L, 2L, 25L, 23L, 0L, 
4L, 40L, 55L, 21L, 45L, 50L, 5L, 50L, 22L, 22L, 6L, 18L, 18L, 
23L, 3L, 1L, 47L, 21L, 29L, 0L, 29L, 24L, 22L), SQFT = c(1.0451592, 
2.6867559168, 2.8446910848, 2.4266274048, 2.0475830016, 3.6622378368, 
2.0327185152, 2.378317824, 4.0988821248, 1.8469124352, 1.1222687232, 
1.0210044096, 1.189158912, 2.7675815616, 1.328513472, 1.2746297088, 
1.0999719936, 1.6778289024, 0.9959205888, 0.8324112384, 1.3359457152, 
3.41418672, 1.8580608, 2.1200473728, 2.3095695744, 1.783738368, 
1.0758172032, 2.41547904, 1.337803776, 1.0795333248, 2.1442021632, 
2.2073762304, 1.449287424, 0.9290304, 2.118189312, 1.5570549504, 
2.053157184, 1.3266554112, 1.4269906944, 1.48644864, 2.1404860416, 
2.3170018176, 1.0981139328, 1.1965911552, 1.040514048, 1.6834030848, 
3.55354128, 1.6425257472, 1.7670158208, 3.047219712, 1.4084100864, 
2.3300082432, 2.041544304, 1.170578304, 2.1850795008, 1.6276612608, 
4.4231137344, 1.909157472, 3.2999159808, 0.780385536, 1.2709135872, 
1.3452360192, 1.189158912, 1.189158912, 1.67225472, 1.430706816, 
0.9364626432, 0.8324112384, 0.8324112384, 1.8580608, 1.1965911552, 
1.11483648, 1.6871192064, 1.3266554112, 2.41547904, 1.1166945408, 
1.932383232, 1.0943978112, 0.8063983872, 1.635093504, 1.059094656, 
4.138830432, 4.3032688128, 0.9513271296, 0.891869184, 2.898574848, 
2.452640256, 1.263481344, 2.5529755392, 1.6592482944, 1.6759708416, 
1.3786811136, 0.9717657984, 1.3526682624, 0.6466051584, 0.88257888, 
1.1018300544, 1.1965911552, 1.1445654528, 0.6243084288, 0.9364626432, 
1.449287424, 0.6243084288, 1.0702430208, 1.0925397504, 0.9513271296, 
1.0702430208, 0.8621402112, 0.6243084288, 1.449287424, 1.43999712, 
0.9141659136, 1.449287424, 1.2783458304, 0.9513271296, 0.5351215104, 
0.9364626432, 1.0702430208, 1.128771936, 0.9076627008, 1.3935456, 
1.337803776, 1.34709408, 2.0940345216, 0.9513271296, 0.724643712, 
0.780385536, 1.0145011968, 3.985540416, 0.83612736, 0.97548192, 
0.9364626432, 1.170578304, 0.8324112384, 0.7971080832, 0.7023469824, 
1.003352832, 1.2486168576, 0.9513271296, 1.3415198976, 1.1371332096, 
1.226320128, 0.891869184, 1.4139842688, 2.2445374464, 0.9513271296, 
0.9513271296, 0.9178820352, 2.155350528, 1.6425257472, 2.257543872, 
3.3389352576, 2.006705664, 1.0237915008, 1.95096384, 2.2222407168, 
1.337803776, 2.60128512, 1.0628107776, 2.0382926976, 0.9513271296, 
1.5663452544, 0.9215981568, 1.2486168576, 1.11483648, 1.3712488704, 
0.8324112384, 1.0702430208, 0.8026822656, 0.7543726848, 1.0330818048, 
1.0479462912, 0.9624754944, 1.0702430208, 1.588641984, 1.6276612608, 
0.9968496192, 1.040514048, 1.189158912, 1.11483648, 3.8155278528, 
1.189158912, 2.0773119744, 0.9810561024, 1.2486168576, 1.0237915008, 
1.6703966592, 1.7540093952, 1.0925397504, 0.8695724544, 1.0702430208, 
2.536252992, 2.1404860416, 1.6425257472, 1.2411846144, 1.077675264, 
1.0702430208, 0.9271723392, 1.2040233984, 1.0340108352, 1.820899584, 
1.0702430208, 1.1297009664, 0, 0.9884883456)), class = "data.frame", row.names = c(NA, 
-205L))
~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文