运行随机森林循环,预测未采样的位置并保存输出
我想经营一个随机的森林100次,节省火车并测试AUC和OOB分数,预测为空间范围建模,保存平均值,标准误差和每个单元的变化系数,然后写入栅格,如果有人知道如何知道如何添加TSS分数。抱歉,有代码的前半部分缺少代码,零散的代码和错误,但是从随机的森林模型开始有问题,任何帮助都将不胜感激。
Packages:
require(raster);require(dismo);require(sp);
require(pROC);require(FinCal);require(ggplot2);require(randomForest);require (ROCR)
require(tidyverse); require(dplyr)
n.boot <- 100
length.map <- nrow(predictor_variables)
boot_mat <- array(0, c(length.map,n.boot)) # matrix where rows will store predictions for each
bootstrap (columns)
influence_mat <- array(0,c(21,n.boot)) # for saving information on predictors
auc_mat <- array(0,c(n.boot, 4)) #to save the RF training and test AUC and OOB.
for (i in 1:n.boot) # bootstrap loop
{ # create training and evaluation presence/ relative absence dataframes
train_ind <- sample(seq_len(nrow(sampled)), size = floor(0.75 * nrow(sampled))) # index of
rows for 75% of presence data
# creat training and evaluation presences
sampled _train <- sampled [train_ind, ]
sampled _eval <- sampled [-train_ind, ]
#creation of absences
rnd <- sort(sample(seq(1,length.abs),nrow(sampled _train)*2))
rnd.ev <- sort(sample(seq(1,length.abs),nrow(sampled _eval)*2))
absence.rnd <- absence[rnd,]
absence.rnd.ev <- absence[rnd.ev,]
# creation of presence and absence
sampled.PresAbs.B <- rbind(sampled _train,absence.rnd) # final presence/absence data
sampled_eval <- rbind(sampled_eval,absence.rnd.ev)
training<- randomForest(response~., sampled.PresAbs.B [,c(5:25)], ntree=1000, mtry = 2,
importance = TRUE) # column number of predictor variables to be used in RF
# need to save influence/importance of predictors on response for each run.
# need to save training (training) and test AUC for each run.
# need to save OOB error rate too for training and test runs if possible.
# predict spatially to map
pred_map <- predict(predictor_variables, training, type = "prob", index = 2)
boot_mat[,i] <- round(pred.map, digits = 2)
}
save(boot_mat, file=" sampled_boot_mat.Rdata")
save(auc_mat, file=" sampled_dev_mat.Rdata")
save(influence_mat, file=" sampled_inf_mat.Rdata")
# need to calculate mean and standard error of influence of predictors on response, then write
to csv.
# calculate mean suitability and CV spatially
sampled.boot.mean<-apply(boot_mat,1,mean)
sampled.boot.sd<-apply(boot_mat,1,sd)
sampled.boot.cv<- sampled.boot.sd/sampled.boot.mean # calculation of Coefficient of variation if needed
# export the maps
sampled.map.mean <- cbind(sna_all_preds[, c("X", "Y")], sampled.boot.mean) # mean prediciton
sampled.map.UC <- cbind(sna_all_preds[, c("X", "Y")], sampled.boot.sd) # uncertainty
sampled.map.CV <-cbind(sna_all_preds[,c("X", "Y")], sampled.boot.cv) # CV
#can then export as rasters easily enough.
例如数据:
dput(head(sna_all_boffffs_and_preds, 10))
structure(list(site = 1:10, X = c(708396.365970067, 708646.365970067,
708896.365970067, 709146.365970067, 708396.365970067, 708646.365970067,
708896.365970067, 709146.365970067, 708396.365970067, 708646.365970067
), Y = c(1243944.65711016, 1243944.65711016, 1243944.65711016,
1243944.65711016, 1243694.65711016, 1243694.65711016, 1243694.65711016,
1243694.65711016, 1243444.65711016, 1243444.65711016), sna_boffffs_clip = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_), adet_2010_2019_250_clip = c(-14.4839498108682,
-14.4839498108682, -14.4839498108682, -14.4839498108682, -14.4839498108682,
-14.4839498108682, -14.4839498108682, -14.4839498108682, -14.4839498108682,
-14.4839498108682), bathy_250_slope_clip = c(3.62498998641968,
1.90744984149933, 4.96259355545044, 8.03348731994629, 2.80171227455139,
2.33035254478455, 5.86305713653564, 8.246413230896, 5.99569511413574,
6.80374336242676), bathy_250_vrm_007_clip = c(0.00215762853622437,
0.00210392475128174, 0.00183302164077759, 0.00150519609451294,
0.00216823816299438, 0.00220978260040283, 0.0020909309387207,
0.00188064575195312, 0.00211155414581299, 0.00220668315887451
), btr_100km_clip = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), btr_10km_clip = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0), btr_50km_clip = c(0, 0, 0, 0, 0,
0, 0, 0, 0, 0), carbonate_250_clip = c(30.7859439849854, 30.7859439849854,
30.7859439849854, 30.7859439849854, 30.7859439849854, 30.7859439849854,
30.7859439849854, 30.7859439849854, 30.7859439849854, 30.7859439849854
), Catch_kg_per_km2_AllMethods_250_clip = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0), current_speed_250_clip = c(0.17399999499321, 0.17399999499321,
0.17399999499321, 0.17399999499321, 0.17399999499321, 0.17399999499321,
0.17399999499321, 0.17399999499321, 0.17399999499321, 0.17399999499321
), dist_shore_250_clip = c(67926.890625, 67982.53125, 68039.0546875,
68096.4375, 67683.0859375, 67738.9296875, 67795.6484375, 67853.2421875,
67439.328125, 67495.3671875), ebed_20_av_250_clip = c(0.538848269429723,
0.538848269429723, 0.538848269429723, 0.538848269429723, 0.538848269429723,
0.538848269429723, 0.538848269429723, 0.538848269429723, 0.538848269429723,
0.538848269429723), gravel_250_clip = c(19.7878284454346, 19.7878284454346,
19.7878284454346, 19.7878284454346, 19.7878284454346, 19.7878284454346,
19.7878284454346, 19.7878284454346, 19.7878284454346, 19.7878284454346
), hvis_20_av_250_clip = c(21.4873270945488, 21.4873270945488,
21.4873270945488, 21.4873270945488, 21.4873270945488, 21.4873270945488,
21.4873270945488, 21.4873270945488, 21.4873270945488, 21.4873270945488
), orb_vel_250_clip = c(150L, 150L, 150L, 150L, 150L, 150L, 150L,
150L, 150L, 150L), rocky_reef_clip = c(0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L), sal_depth_250_clip = c(35.231746673584, 35.231746673584,
35.231746673584, 35.231746673584, 35.231746673584, 35.231746673584,
35.231746673584, 35.231746673584, 35.231746673584, 35.231746673584
), sand_250_clip = c(19.3533153533936, 19.3533153533936, 19.3533153533936,
19.3533153533936, 19.3533153533936, 19.3533153533936, 19.3533153533936,
19.3533153533936, 19.3533153533936, 19.3533153533936), sst_20_av_250_clip = c(16.6609603188989,
16.6609603188989, 16.6609603188989, 16.6609603188989, 16.6609603188989,
16.6609603188989, 16.6609603188989, 16.6609603188989, 16.6609603188989,
16.6609603188989), sst_annual_amp_250_clip = c(2.67400002479553,
2.67400002479553, 2.67400002479553, 2.67400002479553, 2.67400002479553,
2.67400002479553, 2.67400002479553, 2.67400002479553, 2.67400002479553,
2.67400002479553), sst_grad_250_clip = c(0.00596274901181459,
0.00596274901181459, 0.00596274901181459, 0.00596274901181459,
0.00596274901181459, 0.00596274901181459, 0.00596274901181459,
0.00596274901181459, 0.00596274901181459, 0.00596274901181459
), temp_res_250_clip = c(4.63328790664673, 4.63328790664673,
4.63328790664673, 4.63328790664673, 4.63328790664673, 4.63328790664673,
4.63328790664673, 4.63328790664673, 4.63328790664673, 4.63328790664673
)), row.names = c(NA, 10L), class = "data.frame")
I would like to run a random forest 100 times, save train and test AUC and OOB scores, predict to model spatial extent, save mean, standard error and coefficient of variation per cell, then write to raster, bonus points if anyone knows how to add TSS score. Apologies there is missing code, fragmented code and errors, the first half of the code works, but from the random forest model onwards there are problems, any help would be greatly appreciated.
Packages:
require(raster);require(dismo);require(sp);
require(pROC);require(FinCal);require(ggplot2);require(randomForest);require (ROCR)
require(tidyverse); require(dplyr)
n.boot <- 100
length.map <- nrow(predictor_variables)
boot_mat <- array(0, c(length.map,n.boot)) # matrix where rows will store predictions for each
bootstrap (columns)
influence_mat <- array(0,c(21,n.boot)) # for saving information on predictors
auc_mat <- array(0,c(n.boot, 4)) #to save the RF training and test AUC and OOB.
for (i in 1:n.boot) # bootstrap loop
{ # create training and evaluation presence/ relative absence dataframes
train_ind <- sample(seq_len(nrow(sampled)), size = floor(0.75 * nrow(sampled))) # index of
rows for 75% of presence data
# creat training and evaluation presences
sampled _train <- sampled [train_ind, ]
sampled _eval <- sampled [-train_ind, ]
#creation of absences
rnd <- sort(sample(seq(1,length.abs),nrow(sampled _train)*2))
rnd.ev <- sort(sample(seq(1,length.abs),nrow(sampled _eval)*2))
absence.rnd <- absence[rnd,]
absence.rnd.ev <- absence[rnd.ev,]
# creation of presence and absence
sampled.PresAbs.B <- rbind(sampled _train,absence.rnd) # final presence/absence data
sampled_eval <- rbind(sampled_eval,absence.rnd.ev)
training<- randomForest(response~., sampled.PresAbs.B [,c(5:25)], ntree=1000, mtry = 2,
importance = TRUE) # column number of predictor variables to be used in RF
# need to save influence/importance of predictors on response for each run.
# need to save training (training) and test AUC for each run.
# need to save OOB error rate too for training and test runs if possible.
# predict spatially to map
pred_map <- predict(predictor_variables, training, type = "prob", index = 2)
boot_mat[,i] <- round(pred.map, digits = 2)
}
save(boot_mat, file=" sampled_boot_mat.Rdata")
save(auc_mat, file=" sampled_dev_mat.Rdata")
save(influence_mat, file=" sampled_inf_mat.Rdata")
# need to calculate mean and standard error of influence of predictors on response, then write
to csv.
# calculate mean suitability and CV spatially
sampled.boot.mean<-apply(boot_mat,1,mean)
sampled.boot.sd<-apply(boot_mat,1,sd)
sampled.boot.cv<- sampled.boot.sd/sampled.boot.mean # calculation of Coefficient of variation if needed
# export the maps
sampled.map.mean <- cbind(sna_all_preds[, c("X", "Y")], sampled.boot.mean) # mean prediciton
sampled.map.UC <- cbind(sna_all_preds[, c("X", "Y")], sampled.boot.sd) # uncertainty
sampled.map.CV <-cbind(sna_all_preds[,c("X", "Y")], sampled.boot.cv) # CV
#can then export as rasters easily enough.
e.g of data:
dput(head(sna_all_boffffs_and_preds, 10))
structure(list(site = 1:10, X = c(708396.365970067, 708646.365970067,
708896.365970067, 709146.365970067, 708396.365970067, 708646.365970067,
708896.365970067, 709146.365970067, 708396.365970067, 708646.365970067
), Y = c(1243944.65711016, 1243944.65711016, 1243944.65711016,
1243944.65711016, 1243694.65711016, 1243694.65711016, 1243694.65711016,
1243694.65711016, 1243444.65711016, 1243444.65711016), sna_boffffs_clip = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_), adet_2010_2019_250_clip = c(-14.4839498108682,
-14.4839498108682, -14.4839498108682, -14.4839498108682, -14.4839498108682,
-14.4839498108682, -14.4839498108682, -14.4839498108682, -14.4839498108682,
-14.4839498108682), bathy_250_slope_clip = c(3.62498998641968,
1.90744984149933, 4.96259355545044, 8.03348731994629, 2.80171227455139,
2.33035254478455, 5.86305713653564, 8.246413230896, 5.99569511413574,
6.80374336242676), bathy_250_vrm_007_clip = c(0.00215762853622437,
0.00210392475128174, 0.00183302164077759, 0.00150519609451294,
0.00216823816299438, 0.00220978260040283, 0.0020909309387207,
0.00188064575195312, 0.00211155414581299, 0.00220668315887451
), btr_100km_clip = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), btr_10km_clip = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0), btr_50km_clip = c(0, 0, 0, 0, 0,
0, 0, 0, 0, 0), carbonate_250_clip = c(30.7859439849854, 30.7859439849854,
30.7859439849854, 30.7859439849854, 30.7859439849854, 30.7859439849854,
30.7859439849854, 30.7859439849854, 30.7859439849854, 30.7859439849854
), Catch_kg_per_km2_AllMethods_250_clip = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0), current_speed_250_clip = c(0.17399999499321, 0.17399999499321,
0.17399999499321, 0.17399999499321, 0.17399999499321, 0.17399999499321,
0.17399999499321, 0.17399999499321, 0.17399999499321, 0.17399999499321
), dist_shore_250_clip = c(67926.890625, 67982.53125, 68039.0546875,
68096.4375, 67683.0859375, 67738.9296875, 67795.6484375, 67853.2421875,
67439.328125, 67495.3671875), ebed_20_av_250_clip = c(0.538848269429723,
0.538848269429723, 0.538848269429723, 0.538848269429723, 0.538848269429723,
0.538848269429723, 0.538848269429723, 0.538848269429723, 0.538848269429723,
0.538848269429723), gravel_250_clip = c(19.7878284454346, 19.7878284454346,
19.7878284454346, 19.7878284454346, 19.7878284454346, 19.7878284454346,
19.7878284454346, 19.7878284454346, 19.7878284454346, 19.7878284454346
), hvis_20_av_250_clip = c(21.4873270945488, 21.4873270945488,
21.4873270945488, 21.4873270945488, 21.4873270945488, 21.4873270945488,
21.4873270945488, 21.4873270945488, 21.4873270945488, 21.4873270945488
), orb_vel_250_clip = c(150L, 150L, 150L, 150L, 150L, 150L, 150L,
150L, 150L, 150L), rocky_reef_clip = c(0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L), sal_depth_250_clip = c(35.231746673584, 35.231746673584,
35.231746673584, 35.231746673584, 35.231746673584, 35.231746673584,
35.231746673584, 35.231746673584, 35.231746673584, 35.231746673584
), sand_250_clip = c(19.3533153533936, 19.3533153533936, 19.3533153533936,
19.3533153533936, 19.3533153533936, 19.3533153533936, 19.3533153533936,
19.3533153533936, 19.3533153533936, 19.3533153533936), sst_20_av_250_clip = c(16.6609603188989,
16.6609603188989, 16.6609603188989, 16.6609603188989, 16.6609603188989,
16.6609603188989, 16.6609603188989, 16.6609603188989, 16.6609603188989,
16.6609603188989), sst_annual_amp_250_clip = c(2.67400002479553,
2.67400002479553, 2.67400002479553, 2.67400002479553, 2.67400002479553,
2.67400002479553, 2.67400002479553, 2.67400002479553, 2.67400002479553,
2.67400002479553), sst_grad_250_clip = c(0.00596274901181459,
0.00596274901181459, 0.00596274901181459, 0.00596274901181459,
0.00596274901181459, 0.00596274901181459, 0.00596274901181459,
0.00596274901181459, 0.00596274901181459, 0.00596274901181459
), temp_res_250_clip = c(4.63328790664673, 4.63328790664673,
4.63328790664673, 4.63328790664673, 4.63328790664673, 4.63328790664673,
4.63328790664673, 4.63328790664673, 4.63328790664673, 4.63328790664673
)), row.names = c(NA, 10L), class = "data.frame")
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。
绑定邮箱获取回复消息
由于您还没有绑定你的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论