使用 plumber 创建 API
library(tidyverse)
library(caret)
library(InformationValue)
library(epicalc)
library(lubridate)
library(Boruta)
# Load the data set; `drug_administered` is the outcome modelled further down.
df <- read_csv("dataset.csv")

# Quick inspection: total number of missing values, column types, column names.
sum(is.na(df))
str(df)
names(df)

# Convert the categorical predictors to factors.
# The outcome is deliberately not converted here: it is recoded to 0/1 below,
# which would overwrite a factor conversion anyway.
factor_cols <- c(
  "sex", "pregnancy_status", "symtoms_status", "marital_status",
  "education_level", "occupation"
)
df[factor_cols] <- lapply(df[factor_cols], as.factor)

# Standardise the numeric variables. scale() returns a one-column matrix with
# "scaled:center"/"scaled:scale" attributes; as.numeric() keeps the same values
# but stores them as plain numeric vectors in the data frame.
df$current_age <- as.numeric(scale(df$current_age))
df$weight <- as.numeric(scale(df$weight))

# Recode the outcome: 1 when the value is "Yes", 0 otherwise (NA stays NA).
df$drug_administered <- ifelse(df$drug_administered == "Yes", 1, 0)

# Check the proportion of the two outcome classes.
table(df$drug_administered)
# Split the rows roughly 80/20 into training and test sets (seeded so the split
# is reproducible).
set.seed(100)
# The mask is named `sample`; the call on the right-hand side still resolves to
# base::sample() because R looks for a function when a name is used as a call.
sample <- sample(c(TRUE, FALSE), nrow(df), replace = TRUE, prob = c(0.8, 0.2))
train <- df[sample, ]
test <- df[!sample, ]

# Class balance of the outcome in the training set before down-sampling.
table(train$drug_administered)

# Correct the class imbalance by down-sampling the majority class.
set.seed(100)
options(scipen = 999)
# Predictors are every column except the outcome. downSample() needs the class
# labels as a factor, and `yname` keeps the outcome under its original name so
# the model formula below can refer to it.
predictor_cols <- setdiff(colnames(train), "drug_administered")
downtrain <- downSample(
  x = train[, predictor_cols],
  y = factor(train$drug_administered),
  yname = "drug_administered"
)

# Logistic regression of the outcome on all remaining columns. With a factor
# response, glm() treats the first level ("0") as failure and "1" as success.
model <- glm(drug_administered ~ ., family = "binomial", data = downtrain)
summary(model)
# Goodness of fit: McFadden's pseudo R-squared.
pscl::pR2(model)["McFadden"]

# Variable importance.
caret::varImp(model)

# Variance inflation factors, to see whether multicollinearity is a problem.
car::vif(model)

# Predicted probabilities on the held-out test set.
predicted <- predict(model, test, type = "response")
summary(predicted)

# Probability cutoff chosen by InformationValue::optimalCutoff().
# The actuals are the 0/1 outcome column of the test set.
optimal <- InformationValue::optimalCutoff(test$drug_administered, predicted)[1]
optimal

# Confusion matrix, sensitivity and specificity. These three names are exported
# by both caret and InformationValue, so they are namespace-qualified instead
# of depending on which package was attached last.
# NOTE(review): these use the functions' default threshold, not `optimal` --
# confirm that this is intended.
InformationValue::confusionMatrix(test$drug_administered, predicted)
InformationValue::sensitivity(test$drug_administered, predicted) * 100
InformationValue::specificity(test$drug_administered, predicted) * 100

# Misclassification error at the optimal cutoff, as a percentage.
miscla <- InformationValue::misClassError(
  test$drug_administered,
  predicted,
  threshold = optimal
)
miscla * 100

# Overall accuracy of the model, as a percentage.
(1 - miscla) * 100

# Plot the ROC curve.
InformationValue::plotROC(test$drug_administered, predicted)

# Persist the fitted model so it can be loaded elsewhere (e.g. by an API script).
write_rds(model, "model_patients.rds")
上面的脚本是用 R 构建的逻辑回归模型。我想使用 plumber 将其部署到本地服务器,并且另外为 plumber 写了一个脚本。但这个 plumber 脚本实际上并没有完成它应该做的预测。我哪里做错了?
library(tidyverse)
library(caret)
library(InformationValue)
library(epicalc)
library(lubridate)
library(Boruta)
# Load the data set; `drug_administered` is the outcome modelled further down.
df <- read_csv("dataset.csv")

# Quick inspection: total number of missing values, column types, column names.
sum(is.na(df))
str(df)
names(df)

# Convert the categorical predictors to factors.
# The outcome is deliberately not converted here: it is recoded to 0/1 below,
# which would overwrite a factor conversion anyway.
factor_cols <- c(
  "sex", "pregnancy_status", "symtoms_status", "marital_status",
  "education_level", "occupation"
)
df[factor_cols] <- lapply(df[factor_cols], as.factor)

# Standardise the numeric variables. scale() returns a one-column matrix with
# "scaled:center"/"scaled:scale" attributes; as.numeric() keeps the same values
# but stores them as plain numeric vectors in the data frame.
df$current_age <- as.numeric(scale(df$current_age))
df$weight <- as.numeric(scale(df$weight))

# Recode the outcome: 1 when the value is "Yes", 0 otherwise (NA stays NA).
df$drug_administered <- ifelse(df$drug_administered == "Yes", 1, 0)

# Check the proportion of the two outcome classes.
table(df$drug_administered)
# Split the rows roughly 80/20 into training and test sets (seeded so the split
# is reproducible).
set.seed(100)
# The mask is named `sample`; the call on the right-hand side still resolves to
# base::sample() because R looks for a function when a name is used as a call.
sample <- sample(c(TRUE, FALSE), nrow(df), replace = TRUE, prob = c(0.8, 0.2))
train <- df[sample, ]
test <- df[!sample, ]

# Class balance of the outcome in the training set before down-sampling.
table(train$drug_administered)

# Correct the class imbalance by down-sampling the majority class.
set.seed(100)
options(scipen = 999)
# Predictors are every column except the outcome. downSample() needs the class
# labels as a factor, and `yname` keeps the outcome under its original name so
# the model formula below can refer to it.
predictor_cols <- setdiff(colnames(train), "drug_administered")
downtrain <- downSample(
  x = train[, predictor_cols],
  y = factor(train$drug_administered),
  yname = "drug_administered"
)

# Logistic regression of the outcome on all remaining columns. With a factor
# response, glm() treats the first level ("0") as failure and "1" as success.
model <- glm(drug_administered ~ ., family = "binomial", data = downtrain)
summary(model)
# Goodness of fit: McFadden's pseudo R-squared.
pscl::pR2(model)["McFadden"]

# Variable importance.
caret::varImp(model)

# Variance inflation factors, to see whether multicollinearity is a problem.
car::vif(model)

# Predicted probabilities on the held-out test set.
predicted <- predict(model, test, type = "response")
summary(predicted)

# Probability cutoff chosen by InformationValue::optimalCutoff().
# The actuals are the 0/1 outcome column of the test set.
optimal <- InformationValue::optimalCutoff(test$drug_administered, predicted)[1]
optimal

# Confusion matrix, sensitivity and specificity. These three names are exported
# by both caret and InformationValue, so they are namespace-qualified instead
# of depending on which package was attached last.
# NOTE(review): these use the functions' default threshold, not `optimal` --
# confirm that this is intended.
InformationValue::confusionMatrix(test$drug_administered, predicted)
InformationValue::sensitivity(test$drug_administered, predicted) * 100
InformationValue::specificity(test$drug_administered, predicted) * 100

# Misclassification error at the optimal cutoff, as a percentage.
miscla <- InformationValue::misClassError(
  test$drug_administered,
  predicted,
  threshold = optimal
)
miscla * 100

# Overall accuracy of the model, as a percentage.
(1 - miscla) * 100

# Plot the ROC curve.
InformationValue::plotROC(test$drug_administered, predicted)

# Persist the fitted model so it can be loaded elsewhere (e.g. by an API script).
write_rds(model, "model_patients.rds")
The script above is a logistic regression model built in R. I want to use plumber to deploy it to my local server, and I have created a separate script for plumber. However, the plumber script is not actually producing the predictions it is meant to. What am I getting wrong?
如果你对这篇内容有疑问,欢迎到本站社区发帖提问、参与讨论,获取更多帮助,或者扫描二维码加入 Web 技术交流群。
绑定邮箱获取回复消息
由于您还没有绑定您的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论