使用 plumber 创建 API

发布于 2025-01-09 13:38:17 字数 2693 浏览 1 评论 0原文

library(tidyverse)
library(caret)
library(InformationValue)
library(epicalc)
library(lubridate)
library(Boruta)
# Logistic regression on patient data: load, preprocess, balance the
# training set, fit a binomial GLM, evaluate on a hold-out set, and save
# the fitted model to "model_patients.rds".

# Load and inspect the data ----
df <- read_csv("dataset.csv")
sum(is.na(df))
str(df)
names(df)

# Convert categorical predictors to factors ----
factor_cols <- c(
  "sex", "pregnancy_status", "symtoms_status",
  "marital_status", "education_level", "occupation"
)
df[factor_cols] <- lapply(df[factor_cols], as.factor)

# Standardise numeric predictors ----
# scale() returns a one-column matrix with attributes; as.numeric() keeps
# the columns as plain numeric vectors so later subsetting, resampling and
# predict() see ordinary columns.
# NOTE(review): any new data scored with the saved model (e.g. from an API)
# presumably has to be standardised with these same means/SDs -- confirm.
df$current_age <- as.numeric(scale(df$current_age))
df$weight <- as.numeric(scale(df$weight))

# Encode the outcome as 1 ("Yes") / 0 (anything else) ----
df$drug_administered <- ifelse(df$drug_administered == "Yes", 1, 0)

# Class proportions in the full data
table(df$drug_administered)

# Train / test split (roughly 80 / 20) ----
set.seed(100)
in_train <- sample(
  c(TRUE, FALSE),
  nrow(df),
  replace = TRUE,
  prob = c(0.8, 0.2)
)
train <- df[in_train, ]
test <- df[!in_train, ]

# Class proportions in the training set
table(train$drug_administered)

# Down-sample the majority class to correct the imbalance ----
set.seed(100)
options(scipen = 999)
# x must hold every column EXCEPT the outcome, and y must be a factor.
# yname keeps the outcome column called "drug_administered" in the result
# (downSample() would otherwise name it "Class"), so the model formula
# below can find it.
predictor_cols <- setdiff(names(train), "drug_administered")
downtrain <- downSample(
  x = as.data.frame(train[, predictor_cols]),
  y = factor(train$drug_administered),
  yname = "drug_administered"
)

# Fit the model ----
# With a factor response, glm() treats the first level ("0") as failure
# and the other level ("1") as success.
model <- glm(drug_administered ~ ., family = "binomial", data = downtrain)
summary(model)

# McFadden pseudo R-squared
pscl::pR2(model)["McFadden"]
# Variable importance
caret::varImp(model)
# VIF of each variable, to see if multicollinearity is a problem
car::vif(model)

# Evaluate on the hold-out set ----
predicted <- predict(model, test, type = "response")
summary(predicted)

# The helpers below are called with an explicit namespace because caret
# exports functions with the same names (confusionMatrix, sensitivity,
# specificity) and the result would otherwise depend on attach order.
actual <- test$drug_administered

# Probability cutoff chosen by optimalCutoff()
optimal <- InformationValue::optimalCutoff(actual, predicted)[1]
optimal

# Confusion matrix
InformationValue::confusionMatrix(actual, predicted)
# Sensitivity (%)
InformationValue::sensitivity(actual, predicted) * 100
# Specificity (%)
InformationValue::specificity(actual, predicted) * 100
# Misclassification error (%) at the optimal cutoff
miscla <- InformationValue::misClassError(
  actual,
  predicted,
  threshold = optimal
)
miscla * 100
# Overall accuracy (%) of the model
(1 - miscla) * 100
# ROC curve
InformationValue::plotROC(actual, predicted)

# Persist the fitted model ----
write_rds(model, "model_patients.rds")

上面的脚本是用 R 构建的逻辑回归模型。我想使用 plumber 将其部署到我的本地服务器，并为此另外编写了一个 plumber 脚本。但这个 plumber 脚本实际上并没有执行它本应执行的预测。我哪里做错了？

library(tidyverse)
library(caret)
library(InformationValue)
library(epicalc)
library(lubridate)
library(Boruta)
# Logistic regression on patient data: load, preprocess, balance the
# training set, fit a binomial GLM, evaluate on a hold-out set, and save
# the fitted model to "model_patients.rds".

# Load and inspect the data ----
df <- read_csv("dataset.csv")
sum(is.na(df))
str(df)
names(df)

# Convert categorical predictors to factors ----
factor_cols <- c(
  "sex", "pregnancy_status", "symtoms_status",
  "marital_status", "education_level", "occupation"
)
df[factor_cols] <- lapply(df[factor_cols], as.factor)

# Standardise numeric predictors ----
# scale() returns a one-column matrix with attributes; as.numeric() keeps
# the columns as plain numeric vectors so later subsetting, resampling and
# predict() see ordinary columns.
# NOTE(review): any new data scored with the saved model (e.g. from an API)
# presumably has to be standardised with these same means/SDs -- confirm.
df$current_age <- as.numeric(scale(df$current_age))
df$weight <- as.numeric(scale(df$weight))

# Encode the outcome as 1 ("Yes") / 0 (anything else) ----
df$drug_administered <- ifelse(df$drug_administered == "Yes", 1, 0)

# Class proportions in the full data
table(df$drug_administered)

# Train / test split (roughly 80 / 20) ----
set.seed(100)
in_train <- sample(
  c(TRUE, FALSE),
  nrow(df),
  replace = TRUE,
  prob = c(0.8, 0.2)
)
train <- df[in_train, ]
test <- df[!in_train, ]

# Class proportions in the training set
table(train$drug_administered)

# Down-sample the majority class to correct the imbalance ----
set.seed(100)
options(scipen = 999)
# x must hold every column EXCEPT the outcome, and y must be a factor.
# yname keeps the outcome column called "drug_administered" in the result
# (downSample() would otherwise name it "Class"), so the model formula
# below can find it.
predictor_cols <- setdiff(names(train), "drug_administered")
downtrain <- downSample(
  x = as.data.frame(train[, predictor_cols]),
  y = factor(train$drug_administered),
  yname = "drug_administered"
)

# Fit the model ----
# With a factor response, glm() treats the first level ("0") as failure
# and the other level ("1") as success.
model <- glm(drug_administered ~ ., family = "binomial", data = downtrain)
summary(model)

# McFadden pseudo R-squared
pscl::pR2(model)["McFadden"]
# Variable importance
caret::varImp(model)
# VIF of each variable, to see if multicollinearity is a problem
car::vif(model)

# Evaluate on the hold-out set ----
predicted <- predict(model, test, type = "response")
summary(predicted)

# The helpers below are called with an explicit namespace because caret
# exports functions with the same names (confusionMatrix, sensitivity,
# specificity) and the result would otherwise depend on attach order.
actual <- test$drug_administered

# Probability cutoff chosen by optimalCutoff()
optimal <- InformationValue::optimalCutoff(actual, predicted)[1]
optimal

# Confusion matrix
InformationValue::confusionMatrix(actual, predicted)
# Sensitivity (%)
InformationValue::sensitivity(actual, predicted) * 100
# Specificity (%)
InformationValue::specificity(actual, predicted) * 100
# Misclassification error (%) at the optimal cutoff
miscla <- InformationValue::misClassError(
  actual,
  predicted,
  threshold = optimal
)
miscla * 100
# Overall accuracy (%) of the model
(1 - miscla) * 100
# ROC curve
InformationValue::plotROC(actual, predicted)

# Persist the fitted model ----
write_rds(model, "model_patients.rds")

The script above is a logistic regression model built in R. I want to use plumber to deploy it to my local server, and I have also created a separate script for plumber. However, the plumber script is not actually doing the prediction it is meant to do. What am I not getting right?

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。
列表为空,暂无数据
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文