使用 plumber 创建 API

发布于 2025-01-09 13:38:17 字数 2693 浏览 1 评论 0原文

library(tidyverse)
library(caret)
library(InformationValue)
library(epicalc)
library(lubridate)
library(Boruta)

# ---- Load and inspect the raw data ----
df <- read_csv("dataset.csv")
sum(is.na(df)) # total number of missing cells
str(df)
names(df)

# ---- Convert categorical predictors to factors ----
# Column names are kept exactly as the script has always referenced them
# (including the spelling "symtoms_status").
categorical_cols <- c(
  "sex", "pregnancy_status", "symtoms_status",
  "marital_status", "education_level", "occupation"
)
df[categorical_cols] <- lapply(df[categorical_cols], as.factor)

# ---- Standardise numeric predictors ----
# scale() returns a one-column matrix carrying centring/scaling attributes;
# as.numeric() turns it back into a plain numeric column so that later
# subsetting, down-sampling and prediction see ordinary vectors.
# NOTE(review): any new data scored with the saved model (e.g. from an API)
# has to be standardised with the same mean/sd as used here -- confirm that
# the serving code does this.
df$current_age <- as.numeric(scale(df$current_age))
df$weight <- as.numeric(scale(df$weight))

# ---- Encode the response as 0/1 ----
# "Yes" -> 1, anything else -> 0. The comparison works directly on the raw
# column, so no intermediate factor conversion is needed.
df$drug_administered <- ifelse(df$drug_administered == "Yes", 1, 0)

# Class balance of the response in the full data set.
table(df$drug_administered)

# ---- Train / test split (roughly 80 % / 20 %) ----
set.seed(100)
# Each row is independently assigned TRUE (train) with probability 0.8.
# The indicator keeps its original name `sample`; note that it shadows
# base::sample() as a variable from here on.
sample <- sample(c(TRUE, FALSE), nrow(df), replace = TRUE, prob = c(0.8, 0.2))
train <- df[sample, ]
test <- df[!sample, ]

# Class balance of the response in the training partition.
table(train$drug_administered)

# ---- Correct the class imbalance by down-sampling the majority class ----
set.seed(100)
options(scipen = 999) # print numbers in fixed rather than scientific notation

# downSample() needs the predictors in `x` and a *factor* response in `y`.
# The response column is excluded from `x` and handed over separately;
# `yname` makes downSample() put it back under its original name so the model
# formula below can refer to `drug_administered`.
predictor_cols <- setdiff(colnames(train), "drug_administered")
downtrain <- downSample(
  x = as.data.frame(train[, predictor_cols]),
  y = as.factor(train$drug_administered),
  yname = "drug_administered"
)

# Class balance after down-sampling.
table(downtrain$drug_administered)

# ---- Fit the logistic regression on the balanced training data ----
model <- glm(drug_administered ~ ., family = "binomial", data = downtrain)
summary(model)

# McFadden's pseudo R-squared as a goodness-of-fit measure.
pscl::pR2(model)["McFadden"]
# Variable importance.
caret::varImp(model)
# VIF of each variable in the model, to see whether multicollinearity is a
# problem.
car::vif(model)

# ---- Score the held-out test set ----
# type = "response" returns predicted probabilities rather than log-odds.
predicted <- predict(model, newdata = test, type = "response")
summary(predicted)

# The true labels; every metric below uses this same column.
actual <- test$drug_administered

# The metric functions are namespaced to InformationValue on purpose: caret
# exports functions with the same names (confusionMatrix, sensitivity,
# specificity) that take different arguments.

# Probability cutoff chosen by optimalCutoff() for these predictions.
optimal <- InformationValue::optimalCutoff(actual, predicted)[1]
optimal

# Confusion matrix at the chosen cutoff.
InformationValue::confusionMatrix(actual, predicted, threshold = optimal)
# Sensitivity (%), at the chosen cutoff.
InformationValue::sensitivity(actual, predicted, threshold = optimal) * 100
# Specificity (%), at the chosen cutoff.
InformationValue::specificity(actual, predicted, threshold = optimal) * 100
# Misclassification error (%), at the chosen cutoff.
miscla <- InformationValue::misClassError(actual, predicted, threshold = optimal)
miscla * 100

# Overall accuracy of the model (%).
(1 - miscla) * 100
# Plot the ROC curve.
InformationValue::plotROC(actual, predicted)

# Persist the fitted model so another script can load it.
write_rds(model, "model_patients.rds")

上面的脚本是用 R 构建的逻辑回归模型。我想使用 plumber 将其部署到我的本地服务器,并为此另外编写了一个 plumber 脚本。但这个 plumber 脚本实际上并没有做出它应该做的预测。请问我哪里做错了?

原文

library(tidyverse)
library(caret)
library(InformationValue)
library(epicalc)
library(lubridate)
library(Boruta)

# ---- Load and inspect the raw data ----
df <- read_csv("dataset.csv")
sum(is.na(df)) # total number of missing cells
str(df)
names(df)

# ---- Convert categorical predictors to factors ----
# Column names are kept exactly as the script has always referenced them
# (including the spelling "symtoms_status").
categorical_cols <- c(
  "sex", "pregnancy_status", "symtoms_status",
  "marital_status", "education_level", "occupation"
)
df[categorical_cols] <- lapply(df[categorical_cols], as.factor)

# ---- Standardise numeric predictors ----
# scale() returns a one-column matrix carrying centring/scaling attributes;
# as.numeric() turns it back into a plain numeric column so that later
# subsetting, down-sampling and prediction see ordinary vectors.
# NOTE(review): any new data scored with the saved model (e.g. from an API)
# has to be standardised with the same mean/sd as used here -- confirm that
# the serving code does this.
df$current_age <- as.numeric(scale(df$current_age))
df$weight <- as.numeric(scale(df$weight))

# ---- Encode the response as 0/1 ----
# "Yes" -> 1, anything else -> 0. The comparison works directly on the raw
# column, so no intermediate factor conversion is needed.
df$drug_administered <- ifelse(df$drug_administered == "Yes", 1, 0)

# Class balance of the response in the full data set.
table(df$drug_administered)

# ---- Train / test split (roughly 80 % / 20 %) ----
set.seed(100)
# Each row is independently assigned TRUE (train) with probability 0.8.
# The indicator keeps its original name `sample`; note that it shadows
# base::sample() as a variable from here on.
sample <- sample(c(TRUE, FALSE), nrow(df), replace = TRUE, prob = c(0.8, 0.2))
train <- df[sample, ]
test <- df[!sample, ]

# Class balance of the response in the training partition.
table(train$drug_administered)

# ---- Correct the class imbalance by down-sampling the majority class ----
set.seed(100)
options(scipen = 999) # print numbers in fixed rather than scientific notation

# downSample() needs the predictors in `x` and a *factor* response in `y`.
# The response column is excluded from `x` and handed over separately;
# `yname` makes downSample() put it back under its original name so the model
# formula below can refer to `drug_administered`.
predictor_cols <- setdiff(colnames(train), "drug_administered")
downtrain <- downSample(
  x = as.data.frame(train[, predictor_cols]),
  y = as.factor(train$drug_administered),
  yname = "drug_administered"
)

# Class balance after down-sampling.
table(downtrain$drug_administered)

# ---- Fit the logistic regression on the balanced training data ----
model <- glm(drug_administered ~ ., family = "binomial", data = downtrain)
summary(model)

# McFadden's pseudo R-squared as a goodness-of-fit measure.
pscl::pR2(model)["McFadden"]
# Variable importance.
caret::varImp(model)
# VIF of each variable in the model, to see whether multicollinearity is a
# problem.
car::vif(model)

# ---- Score the held-out test set ----
# type = "response" returns predicted probabilities rather than log-odds.
predicted <- predict(model, newdata = test, type = "response")
summary(predicted)

# The true labels; every metric below uses this same column.
actual <- test$drug_administered

# The metric functions are namespaced to InformationValue on purpose: caret
# exports functions with the same names (confusionMatrix, sensitivity,
# specificity) that take different arguments.

# Probability cutoff chosen by optimalCutoff() for these predictions.
optimal <- InformationValue::optimalCutoff(actual, predicted)[1]
optimal

# Confusion matrix at the chosen cutoff.
InformationValue::confusionMatrix(actual, predicted, threshold = optimal)
# Sensitivity (%), at the chosen cutoff.
InformationValue::sensitivity(actual, predicted, threshold = optimal) * 100
# Specificity (%), at the chosen cutoff.
InformationValue::specificity(actual, predicted, threshold = optimal) * 100
# Misclassification error (%), at the chosen cutoff.
miscla <- InformationValue::misClassError(actual, predicted, threshold = optimal)
miscla * 100

# Overall accuracy of the model (%).
(1 - miscla) * 100
# Plot the ROC curve.
InformationValue::plotROC(actual, predicted)

# Persist the fitted model so another script can load it.
write_rds(model, "model_patients.rds")

The script above builds a logistic regression model in R. I want to use plumber to deploy it to my local server, and I created a separate script for plumber. However, the plumber script is not actually making the predictions it is meant to make. What am I not getting right?

分享到QQ

分享到微博