R2WinBUGS - 使用模拟数据进行逻辑回归

发布于 2024-12-18 01:35:42 字数 1490 浏览 1 评论 0原文

我只是想知道是否有人有一些使用 R2WinBUGS 包来运行逻辑回归的 R 代码 - 理想情况下使用模拟数据来生成“真相”和两个连续协变量。

谢谢。

Christian

PS：

生成人工数据（一维情况）并通过 r2winbugs 运行 winbugs 的潜在代码（尚未工作）。

library(MASS)
library(R2WinBUGS)

setwd("d:/BayesianLogisticRegression")

n.site <- 150

X1<- sort(runif(n = n.site, min = -1, max =1))

xb <- 0.0 + 3.0*X1 

occ.prob <- 1/(1+exp(-xb))

plot(X1, occ.prob,xlab="X1",ylab="occ.prob")

true.presence <- rbinom(n = n.site, size = 1, prob = occ.prob)

plot(X1, true.presence,xlab="X1",ylab="true.presence")

# combine data as data frame and save
data <- data.frame(X1, true.presence)
write.matrix(data, file = "data.txt", sep = "\t")

sink("model.txt")
cat("
model {

# Priors
 alpha ~ dnorm(0,0.01)
 beta ~ dnorm(0,0.01)

# Likelihood
 for (i in 1:n) {
    C[i] ~ dbin(p[i], N)        # Note p before N
    logit(p[i]) <- alpha + beta *X1[i]
 }
}
",fill=TRUE)
sink()

# Bundle data
win.data <- list(mass = X1, n = length(X1))

# Inits function
inits <- function(){ list(alpha=rlnorm(1), beta=rlnorm(1))}

# Parameters to estimate
params <- c("alpha", "beta")

# MCMC settings
nc <- 3 #Number of Chains
ni <- 1200 #Number of draws from posterior
nb <- 200 #Number of draws to discard as burn-in
nt <- 2 Thinning rate

# Start Gibbs sampling
out <- bugs(data=win.data, inits=inits, parameters.to.save=params, 
model.file="model.txt", n.thin=nt, n.chains=nc, n.burnin=nb, 
n.iter=ni, debug = TRUE)

原文

I am just wondering whether anyone has some R code that uses the package R2WinBUGS to run logistic regression - ideally with simulated data to generate the 'truth' and two continous co-variates.

Thanks.

Christian

PS:

Potential code to generate artificial data (one dimensional case) and run winbugs via r2winbugs (it does not work yet).

library(MASS)
library(R2WinBUGS)

setwd("d:/BayesianLogisticRegression")

n.site <- 150

X1<- sort(runif(n = n.site, min = -1, max =1))

xb <- 0.0 + 3.0*X1 

occ.prob <- 1/(1+exp(-xb))

plot(X1, occ.prob,xlab="X1",ylab="occ.prob")

true.presence <- rbinom(n = n.site, size = 1, prob = occ.prob)

plot(X1, true.presence,xlab="X1",ylab="true.presence")

# combine data as data frame and save
data <- data.frame(X1, true.presence)
write.matrix(data, file = "data.txt", sep = "\t")

sink("model.txt")
cat("
model {

# Priors
 alpha ~ dnorm(0,0.01)
 beta ~ dnorm(0,0.01)

# Likelihood
 for (i in 1:n) {
    C[i] ~ dbin(p[i], N)        # Note p before N
    logit(p[i]) <- alpha + beta *X1[i]
 }
}
",fill=TRUE)
sink()

# Bundle data
win.data <- list(mass = X1, n = length(X1))

# Inits function
inits <- function(){ list(alpha=rlnorm(1), beta=rlnorm(1))}

# Parameters to estimate
params <- c("alpha", "beta")

# MCMC settings
nc <- 3 #Number of Chains
ni <- 1200 #Number of draws from posterior
nb <- 200 #Number of draws to discard as burn-in
nt <- 2 Thinning rate

# Start Gibbs sampling
out <- bugs(data=win.data, inits=inits, parameters.to.save=params, 
model.file="model.txt", n.thin=nt, n.chains=nc, n.burnin=nb, 
n.iter=ni, debug = TRUE)

分享到QQ

分享到微博

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

需要登录才能够评论，你可以免费注册一个本站的账号。

箜明 2024-12-25 01:35:42

您的脚本正是这样做的方法。它几乎可以工作了，只需要进行一项简单的更改即可使其工作：

win.data <- list(X1 = X1, n = length(X1), C = true.presence, N = 1)

定义哪些数据进入 WinBugs。变量C必须填写true.presence，根据你生成的数据，N必须为1 - 注意，这是N = 1的二项式分布的特殊情况，称为伯努利 - 一个简单的“硬币翻转”。

这是输出：

> print(out, dig = 3)
Inference for Bugs model at "model.txt", fit using WinBUGS,
 3 chains, each with 1200 iterations (first 200 discarded), n.thin = 2
 n.sims = 1500 iterations saved
            mean    sd    2.5%     25%     50%     75%   97.5%  Rhat n.eff
alpha     -0.040 0.221  -0.465  -0.187  -0.037   0.114   0.390 1.001  1500
beta       3.177 0.478   2.302   2.845   3.159   3.481   4.165 1.000  1500
deviance 136.438 2.059 134.500 135.000 135.800 137.200 141.852 1.001  1500

如您所见，参数对应于用于生成数据的参数。另外，如果您与频率论解决方案进行比较，您会发现它是对应的。

编辑：但是典型的逻辑（~二项式）回归会用一些上限值 N[i] 来测量一些计数，并且它允许每个观察有不同的 N[i]。例如，青少年占总人口的比例（N）。这只需要在模型中向 N 添加索引：

C[i] ~ dbin(p[i], N[i])

数据生成将类似于：

N = rpois(n = n.site, lambda = 50) 
juveniles <- rbinom(n = n.site, size = N, prob = occ.prob)
win.data <- list(X1 = X1, n = length(X1), C = juveniles, N = N)

（编辑结束）

有关种群生态学的更多示例，请参阅 Marc Kéry 的书籍（生态学家的 WinBUGS 简介，特别是使用贝叶斯人口分析WinBUGS：分层视角，这是一本很棒的书）。

我使用的完整脚本 - 此处列出了您的更正脚本（与最后的频率论解决方案进行比较）：

#library(MASS)
library(R2WinBUGS)

#setwd("d:/BayesianLogisticRegression")

n.site <- 150

X1<- sort(runif(n = n.site, min = -1, max =1))

xb <- 0.0 + 3.0*X1 

occ.prob <- 1/(1+exp(-xb)) # inverse logit

plot(X1, occ.prob,xlab="X1",ylab="occ.prob")

true.presence <- rbinom(n = n.site, size = 1, prob = occ.prob)

plot(X1, true.presence,xlab="X1",ylab="true.presence")

# combine data as data frame and save
data <- data.frame(X1, true.presence)
write.matrix(data, file = "data.txt", sep = "\t")

sink("tmp_bugs/model.txt")
cat("
model {

# Priors
 alpha ~ dnorm(0,0.01)
 beta ~ dnorm(0,0.01)

# Likelihood
 for (i in 1:n) {
    C[i] ~ dbin(p[i], N)        # Note p before N
    logit(p[i]) <- alpha + beta *X1[i]
 }
}
",fill=TRUE)
sink()

# Bundle data
win.data <- list(X1 = X1, n = length(X1), C = true.presence, N = 1)

# Inits function
inits <- function(){ list(alpha=rlnorm(1), beta=rlnorm(1))}

# Parameters to estimate
params <- c("alpha", "beta")

# MCMC settings
nc <- 3 #Number of Chains
ni <- 1200 #Number of draws from posterior
nb <- 200 #Number of draws to discard as burn-in
nt <- 2 #Thinning rate

# Start Gibbs sampling
out <- bugs(data=win.data, inits=inits, parameters.to.save=params, 
model.file="model.txt", n.thin=nt, n.chains=nc, n.burnin=nb, 
n.iter=ni, 
working.directory = paste(getwd(), "/tmp_bugs/", sep = ""),
debug = TRUE)

print(out, dig = 3)

# Frequentist approach for comparison
m1 = glm(true.presence ~ X1, family = binomial)
summary(m1)

# normally, you should do it this way, but the above also works:
#m2 = glm(cbind(true.presence, 1 - true.presence) ~ X1, family = binomial)

Your script is exactly the way to do it. It is almost working, it just required one simple change to make it work:

win.data <- list(X1 = X1, n = length(X1), C = true.presence, N = 1)

Which defines which data go to WinBugs. The variable C must be filled with true.presence, N must be 1 according to the data you generated - note that this is a special case of binomial distribution for N = 1, which is called Bernoulli - a simple "coin flip".

Here is the output:

> print(out, dig = 3)
Inference for Bugs model at "model.txt", fit using WinBUGS,
 3 chains, each with 1200 iterations (first 200 discarded), n.thin = 2
 n.sims = 1500 iterations saved
            mean    sd    2.5%     25%     50%     75%   97.5%  Rhat n.eff
alpha     -0.040 0.221  -0.465  -0.187  -0.037   0.114   0.390 1.001  1500
beta       3.177 0.478   2.302   2.845   3.159   3.481   4.165 1.000  1500
deviance 136.438 2.059 134.500 135.000 135.800 137.200 141.852 1.001  1500

as you see, the parameters correspond to the parameters used to generate the data. Also, if you compare with the frequentist solution, you see it corresponds.

EDIT: but the typical logistic (~ binomial) regression would measure some counts with some upper value N[i], and it would allow for different N[i] for each observation. For example say the proportion of juveniles to the whole population (N). This would require just to add index to N in your model:

C[i] ~ dbin(p[i], N[i])

The data generation would look something like:

N = rpois(n = n.site, lambda = 50) 
juveniles <- rbinom(n = n.site, size = N, prob = occ.prob)
win.data <- list(X1 = X1, n = length(X1), C = juveniles, N = N)

(end of edit)

For more examples from population ecology see books of Marc Kéry (Introduction to WinBUGS for ecologist, and especially Bayesian Population Analysis using WinBUGS: A hierarchical perspective, which is a great book).

The complete script I used - the corrected script of yours is listed here (comparison with frequentist solution at the end):

#library(MASS)
library(R2WinBUGS)

#setwd("d:/BayesianLogisticRegression")

n.site <- 150

X1<- sort(runif(n = n.site, min = -1, max =1))

xb <- 0.0 + 3.0*X1 

occ.prob <- 1/(1+exp(-xb)) # inverse logit

plot(X1, occ.prob,xlab="X1",ylab="occ.prob")

true.presence <- rbinom(n = n.site, size = 1, prob = occ.prob)

plot(X1, true.presence,xlab="X1",ylab="true.presence")

# combine data as data frame and save
data <- data.frame(X1, true.presence)
write.matrix(data, file = "data.txt", sep = "\t")

sink("tmp_bugs/model.txt")
cat("
model {

# Priors
 alpha ~ dnorm(0,0.01)
 beta ~ dnorm(0,0.01)

# Likelihood
 for (i in 1:n) {
    C[i] ~ dbin(p[i], N)        # Note p before N
    logit(p[i]) <- alpha + beta *X1[i]
 }
}
",fill=TRUE)
sink()

# Bundle data
win.data <- list(X1 = X1, n = length(X1), C = true.presence, N = 1)

# Inits function
inits <- function(){ list(alpha=rlnorm(1), beta=rlnorm(1))}

# Parameters to estimate
params <- c("alpha", "beta")

# MCMC settings
nc <- 3 #Number of Chains
ni <- 1200 #Number of draws from posterior
nb <- 200 #Number of draws to discard as burn-in
nt <- 2 #Thinning rate

# Start Gibbs sampling
out <- bugs(data=win.data, inits=inits, parameters.to.save=params, 
model.file="model.txt", n.thin=nt, n.chains=nc, n.burnin=nb, 
n.iter=ni, 
working.directory = paste(getwd(), "/tmp_bugs/", sep = ""),
debug = TRUE)

print(out, dig = 3)

# Frequentist approach for comparison
m1 = glm(true.presence ~ X1, family = binomial)
summary(m1)

# normally, you should do it this way, but the above also works:
#m2 = glm(cbind(true.presence, 1 - true.presence) ~ X1, family = binomial)

回复收藏 0 原文

~没有更多了~