GLMM:用标准化变量拟合模型后,如何以原始尺度展示变量

发布于 2025-01-17 11:14:02 字数 2567 浏览 2 评论 0原文

我在拟合 GLMM 之前对输入变量做了标准化,但在最终的图中,变量和预测值无法按真实世界的原始尺度显示。在我的示例中:

temp_glmm

我做:

#Packages
library(lme4)
library(ggplot2)
library(ggeffects)
library(tidyverse)
library(bbmle) 
library(broom)

# Load the dataset
myds <- read.csv(
  "https://raw.githubusercontent.com/Leprechault/trash/main/ds.desenvol.csv"
)
str(myds)
# 'data.frame': 400 obs. of  4 variables:
#  $ temp       : num  0 0 0 0 0 0 0 0 0 0 ...
#  $ storage    : int  5 5 5 5 5 5 5 5 5 5 ...
#  $ rep        : chr  "r1" "r2" "r3" "r4" ...
#  $ development: int  0 23 22 27 24 25 24 22 0 22 ...

# Storage (days) is temporally correlated with temperature, hence the mixed
# models below. Both predictors are standardized first; drop() removes the
# matrix dimensions that scale() adds.
ds.scale <- mutate(
  myds,
  across(c(temp, storage), function(v) drop(scale(v)))
)

# Candidate models, all with a random intercept for storage:
# Poisson (m_1 to m_3) and negative binomial (m_4 to m_6).
m_1 <- glmer(
  development ~ temp + storage + (1 | storage),
  data = ds.scale,
  family = "poisson"
)
m_2 <- glmer(
  development ~ poly(temp, 2) + storage + (1 | storage),
  data = ds.scale,
  family = "poisson"
)
m_3 <- glmer(
  development ~ poly(temp, 2) + poly(storage, 2) + (1 | storage),
  data = ds.scale,
  family = "poisson"
)
m_4 <- glmer.nb(
  development ~ temp + storage + (1 | storage),
  data = ds.scale
)
m_5 <- glmer.nb(
  development ~ poly(temp, 2) + storage + (1 | storage),
  data = ds.scale
)
m_6 <- glmer.nb(
  development ~ poly(temp, 2) + poly(storage, 2) + (1 | storage),
  data = ds.scale
)

# Rank the candidates by AIC; lst() names each element after its object
modList <- tibble::lst(m_1, m_2, m_3, m_4, m_5, m_6)
bbmle::AICtab(modList)

#     dAIC df
# m_6  0.0 7 
# m_3  1.0 6 
# m_5  3.3 6 
# m_2  5.0 5 
# m_4 17.9 5 
# m_1 21.0 4

# Predictions from the best-ranked model (m_6)
mydf <- ggpredict(m_6, terms = c("temp [all]", "storage[all]"))

# Predictions against temp.
# NOTE: the points come from the unscaled `myds`, while `mydf` is predicted
# from a model fitted to the standardized `ds.scale`.
ggplot(mydf, aes(x = x, y = predicted)) +
  geom_point(data = myds, aes(x = temp, y = development), alpha = 0.5) +
  geom_line() +
  labs(x = "temp", y = "development")

# Same plot with the raw storage values on the x-axis
ggplot(mydf, aes(x = x, y = predicted)) +
  geom_point(data = myds, aes(x = storage, y = development), alpha = 0.5) +
  geom_line() +
  labs(x = "storage", y = "development")
# -------------------------------------------------------------------------------------------  

但我希望在最佳模型(m_6)的图中以原始尺度表示 temp 和 storage 变量。正确的做法是什么? 是不顾警告(Model is nearly unidentifiable: very large eigenvalue - Rescale variables?)而不对输入变量做标准化? 还是在最后再做某种数据变换?

请问有人能帮忙吗?

I standardized my input variables before glmm adjustments but in the final plot I have a problem with the real-world scale of my variables and the predicted values. In my example:

temp_glmm

I make:

#Packages
library(lme4)
library(ggplot2)
library(ggeffects)
library(tidyverse)
library(bbmle) 
library(broom)

# Load the dataset
myds <- read.csv(
  "https://raw.githubusercontent.com/Leprechault/trash/main/ds.desenvol.csv"
)
str(myds)
# 'data.frame': 400 obs. of  4 variables:
#  $ temp       : num  0 0 0 0 0 0 0 0 0 0 ...
#  $ storage    : int  5 5 5 5 5 5 5 5 5 5 ...
#  $ rep        : chr  "r1" "r2" "r3" "r4" ...
#  $ development: int  0 23 22 27 24 25 24 22 0 22 ...

# Storage (days) is temporally correlated with temperature, hence the mixed
# models below. Both predictors are standardized first; drop() removes the
# matrix dimensions that scale() adds.
ds.scale <- mutate(
  myds,
  across(c(temp, storage), function(v) drop(scale(v)))
)

# Candidate models, all with a random intercept for storage:
# Poisson (m_1 to m_3) and negative binomial (m_4 to m_6).
m_1 <- glmer(
  development ~ temp + storage + (1 | storage),
  data = ds.scale,
  family = "poisson"
)
m_2 <- glmer(
  development ~ poly(temp, 2) + storage + (1 | storage),
  data = ds.scale,
  family = "poisson"
)
m_3 <- glmer(
  development ~ poly(temp, 2) + poly(storage, 2) + (1 | storage),
  data = ds.scale,
  family = "poisson"
)
m_4 <- glmer.nb(
  development ~ temp + storage + (1 | storage),
  data = ds.scale
)
m_5 <- glmer.nb(
  development ~ poly(temp, 2) + storage + (1 | storage),
  data = ds.scale
)
m_6 <- glmer.nb(
  development ~ poly(temp, 2) + poly(storage, 2) + (1 | storage),
  data = ds.scale
)

# Rank the candidates by AIC; lst() names each element after its object
modList <- tibble::lst(m_1, m_2, m_3, m_4, m_5, m_6)
bbmle::AICtab(modList)

#     dAIC df
# m_6  0.0 7 
# m_3  1.0 6 
# m_5  3.3 6 
# m_2  5.0 5 
# m_4 17.9 5 
# m_1 21.0 4

# Predictions from the best-ranked model (m_6)
mydf <- ggpredict(m_6, terms = c("temp [all]", "storage[all]"))

# Predictions against temp.
# NOTE: the points come from the unscaled `myds`, while `mydf` is predicted
# from a model fitted to the standardized `ds.scale`.
ggplot(mydf, aes(x = x, y = predicted)) +
  geom_point(data = myds, aes(x = temp, y = development), alpha = 0.5) +
  geom_line() +
  labs(x = "temp", y = "development")

# Same plot with the raw storage values on the x-axis
ggplot(mydf, aes(x = x, y = predicted)) +
  geom_point(data = myds, aes(x = storage, y = development), alpha = 0.5) +
  geom_line() +
  labs(x = "storage", y = "development")
# -------------------------------------------------------------------------------------------  

But I'd like the original scale of my temp and storage variables represented in the plots for my best model (m_6).
What is the correct approach for this?
Should I skip standardising my input variables, despite the warnings (Model is nearly unidentifiable: very large eigenvalue - Rescale variables?)?
Or should I apply some data transformation at the end?

Please, any help with it?

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(1)

转身以后 2025-01-24 11:14:02

基本思路是: 在标准化变量的取值范围内选定若干刻度值,把它们换算回未标准化变量的对应值(原始值 = 标准化值 × 标准差 + 均值),然后使用 ggplot 中的 scale_x_...() 来更改坐标轴标签。

library(datawizard)
library(lme4)
library(ggeffects)
library(ggplot2)

# Load the data, standardize the two predictors and refit the chosen model
myds <- read.csv(
  "https://raw.githubusercontent.com/Leprechault/trash/main/ds.desenvol.csv"
)
d.scale <- standardize(myds, select = c("temp", "storage"))
m_6 <- glmer.nb(
  development ~ poly(temp, 2) + poly(storage, 2) + (1 | storage),
  data = d.scale
)


# Predictions along temp (on the standardized scale)
mydf <- ggpredict(m_6, terms = "temp [all]")

# Centre and scale for temp, read back from the attributes of d.scale
center_temp <- attr(d.scale, "center")["temp"]
scale_temp <- attr(d.scale, "scale")["temp"]

# Axis breaks on the standardized scale, and the matching values on the
# original scale (original = scaled * scale + center), rounded for labelling
scaled_range <- c(-1, 0, 1, 2)
new_range <- round(scaled_range * scale_temp + center_temp)

# Plot with the x-axis still on the standardized scale.
# NOTE(review): recent ggeffects releases may expect `show_data` instead of
# `add.data` - confirm against the installed version.
plot(mydf, add.data = TRUE)

plot1

# Original range: breaks stay at the standardized positions, while the labels
# show the back-transformed values
plot(mydf, add.data = TRUE) +
  scale_x_continuous(breaks = scaled_range, labels = new_range)

plot2
# for storage
# The `terms` string must name the model predictor exactly, followed by the
# "[all]" value specifier, so it must not be translated.
mydf <- ggpredict(m_6, terms = "storage [all]")

# retrieve center and scale from standardization
center_storage <- attributes(d.scale)$center["storage"]
scale_storage <- attributes(d.scale)$scale["storage"]

# scaled range, calculate back to range of unstandardized
# (original = scaled * scale + center)
scaled_range <- c(-1, 0, 1)
new_range <- round(scaled_range * scale_storage + center_storage)

# scaled range
plot(mydf, add.data = TRUE)

plot3

# Original range: breaks stay at the standardized positions, while the labels
# show the back-transformed values
plot(mydf, add.data = TRUE) +
  scale_x_continuous(breaks = scaled_range, labels = new_range)

plot4

The basic idea is to pick a few values on the standardized scale, map them back to the corresponding values of the unstandardized variable (original = standardized * scale + center), and then use scale_x_...() from ggplot to change the axis labels.

library(datawizard)
library(lme4)
library(ggeffects)
library(ggplot2)

# Load the data, standardize the two predictors and refit the chosen model
myds <- read.csv(
  "https://raw.githubusercontent.com/Leprechault/trash/main/ds.desenvol.csv"
)
d.scale <- standardize(myds, select = c("temp", "storage"))
m_6 <- glmer.nb(
  development ~ poly(temp, 2) + poly(storage, 2) + (1 | storage),
  data = d.scale
)


# Predictions along temp (on the standardized scale)
mydf <- ggpredict(m_6, terms = "temp [all]")

# Centre and scale for temp, read back from the attributes of d.scale
center_temp <- attr(d.scale, "center")["temp"]
scale_temp <- attr(d.scale, "scale")["temp"]

# Axis breaks on the standardized scale, and the matching values on the
# original scale (original = scaled * scale + center), rounded for labelling
scaled_range <- c(-1, 0, 1, 2)
new_range <- round(scaled_range * scale_temp + center_temp)

# Plot with the x-axis still on the standardized scale.
# NOTE(review): recent ggeffects releases may expect `show_data` instead of
# `add.data` - confirm against the installed version.
plot(mydf, add.data = TRUE)

plot1

# Original range: breaks stay at the standardized positions, while the labels
# show the back-transformed values
plot(mydf, add.data = TRUE) +
  scale_x_continuous(breaks = scaled_range, labels = new_range)

plot2
# Predictions along storage (on the standardized scale)
mydf <- ggpredict(m_6, terms = "storage [all]")

# Centre and scale for storage, read back from the attributes of d.scale
center_storage <- attr(d.scale, "center")["storage"]
scale_storage <- attr(d.scale, "scale")["storage"]

# Axis breaks on the standardized scale, and the matching values on the
# original scale (original = scaled * scale + center), rounded for labelling
scaled_range <- c(-1, 0, 1)
new_range <- round(scaled_range * scale_storage + center_storage)

# Plot with the x-axis still on the standardized scale
plot(mydf, add.data = TRUE)

plot3

# Original range: breaks stay at the standardized positions, while the labels
# show the back-transformed values
plot(mydf, add.data = TRUE) +
  scale_x_continuous(breaks = scaled_range, labels = new_range)

plot4

~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文