在图上添加回归线方程和 R^2

发布于 12-06 14:08 字数 342 浏览 1 评论 0原文

我想知道如何在 ggplot 上添加回归线方程和 R^2。我的代码是:

library(ggplot2)

df <- data.frame(x = c(1:100))
df$y <- 2 + 3 * df$x + rnorm(100, sd = 40)
p <- ggplot(data = df, aes(x = x, y = y)) +
            geom_smooth(method = "lm", se=FALSE, color="black", formula = y ~ x) +
            geom_point()
p

任何帮助将不胜感激。

I wonder how to add regression line equation and R^2 on the ggplot. My code is:

library(ggplot2)

df <- data.frame(x = c(1:100))
df$y <- 2 + 3 * df$x + rnorm(100, sd = 40)
p <- ggplot(data = df, aes(x = x, y = y)) +
            geom_smooth(method = "lm", se=FALSE, color="black", formula = y ~ x) +
            geom_point()
p

Any help will be highly appreciated.

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(10

笑饮青盏花2024-12-13 14:08:23

这是一种解决方案

# GET EQUATION AND R-SQUARED AS STRING
# SOURCE: https://groups.google.com/forum/#!topic/ggplot2/1TgH-kG5XMA

lm_eqn <- function(df){
    m <- lm(y ~ x, df);
    eq <- substitute(italic(y) == a + b %.% italic(x)*","~~italic(r)^2~"="~r2, 
         list(a = format(unname(coef(m)[1]), digits = 2),
              b = format(unname(coef(m)[2]), digits = 2),
             r2 = format(summary(m)$r.squared, digits = 3)))
    as.character(as.expression(eq));
}

p1 <- p + geom_text(x = 25, y = 300, label = lm_eqn(df), parse = TRUE)

编辑。我从我选择这段代码的地方找出了来源。以下是 ggplot2 google 群组中原始帖子的链接

输出

Here is one solution

# GET EQUATION AND R-SQUARED AS STRING
# SOURCE: https://groups.google.com/forum/#!topic/ggplot2/1TgH-kG5XMA

lm_eqn <- function(df){
    m <- lm(y ~ x, df);
    eq <- substitute(italic(y) == a + b %.% italic(x)*","~~italic(r)^2~"="~r2, 
         list(a = format(unname(coef(m)[1]), digits = 2),
              b = format(unname(coef(m)[2]), digits = 2),
             r2 = format(summary(m)$r.squared, digits = 3)))
    as.character(as.expression(eq));
}

p1 <- p + geom_text(x = 25, y = 300, label = lm_eqn(df), parse = TRUE)

EDIT. I figured out the source from where I picked this code. Here is the link to the original post in the ggplot2 google groups

Output

错々过的事2024-12-13 14:08:23

我的包中的统计 stat_poly_eq() ggpmisc< /a> 可以根据线性模型拟合向绘图添加文本标签。 (统计 stat_ma_eq()stat_quant_eq() 工作方式类似,分别支持主轴回归和分位数回归。每个 eq 统计数据都有一个匹配的 < em>线条绘图统计。)

我已经更新了“ggpmisc”(>= 0.5.0)和“ggplot2”(>= 3.4.0)的答案2023年3月30日。主要变化是使用“ggpmisc”(==0.5.0) 中添加的函数 use_label() 来组装标签及其映射。尽管 aes()after_stat() 的使用保持不变,但 use_label() 使映射编码和标签组装更加简单。

在示例中,我使用 stat_poly_line() 而不是 stat_smooth(),因为它与 method< 的 stat_poly_eq() 具有相同的默认值/code> 和公式。我在所有代码示例中省略了 stat_poly_line() 的附加参数,因为它们与添加标签的问题无关。

library(ggplot2)
library(ggpmisc)
#> Loading required package: ggpp
#> 
#> Attaching package: 'ggpp'
#> The following object is masked from 'package:ggplot2':
#> 
#>     annotate

# artificial data
df <- data.frame(x = c(1:100))
df$y <- 2 + 3 * df$x + rnorm(100, sd = 40)
df$yy <- 2 + 3 * df$x + 0.1 * df$x^2 + rnorm(100, sd = 40)

# using default formula, label and methods
ggplot(data = df, aes(x = x, y = y)) +
  stat_poly_line() +
  stat_poly_eq() +
  geom_point()


# assembling a single label with equation and R2
ggplot(data = df, aes(x = x, y = y)) +
  stat_poly_line() +
  stat_poly_eq(use_label(c("eq", "R2"))) +
  geom_point()

"">


# assembling a single label with equation, adjusted R2, F-value, n, P-value
ggplot(data = df, aes(x = x, y = y)) +
  stat_poly_line() +
  stat_poly_eq(use_label(c("eq", "adj.R2", "f", "p", "n"))) +
  geom_point()


# assembling a single label with R2, its confidence interval, and n
ggplot(data = df, aes(x = x, y = y)) +
  stat_poly_line() +
  stat_poly_eq(use_label(c("R2", "R2.confint", "n"))) +
  geom_point()


# adding separate labels with equation and R2
ggplot(data = df, aes(x = x, y = y)) +
  stat_poly_line() +
  stat_poly_eq(use_label("eq")) +
  stat_poly_eq(label.y = 0.9) +
  geom_point()


# regression through the origin
ggplot(data = df, aes(x = x, y = y)) +
  stat_poly_line(formula = y ~ x + 0) +
  stat_poly_eq(use_label("eq"),
               formula = y ~ x + 0) +
  geom_point()


# fitting a polynomial
ggplot(data = df, aes(x = x, y = yy)) +
  stat_poly_line(formula = y ~ poly(x, 2, raw = TRUE)) +
  stat_poly_eq(formula = y ~ poly(x, 2, raw = TRUE), use_label("eq")) +
  geom_point()


# adding a hat as asked by @MYaseen208 and @elarry
ggplot(data = df, aes(x = x, y = y)) +
  stat_poly_line() +
  stat_poly_eq(eq.with.lhs = "italic(hat(y))~`=`~",
               use_label(c("eq", "R2"))) +
  geom_point()


# variable substitution as asked by @shabbychef
# same labels in equation and axes
ggplot(data = df, aes(x = x, y = y)) +
  stat_poly_line() +
  stat_poly_eq(eq.with.lhs = "italic(h)~`=`~",
               eq.x.rhs = "~italic(z)",
               use_label("eq")) +
  labs(x = expression(italic(z)), y = expression(italic(h))) +
  geom_point()


# grouping as asked by @helen.h
dfg <- data.frame(x = c(1:100))
dfg$y <- 20 * c(0, 1) + 3 * df$x + rnorm(100, sd = 40)
dfg$group <- factor(rep(c("A", "B"), 50))

ggplot(data = dfg, aes(x = x, y = y, colour = group)) +
  stat_poly_line() +
  stat_poly_eq(use_label(c("eq", "R2"))) +
  geom_point()


# A group label is available, for grouped data
ggplot(data = dfg, aes(x = x, y = y, linetype = group, grp.label = group)) +
  stat_poly_line() +
  stat_poly_eq(use_label(c("grp", "eq", "R2"))) +
  geom_point()


# use_label() makes it easier to create the mappings, but when more
# flexibility is needed like different separators at different positions,
# as shown here, aes() has to be used instead of use_label().
ggplot(data = dfg, aes(x = x, y = y, linetype = group, grp.label = group)) +
  stat_poly_line() +
  stat_poly_eq(aes(label = paste(after_stat(grp.label), "*\": \"*",
                                 after_stat(eq.label), "*\", \"*",
                                 after_stat(rr.label), sep = ""))) +
  geom_point()


# a single fit with grouped data as asked by @Herman
ggplot(data = dfg, aes(x = x, y = y)) +
  stat_poly_line() +
  stat_poly_eq(use_label(c("eq", "R2"))) +
  geom_point(aes(colour = group))


# facets
ggplot(data = dfg, aes(x = x, y = y)) +
  stat_poly_line() +
  stat_poly_eq(use_label(c("eq", "R2"))) +
  geom_point() +
  facet_wrap(~group)

已创建于 2023 年 3 月 30 日使用 reprex v2.0.2

Statistic stat_poly_eq() in my package ggpmisc makes it possible to add text labels to plots based on a linear model fit. (Statistics stat_ma_eq() and stat_quant_eq() work similarly and support major axis regression and quantile regression, respectively. Each eq stat has a matching line drawing stat.)

I have updated this answer for 'ggpmisc' (>= 0.5.0) and 'ggplot2' (>= 3.4.0) on 2023-03-30. The main change is the assembly of labels and their mapping using function use_label() added in 'ggpmisc' (==0.5.0). Although use of aes() and after_stat() remains unchanged, use_label() makes coding of mappings and assembly of labels simpler.

In the examples I use stat_poly_line() instead of stat_smooth() as it has the same defaults as stat_poly_eq() for method and formula. I have omitted in all code examples the additional arguments to stat_poly_line() as they are irrelevant to the question of adding labels.

library(ggplot2)
library(ggpmisc)
#> Loading required package: ggpp
#> 
#> Attaching package: 'ggpp'
#> The following object is masked from 'package:ggplot2':
#> 
#>     annotate

# artificial data
df <- data.frame(x = c(1:100))
df$y <- 2 + 3 * df$x + rnorm(100, sd = 40)
df$yy <- 2 + 3 * df$x + 0.1 * df$x^2 + rnorm(100, sd = 40)

# using default formula, label and methods
ggplot(data = df, aes(x = x, y = y)) +
  stat_poly_line() +
  stat_poly_eq() +
  geom_point()


# assembling a single label with equation and R2
ggplot(data = df, aes(x = x, y = y)) +
  stat_poly_line() +
  stat_poly_eq(use_label(c("eq", "R2"))) +
  geom_point()


# assembling a single label with equation, adjusted R2, F-value, n, P-value
ggplot(data = df, aes(x = x, y = y)) +
  stat_poly_line() +
  stat_poly_eq(use_label(c("eq", "adj.R2", "f", "p", "n"))) +
  geom_point()


# assembling a single label with R2, its confidence interval, and n
ggplot(data = df, aes(x = x, y = y)) +
  stat_poly_line() +
  stat_poly_eq(use_label(c("R2", "R2.confint", "n"))) +
  geom_point()


# adding separate labels with equation and R2
ggplot(data = df, aes(x = x, y = y)) +
  stat_poly_line() +
  stat_poly_eq(use_label("eq")) +
  stat_poly_eq(label.y = 0.9) +
  geom_point()


# regression through the origin
ggplot(data = df, aes(x = x, y = y)) +
  stat_poly_line(formula = y ~ x + 0) +
  stat_poly_eq(use_label("eq"),
               formula = y ~ x + 0) +
  geom_point()


# fitting a polynomial
ggplot(data = df, aes(x = x, y = yy)) +
  stat_poly_line(formula = y ~ poly(x, 2, raw = TRUE)) +
  stat_poly_eq(formula = y ~ poly(x, 2, raw = TRUE), use_label("eq")) +
  geom_point()


# adding a hat as asked by @MYaseen208 and @elarry
ggplot(data = df, aes(x = x, y = y)) +
  stat_poly_line() +
  stat_poly_eq(eq.with.lhs = "italic(hat(y))~`=`~",
               use_label(c("eq", "R2"))) +
  geom_point()


# variable substitution as asked by @shabbychef
# same labels in equation and axes
ggplot(data = df, aes(x = x, y = y)) +
  stat_poly_line() +
  stat_poly_eq(eq.with.lhs = "italic(h)~`=`~",
               eq.x.rhs = "~italic(z)",
               use_label("eq")) +
  labs(x = expression(italic(z)), y = expression(italic(h))) +
  geom_point()


# grouping as asked by @helen.h
dfg <- data.frame(x = c(1:100))
dfg$y <- 20 * c(0, 1) + 3 * df$x + rnorm(100, sd = 40)
dfg$group <- factor(rep(c("A", "B"), 50))

ggplot(data = dfg, aes(x = x, y = y, colour = group)) +
  stat_poly_line() +
  stat_poly_eq(use_label(c("eq", "R2"))) +
  geom_point()


# A group label is available, for grouped data
ggplot(data = dfg, aes(x = x, y = y, linetype = group, grp.label = group)) +
  stat_poly_line() +
  stat_poly_eq(use_label(c("grp", "eq", "R2"))) +
  geom_point()


# use_label() makes it easier to create the mappings, but when more
# flexibility is needed like different separators at different positions,
# as shown here, aes() has to be used instead of use_label().
ggplot(data = dfg, aes(x = x, y = y, linetype = group, grp.label = group)) +
  stat_poly_line() +
  stat_poly_eq(aes(label = paste(after_stat(grp.label), "*\": \"*",
                                 after_stat(eq.label), "*\", \"*",
                                 after_stat(rr.label), sep = ""))) +
  geom_point()


# a single fit with grouped data as asked by @Herman
ggplot(data = dfg, aes(x = x, y = y)) +
  stat_poly_line() +
  stat_poly_eq(use_label(c("eq", "R2"))) +
  geom_point(aes(colour = group))


# facets
ggplot(data = dfg, aes(x = x, y = y)) +
  stat_poly_line() +
  stat_poly_eq(use_label(c("eq", "R2"))) +
  geom_point() +
  facet_wrap(~group)

Created on 2023-03-30 with reprex v2.0.2

梦萦几度2024-12-13 14:08:23

我更改了 stat_smooth 源代码和相关函数的几行,以创建一个添加拟合方程和 R 平方值的新函数。这也适用于小平面图!

library(devtools)
source_gist("524eade46135f6348140")
df = data.frame(x = c(1:100))
df$y = 2 + 5 * df$x + rnorm(100, sd = 40)
df$class = rep(1:2,50)
ggplot(data = df, aes(x = x, y = y, label=y)) +
  stat_smooth_func(geom="text",method="lm",hjust=0,parse=TRUE) +
  geom_smooth(method="lm",se=FALSE) +
  geom_point() + facet_wrap(~class)

在此处输入图像描述

我使用@Ramnath 的答案中的代码来格式化方程。 stat_smooth_func 函数不是很强大,但使用它应该不难。

https://gist.github.com/kdauria/524eade46135f6348140。如果出现错误,请尝试更新 ggplot2。

I changed a few lines of the source of stat_smooth and related functions to make a new function that adds the fit equation and R squared value. This will work on facet plots too!

library(devtools)
source_gist("524eade46135f6348140")
df = data.frame(x = c(1:100))
df$y = 2 + 5 * df$x + rnorm(100, sd = 40)
df$class = rep(1:2,50)
ggplot(data = df, aes(x = x, y = y, label=y)) +
  stat_smooth_func(geom="text",method="lm",hjust=0,parse=TRUE) +
  geom_smooth(method="lm",se=FALSE) +
  geom_point() + facet_wrap(~class)

enter image description here

I used the code in @Ramnath's answer to format the equation. The stat_smooth_func function isn't very robust, but it shouldn't be hard to play around with it.

https://gist.github.com/kdauria/524eade46135f6348140. Try updating ggplot2 if you get an error.

深陷2024-12-13 14:08:23

我已将 Ramnath 的帖子修改为 a) 使其更加通用,因此它接受线性模型作为参数而不是数据框,b) 更适当地显示负数。

lm_eqn = function(m) {

  l <- list(a = format(coef(m)[1], digits = 2),
      b = format(abs(coef(m)[2]), digits = 2),
      r2 = format(summary(m)$r.squared, digits = 3));

  if (coef(m)[2] >= 0)  {
    eq <- substitute(italic(y) == a + b %.% italic(x)*","~~italic(r)^2~"="~r2,l)
  } else {
    eq <- substitute(italic(y) == a - b %.% italic(x)*","~~italic(r)^2~"="~r2,l)    
  }

  as.character(as.expression(eq));                 
}

用法将更改为:

p1 = p + geom_text(aes(x = 25, y = 300, label = lm_eqn(lm(y ~ x, df))), parse = TRUE)

I've modified Ramnath's post to a) make more generic so it accepts a linear model as a parameter rather than the data frame and b) displays negatives more appropriately.

lm_eqn = function(m) {

  l <- list(a = format(coef(m)[1], digits = 2),
      b = format(abs(coef(m)[2]), digits = 2),
      r2 = format(summary(m)$r.squared, digits = 3));

  if (coef(m)[2] >= 0)  {
    eq <- substitute(italic(y) == a + b %.% italic(x)*","~~italic(r)^2~"="~r2,l)
  } else {
    eq <- substitute(italic(y) == a - b %.% italic(x)*","~~italic(r)^2~"="~r2,l)    
  }

  as.character(as.expression(eq));                 
}

Usage would change to:

p1 = p + geom_text(aes(x = 25, y = 300, label = lm_eqn(lm(y ~ x, df))), parse = TRUE)
我很OK2024-12-13 14:08:23

这是给大家的最简单的代码

注意:显示 Pearson 的 Rho 和不是 R^2。

library(ggplot2)
library(ggpubr)

df <- data.frame(x = c(1:100)
df$y <- 2 + 3 * df$x + rnorm(100, sd = 40)
p <- ggplot(data = df, aes(x = x, y = y)) +
        geom_smooth(method = "lm", se=FALSE, color="black", formula = y ~ x) +
        geom_point()+
        stat_cor(label.y = 35)+ #this means at 35th unit in the y axis, the r squared and p value will be shown
        stat_regline_equation(label.y = 30) #this means at 30th unit regresion line equation will be shown

p

一个这样的例子使用我自己的数据集

Here's the most simplest code for everyone

Note: Showing Pearson's Rho and not R^2.

library(ggplot2)
library(ggpubr)

df <- data.frame(x = c(1:100)
df$y <- 2 + 3 * df$x + rnorm(100, sd = 40)
p <- ggplot(data = df, aes(x = x, y = y)) +
        geom_smooth(method = "lm", se=FALSE, color="black", formula = y ~ x) +
        geom_point()+
        stat_cor(label.y = 35)+ #this means at 35th unit in the y axis, the r squared and p value will be shown
        stat_regline_equation(label.y = 30) #this means at 30th unit regresion line equation will be shown

p

One such example with my own dataset

烟织青萝梦2024-12-13 14:08:23

使用 ggpubr

library(ggpubr)

# reproducible data
set.seed(1)
df <- data.frame(x = c(1:100))
df$y <- 2 + 3 * df$x + rnorm(100, sd = 40)

# By default showing Pearson R
ggscatter(df, x = "x", y = "y", add = "reg.line") +
  stat_cor(label.y = 300) +
  stat_regline_equation(label.y = 280)

在此处输入图像描述

# Use R2 instead of R
ggscatter(df, x = "x", y = "y", add = "reg.line") +
  stat_cor(label.y = 300, 
           aes(label = paste(..rr.label.., ..p.label.., sep = "~`,`~"))) +
  stat_regline_equation(label.y = 280)

## compare R2 with accepted answer
# m <- lm(y ~ x, df)
# round(summary(m)$r.squared, 2)
# [1] 0.85

在此处输入图像描述

Using ggpubr:

library(ggpubr)

# reproducible data
set.seed(1)
df <- data.frame(x = c(1:100))
df$y <- 2 + 3 * df$x + rnorm(100, sd = 40)

# By default showing Pearson R
ggscatter(df, x = "x", y = "y", add = "reg.line") +
  stat_cor(label.y = 300) +
  stat_regline_equation(label.y = 280)

enter image description here

# Use R2 instead of R
ggscatter(df, x = "x", y = "y", add = "reg.line") +
  stat_cor(label.y = 300, 
           aes(label = paste(..rr.label.., ..p.label.., sep = "~`,`~"))) +
  stat_regline_equation(label.y = 280)

## compare R2 with accepted answer
# m <- lm(y ~ x, df)
# round(summary(m)$r.squared, 2)
# [1] 0.85

enter image description here

浅浅2024-12-13 14:08:23

真的很喜欢@Ramnath 解决方案。为了允许使用自定义回归公式(而不是固定为 y 和 x 作为文字变量名称),并将 p 值添加到打印输出中(如 @Jerry T 评论),这里是 mod

lm_eqn <- function(df, y, x){
    formula = as.formula(sprintf('%s ~ %s', y, x))
    m <- lm(formula, data=df);
    # formating the values into a summary string to print out
    # ~ give some space, but equal size and comma need to be quoted
    eq <- substitute(italic(target) == a + b %.% italic(input)*","~~italic(r)^2~"="~r2*","~~p~"="~italic(pvalue), 
         list(target = y,
              input = x,
              a = format(as.vector(coef(m)[1]), digits = 2), 
              b = format(as.vector(coef(m)[2]), digits = 2), 
             r2 = format(summary(m)$r.squared, digits = 3),
             # getting the pvalue is painful
             pvalue = format(summary(m)$coefficients[2,'Pr(>|t|)'], digits=1)
            )
          )
    as.character(as.expression(eq));                 
}

geom_point() +
  ggrepel::geom_text_repel(label=rownames(mtcars)) +
  geom_text(x=3,y=300,label=lm_eqn(mtcars, 'hp','wt'),color='red',parse=T) +
  geom_smooth(method='lm')

: "https://i.sstatic.net/m3G4L.png" rel="noreferrer">在此处输入图像描述< /a>
不幸的是,这不适用于facet_wrap或facet_grid。

really love @Ramnath solution. To allow use to customize the regression formula (instead of fixed as y and x as literal variable names), and added the p-value into the printout as well (as @Jerry T commented), here is the mod:

lm_eqn <- function(df, y, x){
    formula = as.formula(sprintf('%s ~ %s', y, x))
    m <- lm(formula, data=df);
    # formating the values into a summary string to print out
    # ~ give some space, but equal size and comma need to be quoted
    eq <- substitute(italic(target) == a + b %.% italic(input)*","~~italic(r)^2~"="~r2*","~~p~"="~italic(pvalue), 
         list(target = y,
              input = x,
              a = format(as.vector(coef(m)[1]), digits = 2), 
              b = format(as.vector(coef(m)[2]), digits = 2), 
             r2 = format(summary(m)$r.squared, digits = 3),
             # getting the pvalue is painful
             pvalue = format(summary(m)$coefficients[2,'Pr(>|t|)'], digits=1)
            )
          )
    as.character(as.expression(eq));                 
}

geom_point() +
  ggrepel::geom_text_repel(label=rownames(mtcars)) +
  geom_text(x=3,y=300,label=lm_eqn(mtcars, 'hp','wt'),color='red',parse=T) +
  geom_smooth(method='lm')

enter image description here
Unfortunately, this doesn't work with facet_wrap or facet_grid.

软的没边2024-12-13 14:08:23

另一种选择是使用 dplyr 和 broom 库创建一个生成方程的自定义函数:

get_formula <- function(model) {
  
  broom::tidy(model)[, 1:2] %>%
    mutate(sign = ifelse(sign(estimate) == 1, ' + ', ' - ')) %>% #coeff signs
    mutate_if(is.numeric, ~ abs(round(., 2))) %>% #for improving formatting
    mutate(a = ifelse(term == '(Intercept)', paste0('y ~ ', estimate), paste0(sign, estimate, ' * ', term))) %>%
    summarise(formula = paste(a, collapse = '')) %>%
    as.character
  
}

lm(y ~ x, data = df) -> model
get_formula(model)
#"y ~ 6.22 + 3.16 * x"

scales::percent(summary(model)$r.squared, accuracy = 0.01) -> r_squared

现在我们需要将文本添加到图中:

p + 
  geom_text(x = 20, y = 300,
            label = get_formula(model),
            color = 'red') +
  geom_text(x = 20, y = 285,
            label = r_squared,
            color = 'blue')

plot

Another option would be to create a custom function generating the equation using dplyr and broom libraries:

get_formula <- function(model) {
  
  broom::tidy(model)[, 1:2] %>%
    mutate(sign = ifelse(sign(estimate) == 1, ' + ', ' - ')) %>% #coeff signs
    mutate_if(is.numeric, ~ abs(round(., 2))) %>% #for improving formatting
    mutate(a = ifelse(term == '(Intercept)', paste0('y ~ ', estimate), paste0(sign, estimate, ' * ', term))) %>%
    summarise(formula = paste(a, collapse = '')) %>%
    as.character
  
}

lm(y ~ x, data = df) -> model
get_formula(model)
#"y ~ 6.22 + 3.16 * x"

scales::percent(summary(model)$r.squared, accuracy = 0.01) -> r_squared

Now we need to add the text to the plot:

p + 
  geom_text(x = 20, y = 300,
            label = get_formula(model),
            color = 'red') +
  geom_text(x = 20, y = 285,
            label = r_squared,
            color = 'blue')

plot

音栖息无2024-12-13 14:08:23

这个答案中提供的方程样式的启发,一种更通用的方法(多个预测器+乳胶输出作为选项)可以be:

print_equation= function(model, latex= FALSE, ...){
    dots <- list(...)
    cc= model$coefficients
    var_sign= as.character(sign(cc[-1]))%>%gsub("1","",.)%>%gsub("-"," - ",.)
    var_sign[var_sign==""]= ' + '

    f_args_abs= f_args= dots
    f_args$x= cc
    f_args_abs$x= abs(cc)
    cc_= do.call(format, args= f_args)
    cc_abs= do.call(format, args= f_args_abs)
    pred_vars=
        cc_abs%>%
        paste(., x_vars, sep= star)%>%
        paste(var_sign,.)%>%paste(., collapse= "")

    if(latex){
        star= " \\cdot "
        y_var= strsplit(as.character(model$call$formula), "~")[[2]]%>%
            paste0("\\hat{",.,"_{i}}")
        x_vars= names(cc_)[-1]%>%paste0(.,"_{i}")
    }else{
        star= " * "
        y_var= strsplit(as.character(model$call$formula), "~")[[2]]        
        x_vars= names(cc_)[-1]
    }

    equ= paste(y_var,"=",cc_[1],pred_vars)
    if(latex){
        equ= paste0(equ," + \\hat{\\varepsilon_{i}} \\quad where \\quad \\varepsilon \\sim \\mathcal{N}(0,",
                    summary(MetamodelKdifEryth)$sigma,")")%>%paste0("$",.,"$")
    }
    cat(equ)
}

model 参数需要一个 lm 对象,latex 参数是一个布尔值,用于请求简单字符或乳胶格式的方程,和... 参数将其值传递给 format 函数。

我还添加了一个选项将其输出为乳胶,以便您可以在 rmarkdown 中使用此函数,如下所示:


```{r echo=FALSE, results='asis'}
print_equation(model = lm_mod, latex = TRUE)
```

现在使用它:

df <- data.frame(x = c(1:100))
df$y <- 2 + 3 * df$x + rnorm(100, sd = 40)
df$z <- 8 + 3 * df$x + rnorm(100, sd = 40)
lm_mod= lm(y~x+z, data = df)

print_equation(model = lm_mod, latex = FALSE)

此代码产生:
y = 11.3382963933174 + 2.5893419 * x + 0.1002227 * z

如果我们要求一个乳胶方程,将参数四舍五入到 3 位:

print_equation(model = lm_mod, latex = TRUE, digits= 3)

这会产生:
乳胶方程

Inspired by the equation style provided in this answer, a more generic approach (more than one predictor + latex output as option) can be:

print_equation= function(model, latex= FALSE, ...){
    dots <- list(...)
    cc= model$coefficients
    var_sign= as.character(sign(cc[-1]))%>%gsub("1","",.)%>%gsub("-"," - ",.)
    var_sign[var_sign==""]= ' + '

    f_args_abs= f_args= dots
    f_args$x= cc
    f_args_abs$x= abs(cc)
    cc_= do.call(format, args= f_args)
    cc_abs= do.call(format, args= f_args_abs)
    pred_vars=
        cc_abs%>%
        paste(., x_vars, sep= star)%>%
        paste(var_sign,.)%>%paste(., collapse= "")

    if(latex){
        star= " \\cdot "
        y_var= strsplit(as.character(model$call$formula), "~")[[2]]%>%
            paste0("\\hat{",.,"_{i}}")
        x_vars= names(cc_)[-1]%>%paste0(.,"_{i}")
    }else{
        star= " * "
        y_var= strsplit(as.character(model$call$formula), "~")[[2]]        
        x_vars= names(cc_)[-1]
    }

    equ= paste(y_var,"=",cc_[1],pred_vars)
    if(latex){
        equ= paste0(equ," + \\hat{\\varepsilon_{i}} \\quad where \\quad \\varepsilon \\sim \\mathcal{N}(0,",
                    summary(MetamodelKdifEryth)$sigma,")")%>%paste0("$",.,"$")
    }
    cat(equ)
}

The model argument expects an lm object, the latex argument is a boolean to ask for a simple character or a latex-formated equation, and the ... argument pass its values to the format function.

I also added an option to output it as latex so you can use this function in a rmarkdown like this:


```{r echo=FALSE, results='asis'}
print_equation(model = lm_mod, latex = TRUE)
```

Now using it:

df <- data.frame(x = c(1:100))
df$y <- 2 + 3 * df$x + rnorm(100, sd = 40)
df$z <- 8 + 3 * df$x + rnorm(100, sd = 40)
lm_mod= lm(y~x+z, data = df)

print_equation(model = lm_mod, latex = FALSE)

This code yields:
y = 11.3382963933174 + 2.5893419 * x + 0.1002227 * z

And if we ask for a latex equation, rounding the parameters to 3 digits:

print_equation(model = lm_mod, latex = TRUE, digits= 3)

This yields:
latex equation

秋意浓2024-12-13 14:08:23

与 @zx8754 和 @kdauria 答案类似,但使用 ggplot2 和 ggpubr 。我更喜欢使用 ggpubr ,因为它不需要自定义函数,例如这个问题的最佳答案。

library(ggplot2)
library(ggpubr)

df <- data.frame(x = c(1:100))
df$y <- 2 + 3 * df$x + rnorm(100, sd = 40)

ggplot(data = df, aes(x = x, y = y)) +
  stat_smooth(method = "lm", se=FALSE, color="black", formula = y ~ x) +
  geom_point() +
  stat_cor(aes(label = paste(..rr.label..)), # adds R^2 value
           r.accuracy = 0.01,
           label.x = 0, label.y = 375, size = 4) +
  stat_regline_equation(aes(label = ..eq.label..), # adds equation to linear regression
                        label.x = 0, label.y = 400, size = 4)

输入图片此处描述

还可以在上图中添加 p 值

ggplot(data = df, aes(x = x, y = y)) +
  stat_smooth(method = "lm", se=FALSE, color="black", formula = y ~ x) +
  geom_point() +
  stat_cor(aes(label = paste(..rr.label.., ..p.label.., sep = "~`,`~")), # adds R^2 and p-value
           r.accuracy = 0.01,
           p.accuracy = 0.001,
           label.x = 0, label.y = 375, size = 4) +
  stat_regline_equation(aes(label = ..eq.label..), # adds equation to linear regression
                        label.x = 0, label.y = 400, size = 4)

在此处输入图像描述

当您有多个组时,也可以与 facet_wrap() 配合使用

df$group <- rep(1:2,50)

ggplot(data = df, aes(x = x, y = y)) +
  stat_smooth(method = "lm", se=FALSE, color="black", formula = y ~ x) +
  geom_point() +
  stat_cor(aes(label = paste(..rr.label.., ..p.label.., sep = "~`,`~")),
           r.accuracy = 0.01,
           p.accuracy = 0.001,
           label.x = 0, label.y = 375, size = 4) +
  stat_regline_equation(aes(label = ..eq.label..),
                        label.x = 0, label.y = 400, size = 4) +
  theme_bw() +
  facet_wrap(~group)

< a href="https://i.sstatic.net/or08N.jpg" rel="nofollow noreferrer">输入图像描述这里

Similar to @zx8754 and @kdauria answers except using ggplot2 and ggpubr. I prefer using ggpubr because it does not require custom functions such as the top answer to this question.

library(ggplot2)
library(ggpubr)

df <- data.frame(x = c(1:100))
df$y <- 2 + 3 * df$x + rnorm(100, sd = 40)

ggplot(data = df, aes(x = x, y = y)) +
  stat_smooth(method = "lm", se=FALSE, color="black", formula = y ~ x) +
  geom_point() +
  stat_cor(aes(label = paste(..rr.label..)), # adds R^2 value
           r.accuracy = 0.01,
           label.x = 0, label.y = 375, size = 4) +
  stat_regline_equation(aes(label = ..eq.label..), # adds equation to linear regression
                        label.x = 0, label.y = 400, size = 4)

enter image description here

Could also add p-value to the figure above

ggplot(data = df, aes(x = x, y = y)) +
  stat_smooth(method = "lm", se=FALSE, color="black", formula = y ~ x) +
  geom_point() +
  stat_cor(aes(label = paste(..rr.label.., ..p.label.., sep = "~`,`~")), # adds R^2 and p-value
           r.accuracy = 0.01,
           p.accuracy = 0.001,
           label.x = 0, label.y = 375, size = 4) +
  stat_regline_equation(aes(label = ..eq.label..), # adds equation to linear regression
                        label.x = 0, label.y = 400, size = 4)

enter image description here

Also works well with facet_wrap() when you have multiple groups

df$group <- rep(1:2,50)

ggplot(data = df, aes(x = x, y = y)) +
  stat_smooth(method = "lm", se=FALSE, color="black", formula = y ~ x) +
  geom_point() +
  stat_cor(aes(label = paste(..rr.label.., ..p.label.., sep = "~`,`~")),
           r.accuracy = 0.01,
           p.accuracy = 0.001,
           label.x = 0, label.y = 375, size = 4) +
  stat_regline_equation(aes(label = ..eq.label..),
                        label.x = 0, label.y = 400, size = 4) +
  theme_bw() +
  facet_wrap(~group)

enter image description here

~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文