在 R 中标记循环图

发布于 2025-01-21 03:49:47 字数 446 浏览 0 评论 0原文

我正在使用一个循环来绘制直方图，同时按 column_a 的不同值进行分组，效果非常好。代码如下：

par(ask=F)

for (i in unique(Data$column_a)) {
  dat <- Data[Data$column_a== i, ]
  plotdist(dat$count,histo = TRUE, demp = TRUE, discrete = T,
           pch = 16, col = "dodgerblue1") 
}

唯一的问题是我无法相对于 column_a 值标记每个数字以将这些数字与另一个数字区分开来。

预先感谢您的帮助。

我的数据由损失数量组成，列名称为“count”，column_a(R,I,F)) 中有 3 个不同的值。我想绘制这三个值的损失数直方图。

原文

I am using a loop for plotting the histogram, group by different values of column_a at once which works perfectly fine. Here's the code:

par(ask=F)

for (i in unique(Data$column_a)) {
  dat <- Data[Data$column_a== i, ]
  plotdist(dat$count,histo = TRUE, demp = TRUE, discrete = T,
           pch = 16, col = "dodgerblue1") 
}

The only problem is that I cannot label each figure relative to column_a value to differentiate the figures from on another.

Thanks in advance for the help.

my data consists of number of losses with the column name of "count" with 3 distinct value in column_a(R,I,F)). and I want to plot the histogram of number of losses for these three values.

分享到QQ

分享到微博

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

需要登录才能够评论，你可以免费注册一个本站的账号。

却一份温柔 2025-01-28 03:49:47

一个有点hacky的解决方案是改变函数本身。

下面是 alteret 函数，它不包括 title 参数（并且仅适用于您问题中的配置！）

plotdist_alt <- function (data, distr, para, histo = TRUE, breaks = "default", 
                      demp = FALSE, discrete, title = "default", ...) 
{
  def.par <- par(no.readonly = TRUE)
  if (missing(data) || !is.vector(data, mode = "numeric")) 
    stop("data must be a numeric vector")
  if ((missing(distr) & !missing(para)) || (missing(distr) & 
                                            !missing(para))) 
    stop("distr and para must defined")
  if (!histo & !demp) 
    stop("one the arguments histo and demp must be put to TRUE")
  xlim <- c(min(data), max(data))
  s <- sort(data)
  n <- length(data)
  if (missing(distr)) {
    par(mfrow = c(1, 2))
    if (missing(discrete)) 
      discrete <- FALSE
    if (!discrete) {
      obsp <- ppoints(s)
      if (histo) {
        if (demp) {
          if (breaks == "default") 
            h <- hist(data, freq = FALSE, xlab = "Data", 
                      main = "Empirical density", ...)
          else h <- hist(data, freq = FALSE, xlab = "Data", 
                         main = "Empirical density", breaks = breaks, 
                         ...)
          lines(density(data)$x, density(data)$y, lty = 2, 
                col = "black")
        }
        else {
          if (breaks == "default") 
            h <- hist(data, freq = FALSE, xlab = "Data", 
                      main = "Histogram", ...)
          else h <- hist(data, freq = FALSE, xlab = "Data", 
                         main = "Histogram", breaks = breaks, 
                         ...)
        }
      }
      else {
        h <- hist(data, freq = FALSE, xlab = "Data", 
                  main = "Histogram", plot = FALSE, ...)
        plot(density(data)$x, density(data)$y, lty = 1, 
             col = "black", type = "l", xlab = "Data", 
             main = paste("Empirical density"), ylab = "Density", 
             ...)
      }
      plot(s, obsp, main = paste("Cumulative distribution"), 
           xlab = "Data", xlim = c(h$breaks[1], h$breaks[length(h$breaks)]), 
           ylab = "CDF", ...)
    }
    else {
      if (breaks != "default") 
        warning("Breaks are\tnot taken into account for discrete data")
      t <- table(data)
      xval <- as.numeric(names(t))
      ydobs <- as.vector(t)/n
      ydmax <- max(ydobs)
      plot(xval, ydobs, type = "h", xlim = xlim, 
           ylim = c(0, ydmax), main = paste0("Empirical distribution ", title), 
           xlab = "Data", ylab = "Density", 
           ...)
      ycdfobs <- cumsum(ydobs)
      plot(xval, ycdfobs, type = "p", xlim = xlim, 
           ylim = c(0, 1), main = paste0("Empirical CDFs ", title), 
           xlab = "Data", ylab = "CDF", ...)
    }
  }
  else {
    if (!is.character(distr)) 
      distname <- substring(as.character(match.call()$distr), 
                            2)
    else distname <- distr
    if (!is.list(para)) 
      stop("'para' must be a named list")
    ddistname <- paste("d", distname, sep = "")
    if (!exists(ddistname, mode = "function")) 
      stop(paste("The ", ddistname, " function must be defined"))
    pdistname <- paste("p", distname, sep = "")
    if (!exists(pdistname, mode = "function")) 
      stop(paste("The ", pdistname, " function must be defined"))
    qdistname <- paste("q", distname, sep = "")
    if (!exists(qdistname, mode = "function")) 
      stop(paste("The ", qdistname, " function must be defined"))
    densfun <- get(ddistname, mode = "function")
    nm <- names(para)
    f <- formals(densfun)
    args <- names(f)
    m <- match(nm, args)
    if (any(is.na(m))) 
      stop(paste("'para' specifies names which are not arguments to ", 
                 ddistname))
    if (missing(discrete)) {
      if (is.element(distname, c("binom", "nbinom", 
                                 "geom", "hyper", "pois"))) 
        discrete <- TRUE
      else discrete <- FALSE
    }
    if (!discrete) {
      par(mfrow = c(2, 2))
      obsp <- ppoints(s)
      if (breaks == "default") 
        h <- hist(data, plot = FALSE)
      else h <- hist(data, breaks = breaks, plot = FALSE, 
                     ...)
      xhist <- seq(min(h$breaks), max(h$breaks), length = 1000)
      yhist <- do.call(ddistname, c(list(xhist), as.list(para)))
      if (length(yhist) != length(xhist)) 
        stop("problem when computing densities.")
      ymax <- ifelse(is.finite(max(yhist)), max(max(h$density), 
                                                max(yhist)), max(h$density))
      if (histo) {
        hist(data, freq = FALSE, xlab = "Data", 
             ylim = c(0, ymax), breaks = h$breaks, main = paste("Empirical and theoretical dens."), 
             ...)
        if (demp) {
          lines(density(data)$x, density(data)$y, lty = 2, 
                col = "black")
        }
      }
      else plot(density(data)$x, density(data)$y, lty = 2, 
                col = "black", type = "l", xlab = "Data", 
                main = paste("Empirical and theoretical dens."), 
                ylab = "Density", xlim = c(min(h$breaks), 
                                           max(h$breaks)), ...)
      if (demp) 
        legend("topright", bty = "n", lty = c(2, 
                                              1), col = c("black", "red"), legend = c("empirical", 
                                                                                      "theoretical"), bg = "white", cex = 0.7)
      lines(xhist, yhist, lty = 1, col = "red")
      theoq <- do.call(qdistname, c(list(obsp), as.list(para)))
      if (length(theoq) != length(obsp)) 
        stop("problem when computing quantities.")
      plot(theoq, s, main = " Q-Q plot", xlab = "Theoretical quantiles", 
           ylab = "Empirical quantiles", ...)
      abline(0, 1)
      xmin <- h$breaks[1]
      xmax <- h$breaks[length(h$breaks)]
      if (length(s) != length(obsp)) 
        stop("problem when computing probabilities.")
      plot(s, obsp, main = paste("Empirical and theoretical CDFs"), 
           xlab = "Data", ylab = "CDF", xlim = c(xmin, 
                                                 xmax), ...)
      sfin <- seq(xmin, xmax, by = (xmax - xmin)/100)
      theopfin <- do.call(pdistname, c(list(sfin), as.list(para)))
      lines(sfin, theopfin, lty = 1, col = "red")
      theop <- do.call(pdistname, c(list(s), as.list(para)))
      if (length(theop) != length(obsp)) 
        stop("problem when computing probabilities.")
      plot(theop, obsp, main = "P-P plot", xlab = "Theoretical probabilities", 
           ylab = "Empirical probabilities", ...)
      abline(0, 1)
    }
    else {
      par(mfrow = c(1, 2))
      if (breaks != "default") 
        warning("Breaks are not taken into account for discrete distributions")
      t <- table(data)
      xval <- as.numeric(names(t))
      xvalfin <- seq(min(xval), max(xval), by = 1)
      xlinesdec <- min((max(xval) - min(xval))/30, 0.4)
      yd <- do.call(ddistname, c(list(xvalfin), as.list(para)))
      if (length(yd) != length(xvalfin)) 
        stop("problem when computing density points.")
      ydobs <- as.vector(t)/n
      ydmax <- max(yd, ydobs)
      plot(xvalfin + xlinesdec, yd, type = "h", xlim = c(min(xval), 
                                                         max(xval) + xlinesdec), ylim = c(0, ydmax), lty = 1, 
           col = "red", main = "Emp. and theo. distr.", 
           xlab = "Data", ylab = "Density", 
           ...)
      points(xval, ydobs, type = "h", lty = 1, col = "black", 
             ...)
      legend("topright", lty = c(1, 1), col = c("black", 
                                                "red"), legend = c("empirical", paste("theoretical")), 
             bty = "o", bg = "white", cex = 0.6, 
             ...)
      ycdf <- do.call(pdistname, c(list(xvalfin), as.list(para)))
      if (length(ycdf) != length(xvalfin)) 
        stop("problem when computing probabilities.")
      plot(xvalfin, ycdf, type = "s", xlim = c(min(xval), 
                                               max(xval) + xlinesdec), ylim = c(0, 1), lty = 1, 
           col = "red", main = "Emp. and theo. CDFs", 
           xlab = "Data", ylab = "CDF", ...)
      ycdfobs <- cumsum(ydobs)
      points(xval, ycdfobs, type = "p", col = "black", 
             ...)
      legend("bottomright", lty = c(1, 1), col = c("black", 
                                                   "red"), legend = c("empirical", paste("theoretical")), 
             bty = "o", bg = "white", cex = 0.6, 
             ...)
    }
  }
  par(def.par)
  invisible()
}

现在要向绘图添加标题，只需使用以下命令：

par(ask=F)

for (i in unique(Data$column_a)) {
  dat <- Data[Data$column_a== i, ]
  plotdist_alt(dat$count,histo = TRUE, demp = TRUE, discrete = T,
           pch = 16, col = "dodgerblue1", title = i) 
}

编辑：添加虚拟数据测试提供的循环。

df <- data.frame(column_a = rep(c("a", "b"), each = 50),
                 count = sample(1:1000, 100, replace = T))
par(ask=F)

for (i in unique(df$column_a)) {
  dat <- df[df$column_a== i, ]
  plotdist_alt(dat$count,histo = TRUE, demp = TRUE, discrete = T,
               pch = 16, col = "dodgerblue1", title = i) 
}

A somewhat hacky solution would be to alter the function itself.

Below is the alteret function, which uncludes the title argument (and only works for the configuration you had in your question!)

plotdist_alt <- function (data, distr, para, histo = TRUE, breaks = "default", 
                      demp = FALSE, discrete, title = "default", ...) 
{
  def.par <- par(no.readonly = TRUE)
  if (missing(data) || !is.vector(data, mode = "numeric")) 
    stop("data must be a numeric vector")
  if ((missing(distr) & !missing(para)) || (missing(distr) & 
                                            !missing(para))) 
    stop("distr and para must defined")
  if (!histo & !demp) 
    stop("one the arguments histo and demp must be put to TRUE")
  xlim <- c(min(data), max(data))
  s <- sort(data)
  n <- length(data)
  if (missing(distr)) {
    par(mfrow = c(1, 2))
    if (missing(discrete)) 
      discrete <- FALSE
    if (!discrete) {
      obsp <- ppoints(s)
      if (histo) {
        if (demp) {
          if (breaks == "default") 
            h <- hist(data, freq = FALSE, xlab = "Data", 
                      main = "Empirical density", ...)
          else h <- hist(data, freq = FALSE, xlab = "Data", 
                         main = "Empirical density", breaks = breaks, 
                         ...)
          lines(density(data)$x, density(data)$y, lty = 2, 
                col = "black")
        }
        else {
          if (breaks == "default") 
            h <- hist(data, freq = FALSE, xlab = "Data", 
                      main = "Histogram", ...)
          else h <- hist(data, freq = FALSE, xlab = "Data", 
                         main = "Histogram", breaks = breaks, 
                         ...)
        }
      }
      else {
        h <- hist(data, freq = FALSE, xlab = "Data", 
                  main = "Histogram", plot = FALSE, ...)
        plot(density(data)$x, density(data)$y, lty = 1, 
             col = "black", type = "l", xlab = "Data", 
             main = paste("Empirical density"), ylab = "Density", 
             ...)
      }
      plot(s, obsp, main = paste("Cumulative distribution"), 
           xlab = "Data", xlim = c(h$breaks[1], h$breaks[length(h$breaks)]), 
           ylab = "CDF", ...)
    }
    else {
      if (breaks != "default") 
        warning("Breaks are\tnot taken into account for discrete data")
      t <- table(data)
      xval <- as.numeric(names(t))
      ydobs <- as.vector(t)/n
      ydmax <- max(ydobs)
      plot(xval, ydobs, type = "h", xlim = xlim, 
           ylim = c(0, ydmax), main = paste0("Empirical distribution ", title), 
           xlab = "Data", ylab = "Density", 
           ...)
      ycdfobs <- cumsum(ydobs)
      plot(xval, ycdfobs, type = "p", xlim = xlim, 
           ylim = c(0, 1), main = paste0("Empirical CDFs ", title), 
           xlab = "Data", ylab = "CDF", ...)
    }
  }
  else {
    if (!is.character(distr)) 
      distname <- substring(as.character(match.call()$distr), 
                            2)
    else distname <- distr
    if (!is.list(para)) 
      stop("'para' must be a named list")
    ddistname <- paste("d", distname, sep = "")
    if (!exists(ddistname, mode = "function")) 
      stop(paste("The ", ddistname, " function must be defined"))
    pdistname <- paste("p", distname, sep = "")
    if (!exists(pdistname, mode = "function")) 
      stop(paste("The ", pdistname, " function must be defined"))
    qdistname <- paste("q", distname, sep = "")
    if (!exists(qdistname, mode = "function")) 
      stop(paste("The ", qdistname, " function must be defined"))
    densfun <- get(ddistname, mode = "function")
    nm <- names(para)
    f <- formals(densfun)
    args <- names(f)
    m <- match(nm, args)
    if (any(is.na(m))) 
      stop(paste("'para' specifies names which are not arguments to ", 
                 ddistname))
    if (missing(discrete)) {
      if (is.element(distname, c("binom", "nbinom", 
                                 "geom", "hyper", "pois"))) 
        discrete <- TRUE
      else discrete <- FALSE
    }
    if (!discrete) {
      par(mfrow = c(2, 2))
      obsp <- ppoints(s)
      if (breaks == "default") 
        h <- hist(data, plot = FALSE)
      else h <- hist(data, breaks = breaks, plot = FALSE, 
                     ...)
      xhist <- seq(min(h$breaks), max(h$breaks), length = 1000)
      yhist <- do.call(ddistname, c(list(xhist), as.list(para)))
      if (length(yhist) != length(xhist)) 
        stop("problem when computing densities.")
      ymax <- ifelse(is.finite(max(yhist)), max(max(h$density), 
                                                max(yhist)), max(h$density))
      if (histo) {
        hist(data, freq = FALSE, xlab = "Data", 
             ylim = c(0, ymax), breaks = h$breaks, main = paste("Empirical and theoretical dens."), 
             ...)
        if (demp) {
          lines(density(data)$x, density(data)$y, lty = 2, 
                col = "black")
        }
      }
      else plot(density(data)$x, density(data)$y, lty = 2, 
                col = "black", type = "l", xlab = "Data", 
                main = paste("Empirical and theoretical dens."), 
                ylab = "Density", xlim = c(min(h$breaks), 
                                           max(h$breaks)), ...)
      if (demp) 
        legend("topright", bty = "n", lty = c(2, 
                                              1), col = c("black", "red"), legend = c("empirical", 
                                                                                      "theoretical"), bg = "white", cex = 0.7)
      lines(xhist, yhist, lty = 1, col = "red")
      theoq <- do.call(qdistname, c(list(obsp), as.list(para)))
      if (length(theoq) != length(obsp)) 
        stop("problem when computing quantities.")
      plot(theoq, s, main = " Q-Q plot", xlab = "Theoretical quantiles", 
           ylab = "Empirical quantiles", ...)
      abline(0, 1)
      xmin <- h$breaks[1]
      xmax <- h$breaks[length(h$breaks)]
      if (length(s) != length(obsp)) 
        stop("problem when computing probabilities.")
      plot(s, obsp, main = paste("Empirical and theoretical CDFs"), 
           xlab = "Data", ylab = "CDF", xlim = c(xmin, 
                                                 xmax), ...)
      sfin <- seq(xmin, xmax, by = (xmax - xmin)/100)
      theopfin <- do.call(pdistname, c(list(sfin), as.list(para)))
      lines(sfin, theopfin, lty = 1, col = "red")
      theop <- do.call(pdistname, c(list(s), as.list(para)))
      if (length(theop) != length(obsp)) 
        stop("problem when computing probabilities.")
      plot(theop, obsp, main = "P-P plot", xlab = "Theoretical probabilities", 
           ylab = "Empirical probabilities", ...)
      abline(0, 1)
    }
    else {
      par(mfrow = c(1, 2))
      if (breaks != "default") 
        warning("Breaks are not taken into account for discrete distributions")
      t <- table(data)
      xval <- as.numeric(names(t))
      xvalfin <- seq(min(xval), max(xval), by = 1)
      xlinesdec <- min((max(xval) - min(xval))/30, 0.4)
      yd <- do.call(ddistname, c(list(xvalfin), as.list(para)))
      if (length(yd) != length(xvalfin)) 
        stop("problem when computing density points.")
      ydobs <- as.vector(t)/n
      ydmax <- max(yd, ydobs)
      plot(xvalfin + xlinesdec, yd, type = "h", xlim = c(min(xval), 
                                                         max(xval) + xlinesdec), ylim = c(0, ydmax), lty = 1, 
           col = "red", main = "Emp. and theo. distr.", 
           xlab = "Data", ylab = "Density", 
           ...)
      points(xval, ydobs, type = "h", lty = 1, col = "black", 
             ...)
      legend("topright", lty = c(1, 1), col = c("black", 
                                                "red"), legend = c("empirical", paste("theoretical")), 
             bty = "o", bg = "white", cex = 0.6, 
             ...)
      ycdf <- do.call(pdistname, c(list(xvalfin), as.list(para)))
      if (length(ycdf) != length(xvalfin)) 
        stop("problem when computing probabilities.")
      plot(xvalfin, ycdf, type = "s", xlim = c(min(xval), 
                                               max(xval) + xlinesdec), ylim = c(0, 1), lty = 1, 
           col = "red", main = "Emp. and theo. CDFs", 
           xlab = "Data", ylab = "CDF", ...)
      ycdfobs <- cumsum(ydobs)
      points(xval, ycdfobs, type = "p", col = "black", 
             ...)
      legend("bottomright", lty = c(1, 1), col = c("black", 
                                                   "red"), legend = c("empirical", paste("theoretical")), 
             bty = "o", bg = "white", cex = 0.6, 
             ...)
    }
  }
  par(def.par)
  invisible()
}

To now add a title to your plot, simply use this:

par(ask=F)

for (i in unique(Data$column_a)) {
  dat <- Data[Data$column_a== i, ]
  plotdist_alt(dat$count,histo = TRUE, demp = TRUE, discrete = T,
           pch = 16, col = "dodgerblue1", title = i) 
}

Edit: Added dummy data to test the provided loop.

df <- data.frame(column_a = rep(c("a", "b"), each = 50),
                 count = sample(1:1000, 100, replace = T))
par(ask=F)

for (i in unique(df$column_a)) {
  dat <- df[df$column_a== i, ]
  plotdist_alt(dat$count,histo = TRUE, demp = TRUE, discrete = T,
               pch = 16, col = "dodgerblue1", title = i) 
}

回复收藏 0 原文

~没有更多了~