在 R 中,如何为多个 csv 文件制作箱线图并导出为 pdf 文件

发布于 2025-01-12 21:25:29 字数 3451 浏览 0 评论 0原文

我想为 80 个 csv 文件制作箱线图,文件名如下所示:NY_two.csv、CA_three.csv、FL_three.csv、……、NY_ten.csv

期望实现的目标包括:

(I)箱线图(导出为 pdf,每页 2 个图表)

请参阅下面的 80 个 csv 文件中的 3 个


# Every one of the 80 files shares the same columns: state, dept, year, revenue.

# Run this snippet to build 3 sample data frames that stand in for 3 of the
# 80 csv files.

# Five departments with four observations each; identical in all three datasets.
dept <- rep(c("energy", "works", "fin", "parks", "trans"), each = 4)

# Dataset 1
state <- rep("NY", 20)
year <- rep("two", 20)
revenue <- c(
  1212.9, 1253, 1244.4, 5123.5, 1312, 3134, 515.8, 2449.9, 3221.6, 3132.5,
  2235.09, 2239.01, 3235.01, 5223.01, 4235.6, 2204.5, 2315.5, 6114, 4512, 3514.2
)
NY_two <- data.frame(state, dept, year, revenue)

# Dataset 2
state <- rep("FL", 20)
year <- rep("three", 20)
revenue <- c(
  112.9, 123, 124, 523.5, 112, 334, 55, 449, 221.6, 332,
  235, 239, 235, 223, 235.6, 204, 315.5, 614, 512, 514.2
)
FL_three <- data.frame(state, dept, year, revenue)

# Dataset 3
state <- rep("CA", 20)
year <- rep("three", 20)
revenue <- c(
  1102.9, 1023, 1024, 5203.5, 1012, 3034, 505, 4049, 2021.6, 3032,
  2035, 2039, 2035, 2023, 2035.6, 2004, 3015.5, 6014, 5012, 5014.2
)
CA_three <- data.frame(state, dept, year, revenue)

# Export the three data frames as csv files (think of them as 3 of the 80).
# Adjust the target folder in each path before running.
write.csv(NY_two, file = "C:\\Path to export the DataFrame\\NY_two.csv", row.names = FALSE)
write.csv(FL_three, file = "C:\\Path to export the DataFrame\\FL_three.csv", row.names = FALSE)
write.csv(CA_three, file = "C:\\Path to export the DataFrame\\CA_three.csv", row.names = FALSE)

我的尝试

# Desired outcome:
# (I) draw the boxplots and export them to a pdf file (2 graphs per page)

######################################################################################

library(ggplot2)

# List the csv files in the folder; full.names = T returns each entry with the
# directory path prepended. The resulting vector is then echoed for inspection.
# NOTE(review): `pattern` is treated as a regular expression, not a glob, so
# "*.csv" is looser than it looks; "\\.csv$" would be stricter - confirm intent.
files <- list.files("C:\\path to the files\\", pattern="*.csv", full.names = T)
files

# Open the pdf device that should receive the plots (goal: two plots per page).
pdf(file = "/Users/Desktop/boxplot_anova.pdf")

# Request a 2x2 layout through base-graphics par().
# NOTE(review): the plots below are ggplot objects, which are not laid out by
# par(mfrow) - confirm whether this line has any effect here.
par(mfrow = c(2,2))

# For each file index: read the csv, build a boxplot of revenue by dept, then
# call dev.off(). The per-file return values are collected in `out`.
out <- lapply(1:length(files), function(idx) {
  # read the file
  this_data <- read.csv(files[idx], header = TRUE) # choose TRUE/FALSE accordingly
  # boxplot using ggplot: error-bar whiskers plus a semi-transparent box per dept
   p <-ggplot(this_data, aes(x = dept, y = revenue, fill = dept)) + 
       stat_boxplot(geom = "errorbar", width = 0.15) + geom_boxplot(alpha = 0.8,    # Fill transparency
                                            colour = "#474747",   # Border color
                                            outlier.colour = 1)+ theme(panel.background = element_blank())+ ggtitle("Title using each file name ")

  # NOTE(review): `p` on its own line inside a function is evaluated but not
  # auto-printed, so nothing is drawn on the device at this point.
  p
# NOTE(review): dev.off() sits inside the function, so it runs on every
# iteration (the device is closed after the first file) and, being the last
# expression, its value is what the function returns instead of `p`.
dev.off() 
})

# Echo the collected return values.
out

请分享您的代码,提前感谢

I want to make boxplots for 80 csv files; the filenames look something like this: NY_two.csv, CA_three.csv, FL_three.csv, ..., NY_ten.csv.

Desirables include

(I) boxplot (export as pdf, 2 graphs per page)

See below for the 3 out of the 80 csv files


# Every one of the 80 files shares the same columns: state, dept, year, revenue.

# Run this snippet to build 3 sample data frames that stand in for 3 of the
# 80 csv files.

# Five departments with four observations each; identical in all three datasets.
dept <- rep(c("energy", "works", "fin", "parks", "trans"), each = 4)

# Dataset 1
state <- rep("NY", 20)
year <- rep("two", 20)
revenue <- c(
  1212.9, 1253, 1244.4, 5123.5, 1312, 3134, 515.8, 2449.9, 3221.6, 3132.5,
  2235.09, 2239.01, 3235.01, 5223.01, 4235.6, 2204.5, 2315.5, 6114, 4512, 3514.2
)
NY_two <- data.frame(state, dept, year, revenue)

# Dataset 2
state <- rep("FL", 20)
year <- rep("three", 20)
revenue <- c(
  112.9, 123, 124, 523.5, 112, 334, 55, 449, 221.6, 332,
  235, 239, 235, 223, 235.6, 204, 315.5, 614, 512, 514.2
)
FL_three <- data.frame(state, dept, year, revenue)

# Dataset 3
state <- rep("CA", 20)
year <- rep("three", 20)
revenue <- c(
  1102.9, 1023, 1024, 5203.5, 1012, 3034, 505, 4049, 2021.6, 3032,
  2035, 2039, 2035, 2023, 2035.6, 2004, 3015.5, 6014, 5012, 5014.2
)
CA_three <- data.frame(state, dept, year, revenue)

# Export the three data frames as csv files (think of them as 3 of the 80).
# Adjust the target folder in each path before running.
write.csv(NY_two, file = "C:\\Path to export the DataFrame\\NY_two.csv", row.names = FALSE)
write.csv(FL_three, file = "C:\\Path to export the DataFrame\\FL_three.csv", row.names = FALSE)
write.csv(CA_three, file = "C:\\Path to export the DataFrame\\CA_three.csv", row.names = FALSE)

My attempt

# Desired outcome:
# (I) draw the boxplots and export them to a pdf file (2 graphs per page)

######################################################################################

library(ggplot2)

# List the csv files in the folder; full.names = T returns each entry with the
# directory path prepended. The resulting vector is then echoed for inspection.
# NOTE(review): `pattern` is treated as a regular expression, not a glob, so
# "*.csv" is looser than it looks; "\\.csv$" would be stricter - confirm intent.
files <- list.files("C:\\path to the files\\", pattern="*.csv", full.names = T)
files

# Open the pdf device that should receive the plots (goal: two plots per page).
pdf(file = "/Users/Desktop/boxplot_anova.pdf")

# Request a 2x2 layout through base-graphics par().
# NOTE(review): the plots below are ggplot objects, which are not laid out by
# par(mfrow) - confirm whether this line has any effect here.
par(mfrow = c(2,2))

# For each file index: read the csv, build a boxplot of revenue by dept, then
# call dev.off(). The per-file return values are collected in `out`.
out <- lapply(1:length(files), function(idx) {
  # read the file
  this_data <- read.csv(files[idx], header = TRUE) # choose TRUE/FALSE accordingly
  # boxplot using ggplot: error-bar whiskers plus a semi-transparent box per dept
   p <-ggplot(this_data, aes(x = dept, y = revenue, fill = dept)) + 
       stat_boxplot(geom = "errorbar", width = 0.15) + geom_boxplot(alpha = 0.8,    # Fill transparency
                                            colour = "#474747",   # Border color
                                            outlier.colour = 1)+ theme(panel.background = element_blank())+ ggtitle("Title using each file name ")

  # NOTE(review): `p` on its own line inside a function is evaluated but not
  # auto-printed, so nothing is drawn on the device at this point.
  p
# NOTE(review): dev.off() sits inside the function, so it runs on every
# iteration (the device is closed after the first file) and, being the last
# expression, its value is what the function returns instead of `p`.
dev.off() 
})

# Echo the collected return values.
out

Kindly share your code, thanks in advance.

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(1)

×纯※雪 2025-01-19 21:25:30

有一些单独的问题可能会导致代码出现问题:

  1. 在函数内部生成的绘图不会自动输出,因此可能无法正确导出(请使用 plot(p) 或 print(p) 代替 p)。
  2. 您必须在循环之前打开 pdf 设备并在循环之后关闭它,而不是在循环内。例如,这原则上是可行的:
# Draw one boxplot per csv file into a single multi-page PDF.
# Expects `files` (character vector of csv paths) and an attached ggplot2.
# The device is opened once before the loop and closed once afterwards.
pdf(file = "boxplot_anova.pdf")
# par(mfrow = ...) only affects base graphics; ggplot2 draws with grid, so
# every plot below still lands on its own page.
par(mfrow = c(2, 2))
out <- tryCatch(
  # Iterate over the paths themselves: unlike 1:length(files), this does
  # nothing when `files` is empty instead of trying to read files[1].
  lapply(files, function(path) {
    this_data <- read.csv(path, header = TRUE) # choose TRUE/FALSE accordingly
    p <- ggplot(this_data, aes(x = dept, y = revenue, fill = dept)) +
      stat_boxplot(geom = "errorbar", width = 0.15) +
      geom_boxplot(
        alpha = 0.8,          # fill transparency
        colour = "#474747",   # border colour
        outlier.colour = 1
      ) +
      theme(panel.background = element_blank()) +
      # Title = file name without its directory or extension, e.g. "NY_two".
      ggtitle(tools::file_path_sans_ext(basename(path)))
    # A ggplot object is not auto-printed inside a function; draw it explicitly.
    print(p)
  }),
  # Close the device even if one file fails to read, so the PDF is not left open.
  finally = dev.off()
)
# `out` is deliberately not echoed: in current ggplot2, print() returns the plot
# invisibly, so auto-printing the list would draw every plot a second time.
  3. 上面的代码不会把多个图(例如 par(mfrow = c(2,2)) 所期望的最多 4 个)绘制在同一页面上,因为 ggplot2 不使用 base 图形系统。可以使用例如 cowplot 包中的 plot_grid 函数来实现。要生成多个页面,请把绘图列表按每页的图数拆分成若干组,例如每页 4 个图:
# Build one ggplot boxplot per csv file; nothing is drawn at this stage.
# Expects `files` (character vector of csv paths), an attached ggplot2, and an
# installed cowplot package.
res <- lapply(files, function(x) {
  this_data <- read.csv(x, header = TRUE) # choose TRUE/FALSE accordingly
  ggplot(this_data, aes(x = dept, y = revenue, fill = dept)) +
    stat_boxplot(geom = "errorbar", width = 0.15) +
    geom_boxplot(
      alpha = 0.8,          # fill transparency
      colour = "#474747",   # border colour
      outlier.colour = 1
    ) +
    theme(panel.background = element_blank()) +
    # Title = file name without its directory or extension, e.g. "NY_two".
    ggtitle(tools::file_path_sans_ext(basename(x)))
})

# Write a list of plots to a multi-page PDF, n_col * n_row plots per page.
#
# plots: list of ggplot objects.
# file:  path of the PDF to create.
# n_col, n_row: grid layout of each page (2 x 2 = 4 plots per page by default;
#               use n_row = 1 for two plots per page).
# Returns `file` invisibly.
save_plot_pages <- function(plots, file, n_col = 2, n_row = 2) {
  per_page <- n_col * n_row
  pages <- split(plots, ceiling(seq_along(plots) / per_page))
  pdf(file = file)
  # Close the device even if drawing a page fails.
  on.exit(dev.off(), add = TRUE)
  for (page in pages) {
    # plot_grid() only builds the page; print() is what draws it on the device.
    print(cowplot::plot_grid(plotlist = page, ncol = n_col, nrow = n_row))
  }
  invisible(file)
}

save_plot_pages(res, "boxplot_anova.pdf")

There are a few separate issues that might cause problems in your code:

  1. Plots generated inside a function are not printed automatically, so they may not be exported (use plot(p) or print(p) instead of p).
  2. You have to open the pdf device before your loop and close it after, not within the loop. E.g. this would work in principle:
# Draw one boxplot per csv file into a single multi-page PDF.
# Expects `files` (character vector of csv paths) and an attached ggplot2.
# The device is opened once before the loop and closed once afterwards.
pdf(file = "boxplot_anova.pdf")
# par(mfrow = ...) only affects base graphics; ggplot2 draws with grid, so
# every plot below still lands on its own page.
par(mfrow = c(2, 2))
out <- tryCatch(
  # Iterate over the paths themselves: unlike 1:length(files), this does
  # nothing when `files` is empty instead of trying to read files[1].
  lapply(files, function(path) {
    this_data <- read.csv(path, header = TRUE) # choose TRUE/FALSE accordingly
    p <- ggplot(this_data, aes(x = dept, y = revenue, fill = dept)) +
      stat_boxplot(geom = "errorbar", width = 0.15) +
      geom_boxplot(
        alpha = 0.8,          # fill transparency
        colour = "#474747",   # border colour
        outlier.colour = 1
      ) +
      theme(panel.background = element_blank()) +
      # Title = file name without its directory or extension, e.g. "NY_two".
      ggtitle(tools::file_path_sans_ext(basename(path)))
    # A ggplot object is not auto-printed inside a function; draw it explicitly.
    print(p)
  }),
  # Close the device even if one file fails to read, so the PDF is not left open.
  finally = dev.off()
)
# `out` is deliberately not echoed: in current ggplot2, print() returns the plot
# invisibly, so auto-printing the list would draw every plot a second time.
  3. The code above will not put several plots (up to 4, as you would expect from par(mfrow = c(2,2))) on the same page, since ggplot2 does not use base graphics. Use e.g. the plot_grid function from the cowplot package to achieve this. To generate multiple pages, split the plot list into groups with the matching number of elements, e.g. for 4 plots per page:
# Build one ggplot boxplot per csv file; nothing is drawn at this stage.
# Expects `files` (character vector of csv paths), an attached ggplot2, and an
# installed cowplot package.
res <- lapply(files, function(x) {
  this_data <- read.csv(x, header = TRUE) # choose TRUE/FALSE accordingly
  ggplot(this_data, aes(x = dept, y = revenue, fill = dept)) +
    stat_boxplot(geom = "errorbar", width = 0.15) +
    geom_boxplot(
      alpha = 0.8,          # fill transparency
      colour = "#474747",   # border colour
      outlier.colour = 1
    ) +
    theme(panel.background = element_blank()) +
    # Title = file name without its directory or extension, e.g. "NY_two".
    ggtitle(tools::file_path_sans_ext(basename(x)))
})

# Write a list of plots to a multi-page PDF, n_col * n_row plots per page.
#
# plots: list of ggplot objects.
# file:  path of the PDF to create.
# n_col, n_row: grid layout of each page (2 x 2 = 4 plots per page by default;
#               use n_row = 1 for two plots per page).
# Returns `file` invisibly.
save_plot_pages <- function(plots, file, n_col = 2, n_row = 2) {
  per_page <- n_col * n_row
  pages <- split(plots, ceiling(seq_along(plots) / per_page))
  pdf(file = file)
  # Close the device even if drawing a page fails.
  on.exit(dev.off(), add = TRUE)
  for (page in pages) {
    # plot_grid() only builds the page; print() is what draws it on the device.
    print(cowplot::plot_grid(plotlist = page, ncol = n_col, nrow = n_row))
  }
  invisible(file)
}

save_plot_pages(res, "boxplot_anova.pdf")
~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文