从 ggplot 条形图中的刻度线中删除一些文本标记

发布于 2024-10-25 07:40:05 字数 3239 浏览 4 评论 0原文

我正在对银行业的非违约者和违约者进行一些研究。在这种情况下，我在条形图中绘制了它们相对于某些分数的分布。分数越高，信用等级越好。

由于与非默认值的数量相比，默认值的数量非常有限，因此在同一条形图上绘制默认值和非默认值并不是很方便，因为您几乎看不到默认值。然后，我仅根据违约者的分数制作第二个条形图，但其间隔比例与违约者和非违约者分数的完整条形图相同。然后，我向第一个条形图添加垂直线，指示最高和最低违约分数所在的位置。即了解违约者的分布与违约者和非违约者的总体分布的关系。

x 轴很容易变得非常“拥挤”。我想删除刻度线的一些文本，但不是所有刻度线。

下面是我使用的代码，替换为（种子）随机数据。

第一个条形图是我想要的关于刻度线上的文本的内容，但我缺少第二个条形图中的所有刻度。在第二个条形图中显示了“拥挤”！

library(ggplot2)
library(ggExtra)

#NDS represents non-defaults and DS defaults on the same scale
#although here being just some random normals for the sake of simplicity.
set.seed(10)
NDS<-rnorm(10000,sd=1)-2
DS<-rnorm(100,sd=2)-5

#Cutoffs are constructed such that intervals of size 0.3 contain all values 
#of NDS & DS
minCutoff<--9.3
maxCutoff<-2.1

#Generate the actual interval "bins"
NDS_CUT<-cut(NDS,breaks=seq(minCutoff, maxCutoff, by = 0.3))
DS_CUT<-cut(DS,breaks=seq(minCutoff, maxCutoff, by = 0.3))

#Manually generate where to put the vertical lines for min(DS) and max(DS)
minDS_bar<-levels(cut(NDS,breaks=seq(minCutoff, maxCutoff, by = 0.3)))[1]
maxDS_bar<-levels(cut(NDS,breaks=seq(minCutoff, maxCutoff, by = 0.3)))[32]

#Generate data frame - seems stupid, but makes sense
#when the "real" data is used :-)
NDSdataframe<-cbind(as.data.frame(NDS_CUT),rep(factor("State-1"),length(NDS_CUT)))
colnames(NDSdataframe)<-c("Score","Action")
DSdataframe<-cbind(as.data.frame(DS_CUT),rep(factor("State-2"),length(DS_CUT)))
colnames(DSdataframe)<-c("Score","Action")
fulldataframe<-rbind(NDSdataframe,DSdataframe)
attach(fulldataframe)

#Plot the full distribution of NDS & DS with geom_vlines

#Get the tick texts I want to show
myLevels<-levels(cut(NDS,breaks=seq(roundDownNDS, roundUpNDS, by = 0.3)))
lengthMyLevels<-length(myLevels)
myBreaks<-seq(1,lengthMyLevels,3)
chosenbreaks<-myLevels[myBreaks[1]]
for(i in 2:length(myBreaks))
 {
 chosenbreaks<-rbind(chosenbreaks,myLevels[myBreaks[i]])
 }


#Generate the plot of both NDS & DS
fullplot<-ggplot(fulldataframe, aes(Score, fill=factor(Action,levels=c("State-    2","State-1")))) + geom_bar(position="stack") + opts(axis.text.x = theme_text(angle = 45,size=8))  + opts(legend.position = "none")    + xlab("Scoreinterval") + ylab("Antal pr. interval") + geom_vline(aes(xintercept = minDS_bar, colour="red")) + geom_vline(aes(xintercept = maxDS_bar, colour="red")) + scale_x_discrete("test",breaks=chosenbreaks)

#Generate second dataframe for the plot of DS only
DSdataframe2<-cbind(na.omit(as.data.frame(DS_CUT)),rep(factor("Fallit"),length    (na.omit(as.data.frame(DS_CUT)))))
colnames(DSdataframe2)<-c("theScore","theAction")

#Calculate max value for the DS
myMax<-max(table(DSdataframe2))+1

attach(DSdataframe2)

#Generate plot for the DS only
subplot<-ggplot(fulldataframe, aes(theScore, fill=factor(theAction))) + geom_bar   (position="stack") + opts(axis.text.x = theme_text(angle = 45))  + opts(legend.position = "none") + ylim(0, myMax) + xlab("Scoreinterval") + ylab("Antal pr. interval")

#Using the ggExtra package the to plots are aligned
align.plots(fullplot, subplot)

detach(DSdataframe2)
detach(fulldataframe)

非常感谢任何帮助！

谢谢，

克里斯蒂安

原文

I am doing some research on non-defaulters and defaulters with regards to banking. In that context I am plotting their distributions relative to some score in a bar plot. The higher the score, the better the credit rating.

Since the number of defaults is very limited compared to the number of non-defaults plotting the defaults and non-defaults on the same bar plot is not very giving as you hardly can see the defaults. I then make a second bar plot based on the defaulters' scores only, but on the same interval scale as the full bar plot of both the scores of the defaulters and non-defaulters. I then add vertical lines to the first bar plot indicating where the highest and the lowest defaulter score is located. That is to get a view of where the distribution of the defaulters fit into that of the overall distribution of both defaulters and non-defaulters.

The x-axis easily gets very "crowded". I would like to remove some of the text for the ticks, but not all the tick marks.

Below is the code I am using replaced with (seeded) random data instead.

The first bar plot is what I would like to have with regards to text on the tick marks, but I am missing all the ticks I have in the second bar plot. In the second bar plot the "crowded"-ness is shown!

library(ggplot2)
library(ggExtra)

#NDS represents non-defaults and DS defaults on the same scale
#although here being just some random normals for the sake of simplicity.
set.seed(10)
NDS<-rnorm(10000,sd=1)-2
DS<-rnorm(100,sd=2)-5

#Cutoffs are constructed such that intervals of size 0.3 contain all values 
#of NDS & DS
minCutoff<--9.3
maxCutoff<-2.1

#Generate the actual interval "bins"
NDS_CUT<-cut(NDS,breaks=seq(minCutoff, maxCutoff, by = 0.3))
DS_CUT<-cut(DS,breaks=seq(minCutoff, maxCutoff, by = 0.3))

#Manually generate where to put the vertical lines for min(DS) and max(DS)
minDS_bar<-levels(cut(NDS,breaks=seq(minCutoff, maxCutoff, by = 0.3)))[1]
maxDS_bar<-levels(cut(NDS,breaks=seq(minCutoff, maxCutoff, by = 0.3)))[32]

#Generate data frame - seems stupid, but makes sense
#when the "real" data is used :-)
NDSdataframe<-cbind(as.data.frame(NDS_CUT),rep(factor("State-1"),length(NDS_CUT)))
colnames(NDSdataframe)<-c("Score","Action")
DSdataframe<-cbind(as.data.frame(DS_CUT),rep(factor("State-2"),length(DS_CUT)))
colnames(DSdataframe)<-c("Score","Action")
fulldataframe<-rbind(NDSdataframe,DSdataframe)
attach(fulldataframe)

#Plot the full distribution of NDS & DS with geom_vlines

#Get the tick texts I want to show
myLevels<-levels(cut(NDS,breaks=seq(roundDownNDS, roundUpNDS, by = 0.3)))
lengthMyLevels<-length(myLevels)
myBreaks<-seq(1,lengthMyLevels,3)
chosenbreaks<-myLevels[myBreaks[1]]
for(i in 2:length(myBreaks))
 {
 chosenbreaks<-rbind(chosenbreaks,myLevels[myBreaks[i]])
 }


#Generate the plot of both NDS & DS
fullplot<-ggplot(fulldataframe, aes(Score, fill=factor(Action,levels=c("State-    2","State-1")))) + geom_bar(position="stack") + opts(axis.text.x = theme_text(angle = 45,size=8))  + opts(legend.position = "none")    + xlab("Scoreinterval") + ylab("Antal pr. interval") + geom_vline(aes(xintercept = minDS_bar, colour="red")) + geom_vline(aes(xintercept = maxDS_bar, colour="red")) + scale_x_discrete("test",breaks=chosenbreaks)

#Generate second dataframe for the plot of DS only
DSdataframe2<-cbind(na.omit(as.data.frame(DS_CUT)),rep(factor("Fallit"),length    (na.omit(as.data.frame(DS_CUT)))))
colnames(DSdataframe2)<-c("theScore","theAction")

#Calculate max value for the DS
myMax<-max(table(DSdataframe2))+1

attach(DSdataframe2)

#Generate plot for the DS only
subplot<-ggplot(fulldataframe, aes(theScore, fill=factor(theAction))) + geom_bar   (position="stack") + opts(axis.text.x = theme_text(angle = 45))  + opts(legend.position = "none") + ylim(0, myMax) + xlab("Scoreinterval") + ylab("Antal pr. interval")

#Using the ggExtra package the to plots are aligned
align.plots(fullplot, subplot)

detach(DSdataframe2)
detach(fulldataframe)

Any help is very much appreciated!

Thanks,

Christian

分享到QQ

分享到微博

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

需要登录才能够评论，你可以免费注册一个本站的账号。

冷情 2024-11-01 07:40:05

如果我理解正确，您可以简单地为每个其他标签指定空文本标签，

library(ggplot2)

interleave <- function(x,y){
  lx <- length(x)
  ly <- length(y)
  n <- max(lx,ly)
  as.vector(rbind(rep(x, length.out=n), rep(y, length.out=n)))
}

d <- data.frame(x=1:10, y=rnorm(10))

my_breaks <- seq(1,10,by=1)
my_labs <- interleave(seq(1,10,by=2), "")

qplot(x,y,data=d)+
  scale_x_continuous(breaks=my_breaks, labels=my_labs)

If I understand correctly, you can simply specify empty text labels for every other label,

library(ggplot2)

interleave <- function(x,y){
  lx <- length(x)
  ly <- length(y)
  n <- max(lx,ly)
  as.vector(rbind(rep(x, length.out=n), rep(y, length.out=n)))
}

d <- data.frame(x=1:10, y=rnorm(10))

my_breaks <- seq(1,10,by=1)
my_labs <- interleave(seq(1,10,by=2), "")

qplot(x,y,data=d)+
  scale_x_continuous(breaks=my_breaks, labels=my_labs)

回复收藏 0 原文

逆夏时光 2024-11-01 07:40:05

这是另一个基于 @baptiste 的版本，可以轻松选择偶数或奇数、每三分之一等。

library(ggplot2)
library(gridExtra)

## helper function
## periodically replace orig with .fill
label_fill <- function(orig, .offset=0, .mod=2, .fill=""){
    ## replace
    ii <- as.logical(
        ## offset==0 keeps first
        (1:length(orig)-1+.offset) %% .mod
    )
    orig[ii] <- .fill
    orig
}

## data, labels
nn <- 10
my_dat <- data.frame(x=1:nn, y=rnorm(nn))
my_breaks <- my_dat$x

my_plot <- (
    ggplot(my_dat, aes(x,y))
    + geom_line()
    ## default: every other, start from 1
    ## by default, function takes breaks
    + scale_x_continuous(
        breaks=my_breaks, labels=label_fill
    )
    + theme_bw()
)

## another form 
## manually pass breaks
every_third <- scale_x_continuous(
    breaks=my_breaks, 
    labels=label_fill(my_breaks, .mod=3)
)

## side-by-side
grid.arrange(ncol=2,
    my_plot, 
    ## every third with offset
    my_plot + every_third
)

< img src="https://i.sstatic.net/IwiCv.png" alt="在此处输入图像描述">

Here's another version, based on @baptiste, that allows for easy selection of even or odd, every third, etc.

library(ggplot2)
library(gridExtra)

## helper function
## periodically replace orig with .fill
label_fill <- function(orig, .offset=0, .mod=2, .fill=""){
    ## replace
    ii <- as.logical(
        ## offset==0 keeps first
        (1:length(orig)-1+.offset) %% .mod
    )
    orig[ii] <- .fill
    orig
}

## data, labels
nn <- 10
my_dat <- data.frame(x=1:nn, y=rnorm(nn))
my_breaks <- my_dat$x

my_plot <- (
    ggplot(my_dat, aes(x,y))
    + geom_line()
    ## default: every other, start from 1
    ## by default, function takes breaks
    + scale_x_continuous(
        breaks=my_breaks, labels=label_fill
    )
    + theme_bw()
)

## another form 
## manually pass breaks
every_third <- scale_x_continuous(
    breaks=my_breaks, 
    labels=label_fill(my_breaks, .mod=3)
)

## side-by-side
grid.arrange(ncol=2,
    my_plot, 
    ## every third with offset
    my_plot + every_third
)