How to create barplot using ggplot in R Programming
Barplot
# Create dummy data: five categories (a-e), each with a random height drawn
# without replacement from 4..15
data <- data.frame(
  name = letters[1:5],
  value = sample(seq(4, 15), 5)
)

# Most basic barplot: one bar per row, labelled with the category name.
# `names.arg` is spelled out in full instead of relying on partial matching.
barplot(height = data$value, names.arg = data$name)
Custom color
Here are 3 examples showing how to customize the barplot color:
- uniform color with col, asking for one color only
- using a palette coming from RColorBrewer
- change border color with the border argument
# Create dummy data
data <- data.frame(
  name = letters[1:5],
  value = sample(seq(4, 15), 5)
)

# Uniform color: a single semi-transparent color (r, g, b, alpha) for all bars
barplot(
  height = data$value,
  names.arg = data$name,
  col = rgb(0.2, 0.4, 0.6, 0.6)
)

# One color per bar, taken from an RColorBrewer palette
library(RColorBrewer)
coul <- brewer.pal(5, "Set2")
barplot(
  height = data$value,
  names.arg = data$name,
  col = coul
)

# Change the border color (white fill so that only the border is visible)
barplot(
  height = data$value,
  names.arg = data$name,
  border = "#69b3a2",
  col = "white"
)
Title, Axis label, Custom limits
Usual customizations with xlab, ylab, main and ylim.
# Create dummy data
data <- data.frame(
  name = letters[1:5],
  value = sample(seq(4, 15), 5)
)

# Uniform color, plus axis labels, a main title and a custom y-axis range
barplot(
  height = data$value,
  names.arg = data$name,
  col = rgb(0.8, 0.1, 0.1, 0.6),
  xlab = "categories",
  ylab = "values",
  main = "My title",
  ylim = c(0, 40)
)
Horizontal barplot
Use the horiz argument to draw the bars horizontally; las=1 keeps the axis labels horizontal so they stay readable.
# Create dummy data
data <- data.frame(
  name = letters[1:5],
  value = sample(seq(4, 15), 5)
)

# Horizontal barplot: `horiz = TRUE` flips the bars, `las = 1` keeps all axis
# labels horizontal. `TRUE` is used instead of the reassignable shortcut `T`.
barplot(
  height = data$value,
  names.arg = data$name,
  col = "#69b3a2",
  horiz = TRUE,
  las = 1
)
Bar width & space between
bars
It is possible to control the space between bars and the width of the bars
using space and width.
This can be useful to represent the number of values behind each bar.
# Create dummy data
data <- data.frame(
  name = letters[1:5],
  value = sample(seq(4, 15), 5)
)

# Control space: one `space` value per bar (gap left before each bar)
barplot(
  height = data$value,
  names.arg = data$name,
  col = rgb(0.2, 0.4, 0.6, 0.6),
  space = c(0.1, 0.2, 3, 1.5, 0.3)
)

# Control width: one `width` value per bar
barplot(
  height = data$value,
  names.arg = data$name,
  col = rgb(0.2, 0.4, 0.6, 0.6),
  width = c(0.1, 0.2, 3, 1.5, 0.3)
)
Barplot texture
Change bar texture with the density and angle arguments.
# Create dummy data
data <- data.frame(
  name = letters[1:5],
  value = sample(seq(4, 15), 5)
)

# Textured bars: `density` sets the shading-line density and `angle` the
# shading-line slope (in degrees), one value per bar
barplot(
  height = data$value,
  names.arg = data$name,
  density = c(5, 10, 20, 30, 7),
  angle = c(0, 45, 90, 11, 36),
  col = "brown"
)
The geom_errorbar() function
Error
bars give a general idea of how precise a measurement is, or conversely, how
far from the reported value the true (error free) value might be. If the value
displayed on your barplot is the result of an
aggregation (like the mean value of several data points), you may want to
display error bars.
To understand how to build it, you first need to understand how to build a basic barplot with R. Then, you just
need to add an extra layer using the geom_errorbar() function.
The function takes at least 3 arguments in its aesthetics:
- ymin and ymax: position of the bottom and the top of the error bar respectively
- x: position on the X axis
Note:
the lower and upper limits of your error bars must be computed before building
the chart, and available in a column of the input data.
# Load ggplot2
library(ggplot2)

# Create dummy data: `value` is the bar height, `sd` the half-length of the
# error bar for each category
data <- data.frame(
  name = letters[1:5],
  value = sample(seq(4, 15), 5),
  sd = c(1, 0.2, 3, 2, 4)
)

# Most basic error bar: bars first, then an error bar spanning value +/- sd
ggplot(data) +
  geom_bar(
    aes(x = name, y = value),
    stat = "identity", fill = "skyblue", alpha = 0.7
  ) +
  geom_errorbar(
    aes(x = name, ymin = value - sd, ymax = value + sd),
    width = 0.4, colour = "orange", alpha = 0.9, size = 1.3
  )
Customization
It is possible to change error bar types thanks to similar
functions: geom_crossbar(), geom_linerange() and geom_pointrange(). Those functions work basically the same as the most
common geom_errorbar().
# Load ggplot2
library(ggplot2)

# Create dummy data: `value` is the bar height, `sd` the half-length of the
# error bar for each category
data <- data.frame(
  name = letters[1:5],
  value = sample(seq(4, 15), 5),
  sd = c(1, 0.2, 3, 2, 4)
)

# rectangle
ggplot(data) +
  geom_bar(
    aes(x = name, y = value),
    stat = "identity", fill = "skyblue", alpha = 0.5
  ) +
  geom_crossbar(
    aes(x = name, y = value, ymin = value - sd, ymax = value + sd),
    width = 0.4, colour = "orange", alpha = 0.9, size = 1.3
  )

# line
ggplot(data) +
  geom_bar(
    aes(x = name, y = value),
    stat = "identity", fill = "skyblue", alpha = 0.5
  ) +
  geom_linerange(
    aes(x = name, ymin = value - sd, ymax = value + sd),
    colour = "orange", alpha = 0.9, size = 1.3
  )

# line + dot
ggplot(data) +
  geom_bar(
    aes(x = name, y = value),
    stat = "identity", fill = "skyblue", alpha = 0.5
  ) +
  geom_pointrange(
    aes(x = name, y = value, ymin = value - sd, ymax = value + sd),
    colour = "orange", alpha = 0.9, size = 1.3
  )

# horizontal: same as the basic error bar, with the coordinates flipped
ggplot(data) +
  geom_bar(
    aes(x = name, y = value),
    stat = "identity", fill = "skyblue", alpha = 0.5
  ) +
  geom_errorbar(
    aes(x = name, ymin = value - sd, ymax = value + sd),
    width = 0.4, colour = "orange", alpha = 0.9, size = 1.3
  ) +
  coord_flip()
Barplot with number of
observations
This
chart illustrates many tips you can apply to a base R barplot:
- Add abline with abline()
- Change axis labels orientation with las
- Add text with text()
- Add a legend with legend()
# Data: 12 bars in three blocks of four (one genotype alone, then with itself
# and with each of the two others). `average` is the bar height, `number` the
# observation count printed above each bar.
data <- data.frame(
  name = c(
    "DD", "with himself", "with DC", "with Silur",
    "DC", "with himself", "with DD", "with Silur",
    "Silur", "with himself", "with DD", "with DC"
  ),
  average = sample(seq(1, 10), 12, replace = TRUE),
  number = sample(seq(4, 39), 12, replace = TRUE)
)

# Bar colors, recycled over the 12 bars in blocks of four; the same vector is
# reused for the legend below
bar_colors <- c(
  rgb(0.3, 0.1, 0.4, 0.6),
  rgb(0.3, 0.5, 0.4, 0.6),
  rgb(0.3, 0.9, 0.4, 0.6),
  rgb(0.3, 0.9, 0.4, 0.6)
)

# Increase bottom margin so the rotated bar names fit; keep the previous
# graphical parameters so they can be restored once the chart is drawn
old_par <- par(mar = c(6, 4, 4, 4))

# Basic barplot. barplot() returns the x position of each bar midpoint, which
# is used below to place the text.
my_bar <- barplot(
  data$average,
  border = FALSE,
  names.arg = data$name,
  las = 2,
  col = bar_colors,
  ylim = c(0, 13),
  main = ""
)

# Add vertical lines separating the three blocks of bars
abline(v = c(4.9, 9.7), col = "grey")

# Add the number of observations just above each bar
text(my_bar, data$average + 0.4, paste0("n: ", data$number), cex = 1)

# Legend
legend(
  "topleft",
  legend = c("Alone", "with Himself", "With other genotype"),
  col = bar_colors,
  bty = "n", pch = 20, pt.cex = 2,
  cex = 0.8, horiz = FALSE, inset = c(0.05, 0.05)
)

# Restore the graphical parameters changed above
par(old_par)
Most basic circular barplot
A circular barplot is a barplot where
bars are displayed along a circle instead of a line.
The
input dataset is the same as for a barplot: we need one numeric value per
group (one group = one bar). (See more explanation in the barplot section).
Basically,
the method is the same as for a classic barplot. At the end, we call coord_polar() to make the chart
circular. Note that the ylim() call is really important. If it starts
at 0, the bars will start from the centre of the circle. If you provide a
negative value, a white circle space will appear!
This
chart is not really insightful, go to the next example to learn how to add labels!
# Libraries
library(tidyverse)

# Create dataset: 60 individuals, each with a random value between 10 and 100
data <- data.frame(
  id = seq(1, 60),
  individual = paste0("Mister ", seq(1, 60)),
  value = sample(seq(10, 100), 60, replace = TRUE)
)

# Make the plot.
# Note that id is a factor. If x is numeric, there is some space between the
# first bar.
p <- ggplot(data, aes(x = as.factor(id), y = value)) +

  # This adds the bars with a blue color
  geom_bar(stat = "identity", fill = alpha("blue", 0.3)) +

  # Limits of the plot = very important. The negative value controls the size
  # of the inner circle, the positive one is useful to add size over each bar
  ylim(-100, 120) +

  # Custom the theme: no axis title and no cartesian grid
  theme_minimal() +
  theme(
    axis.text = element_blank(),
    axis.title = element_blank(),
    panel.grid = element_blank(),
    # This removes unnecessary margin around the plot
    plot.margin = unit(rep(-2, 4), "cm")
  ) +

  # This makes the coordinate polar instead of cartesian
  coord_polar(start = 0)

p
Add labels to circular barplot
Here
I suggest a method to add a label at the top of each bar, using the same angle
as the central part of the bar. In the code below, a short section creates a
dataframe with the features of each label, that we can then call in geom_text().
Note
that labels are always at an angle that allows them to be read easily, which
requires a 180 degrees flip for some of them.
# Libraries
library(tidyverse)

# Create dataset: 60 individuals, each with a random value between 10 and 100
data <- data.frame(
  id = seq(1, 60),
  individual = paste0("Mister ", seq(1, 60)),
  value = sample(seq(10, 100), 60, replace = TRUE)
)

# ----- This section prepares a data frame for labels ---- #
# Get the name and the y position of each label
label_data <- data

# Calculate the ANGLE of the labels. 0.5 is subtracted because the label must
# have the angle of the center of its bar, not of its extreme right (1) or
# extreme left (0).
number_of_bar <- nrow(label_data)
angle <- 90 - 360 * (label_data$id - 0.5) / number_of_bar

# Calculate the alignment of labels: right or left.
# Labels on the left part of the plot currently have an angle < -90.
label_data$hjust <- ifelse(angle < -90, 1, 0)

# Flip those labels by 180 degrees to make them readable
label_data$angle <- ifelse(angle < -90, angle + 180, angle)
# ----- ------------------------------------------- ---- #

# Start the plot.
# Note that id is a factor. If x is numeric, there is some space between the
# first bar.
p <- ggplot(data, aes(x = as.factor(id), y = value)) +

  # This adds the bars with a blue color
  geom_bar(stat = "identity", fill = alpha("skyblue", 0.7)) +

  # Limits of the plot = very important. The negative value controls the size
  # of the inner circle, the positive one is useful to add size over each bar
  ylim(-100, 120) +

  # Custom the theme: no axis title and no cartesian grid
  theme_minimal() +
  theme(
    axis.text = element_blank(),
    axis.title = element_blank(),
    panel.grid = element_blank(),
    # Adjust the margin so that labels are not truncated
    plot.margin = unit(rep(-1, 4), "cm")
  ) +

  # This makes the coordinate polar instead of cartesian
  coord_polar(start = 0) +

  # Add the labels, using the label_data data frame created before
  geom_text(
    data = label_data,
    aes(x = id, y = value + 10, label = individual, hjust = hjust),
    color = "black", fontface = "bold", alpha = 0.6, size = 2.5,
    angle = label_data$angle, inherit.aes = FALSE
  )

p
Circular barplot with groups
Add a gap in the circle
A circular barplot is a barplot where
bars are displayed along a circle instead of a line. This page aims to teach
you how to make a circular barplot with groups.
Since
this kind of chart is a bit tricky, I strongly advise to understand graph #295 and #296 that will teach you the basics.
The
first step is to build a circular barplot with a break in the circle. Actually,
I just added a few empty lines at the end of the initial data frame:
# library
library(tidyverse)

# Create dataset: 60 individuals, each with a random value between 10 and 100
data <- data.frame(
  individual = paste0("Mister ", seq(1, 60)),
  value = sample(seq(10, 100), 60, replace = TRUE)
)

# Set a number of 'empty bar'
empty_bar <- 10

# Add all-NA lines at the end of the initial dataset: they take up room on the
# circle but draw nothing, which creates the gap
to_add <- matrix(NA, empty_bar, ncol(data))
colnames(to_add) <- colnames(data)
data <- rbind(data, to_add)
data$id <- seq(1, nrow(data))

# Get the name and the y position of each label
label_data <- data
number_of_bar <- nrow(label_data)

# 0.5 is subtracted because the label must have the angle of the center of its
# bar, not of its extreme right (1) or extreme left (0)
angle <- 90 - 360 * (label_data$id - 0.5) / number_of_bar

# Right-align and flip by 180 degrees the labels whose angle is < -90
label_data$hjust <- ifelse(angle < -90, 1, 0)
label_data$angle <- ifelse(angle < -90, angle + 180, angle)

# Make the plot.
# Note that id is a factor. If x is numeric, there is some space between the
# first bar.
p <- ggplot(data, aes(x = as.factor(id), y = value)) +
  geom_bar(stat = "identity", fill = alpha("green", 0.3)) +
  ylim(-100, 120) +
  theme_minimal() +
  theme(
    axis.text = element_blank(),
    axis.title = element_blank(),
    panel.grid = element_blank(),
    plot.margin = unit(rep(-1, 4), "cm")
  ) +
  coord_polar(start = 0) +
  geom_text(
    data = label_data,
    aes(x = id, y = value + 10, label = individual, hjust = hjust),
    color = "black", fontface = "bold", alpha = 0.6, size = 2.5,
    angle = label_data$angle, inherit.aes = FALSE
  )

p
Space between groups
This
concept can now be used to add space between each group of the dataset. I
add n lines with only NA at the bottom of each group.
This
chart is far more insightful since it allows one to quickly compare the
different groups, and to compare the value of items within each group.
# library
library(tidyverse)

# Create dataset. `group` is built explicitly as a factor: since R 4.0,
# data.frame() no longer converts strings to factors, so nlevels()/levels()
# below would otherwise return 0/NULL and no empty bar would be added.
data <- data.frame(
  individual = paste0("Mister ", seq(1, 60)),
  group = factor(c(rep("A", 10), rep("B", 30), rep("C", 14), rep("D", 6))),
  value = sample(seq(10, 100), 60, replace = TRUE)
)

# Set a number of 'empty bar' to add at the end of each group
empty_bar <- 4

# Build `empty_bar` all-NA rows per group, tag each with its group, append
# them, then sort by group so that every group ends with its empty rows
to_add <- data.frame(
  matrix(NA, empty_bar * nlevels(data$group), ncol(data))
)
colnames(to_add) <- colnames(data)
to_add$group <- rep(levels(data$group), each = empty_bar)
data <- rbind(data, to_add)
data <- data %>% arrange(group)
data$id <- seq(1, nrow(data))

# Get the name and the y position of each label
label_data <- data
number_of_bar <- nrow(label_data)

# 0.5 is subtracted because the label must have the angle of the center of its
# bar, not of its extreme right (1) or extreme left (0)
angle <- 90 - 360 * (label_data$id - 0.5) / number_of_bar

# Right-align and flip by 180 degrees the labels whose angle is < -90
label_data$hjust <- ifelse(angle < -90, 1, 0)
label_data$angle <- ifelse(angle < -90, angle + 180, angle)

# Make the plot.
# Note that id is a factor. If x is numeric, there is some space between the
# first bar.
p <- ggplot(data, aes(x = as.factor(id), y = value, fill = group)) +
  geom_bar(stat = "identity", alpha = 0.5) +
  ylim(-100, 120) +
  theme_minimal() +
  theme(
    legend.position = "none",
    axis.text = element_blank(),
    axis.title = element_blank(),
    panel.grid = element_blank(),
    plot.margin = unit(rep(-1, 4), "cm")
  ) +
  coord_polar() +
  geom_text(
    data = label_data,
    aes(x = id, y = value + 10, label = individual, hjust = hjust),
    color = "black", fontface = "bold", alpha = 0.6, size = 2.5,
    angle = label_data$angle, inherit.aes = FALSE
  )

p
Order bars
Here
observations are sorted by bar height within each group. It can be useful if
your goal is to understand what are the highest / lowest observations within
and across groups.
The
method used to order groups in ggplot2 is extensively described in this dedicated page. Basically, you just have to
add the following piece of code right after the data frame creation:
# Order data: sort by group first, then by increasing value within each group
data <- data %>% arrange(group, value)
Circular barchart customization
Last but not least, it is highly advisable to add some
customisation to your chart. Here we add group names (A, B, C and D), and we
add a scale to help compare the sizes of the bars. Voila! The code is a bit
long, but the result is quite worth it in my opinion!
# library
library(tidyverse)

# Create dataset. `group` is built explicitly as a factor: since R 4.0,
# data.frame() no longer converts strings to factors, so nlevels()/levels()
# below would otherwise return 0/NULL and no empty bar would be added.
data <- data.frame(
  individual = paste0("Mister ", seq(1, 60)),
  group = factor(c(rep("A", 10), rep("B", 30), rep("C", 14), rep("D", 6))),
  value = sample(seq(10, 100), 60, replace = TRUE)
)

# Set a number of 'empty bar' to add at the end of each group
empty_bar <- 3

# Build `empty_bar` all-NA rows per group, tag each with its group, append
# them, then sort by group so that every group ends with its empty rows
to_add <- data.frame(
  matrix(NA, empty_bar * nlevels(data$group), ncol(data))
)
colnames(to_add) <- colnames(data)
to_add$group <- rep(levels(data$group), each = empty_bar)
data <- rbind(data, to_add)
data <- data %>% arrange(group)
data$id <- seq(1, nrow(data))

# Get the name and the y position of each label
label_data <- data
number_of_bar <- nrow(label_data)

# 0.5 is subtracted because the label must have the angle of the center of its
# bar, not of its extreme right (1) or extreme left (0)
angle <- 90 - 360 * (label_data$id - 0.5) / number_of_bar

# Right-align and flip by 180 degrees the labels whose angle is < -90
label_data$hjust <- ifelse(angle < -90, 1, 0)
label_data$angle <- ifelse(angle < -90, angle + 180, angle)

# Prepare a data frame for base lines: for each group, the id of its first
# bar, the id of its last non-empty bar, and the midpoint used to place the
# group title
base_data <- data %>%
  group_by(group) %>%
  summarize(start = min(id), end = max(id) - empty_bar) %>%
  rowwise() %>%
  mutate(title = mean(c(start, end)))

# Prepare a data frame for grid (scales): each grid segment spans the gap
# between the end of the previous group and the start of the current one.
# `end` is rotated by one position (last element first) to pair each group
# with the previous group's end; the first row is then dropped.
grid_data <- base_data
n_groups <- nrow(grid_data)
grid_data$end <- grid_data$end[c(n_groups, seq_len(n_groups - 1))] + 1
grid_data$start <- grid_data$start - 1
grid_data <- grid_data[-1, ]

# Make the plot.
# Note that id is a factor. If x is numeric, there is some space between the
# first bar.
p <- ggplot(data, aes(x = as.factor(id), y = value, fill = group)) +

  # First bar layer.
  # NOTE(review): presumably needed so that the discrete x scale is set up
  # before the numeric-x segment layers below are added; confirm before
  # removing.
  geom_bar(
    aes(x = as.factor(id), y = value, fill = group),
    stat = "identity", alpha = 0.5
  ) +

  # Add the val = 80/60/40/20 lines. They are added before the second bar
  # layer to make sure the bars are drawn OVER them.
  geom_segment(
    data = grid_data, aes(x = end, y = 80, xend = start, yend = 80),
    colour = "grey", alpha = 1, size = 0.3, inherit.aes = FALSE
  ) +
  geom_segment(
    data = grid_data, aes(x = end, y = 60, xend = start, yend = 60),
    colour = "grey", alpha = 1, size = 0.3, inherit.aes = FALSE
  ) +
  geom_segment(
    data = grid_data, aes(x = end, y = 40, xend = start, yend = 40),
    colour = "grey", alpha = 1, size = 0.3, inherit.aes = FALSE
  ) +
  geom_segment(
    data = grid_data, aes(x = end, y = 20, xend = start, yend = 20),
    colour = "grey", alpha = 1, size = 0.3, inherit.aes = FALSE
  ) +

  # Add text showing the value of each of the 20/40/60/80 lines
  annotate(
    "text",
    x = rep(max(data$id), 4), y = c(20, 40, 60, 80),
    label = c("20", "40", "60", "80"),
    color = "grey", size = 3, angle = 0, fontface = "bold", hjust = 1
  ) +

  # Draw the bars again so that they sit over the grid lines
  geom_bar(
    aes(x = as.factor(id), y = value, fill = group),
    stat = "identity", alpha = 0.5
  ) +
  ylim(-100, 120) +
  theme_minimal() +
  theme(
    legend.position = "none",
    axis.text = element_blank(),
    axis.title = element_blank(),
    panel.grid = element_blank(),
    plot.margin = unit(rep(-1, 4), "cm")
  ) +
  coord_polar() +
  geom_text(
    data = label_data,
    aes(x = id, y = value + 10, label = individual, hjust = hjust),
    color = "black", fontface = "bold", alpha = 0.6, size = 2.5,
    angle = label_data$angle, inherit.aes = FALSE
  ) +

  # Add base line information: one black segment under each group and the
  # group name below it. `hjust` is hard-coded for the four groups.
  geom_segment(
    data = base_data, aes(x = start, y = -5, xend = end, yend = -5),
    colour = "black", alpha = 0.8, size = 0.6, inherit.aes = FALSE
  ) +
  geom_text(
    data = base_data, aes(x = title, y = -18, label = group),
    hjust = c(1, 1, 0, 0), colour = "black", alpha = 0.8, size = 4,
    fontface = "bold", inherit.aes = FALSE
  )

p
Circular stacked barplot
I
tried to add as many comments as possible in the code, and thus hope that the
method is understandable. If it is not, please comment and ask supplementary
explanations.
You
first need to understand how to make a stacked barplot with ggplot2. Then
understand how to properly add labels, calculating the good angles, flipping
them if necessary, and adjusting their position. The trickiest part is probably
the one allowing to add space between each group. All these steps are described
one by one in the circular barchart section.
# library
library(tidyverse)
library(viridis)

# Create dataset: three values per individual. `group` is built explicitly as
# a factor: since R 4.0, data.frame() no longer converts strings to factors,
# so nlevels()/levels() below would otherwise return 0/NULL and no empty bar
# would be added.
data <- data.frame(
  individual = paste0("Mister ", seq(1, 60)),
  group = factor(c(rep("A", 10), rep("B", 30), rep("C", 14), rep("D", 6))),
  value1 = sample(seq(10, 100), 60, replace = TRUE),
  value2 = sample(seq(10, 100), 60, replace = TRUE),
  value3 = sample(seq(10, 100), 60, replace = TRUE)
)

# Transform data to a tidy (long) format: one row per individual and
# observation type. pivot_longer() replaces the superseded gather().
data <- data %>%
  pivot_longer(
    cols = -c(individual, group),
    names_to = "observation",
    values_to = "value"
  ) %>%
  as.data.frame()

# Set a number of 'empty bar' to add at the end of each group
empty_bar <- 2
nObsType <- nlevels(as.factor(data$observation))

# Each empty bar needs one all-NA row per observation type. Rows are tagged
# with their group, appended, then sorted so that every group ends with its
# empty rows. All rows of one bar share the same id.
to_add <- data.frame(
  matrix(NA, empty_bar * nlevels(data$group) * nObsType, ncol(data))
)
colnames(to_add) <- colnames(data)
to_add$group <- rep(levels(data$group), each = empty_bar * nObsType)
data <- rbind(data, to_add)
data <- data %>% arrange(group, individual)
data$id <- rep(seq(1, nrow(data) / nObsType), each = nObsType)

# Get the name and the y position of each label: the label sits on top of the
# stack, so its y position is the total of the stacked values
label_data <- data %>%
  group_by(id, individual) %>%
  summarize(tot = sum(value)) %>%
  ungroup()
number_of_bar <- nrow(label_data)

# 0.5 is subtracted because the label must have the angle of the center of its
# bar, not of its extreme right (1) or extreme left (0)
angle <- 90 - 360 * (label_data$id - 0.5) / number_of_bar

# Right-align and flip by 180 degrees the labels whose angle is < -90
label_data$hjust <- ifelse(angle < -90, 1, 0)
label_data$angle <- ifelse(angle < -90, angle + 180, angle)

# Prepare a data frame for base lines: for each group, the id of its first
# bar, the id of its last non-empty bar, and the midpoint used to place the
# group title
base_data <- data %>%
  group_by(group) %>%
  summarize(start = min(id), end = max(id) - empty_bar) %>%
  rowwise() %>%
  mutate(title = mean(c(start, end)))

# Prepare a data frame for grid (scales): each grid segment spans the gap
# between the end of the previous group and the start of the current one.
# `end` is rotated by one position (last element first) to pair each group
# with the previous group's end; the first row is then dropped.
grid_data <- base_data
n_groups <- nrow(grid_data)
grid_data$end <- grid_data$end[c(n_groups, seq_len(n_groups - 1))] + 1
grid_data$start <- grid_data$start - 1
grid_data <- grid_data[-1, ]

# Make the plot
p <- ggplot(data) +

  # Add the stacked bar
  geom_bar(
    aes(x = as.factor(id), y = value, fill = observation),
    stat = "identity", alpha = 0.5
  ) +
  scale_fill_viridis(discrete = TRUE) +

  # Add the val = 0/50/100/150/200 lines
  geom_segment(
    data = grid_data, aes(x = end, y = 0, xend = start, yend = 0),
    colour = "grey", alpha = 1, size = 0.3, inherit.aes = FALSE
  ) +
  geom_segment(
    data = grid_data, aes(x = end, y = 50, xend = start, yend = 50),
    colour = "grey", alpha = 1, size = 0.3, inherit.aes = FALSE
  ) +
  geom_segment(
    data = grid_data, aes(x = end, y = 100, xend = start, yend = 100),
    colour = "grey", alpha = 1, size = 0.3, inherit.aes = FALSE
  ) +
  geom_segment(
    data = grid_data, aes(x = end, y = 150, xend = start, yend = 150),
    colour = "grey", alpha = 1, size = 0.3, inherit.aes = FALSE
  ) +
  geom_segment(
    data = grid_data, aes(x = end, y = 200, xend = start, yend = 200),
    colour = "grey", alpha = 1, size = 0.3, inherit.aes = FALSE
  ) +

  # Add text showing the value of each of the 0/50/100/150/200 lines
  ggplot2::annotate(
    "text",
    x = rep(max(data$id), 5), y = c(0, 50, 100, 150, 200),
    label = c("0", "50", "100", "150", "200"),
    color = "grey", size = 6, angle = 0, fontface = "bold", hjust = 1
  ) +

  # The upper limit leaves 10 units of headroom above the tallest stack:
  # labels are placed at tot + 10, and anything outside the limits is dropped
  ylim(-150, max(label_data$tot, na.rm = TRUE) + 10) +
  theme_minimal() +
  theme(
    legend.position = "none",
    axis.text = element_blank(),
    axis.title = element_blank(),
    panel.grid = element_blank(),
    plot.margin = unit(rep(-1, 4), "cm")
  ) +
  coord_polar() +

  # Add labels on top of each bar
  geom_text(
    data = label_data,
    aes(x = id, y = tot + 10, label = individual, hjust = hjust),
    color = "black", fontface = "bold", alpha = 0.6, size = 5,
    angle = label_data$angle, inherit.aes = FALSE
  ) +

  # Add base line information: one black segment under each group and the
  # group name below it. `hjust` is hard-coded for the four groups.
  geom_segment(
    data = base_data, aes(x = start, y = -5, xend = end, yend = -5),
    colour = "black", alpha = 0.8, size = 0.6, inherit.aes = FALSE
  ) +
  geom_text(
    data = base_data, aes(x = title, y = -18, label = group),
    hjust = c(1, 1, 0, 0), colour = "black", alpha = 0.8, size = 4,
    fontface = "bold", inherit.aes = FALSE
  )

# Save the chart as a 10 x 10 PNG. Arguments are named in full instead of
# relying on partial matching of `file` to `filename`.
ggsave(filename = "output.png", plot = p, width = 10, height = 10)
Comments
Post a Comment