How to create barplot using ggplot in R Programming

 

Barplot

 # create dummy data

data <- data.frame(
  name = letters[1:5],
  value = sample(seq(4, 15), 5)  # five distinct integers drawn from 4..15
)

# The most basic barplot you can do: one bar per row, labelled with `name`.
# `names.arg` is written out in full; `names =` only reaches it through
# partial argument matching.
barplot(height = data$value, names.arg = data$name)

 

Custom color

Here are 3 examples showing how to customize the barplot color:

  • uniform color with col, asking one color only
  • using a palette coming from RColorBrewer
  • change border color with the border argument

 

# Create dummy data: five categories with random heights drawn from 4..15
data <- data.frame(
  name = letters[1:5],
  value = sample(seq(4, 15), 5)
)

# Uniform color: a single color for every bar (4th rgb() value is the alpha).
# `names.arg` is written out in full instead of relying on partial matching
# of `names =`.
barplot(
  height = data$value, names.arg = data$name,
  col = rgb(0.2, 0.4, 0.6, 0.6)
)

# Specific color for each bar? Use a well known palette
library(RColorBrewer)
coul <- brewer.pal(5, "Set2")  # five colors, one per bar
barplot(height = data$value, names.arg = data$name, col = coul)

# Change border color: white fill, colored outline
barplot(
  height = data$value, names.arg = data$name,
  border = "#69b3a2", col = "white"
)

Title, Axis label, Custom limits

Usual customizations with xlab, ylab, main and ylim.

# Create dummy data: five categories with random heights drawn from 4..15
data <- data.frame(
  name = letters[1:5],
  value = sample(seq(4, 15), 5)
)

# Uniform color, plus axis labels, a title and a fixed y range.
# `names.arg` is written out in full instead of relying on partial matching
# of `names =`.
barplot(
  height = data$value, names.arg = data$name,
  col = rgb(0.8, 0.1, 0.1, 0.6),
  xlab = "categories",
  ylab = "values",
  main = "My title",
  ylim = c(0, 40)  # y axis runs from 0 to 40 whatever the data
)

Horizontal barplot

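A barplot can be drawn horizontally with horiz = TRUE; las = 1 keeps the axis labels horizontal. The usual customizations with xlab, ylab, main and ylim still apply.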

# Create dummy data: five categories with random heights drawn from 4..15
data <- data.frame(
  name = letters[1:5],
  value = sample(seq(4, 15), 5)
)

# Uniform color, bars drawn horizontally.
# `names.arg` and `TRUE` are written out in full: `names =` relies on partial
# argument matching and `T` is an ordinary variable that can be reassigned.
barplot(
  height = data$value, names.arg = data$name,
  col = "#69b3a2",
  horiz = TRUE,
  las = 1  # axis labels always horizontal
)

Bar width & space between bars

It is possible to control the space between bars and the width of the bars using space and width.

This can be useful to represent the number of values behind each bar.

# Create dummy data: five categories with random heights drawn from 4..15
data <- data.frame(
  name = letters[1:5],
  value = sample(seq(4, 15), 5)
)

# Control space: one `space` value per bar.
# `names.arg` is written out in full instead of relying on partial matching
# of `names =`.
barplot(
  height = data$value, names.arg = data$name,
  col = rgb(0.2, 0.4, 0.6, 0.6),
  space = c(0.1, 0.2, 3, 1.5, 0.3)
)

# Control width: one `width` value per bar
barplot(
  height = data$value, names.arg = data$name,
  col = rgb(0.2, 0.4, 0.6, 0.6),
  width = c(0.1, 0.2, 3, 1.5, 0.3)
)

Barplot texture

Change bar texture with the density and angle arguments.

# Create dummy data: five categories with random heights drawn from 4..15
data <- data.frame(
  name = letters[1:5],
  value = sample(seq(4, 15), 5)
)

# Barplot with shading lines: one `density` and one `angle` value per bar.
# `names.arg` is written out in full instead of relying on partial matching
# of `names =`.
barplot(
  height = data$value, names.arg = data$name,
  density = c(5, 10, 20, 30, 7),
  angle = c(0, 45, 90, 11, 36),
  col = "brown"
)

 

The geom_errorbar() function

Error bars give a general idea of how precise a measurement is, or conversely, how far from the reported value the true (error free) value might be. If the value displayed on your barplot is the result of an aggregation (like the mean value of several data points), you may want to display error bars.

To understand how to build it, you first need to understand how to build a basic barplot with R. Then, you just need to add an extra layer using the geom_errorbar() function.

The function takes at least 3 arguments in its aesthetics:

  • ymin and ymax: position of the bottom and the top of the error bar respectively
  • x: position on the X axis

Note: the lower and upper limits of your error bars must be computed before building the chart, and available in a column of the input data.

# Load ggplot2
library(ggplot2)

# Dummy data: one row per bar, with the bar height (`value`) and the
# half-length of its error bar (`sd`)
data <- data.frame(
  name = letters[1:5],
  value = sample(seq(4, 15), 5),
  sd = c(1, 0.2, 3, 2, 4)
)

# Most basic error bar: bars at `value`, error bars from value - sd to
# value + sd. `x = name` is shared by both layers, so it is mapped once.
ggplot(data, aes(x = name)) +
  geom_col(aes(y = value), fill = "skyblue", alpha = 0.7) +
  geom_errorbar(
    aes(ymin = value - sd, ymax = value + sd),
    width = 0.4, colour = "orange", alpha = 0.9, size = 1.3
  )

Customization

It is possible to change the error bar type thanks to similar functions: geom_crossbar(), geom_linerange() and geom_pointrange(). Those functions work basically the same way as the most common geom_errorbar().

# Load ggplot2
library(ggplot2)

# Dummy data: one row per bar, with the bar height (`value`) and the
# half-length of its error bar (`sd`)
data <- data.frame(
  name = letters[1:5],
  value = sample(seq(4, 15), 5),
  sd = c(1, 0.2, 3, 2, 4)
)

# Every chart below starts from the same light-blue bars, so build them once
base_bars <- ggplot(data, aes(x = name)) +
  geom_col(aes(y = value), fill = "skyblue", alpha = 0.5)

# Rectangle
base_bars +
  geom_crossbar(
    aes(y = value, ymin = value - sd, ymax = value + sd),
    width = 0.4, colour = "orange", alpha = 0.9, size = 1.3
  )

# Line
base_bars +
  geom_linerange(
    aes(ymin = value - sd, ymax = value + sd),
    colour = "orange", alpha = 0.9, size = 1.3
  )

# Line + dot
base_bars +
  geom_pointrange(
    aes(y = value, ymin = value - sd, ymax = value + sd),
    colour = "orange", alpha = 0.9, size = 1.3
  )

# Horizontal: same as the basic error bar, with the axes swapped
base_bars +
  geom_errorbar(
    aes(ymin = value - sd, ymax = value + sd),
    width = 0.4, colour = "orange", alpha = 0.9, size = 1.3
  ) +
  coord_flip()

 

Barplot with number of observation

This chart illustrates many tips you can apply to a base R barplot:

  • Add abline with abline()
  • Change axis labels orientation with las
  • Add text with text()
  • Add a legend with legend()

# Data: 12 bars in three runs of four (a genotype name followed by three
# "with ..." conditions), each with a random average and a random count
data <- data.frame(
  name = c(
    "DD", "with himself", "with DC", "with Silur",
    "DC", "with himself", "with DD", "with Silur",
    "Silur", "with himself", "with DD", "with DC"
  ),
  average = sample(seq(1, 10), 12, replace = TRUE),
  number = sample(seq(4, 39), 12, replace = TRUE)
)

# Increase bottom margin to leave room for the perpendicular bar labels
par(mar = c(6, 4, 4, 4))

# One color per position inside a run of four bars; barplot() recycles the
# vector over the 12 bars. Positions 3 and 4 share the same color.
bar_colors <- c(
  rgb(0.3, 0.1, 0.4, 0.6),
  rgb(0.3, 0.5, 0.4, 0.6),
  rgb(0.3, 0.9, 0.4, 0.6),
  rgb(0.3, 0.9, 0.4, 0.6)
)

# Basic barplot. barplot() returns the bar midpoints, kept in `my_bar` so the
# text labels can be centered on the bars.
my_bar <- barplot(
  data$average,
  border = FALSE,
  names.arg = data$name,
  las = 2,  # axis labels perpendicular to the axis
  col = bar_colors,
  ylim = c(0, 13),
  main = ""
)

# Add vertical separator lines between the runs of four bars
abline(v = c(4.9, 9.7), col = "grey")

# Add the number of observations just above each bar
text(my_bar, data$average + 0.4, paste0("n: ", data$number), cex = 1)

# Legend: three entries, so only the three distinct colors are passed
legend(
  "topleft",
  legend = c("Alone", "with Himself", "With other genotype"),
  col = bar_colors[1:3],
  bty = "n", pch = 20, pt.cex = 2, cex = 0.8, horiz = FALSE,
  inset = c(0.05, 0.05)
)

 

Most basic circular barplot

A circular barplot is a barplot where bars are displayed along a circle instead of a line.

The input dataset is the same as for a barplot: we need one numeric value per group (one group = one bar). (See more explanation in the barplot section).

Basically, the method is the same as for a classic barplot. At the end, we call coord_polar() to make the chart circular. Note that the ylim() argument is really important. If it starts at 0, the bars will start from the centre of the circle. If you provide a negative value, a white circle space will appear!

This chart is not really insightful, go to the next example to learn how to add labels!

# Libraries
library(tidyverse)

# Create dataset: 60 individuals, each with a random value drawn from 10..100
data <- data.frame(
  id = seq(1, 60),
  individual = paste0("Mister ", seq(1, 60)),
  value = sample(seq(10, 100), 60, replace = TRUE)
)

# Make the plot.
# Note that id is a factor. If x is numeric, there is some space between the
# first bar
p <- ggplot(data, aes(x = as.factor(id), y = value)) +
  # This adds the bars with a blue color
  geom_bar(stat = "identity", fill = alpha("blue", 0.3)) +
  # Limits of the plot = very important. The negative value controls the size
  # of the inner circle, the positive one is useful to add size over each bar
  ylim(-100, 120) +
  # Custom the theme: no axis title and no cartesian grid
  theme_minimal() +
  theme(
    axis.text = element_blank(),
    axis.title = element_blank(),
    panel.grid = element_blank(),
    # This removes unnecessary margin around the plot
    plot.margin = unit(rep(-2, 4), "cm")
  ) +
  # This makes the coordinate polar instead of cartesian
  coord_polar(start = 0)

p

 

Add labels to circular barplot

Here I suggest a method to add a label at the top of each bar, using the same angle as the central part of the bar. In the code below, a short section creates a dataframe with the features of each label, which we can then use in geom_text().

Note that labels are always at an angle that makes them easy to read, which requires a 180-degree flip for some of them.

# Libraries
library(tidyverse)

# Create dataset: 60 individuals, each with a random value drawn from 10..100
data <- data.frame(
  id = seq(1, 60),
  individual = paste0("Mister ", seq(1, 60)),
  value = sample(seq(10, 100), 60, replace = TRUE)
)

# ----- This section prepares a data frame for labels ---- #
# Get the name and the y position of each label
label_data <- data

# Calculate the ANGLE of the labels.
# 0.5 is subtracted because each label must take the angle of the center of
# its bar, not of its extreme right (1) or extreme left (0).
number_of_bar <- nrow(label_data)
angle <- 90 - 360 * (label_data$id - 0.5) / number_of_bar

# Calculate the alignment of labels: right or left.
# Labels on the left part of the plot currently have an angle < -90
label_data$hjust <- ifelse(angle < -90, 1, 0)

# Flip those labels by 180 degrees to make them readable
label_data$angle <- ifelse(angle < -90, angle + 180, angle)
# ----- ------------------------------------------- ---- #

# Start the plot.
# Note that id is a factor. If x is numeric, there is some space between the
# first bar
p <- ggplot(data, aes(x = as.factor(id), y = value)) +
  # This adds the bars with a blue color
  geom_bar(stat = "identity", fill = alpha("skyblue", 0.7)) +
  # Limits of the plot = very important. The negative value controls the size
  # of the inner circle, the positive one is useful to add size over each bar
  ylim(-100, 120) +
  # Custom the theme: no axis title and no cartesian grid
  theme_minimal() +
  theme(
    axis.text = element_blank(),
    axis.title = element_blank(),
    panel.grid = element_blank(),
    # Adjust the margin so that labels are not truncated!
    plot.margin = unit(rep(-1, 4), "cm")
  ) +
  # This makes the coordinate polar instead of cartesian
  coord_polar(start = 0) +
  # Add the labels, using the label_data data frame created before.
  # Each label sits 10 units above the top of its bar.
  geom_text(
    data = label_data,
    aes(x = id, y = value + 10, label = individual, hjust = hjust),
    color = "black", fontface = "bold", alpha = 0.6, size = 2.5,
    angle = label_data$angle, inherit.aes = FALSE
  )

p

 

Circular barplot with groups

Add a gap in the circle

A circular barplot is a barplot where bars are displayed along a circle instead of a line. This page aims to teach you how to make a circular barplot with groups.

Since this kind of chart is a bit tricky, I strongly advise to understand graph #295 and #296 that will teach you the basics.

The first step is to build a circular barplot with a break in the circle. Actually, I just added a few empty lines at the end of the initial data frame:

# library
library(tidyverse)

# Create dataset: 60 individuals, each with a random value drawn from 10..100
data <- data.frame(
  individual = paste0("Mister ", seq(1, 60)),
  value = sample(seq(10, 100), 60, replace = TRUE)
)

# Set a number of 'empty bar'
empty_bar <- 10

# Add all-NA rows at the end of the initial dataset: they take up room on the
# x axis without drawing a bar, which leaves a gap in the circle
to_add <- matrix(NA, empty_bar, ncol(data))
colnames(to_add) <- colnames(data)
data <- rbind(data, to_add)
data$id <- seq_len(nrow(data))

# Get the name and the y position of each label
label_data <- data
number_of_bar <- nrow(label_data)
# 0.5 is subtracted because each label must take the angle of the center of
# its bar, not of its extreme right (1) or extreme left (0)
angle <- 90 - 360 * (label_data$id - 0.5) / number_of_bar
# Labels on the left part of the plot (angle < -90) are right-aligned and
# flipped by 180 degrees so they stay readable
label_data$hjust <- ifelse(angle < -90, 1, 0)
label_data$angle <- ifelse(angle < -90, angle + 180, angle)

# Make the plot.
# Note that id is a factor. If x is numeric, there is some space between the
# first bar
p <- ggplot(data, aes(x = as.factor(id), y = value)) +
  geom_bar(stat = "identity", fill = alpha("green", 0.3)) +
  # The negative lower limit leaves an empty circle in the middle
  ylim(-100, 120) +
  theme_minimal() +
  theme(
    axis.text = element_blank(),
    axis.title = element_blank(),
    panel.grid = element_blank(),
    plot.margin = unit(rep(-1, 4), "cm")
  ) +
  coord_polar(start = 0) +
  # One label per bar, 10 units above the top of the bar
  geom_text(
    data = label_data,
    aes(x = id, y = value + 10, label = individual, hjust = hjust),
    color = "black", fontface = "bold", alpha = 0.6, size = 2.5,
    angle = label_data$angle, inherit.aes = FALSE
  )

p

 

Space between groups

This concept can now be used to add space between each group of the dataset. I add n lines with only NA at the bottom of each group.

This chart is far more insightful since it allows one to quickly compare the different groups, and to compare the value of items within each group.

# library
library(tidyverse)

# Create dataset: 60 individuals split into 4 groups of unequal size
data <- data.frame(
  individual = paste0("Mister ", seq(1, 60)),
  # `group` must be a factor: nlevels() and levels() are used below, and they
  # return 0 / NULL for a character column (the data.frame() default since
  # R 4.0), which would break the padding step
  group = factor(c(rep("A", 10), rep("B", 30), rep("C", 14), rep("D", 6))),
  value = sample(seq(10, 100), 60, replace = TRUE)
)

# Set a number of 'empty bar' to add at the end of each group
empty_bar <- 4
# `empty_bar` all-NA rows per group; only the `group` column is filled in so
# that sorting by group puts the padding rows with their group
to_add <- data.frame(matrix(NA, empty_bar * nlevels(data$group), ncol(data)))
colnames(to_add) <- colnames(data)
to_add$group <- rep(levels(data$group), each = empty_bar)
data <- rbind(data, to_add)
data <- data %>% arrange(group)
data$id <- seq_len(nrow(data))

# Get the name and the y position of each label
label_data <- data
number_of_bar <- nrow(label_data)
# 0.5 is subtracted because each label must take the angle of the center of
# its bar, not of its extreme right (1) or extreme left (0)
angle <- 90 - 360 * (label_data$id - 0.5) / number_of_bar
# Labels on the left part of the plot (angle < -90) are right-aligned and
# flipped by 180 degrees so they stay readable
label_data$hjust <- ifelse(angle < -90, 1, 0)
label_data$angle <- ifelse(angle < -90, angle + 180, angle)

# Make the plot.
# Note that id is a factor. If x is numeric, there is some space between the
# first bar
p <- ggplot(data, aes(x = as.factor(id), y = value, fill = group)) +
  geom_bar(stat = "identity", alpha = 0.5) +
  # The negative lower limit leaves an empty circle in the middle
  ylim(-100, 120) +
  theme_minimal() +
  theme(
    legend.position = "none",
    axis.text = element_blank(),
    axis.title = element_blank(),
    panel.grid = element_blank(),
    plot.margin = unit(rep(-1, 4), "cm")
  ) +
  coord_polar() +
  # One label per bar, 10 units above the top of the bar
  geom_text(
    data = label_data,
    aes(x = id, y = value + 10, label = individual, hjust = hjust),
    color = "black", fontface = "bold", alpha = 0.6, size = 2.5,
    angle = label_data$angle, inherit.aes = FALSE
  )

p

 

Order bars

Here observations are sorted by bar height within each group. It can be useful if your goal is to understand which are the highest / lowest observations within and across groups.

The method used to order groups in ggplot2 is extensively described in this dedicated page. Basically, you just have to add the following piece of code right after the data frame creation:

# Order data: sort rows by group first, then by increasing value inside
# each group
data <- arrange(data, group, value)

Circular barchart customization

Last but not least, it is highly advisable to add some customisation to your chart. Here we add group names (A, B, C and D), and we add a scale to help compare the sizes of the bars. Voila! The code is a bit long, but the result is quite worth it in my opinion!

 

# library
library(tidyverse)

# Create dataset: 60 individuals split into 4 groups of unequal size
data <- data.frame(
  individual = paste0("Mister ", seq(1, 60)),
  # `group` must be a factor: nlevels() and levels() are used below, and they
  # return 0 / NULL for a character column (the data.frame() default since
  # R 4.0), which would break the padding step
  group = factor(c(rep("A", 10), rep("B", 30), rep("C", 14), rep("D", 6))),
  value = sample(seq(10, 100), 60, replace = TRUE)
)

# Set a number of 'empty bar' to add at the end of each group
empty_bar <- 3
# `empty_bar` all-NA rows per group; only the `group` column is filled in so
# that sorting by group puts the padding rows with their group
to_add <- data.frame(matrix(NA, empty_bar * nlevels(data$group), ncol(data)))
colnames(to_add) <- colnames(data)
to_add$group <- rep(levels(data$group), each = empty_bar)
data <- rbind(data, to_add)
data <- data %>% arrange(group)
data$id <- seq_len(nrow(data))

# Get the name and the y position of each label
label_data <- data
number_of_bar <- nrow(label_data)
# 0.5 is subtracted because each label must take the angle of the center of
# its bar, not of its extreme right (1) or extreme left (0)
angle <- 90 - 360 * (label_data$id - 0.5) / number_of_bar
# Labels on the left part of the plot (angle < -90) are right-aligned and
# flipped by 180 degrees so they stay readable
label_data$hjust <- ifelse(angle < -90, 1, 0)
label_data$angle <- ifelse(angle < -90, angle + 180, angle)

# Prepare a data frame for base lines: for each group, the first id, the last
# id that is not a padding row, and the midpoint where the group name goes
base_data <- data %>%
  group_by(group) %>%
  summarize(start = min(id), end = max(id) - empty_bar) %>%
  mutate(title = (start + end) / 2)

# Prepare a data frame for grid (scales): each row spans the gap between two
# consecutive groups, from the previous group's last bar + 1 to this group's
# first bar - 1. The first row has no previous group and is dropped.
grid_data <- base_data
previous_row <- c(nrow(grid_data), seq_len(nrow(grid_data) - 1))
grid_data$end <- grid_data$end[previous_row] + 1
grid_data$start <- grid_data$start - 1
grid_data <- grid_data[-1, ]

# Make the plot.
# Note that id is a factor. If x is numeric, there is some space between the
# first bar
p <- ggplot(data, aes(x = as.factor(id), y = value, fill = group)) +
  # First bar layer, added before the layers below that use numeric x
  # positions. NOTE(review): presumably this is what makes the x scale
  # discrete before those layers are added; confirm before removing either
  # of the two bar layers.
  geom_bar(aes(x = as.factor(id), y = value, fill = group), stat = "identity", alpha = 0.5) +
  # Add grid lines at value = 80/60/40/20. They are added before the second
  # bar layer to make sure the bars are drawn OVER them.
  geom_segment(data = grid_data, aes(x = end, y = 80, xend = start, yend = 80), colour = "grey", alpha = 1, size = 0.3, inherit.aes = FALSE) +
  geom_segment(data = grid_data, aes(x = end, y = 60, xend = start, yend = 60), colour = "grey", alpha = 1, size = 0.3, inherit.aes = FALSE) +
  geom_segment(data = grid_data, aes(x = end, y = 40, xend = start, yend = 40), colour = "grey", alpha = 1, size = 0.3, inherit.aes = FALSE) +
  geom_segment(data = grid_data, aes(x = end, y = 20, xend = start, yend = 20), colour = "grey", alpha = 1, size = 0.3, inherit.aes = FALSE) +
  # Add text showing the value of each of the 20/40/60/80 lines, placed at
  # the last x position
  annotate("text", x = rep(max(data$id), 4), y = c(20, 40, 60, 80), label = c("20", "40", "60", "80"), color = "grey", size = 3, angle = 0, fontface = "bold", hjust = 1) +
  # Second bar layer, drawn over the grid lines
  geom_bar(aes(x = as.factor(id), y = value, fill = group), stat = "identity", alpha = 0.5) +
  # The negative lower limit leaves an empty circle in the middle
  ylim(-100, 120) +
  theme_minimal() +
  theme(
    legend.position = "none",
    axis.text = element_blank(),
    axis.title = element_blank(),
    panel.grid = element_blank(),
    plot.margin = unit(rep(-1, 4), "cm")
  ) +
  coord_polar() +
  # One label per bar, 10 units above the top of the bar
  geom_text(data = label_data, aes(x = id, y = value + 10, label = individual, hjust = hjust), color = "black", fontface = "bold", alpha = 0.6, size = 2.5, angle = label_data$angle, inherit.aes = FALSE) +
  # Add base line information: a black line under each group and the group
  # name below it (hjust has one value per group)
  geom_segment(data = base_data, aes(x = start, y = -5, xend = end, yend = -5), colour = "black", alpha = 0.8, size = 0.6, inherit.aes = FALSE) +
  geom_text(data = base_data, aes(x = title, y = -18, label = group), hjust = c(1, 1, 0, 0), colour = "black", alpha = 0.8, size = 4, fontface = "bold", inherit.aes = FALSE)

p

 

Circular stacked barplot

I tried to add as many comments as possible in the code, and thus hope that the method is understandable. If it is not, please comment and ask for further explanations.

You first need to understand how to make a stacked barplot with ggplot2. Then understand how to properly add labels, calculating the right angles, flipping them if necessary, and adjusting their position. The trickiest part is probably the one that adds space between each group. All these steps are described one by one in the circular barchart section.

# library
library(tidyverse)
library(viridis)

# Create dataset: 60 individuals in 4 groups, with three values each
data <- data.frame(
  individual = paste0("Mister ", seq(1, 60)),
  # `group` must be a factor: nlevels() and levels() are used below, and they
  # return 0 / NULL for a character column (the data.frame() default since
  # R 4.0), which would break the padding step
  group = factor(c(rep("A", 10), rep("B", 30), rep("C", 14), rep("D", 6))),
  value1 = sample(seq(10, 100), 60, replace = TRUE),
  value2 = sample(seq(10, 100), 60, replace = TRUE),
  value3 = sample(seq(10, 100), 60, replace = TRUE)
)

# Transform data in a tidy format (long format): one row per individual and
# observation type; the first two columns (individual, group) are kept as is
data <- data %>% gather(key = "observation", value = "value", -c(1, 2))

# Set a number of 'empty bar' to add at the end of each group.
# Each empty bar needs one all-NA row per observation type.
empty_bar <- 2
nObsType <- nlevels(as.factor(data$observation))
to_add <- data.frame(matrix(NA, empty_bar * nlevels(data$group) * nObsType, ncol(data)))
colnames(to_add) <- colnames(data)
to_add$group <- rep(levels(data$group), each = empty_bar * nObsType)
data <- rbind(data, to_add)
data <- data %>% arrange(group, individual)
# Consecutive runs of nObsType rows share one id, i.e. one bar
data$id <- rep(seq(1, nrow(data) / nObsType), each = nObsType)

# Get the name and the y position of each label: one row per bar, with the
# total height of the stack. ungroup() drops the grouping left by summarize().
label_data <- data %>%
  group_by(id, individual) %>%
  summarize(tot = sum(value)) %>%
  ungroup()
number_of_bar <- nrow(label_data)
# 0.5 is subtracted because each label must take the angle of the center of
# its bar, not of its extreme right (1) or extreme left (0)
angle <- 90 - 360 * (label_data$id - 0.5) / number_of_bar
# Labels on the left part of the plot (angle < -90) are right-aligned and
# flipped by 180 degrees so they stay readable
label_data$hjust <- ifelse(angle < -90, 1, 0)
label_data$angle <- ifelse(angle < -90, angle + 180, angle)

# Prepare a data frame for base lines: for each group, the first id, the last
# id that is not a padding bar, and the midpoint where the group name goes
base_data <- data %>%
  group_by(group) %>%
  summarize(start = min(id), end = max(id) - empty_bar) %>%
  mutate(title = (start + end) / 2)

# Prepare a data frame for grid (scales): each row spans the gap between two
# consecutive groups, from the previous group's last bar + 1 to this group's
# first bar - 1. The first row has no previous group and is dropped.
grid_data <- base_data
previous_row <- c(nrow(grid_data), seq_len(nrow(grid_data) - 1))
grid_data$end <- grid_data$end[previous_row] + 1
grid_data$start <- grid_data$start - 1
grid_data <- grid_data[-1, ]

# Labels are drawn `label_offset` units above the top of each stack
label_offset <- 10

# Make the plot
p <- ggplot(data) +
  # Add the stacked bar
  geom_bar(aes(x = as.factor(id), y = value, fill = observation), stat = "identity", alpha = 0.5) +
  scale_fill_viridis(discrete = TRUE) +
  # Add grid lines at value = 0/50/100/150/200 in the gaps between groups
  geom_segment(data = grid_data, aes(x = end, y = 0, xend = start, yend = 0), colour = "grey", alpha = 1, size = 0.3, inherit.aes = FALSE) +
  geom_segment(data = grid_data, aes(x = end, y = 50, xend = start, yend = 50), colour = "grey", alpha = 1, size = 0.3, inherit.aes = FALSE) +
  geom_segment(data = grid_data, aes(x = end, y = 100, xend = start, yend = 100), colour = "grey", alpha = 1, size = 0.3, inherit.aes = FALSE) +
  geom_segment(data = grid_data, aes(x = end, y = 150, xend = start, yend = 150), colour = "grey", alpha = 1, size = 0.3, inherit.aes = FALSE) +
  geom_segment(data = grid_data, aes(x = end, y = 200, xend = start, yend = 200), colour = "grey", alpha = 1, size = 0.3, inherit.aes = FALSE) +
  # Add text showing the value of each of the 0/50/100/150/200 lines, placed
  # at the last x position
  ggplot2::annotate("text", x = rep(max(data$id), 5), y = c(0, 50, 100, 150, 200), label = c("0", "50", "100", "150", "200"), color = "grey", size = 6, angle = 0, fontface = "bold", hjust = 1) +
  # The upper limit includes the label offset: values outside the limits are
  # dropped, so without it the label of the tallest bar would be removed
  ylim(-150, max(label_data$tot, na.rm = TRUE) + label_offset) +
  theme_minimal() +
  theme(
    legend.position = "none",
    axis.text = element_blank(),
    axis.title = element_blank(),
    panel.grid = element_blank(),
    plot.margin = unit(rep(-1, 4), "cm")
  ) +
  coord_polar() +
  # Add labels on top of each bar
  geom_text(data = label_data, aes(x = id, y = tot + label_offset, label = individual, hjust = hjust), color = "black", fontface = "bold", alpha = 0.6, size = 5, angle = label_data$angle, inherit.aes = FALSE) +
  # Add base line information: a black line under each group and the group
  # name below it (hjust has one value per group)
  geom_segment(data = base_data, aes(x = start, y = -5, xend = end, yend = -5), colour = "black", alpha = 0.8, size = 0.6, inherit.aes = FALSE) +
  geom_text(data = base_data, aes(x = title, y = -18, label = group), hjust = c(1, 1, 0, 0), colour = "black", alpha = 0.8, size = 4, fontface = "bold", inherit.aes = FALSE)

# Save as png. Arguments are named in full; `file =` only reaches `filename`
# through partial argument matching.
ggsave(filename = "output.png", plot = p, width = 10, height = 10)

Comments

Popular posts from this blog

How to create Animated 3d chart with R.

Linux/Unix Commands frequently used

R Programming Introduction