This file demonstrates how the relative abundance plots for archaea and bacteria were plotted.

Load Libraries

library(readr)
library(phyloseq)
library(ggplot2)
library(dplyr)
library(tidyverse)
library(plyr)
library(vegan)
library(ggplot2)
library(ggforce)
library(Biostrings)
library(reshape2)

Archaea

Read files

sample <- read.csv("metadata_noHS.csv", 
                    check.names = FALSE, row.names = 1)
arc.genus.data <- read.csv("AD_archaeagenus_noHS.csv", check.names = F, row.names = 1)
arc.genus.data <- arc.genus.data[,-2]
colnames(sample)[1] <- "variable"

Prepare data for plotting with ggplot2

arc.genus.melted <- reshape2::melt(arc.genus.data,id=("Genus"))
arc.genus.melted.new <- left_join(arc.genus.melted, sample, by = "variable")

sample.new <- sample[order(sample$Treatment),]

C37 plot

arc.genus.melted.new.37 <- subset(arc.genus.melted.new, Treatment == 37)
sample.37 <- subset(sample, Treatment == 37)

theme_set(theme_bw())
p.37 <- ggplot(arc.genus.melted.new.37, aes(fill=Genus, y=value, x=fct_inorder(variable))) + 
  geom_bar(position="stack", stat="identity",color = "black", width=0.5) +
  labs(x="",y="Relative abundance") + 
  theme(axis.text.y = element_text(size=12),axis.text.x = element_text(angle=-90,size=12,margin = margin(t = 2, unit = "pt")), 
        axis.title.y = element_text(size=15, margin = margin(r = 5, unit = "pt")),
        legend.text = element_text(size=10, face = "italic"), legend.title = element_text(size=14),
        panel.border = element_rect(size = 0.8)) +
  scale_fill_manual(values=c("#93a8ac","#ccd5ae","#ffa62b","#90a955","#cdb4db","#c5283d","#1f7a8c",
                                      "#ff8c42","#7e8d6e","#fceade","#8ecae6","#d8e2dc","#a8577e","#e8e9eb"))+
  facet_grid(cols = vars(Treatment), scales="free",space = "free_x") +
  theme(strip.text.x = element_text(size = 12),
        strip.background = element_rect(size = 0.8)) +
  scale_x_discrete(labels = sample.37$Replicate)

T50 plot

arc.genus.melted.new.50 <- subset(arc.genus.melted.new, Treatment == 50)
sample.50 <- subset(sample, Treatment == 50)

theme_set(theme_bw())
p.50 <- ggplot(arc.genus.melted.new.50, aes(fill=Genus, y=value, x=fct_inorder(variable))) + 
  geom_bar(position="stack", stat="identity",color = "black", width=0.5) +
  labs(x="",y="") + 
  theme(axis.text.y = element_text(size=12),axis.text.x = element_text(angle=-90,size=12,margin = margin(t = 2, unit = "pt")), 
        axis.title.y = element_text(size=15, margin = margin(r = 5, unit = "pt")),
        legend.text = element_text(size=8, face = "italic"), legend.title = element_text(size=13),
        panel.border = element_rect(size = 0.8)) +
  scale_fill_manual(values=c("#93a8ac","#ccd5ae","#ffa62b","#90a955","#cdb4db","#c5283d","#1f7a8c",
                                      "#ff8c42","#7e8d6e","#fceade","#8ecae6","#d8e2dc","#a8577e","#e8e9eb"))+
  facet_grid(cols = vars(Treatment), scales="free",space = "free_x") +
  theme(strip.text.x = element_text(size = 12),
        strip.background = element_rect(size = 0.8)) +
  scale_x_discrete(labels = sample.50$Replicate)

T65 plot

arc.genus.melted.new.65 <- subset(arc.genus.melted.new, Treatment == 65)
sample.65 <- subset(sample, Treatment == 65)

theme_set(theme_bw())
p.65 <- ggplot(arc.genus.melted.new.65, aes(fill=Genus, y=value, x=fct_inorder(variable))) + 
  geom_bar(position="stack", stat="identity",color = "black", width=0.5) +
  labs(x="Time (d)",y="Relative abundance") + 
  theme(axis.text.y = element_text(size=12),axis.text.x = element_text(angle=-90,size=12,margin = margin(t = 2, unit = "pt")), 
        axis.title.y = element_text(size=15, margin = margin(r = 5, unit = "pt")),
        axis.title.x = element_text(size=15, margin = margin(r = 5, unit = "pt")),
        legend.text = element_text(size=8, face = "italic"), legend.title = element_text(size=13),
        panel.border = element_rect(size = 0.8)) +
  scale_fill_manual(values=c("#93a8ac","#ccd5ae","#ffa62b","#90a955","#cdb4db","#c5283d","#1f7a8c",
                                      "#ff8c42","#7e8d6e","#fceade","#8ecae6","#d8e2dc","#a8577e","#e8e9eb"))+
  facet_grid(cols = vars(Treatment), scales="free",space = "free_x") +
  theme(strip.text.x = element_text(size = 12),
        strip.background = element_rect(size = 0.8)) +
  scale_x_discrete(labels = sample.65$Replicate)

T80 plot

arc.genus.melted.new.80 <- subset(arc.genus.melted.new, Treatment == 80)
sample.80 <- subset(sample, Treatment == 80)

theme_set(theme_bw())
p.80 <- ggplot(arc.genus.melted.new.80, aes(fill=Genus, y=value, x=fct_inorder(variable))) + 
  geom_bar(position="stack", stat="identity",color = "black", width=0.5) +
  labs(x="Time (d)",y="") + 
  theme(axis.text.y = element_text(size=12),axis.text.x = element_text(angle=-90,size=12,margin = margin(t = 2, unit = "pt")), 
        axis.title.y = element_text(size=15, margin = margin(r = 5, unit = "pt")),
        axis.title.x = element_text(size=15, margin = margin(r = 5, unit = "pt")),
        legend.text = element_text(size=8, face = "italic"), legend.title = element_text(size=13),
        panel.border = element_rect(size = 0.8)) +
  scale_fill_manual(values=c("#93a8ac","#ccd5ae","#ffa62b","#90a955","#cdb4db","#c5283d","#1f7a8c",
                             "#ff8c42","#7e8d6e","#fceade","#8ecae6","#d8e2dc","#a8577e","#e8e9eb"))+
  facet_grid(cols = vars(Treatment), scales="free",space = "free_x") +
  theme(strip.text.x = element_text(size = 12),
        strip.background = element_rect(size = 0.8)) +
  scale_x_discrete(labels = sample.80$Replicate)

Organize plots

library(cowplot)
legend.arc <- get_legend(p.37)
p.1 <- plot_grid(p.37+theme(legend.position="none"),
          p.50+theme(legend.position="none"),
          p.65+theme(legend.position="none"),
          p.80+theme(legend.position="none"))
# ggsave("archaea_genus_RA1.png", 
#        width = 18, height = 15, units = "cm", bg='white')
p.2 <- plot_grid(legend.arc)
# ggsave("archaea_genus_lgd.png", 
#        width = 10, height = 15, units = "cm", bg='white')
p.1

Bacteria

Read files

sample <- read.csv("metadata.csv", 
                   check.names = FALSE, row.names = 1)
bac.data.RA <- read.csv("AD_bacteria.csv", check.names = F, row.names = 1)

Prepare the relative abundance data

bac.data.RA <- (bac.data.RA[,!names(bac.data.RA) %in% 
                     c("Kingdom", "Phylum", "Class")]) # Delete these columns
bac.data.RA <- bac.data.RA[,-1] # Delete Blank
bac.data.RA <- bac.data.RA %>% drop_na(Order) # Drop the rows where the Order is NA
bac.data.RA <- unite(bac.data.RA, Taxa, c(56:59)) # Unite the taxa into one column
ASV.Tax <- data.frame(row.names(bac.data.RA), bac.data.RA[,56]) # Pull out ASV taxa name in case we need this into
bac.data.RA.agr <- aggregate(bac.data.RA[,c(1:55)], by = list(bac.data.RA$Taxa), FUN = sum) # Aggregate the rows with same Taxa

Select out the bacteria that had higher abundance than 2% at any time point

bac.data.RA.1 <- bac.data.RA.agr[apply(bac.data.RA.agr[,-1], MARGIN = 1, function(x) any(x>2)), ] # Keep the rows if a row contains values bigger than 2

# Can also use filter_all function if everything is numeric
# bac.data.RA.1 <- bac.data.RA[,-56] %>% filter_all(any_vars(. > 2))

apply(bac.data.RA.1[,-1], 1, max) # Sanity check. Make sure the max in each row is higher than 2

##        13        36        37        58        64        65        66        72 
##  3.310334  2.018891  4.109964  9.364059  3.316782 14.365979  2.233432 16.919926 
##        73        75        88        93       161       182       213       251 
##  7.551348  4.120359  2.811151  7.235339  2.891924  3.027079  2.710919 49.098806 
##       252       253       268       271       310       335       349       368 
## 13.669214  4.505592  2.264751  2.314416  2.048091  2.234036 18.771139  4.031856 
##       378       422       431       466       476       478       494       522 
##  2.692225  9.377734 27.754120  2.034750 14.606868  3.614680  2.170692  6.853878 
##       553       557       565       592       594       605       607       608 
##  8.906691  3.344947  2.524611  4.825037  2.990220  6.599236  4.096834  6.184223 
##       623       624       633       655       668       669       670       671 
##  3.448611  2.007635  6.602515  2.154828  3.282685  7.841892 36.951453 30.202904 
##       681       684       690       723       811       829       832       841 
##  6.113660  4.273263  3.542706 12.192148  2.829987  3.480196  6.942869 18.327795 
##       844       847       850       856       861       872       895 
##  5.802712  2.587005  3.826457  9.382495 11.714334  3.301398  9.470284

Shape the data for ggplot

names(bac.data.RA.1)[1] <- "Taxa"
bac.data.RA.1[bac.data.RA.1 == 0] <- NA
bac.RA.melted <- reshape2::melt(bac.data.RA.1,id=("Taxa")) # Shape the data format for ggplot
names(sample)[1] <- "variable" # Change the first column name
bac.RA.melted.new <- left_join(bac.RA.melted, sample, by = "variable") # Combine metadata with relative abundance and taxa info

Remove a few time points

bac.RA.melted.plot <- subset(bac.RA.melted.new, Time != 3)
bac.RA.melted.plot <- subset(bac.RA.melted.plot, Time != 20)
bac.RA.melted.plot <- subset(bac.RA.melted.plot, Time != 76)

Plot the graph

ggplot(bac.RA.melted.plot, aes(x = Replicate, y = Taxa, size = value, color = Group)) +
  geom_point(alpha=0.8) +
  theme_bw()+
  theme(panel.grid.major = element_blank(),
        panel.grid.major.y = element_line( size=.1, color="white" ), 
        axis.text.y = element_text(colour = "black", size = 10),axis.text.x = element_text(angle=-90,size=12,face = "bold", margin = margin(t = 2, unit = "pt"),colour = "black"),
        axis.title = element_blank(),
        panel.border = element_rect(size =0.2, fill = NA), 
        legend.title = element_text(size = 15, face = "bold", colour = "black"), 
        legend.position = "left",
        legend.text = element_text(size = 14, colour = "black")) +
  guides(color = guide_legend(override.aes = list(size=4)))+
  scale_y_discrete(position = "right")+
  scale_size(name = "Abundance%", range = c(0.2,6), breaks = c(2,10,20,40))+
  scale_color_manual(name = "Group", 
                     values = c( "#134074", "#f5bb00" ,"#90a955", "#ef476f"))+
  facet_grid(~ Time, labeller = label_both) +
  theme(panel.border = element_rect(color = "#6c757d", fill = NA, size = 0.3),
        panel.spacing = unit(1.5, "lines"),
        strip.text.x = element_text(size = 13,face = "bold"),
        strip.background = element_rect(color = "#6c757d", size = 0.5, fill = "#cfdbd5")) +
  coord_cartesian(clip = 'off')

## Warning: Removed 505 rows containing missing values (geom_point).

Relative Abundance

Hezhou (Jenny) Ding

This file demonstrates how the relative abundance plots for archaea and bacteria were plotted.

Load Libraries

Archaea

Read files

Prepare data for plotting with ggplot2

C37 plot

T50 plot

T65 plot

T80 plot

Organize plots

Bacteria

Read files

Prepare the relative abundance data

Select out the bacteria that had higher abundance than 2% at any time point

Shape the data for ggplot

Remove a few time points

Plot the graph