This file demonstrates the diversity analysis including statistical analysis used in this manuscript.

library (iNEXT)
## Warning: package 'iNEXT' was built under R version 4.2.2
library(MicrobiotaProcess)
library(readr)
library(phyloseq)
library(ggplot2)
library(dplyr)
library(tidyverse)
library(plyr)
library(vegan)
library(ggplot2)
library(ggforce)
library(Biostrings)
library(microbiome)
library(DESeq2)
library(tools)
library(compositions)
library(psadd)
library(RRPP)
library(rcompanion)
library(philr)
library(ggordiplots)
load("DADA2_output_and_phyloseq_object.Rdata")

Alpha Diversity

Rarefaction curve

votu <- otu_table(ps) 
votu <- as.data.frame(votu) # convert to data frame
str(votu) # check that observations are numeric
anyNA(votu) # check for NA in data
vdata <- sample_data(ps) 
str(vdata) # examine classifications

Plotting

rarecurve <- get_rarecurve(obj=ps, chunks=400)
p_rare <- ggrarecurve(obj=rarecurve, factorNames="Group",
                      shadow=F,
                      indexNames=c("Observe")) +
  scale_color_manual(values=c("#00AED7", "#FD9347","#a3b18a","#f0a6ca"))+
  theme_bw()+
  theme(axis.text=element_text(size=10), 
        panel.grid=element_blank(), panel.border = element_rect(size=1),
        strip.background = element_blank(),
        strip.text.x = element_blank())    
## The color has been set automatically, you can reset it manually by adding scale_color_manual(values=yourcolors)
p_rare <- p_rare + geom_vline(xintercept = min(rowSums(votu)))
vegan::rarecurve(votu, step=100, ylab="ASVs", lwd=1.5, col = vdata$Treatment, label=F) 
abline(v=(min(rowSums(votu)))) 

Alpha Diversity

alpha <- phyloseq::estimate_richness(ps, split=TRUE, measures=c("Observed", "Chao1", "Shannon", "Simpson")) 

Plot Alpha diversity

row.names(alpha) <- sample_names(ps)

sample1 <- sample[-56,]
sample_alpha <- merge(sample1, alpha, by='row.names')

alpha1 <- ggplot(sample_alpha, aes(x = Group, y = Chao1, fill=Group)) + 
  geom_boxplot() +
  theme_classic() +
  theme(axis.title = element_text(size = 12, face = "bold", colour = "grey30"),
        panel.background = element_blank(), panel.border = element_rect(size =1, fill = NA), 
        axis.ticks = element_blank(), legend.key = element_blank(), 
        legend.position = "none")

alpha2 <- ggplot(sample_alpha, aes(x = Group, y = Shannon, fill=Group)) + 
  geom_boxplot() +
  theme_classic() +
  theme(axis.title = element_text(size = 12, face = "bold", colour = "grey30"),
        panel.background = element_blank(), panel.border = element_rect(size =1, fill = NA), 
        axis.ticks = element_blank(), legend.key = element_blank(), 
        legend.position = "none")

alpha3 <- ggplot(sample_alpha, aes(x = Group, y = Simpson, fill=Group)) + 
  geom_boxplot() +
  theme_classic() +
  theme(axis.title = element_text(size = 12, face = "bold", colour = "grey30"),
        panel.background = element_blank(), panel.border = element_rect(size =1, fill = NA), 
        axis.ticks = element_blank(), legend.key = element_blank(), 
        legend.title = element_text(size = 12, face = "bold", colour = "grey30"), 
        legend.text = element_text(size = 10, colour = "grey30"))

alpha1+alpha2+alpha3

permANOVA for Alpha diversity

# Check assumptions
shapiro.test((sample_alpha$Observed))
## 
##  Shapiro-Wilk normality test
## 
## data:  (sample_alpha$Observed)
## W = 0.89256, p-value = 0.0001386
rcompanion::plotNormalHistogram(sample_alpha$Observed)

rich <- sample_alpha$Shannon 
rich.rrpp <- RRPP::lm.rrpp(rich ~ Group,
                           data = sample_alpha, SS.type="III", 
                           print.progress = FALSE, iter=9999)
anova(rich.rrpp, effect.type = "F")
## 
## Analysis of Variance, using Residual Randomization
## Permutation procedure: Randomization of null model residuals 
## Number of permutations: 10000 
## Estimation method: Ordinary Least Squares 
## Sums of Squares and Cross-products: Type III 
## Effect sizes (Z) based on F distributions
## 
##           Df      SS      MS     Rsq      F        Z Pr(>F)
## Group      3  0.3918 0.13060 0.01744 0.3018 -0.93758 0.8225
## Residuals 51 22.0694 0.43273 0.98256                       
## Total     54 22.4612                                       
## 
## Call: RRPP::lm.rrpp(f1 = rich ~ Group, iter = 9999, SS.type = "III",  
##     data = sample_alpha, print.progress = FALSE)

Beta Diversity

Data preparation

ps.nosing <- phyloseq::prune_taxa(phyloseq::taxa_sums(ps) > 1, ps) # Remove singleton
# Check numbers of taxa
ntaxa(ps)
## [1] 5894
ntaxa(ps.nosing)
## [1] 5817
ps.bacteria <- phyloseq::subset_taxa(ps.nosing, Kingdom=="Bacteria") # Subset Bacteria
ps.bacteria
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 5616 taxa and 55 samples ]
## sample_data() Sample Data:       [ 55 samples by 7 sample variables ]
## tax_table()   Taxonomy Table:    [ 5616 taxa by 7 taxonomic ranks ]
## refseq()      DNAStringSet:      [ 5616 reference sequences ]
ps.archaea <- phyloseq::subset_taxa(ps.nosing, Kingdom=="Archaea") # Subset Archaea
ps.archaea
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 152 taxa and 55 samples ]
## sample_data() Sample Data:       [ 55 samples by 7 sample variables ]
## tax_table()   Taxonomy Table:    [ 152 taxa by 7 taxonomic ranks ]
## refseq()      DNAStringSet:      [ 152 reference sequences ]

Bacteria

Data normalization for bacteria data

# VST using DeSeq2
ps.bac.ds <- phyloseq::phyloseq_to_deseq2(ps.bacteria, ~1)
## converting counts to integer mode
ps.bac.ds <- DESeq2::estimateSizeFactors(ps.bac.ds)
ps.bac.ds <- DESeq2::estimateDispersions(ps.bac.ds, fitType = "parametric") 
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
DESeq2::plotDispEsts(ps.bac.ds)

ps.bac.vst <- ps.bacteria
vst.bac <- DESeq2::getVarianceStabilizedData(ps.bac.ds)
phyloseq::otu_table(ps.bac.vst) <- phyloseq::otu_table(vst.bac, taxa_are_rows = TRUE)
ps.bac.vst
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 5616 taxa and 55 samples ]
## sample_data() Sample Data:       [ 55 samples by 7 sample variables ]
## tax_table()   Taxonomy Table:    [ 5616 taxa by 7 taxonomic ranks ]
## refseq()      DNAStringSet:      [ 5616 reference sequences ]
# Sanity check
psadd::plot_mv(ps.bacteria, title = "Raw Mean-Variance Plot with log-transform")

psadd::plot_mv(ps.bac.vst, title = "VST Mean-Variance Plot with log-transform")
## Warning in self$trans$transform(x): NaNs produced
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 5070 rows containing missing values (geom_point).

phyloseq::plot_bar(ps.bacteria)

phyloseq::plot_bar(ps.bac.vst)

hist(phyloseq::sample_sums(ps.bacteria))

hist(phyloseq::sample_sums(ps.bac.vst))

# Make all values positive
min(otu_table(ps.bac.vst))
## [1] -5.159595
ps.bac.vst.pos <- transform_sample_counts(ps.bac.vst, function(x) x+5.2)
min(otu_table(ps.bac.vst.pos))
## [1] 0.04040469

With this ps object, one can use phyloseq package to plot the beta diversity graph.

Beta diversity using Bray-Curtis index and PCoA analysis

I used vegan instead of phyloseq here to have more freedom to graph the plots using ggplot2.

asv.vst.bac <- t(vst.bac)
min(asv.vst.bac)
## [1] -5.159595
asv.vst.bac <- asv.vst.bac + 5.2 # Make sure all data positive

dist.bac <- vegdist(asv.vst.bac, method = "bray")
ord.bac.vst <- cmdscale(dist.bac, eig = TRUE)
scores.bac <- as.data.frame(scores(ord.bac.vst))
scores.bac1 <- scores.bac[match(row.names(sample1), row.names(scores.bac)),] #reorder the scores to desired sample order by matching with sample row names

scores.bac1$Group <- sample1$Group
scores.bac1$Time <- sample1$Time
scores.bac1$HeatShock <- sample1$HeatShock

eig.bac <- ord.bac.vst$eig # Access the Eigenvalues of each principle components 
eig.bac.per <- eig.bac/sum(eig.bac) # percentage of eigenvalues

Plotting

# Remove initial D3 samples in the dataset
scores.bac1.new <- scores.bac1[!grepl("D3",row.names(scores.bac1)),]

ggplot(data = scores.bac1, aes(x = Dim1, y = Dim2)) + 
  geom_mark_ellipse(data = scores.bac1.new, aes(color = Group))+
  geom_point(data = scores.bac1, aes(alpha = factor(Time),colour = Group, shape = Group), size = 5, stroke=1.5) +
  geom_point(data=scores.bac1 %>%
               filter(HeatShock=="Yes"),
             pch = 3, size=2, colour = "#4a4e69", stroke = 1) +
  scale_alpha_discrete(range = c(0.2, 1), name = "Time (d)")+
  scale_shape_manual(values=c(15, 17, 18, 19))+
  scale_colour_manual(values = c( "#4ba3c3","orange","#8cb369","#ff595e")) + 
  theme(axis.title = element_text(size = 15, face = "bold"),
        axis.text.y = element_text(size=12),axis.text.x = element_text(size=12),
        panel.background = element_blank(), panel.border = element_rect(size =1.5, fill = NA), 
        axis.ticks = element_blank(), legend.key = element_blank(), 
        legend.title = element_text(size = 12, face = "bold", colour = "grey30"), 
        legend.text = element_text(size = 10, colour = "grey30")) +
  labs(colour = "Group") + xlab("PC1 31.0%") + ylab("PC2 14.8%") + 
  coord_fixed(xlim = c(-0.4,0.4),ylim = c(-0.4,0.3)) +
  ggtitle("(B) Bacterial Diversity")+ theme(plot.title = element_text(size = 18,margin = margin(b = 10, unit = "pt")))
## Warning: Using alpha for a discrete variable is not advised.

permANOVA for Beta Diversity

# Remove Initial D3 samples
asv.vst.bac.new <- asv.vst.bac[!grepl("D3",row.names(asv.vst.bac)),]
dist.bac.new <- vegdist(asv.vst.bac.new, method = "bray")
sample.perm <- sample1[!grepl("D3",row.names(sample1)),]

bac.adonis.G <- adonis2(dist.bac.new ~ Group, data = sample.perm, permutations = 9999, method = "bray") 
print(bac.adonis.G)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = dist.bac.new ~ Group, data = sample.perm, permutations = 9999, method = "bray")
##          Df SumOfSqs      R2      F Pr(>F)   
## Group     3   0.8024 0.13094 2.0592 0.0033 **
## Residual 41   5.3257 0.86906                 
## Total    44   6.1281 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
bac.rrpp <- RRPP::lm.rrpp(dist.bac.new ~ Group,
                            data=sample.perm, SS.type="III", 
                            print.progress = FALSE, 
                            seed="random", 
                            iter=9999)
anova(bac.rrpp, effect.type = "F",  error = c("Residuals"))
## 
## Analysis of Variance, using Residual Randomization
## Permutation procedure: Randomization of null model residuals 
## Number of permutations: 10000 
## Estimation method: Ordinary Least Squares 
## Sums of Squares and Cross-products: Type III 
## Effect sizes (Z) based on F distributions
## 
##           Df     SS      MS     Rsq      F     Z Pr(>F)   
## Group      3 0.8024 0.26748 0.13094 2.0592 2.643 0.0032 **
## Residuals 41 5.3257 0.12990 0.86906                       
## Total     44 6.1281                                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Call: RRPP::lm.rrpp(f1 = dist.bac.new ~ Group, iter = 9999, seed = "random",  
##     SS.type = "III", data = sample.perm, print.progress = FALSE)
group <- interaction(sample.perm$Group)
PW.bac <- pairwise(bac.rrpp, groups = group)
summary(PW.bac,confidence = 0.95, test.type = "dist")
## 
## Pairwise comparisons
## 
## Groups: C37 T50 T65 T80 
## 
## RRPP: 10000 permutations
## 
## LS means:
## Vectors hidden (use show.vectors = TRUE to view)
## 
## Pairwise distances between means, plus statistics
##                 d UCL (95%)          Z Pr > d
## C37:T50 0.1286000 0.2184631 -0.9520118 0.8235
## C37:T65 0.2600543 0.2353704  2.0488974 0.0183
## C37:T80 0.3135088 0.2415742  2.6797890 0.0013
## T50:T65 0.1911724 0.1989987  1.4866287 0.0718
## T50:T80 0.2498265 0.2047458  2.4335826 0.0057
## T65:T80 0.1848961 0.2216693  0.8586459 0.2029

Archaea

# VST
ps.archaea.pos <- transform_sample_counts(ps.archaea, function(x) x+1) # Avoid 0 in all samples for DESeq2 to work
ps.arc.ds <- phyloseq::phyloseq_to_deseq2(ps.archaea.pos, ~1)
## converting counts to integer mode
ps.arc.ds <- DESeq2::estimateSizeFactors(ps.arc.ds)
ps.arc.ds <- DESeq2::estimateDispersions(ps.arc.ds, fitType = "parametric") 
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
DESeq2::plotDispEsts(ps.arc.ds)

ps.arc.vst <- ps.archaea
vst.arc <- DESeq2::getVarianceStabilizedData(ps.arc.ds)
phyloseq::otu_table(ps.arc.vst) <- phyloseq::otu_table(vst.arc, taxa_are_rows = TRUE)
ps.arc.vst
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 152 taxa and 55 samples ]
## sample_data() Sample Data:       [ 55 samples by 7 sample variables ]
## tax_table()   Taxonomy Table:    [ 152 taxa by 7 taxonomic ranks ]
## refseq()      DNAStringSet:      [ 152 reference sequences ]
# Sanity check
psadd::plot_mv(ps.archaea, title = "Raw Mean-Variance Plot with log-transform")

psadd::plot_mv(ps.arc.vst, title = "VST Mean-Variance Plot with log-transform")
## Warning in self$trans$transform(x): NaNs produced
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 98 rows containing missing values (geom_point).

phyloseq::plot_bar(ps.archaea)

phyloseq::plot_bar(ps.arc.vst)

hist(phyloseq::sample_sums(ps.archaea))

hist(phyloseq::sample_sums(ps.arc.vst))

# Use Vegan to plot Beta diversity to make graph prettier
asv.vst.arc <- t(vst.arc)
min(asv.vst.arc)
## [1] -1.52006
asv.vst.arc <- asv.vst.arc + 1.6
dist.arc <- vegdist(asv.vst.arc, method = "bray")
ord.arc.vst <- cmdscale(dist.arc, eig = TRUE)
scores.arc <- as.data.frame(scores(ord.arc.vst))

scores.arc1 <- scores.arc[match(row.names(sample1), row.names(scores.arc)),] #reorder the scores to desired sample order by matching with sample row names
scores.arc1$Group <- sample1$Group
scores.arc1$Time <- sample1$Time
scores.arc1$HeatShock <- sample1$HeatShock

eig.arc <- ord.arc.vst$eig
eig.arc.per <- eig.arc/sum(eig.arc)
# Remove initial D3 samples in the dataset
scores.arc1.new <- scores.arc1[!grepl("D3",row.names(scores.arc1)),]

ggplot(data = scores.arc1, aes(x = Dim1, y = Dim2)) + 
  geom_mark_ellipse(data = scores.arc1.new, aes(color = Group))+
  geom_point(data = scores.arc1, aes(alpha = factor(Time),colour = Group, shape = Group), size = 5, stroke=1.5) +
  geom_point(data=scores.arc1 %>%
               filter(HeatShock=="Yes"),
             pch = 3, size=2, colour = "#4a4e69", stroke = 1) +
  scale_alpha_discrete(range = c(0.2, 1), name = "Time (d)")+
  scale_shape_manual(values=c(15, 17, 18, 19))+
  scale_colour_manual(values = c("#4ba3c3","orange","#8cb369","#ff595e")) + 
  theme(axis.title = element_text(size = 15, face = "bold"),
        axis.text.y = element_text(size=12),axis.text.x = element_text(size=12),
        panel.background = element_blank(), panel.border = element_rect(size =1.5, fill = NA), 
        axis.ticks = element_blank(), legend.key = element_blank(), 
        legend.title = element_text(size = 12, face = "bold", colour = "grey30"), 
        legend.text = element_text(size = 10, colour = "grey30")) +
  labs(colour = "Group") + xlab("PC1 34.2%") + ylab("PC2 18.2%") + 
  coord_fixed(xlim = c(-0.4,0.4),ylim = c(-0.4,0.3)) +
  ggtitle("(A) Archaeal Diversity")+ theme(plot.title = element_text(size = 18,margin = margin(b = 10, unit = "pt")))
## Warning: Using alpha for a discrete variable is not advised.

# permANOVA
asv.vst.arc.new <- asv.vst.arc[!grepl("D3",row.names(asv.vst.arc)),]
dist.arc.new <- vegdist(asv.vst.arc.new, method = "bray")
sample.perm <- sample1[!grepl("D3",row.names(sample1)),]

arc.adonis.G <- adonis2(dist.arc.new ~ Group, data = sample.perm, permutations = 9999, method = "bray") 
print(arc.adonis.G)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = dist.arc.new ~ Group, data = sample.perm, permutations = 9999, method = "bray")
##          Df SumOfSqs      R2      F Pr(>F)   
## Group     3   0.8988 0.15235 2.4564 0.0034 **
## Residual 41   5.0005 0.84765                 
## Total    44   5.8993 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
arc.rrpp <- RRPP::lm.rrpp(dist.arc.new ~ Group,
                          data=sample.perm, SS.type="III", 
                          print.progress = FALSE, 
                          seed="random", 
                          iter=9999)
anova(arc.rrpp, effect.type = "F",  error = c("Residuals"))
## 
## Analysis of Variance, using Residual Randomization
## Permutation procedure: Randomization of null model residuals 
## Number of permutations: 10000 
## Estimation method: Ordinary Least Squares 
## Sums of Squares and Cross-products: Type III 
## Effect sizes (Z) based on F distributions
## 
##           Df      SS      MS    Rsq      F      Z Pr(>F)   
## Group      3  1.7677 0.58922 0.1279 2.0044 2.7128 0.0025 **
## Residuals 41 12.0525 0.29396 0.8721                        
## Total     44 13.8201                                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Call: RRPP::lm.rrpp(f1 = dist.arc.new ~ Group, iter = 9999, seed = "random",  
##     SS.type = "III", data = sample.perm, print.progress = FALSE)
group <- interaction(sample.perm$Group)
PW.arc <- pairwise(arc.rrpp, groups = group)
summary(PW.arc,confidence = 0.95, test.type = "dist")
## 
## Pairwise comparisons
## 
## Groups: C37 T50 T65 T80 
## 
## RRPP: 10000 permutations
## 
## LS means:
## Vectors hidden (use show.vectors = TRUE to view)
## 
## Pairwise distances between means, plus statistics
##                 d UCL (95%)         Z Pr > d
## C37:T50 0.1929113 0.3210114 -1.094786 0.8633
## C37:T65 0.3361846 0.3472361  1.524595 0.0688
## C37:T80 0.4793835 0.3550464  2.895115 0.0008
## T50:T65 0.2691085 0.2909663  1.293633 0.1010
## T50:T80 0.3803979 0.3014603  2.646089 0.0028
## T65:T80 0.3097240 0.3249433  1.431697 0.0770