This file demonstrates the diversity analysis including statistical analysis used in this manuscript.

library (iNEXT)

## Warning: package 'iNEXT' was built under R version 4.2.2

library(MicrobiotaProcess)
library(readr)
library(phyloseq)
library(ggplot2)
library(dplyr)
library(tidyverse)
library(plyr)
library(vegan)
library(ggplot2)
library(ggforce)
library(Biostrings)
library(microbiome)
library(DESeq2)
library(tools)
library(compositions)
library(psadd)
library(RRPP)
library(rcompanion)
library(philr)
library(ggordiplots)

load("DADA2_output_and_phyloseq_object.Rdata")

Alpha Diversity

Rarefaction curve

votu <- otu_table(ps) 
votu <- as.data.frame(votu) # convert to data frame
str(votu) # check that observations are numeric
anyNA(votu) # check for NA in data
vdata <- sample_data(ps) 
str(vdata) # examine classifications

Plotting

rarecurve <- get_rarecurve(obj=ps, chunks=400)
p_rare <- ggrarecurve(obj=rarecurve, factorNames="Group",
                      shadow=F,
                      indexNames=c("Observe")) +
  scale_color_manual(values=c("#00AED7", "#FD9347","#a3b18a","#f0a6ca"))+
  theme_bw()+
  theme(axis.text=element_text(size=10), 
        panel.grid=element_blank(), panel.border = element_rect(size=1),
        strip.background = element_blank(),
        strip.text.x = element_blank())

## The color has been set automatically, you can reset it manually by adding scale_color_manual(values=yourcolors)

p_rare <- p_rare + geom_vline(xintercept = min(rowSums(votu)))
vegan::rarecurve(votu, step=100, ylab="ASVs", lwd=1.5, col = vdata$Treatment, label=F) 
abline(v=(min(rowSums(votu))))

Alpha Diversity

alpha <- phyloseq::estimate_richness(ps, split=TRUE, measures=c("Observed", "Chao1", "Shannon", "Simpson"))

Plot Alpha diversity

row.names(alpha) <- sample_names(ps)

sample1 <- sample[-56,]
sample_alpha <- merge(sample1, alpha, by='row.names')

alpha1 <- ggplot(sample_alpha, aes(x = Group, y = Chao1, fill=Group)) + 
  geom_boxplot() +
  theme_classic() +
  theme(axis.title = element_text(size = 12, face = "bold", colour = "grey30"),
        panel.background = element_blank(), panel.border = element_rect(size =1, fill = NA), 
        axis.ticks = element_blank(), legend.key = element_blank(), 
        legend.position = "none")

alpha2 <- ggplot(sample_alpha, aes(x = Group, y = Shannon, fill=Group)) + 
  geom_boxplot() +
  theme_classic() +
  theme(axis.title = element_text(size = 12, face = "bold", colour = "grey30"),
        panel.background = element_blank(), panel.border = element_rect(size =1, fill = NA), 
        axis.ticks = element_blank(), legend.key = element_blank(), 
        legend.position = "none")

alpha3 <- ggplot(sample_alpha, aes(x = Group, y = Simpson, fill=Group)) + 
  geom_boxplot() +
  theme_classic() +
  theme(axis.title = element_text(size = 12, face = "bold", colour = "grey30"),
        panel.background = element_blank(), panel.border = element_rect(size =1, fill = NA), 
        axis.ticks = element_blank(), legend.key = element_blank(), 
        legend.title = element_text(size = 12, face = "bold", colour = "grey30"), 
        legend.text = element_text(size = 10, colour = "grey30"))

alpha1+alpha2+alpha3

permANOVA for Alpha diversity

# Check assumptions
shapiro.test((sample_alpha$Observed))

## 
##  Shapiro-Wilk normality test
## 
## data:  (sample_alpha$Observed)
## W = 0.89256, p-value = 0.0001386

rcompanion::plotNormalHistogram(sample_alpha$Observed)

rich <- sample_alpha$Shannon 
rich.rrpp <- RRPP::lm.rrpp(rich ~ Group,
                           data = sample_alpha, SS.type="III", 
                           print.progress = FALSE, iter=9999)
anova(rich.rrpp, effect.type = "F")

## 
## Analysis of Variance, using Residual Randomization
## Permutation procedure: Randomization of null model residuals 
## Number of permutations: 10000 
## Estimation method: Ordinary Least Squares 
## Sums of Squares and Cross-products: Type III 
## Effect sizes (Z) based on F distributions
## 
##           Df      SS      MS     Rsq      F        Z Pr(>F)
## Group      3  0.3918 0.13060 0.01744 0.3018 -0.93758 0.8225
## Residuals 51 22.0694 0.43273 0.98256                       
## Total     54 22.4612                                       
## 
## Call: RRPP::lm.rrpp(f1 = rich ~ Group, iter = 9999, SS.type = "III",  
##     data = sample_alpha, print.progress = FALSE)

Beta Diversity

Data preparation

ps.nosing <- phyloseq::prune_taxa(phyloseq::taxa_sums(ps) > 1, ps) # Remove singleton
# Check numbers of taxa
ntaxa(ps)

## [1] 5894

ntaxa(ps.nosing)

## [1] 5817

ps.bacteria <- phyloseq::subset_taxa(ps.nosing, Kingdom=="Bacteria") # Subset Bacteria
ps.bacteria

## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 5616 taxa and 55 samples ]
## sample_data() Sample Data:       [ 55 samples by 7 sample variables ]
## tax_table()   Taxonomy Table:    [ 5616 taxa by 7 taxonomic ranks ]
## refseq()      DNAStringSet:      [ 5616 reference sequences ]

ps.archaea <- phyloseq::subset_taxa(ps.nosing, Kingdom=="Archaea") # Subset Archaea
ps.archaea

## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 152 taxa and 55 samples ]
## sample_data() Sample Data:       [ 55 samples by 7 sample variables ]
## tax_table()   Taxonomy Table:    [ 152 taxa by 7 taxonomic ranks ]
## refseq()      DNAStringSet:      [ 152 reference sequences ]

Bacteria

Data normalization for bacteria data

# VST using DeSeq2
ps.bac.ds <- phyloseq::phyloseq_to_deseq2(ps.bacteria, ~1)

## converting counts to integer mode

ps.bac.ds <- DESeq2::estimateSizeFactors(ps.bac.ds)
ps.bac.ds <- DESeq2::estimateDispersions(ps.bac.ds, fitType = "parametric")

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

DESeq2::plotDispEsts(ps.bac.ds)

ps.bac.vst <- ps.bacteria
vst.bac <- DESeq2::getVarianceStabilizedData(ps.bac.ds)
phyloseq::otu_table(ps.bac.vst) <- phyloseq::otu_table(vst.bac, taxa_are_rows = TRUE)
ps.bac.vst

## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 5616 taxa and 55 samples ]
## sample_data() Sample Data:       [ 55 samples by 7 sample variables ]
## tax_table()   Taxonomy Table:    [ 5616 taxa by 7 taxonomic ranks ]
## refseq()      DNAStringSet:      [ 5616 reference sequences ]

# Sanity check
psadd::plot_mv(ps.bacteria, title = "Raw Mean-Variance Plot with log-transform")

psadd::plot_mv(ps.bac.vst, title = "VST Mean-Variance Plot with log-transform")

## Warning in self$trans$transform(x): NaNs produced

## Warning: Transformation introduced infinite values in continuous x-axis

## Warning: Removed 5070 rows containing missing values (geom_point).

phyloseq::plot_bar(ps.bacteria)

phyloseq::plot_bar(ps.bac.vst)

hist(phyloseq::sample_sums(ps.bacteria))

hist(phyloseq::sample_sums(ps.bac.vst))

# Make all values positive
min(otu_table(ps.bac.vst))

## [1] -5.159595

ps.bac.vst.pos <- transform_sample_counts(ps.bac.vst, function(x) x+5.2)
min(otu_table(ps.bac.vst.pos))

## [1] 0.04040469

With this ps object, one can use phyloseq package to plot the beta diversity graph.

Beta diversity using Bray-Curtis index and PCoA analysis

I used vegan instead of phyloseq here to have more freedom to graph the plots using ggplot2.

asv.vst.bac <- t(vst.bac)
min(asv.vst.bac)

## [1] -5.159595

asv.vst.bac <- asv.vst.bac + 5.2 # Make sure all data positive

dist.bac <- vegdist(asv.vst.bac, method = "bray")
ord.bac.vst <- cmdscale(dist.bac, eig = TRUE)
scores.bac <- as.data.frame(scores(ord.bac.vst))
scores.bac1 <- scores.bac[match(row.names(sample1), row.names(scores.bac)),] #reorder the scores to desired sample order by matching with sample row names

scores.bac1$Group <- sample1$Group
scores.bac1$Time <- sample1$Time
scores.bac1$HeatShock <- sample1$HeatShock

eig.bac <- ord.bac.vst$eig # Access the Eigenvalues of each principle components 
eig.bac.per <- eig.bac/sum(eig.bac) # percentage of eigenvalues

Plotting

# Remove initial D3 samples in the dataset
scores.bac1.new <- scores.bac1[!grepl("D3",row.names(scores.bac1)),]

ggplot(data = scores.bac1, aes(x = Dim1, y = Dim2)) + 
  geom_mark_ellipse(data = scores.bac1.new, aes(color = Group))+
  geom_point(data = scores.bac1, aes(alpha = factor(Time),colour = Group, shape = Group), size = 5, stroke=1.5) +
  geom_point(data=scores.bac1 %>%
               filter(HeatShock=="Yes"),
             pch = 3, size=2, colour = "#4a4e69", stroke = 1) +
  scale_alpha_discrete(range = c(0.2, 1), name = "Time (d)")+
  scale_shape_manual(values=c(15, 17, 18, 19))+
  scale_colour_manual(values = c( "#4ba3c3","orange","#8cb369","#ff595e")) + 
  theme(axis.title = element_text(size = 15, face = "bold"),
        axis.text.y = element_text(size=12),axis.text.x = element_text(size=12),
        panel.background = element_blank(), panel.border = element_rect(size =1.5, fill = NA), 
        axis.ticks = element_blank(), legend.key = element_blank(), 
        legend.title = element_text(size = 12, face = "bold", colour = "grey30"), 
        legend.text = element_text(size = 10, colour = "grey30")) +
  labs(colour = "Group") + xlab("PC1 31.0%") + ylab("PC2 14.8%") + 
  coord_fixed(xlim = c(-0.4,0.4),ylim = c(-0.4,0.3)) +
  ggtitle("(B) Bacterial Diversity")+ theme(plot.title = element_text(size = 18,margin = margin(b = 10, unit = "pt")))

## Warning: Using alpha for a discrete variable is not advised.

permANOVA for Beta Diversity

# Remove Initial D3 samples
asv.vst.bac.new <- asv.vst.bac[!grepl("D3",row.names(asv.vst.bac)),]
dist.bac.new <- vegdist(asv.vst.bac.new, method = "bray")
sample.perm <- sample1[!grepl("D3",row.names(sample1)),]

bac.adonis.G <- adonis2(dist.bac.new ~ Group, data = sample.perm, permutations = 9999, method = "bray") 
print(bac.adonis.G)

## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = dist.bac.new ~ Group, data = sample.perm, permutations = 9999, method = "bray")
##          Df SumOfSqs      R2      F Pr(>F)   
## Group     3   0.8024 0.13094 2.0592 0.0033 **
## Residual 41   5.3257 0.86906                 
## Total    44   6.1281 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

bac.rrpp <- RRPP::lm.rrpp(dist.bac.new ~ Group,
                            data=sample.perm, SS.type="III", 
                            print.progress = FALSE, 
                            seed="random", 
                            iter=9999)
anova(bac.rrpp, effect.type = "F",  error = c("Residuals"))

## 
## Analysis of Variance, using Residual Randomization
## Permutation procedure: Randomization of null model residuals 
## Number of permutations: 10000 
## Estimation method: Ordinary Least Squares 
## Sums of Squares and Cross-products: Type III 
## Effect sizes (Z) based on F distributions
## 
##           Df     SS      MS     Rsq      F     Z Pr(>F)   
## Group      3 0.8024 0.26748 0.13094 2.0592 2.643 0.0032 **
## Residuals 41 5.3257 0.12990 0.86906                       
## Total     44 6.1281                                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Call: RRPP::lm.rrpp(f1 = dist.bac.new ~ Group, iter = 9999, seed = "random",  
##     SS.type = "III", data = sample.perm, print.progress = FALSE)

group <- interaction(sample.perm$Group)
PW.bac <- pairwise(bac.rrpp, groups = group)
summary(PW.bac,confidence = 0.95, test.type = "dist")

## 
## Pairwise comparisons
## 
## Groups: C37 T50 T65 T80 
## 
## RRPP: 10000 permutations
## 
## LS means:
## Vectors hidden (use show.vectors = TRUE to view)
## 
## Pairwise distances between means, plus statistics
##                 d UCL (95%)          Z Pr > d
## C37:T50 0.1286000 0.2184631 -0.9520118 0.8235
## C37:T65 0.2600543 0.2353704  2.0488974 0.0183
## C37:T80 0.3135088 0.2415742  2.6797890 0.0013
## T50:T65 0.1911724 0.1989987  1.4866287 0.0718
## T50:T80 0.2498265 0.2047458  2.4335826 0.0057
## T65:T80 0.1848961 0.2216693  0.8586459 0.2029

Archaea

# VST
ps.archaea.pos <- transform_sample_counts(ps.archaea, function(x) x+1) # Avoid 0 in all samples for DESeq2 to work
ps.arc.ds <- phyloseq::phyloseq_to_deseq2(ps.archaea.pos, ~1)

## converting counts to integer mode

ps.arc.ds <- DESeq2::estimateSizeFactors(ps.arc.ds)
ps.arc.ds <- DESeq2::estimateDispersions(ps.arc.ds, fitType = "parametric")

## gene-wise dispersion estimates

## mean-dispersion relationship

## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.

## final dispersion estimates

DESeq2::plotDispEsts(ps.arc.ds)

ps.arc.vst <- ps.archaea
vst.arc <- DESeq2::getVarianceStabilizedData(ps.arc.ds)
phyloseq::otu_table(ps.arc.vst) <- phyloseq::otu_table(vst.arc, taxa_are_rows = TRUE)
ps.arc.vst

## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 152 taxa and 55 samples ]
## sample_data() Sample Data:       [ 55 samples by 7 sample variables ]
## tax_table()   Taxonomy Table:    [ 152 taxa by 7 taxonomic ranks ]
## refseq()      DNAStringSet:      [ 152 reference sequences ]

# Sanity check
psadd::plot_mv(ps.archaea, title = "Raw Mean-Variance Plot with log-transform")

psadd::plot_mv(ps.arc.vst, title = "VST Mean-Variance Plot with log-transform")

## Warning in self$trans$transform(x): NaNs produced

## Warning: Transformation introduced infinite values in continuous x-axis

## Warning: Removed 98 rows containing missing values (geom_point).

phyloseq::plot_bar(ps.archaea)

phyloseq::plot_bar(ps.arc.vst)

hist(phyloseq::sample_sums(ps.archaea))

hist(phyloseq::sample_sums(ps.arc.vst))

# Use Vegan to plot Beta diversity to make graph prettier
asv.vst.arc <- t(vst.arc)
min(asv.vst.arc)

## [1] -1.52006

asv.vst.arc <- asv.vst.arc + 1.6
dist.arc <- vegdist(asv.vst.arc, method = "bray")
ord.arc.vst <- cmdscale(dist.arc, eig = TRUE)
scores.arc <- as.data.frame(scores(ord.arc.vst))

scores.arc1 <- scores.arc[match(row.names(sample1), row.names(scores.arc)),] #reorder the scores to desired sample order by matching with sample row names
scores.arc1$Group <- sample1$Group
scores.arc1$Time <- sample1$Time
scores.arc1$HeatShock <- sample1$HeatShock

eig.arc <- ord.arc.vst$eig
eig.arc.per <- eig.arc/sum(eig.arc)

# Remove initial D3 samples in the dataset
scores.arc1.new <- scores.arc1[!grepl("D3",row.names(scores.arc1)),]

ggplot(data = scores.arc1, aes(x = Dim1, y = Dim2)) + 
  geom_mark_ellipse(data = scores.arc1.new, aes(color = Group))+
  geom_point(data = scores.arc1, aes(alpha = factor(Time),colour = Group, shape = Group), size = 5, stroke=1.5) +
  geom_point(data=scores.arc1 %>%
               filter(HeatShock=="Yes"),
             pch = 3, size=2, colour = "#4a4e69", stroke = 1) +
  scale_alpha_discrete(range = c(0.2, 1), name = "Time (d)")+
  scale_shape_manual(values=c(15, 17, 18, 19))+
  scale_colour_manual(values = c("#4ba3c3","orange","#8cb369","#ff595e")) + 
  theme(axis.title = element_text(size = 15, face = "bold"),
        axis.text.y = element_text(size=12),axis.text.x = element_text(size=12),
        panel.background = element_blank(), panel.border = element_rect(size =1.5, fill = NA), 
        axis.ticks = element_blank(), legend.key = element_blank(), 
        legend.title = element_text(size = 12, face = "bold", colour = "grey30"), 
        legend.text = element_text(size = 10, colour = "grey30")) +
  labs(colour = "Group") + xlab("PC1 34.2%") + ylab("PC2 18.2%") + 
  coord_fixed(xlim = c(-0.4,0.4),ylim = c(-0.4,0.3)) +
  ggtitle("(A) Archaeal Diversity")+ theme(plot.title = element_text(size = 18,margin = margin(b = 10, unit = "pt")))

## Warning: Using alpha for a discrete variable is not advised.

# permANOVA
asv.vst.arc.new <- asv.vst.arc[!grepl("D3",row.names(asv.vst.arc)),]
dist.arc.new <- vegdist(asv.vst.arc.new, method = "bray")
sample.perm <- sample1[!grepl("D3",row.names(sample1)),]

arc.adonis.G <- adonis2(dist.arc.new ~ Group, data = sample.perm, permutations = 9999, method = "bray") 
print(arc.adonis.G)

## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = dist.arc.new ~ Group, data = sample.perm, permutations = 9999, method = "bray")
##          Df SumOfSqs      R2      F Pr(>F)   
## Group     3   0.8988 0.15235 2.4564 0.0034 **
## Residual 41   5.0005 0.84765                 
## Total    44   5.8993 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

arc.rrpp <- RRPP::lm.rrpp(dist.arc.new ~ Group,
                          data=sample.perm, SS.type="III", 
                          print.progress = FALSE, 
                          seed="random", 
                          iter=9999)
anova(arc.rrpp, effect.type = "F",  error = c("Residuals"))

## 
## Analysis of Variance, using Residual Randomization
## Permutation procedure: Randomization of null model residuals 
## Number of permutations: 10000 
## Estimation method: Ordinary Least Squares 
## Sums of Squares and Cross-products: Type III 
## Effect sizes (Z) based on F distributions
## 
##           Df      SS      MS    Rsq      F      Z Pr(>F)   
## Group      3  1.7677 0.58922 0.1279 2.0044 2.7128 0.0025 **
## Residuals 41 12.0525 0.29396 0.8721                        
## Total     44 13.8201                                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Call: RRPP::lm.rrpp(f1 = dist.arc.new ~ Group, iter = 9999, seed = "random",  
##     SS.type = "III", data = sample.perm, print.progress = FALSE)

group <- interaction(sample.perm$Group)
PW.arc <- pairwise(arc.rrpp, groups = group)
summary(PW.arc,confidence = 0.95, test.type = "dist")

## 
## Pairwise comparisons
## 
## Groups: C37 T50 T65 T80 
## 
## RRPP: 10000 permutations
## 
## LS means:
## Vectors hidden (use show.vectors = TRUE to view)
## 
## Pairwise distances between means, plus statistics
##                 d UCL (95%)         Z Pr > d
## C37:T50 0.1929113 0.3210114 -1.094786 0.8633
## C37:T65 0.3361846 0.3472361  1.524595 0.0688
## C37:T80 0.4793835 0.3550464  2.895115 0.0008
## T50:T65 0.2691085 0.2909663  1.293633 0.1010
## T50:T80 0.3803979 0.3014603  2.646089 0.0028
## T65:T80 0.3097240 0.3249433  1.431697 0.0770

Alpha and Beta Diversity

Hezhou (Jenny) Ding

This file demonstrates the diversity analysis including statistical analysis used in this manuscript.

Alpha Diversity

Rarefaction curve

Plotting

Alpha Diversity

Plot Alpha diversity

permANOVA for Alpha diversity

Beta Diversity

Data preparation

Bacteria

Data normalization for bacteria data

Beta diversity using Bray-Curtis index and PCoA analysis

Plotting

permANOVA for Beta Diversity

Archaea