Correlation Analysis

Load Libraries

We likely don’t actually need this many libraries

library(readr)
library(phyloseq)
library(ggplot2)
library(dplyr)
library(tidyverse)
library(plyr)
library(vegan)
library(ggplot2)
library(ggforce)
library(Biostrings)
library(reshape2)

Load files

load("DADA2_output_and_phyloseq_object.RData")
sample <- read.csv("metadata-VFA.csv", 
                    check.names = FALSE, row.names = 1)
colnames(sample)[1] <- "Sample"

Replace the Sample info with the VFA metadata

ps <- phyloseq(otu_table(seqtab.nochim.rename, taxa_are_rows=FALSE),
               sample_data(sample),
               tax_table(taxa))

dna <- Biostrings::DNAStringSet(taxa_names(ps))
names(dna) <- taxa_names(ps)
ps <- merge_phyloseq(ps, dna)
taxa_names(ps) <- paste0("ASV", seq(ntaxa(ps))) # change taxa name with ASV numbers

Process ps object

ps.nosing <- phyloseq::prune_taxa(phyloseq::taxa_sums(ps) > 1, ps) # Remove singleton
ntaxa(ps)

## [1] 5894

ntaxa(ps.nosing)

## [1] 4418

# Prune bacteria out
ps.bacteria <- phyloseq::subset_taxa(ps.nosing, Kingdom=="Bacteria")
ps.bacteria

## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 4247 taxa and 38 samples ]
## sample_data() Sample Data:       [ 38 samples by 11 sample variables ]
## tax_table()   Taxonomy Table:    [ 4247 taxa by 7 taxonomic ranks ]
## refseq()      DNAStringSet:      [ 4247 reference sequences ]

ps.VFA <- prune_samples(sample$Sample, ps.bacteria) # Keep only the samples shown in the metadata
ps.VFA

## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 4247 taxa and 38 samples ]
## sample_data() Sample Data:       [ 38 samples by 11 sample variables ]
## tax_table()   Taxonomy Table:    [ 4247 taxa by 7 taxonomic ranks ]
## refseq()      DNAStringSet:      [ 4247 reference sequences ]

ps.VFA.RA <- microbiome::transform(ps.VFA, "compositional") # Convert ASV counts to percentage

otu <- as.data.frame(otu_table(ps.VFA)) # pull out ASV information
otu.t <- as.data.frame(t(otu))
VFA <- sample
VFA <- VFA %>% arrange(Sample)
row.names(VFA) <- VFA$Sample

Correlation using spearman correlation

otu.melt <- reshape2::melt(otu_table(ps.VFA.RA)*100)
colnames(otu.melt) <- c("Sample","OTU","value")
otuandVFA <- merge(otu.melt, VFA, by = "Sample")
otuandVFA.pos <- subset(otuandVFA, value > 0.1 ) #Subset OTUs with abundance higher than 0.2%

detach(package:plyr) # loading plyr after dplyr will cause group function malfunction

VFA-OTU relationship within each treatment

T50 treatment

The codes in this section will be the same for T65, and T80 treatments. To reduce length of this document, T65 and T80 codes will be hidden.

otuandVFA.50 <- subset(otuandVFA.pos, Treatment == "50")
# Propionate
otuandVFA.50.pro <- otuandVFA.50[c("OTU", "value", "Propionate")]
otuandVFA.50.pro <- otuandVFA.50.pro %>%
  group_by(OTU) %>%
  filter(n() >= 3) # n>3 means OTU has appeared more than three times

corr.50.pro <- otuandVFA.50.pro %>%
  group_by(OTU) %>%
  summarize(cor.test(value, Propionate, method = "spearman")[[c("estimate")]]) 

corr.50.pro[c("OTU","p-value")] <- otuandVFA.50.pro %>%
  group_by(OTU) %>%
  summarize(cor.test(value, Propionate, method = "spearman")[[c("p.value")]])

colnames(corr.50.pro) <- c("OTU","Coeff","p")
corr.50.pro.sig <- subset(corr.50.pro, p <= 0.05 )  #Subset the significant correlated results
corr.50.pro.sig$Code <- "Propionate"

# Butyrate
otuandVFA.50.but <- otuandVFA.50[c("OTU", "value", "Butyrate")]
otuandVFA.50.but <- otuandVFA.50.but %>%
  group_by(OTU) %>%
  filter(n() >= 3)

corr.50.but <- otuandVFA.50.but %>%
  group_by(OTU) %>%
  summarize(cor.test(value, Butyrate, method = "spearman")[[c("estimate")]]) 

corr.50.but[c("OTU","p-value")] <- otuandVFA.50.but %>%
  group_by(OTU) %>%
  summarize(cor.test(value, Butyrate, method = "spearman")[[c("p.value")]])

colnames(corr.50.but) <- c("OTU","Coeff","p")
corr.50.but.sig <- subset(corr.50.but, p <= 0.05 )
corr.50.but.sig$Code <- "Butyrate"

# Valerate
otuandVFA.50.val <- otuandVFA.50[c("OTU", "value", "Valerate")]
otuandVFA.50.val <- otuandVFA.50.val %>%
  group_by(OTU) %>%
  filter(n() >= 3)

corr.50.val <- otuandVFA.50.val %>%
  group_by(OTU) %>%
  summarize(cor.test(value, Valerate, method = "spearman")[[c("estimate")]]) 
corr.50.val[c("OTU","p-value")] <- otuandVFA.50.val %>%
  group_by(OTU) %>%
  summarize(cor.test(value, Valerate, method = "spearman")[[c("p.value")]])
colnames(corr.50.val) <- c("OTU","Coeff","p")
corr.50.val.sig <- subset(corr.50.val, p <= 0.05 )
corr.50.val.sig$Code <- "Valerate"

# Caproate
otuandVFA.50.cap <- otuandVFA.50[c("OTU", "value", "Caproate")]
otuandVFA.50.cap <- otuandVFA.50.cap %>%
  group_by(OTU) %>%
  filter(n() >= 3)

corr.50.cap <- otuandVFA.50.cap %>%
  group_by(OTU) %>%
  summarize(cor.test(value, Caproate, method = "spearman")[[c("estimate")]]) 
corr.50.cap[c("OTU","p-value")] <- otuandVFA.50.cap %>%
  group_by(OTU) %>%
  summarize(cor.test(value, Caproate, method = "spearman")[[c("p.value")]])
colnames(corr.50.cap) <- c("OTU","Coeff","p")
corr.50.cap.sig <- subset(corr.50.cap, p <= 0.05 )
corr.50.cap.sig$Code <- "Caproate"

# Combine all correlation results in one table
corr.50 <- rbind(corr.50.pro.sig, corr.50.but.sig, corr.50.val.sig, corr.50.cap.sig)

Combine the correlation results in one file

corr.50$Trt <- "T50"; corr.65$Trt <- "T65"; corr.80$Trt <- "T80";
corr.all <- rbind(corr.50, corr.65, corr.80)

Prepare for Plotting

# Now get the name info for these ASVs
name.all.asv <- unique(corr.all$OTU)
name.all <- as.data.frame(taxa.ASV[row.names(taxa.ASV) %in% name.all.asv,])
name.all$OTU <- row.names(name.all)

corr.plot.all <- merge(name.all, corr.all, by = "OTU")
corr.plot.all <- corr.plot.all %>% drop_na(Genus)
corr.plot.all <- subset(corr.plot.all, abs(Coeff) > 0.5)
corr.plot.all <- corr.plot.all %>% unite("Name", 6:7, remove = F, sep = " " )
length(unique(corr.plot.all$OTU))

## [1] 34

corr.plot.all.1 <- corr.plot.all[,c("OTU", "Coeff", "Code", "Trt")]
corr.plot.all.1 <- reshape(corr.plot.all.1, idvar = "OTU", timevar = "Code", direction = "wide") # long to wide format
row.names(corr.plot.all.1) <- corr.plot.all.1$OTU; 

corr.plot.val <- select(corr.plot.all.1,contains("Valerate"))
colnames(corr.plot.val) <- c("values", "Trt"); corr.plot.val$OTU <- row.names(corr.plot.val)

corr.plot.but <- select(corr.plot.all.1,contains("Butyrate"))
colnames(corr.plot.but) <- c("values", "Trt"); corr.plot.but$OTU <- row.names(corr.plot.but)

corr.plot.pro <- select(corr.plot.all.1,contains("Propionate"))
colnames(corr.plot.pro) <- c("values", "Trt"); corr.plot.pro$OTU <- row.names(corr.plot.pro)

corr.plot.cap <- select(corr.plot.all.1,contains("Caproate"))
colnames(corr.plot.cap) <- c("values", "Trt"); corr.plot.cap$OTU <- row.names(corr.plot.cap)

corr.plot.pro$ind <- "Propionate"; corr.plot.but$ind <- "Butyrate"; corr.plot.val$ind <- "Valerate"; corr.plot.cap$ind <- "Caproate"; 
corr.plot.all.2 <- rbind(corr.plot.pro, corr.plot.but, corr.plot.val, corr.plot.cap)

y.lab.all <- corr.plot.all[,c(1,6)]
y.lab.all <- unique(y.lab.all) # check if labels are unique

length(unique(corr.plot.all.2$OTU))

## [1] 34

length(unique(y.lab.all$OTU)) # Check if these two files are the same length

## [1] 34

corr.plot.all.2.1 <- merge(corr.plot.all.2, y.lab.all, by = "OTU") # Add names column to the dataframe
corr.plot.all.2.1 <- corr.plot.all.2.1 %>% drop_na(Trt)
corr.plot.all.2.1$abs <- abs(corr.plot.all.2.1$values)

Plot the result

ggplot(corr.plot.all.2.1, aes(x = factor(ind, level=c("Propionate", "Butyrate", "Valerate","Caproate")), y = Name, color = cut(values, breaks = c(-1, -0.7, -0.5, 0, 0.5,0.7,1)))) +
  # geom_tile(color = "#adb5bd", fill = "white",
  #           lwd = 0.2,
  #           linetype = 1)+
  geom_point(aes(size = abs)) +
  theme_bw()+
  theme(panel.grid.major = element_line(color = "#f5f3f4",size = 0.2),
        axis.text.x = element_text(angle= 90, colour = "#403d39", size = 12,face = "bold", margin = margin(t = 2, unit = "pt")),
        axis.text.y = element_text(size=10.5, colour = "#403d39"),
        axis.title = element_blank(),
        axis.line=element_line(color="#403d39"),
        panel.border = element_blank(),
        legend.title = element_text(size = 12, face = "bold", colour = "#403d39"), 
        legend.key.height = unit(0.5, 'cm'),
        legend.key.width = unit(0.3, 'cm'),
        #legend.position = c(5.4,0.5), legend.justification='left',
        legend.text = element_text(size = 10, colour = "#403d39")) +
  guides(size = FALSE, colour = guide_legend(override.aes = list(size=4))) +
  scale_y_discrete(position = "right")+
  scale_color_manual(name = "Coefficient", 
                     values = c( "#669bbc", "#c0d6df","#ffb703", "#bb3e03"), na.value = NA,
                     labels = c("[-1.0, -0.7)", "[-0.7, -0.5)", "[0.5, 0.7)", "[0.7, 1.0]")) +
  facet_grid(~ Trt) +
  theme(panel.border = element_rect(color = "#6c757d", fill = NA, size = 0.3),
        strip.text.x = element_text(size = 11,face = "bold"),
        strip.background = element_rect(color = "#6c757d", size = 0.3, fill = "#cfdbd5"))

## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

Correlation Analysis

Hezhou (Jenny) Ding

Load Libraries

Load files

Replace the Sample info with the VFA metadata

Process ps object

Correlation using spearman correlation

VFA-OTU relationship within each treatment

T50 treatment

Combine the correlation results in one file

Prepare for Plotting

Plot the result