We likely don’t actually need this many libraries
library(readr)
library(phyloseq)
library(ggplot2)
library(dplyr)
library(tidyverse)
library(plyr)
library(vegan)
library(ggplot2)
library(ggforce)
library(Biostrings)
library(reshape2)
load("DADA2_output_and_phyloseq_object.RData")
sample <- read.csv("metadata-VFA.csv",
check.names = FALSE, row.names = 1)
colnames(sample)[1] <- "Sample"
ps <- phyloseq(otu_table(seqtab.nochim.rename, taxa_are_rows=FALSE),
sample_data(sample),
tax_table(taxa))
dna <- Biostrings::DNAStringSet(taxa_names(ps))
names(dna) <- taxa_names(ps)
ps <- merge_phyloseq(ps, dna)
taxa_names(ps) <- paste0("ASV", seq(ntaxa(ps))) # change taxa name with ASV numbers
ps.nosing <- phyloseq::prune_taxa(phyloseq::taxa_sums(ps) > 1, ps) # Remove singleton
ntaxa(ps)
## [1] 5894
ntaxa(ps.nosing)
## [1] 4418
# Prune bacteria out
ps.bacteria <- phyloseq::subset_taxa(ps.nosing, Kingdom=="Bacteria")
ps.bacteria
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 4247 taxa and 38 samples ]
## sample_data() Sample Data: [ 38 samples by 11 sample variables ]
## tax_table() Taxonomy Table: [ 4247 taxa by 7 taxonomic ranks ]
## refseq() DNAStringSet: [ 4247 reference sequences ]
ps.VFA <- prune_samples(sample$Sample, ps.bacteria) # Keep only the samples shown in the metadata
ps.VFA
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 4247 taxa and 38 samples ]
## sample_data() Sample Data: [ 38 samples by 11 sample variables ]
## tax_table() Taxonomy Table: [ 4247 taxa by 7 taxonomic ranks ]
## refseq() DNAStringSet: [ 4247 reference sequences ]
ps.VFA.RA <- microbiome::transform(ps.VFA, "compositional") # Convert ASV counts to percentage
otu <- as.data.frame(otu_table(ps.VFA)) # pull out ASV information
otu.t <- as.data.frame(t(otu))
VFA <- sample
VFA <- VFA %>% arrange(Sample)
row.names(VFA) <- VFA$Sample
otu.melt <- reshape2::melt(otu_table(ps.VFA.RA)*100)
colnames(otu.melt) <- c("Sample","OTU","value")
otuandVFA <- merge(otu.melt, VFA, by = "Sample")
otuandVFA.pos <- subset(otuandVFA, value > 0.1 ) #Subset OTUs with abundance higher than 0.2%
detach(package:plyr) # loading plyr after dplyr will cause group function malfunction
The codes in this section will be the same for T65, and T80 treatments. To reduce length of this document, T65 and T80 codes will be hidden.
otuandVFA.50 <- subset(otuandVFA.pos, Treatment == "50")
# Propionate
otuandVFA.50.pro <- otuandVFA.50[c("OTU", "value", "Propionate")]
otuandVFA.50.pro <- otuandVFA.50.pro %>%
group_by(OTU) %>%
filter(n() >= 3) # n>3 means OTU has appeared more than three times
corr.50.pro <- otuandVFA.50.pro %>%
group_by(OTU) %>%
summarize(cor.test(value, Propionate, method = "spearman")[[c("estimate")]])
corr.50.pro[c("OTU","p-value")] <- otuandVFA.50.pro %>%
group_by(OTU) %>%
summarize(cor.test(value, Propionate, method = "spearman")[[c("p.value")]])
colnames(corr.50.pro) <- c("OTU","Coeff","p")
corr.50.pro.sig <- subset(corr.50.pro, p <= 0.05 ) #Subset the significant correlated results
corr.50.pro.sig$Code <- "Propionate"
# Butyrate
otuandVFA.50.but <- otuandVFA.50[c("OTU", "value", "Butyrate")]
otuandVFA.50.but <- otuandVFA.50.but %>%
group_by(OTU) %>%
filter(n() >= 3)
corr.50.but <- otuandVFA.50.but %>%
group_by(OTU) %>%
summarize(cor.test(value, Butyrate, method = "spearman")[[c("estimate")]])
corr.50.but[c("OTU","p-value")] <- otuandVFA.50.but %>%
group_by(OTU) %>%
summarize(cor.test(value, Butyrate, method = "spearman")[[c("p.value")]])
colnames(corr.50.but) <- c("OTU","Coeff","p")
corr.50.but.sig <- subset(corr.50.but, p <= 0.05 )
corr.50.but.sig$Code <- "Butyrate"
# Valerate
otuandVFA.50.val <- otuandVFA.50[c("OTU", "value", "Valerate")]
otuandVFA.50.val <- otuandVFA.50.val %>%
group_by(OTU) %>%
filter(n() >= 3)
corr.50.val <- otuandVFA.50.val %>%
group_by(OTU) %>%
summarize(cor.test(value, Valerate, method = "spearman")[[c("estimate")]])
corr.50.val[c("OTU","p-value")] <- otuandVFA.50.val %>%
group_by(OTU) %>%
summarize(cor.test(value, Valerate, method = "spearman")[[c("p.value")]])
colnames(corr.50.val) <- c("OTU","Coeff","p")
corr.50.val.sig <- subset(corr.50.val, p <= 0.05 )
corr.50.val.sig$Code <- "Valerate"
# Caproate
otuandVFA.50.cap <- otuandVFA.50[c("OTU", "value", "Caproate")]
otuandVFA.50.cap <- otuandVFA.50.cap %>%
group_by(OTU) %>%
filter(n() >= 3)
corr.50.cap <- otuandVFA.50.cap %>%
group_by(OTU) %>%
summarize(cor.test(value, Caproate, method = "spearman")[[c("estimate")]])
corr.50.cap[c("OTU","p-value")] <- otuandVFA.50.cap %>%
group_by(OTU) %>%
summarize(cor.test(value, Caproate, method = "spearman")[[c("p.value")]])
colnames(corr.50.cap) <- c("OTU","Coeff","p")
corr.50.cap.sig <- subset(corr.50.cap, p <= 0.05 )
corr.50.cap.sig$Code <- "Caproate"
# Combine all correlation results in one table
corr.50 <- rbind(corr.50.pro.sig, corr.50.but.sig, corr.50.val.sig, corr.50.cap.sig)
corr.50$Trt <- "T50"; corr.65$Trt <- "T65"; corr.80$Trt <- "T80";
corr.all <- rbind(corr.50, corr.65, corr.80)
# Now get the name info for these ASVs
name.all.asv <- unique(corr.all$OTU)
name.all <- as.data.frame(taxa.ASV[row.names(taxa.ASV) %in% name.all.asv,])
name.all$OTU <- row.names(name.all)
corr.plot.all <- merge(name.all, corr.all, by = "OTU")
corr.plot.all <- corr.plot.all %>% drop_na(Genus)
corr.plot.all <- subset(corr.plot.all, abs(Coeff) > 0.5)
corr.plot.all <- corr.plot.all %>% unite("Name", 6:7, remove = F, sep = " " )
length(unique(corr.plot.all$OTU))
## [1] 34
corr.plot.all.1 <- corr.plot.all[,c("OTU", "Coeff", "Code", "Trt")]
corr.plot.all.1 <- reshape(corr.plot.all.1, idvar = "OTU", timevar = "Code", direction = "wide") # long to wide format
row.names(corr.plot.all.1) <- corr.plot.all.1$OTU;
corr.plot.val <- select(corr.plot.all.1,contains("Valerate"))
colnames(corr.plot.val) <- c("values", "Trt"); corr.plot.val$OTU <- row.names(corr.plot.val)
corr.plot.but <- select(corr.plot.all.1,contains("Butyrate"))
colnames(corr.plot.but) <- c("values", "Trt"); corr.plot.but$OTU <- row.names(corr.plot.but)
corr.plot.pro <- select(corr.plot.all.1,contains("Propionate"))
colnames(corr.plot.pro) <- c("values", "Trt"); corr.plot.pro$OTU <- row.names(corr.plot.pro)
corr.plot.cap <- select(corr.plot.all.1,contains("Caproate"))
colnames(corr.plot.cap) <- c("values", "Trt"); corr.plot.cap$OTU <- row.names(corr.plot.cap)
corr.plot.pro$ind <- "Propionate"; corr.plot.but$ind <- "Butyrate"; corr.plot.val$ind <- "Valerate"; corr.plot.cap$ind <- "Caproate";
corr.plot.all.2 <- rbind(corr.plot.pro, corr.plot.but, corr.plot.val, corr.plot.cap)
y.lab.all <- corr.plot.all[,c(1,6)]
y.lab.all <- unique(y.lab.all) # check if labels are unique
length(unique(corr.plot.all.2$OTU))
## [1] 34
length(unique(y.lab.all$OTU)) # Check if these two files are the same length
## [1] 34
corr.plot.all.2.1 <- merge(corr.plot.all.2, y.lab.all, by = "OTU") # Add names column to the dataframe
corr.plot.all.2.1 <- corr.plot.all.2.1 %>% drop_na(Trt)
corr.plot.all.2.1$abs <- abs(corr.plot.all.2.1$values)
ggplot(corr.plot.all.2.1, aes(x = factor(ind, level=c("Propionate", "Butyrate", "Valerate","Caproate")), y = Name, color = cut(values, breaks = c(-1, -0.7, -0.5, 0, 0.5,0.7,1)))) +
# geom_tile(color = "#adb5bd", fill = "white",
# lwd = 0.2,
# linetype = 1)+
geom_point(aes(size = abs)) +
theme_bw()+
theme(panel.grid.major = element_line(color = "#f5f3f4",size = 0.2),
axis.text.x = element_text(angle= 90, colour = "#403d39", size = 12,face = "bold", margin = margin(t = 2, unit = "pt")),
axis.text.y = element_text(size=10.5, colour = "#403d39"),
axis.title = element_blank(),
axis.line=element_line(color="#403d39"),
panel.border = element_blank(),
legend.title = element_text(size = 12, face = "bold", colour = "#403d39"),
legend.key.height = unit(0.5, 'cm'),
legend.key.width = unit(0.3, 'cm'),
#legend.position = c(5.4,0.5), legend.justification='left',
legend.text = element_text(size = 10, colour = "#403d39")) +
guides(size = FALSE, colour = guide_legend(override.aes = list(size=4))) +
scale_y_discrete(position = "right")+
scale_color_manual(name = "Coefficient",
values = c( "#669bbc", "#c0d6df","#ffb703", "#bb3e03"), na.value = NA,
labels = c("[-1.0, -0.7)", "[-0.7, -0.5)", "[0.5, 0.7)", "[0.7, 1.0]")) +
facet_grid(~ Trt) +
theme(panel.border = element_rect(color = "#6c757d", fill = NA, size = 0.3),
strip.text.x = element_text(size = 11,face = "bold"),
strip.background = element_rect(color = "#6c757d", size = 0.3, fill = "#cfdbd5"))
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.