“Publication Figure 1”
百度云盘链接: https://pan.baidu.com/s/15g7caZp354zIWktpnWzWhQ
提取码: 4sh7
Libraries
Standard Import
library(tidyverse)
library(cowplot)
library(scales)
library(ggpubr)
Special
# devtools::install_github("pmartinezarbizu/pairwiseAdonis/pairwiseAdonis")
library(pairwiseAdonis)
library(FactoMineR)
# library(rgr)
# library(mixOmics)
library(grid)
library(broom)
Paths
bin_dir = '../bin/'
data_dir = '../data/'
results_dir = '../results/'
Custom Scripts / Theme
source(paste0(bin_dir, 'theme_settings.R'))
source (paste0(bin_dir, 'utilities.R'))
source(paste0(bin_dir, 'analysis_metadata.R'))
date <- format(Sys.time(), "%d%m%y")
Import Tables
Metadata
samples.metadata <- read_tsv(paste0(data_dir, 'samples.metadata.tsv'))
microbe.taxonomy <- read_tsv(paste0(data_dir, 'microbe.taxonomy.tsv'))
microbe.metadata <- microbe.taxonomy %>% right_join(., read_tsv(paste0(data_dir, 'microbe.metadata.tsv')))
Taxonomy
MetaPhlAn2 tables
combine with taxonomy
mp_species.long <- microbe.taxonomy %>% right_join(., read_tsv(paste0(data_dir, 'samples.mp_profiles.species.tsv')), by = 'species') %>% left_join(., samples.metadata, by = 'Name')mp_species.subsampled.long <- microbe.taxonomy %>% right_join(., read_tsv(paste0(data_dir, 'samples.mp_profiles_subsampled.species.tsv')), by = 'species') %>% left_join(., samples.metadata, by = 'Name')
rCDI subset
annotate with metadata
mp_species.rcdi.long <- mp_species.long %>%filter(Study %in% c(rcdi.studies)) mp_species.subsampled.rcdi.long <- mp_species.subsampled.long %>% filter(Study %in% c(rcdi.studies))
Function
gene_family_annotated <- read_tsv(paste0(data_dir, 'genefamilies.infogo1000.cpm.annotated.tsv'))h2_unstratified <- samples.metadata %>% right_join(., read_tsv(paste0(data_dir, 'samples.h2_profiles.unstratified.tsv')), by = 'Name')
Set Figure
fig = paste0('Fig_1.')
Taxonomic Composition
PCA analysis of species-level sample composition
using data from rCDI recipients, stool donors and post-FMT patients with resolved symptoms
Format
# filter successful post-FMT samples
failed_samples.Name <- samples.metadata %>% filter(!fmt_success & Sample_Type == 'post') %>% pull(Name)# transpose
mp_species.rcdi_success.wide <- mp_species.rcdi.long %>%filter(kingdom == 'Bacteria') %>% filter(!Name %in% failed_samples.Name) %>% pivot_wider(id_cols = 'species', names_from = 'Name',values_from = 'rel_abund', values_fill = list(rel_abund = 0))## -- subsampled
mp_species.subsampled.rcdi_success.wide <- mp_species.subsampled.rcdi.long %>%filter(kingdom == 'Bacteria') %>% filter(!Name %in% failed_samples.Name) %>% pivot_wider(id_cols = 'species', names_from = 'Name',values_from = 'rel_abund', values_fill = list(rel_abund = 0))## -- genus-level
mp_genus.rcdi_success.wide <- mp_species.rcdi.long %>%filter(kingdom == 'Bacteria') %>% filter(!Name %in% failed_samples.Name) %>%pivot_wider(id_cols = 'genus', names_from = 'Name',values_from = 'rel_abund', values_fill = list(rel_abund = 0), values_fn = list(rel_abund = sum))# get data
mp_species.rcdi_success.data <- t(select(mp_species.rcdi_success.wide, matches('.pair'), species) %>% column_to_rownames(var = 'species'))## -- subsampled
mp_species.subsampled.rcdi_success.data <- t(select(mp_species.subsampled.rcdi_success.wide, matches('.pair'), species) %>% column_to_rownames(var = 'species'))## -- genus
mp_genus.rcdi_success.data <- t(select(mp_genus.rcdi_success.wide, matches('.pair'), genus) %>% column_to_rownames(var = 'genus'))
Diversity
Diversity metrics based on species-level relative abundances
Alpha: Shannon Index
mp_species.raw.rcdi_success.shannon <- data.frame(Shannon.raw = diversity(mp_species.rcdi_success.data, index = 'shannon')) %>% rownames_to_column('Name')mp_species.subsampled.rcdi_success.shannon <- data.frame(Shannon.subsampled = diversity(mp_species.subsampled.rcdi_success.data, index = 'shannon')) %>% rownames_to_column('Name')mp_species.raw.rcdi_success.shannon <- full_join(mp_species.raw.rcdi_success.shannon, mp_species.subsampled.rcdi_success.shannon, by = 'Name')
CLR-Transform
# species-level
pseudo = 1e-6remove.na <- function (xx, iftell = TRUE) {if (is.vector(xx)) {nx <- length(xx)x <- na.omit(xx)n <- length(x)m <- 1intype <- "vector"}else {nx <- length(xx[, 1])x <- na.omit(xx)n <- length(x[, 1])m <- length(x[1, ])intype <- "matrix"}nna <- nx - nif (iftell & nna > 0) {if (intype == "matrix") cat(paste("\n ", nna, "row(s) with NA(s) removed from matrix\n"))else {cat(paste("\n ", nna, "NA(s) removed from vector\n"))}}invisible(list(x = x, n = n, m = m, nna = nna))
}clr <- function(xx, ifclose = FALSE, ifwarn = TRUE) {if (is.data.frame(xx)) xx <- as.matrix(xx)if (any(xx < 0, na.rm = TRUE)) stop("Negative values not allowed\n")if(ifwarn) cat(" ** Are the data/parts all in the same measurement units? **\n")temp.x <- remove.na(xx, iftell = FALSE)x <- temp.x$x; nna <- temp.x$nnaif (nna >= 1) cat(" ", nna, "composition(s) with NA(s) removed\n")#if(ifclose) x <- 100 * sweep(x, 1, rowSums(x), "/")#x <- log(x)x <- sweep(x, 1, rowMeans(x), "-")x <- as.matrix(x)return(x = x)
}mp_species.rcdi_success.clr <- clr(mp_species.rcdi_success.data + pseudo, ifclose = FALSE, ifwarn = TRUE)
Beta: PCA of species-level taxonomic composition
# species-level
mp_species.rcdi_success.clr.pca_data <- PCA(mp_species.rcdi_success.clr, graph = FALSE, scale.unit = F)
mp_species.rcdi_success.clr.pca <- mp_species.rcdi_success.clr.pca_data %>% .$ind %>% .$coord %>% as.data.frame() %>% rownames_to_column(var = 'Name') ## annotate with metadata, diversity
mp_species.rcdi_success.data.clr.pca <- mp_species.rcdi_success.clr.pca %>% left_join(., samples.metadata, by = 'Name') %>% left_join(., mp_species.raw.rcdi_success.shannon, by = 'Name')
Pairwise PERMANOVA (adonis function) testing between rCDI recipients, Donors, and the last available post-FMT sample Showing that composition of rCDI recipient is distinct from donors and post-FMT samples, but last post-FMT time point not different from donors.
mp_species.rcdi_success.data.clr.pca.last <-mp_species.rcdi_success.data.clr.pca %>% filter(last_sample)# n count
mp_species.rcdi_success.data.clr.pca.last %>% group_by(Sample_Type) %>% tally()rcdi.adonis <-pairwise.adonis(x = mp_species.rcdi_success.clr[mp_species.rcdi_success.data.clr.pca.last$Name, ],factors = mp_species.rcdi_success.data.clr.pca.last$Sample_Type,sim.method = 'euclidean',p.adjust.m = 'fdr',perm = 1e3) %>% rename(fdr = p.adjusted) %>% mutate(sig = cut(fdr, breaks = c(-Inf, 0.001, 0.01, 0.05, Inf),labels = c("***", "**", "*", "")))
rcdi.adonis
PCA Plot shows rCDI recipients clustering separately from post-FMT patients and donors.
Separate clustering of MGH03D-related cases is also visible.
plot <-mp_species.rcdi_success.data.clr.pca %>% ggplot(aes(-Dim.2, -Dim.1, labels = Unique_ID)) +geom_point(shape = 21, size = 8, col = 'black', aes(fill = Sample_Type)) + scale_fill_manual(values = colors.main) +theme_cowplot() + labs(y = paste0('PC 1 (', round(mp_species.rcdi_success.clr.pca_data$eig[1,'percentage of variance'], 1), '%)'), x = paste0('PC 2 (', round(mp_species.rcdi_success.clr.pca_data$eig[2,'percentage of variance'], 1), '%)'),subtitle = 'Taxonomic Composition') +theme(plot.title = element_text(hjust = 0.5),legend.title=element_blank(),aspect.ratio = 1,plot.subtitle = element_text(hjust = .5, size = 12), axis.title = element_text(size = 10),axis.text = element_text(size = 8))
legend <- cowplot::get_legend(plot)plot + theme(legend.position = 'none')
grid.newpage()
grid.draw(legend)
Exporting plots, tables
output_name = 'TaxonomicDiversity.SuccessfulCases.CLR_PCA'ggsave(plot + theme(legend.position = 'none'), device = 'pdf', dpi = 300, width = 5, height = 5,filename = paste0(results_dir, fig, output_name, '.Plot.pdf'))ggsave(legend, device = 'pdf', dpi = 300, width = 1.1, height = 1,filename = paste0(results_dir, fig, output_name, '.Legend.pdf'))write_tsv(rcdi.adonis, paste0(results_dir, fig, output_name, '.Adonis.tsv'))
Alpha/Beta Correlation
Correlation analysis of Shannon Index with first principal component (PC 1) of PCA analysis shows that large fraction of variance could be explained by alpha diversity differences between samples.
mp_species.rcdi_success.data.clr.pca.cor <- cor.test(~ Shannon.raw + Dim.1,data = mp_species.rcdi_success.data.clr.pca,method = 'spearman') %>% broom::tidy(.) %>% mutate(estimate.round = round(estimate, 2), sig = cut(p.value, breaks = c(-Inf, 0.001, 0.01, 0.05, Inf),labels = c("***", "**", "*", "ns")))
mp_species.rcdi_success.data.clr.pca.corplot <- mp_species.rcdi_success.data.clr.pca %>% ggplot(aes(Shannon.raw, -Dim.1)) +geom_point(shape = 21, col = 'black', size = 8, aes(fill = Sample_Type)) + geom_smooth(method = 'lm', color = 'black') + annotate('text', label = paste0('r=', mp_species.rcdi_success.data.clr.pca.cor$estimate.round,' ', mp_species.rcdi_success.data.clr.pca.cor$sig), x = 1.15, y = -40, size = 6) +scale_fill_manual(values = colors.discrete[c(1,2,3)]) +theme_cowplot() + theme(legend.title=element_blank(),aspect.ratio = 1) +labs(x = 'Shannon Index', y = 'PC 1')plot + theme(legend.position = 'none')
Exporting plot
output_name = 'TaxonomicDiversity.SuccessfulCases.PC1_Shannon'ggsave(plot + theme(legend.position = 'none'), device = 'pdf', dpi = 300, width = 3, height = 3,filename = paste0(results_dir, fig, output_name, '.Plot.pdf'))ggsave(legend, device = 'pdf', dpi = 300, width = 1.1, height = 1,filename = paste0(results_dir, fig, output_name, '.Legend.pdf'))
Confirming that alpha diversity is representative by comparing Shannon calculated on full and 5M subset of reads generated with seqtk after QC but before MetaPhlAn2.
mp_species.rcdi_success.data.clr.pca %>% ggplot(aes(Shannon.subsampled, Shannon.raw)) +geom_point(shape = 21, col = 'black', size = 8, aes(fill = Sample_Type)) + geom_smooth(method = 'lm', color = 'black') + stat_cor(method = 'spearman') +scale_fill_manual(values = colors.discrete[c(1,2,3)]) +theme_cowplot() + theme(legend.title=element_blank(),legend.position = 'none', aspect.ratio = 1) +labs(x = 'Shannon Index [5M Subsampled Reads]', y = 'Shannon Index [All Reads]')
Taxa Distinguishing Recipients and Donors
Format
# rcdi_success only excludes failed post-FMT samples
mp_genus.rcdi_success.annotated <- samples.metadata %>% mutate(tag = paste0(Unique_ID, ' ', gsub(gsub(Name, pattern = '.*_', replacement = ''), pattern = '.pair', replacement = ''))) %>% right_join(., as.data.frame(mp_genus.rcdi_success.data) %>% rownames_to_column('Name')) %>% filter(Sample_Type %in% c('recipient','donor'))X.metadata <-mp_genus.rcdi_success.annotated %>% select(Study:tag)X = mp_genus.rcdi_success.annotated %>% column_to_rownames(var = 'tag') %>% select(-Study:-Study_Type) %>% as.data.frame()Y = as.factor(X.metadata$Sample_Type)
sparse PLS Discriminant Analysis
Initial sPLS-DA
sPLS-DA of rCDI recipients and donor samples based on genus-level taxonomy
pseudo = 1e-6data.plsda = mixOmics::plsda(X = X + pseudo, Y, ncomp = nlevels(Y), logratio = 'CLR')
data.perf.plsda = mixOmics::perf(data.plsda, validation = 'Mfold', folds = 5,progressBar = FALSE, nrepeat = 10)plot(data.perf.plsda, overlay = 'measure', sd=TRUE)
mixOmics::plotIndiv(data.plsda , comp = c(1,2),group = Y, ind.names = T,ellipse = T,legend = TRUE, title = 'PLSDA comp 1 - 2')
Cross-Validate features
5x10 CV
set.seed(100)
data.tune.splsda = mixOmics::tune.splsda(X + pseudo, Y = Y, ncomp = 2, multilevel = NULL, logratio = 'CLR',validation = c('Mfold'), folds = 5, dist = 'max.dist', nrepeat = 10,progressBar = FALSE)plot(data.tune.splsda)
select.keepX = data.tune.splsda$choice.keepX[1:2]
select.keepX # c(15, 5)
Apply CV features
data.splsda = mixOmics::splsda(X = X + pseudo, Y = Y, ncomp = 2, keepX = select.keepX, logratio= "CLR",near.zero.var = T,
)
data.perf.splsda = mixOmics::perf(data.splsda, validation = 'Mfold', folds = 5, progressBar = FALSE, nrepeat = 10, dist = 'max.dist')data.perf.splsda$error.ratemixOmics::plotIndiv(data.splsda , comp = c(1,2),group = Y, ind.names = T, ellipse = TRUE, legend = TRUE, title = 'PLSDA comp 1 - 2')
Plot loadings and save to table
# COMP 1
pL.pc1 <- mixOmics::plotLoadings(data.splsda, title = 'sPLS-DA PC 1',comp = 1, method = 'median', contrib = 'max', size.title = rel(1), border = T, size.name = .5, size.legend = .75, legend.color = colors.discrete[c(4, 1)])
pL.pc1# COMP 2
pL.pc2 <- mixOmics::plotLoadings(data.splsda, title = 'sPLS-DA PC 2',comp = 2, method = 'median', contrib = 'max', size.title = rel(1), border = T, size.name = .5, size.legend = .75, legend.color = colors.discrete[c(4, 1)])
pL.pc2pL <- rbind(pL.pc1$X, pL.pc2$X) %>% rownames_to_column('genus') %>% dplyr::rename(rel_abund.recipient.median_clr = recipient,rel_abund.donor.median_clr = donor,rel_abund.highest = GroupContrib) %>% dplyr::select(genus, rel_abund.donor.median_clr, rel_abund.recipient.median_clr, rel_abund.highest, importance)output_name = 'sPLSDA.DonorRecipient.Loadings'
write_tsv(pL, paste0(results_dir, fig, output_name, '.tsv'))
sPLS-DA: Heatmap
Heatmap of differentially abundant taxa between rCDI recipients and donors based on clr-transformed genus-level relative abundances. Plots shows typical opportunistic pathogens and oral bacteria to be differentially abundant between groups.
Genera from displayed samples (R/D) in which >=20% of species are predicted orals
genus.oral <- mp_species.rcdi.long %>%filter(kingdom == 'Bacteria') %>% filter(Sample_Type %in% c('recipient', 'donor')) %>% left_join(., microbe.metadata %>% select(species, habit.oral), by = 'species') %>% mutate(genus = gsub(genus, pattern = '_noname', replacement = '')) %>%group_by(genus) %>% summarize(oral_rate = replace_na(sum(habit.oral == T, na.rm = T) / n(), NA), .groups = 'drop') %>% mutate(oral_genus = oral_rate >= 0.20)
Get colors for sample columns and taxa rows
sample_colors <-tibble(Unique_ID.splsda = str_split_fixed(data.splsda$names$sample, pattern = ' ', n = 2)[,1]) %>% cbind(., mp_genus.rcdi_success.annotated %>% select(tag, Unique_ID, fmt_success, Case_Name, Sample_Type)) %>% mutate(color = case_when(Sample_Type == 'recipient' ~ colors.discrete[3],Sample_Type == 'donor' ~ colors.discrete[1], T ~ 'white'))taxa_colors <- tibble(genus = gsub(data.splsda$names$colnames$X, pattern = '_noname', replacement = '')) %>% left_join(., genus.oral, by = 'genus') %>% mutate(color = case_when(oral_genus == T ~ 'gray25',oral_genus == F ~ 'gray95', T ~ 'white'))
Export Heatmap
output_name = 'sPLSDA.DonorRecipient.Heatmap'
mixOmics::cim(data.splsda, row.sideColors = sample_colors$color, col.sideColors = taxa_colors$color, symkey = T, keysize = c(0.8, 0.8), row.names = F, col.names = gsub(data.splsda$names$colnames$X, pattern = '_noname', replacement = ''),save = 'pdf', name.save = paste0(results_dir, fig, output_name),margins = c(20, 6.5),row.cex = .75, col.cex = .5,transpose = T,scale = F, center = T)
Oral Species in rCDI recipients
Pulling Oral species from microbe.metadata
oral_species <-microbe.metadata %>%filter(habit.oral) %>%pull(species)
Format
rcdi.oral_species <- mp_species.rcdi.long %>% filter(last_sample, fmt_success | Sample_Type != 'post') %>% mutate(oral = species %in% oral_species,rel_abund = rel_abund / 100) %>% group_by(Case_Name, Sample, Days_Since_FMT, Sample_Type) %>% summarize(n = sum(rel_abund > 0, na.rm = T), oral = sum(ifelse(species %in% oral_species & rel_abund > 0, 1, 0), na.rm = T),species_rel_abund = mean(ifelse(species %in% oral_species & rel_abund > 0, rel_abund, 0), na.rm = T),sample_rel_abund = sum(ifelse(species %in% oral_species, rel_abund, 0), na.rm = T), ) %>% mutate(f = oral / n) %>% mutate(source = fct_recode(as.factor(Sample_Type),R = 'recipient',P = 'post',D = 'donor')) %>% pivot_longer(names_to = 'metric', values_to = 'value',cols = c('sample_rel_abund', 'species_rel_abund', 'f'))
Boxplots Oral Taxa
Plot shows enrichment of oral-classified bacteria in rCDI recipients but not donors or controls with decreasing abundances post-FMT.
Cumulative Sample Rel. Abund. of species classified as oral
stat_comparisons = list(c('P', 'D'), c('R', 'D'), c('R', 'P'))plot.sample_rel_abund <-rcdi.oral_species %>% filter(metric == 'sample_rel_abund') %>% ggplot(aes(fct_relevel(source, 'R', 'P', 'D'),value, fill = source)) + stat_boxplot(geom = 'errorbar') +geom_boxplot(show.legend = F) + geom_dotplot(aes(fill = NULL), binaxis = "y", stackdir = "center", binpositions="all", binwidth = 0.01) +theme_cowplot() + scale_y_continuous(labels = percent_format_signif,limits = c(0, 1.21),expand = c(0,0,0.05,0),breaks = c(0, 0.25, 0.5, 0.75, 1)) + scale_fill_manual(values = colors.discrete[c(2,1,3)]) +stat_compare_means(method = 'wilcox', comparisons = stat_comparisons,exact = F, p.adjust = 'fdr', label = 'p.signif',symnum.args = list(cutpoints = c(-Inf, 0.001, 0.01, 0.05, Inf),symbols = c("***", "**", "*", "ns"))) + labs(y = 'Cumulative Sample Rel. Abund.') +theme(axis.title.x = element_blank())
plot.sample_rel_abund + theme(legend.position = 'none')
Rel. Abund. of Species classified as oral
Majority of oral species are below 0.01% relative abundance.
plot.species_rel_abund <-
rcdi.oral_species %>% filter(metric == 'species_rel_abund') %>% ggplot(aes(fct_relevel(source, 'R', 'P', 'D'),value, fill = source)) + stat_boxplot(geom = 'errorbar') +geom_boxplot(show.legend = F) + geom_dotplot(aes(fill = NULL), binaxis = "y", stackdir = "center", binpositions="all", binwidth = 0.00025) +theme_cowplot() + scale_y_continuous(limits = c(0, NA),labels = percent_format_signif,expand = c(0,0,0.05,0),# breaks = c(0, 0.001, 0.0025, 0.005, 0.01, 0.015)) + scale_fill_manual(values = colors.discrete[c(2,1,3)]) +stat_compare_means(method = 'wilcox', comparisons = stat_comparisons,exact = F, p.adjust = 'fdr', label = 'p.signif',symnum.args = list(cutpoints = c(-Inf, 0.001, 0.01, 0.05, Inf),symbols = c("***", "**", "*", "ns"))) + labs(y = 'Mean Rel. Abund. per Sp.') +theme(axis.title.x = element_blank())
plot.species_rel_abund + theme(legend.position = 'none')
Fraction of Species classified as oral
plot.fraction <-
rcdi.oral_species %>% filter(metric == 'f') %>%ungroup() %>% ggplot(aes(fct_relevel(source, 'R', 'P', 'D'),value, fill = source)) + stat_boxplot(geom = 'errorbar') +geom_boxplot(show.legend = F) + geom_dotplot(aes(fill = NULL), binaxis = "y", stackdir = "center", binpositions="all", binwidth = 0.01) +theme_cowplot() + scale_y_continuous(labels = percent_format_signif,limits = c(0, .75),expand = c(0,0,0.05,0),breaks = c(0, 0.25, 0.5, 0.75, 1)) + scale_fill_manual(values = colors.discrete[c(2,1,3)]) +stat_compare_means(method = 'wilcox', comparisons = stat_comparisons,exact = F, p.adjust = 'fdr', label = 'p.signif', symnum.args = list(cutpoints = c(-Inf, 0.001, 0.01, 0.05, Inf),symbols = c("***", "**", "*", "ns"))) + theme(axis.title.x = element_blank(),legend.title=element_blank(),legend.position = 'none', strip.background = element_blank()) +labs(y = 'Cumulative Fraction of Spp.')plot.fraction + theme(legend.position = 'none')
Exporting plot
output_name = 'OralSpecies'ggsave(plot.sample_rel_abund + theme(legend.position = 'none'), device = 'pdf', dpi = 300, width = 2.5, height = 3.5,filename = paste0(results_dir, fig, output_name, '.SampleRelAbund', '.Plot.pdf'))
ggsave(plot.species_rel_abund + theme(legend.position = 'none'), device = 'pdf', dpi = 300, width = 2.5, height = 3.5,filename = paste0(results_dir, fig, output_name, '.SpeciesRelAbund','.Plot.pdf'))
ggsave(plot.fraction + theme(legend.position = 'none'), device = 'pdf', dpi = 300, width = 2.5, height = 3.5,filename = paste0(results_dir, fig, output_name, '.SpeciesFraction','.Plot.pdf'))
Oxygen Tolerant Species enriched in rCDI recipients
Summarize over oxygen tolerant species
mp_species.tag.long <- samples.metadata %>% right_join(., mp_species.long) %>% filter(last_sample, !Name %in% failed_samples.Name) %>% left_join(., microbe.metadata %>% select(species, oxygen.tolerant), by = 'species') %>% mutate(tag = ifelse(is.na(oxygen.tolerant), 'Unknown',ifelse(oxygen.tolerant, 'Oxygen Tolerant',ifelse(!oxygen.tolerant, 'Oxygen Intolerant', 'Unknown'))),tag.bool = !is.na(tag),tag = as.factor(tag)) %>%group_by(Study_Type, Case_Name, Name, Days_Since_FMT, Sample_Type, tag, fmt_success) %>% summarize(rel_abund = sum(rel_abund, na.rm = T)) %>%ungroup() %>% mutate(tag = fct_relevel(tag, 'Unknown', 'Oxygen Intolerant'))
rCDI recipients show high relative abundance of oxygen-tolerant taxa (fac. anaerobes, microaerophiles) compared to donors. Patients do not fully recover down to donor levels after FMT.
plot <-mp_species.tag.long %>% filter(Study_Type == 'rCDI') %>% mutate(Sample_Type.label = str_to_upper(unlist(str_extract(Sample_Type, pattern = '.')))) %>% filter(!Sample_Type.label == 'P' | fmt_success) %>%ggplot(aes(fct_reorder(paste0('D', Days_Since_FMT, ' | ', Case_Name), Days_Since_FMT),rel_abund / 100, fill = tag)) + geom_bar(stat = 'identity', position = position_fill(), width = 1) +theme_cowplot() + scale_y_continuous(labels = percent_format(),breaks = c(1, .75, .5, .25, 0),expand = c(0,0)) +scale_fill_manual(values = c(colors.discrete[10], 'grey50', 'grey25')) +theme(axis.title.y = element_blank(), axis.title.x = element_blank(),axis.line.y = element_blank(),axis.text.x = element_blank(),axis.ticks.x = element_blank(),strip.background = element_blank(), strip.text = element_text(size = 14), strip.placement = "outside",legend.title=element_blank(),legend.position = 'bottom') +guides(fill=guide_legend(nrow = 1, reverse = T)) +facet_grid(cols = vars(fct_relevel(Sample_Type.label, 'R','P','D')), scales = 'free_x', space = 'free')
plot + theme(legend.position = 'none')legend <- cowplot::get_legend(plot)grid.newpage()
grid.draw(legend)
output_name = 'OxygenTolerantSpecies'ggsave(plot + theme(legend.position = 'none'), device = 'pdf', dpi = 300, width = 5, height = 2.5,filename = paste0(results_dir, fig, output_name, '.Plot.pdf'))
ggsave(legend, device = 'pdf', dpi = 300, width = 5, height = 1,filename = paste0(results_dir, fig, output_name, '.Legend.pdf'))
Control samples are almost exclusively below 1% sample relative abundance.
mp_species.tag.long %>% filter(Study_Type == 'Control') %>% ggplot(aes(fct_reorder(paste0('D', Days_Since_FMT, ' | ', Name), Days_Since_FMT),rel_abund / 100, fill = tag)) + geom_bar(stat = 'identity', width = 1, position = position_fill(),) +theme_cowplot() + scale_y_continuous(labels = percent_format(),breaks = c(1, .75, .5, .25, 0),expand = c(0,0)) +scale_fill_manual(values = c(colors.discrete[10], 'grey50', 'grey25'),guide = guide_legend(reverse = T)) +theme(axis.title.y = element_blank(), axis.title.x = element_blank(),axis.line.y = element_blank(),axis.text.x = element_blank(),# axis.text.x = element_text(angle = 90, hjust = 1, size = 2),axis.ticks.x = element_blank(),strip.background = element_blank(), strip.text = element_text(size = 14), strip.placement = "outside",legend.title=element_blank(),legend.position = 'none') +facet_grid(cols = vars(Sample_Type), scales = 'free_x', space = 'free')mp_species.tag.long %>% filter(Study_Type == 'Control', tag == 'Oxygen Tolerant') %>% ggplot(aes(fct_reorder(paste0('D', Days_Since_FMT, ' | ', Name), Days_Since_FMT),rel_abund / 100, fill = tag)) + geom_bar(stat = 'identity', width = 1) +theme_cowplot() + scale_y_continuous(labels = percent_format(),expand = c(0,0)) +scale_fill_manual(values = 'grey25',guide = guide_legend(reverse = T)) +theme(axis.title.y = element_blank(), axis.title.x = element_blank(),axis.line.y = element_blank(),axis.text.x = element_blank(),# axis.text.x = element_text(angle = 90, hjust = 1, size = 2),axis.ticks.x = element_blank(),strip.background = element_blank(), strip.text = element_text(size = 14), strip.placement = "outside",legend.title=element_blank(),legend.position = 'none') +facet_grid(cols = vars(Sample_Type), scales = 'free_x', space = 'free')
Pairwise (R/P/D) testing
mp_species.tag.rcdi.long <- mp_species.tag.long %>% filter(fmt_success | Sample_Type != 'post') %>% # filtering remaining recipientsfilter(Study_Type == 'rCDI', tag == 'Oxygen Tolerant') %>% select(Case_Name, Days_Since_FMT, Sample_Type, rel_abund)plot.testing <- pairwise.wilcox.test(x = mp_species.tag.rcdi.long$rel_abund, g = mp_species.tag.rcdi.long$Sample_Type, paired = F, exact = F, p.adjust.method = 'fdr') %>% broom::tidy(.) %>% mutate(sig = cut(p.value, breaks = c(-Inf, 0.001, 0.01, 0.05, Inf),labels = c("***", "**", "*", "")))
plot.testing
Pairwise (R/P/D) testing paired
mp_species.tag.rcdi.wide <-mp_species.tag.rcdi.long %>% separate_rows(Case_Name, sep = ';') %>% pivot_wider(names_from = 'Sample_Type',values_from = 'rel_abund', id_cols = 'Case_Name',values_fill = list(rel_abund = 0))mp_species.tag.complete.long <-mp_species.tag.rcdi.wide %>% pivot_longer(names_to = 'Sample_Type', values_to = 'value', cols = c('recipient','post','donor'))plot.testing.paired <- pairwise.wilcox.test(x = mp_species.tag.complete.long$value, g = mp_species.tag.complete.long$Sample_Type, paired = T, exact = F, p.adjust.method = 'fdr') %>% broom::tidy(.) %>% mutate(sig = cut(p.value, breaks = c(-Inf, 0.001, 0.01, 0.05, Inf),labels = c("***", "**", "*", "")))
plot.testing.paired
Exporting plots, tables
output_name = 'OxygenTolerantSpecies'
write_tsv(plot.testing, paste0(results_dir, fig, output_name, '.Testing.PWNP.tsv'))
write_tsv(plot.testing.paired, paste0(results_dir, fig, output_name, '.Testing.PWPP.tsv'))
Functional difference rCDI recipients and donors
Gene-content based Indicators for Oxygen Usage and ABx treatment
Plots show increased abundance of these pathways in rCDI recipients, decreasing following FMT treatment.
Format
# data is in CPM
gf <-gene_family_annotated %>% separate(Gene_Family, into = c('Gene_Family_Name', 'Taxonomy'), sep = '[|]', fill = 'right') %>% select(-Taxonomy) %>% group_by(Gene_Family_Name) %>% summarize_all(.funs = funs(sum(., na.rm = T))) %>% group_by(Gene_Family_Name) %>% pivot_longer(names_to = 'Name', values_to = 'value', cols = ends_with(paste0('.pair'))) %>% left_join(., samples.metadata, by = 'Name') %>% separate(Gene_Family_Name, into = c('Gene_Family_ID', 'Gene_Family_Name'), sep = '[:] ', fill = 'right') %>% filter(grepl(Gene_Family_Name, pattern = '[[BP]]')) %>% filter(last_sample, fmt_success | Sample_Type != 'post')
Microbial oxygen usage
Genes: Aerobic Electron Transport Chain
stat_comparisons = list(c('P', 'D'), c('R', 'D'), c('R', 'P'))
pseudo = 1e-2plot.aerobic <-
gf %>% filter(Gene_Family_Name == '[BP] aerobic electron transport chain') %>% mutate(GFN = 'Aerobic Electron\nTransport Chain') %>% mutate(Sample_Type = str_to_upper(str_extract(Sample_Type, '.'))) %>% ggplot(aes(fct_relevel(Sample_Type, 'R','P','D'), value + pseudo, fill = Sample_Type)) + stat_boxplot(geom = 'errorbar') +geom_boxplot(show.legend = F) + geom_dotplot(aes(fill = NULL), binaxis = "y", stackdir = "center", binpositions="all", binwidth = 0.05) +scale_y_continuous(trans = pseudo_log_trans(pseudo, 10),expand = c(0,0,0.1,0), labels = label_number(accuracy = 1),breaks = c(1, 10, 100, 500)) +facet_wrap(~ GFN, scales = 'free_y', nrow = 2) + theme_bw() +scale_fill_manual(values = c(colors.discrete[1], colors.discrete[2], colors.discrete[3])) + labs(y = 'Copies per Million') +theme_cowplot() + theme(legend.title=element_blank(),legend.position = 'none', strip.background = element_blank(),axis.title.x = element_blank()) + stat_compare_means(method = 'wilcox', comparisons = stat_comparisons,exact = F, p.adjust = 'fdr', label = 'p.signif', symnum.args = list(cutpoints = c(-Inf, 0.001, 0.01, 0.05, Inf),symbols = c("***", "**", "*", "ns")))
plot.aerobic
ABx response genes
plot.abx <-gf %>% filter(Gene_Family_Name == '[BP] response to antibiotic') %>% mutate(GFN = 'Response to\nAntibiotics') %>% mutate(Sample_Type = str_to_upper(str_extract(Sample_Type, '.'))) %>% ggplot(aes(fct_relevel(Sample_Type, 'R','P','D'), value + pseudo, fill = Sample_Type)) + stat_boxplot(geom = 'errorbar') +geom_boxplot(show.legend = F) + geom_dotplot(aes(fill = NULL), binaxis = "y", stackdir = "center", binpositions="all", binwidth = 0.015) +scale_y_continuous(trans = pseudo_log_trans(pseudo, 10),expand = c(0,0,0.1,0), breaks = c(1000, 3000, 10000), labels = label_number(accuracy = 1)) +facet_wrap(~ GFN, scales = 'free_y', nrow = 2) + theme_bw() +scale_fill_manual(values = c(colors.discrete[1], colors.discrete[2], colors.discrete[3])) + labs(y = 'Copies per Million') +theme_cowplot() + theme(legend.title=element_blank(),legend.position = 'none', axis.title.x = element_blank(),strip.background = element_blank(),) + stat_compare_means(method = 'wilcox', comparisons = stat_comparisons,exact = F, p.adjust = 'fdr', label = 'p.signif',symnum.args = list(cutpoints = c(-Inf, 0.001, 0.01, 0.05, Inf),symbols = c("***", "**", "*", "ns")))
plot.abx
Exporting plot
output_name = 'FunctionAerobic'
ggsave(plot.aerobic + theme(legend.position = 'none'), device = 'pdf', dpi = 300, width = 2.5, height = 3.5,filename = paste0(results_dir, fig, output_name, '.Plot.pdf'))output_name = 'FunctionABx'
ggsave(plot.abx + theme(legend.position = 'none'), device = 'pdf', dpi = 300, width = 2.5, height = 3.5,filename = paste0(results_dir, fig, output_name, '.Plot.pdf'))
stat_comparisons = list(c('P', 'D'), c('R', 'D'), c('R', 'P'), c('D', 'D3'))plot.sulf <-gf %>% filter(Gene_Family_Name %in% c('[BP] hydrogen sulfide biosynthetic process','[BP] sulfur compound transport','[BP] sulfate assimilation, phosphoadenylyl sulfate reduction by phosphoadenylyl-sulfate reductase (thioredoxin)')) %>% mutate(Gene_Family_Name = case_when(Gene_Family_Name == '[BP] hydrogen sulfide biosynthetic process' ~ '[BP] hydrogen sulfide\nbiosynthetic process',Gene_Family_Name == '[BP] sulfate assimilation, phosphoadenylyl sulfate reduction by phosphoadenylyl-sulfate reductase (thioredoxin)' ~ '[BP] sulfate assimilation,\nphosphoadenylyl sulfate reduction\nby phosphoadenylyl-sulfate\n reductase (thioredoxin)',T ~ Gene_Family_Name)) %>% mutate(GFN = Gene_Family_Name) %>% mutate(Sample_Type = str_to_upper(str_extract(Sample_Type, '.'))) %>% mutate(Sample_Type = ifelse(Sample_Type != 'R' & Donor.Subject == "D0_ALM_Case_4;5;6;8;9;12;13;15;17;18;20;21", 'D3', Sample_Type)) %>% ggplot(aes(fct_relevel(Sample_Type, 'R','P','D'), value + pseudo, fill = Sample_Type)) + stat_boxplot(geom = 'errorbar') +geom_boxplot(show.legend = F) + geom_dotplot(aes(fill = NULL), binaxis = "y",stackdir = "center", binpositions="all", binwidth = 0.015) +scale_y_log10(expand = c(0,0,0.1,0), labels = label_number(accuracy = 1)) +facet_wrap(~ GFN, nrow = 1, scales = 'free_y') + theme_bw() +scale_fill_manual(values = c(colors.discrete[c(1,4,2,3)])) + labs(y = 'Copies per Million') +theme_cowplot() + theme(legend.title=element_blank(),legend.position = 'none', axis.title.x = element_blank(),strip.background = element_blank(),) + stat_compare_means(method = 'wilcox', comparisons = stat_comparisons,exact = F, p.adjust = 'fdr', label = 'p.signif',symnum.args = list(cutpoints = c(-Inf, 0.001, 0.01, 0.05, Inf),symbols = c("***", "**", "*", "ns")))
plot.sulf
Functional Composition
PCA analysis of functional composition of rCDI recipients, stool donors and post-FMT patients with resolved symptoms based on clr-transformed pathway relative abundances.
Plot shows rCDI recipients clustering separately from donors with less distinct clusters compared to taxonomic data.
Format
Annotate Humann2 with PWY Classes
h2.rcdi <-h2_unstratified %>% filter(!Name %in% failed_samples.Name) h2.rcdi.wide <-h2_unstratified %>% filter(!Name %in% failed_samples.Name) %>% filter(!is.na(pathway_description)) %>%reshape2::dcast(., pathway_id + pathway_description ~ Name,value.var = 'CPM', fun.aggregate = sum, na.rm = T)# transpose
h2.rcdi_success.wide <- h2_unstratified %>%filter(!Name %in% failed_samples.Name) %>% filter(!is.na(pathway_description)) %>%pivot_wider(id_cols = 'pathway_description', names_from = 'Name',values_from = 'CPM', values_fill = list(CPM = 0))# get data
h2.rcdi_success.data <- t(select(h2.rcdi_success.wide, matches('.pair'), pathway_description) %>% column_to_rownames(var = 'pathway_description'))
Diversity
Diversity metrics based on species-level relative abundances
Alpha: Shannon Index
Alpha Diversity
h2.rcdi_success.shannon <- data.frame(Shannon = diversity(h2.rcdi_success.data, index = 'shannon')) %>% rownames_to_column('Name')
CLR-Transform
# https://cran.r-project.org/web/packages/rgr/index.html
library(rgr)pseudo = 1e-3 # CPM
h2.rcdi_success.clr <- rgr::clr(h2.rcdi_success.data + pseudo, ifclose = FALSE, ifwarn = TRUE)
Beta: PCA of functional composition
h2.rcdi_success.clr.pca_data <- PCA(h2.rcdi_success.clr, graph = FALSE, scale.unit = F)
h2.rcdi_success.clr.pca <- h2.rcdi_success.clr.pca_data %>% .$ind %>% .$coord %>% as.data.frame() %>% rownames_to_column(var = 'Name') ## annotate with metadata, diversity
h2.rcdi_success.data.clr.pca <- h2.rcdi_success.clr.pca %>% left_join(., samples.metadata, by = 'Name') %>% left_join(., h2.rcdi_success.shannon, by = 'Name')
Pairwise PERMANOVA (adonis function) testing between rCDI recipients, Donors, and the last available post-FMT sample Showing that composition of rCDI recipient is distinct from donors and post-FMT samples, but last post-FMT time point not different from donors.
h2.rcdi_success.data.clr.pca.last <-h2.rcdi_success.data.clr.pca %>% filter(last_sample)# n count
h2.rcdi_success.data.clr.pca.last %>% group_by(Sample_Type) %>% tally()rcdi.adonis <-pairwise.adonis(x = h2.rcdi_success.clr[h2.rcdi_success.data.clr.pca.last$Name, ],factors = h2.rcdi_success.data.clr.pca.last$Sample_Type,sim.method = 'euclidean',p.adjust.m = 'fdr',perm = 1e3) %>% rename(fdr = p.adjusted) %>% mutate(sig = cut(fdr, breaks = c(-Inf, 0.001, 0.01, 0.05, Inf),labels = c("***", "**", "*", "")))
rcdi.adonis
PCA Plot shows rCDI recipients clustering separately from donors and latest post-FMT patients.
plot <-h2.rcdi_success.data.clr.pca %>% ggplot(aes(Dim.2, Dim.1, label = Unique_ID)) +geom_point(shape = 21, col = 'black', size = 8, aes(fill = Sample_Type)) + # Sample_Type scale_fill_manual(values = c(colors.discrete[c(1,2,3)]), guide = guide_legend(reverse = TRUE)) +theme_cowplot() + labs(subtitle = 'Functional Composition') + coord_fixed() +theme(plot.subtitle = element_text(hjust = 0.5),legend.title=element_blank(),aspect.ratio = 1) + labs(y = paste0('PC 1 (', round(h2.rcdi_success.clr.pca_data$eig[1,'percentage of variance'], 1), '%)'), x = paste0('PC 2 (', round(h2.rcdi_success.clr.pca_data$eig[2,'percentage of variance'], 1), '%)'))legend <- cowplot::get_legend(plot)
plot + theme(legend.position = 'none')
grid.newpage()
grid.draw(legend)
Exporting plots, tables
output_name = 'FunctionalDiversity.SuccessfulCases.CLR_PCA'ggsave(plot + theme(legend.position = 'none'), device = 'pdf', dpi = 300, width = 5, height = 5,filename = paste0(results_dir, fig, output_name, '.Plot.pdf'))ggsave(legend, device = 'pdf', dpi = 300, width = 1.1, height = 1,filename = paste0(results_dir, fig, output_name, '.Legend.pdf'))write_tsv(rcdi.adonis, paste0(results_dir, fig, output_name, '.Adonis.tsv'))