文章SameStr（一）：图1代码

“Publication Figure 1”

百度云盘链接: https://pan.baidu.com/s/15g7caZp354zIWktpnWzWhQ

提取码: 4sh7

Libraries

Standard Import

library(tidyverse)
library(cowplot)
library(scales)
library(ggpubr)

Special

# devtools::install_github("pmartinezarbizu/pairwiseAdonis/pairwiseAdonis")
library(pairwiseAdonis)
library(FactoMineR)
# library(rgr)
# library(mixOmics)
library(grid)
library(broom)

Paths

bin_dir = '../bin/'
data_dir = '../data/'
results_dir = '../results/'

Custom Scripts / Theme

source(paste0(bin_dir, 'theme_settings.R'))
source (paste0(bin_dir, 'utilities.R'))
source(paste0(bin_dir, 'analysis_metadata.R'))
date <- format(Sys.time(), "%d%m%y")

Import Tables

Metadata

samples.metadata <- read_tsv(paste0(data_dir, 'samples.metadata.tsv'))
microbe.taxonomy <- read_tsv(paste0(data_dir, 'microbe.taxonomy.tsv')) 
microbe.metadata <- microbe.taxonomy %>% right_join(., read_tsv(paste0(data_dir, 'microbe.metadata.tsv')))

Taxonomy

MetaPhlAn2 tables
combine with taxonomy

mp_species.long <- microbe.taxonomy %>% right_join(., read_tsv(paste0(data_dir, 'samples.mp_profiles.species.tsv')), by = 'species') %>% left_join(., samples.metadata, by = 'Name')mp_species.subsampled.long <- microbe.taxonomy %>% right_join(., read_tsv(paste0(data_dir, 'samples.mp_profiles_subsampled.species.tsv')), by = 'species') %>% left_join(., samples.metadata, by = 'Name')

rCDI subset
annotate with metadata

mp_species.rcdi.long <- mp_species.long %>%filter(Study %in% c(rcdi.studies)) mp_species.subsampled.rcdi.long <- mp_species.subsampled.long %>% filter(Study %in% c(rcdi.studies))

Function

gene_family_annotated <- read_tsv(paste0(data_dir, 'genefamilies.infogo1000.cpm.annotated.tsv'))h2_unstratified <- samples.metadata %>% right_join(., read_tsv(paste0(data_dir, 'samples.h2_profiles.unstratified.tsv')), by = 'Name')

Set Figure

fig = paste0('Fig_1.')

Taxonomic Composition

PCA analysis of species-level sample composition
using data from rCDI recipients, stool donors and post-FMT patients with resolved symptoms

Format

# filter successful post-FMT samples
failed_samples.Name <- samples.metadata %>% filter(!fmt_success & Sample_Type == 'post') %>% pull(Name)# transpose
mp_species.rcdi_success.wide <- mp_species.rcdi.long %>%filter(kingdom == 'Bacteria') %>% filter(!Name %in% failed_samples.Name) %>% pivot_wider(id_cols = 'species', names_from = 'Name',values_from = 'rel_abund', values_fill = list(rel_abund = 0))## -- subsampled
mp_species.subsampled.rcdi_success.wide <- mp_species.subsampled.rcdi.long %>%filter(kingdom == 'Bacteria') %>% filter(!Name %in% failed_samples.Name) %>% pivot_wider(id_cols = 'species', names_from = 'Name',values_from = 'rel_abund', values_fill = list(rel_abund = 0))## -- genus-level
mp_genus.rcdi_success.wide <- mp_species.rcdi.long %>%filter(kingdom == 'Bacteria') %>% filter(!Name %in% failed_samples.Name) %>%pivot_wider(id_cols = 'genus', names_from = 'Name',values_from = 'rel_abund', values_fill = list(rel_abund = 0), values_fn = list(rel_abund = sum))# get data
mp_species.rcdi_success.data <- t(select(mp_species.rcdi_success.wide, matches('.pair'), species) %>% column_to_rownames(var = 'species'))## -- subsampled
mp_species.subsampled.rcdi_success.data <- t(select(mp_species.subsampled.rcdi_success.wide, matches('.pair'), species) %>% column_to_rownames(var = 'species'))## -- genus
mp_genus.rcdi_success.data <- t(select(mp_genus.rcdi_success.wide, matches('.pair'), genus) %>% column_to_rownames(var = 'genus'))

Diversity

Diversity metrics based on species-level relative abundances

Alpha: Shannon Index

mp_species.raw.rcdi_success.shannon <- data.frame(Shannon.raw = diversity(mp_species.rcdi_success.data, index = 'shannon')) %>% rownames_to_column('Name')mp_species.subsampled.rcdi_success.shannon <- data.frame(Shannon.subsampled = diversity(mp_species.subsampled.rcdi_success.data, index = 'shannon')) %>% rownames_to_column('Name')mp_species.raw.rcdi_success.shannon <- full_join(mp_species.raw.rcdi_success.shannon, mp_species.subsampled.rcdi_success.shannon, by = 'Name')

CLR-Transform

# species-level
pseudo = 1e-6remove.na <- function (xx, iftell = TRUE) {if (is.vector(xx)) {nx <- length(xx)x <- na.omit(xx)n <- length(x)m <- 1intype <- "vector"}else {nx <- length(xx[, 1])x <- na.omit(xx)n <- length(x[, 1])m <- length(x[1, ])intype <- "matrix"}nna <- nx - nif (iftell & nna > 0) {if (intype == "matrix") cat(paste("\n ", nna, "row(s) with NA(s) removed from matrix\n"))else {cat(paste("\n ", nna, "NA(s) removed from vector\n"))}}invisible(list(x = x, n = n, m = m, nna = nna))
}clr <- function(xx, ifclose = FALSE, ifwarn = TRUE) {if (is.data.frame(xx)) xx <- as.matrix(xx)if (any(xx < 0, na.rm = TRUE)) stop("Negative values not allowed\n")if(ifwarn) cat("  ** Are the data/parts all in the same measurement units? **\n")temp.x <- remove.na(xx, iftell = FALSE)x <- temp.x$x; nna <- temp.x$nnaif (nna >= 1) cat(" ", nna, "composition(s) with NA(s) removed\n")#if(ifclose) x <- 100 * sweep(x, 1, rowSums(x), "/")#x <- log(x)x <- sweep(x, 1, rowMeans(x), "-")x <- as.matrix(x)return(x = x)
}mp_species.rcdi_success.clr <- clr(mp_species.rcdi_success.data + pseudo,  ifclose = FALSE, ifwarn = TRUE)

Beta: PCA of species-level taxonomic composition

# species-level
mp_species.rcdi_success.clr.pca_data <- PCA(mp_species.rcdi_success.clr, graph = FALSE, scale.unit = F)
mp_species.rcdi_success.clr.pca <- mp_species.rcdi_success.clr.pca_data %>% .$ind %>% .$coord %>% as.data.frame() %>% rownames_to_column(var = 'Name') ## annotate with metadata, diversity
mp_species.rcdi_success.data.clr.pca <- mp_species.rcdi_success.clr.pca %>% left_join(., samples.metadata, by = 'Name') %>% left_join(., mp_species.raw.rcdi_success.shannon, by = 'Name')

Pairwise PERMANOVA (adonis function) testing between rCDI recipients, Donors, and the last available post-FMT sample Showing that composition of rCDI recipient is distinct from donors and post-FMT samples, but last post-FMT time point not different from donors.

mp_species.rcdi_success.data.clr.pca.last <-mp_species.rcdi_success.data.clr.pca %>% filter(last_sample)# n count
mp_species.rcdi_success.data.clr.pca.last %>% group_by(Sample_Type) %>% tally()rcdi.adonis <-pairwise.adonis(x = mp_species.rcdi_success.clr[mp_species.rcdi_success.data.clr.pca.last$Name, ],factors = mp_species.rcdi_success.data.clr.pca.last$Sample_Type,sim.method = 'euclidean',p.adjust.m = 'fdr',perm = 1e3) %>% rename(fdr = p.adjusted) %>% mutate(sig = cut(fdr, breaks = c(-Inf, 0.001, 0.01, 0.05, Inf),labels = c("***", "**", "*", "")))
rcdi.adonis

在这里插入图片描述

PCA Plot shows rCDI recipients clustering separately from post-FMT patients and donors.
Separate clustering of MGH03D-related cases is also visible.

plot <-mp_species.rcdi_success.data.clr.pca %>% ggplot(aes(-Dim.2, -Dim.1, labels = Unique_ID)) +geom_point(shape = 21, size = 8, col = 'black', aes(fill = Sample_Type)) + scale_fill_manual(values = colors.main) +theme_cowplot() + labs(y = paste0('PC 1 (', round(mp_species.rcdi_success.clr.pca_data$eig[1,'percentage of variance'], 1), '%)'), x = paste0('PC 2 (', round(mp_species.rcdi_success.clr.pca_data$eig[2,'percentage of variance'], 1), '%)'),subtitle = 'Taxonomic Composition') +theme(plot.title = element_text(hjust = 0.5),legend.title=element_blank(),aspect.ratio = 1,plot.subtitle = element_text(hjust = .5, size = 12), axis.title = element_text(size = 10),axis.text = element_text(size = 8))
legend <- cowplot::get_legend(plot)plot + theme(legend.position = 'none')
grid.newpage()
grid.draw(legend)

在这里插入图片描述

Exporting plots, tables

output_name = 'TaxonomicDiversity.SuccessfulCases.CLR_PCA'ggsave(plot + theme(legend.position = 'none'), device = 'pdf', dpi = 300, width = 5, height = 5,filename = paste0(results_dir, fig, output_name, '.Plot.pdf'))ggsave(legend, device = 'pdf', dpi = 300, width = 1.1, height = 1,filename = paste0(results_dir, fig, output_name, '.Legend.pdf'))write_tsv(rcdi.adonis, paste0(results_dir, fig, output_name, '.Adonis.tsv'))

Alpha/Beta Correlation

Correlation analysis of Shannon Index with first principal component (PC 1) of PCA analysis shows that large fraction of variance could be explained by alpha diversity differences between samples.

mp_species.rcdi_success.data.clr.pca.cor <- cor.test(~ Shannon.raw + Dim.1,data = mp_species.rcdi_success.data.clr.pca,method = 'spearman') %>% broom::tidy(.) %>% mutate(estimate.round = round(estimate, 2), sig = cut(p.value, breaks = c(-Inf, 0.001, 0.01, 0.05, Inf),labels = c("***", "**", "*", "ns")))
mp_species.rcdi_success.data.clr.pca.corplot <- mp_species.rcdi_success.data.clr.pca %>% ggplot(aes(Shannon.raw, -Dim.1)) +geom_point(shape = 21, col = 'black', size = 8, aes(fill = Sample_Type)) + geom_smooth(method = 'lm', color = 'black') +  annotate('text', label = paste0('r=', mp_species.rcdi_success.data.clr.pca.cor$estimate.round,' ', mp_species.rcdi_success.data.clr.pca.cor$sig), x = 1.15, y = -40, size = 6) +scale_fill_manual(values = colors.discrete[c(1,2,3)]) +theme_cowplot() + theme(legend.title=element_blank(),aspect.ratio = 1) +labs(x = 'Shannon Index', y = 'PC 1')plot + theme(legend.position = 'none')

在这里插入图片描述

Exporting plot

output_name = 'TaxonomicDiversity.SuccessfulCases.PC1_Shannon'ggsave(plot + theme(legend.position = 'none'), device = 'pdf', dpi = 300, width = 3, height = 3,filename = paste0(results_dir, fig, output_name, '.Plot.pdf'))ggsave(legend, device = 'pdf', dpi = 300, width = 1.1, height = 1,filename = paste0(results_dir, fig, output_name, '.Legend.pdf'))

Confirming that alpha diversity is representative by comparing Shannon calculated on full and 5M subset of reads generated with seqtk after QC but before MetaPhlAn2.

mp_species.rcdi_success.data.clr.pca %>% ggplot(aes(Shannon.subsampled, Shannon.raw)) +geom_point(shape = 21, col = 'black', size = 8, aes(fill = Sample_Type)) + geom_smooth(method = 'lm', color = 'black') +  stat_cor(method = 'spearman') +scale_fill_manual(values = colors.discrete[c(1,2,3)]) +theme_cowplot() + theme(legend.title=element_blank(),legend.position = 'none', aspect.ratio = 1) +labs(x = 'Shannon Index [5M Subsampled Reads]', y = 'Shannon Index [All Reads]')

Taxa Distinguishing Recipients and Donors

Format

# rcdi_success only excludes failed post-FMT samples
mp_genus.rcdi_success.annotated <- samples.metadata %>% mutate(tag = paste0(Unique_ID, ' ', gsub(gsub(Name, pattern = '.*_', replacement = ''), pattern = '.pair', replacement = ''))) %>% right_join(., as.data.frame(mp_genus.rcdi_success.data) %>% rownames_to_column('Name')) %>% filter(Sample_Type %in% c('recipient','donor'))X.metadata <-mp_genus.rcdi_success.annotated %>% select(Study:tag)X = mp_genus.rcdi_success.annotated %>% column_to_rownames(var = 'tag') %>% select(-Study:-Study_Type) %>% as.data.frame()Y = as.factor(X.metadata$Sample_Type)

sparse PLS Discriminant Analysis

Initial sPLS-DA

sPLS-DA of rCDI recipients and donor samples based on genus-level taxonomy

pseudo = 1e-6data.plsda = mixOmics::plsda(X = X + pseudo, Y, ncomp = nlevels(Y), logratio = 'CLR')
data.perf.plsda = mixOmics::perf(data.plsda, validation = 'Mfold', folds = 5,progressBar = FALSE, nrepeat = 10)plot(data.perf.plsda, overlay = 'measure', sd=TRUE)
mixOmics::plotIndiv(data.plsda , comp = c(1,2),group = Y, ind.names = T,ellipse = T,legend = TRUE, title = 'PLSDA comp 1 - 2')

在这里插入图片描述

Cross-Validate features

5x10 CV

set.seed(100)
data.tune.splsda = mixOmics::tune.splsda(X + pseudo, Y = Y, ncomp = 2, multilevel = NULL, logratio = 'CLR',validation = c('Mfold'), folds = 5, dist = 'max.dist', nrepeat = 10,progressBar = FALSE)plot(data.tune.splsda)
select.keepX = data.tune.splsda$choice.keepX[1:2]
select.keepX # c(15, 5)

Apply CV features

data.splsda = mixOmics::splsda(X = X + pseudo,  Y = Y, ncomp = 2, keepX = select.keepX, logratio= "CLR",near.zero.var = T,
)
data.perf.splsda = mixOmics::perf(data.splsda, validation = 'Mfold', folds = 5, progressBar = FALSE, nrepeat = 10, dist = 'max.dist')data.perf.splsda$error.ratemixOmics::plotIndiv(data.splsda , comp = c(1,2),group = Y, ind.names = T, ellipse = TRUE, legend = TRUE, title = 'PLSDA comp 1 - 2')

在这里插入图片描述

Plot loadings and save to table

# COMP 1
pL.pc1 <- mixOmics::plotLoadings(data.splsda, title = 'sPLS-DA PC 1',comp = 1, method = 'median', contrib = 'max', size.title = rel(1), border = T, size.name = .5, size.legend = .75, legend.color = colors.discrete[c(4, 1)])
pL.pc1# COMP 2
pL.pc2 <- mixOmics::plotLoadings(data.splsda, title = 'sPLS-DA PC 2',comp = 2, method = 'median', contrib = 'max', size.title = rel(1), border = T, size.name = .5, size.legend = .75, legend.color = colors.discrete[c(4, 1)])
pL.pc2pL <- rbind(pL.pc1$X, pL.pc2$X) %>% rownames_to_column('genus') %>% dplyr::rename(rel_abund.recipient.median_clr = recipient,rel_abund.donor.median_clr = donor,rel_abund.highest = GroupContrib) %>% dplyr::select(genus, rel_abund.donor.median_clr, rel_abund.recipient.median_clr, rel_abund.highest, importance)output_name = 'sPLSDA.DonorRecipient.Loadings'
write_tsv(pL, paste0(results_dir, fig, output_name, '.tsv'))

在这里插入图片描述

sPLS-DA: Heatmap

Heatmap of differentially abundant taxa between rCDI recipients and donors based on clr-transformed genus-level relative abundances. Plots shows typical opportunistic pathogens and oral bacteria to be differentially abundant between groups.

Genera from displayed samples (R/D) in which >=20% of species are predicted orals

genus.oral <- mp_species.rcdi.long %>%filter(kingdom == 'Bacteria') %>% filter(Sample_Type %in% c('recipient', 'donor')) %>% left_join(., microbe.metadata %>% select(species, habit.oral), by = 'species') %>% mutate(genus = gsub(genus, pattern = '_noname', replacement = '')) %>%group_by(genus) %>% summarize(oral_rate = replace_na(sum(habit.oral == T, na.rm = T) / n(), NA), .groups = 'drop') %>% mutate(oral_genus = oral_rate >= 0.20)

Get colors for sample columns and taxa rows

sample_colors <-tibble(Unique_ID.splsda = str_split_fixed(data.splsda$names$sample, pattern = ' ', n = 2)[,1]) %>% cbind(., mp_genus.rcdi_success.annotated %>% select(tag, Unique_ID, fmt_success, Case_Name, Sample_Type)) %>% mutate(color = case_when(Sample_Type == 'recipient' ~ colors.discrete[3],Sample_Type == 'donor' ~ colors.discrete[1], T ~ 'white'))taxa_colors <- tibble(genus = gsub(data.splsda$names$colnames$X, pattern = '_noname', replacement = '')) %>% left_join(., genus.oral, by = 'genus') %>% mutate(color = case_when(oral_genus == T ~ 'gray25',oral_genus == F ~ 'gray95', T ~ 'white'))

Export Heatmap

output_name = 'sPLSDA.DonorRecipient.Heatmap'
mixOmics::cim(data.splsda, row.sideColors = sample_colors$color, col.sideColors = taxa_colors$color, symkey = T, keysize = c(0.8, 0.8), row.names = F, col.names = gsub(data.splsda$names$colnames$X, pattern = '_noname', replacement = ''),save = 'pdf', name.save = paste0(results_dir, fig, output_name),margins = c(20, 6.5),row.cex = .75, col.cex = .5,transpose = T,scale = F, center = T)

Oral Species in rCDI recipients

Pulling Oral species from microbe.metadata

oral_species <-microbe.metadata %>%filter(habit.oral) %>%pull(species)

Format

rcdi.oral_species <- mp_species.rcdi.long %>% filter(last_sample,  fmt_success | Sample_Type != 'post') %>% mutate(oral = species %in% oral_species,rel_abund = rel_abund / 100) %>% group_by(Case_Name, Sample, Days_Since_FMT, Sample_Type) %>% summarize(n = sum(rel_abund > 0, na.rm = T), oral = sum(ifelse(species %in% oral_species & rel_abund > 0, 1, 0), na.rm = T),species_rel_abund = mean(ifelse(species %in% oral_species & rel_abund > 0, rel_abund, 0), na.rm = T),sample_rel_abund = sum(ifelse(species %in% oral_species, rel_abund, 0), na.rm = T), ) %>% mutate(f = oral / n) %>% mutate(source = fct_recode(as.factor(Sample_Type),R = 'recipient',P = 'post',D = 'donor')) %>% pivot_longer(names_to = 'metric', values_to = 'value',cols = c('sample_rel_abund', 'species_rel_abund', 'f'))

Boxplots Oral Taxa

Plot shows enrichment of oral-classified bacteria in rCDI recipients but not donors or controls with decreasing abundances post-FMT.

Cumulative Sample Rel. Abund. of species classified as oral

stat_comparisons = list(c('P', 'D'), c('R', 'D'), c('R', 'P'))plot.sample_rel_abund <-rcdi.oral_species %>% filter(metric == 'sample_rel_abund') %>% ggplot(aes(fct_relevel(source, 'R', 'P', 'D'),value, fill = source)) + stat_boxplot(geom = 'errorbar') +geom_boxplot(show.legend = F) + geom_dotplot(aes(fill = NULL), binaxis = "y", stackdir = "center", binpositions="all", binwidth = 0.01) +theme_cowplot() + scale_y_continuous(labels = percent_format_signif,limits = c(0, 1.21),expand = c(0,0,0.05,0),breaks = c(0, 0.25, 0.5, 0.75, 1)) + scale_fill_manual(values = colors.discrete[c(2,1,3)]) +stat_compare_means(method = 'wilcox', comparisons = stat_comparisons,exact = F, p.adjust = 'fdr', label = 'p.signif',symnum.args = list(cutpoints = c(-Inf, 0.001, 0.01, 0.05, Inf),symbols = c("***", "**", "*", "ns"))) + labs(y = 'Cumulative Sample Rel. Abund.') +theme(axis.title.x = element_blank())
plot.sample_rel_abund + theme(legend.position = 'none')

在这里插入图片描述

Rel. Abund. of Species classified as oral
Majority of oral species are below 0.01% relative abundance.

plot.species_rel_abund <-
rcdi.oral_species %>% filter(metric == 'species_rel_abund') %>% ggplot(aes(fct_relevel(source, 'R', 'P', 'D'),value, fill = source)) + stat_boxplot(geom = 'errorbar') +geom_boxplot(show.legend = F) + geom_dotplot(aes(fill = NULL), binaxis = "y", stackdir = "center", binpositions="all", binwidth = 0.00025) +theme_cowplot() + scale_y_continuous(limits = c(0, NA),labels = percent_format_signif,expand = c(0,0,0.05,0),# breaks = c(0, 0.001, 0.0025, 0.005, 0.01, 0.015)) + scale_fill_manual(values = colors.discrete[c(2,1,3)]) +stat_compare_means(method = 'wilcox', comparisons = stat_comparisons,exact = F, p.adjust = 'fdr', label = 'p.signif',symnum.args = list(cutpoints = c(-Inf, 0.001, 0.01, 0.05, Inf),symbols = c("***", "**", "*", "ns"))) + labs(y = 'Mean Rel. Abund. per Sp.') +theme(axis.title.x = element_blank())
plot.species_rel_abund + theme(legend.position = 'none')

Fraction of Species classified as oral

plot.fraction <-
rcdi.oral_species %>% filter(metric == 'f') %>%ungroup() %>% ggplot(aes(fct_relevel(source, 'R', 'P', 'D'),value, fill = source)) + stat_boxplot(geom = 'errorbar') +geom_boxplot(show.legend = F) + geom_dotplot(aes(fill = NULL), binaxis = "y", stackdir = "center", binpositions="all", binwidth = 0.01) +theme_cowplot() + scale_y_continuous(labels = percent_format_signif,limits = c(0, .75),expand = c(0,0,0.05,0),breaks = c(0, 0.25, 0.5, 0.75, 1)) + scale_fill_manual(values = colors.discrete[c(2,1,3)]) +stat_compare_means(method = 'wilcox', comparisons = stat_comparisons,exact = F, p.adjust = 'fdr', label = 'p.signif', symnum.args = list(cutpoints = c(-Inf, 0.001, 0.01, 0.05, Inf),symbols = c("***", "**", "*", "ns"))) + theme(axis.title.x = element_blank(),legend.title=element_blank(),legend.position = 'none', strip.background = element_blank()) +labs(y = 'Cumulative Fraction of Spp.')plot.fraction + theme(legend.position = 'none')

Exporting plot

output_name = 'OralSpecies'ggsave(plot.sample_rel_abund + theme(legend.position = 'none'), device = 'pdf', dpi = 300, width = 2.5, height = 3.5,filename = paste0(results_dir, fig, output_name, '.SampleRelAbund', '.Plot.pdf'))
ggsave(plot.species_rel_abund + theme(legend.position = 'none'), device = 'pdf', dpi = 300, width = 2.5, height = 3.5,filename = paste0(results_dir, fig, output_name, '.SpeciesRelAbund','.Plot.pdf'))
ggsave(plot.fraction + theme(legend.position = 'none'), device = 'pdf', dpi = 300, width = 2.5, height = 3.5,filename = paste0(results_dir, fig, output_name, '.SpeciesFraction','.Plot.pdf'))

Oxygen Tolerant Species enriched in rCDI recipients

Summarize over oxygen tolerant species

mp_species.tag.long <- samples.metadata %>% right_join(., mp_species.long) %>% filter(last_sample, !Name %in% failed_samples.Name) %>% left_join(., microbe.metadata %>% select(species, oxygen.tolerant), by = 'species') %>% mutate(tag = ifelse(is.na(oxygen.tolerant), 'Unknown',ifelse(oxygen.tolerant, 'Oxygen Tolerant',ifelse(!oxygen.tolerant, 'Oxygen Intolerant', 'Unknown'))),tag.bool = !is.na(tag),tag = as.factor(tag)) %>%group_by(Study_Type, Case_Name, Name, Days_Since_FMT, Sample_Type, tag, fmt_success) %>% summarize(rel_abund = sum(rel_abund, na.rm = T)) %>%ungroup() %>% mutate(tag = fct_relevel(tag, 'Unknown', 'Oxygen Intolerant'))

rCDI recipients show high relative abundance of oxygen-tolerant taxa (fac. anaerobes, microaerophiles) compared to donors. Patients do not fully recover down to donor levels after FMT.

plot <-mp_species.tag.long %>% filter(Study_Type == 'rCDI') %>% mutate(Sample_Type.label = str_to_upper(unlist(str_extract(Sample_Type, pattern = '.')))) %>% filter(!Sample_Type.label == 'P' | fmt_success) %>%ggplot(aes(fct_reorder(paste0('D', Days_Since_FMT, ' | ', Case_Name), Days_Since_FMT),rel_abund / 100, fill = tag)) + geom_bar(stat = 'identity', position = position_fill(), width = 1)  +theme_cowplot() + scale_y_continuous(labels = percent_format(),breaks = c(1, .75, .5, .25, 0),expand = c(0,0)) +scale_fill_manual(values = c(colors.discrete[10], 'grey50', 'grey25')) +theme(axis.title.y = element_blank(), axis.title.x = element_blank(),axis.line.y = element_blank(),axis.text.x = element_blank(),axis.ticks.x = element_blank(),strip.background = element_blank(), strip.text = element_text(size = 14), strip.placement = "outside",legend.title=element_blank(),legend.position = 'bottom') +guides(fill=guide_legend(nrow = 1, reverse = T)) +facet_grid(cols = vars(fct_relevel(Sample_Type.label, 'R','P','D')), scales = 'free_x', space = 'free') 
plot + theme(legend.position = 'none')legend <- cowplot::get_legend(plot)grid.newpage()
grid.draw(legend)

output_name = 'OxygenTolerantSpecies'ggsave(plot + theme(legend.position = 'none'), device = 'pdf', dpi = 300, width = 5, height = 2.5,filename = paste0(results_dir, fig, output_name, '.Plot.pdf'))
ggsave(legend, device = 'pdf', dpi = 300, width = 5, height = 1,filename = paste0(results_dir, fig, output_name, '.Legend.pdf'))

Control samples are almost exclusively below 1% sample relative abundance.

mp_species.tag.long %>% filter(Study_Type == 'Control') %>% ggplot(aes(fct_reorder(paste0('D', Days_Since_FMT, ' | ', Name), Days_Since_FMT),rel_abund / 100, fill = tag)) + geom_bar(stat = 'identity', width = 1, position = position_fill(),)  +theme_cowplot() + scale_y_continuous(labels = percent_format(),breaks = c(1, .75, .5, .25, 0),expand = c(0,0)) +scale_fill_manual(values = c(colors.discrete[10], 'grey50', 'grey25'),guide = guide_legend(reverse = T)) +theme(axis.title.y = element_blank(), axis.title.x = element_blank(),axis.line.y = element_blank(),axis.text.x = element_blank(),# axis.text.x = element_text(angle = 90, hjust = 1, size = 2),axis.ticks.x = element_blank(),strip.background = element_blank(), strip.text = element_text(size = 14), strip.placement = "outside",legend.title=element_blank(),legend.position = 'none') +facet_grid(cols = vars(Sample_Type), scales = 'free_x', space = 'free')mp_species.tag.long %>% filter(Study_Type == 'Control', tag == 'Oxygen Tolerant') %>% ggplot(aes(fct_reorder(paste0('D', Days_Since_FMT, ' | ', Name), Days_Since_FMT),rel_abund / 100, fill = tag)) + geom_bar(stat = 'identity', width = 1)  +theme_cowplot() + scale_y_continuous(labels = percent_format(),expand = c(0,0)) +scale_fill_manual(values = 'grey25',guide = guide_legend(reverse = T)) +theme(axis.title.y = element_blank(), axis.title.x = element_blank(),axis.line.y = element_blank(),axis.text.x = element_blank(),# axis.text.x = element_text(angle = 90, hjust = 1, size = 2),axis.ticks.x = element_blank(),strip.background = element_blank(), strip.text = element_text(size = 14), strip.placement = "outside",legend.title=element_blank(),legend.position = 'none') +facet_grid(cols = vars(Sample_Type), scales = 'free_x', space = 'free')

Pairwise (R/P/D) testing

mp_species.tag.rcdi.long <- mp_species.tag.long %>% filter(fmt_success | Sample_Type != 'post') %>% # filtering remaining recipientsfilter(Study_Type == 'rCDI', tag == 'Oxygen Tolerant') %>% select(Case_Name, Days_Since_FMT, Sample_Type, rel_abund)plot.testing <- pairwise.wilcox.test(x = mp_species.tag.rcdi.long$rel_abund, g = mp_species.tag.rcdi.long$Sample_Type, paired = F, exact = F, p.adjust.method = 'fdr') %>% broom::tidy(.) %>% mutate(sig = cut(p.value, breaks = c(-Inf, 0.001, 0.01, 0.05, Inf),labels = c("***", "**", "*", "")))
plot.testing

Pairwise (R/P/D) testing paired

mp_species.tag.rcdi.wide <-mp_species.tag.rcdi.long %>% separate_rows(Case_Name, sep = ';') %>% pivot_wider(names_from = 'Sample_Type',values_from = 'rel_abund', id_cols = 'Case_Name',values_fill = list(rel_abund = 0))mp_species.tag.complete.long <-mp_species.tag.rcdi.wide %>% pivot_longer(names_to = 'Sample_Type', values_to = 'value', cols = c('recipient','post','donor'))plot.testing.paired <- pairwise.wilcox.test(x = mp_species.tag.complete.long$value, g = mp_species.tag.complete.long$Sample_Type, paired = T, exact = F, p.adjust.method = 'fdr') %>% broom::tidy(.) %>% mutate(sig = cut(p.value, breaks = c(-Inf, 0.001, 0.01, 0.05, Inf),labels = c("***", "**", "*", "")))
plot.testing.paired

Exporting plots, tables

output_name = 'OxygenTolerantSpecies'
write_tsv(plot.testing, paste0(results_dir, fig, output_name, '.Testing.PWNP.tsv'))
write_tsv(plot.testing.paired, paste0(results_dir, fig, output_name, '.Testing.PWPP.tsv'))

Functional difference rCDI recipients and donors

Gene-content based Indicators for Oxygen Usage and ABx treatment
Plots show increased abundance of these pathways in rCDI recipients, decreasing following FMT treatment.

Format

# data is in CPM
gf <-gene_family_annotated %>% separate(Gene_Family, into = c('Gene_Family_Name', 'Taxonomy'), sep = '[|]', fill = 'right') %>% select(-Taxonomy) %>% group_by(Gene_Family_Name) %>% summarize_all(.funs = funs(sum(., na.rm = T))) %>% group_by(Gene_Family_Name) %>% pivot_longer(names_to = 'Name', values_to = 'value', cols = ends_with(paste0('.pair'))) %>% left_join(., samples.metadata, by = 'Name') %>% separate(Gene_Family_Name, into = c('Gene_Family_ID', 'Gene_Family_Name'), sep = '[:] ', fill = 'right') %>% filter(grepl(Gene_Family_Name, pattern = '[[BP]]')) %>% filter(last_sample, fmt_success | Sample_Type != 'post')

Microbial oxygen usage

Genes: Aerobic Electron Transport Chain

stat_comparisons = list(c('P', 'D'), c('R', 'D'), c('R', 'P'))
pseudo = 1e-2plot.aerobic <-
gf %>% filter(Gene_Family_Name == '[BP] aerobic electron transport chain') %>% mutate(GFN = 'Aerobic Electron\nTransport Chain') %>% mutate(Sample_Type = str_to_upper(str_extract(Sample_Type, '.'))) %>% ggplot(aes(fct_relevel(Sample_Type, 'R','P','D'), value + pseudo, fill = Sample_Type)) + stat_boxplot(geom = 'errorbar') +geom_boxplot(show.legend = F) + geom_dotplot(aes(fill = NULL), binaxis = "y", stackdir = "center", binpositions="all", binwidth = 0.05) +scale_y_continuous(trans = pseudo_log_trans(pseudo, 10),expand = c(0,0,0.1,0), labels = label_number(accuracy = 1),breaks = c(1, 10, 100, 500)) +facet_wrap(~ GFN, scales = 'free_y', nrow = 2) + theme_bw() +scale_fill_manual(values = c(colors.discrete[1], colors.discrete[2], colors.discrete[3])) + labs(y = 'Copies per Million') +theme_cowplot() + theme(legend.title=element_blank(),legend.position = 'none', strip.background = element_blank(),axis.title.x = element_blank()) + stat_compare_means(method = 'wilcox', comparisons = stat_comparisons,exact = F, p.adjust = 'fdr', label = 'p.signif', symnum.args = list(cutpoints = c(-Inf, 0.001, 0.01, 0.05, Inf),symbols = c("***", "**", "*", "ns")))
plot.aerobic

ABx response genes

plot.abx <-gf %>% filter(Gene_Family_Name == '[BP] response to antibiotic') %>% mutate(GFN = 'Response to\nAntibiotics') %>% mutate(Sample_Type = str_to_upper(str_extract(Sample_Type, '.'))) %>% ggplot(aes(fct_relevel(Sample_Type, 'R','P','D'), value + pseudo, fill = Sample_Type)) + stat_boxplot(geom = 'errorbar') +geom_boxplot(show.legend = F) + geom_dotplot(aes(fill = NULL), binaxis = "y", stackdir = "center", binpositions="all", binwidth = 0.015) +scale_y_continuous(trans = pseudo_log_trans(pseudo, 10),expand = c(0,0,0.1,0), breaks = c(1000, 3000, 10000), labels = label_number(accuracy = 1)) +facet_wrap(~ GFN, scales = 'free_y', nrow = 2) + theme_bw() +scale_fill_manual(values = c(colors.discrete[1], colors.discrete[2], colors.discrete[3])) + labs(y = 'Copies per Million') +theme_cowplot() + theme(legend.title=element_blank(),legend.position = 'none', axis.title.x = element_blank(),strip.background = element_blank(),) + stat_compare_means(method = 'wilcox', comparisons = stat_comparisons,exact = F, p.adjust = 'fdr', label = 'p.signif',symnum.args = list(cutpoints = c(-Inf, 0.001, 0.01, 0.05, Inf),symbols = c("***", "**", "*", "ns")))
plot.abx

Exporting plot

output_name = 'FunctionAerobic'
ggsave(plot.aerobic + theme(legend.position = 'none'), device = 'pdf', dpi = 300, width = 2.5, height = 3.5,filename = paste0(results_dir, fig, output_name, '.Plot.pdf'))output_name = 'FunctionABx'
ggsave(plot.abx + theme(legend.position = 'none'), device = 'pdf', dpi = 300, width = 2.5, height = 3.5,filename = paste0(results_dir, fig, output_name, '.Plot.pdf'))

stat_comparisons = list(c('P', 'D'), c('R', 'D'), c('R', 'P'), c('D', 'D3'))plot.sulf <-gf %>% filter(Gene_Family_Name %in% c('[BP] hydrogen sulfide biosynthetic process','[BP] sulfur compound transport','[BP] sulfate assimilation, phosphoadenylyl sulfate reduction by phosphoadenylyl-sulfate reductase (thioredoxin)')) %>% mutate(Gene_Family_Name = case_when(Gene_Family_Name == '[BP] hydrogen sulfide biosynthetic process' ~ '[BP] hydrogen sulfide\nbiosynthetic process',Gene_Family_Name == '[BP] sulfate assimilation, phosphoadenylyl sulfate reduction by phosphoadenylyl-sulfate reductase (thioredoxin)' ~ '[BP] sulfate assimilation,\nphosphoadenylyl sulfate reduction\nby phosphoadenylyl-sulfate\n reductase (thioredoxin)',T ~ Gene_Family_Name)) %>% mutate(GFN = Gene_Family_Name) %>% mutate(Sample_Type = str_to_upper(str_extract(Sample_Type, '.'))) %>% mutate(Sample_Type = ifelse(Sample_Type != 'R' & Donor.Subject == "D0_ALM_Case_4;5;6;8;9;12;13;15;17;18;20;21", 'D3', Sample_Type)) %>% ggplot(aes(fct_relevel(Sample_Type, 'R','P','D'), value + pseudo, fill = Sample_Type)) + stat_boxplot(geom = 'errorbar') +geom_boxplot(show.legend = F) + geom_dotplot(aes(fill = NULL), binaxis = "y",stackdir = "center", binpositions="all", binwidth = 0.015) +scale_y_log10(expand = c(0,0,0.1,0), labels = label_number(accuracy = 1)) +facet_wrap(~ GFN, nrow = 1, scales = 'free_y') + theme_bw() +scale_fill_manual(values = c(colors.discrete[c(1,4,2,3)])) + labs(y = 'Copies per Million') +theme_cowplot() + theme(legend.title=element_blank(),legend.position = 'none', axis.title.x = element_blank(),strip.background = element_blank(),) + stat_compare_means(method = 'wilcox', comparisons = stat_comparisons,exact = F, p.adjust = 'fdr', label = 'p.signif',symnum.args = list(cutpoints = c(-Inf, 0.001, 0.01, 0.05, Inf),symbols = c("***", "**", "*", "ns")))
plot.sulf

Functional Composition

PCA analysis of functional composition of rCDI recipients, stool donors and post-FMT patients with resolved symptoms based on clr-transformed pathway relative abundances.
Plot shows rCDI recipients clustering separately from donors with less distinct clusters compared to taxonomic data.

Format

Annotate Humann2 with PWY Classes

h2.rcdi <-h2_unstratified %>% filter(!Name %in% failed_samples.Name) h2.rcdi.wide <-h2_unstratified %>% filter(!Name %in% failed_samples.Name) %>% filter(!is.na(pathway_description)) %>%reshape2::dcast(., pathway_id + pathway_description ~ Name,value.var = 'CPM', fun.aggregate = sum, na.rm = T)# transpose
h2.rcdi_success.wide <- h2_unstratified %>%filter(!Name %in% failed_samples.Name) %>% filter(!is.na(pathway_description)) %>%pivot_wider(id_cols = 'pathway_description', names_from = 'Name',values_from = 'CPM', values_fill = list(CPM = 0))# get data
h2.rcdi_success.data <- t(select(h2.rcdi_success.wide, matches('.pair'), pathway_description) %>% column_to_rownames(var = 'pathway_description'))

Diversity

Diversity metrics based on species-level relative abundances

Alpha: Shannon Index

Alpha Diversity

h2.rcdi_success.shannon <- data.frame(Shannon = diversity(h2.rcdi_success.data, index = 'shannon')) %>% rownames_to_column('Name')

CLR-Transform

# https://cran.r-project.org/web/packages/rgr/index.html
library(rgr)pseudo = 1e-3 # CPM
h2.rcdi_success.clr <- rgr::clr(h2.rcdi_success.data + pseudo,  ifclose = FALSE, ifwarn = TRUE)

Beta: PCA of functional composition

h2.rcdi_success.clr.pca_data <- PCA(h2.rcdi_success.clr, graph = FALSE, scale.unit = F)
h2.rcdi_success.clr.pca <- h2.rcdi_success.clr.pca_data %>% .$ind %>% .$coord %>% as.data.frame() %>% rownames_to_column(var = 'Name') ## annotate with metadata, diversity
h2.rcdi_success.data.clr.pca <- h2.rcdi_success.clr.pca %>% left_join(., samples.metadata, by = 'Name') %>% left_join(., h2.rcdi_success.shannon, by = 'Name')

h2.rcdi_success.data.clr.pca.last <-h2.rcdi_success.data.clr.pca %>% filter(last_sample)# n count
h2.rcdi_success.data.clr.pca.last %>% group_by(Sample_Type) %>% tally()rcdi.adonis <-pairwise.adonis(x = h2.rcdi_success.clr[h2.rcdi_success.data.clr.pca.last$Name, ],factors = h2.rcdi_success.data.clr.pca.last$Sample_Type,sim.method = 'euclidean',p.adjust.m = 'fdr',perm = 1e3) %>% rename(fdr = p.adjusted) %>% mutate(sig = cut(fdr, breaks = c(-Inf, 0.001, 0.01, 0.05, Inf),labels = c("***", "**", "*", "")))
rcdi.adonis

PCA Plot shows rCDI recipients clustering separately from donors and latest post-FMT patients.

plot <-h2.rcdi_success.data.clr.pca %>% ggplot(aes(Dim.2, Dim.1, label = Unique_ID)) +geom_point(shape = 21, col = 'black', size = 8, aes(fill = Sample_Type)) + # Sample_Type scale_fill_manual(values = c(colors.discrete[c(1,2,3)]), guide = guide_legend(reverse = TRUE)) +theme_cowplot() + labs(subtitle = 'Functional Composition') + coord_fixed() +theme(plot.subtitle = element_text(hjust = 0.5),legend.title=element_blank(),aspect.ratio = 1) + labs(y = paste0('PC 1 (', round(h2.rcdi_success.clr.pca_data$eig[1,'percentage of variance'], 1), '%)'), x = paste0('PC 2 (', round(h2.rcdi_success.clr.pca_data$eig[2,'percentage of variance'], 1), '%)'))legend <- cowplot::get_legend(plot)
plot + theme(legend.position = 'none')
grid.newpage()
grid.draw(legend)

Exporting plots, tables

output_name = 'FunctionalDiversity.SuccessfulCases.CLR_PCA'ggsave(plot + theme(legend.position = 'none'), device = 'pdf', dpi = 300, width = 5, height = 5,filename = paste0(results_dir, fig, output_name, '.Plot.pdf'))ggsave(legend, device = 'pdf', dpi = 300, width = 1.1, height = 1,filename = paste0(results_dir, fig, output_name, '.Legend.pdf'))write_tsv(rcdi.adonis, paste0(results_dir, fig, output_name, '.Adonis.tsv'))