文章SameStr(四):图4代码

“Publication Figure 4”

百度云盘链接: https://pan.baidu.com/s/15g7caZp354zIWktpnWzWhQ

提取码: 4sh7

Libraries

Standard Import

library(tidyverse)
library(cowplot)
library(scales)
library(ggpubr)

Special

library(caret)
library(plotROC)
library(tidymodels)
library(grid)
library(sjPlot)

Paths

bin_dir = '../bin/'
data_dir = '../data/'
results_dir = '../results/'

Custom Scripts / Theme

source(paste0(bin_dir, 'theme_settings.R'))
source (paste0(bin_dir, 'utilities.R'))
source(paste0(bin_dir, 'species_competition_functions.R'))
source(paste0(bin_dir, 'distmat_utils.R'))
source(paste0(bin_dir, 'analysis_metadata.R'))
date <- format(Sys.time(), "%d%m%y")

Import Tables

Metadata

samples.metadata <- read_tsv(paste0(data_dir, 'samples.metadata.tsv'))
microbe.taxonomy <- read_tsv(paste0(data_dir, 'microbe.taxonomy.tsv'))
microbe.metadata <- microbe.taxonomy %>% right_join(., read_tsv(paste0(data_dir, 'microbe.metadata.tsv'))) 

Taxonomy

MetaPhlAn2 tables
combine with taxonomy

mp_species.long <- microbe.taxonomy %>% right_join(., read_tsv(paste0(data_dir, 'samples.mp_profiles.species.tsv')), by = 'species') %>% left_join(., samples.metadata, by = 'Name')

rCDI subset
annotate with metadata

mp_species.rcdi.long <- mp_species.long %>%filter(Study %in% c(rcdi.studies)) 

SameStr Data

SameStr Case-Summary table

samples.sstr_data <- read_tsv(paste0(data_dir, 'samples.sstr_data_wmetadata.tsv')) %>% left_join(., microbe.taxonomy, by = 'species')

Case Summary

SameStr Case-Summary table

sstr_cases <- read_tsv(paste0(data_dir, 'samples.case_summary.tsv')) %>% left_join(., microbe.taxonomy, by = 'species')

Functions

precision <- function(matrix) {tp <- matrix[2, 2]fp <- matrix[1, 2]return (tp / (tp + fp))
}
recall <- function(matrix) {tp <- matrix[2, 2]fn <- matrix[2, 1]return (tp / (tp + fn))
}

Set Figure

fig = paste0('Fig_4.')

Identify Individuals

Identifying related sample pairs coming from the same individual (controls) or from related biological context (rCDI) based on the number and fraction of shared family, genera, species and strains.

Format

Generate MetaPhlAn co-occurrence table, taxa counts

cooccurrence <- mp_cooccurrence(mp_species.long)
taxa_counts <- cooccurrence$mp_taxa_counts
taxa_cooccurrence <- cooccurrence$mp_taxa_table.long

Merge with SameStr strain-level data

sstr <-samples.sstr_data %>% group_by(Study.pair, Name.row, Name.col, Days_Since_FMT.row, Days_Since_FMT.col,Case_Name.pair, Case_Name.row, Case_Name.col, Same_Case.label, Related.label, Related.bool, Pair_Type) %>%summarize(.groups = 'drop',n.analyzed_strains = sum(overlap > min_overlap, na.rm = T),n.shared_strains.mvs = sum(distance.mvs > min_similarity & overlap > min_overlap, na.rm = T),n.shared_strains.cvs = sum(distance.cvs > min_similarity & overlap > min_overlap, na.rm = T)) %>%# tag successful fmt casesmutate(fmt_success = !Case_Name.pair %in% cases_failed) %>% mutate(Study.row = str_split_fixed(Name.row, pattern = '_', 2)[,1], Study.col = str_split_fixed(Name.col, pattern = '_', 2)[,1]) %>% mutate(Related.text = ifelse(Related.bool, 'Related', 'Unrelated')) %>% rowwise() %>%mutate(Sample_Pair = paste0(sort(c(Name.row, Name.col)), collapse = ',')) %>% ungroup() %>% left_join(., taxa_cooccurrence, by = 'Sample_Pair') %>% left_join(., taxa_counts, by = c('Name.row' = 'Sample')) %>% left_join(., taxa_counts, by = c('Name.row' = 'Sample'), suffix = c('.row','.col')) %>% mutate(n.not_shared_species = (n.species.row - n.shared_species) + (n.species.col - n.shared_species), n.not_shared_genus = (n.genus.row - n.shared_genus) + (n.genus.col - n.shared_genus), n.not_shared_family = (n.family.row - n.shared_family) + (n.family.col - n.shared_family), n.total_species = n.shared_species + n.not_shared_species, n.total_genus = n.shared_genus + n.not_shared_genus, n.total_family = n.shared_family + n.not_shared_family, f.shared_strains.mvs = replace_na(n.shared_strains.mvs / n.analyzed_strains, 0), f.shared_strains.cvs = replace_na(n.shared_strains.cvs / n.analyzed_strains, 0), f.shared_species = n.shared_species / n.total_species,f.shared_genus = n.shared_genus / n.total_genus, f.shared_family = n.shared_family / n.total_family)

Subset rCDI and Controls

# filter only healthy controls
sstr.control <-sstr %>% filter(Study.row %in% control.studies &Study.col %in% control.studies)sstr.rcdi <- sstr %>% filter(Study.row %in% rcdi.studies &Study.col %in% rcdi.studies)sstr.louiss <-sstr %>% filter(Study.row == 'LOUISS',Study.col == 'LOUISS')

Logistic Regression Model

Train on Control

Model on train data of healthy controls

set.seed(50)
# 60% train / 40% test
trainIndex <- caret::createDataPartition(sstr.control$Related.bool, p = .6, list = FALSE, times = 1)[,1]
train <- sstr.control[ trainIndex,]
test <- sstr.control[-trainIndex,]fit.norm.strain.all <-glm(Related.bool ~ n.total_species + f.shared_strains.mvs + n.shared_strains.mvs +f.shared_species + n.shared_species +f.shared_genus + n.shared_genus +f.shared_family + n.shared_family, data = train, family = binomial(link = 'logit'))# normal models
fit.norm.strain <-glm(Related.bool ~ f.shared_strains.mvs + n.shared_strains.mvs + n.analyzed_strains, data = train, family = binomial(link = 'logit'))
fit.norm.species <-glm(Related.bool ~ f.shared_species + n.shared_species + n.total_species, data = train, family = binomial(link = 'logit'))
fit.norm.genus <-glm(Related.bool ~ f.shared_genus + n.shared_genus + n.total_genus, data = train, family = binomial(link = 'logit'))
fit.norm.family <-glm(Related.bool ~ f.shared_family + n.shared_family + n.total_family, data = train, family = binomial(link = 'logit'))

Test on Control

prediction on test data of healthy controls

test$pred.norm.strain <- predict(fit.norm.strain, test, type = 'response')
test$pred.norm.strain.all <- predict(fit.norm.strain.all, test, type = 'response')
test$pred.norm.species <- predict(fit.norm.species, test, type = 'response')
test$pred.norm.genus <- predict(fit.norm.genus, test, type = 'response')
test$pred.norm.family <- predict(fit.norm.family, test, type = 'response')test.long <-test %>% pivot_longer(names_to = 'model', values_to = 'M', cols = c('pred.norm.strain',# pred.norm.strain.all,'pred.norm.species','pred.norm.genus','pred.norm.family'))
Performance

PR with yardstick from tidymodels

options(yardstick.event_first = FALSE)c.d1 <-test.long %>% mutate(model = gsub(model, pattern = 'pred.norm.', replacement = ''),model = fct_relevel(model, 'family', 'genus', 'species', 'strain'))c.d2 <-c.d1 %>% dplyr::group_by(model) %>% dplyr::mutate(Related.bool = factor(Related.bool)) %>%yardstick::pr_curve(data = ., truth = Related.bool, M)c.d3 <-c.d1 %>% dplyr::group_by(model) %>% dplyr::mutate(Related.bool = factor(Related.bool)) %>%  pr_auc(data = ., truth = Related.bool, M, na_rm = T)

ROC with plotroc

x1 = .95
x2 = .77
yy = .17
p.expand <- rep(0.01, 2)# auROC
control.auROC <-c.d1 %>% ggplot(aes(m = M, d = as.numeric(Related.bool), color = model)) + geom_roc(n.cuts = 0, labels = F) +theme_cowplot() +coord_fixed() + scale_color_manual(values = colors.discrete) + scale_x_continuous(labels = percent_format(), limits = c(0, 1), expand = p.expand) + scale_y_continuous(labels = percent_format(), limits = c(0, 1), expand = p.expand, sec.axis = dup_axis()) + geom_abline(slope = 1, linetype = 'dashed') + labs(x = 'False Positive', y = 'True Positive') +theme(legend.position = 'none',axis.line.y.right = element_blank(),axis.ticks.y.right = element_blank(),axis.text.y.right = element_blank(),axis.title.y.left = element_blank()# axis.text.x = element_blank(), # axis.ticks.x = element_blank(),# axis.line.x = element_blank())control.auROC.values <- calc_auc(control.auROC)$AUC
control.auPR.values <- c.d3$.estimatecontrol.auROC.table <-control.auROC + theme(legend.justification = c(0, 1), legend.position = c(.35, .38), legend.title = element_blank()) +annotate(geom = 'text', label = 'bold("auPR")', x = x1, y = .375, parse = T) +annotate(geom = 'text', label = 'bold("auROC")', x = x2, y = .375, parse = T) +annotate(geom = 'text', label = paste0(round(control.auROC.values, 3), collapse = '\n'),x = x2, y = yy, parse = F) +annotate(geom = 'text', label = paste0(round(control.auPR.values, 3), collapse = '\n'),x = x1, y = yy, parse = F)# auPR (precision-recall plot)
control.auPR <-c.d2 %>% ggplot(aes(y = precision, x = recall, col = model)) + geom_line(size = 1) + coord_fixed() + theme_cowplot() +scale_color_manual(values = colors.discrete) + scale_x_continuous(labels = percent_format(), limits = c(0, 1), expand = p.expand) + scale_y_continuous(labels = percent_format(), limits = c(0, 1), expand = p.expand, sec.axis = dup_axis()) + labs(x = 'Recall', y = 'Precision') + geom_abline(intercept = 1, slope = -1, linetype = 'dashed') + theme(legend.position = 'none',axis.line.y.right = element_blank(),axis.ticks.y.right = element_blank(),axis.text.y.right = element_blank(),axis.title.y.left = element_blank())control.auROC
control.auPR

在这里插入图片描述

Confusion Matrix

Confusion Matrix using a conservative threshold, showing that the simple logistic regression classifier achieves ~90% accuracy for within-cases comparisons, almost 100% accuracy for distinct subjects (total FP = 2), and has most difficulty with assigning the the correct label to related pre/post-FMT samples and distinct individuals that were treated with stool from the same donor.

# inspect false calls
sstr.control.confusion <-test %>% mutate(Predicted = pred.norm.strain > 8.713023e-08,TP = Predicted & Related.bool,FP = Predicted & !Related.bool,FN = !Predicted & Related.bool,TN = !Predicted & !Related.bool) %>% group_by(Pair_Type, Related.label, fmt_success) %>% summarize(TP = sum(TP, na.rm = T),FP = sum(FP, na.rm = T),FN = sum(FN, na.rm = T),TN = sum(TN, na.rm = T),.groups = 'drop') %>% mutate(Acc = (TP + TN) / (TP + TN + FP + FN)) sstr.control.confusion

Test on rCDI

Predict using control-trained model on FMT sample pairs, including donor/post-FMT, recipient/post-FMT.

# prediction
sstr.rcdi$pred.norm.strain <- predict(fit.norm.strain, sstr.rcdi, type = 'response')
sstr.rcdi$pred.norm.strain.all <- predict(fit.norm.strain.all, sstr.rcdi, type = 'response')
sstr.rcdi$pred.norm.species <- predict(fit.norm.species, sstr.rcdi, type = 'response')
sstr.rcdi$pred.norm.genus <- predict(fit.norm.genus, sstr.rcdi, type = 'response')
sstr.rcdi$pred.norm.family <- predict(fit.norm.family, sstr.rcdi, type = 'response')sstr.rcdi.long <-sstr.rcdi %>% pivot_longer(names_to = 'model', values_to = 'M', cols = c('pred.norm.strain',# pred.norm.strain.all,'pred.norm.species','pred.norm.genus','pred.norm.family'))
Performance

PR curve

options(yardstick.event_first = FALSE)r.d1 <-sstr.rcdi.long %>% mutate(model = gsub(model, pattern = 'pred.norm.', replacement = ''),model = fct_relevel(model, 'family', 'genus', 'species', 'strain'))r.d2 <-r.d1 %>% dplyr::ungroup() %>% dplyr::group_by(model) %>%dplyr::mutate(Related.bool = factor(Related.bool)) %>%pr_curve(data = ., truth = Related.bool, M)r.d3 <-r.d1 %>% dplyr::group_by(model) %>% dplyr::mutate(Related.bool = factor(Related.bool)) %>%  pr_auc(data = ., truth = Related.bool, M, na_rm = T)

ROC

x1 = .95
x2 = .77
yy = .17
p.expand <- rep(0.01, 2)# auROC
rcdi.auROC <-r.d1 %>% ggplot(aes(m = M, d = as.numeric(Related.bool), color = model)) + geom_roc(n.cuts = 0, labels = F) +theme_cowplot() +coord_fixed() + scale_color_manual(values = colors.discrete) + scale_x_continuous(labels = percent_format(), limits = c(0, 1), expand = p.expand) +scale_y_continuous(labels = percent_format(), limits = c(0, 1), expand = p.expand, sec.axis = dup_axis()) +geom_abline(slope = 1, linetype = 'dashed') + labs(x = 'False Positive', y = 'True Positive') + theme(aspect.ratio = 1,legend.position = 'none',axis.line.y.right = element_blank(),axis.ticks.y.right = element_blank(),axis.text.y.right = element_blank(),axis.title.y.left = element_blank())rcdi.auROC.values <- calc_auc(rcdi.auROC)$AUC
rcdi.auPR.values <- r.d3$.estimate# auPR (precision-recall plot)
rcdi.auPR <-r.d2 %>% ggplot(aes(y = precision, x = recall, col = model)) + geom_line(size = 1) + coord_fixed() + theme_cowplot() +scale_color_manual(values = colors.discrete) + scale_x_continuous(labels = percent_format(), limits = c(0, 1), expand = p.expand) +scale_y_continuous(labels = percent_format(), limits = c(0, 1), expand = p.expand, sec.axis = dup_axis()) +labs(x = 'Recall', y = 'Precision') + geom_abline(intercept = 1, slope = -1, linetype = 'dashed') + theme(aspect.ratio = 1,legend.position = 'none',axis.line.y.right = element_blank(),axis.ticks.y.right = element_blank(),axis.text.y.right = element_blank(),axis.title.y.left = element_blank())rcdi.auROC
rcdi.auPR

在这里插入图片描述

Confusion Matrix

Confusion Matrix using a conservative threshold, showing that the simple logistic regression classifier achieves ~90% accuracy for within-cases comparisons, almost 100% accuracy for distinct subjects (total FP = 2), and has most difficulty with assigning the the correct label to related pre/post-FMT samples and distinct individuals that were treated with stool from the same donor.

# inspect false calls
sstr.rcdi.confusion <-sstr.rcdi %>% mutate(Predicted = pred.norm.strain > 0.001774427,TP = Predicted & Related.bool,FP = Predicted & !Related.bool,FN = !Predicted & Related.bool,TN = !Predicted & !Related.bool) %>% group_by(Pair_Type, Related.label, fmt_success) %>% summarize(TP = sum(TP, na.rm = T),FP = sum(FP, na.rm = T),FN = sum(FN, na.rm = T),TN = sum(TN, na.rm = T),.groups = 'drop') %>% mutate(Acc = (TP + TN) / (TP + TN + FP + FN)) sstr.rcdi.confusion

Write to File

sstr.rcdi.confusion %>%  write_tsv(., paste0(results_dir, fig, 'ConfusionMatrix.rCDI.tsv'))
Scatter Plot
r.d1 %>% ggplot(aes(n.shared_strains.mvs, n.shared_species)) + geom_point(aes(fill = Related.label), size = 2, shape = 21) + scale_fill_manual(values = colors.discrete) + theme_cowplot() + theme(legend.title = element_blank()) +labs(x = 'Shared Strains [n]',y = 'Shared Species [n]') r.d1 %>% ggplot(aes(f.shared_strains.mvs, f.shared_species)) + geom_point(aes(fill = Related.label), size = 2, shape = 21) + geom_abline(slope = 1, intercept = 0, linetype = 'dashed') + scale_x_continuous(labels = percent_format(), limits = c(0, 1)) +scale_y_continuous(labels = percent_format(), limits = c(0, 1)) +theme_cowplot() + theme(aspect.ratio = 1, legend.title = element_blank()) + scale_fill_manual(values = colors.discrete) + labs(x = 'Shared Strains [%]',y = 'Shared Species [%]')

在这里插入图片描述

Model Summary

Exporting Table

model_comparison <- tibble(model = c('family', 'genus', 'species', 'strain'),control.auROC = control.auROC.values, control.auPR = control.auPR.values, rcdi.auROC = rcdi.auROC.values, rcdi.auPR = rcdi.auPR.values, )output_name = 'RelatedSubject.ModelPerformance'
sjPlot::tab_df(model_comparison, digits = 4,alternate.rows = F,file = paste0(results_dir, fig, output_name, '.doc'), title = 'Logistic Regression Model Performance')

Exporting Plots

ww = 3.25
hh = 3
# legend
legend = cowplot::get_legend(rcdi.auROC + theme(legend.position = 'left',legend.title = element_blank()))
output_name = 'RelatedSubject'
ggsave(legend, device = 'pdf', filename = paste0(results_dir, fig, output_name, '.Legend.pdf'), dpi = 300, width = 1, height = .9)# control
output_name = 'RelatedSubject.Control'
ggsave(control.auROC, device = 'pdf', filename = paste0(results_dir, fig, output_name, '.auROC.pdf'), dpi = 300, width = ww, height = hh)ggsave(control.auPR, device = 'pdf', filename = paste0(results_dir, fig, output_name, '.auPR.pdf'), dpi = 300, width = ww, height = hh)# rcdi
output_name = 'RelatedSubject.rCDI'
ggsave(rcdi.auROC, device = 'pdf', filename = paste0(results_dir, fig, output_name, '.auROC.pdf'), dpi = 300, width = ww, height = hh)ggsave(rcdi.auPR, device = 'pdf', filename = paste0(results_dir, fig, output_name, '.auPR.pdf'), dpi = 300, width = ww, height = hh)

Strain-level Source (Control)

Strain-level plot of per-Case second time point rel. abund. by source

Format

sstr_cases.control.long <- sstr_cases %>%filter(Study_Type == 'Control') %>% tag_species_competition(.) %>% mutate(n = 1) %>% mutate(source = ifelse(grepl(species, pattern = 'unclassified'),  'Unclassified', source)) %>% mutate(source = case_when(analysis_level == 'species' & source == 'self' ~ 'Initial Sample Sp.',analysis_level == 'species' & source == 'unique' ~ 'New Sp.',T ~ source)) %>% group_by(Study_Type, Case_Name, source, Days_Since_FMT.post, fmt_success.label) %>% summarize(n = sum(n, na.rm = T),rel_abund = sum(rel_abund.post, na.rm = T) / 100,.groups = 'drop') %>% group_by(Case_Name, Days_Since_FMT.post) %>% mutate(f = n / sum(n, na.rm = T)) %>%ungroup() %>% dplyr::select(-n) %>% group_by(Case_Name) %>% mutate(source = case_when(source == 'Unclassified' ~ 'Unclassified Sp.',source == 'same_species' ~ 'Same Sp.', source == 'unique' ~ 'New',source == 'self' ~ 'Initial Sample',T ~ source)) %>% pivot_wider(names_from = 'source', values_from = c('rel_abund','f'), names_sep = '___') %>% mutate_at(.vars = vars(contains('___')),.funs = funs(replace_na(., 0))) %>%pivot_longer(cols = contains("___"), names_to = c("metric", "source"),names_sep = '___', values_drop_na = F)

Statistics
Retained (strain-level) from previous time point: 73.1 ± 18.3%

sstr_cases.control.long  %>% filter(metric == 'rel_abund') %>% filter(Days_Since_FMT.post <= 373) %>% # ~ same time range as last rcdigroup_by(Study_Type, Case_Name) %>% filter(dense_rank(Days_Since_FMT.post) == max(dense_rank(Days_Since_FMT.post))) %>% ungroup() %>% group_by(Study_Type, source) %>% summarize(rel_abund.mean = mean(value, na.rm = T), rel_abund.sd = sd(value, na.rm = T),.groups = 'drop') sstr_cases.control.long  %>% filter(metric == 'f') %>% filter(Days_Since_FMT.post <= 373) %>% # ~ same time range as last rcdigroup_by(Study_Type, Case_Name) %>% filter(dense_rank(Days_Since_FMT.post) == max(dense_rank(Days_Since_FMT.post))) %>% ungroup() %>% group_by(Study_Type, source) %>% summarize(fraction.mean = mean(value, na.rm = T), fraction.sd = sd(value, na.rm = T),.groups = 'drop')

Bar Chart, per Case

Control - Bar charts showing second time point rel. abund. of sets of co-occurring strains in first and second time points of controls.

strain_order <- c('Initial Sample', 'Initial Sample Sp.', 'New', 'New Sp.', 'Same Sp.')
strain_colors <- c(colors.discrete[c(8,3, 7,2, 10,5)])
use_vars = list(strain_order, strain_colors)plot.control <-sstr_cases.control.long  %>% filter(metric == 'rel_abund') %>% filter(Days_Since_FMT.post <= 373) %>% # ~ same time range as last rcdiungroup() %>% mutate(ordering = as.numeric(as.factor(Case_Name)) + (Days_Since_FMT.post/1000)) %>% ggplot(aes(fct_reorder(paste0('D', Days_Since_FMT.post, ' | ', str_remove(Case_Name, pattern = 'Case_')), ordering),y = value,fill = fct_relevel(source, use_vars[[1]]))) + geom_bar(stat = 'identity', position = position_fill(), width = 1, col = 'black') +theme_cowplot() + scale_y_continuous(labels = percent_format(),breaks = c(1, .75, .5, .25, 0),expand = c(0,0)) +scale_fill_manual(values = strain_colors, guide = guide_legend(reverse = TRUE)) + labs(y = 'Rel. Abund.') + theme(axis.title.x = element_blank(),axis.line.y = element_blank(),axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, size = 7.5),legend.title=element_blank(),legend.position = 'top') 
plot.control

Exporting plots

output_name = 'PostSource.Cases.Control.Strain'
ggsave(plot.control + theme(legend.position = 'none'), device = 'pdf', dpi = 300, width = 25, height = 3.5,filename = paste0(results_dir, fig, output_name, '.Plot.pdf'))

Area Chart, Across Cases

Control - Area chart showing second time point rel. abund. of sets of co-occurring strains in first and second time points of controls based on glm 2nd binomial model across control cases


strain_order <- c('Initial Sample', 'Initial Sample Sp.', 'New', 'New Sp.', 'Same Sp.')
strain_colors <- c(colors.discrete[c(8,3, 7,2, 10,5)])
use_vars = list(strain_order, strain_colors)pseudo = 1e-6
plot <-sstr_cases.control.long  %>% filter(Days_Since_FMT.post <= 373) %>% # ~ same time range as last rcdiggplot(aes(x = Days_Since_FMT.post, y = value, fill = fct_relevel(source, use_vars[[1]]))) +  stat_smooth(geom = 'area',method = 'glm', colour = 'black', size = 0.25,position = 'fill',method.args=list(family='binomial')) +facet_grid(scales = 'free_x', space = 'free_x',cols = vars(''),rows = vars(ifelse(grepl(metric, pattern = '^f'), 'Spp. Fraction', 'Rel. Abund.'))) +theme_cowplot() + scale_y_continuous(labels = percent_format(),breaks = c(1, .75, .5, .25, 0),expand = c(0,0), ) +scale_x_continuous(trans = pseudo_log_trans(0.5, 10), breaks = c(pseudo, 7, 14, 35, 84, 365),expand = c(0,0)) + scale_fill_manual(values = use_vars[[2]], guide = guide_legend(reverse = TRUE)) + # labs(x = 'Days Since\nInitial Sampling') + labs(x = 'Days between\nSample Collection') + theme(axis.title.y = element_blank(), plot.margin = unit(c(0, 0, 0, 0), "cm"), strip.background = element_blank(), strip.text = element_text(size = 14), panel.spacing.y = unit(.5, "cm"),panel.spacing.x = unit(.35, "cm"),legend.title=element_blank(),legend.margin=ggplot2::margin(l = 0.1, unit='cm'))plot + theme(legend.position = 'none')
legend <- cowplot::get_legend(plot)grid.newpage()
grid.draw(legend)

Histogram of available samples for modelling

plot.histogram <-sstr_cases.control.long %>% filter(metric == 'f') %>% group_by(Study_Type, Case_Name, Days_Since_FMT.post) %>% summarize(value = sum(value, na.rm = T)) %>% ggplot(aes(Days_Since_FMT.post)) + geom_histogram(fill = 'black', bins = 10) +facet_grid(scales = 'free_x', space = 'free_x', cols = vars(fct_relevel(Study_Type, 'rCDI', 'Control')),rows = vars('n')) +scale_x_continuous(trans = pseudo_log_trans(0.5, 10), breaks = c(pseudo, 7, 14, 30, 90, 365),expand = c(0,0)) + scale_y_continuous(breaks = c(0, 12, 25), expand = c(0,0),) +theme_cowplot() + theme(strip.background = element_blank(), strip.text.x = element_blank(), axis.title = element_blank(), axis.line.x = element_line(size = 1),axis.line.y = element_blank(), axis.text.x = element_blank(),axis.ticks.x = element_blank(),legend.title=element_blank())plot.histogram
legend = cowplot::get_legend(plot)
grid.newpage()
grid.draw(legend)

Exporting plot

output_name = 'PostSource.AcrossCases.Control.Strain'ggsave(plot + theme(legend.position = 'none'), device = 'pdf', dpi = 300, width = 3, height = 5,filename = paste0(results_dir, fig, output_name, '.Plot.pdf'))
ggsave(plot.histogram + theme(legend.position = 'none'), device = 'pdf', dpi = 300, width = 3, height = .6,filename = paste0(results_dir, fig, output_name, '.Histogram','.Plot.pdf'))
ggsave(legend, device = 'pdf', dpi = 300, width = 5, height = 2,filename = paste0(results_dir, fig, output_name, '.Legend.pdf'))

Strain-level Resolution

Format

resolution <- sstr_cases %>%filter(Study_Type == 'Control') %>% tag_species_competition(.) %>% mutate(n = 1) %>% mutate(source = ifelse(grepl(species, pattern = 'unclassified'),  'Unclassified', source)) %>% mutate(source = case_when(analysis_level == 'species' & source == 'self' ~ 'Initial Sample Sp.',analysis_level == 'species' & source == 'unique' ~ 'New Sp.',T ~ source)) %>% group_by(Study_Type, Case_Name, source, Days_Since_FMT.post, fmt_success.label) %>% summarize(n = sum(n, na.rm = T),rel_abund = sum(rel_abund.post, na.rm = T) / 100,.groups = 'drop') %>% group_by(Case_Name, Days_Since_FMT.post) %>% mutate(f = n / sum(n, na.rm = T)) %>%ungroup() %>% dplyr::select(-n) %>% group_by(Case_Name) %>% mutate(source = case_when(source == 'Unclassified' ~ 'Unclassified Sp.',source == 'same_species' ~ 'Same Sp.', source == 'unique' ~ 'New',source == 'self' ~ 'Initial Sample',T ~ source)) %>% pivot_wider(names_from = 'source', values_from = c('rel_abund','f'), names_sep = '___') %>% mutate_at(.vars = vars(contains('___')),.funs = funs(replace_na(., 0))) %>%pivot_longer(cols = contains("___"), names_to = c("metric", "source"),names_sep = '___', values_drop_na = F) %>% mutate(resolution = case_when(grepl(source, pattern = 'Sp.') ~ 'Species',T ~ 'Strain')) %>% mutate(metric = case_when(metric == 'rel_abund' ~ 'Rel.\nAbund.', metric == 'f' ~ 'Spp.\nFraction', T ~ metric)) %>% group_by(Case_Name, Days_Since_FMT.post, metric, resolution) %>% summarize(value = sum(value, na.rm = T), .groups = 'drop')

Relative abundance and fraction of species, on average, for which we obtained strain-level resolution

resolution %>% filter(resolution == 'Strain') %>% group_by(metric) %>% summarize(value.mean = mean(value, na.rm = T), value.sd = sd(value, na.rm = T), .groups = 'drop') 

Boxplots

plot.resolution <- resolution %>% filter(resolution == 'Strain') %>% ggplot(aes(metric, value)) + stat_boxplot(geom = 'errorbar') +geom_boxplot(show.legend = F) + geom_dotplot(aes(fill = NULL), binaxis = "y", stackdir = "center", binpositions="all", binwidth = 0.015) +scale_y_continuous(labels = percent_format(),breaks = seq(0, 1, .25),expand = c(0,0),limits = c(0, 1)) + theme_cowplot() +theme(legend.position = 'none', # c(.6, .65),legend.title = element_blank(),plot.subtitle = element_text(hjust = 0.5),axis.title = element_blank(),) + labs(subtitle = 'Strain-level Resolution')
plot.resolution

Exporting plot

output_name = 'PostSource.Control.Strain'ggsave(plot.resolution, device = 'pdf', dpi = 300, width = 2.25, height = 3.75,filename = paste0(results_dir, 'Fig_3.', output_name, '.ResolutionPlot.pdf'))

Shared Strains (rCDI)

Family Related

plot <-sstr.rcdi %>% filter(Pair_Type == 'Pre-FMT/Donor') %>%mutate(tag.facet = case_when(Related.label == 'Same Subject' ~ 'Same Individual', Related.label == 'Same Donor' & Pair_Type == 'Donor/Donor' ~ 'Same Individual',Related.label == 'Same Case' & Pair_Type != 'Donor/Post-FMT' ~ 'Same Individual',Related.label == 'Same Donor' & Pair_Type == 'Post-FMT/Post-FMT' ~ 'Treatment-related',Related.label %in% c('Same Donor', 'Same Case') & Pair_Type == 'Donor/Post-FMT' ~ 'Treatment-related',T ~ Related.label),tag.x = case_when(Related.label == 'Distinct Subject' ~ 'Unrelated\nSample Pair', T ~ gsub(Pair_Type, pattern = '/', replacement= '\n'))) %>% mutate(tag.x = case_when(Pair_Type == 'Pre-FMT/Donor' & Study.pair == 'FRICKE' ~'Family-related\nPre-FMT/Donor',T ~ tag.x),tag.x = case_when(Related.label == 'Same Donor' ~ paste0(gsub(tag.x, pattern = '\n', replacement ='*\n'), '*'),T ~ tag.x),tag.x = fct_relevel(as.factor(gsub(tag.x, pattern = '-FMT', replacement = '')), 'Pre\nPost', 'Donor\nPost')) %>% ggplot(aes(tag.x, n.shared_strains.mvs, fill = tag.facet)) + stat_boxplot(geom = 'errorbar') +geom_boxplot() + stat_compare_means(method = 'wilcox', label.x.npc = 0.5,exact = F, label = 'p.signif') +facet_grid(cols = vars(tag.facet),  scales = 'free', space = 'free_x') + scale_fill_manual(values = c('black', 'grey75','white')) +theme_cowplot() + guides(color = guide_legend(ncol = 1), linetype = guide_legend(ncol = 1)) + theme(legend.title=element_blank(),axis.title.x = element_blank(),panel.margin.y = unit(.5, "lines"),panel.background = element_blank(),strip.background = element_blank(), strip.text = element_blank(),axis.text = element_text(size = 9),axis.title = element_text(size = 11), axis.text.x = element_text(hjust = 0.5),text = element_text(colour = 'black'),) + labs(y = 'Strains Shared between Samples')plot + theme(legend.position = 'none')legend = cowplot::get_legend(plot)
grid.newpage()
grid.draw(legend)

FMT Related

plot <-sstr.rcdi %>% mutate(tag.facet = case_when(Related.label == 'Same Subject' ~ 'Same Individual', Related.label == 'Same Donor' & Pair_Type == 'Donor/Donor' ~ 'Same Individual',Related.label == 'Same Case' & Pair_Type != 'Donor/Post-FMT' ~ 'Same Individual',Related.label == 'Same Donor' & Pair_Type == 'Post-FMT/Post-FMT' ~ 'Same Donor',Related.label %in% c('Same Donor', 'Same Case') & Pair_Type == 'Donor/Post-FMT' ~ 'Same Donor',Related.label == 'Distinct Subject' ~ 'Different Individuals',T ~ 'Unknown'),tag.x = gsub(Pair_Type, pattern = '/', replacement = '\n'),tag.x = gsub(tag.x, pattern = '-FMT', replacement = ''),tag.x = ifelse(tag.x == 'Pre\nDonor', 'Donor\nPre', tag.x),tag.x = fct_relevel(tag.x,  'Pre\nPre', 'Pre\nPost', 'Post\nPost', 'Donor\nDonor', 'Donor\nPre', 'Donor\nPost')) %>% mutate(tag.facet = fct_relevel(tag.facet, 'Same Individual', 'Same Donor')) %>% mutate(tag.f = ifelse(grepl(tag.x, pattern = 'Donor'),  'Donor-Related', 'Patient-Related'),tag.f = fct_relevel(tag.f, 'Patient-Related')) %>% filter(!(tag.facet == 'Different Individuals' & tag.x %in% c('Pre\nPost', 'Donor\nPre', 'Donor\nPost'))) %>% ggplot(aes(tag.x, n.shared_strains.mvs, fill = tag.facet)) + stat_boxplot(geom = 'errorbar') +geom_boxplot() + facet_grid(cols = vars(tag.facet),scales = 'free', space = 'free_x') +scale_fill_manual(values = c('white', 'grey75','black')) +theme_cowplot() + guides(color = guide_legend(ncol = 1), linetype = guide_legend(ncol = 1)) + theme(legend.title=element_blank(),axis.title.x = element_blank(),panel.margin.y = unit(.5, "lines"),panel.background = element_blank(),strip.background = element_blank(), strip.text = element_blank(),axis.text = element_text(size = 9),axis.title = element_text(size = 11), axis.text.x = element_text(hjust = 0.5),text = element_text(colour = 'black'),) + labs(y = 'Strains Shared between Samples')plot + theme(legend.position = 'none')legend = cowplot::get_legend(plot)
grid.newpage()
grid.draw(legend)

Exporting plot

output_name = 'SharedStrains.rCDI'ggsave(plot + theme(legend.position = 'none'), device = 'pdf', dpi = 300, width = 4.75, height = 3.05,filename = paste0(results_dir, fig, output_name, '.Plot.pdf'))
ggsave(legend, device = 'pdf', dpi = 300, width = 2, height = 1,filename = paste0(results_dir, fig, output_name, '.Legend.pdf'))

Statistics

output_name = 'SharedStrains.rCDI'
sstr.rcdi %>% mutate(fmt_success.label = ifelse(fmt_success, 'Resolved', 'Failed')) %>% filter(Same_Case.label == 'Same Case') %>% dplyr::select(Study.pair, Case_Name.pair, Pair_Type, fmt_success.label, starts_with('n.shared'), starts_with('Days_Since_FMT')) %>% filter(Pair_Type %in% c('Pre-FMT/Post-FMT', 'Donor/Post-FMT')) %>% rowwise() %>% mutate(Days_Since_FMT = max(c(Days_Since_FMT.row, Days_Since_FMT.col))) %>% ungroup() %>% group_by(Case_Name.pair) %>% filter(dense_rank(Days_Since_FMT) == max(dense_rank(Days_Since_FMT))) %>% ungroup() %>% dplyr::select(-Days_Since_FMT.row, -Days_Since_FMT.col, -n.shared_strains.cvs, -n.shared_genus, -n.shared_family) %>% pivot_wider(names_from = 'Pair_Type', values_from = starts_with('n.shared'), id_cols = c('Days_Since_FMT', 'Study.pair', 'Case_Name.pair', 'fmt_success.label')) %>% dplyr::select(Study.pair, Case_Name.pair, fmt_success.label, Days_Since_FMT, starts_with('n.shared_strains'), starts_with('n.shared_species')) %>% write_tsv(., paste0(results_dir, fig, output_name, '.Counts.tsv'))

Labelling Control

Format

library(tidygraph)
library(ggraph)
sstr_cooccurrence <- read_tsv(file = paste0(data_dir, 'samples.sstr_cooccurrence.tsv'))

Set Figure

fig = paste0('Suppl.Fig_6.')
get_matrix <- function(data, selection, min_strain=3) {sstr_select <-data %>%filter(row %in% selection,col %in% selection)sstr_matrix <-bind_rows(sstr_select, sstr_select %>% rename(row = col, col = row)) %>% pivot_wider(names_from = col, values_from = n.shared_strain,values_fill = list(n.shared_strain = 0)) %>% column_to_rownames('row')idx <- intersect(rownames(sstr_matrix), colnames(sstr_matrix))sstr_matrix <- sstr_matrix[idx, idx]sstr_matrix[as.matrix(sstr_matrix) <= min_strain] <- 0return(sstr_matrix)
}get_graph <- function(sstr_matrix) {sstr_graph <- as_tbl_graph(as.matrix(sstr_matrix), directed = F) %>% activate(nodes) %>%left_join(., samples.metadata, by = c('name' = 'Name'))return(sstr_graph)
}generate_graph <- function(data, selection, min_strain=3) {return(get_graph(get_matrix(data, selection, min_strain=3)))
}

LOUISS

LOUISS Case 64 / 66 last time point mislabelling

selection <- samples.metadata %>% filter(Case_Name %in% c('LOUISS_Case_64', 'LOUISS_Case_66')) %>% pull(Name)misl1 <- get_matrix(sstr_cooccurrence, selection, 0) %>% rownames_to_column('row') %>% pivot_longer(names_to = 'col', values_to = 'shared',cols = ends_with('.pair')) %>% filter(row != col) %>% separate(row, into = c('Study.row','Case_Name.row','Time_point.row'), sep = '_', remove = F) %>% separate(col, into = c('Study.col','Case_Name.col','Time_point.col'), sep = '_', remove = F) %>% filter(!col %in% c('LOUISS_AS64_24.pair','LOUISS_AS66_24.pair')) %>% mutate(Same = Case_Name.col == Case_Name.row,Name = str_remove(row, pattern = '.pair')) %>% dplyr::select(Name, shared, Same) %>% group_by(Name, Same) %>% summarize_all(.funs = funs(mean, sd)) %>% mutate(Same = ifelse(Same, 'Original Case', 'Other Case')) %>% pivot_wider(names_from = 'Same', names_sep = '.',values_from = c('mean','sd')) %>% dplyr::select(Name, contains('Original'), everything()) %>% separate(Name, into = c('Study','Case Name','Time point')) %>% arrange(`Case Name`, as.numeric(`Time point`)) %>% mutate_at(.vars = vars(starts_with('mean'), starts_with('sd')),.funs = funs(round(., 2)))

Denoising up to 3 shared strains, showing all connections above 3 strains

selection <- samples.metadata %>% filter(grepl(Study, pattern = 'LOUISS')) %>% pull(Name)sstr_graph <- generate_graph(sstr_cooccurrence, selection, 3)yy = -.6
c.col = get_palette(c('black','white'), 15)plot <- ggraph(sstr_graph, layout = 'stress') +geom_edge_link(aes(filter = weight >= 3, alpha = weight)) +geom_node_point(shape = 21, col = 'black',size = 5, aes(fill = str_remove(Case_Name, pattern = 'LOUISS_Case_')),) + geom_node_label(aes(label = Sample, filter = name %in% c('LOUISS_AS66_24.pair', 'LOUISS_AS64_24.pair')), repel = T, hjust = .25, vjust = -3) +scale_y_continuous(expand = c(0.6, 0.6)) +scale_x_reverse(expand = c(0.1, 0.1)) +theme_graph() +theme(legend.position = 'none', strip.background = element_blank(),panel.spacing.y = unit(0, 'cm'),strip.placement = 'inside',strip.text = element_blank()) + guides(edge_alpha = F, fill = guide_legend(override.aes=list(shape=21), title = '', nrow = 2)) + scale_edge_alpha_continuous(breaks = c(5, 10, 25, 50)) +scale_fill_manual(values = c(# 43c.col[1],# 44c.col[5],# 45c.col[2],# 50c.col[6],# 51c.col[7],# 53c.col[8],# 56c.col[9],# 58c.col[10],# 60c.col[11],# 61c.col[3],# 62c.col[12],# 63c.col[13],# 64colors.discrete[4],# 65c.col[14],# 66'#B4907E',# 68c.col[4])) + annotate('text', x = .6, y = yy, label = '43') + annotate('text', x = 2.75, y = yy, label = '45') + annotate('text', x = 4.75, y = yy, label = '61') + annotate('text', x = 7, y = yy, label = '68') + annotate('text', x = 9.5, y = yy, label = '44') + annotate('text', x = 11.75, y = yy, label = '50') + annotate('text', x = 13.75, y = yy, label = '51') + annotate('text', x = 16.25, y = yy, label = '53') + annotate('text', x = 18.35, y = yy, label = '56') + annotate('text', x = 20.6, y = yy, label = '58') + annotate('text', x = 23, y = yy, label = '60') + annotate('text', x = 25.25, y = yy, label = '62') + annotate('text', x = 27.5, y = yy, label = '63') + annotate('text', x = 29.75, y = yy, label = '64') + annotate('text', x = 32, y = yy, label = '66') + annotate('text', x = 34.25, y = yy, label = '65') + facet_wrap(~ x < 20, scales = 'free', nrow = 2)
plot

Exporting plot

output_name = 'Mislabelling.Example1'ggsave(plot, device = 'pdf', dpi = 300, width = 8, height = 5,filename = paste0(results_dir, fig, output_name, '.Plot.pdf'))
misl1 %>% write_tsv(., paste0(results_dir, fig, output_name, '.tsv'))

ALM

ALM patient sample with many Donor strains BEFORE FMT labelled as recipient / pre-FMT
Collected on day of FMT - As per C. Smillie not clear if before or after from notes

selection <- samples.metadata %>% filter(Donor.Subject == 'D0_ALM_Case_4;5;6;8;9;12;13;15;17;18;20;21') %>% pull(Name)samples.metadata.orig <-samples.metadata %>% dplyr::select(Name, Case_Name, Sample_Type) %>% mutate(Sample_Type = ifelse(Name == 'ALM_FMT15.pair', 'recipient', Sample_Type))misl2 <- get_matrix(sstr_cooccurrence, selection, 0) %>% rownames_to_column('row') %>% pivot_longer(names_to = 'col', values_to = 'shared',cols = ends_with('.pair')) %>% filter(row != col) %>% left_join(., samples.metadata.orig, by = c('row' = 'Name')) %>% left_join(., samples.metadata.orig, by = c('col' = 'Name'),suffix = c('.row','.col')) %>% filter(Sample_Type.row == 'recipient', Sample_Type.col != 'recipient') %>% mutate(Same = case_when(Sample_Type.col == 'post' & Case_Name.col == Case_Name.row ~ 'Self Post-FMT',Sample_Type.col == 'post' & Case_Name.col != Case_Name.row ~ 'Other Post-FMT',Sample_Type.col == 'donor' ~ 'Donor',)) %>% mutate(Recipient = str_remove(row, pattern = '.pair'),Study = 'ALM') %>% rename(Case_Name = 'Case_Name.row') %>% dplyr::select(Study, Case_Name, Recipient, shared, Same) %>% group_by(Study, Case_Name, Recipient, Same) %>% summarize_all(.funs = funs(mean, sd)) %>% pivot_wider(names_from = 'Same', names_sep = '.',values_from = c('mean','sd')) %>% dplyr::select(Study, Case_Name, Recipient, contains('Self'), contains('Other'), everything()) %>% mutate_at(.vars = vars(starts_with('mean'), starts_with('sd')),.funs = funs(replace_na(round(., 2), 0))) %>% mutate_at(.vars = vars(Case_Name, Recipient),.funs = funs(str_remove(., 'ALM_'))) %>% arrange(Recipient)

Denoising below 3 shared strains, showing all connections >4 strains

selection <- samples.metadata %>% filter(Donor.Subject == 'D0_ALM_Case_4;5;6;8;9;12;13;15;17;18;20;21') %>% pull(Name)sstr_graph <- generate_graph(sstr_cooccurrence, selection, 3)plot <-ggraph(sstr_graph, layout = 'stress') +geom_edge_link(aes(filter = weight >= 5, alpha = weight)) +geom_node_point(shape = 21, col = 'black',size = 8, aes(fill = case_when(Days_Since_FMT == 0.5 ~ 'Recipient', Sample_Type == 'post' ~ 'Post-FMT', Sample_Type == 'donor' ~ 'Donor', Sample_Type == 'recipient' ~ 'Recipient', T ~ '?'))) + geom_node_text(size = 3, aes(label = ifelse(Sample_Type == 'donor', 'D', Case_Number),col = ifelse(Sample_Type == 'recipient' | name == 'ALM_FMT15.pair', 'R', 'DP')),) + geom_node_label(aes(label = Sample, filter = name %in% c('ALM_FMT15.pair')), repel = T, hjust = .75, vjust = -8) +scale_y_continuous(expand = c(0.25, 0)) + theme_graph(base_family = 'Helvetica') + # theme(legend.key.width = unit(1, 'cm'),# legend.key.height = unit(0.5, 'cm')) + scale_fill_manual(values = colors.discrete[c(1,2,3)]) + scale_edge_alpha_continuous(breaks = c(5, 10, 25, 50)) +scale_color_manual(values = c('black', 'white')) + guides(edge_alpha = guide_legend(title = 'Shared\nStrains',override.aes = list(size = 5)), col = F, fill = F)
plot + theme(legend.position = 'none')
legend = cowplot::get_legend(plot)
grid.newpage()
grid.draw(legend)

Exporting plot

output_name = 'Mislabelling.Example2'ggsave(plot + theme(legend.position = 'none'), device = 'pdf', dpi = 300, width = 11, height = 8,filename = paste0(results_dir, fig, output_name, '.Plot.pdf'))
ggsave(legend, device = 'pdf', dpi = 300, width = 1, height = 1.5,filename = paste0(results_dir, fig, output_name, '.Legend.pdf'))misl2 %>% write_tsv(., paste0(results_dir, fig, output_name, '.tsv'))
selection <- samples.metadata %>% filter(Donor.Subject == 'D0_ALM_Case_22;23;24;25') %>% pull(Name)sstr_graph <- generate_graph(sstr_cooccurrence, selection, 3)ggraph(sstr_graph, layout = 'stress') +geom_edge_link(aes(filter = weight >= 3, alpha = weight)) +geom_node_point(shape = 21, col = 'black',size = 8, aes(fill = case_when(Days_Since_FMT == 0.5 ~ 'Recipient', Sample_Type == 'post' ~ 'Post-FMT', Sample_Type == 'donor' ~ 'Donor', Sample_Type == 'recipient' ~ 'Recipient', T ~ '?'))) + geom_node_text(col = 'black',size = 3.5, aes(label = ifelse(Sample_Type == 'donor', 'D', Case_Number))) + geom_node_label(aes(label = Sample, filter = name %in% c('ALM_FMT15.pair')), repel = T, hjust = .75, vjust = -3) +scale_y_continuous(expand = c(0.25, 0)) + scale_x_continuous(expand = c(0.25, 0.25)) + theme_graph() + guides(edge_alpha = F, fill = guide_legend(override.aes = list(shape=21), title = 'Smillie et al.\noriginal label', ncol = 1))
selection <- samples.metadata %>% filter(Donor.Subject == 'D0_ALM_Case_1;3') %>% pull(Name)sstr_graph <- generate_graph(sstr_cooccurrence, selection, 3)ggraph(sstr_graph, layout = 'stress') +geom_edge_link(aes(filter = weight >= 3, alpha = weight)) +geom_node_point(shape = 21, col = 'black',size = 8, aes(fill = case_when(Days_Since_FMT == 0.5 ~ 'Recipient', Sample_Type == 'post' ~ 'Post-FMT', Sample_Type == 'donor' ~ 'Donor', Sample_Type == 'recipient' ~ 'Recipient', T ~ '?'))) + geom_node_text(col = 'black',size = 3.5, aes(label = ifelse(Sample_Type == 'donor', 'D', Case_Number))) + geom_node_label(aes(label = Sample, filter = name %in% c('ALM_FMT15.pair')), repel = T, hjust = .75, vjust = -5) +scale_y_continuous(expand = c(0.25, 0)) + scale_x_continuous(expand = c(0.25, 0.25)) +  theme_graph() + guides(edge_alpha = F, fill = guide_legend(override.aes = list(shape=21), title = 'Smillie et al.\noriginal label', ncol = 1))

FRICKE

selection <- samples.metadata %>% filter(Study == 'FRICKE') %>% pull(Name)sstr_graph <- generate_graph(sstr_cooccurrence, selection, 3)ggraph(sstr_graph, layout = 'stress') +geom_edge_link(aes(filter = weight >= 3, alpha = weight)) +geom_node_point(shape = 21, col = 'black',size = 8, aes(fill = case_when(Days_Since_FMT == 0.5 ~ 'Recipient', Sample_Type == 'post' ~ 'Post-FMT', Sample_Type == 'donor' ~ 'Donor', Sample_Type == 'recipient' ~ 'Recipient', T ~ '?'))) + geom_node_text(col = 'black',size = 3.5, aes(label = Case_Number)) + geom_node_label(aes(label = Sample, filter = name %in% c('ALM_FMT15.pair')), repel = T, hjust = .75, vjust = -5) +scale_y_continuous(expand = c(0.25, 0)) + theme_graph() + guides(edge_alpha = F, fill = guide_legend(override.aes = list(shape=21), title = 'Sample Type', ncol = 1))

RAYMONDF

selection <- samples.metadata %>% filter(Study == 'RAYMONDF') %>% pull(Name)sstr_graph <- generate_graph(sstr_cooccurrence, selection, 3)ggraph(sstr_graph, layout = 'stress') +geom_edge_link(aes(filter = weight >= 3, alpha = weight)) +geom_node_point(shape = 21, col = 'black',size = 8, aes(fill = Days_Since_FMT)) + geom_node_text(col = 'black',size = 3.5, aes(label = Case_Number)) + geom_node_label(aes(label = Sample, filter = name %in% c('ALM_FMT15.pair')), repel = T, hjust = .75, vjust = -5) +scale_y_continuous(expand = c(0.25, 0)) + theme_graph() + guides(edge_alpha = F, fill = guide_legend(override.aes = list(shape=21), title = 'Sampling Day', ncol = 1))

本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如若转载,请注明出处:http://www.mzph.cn/news/868872.shtml

如若内容造成侵权/违法违规/事实不符,请联系多彩编程网进行投诉反馈email:809451989@qq.com,一经查实,立即删除!

相关文章

AIGC:为创意产业注入新质生产力

在当今数字化浪潮下&#xff0c;人工智能&#xff08;AI&#xff09;正以惊人的速度重塑着各行各业&#xff0c;特别是在创意产业领域&#xff0c;AI所带来的变革显得尤为深刻且广泛。 我深切感受到AIGC&#xff08;Artificial Intelligence Generated Content&#xff0c;即人…

gen_cross_contour_xld 为每个输入点生成一个十字形状的XLD轮廓。

gen_cross_contour_xld Name 名称 gen_cross_contour_xld — Generate one XLD contour in the shape of a cross for each input point. 为每个输入点生成一个十字形状的XLD轮廓。 Signature 签名 gen_cross_contour_xld( : Cross : Row, Col, Size, Angle : ) Descripti…

uniapp开发android调试工具

程序运行在真机后&#xff0c;点击Hbuilder菜单栏中的视图&#xff0c;点击下方的显示Webview调试控制台 下方就会展示你在手机端操作的界面 点击需要在电脑端调试的界面&#xff0c;界面会在新开的浏览器中展示&#xff0c;这样一些h5浏览器无法点击出来的样式就可以这样调试了…

【Java】搜索引擎设计:信息搜索怎么避免大海捞针?

一、内容分析 我们准备开发一个针对全网内容的搜索引擎&#xff0c;产品名称为“Bingoo”。 Bingoo的主要技术挑战包括&#xff1a; 针对爬虫获取的海量数据&#xff0c;如何高效地进行数据管理&#xff1b;当用户输入搜索词的时候&#xff0c;如何快速查找包含搜索词的网页…

零基础STM32单片机编程入门(十一) OLED显示屏实战含源码视频

文章目录 一.概要二.0.96寸OLED屏介绍1.OLED屏外观图2.OLED屏特点3.OLED屏接口定义4.OLED屏原理图5.OLED屏像素分辨率 三.字库取模软件PCtoLCD2002介绍四.STM32单片机驱动OLED显示实验五.CubeMX工程源代码下载六.讲解视频链接地址七.小结 一.概要 OLED被称为有机激光二极管显示…

DBeaver安装教程(开发人员和数据库管理员通用数据库管理工具)

前言 DBeaver 是一个通用的数据库管理工具和 SQL 客户端&#xff0c;支持 MySQL, PostgreSQL, Oracle, DB2, MSSQL, Sybase, Mimer, HSQLDB, Derby, 以及其他兼容 JDBC 的数据库。DBeaver 提供一个图形界面用来查看数据库结构、执行SQL查询和脚本&#xff0c;浏览和导出数据&a…

无损音频格式 FLAC 转 MP3 音频图文教程

音频文件的格式多样&#xff0c;每种格式都有其独特的特点与适用场景。FLAC&#xff08;Free Lossless Audio Codec&#xff09;&#xff0c;作为一种无损音频压缩格式&#xff0c;因其能够完美保留原始音频数据的每一个细节而备受音频发烧友和专业人士的青睐。 然而&#xff0…

NuGet 中国国内镜像

有时候网络不好&#xff0c;需要使用国内的一些镜像源。 华为资源&#xff1a; https://repo.huaweicloud.com/repository/nuget/v3/index.json 腾讯资源&#xff1a; https://mirrors.cloud.tencent.com/nuget/ 微软中国镜像源&#xff1a;https://nuget.cdn.azure.cn/v3…

仪表板展示|DataEase看中国:2023年中国新能源汽车经济运行情况分析

背景介绍 随着政府机构、企业和个人对环保和可持续发展的高度关注&#xff0c;“新能源汽车”在全球范围内成为了一个热门话题。新能源汽车是指使用非传统燃料&#xff08;如电能、氢能等&#xff09;作为动力源的汽车。 在中国市场&#xff0c;新能源汽车产业蓬勃发展&#…

form原生表单以及el-from表单阻止表单默认提交事件,上传文件自动提交刷新以及input输入框按下enter键自动搜索的事件

问题&#xff1a; 1.form原生表单以及el-from表单阻止表单默认提交事件 2.上传文件自动提交刷新 3.只有一个input输入框按下enter键自动搜索提交的事件 解决方案&#xff1a; 1.el-form使用submit.native.prevent 2.form表单使用οnsubmit"return false"

如何切换手机的ip地址

在数字时代的浪潮中&#xff0c;智能手机已成为我们日常生活中不可或缺的一部分。然而&#xff0c;随着网络安全问题的日益凸显&#xff0c;保护个人隐私和数据安全变得尤为重要。其中&#xff0c;IP地址作为网络身份的重要标识&#xff0c;其安全性与隐私性备受关注。本文将详…

智谱AI开源代码生成大模型 CodeGeeX4-ALL-9B

智谱AI 旗下的代码生成大模型 CodeGeeX 正式发布第四代开源版本&#xff1a;CodeGeeX4-ALL-9B&#xff0c;其是集代码补全和生成、代码问答、代码解释器、工具调用、联网搜索、项目级代码问答等能力于一体的代码大模型&#xff0c;是目前百亿&#xff08;10B&#xff09;参数以…

打开 Windows 磁盘管理程序的不同方法总结

如何打开磁盘管理 Windows 怎么打开磁盘管理呢&#xff1f;下面总结了几种不同的方法用来打开磁盘管理。 快速访问菜单 我们可以使用快捷键“WinX”或者将鼠标移动到开始菜单位置右键&#xff0c;这样就可以打开快速访问菜单&#xff0c;然后选择磁盘管理即可打开程序。 运行…

浅谈VPS主机上的数据库性能优化

如何提高网站性能&#xff1f;一个显而易见的解决方案是升级托管账户。您的网站将拥有更多硬件资源&#xff0c;因此可以同时处理更多请求并更快地传递数据。 无论如何&#xff0c;人们都是这么认为的。但事实总是不一样。 现代网站是一个复杂的系统&#xff0c;包含许多必须…

c++ primer plus 第15章友,异常和其他:异常,15.3.3 异常机制

#c primer plus 第15章友&#xff0c;异常和其他&#xff1a;异常,15.3.3 异常机制 异常,15.3.3 异常机制 文章目录 15.3.3 异常机制15.3.3 异常机制程序清单 15.9error3.cpp15.3.4 将对象用作异常类型程序清单15.10exc mean.h程序清单 15.11error4.cpp 15.3.3 异常机制 15.3.…

【STM32标准库】DMA双缓冲模式

1.双缓冲模式简介 设置DMA_SxCR寄存器的DBM位为1可启动双缓冲传输模式&#xff0c;并自动激活循环模式&#xff0c;所以设置普通模式或者循环模式都可以。 双缓冲不应用与存储器到存储器的传输。可以应用在从存储器到外设或者外设到存储器。 双缓冲模式下&#xff0c; 两个存…

IC认证介绍

IC认证是什么&#xff1f; IC是加拿大工业部Industry Canada的简称&#xff0c;作为政府机构&#xff0c;负责电子电器产品进入加拿大市场的认证事务。与美国的FCC相似&#xff0c;IC目前只在电磁干扰上做限制。一般规定&#xff1a;仅限制EMI&#xff0c;认证方式也与FCC相同…

领略超越王勃的AI颂扬艺术:一睹其惊艳夸赞风采

今日&#xff0c;咱也用国产AI技术&#xff0c;文心一言3.5的文字生成与可灵的图像创作&#xff0c;自动生成一篇文章&#xff0c;提示语文章末下载。 【玄武剑颂星际墨侠】 苍穹为布&#xff0c;星辰织锦&#xff0c;世间万象&#xff0c;皆入我玄武剑公众号之浩瀚画卷。此号…

Python入门 函数传递

目录 函数的多返回值 函数的多种传参形式 位置参数 关键字参数 缺省参数 不定长参数 位置传递 关键字传递 函数作为参数传递 lambda匿名函数 函数定义中 匿名函数的语法 函数的多返回值 按照返回值的顺序&#xff0c;与对应顺序的多个变量接受即可 变量之间用逗号…

SPI协议——对外部SPI操作(跨页读写)

关于W25Q32JVSSIQ的详细内容在之前的两篇文章中已经详细介绍&#xff0c;本文不做太多赘述&#xff0c;如果对芯片的了解有缺失的话&#xff0c;可以参考&#xff1a; SPI协议——对外部SPI Flash操作-CSDN博客 SPI协议——读取外部SPI Flash ID_spi flash 读取id-CSDN博客 目录…