跑完了,记录一下,顺便写写我在使用中遇到的问题,欢迎讨论~
声明:我是用自己数据跑的,因为还未发表所以就还是借用官网的图啦~
1.准备
library(GeneTrajectory)
library(Seurat)
library(dplyr)
library(reticulate)
library(RColorBrewer)
library(ggplot2)
library(scales)
2.加载数据集
data_S <- readRDS("./yourscdata.rds")
DimPlot(data_S, group.by = "celltype", shuffle = T)
3.基因间距离计算
选择基因
这里作者选择了前 2000 个高变基因中 1% 到 50% 的细胞表达的基因来演示基因间距离计算。具体可以看自己的选择,也可以选择一些你认为重要的基因。
assay <- "RNA"
DefaultAssay(data_S) <- assay
data_S <- FindVariableFeatures(data_S, nfeatures = 2000)
all_genes <- data_S@assays[[assay]]@var.features
expr_percent <- apply(as.matrix(data_S[[assay]]@data[all_genes, ]) > 0, 1, sum)/ncol(data_S)
genes <- all_genes[which(expr_percent > 0.01 & expr_percent < 0.5)]
length(genes)
## [1] 251
准备用于基因-基因距离计算的输入
# Compute the Diffusion Map cell embedding
data_S <- GeneTrajectory::RunDM(data_S)
# Calculate cell-cell graph distances over a cell-cell kNN graph
cell.graph.dist <- GetGraphDistance(data_S, K = 10)
# Coarse-grain the cell graph by grouping cells into `N`=500 "meta-cells"
cg_output <- CoarseGrain(data_S, cell.graph.dist, genes, N = 500)
计算基因间距离
使用 reticulate R 包设置 virtualenv,py_install安装gene-trajectory。
if(!reticulate::virtualenv_exists('gene_trajectory')){reticulate::virtualenv_create('gene_trajectory', packages=c('gene_trajectory'))
}
reticulate::use_virtualenv('gene_trajectory')
reticulate::py_install("gene-trajectory")
# Import the function to compute gene-gene distances
cal_ot_mat_from_numpy <- reticulate::import('gene_trajectory.compute_gene_distance_cmd')$cal_ot_mat_from_numpy
# Compute gene-gene distances
gene.dist.mat <- cal_ot_mat_from_numpy(ot_cost = cg_output[["graph.dist"]], gene_expr = cg_output[["gene.expression"]], num_iter_max = 50000, show_progress_bar = TRUE)
rownames(gene.dist.mat) <- cg_output[["features"]]
colnames(gene.dist.mat) <- cg_output[["features"]]
dim(gene.dist.mat)
4.基因轨迹推断和可视化
gene_embedding <- GetGeneEmbedding(gene.dist.mat, K = 5)$diffu.emb
gene_trajectory <- ExtractGeneTrajectory(gene_embedding, gene.dist.mat, N = 3, t.list = c(4,7,7), K = 5)
table(gene_trajectory$selected)
##
## Trajectory-1 Trajectory-2 Trajectory-3
## 43 55 153
# Visualize gene trajectories
par(mar = c(1.5,1.5,1.5,1.5))
scatter3D(gene_embedding[,1],gene_embedding[,2],gene_embedding[,3],bty = "b2", colvar = as.integer(as.factor(gene_trajectory$selected))-1,main = "trajectory", pch = 19, cex = 1, theta = 45, phi = 0,col = ramp.col(c(hue_pal()(3))))
可以得到基因的轨迹图,从得到的数据来看,每个gene都有一个order值。
5.可视化基因箱图
这一步比较耗时,自定义N.bin值,可以分开看每个轨迹的基因分布在细胞上的进展路径。
# Seurat v4安装旧版本SeuratWrappers
# remotes::install_github('satijalab/seurat-wrappers@community-vignette')
library(SeuratWrappers)
data_S <- RunALRA(data_S)
data_S <- AddGeneBinScore(data_S, gene_trajectory, N.bin = 5, trajectories = 1:3, assay = "alra", reverse = c(F, F, T))# Visualize gene bin plots for each gene trajectory
FeaturePlot(data_S, pt.size = 0.05, features = paste0("Trajectory",1,"_genes", 1:5), ncol = 5, order = T) &scale_color_gradientn(colors = rev(brewer_pal(palette = "RdYlBu")(10))) & NoLegend() & NoAxes() &
theme(title = element_text(size = 10))
FeaturePlot(data_S, pt.size = 0.05, features = paste0("Trajectory",2,"_genes", 1:5), ncol = 5, order = T) &scale_color_gradientn(colors = rev(brewer_pal(palette = "RdYlBu")(10))) & NoLegend() & NoAxes() &
theme(title = element_text(size = 10))
FeaturePlot(data_S, pt.size = 0.05, features = paste0("Trajectory",3,"_genes", 1:5), ncol = 5, order = T) &scale_color_gradientn(colors = rev(brewer_pal(palette = "RdYlBu")(10))) & NoLegend() & NoAxes() &
theme(title = element_text(size = 10))
如何比较不同条件的基因轨迹?
参考作者回复:
参考:Editing GeneTrajectory/scripts/mouse_dermal_example.R at main · KlugerLab/GeneTrajectory (github.com)
python版本可参考:KlugerLab/GeneTrajectory-python: Python implementation of Gene Trajectory (github.com)