1、原论文数据双标图
代码:
setwd("D:/Desktop/0000/R") #更改路径#导入数据
df <- read.table("Input data.csv", header = T, sep = ",")# -----------------------------------
#所需的包:
packages <- c("ggplot2", "tidyr", "dplyr", "readr", "ggrepel", "cowplot", "factoextra")
#安装你尚未安装的R包
installed_packages <- packages %in% rownames(installed.packages())
if (any(installed_packages == FALSE)) {install.packages(packages[!installed_packages])
}
invisible(lapply(packages, library, character.only = TRUE))# -----------------------------------
# 设置一些颜色、文字的基础设置
# Colors:
CatCol <- c(CSH = "#586158", DBF = "#C46B39", EBF = "#4DD8C0", ENF = "#3885AB", GRA = "#9C4DC4",MF = "#C4AA4D", OSH = "#443396", SAV = "#CC99CC", WET = "#88C44D", WSA = "#AB3232"
)
Three_colorblind <- c("#A8AD6F", "#AD6FA8", "#6FA8AD") #c("#809844", "#4f85b0", "#b07495")
graph_elements_dark <- "black"
plot_elements_light <- "gray75"
plot_elements_dark <- "gray25"# Transparency:
boot_alpha_main <- 0.9
boot_alpha_small <- 0.05# Text:
# if (n_pcs > 3) {x_angle <- 270; x_adjust <- 0.25} else {x_angle <- 0; x_adjust <- 0} # option to change orientation of x axis text
x_angle <- 0; x_adjust <- 0
title_text <- 9 # Nature Communications: max 7 pt; cowplot multiplier: 1/1.618; 7 pt : 1/1.618 = x pt : 1; x = 7 / 1/1.618; x = 11.326 (round up to integer)
subtitle_text <- 9
normal_text <- 9 # Nature Communications: min 5 pt; cowplot multiplier: 1/1.618; 5 pt : 1/1.618 = x pt : 1; x = 5 / 1/1.618; x = 8.09 (round up to integer)# Element dimensions:
plot_linewidth <- 0.33
point_shape <- 18
point_size <- 1.5# Initialize figure lists:
p_biplot <- list(); p_r2 <- list(); p_load <- list(); p_contr <- list(); col_ii <- list()# Labels:
veg_sub_labels <- c("All Sites", "All Forests", "Evergreen Needle-Forests") # -----------------------------------
#选择PCA所需的数据
codes_4_PCA <- c("SITE_ID", "IGBP", "GPPsat", "wLL", "wNmass", "wLMA", "RECOmax") # 选择需要的列数据
#执行筛选
df_subset <- df %>%dplyr::select(all_of(codes_4_PCA))
#运行PCA。dplyr::select(-species):将不需要的列数据去除
pca_result <- FactoMineR::PCA(df_subset %>% dplyr::select(-SITE_ID, -IGBP), scale.unit = T, ncp = 10, graph = F)# -----------------------------------
#绘图
p1<- fviz_pca_biplot(pca_result,axes = c(1, 2),col.ind = df_subset$IGBP, #"grey50",# col.ind = NA, #plot_elements_light, #"white",geom.ind = "point",palette = CatCol,#'futurama',label = "var",col.var = plot_elements_dark,labelsize = 3,repel = TRUE,pointshape = 16,pointsize = 2,alpha.ind = 0.67,arrowsize = 0.5)# -----------------------------------
# 它是ggplot2对象,我们在此基础上进一步修改一下标注。
p1<-p1+labs(title = "",x = "PC1",y = "PC2",fill = "IGBP") +guides(fill = guide_legend(title = "")) +theme(title = element_blank(),text = element_text(size = normal_text),axis.line = element_blank(),axis.ticks = element_blank(),axis.title = element_text(size = title_text, face = "bold"),axis.text = element_text(size = normal_text),#plot.margin = unit(c(0, 0, 0, 0), "cm"),# legend.position = "none"legend.text = element_text(size = subtitle_text),legend.key.height = unit(5, "mm"),legend.key.width = unit(2, "mm"))
p1
参考:Leaf-level coordination principles propagate to the ecosystem scale (https://doi.org/10.1038/s41467-023-39572-5)、主成分分析图。
2、我选用iris数据进行重新绘制测试双标图
代码:
setwd("D:/Desktop/0000/R") #更改路径#导入数据
df <- read.table("iris1.csv", header = T, sep = ",")# -----------------------------------
#所需的包:
packages <- c("ggplot2", "tidyr", "dplyr", "readr", "ggrepel", "cowplot", "factoextra")
#安装你尚未安装的R包
installed_packages <- packages %in% rownames(installed.packages())
if (any(installed_packages == FALSE)) {install.packages(packages[!installed_packages])
}
invisible(lapply(packages, library, character.only = TRUE))# -----------------------------------
# 设置一些颜色、文字的基础设置
# Colors:
CatCol <- c(setosa = "#586158", versicolor = "#C46B39", virginica = "#4DD8C0") # 设置类别颜色
Three_colorblind <- c("#A8AD6F", "#AD6FA8", "#6FA8AD") #c("#809844", "#4f85b0", "#b07495")
graph_elements_dark <- "black"
plot_elements_light <- "gray75"
plot_elements_dark <- "gray25"# Transparency:
boot_alpha_main <- 0.9
boot_alpha_small <- 0.05# Text:
# if (n_pcs > 3) {x_angle <- 270; x_adjust <- 0.25} else {x_angle <- 0; x_adjust <- 0} # option to change orientation of x axis text
x_angle <- 0; x_adjust <- 0
title_text <- 9 # Nature Communications: max 7 pt; cowplot multiplier: 1/1.618; 7 pt : 1/1.618 = x pt : 1; x = 7 / 1/1.618; x = 11.326 (round up to integer)
subtitle_text <- 9
normal_text <- 9 # Nature Communications: min 5 pt; cowplot multiplier: 1/1.618; 5 pt : 1/1.618 = x pt : 1; x = 5 / 1/1.618; x = 8.09 (round up to integer)# Element dimensions:
plot_linewidth <- 0.33
point_shape <- 18
point_size <- 1.5# Initialize figure lists:
p_biplot <- list(); p_r2 <- list(); p_load <- list(); p_contr <- list(); col_ii <- list()# Labels:
veg_sub_labels <- c("All Sites", "All Forests", "Evergreen Needle-Forests") # -----------------------------------
#选择PCA所需的数据
codes_4_PCA <- c("sepal_length", "sepal_width", "petal_length", "petal_width", "species") # 选择需要的列数据
#执行筛选
df_subset <- df %>%dplyr::select(all_of(codes_4_PCA))
#运行PCA。dplyr::select(-species):将不需要的列数据去除
pca_result <- FactoMineR::PCA(df_subset %>% dplyr::select(-species), scale.unit = T, ncp = 10, graph = F)# -----------------------------------
#绘图
p1<- fviz_pca_biplot(pca_result,axes = c(1, 2),col.ind = df_subset$species, #"grey50",# col.ind = NA, #plot_elements_light, #"white",geom.ind = "point",palette = CatCol,#'futurama',label = "var",col.var = plot_elements_dark,labelsize = 3,repel = TRUE,pointshape = 16,pointsize = 2,alpha.ind = 0.67,arrowsize = 0.5)# -----------------------------------
# 它是ggplot2对象,我们在此基础上修改一下标注。
p1<-p1+labs(title = "",x = "PC1",y = "PC2",fill = "IGBP") +guides(fill = guide_legend(title = "")) +theme(title = element_blank(),text = element_text(size = normal_text),axis.line = element_blank(),axis.ticks = element_blank(),axis.title = element_text(size = title_text, face = "bold"),axis.text = element_text(size = normal_text),#plot.margin = unit(c(0, 0, 0, 0), "cm"),# legend.position = "none"legend.text = element_text(size = subtitle_text),legend.key.height = unit(5, "mm"),legend.key.width = unit(2, "mm"))
p1
3、iris数据进行绘制碎石图、变量载荷图、变量贡献图
代码:
#加载包
library(dplyr) #用于数据预处理
library(tidyr) #用于数据预处理
library(stringr) #用于字符串处理
library(modelr) #用于自助法重抽样
library(FactoMineR) #用于PCA
library(ade4) #用于PCA
library(factoextra) #用于PCA结果提取及绘图
#所需的包:
packages <- c("ggplot2", "tidyr", "dplyr", "readr", "ggrepel", "cowplot", "factoextra")
#安装你尚未安装的R包
installed_packages <- packages %in% rownames(installed.packages())
if (any(installed_packages == FALSE)) {install.packages(packages[!installed_packages])
}
invisible(lapply(packages, library, character.only = TRUE))setwd("D:/Desktop/0000/R") #更改路径
# 加载数据
df <- read.csv("iris.csv",header = T, row.names = 1) # row.names = 1: 第一列为标签,这时赋值给df时就没有这列了#重抽样
set.seed(123) #设置随机种子
tt=99 #设置重抽样的次数。iris[,-5]:表示去除第5列,因为这是类别
df_boot <- iris[,-5] %>% modelr::bootstrap(n = tt) #重抽样,结果是一个列表,包含499个数据框#使用循环对每一个数据集进行PCA
#初始化3个空变量
N_PCS <- tibble() #使用维数检验保留的PC数量
pca_stats <- tibble() #变量的贡献和载荷
R2 <- c() #解释方差占比#使用循环对每一个数据集进行PCA
#初始化3个空变量
N_PCS <- tibble() #使用维数检验保留的PC数量
pca_stats <- tibble() #变量的贡献和载荷
R2 <- c() #解释方差占比#循环
for (j in 1:tt) {##提取第j次bootstrap的数据dat <- df_boot %>% slice(j) %>% # 选择第j行pull(strap) %>% # 提取列表as.data.frame() # 提取数据集#使用FactoMineR包执行PCApca_result <- FactoMineR::PCA(dat, scale.unit = T, ncp = 4, graph = F) # ncp = 4:降维几个主成分,设置最大即为全部#使用ade4包执行PCA# center:指定是否对数据进行中心化,默认为 TRUE。中心化意味着将数据减去各自的均值,使得数据在每个维度上的平均值为零。# scale:指定是否对数据进行缩放,默认为 TRUE。缩放意味着将数据除以各自的标准差,使得数据在每个维度上的标准差为一。# scannf:指定是否计算特征值和特征向量,默认为 FALSE。如果设置为 TRUE,则会计算特征值和特征向量。pca1 <- ade4::dudi.pca(dat, center = TRUE, scale = TRUE, scannf = FALSE, nf = 4) # nf= 4:降维几个主成分,设置最大即为全部#检测不确定性和显著性#执行维数检验pc_tested <-testdim(pca1, nrepet = 999)###提取bootstrap数据集的PCA结果N_PCS <- N_PCS %>% bind_rows(tibble(strap = j, n_pcs = pc_tested$nb.cor)) #第j次运行的PCApca_stats <- bind_rows(pca_stats,pca_result$var$contrib %>% # add contributionsas_tibble(rownames = "var") %>%pivot_longer(cols = !var, names_to = "PC", values_to = "contrib") %>% left_join(pca_result$var$coord %>% # add loadingsas_tibble(rownames = "var") %>%pivot_longer(cols = !var, names_to = "PC", values_to = "loading"),by = c("var", "PC")) %>% mutate(PC = str_sub(PC, start = 5), #提取PC名称中的数字strap = j) # bootstrap run number) #得到变量贡献和载荷R2 <- bind_rows(R2,tibble(PC = pca_result[["eig"]]%>% rownames(),exp_var = pca_result[["eig"]][,2],strap = j) %>% mutate(PC = str_sub(PC, start = 6)) #提取PC名称中的数字)
}#保留的PC数量
N_PCS <- N_PCS %>%group_by(n_pcs) %>% summarise(n_rep = n()) %>% #对重复值进行计数mutate(retained = n_rep / tt * 100) #计算运行次数百分比
pc_ret <- N_PCS %>% filter(retained == max(retained))
#输出结果的摘要
print(paste0("Number of statistical significant components according to Dray method (Dray et al., 2008) was ",pc_ret[1,1], " in ", round(pc_ret[1,3], digits = 1), "% of runs."))n_pcs <- NA #保留PC数的初始设置
# n_pcs <- 2 #可以手动设置保留PC数
if (is.na(n_pcs)) {n_pcs <- N_PCS %>% filter(retained == max(retained)) %>% select(n_pcs) %>% unlist() %>% unname()
} #按照Dray等人的方法设置保留PC数##变量贡献和载荷
pca_stats <- pca_stats %>% group_by(PC, var) %>% mutate(contrib_mean = mean(contrib),contrib_median = median(contrib),contrib_std = sd(contrib),# contrib_q25 = quantile(contrib, 0.25), contrib_q75 = quantile(contrib, 0.75),loading_mean = mean(loading),loading_median = median(loading),loading_std = sd(loading),# loading_q25 = quantile(loading, 0.25), loading_q75 = quantile(loading, 0.75)) %>% ungroup() %>% dplyr::rename(contrib_boot = contrib, loading_boot = loading) #重命名以免后续的匹配过程出现混乱##修改PC名称
pca_stats <- pca_stats %>%mutate(PC_name = paste0("PC", PC))##解释方差占比
R2 <- R2 %>% group_by(PC) %>% mutate(R2_mean = mean(exp_var),R2_median = median(exp_var),R2_std = sd(exp_var),# R2_q25 = quantile(exp_var, 0.25), R2_q75 = quantile(exp_var, 0.75)) %>% ungroup() %>% dplyr::rename(R2_boot = exp_var) #重命名以免后续的匹配过程出现混乱##添加到pca_stats的表格中
pca_stats <- pca_stats %>% left_join(R2, by = c("PC", "strap"))#对原始数据的PCA
pca_result <- FactoMineR::PCA(iris[,-5], scale.unit = T, ncp = 4, graph = F)#添加原始数据计算得到的实际值
pca_stats <- pca_stats %>% dplyr::left_join( #添加原始数据的R2(不是bootstrapping的均值)tibble(PC = pca_result[["eig"]] %>% rownames(),R2 = pca_result[["eig"]][,2]) %>% mutate(PC = str_sub(PC, start = 6)), #提取PC数by = "PC") %>% dplyr::left_join( #添加原始数据的变量贡献(不是bootstrapping的均值)pca_result$var$contrib %>% #添加贡献as_tibble(rownames = "var") %>%pivot_longer(cols = !var, names_to = "PC", values_to = "contrib") %>% mutate(PC = str_sub(PC, start = 5)), #提取PC数by = c("PC", "var")) %>%dplyr::left_join( #添加原始数据的变量载荷(不是bootstrapping的均值)pca_result$var$coord %>% #添加载荷as_tibble(rownames = "var") %>%pivot_longer(cols = !var, names_to = "PC", values_to = "loading") %>% mutate(PC = str_sub(PC, start = 5)), # extract PC numbersby = c("PC", "var"))## 添加PC数的保留百分比(在自助法中PC被保留得有多频繁)
pca_stats <- pca_stats %>% dplyr::left_join(N_PCS %>% dplyr::mutate(PC = n_pcs %>% as.character) %>% dplyr::select(PC, retained),by = "PC") # -----------------------------------
# 绘制图碎石图
dat_boot <- pca_stats %>%dplyr::select(PC_name, PC, R2_boot) %>% unique()%>% #去除重复dplyr::mutate(PC = as.character(PC))dat_true <- pca_stats %>%dplyr::select(PC_name, PC, R2, R2_median, R2_std) %>% unique() %>% #去除重复dplyr::mutate(PC = as.character(PC))p2 <- ggplot(data = dat_true, aes(x = PC_name, y = R2, group = 1)) + # x = PC -> only numbers on axis, x = PC_name -> can give problems with PC10 being ordered before PC2;# group 1 是用来避免某些warning/error的geom_errorbar(aes(ymin = R2 - R2_std, ymax = R2 + R2_std),color = Three_colorblind[1], linewidth = plot_linewidth, width = 0.4) + # bootstrapping的标准差# geom_bar(stat = "identity", position = position_dodge(), fill = Three_colorblind[1], width = 0.61) + #b07a4f, #9c6a5e, #643c3cgeom_line(color = Three_colorblind[1]) +geom_point(color = Three_colorblind[1], size = point_size) + #实际值geom_jitter(data = dat_boot, aes(x = PC_name, y = R2_boot, group = 1), alpha = 0.1,color = "black", shape = point_shape, size = 0.5, width = 0.1) + #每次自助样本的值geom_point(aes(x = PC_name, y = R2_median), color = plot_elements_dark,alpha = boot_alpha_main, shape = point_shape, size = point_size) + #添加自助法得到的中位数值geom_text(aes(x = PC_name, y = R2 + R2_std + 2, label = paste0(R2 %>% round(digits = 1), "%")),nudge_x = 0.33, size = 2) + #添加数值标注labs(title = "", x = "", y = "Explained variance") +theme_classic() +theme(title = element_blank(),text = element_text(size = normal_text),axis.line = element_line(color = graph_elements_dark),axis.ticks.x = element_line(color = graph_elements_dark),axis.ticks.y = element_blank(),axis.title = element_text(size = title_text, face = "bold"),# axis.title.x = element_blank(), #已经在'labs'中指定axis.text = element_text(size = normal_text),axis.text.y = element_blank(),plot.margin = unit(c(0, 1, 0, 1), "cm"),legend.position = "none") +NULL
p2# -----------------------------------
# 绘制变量载荷图
dat_boot <- pca_stats %>%dplyr::filter(PC <= n_pcs[1]) %>% #去除额外的PCdplyr::select(PC_name, var, loading_boot) %>% unique() #去除重复dat_true <- pca_stats %>%dplyr::filter(PC <= n_pcs[1]) %>% #去除额外的PCdplyr::select(PC_name, var, loading, loading_median, loading_std) %>% unique() #去除重复p3 <- ggplot(data = dat_true, aes(x = var, y = loading)) +facet_grid(. ~ PC_name, scales = "free_y") +geom_errorbar(aes(ymin = loading - loading_std, ymax = loading + loading_std), # loading_q25, ymax = loading_q75color = Three_colorblind[2], linewidth = plot_linewidth, width = 0.9) + # standard error = std from bootstrappinggeom_bar(stat = "identity", position = position_dodge(), fill = Three_colorblind[2]) + #b07a4f, #9c6a5e, #643c3cgeom_hline(yintercept = 0, color = graph_elements_dark) +geom_jitter(data = dat_boot, aes(x = var, y = loading_boot), alpha = boot_alpha_small, color = plot_elements_dark,shape = point_shape, size = 0.2, width = 0.1) + #每次自助抽样的值geom_point(aes(x = var, y = loading_median), alpha = boot_alpha_main, shape = point_shape,size = point_size, color = plot_elements_dark) + #添加自助法得到的中位数值coord_flip() + #对调坐标轴以更好地展示图形scale_y_continuous(breaks = waiver(), n.breaks = 4) + #修改x轴(对调后,这就是y轴)labs(y = "Loadings", x = "", title = "") +theme_classic() +theme(title = element_text(size = normal_text, face = "bold"),text = element_text(size = normal_text),axis.line.x = element_line(color = graph_elements_dark),axis.line.y = element_blank(),axis.ticks.x = element_line(color = graph_elements_dark),axis.ticks.y = element_blank(),axis.title = element_text(size = title_text),axis.text = element_text(size = normal_text),axis.text.x = element_text(angle = x_angle, vjust = x_adjust),legend.position = "none",legend.title = element_text(size = title_text),legend.text = element_text(size = subtitle_text),legend.key.height = unit(1.0, "mm"),legend.key.width = unit(1.0, "mm"),plot.margin = unit(c(0, 0, 0, 0), "cm"),strip.text = element_text(face = "bold", size = title_text),strip.background = element_blank()) +NULL
p3# -----------------------------------
# 绘制变量贡献图
dat_boot <- pca_stats %>%dplyr::filter(PC <= n_pcs[1]) %>% #去除额外的PCdplyr::select(PC_name, var, contrib_boot) %>% unique() #去除重复dat_true <- pca_stats %>%dplyr::filter(PC <= n_pcs[1]) %>% # remove additional PCsdplyr::select(PC_name, var, contrib, contrib_median, contrib_std) %>% unique() #去除重复p4<- ggplot(data = dat_true, aes(x = var, y = contrib)) +facet_grid(. ~ PC_name, scales = "free_y") +geom_errorbar(aes(ymin = contrib_median - contrib_std, ymax = contrib_median + contrib_std), # ymin = contrib_q25, ymax = contrib_q75color = Three_colorblind[3], linewidth = plot_linewidth, width = 0.9) + # standard error = standard deviation from bootstrappinggeom_bar(stat = "identity", position = position_dodge(), fill = Three_colorblind[3]) + #4f85b0, #59918e, #3c6464geom_hline(yintercept = 0, color = graph_elements_dark) +geom_jitter(data = dat_boot, aes(x = var, y = contrib_boot), alpha = boot_alpha_small, color = plot_elements_dark,shape = point_shape, size = 0.2, width = 0.1) + #每次自助抽样的值geom_point(aes(x = var, y = contrib_median), alpha = boot_alpha_main, shape = point_shape,size = point_size, color = plot_elements_dark) + #添加自助法得到的中位数值coord_flip() + #对调坐标轴以更好地展示图形scale_y_continuous(breaks = waiver(), n.breaks = 4) + #添加自助法得到的中位数值labs(y = "Contribution [%]", x = "", title = "") +theme_classic() +theme(title = element_text(size = normal_text, face = "bold"),text = element_text(size = normal_text),axis.line.x = element_line(color = graph_elements_dark),axis.line.y = element_blank(),axis.ticks.x = element_line(color = graph_elements_dark),axis.ticks.y = element_blank(),axis.title = element_text(size = title_text),axis.text = element_text(size = normal_text),axis.text.x = element_text(angle = x_angle, vjust = x_adjust),legend.position = "none",legend.title = element_text(size = title_text),legend.text = element_text(size = subtitle_text),legend.key.height = unit(1.0, "mm"),legend.key.width = unit(1.0, "mm"),# plot.margin = unit(c(0, 0, 0, 0), "cm"),strip.text = element_text(face = "bold", size = title_text),strip.background = element_blank()) +NULL
p4# -----------------------------------
# 拼图
library(patchwork)
p2+p3/p4
文中用到的数据代码:R语言绘制 PCA 双标图、碎石图、变量载荷图和变量贡献图(self).zip - 蓝奏云