
加载数据 绘制茎叶图 绘制直方图 绘制概率密度曲线 绘制小提琴图 绘制箱线图 绘制小提琴图+箱线图 集中趋势统计 分散程度 apply的使用
加载数据
模拟数据下载
# Attach the tidyverse (dplyr verbs, ggplot2, the %>% pipe) used by the
# snippets that follow.
library(tidyverse)

# Read the score sheet into `cjb`. The file is decoded as UTF-8 and character
# columns are kept as plain strings rather than factors.
# NOTE(review): the path is machine-specific -- point it at your own copy.
cjb <- read.csv(
  "/home/wy/Downloads/cjb.csv",
  header = TRUE,
  stringsAsFactors = FALSE,
  fileEncoding = "UTF-8"
)
绘制茎叶图
# Stem-and-leaf display of the sx values for the rows whose bj equals 1101.
# pull() extracts the column straight into a vector, replacing the
# select() + as_vector() detour (as_vector() is superseded in purrr >= 1.0).
cjb %>%
  filter(bj == "1101") %>%
  pull(sx) %>%
  stem()
#   5 | 5799
#   6 | 0014
#   6 | 55789
#   7 | 000011122334444
#   7 | 788899
#   8 | 111222334444
#   8 | 589
#   9 | 224
绘制直方图
# Bin the sx values with base hist() without drawing anything; the returned
# object is kept so the ggplot2 histograms can reuse its bin edges.
sx_hist_result <- hist(cjb$sx, plot = FALSE)
# Inspect the result: its storage type and the components it carries
# (the ggplot2 calls rely on $breaks).
typeof(sx_hist_result)
names(sx_hist_result)
# Reproduce the base hist() histogram with ggplot2: same bin edges, per-bin
# counts drawn as text labels, and axes flipped so the bars run horizontally.
# after_stat(count) replaces the `..count..` notation, which is deprecated
# since ggplot2 3.4.0.
ggplot(data = cjb, mapping = aes(sx)) +
  geom_histogram(
    breaks = sx_hist_result$breaks,
    color = "darkgray",
    fill = "white"
  ) +
  stat_bin(
    breaks = sx_hist_result$breaks,
    geom = "text",
    aes(label = after_stat(count))
  ) +
  coord_flip()
绘制概率密度曲线
# Histogram of sx on the density scale (bar areas sum to 1) with a kernel
# density estimate drawn on top in blue. The histogram must use the density
# scale so both layers share the same y axis.
# after_stat(density) replaces the `..density..` notation, which is deprecated
# since ggplot2 3.4.0.
ggplot(data = cjb, mapping = aes(sx)) +
  geom_histogram(
    breaks = sx_hist_result$breaks,
    color = "darkgray",
    fill = "white",
    aes(y = after_stat(density))
  ) +
  geom_density(color = "blue")
绘制小提琴图
# Violin plot of sx. factor(0) is a constant dummy x value, so every row lands
# in a single group; coord_flip() lays the violin out horizontally.
cjb %>%
  ggplot(aes(x = factor(0), y = sx)) +
  geom_violin(fill = "orange", alpha = 0.2) +
  coord_flip()
绘制箱线图

# Horizontal box plot of sx (single group via the dummy x = factor(0)).
# Outliers are drawn as small red hollow circles, and a jittered rug marks the
# individual observations along the sx axis.
ggplot(cjb, aes(x = factor(0), y = sx)) +
  geom_boxplot(
    width = 0.25,
    fill = "#E69F00",
    outlier.colour = "red",
    outlier.shape = 1,
    outlier.size = 0.2
  ) +
  geom_rug(position = "jitter", size = 0.1, sides = "l") +
  coord_flip()
# Print the numbers behind the box plot of sx: whisker ends, hinges, median,
# sample size, notch interval, and the outlying points.
boxplot.stats(cjb[["sx"]])
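# $stats: lower whisker, lower hinge (Q1), median, upper hinge (Q3), upper whisker
# [1]  60  81  89  95 100
# $n: number of non-missing observations
# [1] 775
# $conf
# [1] 88.20543 89.79457
# $out: outliers (points lying beyond the whiskers)
# [1] 55 59 57 59 58 51 56 55 59 26 58 46  0 59 59
绘制小提琴图+箱线图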
# Violin and box plot of sx combined in one horizontal panel: the wide violin
# shows the density shape, the narrow box drawn on top shows the quartiles,
# red hollow circles mark outliers, and a jittered rug marks the individual
# observations.
ggplot(cjb, aes(x = factor(0), y = sx)) +
  geom_violin(fill = "#56B4E9", width = 0.75) +
  geom_boxplot(
    width = 0.25,
    fill = "#E69F00",
    outlier.colour = "red",
    outlier.shape = 1,
    outlier.size = 2
  ) +
  geom_rug(position = "jitter", size = 0.1, sides = "l") +
  coord_flip()
集中趋势统计
# Central tendency of sx within each wlfk group.
# The summarise() call sits on its own lines so that the trailing comments no
# longer swallow the code that follows them.
cjb %>%
  group_by(wlfk) %>%         # one group per wlfk value
  summarise(
    count = n(),             # number of rows in the group
    sx_median = median(sx),  # median
    sx_mean = mean(sx)       # mean
  )
# A tibble: 2 x 4
# wlfk  count sx_median sx_mean
# <chr> <int>     <dbl>   <dbl>
# 1 文科    394        84    82.7
# 2 理科    381        93    89.5
分散程度
# Spread of sx within each wlfk group, part 1: extremes and range.
# The summarise() call sits on its own lines so that the trailing comments no
# longer swallow the code that follows them.
cjb %>%
  group_by(wlfk) %>%              # one group per wlfk value
  summarise(
    sx_max = max(sx),             # maximum
    sx_min = min(sx),             # minimum
    sx_range = max(sx) - min(sx)  # range (max - min)
  )
# A tibble: 2 x 4
# wlfk  sx_max sx_min sx_range
# <chr>  <int>  <int>    <int>
# 1 文科     100     26       74
# 2 理科     100      0      100
# Spread of sx within each wlfk group, part 2: quartiles and IQR.
# The summarise() call sits on its own lines so that the trailing comments no
# longer swallow the code. The result columns are named for what they hold:
# sx_q3 (previously misspelled sx_O3) and sx_q1 (previously mislabeled
# sx_min, although it holds the first quartile, not the minimum).
cjb %>%
  group_by(wlfk) %>%              # one group per wlfk value
  summarise(
    sx_q3 = quantile(sx, 3 / 4),  # third quartile
    sx_q1 = quantile(sx, 1 / 4),  # first quartile
    sx_iqr = IQR(sx)              # interquartile range (Q3 - Q1)
  )
# A tibble: 2 x 4
# wlfk  sx_q3 sx_q1 sx_iqr
# <chr> <dbl> <dbl>  <dbl>
# 1 文科     92    75     17
# 2 理科     96    86     10
apply的使用
# Summary table for columns 4 to 12 of cjb (yw ... sw in the printed output):
# mean, median, range and IQR of each column, rounded to whole numbers.
# The result is a matrix with one row per statistic and one column per
# data-frame column.
# vapply() walks the data-frame columns directly instead of apply(), which
# first coerces the whole data frame to a matrix. The named FUN.VALUE template
# also enforces that every column yields exactly four numeric statistics.
round(
  vapply(
    cjb[, 4:12],
    function(x) {
      c(
        mean = mean(x),
        median = median(x),
        range = diff(range(x)),
        IQR = IQR(x)
      )
    },
    FUN.VALUE = c(mean = 0, median = 0, range = 0, IQR = 0)
  )
)
# yw  sx wy  zz  ls  dl  wl  hx  sw
# mean   87  86 87  92  89  93  81  92  86
# median 88  89 88  93  90  94  83  94  88
# range  96 100 99 100 100 100 100 100 100
# IQR     6  14  8   5  10   6  17  10  12