文章目录 数据诊断分析(均值方差) Matlab代码实现 结果展示
数据诊断分析(均值方差)
均值方差检测是一种简单但有效的异常检测方法,主要基于样本的均值和方差的统计信息。该方法的核心思想是假设正常的样本点应该聚集在某个区域,而异常点则可能远离这个区域。 3 σ 3\sigma 3 σ 准则:是一种统计学中常用的规则,用于衡量数据集中的值离均值的距离。该规则基于正态分布的性质,提供了一个衡量数据集中值的离散程度的指标。具体来说, 3 σ 3\sigma 3 σ 准则表明: 对于一个正态分布的数据集,约有 68% 的数据值会落在均值的一个标准差范围内。 约有 95% 的数据值会落在均值的两个标准差范围内。 约有 99.7% 的数据值会落在均值的三个标准差范围内。
Matlab代码实现
clear; clc; close all
load ( '.\data\daily_sia_7920.mat' ) ;
sia= min ( cdr_sia./ 1e11 ) ;
for ii= 1 : 42 sia_day ( ii) = find ( cdr_sia ( : , ii) ./ 1e11 == sia ( ii) ) ;
end msia= mean ( sia_day) ;
ssia= std ( sia_day) ; siz= 25 ; lind= 1.5 ;
x_0= 0.10 ;
y_0= 0.70 ;
len= 0.85 ;
width= 0.25 ;
d_x= 0.32 ;
d_y= - 0.27 ;
px= [ 0 0 0 0 ] ;
py= [ 0 1 2 3 ] ;
set ( gcf, 'color' , [ 1 1 1 ] , 'position' , [ 10 45 800 800 * 1.2 ] ) ;
axes ( 'position' , [ x_0+ d_x* px ( 1 ) , y_0+ d_y* py ( 1 ) , len, width] ) ;
plot ( sia_day, 'k-*' , 'linewidth' , lind) ; hold on
plot ( [ 1 42 ] , [ msia, msia] , 'k-' , 'linewidth' , lind) ; hold on
plot ( [ 1 42 ] , [ msia+ ssia, msia+ ssia] , 'k--' , 'linewidth' , lind) ; hold on
plot ( [ 1 42 ] , [ msia- ssia, msia- ssia] , 'k--' , 'linewidth' , lind) ; hold on
C1= sia_day; C1 ( C1> msia- ssia) = nan;
C3= sia_day; C3 ( C3< msia+ ssia) = nan;
scatter ( [ 1 : 42 ] , C1, 60 , 'b' , 'filled' , 's' ) ; hold on
scatter ( [ 1 : 42 ] , C3, 60 , 'r' , 'filled' , '^' ) ; hold on
set ( gca, 'linewidth' , lind) ; grid on
set ( gca, 'xlim' , [ 0 43 ] , 'xtick' , 1 : 5 : 42 , 'xticklabel' , '' , ... 'fontname' , 'Times New Roman' , 'FontSize' , siz- 10 , 'fontweight' , 'bold' )
set ( gca, 'ylim' , [ 210 280 ] , 'ytick' , 210 : 20 : 280 , 'yticklabel' , num2str ( [ 210 : 20 : 280 ] ',' 'Times New Roman' , 'FontSize' , siz- 10 , 'fontweight' , 'bold' ) ;
ylabel ( 'Day of year (d)' , 'fontname' , 'Times New Roman' , ... 'FontSize' , siz- 10 , 'fontweight' , 'bold' ) ;
hh= get ( gca) ;
X= hh. XLim;
Y= hh. YLim;
k1= [ 0.03 0.8 ] ;
k2= [ 0.3 0.8 ] ;
k3= [ 0.03 0.9 ] ;
x_2= X ( 1 ) + k2 ( 1 ) * ( X ( 2 ) - X ( 1 ) ) ;
y_2= Y ( 1 ) + k2 ( 2 ) * ( Y ( 2 ) - Y ( 1 ) ) ;
x_3= X ( 1 ) + k3 ( 1 ) * ( X ( 2 ) - X ( 1 ) ) ;
y_3= Y ( 1 ) + k3 ( 2 ) * ( Y ( 2 ) - Y ( 1 ) ) ;
text ( double ( x_3) , double ( y_3) , '(a)' , 'color' , 'k' , 'fontname' , ... 'Times New Roman' , 'fontweight' , 'bold' , 'fontsize' , siz- 10 ) ;
load ( '.\data\BFT_wind2_daily_7920.mat' )
ue= u;
uc= mean ( u) ;
stdc= std ( u) ;
data1= ue- uc;
data2= ue- uc- stdc;
data3= ue- uc- 2 * stdc;
data1 ( data1< 0 ) = nan; data1 ( ~ isnan ( data1) ) = 1 ;
data2 ( data2< 0 ) = nan; data2 ( ~ isnan ( data2) ) = 1 ;
data3 ( data3< 0 ) = nan; data3 ( ~ isnan ( data3) ) = 1 ;
mon= [ 31 28 31 30 31 30 31 31 30 31 30 31 ] ;
mona= [ 1 , 32 , 60 , 91 , 121 , 152 , 182 , 213 , 244 , 274 , 305 , 335 ] ;
monb= [ 31 , 59 , 90 , 120 , 151 , 181 , 212 , 243 , 273 , 304 , 334 , 365 ] ;
barmap1= [ 190 223 235 ; 0 190 255 ; 26 26 210 ] ./ 255 ;
for mm= 1 : 12 datam= nansum ( data1 ( mona ( mm) : monb ( mm) , : ) , 1 ) ; datas1= nansum ( data2 ( mona ( mm) : monb ( mm) , : ) , 1 ) ; datas2= nansum ( data3 ( mona ( mm) : monb ( mm) , : ) , 1 ) ; datad0 ( mm, : ) = datam- datas1; datad1 ( mm, : ) = datas1- datas2; datad2 ( mm, : ) = datas2;
end
data590= nansum ( datad0 ( 5 : 9 , : ) , 1 ) ;
data591= nansum ( datad1 ( 5 : 9 , : ) , 1 ) ;
data592= nansum ( datad2 ( 5 : 9 , : ) , 1 ) ; axes ( 'position' , [ x_0+ d_x* px ( 2 ) , y_0+ d_y* py ( 2 ) , len, width] ) ;
ch= bar ( [ data590; data591; data592] ',' stacked' ) ;
grid on
set ( ch ( 1 ) , 'FaceColor' , barmap1 ( 1 , : ) ) ;
set ( ch ( 2 ) , 'FaceColor' , barmap1 ( 2 , : ) ) ;
set ( ch ( 3 ) , 'FaceColor' , barmap1 ( 3 , : ) ) ;
set ( gca, 'linewidth' , lind, 'ylim' , [ 0 120 ] ) ;
ylabel ( 'Days yr^{-1}' , 'fontname' , 'Times New Roman' , 'FontSize' , siz- 10 , 'fontweight' , 'bold' ) ; text ( 15 , 110 , { '>0\sigma days' } , 'color' , barmap1 ( 1 , : ) , 'fontname' , ... 'Times New Roman' , 'fontweight' , 'bold' , 'fontsize' , siz- 10 ) ;
text ( 15 , 100 , { '≥1\sigma days' } , 'color' , barmap1 ( 2 , : ) , 'fontname' , ... 'Times New Roman' , 'fontweight' , 'bold' , 'fontsize' , siz- 10 ) ;
text ( 15 , 90 , { '≥2\sigma days' } , 'color' , barmap1 ( 3 , : ) , 'fontname' , ... 'Times New Roman' , 'fontweight' , 'bold' , 'fontsize' , siz- 10 ) ;
set ( gca, 'xlim' , [ 0 43 ] , 'xtick' , [ 1 : 5 : 42 ] , 'xticklabel' , '' , 'Fontname' , ... 'Times New Roman' , 'FontSize' , siz- 10 , 'fontweight' , 'bold' )
hh= get ( gca) ;
X= hh. XLim;
Y= hh. YLim;
k1= [ 0.03 0.8 ] ;
k2= [ 0.3 0.8 ] ;
k3= [ 0.03 0.9 ] ;
x_2= X ( 1 ) + k2 ( 1 ) * ( X ( 2 ) - X ( 1 ) ) ;
y_2= Y ( 1 ) + k2 ( 2 ) * ( Y ( 2 ) - Y ( 1 ) ) ;
x_3= X ( 1 ) + k3 ( 1 ) * ( X ( 2 ) - X ( 1 ) ) ;
y_3= Y ( 1 ) + k3 ( 2 ) * ( Y ( 2 ) - Y ( 1 ) ) ;
text ( double ( x_3) , double ( y_3) , '(b)' , 'color' , 'k' , 'fontname' , ... 'Times New Roman' , 'fontweight' , 'bold' , 'fontsize' , siz- 10 ) ;
load ( '.\data\BFT_wind2_daily_7920.mat' )
ue= u;
uc= mean ( u) ;
stdc= std ( u) ;
data1= ue- uc;
data2= ue- uc- stdc;
data3= ue- uc- 2 * stdc;
data1 ( data1< 0 ) = nan; data1 ( ~ isnan ( data1) ) = 1 ;
data2 ( data2< 0 ) = nan; data2 ( ~ isnan ( data2) ) = 1 ;
data3 ( data3< 0 ) = nan; data3 ( ~ isnan ( data3) ) = 1 ;
barmap2= [ 178 38 38 ; 255 27 255 ; 221 159 221 ; 255 195 204 ; 187 143 143 ] ./ 255 ;
for yy= 1 : 42 for mm= 1 : 12 events3= 0 ; for aa= mona ( mm) : monb ( mm) - 2 if ~ isnan ( data2 ( aa, yy) ) & ~ isnan ( data2 ( aa+ 1 , yy) ) & ~ isnan ( data2 ( aa+ 2 , yy) ) events3= events3+ 1 ; end Events3 ( mm, yy) = events3; end events4= 0 ; for aa= mona ( mm) : monb ( mm) - 3 if ~ isnan ( data2 ( aa, yy) ) & ~ isnan ( data2 ( aa+ 1 , yy) ) & ~ isnan ( data2 ( aa+ 2 , yy) ) & ~ isnan ( data2 ( aa+ 3 , yy) ) events4= events4+ 1 ; end Events4 ( mm, yy) = events4; end events5= 0 ; for aa= mona ( mm) : monb ( mm) - 4 if ~ isnan ( data2 ( aa, yy) ) & ~ isnan ( data2 ( aa+ 1 , yy) ) & ~ isnan ( data2 ( aa+ 2 , yy) ) & ~ isnan ( data2 ( aa+ 3 , yy) ) ... & ~ isnan ( data2 ( aa+ 4 , yy) ) events5= events5+ 1 ; end Events5 ( mm, yy) = events5; end events6= 0 ; for aa= mona ( mm) : monb ( mm) - 5 if ~ isnan ( data2 ( aa, yy) ) & ~ isnan ( data2 ( aa+ 1 , yy) ) & ~ isnan ( data2 ( aa+ 2 , yy) ) & ~ isnan ( data2 ( aa+ 3 , yy) ) ... & ~ isnan ( data2 ( aa+ 4 , yy) ) & ~ isnan ( data2 ( aa+ 5 , yy) ) events6= events6+ 1 ; end Events6 ( mm, yy) = events6; end events7= 0 ; for aa= mona ( mm) : monb ( mm) - 6 if ~ isnan ( data2 ( aa, yy) ) & ~ isnan ( data2 ( aa+ 1 , yy) ) & ~ isnan ( data2 ( aa+ 2 , yy) ) & ~ isnan ( data2 ( aa+ 3 , yy) ) ... & ~ isnan ( data2 ( aa+ 4 , yy) ) & ~ isnan ( data2 ( aa+ 5 , yy) ) & ~ isnan ( data2 ( aa+ 6 , yy) ) events7= events7+ 1 ; end Events7 ( mm, yy) = events7; end end
end
datas3= nansum ( Events3 ( 5 : 9 , : ) , 1 ) ;
datas4= nansum ( Events4 ( 5 : 9 , : ) , 1 ) ;
datas5= nansum ( Events5 ( 5 : 9 , : ) , 1 ) ;
datas6= nansum ( Events6 ( 5 : 9 , : ) , 1 ) ;
datas7= nansum ( Events7 ( 5 : 9 , : ) , 1 ) ; axes ( 'position' , [ x_0+ d_x* px ( 3 ) , y_0+ d_y* py ( 3 ) , len, width] ) ;
ch= bar ( [ datas3; datas4; datas5; datas6; datas7] ',' stacked' ) ;
grid on
set ( ch ( 1 ) , 'FaceColor' , barmap2 ( 1 , : ) ) ;
set ( ch ( 2 ) , 'FaceColor' , barmap2 ( 2 , : ) ) ;
set ( ch ( 3 ) , 'FaceColor' , barmap2 ( 3 , : ) ) ;
set ( ch ( 4 ) , 'FaceColor' , barmap2 ( 4 , : ) ) ;
set ( ch ( 5 ) , 'FaceColor' , barmap2 ( 5 , : ) ) ; set ( gca, 'linewidth' , lind, 'ylim' , [ 0 60 ] ) ;
ylabel ( 'Events yr^{-1}' , 'fontname' , 'Times New Roman' , 'FontSize' , siz- 10 , 'fontweight' , 'bold' ) ;
text ( 15 , 58 , { '3-days events' } , 'color' , barmap2 ( 1 , : ) , 'fontname' , ... 'Times New Roman' , 'fontweight' , 'bold' , 'fontsize' , siz- 10 ) ;
text ( 15 , 53 , { '4-days events' } , 'color' , barmap2 ( 2 , : ) , 'fontname' , ... 'Times New Roman' , 'fontweight' , 'bold' , 'fontsize' , siz- 10 ) ;
text ( 15 , 48 , { '5-days events' } , 'color' , barmap2 ( 3 , : ) , 'fontname' , ... 'Times New Roman' , 'fontweight' , 'bold' , 'fontsize' , siz- 10 ) ;
text ( 15 , 43 , { '6-days events' } , 'color' , barmap2 ( 4 , : ) , 'fontname' , ... 'Times New Roman' , 'fontweight' , 'bold' , 'fontsize' , siz- 10 ) ;
text ( 15 , 38 , { '7-days events' } , 'color' , barmap2 ( 5 , : ) , 'fontname' , ... 'Times New Roman' , 'fontweight' , 'bold' , 'fontsize' , siz- 10 ) ;
set ( gca, 'xlim' , [ 0 43 ] , 'xtick' , [ 1 : 5 : 42 ] , 'xticklabel' , 1979 : 5 : 2020 , 'Fontname' , ... 'Times New Roman' , 'FontSize' , siz- 10 , 'fontweight' , 'bold' )
hh= get ( gca) ;
X= hh. XLim;
Y= hh. YLim;
k1= [ 0.03 0.8 ] ;
k2= [ 0.3 0.8 ] ;
k3= [ 0.03 0.9 ] ;
x_2= X ( 1 ) + k2 ( 1 ) * ( X ( 2 ) - X ( 1 ) ) ;
y_2= Y ( 1 ) + k2 ( 2 ) * ( Y ( 2 ) - Y ( 1 ) ) ;
x_3= X ( 1 ) + k3 ( 1 ) * ( X ( 2 ) - X ( 1 ) ) ;
y_3= Y ( 1 ) + k3 ( 2 ) * ( Y ( 2 ) - Y ( 1 ) ) ;
text ( double ( x_3) , double ( y_3) , '(c)' , 'color' , 'k' , 'fontname' , ... 'Times New Roman' , 'fontweight' , 'bold' , 'fontsize' , siz- 10 ) ;
结果展示