目录
0 问题背景
1 数据准备
2 问题解决
2.1 模型构建
(1)符号规定
(2)基本假设
(3)模型的分析与建立
2.2 模型求解
3 小结
0 问题背景
1960年—1985年全国社会商品零售额如图1 所示
表1全国社会商品零售额数据
年份 | 1960 | 1961 | 1962 | 1963 | 1964 | 1965 | 1966 | 1967 |
零售总额 | 696.6 | 607.7 | 604 | 604.5 | 638.2 | 670.3 | 732.8 | 770.5 |
年份 | 1968 | 1969 | 1970 | 1971 | 1972 | 1973 | 1974 | 1975 |
零售总额 | 737.3 | 801.5 | 858 | 929.2 | 1023.3 | 1106.7 | 1163.6 | 1271.1 |
年份 | 1976 | 1977 | 1978 | 1979 | 1980 | 1981 | 1982 |
|
零售总额 | 1339.4 | 1432.8 | 1558.6 | 1800 | 2140 | 2350 | 2570 |
|
问题:试用三次指数平滑法预测1983年和1985年全国社会商品零售额?
1 数据准备
create table sale_amount as
select '1960' years, '696.6' sale_amount from dual union all
select '1961' years, '607.7' sale_amount from dual union all
select '1962' years, '604' sale_amount from dual union all
select '1963' years, '604.5' sale_amount from dual union all
select '1964' years, '638.2' sale_amount from dual union all
select '1965' years, '670.3' sale_amount from dual union all
select '1966' years, '732.8' sale_amount from dual union all
select '1967' years, '770.5' sale_amount from dual union all
select '1968' years, '737.3' sale_amount from dual union all
select '1969' years, '801.5' sale_amount from dual union all
select '1970' years, '858' sale_amount from dual union all
select '1971' years, '929.2' sale_amount from dual union all
select '1972' years, '1023.3' sale_amount from dual union all
select '1973' years, '1106.7' sale_amount from dual union all
select '1974' years, '1163.6' sale_amount from dual union all
select '1975' years, '1271.1' sale_amount from dual union all
select '1976' years, '1339.4' sale_amount from dual union all
select '1977' years, '1432.8' sale_amount from dual union all
select '1978' years, '1558.6' sale_amount from dual union all
select '1979' years, '1800' sale_amount from dual union all
select '1980' years, '2140' sale_amount from dual union all
select '1981' years, '2350' sale_amount from dual union all
select '1982' years, '2570' sale_amount from dual
2 问题解决
2.1 模型构建
(1)符号规定
(2)基本假设
- 假设本问题考虑全社会商品零售额数据;
- 假设本问题只考虑销售,不考虑其余因素
- 假设本问题只考虑销售额总额,不考虑其余分支
(3)模型的分析与建立
令加权系数,则计算公式为
其中, 表示一次指数的平滑值;表示二次次指数的平滑值;表示三次指数的平滑值。初始值为
三次指数平滑法的预测模型为:
其中,
2.2 模型求解
步骤1:计算初始值
select years, sale_amount, last_value(init_sale_amount ignore nulls) over (order by YEARS) init_sale_amount, rn
from (select years, sale_amount, casewhen rn = 1 then cast(avg(sale_amount)over (order by years rows between current row and 2 following ) as decimal(18, 1)) end init_sale_amount, rnfrom (select years, sale_amount, row_number() over (order by years) rnfrom sale_amount) t) t
步骤2 :计算一次平滑值
with init as (select years, sale_amount, last_value(init_sale_amount ignore nulls) over (order by YEARS) init_sale_amount, rnfrom (select years, sale_amount, casewhen rn = 1 then cast(avg(sale_amount)over (order by years rows between current row and 2 following ) as decimal(18, 1)) end init_sale_amount, rnfrom (select years, sale_amount, row_number() over (order by years) rnfrom sale_amount) t) t
)
--计算一次平滑值, s1 as (select t1.years, t1.sale_amount, t1.init_sale_amount, t1.rn, cast(sum(case when t2.rn <= t1.rn then t2.sale_amount * power(0.7, t1.rn - t2.rn) else 0 end) * 0.3 +power(0.7, t1.rn) * t1.init_sale_amount as decimal(18, 4)) s1_p3from init t1,init t2group by t1.years, t1.sale_amount, t1.init_sale_amount, t1.rn
)
select * from s1 order by years;
步骤3:计算二次平滑值
with init as (select years, sale_amount, last_value(init_sale_amount ignore nulls) over (order by YEARS) init_sale_amount, rnfrom (select years, sale_amount, casewhen rn = 1 then cast(avg(sale_amount)over (order by years rows between current row and 2 following ) as decimal(18, 1)) end init_sale_amount, rnfrom (select years, sale_amount, row_number() over (order by years) rnfrom sale_amount) t) t
)
--计算一次平滑值, s1 as (select t1.years, t1.sale_amount, t1.init_sale_amount, t1.rn, cast(sum(case when t2.rn <= t1.rn then t2.sale_amount * power(0.7, t1.rn - t2.rn) else 0 end) * 0.3 +power(0.7, t1.rn) * t1.init_sale_amount as decimal(18, 4)) s1_p3from init t1,init t2group by t1.years, t1.sale_amount, t1.init_sale_amount, t1.rn
)
--计算二次平滑值
, s2 as (select t1.years, t1.sale_amount, t1.init_sale_amount, t1.rn, t1.s1_p3, cast(sum(case when t2.rn <= t1.rn then t2.s1_p3 * power(0.7, t1.rn - t2.rn) else 0 end) * 0.3 +power(0.7, t1.rn) * t1.init_sale_amount as decimal(18, 4)) s2_p3from s1 t1,s1 t2group by t1.years, t1.sale_amount, t1.init_sale_amount, t1.rn, t1.s1_p3
)
select * from s2 order by years;
步骤4:计算三次平滑值
with init as (select years, sale_amount, last_value(init_sale_amount ignore nulls) over (order by YEARS) init_sale_amount, rnfrom (select years, sale_amount, casewhen rn = 1 then cast(avg(sale_amount)over (order by years rows between current row and 2 following ) as decimal(18, 1)) end init_sale_amount, rnfrom (select years, sale_amount, row_number() over (order by years) rnfrom sale_amount) t) t
)
--计算一次平滑值, s1 as (select t1.years, t1.sale_amount, t1.init_sale_amount, t1.rn, cast(sum(case when t2.rn <= t1.rn then t2.sale_amount * power(0.7, t1.rn - t2.rn) else 0 end) * 0.3 +power(0.7, t1.rn) * t1.init_sale_amount as decimal(18, 4)) s1_p3from init t1,init t2group by t1.years, t1.sale_amount, t1.init_sale_amount, t1.rn
)
--计算二次平滑值
, s2 as (select t1.years, t1.sale_amount, t1.init_sale_amount, t1.rn, t1.s1_p3, cast(sum(case when t2.rn <= t1.rn then t2.s1_p3 * power(0.7, t1.rn - t2.rn) else 0 end) * 0.3 +power(0.7, t1.rn) * t1.init_sale_amount as decimal(18, 4)) s2_p3from s1 t1,s1 t2group by t1.years, t1.sale_amount, t1.init_sale_amount, t1.rn, t1.s1_p3
)--计算三次平滑值
,s3 as (select t1.years, t1.sale_amount, t1.init_sale_amount, t1.rn, t1.s2_p3, cast(sum(case when t2.rn <= t1.rn then t2.s2_p3 * power(0.7, t1.rn - t2.rn) else 0 end) * 0.3 +power(0.7, t1.rn) * t1.init_sale_amount as decimal(18, 4)) s3_p3from s2 t1,s2 t2group by t1.years, t1.sale_amount, t1.init_sale_amount, t1.rn, t1.s2_p3
)
select * from s3 order by years;
步骤4:计算二次函数模型系数
with init as (select years, sale_amount, last_value(init_sale_amount ignore nulls) over (order by YEARS) init_sale_amount, rnfrom (select years, sale_amount, casewhen rn = 1 then cast(avg(sale_amount)over (order by years rows between current row and 2 following ) as decimal(18, 1)) end init_sale_amount, rnfrom (select years, sale_amount, row_number() over (order by years) rnfrom sale_amount) t) t
)
--计算一次平滑值, s1 as (select t1.years, t1.sale_amount, t1.init_sale_amount, t1.rn, cast(sum(case when t2.rn <= t1.rn then t2.sale_amount * power(0.7, t1.rn - t2.rn) else 0 end) * 0.3 +power(0.7, t1.rn) * t1.init_sale_amount as decimal(18, 4)) s1_p3from init t1,init t2group by t1.years, t1.sale_amount, t1.init_sale_amount, t1.rn
)
--计算二次平滑值
, s2 as (select t1.years, t1.sale_amount, t1.init_sale_amount, t1.rn, t1.s1_p3, cast(sum(case when t2.rn <= t1.rn then t2.s1_p3 * power(0.7, t1.rn - t2.rn) else 0 end) * 0.3 +power(0.7, t1.rn) * t1.init_sale_amount as decimal(18, 4)) s2_p3from s1 t1,s1 t2group by t1.years, t1.sale_amount, t1.init_sale_amount, t1.rn, t1.s1_p3
)--计算三次平滑值
,s3 as (select t1.years, t1.sale_amount, t1.init_sale_amount, t1.rn, t1.s1_p3, t1.s2_p3, cast(sum(case when t2.rn <= t1.rn then t2.s2_p3 * power(0.7, t1.rn - t2.rn) else 0 end) * 0.3 +power(0.7, t1.rn) * t1.init_sale_amount as decimal(18, 4)) s3_p3from s2 t1,s2 t2group by t1.years, t1.sale_amount, t1.init_sale_amount, t1.rn, t1.s1_p3, t1.s2_p3
)--计算二次趋势模型系数
select years, sale_amount, init_sale_amount, rn, s1_p3, s2_p3, s3_p3, cast(case when rk=1 then 3*s1_p3 - 3*s2_p3 + s3_p3 else 0 end as decimal(18,4)) a_p3, cast(case when rk=1 then ((6-5*0.3)*s1_p3 - 2*(5-4*0.3)*s2_p3 + (4-3*0.3)*s3_p3 ) * 0.3/(2*power(0.7,2)) else 0 end as decimal(18,2)) b_p3, cast(case when rk=1 then (s1_p3 - 2*s2_p3 + s3_p3 ) * power(0.3,2)/(2*power(0.7,2)) else 0 end as decimal(18,4)) c_p3
from (select years, sale_amount, init_sale_amount, rn, s1_p3, s2_p3, s3_p3, row_number() over (order by rn desc) rkfrom s3) t
order by years
步骤5:构建二次预测模型,并预测结果值
由步骤4得知:
a=2572.2607,b=259.3367,c=8.9818
则预测模型为:
最后求得1983,1985年销售额的预测值分别是2840.5792亿元,3431.107亿元。
3 小结
本文针对商品零售额采用三次指数平滑法构建预测模型,文中选取加权系数 求解模型,并利用SQL语言进行实现,若实际中有相关需求,可针对加权系数再进行优化,利用RMSE均方根误差来使模型达到最优。
如果您觉得本文还不错,对你有帮助,那么不妨可以关注一下我的数字化建设实践之路专栏,这里的内容会更精彩。
专栏 原价99,现在活动价59.9,按照阶梯式增长,还差5个人上升到69.9,最终恢复到原价。
专栏优势:
(1)一次收费持续更新。
(2)实战中总结的SQL技巧,帮助SQLBOY 在SQL语言上有质的飞越,无论你应对业务难题及面试都会游刃有余【全网唯一讲SQL实战技巧,方法独特】
(3)实战中数仓建模技巧总结,让你认识不一样的数仓。【数据建模+业务建模,不一样的认知体系】(如果只懂数据建模而不懂业务建模,数仓体系认知是不全面的)
(4)数字化建设当中遇到难题解决思路及问题思考。
我的 专栏具体链接如下:
数字化建设通关指南_莫叫石榴姐的博客-CSDN博客
https://blog.csdn.net/godlovedaniel/category_12706766.html