视频地址:尚硅谷大数据项目《在线教育之离线数仓》_哔哩哔哩_bilibili
目录
第9章 数仓开发之DWD层
P049
P050
P051
P052
P053
P054
P055
P056
P057
P058
P059
P060
P061
P062
P063
P064
P065
P066
P067
P068
P069
P070
第9章 数仓开发之DWD层
P049
第9章 数仓开发之DWD层
DWD层设计要点:
(1)DWD层的设计依据是维度建模理论,该层存储维度模型的事实表。
(2)DWD层的数据存储格式为orc列式存储+snappy压缩。
(3)DWD层表名的命名规范为dwd_数据域_表名_单分区增量全量标识(inc/full)。
-- 9.1 Trade domain: add-to-cart transaction fact table.
-- Creates the table and performs the first-day (bootstrap) load.
-- NOTE(review): the DDL uses an unqualified table name while the loads write to
-- edu2077.dwd_trade_cart_add_inc; presumably the session database is edu2077 - confirm.
DROP TABLE IF EXISTS dwd_trade_cart_add_inc;
CREATE EXTERNAL TABLE dwd_trade_cart_add_inc
(
    `id`          STRING COMMENT '编号',
    `user_id`     STRING COMMENT '用户id',
    `course_id`   STRING COMMENT '课程id',
    `date_id`     STRING COMMENT '时间id',
    `session_id`  STRING COMMENT '会话id',
    `create_time` STRING COMMENT '加购时间',
    `cart_price`  DECIMAL(16, 2) COMMENT '加购时价格'
) COMMENT '交易域加购事务事实表'
    PARTITIONED BY (`dt` STRING)
    STORED AS ORC
    LOCATION '/warehouse/edu/dwd/dwd_trade_cart_add_inc/'
    TBLPROPERTIES ('orc.compress' = 'snappy');

-- Ad-hoc previews of the ODS source tables.
select *
from edu2077.ods_cart_info_full;

select *
from edu2077.ods_cart_info_inc;

-- Preview of the bootstrap rows that feed the first-day load.
select data.id,
       data.user_id,
       data.course_id,
       data.date_id,
       data.session_id,
       data.create_time,
       data.cart_price
from edu2077.ods_cart_info_inc
where dt = '2022-02-21'
  and type = 'bootstrap-insert';

-- First-day load: route every bootstrap row to the partition of the day it was
-- created on. The last select column (dt) is the dynamic partition value, which
-- is why non-strict dynamic partition mode is required.
-- NOTE: a second, static-partition insert into dt = '2022-02-21' used to follow
-- this statement. It rewrote that partition with the complete bootstrap history,
-- duplicating rows already routed to their own partitions, so it was dropped.
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table edu2077.dwd_trade_cart_add_inc partition (dt)
select data.id,
       data.user_id,
       data.course_id,
       date_format(data.create_time, 'yyyy-MM-dd') as date_id,
       data.session_id,
       data.create_time,
       data.cart_price,
       date_format(data.create_time, 'yyyy-MM-dd') as dt
from edu2077.ods_cart_info_inc
where dt = '2022-02-21'
  and type = 'bootstrap-insert';

-- Ad-hoc check of the loaded result.
select *
from dwd_trade_cart_add_inc;

-- Daily load
-- Daily load for the add-to-cart fact table: takes the 'insert' change records
-- of the 2022-02-22 ODS partition and overwrites the matching DWD partition.
-- date_id is the calendar day derived from the row's create_time.
insert overwrite table edu2077.dwd_trade_cart_add_inc
    partition (dt = '2022-02-22')
select
    data.id,
    data.user_id,
    data.course_id,
    date_format(data.create_time, 'yyyy-MM-dd') as date_id,
    data.session_id,
    data.create_time,
    data.cart_price
from edu2077.ods_cart_info_inc
where dt = '2022-02-22'
  and type = 'insert';
P050
-- 9.2 Trade domain: shopping-cart periodic snapshot fact table.
-- NOTE(review): the DDL uses an unqualified table name while the load writes to
-- edu2077.dwd_trade_cart_full; presumably the session database is edu2077 - confirm.
DROP TABLE IF EXISTS dwd_trade_cart_full;
CREATE EXTERNAL TABLE dwd_trade_cart_full
(
    `id`          STRING COMMENT '编号',
    `user_id`     STRING COMMENT '用户id',
    `course_id`   STRING COMMENT '课程id',
    `date_id`     STRING COMMENT '时间id',
    `session_id`  STRING COMMENT '会话id',
    `course_name` STRING COMMENT '课程名称',
    `create_time` STRING COMMENT '加购时间',
    `cart_price`  DECIMAL(16, 2) COMMENT '加购时价格'
) COMMENT '交易域加购周期快照事实表'
    PARTITIONED BY (`dt` STRING)
    STORED AS ORC
    LOCATION '/warehouse/edu/dwd/dwd_trade_cart_full/'
    TBLPROPERTIES ('orc.compress' = 'snappy');

-- Ad-hoc preview of the ODS snapshot that feeds the load.
select *
from ods_cart_info_full
where dt = '2022-02-21';

-- Data load
-- Snapshot load for the cart fact table: copies the 2022-02-21 ODS snapshot,
-- keeping only rows with deleted = '0' and sold = '0' (presumably carts that are
-- neither removed nor already purchased - confirm against the source schema).
insert overwrite table edu2077.dwd_trade_cart_full
    partition (dt = '2022-02-21')
select
    id,
    user_id,
    course_id,
    date_format(create_time, 'yyyy-MM-dd') as date_id,
    session_id,
    course_name,
    create_time,
    cart_price
from edu2077.ods_cart_info_full
where dt = '2022-02-21'
  and deleted = '0'
  and sold = '0';

-- Ad-hoc check of the loaded result.
select *
from dwd_trade_cart_full;
P051
P052
P053
-- 9.3 Trade domain: trial-play-to-order accumulating snapshot fact table.
-- NOTE(review): the DDL uses an unqualified table name while the loads write to
-- edu2077.dwd_trade_course_order_inc; presumably the session database is edu2077 - confirm.
DROP TABLE IF EXISTS dwd_trade_course_order_inc;
CREATE EXTERNAL TABLE dwd_trade_course_order_inc
(
    `id`                   STRING COMMENT '编号',
    `user_id`              STRING COMMENT '用户id',
    `course_id`            STRING COMMENT '课程id',
    `course_name`          STRING COMMENT '课程名称',
    `category_id`          STRING COMMENT '分类id',
    `category_name`        STRING COMMENT '分类名称',
    `subject_id`           STRING COMMENT '科目id',
    `subject_name`         STRING COMMENT '科目名称',
    `order_id`             STRING COMMENT '订单id',
    `province_id`          STRING COMMENT '省份id',
    `play_time`            STRING COMMENT '首次播放时间',
    `play_date`            STRING COMMENT '首次播放日期',
    `order_time`           STRING COMMENT '首次下单时间',
    `order_date`           STRING COMMENT '首次下单日期',
    `end_date`             STRING COMMENT '结束日期,试听后七天内未下单即为结束,试听日期+7为结束日期',
    `session_id`           STRING COMMENT '会话id',
    `original_amount`      DECIMAL(16, 2) COMMENT '原始金额分摊',
    `coupon_reduce_amount` DECIMAL(16, 2) COMMENT '优惠金额分摊',
    `final_amount`         DECIMAL(16, 2) COMMENT '最终价格分摊'
) COMMENT '交易域试听下单累积快照事实表'
    PARTITIONED BY (`dt` STRING)
    STORED AS ORC
    LOCATION '/warehouse/edu/dwd/dwd_trade_course_order_inc/'
    TBLPROPERTIES ('orc.compress' = 'snappy');

-- Turn off strict mode so the loads can use a fully dynamic partition spec.
set hive.exec.dynamic.partition.mode=nonstrict;

-- Ad-hoc previews of the ODS source tables.
select *
from ods_user_chapter_process_full;

select *
from edu2077.ods_order_info_inc;

-- (1) First-day load
-- First-day load of the trial-play-to-order accumulating snapshot.
--   play       : earliest chapter-progress record per (user_id, course_id) in the
--                2022-02-21 snapshot, used as the first play time/date.
--   oi / od    : bootstrap order headers / order detail lines.
--   dim_course : course attributes from the 2022-02-21 dimension snapshot.
-- Target partition per row (last select column):
--   end_date     when no order exists and play_date + 7 days <= '2022-02-21',
--   order_date   when a matching order exists,
--   '9999-12-31' otherwise (row is still open).
-- The partition spec is fully dynamic, so non-strict mode is set here as well.
set hive.exec.dynamic.partition.mode=nonstrict;
with play as (
    select min(id)                                     as id,
           user_id,
           course_id,
           min(create_time)                            as play_time,
           date_format(min(create_time), 'yyyy-MM-dd') as play_date
    from edu2077.ods_user_chapter_process_full
    where dt = '2022-02-21'
    group by user_id, course_id
),
oi as (
    select data.id,
           data.province_id,
           data.session_id
    from edu2077.ods_order_info_inc
    where dt = '2022-02-21'
      and type = 'bootstrap-insert'
),
od as (
    select data.id,
           data.course_id,
           data.order_id,
           data.user_id,
           data.origin_amount,
           data.coupon_reduce,
           data.final_amount,
           data.create_time                            as order_time,
           date_format(data.create_time, 'yyyy-MM-dd') as order_date
    from edu2077.ods_order_detail_inc
    where dt = '2022-02-21'
      and type = 'bootstrap-insert'
),
dim_course as (
    select id,
           course_name,
           category_id,
           category_name,
           subject_id,
           subject_name
    from edu2077.dim_course_full
    where dt = '2022-02-21'
)
insert overwrite table edu2077.dwd_trade_course_order_inc partition (dt)
select final.id,
       final.user_id,
       final.course_id,
       dim_course.course_name,
       dim_course.category_id,
       dim_course.category_name,
       dim_course.subject_id,
       dim_course.subject_name,
       final.order_id,
       final.province_id,
       final.play_time,
       final.play_date,
       final.order_time,
       final.order_date,
       final.end_date,
       final.session_id,
       final.origin_amount,
       final.coupon_reduce,
       final.final_amount,
       case
           when final.end_date is not null then final.end_date
           when final.order_date is not null then final.order_date
           else '9999-12-31'
       end as dt
from (
    select play.id,
           play.user_id,
           play.course_id,
           od.order_id,
           oi.province_id,
           play.play_time,
           play.play_date,
           od.order_time,
           od.order_date,
           if(od.order_date is null and date_add(play.play_date, 7) <= '2022-02-21',
              date_add(play.play_date, 7),
              null) as end_date,
           oi.session_id,
           od.origin_amount,
           od.coupon_reduce,
           od.final_amount
    from play
    left join od
        on play.user_id = od.user_id
        and play.course_id = od.course_id
    left join oi
        on od.order_id = oi.id
    -- Keep plays without an order, or with an order placed after the first play.
    where od.order_time is null
       or od.order_time > play.play_time
) final
left join dim_course
    on final.course_id = dim_course.id;

-- Ad-hoc check of the loaded result.
select *
from dwd_trade_course_order_inc;
P054
-- 9.3 Trade domain: trial-play-to-order accumulating snapshot fact table.
-- (2) Daily load for 2022-02-22.
--   play       : rows still open in partition dt = '9999-12-31', plus
--                (user_id, course_id) pairs whose earliest play is 2022-02-22.
--                UNION (not UNION ALL) is kept on purpose: after a rerun the new
--                plays may already sit in the open partition and must not be
--                duplicated.
--   oi / od    : order headers / order detail lines inserted on 2022-02-22.
--   dim_course : course attributes from the 2022-02-22 dimension snapshot.
-- Target partition per row (last select column):
--   '2022-02-22' (end_date) when no order exists and play_date + 7 days is that day,
--   order_date              when a matching order exists,
--   '9999-12-31'            otherwise (row stays open).
set hive.exec.dynamic.partition.mode=nonstrict;
with play as (
    select id,
           user_id,
           course_id,
           play_time,
           play_date
    from edu2077.dwd_trade_course_order_inc
    where dt = '9999-12-31'
    union
    select min(id)                                     as id,
           user_id,
           course_id,
           min(create_time)                            as play_time,
           date_format(min(create_time), 'yyyy-MM-dd') as play_date
    from edu2077.ods_user_chapter_process_full
    where dt = '2022-02-22'
    group by user_id, course_id
    having date_format(min(create_time), 'yyyy-MM-dd') = '2022-02-22'
),
oi as (
    select data.id,
           data.province_id,
           data.session_id,
           data.create_time as order_time
    from edu2077.ods_order_info_inc
    where dt = '2022-02-22'
      and type = 'insert'
),
od as (
    select data.id,
           data.course_id,
           data.order_id,
           data.user_id,
           data.origin_amount,
           data.coupon_reduce,
           data.final_amount,
           date_format(data.create_time, 'yyyy-MM-dd') as order_date
    from edu2077.ods_order_detail_inc
    where dt = '2022-02-22'
      and type = 'insert'
),
dim_course as (
    select id,
           course_name,
           category_id,
           category_name,
           subject_id,
           subject_name
    from edu2077.dim_course_full
    where dt = '2022-02-22'
)
insert overwrite table edu2077.dwd_trade_course_order_inc partition (dt)
select final.id,
       final.user_id,
       final.course_id,
       dim_course.course_name,
       dim_course.category_id,
       dim_course.category_name,
       dim_course.subject_id,
       dim_course.subject_name,
       final.order_id,
       final.province_id,
       final.play_time,
       final.play_date,
       final.order_time,
       final.order_date,
       final.end_date,
       final.session_id,
       final.origin_amount,
       final.coupon_reduce,
       final.final_amount,
       case
           when final.end_date is not null then final.end_date
           when final.order_date is not null then final.order_date
           else '9999-12-31'
       end as dt
from (
    select play.id,
           play.user_id,
           play.course_id,
           od.order_id,
           oi.province_id,
           play.play_time,
           play.play_date,
           oi.order_time,
           od.order_date,
           if(od.order_date is null and date_add(play.play_date, 7) = '2022-02-22',
              '2022-02-22',
              null) as end_date,
           oi.session_id,
           od.origin_amount,
           od.coupon_reduce,
           od.final_amount
    from play
    left join od
        on play.user_id = od.user_id
        and play.course_id = od.course_id
    left join oi
        on od.order_id = oi.id
    -- Keep plays without an order, or with an order placed after the first play.
    where oi.order_time is null
       or oi.order_time > play.play_time
) final
left join dim_course
    on final.course_id = dim_course.id;
P055
-- 9.4 Trade domain: order transaction fact table.
-- NOTE(review): the DDL uses an unqualified table name while the loads write to
-- edu2077.dwd_trade_order_detail_inc; presumably the session database is edu2077 - confirm.
DROP TABLE IF EXISTS dwd_trade_order_detail_inc;
CREATE EXTERNAL TABLE dwd_trade_order_detail_inc
(
    `id`                   STRING COMMENT '编号',
    `order_id`             STRING COMMENT '订单id',
    `user_id`              STRING COMMENT '用户id',
    `course_id`            STRING COMMENT '课程id',
    `course_name`          STRING COMMENT '课程名称',
    `category_id`          STRING COMMENT '分类id',
    `category_name`        STRING COMMENT '分类名称',
    `subject_id`           STRING COMMENT '科目id',
    `subject_name`         STRING COMMENT '科目名称',
    `province_id`          STRING COMMENT '省份id',
    `date_id`              STRING COMMENT '下单日期id',
    `session_id`           STRING COMMENT '会话id',
    `source_id`            STRING COMMENT '来源id',
    `create_time`          STRING COMMENT '下单时间',
    `original_amount`      DECIMAL(16, 2) COMMENT '原始金额分摊',
    `coupon_reduce_amount` DECIMAL(16, 2) COMMENT '优惠金额分摊',
    `final_amount`         DECIMAL(16, 2) COMMENT '最终价格分摊',
    `out_trade_no`         STRING COMMENT '订单交易编号',
    `trade_body`           STRING COMMENT '订单描述'
) COMMENT '交易域下单事务事实表'
    PARTITIONED BY (`dt` STRING)
    STORED AS ORC
    LOCATION '/warehouse/edu/dwd/dwd_trade_order_detail_inc/'
    TBLPROPERTIES ('orc.compress' = 'snappy');

-- Ad-hoc preview of the ODS order detail rows.
select *
from edu2077.ods_order_detail_inc
where dt = '2022-02-21';

-- (1) First-day load
-- First-day load of the order fact table.
--   odt        : bootstrap order detail lines; date_id is the order creation day.
--   od         : bootstrap order headers (province, session, trade info).
--   log        : distinct (sid, sc) pairs from the 2022-02-21 log partition; sc is
--                exposed as source_id and matched through the order's session_id.
--   dim_course : course attributes from the 2022-02-21 dimension snapshot.
-- The last select column (dt) is the dynamic partition value, so every row lands
-- in the partition of its order creation day.
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table edu2077.dwd_trade_order_detail_inc partition (dt)
select odt.id,
       odt.order_id,
       odt.user_id,
       odt.course_id,
       dim_course.course_name,
       dim_course.category_id,
       dim_course.category_name,
       dim_course.subject_id,
       dim_course.subject_name,
       od.province_id,
       odt.date_id,
       od.session_id,
       log.source_id,
       odt.create_time,
       odt.origin_amount,
       odt.coupon_reduce,
       odt.final_amount,
       od.out_trade_no,
       od.trade_body,
       odt.date_id as dt
from (
    select data.id,
           data.order_id,
           data.user_id,
           data.course_id,
           date_format(data.create_time, 'yyyy-MM-dd') as date_id,
           data.create_time,
           data.origin_amount,
           data.coupon_reduce,
           data.final_amount
    from edu2077.ods_order_detail_inc
    where dt = '2022-02-21'
      and type = 'bootstrap-insert'
) odt
left join (
    select data.id,
           data.province_id,
           data.out_trade_no,
           data.session_id,
           data.trade_body
    from edu2077.ods_order_info_inc
    where dt = '2022-02-21'
      and type = 'bootstrap-insert'
) od
    on odt.order_id = od.id
left join (
    select distinct
           common.sid,
           common.sc as source_id
    from edu2077.ods_log_inc oli
    where dt = '2022-02-21'
) log
    on od.session_id = log.sid
left join (
    select id,
           course_name,
           category_id,
           category_name,
           subject_id,
           subject_name
    from edu2077.dim_course_full
    where dt = '2022-02-21'
) dim_course
    on odt.course_id = dim_course.id;
P056
-- 9.4 Trade domain: order transaction fact table.
-- (2) Daily load for 2022-02-22: joins the day's inserted order detail lines with
-- their order headers, the session's source (sc) from the log, and the course
-- dimension, then overwrites the static partition dt = '2022-02-22'.
insert overwrite table edu2077.dwd_trade_order_detail_inc partition (dt = '2022-02-22')
select odt.id,
       odt.order_id,
       odt.user_id,
       odt.course_id,
       dim_course.course_name,
       dim_course.category_id,
       dim_course.category_name,
       dim_course.subject_id,
       dim_course.subject_name,
       od.province_id,
       odt.date_id,
       od.session_id,
       log.source_id,
       odt.create_time,
       odt.origin_amount,
       odt.coupon_reduce,
       odt.final_amount,
       od.out_trade_no,
       od.trade_body
from (
    select data.id,
           data.order_id,
           data.user_id,
           data.course_id,
           date_format(data.create_time, 'yyyy-MM-dd') as date_id,
           data.create_time,
           data.origin_amount,
           data.coupon_reduce,
           data.final_amount
    from edu2077.ods_order_detail_inc
    where dt = '2022-02-22'
      and type = 'insert'
) odt
left join (
    select data.id,
           data.province_id,
           data.session_id,
           data.out_trade_no,
           data.trade_body
    from edu2077.ods_order_info_inc
    where dt = '2022-02-22'
      and type = 'insert'
) od
    on odt.order_id = od.id
left join (
    select distinct
           common.sid,
           common.sc as source_id
    from edu2077.ods_log_inc oli
    where dt = '2022-02-22'
) log
    on od.session_id = log.sid
left join (
    select id,
           course_name,
           category_id,
           category_name,
           subject_id,
           subject_name
    from edu2077.dim_course_full
    where dt = '2022-02-22'
) dim_course
    on odt.course_id = dim_course.id;
P057
-- 9.5 Trade domain: successful-payment transaction fact table.
-- The DROP is a statement of its own so the DDL can be re-run; it previously sat
-- on the header comment line and was never executed.
-- NOTE(review): the DDL uses an unqualified table name while the loads write to
-- edu2077.dwd_trade_pay_detail_suc_inc; presumably the session database is edu2077 - confirm.
DROP TABLE IF EXISTS dwd_trade_pay_detail_suc_inc;
CREATE EXTERNAL TABLE dwd_trade_pay_detail_suc_inc
(
    `id`                   STRING COMMENT '编号',
    `order_id`             STRING COMMENT '订单id',
    `user_id`              STRING COMMENT '用户id',
    `course_id`            STRING COMMENT '课程id',
    `province_id`          STRING COMMENT '省份id',
    `date_id`              STRING COMMENT '支付日期id',
    `alipay_trade_no`      STRING COMMENT '支付宝交易编号',
    `trade_body`           STRING COMMENT '交易内容',
    `payment_type`         STRING COMMENT '支付类型名称',
    `payment_status`       STRING COMMENT '支付状态',
    `callback_time`        STRING COMMENT '支付成功时间',
    `callback_content`     STRING COMMENT '回调信息',
    `original_amount`      DECIMAL(16, 2) COMMENT '原始支付金额分摊',
    `coupon_reduce_amount` DECIMAL(16, 2) COMMENT '优惠支付金额分摊',
    `final_amount`         DECIMAL(16, 2) COMMENT '最终支付金额分摊'
) COMMENT '交易域支付成功事务事实表'
    PARTITIONED BY (`dt` STRING)
    STORED AS ORC
    LOCATION '/warehouse/edu/dwd/dwd_trade_pay_detail_suc_inc/'
    TBLPROPERTIES ('orc.compress' = 'snappy');

-- (1) First-day load
-- First-day load of the successful-payment fact table.
--   odt : bootstrap order detail lines (amounts, user, course).
--   od  : bootstrap order headers (province).
--   pi  : bootstrap payment records that have a callback_time.
-- date_id and the dynamic partition value (dt) are taken from the payment's
-- callback_time: the table declares date_id as the payment date, whereas the
-- order's create_time is the order date.
-- The inner join to pi goes through od.id, so detail lines without a matching
-- order header are dropped even though od itself is left-joined.
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table edu2077.dwd_trade_pay_detail_suc_inc partition (dt)
select odt.id,
       od.id                                      as order_id,
       odt.user_id,
       odt.course_id,
       od.province_id,
       date_format(pi.callback_time, 'yyyy-MM-dd') as date_id,
       pi.alipay_trade_no,
       pi.trade_body,
       pi.payment_type,
       pi.payment_status,
       pi.callback_time,
       pi.callback_content,
       odt.origin_amount,
       odt.coupon_reduce,
       odt.final_amount,
       date_format(pi.callback_time, 'yyyy-MM-dd') as dt
from (
    select data.id,
           data.order_id,
           data.user_id,
           data.course_id,
           data.origin_amount,
           data.coupon_reduce,
           data.final_amount,
           data.create_time
    from edu2077.ods_order_detail_inc
    where dt = '2022-02-21'
      and type = 'bootstrap-insert'
) odt
left join (
    select data.id,
           data.province_id
    from edu2077.ods_order_info_inc
    where dt = '2022-02-21'
      and type = 'bootstrap-insert'
) od
    on odt.order_id = od.id
join (
    select data.alipay_trade_no,
           data.trade_body,
           data.order_id,
           data.payment_type,
           data.payment_status,
           data.callback_time,
           data.callback_content
    from edu2077.ods_payment_info_inc
    where dt = '2022-02-21'
      and type = 'bootstrap-insert'
      and data.callback_time is not null
) pi
    on od.id = pi.order_id;

-- Ad-hoc check of the loaded result.
select *
from dwd_trade_pay_detail_suc_inc;
P058
-- 9.5 Trade domain: successful-payment transaction fact table.
-- (2) Daily load for 2022-02-22.
--   odt / od : order detail lines / order headers from the 2022-02-22 and
--              2022-02-21 ODS partitions ('insert' or 'bootstrap-insert'), so an
--              order created the day before its payment still matches.
--   pi       : payment 'update' records of 2022-02-22 whose changed fields (old)
--              include callback_time.
-- date_id is taken from the payment's callback_time: the table declares it as the
-- payment date, and the order's create_time may be a day earlier than the
-- partition being written.
-- The inner join to pi goes through od.id, so detail lines without a matching
-- order header are dropped even though od itself is left-joined.
insert overwrite table edu2077.dwd_trade_pay_detail_suc_inc partition (dt = '2022-02-22')
select odt.id,
       od.id                                      as order_id,
       odt.user_id,
       odt.course_id,
       od.province_id,
       date_format(pi.callback_time, 'yyyy-MM-dd') as date_id,
       pi.alipay_trade_no,
       pi.trade_body,
       pi.payment_type,
       pi.payment_status,
       pi.callback_time,
       pi.callback_content,
       odt.origin_amount,
       odt.coupon_reduce,
       odt.final_amount
from (
    select data.id,
           data.order_id,
           data.user_id,
           data.course_id,
           data.origin_amount,
           data.coupon_reduce,
           data.final_amount,
           data.create_time
    from edu2077.ods_order_detail_inc
    where (dt = '2022-02-22' or dt = date_add('2022-02-22', -1))
      and (type = 'insert' or type = 'bootstrap-insert')
) odt
left join (
    select data.id,
           data.province_id
    from edu2077.ods_order_info_inc
    where (dt = '2022-02-22' or dt = date_add('2022-02-22', -1))
      and (type = 'insert' or type = 'bootstrap-insert')
) od
    on odt.order_id = od.id
join (
    select data.alipay_trade_no,
           data.trade_body,
           data.order_id,
           data.payment_type,
           data.payment_status,
           data.callback_time,
           data.callback_content
    from edu2077.ods_payment_info_inc
    where dt = '2022-02-22'
      and type = 'update'
      and array_contains(map_keys(old), 'callback_time')
) pi
    on od.id = pi.order_id;
P059
9.6 流量域页面浏览事务事实表
P060
9.7 流量域启动事务事实表
P061
9.8 流量域动作事务事实表
9.9 流量域曝光事务事实表
9.10 流量域错误事务事实表
P062
9.11 互动域收藏事务事实表
P063
9.12 互动域章节评价事务事实表
9.13 互动域课程评价事务事实表
P064
9.14 考试域答卷事务事实表
9.15 考试域答题事务事实表
P065
9.16 学习域播放周期快照事实表
(1)首日装载
P066
9.16 学习域播放周期快照事实表
(2)每日装载
P067
9.17 学习域播放事务事实表
P068
9.18 用户域用户注册事务事实表
P069
9.19 用户域用户登录事务事实表
P070
9.20 数据装载脚本
9.20.1 首日装载脚本
9.20.2 每日装载脚本