补充月份,这个需求是计算每个小组前三个月的平均产能,前三个月指的是都是自然月,比如现在是2023年7月,所以现在计算的前三个月的平均产能就是2023年4月到6月的平均产能,但是这些月份可能不是连续的
方法一:把日期补齐
WITH t1 AS (SELECT group_id,group_name,all_pert,stat_monthFROM dws_group_user_month_statWHERE dt = '2023-07-10'and group_name is not nulland group_id = '00a62545-e66f-4961-9f39-8f4b5c894d19'
)
,months_table AS (SELECT group_name,group_id,min_month,max_month,add_months(min_month, i) as cur_monthFROM (SELECT group_name,group_id,min(stat_month) AS min_month,max(stat_month) AS max_monthFROM t1
-- WHERE group_id = '508b3e79-6e9a-446a-ac29-1f4428a006af'GROUP BY group_name, group_id) t1LATERAL VIEW posexplode(split(space(cast(months_between(max_month, min_month) as int)), ' ')) pe AS i, num
)
select group_name,group_id,cur_month,min_month,max_month,all_pert,AVG(all_pert) OVER (PARTITION BY group_name, group_id ORDER BY cur_month ROWS BETWEEN 3 PRECEDING AND 1 PRECEDING) AS average_sales
from (SELECT months_table.group_name,months_table.group_id,cur_month,min_month,max_month,nvl(all_pert, 0) all_pertFROM t1right JOIN months_tableON t1.group_name = months_table.group_nameand t1.stat_month = cur_month)t1;
方法二:
将数据炸开成三份
with t1 AS (
selectmonth_diff,stat_month,all_pert,group_name
from dws_group_month_stat lateral view explode(array(1,2,3)) tmp as month_diff
where dt = '${datestr}'
),t2 as (select all_pert,t1.stat_month,group_name,add_months(stat_month,month_diff) cur_monthfrom t1
)insert overwrite table dws_group_three_month_stat partition (dt='${datestr}')
selectgroup_name,cur_month,three_month_pert/3 three_month_pert
from (select cur_month,sum(all_pert) three_month_pert,group_namefrom t2group by cur_month,group_name)t3;"
补齐日期:
select staff_id,start_date,end_date,date_add(start_date, i) real_date,dayofweek(date_add(start_date, i)) real_week,codefrom t3LATERAL VIEW posexplode(split(space(DATEDIFF(end_date, start_date)), ' ')) pe AS i, numgroup by staff_id, start_date, end_date, i, code, date_add(start_date, i),dayofweek(date_add(start_date, i)),code
这里使用的是datediff