目录
- 1 题目
- 2 建表语句
- 3 题解
1 题目
查询每个用户最大连续登录天数
样例数据如下 login_log:
2 建表语句
-- DDL: table holding user login records (user id + login date).
create table if not exists login_log (
    user_id    int  comment '用户id',
    login_time date comment '登录时间'
);
-- Load the sample rows; "overwrite" replaces whatever the table held before.
insert overwrite table login_log
values
    (1, '2022-11-28'),
    (1, '2022-12-01'),
    (1, '2022-12-02'),
    (1, '2022-12-03'),
    (2, '2022-12-01'),
    (2, '2022-12-04');
3 题解
(1)开窗,排序
-- Step 1: number each user's logins in ascending date order.
select
    user_id,
    login_time,
    row_number() over (
        partition by user_id
        order by login_time
    ) as rn
from login_log;
执行结果
(2)利用等差数列的特性:如果是连续登录,则 login_time 减去行号 rn 所得的日期(diff)相等。
-- Step 2: consecutive dates form an arithmetic sequence, so subtracting the
-- row number from the login date yields the same value (diff) for every day
-- that belongs to one unbroken streak.
-- NOTE(review): this relies on at most one row per user per day; duplicate
-- dates would advance rn without advancing the date and split a streak.
select
    user_id,
    login_time,
    -- Hive's date_sub works in whole days: it subtracts rn days from login_time.
    date_sub(login_time, rn) as diff
from (
    select
        user_id,
        login_time,
        row_number() over (partition by user_id order by login_time) as rn
    from login_log
) t;
(3)按照 diff 分组,获取每个用户每次连续登录的天数
-- Step 3: group by (user_id, diff); every group is one streak of consecutive
-- logins and its row count is the length of that streak in days.
-- NOTE(review): count(*) equals days only if there is at most one row per
-- user per day; de-duplicate login_log first if that is not guaranteed.
select
    user_id,
    diff,
    count(*) as days
from (
    select
        user_id,
        login_time,
        -- Hive's date_sub works in whole days: it subtracts rn days from login_time.
        date_sub(login_time, rn) as diff
    from (
        select
            user_id,
            login_time,
            row_number() over (partition by user_id order by login_time) as rn
        from login_log
    ) t
) tt
group by
    user_id,
    diff;
执行结果
(4)得出每个用户最大连续登录天数
-- Step 4: for each user, keep the longest streak found in step 3.
-- NOTE(review): the streak logic assumes at most one row per user per day;
-- de-duplicate login_log first if that is not guaranteed.
select
    user_id,
    max(days) as max_days
from (
    -- One row per (user, streak) with the streak length in days.
    select
        user_id,
        diff,
        count(*) as days
    from (
        select
            user_id,
            login_time,
            -- Hive's date_sub works in whole days: it subtracts rn days from login_time.
            date_sub(login_time, rn) as diff
        from (
            select
                user_id,
                login_time,
                row_number() over (partition by user_id order by login_time) as rn
            from login_log
        ) t
    ) tt
    group by
        user_id,
        diff
) ttt
group by
    user_id;
执行结果