-- Create the database and switch to it.
create database if not exists taobao;
use taobao;

-- Create the raw user-behaviour table that the dataset is imported into.
-- (A DESC used to run before this statement; it was removed because it
-- fails while the table does not exist yet. The table is described below.)
create table if not exists use_behaviour (
    user_id        int(9),
    item_id        int(9),
    category_id    int(9),
    behaviour_type varchar(5),
    timestamps     int(14)  -- Unix time; converted with FROM_UNIXTIME() later in this script
);

-- The column is already named `timestamps` above, so the former
-- "alter table use_behaviour change timestamp timestamps int(14)" step is
-- not executed: on this schema it fails with an unknown-column error.
-- NOTE(review): it is only needed when an import tool created the column
-- as `timestamp`; confirm against the actual import procedure.

-- Check how many rows have been imported.
select count(*) from use_behaviour;
select * from use_behaviour limit 10;
desc use_behaviour;

-- Check for NULL values: count rows with a NULL in any imported column.
select count(*) as null_rows
from use_behaviour
where user_id is null
   or item_id is null
   or category_id is null
   or behaviour_type is null
   or timestamps is null;

-- Check for duplicate values
-- A row counts as a duplicate when the (user_id, item_id, timestamps)
-- combination occurs more than once; list those combinations.
select
    user_id,
    item_id,
    timestamps
from use_behaviour
group by
    user_id,
    item_id,
    timestamps
having count(*) > 1;

-- Remove duplicates.
-- A surrogate key is needed to tell otherwise identical rows apart.
alter table use_behaviour add column id int first;
select * from use_behaviour limit 10;

-- Turn id into an auto-increment primary key.
alter table use_behaviour modify column id int primary key auto_increment;
select * from use_behaviour limit 10;

-- For every duplicated combination keep the row with the smallest id and
-- delete all rows of that combination whose id is larger.
delete ub
from use_behaviour as ub
inner join (
    select
        user_id,
        item_id,
        timestamps,
        min(id) as keep_id
    from use_behaviour
    group by
        user_id,
        item_id,
        timestamps
    having count(*) > 1
) as first_seen
    on  ub.user_id    = first_seen.user_id
    and ub.item_id    = first_seen.item_id
    and ub.timestamps = first_seen.timestamps
where ub.id > first_seen.keep_id;

-- Add three columns: date, time, hour
-- Inspect the buffer-related server variables, then change the InnoDB
-- buffer pool size before the large table-wide updates below.
show variables like '%_buffer%';
set global innodb_buffer_pool_size = 10700000000;

-- Add a date-time column derived from the Unix timestamp.
-- NULL is declared explicitly so the column never picks up the implicit
-- "default current_timestamp on update current_timestamp" behaviour that
-- TIMESTAMP columns get when explicit_defaults_for_timestamp is disabled;
-- that would overwrite the values during the later updates.
alter table use_behaviour add column datetimes timestamp(0) null default null;
update use_behaviour set datetimes = from_unixtime(timestamps);
select * from use_behaviour limit 5;

-- Add the derived text columns: date (YYYY-MM-DD), time (HH:MM:SS), hour (HH).
alter table use_behaviour
    add column dates char(10),
    add column times char(8),
    add column hours char(2);

-- Fill all three columns in a single pass over the table; doing it with
-- three separate updates costs roughly two extra table scans.
-- Fix: the combined statement used to assign `dates` twice and never set
-- `hours`, and was followed by three per-column updates that repeated the work.
update use_behaviour
set dates = substring(datetimes, 1, 10),
    times = substring(datetimes, 12, 8),
    hours = substring(datetimes, 12, 2);
select * from use_behaviour limit 5;

-- Remove outliers. The three cleaning steps: remove nulls, remove duplicates, remove outliers
-- Inspect the time span covered by the data.
select max(datetimes), min(datetimes) from use_behaviour;

-- Keep only 2017-11-25 through 2017-12-03 inclusive.
-- Fix: the statement was missing its terminating semicolon and ran into
-- the following DESC. The upper bound is written half-open; for a
-- TIMESTAMP(0) column it selects the same rows as "> '2017-12-03 23:59:59'".
delete from use_behaviour
where datetimes <  '2017-11-25 00:00:00'
   or datetimes >= '2017-12-04 00:00:00';
-- 942 rows deleted in the recorded run

-- Data overview
desc use_behaviour;
select * from use_behaviour limit 5;
select count(1) from use_behaviour; -- 1889658 records in the recorded run

-- Create a temporary working table
-- Build a 100000-row sample table with the same structure as
-- use_behaviour, for trying queries out quickly.
drop table if exists temp_behaviour;
create table temp_behaviour like use_behaviour;

-- Take the sample. "select *" is safe here because temp_behaviour was just
-- created LIKE the source table, so the column lists match.
-- Fix: LIMIT without ORDER BY returns an arbitrary set of rows; ordering by
-- the primary key makes the sample reproducible.
insert into temp_behaviour
select *
from use_behaviour
order by id
limit 100000;

select * from temp_behaviour limit 5;

-- pv (page views)
-- Page views per day (sample table).
select
    dates,
    count(*) as `pv`
from temp_behaviour
where behaviour_type = 'pv'
group by dates;

-- Unique visitors (uv) per day: distinct users with at least one page view.
select
    dates,
    count(distinct user_id) as `uv`
from temp_behaviour
where behaviour_type = 'pv'
group by dates;

-- Both metrics in one statement, plus page views per visitor rounded to one decimal.
select
    dates,
    count(*)                                        as `pv`,
    count(distinct user_id)                         as `uv`,
    round(count(*) / count(distinct user_id), 1)    as `pv/uv`
from temp_behaviour
where behaviour_type = 'pv'
group by dates;

-- Process the real data
-- Result table: daily page views, unique visitors and page views per visitor.
create table pv_uv_puv (
    dates char(10),
    pv    int(9),
    uv    int(9),
    puv   decimal(10,1)
);

-- Fill it from the full data set. The target columns are listed explicitly
-- so the insert does not depend on the table's column order.
insert into pv_uv_puv (dates, pv, uv, puv)
select
    dates,
    count(*)                                     as pv,
    count(distinct user_id)                      as uv,
    round(count(*) / count(distinct user_id), 1) as puv
from use_behaviour
where behaviour_type = 'pv'
group by dates;

-- Check the result.
-- Fix: this statement had no terminating semicolon and ran into the delete below.
select * from pv_uv_puv;

-- Remove abnormal rows (rows without a date).
delete from pv_uv_puv where dates is null;
delete from use_behaviour where dates is null;

-- Retention rate
-- One row per (user, day) on which the user was active (sample table).
select user_id, dates
from temp_behaviour
group by user_id, dates;

-- Self-join: pair each active day of a user (a) with every later active
-- day of the same user (b).
with user_days as (
    select user_id, dates
    from temp_behaviour
    group by user_id, dates
)
select *
from user_days as a
inner join user_days as b
    on a.user_id = b.user_id
where a.dates < b.dates;

-- Retained-user counts per start day: users active on the start day itself
-- (gap 0), one day later (gap 1) and three days later (gap 3).
-- Fix: the aliases were misspelled and inconsistent
-- (retention_0 / rentention_1 / rentention_33 for the 3-day gap).
with user_days as (
    select user_id, dates
    from temp_behaviour
    group by user_id, dates
)
select
    a.dates,
    count(case when datediff(b.dates, a.dates) = 0 then b.user_id end) as retention_0,
    count(case when datediff(b.dates, a.dates) = 1 then b.user_id end) as retention_1,
    count(case when datediff(b.dates, a.dates) = 3 then b.user_id end) as retention_3
from user_days as a
inner join user_days as b
    on  a.user_id = b.user_id
    and a.dates  <= b.dates
group by a.dates;

-- Retention rate
-- Next-day retention rate per start day (sample table):
-- users active again one day later / users active on the start day.
-- Fix: the column alias was fused with the FROM keyword ("rentention_1from"),
-- which left the statement without a FROM clause.
-- The join keeps a.dates <= b.dates, so every group contains its own
-- same-day rows and the denominator is at least 1.
with user_days as (
    select user_id, dates
    from temp_behaviour
    group by user_id, dates
)
select
    a.dates,
    count(case when datediff(b.dates, a.dates) = 1 then b.user_id end)
        / count(case when datediff(b.dates, a.dates) = 0 then b.user_id end) as retention_1
from user_days as a
inner join user_days as b
    on  a.user_id = b.user_id
    and a.dates  <= b.dates
group by a.dates;

-- Save the result
-- Result table for the next-day retention rate per start day.
-- Fix: the create statement had no terminating semicolon and ran into the insert.
create table retention_rate (
    dates       char(10),
    retention_1 float
);

-- Compute the rate on the full data set and store it.
-- Fix: the alias was fused with FROM ("rentention_1from"); it now matches
-- the target column name.
insert into retention_rate (dates, retention_1)
with user_days as (
    -- one row per (user, active day)
    select user_id, dates
    from use_behaviour
    group by user_id, dates
)
select
    a.dates,
    count(case when datediff(b.dates, a.dates) = 1 then b.user_id end)
        / count(case when datediff(b.dates, a.dates) = 0 then b.user_id end) as retention_1
from user_days as a
inner join user_days as b
    on  a.user_id = b.user_id
    and a.dates  <= b.dates
group by a.dates;

-- Bounce rate
-- Bounce users: users that have exactly one behaviour record in total.
-- Fix: the derived-table alias was fused with the next statement
-- (") aselect sum(pv) ..."), so neither statement could be parsed.
select count(*)
from (
    select user_id
    from use_behaviour
    group by user_id
    having count(behaviour_type) = 1
) as a;

-- Total page views, used as the denominator.
select sum(pv) from pv_uv_puv; -- 1782280 in the recorded run
-- Recorded result: 1/1782280

-- Time series analysis
-- Number of events of each behaviour type per day and hour (sample table).
-- Fix: the trailing "--" was not followed by whitespace, so MySQL parsed it
-- as part of the ORDER BY expression instead of a comment, and the
-- statement had no terminating semicolon.
select
    dates,
    hours,
    count(if(behaviour_type = 'pv',   behaviour_type, null)) as `pv`,
    count(if(behaviour_type = 'cart', behaviour_type, null)) as `cart`,
    count(if(behaviour_type = 'fav',  behaviour_type, null)) as `fav`,
    count(if(behaviour_type = 'buy',  behaviour_type, null)) as `buy`
from temp_behaviour
group by dates, hours
order by dates, hours;

-- Store the result
-- Result table: event counts per day, hour and behaviour type.
create table date_hour_behaviour (
    dates char(10),
    hours char(2),
    pv    int,
    cart  int,
    fav   int,
    buy   int
);

-- Insert the result computed on the full data set.
-- Fixes: the "--" comment after the create statement had no following
-- whitespace (not a comment in MySQL), and the insert was fused with the
-- check query below ("hoursselect").
insert into date_hour_behaviour (dates, hours, pv, cart, fav, buy)
select
    dates,
    hours,
    count(if(behaviour_type = 'pv',   behaviour_type, null)) as pv,
    count(if(behaviour_type = 'cart', behaviour_type, null)) as cart,
    count(if(behaviour_type = 'fav',  behaviour_type, null)) as fav,
    count(if(behaviour_type = 'buy',  behaviour_type, null)) as buy
from use_behaviour
group by dates, hours
order by dates, hours;

select * from date_hour_behaviour;

-- Count users per behaviour type
-- Distinct users per behaviour type (sample table).
select
    behaviour_type,
    count(distinct user_id) as user_num
from temp_behaviour
group by behaviour_type
order by behaviour_type desc;

-- Store the result.
-- VARCHAR stores variable-length strings; CHAR stores fixed-length strings.
create table behaviour_user_num (
    behaviour_type varchar(5),
    user_num       int
);

insert into behaviour_user_num (behaviour_type, user_num)
select
    behaviour_type,
    count(distinct user_id) as user_num
from use_behaviour
group by behaviour_type
order by behaviour_type desc;

-- Check the result.
select * from behaviour_user_num;

-- Conversion rate analysis: 0.6844 in the recorded run.
-- The two literals are user counts copied from the result above.
-- Fix: this statement had no terminating semicolon and ran into the next query.
select 12630 / 18453;

-- Count events per behaviour type
-- Number of events per behaviour type (sample table).
-- Fix: count(*) counts events, not users, so the column is labelled
-- behaviour_num instead of the copied-over user_num.
select
    behaviour_type,
    count(*) as behaviour_num
from temp_behaviour
group by behaviour_type
order by behaviour_type desc;

-- Store the event counts per behaviour type.
create table behaviour_num (
    behaviour_type varchar(5),
    behaviour_num  int
);

-- Target columns are listed explicitly so the insert does not depend on
-- the table's column order.
insert into behaviour_num (behaviour_type, behaviour_num)
select
    behaviour_type,
    count(*) as behaviour_num
from use_behaviour
group by behaviour_type
order by behaviour_type desc;

-- Check that the insert succeeded
select * from behaviour_num;

-- Behaviour path analysis: event counts per (user, item) and behaviour
-- type (sample table).
-- Fix: the leading "select user_id, item_id" had been swallowed by the
-- preceding comment, and the statement was fused with the create table.
select
    user_id,
    item_id,
    count(if(behaviour_type = 'pv',   behaviour_type, null)) as `pv`,
    count(if(behaviour_type = 'cart', behaviour_type, null)) as `cart`,
    count(if(behaviour_type = 'fav',  behaviour_type, null)) as `fav`,
    count(if(behaviour_type = 'buy',  behaviour_type, null)) as `buy`
from temp_behaviour
group by user_id, item_id;

-- Result table for the full data set.
-- Fix: the table used to be created as use_behaviout_view with a column
-- uesr_id and then renamed by unterminated ALTER statements. It is now
-- created directly with the final names, giving the same end state
-- (use_behaviour_view.user_id int).
create table use_behaviour_view (
    user_id int,
    item_id int(9),
    pv      int,
    cart    int,
    fav     int,
    buy     int
);

insert into use_behaviour_view (user_id, item_id, pv, cart, fav, buy)
select
    user_id,
    item_id,
    count(if(behaviour_type = 'pv',   behaviour_type, null)) as pv,
    count(if(behaviour_type = 'cart', behaviour_type, null)) as cart,
    count(if(behaviour_type = 'fav',  behaviour_type, null)) as fav,
    count(if(behaviour_type = 'buy',  behaviour_type, null)) as buy
from use_behaviour
group by user_id, item_id;

-- Check the result.
select * from use_behaviour_view;

-- Standardise user behaviour
-- Reduce the per-(user, item) counts to 0/1 flags: did the user view,
-- favourite, add to cart or buy the item at least once.
-- The Chinese column names are kept as they are: later queries reference them.
-- Fix: the view definition had no terminating semicolon.
create view user_behaviour_standard as
select
    user_id,
    item_id,
    (case when pv   > 0 then 1 else 0 end) 浏览了,
    (case when fav  > 0 then 1 else 0 end) 收藏了,
    (case when cart > 0 then 1 else 0 end) 加购了,
    (case when buy  > 0 then 1 else 0 end) 购买了
from use_behaviour_view;

-- Path type: concatenate the four flags (view, favourite, cart, buy) into
-- a 4-character code for every (user, item) pair that ended in a purchase.
-- Fixes: the leading "select *" had been swallowed by the preceding
-- comment, and the trailing "--" had no following whitespace, so MySQL did
-- not treat it as a comment.
select
    *,
    concat(浏览了, 收藏了, 加购了, 购买了) 购买路径类型
from user_behaviour_standard as a
where a.购买了 > 0;

-- Count each purchase path type
-- Number of purchased (user, item) pairs per purchase path code.
create view path_count as
select
    购买路径类型,
    count(*) 数量
from (
    select
        *,
        concat(浏览了, 收藏了, 加购了, 购买了) 购买路径类型
    from user_behaviour_standard as a
    where a.购买了 > 0
) b
group by 购买路径类型
order by 数量 desc;

-- Lookup table that gives each path code a human-readable description.
-- Code positions: viewed, favourited, added to cart, bought.
create table renhua (
    path_type   char(4),
    description varchar(40)
);

-- Fix: the insert and the two following selects were fused together
-- without separators.
insert into renhua (path_type, description)
values
    ('0001', '直接购买了'),
    ('1001', '浏览后购买了'),
    ('0011', '加购后购买了'),
    ('1011', '浏览加购后购买了'),
    ('0101', '收藏后购买了'),
    ('1101', '浏览收藏后购买了'),
    ('0111', '收藏加购后购买了'),
    ('1111', '浏览收藏加购后购买了');

select * from renhua;

-- Path counts together with their descriptions, most frequent first.
select *
from path_count p
inner join renhua r
    on p.购买路径类型 = r.path_type
order by 数量 desc;

-- Store the result
-- Result table: purchase path code, description and number of occurrences.
create table path_result (
    path_type   char(4),
    description varchar(40),
    num         int
);

insert into path_result (path_type, description, num)
select
    r.path_type,
    r.description,
    p.数量 as num
from path_count p
inner join renhua r
    on p.购买路径类型 = r.path_type
order by p.数量 desc;

-- Fix: the two statements below were fused ("path_resultselect") and the
-- second one was not terminated before the next section.
select * from path_result;

-- Total purchases of items that the user neither favourited nor added to cart.
select sum(buy)
from use_behaviour_view
where buy > 0
  and fav = 0
  and cart = 0;
-- 28790 in the recorded run

-- User positioning:
--   1. paying vs non-paying users
--   2. RFM model: metrics (R: most recent purchase, F: purchase frequency,
--      M: purchase amount), then classification by F and R into
--      valuable / developing / retained / win-back users

-- Most recent purchase date
-- Most recent purchase date per buyer (sample table), latest first.
-- ASC sorts ascending and DESC descending; the sort columns are named
-- explicitly instead of being referenced by position.
select
    user_id,
    max(dates) as `最近购买时间`
from temp_behaviour
where behaviour_type = 'buy'
group by user_id
order by `最近购买时间` desc;

-- Number of purchases per buyer, most frequent first.
select
    user_id,
    count(user_id) as `购买次数`
from temp_behaviour
where behaviour_type = 'buy'
group by user_id
order by `购买次数` desc;

-- Both metrics in one statement: sort by most recent purchase date first,
-- and by number of purchases where the dates are equal.
-- Fix: the leading "select user_id" had been swallowed by the preceding comment.
select
    user_id,
    max(dates)     as `最近购买时间`,
    count(user_id) as `购买次数`
from temp_behaviour
where behaviour_type = 'buy'
group by user_id
order by `最近购买时间` desc, `购买次数` desc;

-- Fix: this statement sat at the end of a comment line and never ran.
drop table if exists rfm_model;
-- RFM working table: one row per buyer with the purchase count (frequency)
-- and the most recent purchase date (recent).
-- Fix: the create statement had no terminating semicolon and ran into the insert.
create table rfm_model (
    user_id   int,
    frequency int,
    recent    char(10)
);

-- Fill it from the full data set, ordered by purchase count and then by
-- most recent purchase date (both descending).
insert into rfm_model (user_id, frequency, recent)
select
    user_id,
    count(user_id) as frequency,
    max(dates)     as recent
from use_behaviour
where behaviour_type = 'buy'
group by user_id
order by frequency desc, recent desc;

-- Fix: this check sat at the end of a comment line and never ran.
select * from rfm_model;

-- Tier users by most recent purchase date
-- Recency score (1-5): the more recent the last purchase, the higher the score.
alter table rfm_model add column rscore int;

-- Fixes: the statement had no terminating semicolon, and 2017-11-30 was
-- missing from every list, so it fell through to the lowest score even
-- though it is more recent than the dates scored 3.
-- NOTE(review): 2017-11-30 is placed in the 3 bucket as the smallest change;
-- confirm the intended bucket boundaries.
update rfm_model
set rscore = case
    when recent = '2017-12-03' then 5
    when recent in ('2017-12-01', '2017-12-02') then 4
    when recent in ('2017-11-28', '2017-11-29', '2017-11-30') then 3
    when recent in ('2017-11-26', '2017-11-27') then 2
    else 1
end;

-- Frequency score (1-5): the more purchases, the higher the score.
alter table rfm_model add column fscore int;

select max(frequency), min(frequency) from rfm_model; -- 72 and 1 in the recorded run

-- Fixes:
--   * "update rfm_model" had been swallowed by the preceding comment;
--   * "between 72 and 40" has its bounds reversed and never matches, so
--     the top tier is now an open-ended ">= 40";
--   * "between 11 and 200" overlapped the higher tiers and is now 11-20;
--   * the statement had no terminating semicolon.
update rfm_model
set fscore = case
    when frequency >= 40 then 5
    when frequency between 21 and 39 then 4
    when frequency between 11 and 20 then 3
    when frequency between 5 and 10 then 2
    else 1
end;

-- Segmentation
-- Average frequency and recency scores, used as segmentation thresholds.
set @f_avg = null;
set @r_avg = null;
select avg(fscore) into @f_avg from rfm_model;
select avg(rscore) into @r_avg from rfm_model;

-- Preview the segmentation:
--   high F, high R -> 价值用户 (valuable)
--   high F, low  R -> 保持用户 (retained)
--   low  F, high R -> 发展用户 (developing)
--   low  F, low  R -> 挽留用户 (win-back)
-- Fix: a score exactly equal to the average used to match no branch and
-- produce NULL; "equal" now counts as low. The statement also had no
-- terminating semicolon.
select
    *,
    (case
        when fscore >  @f_avg and rscore >  @r_avg then '价值用户'
        when fscore >  @f_avg and rscore <= @r_avg then '保持用户'
        when fscore <= @f_avg and rscore >  @r_avg then '发展用户'
        when fscore <= @f_avg and rscore <= @r_avg then '挽留用户'
    end) class
from rfm_model;

-- Store the segment on the table.
-- Fix: this ALTER had been swallowed by the preceding comment, so the
-- update below referenced a column that did not exist.
alter table rfm_model add column class varchar(40);

update rfm_model
set class = case
    when fscore >  @f_avg and rscore >  @r_avg then '价值用户'
    when fscore >  @f_avg and rscore <= @r_avg then '保持用户'
    when fscore <= @f_avg and rscore >  @r_avg then '发展用户'
    when fscore <= @f_avg and rscore <= @r_avg then '挽留用户'
end;

select * from rfm_model limit 10;

-- Number of users in each segment.
-- Fix: the select list had been swallowed by the preceding comment.
select
    class,
    count(distinct user_id)
from rfm_model
group by class;

-- Classify products by popularity (sample table).
-- Top 10 categories by page views.
-- Fix: the leading "select category_id" had been swallowed by a comment.
select
    category_id,
    count(if(behaviour_type = 'pv', behaviour_type, null)) as `品类浏览量`
from temp_behaviour
group by category_id
order by `品类浏览量` desc
limit 10;

-- Top 10 items by page views.
select
    item_id,
    count(if(behaviour_type = 'pv', behaviour_type, null)) as `商品浏览量`
from temp_behaviour
group by item_id
order by `商品浏览量` desc
limit 10;

-- Most viewed item(s) of each category, then the 10 with the most page
-- views. rank() gives tied items the same rank, so a category can
-- contribute more than one item.
-- Fixes: the statement was fused with the following create table
-- ("limit 10create"); the ORDER BY inside the derived table had no effect
-- on the result and was dropped.
select
    category_id,
    item_id,
    品类商品浏览量
from (
    select
        category_id,
        item_id,
        count(if(behaviour_type = 'pv', behaviour_type, null)) as `品类商品浏览量`,
        rank() over (
            partition by category_id
            order by count(if(behaviour_type = 'pv', behaviour_type, null)) desc
        ) as r
    from temp_behaviour
    group by category_id, item_id
) a
where r = 1
order by a.品类商品浏览量 desc
limit 10;

-- Result tables for the full data set.
create table popular_categories (
    category_id int,
    pv          int
);

create table popular_items (
    item_id int,
    pv      int
);

create table popular_cateitems (
    category_id int,
    item_id     int,
    pv          int
);

-- Top 10 categories by page views.
insert into popular_categories (category_id, pv)
select
    category_id,
    count(if(behaviour_type = 'pv', behaviour_type, null)) as pv
from use_behaviour
group by category_id
order by pv desc
limit 10;

-- Top 10 items by page views.
insert into popular_items (item_id, pv)
select
    item_id,
    count(if(behaviour_type = 'pv', behaviour_type, null)) as pv
from use_behaviour
group by item_id
order by pv desc
limit 10;

-- Top 10 (category, item) pairs by page views.
-- NOTE(review): unlike the exploratory query above, this does not keep only
-- the top-ranked item of each category; confirm which of the two is intended.
insert into popular_cateitems (category_id, item_id, pv)
select
    category_id,
    item_id,
    count(if(behaviour_type = 'pv', behaviour_type, null)) as pv
from use_behaviour
group by category_id, item_id
order by pv desc
limit 10;

select * from popular_cateitems;

-- Conversion rate of individual items
-- Per item: event counts by behaviour type, and the conversion rate,
-- i.e. distinct users who bought the item / distinct users who interacted
-- with it in any way.
select
    item_id,
    count(if(behaviour_type = 'pv',   behaviour_type, null)) as `pv`,
    count(if(behaviour_type = 'cart', behaviour_type, null)) as `cart`,
    count(if(behaviour_type = 'fav',  behaviour_type, null)) as `fav`,
    count(if(behaviour_type = 'buy',  behaviour_type, null)) as `buy`,
    count(distinct if(behaviour_type = 'buy', user_id, null))
        / count(distinct user_id) as 商品转化率
from use_behaviour
group by item_id
order by 商品转化率 desc;

-- Save the result.
create table item_detail (
    item_id       int,
    pv            int,
    cart          int,
    fav           int,
    buy           int,
    user_buy_rate float
);

insert into item_detail (item_id, pv, cart, fav, buy, user_buy_rate)
select
    item_id,
    count(if(behaviour_type = 'pv',   behaviour_type, null)) as pv,
    count(if(behaviour_type = 'cart', behaviour_type, null)) as cart,
    count(if(behaviour_type = 'fav',  behaviour_type, null)) as fav,
    count(if(behaviour_type = 'buy',  behaviour_type, null)) as buy,
    count(distinct if(behaviour_type = 'buy', user_id, null))
        / count(distinct user_id) as user_buy_rate
from use_behaviour
group by item_id
order by user_buy_rate desc;

-- Fix: this check had no terminating semicolon and ran into the next query.
select * from item_detail;

-- Category conversion rate
-- Per category: how many events of each behaviour type occurred, and the
-- share of the category's distinct users who made at least one purchase.
select
    category_id,
    count(case when behaviour_type = 'pv'   then behaviour_type end) as `pv`,
    count(case when behaviour_type = 'cart' then behaviour_type end) as `cart`,
    count(case when behaviour_type = 'fav'  then behaviour_type end) as `fav`,
    count(case when behaviour_type = 'buy'  then behaviour_type end) as `buy`,
    count(distinct case when behaviour_type = 'buy' then user_id end)
        / count(distinct user_id) as 品类转化率
from use_behaviour
group by category_id
order by 品类转化率 desc;

-- Save the result.
create table category_detail (
    category_id   int,
    pv            int,
    cart          int,
    fav           int,
    buy           int,
    user_buy_rate float
);

insert into category_detail
select
    category_id,
    count(case when behaviour_type = 'pv'   then behaviour_type end) as `pv`,
    count(case when behaviour_type = 'cart' then behaviour_type end) as `cart`,
    count(case when behaviour_type = 'fav'  then behaviour_type end) as `fav`,
    count(case when behaviour_type = 'buy'  then behaviour_type end) as `buy`,
    count(distinct case when behaviour_type = 'buy' then user_id end)
        / count(distinct user_id) as 品类转化率
from use_behaviour
group by category_id
order by 品类转化率 desc;

select * from category_detail;
-- Product feature analysis in Tableau: add an average reference line and filters
-- Data visualisation