SQL示例
一、前言
-
安装MySQL
-
启动MySQL
MySQL is configured to only allow connections from localhost by defaultTo connect run:mysql -u rootTo start mysql now and restart at login:brew services start mysql
- 下载客户端工具Sequel Ace并登录
二、示例SQL
- where过滤数据
使用where过滤数据
SELECT*
FROMactor
WHERElast_name = 'ALLEN' OR last_name = 'DAVIS'
ORDER BY `actor_id`;
where可以和and或者or联合使用,来圈定条件;常见的比较符:
=、<>、!=、>、<、>=、<=、IN、NOT IN、BETWEEN、LIKE、IS NULL、IS NOT NULL
- case when示例
统计address表格中,postal_code在区间的分布
select sum(
case when `postal_code` < 1000 then 1 else 0
end
) as c1,sum(
case when (`postal_code` < 5000 and `postal_code`>=1000) then 1 else 0
end
) as c2,sum(
case when (`postal_code` < 10000 and `postal_code`>=5000) then 1 else 0
end
) as c3,sum(
case when (`postal_code`>=10000) then 1 else 0
end
) as c4from address;
- 派生表的子查询
通过派生表temp1,统计符合条件的数量
select
count(*) as cnt
from(SELECT *FROM actorWHERE last_name = 'ALLEN' OR last_name = 'DAVIS'ORDER BY `actor_id`) temp1;
- 常见通用格式(统计数字)
SELECT t1.a1 -- 所有要展示的字段,都写在这里,可能不止来源一个表,t1.a2,COUNT(DISTINCT t1.a1) AS cnt,t2.a3,t2.a4
FROM t1 -- 来源于第一个表
LEFT JOIN (SELECT a3,a4FROM xxxWHERE ds = 'xxx') t2 -- 第二个表
ON t1.id = t2.id -- JOIN的关联条件
WHERE t1.a1 = 'xx' -- 过滤条件
AND t1.ds = '20230711'
AND t1.a1 IS NOT NULL
GROUP BY t1.a1 -- 分组:注意,所有select中出现的字段(除聚合函数外),都需要出现在GROUP BY中,t1.a2,t2.a3,t2.a4
HAVING cnt > 0 -- 对分组统计之后的最终数据做一个过滤,这里可以直接使用聚合函数as之后的别名
ORDER BY cnt DESC -- 排序
LIMIT 30
;
1.执行顺序:->from (去加载t1 和 t2这2个表 ) -> join -> on -> where -> group by->select 后面的聚合函数count,sum -> having -> distinct -> order by -> limit
2.FROM 子句执行顺序为从后往前、从右到左
- 查询最近几天某城市的xx
SELECT ds,SUM(CASE WHEN cnt >= 1 THEN 1 ELSE 0 END) AS fake_cnt_android,COUNT(*) AS all_cnt_android
FROM (SELECT user_id AS id,SUM(CASE WHEN (location_type = '21' OR location_type = '25') THEN 1 ELSE 0 END) AS cnt,dsFROM xxxxxxxxWHERE ds IN ('20230623','20230624','20230625','20230626','20230627')AND city_name = 'xx市'AND location_type IS NOT NULL -- 过滤掉无效数据AND location_type != -1 -- 过滤掉xxxGROUP BY ds,id) id_cnt
GROUP BY ds
;
- 一个数据开发的ODPS SQL示例
CREATE TABLE IF NOT EXISTS temp_xxx
(city STRING COMMENT '城市',line_name STRING COMMENT 'xxx',fake_cnt BIGINT COMMENT 'xx数量',all_cnt BIGINT COMMENT '总数量',ratio STRING COMMENT '百分比'
)
COMMENT 'xxx情况表'
PARTITIONED BY
(ds STRING COMMENT 'day 分区字段'
)
LIFECYCLE 30
;INSERT OVERWRITE TABLE temp_xxx PARTITION (ds='${bizdate}')
SELECT city,line_name,fake_cnt,all_cnt,CONCAT(CAST(ROUND(100 * fake_cnt / all_cnt,2) AS STRING),'%') AS ratio
FROM (SELECT CASE modeWHEN 'xx' THEN 'xxxxx'WHEN 'xx' THEN 'xxxxx'ELSE 'xxxxxx'END AS line_name,city,SUM(CASE WHEN cnt >= 1 THEN 1 ELSE 0 END) AS fake_cnt,COUNT(*) AS all_cntFROM (SELECT rider_._id,SUM(CASE WHEN location_type = '21' THEN 1 ELSE 0 END) AS cnt,ds,rider_.city_name AS city,id_mode.tms_line_name AS modeFROM xxxxxxxxxx AS rider_LEFT JOIN (SELECT _id AS id,tms_line_nameFROM xxxxxxxx.xxxdiWHERE ds = '${bizdate}'AND tms_line_name IS NOT NULL) id_modeON id_mode.id = rider_._idWHERE ds = '${bizdate}'AND _id IS NOT NULL AND city_id IN ('91','66')AND location_type IS NOT NULL -- 过滤掉无效数据GROUP BY ds,rider_._id,mode,rider_.city_name) id_cntGROUP BY city,line_name --GROUP BY ROLLUP(city,mode)) mod_cnt
;
其中bizdate可以在调度配置中设置
附
参考资料:https://help.aliyun.com/zh/maxcompute/