1. 创建好用于 CDN(CloudFront)的 WAF,配置好规则,并将日志(log)的存储方式设置为 S3。
2. 使用 Amazon Athena 服务(注意:需要与 S3 桶位于相同的区域)。
https://docs.aws.amazon.com/zh_cn/athena/latest/ug/waf-logs.html#waf-example-count-matched-ip-addresses
参考上面的官方文档,创建一个分区查询表。
官方示例不能直接使用:因为是 CDN 资源,需要修改相关字段。
-- External table over AWS WAF logs (JSON) delivered to S3 for a CloudFront web ACL.
-- Partitions are resolved through partition projection, so no MSCK REPAIR /
-- ALTER TABLE ADD PARTITION is needed:
--   region : enum, single value 'cloudfront'
--   date   : yyyy/MM/dd, one partition per day from 2024/07/08 up to NOW
-- Replace every <...> placeholder with your own bucket, account ID and web ACL name.
-- LOCATION ends with a trailing slash so the prefix cannot match sibling
-- prefixes that merely start with the same web ACL name.
CREATE EXTERNAL TABLE `waf_logs`(
  `timestamp` bigint,
  `formatversion` int,
  `webaclid` string,
  `terminatingruleid` string,
  `terminatingruletype` string,
  `action` string,
  `terminatingrulematchdetails` array <
    struct <
      conditiontype: string,
      sensitivitylevel: string,
      location: string,
      matcheddata: array < string >
    >
  >,
  `httpsourcename` string,
  `httpsourceid` string,
  `rulegrouplist` array <
    struct <
      rulegroupid: string,
      terminatingrule: struct <
        ruleid: string,
        action: string,
        rulematchdetails: array <
          struct <
            conditiontype: string,
            sensitivitylevel: string,
            location: string,
            matcheddata: array < string >
          >
        >
      >,
      nonterminatingmatchingrules: array <
        struct <
          ruleid: string,
          action: string,
          overriddenaction: string,
          rulematchdetails: array <
            struct <
              conditiontype: string,
              sensitivitylevel: string,
              location: string,
              matcheddata: array < string >
            >
          >,
          challengeresponse: struct <
            responsecode: string,
            solvetimestamp: string
          >,
          captcharesponse: struct <
            responsecode: string,
            solvetimestamp: string
          >
        >
      >,
      excludedrules: string
    >
  >,
  `ratebasedrulelist` array <
    struct <
      ratebasedruleid: string,
      limitkey: string,
      maxrateallowed: int
    >
  >,
  `nonterminatingmatchingrules` array <
    struct <
      ruleid: string,
      action: string,
      rulematchdetails: array <
        struct <
          conditiontype: string,
          sensitivitylevel: string,
          location: string,
          matcheddata: array < string >
        >
      >,
      challengeresponse: struct <
        responsecode: string,
        solvetimestamp: string
      >,
      captcharesponse: struct <
        responsecode: string,
        solvetimestamp: string
      >
    >
  >,
  `requestheadersinserted` array <
    struct <
      name: string,
      value: string
    >
  >,
  `responsecodesent` string,
  `httprequest` struct <
    clientip: string,
    country: string,
    headers: array <
      struct <
        name: string,
        value: string
      >
    >,
    uri: string,
    args: string,
    httpversion: string,
    httpmethod: string,
    requestid: string
  >,
  `labels` array <
    struct <
      name: string
    >
  >,
  `captcharesponse` struct <
    responsecode: string,
    solvetimestamp: string,
    failureReason: string
  >,
  `challengeresponse` struct <
    responsecode: string,
    solvetimestamp: string,
    failureReason: string
  >,
  `ja3Fingerprint` string,
  `oversizefields` string,
  `requestbodysize` int,
  `requestbodysizeinspectedbywaf` int
)
PARTITIONED BY (
  `region` string,
  `date` string
)
ROW FORMAT SERDE
  'org.openx.data.jsonserde.JsonSerDe'
STORED AS INPUTFORMAT
  'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
  's3://<aws-waf-logs-xxx>/AWSLogs/<accountID>/WAFLogs/cloudfront/<waf-acl>/'
TBLPROPERTIES (
  'projection.enabled' = 'true',
  'projection.region.type' = 'enum',
  'projection.region.values' = 'cloudfront',
  'projection.date.type' = 'date',
  'projection.date.range' = '2024/07/08,NOW',
  'projection.date.format' = 'yyyy/MM/dd',
  'projection.date.interval' = '1',
  'projection.date.interval.unit' = 'DAYS',
  'storage.location.template' = 's3://<aws-waf-logs-xxx>/AWSLogs/<accountID>/WAFLogs/${region}/<waf-acl>/${date}/'
)
将 < > 中的字符修改为自己的资源。
测试查询
-- Requests terminated by a rate-based rule, counted per client country,
-- most frequent countries first.
-- COUNT(httprequest.country) counts only rows whose country is non-NULL.
-- Sorted descending so that LIMIT keeps the highest counts rather than the
-- lowest ones.
SELECT
    COUNT(httprequest.country) AS count,
    httprequest.country
FROM waf_logs
WHERE terminatingruletype = 'RATE_BASED'
GROUP BY httprequest.country
ORDER BY count DESC
LIMIT 100;
-- For one terminating rule, list the 100 most frequent combinations of
-- web ACL, action, client IP and request URI.
-- Replace <id> with the terminating rule ID to inspect.
WITH matched AS (
    -- Rows terminated by the chosen rule, reduced to the grouping keys.
    SELECT
        webaclid,
        action,
        httprequest.clientip AS clientip,
        httprequest.uri AS uri
    FROM waf_logs
    WHERE terminatingruleid = '<id>'
)
SELECT
    COUNT(*) AS count,
    webaclid,
    action,
    clientip,
    uri
FROM matched
GROUP BY
    webaclid,
    action,
    clientip,
    uri
ORDER BY count DESC
LIMIT 100;
具体的 SQL 字段需要修改成自己的;可以先检索全表、查看有哪些字段,方便后续搜索。