一. 部署
# ConfigMap holding the Blackbox Exporter configuration file (blackbox.yml).
apiVersion: v1
kind: ConfigMap
metadata:
  name: blackbox-config
  namespace: monitor
data:
  blackbox.yml: |-
    modules:
      # HTTP probe module. Every probe in Blackbox Exporter is configured
      # as a module.
      http_2xx:
        prober: http
        timeout: 10s
        http:
          valid_http_versions: ["HTTP/1.1", "HTTP/2"]
          # Best to pin an expected status code here; it helps when
          # building graphs in Grafana.
          valid_status_codes: [200]
          method: GET
          preferred_ip_protocol: "ip4"
      # HTTP POST probe module.
      http_post_2xx:
        prober: http
        timeout: 10s
        http:
          valid_http_versions: ["HTTP/1.1", "HTTP/2"]
          method: POST
          preferred_ip_protocol: "ip4"
      # TCP probe module.
      tcp_connect:
        prober: tcp
        timeout: 10s
      # DNS probe module.
      dns:
        prober: dns
        dns:
          # The default is udp.
          transport_protocol: "tcp"
          # The default is ip6.
          preferred_ip_protocol: "ip4"
          query_name: "kubernetes.default.svc.cluster.local"
---
# Deployment running the Blackbox Exporter container in the monitor namespace.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: blackbox
  namespace: monitor
spec:
  selector:
    matchLabels:
      app: blackbox
  template:
    metadata:
      labels:
        app: blackbox
    spec:
      containers:
        - image: harbor.yeemiao.com:8443/library/blackbox-exporter:v0.23.0
          name: blackbox
          args:
            # Path of the config file inside the mounted "config" volume.
            - --config.file=/etc/blackbox_exporter/blackbox.yml
            - --log.level=error
          ports:
            - containerPort: 9115
          volumeMounts:
            - name: config
              mountPath: /etc/blackbox_exporter
      volumes:
        # Populated from the blackbox-config ConfigMap.
        - name: config
          configMap:
            name: blackbox-config
---
# Service exposing pods labelled app=blackbox on port 9115.
apiVersion: v1
kind: Service
metadata:
  name: blackbox
  namespace: monitor
spec:
  selector:
    app: blackbox
  ports:
    - port: 9115
      targetPort: 9115
(一). 错误和措施
1. 给指标加上 hostname
报错提示 172.17.110.154 这个 IP 不允许多对多匹配:匹配标签的一侧必须唯一。可能的原因是被监控列表中的 IP 地址重复,或者查询语法有误,例如:
on(instance) group_left(nodename) (node_uname_info)
2. 配置文件加 hostname
# Scrape job that discovers hadoop-exporter services through Consul and
# attaches hostname / ip labels to each target.
- job_name: 'hadoop-exporter'
  consul_sd_configs:
    - server: 'localhost:8500'
      services: [hadoop-exporter]
  relabel_configs:
    # Copy __meta_consul_service_id into the hostname label.
    - source_labels: [__meta_consul_service_id]
      separator: ';'
      regex: '(.*)'
      target_label: hostname
      replacement: '$1'
      action: replace
    # Copy the service address into the ip label.
    - source_labels: [__meta_consul_service_address]
      separator: ';'
      regex: '(.*)'
      target_label: ip
      replacement: '$1'
      action: replace
    # Keep only targets whose Consul tags include hadoop-test.
    - source_labels: ['__meta_consul_tags']
      regex: '^.*,hadoop-test,.*$'
      action: keep
二. 配置文件
1. Prometheus 配置文件
# Scrape job that probes host:port targets through the Blackbox Exporter
# tcp_connect module. Targets come from a file_sd file.
# NOTE(review): no rule sets the instance label to the probed target, so it
# presumably ends up as the exporter address for every series; confirm
# whether an instance relabel rule from __param_target is wanted.
- job_name: 'port_status'
  metrics_path: /probe
  params:
    module: [tcp_connect]
  file_sd_configs:
    - files:
        # This file must be mounted into the Prometheus container.
        - /etc/prometheus/network-probe.yml
      refresh_interval: 2m
  relabel_configs:
    # Pass the discovered address to the exporter as the ?target= parameter.
    - source_labels: [__address__]
      target_label: __param_target
    # Everything before the last ':' becomes the ip label.
    - source_labels: [__param_target]
      regex: '(.*):(.*)'
      replacement: '$1'
      target_label: ip
    # Everything after the last ':' becomes the port label.
    - source_labels: [__param_target]
      regex: '(.*):(.*)'
      replacement: '$2'
      target_label: port
    # Send the actual scrape request to the Blackbox Exporter service.
    - target_label: __address__
      replacement: blackbox.monitor.svc.cluster.local:9115
2. 服务发现配置文件
/etc/prometheus/network-probe.yml
挂到 Prometheus 中
# ConfigMap holding the file_sd target list (network-probe.yml) of host:port
# endpoints to probe; each group carries an "app" label.
kind: ConfigMap
apiVersion: v1
metadata:
  name: network-probe
  namespace: monitor
data:
  # NOTE(review): the target 80.80.5.80:80:380 contains two ':' separators
  # while its label reads 80.80.5.80:80 - confirm the intended host:port.
  network-probe.yml: |-
    - labels:
        app: "济南外部"
      targets:
        - app.jinan.gov.cn:443
    - labels:
        app: "hhhh.net.cn"
      targets:
        - 80.80.80.80:443
    - labels:
        app: "湖南接口 80.80.5.80:80"
      targets:
        - 80.80.5.80:80:380
    - labels:
        app: "hhhh.net 外网访问"
      targets:
        - 80.17.55.111:443
    - labels:
        app: "湖南接口 80.30.80.80:280"
      targets:
        - 80.30.80.80:280
    - labels:
        app: "温州接口 https://wz.hhhh.com:39443(80.19.89.121)"
      targets:
        - wz.hhhh.com:39443