Experimental Deployment
In real-world deployments, target discovery is done automatically through the kube-apiserver (Kubernetes service discovery).
1. Create a service account and bind it to the cluster
kubectl create namespace monitor-sa
# create the namespace first if it does not already exist
kubectl create serviceaccount monitor -n monitor-sa
# create the service account
kubectl create clusterrolebinding monitor-clusterrolebinding --clusterrole=cluster-admin --serviceaccount=monitor-sa:monitor
# bind the account to the cluster-admin role (ClusterRoleBindings are cluster-scoped, so no namespace flag is needed)
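A quick sanity check that the account and binding exist (plain kubectl, nothing specific to this lab):

kubectl get serviceaccount monitor -n monitor-sa
kubectl get clusterrolebinding monitor-clusterrolebinding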
2. Deploy node-exporter on the nodes
First deploy node-exporter so that every node exposes metrics:
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: node-exporter
  namespace: monitor-sa
  labels:
    name: node-exporter
spec:
  selector:
    matchLabels:
      name: node-exporter
  template:
    metadata:
      labels:
        name: node-exporter
    spec:
      hostPID: true
      hostIPC: true
      hostNetwork: true
      # share the node's network, PID, and IPC namespaces
      containers:
      - name: node-exporter
        image: prom/node-exporter
        ports:
        - containerPort: 9100
        resources:
          limits:
            cpu: "0.5"
        securityContext:
          privileged: true
          # give the container full privileges on the node
        args:
        - --path.procfs
        - /host/proc
        - --path.sysfs
        - /host/sys
        - --collector.filesystem.ignored-mount-points
        - '^/(sys|proc|dev|host|etc)($|/)'
        # collect node system configuration, network configuration, hardware and host information
        volumeMounts:
        - name: dev
          mountPath: /host/dev
        - name: proc
          mountPath: /host/proc
        - name: sys
          mountPath: /host/sys
        - name: rootfs
          mountPath: /rootfs
      volumes:
      - name: proc
        hostPath:
          path: /proc
      - name: dev
        hostPath:
          path: /dev
      - name: sys
        hostPath:
          path: /sys
      - name: rootfs
        hostPath:
          path: /
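Assuming the manifest above is saved as node-export.yaml (the filename is illustrative), apply it and spot-check one node. Because the DaemonSet uses hostNetwork, the metrics are served directly on each node's IP at port 9100:

kubectl apply -f node-export.yaml
kubectl get pods -n monitor-sa -o wide
curl -s http://192.168.233.81:9100/metrics | head
# 192.168.233.81 is the master01 address used later in this lab; substitute one of your node IPs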
3. Create a ConfigMap to ship the Prometheus configuration
apiVersion: v1
kind: ConfigMap
metadata:
  labels:
    app: prometheus
  name: prometheus-config
  namespace: monitor-sa
data:
  prometheus.yml: |
    global:
      scrape_interval: 15s
      scrape_timeout: 10s
      evaluation_interval: 1m
    scrape_configs:
    - job_name: 'kubernetes-node'
      kubernetes_sd_configs:
      - role: node
      relabel_configs:
      - source_labels: [__address__]
        regex: '(.*):10250'
        replacement: '${1}:9100'
        target_label: __address__
        action: replace
      - action: labelmap
        regex: __meta_kubernetes_node_label_(.+)
    - job_name: 'kubernetes-node-cadvisor'
      kubernetes_sd_configs:
      - role: node
      scheme: https
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      relabel_configs:
      - action: labelmap
        regex: __meta_kubernetes_node_label_(.+)
      - target_label: __address__
        replacement: kubernetes.default.svc:443
      - source_labels: [__meta_kubernetes_node_name]
        regex: (.+)
        target_label: __metrics_path__
        replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
    - job_name: 'kubernetes-apiserver'
      kubernetes_sd_configs:
      - role: endpoints
      scheme: https
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      relabel_configs:
      - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
        action: keep
        regex: default;kubernetes;https
    - job_name: 'kubernetes-service-endpoints'
      kubernetes_sd_configs:
      - role: endpoints
      relabel_configs:
      - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
        action: keep
        regex: true
      - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
        action: replace
        target_label: __scheme__
        regex: (https?)
      - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
        action: replace
        target_label: __metrics_path__
        regex: (.+)
      - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
        action: replace
        target_label: __address__
        regex: ([^:]+)(?::\d+)?;(\d+)
        replacement: $1:$2
      - action: labelmap
        regex: __meta_kubernetes_service_label_(.+)
      - source_labels: [__meta_kubernetes_namespace]
        action: replace
        target_label: kubernetes_namespace
      - source_labels: [__meta_kubernetes_service_name]
        action: replace
        target_label: kubernetes_name
    - job_name: 'kubernetes-ingresses'
      kubernetes_sd_configs:
      - role: ingress
      relabel_configs:
      - source_labels: [__meta_kubernetes_ingress_annotation_prometheus_io_probe]
        action: keep
        regex: true
      - source_labels: [__meta_kubernetes_ingress_scheme, __address__, __meta_kubernetes_ingress_path]
        regex: (.+);(.+);(.+)
        replacement: ${1}://${2}${3}
        target_label: __param_target
      - target_label: __address__
        replacement: blackbox-exporter.example.com:9115
      - source_labels: [__param_target]
        target_label: instance
      - action: labelmap
        regex: __meta_kubernetes_ingress_label_(.+)
      - source_labels: [__meta_kubernetes_namespace]
        target_label: kubernetes_namespace
      - source_labels: [__meta_kubernetes_ingress_name]
        target_label: kubernetes_name
    - job_name: 'kubernetes-pods'
      kubernetes_sd_configs:
      - role: pod
      relabel_configs:
      - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
        action: keep
        regex: true
      - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
        action: replace
        target_label: __metrics_path__
        regex: (.+)
      - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
        action: replace
        regex: ([^:]+)(?::\d+)?;(\d+)
        replacement: $1:$2
        target_label: __address__
      - action: labelmap
        regex: __meta_kubernetes_pod_label_(.+)
      - source_labels: [__meta_kubernetes_namespace]
        action: replace
        target_label: kubernetes_namespace
      - source_labels: [__meta_kubernetes_pod_name]
        action: replace
        target_label: kubernetes_pod_name

Annotated version (the first two jobs of the same ConfigMap, with the relabeling rules explained):
apiVersion: v1
kind: ConfigMap
metadata:
  labels:
    app: prometheus
  name: prometheus-config
  namespace: monitor-sa
data:
  prometheus.yml: |
    global:
      scrape_interval: 15s
      scrape_timeout: 10s
      evaluation_interval: 1m
    scrape_configs:
    - job_name: 'kubernetes-node'
      kubernetes_sd_configs:
      # *_sd_configs is Prometheus's service-discovery configuration; here it uses Kubernetes SD
      - role: node
      # discover every node in the cluster; targets default to the kubelet HTTP port
      relabel_configs:
      # relabeling rules
      - source_labels: [__address__]
        # match against the original label
        regex: '(.*):10250'
        # match addresses on port 10250 (the kubelet port)
        replacement: '${1}:9100'
        target_label: __address__
        # the rewritten address built from the captured IP (e.g. 192.168.233.81:9100) is written back to __address__
        action: replace
        # replace action
      - action: labelmap
        regex: __meta_kubernetes_node_label_(.+)
        # map the node's own labels through this regex so they are kept; otherwise only
        # instance would be shown. This is mainly to capture node metadata.
    - job_name: 'kubernetes-node-cadvisor'
      kubernetes_sd_configs:
      - role: node
      scheme: https
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      relabel_configs:
      - action: labelmap
        regex: __meta_kubernetes_node_label_(.+)
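The kubernetes-service-endpoints and kubernetes-pods jobs only keep targets that opt in through annotations. A minimal sketch of a Service that would be discovered (the name my-app and port 8080 are illustrative, not part of this lab):

kubectl apply -f - <<'EOF'
apiVersion: v1
kind: Service
metadata:
  name: my-app                      # illustrative name
  namespace: default
  annotations:
    prometheus.io/scrape: "true"    # matched by the keep rule
    prometheus.io/port: "8080"      # rewritten into __address__
    prometheus.io/path: "/metrics"  # rewritten into __metrics_path__
spec:
  selector:
    app: my-app
  ports:
  - port: 8080
EOF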
3.1 Hot-reloading a modified ConfigMap
This works because the Deployment below starts Prometheus with --web.enable-lifecycle, which exposes the /-/reload endpoint:
curl -X POST -Ls http://10.244.1.45:9090/-/reload
# reload the configuration in place; do not simply delete the Pod
# in production always hot-reload: restarting by deleting the Pod loses data
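A typical end-to-end hot update looks like this (filenames are illustrative; the Pod IP comes from kubectl get pod -o wide):

kubectl apply -f prometheus-cfg.yaml
kubectl get pods -n monitor-sa -l app=prometheus -o wide
curl -X POST http://10.244.2.11:9090/-/reload
# repeat the curl against every Prometheus Pod IP if you run more than one replica.
# Caveat: this Deployment mounts the ConfigMap with subPath, and Kubernetes does not
# propagate ConfigMap updates into subPath mounts; if the reload seems to have no
# effect, mount the ConfigMap as a directory instead.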
4. Deploy Prometheus:
apiVersion: apps/v1
kind: Deployment
metadata:
  name: prometheus-server
  namespace: monitor-sa
  labels:
    app: prometheus
spec:
  replicas: 1
  selector:
    matchLabels:
      app: prometheus
      component: server
  template:
    metadata:
      labels:
        app: prometheus
        component: server
      annotations:
        prometheus.io/scrape: 'false'
    spec:
      serviceAccountName: monitor
      # use the monitor service account so Prometheus can read cluster information
      initContainers:
      - name: init-chmod
        image: busybox:latest
        # busybox is a minimal utility image, used here only to fix permissions before startup
        command: ['sh', '-c', 'chmod -R 777 /prometheus; chmod -R 777 /etc']
        volumeMounts:
        - mountPath: /prometheus
          name: prometheus-storage-volume
        - mountPath: /etc/localtime
          name: timezone
      containers:
      - name: prometheus
        image: prom/prometheus:v2.45.0
        command:
        - prometheus
        - --config.file=/etc/prometheus/prometheus.yml
        - --storage.tsdb.path=/prometheus
        - --storage.tsdb.retention=720h
        - --web.enable-lifecycle
        ports:
        - containerPort: 9090
        volumeMounts:
        - name: prometheus-config
          mountPath: /etc/prometheus/prometheus.yml
          subPath: prometheus.yml
        - mountPath: /prometheus/
          name: prometheus-storage-volume
        - name: timezone
          mountPath: /etc/localtime
      volumes:
      - name: prometheus-config
        configMap:
          name: prometheus-config
          defaultMode: 0777
      - name: prometheus-storage-volume
        hostPath:
          path: /data
          type: Directory
      - name: timezone
        hostPath:
          path: /usr/share/zoneinfo/Asia/Shanghai
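Assuming the Deployment is saved as prometheus-deploy.yaml (illustrative name), apply it and wait for the rollout:

kubectl apply -f prometheus-deploy.yaml
kubectl rollout status deployment/prometheus-server -n monitor-sa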
5. Deploy a Service for external access:
apiVersion: v1
kind: Service
metadata:
  name: prometheus
  namespace: monitor-sa
  labels:
    app: prometheus
spec:
  type: NodePort
  ports:
  - port: 9090
    targetPort: 9090
    protocol: TCP
  selector:
    app: prometheus
    component: server
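After creating the Service, look up the assigned NodePort and open the UI from outside the cluster:

kubectl get svc prometheus -n monitor-sa
# e.g. 9090:3XXXX/TCP — browse to http://<any-node-ip>:<nodeport>/targets to confirm the discovered targets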
6. Install Grafana for visualization
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: grafana
  namespace: kube-system
spec:
  accessModes:
  - ReadWriteMany
  storageClassName: nfs-client-storageclass
  resources:
    requests:
      storage: 2Gi
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: monitoring-grafana
  namespace: kube-system
spec:
  replicas: 1
  selector:
    matchLabels:
      task: monitoring
      k8s-app: grafana
  template:
    metadata:
      labels:
        task: monitoring
        k8s-app: grafana
    spec:
      containers:
      - name: grafana
        image: grafana/grafana:7.5.11
        securityContext:
          runAsUser: 104
          runAsGroup: 107
        ports:
        - containerPort: 3000
          protocol: TCP
        volumeMounts:
        - mountPath: /etc/ssl/certs
          name: ca-certificates
          readOnly: false
        - mountPath: /var
          name: grafana-storage
        - mountPath: /var/lib/grafana
          name: graf-test
        env:
        - name: INFLUXDB_HOST
          value: monitoring-influxdb
        - name: GF_SERVER_HTTP_PORT
          value: "3000"
        - name: GF_AUTH_BASIC_ENABLED
          value: "false"
        - name: GF_AUTH_ANONYMOUS_ENABLED
          value: "true"
        - name: GF_AUTH_ANONYMOUS_ORG_ROLE
          value: Admin
        - name: GF_SERVER_ROOT_URL
          value: /
      volumes:
      - name: ca-certificates
        hostPath:
          path: /etc/ssl/certs
      - name: grafana-storage
        emptyDir: {}
      - name: graf-test
        persistentVolumeClaim:
          claimName: grafana
---
apiVersion: v1
kind: Service
metadata:
  name: monitoring-grafana
  namespace: kube-system
  labels:
    name: monitoring-grafana
spec:
  ports:
  - port: 80
    targetPort: 3000
  selector:
    k8s-app: grafana
  type: NodePort
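Assuming the three objects above are saved together in one grafana.yaml (illustrative name), apply it and check that the PVC binds and the Service gets a NodePort:

kubectl apply -f grafana.yaml
kubectl get pvc grafana -n kube-system
# STATUS should be Bound (this assumes the nfs-client-storageclass StorageClass already exists)
kubectl get svc monitoring-grafana -n kube-system
# browse to http://<node-ip>:<nodeport>; anonymous access is enabled with the Admin role,
# then add a Prometheus data source pointing at the Prometheus Service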
[root@node01 ~]# mkdir -p /data
# the Prometheus hostPath volume (type: Directory) requires /data to already exist on every node that may run Prometheus
[root@master01 node]# kubectl get pod -n monitor-sa -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
node-exporter-f9825 1/1 Running 0 34m 192.168.233.83 node02 <none> <none>
node-exporter-pklzk 1/1 Running 0 34m 192.168.233.81 master01 <none> <none>
node-exporter-xnfxv 1/1 Running 0 34m 192.168.233.82 node01 <none> <none>
prometheus-server-76bd89b95b-cdbqb 1/1 Running 0 29m 10.244.2.11 node02 <none> <none>
prometheus-server-76bd89b95b-lgpr2 1/1 Running 0 3m22s 10.244.0.3 master01 <none> <none>
prometheus-server-76bd89b95b-mx5gd 1/1 Running 0 29m 10.244.1.12 node01 <none> <none>
Stress test: generate CPU load and watch the node metrics change.
[root@master01 node]# yum -y install stress
[root@master01 node]# stress --cpu 2
stress: info: [72419] dispatching hogs: 2 cpu, 0 io, 0 vm, 0 hdd
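To watch the load land in Prometheus, query node CPU usage through the HTTP API (the Pod IP below is one from the listing above; the PromQL expression is a common node-exporter idiom, not something specific to this lab):

curl -s 'http://10.244.2.11:9090/api/v1/query' \
  --data-urlencode 'query=100 - avg by (instance) (irate(node_cpu_seconds_total{mode="idle"}[5m])) * 100'
# the instance running stress --cpu 2 should show a clear jump in CPU usage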