部署方式:https://github.com/kubernetes/kubernetes/tree/master/cluster/addons/prometheus
源码目录:kubernetes/cluster/addons/prometheus
服务发现:https://prometheus.io/docs/prometheus/latest/configuration/configuration/#kubernetes_sd_config
部署条件
1、K8S中部署内部DNS服务
2、已有可使用的动态PV
配置文件
下列是已经修改好的配置文件,可根据条件自行微调
- # 访问api授权
- prometheus-rbac.yaml
-
配置文件
-
# prometheus-rbac.yaml
# Grants Prometheus read access to the Kubernetes API.
# ServiceAccount that the Prometheus pod runs as.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: prometheus
  namespace: kube-system
  labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
---
# ClusterRole listing what the ServiceAccount may read.
# Uses rbac.authorization.k8s.io/v1: the v1beta1 version of this API was
# removed in Kubernetes 1.22, so manifests pinned to it are rejected there.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: prometheus
  labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
rules:
  - apiGroups:
      - ""
    # Resources the role is granted access to.
    resources:
      - nodes
      - nodes/metrics
      - services
      - endpoints
      - pods
    verbs:
      - get
      - list
      - watch
  - apiGroups:
      - ""
    resources:
      - configmaps
    verbs:
      - get
  - nonResourceURLs:
      - "/metrics"
    verbs:
      - get
---
# Role binding: attaches the ClusterRole to the ServiceAccount.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: prometheus
  labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus
subjects:
  - kind: ServiceAccount
    name: prometheus
    namespace: kube-system
- # 管理prometheus配置文件
- prometheus-configmap.yaml
-
配置文件
-
# Prometheus configuration format https://prometheus.io/docs/prometheus/latest/configuration/configuration/
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-config
  namespace: kube-system
  labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: EnsureExists
data:
  # Holds the Prometheus configuration file.
  prometheus.yml: |
    # Scrape targets.
    scrape_configs:
      - job_name: prometheus
        static_configs:
          - targets:
              # Scrape Prometheus itself.
              - localhost:9090

      # Scrape: API server metrics.
      # The job created here is named kubernetes-apiservers.
      - job_name: kubernetes-apiservers
        # Kubernetes-based service discovery.
        kubernetes_sd_configs:
          - role: endpoints
        # Relabel using the discovery labels.
        relabel_configs:
          # Keep only targets whose joined source labels match the regex.
          - action: keep
            regex: default;kubernetes;https
            source_labels:
              - __meta_kubernetes_namespace
              - __meta_kubernetes_service_name
              - __meta_kubernetes_endpoint_port_name
        # Scrape over https (the default is http).
        scheme: https
        tls_config:
          # CA certificate used when Prometheus talks to the API server.
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
          # Skip https certificate verification.
          insecure_skip_verify: true
        # Token used when Prometheus talks to the API server.
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token

      # Scrape: kubelet metrics.
      - job_name: kubernetes-nodes-kubelet
        kubernetes_sd_configs:
          # Discover every node in the cluster.
          - role: node
        relabel_configs:
          # Pick out the node labels via the regex.
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
          insecure_skip_verify: true
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token

      # Scrape: cAdvisor metrics from the nodes.
      - job_name: kubernetes-nodes-cadvisor
        kubernetes_sd_configs:
          - role: node
        relabel_configs:
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)
          # Rewrite the metrics path label.
          - target_label: __metrics_path__
            replacement: /metrics/cadvisor
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
          insecure_skip_verify: true
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token

      # Scrape: service endpoints.
      - job_name: kubernetes-service-endpoints
        # Select the targets.
        kubernetes_sd_configs:
          - role: endpoints
        relabel_configs:
          - action: keep
            regex: true
            # Source label to test.
            source_labels:
              - __meta_kubernetes_service_annotation_prometheus_io_scrape
          - action: replace
            regex: (https?)
            source_labels:
              - __meta_kubernetes_service_annotation_prometheus_io_scheme
            # Write the result into the scheme label used for scraping.
            target_label: __scheme__
          - action: replace
            regex: (.+)
            source_labels:
              - __meta_kubernetes_service_annotation_prometheus_io_path
            target_label: __metrics_path__
          - action: replace
            regex: ([^:]+)(?::\d+)?;(\d+)
            replacement: $1:$2
            source_labels:
              - __address__
              - __meta_kubernetes_service_annotation_prometheus_io_port
            target_label: __address__
          - action: labelmap
            regex: __meta_kubernetes_service_label_(.+)
          - action: replace
            source_labels:
              - __meta_kubernetes_namespace
            target_label: kubernetes_namespace
          - action: replace
            source_labels:
              - __meta_kubernetes_service_name
            target_label: kubernetes_name

      # Scrape: Kubernetes services.
      - job_name: kubernetes-services
        kubernetes_sd_configs:
          - role: service
        # Black-box probing: checks whether the IP and port are reachable.
        metrics_path: /probe
        params:
          module:
            - http_2xx
        relabel_configs:
          - action: keep
            regex: true
            source_labels:
              - __meta_kubernetes_service_annotation_prometheus_io_probe
          - source_labels:
              - __address__
            target_label: __param_target
          # Send the probe request to "blackbox".
          - replacement: blackbox
            target_label: __address__
          - source_labels:
              - __param_target
            target_label: instance
          - action: labelmap
            regex: __meta_kubernetes_service_label_(.+)
          - source_labels:
              - __meta_kubernetes_namespace
            target_label: kubernetes_namespace
          - source_labels:
              - __meta_kubernetes_service_name
            target_label: kubernetes_name

      # Scrape: Kubernetes pods.
      - job_name: kubernetes-pods
        kubernetes_sd_configs:
          - role: pod
        relabel_configs:
          - action: keep
            regex: true
            source_labels:
              # Keep only pods marked for scraping.
              - __meta_kubernetes_pod_annotation_prometheus_io_scrape
          - action: replace
            regex: (.+)
            source_labels:
              - __meta_kubernetes_pod_annotation_prometheus_io_path
            target_label: __metrics_path__
          - action: replace
            regex: ([^:]+)(?::\d+)?;(\d+)
            replacement: $1:$2
            source_labels:
              # Scrape address.
              - __address__
              # Scrape port.
              - __meta_kubernetes_pod_annotation_prometheus_io_port
            target_label: __address__
          - action: labelmap
            regex: __meta_kubernetes_pod_label_(.+)
          - action: replace
            source_labels:
              - __meta_kubernetes_namespace
            target_label: kubernetes_namespace
          - action: replace
            source_labels:
              - __meta_kubernetes_pod_name
            target_label: kubernetes_pod_name

    alerting:
      # Alerting configuration.
      alertmanagers:
        - kubernetes_sd_configs:
            # Discovered dynamically.
            - role: pod
          tls_config:
            ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
          bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
          relabel_configs:
            - source_labels:
                - __meta_kubernetes_namespace
              regex: kube-system
              action: keep
            - source_labels:
                - __meta_kubernetes_pod_label_k8s_app
              regex: alertmanager
              action: keep
            - source_labels:
                - __meta_kubernetes_pod_container_port_number
              regex:
              action: drop
- # 通过有状态的形式将prometheus部署
- prometheus-statefulset.yaml
-
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: prometheus
  # Namespace to deploy into.
  namespace: kube-system
  labels:
    k8s-app: prometheus
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
    version: v2.2.1
spec:
  serviceName: prometheus
  replicas: 1
  podManagementPolicy: Parallel
  updateStrategy:
    type: RollingUpdate
  selector:
    matchLabels:
      k8s-app: prometheus
  template:
    metadata:
      labels:
        k8s-app: prometheus
      annotations:
        scheduler.alpha.kubernetes.io/critical-pod: ""
    spec:
      priorityClassName: system-cluster-critical
      serviceAccountName: prometheus
      # Init container: chowns the data volume before the main containers run.
      initContainers:
        - name: init-chown-data
          image: busybox:latest
          imagePullPolicy: IfNotPresent
          command:
            - chown
            - "-R"
            - "65534:65534"
            - /data
          volumeMounts:
            - name: prometheus-data
              mountPath: /data
              subPath: ""
      containers:
        # Sidecar: watches /etc/config and calls the reload webhook below.
        - name: prometheus-server-configmap-reload
          image: jimmidyson/configmap-reload:v0.1
          imagePullPolicy: IfNotPresent
          args:
            - --volume-dir=/etc/config
            - --webhook-url=http://localhost:9090/-/reload
          volumeMounts:
            - name: config-volume
              mountPath: /etc/config
              readOnly: true
          resources:
            limits:
              cpu: 10m
              memory: 10Mi
            requests:
              cpu: 10m
              memory: 10Mi

        - name: prometheus-server
          # Main image.
          image: prom/prometheus:v2.2.1
          imagePullPolicy: IfNotPresent
          args:
            - --config.file=/etc/config/prometheus.yml
            - --storage.tsdb.path=/data
            - --web.console.libraries=/etc/prometheus/console_libraries
            - --web.console.templates=/etc/prometheus/consoles
            - --web.enable-lifecycle
          ports:
            - containerPort: 9090
          readinessProbe:
            # Health check.
            httpGet:
              path: /-/ready
              port: 9090
            initialDelaySeconds: 30
            timeoutSeconds: 30
          livenessProbe:
            httpGet:
              path: /-/healthy
              port: 9090
            initialDelaySeconds: 30
            timeoutSeconds: 30
          # based on 10 running nodes with 30 pods each
          resources:
            limits:
              cpu: 200m
              memory: 1000Mi
            requests:
              cpu: 200m
              memory: 1000Mi
          # Data volumes.
          volumeMounts:
            - name: config-volume
              mountPath: /etc/config
            - name: prometheus-data
              mountPath: /data
              subPath: ""
      terminationGracePeriodSeconds: 300
      volumes:
        - name: config-volume
          configMap:
            name: prometheus-config
  volumeClaimTemplates:
    - metadata:
        name: prometheus-data
      spec:
        # Uses dynamic PV provisioning; change this to a StorageClass that
        # already exists in your cluster.
        storageClassName: managed-nfs-storage
        accessModes:
          - ReadWriteOnce
        resources:
          requests:
            storage: 16Gi
- # 将prometheus暴露访问
- prometheus-service.yaml
-
配置文件
-
apiVersion: v1
kind: Service
metadata:
  name: prometheus
  # Target namespace.
  namespace: kube-system
  labels:
    kubernetes.io/name: Prometheus
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
spec:
  # Adds external access.
  type: NodePort
  # Port and protocol used for in-cluster access.
  ports:
    - name: http
      port: 9090
      protocol: TCP
      targetPort: 9090
  selector:
    k8s-app: prometheus
部署
1、下载github包:https://github.com/kubernetes/kubernetes/
2、复制文件到指定目录
mkdir ~/prometheus
cp ~/kubernetes/cluster/addons/prometheus/* ~/prometheus/
3、进入到目录
cd ~/prometheus/
4、k8s通过配置文件创建运行容器
kubectl apply -f prometheus-rbac.yaml
kubectl apply -f prometheus-configmap.yaml
kubectl apply -f prometheus-statefulset.yaml
kubectl apply -f prometheus-service.yaml
5、查看创建资源
kubectl get pod,svc -n kube-system
NAME                           READY   STATUS    RESTARTS   AGE
pod/coredns-64479cf49b-lsqqn   1/1     Running   0          75m
pod/prometheus-0               2/2     Running   0          2m12s

NAME                 TYPE        CLUSTER-IP   EXTERNAL-IP   PORT(S)                  AGE
service/kube-dns     ClusterIP   10.0.0.2     <none>        53/UDP,53/TCP,9153/TCP   75m
service/prometheus   NodePort    10.0.0.170   <none>        9090:42575/TCP           8s
6、通过 NodePort 端口访问 Prometheus 监控界面进行测试
http://192.168.190.61:42575/graph