1 PG count limit problem
1.1 Cause analysis
1. As usual, first create the storage pool, then initialize it for rbd.
[root@ceph141~]# ceph osd pool create wenzhiyong-k8s 128 128
Error ERANGE: pg_num 128 size 3 for this pool would result in 295 cumulative PGs per OSD
(2067 total PG replicas on 7 'in' root OSDs by crush rule) which exceeds the mon_max_pg_per_osd value
of 250
This means: creating one pool wenzhiyong-k8s with pg_num 128 and 3 replicas would add 128*3=384 PG replicas.
But each OSD in the cluster may hold at most 250 PGs. With 7 OSDs, the cluster-wide ceiling is therefore 250*7=1750 PG replicas.
How many PGs does the cluster hold right now? Run ceph osd pool ls detail
and add up the pg_num of every pool: 1 + 16 + 32 + 32 + 32 + 32 + 32 + 32 + 32 + 32 + 256 + 32 = 561. Because of the 3-replica setting this becomes 561*3=1683 PG replicas; adding the 384 that the new pool would create gives 2067, which clearly exceeds the cluster ceiling of 1750.
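A quick way to do this tally without adding the numbers by hand is sketched below; it assumes each pool line printed by ceph osd pool ls detail carries space-separated "size N" and "pg_num M" fields, which is the usual output format:
# Sum pg_num * size over every pool to get the cluster's current PG replica total,
# then compare it with mon_max_pg_per_osd * number of "in" OSDs (250 * 7 = 1750 here).
ceph osd pool ls detail | awk '
  /^pool/ {
    for (i = 1; i <= NF; i++) {
      if ($i == "size")   s = $(i + 1)
      if ($i == "pg_num") p = $(i + 1)
    }
    total += s * p
  }
  END { print "total PG replicas:", total }'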
1.2 Ways to resolve it
- Best option: add more OSDs, i.e. add disks or machines.
- Middle option: raise the per-OSD PG limit. This is a temporary fix that treats the symptom rather than the cause, because more PGs per OSD means more I/O pressure on each disk.
- Poor option: create the pool with a smaller PG count, e.g.:
ceph osd pool create wenzhiyong-k8s 2 2
- Worst option: delete unused pools to free up PGs. This also only helps temporarily, and should only be done after confirming the data really is unused.
1. Temporary change. It may require a restart to take effect, and in practice it turned out not to work at all:
[root@ceph141~]# ceph tell osd.* injectargs --mon-max-pg-per-osd 500
osd.0: {}
osd.0: mon_max_pg_per_osd = '' (not observed, change may require restart) osd_delete_sleep = '' osd_delete_sleep_hdd = '' osd_delete_sleep_hybrid = '' osd_delete_sleep_ssd = '' osd_max_backfills = '' osd_pg_delete_cost = '' (not observed, change may require restart) osd_recovery_max_active = '' osd_recovery_max_active_hdd = '' osd_recovery_max_active_ssd = '' osd_recovery_sleep = '' osd_recovery_sleep_hdd = '' osd_recovery_sleep_hybrid = '' osd_recovery_sleep_ssd = '' osd_scrub_sleep = '' osd_snap_trim_sleep = '' osd_snap_trim_sleep_hdd = '' osd_snap_trim_sleep_hybrid = '' osd_snap_trim_sleep_ssd = ''
osd.1: {}
...
[root@ceph141~]# ceph osd pool create wenzhiyong-k8s 128 128
Error ERANGE: pg_num 128 size 3 for this pool would result in 295 cumulative PGs per OSD (2067 total PG replicas on 7 'in' root OSDs by crush rule) which exceeds the mon_max_pg_per_osd value of 250
[root@ceph141~]# systemctl reboot docker
2. Permanent change: set it in /etc/ceph/ceph.conf. The option is read by the monitors, so it belongs under [global] (or [mon]), and the value has to be higher than the default 250 for it to help, e.g.:
[global]
mon_max_pg_per_osd = 500
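Because mon_max_pg_per_osd is evaluated on the monitor side when a pool is created, injecting it into the OSDs (as attempted above) does not change the check. On releases that have the centralized config database, the sketch below should raise the limit without editing ceph.conf; the value 500 is just an example:
# Raise the per-OSD PG ceiling in the cluster-wide configuration database
ceph config set global mon_max_pg_per_osd 500
# Confirm the setting is now present
ceph config dump | grep mon_max_pg_per_osd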
In the end I chose to create the pool with only a small PG count.
1. Create a pool dedicated to K8S
[root@ceph141~]# ceph osd pool create wenzhiyong-k8s 2 2
pool 'wenzhiyong-k8s' created
2. Create an rbd image (block device)
[root@ceph141 ~]# rbd create -s 5G wenzhiyong-k8s/nginx-web
[root@ceph141 ~]#
[root@ceph141 ~]# rbd -p wenzhiyong-k8s ls
nginx-web
[root@ceph141 ~]#
[root@ceph141 ~]# rbd -p wenzhiyong-k8s info nginx-web
rbd image 'nginx-web':
    size 5 GiB in 1280 objects
    order 22 (4 MiB objects)
    snapshot_count: 0
    id: 12214b350eaa5
    block_name_prefix: rbd_data.12214b350eaa5
    format: 2
    features: layering, exclusive-lock, object-map, fast-diff, deep-flatten
    op_features:
    flags:
    create_timestamp: Fri Aug 23 16:34:00 2024
    access_timestamp: Fri Aug 23 16:34:00 2024
    modify_timestamp: Fri Aug 23 16:34:00 2024
3. Copy the ceph admin keyring from the ceph cluster to all worker nodes
[root@ceph141 ~]# scp /etc/ceph/ceph.client.admin.keyring 10.0.0.231:/etc/ceph/
[root@ceph141 ~]# scp /etc/ceph/ceph.client.admin.keyring 10.0.0.232:/etc/ceph/
[root@ceph141 ~]# scp /etc/ceph/ceph.client.admin.keyring 10.0.0.233:/etc/ceph/
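The in-tree rbd volume plugin used in the next section invokes the rbd client on whichever node runs the pod, so the worker nodes also need the ceph client tools. A minimal sketch, assuming Ubuntu workers and the monitor address used in this environment:
# On every worker node: install the rbd/ceph client tools
apt -y install ceph-common
# Verify the node can reach the cluster with the keyring that was just copied over
rbd -m 10.0.0.141:6789 -p wenzhiyong-k8s ls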
2 RBD as volumes
2.1 Keyring-based approach
cat > 03-rdb-admin-keyring.yaml << EOF
apiVersion: apps/v1
kind: Deployment
metadata:
  name: deploy-volume-rbd-admin-keyring
spec:
  replicas: 1
  selector:
    matchLabels:
      apps: ceph-rbd
  template:
    metadata:
      labels:
        apps: ceph-rbd
    spec:
      volumes:
      - name: data
        rbd:
          monitors:
          - 10.0.0.141:6789
          - 10.0.0.142:6789
          - 10.0.0.143:6789
          # The storage pool
          pool: wenzhiyong-k8s
          # The block device image
          image: nginx-web
          # Filesystem type; currently only "ext4", "xfs" and "ntfs" are supported.
          fsType: xfs
          # Whether the block device is read-only; defaults to false.
          readOnly: false
          # User used to connect to the ceph cluster; defaults to admin if not specified.
          user: admin
          # Path to the ceph keyring; defaults to "/etc/ceph/keyring".
          keyring: "/etc/ceph/ceph.client.admin.keyring"
      containers:
      - name: c1
        image: registry.cn-hangzhou.aliyuncs.com/yinzhengjie-k8s/apps:v1
        volumeMounts:
        - name: data
          mountPath: /wenzhiyong-data
        ports:
        - containerPort: 80
---
apiVersion: v1
kind: Service
metadata:
  name: svc-rbd
spec:
  type: NodePort
  selector:
    apps: ceph-rbd
  ports:
  - protocol: TCP
    port: 80
    targetPort: 80
    nodePort: 30033
EOF
1. Apply the K8S manifest and exec into the container to test
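This step boils down to the two commands below (a sketch; the deployment name comes from the manifest above):
kubectl apply -f 03-rdb-admin-keyring.yaml
# kubectl exec against a Deployment picks one of its pods
kubectl exec -it deploy/deploy-volume-rbd-admin-keyring -- sh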
/wenzhiyong-data # cd /
/ # rm -rf /wenzhiyong-data/
rm: can't remove '/wenzhiyong-data': Resource busy
/ # df -h | grep wen
/dev/rbd0 5.0G 68.1M 4.9G 1% /wenzhiyong-data
/ # ls -l /dev/rbd0
ls: /dev/rbd0: No such file or directory
/ # umount /wenzhiyong-data/
umount: can't unmount /wenzhiyong-data: Operation not permitted
/ # rm -rf /wenzhiyong-data/
rm: can't remove '/wenzhiyong-data': Resource busy
2. Check that the pod was scheduled to the worker232 node
[root@master231 04-cephfs]# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE
deploy-volume-rbd-admin-keyring-6b94f8cc86-nnpjd 1/1 Running 0 6m37s 10.100.2.45 worker232
3. On worker232 you can see this rbd device is mapped and mounted for the pod to use
[root@worker232~]# ll /dev/rbd*
brw-rw---- 1 root disk 252, 0 Nov 8 22:38 /dev/rbd0

/dev/rbd:
total 0
drwxr-xr-x 2 root root 60 Nov 8 22:38 wenzhiyong-k8s/
[root@worker232~]# df -h | grep rbd
/dev/rbd0 5.0G 69M 5.0G 2% /var/lib/kubelet/plugins/kubernetes.io/rbd/mounts/wenzhiyong-k8s-image-nginx-web
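The kernel mapping can also be confirmed directly on the worker, assuming ceph-common is installed there:
# List the rbd images currently mapped on this node
rbd showmapped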
4. After the deployment is deleted, the mount on worker232 disappears as well
[root@worker232~]# df -h | grep rbd
[root@worker232~]#
2.2 Key-based (Secret) approach
1. Get the ceph admin key from the cluster and base64-encode it
[root@worker232~]# awk '/key/ {printf "%s", $NF}' /etc/ceph/ceph.client.admin.keyring | more
AQAlsChnHubLJRAAH2s3vhyGrxgba8anloPDtg==
[root@worker232~]# awk '/key/ {printf "%s", $NF}' /etc/ceph/ceph.client.admin.keyring | base64
QVFBbHNDaG5IdWJMSlJBQUgyczN2aHlHcnhnYmE4YW5sb1BEdGc9PQ==
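As a sanity check, decoding the string should print back exactly the key from the keyring:
echo QVFBbHNDaG5IdWJMSlJBQUgyczN2aHlHcnhnYmE4YW5sb1BEdGc9PQ== | base64 -d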
2. Write the manifest and wrap the key in a Secret resource
cat > 04-rbd-admin-key.yaml << EOF
apiVersion: v1
kind: Secret
metadata:
  name: ceph-admin-secret
type: "kubernetes.io/rbd"
data:
  # The ceph admin key, base64-encoded. Change this to your own!
  key: QVFBbHNDaG5IdWJMSlJBQUgyczN2aHlHcnhnYmE4YW5sb1BEdGc9PQ==
# Note: if data feels cumbersome, stringData can be used instead.
#stringData:
#  key: AQBeYMVm8+/UNhAAV8lxv/CvIm0Lyer1wSp9yA==
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: deploy-volume-rbd-secrets-keyring
spec:
  replicas: 1
  selector:
    matchLabels:
      apps: ceph-rbd
  template:
    metadata:
      labels:
        apps: ceph-rbd
    spec:
      volumes:
      - name: data
        rbd:
          monitors:
          - 10.0.0.141:6789
          - 10.0.0.142:6789
          - 10.0.0.143:6789
          pool: wenzhiyong-k8s
          image: nginx-web
          fsType: xfs
          readOnly: false
          user: admin
          secretRef:
            # Name of the secret that stores the ceph admin key
            name: ceph-admin-secret
      containers:
      - name: c1
        image: registry.cn-hangzhou.aliyuncs.com/yinzhengjie-k8s/apps:v3
        volumeMounts:
        - name: data
          mountPath: /wenzhiyong-data
        ports:
        - containerPort: 80
EOF
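Apply it the same way as before (sketch):
kubectl apply -f 04-rbd-admin-key.yaml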
The behavior is the same as with the keyring-based approach.
3 RBD with a dynamic StorageClass
01 SC
cat > 01-rbd-sc.yaml << EOF
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: csi-rbd-sc
provisioner: rbd.csi.ceph.com
parameters:
  # Cluster ID; change this to your own environment (visible via "ceph -s")
  clusterID: 12fad866-9aa0-11ef-8656-6516a17ad6dd
  # Storage pool
  pool: wenzhiyong-k8s
  # Image features
  imageFeatures: layering
  csi.storage.k8s.io/provisioner-secret-name: csi-rbd-secret
  csi.storage.k8s.io/provisioner-secret-namespace: default
  csi.storage.k8s.io/controller-expand-secret-name: csi-rbd-secret
  csi.storage.k8s.io/controller-expand-secret-namespace: default
  csi.storage.k8s.io/node-stage-secret-name: csi-rbd-secret
  csi.storage.k8s.io/node-stage-secret-namespace: default
  # Filesystem type
  csi.storage.k8s.io/fstype: xfs
# Reclaim policy
reclaimPolicy: Delete
allowVolumeExpansion: true
mountOptions:
- discard
EOF
02 rbd secret
cat > csi-rbd-secret.yaml <<EOF
apiVersion: v1
kind: Secret
metadata:
  name: csi-rbd-secret
  namespace: default
# Unlike data, stringData needs no base64 encoding; values are written as-is.
stringData:
  # The user is admin here; a custom non-admin user also works.
  userID: admin
  # The admin user's key (take it straight from the ceph keyring file)
  userKey: AQAlsChnHubLJRAAH2s3vhyGrxgba8anloPDtg==
EOF
03 rbd configmap
cat > ceph-config-map.yaml <<EOF
apiVersion: v1
kind: ConfigMap
metadata:
  name: ceph-config
data:
  # The ceph cluster config file "/etc/ceph/ceph.conf"; auth can be cephx; fsid is the cluster ID
  ceph.conf: |
    [global]
    fsid = 12fad866-9aa0-11ef-8656-6516a17ad6dd
    mon_initial_members = ceph141, ceph142, ceph143
    mon_host = 10.0.0.141,10.0.0.142,10.0.0.143
    auth_cluster_required = cephx
    auth_service_required = cephx
    auth_client_required = cephx
  # A keyring key must exist; its value is left empty
  keyring: |
EOF
04 csi configmap
Container Storage Interface (CSI)
cat > csi-config-map.yaml <<EOF
apiVersion: v1
kind: ConfigMap
metadata:
  name: ceph-csi-config
data:
  # clusterID can be obtained via "ceph -s"
  # monitors are your own ceph cluster's mon addresses
  config.json: |-
    [
      {
        "clusterID": "12fad866-9aa0-11ef-8656-6516a17ad6dd",
        "monitors": [
          "10.0.0.141:6789",
          "10.0.0.142:6789",
          "10.0.0.143:6789"
        ]
      }
    ]
EOF
05 csi-kms-config-map
cat > csi-kms-config-map.yaml <<EOF
apiVersion: v1
kind: ConfigMap
metadata:
  name: ceph-csi-encryption-kms-config
data:
  # This ConfigMap can be omitted, but then the kms-related parts of the two files below
  # must be commented out:
  # - deploy/rbd/kubernetes/csi-rbdplugin-provisioner.yaml
  # - deploy/rbd/kubernetes/csi-rbdplugin.yaml
  config.json: |-
    {}
EOF
06 Define PVCs
cat > pvc.yaml <<EOF
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: rbd-pvc01
spec:
  accessModes:
  - ReadWriteOnce
  resources:
    requests:
      storage: 2Gi
  storageClassName: csi-rbd-sc
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: rbd-pvc02
spec:
  accessModes:
  - ReadWriteOnce
  resources:
    requests:
      storage: 4Gi
  storageClassName: csi-rbd-sc
EOF
07 Deploy the CSI components
The following files are needed; download them from: https://github.com/ceph/ceph-csi/tree/release-v3.7/deploy/rbd/kubernetes
==Note:== the files from the previous 6 steps must be applied first, otherwise these pods cannot be created (see the apply sketch after the file list).
csi-config-map.yaml csidriver.yaml csi-nodeplugin-rbac.yaml
csi-provisioner-rbac.yaml csi-rbdplugin-provisioner.yaml csi-rbdplugin.yaml
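A sketch of the apply order, using the filenames from steps 01-06 plus the CSI manifests listed above:
# Manifests from steps 01-06 first
kubectl apply -f 01-rbd-sc.yaml -f csi-rbd-secret.yaml -f ceph-config-map.yaml \
              -f csi-config-map.yaml -f csi-kms-config-map.yaml -f pvc.yaml
# Then the CSI driver manifests (csi-config-map.yaml is already covered by step 04)
kubectl apply -f csidriver.yaml -f csi-nodeplugin-rbac.yaml -f csi-provisioner-rbac.yaml \
              -f csi-rbdplugin-provisioner.yaml -f csi-rbdplugin.yaml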
[root@master231 deploy]# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE
csi-rbdplugin-l24hj 3/3 Running 0 30m 10.0.0.233 worker233
csi-rbdplugin-provisioner-5dfcf67885-8rk48 7/7 Running 0 30m 10.100.1.30 worker233
csi-rbdplugin-provisioner-5dfcf67885-9wznm 7/7 Running 0 30m 10.100.2.48 worker232
csi-rbdplugin-qz7k6 3/3 Running 0 30m 10.0.0.232 worker232
08 Check how rbd images relate to the PVCs
1. On the ceph cluster you can now see 2 new rbd images (one per PVC), alongside the earlier nginx-web image
[root@ceph141~]# rbd ls -p wenzhiyong-k8s
csi-vol-d33df512-9df7-11ef-85fc-4a89d731ca68
csi-vol-d33df570-9df7-11ef-85fc-4a89d731ca68
nginx-web
[root@ceph141~]# rbd info wenzhiyong-k8s/csi-vol-d33df512-9df7-11ef-85fc-4a89d731ca68
rbd image 'csi-vol-d33df512-9df7-11ef-85fc-4a89d731ca68':
    size 2 GiB in 512 objects
    order 22 (4 MiB objects)
    snapshot_count: 0
    id: 392f1fe374022
    block_name_prefix: rbd_data.392f1fe374022
    format: 2
    features: layering
    op_features:
    flags:
    create_timestamp: Sat Nov 9 01:35:19 2024
    access_timestamp: Sat Nov 9 01:35:19 2024
    modify_timestamp: Sat Nov 9 01:35:19 2024
[root@ceph141~]# rbd info wenzhiyong-k8s/csi-vol-d33df570-9df7-11ef-85fc-4a89d731ca68
rbd image 'csi-vol-d33df570-9df7-11ef-85fc-4a89d731ca68':
    size 4 GiB in 1024 objects
    order 22 (4 MiB objects)
    snapshot_count: 0
    id: 392f182ce6323
    block_name_prefix: rbd_data.392f182ce6323
    format: 2
    features: layering
    op_features:
    flags:
    create_timestamp: Sat Nov 9 01:35:19 2024
    access_timestamp: Sat Nov 9 01:35:19 2024
    modify_timestamp: Sat Nov 9 01:35:19 2024
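To map the other direction, from a PVC to its backing rbd image, the PV created by the provisioner records the image name in its CSI volume attributes. A sketch; the imageName attribute is the one ceph-csi uses, but it is worth double-checking on your version:
# Resolve the PV bound to the PVC, then read the rbd image name from its CSI attributes
PV=$(kubectl get pvc rbd-pvc01 -o jsonpath='{.spec.volumeName}')
kubectl get pv "$PV" -o jsonpath='{.spec.csi.volumeAttributes.imageName}{"\n"}'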