主机:10.10.10.71、10.10.10.72、10.10.10.73 VIP: 10.10.10.101
用户:postgres
配置互信
passwd postgres
密码 postgres
vi /etc/sudoers
postgres ALL=(ALL) NOPASSWD: ALL
su - postgres
ssh-keygen
ssh-keygen -t rsa -f .ssh/id_rsa_pgpool
ssh-copy-id -i .ssh/id_rsa_pgpool.pub 10.10.10.71
ssh-copy-id -i .ssh/id_rsa_pgpool.pub 10.10.10.72
ssh-copy-id -i .ssh/id_rsa_pgpool.pub 10.10.10.73
测试
ssh -i ~/.ssh/id_rsa_pgpool 10.10.10.71 date
ssh -i ~/.ssh/id_rsa_pgpool 10.10.10.72 date
ssh -i ~/.ssh/id_rsa_pgpool 10.10.10.73 date
配置pgpass
su - postgres
echo "10.10.10.71:5432:replication:repuser:Repuser@2024_RP" >> ~/.pgpass
echo "10.10.10.72:5432:replication:repuser:Repuser@2024_RP" >> ~/.pgpass
echo "10.10.10.73:5432:replication:repuser:Repuser@2024_RP" >> ~/.pgpass
echo "10.10.10.71:5432:postgres:postgres:Postgres@2024_PG" >> ~/.pgpass
echo "10.10.10.72:5432:postgres:postgres:Postgres@2024_PG" >> ~/.pgpass
echo "10.10.10.73:5432:postgres:postgres:Postgres@2024_PG" >> ~/.pgpass
chmod 600 ~/.pgpass
pcp免密
echo 'localhost:9898:postgres:postgres' > ~/.pcppass
chmod 600 ~/.pcppass
安装pgpool-II
yum install -y https://www.pgpool.net/yum/rpms/4.1/redhat/rhel-7-x86_64/pgpool-II-release-4.1-3.noarch.rpm
rpm -e pgdg-redhat-repo-42.0-28.noarch
-- 4.1.14
yum -y install pgpool-II-pg12
yum -y install pgpool-II-pg12-debuginfo
yum -y install pgpool-II-pg12-devel
yum -y install pgpool-II-pg12-extensions
授权
chown postgres.postgres /var/run/pgpool
mkdir -p /var/log/pgpool/
chown -R postgres.postgres /var/log/pgpool/
cd /etc/pgpool-II
cp failover.sh.sample failover.sh
cp follow_master.sh.sample follow_master.sh
cp recovery_1st_stage.sample recovery_1st_stage
cp pgpool_remote_start.sample pgpool_remote_start
chown postgres.postgres /etc/pgpool-II/{failover.sh,follow_master.sh,recovery_1st_stage,pgpool_remote_start} -R
chmod u+x /etc/pgpool-II/{failover.sh,follow_master.sh,recovery_1st_stage,pgpool_remote_start} -R
chmod u+x /usr/sbin/ip
chmod u+s /usr/sbin/arping
chmod u+s /sbin/ip
chmod u+s /sbin/ifup
chmod u+s /bin/ping
chmod u+s /sbin/arping
配置pcp.conf
pg_md5 Postgres@2024_PG
vi /etc/pgpool-II/pcp.conf
echo "postgres:0b4854363bbbde1021c518850f0e5f32" >> /etc/pgpool-II/pcp.conf
pg_md5 -m -p -u postgres /etc/pgpool-II/pool_passwd
Postgres@2024_PG
pg_md5 -m -p -u twadmin /etc/pgpool-II/pool_passwd
TW_admin@2024_TH
配置hba.conf
vi /etc/pgpool-II/pool_hba.conf
host all all 0.0.0.0/0 md5
host all all 0/0 md5
配置pgpool.conf
vi pgpool.conf
pid_file_name = '/var/run/pgpool/pgpool.pid'
logdir = '/var/run/pgpool'
listen_addresses = '*'
port = 9999
pcp_listen_addresses = '*'
pcp_port = 9898

# Backend node 0. The numeric suffix on every backend_* key is the node index,
# so all keys describing one node must share the same suffix.
backend_hostname0 = '10.10.10.71'
backend_port0 = 5432
backend_weight0 = 1
backend_data_directory0 = '/data/pg_data'
backend_flag0 = 'ALLOW_TO_FAILOVER'
# NOTE(review): no backend_application_name0 is set for node 0 - confirm whether
# one is wanted (it would have to match the standby's application_name).

# Backend node 1
backend_hostname1 = '10.10.10.72'
backend_port1 = 5432
backend_weight1 = 1
backend_data_directory1 = '/data/pg_data'
backend_flag1 = 'ALLOW_TO_FAILOVER'
# Was backend_application_name0, which assigned the name to node 0 instead.
backend_application_name1 = 'server1'

# Backend node 2
backend_hostname2 = '10.10.10.73'
backend_port2 = 5432
backend_weight2 = 1
backend_data_directory2 = '/data/pg_data'
backend_flag2 = 'ALLOW_TO_FAILOVER'
# Was backend_application_name0, which overrode the previous value for node 0.
backend_application_name2 = 'server2'
流复制相关配置
replication_mode = off
load_balance_mode = on
master_slave_mode = on
master_slave_sub_mode = 'stream'
sr_check_period = 5
sr_check_user = 'repuser'
sr_check_password = 'Repuser@2024_RP'
sr_check_database = 'postgres'
数据库故障转移(故障后处理)
health_check_period = 10
health_check_timeout = 20
health_check_user = 'postgres'
health_check_password = 'Postgres@2024_PG'
health_check_database = 'postgres'
failover_command = '/etc/pgpool-II/failover.sh %d %h %p %D %m %H %M %P %r %R'
follow_master_command = '/etc/pgpool-II/follow_master.sh %d %h %p %D %m %H %M %P %r %R'
recovery_user = 'postgres'
recovery_password = 'Postgres@2024_PG'
recovery_1st_stage_command = 'recovery_1st_stage'
watchdog(看门狗)配置(用于检测pgpool-ii 节点状态, 为后续pgpool故障处理提供依据)
use_watchdog = on
wd_hostname = '10.10.10.71'
wd_port = 9000
虚拟IP指定
delegate_IP = '10.10.10.101'
if_cmd_path = '/sbin'
if_up_cmd = '/usr/bin/sudo /sbin/ip addr add $_IP_$/24 dev ens192 label ens192:0'
if_down_cmd = '/usr/bin/sudo /sbin/ip addr del $_IP_$/24 dev ens192'
arping_cmd = '/usr/bin/sudo /usr/sbin/arping -U $_IP_$ -w 1 -I ens192'
watchdog 健康检查
wd_heartbeat_port = 9694
wd_heartbeat_keepalive = 2
wd_heartbeat_deadtime = 30

# Heartbeat destination 0 (first peer pgpool node)
heartbeat_destination0 = '10.10.10.72'
heartbeat_destination_port0 = 9694
heartbeat_device0 = 'ens192'

# Heartbeat destination 1 (second peer pgpool node).
# These keys previously reused index 0, which overrode the first destination
# so only one peer would have received heartbeats.
heartbeat_destination1 = '10.10.10.73'
heartbeat_destination_port1 = 9694
heartbeat_device1 = 'ens192'
其他pgpool节点连接信息(多台请增加配置)
other_pgpool_hostname0 = '10.10.10.72'
other_pgpool_port0 = 9999
other_wd_port0 = 9000
watchdog 发生故障后, 处理的相关配置(宕机, pgpool进程终止)# 当某个节点故障后
other_pgpool_hostname1 = '10.10.10.73'
other_pgpool_port1 = 9999
other_wd_port1 = 9000
failover_when_quorum_exists = on
failover_require_consensus = on
allow_multiple_failover_requests_from_node = on
enable_consensus_with_half_votes = on
修改failover.sh
PGHOME=/usr/pgsql-12
REPL_SLOT_NAME=${FAILED_NODE_HOST//[-.]/_}
POSTGRESQL_STARTUP_USER=postgres
SSH_KEY_FILE=id_rsa_pgpool
ssh -T ${SSH_OPTIONS} ${POSTGRESQL_STARTUP_USER}@${NEW_MASTER_NODE_HOST} ${PGHOME}/bin/psql -p ${NEW_MASTER_NODE_PORT} -c \"select pg_promote\(true,30\)\"
修改follow_master.sh
#!/bin/bash
# This script is run after failover_command to synchronize the Standby with the new Primary.
#
# Steps: create a replication slot on the new Primary, then (over SSH on the
# Standby) stop PostgreSQL, run pg_rewind, rewrite primary_conninfo, mark the
# node as a standby and restart PostgreSQL. On success the node is re-attached
# to Pgpool-II with pcp_attach_node; on failure the replication slot is dropped
# and the script exits 1. There is no pg_basebackup fallback in this variant.
#
# NOTE(review): xtrace output is forwarded to syslog and this script expands
# plaintext passwords (REPLUSER_PD, PG_PD) on command lines, so they will show
# up in the log. Consider relying on ~/.pgpass instead.

set -o xtrace
exec > >(logger -i -p local1.info) 2>&1

# Special values:
# 1) %d = node id
# 2) %h = hostname
# 3) %p = port number
# 4) %D = database cluster path
# 5) %m = new primary node id
# 6) %H = new primary node hostname
# 7) %M = old master node id
# 8) %P = old primary node id
# 9) %r = new primary port number
# 10) %R = new primary database cluster path
# 11) %N = old primary node hostname
# 12) %S = old primary node port number
# 13) %% = '%' character

NODE_ID="$1"
NODE_HOST="$2"
NODE_PORT="$3"
NODE_PGDATA="$4"
NEW_MASTER_NODE_ID="$5"
NEW_MASTER_NODE_HOST="$6"
OLD_MASTER_NODE_ID="$7"
OLD_PRIMARY_NODE_ID="$8"
NEW_MASTER_NODE_PORT="$9"
NEW_MASTER_NODE_PGDATA="${10}"

PGHOME=/usr/pgsql-12
ARCHIVEDIR=/data/pg_arch
REPLUSER=repuser
REPLUSER_PD=Repuser@2024_RP
PCP_USER=postgres
PG_USER=postgres
PG_PD=Postgres@2024_PG
PGPOOL_PATH=/usr/bin
PCP_PORT=9898
# Slot name is the Standby host with '-' and '.' replaced by '_'.
REPL_SLOT_NAME=${NODE_HOST//[-.]/_}
POSTGRESQL_STARTUP_USER=postgres
SSH_KEY_FILE=id_rsa_pgpool
# Kept as a single string and expanded unquoted below so it splits into options.
SSH_OPTIONS="-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i ~/.ssh/${SSH_KEY_FILE}"

logger -i -p local1.info follow_master.sh: start: Standby node ${NODE_ID}

# Check the connection status of Standby
if ! "${PGHOME}/bin/pg_isready" -h "${NODE_HOST}" -p "${NODE_PORT}" > /dev/null 2>&1; then
    logger -i -p local1.info follow_master.sh: node_id=${NODE_ID} is not running. skipping follow master command
    exit 0
fi

## Test passwordless SSH
if ! ssh -T ${SSH_OPTIONS} "${POSTGRESQL_STARTUP_USER}@${NEW_MASTER_NODE_HOST}" ls /tmp > /dev/null; then
    logger -i -p local1.info follow_master.sh: passwordless SSH to postgres@${NEW_MASTER_NODE_HOST} failed. Please setup passwordless SSH.
    exit 1
fi

## Get PostgreSQL major version
PGVERSION=$("${PGHOME}/bin/initdb" -V | awk '{print $3}' | sed 's/\..*//' | sed 's/\([0-9]*\)[a-zA-Z].*/\1/')

if [ "${PGVERSION}" -ge 12 ]; then
    RECOVERYCONF=${NODE_PGDATA}/myrecovery.conf
else
    RECOVERYCONF=${NODE_PGDATA}/recovery.conf
fi

# Synchronize Standby with the new Primary.
logger -i -p local1.info follow_master.sh: pg_rewind for node $NODE_ID

# Create replication slot "${REPL_SLOT_NAME}"
"${PGHOME}/bin/psql" -h "${NEW_MASTER_NODE_HOST}" -p "${NEW_MASTER_NODE_PORT}" \
    -c "SELECT pg_create_physical_replication_slot('${REPL_SLOT_NAME}');" >/dev/null 2>&1

if [ $? -ne 0 ]; then
    logger -i -p local1.error follow_master.sh: create replication slot \"${REPL_SLOT_NAME}\" failed. You may need to create replication slot manually.
fi

# The remote script below is expanded locally before being sent. It restarts
# PostgreSQL through systemd; the alternative would be
#   ${PGHOME}/bin/pg_ctl -l /dev/null -w -D ${NODE_PGDATA} start
# The here-document body and its EOT terminator must stay at column 0.
ssh -T ${SSH_OPTIONS} "${POSTGRESQL_STARTUP_USER}@${NODE_HOST}" "
    set -o errexit

    ${PGHOME}/bin/pg_ctl -w -m f -D ${NODE_PGDATA} stop

    ${PGHOME}/bin/pg_rewind -D ${NODE_PGDATA} --source-server=\"user=${PG_USER} password=${PG_PD} host=${NEW_MASTER_NODE_HOST} port=${NEW_MASTER_NODE_PORT}\"

    rm -rf ${NODE_PGDATA}/pg_replslot/*

    sed -i '/primary_conninfo/d' ${NODE_PGDATA}/postgresql.auto.conf

    cat >> ${NODE_PGDATA}/postgresql.auto.conf << EOT
primary_conninfo = 'user=${REPLUSER} password=${REPLUSER_PD} host=${NEW_MASTER_NODE_HOST} port=${NEW_MASTER_NODE_PORT} sslmode=prefer sslcompression=0 gssencmode=prefer krbsrvname=postgres target_session_attrs=any'
EOT

    if [ ${PGVERSION} -ge 12 ]; then
        touch ${NODE_PGDATA}/standby.signal
        echo \"standby_mode = 'on'\" > ${NODE_PGDATA}/standby.signal
    else
        echo \"standby_mode = 'on'\" >> ${RECOVERYCONF}
    fi

    sudo systemctl restart postgresql-12
"
# Capture the remote result once. Testing $? again after an intervening
# if/fi would always see 0 and attach the node even when pg_rewind failed.
SYNC_RC=$?

if [ "${SYNC_RC}" -eq 0 ]; then

    # If start Standby successfully, attach this node.
    # Run pcp_attach_node to attach Standby node to Pgpool-II.
    "${PGPOOL_PATH}/pcp_attach_node" -w -h localhost -U "${PCP_USER}" -p "${PCP_PORT}" -n "${NODE_ID}"

    if [ $? -ne 0 ]; then
        logger -i -p local1.error follow_master.sh: end: pcp_attach_node failed
        exit 1
    fi

else

    logger -i -p local1.error follow_master.sh: end: pg_rewind failed. Try pg_basebackup.

    # If start Standby failed, drop replication slot "${REPL_SLOT_NAME}"
    "${PGHOME}/bin/psql" -h "${NEW_MASTER_NODE_HOST}" -p "${NEW_MASTER_NODE_PORT}" \
        -c "SELECT pg_drop_replication_slot('${REPL_SLOT_NAME}');" >/dev/null 2>&1

    if [ $? -ne 0 ]; then
        logger -i -p local1.error follow_master.sh: drop replication slot \"${REPL_SLOT_NAME}\" failed. You may need to drop replication slot manually.
    fi

    logger -i -p local1.error follow_master.sh: end: follow master command failed
    exit 1
fi

logger -i -p local1.info follow_master.sh: end: follow master command complete
exit 0
修改recovery_1st_stage
#!/bin/bash
# This script is executed by "recovery_1st_stage" to recovery a Standby node.
#
# Steps: create a replication slot on the local (Primary) node, then over SSH
# on the destination node wipe its data and archive directories, take a
# pg_basebackup from this host, append primary_conninfo and mark the node as a
# standby. If the remote step fails, the replication slot is dropped and the
# script exits 1.
#
# NOTE(review): xtrace output is forwarded to syslog and REPLUSER_PD is expanded
# into the remote command, so the replication password will appear in the log.

set -o xtrace
exec > >(logger -i -p local1.info) 2>&1

PRIMARY_NODE_PGDATA="$1"
DEST_NODE_HOST="$2"
DEST_NODE_PGDATA="$3"
PRIMARY_NODE_PORT="$4"
DEST_NODE_ID="$5"
DEST_NODE_PORT="$6"

# NOTE(review): pg_basebackup runs with -w and connects to this hostname, so a
# matching password entry must exist on the destination node - confirm that the
# ~/.pgpass entries cover the value returned by hostname.
PRIMARY_NODE_HOST=$(hostname)
PGHOME=/usr/pgsql-12
ARCHIVEDIR=/data/pg_arch
REPLUSER=repuser
REPLUSER_PD=Repuser@2024_RP
# Slot name is the destination host with '-' and '.' replaced by '_'.
REPL_SLOT_NAME=${DEST_NODE_HOST//[-.]/_}
POSTGRESQL_STARTUP_USER=postgres
SSH_KEY_FILE=id_rsa_pgpool
# Kept as a single string and expanded unquoted below so it splits into options.
SSH_OPTIONS="-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i ~/.ssh/${SSH_KEY_FILE}"

logger -i -p local1.info recovery_1st_stage: start: pg_basebackup for Standby node $DEST_NODE_ID

## Test passwordless SSH
if ! ssh -T ${SSH_OPTIONS} "${POSTGRESQL_STARTUP_USER}@${DEST_NODE_HOST}" ls /tmp > /dev/null; then
    logger -i -p local1.info recovery_1st_stage: passwordless SSH to postgres@${DEST_NODE_HOST} failed. Please setup passwordless SSH.
    exit 1
fi

## Get PostgreSQL major version
PGVERSION=$("${PGHOME}/bin/initdb" -V | awk '{print $3}' | sed 's/\..*//' | sed 's/\([0-9]*\)[a-zA-Z].*/\1/')

if [ "${PGVERSION}" -ge 12 ]; then
    RECOVERYCONF=${DEST_NODE_PGDATA}/myrecovery.conf
else
    RECOVERYCONF=${DEST_NODE_PGDATA}/recovery.conf
fi

## Create replication slot "${REPL_SLOT_NAME}"
"${PGHOME}/bin/psql" -p "${PRIMARY_NODE_PORT}" << EOQ
SELECT pg_create_physical_replication_slot('${REPL_SLOT_NAME}');
EOQ

## Execute pg_basebackup to recovery Standby node
# The remote script is expanded locally before being sent. The configuration
# and standby.signal paths use DEST_NODE_PGDATA; the previous NODE_PGDATA is
# never defined in this script and expanded to an empty string.
# The here-document body and its EOT terminator must stay at column 0.
ssh -T ${SSH_OPTIONS} "${POSTGRESQL_STARTUP_USER}@${DEST_NODE_HOST}" "
    set -o errexit

    rm -rf $DEST_NODE_PGDATA/*
    rm -rf $ARCHIVEDIR/*

    ${PGHOME}/bin/pg_basebackup -h $PRIMARY_NODE_HOST -U $REPLUSER -w -p $PRIMARY_NODE_PORT -D $DEST_NODE_PGDATA -X stream

    cat >> ${DEST_NODE_PGDATA}/postgresql.auto.conf << EOT
primary_conninfo = 'user=${REPLUSER} password=${REPLUSER_PD} host=${PRIMARY_NODE_HOST} port=${PRIMARY_NODE_PORT} sslmode=prefer sslcompression=0 gssencmode=prefer krbsrvname=postgres target_session_attrs=any'
EOT

    if [ ${PGVERSION} -ge 12 ]; then
        touch ${DEST_NODE_PGDATA}/standby.signal
        echo \"standby_mode = 'on'\" > ${DEST_NODE_PGDATA}/standby.signal
    else
        echo \"standby_mode = 'on'\" >> ${RECOVERYCONF}
    fi
"

if [ $? -ne 0 ]; then

    # Remote base backup failed: remove the slot created above.
    "${PGHOME}/bin/psql" -p "${PRIMARY_NODE_PORT}" << EOQ
SELECT pg_drop_replication_slot('${REPL_SLOT_NAME}');
EOQ

    logger -i -p local1.error recovery_1st_stage: end: pg_basebackup failed. online recovery failed
    exit 1
fi

logger -i -p local1.info recovery_1st_stage: end: recovery_1st_stage complete
exit 0
修改pgpool_remote_start
#!/bin/bash
# This script is run after recovery_1st_stage to start Standby node.
#
# Arguments:
#   $1 - destination (Standby) node host
#   $2 - destination node database cluster path
# Exits 1 when passwordless SSH or the remote restart fails, 0 otherwise.

DEST_NODE_HOST="$1"
DEST_NODE_PGDATA="$2"

PGHOME=/usr/pgsql-12
POSTGRESQL_STARTUP_USER=postgres
SSH_KEY_FILE=id_rsa_pgpool
# Kept as a single string and expanded unquoted below so it splits into options.
SSH_OPTIONS="-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i ~/.ssh/${SSH_KEY_FILE}"

logger -i -p local1.info pgpool_remote_start: start: remote start Standby node $DEST_NODE_HOST

## Test passwordless SSH
if ! ssh -T ${SSH_OPTIONS} "${POSTGRESQL_STARTUP_USER}@${DEST_NODE_HOST}" ls /tmp > /dev/null; then
    logger -i -p local1.info pgpool_remote_start: passwordless SSH to postgres@${DEST_NODE_HOST} failed. Please setup passwordless SSH.
    exit 1
fi

## Start Standby node
# PostgreSQL is restarted through systemd. The alternative would be
#   $PGHOME/bin/pg_ctl -l /dev/null -w -D $DEST_NODE_PGDATA start
# The restart must be its own line in the remote command; when it was fused
# onto the commented-out pg_ctl line the remote shell executed nothing.
ssh -T ${SSH_OPTIONS} "${POSTGRESQL_STARTUP_USER}@${DEST_NODE_HOST}" "
    sudo systemctl restart postgresql-12
"

if [ $? -ne 0 ]; then
    logger -i -p local1.error pgpool_remote_start: $DEST_NODE_HOST PostgreSQL start failed.
    exit 1
fi

logger -i -p local1.info pgpool_remote_start: end: $DEST_NODE_HOST PostgreSQL started successfully.
exit 0
recovery必须创建
su - postgres
psql template1 -c "CREATE EXTENSION pgpool_recovery"
拷贝文件
su - postgres
scp /etc/pgpool-II/{pool_hba.conf,pcp.conf,pool_passwd,pgpool.conf,failover.sh,follow_master.sh} 10.10.10.72:/etc/pgpool-II/
scp /etc/pgpool-II/{pool_hba.conf,pcp.conf,pool_passwd,pgpool.conf,failover.sh,follow_master.sh} 10.10.10.73:/etc/pgpool-II/
scp /etc/pgpool-II/{failover.sh,follow_master.sh,recovery_1st_stage,pgpool_remote_start} 10.10.10.72:/home/postgres
scp /etc/pgpool-II/{failover.sh,follow_master.sh,recovery_1st_stage,pgpool_remote_start} 10.10.10.73:/home/postgres
mv /home/postgres/failover.sh /etc/pgpool-II/
mv /home/postgres/follow_master.sh /etc/pgpool-II/
mv recovery_1st_stage /data/pg_data/
mv pgpool_remote_start /data/pg_data/
修改配置
#其他节点 修改以下几项即可
vi pgpool.conf
wd_hostname = '10.10.10.72' # 当前机器ip
wd_priority = 2
heartbeat_destination0 = '10.10.10.71' # 其他pg库机器(如10.10.10.71)
heartbeat_destination1 = '10.10.10.73' # 其他pg库机器(如10.10.10.73)
other_pgpool_hostname0 = '10.10.10.71' # 其他pgpool节点机器
other_pgpool_hostname1 = '10.10.10.73' # 其他pgpool节点机器
启动Pgpool-II
systemctl start pgpool.service
systemctl status pgpool.service
查看主节点
ip addr
ens192: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq state UP group default qlen 1000
    link/ether 00:50:56:96:d0:57 brd ff:ff:ff:ff:ff:ff
    inet 10.10.10.72/24 brd 10.10.243.255 scope global noprefixroute ens192
       valid_lft forever preferred_lft forever
    inet 10.10.10.101/24 scope global secondary ens192:0
       valid_lft forever preferred_lft forever
    inet6 fe80::250:56ff:fe96:d057/64 scope link
       valid_lft forever preferred_lft forever
集群维护
启动顺序:先启动所有的pg,再按照顺序启动pgpool
systemctl start postgresql-12
systemctl status postgresql-12
systemctl start pgpool.service
systemctl status pgpool.service
关闭顺序:先顺序关闭pgpool,再关闭pg数据库
systemctl stop pgpool.service
systemctl status pgpool.service
systemctl stop postgresql-12
systemctl status postgresql-12
监控节点状态
psql -U postgres -h 10.10.10.101 -p 9999
psql -h 10.10.10.101 -p9999 -Utwadmin -d thingworx
show pool_nodes;
systemctl restart pgpool.service