一、 切换前预检查
1. dg_precheck_main_v1.4.sh
#!/bin/bash
#**********************************************************************************
# Author: Hehuyi_In
# Date: 2022-06-16
# FileName: dg_precheck_main_v1.4.sh
#
# For sys user, execute the script directly.
# For other sysdba privileged users, you need to manually synchronize the password file to standby server in versions below 12.2.
# The script cannot be executed on the Linux 5 os.
#
# Example:
# ./dg_precheck_main_v1.4.sh &> dg_precheck.log
# cat dg_precheck.log | grep --color=no -E "Warning|Error" , or less -R dg_precheck.log
#
#
# Description:
# 2022-06-16 v_1.0 just get and print the information we want
# 2022-06-17 v_1.1 auto check the database role
# 2022-06-22 v_1.2 adjust the output format
# 2022-06-28 v_1.3 add compare function, compare two nodes at a time
# 2022-06-28 split to two scripts dg_precheck_main_v1.0.sh and dg_precheck_func_v1.0.sh
# 2022-07-06 dg_precheck_main_v1.1 check os info
# 2022-07-12 dg_precheck_main_v1.2 adjust the output format
# 2022-07-13 dg_precheck_main_v1.3 adjust os info check function
# 2022-10-19 dg_precheck_main_v1.4 add checking items: dblink,standby_file_management,flashback_on,db_flashback_retention_target,db_recovery_file_dest,db_recovery_file_dest_size
#**********************************************************************************

# All check/get helpers used below come from the function library.
source ./dg_precheck_func_v1.4.sh

# TNS alias list
TNS_LIST=(test_dg test)

# Connection credentials
DBUSER='sys/"xxxxx"'

# ---------- Detect the database role of every alias in TNS_LIST ----------
# Sets PRIMARY_TNS and STANDBY_TNS.
auto_check_db_role

# Per-node result dictionaries, keyed by parameter / metric name
declare -A PRIMARY_TNS_DIC
declare -A STANDBY_TNS_DIC

echo -e "--------------------------------------------------------------------------------"
echo -e "- 各节点主要参数检查 "
echo -e "--------------------------------------------------------------------------------\n"

# Memory parameters, compatible and standby_file_management: fetch and compare
PARAMS_LIST=(sga_max_size sga_target pga_aggregate_target shared_pool_size db_cache_size streams_pool_size compatible standby_file_management)
for PARAM in "${PARAMS_LIST[@]}"; do
  PRIMARY_TNS_DIC[$PARAM]=$(get_params_value "$PRIMARY_TNS" "$PARAM")
  STANDBY_TNS_DIC[$PARAM]=$(get_params_value "$STANDBY_TNS" "$PARAM")
  # Compare primary against standby. Values are quoted so that an empty value
  # cannot shift the positional arguments of compare_value.
  compare_value "${PRIMARY_TNS_DIC[$PARAM]}" "${STANDBY_TNS_DIC[$PARAM]}" "$PARAM"
done

# Remaining parameters: only fetched here, checked individually further down
PARAMS_LIST=(log_archive_max_processes db_create_file_dest db_file_name_convert log_file_name_convert fal_client fal_server log_archive_config db_recovery_file_dest db_recovery_file_dest_size db_flashback_retention_target)
for PARAM in "${PARAMS_LIST[@]}"; do
  PRIMARY_TNS_DIC[$PARAM]=$(get_params_value "$PRIMARY_TNS" "$PARAM")
  STANDBY_TNS_DIC[$PARAM]=$(get_params_value "$STANDBY_TNS" "$PARAM")
done
echo -e "\n"

echo -e "--------------------------------------------------------------------------------"
echo -e "- 各节点主要文件数检查 "
echo -e "--------------------------------------------------------------------------------\n"

PRIMARY_TNS_DIC[redo_count]=$(get_file_count "$PRIMARY_TNS" "LOG")
PRIMARY_TNS_DIC[standby_count]=$(get_file_count "$PRIMARY_TNS" "STANDBY_LOG")
PRIMARY_TNS_DIC[tempfile_count]=$(get_file_count "$PRIMARY_TNS" "TEMPFILE")
STANDBY_TNS_DIC[redo_count]=$(get_file_count "$STANDBY_TNS" "LOG")
STANDBY_TNS_DIC[standby_count]=$(get_file_count "$STANDBY_TNS" "STANDBY_LOG")
STANDBY_TNS_DIC[tempfile_count]=$(get_file_count "$STANDBY_TNS" "TEMPFILE")

PRIMARY_TNS_DIC[datafile_online_count]=$(get_datafile_online_count "$PRIMARY_TNS")
STANDBY_TNS_DIC[datafile_online_count]=$(get_datafile_online_count "$STANDBY_TNS")

compare_value "${PRIMARY_TNS_DIC[redo_count]}" "${STANDBY_TNS_DIC[redo_count]}" "redo_count"
compare_value "${PRIMARY_TNS_DIC[standby_count]}" "${STANDBY_TNS_DIC[standby_count]}" "standby_count"
compare_value "${PRIMARY_TNS_DIC[tempfile_count]}" "${STANDBY_TNS_DIC[tempfile_count]}" "tempfile_count"
compare_value "${PRIMARY_TNS_DIC[datafile_online_count]}" "${STANDBY_TNS_DIC[datafile_online_count]}" "datafile_online_count"
echo -e "\n"

echo -e "--------------------------------------------------------------------------------"
echo -e "- 从节点redo日志状态检查 "
echo -e "--------------------------------------------------------------------------------\n"

# Only the standby needs this check.
STANDBY_TNS_DIC[redo_status_count]=$(get_redo_status_count "$STANDBY_TNS")
# Quoting matters: db_create_file_dest / log_file_name_convert are often empty,
# and unquoted they would vanish and shift the count into the wrong position.
check_redo_status_count "${STANDBY_TNS_DIC[db_create_file_dest]}" "${STANDBY_TNS_DIC[log_file_name_convert]}" "${STANDBY_TNS_DIC[redo_status_count]}"
echo -e "\n"

echo -e "--------------------------------------------------------------------------------"
echo -e "- 闪回开启及保留时间检查 "
echo -e "--------------------------------------------------------------------------------\n"

# ---------- flashback_on and db_flashback_retention_target ----------
PRIMARY_TNS_DIC[flashback_on]=$(get_flashback_on "$PRIMARY_TNS")
echo -e "- primary database "
compare_value "${PRIMARY_TNS_DIC[flashback_on]}" "NO" "flashback_on"
compare_value "${PRIMARY_TNS_DIC[db_flashback_retention_target]}" "4320" "db_flashback_retention_target"

STANDBY_TNS_DIC[flashback_on]=$(get_flashback_on "$STANDBY_TNS")
echo -e "- standby database "
compare_value "${STANDBY_TNS_DIC[flashback_on]}" "YES" "flashback_on"
compare_value "${STANDBY_TNS_DIC[db_flashback_retention_target]}" "4320" "db_flashback_retention_target"
echo -e "\n"

echo -e "--------------------------------------------------------------------------------"
echo -e "- FRA区设置检查 "
echo -e "--------------------------------------------------------------------------------\n"

# ---------- db_recovery_file_dest and db_recovery_file_dest_size ----------
# Primary and standby should be configured identically.
compare_value "${PRIMARY_TNS_DIC[db_recovery_file_dest]}" "${STANDBY_TNS_DIC[db_recovery_file_dest]}" "db_recovery_file_dest"
compare_value "${PRIMARY_TNS_DIC[db_recovery_file_dest_size]}" "${STANDBY_TNS_DIC[db_recovery_file_dest_size]}" "db_recovery_file_dest_size"
echo -e "\n"

echo -e "--------------------------------------------------------------------------------"
echo -e "- 各节点主从相关参数检查 "
echo -e "--------------------------------------------------------------------------------\n"

# ---------- fal_client and fal_server ----------
# fal_server must contain the peer. The value contains commas, so it is quoted
# when passed to keep it from being split before it reaches the function.
# Primary node
include_value "$PRIMARY_TNS" "fal_server" "${PRIMARY_TNS_DIC[fal_server]}" "$STANDBY_TNS"
# Standby node
include_value "$STANDBY_TNS" "fal_server" "${STANDBY_TNS_DIC[fal_server]}" "$PRIMARY_TNS"

# fal_client must contain the node itself.
# Primary node
include_value "$PRIMARY_TNS" "fal_client" "${PRIMARY_TNS_DIC[fal_client]}" "$PRIMARY_TNS"
# Standby node
include_value "$STANDBY_TNS" "fal_client" "${STANDBY_TNS_DIC[fal_client]}" "$STANDBY_TNS"

# ---------- log_archive_config ----------
# Primary node
include_value "$PRIMARY_TNS" "log_archive_config" "${PRIMARY_TNS_DIC[log_archive_config]}" "$STANDBY_TNS"
# Standby node
include_value "$STANDBY_TNS" "log_archive_config" "${STANDBY_TNS_DIC[log_archive_config]}" "$PRIMARY_TNS"

# ---------- log_archive_max_processes ----------
check_log_archive_max_processes "$PRIMARY_TNS" "${PRIMARY_TNS_DIC[log_archive_max_processes]}"
check_log_archive_max_processes "$STANDBY_TNS" "${STANDBY_TNS_DIC[log_archive_max_processes]}"

# ---------- log_archive_dest_n and log_archive_dest_state_n ----------
check_archive_dest "$PRIMARY_TNS"
check_archive_dest "$STANDBY_TNS"

check_archive_dest_state "$PRIMARY_TNS"
check_archive_dest_state "$STANDBY_TNS"

# ---------- OMF and db_file_name_convert, log_file_name_convert ----------
check_omf_and_convert_params "$PRIMARY_TNS" "${PRIMARY_TNS_DIC[db_create_file_dest]}" "${PRIMARY_TNS_DIC[db_file_name_convert]}" "${PRIMARY_TNS_DIC[log_file_name_convert]}"
check_omf_and_convert_params "$STANDBY_TNS" "${STANDBY_TNS_DIC[db_create_file_dest]}" "${STANDBY_TNS_DIC[db_file_name_convert]}" "${STANDBY_TNS_DIC[log_file_name_convert]}"
echo -e "\n"

echo -e "--------------------------------------------------------------------------------"
echo -e "- 从节点延迟 MRP进程与触发器检查 "
echo -e "--------------------------------------------------------------------------------\n"

check_db_lag "$STANDBY_TNS" "transport lag"
check_db_lag "$STANDBY_TNS" "apply lag"

check_mrp_process "$STANDBY_TNS"

check_mrp_trigger "$PRIMARY_TNS"
check_mrp_trigger "$STANDBY_TNS"
echo -e "\n"

echo -e "--------------------------------------------------------------------------------"
echo -e "- dblink检查 "
echo -e "--------------------------------------------------------------------------------\n"

# ---------- dblink ----------
# Checking the primary is sufficient.
check_dblink "$PRIMARY_TNS"
echo -e "\n"
2. dg_precheck_func_v1.4.sh
#!/bin/bash
#**********************************************************************************
# Author: Hehuyi_In
# Date: 2022-06-16
# FileName: dg_precheck_func_v1.4.sh
#
# Description:
# 2022-06-16 v_1.0 just get and print the information we want
# 2022-06-17 v_1.1 auto check the database role
# 2022-06-22 v_1.2 adjust the output format
# 2022-06-28 v_1.3 add compare function, compare two nodes at a time
# 2022-06-28 split to two scripts dg_precheck_main_v1.0.sh and dg_precheck_func_v1.0.sh
# 2022-07-06 dg_precheck_func_v1.1 check os info
# 2022-07-12 dg_precheck_func_v1.2 adjust the output format
# 2022-07-13 dg_precheck_func_v1.3 adjust os info check function
# 2022-10-19 dg_precheck_func_v1.4 add checking items: dblink,standby_file_management,flashback_on,db_flashback_retention_target,db_recovery_file_dest,db_recovery_file_dest_size
#**********************************************************************************

# Colour definitions (ANSI escape sequences, interpreted by `echo -e` / printf)
# Red
ERROR_COLOR='\e[1;31m'
# Green (the misspelt name is kept because every function in this file refers to it)
NARMAL_COLOR='\e[1;32m'
# Yellow
WARNING_COLOR='\e[1;33m'
# Reset
RESET_COLOR='\e[0m'

# ------------------------------------------------------------------------
# Detect the database role behind every TNS alias in TNS_LIST.
# Globals read:    TNS_LIST, DBUSER, ERROR_COLOR, RESET_COLOR
# Globals written: PRIMARY_TNS (alias whose role is PRIMARY),
#                  STANDBY_TNS (alias whose role is PHYSICAL STANDBY)
# Exits with status 1 when no primary or no physical standby alias was found.
auto_check_db_role() {
  local tns db_role
  for tns in "${TNS_LIST[@]}"; do
    # The here-doc terminator must sit alone on its line, hence the lone ")".
    db_role=$(sqlplus -s "$DBUSER@$tns" as sysdba <<EOF
set heading off feedback off pagesize 0 verify off echo off
select database_role from v\$database;
EOF
)
    if [[ "$db_role" = "PRIMARY" ]]; then
      # Primary: remember it as PRIMARY_TNS
      PRIMARY_TNS=$tns
    elif [[ "$db_role" = "PHYSICAL STANDBY" ]]; then
      # Standby: remember it as STANDBY_TNS
      STANDBY_TNS=$tns
    fi
  done

  if [[ "$PRIMARY_TNS" = "" ]]; then
    echo -e "${ERROR_COLOR} $(date "+%Y-%m-%d %H:%M:%S") | Error! There is no primary database tns ${RESET_COLOR}"
    exit 1
  elif [[ "$STANDBY_TNS" = "" ]]; then
    echo -e "${ERROR_COLOR} $(date "+%Y-%m-%d %H:%M:%S") | Error! There is no standby database tns ${RESET_COLOR}"
    exit 1
  fi
}
# ---------------------------------- Get operating system information --------------------------------------
# Parameter 1: TNS connect string

# get_cpu_info() {
# sqlplus -s $DBUSER@$1 as sysdba <<EOF
# !cat /proc/cpuinfo | grep "processor" | wc -l
# EOF
# }

# get_memory_info() {
# sqlplus -s $DBUSER@$1 as sysdba <<EOF
# !free -h | grep 'Mem:' | awk '{print \$2}'
# EOF
# }

# get_disk_info() {
# sqlplus -s $DBUSER@$1 as sysdba <<EOF
# !df -m | grep -v 'Filesystem' | awk '{sum+=\$2} END {print sum}'
# EOF
# }

# Get the value of an Oracle parameter
# Parameter 1: TNS connect string; parameter 2: parameter name (case-insensitive)
# Prints on stdout whatever sqlplus returns for the VALUE of that v$parameter row.
# Exits with status 1 when either argument is empty. The message goes to stderr
# so that callers capturing stdout do not store it as a parameter value.
get_params_value() {
  if [[ -z "$1" || -z "$2" ]]; then
    echo -e "${ERROR_COLOR} $(date "+%Y-%m-%d %H:%M:%S") | Error! get_params_value() parameter 1 and 2 can't be null ${RESET_COLOR}" >&2
    exit 1
  fi
  sqlplus -s "$DBUSER@$1" as sysdba <<EOF
set heading off feedback off pagesize 0 verify off echo off
select value from v\$parameter where upper(name)=upper('$2');
EOF
}
# log_archive_max_processes parameter check
# Parameter 1: TNS connect string (used as the row label);
# parameter 2: current value of log_archive_max_processes
# Prints Success when the value is >= 4, Warning otherwise.
# Exits with status 1 when either argument is empty.
check_log_archive_max_processes() {
  if [[ -z "$1" || -z "$2" ]]; then
    # -e is required for the colour escapes to be interpreted, as in the other helpers
    echo -e "${ERROR_COLOR} $(date "+%Y-%m-%d %H:%M:%S") | Error! check_log_archive_max_processes() parameter 1 and 2 can't be null ${RESET_COLOR}"
    exit 1
  fi
  if [[ "$2" -ge 4 ]]; then
    printf "${NARMAL_COLOR}%-10s | Success | log_archive_max_processes >= 4${RESET_COLOR}\n" "$1"
  else
    printf "${WARNING_COLOR}%-10s | Warning | log_archive_max_processes < 4${RESET_COLOR}\n" "$1"
  fi
}
# Check the log_archive_dest_n parameters for the DELAY keyword (delayed standby)
# Parameter 1: TNS connect string
# Counts v$parameter rows named log_archive_dest% whose value contains DELAY and
# prints Success when there are none, Warning otherwise.
# Exits with status 1 when the argument is empty.
check_archive_dest() {
  if [[ -z "$1" ]]; then
    echo -e "${ERROR_COLOR} $(date "+%Y-%m-%d %H:%M:%S") | Error! check_archive_dest() parameter 1 can't be null ${RESET_COLOR}"
    exit 1
  fi
  local count
  count=$(sqlplus -s "$DBUSER@$1" as sysdba <<EOF
set heading off feedback off pagesize 0 verify off echo off
select count(*) FROM v\$parameter where name like 'log_archive_dest%' and upper(value) like '%DELAY%';
EOF
)
  if [[ "$count" -eq 0 ]]; then
    printf "${NARMAL_COLOR}%-10s | Success | There is no DELAY attribute in log_archive_dest_n${RESET_COLOR}\n" "$1"
  else
    printf "${WARNING_COLOR}%-10s | Warning | There is DELAY attribute in log_archive_dest_n${RESET_COLOR}\n" "$1"
  fi
}
# Check the log_archive_dest_state_n parameters for the value DEFER
# Parameter 1: TNS connect string
# Counts v$parameter rows named log_archive_dest_state% whose value is DEFER and
# prints Success when there are none, Warning otherwise.
# Exits with status 1 when the argument is empty.
check_archive_dest_state() {
  if [[ -z "$1" ]]; then
    echo -e "${ERROR_COLOR} $(date "+%Y-%m-%d %H:%M:%S") | Error! check_archive_dest_state() parameter 1 can't be null ${RESET_COLOR}"
    exit 1
  fi
  local count
  count=$(sqlplus -s "$DBUSER@$1" as sysdba <<EOF
set heading off feedback off pagesize 0 verify off echo off
select count(*) FROM v\$parameter where name like 'log_archive_dest_state%' and upper(value)='DEFER';
EOF
)
  if [[ "$count" -eq 0 ]]; then
    printf "${NARMAL_COLOR}%-10s | Success | There is no DEFER value in log_archive_dest_state_n${RESET_COLOR}\n" "$1"
  else
    printf "${WARNING_COLOR}%-10s | Warning | There is DEFER value in log_archive_dest_state_n${RESET_COLOR}\n" "$1"
  fi
}
# OMF and convert parameter check
# Parameter 1: TNS connect string (row label); parameter 2: db_create_file_dest value;
# parameter 3: db_file_name_convert value; parameter 4: log_file_name_convert value
# Success when db_create_file_dest is set, or when both convert parameters are set.
# Otherwise a Warning is printed.
check_omf_and_convert_params() {
  if [[ -n "$2" ]]; then
    printf "${NARMAL_COLOR}%-10s | Success | db_create_file_dest parameter was set${RESET_COLOR}\n" "$1"
  elif [[ -n "$3" && -n "$4" ]]; then
    printf "${NARMAL_COLOR}%-10s | Success | dbfile and logfile convert parameters were set${RESET_COLOR}\n" "$1"
  else
    # This branch is reached only when the parameters are missing, so say so.
    printf "${WARNING_COLOR}%-10s | Warning | db_create_file_dest,dbfile and logfile convert parameters were not set${RESET_COLOR}\n" "$1"
  fi
}
# Get the number of files of a given kind
# Parameter 1: TNS connect string;
# parameter 2: view name without the leading "V$" (LOG, STANDBY_LOG, TEMPFILE)
# Prints on stdout whatever sqlplus returns for count(*) of V$<parameter 2>.
# Exits with status 1 when either argument is empty. The message goes to stderr
# so that callers capturing stdout do not store it as a count.
get_file_count() {
  # An empty view suffix would produce the invalid query "FROM V$;", so both are required.
  if [[ -z "$1" || -z "$2" ]]; then
    echo -e "${ERROR_COLOR} $(date "+%Y-%m-%d %H:%M:%S") | Error! get_file_count() parameter 1 and 2 can't be null ${RESET_COLOR}" >&2
    exit 1
  fi
  sqlplus -s "$DBUSER@$1" as sysdba <<EOF
set heading off feedback off pagesize 0 verify off echo off
SELECT count(*) FROM V\$$2;
EOF
}
# Get the number of redo log files in the relevant states
# Parameter 1: TNS connect string
# Prints on stdout whatever sqlplus returns for the number of V$LOG rows whose
# STATUS is not UNUSED, CLEARING or CLEARING_CURRENT.
# Exits with status 1 (message on stderr) when the argument is empty.
get_redo_status_count() {
  if [[ -z "$1" ]]; then
    echo -e "${ERROR_COLOR} $(date "+%Y-%m-%d %H:%M:%S") | Error! get_redo_status_count() parameter 1 can't be null ${RESET_COLOR}" >&2
    exit 1
  fi
  sqlplus -s "$DBUSER@$1" as sysdba <<EOF
set heading off feedback off pagesize 0 verify off echo off
SELECT count(*) FROM V\$LOG WHERE STATUS NOT IN ('UNUSED', 'CLEARING','CLEARING_CURRENT');
EOF
}
# Check the number of redo log files in the relevant states
# Parameter 1: db_create_file_dest value; parameter 2: log_file_name_convert value;
# parameter 3: the standby's redo_status_count (${STANDBY_TNS_DIC[redo_status_count]})
# When OMF or LOG_FILE_NAME_CONVERT is configured the check is skipped.
# Otherwise the count is expected to be 0; anything else raises a Warning.
check_redo_status_count() {
  if [[ -n "$1" || -n "$2" ]]; then
    printf "${NARMAL_COLOR}%-30s | %-8s | There is no need to check this item${RESET_COLOR}\n" redo_status_count Success
  elif [[ -n "$3" && "$3" -eq 0 ]]; then
    # 0 is the expected value, so report it as a success rather than a warning.
    printf "${NARMAL_COLOR}%-30s | %-8s | redo_status_count value is 0${RESET_COLOR}\n" redo_status_count Success
  else
    # Label the row with the item name; $1 is always empty on this path.
    printf "${WARNING_COLOR}%-30s | %-8s | redo_status_count value is not 0${RESET_COLOR}\n" redo_status_count Warning
  fi
}
# Get the number of data files in ONLINE status
# Parameter 1: TNS connect string
# Prints on stdout whatever sqlplus returns for the number of V$DATAFILE rows
# with STATUS='ONLINE'.
# Exits with status 1 (message on stderr) when the argument is empty.
get_datafile_online_count() {
  if [[ -z "$1" ]]; then
    echo -e "${ERROR_COLOR} $(date "+%Y-%m-%d %H:%M:%S") | Error! get_datafile_online_count() parameter 1 can't be null ${RESET_COLOR}" >&2
    exit 1
  fi
  sqlplus -s "$DBUSER@$1" as sysdba <<EOF
set heading off feedback off pagesize 0 verify off echo off
SELECT count(*) FROM V\$DATAFILE WHERE STATUS='ONLINE';
EOF
}
# Get whether the database flashback feature is enabled
# Parameter 1: TNS connect string
# Prints on stdout whatever sqlplus returns for FLASHBACK_ON of V$DATABASE.
# Exits with status 1 (message on stderr) when the argument is empty.
get_flashback_on() {
  if [[ -z "$1" ]]; then
    echo -e "${ERROR_COLOR} $(date "+%Y-%m-%d %H:%M:%S") | Error! get_flashback_on() parameter 1 can't be null ${RESET_COLOR}" >&2
    exit 1
  fi
  sqlplus -s "$DBUSER@$1" as sysdba <<EOF
set heading off feedback off pagesize 0 verify off echo off
SELECT FLASHBACK_ON FROM V\$DATABASE;
EOF
}
# MRP trigger check
# Parameter 1: TNS connect string
# Counts dba_triggers rows named AUTO_START_STANDBY_MRP or AUTO_START_STANDBY_MRP_PDB
# and prints Error when there are none, Success otherwise.
# Exits with status 1 when the argument is empty.
check_mrp_trigger() {
  if [[ -z "$1" ]]; then
    echo -e "${ERROR_COLOR}$(date "+%Y-%m-%d %H:%M:%S") | Error! check_mrp_trigger() parameter 1 can't be null ${RESET_COLOR}"
    exit 1
  fi
  local count
  count=$(sqlplus -s "$DBUSER@$1" as sysdba <<EOF
set heading off feedback off pagesize 0 verify off echo off
select count(*) FROM dba_triggers where trigger_name in ('AUTO_START_STANDBY_MRP','AUTO_START_STANDBY_MRP_PDB');
EOF
)
  if [[ "$count" -eq 0 ]]; then
    printf "${ERROR_COLOR}%-10s | %-8s | There is no MRP trigger${RESET_COLOR}\n" "$1" Error
  else
    printf "${NARMAL_COLOR}%-10s | %-8s | MRP trigger was found${RESET_COLOR}\n" "$1" Success
  fi
}
# MRP process check
# Parameter 1: TNS connect string
# Counts v$managed_standby rows whose PROCESS starts with MRP and prints Error
# when there are none, Success otherwise.
# Exits with status 1 when the argument is empty.
check_mrp_process() {
  if [[ -z "$1" ]]; then
    echo -e "${ERROR_COLOR} $(date "+%Y-%m-%d %H:%M:%S") | Error! check_mrp_process() parameter 1 can't be null ${RESET_COLOR}"
    exit 1
  fi
  local count
  count=$(sqlplus -s "$DBUSER@$1" as sysdba <<EOF
set heading off feedback off pagesize 0 verify off echo off
select count(*) FROM v\$managed_standby where process like 'MRP%';
EOF
)
  if [[ "$count" -eq 0 ]]; then
    printf "${ERROR_COLOR}%-10s | %-8s | There is no MRP process${RESET_COLOR}\n" "$1" Error
  else
    printf "${NARMAL_COLOR}%-10s | %-8s | MRP process was found${RESET_COLOR}\n" "$1" Success
  fi
}
# Check standby lag (transport lag, apply lag)
# Parameter 1: TNS connect string (row label);
# parameter 2: name of the lag to check ("transport lag" or "apply lag")
# Reads the named row of v$dataguard_stats, converted to seconds by the query.
# Prints Warning when the lag exceeds 30 seconds or cannot be read as a
# non-negative integer, Success otherwise.
# Exits with status 1 when either argument is empty.
check_db_lag() {
  if [[ -z "$1" || -z "$2" ]]; then
    echo -e "${ERROR_COLOR} $(date "+%Y-%m-%d %H:%M:%S") | Error! check_db_lag() parameter 1 and 2 can't be null ${RESET_COLOR}"
    exit 1
  fi
  local db_lag
  db_lag=$(sqlplus -s "$DBUSER@$1" as sysdba <<EOF
set heading off feedback off pagesize 0 verify off echo off
select (extract(second from to_dsinterval(value)) + extract(minute from to_dsinterval(value)) * 60 +extract(hour from to_dsinterval(value)) * 60 * 60 +extract(day from to_dsinterval(value)) * 60 * 60 * 24) as retvalue
from v\$dataguard_stats
where name = '$2';
EOF
)
  # Drop sqlplus padding with parameter expansion; never eval database output.
  db_lag=${db_lag//[[:space:]]/}

  if [[ ! "$db_lag" =~ ^[0-9]+$ ]]; then
    # No row, an ORA- error, or a non-integer value: do not report this as success.
    printf "${WARNING_COLOR}%-10s | %-8s | the %s could not be determined${RESET_COLOR}\n" "$1" Warning "$2"
  elif (( 10#$db_lag > 30 )); then
    printf "${WARNING_COLOR}%-10s | %-8s | the %s is too large(%s seconds)${RESET_COLOR}\n" "$1" Warning "$2" "$db_lag"
  else
    printf "${NARMAL_COLOR}%-10s | %-8s | the %s is %s second(s)${RESET_COLOR}\n" "$1" Success "$2" "$db_lag"
  fi
}
# dblink check
# Parameter 1: TNS connect string
# Counts the rows of dba_db_links and prints Success when there are none,
# Warning otherwise.
# Exits with status 1 when the argument is empty.
check_dblink() {
  if [[ -z "$1" ]]; then
    # The message names this function (it previously named check_mrp_trigger).
    echo -e "${ERROR_COLOR}$(date "+%Y-%m-%d %H:%M:%S") | Error! check_dblink() parameter 1 can't be null ${RESET_COLOR}"
    exit 1
  fi
  local count
  count=$(sqlplus -s "$DBUSER@$1" as sysdba <<EOF
set heading off feedback off pagesize 0 verify off echo off
select count(*) FROM dba_db_links;
EOF
)
  if [[ "$count" -eq 0 ]]; then
    printf "${NARMAL_COLOR}%-10s | %-8s | There is no dblink in our database${RESET_COLOR}\n" "$1" Success
  else
    printf "${WARNING_COLOR}%-10s | %-8s | dblink was found,please check the tns file and firewall${RESET_COLOR}\n" "$1" Warning
  fi
}
# Compare two values for equality
# Parameter 1: first value; parameter 2: second value; parameter 3: row label
# Prints Success with the value when both are equal, otherwise a Warning showing both.
compare_value() {
  # Values are passed as printf arguments, not spliced into the format string,
  # so a "%" or backslash inside a value is printed literally.
  if [[ "$1" = "$2" ]]; then
    printf "${NARMAL_COLOR}%-30s | %-8s | The value is %s ${RESET_COLOR}\n" "$3" Success "$1"
  else
    printf "${WARNING_COLOR}%-30s | %-8s | The value 1 is: %s,but value 2 is: %s ${RESET_COLOR}\n" "$3" Warning "$1" "$2"
  fi
}
# Check whether a parameter value contains a given substring
# Parameter 1: TNS connect string (row label); parameter 2: parameter name;
# parameter 3: parameter value to search in; parameter 4: substring to look for
# Example: include_value $PRIMARY_TNS "fal_server" "${PRIMARY_TNS_DIC[fal_server]}" $STANDBY_TNS
# Prints Success when the value contains the substring (case-sensitive), Warning otherwise.
# An empty substring is reported as a Warning.
include_value() {
  # Literal substring match: the needle is not treated as a regex or as a grep option.
  if [[ -n "$4" && "$3" == *"$4"* ]]; then
    printf "${NARMAL_COLOR}%-10s | %-8s| %-20s | The value is %s,include %s ${RESET_COLOR}\n" "$1" Success "$2" "$3" "$4"
  else
    printf "${WARNING_COLOR}%-10s | %-8s| %-20s | The value is %s,doesn't include %s ${RESET_COLOR}\n" "$1" Warning "$2" "$3" "$4"
  fi
}
3. 执行效果
二、 执行切换
1. dg_switchover_v1.4.sh
#!/bin/bash
#**********************************************************************************
# Author: Hehuyi_In
# Date: 2022-06-10
# FileName: dg_switchover_v1.4.sh
#
# For sys user, execute the script directly.
# For other sysdba privileged users, you need to manually synchronize the password file to standby server and chown file mode to 640 in versions below 12.2.
#
# Example: ./dg_switchover_v1.4.sh or ./dg_switchover_v1.4.sh &>> dg_switchover.log
# Description:
# 2022-06-10 v_1.0 only oracle dataguard switchover
# 2022-06-10 v_1.1 check instance status and db role after switchover.
# 2022-06-10 v_1.2 check transport lag,apply lag,switchover_status before switchover.
# 2022-06-13 v_1.3 add logging for main steps
# 2022-06-14 v_1.4 auto check and select the target primary and standby database.
#**********************************************************************************

# Connection credentials
DBUSER='sys/xxxxx'

# TNS alias list; aliases nearer the front are more likely to be chosen as the new primary
TNS_LIST=(mrptest mrptest_dg mrpuat)

# Size check: only one-primary/one-standby and one-primary/two-standby are supported
if [[ ${#TNS_LIST[@]} -lt 2 || ${#TNS_LIST[@]} -gt 3 ]]; then
  echo "$(date "+%Y-%m-%d %H:%M:%S") | Error! The number of standby db is less than 1 or greater than 2."
  # Stop here instead of attempting a switchover on an unsupported topology.
  exit 1
fi

# ------------------------------------------------------------------------
# Determine the role of every alias in TNS_LIST and set the switchover targets:
#   - the current primary becomes TARGET_STANDBY_TNS;
#   - the first physical standby whose transport lag and apply lag are both 0
#     becomes TARGET_PRIMARY_TNS;
#   - any other physical standby becomes OUTLOOKING_STANDBY_TNS.
# Exits with status 1 when TARGET_PRIMARY_TNS or TARGET_STANDBY_TNS is still
# empty afterwards (for example when every standby is lagging).
# Relies on check_db_role and check_db_lag from this script.
auto_set_target_db_tns() {
  local tns db_role tran_lag apply_lag
  for tns in "${TNS_LIST[@]}"; do
    db_role=$(check_db_role "$tns")
    if [[ "$db_role" = "PRIMARY" ]]; then
      # Current primary: it will be switched to standby
      TARGET_STANDBY_TNS=$tns
    elif [[ "$db_role" = "PHYSICAL STANDBY" ]]; then
      if [[ -n "$TARGET_PRIMARY_TNS" ]]; then
        # A new primary was already chosen, so this one is an onlooking standby
        OUTLOOKING_STANDBY_TNS=$tns
      else
        # No new primary chosen yet: candidate only if it has no lag at all
        tran_lag=$(check_db_lag "$tns" "transport lag")
        apply_lag=$(check_db_lag "$tns" "apply lag")
        tran_lag=${tran_lag//[[:space:]]/}
        apply_lag=${apply_lag//[[:space:]]/}
        # An empty result means the lag is unknown; it must not count as 0.
        if [[ -n "$tran_lag" && -n "$apply_lag" && "$tran_lag" -eq 0 && "$apply_lag" -eq 0 ]]; then
          TARGET_PRIMARY_TNS=$tns
        else
          # A standby that is not chosen is a remaining (onlooking) standby and
          # must still be re-pointed to the new primary after the switchover.
          OUTLOOKING_STANDBY_TNS=$tns
        fi
      fi
    fi
  done

  # Both targets are mandatory for a switchover
  if [[ "$TARGET_PRIMARY_TNS" = "" || "$TARGET_STANDBY_TNS" = "" ]]; then
    echo "$(date "+%Y-%m-%d %H:%M:%S") | Error! TARGET_PRIMARY_TNS and TARGET_STANDBY_TNS can't be null."
    exit 1
  fi
}
# ------------------------------------------------------------------------
# Query a standby lag (transport lag or apply lag).
# Parameter 1: tnsname of the database; parameter 2: name of the lag to read
# Prints on stdout whatever sqlplus returns for the named v$dataguard_stats row,
# converted to seconds by the query.
# Exits with status 1 when either argument is empty. The message goes to stderr
# so that callers capturing stdout do not store it as a lag value.
check_db_lag() {
  if [[ -z "$1" || -z "$2" ]]; then
    echo "$(date "+%Y-%m-%d %H:%M:%S") | Error! parameter 1,2 can't be null." >&2
    exit 1
  fi
  sqlplus -s "$DBUSER@$1" as sysdba <<EOF
set heading off feedback off pagesize 0 verify off echo off
select (extract(second from to_dsinterval(value)) + extract(minute from to_dsinterval(value)) * 60 +extract(hour from to_dsinterval(value)) * 60 * 60 +extract(day from to_dsinterval(value)) * 60 * 60 * 24) as retvalue
from v\$dataguard_stats
where name = '$2';
EOF
}
# ------------------------------------------------------------------------
# Check switchover_status before the switchover
# Parameter 1: tnsname of the database; parameter 2: expected switchover_status
# Reads switchover_status from v$database. Prints Success when it equals
# parameter 2 or "SESSIONS ACTIVE"; otherwise prints an error and exits with 1.
# Also exits with status 1 when either argument is empty.
check_switchover_status() {
  if [[ -z "$1" || -z "$2" ]]; then
    echo "$(date "+%Y-%m-%d %H:%M:%S") | Error! parameter 1 and 2 can't be null."
    exit 1
  fi
  local status
  status=$(sqlplus -s "$DBUSER@$1" as sysdba <<EOF
set heading off feedback off pagesize 0 verify off echo off
select switchover_status from v\$database;
EOF
)
  # "$2" is quoted so it is compared as a literal string, not as a glob pattern.
  if [[ "$status" = "$2" || "$status" = "SESSIONS ACTIVE" ]]; then
    echo "$(date "+%Y-%m-%d %H:%M:%S") | Success! switchover_status of $1 is $status."
  else
    echo "$(date "+%Y-%m-%d %H:%M:%S") | Error! switchover_status of $1 is $status (should be $2 or SESSIONS ACTIVE)."
    exit 1
  fi
}
# ------------------------------------------------------------------------
# Query the database role. Parameter 1: tnsname of the database
# Prints on stdout whatever sqlplus returns for database_role of v$database.
# Exits with status 1 when the argument is empty. The message goes to stderr
# so that callers capturing stdout do not store it as a role.
check_db_role() {
  if [[ -z "$1" ]]; then
    echo "$(date "+%Y-%m-%d %H:%M:%S") | Error! parameter 1 should be tnsname." >&2
    exit 1
  fi
  sqlplus -s "$DBUSER@$1" as sysdba <<EOF
set heading off feedback off pagesize 0 verify off echo off
select database_role from v\$database;
EOF
}
# ------------------------------------------------------------------------
# Check the instance status after the switchover.
# Parameter 1: tnsname of the database
# Reads status from v$instance. Prints Success when it is OPEN; otherwise prints
# an error and exits with status 1. Also exits with 1 when the argument is empty.
check_instance_status() {
  if [[ -z "$1" ]]; then
    echo "$(date "+%Y-%m-%d %H:%M:%S") | Error! parameter 1 can't be null."
    exit 1
  fi
  local status
  status=$(sqlplus -s "$DBUSER@$1" as sysdba <<EOF
set heading off feedback off pagesize 0 verify off echo off
select status from v\$instance;
EOF
)
  if [[ "$status" != "OPEN" ]]; then
    echo "$(date "+%Y-%m-%d %H:%M:%S") | Error! instance status of $1 is $status after switchover."
    exit 1
  else
    echo "$(date "+%Y-%m-%d %H:%M:%S") | Success! instance status of $1 is $status after switchover."
  fi
}
# ------------------------------- main() -----------------------------------------
echo "------------------------------------------------------------------------"
echo "$(date "+%Y-%m-%d %H:%M:%S") | Precheck before switchover..."

# Determine the role of every alias in TNS_LIST and set the switchover targets
auto_set_target_db_tns

# Report the targets that were selected
echo "$(date "+%Y-%m-%d %H:%M:%S") | TARGET_PRIMARY_TNS: $TARGET_PRIMARY_TNS"
echo "$(date "+%Y-%m-%d %H:%M:%S") | TARGET_STANDBY_TNS: $TARGET_STANDBY_TNS"
echo "$(date "+%Y-%m-%d %H:%M:%S") | OUTLOOKING_STANDBY_TNS: $OUTLOOKING_STANDBY_TNS"

# switchover_status check before the switchover
check_switchover_status "$TARGET_STANDBY_TNS" "TO STANDBY"

# Run on the target standby (the current primary): convert it to a standby
echo "$(date "+%Y-%m-%d %H:%M:%S") | Begin switchover $TARGET_STANDBY_TNS to standby database..."
sqlplus -s "$DBUSER@$TARGET_STANDBY_TNS" as sysdba <<EOF
alter database commit to switchover to standby with session shutdown;
shutdown immediate
EOF

echo "$(date "+%Y-%m-%d %H:%M:%S") | Restarting $TARGET_STANDBY_TNS database..."
# Start the target standby again
sqlplus -s "$DBUSER@$TARGET_STANDBY_TNS" as sysdba <<EOF
startup
EOF

# Role check after the switchover
DB_ROLE=$(check_db_role "$TARGET_STANDBY_TNS")
if [[ "$DB_ROLE" != "PHYSICAL STANDBY" ]]; then
  echo "$(date "+%Y-%m-%d %H:%M:%S") | Error! db role of $TARGET_STANDBY_TNS is $DB_ROLE(should be PHYSICAL STANDBY) after switchover."
  exit 1
else
  echo "$(date "+%Y-%m-%d %H:%M:%S") | Success! db role of $TARGET_STANDBY_TNS is $DB_ROLE after switchover."
fi

# Instance status check after the switchover
check_instance_status "$TARGET_STANDBY_TNS"
echo "$(date "+%Y-%m-%d %H:%M:%S") | Finish switchover $TARGET_STANDBY_TNS to standby database."

# ------------------------------------------------------------------------
echo "$(date "+%Y-%m-%d %H:%M:%S") | Precheck before switchover $TARGET_PRIMARY_TNS to primary database..."
# switchover_status check before the switchover
check_switchover_status "$TARGET_PRIMARY_TNS" "TO PRIMARY"

# Run on the target primary (the former standby): convert it to the primary
echo "$(date "+%Y-%m-%d %H:%M:%S") | Begin switchover $TARGET_PRIMARY_TNS to primary database..."
sqlplus -s "$DBUSER@$TARGET_PRIMARY_TNS" as sysdba <<EOF
alter database commit to switchover to primary with session shutdown;
ALTER DATABASE OPEN;
EOF

# Role check after the switchover.
# The messages name $TARGET_PRIMARY_TNS; "$1" is empty at top level.
DB_ROLE=$(check_db_role "$TARGET_PRIMARY_TNS")
if [[ "$DB_ROLE" != "PRIMARY" ]]; then
  echo "$(date "+%Y-%m-%d %H:%M:%S") | Error! db role of $TARGET_PRIMARY_TNS is $DB_ROLE(should be PRIMARY) after switchover."
  exit 1
else
  echo "$(date "+%Y-%m-%d %H:%M:%S") | Success! db role of $TARGET_PRIMARY_TNS is $DB_ROLE after switchover."
fi

# Instance status check after the switchover
check_instance_status "$TARGET_PRIMARY_TNS"

# Standby lag check after the switchover
DB_TRAN_LAG=$(check_db_lag "$TARGET_STANDBY_TNS" "transport lag")
DB_APPLY_LAG=$(check_db_lag "$TARGET_STANDBY_TNS" "apply lag")
echo "$(date "+%Y-%m-%d %H:%M:%S") | After switchover: DB_TRAN_LAG is $DB_TRAN_LAG second(s), DB_APPLY_LAG is $DB_APPLY_LAG second(s)"
echo "$(date "+%Y-%m-%d %H:%M:%S") | Finish switchover $TARGET_PRIMARY_TNS to primary database."

# ------------------------------------------------------------------------
# If there is an onlooking standby
if [[ -n "$OUTLOOKING_STANDBY_TNS" ]]; then
  echo "$(date "+%Y-%m-%d %H:%M:%S") | Begin onlooking standby database setting..."

  # New primary: point the destinations at both standbys. Clear them first,
  # otherwise the new settings may be rejected as conflicting.
  sqlplus -s "$DBUSER@$TARGET_PRIMARY_TNS" as sysdba <<EOF
alter system set LOG_ARCHIVE_DEST_2='';
alter system set LOG_ARCHIVE_DEST_3='';
alter system set LOG_ARCHIVE_DEST_2='SERVICE=$TARGET_STANDBY_TNS ASYNC VALID_FOR=(ONLINE_LOGFILES,PRIMARY_ROLE) DB_UNIQUE_NAME=$TARGET_STANDBY_TNS';
alter system set LOG_ARCHIVE_DEST_3='SERVICE=$OUTLOOKING_STANDBY_TNS ASYNC VALID_FOR=(ONLINE_LOGFILES,PRIMARY_ROLE) DB_UNIQUE_NAME=$OUTLOOKING_STANDBY_TNS';
EOF

  # New standby: drop its destination to the onlooking standby
  sqlplus -s "$DBUSER@$TARGET_STANDBY_TNS" as sysdba <<EOF
alter system set LOG_ARCHIVE_DEST_3='';
alter system set LOG_ARCHIVE_DEST_2='SERVICE=$TARGET_PRIMARY_TNS ASYNC VALID_FOR=(ONLINE_LOGFILES,PRIMARY_ROLE) DB_UNIQUE_NAME=$TARGET_PRIMARY_TNS';
EOF

  # Onlooking standby: point it at the new primary
  sqlplus -s "$DBUSER@$OUTLOOKING_STANDBY_TNS" as sysdba <<EOF
alter system set LOG_ARCHIVE_DEST_3='';
alter system set LOG_ARCHIVE_DEST_2='SERVICE=$TARGET_PRIMARY_TNS ASYNC VALID_FOR=(ONLINE_LOGFILES,PRIMARY_ROLE) DB_UNIQUE_NAME=$TARGET_PRIMARY_TNS';
EOF

  # Onlooking standby lag check after the change
  DB_TRAN_LAG=$(check_db_lag "$OUTLOOKING_STANDBY_TNS" "transport lag")
  DB_APPLY_LAG=$(check_db_lag "$OUTLOOKING_STANDBY_TNS" "apply lag")
  echo "$(date "+%Y-%m-%d %H:%M:%S") | After setting: DB_TRAN_LAG is $DB_TRAN_LAG second(s), DB_APPLY_LAG is $DB_APPLY_LAG second(s)"
  echo "$(date "+%Y-%m-%d %H:%M:%S") | Finish onlooking standby database setting."
else
  # No onlooking standby
  echo "$(date "+%Y-%m-%d %H:%M:%S") | There is no onlooking standby database."
fi