扁鹊的弟弟来了
要求5分钟定位达梦数据库问题
#!/bin/bash## content 实例个数 告警日志 实例状态 用户连接 活动会话 锁 集群状态 服务状态 磁盘空间 cpu mem 侦听及日志
## scope 单机、DW、DSC Linux 多实例
## example 将脚本保存为d.sh,用root用执行,例如 sh /tmp/d.sh
## ver 1.0printf '%.s*' {1..80}
echo
echo "__CHECK BEGIN" `date '+%Y-%m-%d %H:%M:%S'`"__"## cpu memory filesystem instance_name
os=`uname`
case $os inAIX)ps aux |head -1 ; ps aux|sort -rn +2|head -5echops aux|head -1;ps aux | sort +5 -6 -n -r | head -5echodf|grep -e "[8,9][0-9]%" -e "100%"instlist=`ps -ef|grep dm.ini|grep -v grep|awk '{print $2}' `inst_cnt=`ps -ef|grep dm.ini|grep -v grep|awk '{print $2}' |wc -l`if [ $inst_cnt -gt 1 ]; thenps -ef|grep dms|grep -v grep|awk '{print $9}'|cut -b 6-fierrpt|headPROFILE='.profile';;Linux)## CPU较高的进程ps aux --sort=-%cpu| head -5echo##内存较高的进程ps aux|head -1;ps aux|grep -v PID|sort -rn -k +4|head -5echo##磁盘空间使用率超过80df -h|grep -v Size|sed 's#[[:space:]][[:space:]]*# #g'|cut -d ' ' -f5,6|sort -t '%' -k1 -nr|egrep '[8-9][0-9]%|100%' ##实例个数大于1,提醒 ora_ckpt_ORCL2instlist=`ps -ef|grep dm.ini|grep -v grep|awk '{print $9}'|cut -b 6- |sort`inst_cnt=`ps -ef|grep dm.ini|grep -v grep|awk '{print $2}' |wc -l`if [ $inst_cnt -gt 1 ]; thenps -ef|grep dms|grep -v grep|awk '{print $9}'|cut -b 6- |sortfitail -5500 /var/log/messages|grep -C 5 -iE 'err|fatal|oom'|tail -20PROFILE='.bash_profile';;*)echo "Unsupported OS type!"exit;
esac## 主要进程
echo
ps -ef|grep dmdbms|grep -v grep## databaseCMDFILE=/tmp/dm_check.sh
(cat << EOF
#!/bin/bash
. \$HOME/$PROFILE
os=\`uname\`
echo "----------------------------"
echo \$1 \$2 \$3
echo "----------------------------"##登录用户及口令放到一个临时文件,例如sysdba/Dameng
if [ -f '/tmp/pwdfile.tmp' ]; thensqlpwd=\`cat /tmp/pwdfile.tmp\`
elsesqlpwd=" /:\$1 as sysdba "
fi# 20221121
dbver=\` disql -id|awk -F'-' '{print \$2}' \`## 显示告警日志 `date +"%Y%m"`
aa=\$DM_HOME/log/dm_\$3_\`date +"%Y%m"\`.log tail -5000 \$aa|grep -v INFO|grep -v libgssapi|tail -20#disql "/:5237 as sysdba"
disql -S " \$sqlpwd " <<!
set linesize 200 pagesize 200
set feedback off lineshow off timing off--os配置及负载
select N_CPU cpu,TOTAL_PHY_SIZE/1024/1024/1024||'G' memory,trunc(CPU_USER_RATE,2) us,trunc(CPU_SYSTEM_RATE,2) sy, ROUND(SEND_BYTES_TOTAL /1024/1024/1024) send_gb,ROUND(RECEIVE_BYTES_TOTAL/1024/1024/1024) recv_gb, trunc(LOAD_ONE_AVERAGE,2) load1,trunc(LOAD_FIFTEEN_AVERAGE,2) load15 from v\\\$systeminfo;--实例状态
select instance_name,start_time,host_name,status\$,mode\$,BUILD_VERSION from v\\\$instance;--license
select SERIES_NO,AUTHORIZED_CUSTOMER, isnull(to_char(EXPIRED_DATE),'for ever') yxq,
case SERVER_SERIES when 'P' then 'Personal'when 'S' then 'Standard'when 'E' then 'Enterprise'when 'A' then 'Security'when 'D' then 'Developer'when 'C' then 'Cloud' end edition,
case SERVER_type when '1' then 'Official'when '2' then 'Test'when '3' then 'Trial' end type, server_ver,
case substr(CLUSTER_TYPE,1,1) when '1' then 'YES' else 'NO' end DW,
case substr(CLUSTER_TYPE,2,1) when '1' then 'YES' else 'NO' end MPP,
case substr(CLUSTER_TYPE,3,1) when '1' then 'YES' else 'NO' end RW,
case substr(CLUSTER_TYPE,4,1) when '1' then 'YES' else 'NO' end DSC
from v\\\$license;--内存分配
SELECT
round((SELECT SUM(N_PAGES * (PAGE_SIZE+0.0))/1024/1024 FROM V\\\$BUFFERPOOL))||'MB' AS BUFFER_SIZE,
round((SELECT SUM(TOTAL_SIZE)/1024/1024 FROM V\\\$MEM_POOL))||'MB' AS MEM_POOL,
round(((SELECT SUM(N_PAGES * (PAGE_SIZE+0.0)) FROM V\\\$BUFFERPOOL)+(SELECT SUM(TOTAL_SIZE) FROM V\\\$MEM_POOL))/1024/1024/1024)||'GB' AS TOTAL_SIZE
FROM DUAL;--asm磁盘组使用率
select group_name,total_mb/1024 total_gb,free_mb/1024 free_gb,round((total_mb-free_mb)/1024,1) used_gb,round((1-(free_mb+0.0)/total_mb)*100,2) used_per,au_size from v\\\$asmgroup;--表空间使用率
SELECT UPPER(F.TABLESPACE_NAME) tsname,-- "表空间名",D.TOT_GROOTTE_MB size_mb,-- "表空间大小(M)",D.TOT_GROOTTE_MB - F.TOTAL_BYTES used_mb,--"已使用空间(M)",TO_CHAR(ROUND(( D.TOT_GROOTTE_MB - F.TOTAL_BYTES ) / D.TOT_GROOTTE_MB * 100, 2), '990.99')|| '%' use_percent,-- "使用比",F.TOTAL_BYTES free_mb,-- "空闲空间(M)",F.MAX_BYTES max_size -- "最大块(M)"
FROM (SELECT TABLESPACE_NAME,ROUND(SUM(BYTES) / ( 1024 * 1024 ), 2) TOTAL_BYTES,ROUND(MAX(BYTES) / ( 1024 * 1024 ), 2) MAX_BYTESFROM SYS.DBA_FREE_SPACEGROUP BY TABLESPACE_NAME) F,(SELECT DD.TABLESPACE_NAME,ROUND(SUM(DD.BYTES) / ( 1024 * 1024 ), 2) TOT_GROOTTE_MBFROM SYS.DBA_DATA_FILES DDGROUP BY DD.TABLESPACE_NAME) D
WHERE D.TABLESPACE_NAME = F.TABLESPACE_NAME
ORDER BY 2 DESC;--近期error
select ECPT_CODE, count(0) from v\\\$runtime_err_history where err_time>sysdate-1 group by ECPT_CODE;
select to_char(err_time,'yyyy-mm-dd hh24:mi:ss')err_time,SESS_ID,SESS_SEQ,ECPT_CODE,substr(ECPT_DESC,1,20)ecpt_desc,substr(SQL_TEXT,1,20)sql_txt from v\\\$runtime_err_history where err_time>sysdate-1 and rownum<11;
select log_time,thread_name,level$,substr(txt,1,50) sql_txt from v\\\$INSTANCE_LOG_HISTORY where LEVEL$ in ('ERROR','FATAL') and LOG_TIME>sysdate-10 and rownum<11; --会话个数
select INSTANCE_NAME,user_name,count(0) cnt from gv\\\$sessions group by INSTANCE_NAME,user_name order by 1,2;--当前执行超过2秒的会话
SELECT* FROM (SELECT SESS_ID, user_name,DATEDIFF(SS,LAST_RECV_TIME,SYSDATE) Y_EXETIME, substr(SF_GET_SESSION_SQL(SESS_ID),1,100) sql_txt,CLNT_IP FROM V\\\$SESSIONS WHERE STATE='ACTIVE' and sess_id<> SYS_CONTEXT('USERENV','SID') ) WHERE Y_EXETIME>=2 ;--慢sql
select top 10 sess_id,sql_id,substr(sql_text,1,50)sql_txt,round(exec_time/1000) miao,n_runs, finish_time from V\\\$LONG_EXEC_SQLS order by exec_time desc;--查被阻塞会话
SELECT DS.SESS_ID blked_sid,substr(DS.SQL_TEXT,1,30) blked_sql,DS.TRX_ID blked_trx,(CASE L.LTYPE WHEN 'OBJECT' THEN '对象锁' WHEN 'TID' THEN '事务锁' END CASE ) lx,to_char(DS.CREATE_TIME,'yyyy-mm-dd hh24:mi:ss') create_time,SS.SESS_ID blocker_sid,--substr(SS.SQL_TEXT,1,30) blocker_sql,--SS. CLNT_HOST blocker,L.TID blocker_trx
FROM V\\\$LOCK L LEFT JOIN V\\\$SESSIONS DS ON DS.TRX_ID = L.TRX_ID LEFT JOIN V\\\$SESSIONS SS ON SS.TRX_ID = L.TID
WHERE L.BLOCKED = 1;--单个会话内存使用总量
SELECT top 10 A.CREATOR ,b.sess_id,b.user_name,substr(B.SQL_TEXT ,1,40) sql_txt,round(SUM(A.TOTAL_SIZE)/1024/1024) TOTAL_MB,--当前总量(包括扩展)round(SUM(A.DATA_SIZE)/1024/1024) DATA_MB--实际使用量
FROM V\\\$MEM_POOL A, V\\\$SESSIONS B
WHERE A.CREATOR=B.THRD_ID
GROUP BY A.CREATOR, b.sess_id,b.user_name, (B.SQL_TEXT ,1,60)
ORDER BY TOTAL_MB DESC;--dg delay
selectSEQNO, apply_cmt_time, last_cmt_time ,timestampdiff(sql_tsi_frac_second, apply_cmt_time, last_cmt_time) diff
fromv\\\$rapply_stat;exit
!getrole() {
disql -S " \$sqlpwd " <<!
set HEA off
set feedback off
set echo off
set time off
set timing off
select MODE$ from v\\\$instance;
exit
!
}std_delay() {
disql -S " \$sqlpwd "<<!
selectSEQNO, apply_cmt_time, last_cmt_time ,timestampdiff(sql_tsi_frac_second, apply_cmt_time, last_cmt_time) diff
fromv\\\$rapply_stat;
exit
!
} #std_delay()EOF
)>$CMDFILE
dbuser=`ps -ef|grep dmserver|grep -v grep |awk '{print $1}'|head -1`for sid in $instlist
do#获取端口port=`grep -w PORT_NUM $sid|awk -F'=' '{print $2}'|awk -F'#' '{print $1}'|awk 'gsub(/^ *| *$/,"")'`inst=`grep -w INSTANCE_NAME $sid|awk -F'=' '{print $2}'|awk -F'#' '{print $1}'|awk 'gsub(/^ *| *$/,"")'`#echo $portsu - $dbuser "-c /bin/bash $CMDFILE $port $sid $inst"echo
done#w
#vmstat 3 3printf '%.s*' {1..80}
echo
echo "__CHECK END" `date '+%Y-%m-%d %H:%M:%S'`"__"exit 0
看看效果
我觉得,没什么问题,除了锁表。