DB2 Tuning and Troubleshooting Commands



Quickly look at some basic tuning measurements with Database Snapshot:

db2 "select db_name,  rows_read, rows_selected, lock_waits, lock_wait_time, deadlocks, lock_escals, total_sorts, total_sort_time from table (snapshot_database (' ', -1) ) as snapshot_database"

SQL Queries for a health check:

##What Event Monitors are turned on?

SELECT substr(evmonname,1,15) as eventmonname, EVENT_MON_STATE(evmonname) as eventmon_state,target_type, substr(target,1,15) as target, autostart FROM syscat.eventmonitors ORDER BY evmonname;

##DB Wait Summary

select total_app_commits, total_app_rollbacks,dec(avg_rqst_cpu_time,20,2) as AVG_RQST_CPU_TIME,act_wait_time_percent,io_wait_time_percent,lock_wait_time_percent,network_wait_time_percent,section_sort_proc_time_percent,dec(rows_read_per_rows_returned,20,2) as ROWS_READ_PER_ROWS_RETURNED,total_bp_hit_ratio_percent from sysibmadm.mon_db_summary;

##Table RUNSTATS recent history

select substr(tabschema,1,15) as tabschema, substr(tabname,1,20) as tabname, stats_time from syscat.tables where stats_time is not null order by stats_time desc fetch first 20 rows only;

##Recent DB backups

select operationtype,substr(location,1,25) as LOCATION,devicetype,sqlcode, start_time,end_time from sysibmadm.db_history where operation = 'B' order by start_time desc fetch first 10 rows only;

##Recent Table Reorgs

select substr(tabname,1,15)as TABNAME, reorg_status,reorg_completion,reorg_start, reorg_end from sysibmadm.snaptab_reorg order by reorg_end desc fetch first 20 rows only;

##Tablespace Utilization

select substr(tbsp_name,1,20) as TBSP_NAME , tbsp_type as TBSP_TYPE , int(tbsp_total_size_kb/1024) as TBSP_SIZE_MB , smallint(tbsp_utilization_percent) as UTIL_PCNT , int(tbsp_free_size_kb / 1024) as FREE_SIZE_MB,DBPARTITIONNUM from sysibmadm.tbsp_utilization where TBSP_TYPE='DMS' order by DBPARTITIONNUM;

##Top 10 tables sorted by number of table scans

SELECT varchar(tabschema,15) as tabschema, varchar(tabname,30) as tabname, sum(rows_read) as total_rows_read, sum(rows_inserted) as total_rows_inserted, sum(rows_updated) as total_rows_updated, sum(rows_deleted) as total_rows_deleted, sum(table_scans) as table_scans FROM TABLE(MON_GET_TABLE('','',-2)) AS t where tabschema not like '%SYS%' GROUP BY tabschema, tabname ORDER BY table_scans DESC fetch first 10 rows only;

##Top 10 tables sorted by number of rows read

SELECT varchar(tabschema,15) as tabschema, varchar(tabname,30) as tabname, sum(rows_read) as total_rows_read, sum(rows_inserted) as total_rows_inserted, sum(rows_updated) as total_rows_updated, sum(rows_deleted) as total_rows_deleted, sum(table_scans) as table_scans FROM TABLE(MON_GET_TABLE('','',-2)) AS t where tabschema not like '%SYS%' GROUP BY tabschema, tabname ORDER BY 3 DESC fetch first 10 rows only;

##Transaction log utilization

SELECT int(total_log_used_KB/1024) as "Used_MB", int(total_log_available_KB/1024) as Free_MB, log_utilization_percent as Pct_Used, int(total_log_used_top_KB/1024) as Max_Log_Used_MB from sysibmadm.log_utilization;

##Application holding the oldest log

SELECT AI.APPL_STATUS as Status, SUBSTR(AI.PRIMARY_AUTH_ID,1,10) AS "Authid", SUBSTR(AI.APPL_NAME,1,15) AS "Appl_Name", AP.UOW_LOG_SPACE_USED AS "Log_Used_MB", INT(AP.APPL_IDLE_TIME/60) AS "Idle_for_min", AP.APPL_CON_TIME AS "Connected_Since" FROM SYSIBMADM.SNAPDB DB, SYSIBMADM.SNAPAPPL  AP,SYSIBMADM.SNAPAPPL_INFO AI WHERE   AI.AGENT_ID = DB.APPL_ID_OLDEST_XACT AND   AI.AGENT_ID = AP.AGENT_ID;

##Lock Wait information

SELECT substr(ai.appl_name,1,10) as Application, substr(ai.primary_auth_id,1,10) as AuthID, int(ap.lock_waits) as Lock_Waits, int(ap.lock_wait_time/1000) as Total_Wait_S, int( ap.lock_wait_time / ap.lock_waits) as Avg_Wait_MS from    sysibmadm.snapappl ap, sysibmadm.snapappl_info ai where   ap.agent_id = ai.agent_id and     ap.lock_waits > 0;

##Bufferpool Metrics

WITH BPMETRICS AS ( SELECT bp_name, pool_data_l_reads + pool_temp_data_l_reads + pool_index_l_reads + pool_temp_index_l_reads + pool_xda_l_reads + pool_temp_xda_l_reads as logical_reads, pool_data_p_reads + pool_temp_data_p_reads + pool_index_p_reads + pool_temp_index_p_reads + pool_xda_p_reads + pool_temp_xda_p_reads as physical_reads, member FROM TABLE(MON_GET_BUFFERPOOL('',-2)) AS METRICS) SELECT VARCHAR(bp_name,20) AS bp_name, logical_reads, physical_reads, CASE WHEN logical_reads > 0 THEN DEC((1 - (FLOAT(physical_reads) / FLOAT(logical_reads))) * 100,5,2) ELSE NULL END AS HIT_RATIO, member FROM BPMETRICS order by member;

##BP Write IO

select substr(bp_name,1,20) as BP_NAME ,total_writes,dec(average_write_time_ms,20,2) as AVG_WRITE_TIME_MS, percent_writes_async,dbpartitionnum from sysibmadm.bp_write_io order by dbpartitionnum;

##BP Read IO

select substr(bp_name,1,20) as BP_NAME ,total_physical_reads,total_async_reads, percent_sync_reads, async_not_read_percent,dbpartitionnum from sysibmadm.bp_read_io order by dbpartitionnum;

##Overflow rows read -- indicates a REORG is needed

select substr(tabname,1,25) as TABNAME,overflow_accesses,rows_read,rows_written from sysibmadm.snaptab order by OVERFLOW_ACCESSES desc fetch first 10 rows only;

##Rows read per transaction(TBRRTX) < 100 is good, < 10 is best

select substr(a.tabname,1,30) as TABNAME, a.rows_read as RowsRead, (a.rows_read / (b.commit_sql_stmts + b.rollback_sql_stmts + 1)) as TBRRTX, (a.rows_read / b.rows_selected) as IREF, (b.commit_sql_stmts + b.rollback_sql_stmts) as TXCNT from sysibmadm.snaptab a, sysibmadm.snapdb b where a.dbpartitionnum = b.dbpartitionnum and b.db_name = '$dbname' order by a.rows_read desc fetch first 20 rows only;

##Lock info

SELECT substr(ai.appl_name,1,10) as Application, substr(ai.primary_auth_id,1,10) as AuthID, int(ap.lock_waits) as Lock_Waits, int(ap.lock_wait_time/1000) as Total_Wait_s, int( ap.lock_wait_time / ap.lock_waits) as Avg_Wait_ms from sysibmadm.snapappl ap, sysibmadm.snapappl_info ai where ap.agent_id = ai.agent_id and ap.lock_waits > 0;

##Asynchronous Reads

select substr(tbsp_name,1,30) as TABLESPACE, dbpartitionnum, dec((100-((POOL_ASYNC_DATA_READS + (POOL_ASYNC_INDEX_READS + 1) * 100) / (POOL_DATA_P_READS + (POOL_INDEX_P_READS+1)))),20,2) as ASYNCH_READ_PERCENT from sysibmadm.snaptbsp where TBSP_TYPE = 'DMS' order by dbpartitionnum;

##ORMS = Overal read miliseconds

select (POOL_READ_TIME / (POOL_DATA_P_READS + POOL_INDEX_P_READS + POOL_TEMP_DATA_P_READS + POOL_TEMP_INDEX_P_READS + 1)) as ORMS from sysibmadm.snapdb;

##TSORMS = tablespace overall read miliseconds...if any TSORMS are significantly higher than average ORMS, you should research the tbspace definition and disk

select substr(tbsp_name,1,20), dec((POOL_READ_TIME / (POOL_DATA_P_READS + POOL_INDEX_P_READS + POOL_TEMP_DATA_P_READS + POOL_TEMP_INDEX_P_READS + 1)),20,2) as TSORMS,dbpartitionnum from sysibmadm.snaptbsp order by TSORMS desc fetch first 10 rows only;

##OWMS = Overall write miliseconds

select (POOL_WRITE_TIME / (POOL_DATA_WRITES + POOL_INDEX_WRITES + 1)) as OWMS from sysibmadm.snapdb;

##TSOWMS = Tablespace overall write miliseconds

select substr(tbsp_name,1,20), dec((POOL_WRITE_TIME / (POOL_DATA_WRITES + POOL_INDEX_WRITES + 1)),20,2) as TSOWMS,dbpartitionnum from sysibmadm.snaptbsp order by TSOWMS desc fetch first 10 rows only;

##BPRIOTX = bufferpool read IO per transaction

select (POOL_READ_TIME/(COMMIT_SQL_STMTS+ROLLBACK_SQL_STMTS)) as BPRIOTX from sysibmadm.snapdb;

##Direct reads MS

select dec((DIRECT_READ_TIME/(DIRECT_READS+1)),20,2) as DirectReadMS,substr(tbsp_name,1,20) from sysibmadm.snaptbsp;

##Direct writes MS

select dec((DIRECT_WRITE_TIME/(DIRECT_WRITES+1)),20,2) as DirectWriteMS,substr(tbsp_name,1,20) from sysibmadm.snaptbsp;

##Direct reads per txn

select (DIRECT_READ_TIME/(COMMIT_SQL_STMTS+ROLLBACK_SQL_STMTS)) as DirectReadMSPerTXN from sysibmadm.snapdb;

##Direct writes per txn

select (DIRECT_WRITE_TIME/(COMMIT_SQL_STMTS+ROLLBACK_SQL_STMTS)) as DirectWriteMSPerTXN from sysibmadm.snapdb;

##Unused indexes......drop these if you can

SELECT substr(T.tabschema,1,10) as TABSCHEMA,substr(T.tabname,1,20) AS TABNAME,substr(S.INDSCHEMA,1,10) as INDSCHEMA,substr(S.INDNAME,1,20) AS IND_NAME,indextype,T.INDEX_SCANS as INDEX_SCANS FROM TABLE(MON_GET_INDEX('','', -1)) as T, SYSCAT.INDEXES AS S WHERE T.TABSCHEMA NOT LIKE 'SYS%' AND T.TABSCHEMA = S.TABSCHEMA AND T.TABNAME = S.TABNAME AND T.IID = S.IID  and T.INDEX_SCANS = 0;

##Container with high read time

SELECT varchar(container_name,70) as container_name, varchar(tbsp_name,20) as tbsp_name, pool_read_time FROM TABLE(MON_GET_CONTAINER('',-2)) AS t ORDER BY pool_read_time DESC;

##SQL that runs long...over a minute

SELECT ELAPSED_TIME_MIN, SUBSTR(AUTHID,1,10) AS AUTH_ID, AGENT_ID, APPL_STATUS, SUBSTR(STMT_TEXT,1,60) AS SQL_TEXT FROM SYSIBMADM.LONG_RUNNING_SQL WHERE ELAPSED_TIME_MIN > 0 ORDER BY ELAPSED_TIME_MIN DESC;

##SQL by number of executions

SELECT substr(NUM_EXECUTIONS,1,10)as NUM_EXECS,SUBSTR(STMT_TEXT,1,250) FROM    SYSIBMADM.TOP_DYNAMIC_SQL ORDER BY NUM_EXECUTIONS DESC FETCH FIRST 5 ROWS ONLY;

##SQL by average execution time

SELECT substr(AVERAGE_EXECUTION_TIME_S,1,5) as AVG_EXECTIME_S,SUBSTR(STMT_TEXT,1,250) FROM   SYSIBMADM.TOP_DYNAMIC_SQL ORDER BY AVERAGE_EXECUTION_TIME_S DESC FETCH FIRST 5 ROWS ONLY;

##SQL in the package cache ordered by AVG CPU TIME

SELECT NUM_EXECUTIONS , dec(TOTAL_CPU_TIME/NUM_EXEC_WITH_METRICS,20,2) as AVG_CPU_TIME,STMT_EXEC_TIME,substr(STMT_TEXT,1,500) FROM TABLE(MON_GET_PKG_CACHE_STMT ( 'D', NULL, NULL, -2)) as T WHERE T.NUM_EXEC_WITH_METRICS <> 0 ORDER BY AVG_CPU_TIME DESC FETCH FIRST 20 ROWS ONLY;

##Mempools

SELECT varchar(memory_set_type, 20) AS set_type, varchar(memory_pool_type,20) AS pool_type,  memory_pool_used, memory_pool_used_hwm,member FROM TABLE( MON_GET_MEMORY_POOL(NULL, CURRENT_SERVER, -2)) where memory_set_type != 'PRIVATE' order by member;

 

##########DB2 TRACE
db2trc on
... recreate the problem ...
db2trc dump trace.dmp
db2trc off
db2trc flw trace.dmp trace.flw
db2trc fmt trace.dmp trace.fmt

 

###Explain plan for a stored procedure
Here are the steps on how to generate an explain plan for a stored procedure:

1) db2 connect to <dbname>
2) db2 "call SYSPROC.SET_ROUTINE_OPTS('EXPLAIN ALL REOPT ALWAYS')"
3) db2 "drop procedure XXXXX"
4) db2 "create procedure XXXXX......"
5) db2exfmt -d <dbname> -g TIC -w -1 -n % -s % -# 0 -o explain.out
6) db2 terminate

###Generate a formatted explain plan

% db2 connect to <database_name>
% db2 set current explain mode explain
% db2 -tvf <Input file with an SQL statement ended with a semicolon>
% db2 set current explain mode no
% db2exfmt -d <dbname> -g TIC -w -1 -n % -s % -# 0 -o <output file>

 

###Generate an explain plan with recommended indexes
db2 set current explain mode recommend indexes
db2 -tvf sqlfile
db2 set current explain mode evaluate indexes
db2 -tvf sqlfile
db2exfmt -d <dbname> -g TIC -w -1 -n % -s % -# 0 -o <output file>
db2 set current explain mode no

###Generate an explain plan from a file of multiple queries ending in @

db2expln -d DBNAME -o explnplan.out -f query.sql -z @ -graph

###DB2Batch example

add these three lines to the top of a file containing the sql you want to run db2batch

-- db2batch.sql
-- ------------
--#SET PERF_DETAIL 3 ROWS_OUT 5

###Execute db2batch. 

It will run the sql and returnn only 5 rows and give you an application snapshot and the execution time.  it will use the same resources as the SQL normally uses and if you need to kill it, you need to force off the db2batch application.

=>db2batch -d sample -f db2batch.sql