A simple
monitor for long running sql’s in an Oracle database.
If you are
here, you are probably searching for something that I searched for before
putting this together. This is a simple job that checks for any sessions that
is taking long time to execute in the database. Since a single, bad, long
running query could cause performance issue at the database level, it is good
to be aware of what is happening in our database, especially in oltp
environment where response times are critical.
We can also use
this to be aware of the problem sqls that are candidates for tuning.
- We can schedule the job according to our needs. When it runs and how often it is run.
- We can exclude user accounts that should not be monitored (SYS, SYSTEM, DBSNMP, etc)
- We determine the long running alert threshold. Anything running beyond this number of seconds is a candidate session/sql for the job to alert us.
- We log into a temporary table. We also store the history for 30 days in a history table.
- We capture important details that we’d like to know (user, machine, sql_id, sql_text, etc)
- The job sends an email with the full sql text as well.
- Alerts are sent only 1 time within the hour for the same user and sql combination, even if it continues to run long throughout the hour. Another email is sent if the same combination runs long in the next hour.
- For multiple parallel sessions, we get 1 email so we don’t get bombarded with several emails.
Below is a
screenshot of how we get notified. This is a very basic setup but it works for
me and is RAC aware.
How to setup the job
Create our work
table(dba_long_sqlstat) and history table(dba_hist_long_sqlstat).
CREATE TABLE
"SYS"."DBA_LONG_SQLSTAT"
( "INST_ID"
NUMBER,
"USERNAME" VARCHAR2(30 BYTE),
"OSUSER" VARCHAR2(30 BYTE),
"MACHINE" VARCHAR2(64 BYTE),
"PROGRAM" VARCHAR2(48 BYTE),
"SQL_ID" VARCHAR2(13 BYTE),
"LAST_CALL_ET" NUMBER,
"SQL_TEXT" CLOB
) PCTFREE 10 PCTUSED 40 INITRANS 1 MAXTRANS
255 NOCOMPRESS LOGGING
STORAGE(INITIAL 65536 NEXT 1048576 MINEXTENTS
1 MAXEXTENTS 2147483645
PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1
BUFFER_POOL DEFAULT FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT)
TABLESPACE "USERS"
LOB ("SQL_TEXT") STORE AS BASICFILE
(
TABLESPACE "USERS" ENABLE STORAGE
IN ROW CHUNK 8192 RETENTION
NOCACHE LOGGING
STORAGE(INITIAL 65536 NEXT 1048576 MINEXTENTS
1 MAXEXTENTS 2147483645
PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1
BUFFER_POOL DEFAULT FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT)) ;
CREATE TABLE
"SYS"."DBA_HIST_LONG_SQLSTAT"
( "CAPTURE_TIMESTAMP"
DATE DEFAULT SYSDATE,
"INST_ID" NUMBER,
"USERNAME" VARCHAR2(30 BYTE),
"OSUSER" VARCHAR2(30 BYTE),
"MACHINE" VARCHAR2(64 BYTE),
"PROGRAM" VARCHAR2(48 BYTE),
"SQL_ID" VARCHAR2(13 BYTE),
"LAST_CALL_ET" NUMBER,
"SQL_TEXT" CLOB
) PCTFREE 10 PCTUSED 40 INITRANS 1 MAXTRANS
255 NOCOMPRESS LOGGING
STORAGE(INITIAL 65536 NEXT 1048576 MINEXTENTS
1 MAXEXTENTS 2147483645
PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1
BUFFER_POOL DEFAULT FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT)
TABLESPACE "USERS"
LOB ("SQL_TEXT") STORE AS BASICFILE
(
TABLESPACE "USERS" ENABLE STORAGE
IN ROW CHUNK 8192 RETENTION
NOCACHE LOGGING
STORAGE(INITIAL 65536 NEXT 1048576 MINEXTENTS
1 MAXEXTENTS 2147483645
PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1
BUFFER_POOL DEFAULT FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT)) ;
Create the
procedure
Customize the
procedure for your needs and create the procedure. The procedure should work as
is. I have highlighted in red the areas which you should likely customize.
1) The user
account we don’t want to monitor.
2) last_call_et
=> in my case any sql running above 180 seconds will be alerted.
3) notification
email address
create
or replace
procedure long_running_sql
is
v_db_name
v$parameter.value%type;
crlf
VARCHAR2(2):= CHR( 13 ) || CHR( 10 );
begin
dbms_utility.exec_ddl_statement('truncate
table dba_long_sqlstat');
insert
into dba_long_sqlstat
select
s.inst_id,
s.username, s.osuser, s.machine, s.program, s.sql_id,l_et,st.sql_fulltext
from
(
select
distinct inst_id, username, osuser, machine, program, sql_id, max(last_call_et)
as l_et
from
gv$session
where
username
is not null and
username
not in ('SYS','SYSTEM','DBSNMP') and
status='ACTIVE'
and
last_call_et > 180
group
by
inst_id,
username, osuser, machine, program, sql_id
)
s
left
outer join gv$sql st
on
s.sql_id = st.sql_id;
commit;
select
value into v_db_name from v$parameter where name = 'db_name';
for
rec in
(
select
* FROM dba_long_sqlstat WHERE ROWID IN
(SELECT MAX(ROWID) FROM
(SELECT inst_id, username,
osuser, machine, program, sql_id, last_call_et, sql_text
FROM (
SELECT inst_id,
username, osuser, machine, program, sql_id, last_call_et, sql_text, rank() over
(partition by sql_id order by last_call_et desc) rnk
FROM
dba_long_sqlstat
where
sql_id not in (select sql_id from dba_hist_long_sqlstat where capture_timestamp
between TRUNC(SYSDATE, 'HH24') and sysdate)
)
WHERE
rnk = 1
)
group by sql_id)
)
loop
declare
v_from
varchar2(80) := v_db_name;
v_recipient varchar2(80) := 'xxxx@gmail.com';
v_subject
varchar2(80) := 'long running sql on '||v_db_name;
v_mail_host varchar2(30) := 'localhost';
v_mail_conn utl_smtp.connection;
crlf
varchar2(2) := chr(13)||chr(10);
begin
v_mail_conn :=
utl_smtp.open_connection(v_mail_host,25);
utl_smtp.helo(v_mail_conn, v_mail_host);
utl_smtp.mail(v_mail_conn, v_from);
utl_smtp.rcpt(v_mail_conn, v_recipient);
utl_smtp.data(v_mail_conn, 'Content-Type:
text/html;' || crlf ||
'To: ' || v_recipient || crlf ||
'Subject: '||
v_subject || crlf ||
'The following sql is
running for '|| rec.last_call_et || '
seconds in database '|| v_db_name ||'<br><br></html>' ||
'DB User: '||
'<html><font color="darkblue">'|| rec.username ||
'</font><br></html>' ||
'On Instance: '||
'<html><font color="darkblue">'|| rec.inst_id ||
'</font><br></html>' ||
'OS User: '||
'<html><font color="darkblue">'|| rec.osuser ||
'</font><br></html>' ||
'Source Machine: '||
'<html><font color="darkblue">'|| rec.machine ||
'</font><br></html>' ||
'Program: '||
'<html><font color="darkblue">'|| rec.program ||
'</font><br><br></html>' ||
'SQL ID: '||
'<html><font color="red">'|| rec.sql_id ||
'</font><br><br></html>' ||
'<html><body><font
face="courier" size="1" color="maroon">'||
rec.sql_text ||'</font></body></html>');
utl_smtp.quit(v_mail_conn);
end;
end
loop;
insert
into dba_hist_long_sqlstat(
inst_id,
username,
osuser,
machine,
program,
sql_id,
last_call_et,
sql_text)
select
* from dba_long_sqlstat;
delete
from dba_hist_long_sqlstat where capture_timestamp < (sysdate - 30);
commit;
end
long_running_sql ;
Schedule the job.
Again, customize it according to your needs.
I have set this
to run every 2 minutes, during business hours(gmt) and on weekdays.
BEGIN
DBMS_SCHEDULER.create_job (
job_name => 'long_running_sql_check_job',
job_type => 'PLSQL_BLOCK',
job_action => 'BEGIN long_running_sql; END;',
start_date => SYSTIMESTAMP,
repeat_interval =>
'FREQ=MINUTELY;INTERVAL=2;BYHOUR=12,13,14,15,16,17,18,19,20,21,22;BYDAY=MON,TUE,WED,THU,FRI',
end_date
=> NULL,
enabled => TRUE,
comments => 'Runs weekdays during business
hours ');
END;
/
Test
the procedure using the below block.
You should
login as a user other than the users you don’t monitor (SYS, SYSTEM, DBSNMP)
BEGIN
DBMS_LOCK.sleep(seconds => 500);
END;
/
We use Grid
control and several other tools that Oracle and other vendors provide for monitoring
databases and sqls. The above job is a simple setup if we want alerted real-time
and notified via email about any sql that is running long and which could
potentially cause an issue.
As always, test
before using it. It works for me. You take all responsibility if you want to
try it out J
Drop a note on
any enhancements that you do which improves this from basic to a better setup.
thanks, this is very good.
ReplyDeletewe are using DMBS_SCHEDULER JOBS to schedule all of our jobs. can you please let me know if you have any procedure to capture long running jobs to alert us.