[dw-ops] Add MySQL replication checking.
[commit: http://hg.dwscoalition.org/dw-ops/rev/b60429c76622]
Add MySQL replication checking.
Patch by
mark.
Files modified:
Add MySQL replication checking.
Patch by ![[staff profile]](https://www.dreamwidth.org/img/silk/identity/user_staff.png) mark.
Files modified:
- nagios/conf.d/config/commands.cfg
- nagios/conf.d/config/hostgroups.cfg
- nagios/conf.d/hosts/dfw-db-a01-auto.cfg
- nagios/conf.d/services/db.cfg
- nagios/custom/check_mysql_replication.pl
--------------------------------------------------------------------------------
diff -r 5ba9e2aee2ac -r b60429c76622 nagios/conf.d/config/commands.cfg
--- a/nagios/conf.d/config/commands.cfg Sun Apr 26 08:06:17 2009 +0000
+++ b/nagios/conf.d/config/commands.cfg Wed Apr 29 09:21:03 2009 +0000
@@ -21,6 +21,13 @@ define command {
 }
 
 
+# replication check
+define command {
+    command_name dw_check_mysql_repl
+    command_line $USER1$/custom/check_mysql_replication.pl -H $HOSTADDRESS$ -u $USER3$ -p $USER4$
+}
+
+
 # checks that we are not falling behind on TheSchwartz jobs
 define command {
     command_name dw_check_theschwartz_queue
diff -r 5ba9e2aee2ac -r b60429c76622 nagios/conf.d/config/hostgroups.cfg
--- a/nagios/conf.d/config/hostgroups.cfg Sun Apr 26 08:06:17 2009 +0000
+++ b/nagios/conf.d/config/hostgroups.cfg Wed Apr 29 09:21:03 2009 +0000
@@ -87,6 +87,14 @@ define hostgroup {
 
 # databases
 define hostgroup {
+    hostgroup_name db_slave
+    alias Database Servers - Slaves
+}
+
+
+
+# databases
+define hostgroup {
     hostgroup_name db_schwartz
     alias Database Servers - TheSchwartz
 }
diff -r 5ba9e2aee2ac -r b60429c76622 nagios/conf.d/hosts/dfw-db-a01-auto.cfg
--- a/nagios/conf.d/hosts/dfw-db-a01-auto.cfg Sun Apr 26 08:06:17 2009 +0000
+++ b/nagios/conf.d/hosts/dfw-db-a01-auto.cfg Wed Apr 29 09:21:03 2009 +0000
@@ -4,7 +4,7 @@ define host {
 define host {
     host_name dfw-db-a01
     alias dfw-db-a01
-    hostgroups db
+    hostgroups db,db_slave
     address 10.176.71.86
     use generic-host
 }
diff -r 5ba9e2aee2ac -r b60429c76622 nagios/conf.d/services/db.cfg
--- a/nagios/conf.d/services/db.cfg Sun Apr 26 08:06:17 2009 +0000
+++ b/nagios/conf.d/services/db.cfg Wed Apr 29 09:21:03 2009 +0000
@@ -23,6 +23,15 @@ define service {
 }
 
 
+# verify that MySQL replication is running and not falling behind
+define service {
+    hostgroup_name db_slave
+    service_description MySQL Replication
+    check_command dw_check_mysql_repl
+    use generic-service
+}
+
+
 # in particular, some databases have theschwartz on them and should
 # be monitored for the queues
#!/usr/bin/perl
#
# nagios/custom/check_mysql_replication.pl
#
# Nagios plugin: connects to a MySQL server, runs SHOW SLAVE STATUS and maps
# the result onto a Nagios exit code (0 OK, 1 WARNING, 2 CRITICAL, 3 UNKNOWN).
#
# Posted as the final hunk of this changeset (a newly added file):
#   define service {      <- last context line of the preceding db.cfg hunk
#   diff -r 5ba9e2aee2ac -r b60429c76622 nagios/custom/check_mysql_replication.pl
#   --- /dev/null Thu Jan 01 00:00:00 1970 +0000
#   +++ b/nagios/custom/check_mysql_replication.pl Wed Apr 29 09:21:03 2009 +0000
#   @@ -0,0 +1,57 @@
# The script is shown here as plain Perl, without the diff '+' markers.
#
# NOTE(review): the password is taken from the command line, so it may be
# visible in the process list of the monitoring host while the check runs.

use strict;
use warnings;
use Getopt::Long;

# Nagios plugin exit codes and the default lag limits (in seconds).
use constant {
    OK                   => 0,
    WARNING              => 1,
    CRITICAL             => 2,
    UNKNOWN              => 3,
    DEFAULT_WARN_SECONDS => 15,
    DEFAULT_CRIT_SECONDS => 60,
};

# Decide the check result from one SHOW SLAVE STATUS row.
#
#   $rep        - hashref of the status row, or undef when the query failed
#                 or returned no row
#   $warn_after - lag in seconds above which the result is WARNING
#                 (optional, defaults to 15)
#   $crit_after - lag in seconds above which the result is CRITICAL
#                 (optional, defaults to 60)
#
# Returns the list ( $exit_code, $message ). Performs no I/O.
sub evaluate_replication {
    my ( $rep, $warn_after, $crit_after ) = @_;
    $warn_after = DEFAULT_WARN_SECONDS unless defined $warn_after;
    $crit_after = DEFAULT_CRIT_SECONDS unless defined $crit_after;

    # No row at all: the query failed or the server has no slave setup.
    return ( CRITICAL, 'Database error or database not a slave.' )
        unless ref $rep eq 'HASH' && %$rep;

    # A stopped slave is reported as stopped even if Slave_IO_State is
    # empty, instead of being mistaken for "not a slave".
    my $io  = defined $rep->{Slave_IO_Running}  ? $rep->{Slave_IO_Running}  : 'unknown';
    my $sql = defined $rep->{Slave_SQL_Running} ? $rep->{Slave_SQL_Running} : 'unknown';
    return ( CRITICAL, "Replication stopped: IO=$io, SQL=$sql." )
        unless $io eq 'Yes' && $sql eq 'Yes';

    # A NULL (undef) lag must not compare as 0 and slip through as OK.
    my $lag = $rep->{Seconds_Behind_Master};
    return ( CRITICAL, 'Replication lag unknown: Seconds_Behind_Master not reported.' )
        unless defined $lag && $lag =~ /\A\d+\z/;

    return ( CRITICAL, "Replication very far behind: $lag." )
        if $lag > $crit_after;
    return ( WARNING, "Replication behind: $lag." )
        if $lag > $warn_after;

    return ( OK, 'Replication okay.' );
}

# Print the one-line plugin output and terminate with the given exit code.
sub nagios_exit {
    my ( $code, $message ) = @_;
    print "$message\n";
    exit $code;
}

# Print usage information and exit with the UNKNOWN status.
sub usage {
    print <<"EOF";
check_mysql_replication.pl -- easy way to monitor replication

Usage: check_mysql_replication.pl -H <host> -u <user> -p <password>
                                  [-w <seconds>] [-c <seconds>]

  -H, --host      database host to connect to
  -u, --user      database user
  -p, --password  database password
  -w, --warning   lag in seconds above which to warn (default 15)
  -c, --critical  lag in seconds above which to go critical (default 60)
EOF
    exit UNKNOWN;
}

# Parse the command line, query the server and report the result.
sub main {
    my ( $user, $pass, $host );
    my $warn_after = DEFAULT_WARN_SECONDS;
    my $crit_after = DEFAULT_CRIT_SECONDS;

    # -u / -p are declared explicitly rather than relying on Getopt::Long
    # auto-abbreviation of --user / --password.
    GetOptions(
        'host|H=s'     => \$host,
        'user|u=s'     => \$user,
        'password|p=s' => \$pass,
        'warning|w=i'  => \$warn_after,
        'critical|c=i' => \$crit_after,
    ) or usage();
    usage()
        unless $user && $pass && $host;
    usage()
        if $warn_after < 0 || $crit_after < $warn_after;

    # DBI is loaded at run time so that a missing module is reported as
    # UNKNOWN on stdout instead of as a compile failure.
    eval { require DBI; 1 }
        or nagios_exit( UNKNOWN, 'Perl DBI module is not available.' );

    # connect to the database
    my $dbh = DBI->connect( "DBI:mysql:host=$host", $user, $pass,
        { PrintError => 0, RaiseError => 0 } )
        or nagios_exit( CRITICAL, 'Unable to connect to database.' );

    my $rep = $dbh->selectrow_hashref( 'SHOW SLAVE STATUS' );
    $dbh->disconnect;

    nagios_exit( evaluate_replication( $rep, $warn_after, $crit_after ) );
}

# Run only when executed as a script, not when loaded by another file.
main() unless caller;

# --------------------------------------------------------------------------------