#!/usr/local/bin/perl
#
# Check nagios latency
#
# $Header: /opt/home/doke/work/nagios/RCS/check_nagios_latency,v 1.1 2012/04/12 18:52:37 doke Exp $


# Default latency thresholds, in seconds.  These are package globals so the
# option parser (--cs / --ws / --ch / --wh) and the subs further down the
# file can all see them.
our $crit_service = 120;    # active service check latency: critical at or above
our $warn_service = 30;     # active service check latency: warning at or above
our $crit_host    = 120;    # active host check latency: critical at or above
our $warn_host    = 30;     # active host check latency: warning at or above


use strict;
use warnings;
use Getopt::Long;

# Pin the search path to a short, fixed list of directories instead of
# inheriting whatever the caller's environment supplies.
$ENV{PATH} = '/usr/local/nagios/bin:/usr/bin:/bin';

# Command-line flags: -v may be repeated and is counted, -h asks for help.
our ( $verbose, $help ) = ( 0, 0 );

# Result messages, one list per severity.  The checks append to these and
# the summary code at the end of the main program reads them.
our ( @crits, @warns, @unknowns, @oks );


# usage( $rc )
#
# Print the command-line synopsis on stdout and terminate the process.
# Each threshold option is shown with the value currently held in its
# own variable, so every line reports the right number.
#
# Parameters:
#   $rc - exit status to terminate with
#
# Does not return.
sub usage {
    my( $rc ) = @_;
    print "Usage: $0 [options]
    --cs n  critical active service check latency [$crit_service seconds]
    --ws n  warning active service check latency [$warn_service seconds]
    --ch n  critical active host check latency [$crit_host seconds]
    --wh n  warning active host check latency [$warn_host seconds]
    -v      verbose
    -h      help
";
    exit $rc;
}

# Parse the command line.  "bundling" lets single-letter flags be combined
# (-vv), which means the multi-letter threshold options must be written
# with a double dash (--cs, --ws, --ch, --wh).
Getopt::Long::Configure( "bundling" );
my $options_ok = GetOptions(
    'cs=i' => \$crit_service,
    'ws=i' => \$warn_service,
    'ch=i' => \$crit_host,
    'wh=i' => \$warn_host,
    'v+'   => \$verbose,
    'h'    => \$help,
);

# GetOptions returns false after complaining on stderr about an unknown
# option or a non-integer threshold.  Stop with the Nagios UNKNOWN status (3)
# rather than silently running the check with thresholds the caller did not
# ask for.
if ( !$options_ok ) {
    print "UNKNOWN: invalid command line options\n";
    usage( 3 );
}

usage( 0 ) if $help;


# Watchdog: if the whole check has not finished within ten seconds, report
# the timeout on stdout and leave with exit status 2 instead of hanging.
$SIG{ALRM} = sub {
    print "ERROR: timed out\n";
    exit 2;
};
alarm 10;

# Main program: run the check, then print a single summary line built from
# the per-severity message lists and exit with the matching Nagios status
# (0 OK, 1 WARNING, 2 CRITICAL, 3 UNKNOWN).
check_nagiostats();

my $rc = 0;
my $sep = '';    # separator placed before every section after the first

if ( @crits ) {
    $rc = 2;
    printf "%d CRITICAL errors: %s", scalar( @crits ), join( ", ", @crits );
    $sep = '; ';
}
if ( @warns ) {
    # Never downgrade a critical result to a warning.
    $rc = 1 if $rc == 0;
    printf "%s%d Warnings: %s", $sep, scalar( @warns ), join( ", ", @warns );
    $sep = '; ';
}
if ( @unknowns ) {
    # UNKNOWN must be 3.  A negative value passed to exit is truncated by
    # the operating system to 255, which is not a valid plugin status.
    $rc = 3 if $rc == 0;
    printf "%s%d Unknowns: %s", $sep, scalar( @unknowns ), join( ", ", @unknowns );
    $sep = '; ';
}
if ( $rc == 0 && @oks ) {
    # The OK details are only shown when nothing at all went wrong.
    printf "Ok: %s", join( ", ", @oks );
}

print "\n";
exit $rc;


##################





# check_nagiostats( [ @command ] )
#
# Run nagiostats, extract the active service and active host check latency
# from its output, and file each of the two figures under the severity list
# that matches its thresholds.
#
# Parameters:
#   @command - optional program and arguments to run in place of
#              /usr/local/nagios/bin/nagiostats.  Called with no arguments
#              the sub behaves as it always did.
#
# Reads:   $verbose, $warn_service, $crit_service, $warn_host, $crit_host
# Appends: one message per metric to @crits, @warns or @oks; or a single
#          message to @unknowns when the command cannot be started or
#          either latency line is missing from its output.
# Returns: nothing.
sub check_nagiostats {
    my @command = @_ ? @_ : ( '/usr/local/nagios/bin/nagiostats' );

    # Printable form of the command; the trailing '|' marks it as a
    # command whose output is being read.
    my $shown = join( ' ', @command ) . ' |';
    print "+ $shown\n" if $verbose;

    # Lexical handle and list-form pipe open: the handle is private to this
    # sub and the command is not assembled into a mode-carrying string.
    my $pipe;
    if ( !open( $pipe, '-|', @command ) ) {
        push @unknowns, "can't run $shown: $!";
        return;
    }

    # Each latency line carries three slash-separated figures; the third one
    # is the value compared against the thresholds.
    # NOTE(review): presumably min / max / average, making this the average
    # -- confirm against the nagiostats output format.
    my %latency;
    while ( my $line = <$pipe> ) {
        chomp $line;
        print "> $line\n" if $verbose;
        if ( $line =~ m/Active \s (Service|Host) \s Latency: \s+ [\d\.]+ \s+ \/ \s+ [\d\.]+ \s+ \/ \s+ ([\d\.]+) \s+ sec/ix ) {
            $latency{ lc $1 } = $2;
        }
    }

    # The close status is not inspected here: if the command produced no
    # usable latency lines, that is caught by the check just below.
    close $pipe;

    if ( !defined $latency{service} || !defined $latency{host} ) {
        push @unknowns, "unable to get latency stats";
        return;
    }

    # Judge the two metrics independently, so that a warning on one can
    # never hide a critical condition on the other.
    my @metrics = (
        [ 'service', $latency{service}, $warn_service, $crit_service ],
        [ 'host',    $latency{host},    $warn_host,    $crit_host ],
    );
    for my $metric ( @metrics ) {
        my ( $label, $seconds, $warn_at, $crit_at ) = @{ $metric };
        my $message = "$label latency $seconds sec";
        if ( $seconds >= $crit_at ) {
            push @crits, $message;
        }
        elsif ( $seconds >= $warn_at ) {
            push @warns, $message;
        }
        else {
            push @oks, $message;
        }
    }

    return;
}




