#!/usr/local/bin/perl
#
# $Header: /opt/home/doke/work/nagios/RCS/check_sysUpTime,v 1.8 2015/11/02 15:37:38 doke Exp $
#
# Check a router or switch's sysUpTime, and compare it with the saved value,
# to see if it rebooted.
#
# This version uses a popen call to an external snmpget program.  This way
# is more than twice as fast as using Net::SNMP when you're only doing one
# snmp get.  If you're doing a bunch then Net::SNMP is much faster, but for
# just one this wins.  Maybe Net::SNMP's session setup has a lot of
# overhead?  Maybe it's because I'm doing this on a dual cpu box, and the
# external process gets to run on the other cpu?
#
# TODO: check whether this is still true in nagios 2.10 with the embedded
# perl.
#
# Exit status follows the %ERRORS table imported from utils:
# OK when the device is up, WARNING when a reboot was detected,
# UNKNOWN on usage, snmp or history database errors.

use strict;
use warnings;
no warnings 'redefine';

use Errno qw( ENOENT );
use Fcntl qw( O_CREAT O_RDWR );
use Getopt::Long;
use NDBM_File;
use Time::HiRes qw( usleep );

use lib '/usr/local/nagios/libexec';
use utils qw( $TIMEOUT %ERRORS );

use vars qw( $historydb $verbose $help $host $community $snmp_port
    $snmp_version );

# mib-2.system.sysUpTime.0
use constant SYSUPTIME_OID => '.1.3.6.1.2.1.1.3.0';

# sysUpTime is a 32 bit counter of hundredths of a second.  This is
# 2**32 - 360000, i.e. one hour (in ticks) before the counter rolls over.
use constant ROLLOVER_GUARD_TICKS => 4294607296;

# how many times to try to tie the history database before giving up
use constant TIE_ATTEMPTS => 20;

$historydb = "/usr/local/nagios/var/check_sysUpTime_history";
$snmp_port = 161;
$snmp_version = '2c';
$community = 'public';
$verbose = 0;

$ENV{PATH} = "/usr/local/bin:/usr/local/net-snmp/bin:/usr/sfw/bin:"
    . $ENV{PATH};


#############################

# Print the usage message on stderr and exit.
#   $_[0]  exit status; defaults to UNKNOWN so that a bad command line is
#          never reported to nagios as OK.
sub print_usage {
    my( $code ) = @_;

    $code = $ERRORS{'UNKNOWN'} if ( ! defined $code );
    warn "Usage: $0 [-v] [-h] [-f <historydb>] -H <host> [-C <community>]
        [-V { 1 | 2c }] [-P <port>]
    -H <host>        host to query
    -C <community>   snmp community string [$community]
    -f <historydb>   history ndbm database [$historydb]
    -V { 1 | 2c }    snmp version, must be '1' or '2c', [$snmp_version]
    -P <port>        snmp udp port [$snmp_port]
    -v               verbose, may be repeated
    -h               this help
\n";
    exit $code;
}


Getopt::Long::Configure( 'bundling' );
GetOptions(
    "H=s" => \$host,
    "C=s" => \$community,
    "f=s" => \$historydb,
    "P=i" => \$snmp_port,
    "V=s" => \$snmp_version,
    "v+"  => \$verbose,
    "h"   => \$help
) or print_usage();

print_usage( $ERRORS{'OK'} ) if $help;
print_usage() if ( ! defined $host || $host eq '' );
print_usage() if ( ! defined $community || $community eq '' );
print_usage() if ( $snmp_version ne '1' && $snmp_version ne '2c' );

# Just in case of problems, let's not hang Nagios
$SIG{'ALRM'} = sub {
    print( "ERROR: No response from snmp server (alarm)\n" );
    exit $ERRORS{"UNKNOWN"};
};
alarm( $TIMEOUT );

check_sysuptime( $host, $community );

exit $ERRORS{'OK'};


#######################

# Quote a string so /bin/sh passes it through as one literal word.
sub shell_quote {
    my( $word ) = @_;

    $word =~ s/'/'\\''/g;
    return "'$word'";
}


# Best effort creation of empty $db.dir and $db.pag files (and of the
# directory holding them), for when tie() reports that they are missing.
# Failures are ignored here; the caller simply retries the tie.
sub create_history_files {
    my( $db ) = @_;
    my( $dir, $fh );

    # open for append, so a database which another instance of the plugin
    # created in the meantime is never truncated
    if ( ! open( $fh, '>>', "$db.dir" ) ) {
        # strip the last path component to get the containing directory
        $dir = $db;
        if ( $dir =~ s!/[^/]*$!! && $dir ne '' ) {
            mkdir $dir;
            chmod 0775, $dir;
        }
        open( $fh, '>>', "$db.dir" ) or return;
    }
    close $fh;

    if ( open( $fh, '>>', "$db.pag" ) ) {
        close $fh;
    }
    return;
}


# Fetch sysUpTime from $host with snmpget, swap it with the value saved in
# the history database, and print the nagios status line.
#   $host       host to query
#   $community  snmp community string
# Returns normally when the device is simply up.  Exits WARNING itself when
# the uptime went backwards (a reboot), and UNKNOWN on snmp or dbm errors.
sub check_sysuptime {
    my( $host, $community ) = @_;
    my( $cmd, $fh, $uptime, $uptime_str, %hist, $prev_uptime, $tied,
        $tie_error );

    # The command still goes through the shell, for the 2>&1, so the
    # values that come from the command line are quoted.
    $cmd = join( ' ',
        'snmpget', '-v', $snmp_version, '-On', '-Oq', '-Ot',
        '-c', shell_quote( $community ),
        shell_quote( "$host:$snmp_port" ),
        SYSUPTIME_OID, '2>&1' );
    $verbose && print "+ $cmd\n";
    if ( ! open( $fh, '-|', $cmd ) ) {
        print "can't run snmpget $host: $!\n";
        exit $ERRORS{'UNKNOWN'};
    }
    while ( my $line = <$fh> ) {
        if ( $line =~ m/\.1\.3\.0 (\d+)/ ) {
            $uptime = $1;
            last;
        }
        elsif ( $line =~ m/^Timeout:/i ) {
            print $line;
            exit $ERRORS{'UNKNOWN'};
        }
        elsif ( $line =~ m/snmpget: Unknown host/i ) {
            print "Unknown host\n";
            exit $ERRORS{'UNKNOWN'};
        }
        else {
            warn $line;
        }
    }
    close $fh;

    if ( ! defined $uptime ) {
        print "couldn't snmpget timeticks value\n";
        exit $ERRORS{'UNKNOWN'};
    }

    $uptime_str = ticks_to_str( $uptime );
    $verbose && print "uptime $uptime = $uptime_str\n";

    # try to tie the NDBM database, retry a few times with a small sleep
    $tied = 0;
    $tie_error = '';
    for my $attempt ( 1 .. TIE_ATTEMPTS ) {
        # Symbolic flags: the numeric value of O_CREAT is not the same on
        # every platform.  0644 = permissions on new dbm files, if created.
        if ( tie( %hist, 'NDBM_File', $historydb, O_CREAT | O_RDWR, 0644 ) ) {
            $tied = 1;
            last;
        }

        # remember why, before anything below overwrites $!
        my $errno = $! + 0;
        $tie_error = "$!";

        if ( $errno == ENOENT && ! -f "$historydb.dir" ) {
            create_history_files( $historydb );
        }

        usleep( 10000 + int( rand 100000 ) );
    }
    if ( ! $tied ) {
        print "internal plugin error, can't tie dbm: $tie_error\n";
        exit $ERRORS{'UNKNOWN'};
    }

    # first time we see this host: treat it as if it had just booted
    $prev_uptime = $hist{ $host };
    $prev_uptime = 0 if ( ! defined $prev_uptime );
    $hist{ $host } = $uptime;

    untie( %hist );

    $verbose && printf( "prev_uptime %u, uptime %u\n", $prev_uptime, $uptime );

    # If uptime got smaller, then it either rebooted, or the counter rolled
    # over, so ignore it if the prev_uptime was within an hour of rolling
    # over.  Remember, sysUpTime is in hundredths of a second, and rolls
    # over every 497.1 days.
    if ( $uptime < $prev_uptime && $prev_uptime < ROLLOVER_GUARD_TICKS ) {
        printf "rebooted at %s, uptime changed from %s (%u) to %s (%u)\n",
            scalar( localtime( time() - ( $uptime / 100 ) ) ),
            ticks_to_str( $prev_uptime ), $prev_uptime,
            ticks_to_str( $uptime ), $uptime;
        exit $ERRORS{'WARNING'};
    }

    print "up $uptime_str ($uptime)\n";
    return;
}


# Convert a count of hundredths of a second into a string such as
# "3w 0d 3h 12m 5s".  Leading zero fields are left out, but once a field
# has been printed all smaller ones are too, and the seconds field is
# always printed, so less than one second gives "0s" rather than "".
sub ticks_to_str {
    my( $ticks ) = @_;
    my( @intervals, @letters, $interval, $str, $i, $n, $started );

    @intervals = ( 60480000, 8640000, 360000, 6000, 100 );
    @letters   = ( 'w ',     'd ',    'h ',   'm ', 's' );

    $str = '';
    $started = 0;
    for $i ( 0 .. $#intervals ) {
        $interval = $intervals[ $i ];
        if ( $ticks >= $interval || $started || $i == $#intervals ) {
            $n = int( $ticks / $interval );
            $ticks -= $n * $interval;
            $str .= sprintf( "%u%s", $n, $letters[ $i ] );
            $started = 1;   # show days in 3weeks 0days 3hours
        }
    }

    return $str;
}