#! /usr/local/bin/perl
#
# $Id: check_ntp,v 1.2 2007/09/17 23:39:14 doke Exp $
#
# Nagios plugin to check NTP time servers.
#
# By Doke Scott, doke@udel.edu, 2004.9.16
#
# based on check_ntp by
# (c)1999 Ian Cass, Knowledge Matters Ltd.
# Read the GNU copyright stuff for all the legalese
#
# Overview:
#   1. Parse options and validate the target host and thresholds.
#   2. Run "ntpq -np <host>" and pick the sys.peer/pps.peer line (or, failing
#      that, the last candidate line) to obtain stratum, offset and jitter.
#   3. If ntpq produced no peer, fall back to "ntpdate -q <host>", which
#      yields server, stratum and offset but no jitter.
#   4. Compare the values against the thresholds and exit with the matching
#      Nagios status code.

require 5.004;
use POSIX;
use strict;
use Getopt::Long;
use lib "/usr/local/nagios-1.1/libexec" ;
use utils qw($TIMEOUT %ERRORS &print_revision &support);
use vars qw($opt_V $opt_h $opt_H $opt_w $opt_c $opt_j $opt_k );

my ( @crit_errors, @warn_errors, @unknown_errors, $verbose, $tally, $rc );

my $PROGNAME = "check_ntp";

# The plugin shells out, so scrub the environment first.
$ENV{'PATH'}     = '';
$ENV{'BASH_ENV'} = '';
$ENV{'ENV'}      = '';

# Offset defaults, in milliseconds.
my $DEFAULT_OFFSET_WARN = 60000;     # 1 minute
my $DEFAULT_OFFSET_CRIT = 120000;    # 2 minutes

# Jitter defaults, in milliseconds.
my $DEFAULT_JITTER_WARN = 5000;      # 5 sec
my $DEFAULT_JITTER_CRIT = 10000;     # 10 sec

my $syspeer    = undef;
my $stratum    = undef;
my $offset     = undef;
my $jitter     = undef;
my $candidates = 0;
my $found_peer = 0;

my $owarn = $DEFAULT_OFFSET_WARN;
my $ocrit = $DEFAULT_OFFSET_CRIT;
my $jwarn = $DEFAULT_JITTER_WARN;
my $jcrit = $DEFAULT_JITTER_CRIT;

########################
# Option parsing

Getopt::Long::Configure('bundling');
GetOptions(
    "V"          => \$opt_V, "version"    => \$opt_V,
    "h"          => \$opt_h, "help"       => \$opt_h,
    "v"          => \$verbose, "verbose"  => \$verbose,
    "w=f"        => \$opt_w, "warning=f"  => \$opt_w,  # offset|adjust warning if above this number
    "c=f"        => \$opt_c, "critical=f" => \$opt_c,  # offset|adjust critical if above this number
    "j=s"        => \$opt_j, "jwarn=s"    => \$opt_j,  # jitter warning if above this number
    "k=s"        => \$opt_k, "jcrit=s"    => \$opt_k,  # jitter critical if above this number
    "H=s"        => \$opt_H, "hostname=s" => \$opt_H
);

if ($opt_V) {
    print_revision($PROGNAME,'$Revision: 1.2 $ ');
    exit $ERRORS{'OK'};
}

if ($opt_h) {
    print_help();
    exit $ERRORS{'OK'};
}

# The host may also be given as the first non-option argument.
$opt_H = shift unless ($opt_H);

# Only a dotted-quad address or a plain host name is accepted; the captured
# value is what later gets interpolated into the shell command lines.
my $host;
if ( $opt_H && $opt_H =~ m/^([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+|[a-zA-Z][-a-zA-Z0-9]+(\.[a-zA-Z][-a-zA-Z0-9]+)*)$/ ) {
    $host = $1;
}
unless ($host) {
    print "No target host specified\n";
    print_usage();
    exit $ERRORS{'UNKNOWN'};
}

# Override the default thresholds with whatever was supplied.
$owarn = $1 if ( defined $opt_w && $opt_w =~ /([0-9.]+)/ );
$ocrit = $1 if ( defined $opt_c && $opt_c =~ /([0-9.]+)/ );
$jwarn = $1 if ( defined $opt_j && $opt_j =~ /([0-9]+)/ );
$jcrit = $1 if ( defined $opt_k && $opt_k =~ /([0-9]+)/ );

if ( $ocrit < $owarn ) {
    print "Critical offset should be larger than warning offset\n";
    print_usage();
    exit $ERRORS{"UNKNOWN"};
}

# Compare the effective thresholds (defaults included), not the raw options,
# so that giving only one of -j / -k is still validated sensibly.
if ( $jcrit < $jwarn ) {
    print "Critical jitter should be larger than warning jitter\n";
    print_usage();
    exit $ERRORS{'UNKNOWN'};
}

# Just in case of problems, let's not hang Nagios
$SIG{'ALRM'} = sub {
    print ("ERROR: No response from ntp server (alarm)\n");
    exit $ERRORS{"UNKNOWN"};
};
alarm($TIMEOUT);

#
# Scan ntpq
#
# Field 1:  Tally Code ( Space, 'x','.','-','+','#','*','o')
#           We want '*' which implies sys.peer
#           or 'o' which implies pps.peer
#           If we can't get that, we want '#' or '+' which means candidate
#           Use last peer, if there is one.  Otherwise use last candidate.
# Field 2:  address of the remote peer
# Field 3:  Refid of the clock (0.0.0.0 if unknown, WWWV/PPS/GPS/ACTS/USNO/PCS/...
#           if Stratum1)
# Field 4:  stratum (0-15)
# Field 5:  Type of the peer: local (l), unicast (u), multicast (m)
#           broadcast (b); not sure about multicast/broadcast
# Field 6:  last packet receive (in seconds)
# Field 7:  polling interval
# Field 8:  reachability register (octal)
# Field 9:  delay
# Field 10: offset in ms
# Field 11: dispersion/jitter in ms
#

# some systems don't have a proper ntpq (migrated from ntpdc)
my $have_ntpq = ( $utils::PATH_TO_NTPQ && -x $utils::PATH_TO_NTPQ ) ? 1 : 0;

if ($have_ntpq) {
    if ( open( NTPQ, "$utils::PATH_TO_NTPQ -np $host 2>&1 |" ) ) {
        while (<NTPQ>) {
            print $_ if ($verbose);
            last if (/timed out/);

            # match sys.peer, pps.peer or a candidate line
            next unless (
                /^([\*o\+\#]) ([-\d\.]+) \s+ ([-\d\w\.]+) \s+ ([-\d\.]+) \s+ ([lumb-]+) \s+ ([-\d]+) \s+ ([-\d]+) \s+ ([-\d]+) \s+ ([-\d\.]+) \s+ ([-\d\.]+) \s+ ([-\d\.]+)/ix
            );

            # Copy the captures right away so nothing below can clobber them.
            my ( $remote, $peer_stratum, $peer_offset, $peer_jitter )
                = ( $2, $4, $10, $11 );
            $tally = $1;
            print "match\n" if $verbose;

            if ( $tally eq '*' || $tally eq 'o' ) {
                $found_peer = 1;
                ( $syspeer, $stratum, $offset, $jitter )
                    = ( $remote, $peer_stratum, $peer_offset, $peer_jitter );
            }
            elsif ( $tally eq '#' || $tally eq '+' ) {
                ++$candidates;

                # A candidate only counts while no real peer has been seen.
                if ( !$found_peer ) {
                    ( $syspeer, $stratum, $offset, $jitter )
                        = ( $remote, $peer_stratum, $peer_offset, $peer_jitter );
                }
            }
        }
        close NTPQ;

        if ( !$found_peer && $candidates ) {
            print "no sys.peer, using candidate\n" if $verbose;
        }
    }
}

if ( !$syspeer ) {
    #
    # ntpq didn't work, so let's try getting partial data from ntpdate
    #
    print "running '$utils::PATH_TO_NTPDATE -q $host 2>&1 |'\n" if $verbose;

    if ( !open( NTPDATE, "$utils::PATH_TO_NTPDATE -q $host 2>&1 |" ) ) {
        print "Could not open ntpdate\n";
        exit $ERRORS{"UNKNOWN"};
    }

    while (<NTPDATE>) {
        print if ($verbose);

        if ( m/server \s+ ([\d\.-]+)/xi ) {
            $syspeer = $1;
        }
        if ( m/stratum \s+ (\d+)/xi ) {
            $stratum = $1;
        }
        if ( m/(offset|adjust) \s+ ([-\.\d]+)/xi ) {
            $offset = $2 * 1000;    # convert s to ms
        }
        if ( m/no server suitable for synchronization found/i ) {
            if ( defined($stratum) && $stratum == 16 ) {
                push @crit_errors, "Desynchronized peer server found";
            }
            else {
                push @crit_errors, "No suitable peer server found";
            }
        }
    }
    close(NTPDATE);

    # declare an error if we also get a non-zero return code from ntpdate
    # unless already set to critical
    if ( $? && !@crit_errors ) {
        push @unknown_errors, "ntpdate returned $?";
    }
}

########################
# Evaluate what was collected

if ( !defined($syspeer) ) {
    push @crit_errors, "didn't get a syspeer";
}

if ( !defined($stratum) ) {
    push @crit_errors, "didn't get a stratum" unless @crit_errors;
}
elsif ( $stratum >= 16 ) {
    push @crit_errors, "stratum $stratum";
}

if ( !defined($offset) ) {
    push @crit_errors, "didn't get an offset" unless @crit_errors;
}
elsif ( abs($offset) > $ocrit ) {
    push @crit_errors, "offset $offset msec > +/- $ocrit msec";
}
elsif ( abs($offset) > $owarn ) {
    push @warn_errors, "offset $offset msec > +/- $owarn msec";
}

# A missing jitter is fine: ntpdate doesn't return it.
if ( defined($jitter) ) {
    if ( $jitter > $jcrit ) {
        push @crit_errors, "jitter $jitter msec > +/- $jcrit msec";
    }
    elsif ( $jitter > $jwarn ) {
        push @warn_errors, "jitter $jitter msec > +/- $jwarn msec";
    }
}

########################
# Report.  The most severe category seen first decides the exit code.

$rc = $ERRORS{'OK'};

if (@crit_errors) {
    print "CRITICAL: " . join( ", ", @crit_errors ) . " ";
    $rc = $ERRORS{'CRITICAL'};
}

if (@warn_errors) {
    print "; " if ( $rc != $ERRORS{'OK'} );
    print "Warning: " . join( ", ", @warn_errors ) . " ";
    $rc = $ERRORS{'WARNING'} if ( $rc == $ERRORS{'OK'} );
}

if (@unknown_errors) {
    print "; " if ( $rc != $ERRORS{'OK'} );
    print "Unknown: " . join( ", ", @unknown_errors ) . " ";

    # Use the Nagios UNKNOWN status rather than a raw -1 exit value.
    $rc = $ERRORS{'UNKNOWN'} if ( $rc == $ERRORS{'OK'} );
}
elsif ( $rc == $ERRORS{'OK'} ) {
    if ( defined($jitter) ) {
        printf "OK -- peer %s, stratum %d, offset %0.3f ms, jitter %0.3f ms",
            $syspeer, $stratum, $offset, $jitter;
    }
    else {
        printf "OK -- server %s, stratum %d, offset %0.3f ms",
            $syspeer, $stratum, $offset;
    }
}

print "\n";
exit $rc;

####
#### subs

# Print the one-line command synopsis.
sub print_usage {
    print "Usage: $PROGNAME -H <host> [-w <warn>] [-c <crit>] [-j <warn>] [-k <crit>] [-v verbose]\n";
}

# Print revision, copyright, synopsis and the description of every threshold
# option, followed by the standard plugin support text.
sub print_help {
    print_revision($PROGNAME,'$Revision: 1.2 $');
    print "Copyright (c) 2003 Bo Kersey/Karl DeBisschop\n";
    print "\n";
    print_usage();
    print "
Checks the local timestamp offset versus <host> with ntpdate
Checks the jitter/dispersion of clock signal between <host> and its sys.peer with ntpq\n
-w ( --warning)
     Clock offset in milliseconds at which a warning message will be generated.\n	Defaults to $DEFAULT_OFFSET_WARN.
-c (--critical)
     Clock offset in milliseconds at which a critical message will be generated.\n	Defaults to $DEFAULT_OFFSET_CRIT.
-j (--jwarn)
     Clock jitter in milliseconds at which a warning message will be generated.\n	Defaults to $DEFAULT_JITTER_WARN.
-k (--jcrit)
    Clock jitter in milliseconds at which a critical message will be generated.\n	Defaults to $DEFAULT_JITTER_CRIT.\n
    If jitter/dispersion is specified with -j or -k and ntpq times out, then a
    warning is returned.
";
    support();
}