#!/usr/local/bin/perl
#
# Check optical power levels on juniper SFP, SFP+, and XFP modules
#
# $Header: /home/doke/work/nagios/RCS/check_juniper_optics,v 1.36 2016/09/26 15:36:23 doke Exp $
#
# Nagios plugin.  Logs in to a Juniper device over ssh, runs three
# "show ... | display xml" commands, and compares the rx power, tx power,
# temperature and voltage of every optic against the thresholds the module
# itself reports.  Exit code follows the Nagios convention:
# 0 ok, 1 warning, 2 critical, 3 unknown.
#

use strict;
use warnings;
no warnings 'uninitialized';
no warnings 'redefine';

use Getopt::Long;
use Net::SSH2;
use XML::DOM;
use DBI;
#use Data::Dumper;

# Package globals shared by all subs below.
our (
    $hostname, $target_interface, @ignore_interfaces,
    $login, $passwdfile, $verbose, $help,
    @names, @tags, %values,
    @crits, @warns, @unknowns, @oks, @ignores, $rc, $sep,
    $oper_down_window, $dbname, $dbuser, $dbpasswd_file,
    %last_ups, %ignore_interfaces, $hostaddr,
    $max_retries, $ssh2, $timeout
);

$hostname         = '';
$target_interface = '';
$login            = 'nagios';
$passwdfile       = '/usr/local/nagios/etc/nagios.pw';
$dbname           = "nagios";
$dbuser           = "nagios";
$dbpasswd_file    = '/usr/local/nagios/etc/nagios.pw';

# How long an interface must be continuously operationally down, before we
# just ignore it.  Since the default "last_up" time is 1 second after epoch,
# this should let us ignore interfaces that have never been up.
#
$oper_down_window = 86400 * 30;

$max_retries = 3;
$timeout     = 30;

# Start every run from a clean slate, in case package globals survive
# between invocations (e.g. under a persistent interpreter).
$verbose           = 0;
$help              = 0;
@ignore_interfaces = ();
@crits             = ();
@warns             = ();
@unknowns          = ();
@oks               = ();
@ignores           = ();
%last_ups          = ();
%ignore_interfaces = ();
undef $ssh2;


#################

# Print the usage of this script and exit with the Nagios UNKNOWN status.
# Any arguments are accepted and ignored.
sub usage {
    print <<"EOT";
Usage: $0 [options] -H <hostname>
    -i <interface>   name of interface to check [default all]
    -I <interface>   name of interface to ignore [default none]
                     can use multiple times
    -l <login>       A login name accepted by the target router.
    -p <passwdfile>  file containing the password for the login name.
    -v               increase verbosity
    -h               help
EOT
    exit 3;
}


# check arguments
Getopt::Long::Configure( "bundling" );
GetOptions(
    'H=s' => \$hostname,
    'i=s' => \$target_interface,
    'I=s' => \@ignore_interfaces,
    'l=s' => \$login,
    'p=s' => \$passwdfile,
    'v+'  => \$verbose,
    'h'   => \$help,
);
usage( 0 ) if ( $help );

$hostname   || usage();
$login      || usage();
$passwdfile || usage();

$hostaddr = $hostname;

check_juniper_optics();

# Summarize.  The worst severity seen decides the exit code; each severity
# gets its own "; "-separated section on the single output line.
$rc  = 0;
$sep = '';
if ( $#crits >= 0 ) {
    $rc = 2;
    print "CRITICAL ", join( ", ", @crits );
    $sep = '; ';
}
if ( $#warns >= 0 ) {
    $rc = 1 if ( $rc == 0 );
    print $sep, "Warning ", join( ", ", @warns );
    $sep = '; ';
}
if ( $#unknowns >= 0 ) {
    $rc = 3 if ( $rc == 0 );
    print $sep, "Unknown ", join( ", ", @unknowns );
    $sep = '; ';
}
if ( $rc == 0 || $verbose ) {
    print $sep, "Ok ", join( ", ", @oks );
    $sep = '; ';
}
if ( $#ignores >= 0 ) {
    print $sep, "Ignoring ", join( ", ", @ignores );
}

print "\n";
exit $rc;


###########################

# Map the '- Inf' string to a finite substitute so it can be compared
# numerically.  Any other value (including undef) is returned as is.
sub _finite_dbm {
    my ( $value, $substitute ) = @_;
    return ( $value eq '- Inf' ) ? $substitute : $value;
}


# Main check.  Collects module types, admin/oper state and optical
# diagnostics from the device, then pushes one message per finding onto
# @crits, @warns, @unknowns or @oks.  Returns nothing.
sub check_juniper_optics {
    my ( $stdout, $cmd, %optics, $ago );

    $verbose && print "check_juniper_optics()\n";

    # Load the last_up and monitor values for each port on this device from
    # the mysql database maintained by check_switch.  If a port has never
    # been up (or not in a really long time), then we ignore it.
    load_db_data();

    # convert interfaces to ignore to a hash for easy lookup
    if ( ! $target_interface ) {
        foreach my $name ( @ignore_interfaces ) {
            $ignore_interfaces{ $name } = 1;
        }
    }

    # ssh2_setup() has already recorded the reason in @unknowns on failure.
    ssh2_setup() or return;

    # get the type of each optic
    # damn I wish there was a better way to do this than
    # show chassis hardware
    $verbose && print "\ngetting type of each optic module\n";
    $cmd    = "show chassis hardware | display xml";
    $stdout = ssh2_cmd( $cmd );
    if ( ! $stdout ) {
        push @unknowns, "unable to ssh to $hostname";
        return;
    }
    parse_xml( $stdout );
    foreach my $path ( sort keys %values ) {
        if ( $path =~ m/FPC (\d+) .* PIC (\d+) .* Xcvr (\d+)/ ) {
            my $fpcpicxcvr = "$1/$2/$3";
            my $descr      = $values{ $path }{ 'description' };
            $verbose && print "module ?e-$fpcpicxcvr $descr\n";
            $optics{ $fpcpicxcvr }{ 'model' } = $descr;
        }
    }

    # find out which interfaces are admin down, so we can ignore them
    $verbose && print "\ngetting admin status of each interface\n";
    $cmd    = "show interface terse | display xml";
    $stdout = ssh2_cmd( $cmd );
    parse_xml( $stdout );
    foreach my $path ( sort keys %values ) {
        if ( $path =~ m!-> ([fgx]e-(\d+/\d+/\d+))$!i ) {
            my ( $interface, $fpcpicxcvr ) = ( $1, $2 );
            my $admin = $values{ $path }{ 'admin-status' };
            my $oper  = $values{ $path }{ 'oper-status' };
            my $descr = $values{ $path }{ 'description' };
            $verbose && print "physical interface $interface $admin $descr\n";
            if ( $admin ne 'up' ) {
                $ignore_interfaces{ $fpcpicxcvr } = 1;
            }
            $optics{ $fpcpicxcvr }{ 'description' } = $descr;
            $optics{ $fpcpicxcvr }{ 'admin' }       = $admin;
            $optics{ $fpcpicxcvr }{ 'oper' }        = $oper;
        }
        # The port number may have more than one digit (e.g. ge-0/0/10.0).
        elsif ( $path =~ m!-> ([fgx]e-(\d+/\d+/\d+))\.(\d+)$!i ) {
            my ( $interface, $fpcpicxcvr, $unit ) = ( $1, $2, $3 );
            my $admin = $values{ $path }{ 'admin-status' };
            my $descr = $values{ $path }{ 'description' };
            $verbose && print "logical interface $interface.$unit $admin $descr\n";
            next if ( $admin ne 'up' );
            # Append the logical unit's description to the physical one.
            if ( $optics{ $fpcpicxcvr }{ 'description' } ) {
                $optics{ $fpcpicxcvr }{ 'description' } .= ' ' . $descr;
            }
            else {
                $optics{ $fpcpicxcvr }{ 'description' } = $descr;
            }
        }
    }

    # get optics diagnostics
    $verbose && print "\ngetting optical diagnostics\n";
    $cmd    = "show interface diagnostic optic | display xml";
    $stdout = ssh2_cmd( $cmd );
    parse_xml( $stdout );
    $ago = time() - $oper_down_window;
    foreach my $path ( sort keys %values ) {
        # The spaces inside the pattern are literal: the interface name is
        # one ' -> '-separated component in the middle of the path.
        next if ( $path !~ m{ ([fgx]e-(\d+/\d+/\d+)) } );
        my ( $interface, $fpcpicxcvr ) = ( $1, $2 );
        my ( $pwr, $temp, $volt, $highcrit, $highwarn, $lowwarn, $lowcrit );
        my $diag = $values{ $path };

        $verbose && print "xml path $path\n";

        if ( $target_interface ) {
            if ( $interface ne $target_interface
                && "$interface.0" ne $target_interface ) {
                $verbose && print "$interface does not match $target_interface\n";
                next;
            }
        }
        else {
            if ( exists $ignore_interfaces{ $fpcpicxcvr }
                || exists $ignore_interfaces{ "$interface" }
                || exists $ignore_interfaces{ "$interface.0" } ) {
                $verbose && print "$interface is to be ignored\n";
                next;
            }
        }

        my $model = $optics{ $fpcpicxcvr }{ 'model' };
        $model ||= '';
        #$model =~ s/UNKNOWN/unkn-model/;
        $model =~ s/UNKNOWN/unknown-sfp/;
        $model =~ s/\s+$//;

        my $descr = $optics{ $fpcpicxcvr }{ 'description' };
        #$descr ||= 'unkn-descr';
        $descr ||= '';
        $descr =~ s/\s+$//;

        my $admin = $optics{ $fpcpicxcvr }{ 'admin' };
        $admin ||= 'down';
        my $oper = $optics{ $fpcpicxcvr }{ 'oper' };
        $oper ||= 'down';

        $verbose && print "$interface $model $descr $admin $oper $last_ups{ $interface }\n";

        if ( $admin ne 'up' ) {
            $verbose && print "ignoring $interface, administratively down\n";
            next;
        }
        if ( $oper ne 'up' && $last_ups{ $interface } < $ago ) {
            $verbose && print "ignoring $interface, operationally down since $last_ups{ $interface }\n";
            next;
        }
        if ( exists $diag->{ 'optic-diagnostics-not-available' }
            && $diag->{ 'optic-diagnostics-not-available' } eq 'N/A' ) {
            $verbose && print "ignoring $interface, optical diagnostics not available\n";
            next;
        }

        # rx power
        $pwr = $diag->{ 'rx-signal-avg-optical-power-dbm' };
        if ( ! defined $pwr ) {
            # wtf?  Some return different xml tags?
            $pwr = $diag->{ 'laser-rx-optical-power-dbm' };
        }
        $pwr      = _finite_dbm( $pwr, -60 );
        $highcrit = _finite_dbm( $diag->{ 'laser-rx-power-high-alarm-threshold-dbm' }, -60 );
        $lowcrit  = _finite_dbm( $diag->{ 'laser-rx-power-low-alarm-threshold-dbm' }, -60 );
        $highwarn = _finite_dbm( $diag->{ 'laser-rx-power-high-warn-threshold-dbm' }, -60 );
        $lowwarn  = _finite_dbm( $diag->{ 'laser-rx-power-low-warn-threshold-dbm' }, -60 );

        # Alert 3 dB before the module's own low thresholds are reached.
        $lowcrit += 3.0;
        $lowwarn += 3.0;

        # We've been getting some false warnings from XFP-10G-SRs
        # and SFP+-10G-SRs about optical RX at -0.82 dBm.
        # when the warning threshold is -1.00.  The critical is 0.00.
        # so fudge it up a quarter dB.
        if ( $model eq 'XFP-10G-SR' || $model eq 'SFP+-10G-SR' ) {
            $highwarn += 0.25;
        }

        $verbose && print "$interface $model $descr rx: $lowcrit < $lowwarn < $pwr < $highwarn < $highcrit\n";
        if ( $pwr > $highcrit ) {
            push @crits, "$interface $model $descr rx $pwr dBm gt $highcrit";
        }
        elsif ( $pwr < $lowcrit ) {
            push @crits, "$interface $model $descr rx $pwr dBm lt $lowcrit";
        }
        # a few of these are just a hair high, fudge it a little
        elsif ( $pwr > ( $highwarn + 0.15 ) ) {
            push @warns, "$interface $model $descr rx $pwr dBm gt $highwarn";
        }
        elsif ( $pwr < $lowwarn ) {
            push @warns, "$interface $model $descr rx $pwr dBm lt $lowwarn";
        }
        else {
            push @oks, "$interface $model $descr rx $pwr dBm";
        }

        # tx power
        $pwr = _finite_dbm( $diag->{ 'laser-output-power-dbm' }, -60 );
        # NOTE(review): the high alarm maps '- Inf' to +60 while every other
        # threshold maps it to -60.  Kept as found; confirm it is deliberate.
        $highcrit = _finite_dbm( $diag->{ 'laser-tx-power-high-alarm-threshold-dbm' }, 60 );
        $lowcrit  = _finite_dbm( $diag->{ 'laser-tx-power-low-alarm-threshold-dbm' }, -60 );
        $highwarn = _finite_dbm( $diag->{ 'laser-tx-power-high-warn-threshold-dbm' }, -60 );
        $lowwarn  = _finite_dbm( $diag->{ 'laser-tx-power-low-warn-threshold-dbm' }, -60 );

        $verbose && print "$interface $model $descr tx: $lowcrit < $lowwarn < $pwr < $highwarn < $highcrit\n";
        if ( $pwr > $highcrit ) {
            push @crits, "$interface $model $descr tx $pwr dBm gt $highcrit";
        }
        elsif ( $pwr < $lowcrit ) {
            push @crits, "$interface $model $descr tx $pwr dBm lt $lowcrit";
        }
        elsif ( $pwr > $highwarn ) {
            # how is this even possible?  Bad QA?
            # I'm going to mask this, because there's nothing we can do about it.
            #push @warns, "$interface $model $descr tx $pwr dBm gt $highwarn";
            $verbose && print "warning $interface $model $descr tx $pwr dBm gt $highwarn\n";
        }
        elsif ( $pwr < $lowwarn ) {
            # how is this even possible?  Bad QA?
            push @warns, "$interface $model $descr tx $pwr dBm lt $lowwarn";
        }
        else {
            $verbose && print "ok $interface $model $descr tx $pwr\n";
        }

        # laser temperature
        $temp     = $diag->{ 'module-temperature' };
        $highcrit = $diag->{ 'module-temperature-high-alarm-threshold' };
        $highwarn = $diag->{ 'module-temperature-high-warn-threshold' };
        $lowwarn  = $diag->{ 'module-temperature-low-warn-threshold' };
        $lowcrit  = $diag->{ 'module-temperature-low-alarm-threshold' };
        # Strip the trailing " degrees ..." text so the values compare
        # numerically.
        foreach my $reading ( $temp, $highcrit, $highwarn, $lowwarn, $lowcrit ) {
            $reading =~ s/ degrees.*//;
        }

        $verbose && print "$interface $model $descr temp: $lowcrit < $lowwarn < $temp < $highwarn < $highcrit\n";
        if ( $temp > $highcrit ) {
            push @crits, "$interface $model $descr $temp C gt $highcrit";
        }
        elsif ( $temp < $lowcrit ) {
            push @crits, "$interface $model $descr $temp C lt $lowcrit";
        }
        elsif ( $temp > $highwarn ) {
            push @warns, "$interface $model $descr $temp C gt $highwarn";
        }
        elsif ( $temp < $lowwarn ) {
            push @warns, "$interface $model $descr $temp C lt $lowwarn";
        }
        else {
            $verbose && print "ok $interface $model $descr $temp C\n";
        }

        # voltage
        $volt     = $diag->{ 'module-voltage' };
        $highcrit = $diag->{ 'module-voltage-high-alarm-threshold' };
        $highwarn = $diag->{ 'module-voltage-high-warn-threshold' };
        $lowwarn  = $diag->{ 'module-voltage-low-warn-threshold' };
        $lowcrit  = $diag->{ 'module-voltage-low-alarm-threshold' };

        $verbose && print "$interface $model $descr volt: $lowcrit < $lowwarn < $volt < $highwarn < $highcrit\n";
        if ( $volt > $highcrit ) {
            push @crits, "$interface $model $descr $volt V gt $highcrit";
        }
        elsif ( $volt < $lowcrit ) {
            push @crits, "$interface $model $descr $volt V lt $lowcrit";
        }
        elsif ( $volt > $highwarn ) {
            push @warns, "$interface $model $descr $volt V gt $highwarn";
        }
        elsif ( $volt < $lowwarn ) {
            push @warns, "$interface $model $descr $volt V lt $lowwarn";
        }
        else {
            $verbose && print "ok $interface $model $descr $volt V\n";
        }
    }
    return;
}


# Parse an XML reply into the global %values, keyed by a ' -> '-joined
# path of element names and then by tag.  @names, @tags and %values are
# cleared first.  On a parse failure an entry is pushed onto @unknowns and
# %values is left empty.
sub parse_xml {
    my ( $stdout ) = @_;
    my ( $parser, $tree );

    $verbose && print $stdout;

    $stdout =~ s/^[^<]*//s;    # trim off any leading junk
    $stdout =~ s/[^>]*$//s;    # trim off any trailing junk

    undef @names;
    undef @tags;
    undef %values;

    $parser = XML::DOM::Parser->new();
    eval {
        $tree = $parser->parse( $stdout );
    };
    if ( ! $tree ) {
        push @unknowns, "can't parse xml response";
        return;
    }
    #$verbose > 1 && print "tree: ", Dumper( $tree ), "\n\n";

    descend_tree( $tree, 0 );

    if ( $verbose > 1 ) {
        foreach my $path ( keys %values ) {
            print "$path:\n";
            foreach my $tag ( sort keys %{ $values{ $path } } ) {
                printf " %s %s\n", $tag, $values{ $path }{ $tag };
            }
        }
    }
    return;
}


# Recursively walk a DOM node at depth $level and record every text node
# (node type 3) in %values under the path of its ancestors.  The text of a
# <name> element replaces the name of the element containing it, so paths
# carry names such as the interface name rather than the generic tag.
sub descend_tree {
    my ( $node, $level ) = @_;

    my $tag = $node->getNodeName();
    $tags[ $level ] = $tag;
    $names[ $level ] ||= $tag;

    if ( $level >= 2 && $node->getNodeType() == 3 ) {
        my $value = $node->getNodeValue();
        chomp $value;
        $value =~ s/\s+$//;
        my $parenttag = $tags[ $level - 1 ];
        if ( $parenttag eq 'name' ) {
            $names[ $level - 2 ] = $value;
        }
        my $path = join( ' -> ', @names[ 0 .. $level - 2 ] );
        $values{ $path }{ $parenttag } = $value;
        $verbose > 1 && print "- $path $parenttag = $value\n";
    }

    foreach my $child ( $node->getChildNodes() ) {
        descend_tree( $child, $level + 1 );
    }
    return;
}


# get db history for ports
#
# Fills %last_ups (port => last_up) from the swports table, resolves
# $hostname / $hostaddr through the sw table, and adds ports whose
# monitorportopt 'monitor' option is false to %ignore_interfaces.
# A failed connection is reported through @unknowns.
sub load_db_data {
    my ( $dbpasswd, $dbh, $sql, $rows, $n, @parms );

    $verbose && print "load_db_data()\n";

    $dbpasswd = get_passwd( $dbpasswd_file );

    $dbh = DBI->connect( "dbi:mysql:dbname=$dbname", $dbuser, $dbpasswd,
        { AutoCommit => 0, RaiseError => 0, PrintError => 1, } );
    if ( ! $dbh ) {
        push @unknowns, "can't open database: " . $DBI::errstr;
        return;
    }

    # Try to convert an ipaddr on the cmdline to a switch name that we
    # can lookup in the database port tables.
    if ( $hostname =~ m/^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/ ) {
        $hostaddr = $hostname;
        $sql      = "select sw from sw where ipaddr = ?";
        ( $hostname ) = $dbh->selectrow_array( $sql, undef, ( $hostaddr ) );
        $hostname ||= $hostaddr;    # fallback to ip, monitorportopt won't work
    }
    else {
        $sql = "select ipaddr from sw where sw = ?";
        ( $hostaddr ) = $dbh->selectrow_array( $sql, undef, ( $hostname ) );
        $hostaddr ||= $hostname;    # fallback to name, and hope dns works
    }

    $sql  = "select port, last_up from swports where sw = ?";
    $rows = $dbh->selectall_arrayref( $sql, undef, ( $hostname ) );
    foreach my $row ( @{ $rows || [] } ) {
        $verbose && print "< ", join( ', ', @$row ), "\n";
        my ( $port, $last_up ) = @$row;
        $last_ups{ $port } = $last_up;
    }

    # load the monitorportopt stuff
    # If this is a standalone, ie cisco, we have the sw and maybe port.
    # If this is a stack, we have the name of the stack master, and maybe a port
    # and this will be in the mangled switch port syntax.
    # So sw-ne37-211-20 ge-2/0/25 is mangled to sw-ne37-211-22 ge-x/0/25
    # But sw-ne37-110-3m f0/23 is in the normal format
    $sql = "select ss2.sw, ss2.idx, m.port, m.opt, m.val
        from swstk ss
        join swstk ss2 using( stk )
        join monitorportopt m on ss2.sw = m.sw
        where ss.sw = ?
        union
        select m2.sw, -1, m2.port, m2.opt, m2.val
        from monitorportopt m2
        where m2.sw = ?";
    @parms = ( $hostname, $hostname );
    # Narrow the lookup only when one interface was requested on the
    # command line.
    if ( $target_interface ) {
        $sql .= " and m2.port = ?";
        push @parms, $target_interface;
    }
    $rows = $dbh->selectall_arrayref( $sql, undef, @parms );
    $n    = 0;
    foreach my $row ( @{ $rows || [] } ) {
        $verbose && print "< ", join( ', ', @$row ), "\n";
        my ( $sw_mangled, $idx, $port_mangled, $opt, $val ) = @$row;
        my $port2 = $port_mangled;
        if ( $idx >= 0 ) {
            # we have an idx, so this is a stack and in mangled format
            $port2 =~ s!-x/!-$idx/!;
        }
        next if ( $opt ne 'monitor' );
        if ( $val =~ m/^(?:t|true|y|yes|on|1)\s*$/i ) {
            $verbose && print "setting \$ports_to_expect_up{ $port2 }\n";
            #$ports_to_expect_up{ $port2 } = 1;
            $n++;
        }
        elsif ( $val =~ m/^(?:f|false|n|no|off|0)\s*$/i ) {
            $verbose && print "setting \$ignore_interfaces{ $port2 }\n";
            $ignore_interfaces{ $port2 } = 1;
            $n++;
        }
    }
    $verbose && print "loaded $n rows from monitorportopt database\n";

    $dbh->disconnect();
    return;
}


# ssh to the switch, run a command and grab the result.
# Can't do this entirely in here, because we need to use Expect to give the
# password to ssh, and the embedded Perl environment hates the Expect
# module.  So we use an external helper script.
#
# Returns the combined stdout/stderr of the helper, or '' if it could not
# be started (also recorded in @unknowns).  Nothing in this file calls it;
# ssh2_cmd() is used instead.
sub ssh_cmd {
    my ( $cmd ) = @_;
    my ( $output, $ph );

    # NOTE(review): the command line is handed to a shell, so $cmd must not
    # contain single quotes.
    $cmd = "/usr/local/nagios/libexec/ssh_cmd -t 60 -H $hostaddr -U $login -P $passwdfile '$cmd' 2>&1";
    $verbose && print "ssh cmd: $cmd\n";
    if ( ! open( $ph, '-|', $cmd ) ) {
        push @unknowns, "can't run '$cmd': $!";
        return '';
    }
    $output = '';
    while ( my $line = <$ph> ) {
        $output .= $line;
    }
    close $ph;
    $verbose && print "ssh output: $output\n";

    return $output;
}


# Create the global Net::SSH2 session in $ssh2, connect to $hostaddr and
# log in as $login.  Tries up to $max_retries times.  Returns 1 on success.
# On failure returns 0, leaves $ssh2 undefined and records the reason for
# the last failed attempt in @unknowns.
sub ssh2_setup {
    my ( $rport, $retries, $error_code, $error_name, $error_string,
        $passwd, $msg, $ok );

    # Get ssh tcp port number if it exists
    $rport = ( getservbyname( 'ssh', 'tcp' ) )[2];
    $rport ||= 22;

    $passwd = get_passwd( $passwdfile );

    for ( $retries = 0; $retries < $max_retries; $retries++ ) {

        # create the ssh session
        $verbose && print "\ncreating Net::SSH2 object\n";
        undef $ssh2;
        eval {
            $ssh2 = Net::SSH2->new( timeout => $timeout * 1000, );
        };
        if ( ! $ssh2 ) {
            # There is no session object to ask for an error, use $@.
            ( $error_string = $@ ) =~ s/\s+$//;
            $msg = "can't create ssh session to $hostname: $error_string";
            $verbose && print $msg, "\n";
            next;
        }

        if ( $verbose >= 2 ) {
            $ssh2->trace( -1 );
            $ssh2->debug( 1 );
        }

        #$ssh2->keepalive_config( 1, 15 );
        $ssh2->blocking( 1 );

        $verbose && print "\nconnecting to $hostaddr $rport\n";
        $ok = 0;
        eval {
            $ok = $ssh2->connect( $hostaddr, $rport );
        };
        if ( ! $ok ) {
            ( $error_code, $error_name, $error_string ) = $ssh2->error();
            $msg = "can't ssh connect to $hostname: $error_string";
            $verbose && print $msg, "\n";
            next;
        }

        # log in
        $verbose && print "tring ssh login as $login\n";
        #$verbose >= 2 && print " password '$passwd'\n";
        $ok = 0;
        eval {
            $ok = $ssh2->auth_password( $login, $passwd );
        };
        if ( ! $ok ) {
            ( $error_code, $error_name, $error_string ) = $ssh2->error();
            $msg = "can't login to $hostname: $error_string";
            $verbose && print $msg, "\n";
            next;
        }

        return 1;
    }

    # Every attempt failed; report only the last reason.
    undef $ssh2;
    $msg = "can't ssh to $hostname: no attempts made" if ( ! defined $msg );
    push @unknowns, $msg;
    return 0;
}


# Run one command over the established $ssh2 session and return everything
# it wrote to stdout.  Returns '' (or whatever was read so far) when the
# channel cannot be opened or the command fails; the reason is recorded in
# @unknowns.
sub ssh2_cmd {
    my ( $cmd ) = @_;
    my ( $chan, $stdout, $error_code, $error_name, $error_string, $msg );

    $verbose && print "ssh2_cmd( $cmd )\n";

    $stdout = '';
    # A "return" inside this eval only leaves the eval block.
    eval {
        $chan = $ssh2->channel();
        if ( ! $chan ) {
            ( $error_code, $error_name, $error_string ) = $ssh2->error();
            $verbose && print "LIBSSH2_SESSION_BLOCK_INBOUND 1 \n";
            $verbose && print "LIBSSH2_SESSION_BLOCK_OUTBOUND 2\n";
            $verbose && print "block_directions: ", $ssh2->block_directions, "\n";
            $verbose && print "error_code $error_code, $error_name, $error_string\n";
            $msg = "can't ssh $hostname chan $cmd: $error_name $error_string";
            $verbose && print $msg, "\n";
            push @unknowns, $msg;
            return;
        }
        $chan->blocking( 1 );

        if ( ! $chan->exec( $cmd ) ) {
            ( $error_code, $error_name, $error_string ) = $ssh2->error();
            $msg = "can't ssh $hostname exec $cmd: $error_string";
            $verbose && print $msg, "\n";
            push @unknowns, $msg;
            return;
        }

        if ( ! $chan->send_eof() ) {
            ( $error_code, $error_name, $error_string ) = $ssh2->error();
            $msg = "can't ssh $hostname eof $cmd: $error_string";
            $verbose && print $msg, "\n";
            push @unknowns, $msg;
            return;
        }

        while ( my $line = <$chan> ) {
            $stdout .= $line;
        }
    };
    if ( $@ ) {
        # Something died inside the eval (e.g. no usable session).
        ( $msg = "no data from ssh $hostname $cmd: $@" ) =~ s/\s+$//;
        $verbose && print $msg, "\n";
        push @unknowns, $msg;
    }

    $verbose && print "stdout: $stdout\n";
    return $stdout;
}


# Read the first line of $file and return it without the trailing newline.
# Dies if the file cannot be opened.
sub get_passwd {
    my ( $file ) = @_;

    open( my $fh, '<', $file ) || die "can't open $file: $!\n";
    my $passwd = <$fh>;
    close $fh;
    chomp $passwd;
    return $passwd;
}