#!/usr/bin/perl
#
# Check a BMC-equipped server.
# So far, this means x86 models.
# Also works over the LAN to a Dell iDRAC6.
#
# Output is a single status line; the exit code follows the Nagios plugin
# convention: 0 = OK, 1 = WARNING, 2 = CRITICAL, 3 = UNKNOWN.
#
# $Header: /opt/home/doke/work/nagios/RCS/check_bmc,v 1.23 2015/12/08 23:38:42 doke Exp $


use strict;
use warnings;
use Getopt::Long;
#use Data::Dumper;

# Command line options, plus the result lists filled in by check().
our( $host, $username, $passwordfile, $interface, $verbose, $help,
    @crits, @warns, @unknowns, @oks, @ignores );

$ENV{PATH}='/usr/local/bin:/opt/sfw/bin:/usr/sfw/bin:/usr/bin:/bin:/usr/sbin:/sbin';

$host = undef;
$username = undef;
$passwordfile = undef;
$interface = 'auto';
$verbose = 0;
$help = 0;

# Print the usage message and exit with the given status.
sub usage {
    my( $rc ) = @_;
    print "Usage: $0 [-vh] [-H host] [-U username] [-f pwfile] [-I interface]
    -H s   host
    -U s   username
    -f s   file containing password
    -I s   interface: bmc, open, lan, lanplus [$interface]
    -v     verbose
    -h     help
";
    exit $rc;
}

Getopt::Long::Configure ("bundling");
GetOptions(
    'H=s' => \$host,
    'U=s' => \$username,
    'f=s' => \$passwordfile,
    'I=s' => \$interface,
    'v+' => \$verbose,
    'h' => \$help,
    );

usage( 0 ) if ( $help );

check();

# Report the most severe state first; the exit code is the worst state seen.
my $rc = 0;
my $sep = '';
if ( @crits ) {
    $rc = 2;
    print "CRITICAL ", join( ", ", @crits );
    $sep = '; ';
}
if ( @warns ) {
    $rc = 1 if ( $rc == 0 );
    print $sep, "Warning ", join( ", ", @warns );
    $sep = '; ';
}
if ( @unknowns ) {
    # Nagios defines UNKNOWN as 3.  A negative value would become exit
    # status 255, which is outside the range Nagios accepts.
    $rc = 3 if ( $rc == 0 );
    print $sep, "Unknown ", join( ", ", @unknowns );
    $sep = '; ';
}
if ( $rc == 0 ) {
    print "Ok ", join( ", ", @oks );
    $sep = '; ';
}
if ( @ignores ) {
    print $sep, "Ignoring ", join( ", ", @ignores );
}
print "\n";
exit $rc;


##################


# Quote a string so the shell passes it through as one literal word.
sub shell_quote {
    my( $str ) = @_;
    $str =~ s/'/'\\''/g;
    return "'$str'";
}


# Turn a wait status (as left in $?) into a short human readable reason.
# Must be called right after the failing call, while $! is still valid.
sub describe_status {
    my( $status ) = @_;
    my $err = "$!";
    if ( $status == -1 ) {
        return "failed to execute: $err";
    }
    if ( $status & 127 ) {
        return "killed by signal " . ( $status & 127 );
    }
    return "exit status " . ( $status >> 8 );
}


# Run all the checks, filling @crits, @warns, @unknowns, @oks and @ignores.
# Returns nothing; stops early when a command cannot be run at all.
sub check {
    my( $cmd, $ipmicmd, $mfg, $model, @identity );

    $cmd = "ipmitool -V > /dev/null";
    print "+ $cmd\n" if ( $verbose );
    system( $cmd );
    # test the whole wait status, so death by signal also counts as failure
    if ( $? != 0 ) {
        push @unknowns, "unable to run ipmitool: " . describe_status( $? );
        return;
    }

    $ipmicmd = build_ipmicmd();
    return unless ( defined $ipmicmd );

    return unless ( check_chassis_status( $ipmicmd ) );

    @identity = read_fru_identity( $ipmicmd );
    return unless ( @identity );
    ( $mfg, $model ) = @identity;
    print "mfg $mfg, model $model\n" if ( $verbose );

    return unless ( check_sensors( $ipmicmd ) );

    if ( $mfg =~ m/Sun/i ) {
        return unless ( check_sun_leds( $ipmicmd ) );
    }
    #elsif ( $mfg =~ m/Dell/i ) {
        # There's a delloem command, but so far I haven't found anything
        # useful to monitor in it.  You can see the mac address, or set the
        # led states, but you can't get the led states.
    #}

    if ( $mfg ) {
        push @oks, $mfg;
    }
    if ( $model ) {
        push @oks, $model;
    }
    return;
}


# Build the ipmitool command prefix (interface, host, credentials, sdr cache).
# May change $interface from 'auto'/'bmc' to the one actually used.
# Returns the command string, or undef after pushing onto @unknowns.
sub build_ipmicmd {
    my( $ipmicmd, $dev, $sdr_cache, $safe_host, $cmd );

    if ( $host ) {
        # remote host
        # first make sure we can ping it.
        # ipmitool has some really long timeouts
        if ( ! ping( $host ) ) {
            push @unknowns, "can't ping $host";
            return undef;
        }

        if ( $interface eq 'auto' || $interface eq 'bmc' ) {
            $interface = 'lan';
        }
        $ipmicmd = "ipmitool -I " . shell_quote( $interface )
            . " -H " . shell_quote( $host );
        $ipmicmd .= " -U " . shell_quote( $username ) if ( $username );
        $ipmicmd .= " -f " . shell_quote( $passwordfile ) if ( $passwordfile );

        # In ipmitool 1.8.8, the -c option for csv separated output is very
        # broken.  It runs lines together.

        # keep odd characters in the host name out of the cache file name
        ( $safe_host = $host ) =~ s/[^\w.:-]/_/g;
        $sdr_cache = "/tmp/sdr_cache.$safe_host.$<";

        # (re)create the cache when it is missing, empty, or over a day old
        if ( ! ( ( -f $sdr_cache ) && ( -s _ ) ) || ( -M _ ) > 1.0 ) {
            $cmd = "$ipmicmd sdr dump " . shell_quote( $sdr_cache )
                . " > /dev/null";
            print "+ $cmd\n" if ( $verbose );
            system( $cmd );
            if ( $? != 0 ) {
                push @warns, "unable to create sdr cache";
                # don't leave a partial file for later runs to trust
                unlink $sdr_cache;
                $sdr_cache = undef;
            }
        }
        # use the cache whenever a good one exists, not only on the run
        # that created it
        $ipmicmd .= " -S " . shell_quote( $sdr_cache )
            if ( defined $sdr_cache );
    }

    else {
        # no host, we're local
        if ( $interface eq 'auto' ) {
            if ( -c "/dev/bmc" ) {
                $dev = "/dev/bmc";
                $interface = 'bmc';
            }
            elsif ( -c "/dev/ipmi0" ) {
                $dev = "/dev/ipmi0";
                $interface = "open";
            }
            elsif ( -c "/dev/ipmi/0" ) {
                $dev = "/dev/ipmi/0";
                $interface = "open";
            }
            elsif ( -c "/dev/ipmidev/0" ) {
                $dev = "/dev/ipmidev/0";
                $interface = "open";
            }
            else {
                push @unknowns, "can't find bmc or ipmi device";
                return undef;
            }
        }

        # $dev is only known when the interface was auto-detected; with an
        # explicit -I we cannot test writability, so fall back to sudo.
        if ( defined $dev && -w $dev ) {
            $ipmicmd = "ipmitool -I " . shell_quote( $interface );
        }
        else {
            $ipmicmd = "sudo -n ipmitool -I " . shell_quote( $interface );
        }
    }

    return $ipmicmd;
}


# Run "chassis status" and classify power and fault flags.
# Returns 1 on success, 0 (after pushing onto @unknowns) when the command
# could not be run or exited with a failure status.
sub check_chassis_status {
    my( $ipmicmd ) = @_;
    my( $cmd, $ph, $key, $val );

    $cmd = "$ipmicmd chassis status";
    print "+ $cmd\n" if ( $verbose );
    if ( ! open( $ph, '-|', $cmd ) ) {
        push @unknowns, "can't run $cmd: $!";
        return 0;
    }
    while ( <$ph> ) {
        print if ( $verbose );
        chomp;
        ( $key, $val ) = split( m/\s*:\s*/, $_, 2 );
        next unless ( defined $key && length $key );
        $val = '' unless ( defined $val );
        if ( $key eq 'System Power' && $val ne 'on' ) {
            # wtf?  So how is this program running?
            push @crits, "$key: $val";
        }
        elsif ( $key eq 'Power Restore Policy'
            && $val !~ m/always-on|previous/i ) {
            push @ignores, "$key: $val";
        }
        elsif ( $key =~ m/Main Power Fault/i && $val =~ m/true/i ) {
            # sometimes this gets left on after the problem clears
            # so only report it if there are other problems
            # we've seen this on ironchef, an X4600 with ilom v2.0.2.5
            push @ignores, "main power fault $val";
        }
        elsif ( $key =~ m/fault|lockout|overload/i
            && $val !~ m/false|inactive/i ) {
            push @warns, "$key: $val";
        }
    }
    # A failed session would otherwise fall through to "0 ok sensors" and be
    # reported as Ok, so treat a failing chassis status as unknown.
    if ( ! close( $ph ) ) {
        push @unknowns, "$cmd failed: "
            . ( ( $! + 0 ) ? "$!" : describe_status( $? ) );
        return 0;
    }
    return 1;
}


# Pick manufacturer and model out of the parsed fru data.
# Only frus whose description matches $fru_re are considered; values already
# set in $mfg / $model are kept.  Returns ( $mfg, $model ).
sub pick_identity {
    my( $frus, $fru_re, $mfg_re, $model_re, $mfg, $model ) = @_;
    my( $fru, $key, $val );

    # sorted, so the choice does not depend on hash ordering
    foreach $fru ( sort keys %{$frus} ) {
        next unless ( $fru =~ $fru_re );
        foreach $key ( sort keys %{ $frus->{ $fru } } ) {
            $val = $frus->{ $fru }{ $key };
            print "fru $fru, key $key, val $val, mfg $mfg, model $model\n"
                if ( $verbose );
            if ( ! $mfg && $key =~ $mfg_re ) {
                $mfg = $val;
            }
            elsif ( ! $model && $key =~ $model_re ) {
                $model = $val;
            }
        }
    }
    return ( $mfg, $model );
}


# Run "fru list" and work out the manufacturer and model.
# Returns ( $mfg, $model ), either of which may be '', or the empty list
# (after pushing onto @unknowns) when the command could not be started.
sub read_fru_identity {
    my( $ipmicmd ) = @_;
    my( $cmd, $ph, $key, $val, %frus, $fru, $mfg, $model );

    $frus{ 'mainboard' }{ 'product_manufacturer' } = '';
    $fru = '';      # fields seen before any device description

    $cmd = "$ipmicmd fru list";
    print "+ $cmd\n" if ( $verbose );
    if ( ! open( $ph, '-|', "$cmd 2>&1" ) ) {
        push @unknowns, "can't run $cmd: $!";
        return;
    }
    while ( <$ph> ) {
        print if ( $verbose );
        chomp;
        next unless ( m/\s*(\S*?)\s*:\s*(\S.*)\s*$/ );
        ( $key, $val ) = split( m/\s*:\s*/, $_, 2 );
        print "key $key, val $val\n" if ( $verbose );
        if ( $key =~ m/FRU Device Description/i ) {
            $fru = $val;
            print "fru $fru\n" if ( $verbose );
        }
        elsif ( $key =~ m/Product Manufacturer/i ) {
            print "fru $fru, product_manufacturer $val\n" if ( $verbose );
            $frus{ $fru }{ 'product_manufacturer' } = $val;
        }
        elsif ( $key =~ m/Product Name/i ) {
            print "fru $fru, product_name $val\n" if ( $verbose );
            $frus{ $fru }{ 'product_name' } = $val;
        }
        elsif ( $key =~ m/Product Serial/i ) {
            $frus{ $fru }{ 'product_serial' } = $val;
        }
        elsif ( $key =~ m/Chassis Serial/i ) {
            $frus{ $fru }{ 'chassis_serial' } = $val;
        }
        elsif ( $key =~ m/Board Mfg/i ) {
            $frus{ $fru }{ 'board_mfg' } = $val;
        }
        elsif ( $key =~ m/Board Product/i ) {
            $frus{ $fru }{ 'board_product' } = $val;
        }
        elsif ( $key =~ m/Board Serial/i ) {
            $frus{ $fru }{ 'board_serial' } = $val;
        }
        elsif ( $verbose > 1 ) {
            print "unparsed: $_\n";
        }
    }
    # exit status deliberately not checked here, as before
    close $ph;

    $mfg = '';
    $model = '';

    # sun style, works on SUN FIRE X4600, SPARC T3-1, Sun Fire X2200 M2
    ( $mfg, $model ) = pick_identity( \%frus, qr%Mainboard|^/SYS \(ID%i,
        qr/Product.Manufacturer/i, qr/Product.Name/i, $mfg, $model );

    if ( ! $mfg || ! $model ) {
        # Dell style, works on PowerEdge R515, PowerEdge R720
        ( $mfg, $model ) = pick_identity( \%frus, qr/Builtin FRU Device/i,
            qr/Board.Mfg/i, qr/Board.Product/i, $mfg, $model );
    }

    return ( $mfg, $model );
}


# Run "sdr elist all"; count ok sensors, report the others as critical.
# Returns 1 on success, 0 (after pushing onto @unknowns) when the command
# could not be started.
sub check_sensors {
    my( $ipmicmd ) = @_;
    my( $cmd, $ph, $nok, $key, $val, $descr );

    $cmd = "$ipmicmd sdr elist all";
    print "+ $cmd\n" if ( $verbose );
    if ( ! open( $ph, '-|', $cmd ) ) {
        push @unknowns, "can't run $cmd: $!";
        return 0;
    }
    $nok = 0;
    while ( <$ph> ) {
        print if ( $verbose );
        chomp;
        next unless ( m/\S/ );      # a blank line is not a failed sensor
        # fields: name | id | status | entity | reading
        ( $key, undef, $val, undef, $descr ) = split( m/\s*\|\s*/, $_, 5 );
        $val = '' unless ( defined $val );
        $descr = '' unless ( defined $descr );
        if ( $val eq 'ok' ) {
            $nok++;
        }
        # I have no idea what these values mean, but they seem to be useless
        elsif ( $val eq 'ns' || $val eq 'lcr' || $val eq 'lnc' ) {
            # ignore it
        }
        elsif ( $descr =~ m/0 unspecified/ ) {
            # ignore it
        }
        else {
            push @crits, "$key $val $descr";
        }
    }
    push @oks, "$nok ok sensors";
    close $ph;
    return 1;
}


# Run "sunoem led get" on Sun hardware and report unexpected led states.
# Returns 1 on success, 0 (after pushing onto @unknowns) when the command
# could not be started.
sub check_sun_leds {
    my( $ipmicmd ) = @_;
    my( $cmd, $ph, $nok, $key, $val );

    # sunoem commands need to be run as root, no idea why
    if ( $ipmicmd =~ m/^sudo / ) {
        $cmd = "$ipmicmd sunoem led get";
    }
    else {
        $cmd = "sudo -n $ipmicmd sunoem led get";
    }
    print "+ $cmd\n" if ( $verbose );
    if ( ! open( $ph, '-|', "$cmd 2>&1" ) ) {
        push @unknowns, "can't run $cmd: $!";
        return 0;
    }
    $nok = 0;
    while ( <$ph> ) {
        print if ( $verbose );
        chomp;
        next if ( m/command failed/i );
        next unless ( m/\S/ );
        ( $key, $val ) = split( m/\s*\|\s*/, $_, 2 );
        $val = '' unless ( defined $val );
        if ( ( $key eq 'sys.power.led'
                || $key eq 'bp.power.led'   # specific to Sun X4100?
                || $key eq 'fp.power.led'   # specific to Sun X4100?
                || $key =~ m/(ft[0-4]|sc)\.act\.led/ ) # specific to Sun X4500?
            && $val eq 'ON' ) {
            $nok++;
        }
        elsif ( $key eq 'sys.locate.led' ) {
            # don't care about the locating led
            $nok++;
        }
        elsif ( $val eq 'OFF' ) {
            $nok++;
        }
        elsif ( $key eq 'bmc_send_cmd: Permission denied' ) {
            # ignore it
        }
        else {
            push @crits, "$key $val";
        }
    }
    if ( $nok ) {
        push @oks, "$nok ok leds";
    }
    else {
        # lots of systems produce no led output, all x2200s?
        #push @unknowns, "no leds could be checked";
    }
    close $ph;
    return 1;
}


# return true if can ping host
sub ping {
    my( $host ) = @_;
    my( @cmd );

    print "pinging host '$host'\n" if ( $verbose );
    # list form: no shell involved, so the host needs no quoting
    @cmd = ( '/usr/local/sbin/fping', '-A', '-q', '-t', '100', '-p', '100',
        $host );
    print "+ @cmd\n" if ( $verbose > 1 );
    # compare the whole wait status, so a signalled fping is not a success
    if ( system( @cmd ) != 0 ) {
        # failed
        print "can't ping host '$host'\n" if ( $verbose );
        return 0;
    }
    else {
        # worked
        print "successfully pinged host '$host'\n" if ( $verbose );
        return 1;
    }
}