#!/usr/local/bin/perl
#
# check a Liebert ups
#
# Nagios plugin: polls a UPS over SNMP v1 (the standard UPS-MIB subtree
# under mib-2.33 plus the Liebert enterprise temperature table), evaluates
# battery, runtime, load, alarms, input voltage and temperatures against
# thresholds, prints a one line summary with perfdata, and exits with the
# Nagios status code (0 ok, 1 warning, 2 critical, 3 unknown).
#
# $Header: /home/doke/work/nagios/RCS/check_liebert_ups,v 1.27 2017/05/12 20:29:02 doke Exp $

use vars qw( $warn_runtime $crit_runtime $warn_load $crit_load
    $warn_temp $crit_temp $warn_freezing $crit_freezing );

# Default thresholds; each one can be overridden on the command line.

$warn_runtime = 15;     # minutes
$crit_runtime = 10;     # minutes
# only applies if on battery

$warn_load = 70;        # %
$crit_load = 90;        # %

# from GXT3 user manual
# www.emersonnetworkpower.com/en-US/Products/ACPower/RackmountUPS/Documents/SL-23180.pdf
# Operating Temperature, F (C) 32 to 104 (0 to 40)
# However, that's ambient, and the sensor is deep inside the device where it's warmer.
$warn_temp = 47;        # C
$crit_temp = 50;        # C

$warn_freezing = 0;     # C
$crit_freezing = -3;    # C

###########################

use strict;
use warnings;
no warnings 'redefine';
# Missing SNMP values are deliberately treated as 0 / empty string below.
no warnings 'uninitialized';

use Getopt::Long;
use Net::SNMP;

use vars qw( $mib2 $enterprises $upsmib $liebert $verbose $help
    $host $community
    @crit_msgs @warn_msgs @unknown_msgs @ok_msgs @ignores @perfs $rc );

# OID prefixes.  Note that $mib2 has no leading dot while $enterprises has
# one; the description tables in check_ups() are keyed accordingly.
$mib2        = '1.3.6.1.2.1';
$enterprises = '.1.3.6.1.4.1';
$upsmib      = "$mib2.33";
$liebert     = "$enterprises.476";

$verbose = 0;
$help    = 0;

#############################

# usage( $rc )
#   Print the help text on stderr and exit with status $rc.
sub usage {
    my( $rc ) = @_;
    warn "Usage: $0 [-v] [-w n] [-c n] -H -C
    -H s   hostname
    -C s   snmp community
    -w n   warn if remaining runtime is less than n minutes [$warn_runtime]
    -c n   critical if remaining runtime is less than n minutes [$crit_runtime]
    -l n   warn if load is greater than n % [$warn_load]
    -L n   critical if load is greater than n % [$crit_load]
    -t n   warn if exceed this temperature [$warn_temp]
    -T n   critical if exceed this temperature [$crit_temp] (must be > warn)
    -u n   warn if below this temperature [$warn_freezing]
    -U n   critical if below this temperature [$crit_freezing]
\n";
    exit $rc;
}

Getopt::Long::Configure( "bundling" );
# An unparsable command line is a usage error rather than something to
# silently ignore.
GetOptions(
    'H=s' => \$host,
    'C=s' => \$community,
    'w=i' => \$warn_runtime,
    'c=i' => \$crit_runtime,
    'l=i' => \$warn_load,       # %
    'L=i' => \$crit_load,       # %
    't=i' => \$warn_temp,       # in C
    'T=i' => \$crit_temp,       # in C
    'u=i' => \$warn_freezing,   # in C
    'U=i' => \$crit_freezing,   # in C
    'v+'  => \$verbose,
    'h'   => \$help,
    ) or usage( 1 );

usage( 0 ) if ( $help );
usage( 1 ) if ( ! $host );
usage( 1 ) if ( ! $community );
usage( 1 ) if ( $warn_temp > $crit_temp );

# Just in case of problems, let's not hang Nagios
$SIG{'ALRM'} = sub {
    print( "ERROR: No response from snmp server (alarm)\n" );
    exit 3;
};
alarm( 10 );

check_ups( $host, $community );

# Build the single status line.  The first non-empty severity decides the
# exit code; lower severities are appended after a "; " separator.
# NOTE(review): perfdata items are joined with ", " as before; the Nagios
# plugin guidelines describe space separated perfdata - confirm whether
# downstream graphing depends on the current format before changing it.
$rc = 0;    # nagios ok exit code
if ( scalar( @crit_msgs ) ) {
    print "CRITICAL: " . join( ", ", @crit_msgs );
    $rc = 2;
}
if ( scalar( @warn_msgs ) ) {
    print "; " if ( $rc != 0 );
    print "Warning: " . join( ", ", @warn_msgs );
    $rc = 1 if ( $rc == 0 );
}
if ( scalar( @unknown_msgs ) ) {
    print "; " if ( $rc != 0 );
    print "Unknown: " . join( ", ", @unknown_msgs );
    $rc = 3 if ( $rc == 0 );
}
elsif ( $rc == 0 ) {
    print "OK " . join( ", ", @ok_msgs );
    if ( scalar( @ignores ) ) {
        print "; Ignoring: " . join( ", ", @ignores );
    }
}
print " | " . join( ", ", @perfs );
print "\n";
exit $rc;

##################################

# check_ups( $host, $community )
#   Query the UPS and append findings to the global message lists
#   (@crit_msgs, @warn_msgs, @unknown_msgs, @ok_msgs) and to @perfs.
#   Returns nothing.  Exits 3 directly if the SNMP session cannot be
#   created; returns early, after recording an unknown message, if one of
#   the SNMP requests fails.
sub check_ups {
    my( $host, $community ) = @_;
    my( $session, $error, %variables, @oids, $result, $var, $val,
        $model, $name, $load, $capacity, $percent, $oid,
        %temps, %temp_limits, %temp_descrs, $row, $col, $temp,
        $warn_limit, $crit_limit, $descr, $msg, $secs_on_bat );

    $verbose && print "opening session\n";
    ( $session, $error ) = Net::SNMP->session(
        -version   => 'snmpv1',
        -hostname  => $host,
        -community => $community,
        -timeout   => 5,
        -translate => [ -timeticks => 0x0 ],    # Turn off so sysUpTime is numeric
        -retries   => 3,
        #-debug    => 0x02
        );
    if ( ! defined( $session ) ) {
        print "snmp error: $error\n";
        exit 3;
    }

    %variables = (
        #"upsIdentManufacturer" => "$upsmib.1.1.1.0",
        "upsIdentModel" => "$upsmib.1.1.2.0",
        "upsIdentName" => "$upsmib.1.1.5.0",
        "upsBatteryStatus" => "$upsmib.1.2.1.0",
        "upsSecondsOnBattery" => "$upsmib.1.2.2.0",
        "upsEstimatedMinutesRemaining" => "$upsmib.1.2.3.0",
        "upsEstimatedChargeRemaining" => "$upsmib.1.2.4.0",
        "upsBatteryVoltage" => "$upsmib.1.2.5.0",
        #"upsInputFrequency" => "$upsmib.1.3.3.1.2.1",
        "upsInputVoltage" => "$upsmib.1.3.3.1.3.1",
        #"upsInputCurrent" => "$upsmib.1.3.3.1.4.1",  # PSi has this, GXT2 doesn't
        "upsOutputSource" => "$upsmib.1.4.1.0",
        #"upsOutputFrequency" => "$upsmib.1.4.2.0",
        "upsOutputVoltage" => "$upsmib.1.4.4.1.2.1",
        "upsOutputCurrent" => "$upsmib.1.4.4.1.3.1",
        "upsAlarmsPresent" => "$upsmib.1.6.1.0",
        "upsConfigInputVoltage" => "$upsmib.1.9.1.0",
        "upsConfigOutputVA" => "$upsmib.1.9.5.0",
        );

    @oids = sort values %variables;
    $verbose && print "doing initial get\n";
    # $result holds the scalar values for the whole function; the table
    # walks further down use their own variables so it is never clobbered.
    $result = $session->get_request( -varbindlist => \@oids );
    if ( ! defined( $result ) ) {
        push @unknown_msgs, $session->error();
        return;
    }

    if ( $verbose ) {
        print "raw results:\n";
        foreach $var ( sort keys %variables ) {
            $val = $result->{ $variables{ $var } };
            print "  $var $val\n";
        }
    }

    #$manuf = $result->{ $variables{ 'upsIdentManufacturer' } };
    $model = $result->{ $variables{ 'upsIdentModel' } };
    $model =~ s/\s+$//;
    $name = $result->{ $variables{ 'upsIdentName' } };
    $verbose && print "model $model\n";
    $verbose && print "name $name\n";
    push @ok_msgs, "$model $name";
    # Strip characters that are not safe inside a perfdata value.
    my $model2 = $model;
    $model2 =~ s/[^\w\d\.\/-]//ig;
    push @perfs, "model=$model2";

    # battery status
    $val = $result->{ $variables{ 'upsBatteryStatus' } };
    my %upsBatteryStatuses = (
        1 => 'unknown',
        2 => 'Normal',
        3 => 'Low',
        4 => 'Depleted',
        );
    my $status = $upsBatteryStatuses{ $val };
    # Unmapped codes would otherwise give a message with a blank label.
    $status = "code $val" if ( ! defined( $status ) );
    $verbose && print "upsBatteryStatus $val $status\n";
    if ( $val == 2 ) {
        push @ok_msgs, "battery $status";
    }
    elsif ( $val == 3 ) {
        push @warn_msgs, "battery $status";
    }
    elsif ( $val == 4 ) {
        push @crit_msgs, "battery $status";
    }
    else {
        push @unknown_msgs, "battery status $status";
    }
    push @perfs, "battery_status=$val";

    $secs_on_bat = $result->{ $variables{ 'upsSecondsOnBattery' } };
    $verbose && print "upsSecondsOnBattery $secs_on_bat\n";
    if ( $secs_on_bat != 0 ) {
        push @warn_msgs, "on battery for $secs_on_bat secs";
    }
    push @perfs, "seconds_on_battery=$secs_on_bat";

    # time remaining
    # The message goes to the critical (or warning) list either when the
    # runtime is below that threshold, or when that list already has an
    # entry from the checks above, so it is shown next to the problem.
    $val = $result->{ $variables{ 'upsEstimatedMinutesRemaining' } };
    $verbose && print "upsEstimatedMinutesRemaining $val\n";
    if ( $val < ( $crit_runtime ) || $#crit_msgs >= 0 ) {
        push @crit_msgs, "battery time remaining $val min";
    }
    elsif ( $val < ( $warn_runtime ) || $#warn_msgs >= 0 ) {
        push @warn_msgs, "battery time remaining $val min";
    }
    else {
        push @ok_msgs, "battery time remaining $val min";
    }
    push @perfs, "time_remaining=$val";

    # charge remaining in percent
    # Stricter limits apply while running on battery.
    $val = $result->{ $variables{ 'upsEstimatedChargeRemaining' } };
    $verbose && print "upsEstimatedChargeRemaining $val\n";
    if ( $val < 75 && $secs_on_bat > 0 ) {
        push @crit_msgs, "estimated charge $val %";
    }
    elsif ( $val < 85 && $secs_on_bat > 0 ) {
        push @warn_msgs, "estimated charge $val %";
    }
    elsif ( $val < 40 ) {
        push @warn_msgs, "estimated charge $val %";
    }
    else {
        push @ok_msgs, "estimated charge $val %";
    }
    push @perfs, "charge_remaining=$val";

    # output source
    $val = $result->{ $variables{ 'upsOutputSource' } };
    my %upsOutputSources = (
        1 => 'other',
        2 => 'none',
        3 => 'normal',
        4 => 'bypass',
        5 => 'battery',
        6 => 'booster',
        7 => 'reducer',
        );
    my $source = $upsOutputSources{ $val };
    $source = "code $val" if ( ! defined( $source ) );
    $verbose && print "upsOutputSource $val $source\n";
    if ( $val == 3 || $val == 4 ) {
        push @ok_msgs, "output source $source";
    }
    elsif ( $val == 5 || $val == 6 || $val == 7 ) {
        push @warn_msgs, "output source $source";
    }
    else {
        push @unknown_msgs, "output source $source";
    }
    push @perfs, "output_source=$val";

    # load
    # The voltage * current product is divided by 10 to get VA (the
    # current reading is taken to be in tenths of an amp).
    $load = $result->{ $variables{ 'upsOutputVoltage' } }
        * $result->{ $variables{ 'upsOutputCurrent' } } / 10.0;
    $capacity = $result->{ $variables{ 'upsConfigOutputVA' } };
    if ( $capacity > 0 ) {
        $percent = $load * 100.0 / $capacity;
        $msg = sprintf( "load %d / %d VA = %0.1f %%", $load, $capacity,
            $percent );
        $verbose && print $msg, "\n";
        if ( $percent > $crit_load ) {
            push @crit_msgs, $msg;
        }
        elsif ( $percent > $warn_load ) {
            push @warn_msgs, $msg;
        }
        else {
            push @ok_msgs, $msg;
        }
        push @perfs, sprintf( "load_pcnt=%0.1f", $percent );
    }
    else {
        # A zero or missing rating would make the division above die.
        $msg = sprintf( "load %d VA, rated output VA not reported", $load );
        $verbose && print $msg, "\n";
        push @unknown_msgs, $msg;
    }

    # alarms
    $val = $result->{ $variables{ 'upsAlarmsPresent' } };
    $verbose && print "upsAlarmsPresent $val\n";
    push @perfs, "alarms=$val";
    if ( $val != 0 ) {
        push @warn_msgs, "$val Alarms Present";

        my %ups_alarm_descrs = (
            "$upsmib.1.6.3.1" => 'battery bad',
            "$upsmib.1.6.3.2" => 'on battery',
            "$upsmib.1.6.3.3" => 'low battery',
            "$upsmib.1.6.3.4" => 'depleted battery',
            "$upsmib.1.6.3.5" => 'temp bad',
            "$upsmib.1.6.3.6" => 'input bad',
            "$upsmib.1.6.3.7" => 'ouput bad',
            "$upsmib.1.6.3.8" => 'output overload',
            "$upsmib.1.6.3.9" => 'on bypass',
            "$upsmib.1.6.3.10" => 'bypass bad',
            "$upsmib.1.6.3.11" => 'output off as requested',
            "$upsmib.1.6.3.12" => 'UPS off as requested',
            "$upsmib.1.6.3.13" => 'Charger Failed',
            "$upsmib.1.6.3.14" => 'UPS ouptut off',
            "$upsmib.1.6.3.15" => 'UPS system off',
            "$upsmib.1.6.3.16" => 'fan failure',
            "$upsmib.1.6.3.17" => 'fuse failure',
            "$upsmib.1.6.3.18" => 'general fault',
            "$upsmib.1.6.3.19" => 'diagnostic test failed',
            "$upsmib.1.6.3.20" => 'communications lost',
            "$upsmib.1.6.3.21" => 'awaiting power',
            "$upsmib.1.6.3.22" => 'shutdown pending',
            "$upsmib.1.6.3.23" => 'shutdown imminent',
            "$upsmib.1.6.3.24" => 'test in progress',
            );

        my $upsAlarmTable = "$upsmib.1.6.2";
        # Keep the table in its own variable: the scalar values in
        # $result are still needed for the input voltage check below.
        my $alarm_table = $session->get_table( -baseoid => "$upsAlarmTable" );
        if ( ! defined( $alarm_table ) ) {
            push @unknown_msgs, $session->error();
            return;
        }

        foreach $oid ( sort keys %$alarm_table ) {
            my $alarm_val = $alarm_table->{ $oid };
            $verbose && print "$oid $alarm_val\n";
            # The last two sub-identifiers are the column and the row.
            next if ( $oid !~ m/\.(\d+)\.(\d+)$/ );
            $col = $1;
            $row = $2;
            next if ( $col == 1 );
            if ( $col == 2 ) {
                $descr = $ups_alarm_descrs{ $alarm_val };
                # Report alarms we have no text for by their raw OID
                # instead of pushing an empty message.
                $descr = "alarm $alarm_val" if ( ! defined( $descr ) );
                $verbose && print "ups alarms $alarm_val $descr\n";
                push @warn_msgs, $descr;
            }
            elsif ( $col == 3 ) {
                # ignore time for now
            }
        }
    }

    # input voltage
    $val = $result->{ $variables{ 'upsInputVoltage' } };
    $verbose && print "upsInputVoltage $val\n";
    if ( $val ) {
        my( $voltage_low_crit, $voltage_low_warn, $voltage_high_warn,
            $voltage_high_crit, $config_voltage );

        $config_voltage = $result->{ $variables{ 'upsConfigInputVoltage' } };

        # GXT2-2000RT120 datasheet says 60 to 140 VAC, load dependant
        # I've seen it throw a bypass bad alarm at 132 (maybe lower?)
        # GXT3-2000 manual says
        #   Input AC
        #   variable based on output load
        #                      120VAC nominal   208VAC nominal;
        #   90 - 100% loading  102VAC/140VAC    177VAC/280VAC
        #   70 - 90% loading   96VAC/140VAC     168VAC/280VAC
        #   30 - 70% loading   84VAC/140VAC     150VAC/280VAC
        #   0 - 30% loading    60VAC/140VAC     115VAC/280VAC

        # Tom Hartly from Diamond Electric says +/- 8% is ok

        if ( $config_voltage > 190 && $config_voltage < 250 ) {
            # Nominal voltage in this range: low limits are based on 208 V,
            # high limits on 240 V.
            $voltage_low_crit = 208 * 0.84;
            $voltage_low_warn = 208 * 0.916;  # should be 0.92
            $voltage_high_warn = 240 * 1.08;
            $voltage_high_crit = 240 * 1.16;
        }
        elsif ( $config_voltage ) {
            $voltage_low_crit = $config_voltage * 0.84;
            $voltage_low_warn = $config_voltage * 0.900;  # should be 0.92, but then alarms at 109V
            $voltage_high_warn = $config_voltage * 1.08;
            $voltage_high_crit = $config_voltage * 1.16;
        }
        else {
            # No configured voltage reported: fall back to 120 V nominal.
            $voltage_low_crit = 120 * 0.84;
            $voltage_low_warn = 120 * 0.900;  # should be 0.92, but then alarms at 109V
            $voltage_high_warn = 120 * 1.08;
            $voltage_high_crit = 120 * 1.16;
        }

        if ( $val < $voltage_low_crit || $voltage_high_crit < $val ) {
            push @crit_msgs, "input voltage $val VAC";
        }
        elsif ( $val < $voltage_low_warn || $voltage_high_warn < $val ) {
            push @warn_msgs, "input voltage $val VAC";
        }
        push @perfs, "inputvoltage=$val";
    }

    # temperatures
    my %liebert_temp_descrs = (
        "$liebert.1.42.3.4.1.1.1" => 'Control',
        "$liebert.1.42.3.4.1.1.2" => 'Return Air',
        "$liebert.1.42.3.4.1.1.3" => 'Supply Air',
        "$liebert.1.42.3.4.1.1.4" => 'Ambient',
        "$liebert.1.42.3.4.1.1.5" => 'Inverter',
        "$liebert.1.42.3.4.1.1.6" => 'Battery',
        "$liebert.1.42.3.4.1.1.7" => 'AC-DC Converter',
        "$liebert.1.42.3.4.1.1.8" => 'PFC circuitry',
        "$liebert.1.42.3.4.1.1.9" => 'Transformer',
        );

    my $lgpEnvTemperatureTableDegC = "$liebert.1.42.3.4.1.3.3";
    my $temp_table = $session->get_table(
        -baseoid => "$lgpEnvTemperatureTableDegC" );
    if ( ! defined( $temp_table ) ) {
        push @unknown_msgs, $session->error();
        return;
    }

    # Sort the table cells into per-row hashes by column number:
    # 2 = description OID, 3 = reading, 4 = device supplied limit.
    foreach $oid ( sort keys %$temp_table ) {
        $val = $temp_table->{ $oid };
        $verbose && print "$oid $val\n";
        next if ( $oid !~ m/\.(\d+)\.(\d+)$/ );
        $col = $1;
        $row = $2;
        next if ( $col == 1 );
        if ( $col == 2 ) {
            $temp_descrs{ $row } = $liebert_temp_descrs{ $val };
        }
        elsif ( $col == 3 ) {
            $temps{ $row } = $val;
        }
        elsif ( $col == 4 ) {
            $temp_limits{ $row } = $val;
        }
    }

    # Start undefined rather than at 0 so that readings which are all
    # below zero still yield the true maximum.
    my $highest;
    foreach $row ( keys %temps ) {
        $temp = $temps{ $row };
        $warn_limit = $temp_limits{ $row };
        $descr = $temp_descrs{ $row };
        if ( $warn_limit ) {
            # The device supplied a limit: warn at it, critical 10% above.
            $crit_limit = $warn_limit * 1.10;
        }
        else {
            $warn_limit = $warn_temp;
            $crit_limit = $crit_temp;
        }
        $verbose && print "temp $descr: $crit_freezing < $warn_freezing < [ $temp C ] < $warn_limit < $crit_limit\n";
        if ( $temp > $crit_limit ) {
            push @crit_msgs, "$descr temperature $temp C over limit $crit_limit C";
        }
        elsif ( $temp > $warn_limit ) {
            push @warn_msgs, "$descr temperature $temp C near limit $warn_limit C";
        }
        elsif ( $temp < $crit_freezing ) {
            push @crit_msgs, "$descr temperature $temp C under limit $crit_freezing C";
        }
        elsif ( $temp < $warn_freezing ) {
            push @warn_msgs, "$descr temperature $temp C under limit $warn_freezing C";
        }
        if ( ! defined( $highest ) || $temp > $highest ) {
            $highest = $temp;
        }
    }
    # No readings at all: keep reporting 0 as before.
    $highest = 0 if ( ! defined( $highest ) );
    push @ok_msgs, "temp ${highest}C";
    push @perfs, "temp=$highest";
}