#!/usr/local/bin/perl
#
# check a Liebert ups
#
# $Header: /home/doke/work/nagios/RCS/check_liebert_ups,v 1.27 2017/05/12 20:29:02 doke Exp $

use vars qw(
    $warn_runtime $crit_runtime
    $warn_load $crit_load
    $warn_temp $crit_temp
    $warn_freezing $crit_freezing
    );

# Built-in thresholds.  Each one can be overridden from the command line
# (-w/-c, -l/-L, -t/-T, -u/-U respectively).

# Remaining battery runtime, in minutes: ( warning, critical ).
# NOTE(review): an earlier comment said the critical limit only applies
# while on battery; confirm against the runtime check in check_ups.
( $warn_runtime, $crit_runtime ) = ( 15, 10 );

# Output load as a percentage of rated capacity: ( warning, critical ).
( $warn_load, $crit_load ) = ( 70, 90 );

# Upper temperature limits in degrees C: ( warning, critical ).
# The GXT3 user manual
# (www.emersonnetworkpower.com/en-US/Products/ACPower/RackmountUPS/Documents/SL-23180.pdf)
# gives an operating temperature of 32 to 104 F (0 to 40 C).  That figure is
# ambient, though, and the sensor sits deep inside the device where it is
# warmer, hence the higher numbers here.
( $warn_temp, $crit_temp ) = ( 47, 50 );

# Lower temperature limits in degrees C: ( warning, critical ).
( $warn_freezing, $crit_freezing ) = ( 0, -3 );

###########################

use strict;
use warnings;
no warnings 'redefine';
no warnings 'uninitialized';
use Getopt::Long;
use Net::SNMP;

use vars qw(
    $mib2 $enterprises $upsmib $liebert
    $verbose $help $host $community
    @crit_msgs @warn_msgs @unknown_msgs @ok_msgs @ignores @perfs
    $rc
    );

# OID prefixes used to build every object identifier queried below.
# Note that $mib2 is written without a leading dot while $enterprises has
# one; the strings are kept exactly as they are because they are later used
# as hash keys when looking up SNMP results.
( $mib2, $enterprises ) = ( '1.3.6.1.2.1', '.1.3.6.1.4.1' );
$upsmib  = $mib2 . '.33';
$liebert = $enterprises . '.476';

# Command-line flags default to off; -v may be repeated to raise $verbose.
( $verbose, $help ) = ( 0, 0 );


#############################


# Write the command-line synopsis to STDERR and terminate the program.
#
#   $rc   exit status handed to exit()
#
# The current threshold values are shown in square brackets next to each
# option.  This sub never returns.
sub usage {
    my( $rc ) = @_;

    # One element per output line.  The last two elements reproduce the
    # indented blank line and the final newline that end the message; because
    # the text ends in a newline, warn() does not append file/line info.
    my @lines = (
        "Usage: $0 [-v] [-w n] [-c n] -H <host> -C <community>",
        "    -H s    hostname",
        "    -C s    snmp community",
        "    -w n    warn if remaining runtime is less than n minutes [$warn_runtime]",
        "    -c n    critical if remaining runtime is less than n minutes [$crit_runtime]",
        "    -l n    warn if load is greater than n % [$warn_load]",
        "    -L n    critical if load is greater than n % [$crit_load]",
        "    -t n    warn if exceed this temperature [$warn_temp]",
        "    -T n    critical if exceed this temperature [$crit_temp] (must be > warn)",
        "    -u n    warn if below this temperature [$warn_freezing]",
        "    -U n    critical if below this temperature [$crit_freezing]",
        "    ",
        "",
        );

    warn join( "\n", @lines );
    exit $rc;
    }


# Parse the command line.  "bundling" lets single-letter switches be
# combined (e.g. -vv).  GetOptions returns false when it meets an unknown
# switch or a value of the wrong type; treat that as a usage error rather
# than silently running the check with default thresholds.
Getopt::Long::Configure( "bundling" );
GetOptions(
    'H=s' => \$host,
    'C=s' => \$community,
    'w=i' => \$warn_runtime,
    'c=i' => \$crit_runtime,
    'l=i' => \$warn_load,	# %
    'L=i' => \$crit_load,	# %
    't=i' => \$warn_temp,     # in C
    'T=i' => \$crit_temp,     # in C
    'u=i' => \$warn_freezing,     # in C
    'U=i' => \$crit_freezing,     # in C
    'v+' => \$verbose,
    'h' => \$help,
    ) or usage( 3 );
usage( 0 ) if ( $help );

# A check that cannot run because of bad arguments exits with 3, the same
# status this script uses everywhere else for "unknown", so a misconfigured
# service is not mistaken for a real UPS warning.
usage( 3 ) if ( ! $host );
usage( 3 ) if ( ! $community );
usage( 3 ) if ( $warn_temp > $crit_temp );

# Just in case of problems, let's not hang Nagios
$SIG{'ALRM'} = sub {
        print ("ERROR: No response from snmp server (alarm)\n");
        exit 3;
    };
alarm( 10 );

# Run the actual check; it records its findings in the global message lists.
check_ups( $host, $community );


# Build the single status line from the message lists filled in by
# check_ups, print it followed by the performance data, and exit with the
# matching status: 0 ok, 1 warning, 2 critical, 3 unknown.  The first
# non-empty list (critical, then warning, then unknown) decides the status.
$rc = 0;   # nagios ok exit code
my @sections;

if ( @crit_msgs ) {
    push @sections, "CRITICAL: " . join( ", ", @crit_msgs );
    $rc = 2;
    }

if ( @warn_msgs ) {
    push @sections, "Warning: " . join( ", ", @warn_msgs );
    $rc ||= 1;
    }

if ( @unknown_msgs ) {
    push @sections, "Unknown: " . join( ", ", @unknown_msgs );
    $rc ||= 3;
    }
elsif ( ! $rc ) {
    # Nothing to complain about: show the informational messages instead.
    push @sections, "OK " . join( ", ", @ok_msgs );
    push @sections, "Ignoring: " . join( ", ", @ignores ) if ( @ignores );
    }

# Sections are separated by "; ", items inside a section and the
# performance data by ", ".
print join( "; ", @sections ), " | ", join( ", ", @perfs ), "\n";
exit $rc;



##################################

# Query one UPS over SNMP v1 and record what was found.
#
# Arguments:
#   $host        hostname or address passed to Net::SNMP
#   $community   SNMP community string
#
# Nothing useful is returned.  Findings are appended to the package-level
# lists @ok_msgs, @warn_msgs, @crit_msgs and @unknown_msgs, and performance
# data to @perfs.  Thresholds come from the package-level $warn_* / $crit_*
# variables.
#
# Error handling:
#   - if the SNMP session cannot be created, prints the error and exits 3;
#   - if an SNMP request fails, pushes the session error onto @unknown_msgs
#     and returns early, skipping the remaining checks.
sub check_ups {
    my( $host, $community ) = @_;
    my( $session, $error, %variables, @oids, $result, $var, $val,
        $model, $name, $load, $capacity, $percent, $oid, %temps,
        %temp_limits, %temp_descrs, $row, $col, $temp, $warn_limit,
        $crit_limit, $descr, $msg, $secs_on_bat );

    $verbose && print "opening session\n";
    ( $session, $error ) = Net::SNMP->session(
        -version => 'snmpv1',
        -hostname => $host,
        -community => $community,
        -timeout => 5,
        -translate => [ -timeticks => 0x0 ],  # Turn off so sysUpTime is numeric
        -retries => 3,
        #-debug => 0x02
        );
    if ( ! defined( $session ) ) {
        print "snmp error: $error\n";
        exit 3;
        }

    # Scalar objects fetched in one request, keyed by a readable name.
    %variables = (
        #"upsIdentManufacturer" 	=> "$upsmib.1.1.1.0",
        "upsIdentModel" 		=> "$upsmib.1.1.2.0",
        "upsIdentName" 			=> "$upsmib.1.1.5.0",

        "upsBatteryStatus" 		=> "$upsmib.1.2.1.0",
        "upsSecondsOnBattery" 		=> "$upsmib.1.2.2.0",
        "upsEstimatedMinutesRemaining" 	=> "$upsmib.1.2.3.0",
        "upsEstimatedChargeRemaining" 	=> "$upsmib.1.2.4.0",
        "upsBatteryVoltage" 		=> "$upsmib.1.2.5.0",

        #"upsInputFrequency" 		=> "$upsmib.1.3.3.1.2.1",
        "upsInputVoltage" 		=> "$upsmib.1.3.3.1.3.1",
        #"upsInputCurrent" 		=> "$upsmib.1.3.3.1.4.1",  # PSi has this, GXT2 doesn't

        "upsOutputSource" 		=> "$upsmib.1.4.1.0",
        #"upsOutputFrequency" 		=> "$upsmib.1.4.2.0",
        "upsOutputVoltage" 		=> "$upsmib.1.4.4.1.2.1",
        "upsOutputCurrent" 		=> "$upsmib.1.4.4.1.3.1",

        "upsAlarmsPresent" 		=> "$upsmib.1.6.1.0",

        "upsConfigInputVoltage" 	=> "$upsmib.1.9.1.0",
        "upsConfigOutputVA" 		=> "$upsmib.1.9.5.0",

        );

    @oids = sort values %variables;

    $verbose && print "doing initial get\n";
    $result = $session->get_request( -varbindlist => \@oids );
    if ( ! defined( $result ) ) {
        push @unknown_msgs, $session->error();
        return;
        }
    if ( $verbose ) {
        print "raw results:\n";
        foreach $var ( sort keys %variables ) {
            $val = $result->{ $variables{ $var } };
            print "    $var $val\n";
            }
        }


    # identification
    $model = $result->{ $variables{ 'upsIdentModel' } };
    $model =~ s/\s+$//;
    $name = $result->{ $variables{ 'upsIdentName' } };

    $verbose && print "model $model\n";
    $verbose && print "name $name\n";
    push @ok_msgs, "$model $name";

    # Strip everything but word characters, dots, slashes and dashes before
    # putting the model into the performance data.
    my $model2 = $model;
    $model2 =~ s/[^\w\d\.\/-]//ig;
    push @perfs, "model=$model2";


    # battery status
    $val = $result->{ $variables{ 'upsBatteryStatus' } };
    my %upsBatteryStatuses = (
        1 => 'unknown',
        2 => 'Normal',
        3 => 'Low',
        4 => 'Depleted',
        );
    my $status = $upsBatteryStatuses{ $val };
    $verbose && print "upsBatteryStatus $val $status\n";
    if ( $val == 2 ) {
        push @ok_msgs, "battery $status";
        }
    elsif ( $val == 3 ) {
        push @warn_msgs, "battery $status";
        }
    elsif ( $val == 4 ) {
        push @crit_msgs, "battery $status";
        }
    else {
        push @unknown_msgs, "battery status $status";
        }
    push @perfs, "battery_status=$val";

    # Any non-zero time on battery is worth a warning.
    $secs_on_bat = $result->{ $variables{ 'upsSecondsOnBattery' } };
    $verbose && print "upsSecondsOnBattery $secs_on_bat\n";
    if ( $secs_on_bat != 0 ) {
        push @warn_msgs, "on battery for $secs_on_bat secs";
        }
    push @perfs, "seconds_on_battery=$secs_on_bat";


    # time remaining
    # The runtime is also reported at critical/warning level whenever an
    # earlier check already raised a message of that level, so it shows up
    # next to the problem it relates to.
    $val = $result->{ $variables{ 'upsEstimatedMinutesRemaining' } };
    $verbose && print "upsEstimatedMinutesRemaining $val\n";
    if ( $val < ( $crit_runtime ) || $#crit_msgs >= 0 ) {
        push @crit_msgs, "battery time remaining $val min";
        }
    elsif ( $val < ( $warn_runtime ) || $#warn_msgs >= 0 ) {
        push @warn_msgs, "battery time remaining $val min";
        }
    else {
        push @ok_msgs, "battery time remaining $val min";
        }
    push @perfs, "time_remaining=$val";

    # charge remaining in percent
    # Stricter limits apply while running on battery (75 / 85 %); on mains
    # only a charge below 40 % is a warning.
    $val = $result->{ $variables{ 'upsEstimatedChargeRemaining' } };
    $verbose && print "upsEstimatedChargeRemaining $val\n";
    if ( $val < 75 && $secs_on_bat > 0 ) {
        push @crit_msgs, "estimated charge $val %";
        }
    elsif ( $val < 85 && $secs_on_bat > 0 ) {
        push @warn_msgs, "estimated charge $val %";
        }
    elsif ( $val < 40 ) {
        push @warn_msgs, "estimated charge $val %";
        }
    else {
        push @ok_msgs, "estimated charge $val %";
        }
    push @perfs, "charge_remaining=$val";


    # output source
    $val = $result->{ $variables{ 'upsOutputSource' } };
    my %upsOutputSources = (
        1 => 'other',
        2 => 'none',
        3 => 'normal',
        4 => 'bypass',
        5 => 'battery',
        6 => 'booster',
        7 => 'reducer',
        );
    my $source = $upsOutputSources{ $val };
    $verbose && print "upsOutputSource $val $source\n";
    if ( $val == 3 || $val == 4 ) {
        push @ok_msgs, "output source $source";
        }
    elsif ( $val == 5 || $val == 6 || $val == 7 ) {
        push @warn_msgs, "output source $source";
        }
    else {
        push @unknown_msgs, "output source $source";
        }
    push @perfs, "output_source=$val";



    # load
    # Volts times amps; the division by 10 is presumably because the UPS MIB
    # reports output current in tenths of an amp -- verify against the MIB.
    $load = $result->{ $variables{ 'upsOutputVoltage' } }
        * $result->{ $variables{ upsOutputCurrent } }
        / 10.0;
    $capacity = $result->{ $variables{ 'upsConfigOutputVA' } };
    if ( $capacity > 0 ) {
        $percent = $load * 100.0 / $capacity;
        $msg = sprintf( "load %d / %d VA = %0.1f %%", $load, $capacity, $percent );
        $verbose && print $msg, "\n";
        if ( $percent > $crit_load ) {
            push @crit_msgs, $msg;
            }
        elsif ( $percent > $warn_load ) {
            push @warn_msgs, $msg;
            }
        else {
            push @ok_msgs, $msg;
            }
        push @perfs, sprintf( "load_pcnt=%0.1f", $percent );
        }
    else {
        # Without a rated capacity the percentage cannot be computed;
        # report that instead of dying on a division by zero.
        $msg = sprintf( "load %d VA, rated capacity unavailable", $load );
        $verbose && print $msg, "\n";
        push @unknown_msgs, $msg;
        }


    # alarms
    $val = $result->{ $variables{ 'upsAlarmsPresent' } };
    $verbose && print "upsAlarmsPresent $val\n";
    push @perfs, "alarms=$val";
    if ( $val != 0 ) {
        push @warn_msgs, "$val Alarms Present";

        my %ups_alarm_descrs = (
            "$upsmib.1.6.3.1" => 'battery bad',
            "$upsmib.1.6.3.2" => 'on battery',
            "$upsmib.1.6.3.3" => 'low battery',
            "$upsmib.1.6.3.4" => 'depleted battery',
            "$upsmib.1.6.3.5" => 'temp bad',
            "$upsmib.1.6.3.6" => 'input bad',
            "$upsmib.1.6.3.7" => 'ouput bad',
            "$upsmib.1.6.3.8" => 'output overload',
            "$upsmib.1.6.3.9" => 'on bypass',
            "$upsmib.1.6.3.10" => 'bypass bad',
            "$upsmib.1.6.3.11" => 'output off as requested',
            "$upsmib.1.6.3.12" => 'UPS off as requested',
            "$upsmib.1.6.3.13" => 'Charger Failed',
            "$upsmib.1.6.3.14" => 'UPS ouptut off',
            "$upsmib.1.6.3.15" => 'UPS system off',
            "$upsmib.1.6.3.16" => 'fan failure',
            "$upsmib.1.6.3.17" => 'fuse failure',
            "$upsmib.1.6.3.18" => 'general fault',
            "$upsmib.1.6.3.19" => 'diagnostic test failed',
            "$upsmib.1.6.3.20" => 'communications lost',
            "$upsmib.1.6.3.21" => 'awaiting power',
            "$upsmib.1.6.3.22" => 'shutdown pending',
            "$upsmib.1.6.3.23" => 'shutdown imminent',
            "$upsmib.1.6.3.24" => 'test in progress',
            );

        # The table goes into its own variable: $result still holds the
        # scalar values that the input voltage check below reads.
        my $upsAlarmTable	= "$upsmib.1.6.2";
        my $alarm_table = $session->get_table( -baseoid => "$upsAlarmTable" );
        if ( ! defined( $alarm_table ) ) {
            push @unknown_msgs, $session->error();
            return;
            }
        foreach $oid ( sort keys %$alarm_table ) {
            $val = $alarm_table->{ $oid };
            $verbose && print "$oid $val\n";
            # The last two sub-identifiers are the column and the row.
            next if ( $oid !~ m/\.(\d+)\.(\d+)$/ );
            $col = $1;
            $row = $2;
            next if ( $col == 1 );
            if ( $col == 2 ) {
                $descr = $ups_alarm_descrs{ $val };
                # Alarm not in the table above: show the raw identifier
                # rather than an empty message.
                $descr = "unrecognized alarm $val" if ( ! defined( $descr ) );
                $verbose && print "ups alarms $val $descr\n";
                push @warn_msgs, $descr;
                }
            elsif ( $col == 3 ) {
                # ignore time for now
                }
            }
        }

    # input voltage
    $val = $result->{ $variables{ 'upsInputVoltage' } };
    $verbose && print "upsInputVoltage $val\n";
    if ( $val ) {
        my( $voltage_low_crit, $voltage_low_warn, $voltage_high_warn,
            $voltage_high_crit, $config_voltage );

        $config_voltage = $result->{ $variables{ 'upsConfigInputVoltage' } };

        # GXT2-2000RT120 datasheet says 60 to 140 VAC, load dependent
        # I've seen it throw a bypass bad alarm at 132 (maybe lower?)

        # GXT3-2000 manual says
        # Input AC
        # variable based on output load
        #                    120VAC nominal     208VAC nominal;
        #  90 - 100% loading  102VAC/140VAC  	177VAC/280VAC
        #  70 - 90% loading    96VAC/140VAC 	168VAC/280VAC
        #  30 - 70% loading    84VAC/140VAC 	150VAC/280VAC
        #  0 - 30% loading     60VAC/140VAC 	115VAC/280VAC

        # Tom Hartly from Diamond Electric says +/- 8% is ok

        if ( $config_voltage > 190 && $config_voltage < 250 ) {
            # Low limits are taken from 208 V, high limits from 240 V.
            $voltage_low_crit = 208 * 0.84;
            $voltage_low_warn = 208 * 0.916;  # should be 0.92
            $voltage_high_warn = 240 * 1.08;
            $voltage_high_crit = 240 * 1.16;
            }
        elsif ( $config_voltage ) {
            $voltage_low_crit = $config_voltage * 0.84;
            $voltage_low_warn = $config_voltage * 0.900;   # should be 0.92, but then alarms at 109V
            $voltage_high_warn = $config_voltage * 1.08;
            $voltage_high_crit = $config_voltage * 1.16;
            }
        else {
            # No configured voltage reported: assume 120 V nominal.
            $voltage_low_crit = 120 * 0.84;
            $voltage_low_warn = 120 * 0.900;   # should be 0.92, but then alarms at 109V
            $voltage_high_warn = 120 * 1.08;
            $voltage_high_crit = 120 * 1.16;
            }
        if ( $val < $voltage_low_crit || $voltage_high_crit < $val ) {
            push @crit_msgs, "input voltage $val VAC";
            }
        elsif ( $val < $voltage_low_warn || $voltage_high_warn < $val ) {
            push @warn_msgs, "input voltage $val VAC";
            }
        push @perfs, "inputvoltage=$val";
        }


    # temperatures
    my %liebert_temp_descrs = (
        "$liebert.1.42.3.4.1.1.1" => 'Control',
        "$liebert.1.42.3.4.1.1.2" => 'Return Air',
        "$liebert.1.42.3.4.1.1.3" => 'Supply Air',
        "$liebert.1.42.3.4.1.1.4" => 'Ambient',
        "$liebert.1.42.3.4.1.1.5" => 'Inverter',
        "$liebert.1.42.3.4.1.1.6" => 'Battery',
        "$liebert.1.42.3.4.1.1.7" => 'AC-DC Converter',
        "$liebert.1.42.3.4.1.1.8" => 'PFC circuitry',
        "$liebert.1.42.3.4.1.1.9" => 'Transformer',
        );

    my $lgpEnvTemperatureTableDegC	= "$liebert.1.42.3.4.1.3.3";
    my $temp_table = $session->get_table( -baseoid => "$lgpEnvTemperatureTableDegC" );
    if ( ! defined( $temp_table ) ) {
        push @unknown_msgs, $session->error();
        return;
        }
    # Sort the table cells into per-row hashes: column 2 is the sensor
    # description, column 3 the reading, column 4 the device's own limit.
    foreach $oid ( sort keys %$temp_table ) {
        $val = $temp_table->{ $oid };
        $verbose && print "$oid $val\n";
        next if ( $oid !~ m/\.(\d+)\.(\d+)$/ );
        $col = $1;
        $row = $2;
        next if ( $col == 1 );
        if ( $col == 2 ) {
            $temp_descrs{ $row } = $liebert_temp_descrs{ $val }
            }
        elsif ( $col == 3 ) {
            $temps{ $row } = $val
            }
        elsif ( $col == 4 ) {
            $temp_limits{ $row } = $val
            }
        }

    # Left undefined until a reading is seen, so that sub-zero readings are
    # reported as they are instead of being masked by an initial 0.
    my $highest;
    foreach $row ( keys %temps ) {
        $temp = $temps{ $row };
        $warn_limit = $temp_limits{ $row };
        $descr = $temp_descrs{ $row };
        if ( $warn_limit ) {
            # The device supplied a limit: warn there, critical 10 % above.
            $crit_limit = $warn_limit * 1.10;
            }
        else {
            $warn_limit = $warn_temp;
            $crit_limit = $crit_temp;
            }

        $verbose && print "temp $descr: $crit_freezing < $warn_freezing < [ $temp C ] < $warn_limit < $crit_limit\n";
        if ( $temp > $crit_limit ) {
            push @crit_msgs, "$descr temperature $temp C over limit $crit_limit C";
            }
        elsif ( $temp > $warn_limit ) {
            push @warn_msgs, "$descr temperature $temp C near limit $warn_limit C";
            }
        elsif ( $temp < $crit_freezing ) {
            push @crit_msgs, "$descr temperature $temp C under limit $crit_freezing C";
            }
        elsif ( $temp < $warn_freezing ) {
            push @warn_msgs, "$descr temperature $temp C under limit $warn_freezing C";
            }
        if ( ! defined( $highest ) || $temp > $highest ) {
            $highest = $temp;
            }
        }
    # Only report a temperature when at least one reading was obtained.
    if ( defined( $highest ) ) {
        push @ok_msgs, "temp ${highest}C";
        push @perfs, "temp=$highest";
        }


    }