#!/usr/local/bin/perl
#
# check an Emerson Network Power NetSure DC rectifier 
#
# $Header: /opt/home/doke/work/nagios/RCS/check_emerson_netsure,v 1.3 2012/11/13 19:47:36 doke Exp $


###########################

# Default load thresholds, in percent of system capacity.  Both can be
# overridden on the command line with -w and -c.
our( $warn_load, $crit_load ) = ( 80, 90 );

use strict;
use warnings;
no warnings 'redefine';
no warnings 'uninitialized';
use Getopt::Long;
use Net::SNMP;

# SNMP OID prefixes.  $mib2 is not referenced anywhere else in this script;
# $emerson is the enterprise subtree (6302) every polled object lives under.
our $mib2        = '1.3.6.1.2.1';
our $enterprises = '.1.3.6.1.4.1';
our $emerson     = "$enterprises.6302";

# Command line settings and their defaults.
our $host;
our $community = 'public';
our $verbose   = 0;
our $help      = 0;

# Findings collected by the check, one list per Nagios severity, plus the
# final exit code computed from them.
our( @crits, @warns, @unknowns, @oks, @ignores );
our $rc;


#############################


# usage( $exit_code )
#
# Write the command line synopsis to STDERR, showing the current load
# thresholds in brackets, then terminate the program with $exit_code.
# Never returns.
sub usage {
    my( $exit_code ) = @_;

    # One element per output line; the trailing empty element makes the
    # joined text end with a newline, so warn adds no "at ... line" suffix.
    my @text = (
        "Usage: $0 [-v] [-w n] [-c n] -H <host> -C <community>",
        "    -H s    hostname",
        "    -C s    snmp community",
        "    -w n    warn if load is greater than n % [$warn_load] ",
        "    -c n    critical if load is greater than n % [$crit_load] ",
        "    ",
        "",
        );
    warn join( "\n", @text );
    exit $exit_code;
    }


# Parse the command line.  "bundling" lets single letter options be combined,
# so -vv raises the verbosity twice.
Getopt::Long::Configure( "bundling" );
my $options_ok = GetOptions(
    'H=s' => \$host,
    'C=s' => \$community,
    'w=i' => \$warn_load,    # %
    'c=i' => \$crit_load,    # %
    'v+' => \$verbose,
    'h' => \$help,
    );
usage( 0 ) if ( $help );

# A bad invocation must not look like a device problem, so it exits 3
# (Nagios UNKNOWN) rather than 1 (WARNING).  GetOptions returns false when
# it met an unknown option or a malformed value.
usage( 3 ) if ( ! $options_ok );
usage( 3 ) if ( ! $host );
usage( 3 ) if ( ! $community );
usage( 3 ) if ( $warn_load > $crit_load );
usage( 3 ) if ( $crit_load > 100 );

# Just in case of problems, let's not hang Nagios
$SIG{'ALRM'} = sub {
    print ("ERROR: No response from snmp server (alarm)\n");
    exit 3;
    };
alarm( 10 );

check_netsure( $host, $community );


# Build the single Nagios status line.  Each non-empty severity list becomes
# one section; sections are joined with "; ".  The exit code is the first of
# critical (2), warning (1), unknown (3) that has findings, else ok (0).
$rc = 0;   # nagios ok exit code
my @sections;
if ( scalar( @crits ) ) {
    push @sections, "CRITICAL: " . join( ", ", @crits );
    $rc = 2;
    }
if ( scalar( @warns ) ) {
    push @sections, "Warning: " . join( ", ", @warns );
    $rc = 1 if ( $rc == 0 );
    }
if ( scalar( @unknowns ) ) {
    push @sections, "Unknown: " . join( ", ", @unknowns );
    $rc = 3 if ( $rc == 0 );
    }

# The informational section is shown when everything is fine, or on request
# (-v) next to any problems.
if ( $rc == 0 || $verbose ) {
    my $ok_section = "OK " . join( ", ", @oks );
    if ( scalar( @ignores ) ) {
        $ok_section .= "; Ignoring: " . join( ", ", @ignores );
        }
    push @sections, $ok_section;
    }
print join( "; ", @sections ), "\n";
exit $rc;



##################################

# check_netsure( $host, $community )
#
# Poll one NetSure controller over SNMPv1 and classify what it reports.
# Nothing useful is returned; findings are appended to the file-level lists
# @oks, @warns, @crits and @unknowns, which the main program turns into the
# Nagios status line and exit code.
#
#   $host       hostname or address handed to Net::SNMP
#   $community  SNMP community string
#
# If the SNMP session cannot be created the sub prints the error and exits
# with status 3.  A failed get or a failed alarm table walk is recorded in
# @unknowns and ends the check early.  With -v, progress and raw values are
# printed to STDOUT as they are read.
sub check_netsure {
    my( $host, $community ) = @_;
    my( $session, $error, %variables, @oids, $result, $var, $val,
        $model, $fw, $name, $status, $systemStatus, $ninputs,
        $oid, $row, $col, $n, %alarms );

    $verbose && print "opening session\n";
    ( $session, $error ) = Net::SNMP->session(
        -version => 'snmpv1',
        -hostname => $host,
        -community => $community,
        -timeout => 5,
        -translate => [
            -timeticks => 0x0,     # Turn off so sysUpTime is numeric
            -octetstring => 0x0,     # Turn off so DateAndTime is octets
            ],
        -retries => 3,
        #-debug => 0x02
        );
    if ( ! defined( $session ) ) {
        print "snmp error: $error\n";
        exit 3;
        }

    # Scalars fetched in a single get request, keyed by their MIB names.
    %variables = (
        #"identManufacturer"            => "$emerson.2.1.1.1.0",
        "identModel"                    => "$emerson.2.1.1.2.0",
        "identControllerFirmwareVersion"  => "$emerson.2.1.1.3.0",
        "identName"                     => "$emerson.2.1.1.4.0",

        "systemStatus"                  => "$emerson.2.1.2.1.0",
        "systemVoltage"                 => "$emerson.2.1.2.2.0",
        "systemCurrent"                 => "$emerson.2.1.2.3.0",
        "systemUsedCapacity"            => "$emerson.2.1.2.4.0",

        "psInputLineAVoltage"           => "$emerson.2.1.2.6.1.0",
        "psInputLineBVoltage"           => "$emerson.2.1.2.6.2.0",
        "psInputLineCVoltage"           => "$emerson.2.1.2.6.3.0",
        #"psTemperature1"               => "$emerson.2.1.2.7.1.0",
        #"psTemperature2"               => "$emerson.2.1.2.7.2.0",
        "psStatusCommunication"         => "$emerson.2.1.2.8.0",
        "psStatusBatteryMode"           => "$emerson.2.1.2.9.0",

        #"alarmLastTrapNo"              => "$emerson.2.1.3.0",
        );

    @oids = sort values %variables;

    $verbose && print "doing initial get\n";
    $result = $session->get_request( -varbindlist => \@oids );
    if ( ! defined( $result ) ) {
        push @unknowns, $session->error();
        $session->close();
        return;
        }
    if ( $verbose ) {
        print "raw results:\n";
        foreach $var ( sort keys %variables ) {
            $val = $result->{ $variables{ $var } };
            print "    $var $val\n";
            }
        }


    # identity, shown as the first item of the OK text
    $model = $result->{ $variables{ 'identModel' } };
    $model =~ s/\s+$//;
    $fw = $result->{ $variables{ 'identControllerFirmwareVersion' } };
    $name = $result->{ $variables{ 'identName' } };

    push @oks, "$name $model $fw";

    # system status
    # This integer value represents the operational or administrative
    # status of the system. Also used as alarm severity.
    # Depending on situation there may be limits on allowed values.
    # Operational values:
    # (1) unknown - status has not yet been defined
    # (2) normal - there are no activated alarms
    # (3) warning - OA, lowest level of 'abnormal' status
    # (4) minor - A3
    # (5) major - MA
    # (6) critical - CA, highest level of 'abnormal' status
    # Administrative values:
    # (7) unmanaged
    # (8) restricted
    # (9) testing
    # (10) disabled

    $val = $result->{ $variables{ 'systemStatus' } };
    my %statuses = (
        0 => 'unknown',
        1 => 'unknown',
        2 => 'normal',
        3 => 'warning',
        4 => 'minor',
        5 => 'major',
        6 => 'critical',
        7 => 'unmanaged',
        8 => 'restricted',
        9 => 'testing',
        10 => 'disabled',
        11 => 'unknown',
        );
    # A code missing from the table still gets a readable label.
    $status = exists $statuses{ $val } ? $statuses{ $val } : 'unknown';
    $verbose && print "system status $val $status\n";
    if ( $val == 2 ) {
        push @oks, "system $status";
        }
    elsif ( $val == 3 || $val == 4 ) {
        push @warns, "system $status";
        }
    elsif ( $val == 5 || $val == 6 ) {
        push @crits, "system $status";
        }
    else {
        push @unknowns, "system $status";
        }
    $systemStatus = $val;

    # systemVoltage, displayed as the raw reading divided by 1000
    $val = $result->{ $variables{ 'systemVoltage' } };
    push @oks, sprintf( "%0.2f VDC", $val / 1000 );

    # systemCurrent, displayed as the raw reading divided by 1000
    $val = $result->{ $variables{ 'systemCurrent' } };
    push @oks, sprintf( "%0.2f A", $val / 1000 );


    # systemUsedCapacity, compared against the -c / -w percentages
    $val = $result->{ $variables{ 'systemUsedCapacity' } };
    if ( $val > $crit_load ) {
        push @crits, sprintf( "%d %%", $val );
        }
    elsif ( $val > $warn_load ) {
        push @warns, sprintf( "%d %%", $val );
        }
    else {
        push @oks, sprintf( "%d %%", $val );
        }


    # psInputLineAVoltage
    # seems to be three for 3 phase systems.  On ours they're all the same.
    # Readings are compared in thousandths of a volt against a 208 V nominal:
    # outside +/-12% is critical, outside +/-8% is a warning.
    # Tom Hartley says 8% is a good threshold
    my $nominal_vac = 208;
    my $crit_low  = $nominal_vac * 0.88 * 1000;
    my $crit_high = $nominal_vac * 1.12 * 1000;
    my $warn_low  = $nominal_vac * 0.92 * 1000;
    my $warn_high = $nominal_vac * 1.08 * 1000;

    $ninputs = 0;
    foreach $var (
            "psInputLineAVoltage",
            "psInputLineBVoltage",
            "psInputLineCVoltage" ) {
        $val = $result->{ $variables{ $var } };
        if ( $val == 0 ) {
            # This input doesn't exist, or is off.  Depends on how it's wired.
            # ignore it?
            }
        elsif ( $val < $crit_low || $val > $crit_high ) {
            push @crits, sprintf( "%0.1f VAC", $val / 1000 );
            }
        elsif ( $val < $warn_low || $val > $warn_high ) {
            push @warns, sprintf( "%0.1f VAC", $val / 1000 );
            }
        else {
            push @oks, sprintf( "%0.1f VAC", $val / 1000 );
            # only inputs inside the warning band count as usable
            $ninputs++;
            }
        }
    if ( $ninputs < 2 ) {
        push @warns, "running on only $ninputs ac inputs";
        }


    # psTemperature1
    # psTemperature2
    # I don't have the SCU+ optional temperature sensors to see how this
    # would look.

    # The SCU+ knows each rectifier's temperature,
    # and shows it in the web interface,
    # but doesn't have it in the mib


    # psStatusCommunication
    $val = $result->{ $variables{ 'psStatusCommunication' } };
    my %ps_comm_statuses = (
        0 => 'unknown',
        1 => 'unknown',
        2 => 'normal',
        3 => 'interrupt',
        4 => 'unknown',
        );
    $status = exists $ps_comm_statuses{ $val }
        ? $ps_comm_statuses{ $val } : 'unknown';
    $verbose && print "ps comm status $val $status\n";
    if ( $val == 2 ) {
        push @oks, "ps comm $status";
        }
    elsif ( $val == 3 ) {
        push @crits, "ps comm $status";
        }
    else {
        # label it as the ps comm reading, not as the system status
        push @unknowns, "ps comm $status";
        }


    # psStatusBatteryMode
    $val = $result->{ $variables{ 'psStatusBatteryMode' } };
    my %psStatusBatteryModes = (
        0 => 'unknown',
        1 => 'unknown',
        2 => 'FloatCharging',
        3 => 'ShortTest',
        4 => 'BoostChargingForTest',
        5 => 'ManualTesting',
        6 => 'PlanTesting',
        7 => 'ACFailTesting',
        8 => 'ACFail',
        9 => 'ManualBoostCharging',
        10 => 'AutoBoostCharging',
        11 => 'CyclicBoostCharging',
        12 => 'MasterBoostCharging',
        13 => 'MasterBateryTesting',
        14 => 'unknown',
        );
    $status = exists $psStatusBatteryModes{ $val }
        ? $psStatusBatteryModes{ $val } : 'unknown';
    $verbose && print "battery $val $status\n";
    if ( $val == 2 ) {
        push @oks, "battery $status";
        }
    elsif ( $val < 2 || $val > 13 ) {
        push @unknowns, "battery status $status";
        }
    elsif ( $val == 7 || $val == 8 ) {
        push @crits, "battery $status";
        }
    else {
        # every remaining test / boost charging mode
        push @warns, "battery $status";
        }



    # alarms
    # reading the alarm table will time out if there aren't any
    if ( $systemStatus != 2 ) {

        my $alarmTrapTable      = "$emerson.2.1.4";
        # 1 alarmTrapNo Counter32,
        # 2 alarmTime DateAndTime,
        # 3 alarmStatusChange StatusChange,
        # 4 alarmSeverity Status,
        # 5 alarmDescription DisplayString,
        # 6 alarmType Integer32
        $result = $session->get_table( -baseoid => "$alarmTrapTable" );
        if ( ! defined( $result ) ) {
            push @unknowns, $session->error();
            $session->close();
            return;
            }

        # Regroup the flat oid => value list by row.  The last two oid
        # components are taken as column and row.  A hash is used because
        # the row index comes from the device and may be arbitrarily large.
        $n = 0;
        foreach $oid ( sort keys %$result ) {
            $val = $result->{ $oid };
            $verbose && print "$oid $val\n";
            next if ( $oid !~ m/\.(\d+)\.(\d+)$/ );
            $col = $1;
            $row = $2;
            $alarms{ $row }{ $col } = $val;
            $n++ if ( $col == 1 );
            }

        # Every row that has an alarmTrapNo is reported as critical, whatever
        # its alarmSeverity column says.  alarmTime is left out on purpose:
        # the controller doesn't have ntp, so the times are always wrong.
        if ( $n > 0 ) {
            push @crits, "$n alarms";
            foreach $row ( sort { $a <=> $b } keys %alarms ) {
                next if ( ! defined $alarms{ $row }{ 1 } );
                push @crits, defined $alarms{ $row }{ 5 }
                    ? $alarms{ $row }{ 5 }
                    : "alarm $row (no description)";
                }
            }
        }

    $session->close();
    return;
    }



