#!/usr/local/bin/perl
# 
# check a sun 3510 disk array
# 

# Command line prefix used for every array query: sccli run through sudo.
my $sccli = "/opt/sfw/bin/sudo /usr/sbin/sccli";

# Battery expiration thresholds in seconds (days * hours * minutes * seconds).
my $crit_period = 7 * 24 * 60 * 60;   # closer than this: critical
my $warn_period = 30 * 24 * 60 * 60;  # closer than this: warning

###########################

use strict;
use warnings;
use Getopt::Long;
use POSIX;

# Command-line flags: -v (repeatable) raises verbosity, -h requests usage.
my $verbose = 0;
my $help = 0;

# Month abbreviation => zero-based month number, as passed to POSIX::mktime.
my %months;
@months{ qw( Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec ) } = ( 0 .. 11 );

# $wwn iterates over the command-line arguments; the message lists are
# filled by the check subs and printed at exit; $rc is the exit code.
my( $wwn, @crit_msgs, @warn_msgs, @unknown_msgs, @ok_msgs, $rc );

#############################

# Print the one-line usage summary and terminate the program with the
# exit code supplied by the caller.
sub usage { 
    my $exit_code = shift;

    print "Usage: $0 [-vh] <wwn>\n";
    exit $exit_code;
    }

# Parse options, check every array named on the command line, then print
# one Nagios status line and exit with the matching plugin return code
# (0 OK, 1 WARNING, 2 CRITICAL, 3 UNKNOWN).
Getopt::Long::Configure( 'bundling' );
GetOptions( 
    'v+' => \$verbose,
    'h' => \$help
    ) or usage( 3 );   # unrecognized option: usage error, reported as UNKNOWN
usage( 0 ) if ( $help );

# Without at least one <wwn> nothing can be checked.  That is a usage
# error, so exit UNKNOWN (3) instead of 1, which Nagios reads as WARNING.
usage( 3 ) if ( !@ARGV );

foreach $wwn ( @ARGV ) { 
    check_3510( $wwn );
    }


# Build the status line from the most to the least severe category.  The
# exit code is set by the most severe category that has messages.
$rc = 0;   # nagios ok exit code
my @report;
if ( @crit_msgs ) {
    push @report, "CRITICAL: " . join( ", ", @crit_msgs ) . " ";
    $rc = 2;
    }
if ( @warn_msgs ) {
    push @report, "Warning: " . join( ", ", @warn_msgs ) . " ";
    $rc = 1 if ( $rc == 0 );
    }
if ( @unknown_msgs ) {
    push @report, "Unknown: " . join( ", ", @unknown_msgs ) . " ";
    # Nagios UNKNOWN is 3; -1 would become exit status 255, which is
    # outside the range Nagios accepts.
    $rc = 3 if ( $rc == 0 );
    }
if ( $rc == 0 ) {
    # Nothing was flagged at all: list what was checked.
    push @report, "OK " . join( ", ", @ok_msgs );
    }
print join( "; ", @report ), "\n";
exit $rc;

##################################


# Run every health check against the array identified by $wwn.
# Returns early, without running any check, when find_dev yields no device.
# Otherwise the identifier is added to @ok_msgs and each show_* check is
# called in turn with the device path.
sub check_3510 { 
    my $wwn = shift;

    my $dev = find_dev( $wwn );
    if ( ! $dev ) {
        return;
        }
    print "dev $dev\n" if ( $verbose );

    push @ok_msgs, $wwn;

    # The checks run in this fixed order; each one records its own messages.
    my @checks = (
        \&show_disks,
        \&show_logical_drives,
        \&show_enclosure_status,
        \&show_redundancy_mode,
        \&show_peripheral_device_status,
        \&show_battery_status,
        );
    foreach my $check ( @checks ) {
        $check->( $dev );
        }
    }



# Find the device path that sccli uses for the array identified by $wwn.
#
# Runs "sccli --list" and returns the device path from the first line that
# carries the identifier.  The identifier is either embedded in a
# /dev/rdsk/c#t...d# name (with or without a slice suffix) or follows
# "SN#" in a "Sun StorEdge 3510" description.
#
# An Unknown message is recorded and nothing (undef in scalar context) is
# returned when the command cannot be run, when the output reports a
# sudoers/setuid problem, or when no line matches.
sub find_dev { 
    my( $wwn ) = @_;

    my $cmd = "$sccli --list 2>&1";
    $verbose && print "cmd $cmd |\n";
    my $ph;
    if ( ! open( $ph, '-|', $cmd ) ) { 
        push @unknown_msgs, "can't run sccli --list";
        return;
        }

    # The identifier comes from the command line.  Match it literally, so
    # regex metacharacters cannot change the pattern.  This also protects
    # whitespace and '#', which /x would otherwise swallow.
    my $id = quotemeta( $wwn );
    my $dev;
    while ( my $line = <$ph> ) { 
        $verbose && print ">$line";
        # /dev/rdsk/c4t600C0FF0000000000984FC38B143E700d0s2
        if ( $line =~ m!^(/dev/rdsk/c\d+ t[\da-f]{16} $id [\da-f]{8} 
                \d\d d\d+) \s!ix ) {
            $dev = $1;
            }
        elsif ( $line =~ m!^(/dev/rdsk/c\d+ t[\da-f]{16} $id [\da-f]{8} 
                \d\d d\d+ s\d) \s!ix ) {
            $dev = $1;
            }
        # The '#' must be escaped: under /x a bare '#' starts a comment,
        # which removed the serial number from the pattern and let any
        # 3510 line match.
        elsif ( $line =~ m!^(/dev/rdsk/c\d+ t\d+ d\d+ s\d+) \s+ Sun \s+ StorEdge 
                \s+ 3510 \s+ SN\# $id!ix ) {
            $dev = $1;
            }
        elsif ( $line =~ m!is not in the sudoers file|must be setuid root!i ) { 
            # Report the error text itself, without its trailing newline,
            # and release the pipe before giving up.
            chomp $line;
            push @unknown_msgs, $line;
            close $ph;
            return;
            }
        last if ( defined $dev );
        }
    close $ph;
    return $dev if ( defined $dev );

    push @unknown_msgs, "can't find dev for $wwn";
    return;
    }




# show disks
#sccli: selected device /dev/rdsk/c4t600C0FF0000000000984FC38B143E700d0s2 [SUN StorEdge 3510 SN#0984FC]
#Ch     Id      Size   Speed  LD     Status     IDs                      Rev  
#----------------------------------------------------------------------------
# 2(3)   0  279.40GB   200MB  ld0    ONLINE   M SEAGATE ST330000FSUN300G 055A 
#                                                   S/N 3450WWYB        
#                                                  WWNN 20000014C316C27F

# 2(3)   3       N/A   N/A    NONE   BAD        FUJITSU MAW3147FCSUN146G 1303 
#                                                   S/N 000707C0B7K4    
#                                                  WWNN 500000E01484EEC0

# 2     38       N/A   N/A    NONE   ABSENT     FUJITSU MAT3147F SUN146G 0602 
#                                                   S/N 000534C057G0    



# Check the state of every physical disk ("sccli <dev> show disks").
#
# A disk is healthy when its status is ONLINE, or when it is a GLOBAL
# spare in STAND-BY.  Any other status is recorded in @crit_msgs as
# "disk <id> <status>".  Lines that are neither a disk row nor a known
# header/continuation line are recorded in @unknown_msgs.  At the end a
# summary is added to @ok_msgs, or a critical message if no disk is online.
sub show_disks { 
    my( $dev ) = @_;
    my( $nonline, $nstandby ) = ( 0, 0 );

    my $cmd = "$sccli $dev show disks 2>&1";
    $verbose && print "cmd $cmd |\n";
    my $ph;
    if ( ! open( $ph, '-|', $cmd ) ) { 
        push @unknown_msgs, "can't run sccli show disks";
        return;
        }
    while ( my $line = <$ph> ) { 
        $verbose && print $line;
        # Columns: Ch  Id  Size  Speed  LD  Status ...
        if ( $line =~ m/^\s* \d(\(\d\))? \s+ (\d+) \s+ ([\d\.]+GB|N\/A) \s+ (\d+MB|N\/A) 
                \s+ (ld\d+|GLOBAL|NONE) \s+ (\S+) /ix ) { 
            my( $id, $ld, $status ) = ( $2, $5, $6 );
            if ( $status eq "ONLINE" ) { 
                $nonline++;
                }
            elsif ( $ld eq "GLOBAL" && $status eq "STAND-BY" ) { 
                # ok, it's a hot spare
                $nstandby++;
                }
            else { 
                push @crit_msgs, "disk $id $status";
                }
            }
        elsif ( $line =~ m/^sccli: selected/ || $line =~ m/^Ch\s+Id/ 
                || $line =~ m/^-----------/ 
                || $line =~ m!^\s+S/N [\w]+!i || $line =~ m/^\s+WWNN [\w]+/i )  { 
            # banner, header, separator and continuation lines: ignore
            }
        else { 
            # Strip the newline so the plugin output stays on one line.
            chomp $line;
            push @unknown_msgs, "unknown line in disks $line";
            }
        }
    close $ph;

    if ( $nonline > 0 ) { 
        push @ok_msgs, "$nonline disks online, $nstandby standby";
        }
    else { 
        push @crit_msgs, "no disks online";
        }
    }




#sccli> show logical-drives
#LD    LD-ID        Size  Assigned  Type   Disks Spare  Failed Status     
#------------------------------------------------------------------------
#ld0   38B143E7   1.64TB  Primary   RAID1  12    2      0      Good  
#                         Write-Policy Default          StripeSize 128KB
#ld1   68F3DB7D   1.36TB  Secondary RAID1  10    2      0      Good  
#                         Write-Policy Default          StripeSize 128KB


# Check every logical drive ("sccli <dev> show logical-drives").
#
# A drive row is critical when its Failed count is non-zero or when its
# status is anything other than "Good".  Rows that pass both tests are
# counted.  Lines that are not drive rows (banner, header, separator,
# Write-Policy continuation, anything else) are ignored.  At the end the
# count of good drives is added to @ok_msgs, or a critical message if
# there are none.
sub show_logical_drives { 
    my( $dev ) = @_;
    my $ngood = 0;

    my $cmd = "$sccli $dev show logical-drives 2>&1";
    $verbose && print "cmd $cmd |\n";
    my $ph;
    if ( ! open( $ph, '-|', $cmd ) ) { 
        push @unknown_msgs, "can't run sccli show logical-drives";
        return;
        }
    while ( my $line = <$ph> ) { 
        $verbose && print $line;
        # Columns: LD  LD-ID  Size  Assigned  Type  Disks  Spare  Failed  Status
        # The Failed column is matched with \d+ so that a count of ten or
        # more still matches the row instead of being skipped.
        next unless ( $line =~ m/^ (ld\d+) \s+ ([\da-f]{8}) \s+ [\d\.]+[MGT]B \s+ \w+ \s+ 
                RAID\d+ \s+ \d+ \s+ \d+ \s+ (\d+) \s+ (\S+) /ix );
        my( $ld, $nfailed, $status ) = ( $1, $3, $4 );
        if ( $nfailed != 0 ) { 
            push @crit_msgs, "$ld has $nfailed failed disks";
            }
        elsif ( $status ne "Good" ) { 
            push @crit_msgs, "$ld status is $status";
            }
        else { 
            $ngood++;
            }
        }
    close $ph;

    if ( $ngood > 0 ) { 
        push @ok_msgs, "$ngood logical-drives";
        }
    else { 
        push @crit_msgs, "no logical-drives";
        }
    }






#Ch  Id Chassis Vendor/Product ID    Rev  PLD  WWNN             WWPN
#-------------------------------------------------------------------------------
# 2  12 0984FC  SUN StorEdge 3510F A 1046 1000 204000C0FF0984FC 214000C0FF0984FC
#                                      Topology: loop(a)  Status:      OK
# 3  12 0984FC  SUN StorEdge 3510F A 1046 1000 204000C0FF0984FC 224000C0FF0984FC
#                                      Topology: loop(b)  Status:      OK
#Enclosure Component Status:
#     Type Unit Status   FRU P/N   FRU S/N   Add'l Data
#------------------------------------------------------------------
#      Fan 0    OK       370-6776  GM297N     --
#      Fan 1    OK       370-6776  GM297N     --
#     Temp 0    OK       370-5535  0984FC     temp=25
#  Voltage 0    OK       370-6776  GM297N     voltage=5.350V
# DiskSlot 0    OK       370-5535  0984FC     addr=0,led=off
# DiskSlot 11   OK       370-5535  0984FC     addr=11,led=off


# Check loop and enclosure component status
# ("sccli <dev> show enclosure-status").
#
# Channel rows supply the product and chassis used to label later
# messages.  A "Topology: ... Status:" line whose status is not OK, or a
# component row (Fan, Temp, Voltage, DiskSlot, ...) whose status is not
# OK, is recorded in @crit_msgs.  All other lines are ignored.
sub show_enclosure_status { 
    my( $dev ) = @_;
    # These keep the placeholder if a status line appears before any
    # channel row, so messages never interpolate undef.
    my( $product, $chassis ) = ( 'unknown', 'unknown' );

    my $cmd = "$sccli $dev show enclosure-status 2>&1";
    $verbose && print "cmd $cmd |\n";
    my $ph;
    if ( ! open( $ph, '-|', $cmd ) ) { 
        push @unknown_msgs, "can't run sccli show enclosure-status";
        return;
        }
    while ( my $line = <$ph> ) { 
        $verbose && print $line;
        # 2  12 0984FC  SUN StorEdge 3510F A 1046 1000 204000C0FF0984FC 214000C0FF0984FC
        if ( $line =~ m/^ \s* (\d+) \s+ (\d+) \s+ ([\da-f]{6}) \s+ SUN \s+ StorEdge 
                \s+ (351\w+) \s+ \w+ \s+ \d+ \s+ \d+ \s+ ([\da-f]+)
                \s+ ([\da-f]+) /ix ) { 
            ( $chassis, $product ) = ( $3, $4 );
            }
        # Topology: loop(a)  Status:      OK
        elsif ( $line =~ m/^ \s+ Topology: \s+ loop.(\w). \s+ Status: \s+ (\w+)/ix ) { 
            my( $loop, $status ) = ( $1, $2 );
            if ( $status ne "OK" ) { 
                push @crit_msgs, "$product $chassis loop $loop $status";
                }
            }
        # Temp 0    OK       370-5535  0984FC     temp=25
        elsif ( $line =~ m/^ \s+ (\w+) \s+ (\d+) \s+ (\w+) \s+ \S+ \s+ \S+ \s+ (.*)/x ){
            my( $type, $unit, $status, $extra ) = ( $1, $2, $3, $4 );
            if ( $status ne "OK" ) { 
                # Label with the product name, as the loop message does.
                # This used to print the literal word "product".
                push @crit_msgs, "$product $chassis $type $unit $status $extra";
                }
            }
        # Banner, header and separator lines need no action; any other
        # line is ignored too.
        }
    close $ph;
    }










# good
#sccli>  show redundancy-mode
# Primary controller serial number: 8052320
# Primary controller location: Upper
# Redundancy mode: Active-Active
# Redundancy status: Enabled
# Secondary controller serial number: 8052261

# bad
#sccli> show redundancy-mode
# Primary controller serial number: 8051463
# Primary controller location: Upper
# Redundancy mode: Active-Active
# Redundancy status: Failed
# Secondary controller serial number: 8051922


# Check controller redundancy ("sccli <dev> show redundancy-mode").
#
# A redundancy mode other than "Active-Active", or a redundancy status
# other than "Enabled", is recorded in @crit_msgs.  The banner and the
# controller serial number/location lines are skipped.  Any other line is
# recorded in @unknown_msgs.
sub show_redundancy_mode { 
    my( $dev ) = @_;

    my $cmd = "$sccli $dev show redundancy-mode 2>&1";
    $verbose && print "cmd $cmd |\n";
    my $ph;
    if ( ! open( $ph, '-|', $cmd ) ) { 
        push @unknown_msgs, "can't run sccli show redundancy-mode";
        return;
        }
    while ( my $line = <$ph> ) { 
        $verbose && print $line;
        # Drop the newline so it cannot end up inside a quoted message.
        chomp $line;
        if ( $line =~ m/sccli:.selected.device|controller serial number|controller location/ ) { 
            # informational lines: ignore
            }
        elsif ( $line =~ m/^ \s* Redundancy \s+ mode: \s+ (\S+) /ix ) { 
            my $mode = $1;
            if ( $mode ne 'Active-Active' ) { 
                push @crit_msgs, "redundancy mode $mode";
                }
            }
        elsif ( $line =~ m/^ \s* Redundancy \s+ status: \s+ (\S+) /ix ) { 
            my $status = $1;
            if ( $status ne 'Enabled' ) { 
                push @crit_msgs, "redundancy status $status";
                }
            }
        else { 
            push @unknown_msgs, "unknown line '$line'";
            }
        }
    close $ph;
    }






#sccli>  show peripheral-device-status
# Item                                Value      status
#-------------------------------------------------------------
# CPU Temp Sensor(primary)            53.00C    within safety range
# Board1 Temp Sensor(primary)         54.00C    within safety range
# Board2 Temp Sensor(primary)         61.00C    within safety range
# +3.3V Value(primary)                3.416V    within safety range
# +5V Value(primary)                  5.180V    within safety range
# +12V Value(primary)                 12.564V   within safety range
# Battery-Backup Battery(primary)     --        OK
# CPU Temp Sensor(secondary)          41.00C    within safety range
# Board1 Temp Sensor(secondary)       49.00C    within safety range
# Board2 Temp Sensor(secondary)       54.00C    within safety range
# +3.3V Value(secondary)              3.368V    within safety range
# +5V Value(secondary)                5.072V    within safety range
# +12V Value(secondary)               12.260V   within safety range
# Battery-Backup Battery(secondary)   --        OK




#* peripheral-device-status 
#
# Item                                Value      status
#-------------------------------------------------------------
# CPU Temp Sensor(primary)            57.50C    within safety range
# Board1 Temp Sensor(primary)         54.00C    within safety range
# Board2 Temp Sensor(primary)         60.00C    within safety range
# +3.3V Value(primary)                3.416V    within safety range
# +5V Value(primary)                  5.126V    within safety range
# +12V Value(primary)                 12.442V   within safety range
# Battery-Backup Battery(primary)     --        OK
# CPU Temp Sensor(secondary)          N/A       N/A
# Board1 Temp Sensor(secondary)       N/A       N/A
# Board2 Temp Sensor(secondary)       N/A       N/A
# +3.3V Value(secondary)              N/A       N/A
# +5V Value(secondary)                N/A       N/A
# +12V Value(secondary)               N/A       N/A
# Battery-Backup Battery(secondary)   --        OK


# Battery-Backup Battery(primary)     --        Warning

# Battery-Backup Battery(primary)     --        Expired

# Check controller sensors and backup batteries
# ("sccli <dev> show peripheral-device-status").
#
# Each "<item>(primary|secondary)  <value>  <status>" row is classified by
# its status.  A status containing "warning" goes to @warn_msgs.  A status
# that does not start with "within safety range" or "OK" goes to
# @crit_msgs.  The banner, column header and separator lines are skipped.
# Any other line is recorded in @unknown_msgs.
sub show_peripheral_device_status { 
    my( $dev ) = @_;

    my $cmd = "$sccli $dev show peripheral-device-status 2>&1";
    $verbose && print "cmd $cmd |\n";
    my $ph;
    if ( ! open( $ph, '-|', $cmd ) ) { 
        push @unknown_msgs, "can't run sccli show peripheral-device-status";
        return;
        }
    while ( my $line = <$ph> ) { 
        $verbose && print $line;
        # Drop the newline so it cannot end up inside a quoted message.
        chomp $line;
        if ( $line =~ m/sccli:.selected.device|Item \s+ Value \s+ status|^-+$/ix ) { 
            # banner, header and separator lines: ignore
            }
        elsif ( $line =~ m/^ \s* (\S.*\((?:primary|secondary)\)) 
                \s+ ([\d\.]+[CV]|--|N\/A) 
                \s+ (within.safety.range|OK|N\/A|\w+)/ix ) { 
            my( $item, $value, $status ) = ( $1, $2, $3 );
            if ( $status =~ m/warning/i ) { 
                push @warn_msgs, "peripheral-device-status $item $value $status";
                }
            elsif ( $status !~ m/^within safety range|^OK/i ) { 
                push @crit_msgs, "peripheral-device-status $item $value $status";
                }
            }
        else { 
            push @unknown_msgs, "unknown line '$line'";
            }
        }
    close $ph;
    }





#sccli>  show battery-status

# Upper Battery Type: 1
# Upper Battery Manufacturing Date: Wed Jul 20 08:00:00 2005
# Upper Battery Placed In Service:  Wed Sep  7 03:17:14 2005
# Upper Battery Expiration Date:    Fri Sep  7 03:17:14 2007
# Upper Battery Status: Expired
#
#
# Lower Battery Type: 1
# Lower Battery Manufacturing Date: Wed Jul 20 08:00:00 2005
# Lower Battery Placed In Service:  Wed Sep  7 03:17:13 2005
# Lower Battery Expiration Date:    Fri Sep  7 03:17:13 2007
# Lower Battery Status: Expired

# Upper Battery Type: 1
# Upper Battery Manufacturing Date: Wed Aug 31 08:00:00 2005
# Upper Battery Placed In Service:  Thu Sep 22 22:57:11 2005
# Upper Battery Expiration Date:    Sat Sep 22 22:57:11 2007
# Upper Battery Status: Warning
#
#
# Lower Battery Type: 1
# Lower Battery Manufacturing Date: Wed Aug 31 08:00:00 2005
# Lower Battery Placed In Service:  Thu Sep 22 22:57:11 2005
# Lower Battery Expiration Date:    Sat Sep 22 22:57:11 2007
# Lower Battery Status: Warning
#



# Check battery expiration and status ("sccli <dev> show battery-status").
#
# The "Expiration Date" line of each battery is parsed into an epoch time.
# If it falls within $crit_period seconds from now (or is already past) a
# critical message is recorded; within $warn_period, a warning.  A date
# that cannot be parsed or converted is recorded in @unknown_msgs.  The
# "Status" line of each battery gives a warning when it contains
# "warning" and a critical message when it is anything other than "OK".
# Type, manufacturing date, in-service date and blank lines are skipped.
# Any other line is recorded in @unknown_msgs.
sub show_battery_status { 
    my( $dev ) = @_;

    my $cmd = "$sccli $dev show battery-status 2>&1";
    $verbose && print "cmd $cmd |\n";
    my $ph;
    if ( ! open( $ph, '-|', $cmd ) ) { 
        push @unknown_msgs, "can't run sccli show battery-status";
        return;
        }
    # Take the reference time once so both thresholds use the same "now".
    my $now = time();
    while ( my $line = <$ph> ) { 
        $verbose && print $line;
        chomp $line;
        if ( $line =~ m/sccli:.selected.device/i ) { 
            # ignore
            }
        elsif ( $line =~ m/\w+ Battery Type: *\w+/i
                || $line =~ m/\w+ Battery Manufacturing Date: *\w.+/i
                || $line =~ m/\w+ Battery Placed In Service: *\w.+/i ) { 
            # informational lines: ignore
            }
        elsif ( $line =~ m/(\w+) Battery Expiration Date: *(\w.+)/i ) { 
            my( $battery, $date ) = ( $1, $2 );
            my $expires;
            # Fri Sep  7 03:17:14 2007
            if ( $date =~ m/^ \s* \w+ \s+ (\w+) \s+ (\d+) \s+ 
                    (\d+):(\d+):(\d+) \s+ (\d+) \s*$/ix ) { 
                my( $mon, $day, $hour, $min, $sec, $year ) 
                    = ( $1, $2, $3, $4, $5, $6 );
                # The pattern is case-insensitive but the table keys are
                # capitalized, so normalize before the lookup.
                $mon = ucfirst( lc( $mon ) );
                if ( exists $months{ $mon } ) { 
                    # mktime returns undef when it cannot convert the date.
                    $expires = POSIX::mktime( $sec, $min, $hour, $day, 
                        $months{ $mon }, $year - 1900 );
                    }
                }
            if ( ! defined $expires ) { 
                # Unparseable text, unknown month or mktime failure.  Report
                # it instead of comparing undef against the thresholds.
                push @unknown_msgs, 
                    "can't parse expire time of $battery battery '$date'";
                }
            else { 
                $verbose && print "parsed time $expires = ", 
                    scalar( localtime( $expires ) ), "\n";
                if ( $now + $crit_period > $expires ) { 
                    push @crit_msgs, "$battery battery expires at $date";
                    }
                elsif ( $now + $warn_period > $expires ) { 
                    push @warn_msgs, "$battery battery expires at $date";
                    }
                }
            }
        elsif ( $line =~ m/(\w+) Battery Status: *(\w.+)/i ) { 
            my( $battery, $status ) = ( $1, $2 );
            # Trailing blanks would make a healthy "OK" fail the test below.
            $status =~ s/\s+$//;
            if ( $status =~ m/warning/i ) { 
                push @warn_msgs, "$battery battery $status";
                }
            elsif ( $status !~ m/^OK$/i ) { 
                push @crit_msgs, "$battery battery $status";
                }
            }
        elsif ( $line =~ m/^\s*$/ ) { 
            # blank line: ignore
            }
        else { 
            push @unknown_msgs, "unknown line '$line'";
            }
        }
    close $ph;
    }

