#!/usr/local/bin/perl
# 
# Check LACP links on Juniper routers and switches

# nagios: -epn
# Above line tells nagios not to use ePN.
# Must be in first 10 lines of file.
# Need this due to Net::SSH::Perl library

#
# $Header: /home/doke/work/nagios/RCS/check_juniper_lacp,v 1.12 2016/07/14 14:38:55 doke Exp doke $
#
#

use strict;
use warnings;
no warnings 'uninitialized';
no warnings 'redefine';
use Net::SSH2;
use Getopt::Long;
use XML::DOM;
use DBI;
#use Data::Dumper;

use vars qw( $hostname $target_interface @ignore_interfaces $login
    $passwdfile $verbose $help @names @tags %values @crits @warns @unknowns
    @oks @ignores $rc $sep $oper_down_window $dbname $dbuser $dbpasswd_file
    %last_ups %ignore_interfaces $hostaddr %optics $max_retries $ssh2
    $timeout );

# Command line defaults: target device, interface filter and ssh login.
( $hostname, $target_interface ) = ( '', '' );
$login      = "nagios";
$passwdfile = "/usr/local/nagios/etc/nagios.pw";

# Database holding the per-port history, and the file with its password.
( $dbname, $dbuser ) = ( 'nagios', 'nagios' );
$dbpasswd_file = "/usr/local/nagios/etc/nagios.pw";

# Number of seconds (30 days) an interface must have been continuously
# operationally down before it is considered long-term down.  Because the
# default "last_up" time is 1 second after the epoch, interfaces that have
# never been up fall outside this window as well.
$oper_down_window = 30 * 86400;

# ssh tuning: connection attempt limit, and the timeout in seconds (it is
# multiplied by 1000 when handed to Net::SSH2).
$max_retries = 3;
$timeout     = 30;

#################

# Print the usage message for this script and exit.
#
# Arguments:
#   $status  optional exit status.  When omitted the script exits with 3,
#            the nagios UNKNOWN code, because a plugin that could not even
#            parse its arguments has no meaningful result.  (The old
#            "exit -1" was truncated to 255, which is outside the range of
#            nagios plugin return codes.)
#
# Never returns.
sub usage {
    my( $status ) = @_;

    $status = 3 if ( ! defined $status );

    print "Usage: $0 [options] -H <switch-name>
    -i <interface>   name of interface to check [default all]
    -I <interface>   name of interface to ignore [default none] can use multiple times
    -l <login>    A login name accepted by the target router.
    -p <password file>  file containing the password for the login name.
    -v    increase verbosity
    -h    help
";
    exit $status;
    }

# check arguments
# GetOptions returns false when it met an unknown option or a missing
# value; stop with the usage message instead of running the check with a
# command line we did not understand.
Getopt::Long::Configure( "bundling" );
GetOptions(
    'H=s' => \$hostname,
    'i=s' => \$target_interface,
    'I=s' => \@ignore_interfaces,
    'l=s' => \$login,
    'p=s' => \$passwdfile,
    'v+' => \$verbose,
    'h' => \$help,
    ) or usage();
usage( 0 ) if ( $help );

# the device name, login and password file are all mandatory
$hostname || usage();
$login || usage();
$passwdfile || usage();

# until the database lookup says otherwise, connect to the name we were given
$hostaddr = $hostname;

check_juniper_lacp();


# Print the one-line plugin result and exit.  Each non-empty result list
# contributes one "; "-separated section.  For the exit code, critical
# outranks warning, which outranks unknown; 0 means nothing was wrong.
$rc  = 0;
$sep = '';
if ( @crits ) {
    print "CRITICAL " . join( ", ", @crits );
    $rc  = 2;
    $sep = '; ';
    }
if ( @warns ) {
    print $sep . "Warning " . join( ", ", @warns );
    $rc  ||= 1;
    $sep = '; ';
    }
if ( @unknowns ) {
    print $sep . "Unknown " . join( ", ", @unknowns );
    $rc  ||= 3;
    $sep = '; ';
    }
# the Ok section is shown when all is well, or whenever verbose is on
if ( $verbose || $rc == 0 ) {
    print $sep . "Ok " . join( ", ", @oks );
    $sep = '; ';
    }
if ( @ignores ) {
    print $sep . "Ignoring " . join( ", ", @ignores );
    }
print "\n";
exit $rc;


###########################

# Check the LACP state of the member links on the target device.
#
# Steps:
#   1. load port history and monitoring overrides with load_db_data()
#   2. run "show interface terse" to record the admin/oper status and the
#      description of each fe-/ge-/xe- interface in %optics, adding every
#      interface that is not admin up to %ignore_interfaces
#   3. run "show lacp interfaces" and classify each member link by its
#      lacp-mux-state
#
# Takes no arguments.  Results are reported through the global lists
# @oks, @warns and @unknowns; the return value is not meaningful.
sub check_juniper_lacp {
    my( $stdout, $cmd, $path, $interface, $unit, $lacp_state, $descr,
        $admin, $oper, $ago );

    $verbose && print "check_juniper_lacp()\n";

    # Load the last_up and monitor values for each port on this device from
    # the mysql database maintained by check_switch.  If a port has never
    # been up (or not in a really long time), then we ignore it.
    load_db_data();

    # convert interfaces to ignore to a hash for easy lookup
    if ( ! $target_interface ) {
        foreach $interface ( @ignore_interfaces ) {
            $ignore_interfaces{ $interface } = 1;
            }
        }

    ssh2_setup();

    # find out which interfaces are admin down, so we can ignore them
    $verbose && print "\ngetting admin status of each interface\n";
    $cmd = "show interface terse | display xml";
    $stdout = ssh2_cmd( $cmd );
    if ( ! $stdout ) {
        # An empty reply must never end up being reported as "Ok".  If
        # nothing has recorded a problem yet, record one here.
        if ( ! @unknowns ) {
            push @unknowns, "no output from $hostname for '$cmd'";
            }
        return;
        }
    parse_xml( $stdout );
    foreach $path ( sort keys %values ) {
        if ( $path =~ m!-> ([fgx]e-\d+/\d+/\d+)$!i ) {
            # physical interface
            $interface = $1;
            $admin = $values{ $path }{ 'admin-status' };
            $oper = $values{ $path }{ 'oper-status' };
            $descr = $values{ $path }{ 'description' };
            $verbose && print "physical interface $interface $admin $descr\n";
            if ( $admin ne 'up' ) {
                $ignore_interfaces{ $interface } = 1;
                }
            $optics{ $interface }{ 'description' } = $descr;
            $optics{ $interface }{ 'admin' } = $admin;
            $optics{ $interface }{ 'oper' } = $oper;
            }
        elsif ( $path =~ m!-> ([fgx]e-\d+/\d+/\d+)\.(\d+)$!i ) {
            # Logical unit of a physical interface.  The port number may
            # have several digits (the pattern used to accept only one, so
            # units on ports 10 and above were silently skipped).
            $interface = $1;
            $unit = $2;
            $admin = $values{ $path }{ 'admin-status' };
            $oper = $values{ $path }{ 'oper-status' };
            $descr = $values{ $path }{ 'description' };
            $verbose && print "logical interface $interface.$unit $admin $descr\n";
            next if ( $admin ne 'up' );
            # collect the unit descriptions on the physical interface
            if ( $optics{ $interface }{ 'description' } ) {
                $optics{ $interface }{ 'description' } .= ' ' . $descr;
                }
            else {
                $optics{ $interface }{ 'description' } = $descr;
                }
            }
        }

    # get the lacp status
    $verbose && print "\ngetting lacp status\n";
    $cmd = "show lacp interfaces | display xml";
    $stdout = ssh2_cmd( $cmd );

    if ( $stdout =~ m/lacp subsystem not running - not needed by configuration/i ) {
        push @oks, "lacp not in use";
        return;
        }

    # parse_xml records an unknown itself if the reply is not usable xml
    parse_xml( $stdout );
    $ago = time() - $oper_down_window;
    foreach $path ( sort keys %values ) {
        $verbose && print "path: $path\n";
        next if ( $path !~ m!-> ([fgx]e-\d+/\d+/\d+)$!i );
        $interface = $1;

        if ( $target_interface ) {
            # only the requested interface is of interest
            if ( $interface ne $target_interface
                    && "$interface.0" ne $target_interface ) {
                $verbose && print "$interface does not match $target_interface\n";
                next;
                }
            }
        else {
            if ( exists $ignore_interfaces{ "$interface" }
                    || exists $ignore_interfaces{ "$interface.0" } ) {
                $verbose && print "$interface is to be ignored\n";
                next;
                }
            }

        $descr = $optics{ "$interface" }{ 'description' };
        $descr ||= '';
        $descr =~ s/\s+$//;

        $admin = $optics{ $interface }{ 'admin' };
        $admin ||= 'down';

        $oper = $optics{ $interface }{ 'oper' };
        $oper ||= 'down';

        $lacp_state = $values{ $path }{ 'lacp-mux-state' };

        $verbose && print "int $interface, lacp $lacp_state, descr $descr, admin $admin, oper $oper, last_up $last_ups{ $interface }\n";

        # NOTE: the two checks below only produce verbose output.  The
        # "next" statements are commented out, so the interface is still
        # evaluated afterwards.
        if ( $admin ne 'up' ) {
            $verbose && print "ignoring $interface, administratively down\n";
            #next;
            }

        if ( $oper ne 'up' && $last_ups{ $interface } < $ago ) {
            $verbose && print "ignoring $interface, operationally down since $last_ups{ $interface }\n";
            #next;
            }

        if ( $lacp_state eq 'Collecting distributing' ) {
            push @oks, "$interface ok";
            }
        else {
            push @warns, "$interface lacp state $lacp_state";
            }
        }
    return;
    }







# Parse the xml output of a router command into the global %values hash.
#
# Arguments:
#   $stdout  raw command output; anything before the first "<" and after
#            the last ">" is thrown away before parsing
#
# The globals @names, @tags and %values are reset on every call, then
# filled in by descend_tree().  If the text can't be parsed, a message is
# pushed onto @unknowns and %values is left empty.
sub parse_xml {
    my( $stdout ) = @_;
    my( $parser, $tree, $path, $tag, $msg, $parse_error );

    $verbose && print $stdout;
    $stdout =~ s/^[^<]*//s;   # trim off any leading junk
    $stdout =~ s/[^>]*$//s;   # trim off any trailing junk

    undef @names;
    undef @tags;
    undef %values;

    $parser = XML::DOM::Parser->new();
    eval {
        $tree = $parser->parse( $stdout );
        };
    $parse_error = $@;   # save it before anything else can clobber it
    #$verbose > 1 && print "tree: ", Dumper( $tree ), "\n\n";
    if ( ! $tree ) {
        $msg = "xml parser returned nothing";
        $verbose && print $msg, "\n";
        # the parser's own explanation is only useful when debugging
        $verbose && $parse_error && print "xml parser error: $parse_error\n";
        push @unknowns, $msg;
        return;
        }
    descend_tree( $tree, 0 );

    # XML::DOM trees contain circular references; break them explicitly so
    # the memory can be reclaimed.  Everything we need is in %values now.
    $tree->dispose();

    if ( $verbose > 1 ) {
        foreach $path ( keys %values ) {
            print "$path:\n";
            foreach $tag ( sort keys %{$values{ $path }} ) {
                printf "    %s %s\n", $tag, $values{ $path }{ $tag };
                }
            }
        }
    }





# Walk a DOM tree depth first and record every text value in %values.
#
# Arguments:
#   $node   the DOM node to process
#   $level  depth of $node, the top of the tree being 0
#
# $tags[ $level ] always holds the tag name of the node most recently seen
# at that depth.  $names[ $level ] starts out as the first tag seen at that
# depth, and is replaced by the text of a <name> or <aggregate-name> child,
# so that the " -> " joined path identifies the named object a value
# belongs to.  Each text value is stored as $values{ path }{ parent tag }.
sub descend_tree {
    my( $node, $level ) = @_;

    my $node_name = $node->getNodeName();
    $tags[ $level ] = $node_name;
    $names[ $level ] ||= $node_name;

    my $node_type = $node->getNodeType();
    $verbose > 1 && print "descend_tree tag $node_name, type $node_type\n";

    # node type 3 is a text node; its parent element names the value
    if ( $node_type == 3 && $level >= 2 ) {
        my $text = $node->getNodeValue();
        $text =~ s/\s+$//;    # drops the trailing newline too

        my $owner_tag = $tags[ $level - 1 ];
        my $top = $level - 2;

        # a name element renames the object that contains it
        if ( $owner_tag eq 'name' || $owner_tag eq 'aggregate-name' ) {
            $names[ $top ] = $text;
            }

        my $where = join( ' -> ', @names[ 0 .. $top ] );
        $values{ $where }{ $owner_tag } = $text;
        $verbose > 1 && print "- path $where, tag $node_name, type $node_type, value $text\n";
        }

    foreach my $kid ( $node->getChildNodes() ) {
        descend_tree( $kid, $level + 1 );
        }
    }








# Load the port history and monitoring overrides for this device from the
# database.
#
# Effects on globals:
#   $hostname, $hostaddr  one is looked up from the other in the sw table,
#                         falling back to what was given on the command line
#   %last_ups             last_up value of every port listed in swports
#   %ignore_interfaces    gains each port whose "monitor" option is off
#   @unknowns             gains a message if the database can't be opened
sub load_db_data {
    $verbose && print "load_db_data()\n";

    my $db_password = get_passwd( $dbpasswd_file );
    my $dbh = DBI->connect(
        "dbi:mysql:dbname=$dbname", $dbuser, $db_password,
        { AutoCommit => 0, RaiseError => 0, PrintError => 1 },
        );
    if ( ! $dbh ) {
        push @unknowns, "can't open database: " . $DBI::errstr;
        return;
        }

    my( $sql, @params, $rows );

    # Try to convert an ipaddr on the cmdline to a switch name that we
    # can lookup in the database port tables.
    if ( $hostname =~ m/^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/ ) {
        $hostaddr = $hostname;
        $sql = "select sw\n"
            . "\t    from sw\n"
            . "\t    where ipaddr = ?";
        @params = ( $hostaddr );
        $verbose && print "sql '$sql', ", join( ', ', @params ), "\n";
        ( $hostname ) = $dbh->selectrow_array( $sql, undef, @params );
        $hostname ||= $hostaddr;  # fallback to ip, monitorportopt won't work
        }
    else {
        $sql = "select ipaddr\n"
            . "\t    from sw\n"
            . "\t    where sw = ?";
        @params = ( $hostname );
        $verbose && print "sql '$sql', ", join( ', ', @params ), "\n";
        ( $hostaddr ) = $dbh->selectrow_array( $sql, undef, @params );
        $hostaddr ||= $hostname;  # fallback to name, and hope dns works
        }

    # remember when each port was last seen up
    $sql = "select port, last_up \n"
        . "\tfrom swports\n"
        . "\twhere sw = ?";
    @params = ( $hostname );
    $verbose && print "sql '$sql', ", join( ', ', @params ), "\n";
    $rows = $dbh->selectall_arrayref( $sql, undef, @params );
    for my $history ( @$rows ) {
        $verbose && print "< ", join( ', ', @$history ), "\n";
        my( $port, $last_up ) = @$history;
        $last_ups{ $port } = $last_up;
        }

    # load the monitorportopt stuff
    # If this is a standalone, ie cisco, we have the sw and maybe port.
    # If this is a stack, we have the name of the stack master, and maybe a
    # port, and this will be in the mangled switch port syntax.
    # So sw-ne37-211-20 ge-2/0/25 is mangled to sw-ne37-211-22 ge-x/0/25
    # But sw-ne37-110-3m f0/23 is in the normal format
    $sql = "select ss2.sw, ss2.idx, m.port, m.opt, m.val \n"
        . "\t    from swstk ss join swstk ss2 using( stk ) \n"
        . "\t\tjoin monitorportopt m on ss2.sw = m.sw  \n"
        . "\t    where ss.sw = ? or ss.sw = ?\n"
        . "\tunion\n"
        . "\tselect m2.sw, -1, m2.port, m2.opt, m2.val \n"
        . "\t    from monitorportopt m2 \n"
        . "\t    where m2.sw = ? or m2.sw = ?";
    @params = ( $hostname, "$hostname.nss", $hostname, "$hostname.nss" );
    $verbose && print "sql '$sql', ", join( ', ', @params ), "\n";
    $rows = $dbh->selectall_arrayref( $sql, undef, @params );

    my $loaded = 0;
    for my $option ( @$rows ) {
        $verbose && print "< ", join( ', ', @$option ), "\n";
        my( $sw_mangled, $idx, $port_mangled, $opt, $val ) = @$option;

        # A non-negative idx means a stack member, whose port is stored in
        # the mangled format: put the member index back in place of the x.
        my $port_name = $port_mangled;
        $port_name =~ s!-x/!-$idx/! if ( $idx >= 0 );

        next if ( $opt ne 'monitor' );

        if ( $val =~ m/^(?:1|t|y|on|yes|true)\s*$/i ) {
            # monitoring forced on: only counted, nothing is stored for it
            $verbose && print "setting \$ports_to_expect_up{ $port_name }\n";
            #$ports_to_expect_up{ $port_name } = 1;
            $loaded++;
            }
        elsif ( $val =~ m/^(?:f|false|n|no|off|0)\s*$/i ) {
            $verbose && print "setting \$ignore_interfaces{ $port_name }\n";
            $ignore_interfaces{ $port_name } = 1;
            $loaded++;
            }
        }
    $verbose && print "loaded $loaded rows from monitorportopt database\n";

    $dbh->disconnect();
    }








# Open an authenticated ssh session to the device and keep it in the
# global $ssh2.
#
# At most $max_retries attempts are made; each attempt creates a new
# Net::SSH2 object, connects to $hostaddr and logs in as $login with the
# password read from $passwdfile.  Failed attempts are only shown in
# verbose mode.  If every attempt fails, the message of the last failure
# is pushed onto @unknowns, so a problem that a retry recovered from does
# not spoil the result.
#
# Returns 1 when logged in, 0 otherwise.
sub ssh2_setup {
    my( $rport, $error_code, $error_name, $error_string, $passwd, $msg,
        $ok, $died );

    # Get ssh tcp port number if it exists
    $rport = ( getservbyname( 'ssh', 'tcp' ) )[2];
    $rport ||= 22;

    # A counted loop, so that "next" after a failure moves on to the next
    # attempt.  (The attempt counter used to never advance, which turned
    # any failure into an endless loop.)
    foreach my $attempt ( 1 .. $max_retries ) {
        # create the ssh session
        $verbose && print "\ncreating Net::SSH2 object\n";
        $ssh2 = eval {
            Net::SSH2->new(
                timeout => $timeout * 1000,
                );
            };
        if ( ! $ssh2 ) {
            # there is no object to ask for an error, all we have is $@
            $died = $@ || 'unknown error';
            $died =~ s/\s+$//;
            $msg = "can't create ssh session to $hostname: $died";
            $verbose && print $msg, "\n";
            next;
            }

        if ( $verbose >= 2 ) {
            $ssh2->trace( -1 );
            $ssh2->debug( 1 );
            }
        #$ssh2->keepalive_config( 1, 15 );
        $ssh2->blocking( 1 );

        $verbose && print "\nconnecting to $hostaddr $rport\n";
        # a fresh status per step, so a die inside the eval reads as failure
        $ok = eval { $ssh2->connect( $hostaddr, $rport ) };
        $died = $@;
        if ( ! $ok ) {
            ( $error_code, $error_name, $error_string ) = $ssh2->error();
            $error_string ||= $died;
            $error_string =~ s/\s+$//;
            $msg = "can't ssh connect to $hostname: $error_string";
            $verbose && print $msg, "\n";
            next;
            }

        # log in; the password file is read once, when first needed
        $passwd = get_passwd( $passwdfile ) if ( ! defined $passwd );
        $verbose && print "tring ssh login as $login\n";
        #$verbose >= 2 && print "    password '$passwd'\n";
        $ok = eval { $ssh2->auth_password( $login, $passwd ) };
        $died = $@;
        if ( ! $ok ) {
            ( $error_code, $error_name, $error_string ) = $ssh2->error();
            $error_string ||= $died;
            $error_string =~ s/\s+$//;
            $msg = "can't login to $hostname: $error_string";
            $verbose && print $msg, "\n";
            next;
            }
        return 1;
        }

    # every attempt failed (or none was allowed)
    $msg ||= "no ssh connection attempts made to $hostname";
    push @unknowns, $msg;
    return 0;
    }







# Run one command on the device over the ssh session in the global $ssh2.
#
# Arguments:
#   $cmd  the command line to execute
#
# Returns the command's output as a single string.  On any failure
# (no channel, exec or eof refused, an exception, or no output at all),
# one message is pushed onto @unknowns and the empty string is returned,
# so an empty return value always comes with a recorded problem.
sub ssh2_cmd {
    my( $cmd ) = @_;
    my( $chan, $stdout, $error_code, $error_name, $error_string, $msg,
        $completed, $died );

    $verbose && print "ssh2_cmd( $cmd )\n";

    $stdout = '';
    $msg = '';
    # Note that "return" inside the eval only leaves the eval block; the
    # failure is carried out of it in $msg and reported below.
    $completed = eval {
        $chan = $ssh2->channel();
        if ( ! $chan ) {
            ( $error_code, $error_name, $error_string ) = $ssh2->error();
            $verbose && print "LIBSSH2_SESSION_BLOCK_INBOUND 1 \n";
            $verbose && print "LIBSSH2_SESSION_BLOCK_OUTBOUND 2\n";
            $verbose && print "block_directions: ", $ssh2->block_directions, "\n";
            $verbose && print "error_code $error_code, $error_name, $error_string\n";
            $msg = "can't ssh $hostname chan $cmd: $error_name $error_string";
            return 0;
            }
        $chan->blocking( 1 );
        if ( ! $chan->exec( $cmd ) ) {
            ( $error_code, $error_name, $error_string ) = $ssh2->error();
            $msg = "can't ssh $hostname exec $cmd: $error_string";
            return 0;
            }
        if ( ! $chan->send_eof() ) {
            ( $error_code, $error_name, $error_string ) = $ssh2->error();
            $msg = "can't ssh $hostname eof $cmd: $error_string";
            return 0;
            }

        while ( defined( my $line = <$chan> ) ) {
            $stdout .= $line;
            }
        1;
        };
    $died = $@;   # save it before anything else can clobber it
    $verbose && print "stdout: $stdout\n";

    if ( ! $completed && ! $msg ) {
        # something inside the eval died, e.g. there is no session at all
        $died =~ s/\s+$//;
        $msg = "ssh $hostname $cmd failed: $died";
        }
    elsif ( $completed && $stdout eq '' ) {
        # the command ran, but nothing came back
        $error_string = eval { ( $ssh2->error() )[2] };
        $msg = "no data from ssh $hostname $cmd: $error_string";
        }

    if ( $msg ) {
        $verbose && print $msg, "\n";
        push @unknowns, $msg;
        return '';
        }
    return $stdout;
    }







# Read a password from a file.
#
# Arguments:
#   $file  path of the password file
#
# Returns the first line of the file without its trailing newline (undef
# if the file is empty).  Dies if the file can't be opened.
sub get_passwd {
    my( $file ) = @_;

    # Three argument open with a lexical handle: the file name is taken
    # literally, so surrounding whitespace or characters such as ">" and
    # "|" in it can't be mistaken for an open mode.
    open( my $fh, '<', $file ) or die "can't open $file: $!\n";
    my $passwd = <$fh>;
    close $fh;
    chomp $passwd if ( defined $passwd );
    return $passwd;
    }



