#!/usr/local/bin/perl
#
# nagios check of the cpu link states reported by "conman sc"
#
# by Doke Scott, doke@udel.edu, 4 Feb 2004
#
# Columns of the "conman sc" output:
#
# Heading     Description
# =========   ======================================================
# CPUID       The name of the cpu to which this information applies.
# RUN         The run number of the Production Control file (Symphony).
# NODE        The node type, and cpu type.  Node types are: MPEV, MPIX,
#             and UNIX.  Cpu types are: MASTER, SLAVE, REMOTE, FTA, and
#             X-AGENT.
# LIMIT       The Maestro job limit.
# FENCE       The Maestro job fence.
# DATE TIME   The date and time batchman started executing the
#             Production Control file (Symphony).
# STATE       The state of the cpu link from your login cpu (where
#             conman is running) to CPUID.  The six character positions
#             are displayed as follows:
#                 L[D]I J[W]
#                  [T]   [M]
#             L   The link is open (linked).
#             D   The link is DS.
#             T   The link is TCP/IP.
#             I   Jobman has completed startup initialization.
#             J   Jobman is running.
#             W   The cpu is linked via TCP/IP.
#             M   The cpu is linked via DS.
#
# Note: the table above is incomplete.  The D/T and W/M positions can
# also be H.
use strict;
use warnings;
use Getopt::Long;

# Nagios plugin exit codes.  UNKNOWN must be 3; "exit -1" becomes status 255,
# which nagios reports as an out-of-bounds return code.
use constant {
    STATE_OK       => 0,
    STATE_WARNING  => 1,
    STATE_CRITICAL => 2,
    STATE_UNKNOWN  => 3,
};

my $conman_cmd  = '/opt/tws/TWS/bin/conman sc';
my $config_file = "/usr/local/nagios/etc/check_conman.cfg";
$ENV{PATH} = "/usr/bin";

my $verbose = 0;
my $help    = 0;

# Shared state filled in by the subs below:
#   @warn_errors, @crit_errors, @unknown_errors  messages, by severity
#   %config    host => state pattern, from the config file
#   %ok_hosts  host => 1 for every configured host whose state is acceptable
#   %states    host => state string as reported by conman
my ( @warn_errors, @crit_errors, @unknown_errors,
    %config, %ok_hosts, %states );

###########################

# Print the command line synopsis and exit with the nagios UNKNOWN status.
sub usage {
    print qq{Usage: $0 [-hv] [-f <config file>]\n},
        qq{    -f <config file>  host/state-pattern file (default $config_file)\n},
        qq{    -v                verbose; repeat for more\n},
        qq{    -h                this help\n};
    exit STATE_UNKNOWN;
}

Getopt::Long::Configure( "bundling" );
GetOptions(
    'f=s' => \$config_file,
    'v+'  => \$verbose,
    'h'   => \$help,
) or usage();
usage() if $help;

read_config( $config_file );
run_conman();
find_missing();

# Report the collected messages; the most severe category present decides
# the exit code.
my $rc = STATE_OK;
if ( @crit_errors ) {
    print "CRITICAL: ", join( ", ", @crit_errors ), " ";
    $rc = STATE_CRITICAL;
}
if ( @warn_errors ) {
    print "Warning: ", join( ", ", @warn_errors ), " ";
    $rc = STATE_WARNING if $rc == STATE_OK;
}
if ( @unknown_errors ) {
    print "Unknown: ", join( ", ", @unknown_errors ), " ";
    $rc = STATE_UNKNOWN if $rc == STATE_OK;
}
elsif ( $rc == STATE_OK ) {
    my $nlinks = scalar( keys %ok_hosts );
    print "OK: $nlinks hosts linked";
}
print "\n";
exit $rc;

##################################

# read_config( $config_file )
#
# Load %config from $config_file.  Each non-blank, non-comment line is
#     <host> <state pattern>
# where the pattern is later used as a regex against the host's conman
# state (see check_host).  Exits with UNKNOWN if the file is missing, is not
# a plain file, has any of the permission bits in 0133 set, or cannot be
# opened.  Duplicate hosts and unparsable lines are recorded in
# @warn_errors / @unknown_errors and otherwise skipped.
sub read_config {
    my ( $config_file ) = @_;

    if ( ! -e $config_file ) {
        print "no config file!\n";
        exit STATE_UNKNOWN;
    }
    elsif ( ! -f _ ) {
        print "config file is not a plain file: $config_file\n";
        exit STATE_UNKNOWN;
    }

    # "_" reuses the stat buffer filled in by the -e test above.
    my $mode = ( stat( _ ) )[2];
    if ( $mode & 0133 ) {
        print "config file permissions are too open: $config_file\n";
        exit STATE_UNKNOWN;
    }

    my $fh;
    if ( ! open( $fh, '<', $config_file ) ) {
        print "can't read-open config file $config_file: $!\n";
        exit STATE_UNKNOWN;
    }

    while ( my $line = <$fh> ) {
        next if $line =~ m/^\s*#/ || $line =~ m/^\s*$/;

        my $lineno = $.;
        if ( $line =~ m/^ (\S+) \s+ (\S.*)/x ) {
            my ( $host, $state ) = ( $1, $2 );
            if ( exists $config{ $host } ) {
                # first definition wins
                push @warn_errors,
                    "Warning: $config_file line $lineno repeats host $host";
                next;
            }
            $state =~ s/\s+$//;
            $config{ $host } = $state;
        }
        else {
            push @unknown_errors,
                "Warning: can't parse $config_file line $lineno";
        }
    }
    close $fh;
    return;
}

# run_conman()
#
# Run $conman_cmd and parse its output.  Everything up to and including the
# "CPUID RUN ..." heading line is skipped.  For each following line that
# carries a date/time and a state column, the state is stored in %states and
# handed to check_host.  Lines with only the first six columns are ignored;
# anything else is recorded in @unknown_errors.  Exits with UNKNOWN if the
# command cannot be started.
sub run_conman {
    my $ph;
    # Single-string pipe open on purpose: the shell does the redirections.
    if ( ! open( $ph, '-|', "$conman_cmd < /dev/null 2>&1" ) ) {
        print "UNKNOWN -- unable to run $conman_cmd: $!\n";
        exit STATE_UNKNOWN;
    }

    my $in_header = 1;
    while ( my $line = <$ph> ) {
        print $line if $verbose;
        chomp $line;

        if ( $line =~ m/CPUID \s+ RUN \s/x ) {
            # heading line: data starts on the next line
            $in_header = 0;
        }
        elsif ( $in_header ) {
            # banner text before the heading line; skip it
        }
        elsif ( $line =~ m{^ (\w[\w\d+-]*) \s+ (\d+) \s+ (\S+) \s+ (\S+)
                \s+ (\d+) \s+ (\d+)
                \s+ (\d+/\d+/\d+ \s \d+:\d\d)
                \s+ ([L\s][DTH\s][I\s] \s+ [J\s][WMH\s]) \s}ix ) {
            # columns: cpuid, run, node type, cpu type, limit, fence,
            # date time, state
            my ( $host, $node2, $state ) = ( $1, $4, $8 );
            print "'$host' '$node2' '$state'\n" if $verbose;
            check_host( $host, $node2, $state );
            $states{ $host } = $state;
        }
        elsif ( $line =~ m/^ (\w[\w\d+-]*) \s+ (\d+) \s+ (\S+) \s+ (\S+)
                \s+ (\d+) \s+ (\d+) /ix ) {
            # no date/time or state columns; ignore it.  If the host is
            # configured, find_missing will report it.
        }
        else {
            print "can't parse $line\n" if $verbose;
            push @unknown_errors, "can't parse: $line";
        }
    }
    close $ph;

    # Without the heading line every line above was skipped as banner text,
    # so nothing was actually checked.
    push @unknown_errors, "no CPUID heading in output of $conman_cmd"
        if $in_header;
    return;
}

# check_host( $host, $node2, $state )
#
# Mark $host in %ok_hosts if its conman state is acceptable.  Hosts not in
# %config are ignored.  A state is acceptable if it matches the host's
# configured pattern, or has all of L, I and J set with D/T/H and W/M/H in
# the link-type positions; for a MASTER cpu type the L and both link-type
# positions may be blank.  An invalid configured pattern is recorded in
# @unknown_errors and only the built-in rules are applied.
sub check_host {
    my ( $host, $node2, $state ) = @_;

    return if ! exists $config{ $host };

    # Compile under eval so a typo in the config file cannot kill the check.
    my $pat = $config{ $host };
    my $re  = eval { qr/$pat/ };
    if ( ! defined $re ) {
        push @unknown_errors, "bad state pattern for host $host";
    }

    if ( ( defined $re && $state =~ $re )
        || ( $node2 =~ m/MASTER/
            && $state =~ m/[L\s][DTH\s]I \s J[WMH\s]/x )
        || $state =~ m/L[DTH]I \s J[WMH]/x ) {
        $ok_hosts{ $host } = 1;
        print "ok\n" if $verbose;
    }
    return;
}

# find_missing()
#
# Add a "<host> not linked" entry to @crit_errors for every configured host
# that check_host did not mark ok, followed by the state conman reported for
# it when one was seen.
sub find_missing {
    foreach my $host ( sort keys %config ) {
        next if exists $ok_hosts{ $host };

        my $error = "$host not linked";
        $error .= " " . $states{ $host } if exists $states{ $host };
        push @crit_errors, $error;
    }
    return;
}