Backport changes to branch_3_0
author Zdeněk Šustr <sustr4@cesnet.cz>
Tue, 6 Sep 2011 08:29:39 +0000 (08:29 +0000)
committer Zdeněk Šustr <sustr4@cesnet.cz>
Tue, 6 Sep 2011 08:29:39 +0000 (08:29 +0000)
org.glite.lb.nagios/Makefile
org.glite.lb.nagios/configure
org.glite.lb.nagios/src/LB-probe

index ac373ac..4c230e1 100644 (file)
@@ -3,19 +3,24 @@ stagedir=.
 package=emi-nagios-lb
 version=0.0.0
 prefix=
-LOCATION=/lib64/nagios/plugins/contrib/
 INSTALL=install
 
 -include Makefile.inc
 -include ${top_srcdir}/project/version.properties
 
 install:
-       mkdir -p ${DESTDIR}${PREFIX}${prefix}/lib64/nagios/plugins/contrib
-       ${INSTALL} -m 0755 src/LB-probe ${DESTDIR}${PREFIX}${prefix}/lib64/nagios/plugins/contrib/check_lb_server
+       mkdir -p ${DESTDIR}${PREFIX}/var/lib/grid-monitoring/emi.lb
+       chmod 750 ${DESTDIR}${PREFIX}/var/lib/grid-monitoring/emi.lb
+       -chown nagios:nagios ${DESTDIR}${PREFIX}/var/lib/grid-monitoring/emi.lb
+       mkdir -p ${DESTDIR}${PREFIX}${prefix}/libexec/grid-monitoring/probes/emi.lb
+       ${INSTALL} -m 0755 src/LB-probe ${DESTDIR}${PREFIX}${prefix}/libexec/grid-monitoring/probes/emi.lb
 
 stage:
        $(MAKE) install PREFIX=${stagedir}
 
+check:
+       @echo "No test"
+
 clean:
 
-.PHONY: all install stage clean
+.PHONY: all install stage clean check
index 0856391..5f94259 100755 (executable)
@@ -527,7 +527,7 @@ BEGIN{
        'lb.doc' => [ qw/tetex-latex:B/ ],
        'lb.logger' => [ qw/cppunit:B libtool:B/ ],
        'lb.logger-msg' => [ qw/cppunit:B activemq libtool:B/ ],
-       'lb.nagios' => [ qw/globus_essentials:R/ ],
+       'lb.nagios' => [ qw/globus_proxy_utils:R/ ],
        'lb.server' => [ qw/globus_essentials:R globus:B expat cares mysql:R mysql-server:R mysql-devel:B cppunit:B gsoap:B classads voms lcas gridsite bison:B libtool:B libxml2 flex:B/ ],
        'lb.state-machine' => [ qw/classads libtool:B libxslt:B expat:B/ ],
        'lb.utils' => [ qw/cppunit:B libtool:B/ ],
@@ -604,8 +604,9 @@ for my $jar (keys %need_jars) {
                lb.logger
        / ],
        'lb.nagios' => [ qw/
-               lb.client
-               lb.ws-test
+               lb.client:R
+               lb.ws-test:R
+               lb.utils:R
        / ],
        'lb.server' => [ qw/
                lb.ws-interface lb.types:B lb.common lb.state-machine
@@ -774,6 +775,7 @@ for my $ext (keys %deps_aux) {
                        default => {
                                globus_essentials=>'vdt_globus_essentials',
                                globus=>'globus',
+                               globus_proxy_utils=>'vdt_globus_essentials',
                                gridsite=>'org.gridsite.shared',
                                yaim_core=>'org.glite.yaim.core',
                                gip_release=>'glite-info-provider-release',
@@ -796,7 +798,7 @@ for my $ext (keys %deps_aux) {
                        },
                },
                etics_projects => {
-                       vdt=>[qw/globus globus_essentials gpt/],
+                       vdt=>[qw/globus globus_essentials globus_proxy_utils gpt/],
                        'org.glite'=>[qw/voms gridsite lcas gip_release gip_service bdii glite_version glite_info_templates glue_schema yaim_core/],
                },
                etics_locations => {
@@ -840,6 +842,7 @@ for my $ext (keys %deps_aux) {
                        default => {
                                globus_essentials=>'globus-gssapi-gsi',
                                globus=>'globus-gssapi-gsi-devel',
+                               globus_proxy_utils=>'globus-proxy-utils',
                                gridsite=>'emi.gridsite.shared',
                                yaim_core=>'emi.yaim.yaim-core',
                                yaim_bdii=>'emi.bdii.yaim-bdii',
@@ -864,6 +867,7 @@ for my $ext (keys %deps_aux) {
                        deb6_x86_64_gcc445 => {
                                globus_essentials => 'globus-gssapi-gsi4',
                                globus => 'libglobus-gssapi-gsi-dev',
+                               globus_proxy_utils=>'globus-proxy-utils',
                                axis => 'libaxis-java',
                                cares => 'libc-ares2',
                                cppunit => 'libcppunit',
index 51e5ded..5bd11a6 100755 (executable)
@@ -24,10 +24,11 @@ cat << EndHelpHeader
 Nagios probe for testing the status of L&B
 
 Tests called:
-    job registration
-    notification registration
-    logging events
-    receiving notifications
+    1. Register job
+    2. Register to receive notifications 
+    3. Log events
+    4. Check job state
+    5. Receive notifications
 
 Return values:
     0: Passed
@@ -36,19 +37,21 @@ Return values:
     3: Unknown
 
 Console output:
-    OK|<time to transit from Registered to Cleared>
+    OK|<time the test job took to transit from Registered to Cleared>
     WARNING: <reason>
     DOWN: <reason>
     UNKNOWN: <reason>
 
 EndHelpHeader
 
-       echo "Usage: $progname [-h] [-v[v[v]]] [server[:port]]"
+       echo "Usage: $progname [-h] [-v[v[v]]] {[-H server] [-p port] | [server[:port]]} [-t <timeout>] [-T <tmpdir>]"
        echo "Options:"
-       echo "    -h | --help       Show this help message."
-       echo "    -v[vv]            Verbosity level."
-       echo "    server:           Environmental variables are used if unspecified"
-       echo "    port:             Environmental variables or defaults are used if unspecified)"
+       echo "    -h | --help       Show this help message"
+       echo "    -v[vv]            Verbosity level"
+       echo "    -H <server>       server (Environmental variables are used if unspecified)"
+       echo "    -p <port>         port (Environmental variables or defaults are used if unspecified)"
+       echo "    -t <timeout>      Probe timeout in seconds"
+       echo "    -T <tmpdir>       Temporary directory (default /var/lib/grid-monitoring/emi.lb)"
        echo ""
 }
 
@@ -82,7 +85,7 @@ function check_binaries()
         do
                 check_exec $file
                 if [ $? -gt 0 ]; then
-                        vprintf 3 "\nfile $file not found\n"
+                        vprintf 2 "\nfile $file not found\n"
                         ret=1
                 fi
         done
@@ -93,7 +96,7 @@ function try_purge()
 {
                         joblist=$1
 
-                        vprintf 2 "\n[LB Probe] Trying to purge test job"
+                        vprintf 1 "\n[LB Probe] Trying to purge test job"
 
                         glite-lb-purge -j ${joblist} > /dev/null 2> /dev/null
                         rm ${joblist}
@@ -120,28 +123,43 @@ function log_cleared()
 }
 
 VERBLEVEL=0
+TIMEOUT=0
+TMPDIR="/var/lib/grid-monitoring/emi.lb"
 
 while test -n "$1"
 do
        case "$1" in
                "-h" | "--help") showHelp && exit 2 ;;
-               "-v" )  VERBLEVEL=$(( $VERBLEVEL + 1 )) ;;
+               "-v" | "--verbose")  VERBLEVEL=$(( $VERBLEVEL + 1 )) ;;
                "-vv" )  VERBLEVEL=$(( $VERBLEVEL + 2 )) ;;
                "-vvv" )  VERBLEVEL=$(( $VERBLEVEL + 3 )) ;;
+               "-H" | "--hostname") shift && SRVPORT="${1}$SRVPORT" ;;
+               "-p" | "--port") shift && SRVPORT="$SRVPORT:${1}" ;;
+               "-t" | "--timeout") shift && TIMEOUT=$1 ;;
+               "-T" | "--tmpdir") shift && TMPDIR=$1 ;;
                *) SRVPORT="$1" ;;
-#              "-t" | "--text")  setOutputASCII ;;
        esac
        shift
 done
 
 export VERBLEVEL
 
+# Arrange timeout
+if [ $TIMEOUT -gt 0 ]; then
+mypid=$$
+(trap 'exit' TERM; sleep $TIMEOUT; vprintf 0 "UNKNOWN: Probe timed out\n"; kill -s SIGINT $mypid; exit 3)&
+watchpid=$!
+
+trap 'exit 3' INT
+
+fi
+
 #Set path to L&B example commands used by the probe
 for exdir in /usr/lib64/glite-lb/examples /usr/lib/glite-lb/examples /opt/glite/examples
 do
        if [ -d "$exdir" ]; then
                export PATH=$PATH:$exdir
-               vprintf 3 "[LB Probe] adding $exdir to PATH\n"
+               vprintf 2 "[LB Probe] adding $exdir to PATH\n"
        fi
 done
 
@@ -187,7 +205,25 @@ if [ ! -z $SRVPORT ]; then
        export GLITE_LB_SERVER_PORT=$portnumber
 fi
 
-if [ $VERBLEVEL -ge 2 ]; then
+#Check if tmpdir writable
+touch $TMPDIR/$$_probecheck > /dev/null 2> /dev/null
+if [ -f $TMPDIR/$$_probecheck ]; then
+       rm $TMPDIR/$$_probecheck
+else
+       vprintf 1 "[LB Probe] Could not write to $TMPDIR. Falling back to /tmp.\n"
+       TMPDIR="/tmp"
+       touch $TMPDIR/$$_probecheck > /dev/null 2> /dev/null
+       if [ -f $TMPDIR/$$_probecheck ]; then
+               rm $TMPDIR/$$_probecheck
+       else
+               vprintf 1 "[LB Probe] Could not write to $TMPDIR.\n"
+               vprintf 0 "UNKNOWN: Probe could not write temporary files\n"
+               exit 3
+       fi
+fi
+
+
+if [ $VERBLEVEL -ge 3 ]; then
        env | grep -E "GLITE_|PATH"
        printf "*** $servername:$portnumber\n"
 fi
@@ -197,96 +233,103 @@ fi
 #####################
 
 {
-vprintf 2 "[LB Probe] Starting test"
+vprintf 1 "[LB Probe] Starting test"
 
 EXITCODE=0
 EXITMESSAGE=""
 
 # check_binaries
-vprintf 3 "\n[LB Probe] Testing if all binaries are available"
-check_binaries grid-proxy-info grep sed echo wc cat awk glite-lb-notify glite-lb-job_reg glite-lb-job_status glite-lb-purge glite-lb-ws_getversion glite-lb-logevent
+vprintf 2 "\n[LB Probe] Testing if all binaries are available"
+check_binaries grid-proxy-info grep sed echo wc cat awk kill glite-lb-notify glite-lb-job_reg glite-lb-job_status glite-lb-purge glite-lb-ws_getversion glite-lb-logevent 
 if [ $? -gt 0 ]; then
-       vprintf 2 "\n[LB Probe] Some Commands are unavailable\n\n"
+       vprintf 1 "\n[LB Probe] Some Commands are unavailable\n\n"
        vprintf 0 "UNKNOWN: Some commands are not available\n"
        exit 3
 fi
 
-vprintf 2 "\n[LB Probe] Testing credentials"
+vprintf 1 "\n[LB Probe] Testing credentials"
 
 timeleft=`grid-proxy-info | grep -E "^timeleft" | sed "s/timeleft\s*:\s//"`
 
 if [ "$timeleft" = "" ]; then
-        vprintf 2 "\n[LB Probe] Test failed -- No credentials\n\n"
-       vprintf 0 "UNKNOWN: NO CREDENTIALS\n"
+        vprintf 1 "\n[LB Probe] Test failed -- No credentials\n\n"
+       vprintf 0 "UNKNOWN: No Credentials\n"
        exit 3
 else
         if [ "$timeleft" = "0:00:00" ]; then
-                vprintf 2 "\n[LB Probe] Test failed -- Credentials expired\n\n"
-               vprintf 0 "UNKNOWN: CREDENTIALS EXPIRED\n"
+                vprintf 1 "\n[LB Probe] Test failed -- Credentials expired\n\n"
+               vprintf 0 "UNKNOWN: Credentials Expired\n"
                exit 3
         else
                
-               vprintf 2 "\n[LB Probe] Getting server version"
+               vprintf 1 "\n[LB Probe] Getting server version"
                serverversion=`glite-lb-ws_getversion -m $servername:$wsportnumber`
                if [ -z "$serverversion" ]; then
-                       vprintf 2 "\n[LB Probe] Test failed -- server did not respond\n\n"
-                       vprintf 0 "DOWN: UNABLE TO GET SERVER VERSION\n"
+                       vprintf 1 "\n[LB Probe] Test failed -- server did not respond\n\n"
+                       vprintf 0 "DOWN: Unable to Get Server Version\n"
                        exit 2
                else
                        echo $serverversion | grep -E "version.*[0-9]+\.[0-9]+\.[0-9]+" > /dev/null
                        if [ $? = 0 ]; then
-                               vprintf 3 ": $serverversion"
+                               vprintf 2 ": $serverversion"
                        else
-                               vprintf 2 " - unexpected output ($serverversion). A WARNING will be returned."
+                               vprintf 1 " - unexpected output ($serverversion). A WARNING will be returned."
                                EXITMESSAGE="$EXITMESSAGE[Unexpected version output ($serverversion)]"
                                EXITCODE=1
                        fi
                fi
 
                # Register job:
-               vprintf 2 "\n[LB Probe] Registering testing job "
+               vprintf 1 "\n[LB Probe] Registering testing job "
                jobid=`glite-lb-job_reg -m ${GLITE_WMS_QUERY_SERVER} -s application 2>&1 | grep "new jobid" | awk '{ print $3 }'`
 
                if [ -z $jobid ]; then
-                       vprintf 2 " Failed to register job\n[LB Probe] Test failed \n\n"
-                       vprintf 0 "DOWN: JOB REGISTRATION FAILED LOCALLY\n"
+                       vprintf 1 " Failed to register job\n[LB Probe] Test failed \n\n"
+                       vprintf 0 "DOWN: Job Registration Failed Locally\n"
                        exit 2
                else
-                       vprintf 3 "${jobid}"
+                       vprintf 2 "${jobid}"
 
                        jobstate=`glite-lb-job_status ${jobid} | grep "state :" | awk '{print $3}'`
                        if [ "${jobstate}" = "Submitted" ]; then
-                               vprintf 3 ", server side OK"
+                               vprintf 2 ", server side OK"
                         else
-                               vprintf 2 "\n[LB Probe] Test failed -- Job has not been submitted to server\n\n"
-                               vprintf 0 "DOWN: L&B SERVER NOT RUNNING\n"
+                               vprintf 1 "\n[LB Probe] Test failed -- Job has not been submitted to server\n\n"
+                               vprintf 0 "DOWN: L&B Server Not Running\n"
                                exit 2
                         fi
                fi
        
 
                # Register notification:
-               vprintf 2 "\n[LB Probe] Registering notification "
+               vprintf 1 "\n[LB Probe] Registering notification "
 
                notifid=`glite-lb-notify new -j ${jobid} | grep "notification ID" | awk '{ print $3 }'`
 
                if [ -z $notifid ]; then
-                       vprintf 2 "\n[LB Probe] Test failed -- Failed to register notification\n\n"
-                       vprintf 0 "DOWN: L&B SERVER NOT RUNNING\n"
+                       vprintf 1 "\n[LB Probe] Test failed -- Failed to register notification\n\n"
+                       vprintf 0 "DOWN: L&B Server Not Running\n"
                        exit 2
                else
-                       vprintf 3 "${notifid}"
+                       vprintf 2 "${notifid}"
 
-                       vprintf 2 "\n[LB Probe] Logging events resulting in state Cleared"
+                       vprintf 1 "\n[LB Probe] Logging events resulting in state Cleared"
                        log_cleared ${jobid}
 
                        NOTIFFILE="/tmp/$$_notifications.txt"           
                        STATEFILE="/tmp/$$_jobstat.txt"         
                        echo '' > $NOTIFFILE
-       
-                       TOREPS=4; #Repetitions before timeout
+
+                       vprintf 1 "\n[LB Probe] Waiting for delivery/processing"
+                       if [ $TIMEOUT -gt 0 ]; then
+                               #Assume about 3/4 of the timeout may be used to wait for messages
+                               TOREDUCED=`expr $TIMEOUT \* 3 / 4`
+                               TOREPS=`expr $TOREDUCED  / 5`
+                               vprintf 2 " (split $TOREDUCED-s span into $TOREPS wait cycles)"
+                       else    
+                               TOREPS=4; #Repetitions before timeout
+                       fi
                        CLRNOTIFIED=0;
-                       vprintf 2 "\n[LB Probe] Waiting for delivery/processing"
                        while [ $CLRNOTIFIED -eq 0 -a $TOREPS -gt 0 ]
                        do
                                glite-lb-notify receive -i 5 ${notifid} >> $NOTIFFILE 2> /dev/null 
@@ -298,31 +341,31 @@ else
                        glite-lb-job_status ${jobid} > $STATEFILE
                        jobstate=`cat $STATEFILE | grep "state :" | awk '{print $3}'`
 
-                       vprintf 2 "\n[LB Probe] Checking job state"
+                       vprintf 1 "\n[LB Probe] Checking job state"
                        if [ "${jobstate}" = "Submitted" ]; then
-                               vprintf 2 "\n[LB Probe] Test failed -- Job state has not changed (${jobstate})\n\n"
-                               vprintf 0 "DOWN: EVENT DELIVERY CHAIN (LOGGER/INTERLOGGER) NOT RUNNING\n"
+                               vprintf 1 "\n[LB Probe] Test failed -- Job state has not changed (${jobstate})\n\n"
+                               vprintf 0 "DOWN: Event Delivery Chain (Logger/Interlogger) Not Running\n"
                                rm $NOTIFFILE $STATEFILE
                                exit 2
                         else
-                               vprintf 3 " -- ${jobstate}"
+                               vprintf 2 " -- ${jobstate}"
                                if [ "${jobstate}" != "Cleared" ]; then
-                                       vprintf 3 ", not Cleared. A WARNING will be returned."
+                                       vprintf 2 ", not Cleared. A WARNING will be returned."
                                        EXITMESSAGE="$EXITMESSAGE[Unexpected state of test job (${jobstate})]"
                                        EXITCODE=1
                                fi
                         fi
 
-                       vprintf 2 "\n[LB Probe] Checking if notifications were delivered"
+                       vprintf 1 "\n[LB Probe] Checking if notifications were delivered"
                        NOTIFS=`cat $NOTIFFILE | wc -l`
 
                        grep ${jobid} $NOTIFFILE > /dev/null
 
                        if [ $? = 0 ]; then
-                               vprintf 3 ", OK ($NOTIFS messages)"
+                               vprintf 2 ", OK ($NOTIFS messages)"
                        else
-                               vprintf 2 "\n[LB Probe] Test failed -- Notifications were not delivered\n\n"
-                               vprintf 0 "DOWN: NOTIFICATION INTERLOGGER NOT RUNNING\n"
+                               vprintf 1 "\n[LB Probe] Test failed -- Notifications were not delivered\n\n"
+                               vprintf 0 "DOWN: Notification Interlogger Not Running\n"
                                rm $NOTIFFILE $STATEFILE
                                exit 2
                        fi
@@ -339,13 +382,14 @@ else
                        rm $NOTIFFILE $STATEFILE
 
                        #Drop notification
-                       vprintf 2 "\n[LB Probe] Dropping the test notification (${notifid})"
+                       vprintf 1 "\n[LB Probe] Dropping the test notification"
+                       vprintf 2 " (${notifid})"
                        dropresult=`glite-lb-notify drop ${notifid} 2>&1`
                        if [ -z $dropresult ]; then
-                               vprintf 3 ""
+                               vprintf 2 ""
                        else
-                               vprintf 2 "\n[LB Probe] Test failed"
-                               vprintf 2 " Failed to drop notification ${dropresult}, A WARNING will be returned."
+                               vprintf 1 "\n[LB Probe] Test failed"
+                               vprintf 1 " Failed to drop notification ${dropresult}, A WARNING will be returned."
                                EXITMESSAGE="$EXITMESSAGE[Could not drop notification]"
                                EXITCODE=1
                        fi
@@ -359,7 +403,11 @@ else
        fi
 fi
 
-vprintf 2 "\n[LB Probe] Test finished\n\n"
+if [ ! -z $watchpid ]; then
+       kill -s SIGTERM "$watchpid"
+fi
+
+vprintf 1 "\n[LB Probe] Test finished\n\n"
 }
 
 if [ $EXITCODE -eq 0 ]; then