From: cvs2svn Date: Fri, 5 Mar 2010 17:01:21 +0000 (+0000) Subject: This commit was manufactured by cvs2svn to create tag 'glite-lb_R_1_10_0_1'. X-Git-Tag: glite-lb_R_1_10_0_1 X-Git-Url: http://scientific.zcu.cz/git/?a=commitdiff_plain;h=c0f5eb12a9d25ddd68169ce0c6488bf608b7162b;p=jra1mw.git This commit was manufactured by cvs2svn to create tag 'glite-lb_R_1_10_0_1'. Sprout from branch_RC31_3 2010-03-05 15:19:07 UTC Zdeněk Šustr 'New versions' Cherrypick from master 2010-03-05 17:01:20 UTC František Dvořák 'Fix build without database.': org.glite.lb.harvester/Makefile org.glite.lb.harvester/configure org.glite.lb.harvester/doc/INSTALL org.glite.lb.harvester/doc/README org.glite.lb.harvester/doc/glite-lb-harvester.sgml org.glite.lb.harvester/examples/test.sh org.glite.lb.harvester/examples/test.sql org.glite.lb.harvester/project/ChangeLog org.glite.lb.harvester/project/package.description org.glite.lb.harvester/project/package.summary org.glite.lb.harvester/project/version.properties org.glite.lb.harvester/src/harvester.c org.glite.lb.logger/project/package.description org.glite.lb.logger/project/package.summary org.glite.lb.logger/src-nt/Connection.cpp org.glite.lb.logger/src-nt/EventManager.cpp org.glite.lb.logger/src-nt/InputChannel.H org.glite.lb.logger/src-nt/InputChannel.cpp org.glite.lb.logger/src-nt/Message.H org.glite.lb.logger/src-nt/MessageStore.H org.glite.lb.logger/src-nt/MessageStore.cpp org.glite.lb.logger/src-nt/Properties.H org.glite.lb.logger/src-nt/Transport.cpp org.glite.lb.logger/src/event_store_http.c org.glite.lb.logger/src/http.c org.glite.lb.logger/src/input_queue_socket_http.c org.glite.lb.logger/src/queue_mgr_http.c org.glite.lb.logger/src/send_event_http.c org.glite.lb.logger/src/server_msg_http.c Cherrypick from master 2009-04-08 08:15:08 UTC Aleš Křenek 'The most recent version copied. 
Do not modify this instance (RW in ./org.glite.lb).': org.glite.lb.logger/configure --- diff --git a/org.glite.lb.harvester/Makefile b/org.glite.lb.harvester/Makefile new file mode 100644 index 0000000..15f0b92 --- /dev/null +++ b/org.glite.lb.harvester/Makefile @@ -0,0 +1,89 @@ +top_srcdir=.. +stagedir=. +package=glite-lb-harvester +module.version=0.0.0 +PREFIX=/opt/glite +globus_prefix=/opt/globus + +archlib:=lib +thrflavour:=gcc32dbgpthr +host_cpu:=${shell uname -m} +ifeq (${host_cpu},x86_64) + archlib:=lib64 + thrflavour:=gcc64dbgpthr +endif + +-include Makefile.inc +-include ../project/version.properties +version:=${module.version} + +CC=gcc +VPATH=${top_srcdir}/src:${top_srcdir}/doc + +GLOBUS_CPPFLAGS:=-I${globus_prefix}/include/${thrflavour} +CPPFLAGS:=-I${stagedir}/include -D_GNU_SOURCE -D_REENTRANT ${CPPFLAGS} +CFLAGS:=-W -Wall -g -O2 ${CFLAGS} +LDFLAGS:=${LDFLAGS} +LIBS:=-L${stagedir}/${archlib} -L${stagedir}/lib \ + -lglite_lb_common_${thrflavour} \ + -lglite_lb_client_${thrflavour} \ + -lpthread -lglite_security_gss_${thrflavour} + +ifneq ($(GLITE_LB_HARVESTER_WITH_LBU_DB),no) +CPPFLAGS:=$(CPPFLAGS) -DWITH_LBU_DB=1 +LIBS:=$(LIBS) -lglite_lbu_db +endif +ifeq ($(GLITE_LB_HARVESTER_WITH_OLD_LB),yes) +CPPFLAGS:=${GLOBUS_CPPFLAGS} $(CPPFLAGS) -DWITH_OLD_LB=1 +LIBS:=$(LIBS) -lglite_wmsutils_cjobid +else +LIBS:=$(LIBS) -lglite_jobid -lglite_lbu_trio +endif + +COMPILE:=libtool --mode=compile ${CC} ${CPPFLAGS} ${CFLAGS} +LINK:=libtool --mode=link ${CC} ${LDFLAGS} +INSTALL:=libtool --mode=install install + +default: all + +compile all: harvester doc debug + +check: + +debug: harvester-dbg + +doc: glite-lb-harvester.8 + +stage: compile + $(MAKE) install PREFIX=${stagedir} + +install: compile + -mkdir -p ${PREFIX}/bin ${PREFIX}/examples ${PREFIX}/share/doc/${package}-${version} ${PREFIX}/share/man/man8 + ${INSTALL} -m 755 harvester ${PREFIX}/bin/glite-lb-harvester + ${INSTALL} -m 755 harvester-dbg ${PREFIX}/examples/glite-lb-harvester-dbg + ${INSTALL} -m 755 
../examples/test.sh ${PREFIX}/examples/glite-lb-harvester-test.sh + ${INSTALL} -m 444 ../doc/README ${PREFIX}/share/doc/${package}-${version} + ${INSTALL} -m 444 glite-lb-harvester.8 ${PREFIX}/share/man/man8 + +clean: + rm -rfv *.o *.lo *.loT .libs/ manpage.links manpage.refs + rm -rvf harvester harvester-dbg glite-lb-harvester.* + rm -rvf log.xml project/ rpmbuild/ RPMS/ tgz/ + +harvester: harvester.o + ${LINK} -o $@ $+ ${LIBS} + +harvester-dbg: harvester-dbg.o + ${LINK} -o $@ $+ ${LIBS} + +harvester-dbg.o: harvester.c + ${COMPILE} -DLOG=1 -DWITH_RTM_SQL_STORAGE=1 -c $< -o $@ + +%.o: %.c + ${COMPILE} -c $< + +%.8: %.sgml + docbook2man $< + mv $(@:.8=.1) $@ + +.PHONY: default all compile debug check doc stage install clean diff --git a/org.glite.lb.harvester/configure b/org.glite.lb.harvester/configure new file mode 100755 index 0000000..565574d --- /dev/null +++ b/org.glite.lb.harvester/configure @@ -0,0 +1,712 @@ +#!/usr/bin/perl + +# WARNING: Don't edit this file unless it is the master copy in org.glite.lb +# +# For the purpose of standalone builds of lb/jobid/lbjp-common components +# it is copied on tagging + +# $Header$ + +use Getopt::Long; + +my $pwd = `pwd`; chomp $pwd; +my $prefix = $pwd.'/stage'; +my $stagedir; +my $staged; +my $module; +my $thrflavour = 'gcc64dbgpthr'; +my $nothrflavour = 'gcc64dbg'; +my $mode = 'build'; +my $help = 0; +my $listmodules; +my $version; +my $output; +my $lb_tag = ''; +my $lbjp_tag = ''; +my $jp_tag = ''; +my $sec_tag = ''; +my $jobid_tag = ''; +my $libdir = 'lib'; + +my @nodes = qw/client server logger utils client-java doc ws-test db jpprimary jpindex jpclient harvester/; +my %enable_nodes; +my %disable_nodes; + +my %extern_prefix = ( + cares => '/opt/c-ares', + classads => '/opt/classads', + cppunit => '/usr', + expat => '/usr', + globus => '/opt/globus', + gsoap => '/usr', + mysql => '/usr', + 'mysql-devel' => '', + 'mysql-server' => '', + voms => '/opt/glite', + gridsite => '/opt/glite', + lcas => '/opt/glite', + 
trustmanager => '/opt/glite', + ant => '/usr', + jdk => '/usr', + libtar => '/usr', + axis => '/usr', + log4c => '/usr', + postgresql => '/usr' +); + +my %jar = ( + 'commons-codec' => '/usr/share/java/commons-codec.jar', + 'commons-lang' => '/usr/share/java/commons-lang.jar', +); + + +my %glite_prefix; +my %need_externs; +my %need_externs_type; +my %need_jars; +my %extrafull; +my %extranodmod; +my %deps; +my %deps_type; +my %topbuild; + +my %lbmodules = ( + 'lb' => [ qw/client client-java common doc logger server state-machine types utils ws-interface ws-test harvester/], + 'security' => [qw/gss gsoap-plugin/], + 'lbjp-common' => [qw/db log maildir server-bones trio jp-interface/], + 'jobid' => [qw/api-c api-cpp api-java/], + 'jp' => [ qw/client doc index primary server-common ws-interface/ ], + ); + + +my @opts = ( + 'prefix=s' => \$prefix, + 'staged=s' => \$staged, + 'module=s' => \$module, + 'thrflavour=s' => \$thrflavour, + 'nothrflavour=s' => \$nothrflavour, + 'mode=s' => \$mode, + 'listmodules=s' => \$listmodules, + 'version=s' => \$version, + 'output=s' => \$output, + 'stage=s' => \$stagedir, + 'lb-tag=s' => \$lb_tag, + 'lbjp-common-tag=s' => \$lbjp_tag, + 'jp-tag=s' => \$jp_tag, + 'security-tag=s' => \$sec_tag, + 'jobid-tag=s' => \$jobid_tag, + 'help' => \$help, + 'libdir=s' => \$libdir, +); + +for (@nodes) { + $enable_nodes{$_} = 0; + $disable_nodes{$_} = 0; + + push @opts,"disable-$_",\$disable_nodes{$_}; + push @opts,"enable-$_",\$enable_nodes{$_}; +} + +push @opts,"with-$_=s",\$extern_prefix{$_} for keys %extern_prefix; +push @opts,"with-$_=s",\$jar{$_} for keys %jar; + +my @keeparg = @ARGV; + +GetOptions @opts or die "Errors parsing command line\n"; + +$extern_prefix{'mysql-devel'}=$extern_prefix{mysql} if $extern_prefix{'mysql-devel'} eq ''; +$extern_prefix{'mysql-server'}=$extern_prefix{mysql} if $extern_prefix{'mysql-server'} eq ''; + +if ($help) { usage(); exit 0; } + +if ($listmodules) { + my @m = map 
"org.glite.$listmodules.$_",@{$lbmodules{$listmodules}}; + print "@m\n"; + exit 0; +} + +warn "$0: --version and --output make sense only in --mode=etics\n" + if ($version || $output) && $mode ne 'etics'; + +my $en; +for (keys %enable_nodes) { $en = 1 if $enable_nodes{$_}; } + +my $dis; +for (keys %disable_nodes) { $dis = 1 if $disable_nodes{$_}; } + +die "--enable-* and --disable-* are mutually exclusive\n" + if $en && $dis; + +die "--module cannot be used with --enable-* or --disable-*\n" + if $module && ($en || $dis); + +die "$module: unknown module\n" if $module && ! grep $module,@{$lbmodules{lb}},@{$lbmodules{security}},{$lbmodules{jp}}; + +if ($dis) { + for (@nodes) { + $enable_nodes{$_} = 1 unless $disable_nodes{$_}; + } +} + +if (!$en && !$dis) { $enable_nodes{$_} = 1 for (@nodes) } ; + +for (keys %enable_nodes) { delete $enable_nodes{$_} unless $enable_nodes{$_}; } + +$stagedir = $prefix unless $stagedir; + +if ($mode eq 'build') { + print "Writing config.status\n"; + open CONF,">config.status" or die "config.status: $!\n"; + print CONF "$0 @keeparg\n"; + close CONF; +} + + +my @modules; +my %aux; + +if ($module) { +# push @modules,split(/[,.]+/,$module); + push @modules,$module; +} +else { + @modules = map(($extranodmod{$_} ? 
$extranodmod{$_} : 'lb.'.$_),(keys %enable_nodes)); + + my $n; + + do { + local $"="\n"; + $n = $#modules; + push @modules,(map @{$deps{$_}},@modules); + + undef %aux; @aux{@modules} = (1) x ($#modules+1); + @modules = keys %aux; + } while ($#modules > $n); +} + +@aux{@modules} = (1) x ($#modules+1); +delete $aux{$_} for (split /,/,$staged); +@modules = keys %aux; + +mode_build() if $mode eq 'build'; +mode_checkout() if $mode eq 'checkout'; +mode_etics($module) if $mode eq 'etics'; + +sub mode_build { + print "\nBuilding modules: @modules\n"; + + my @ext = map @{$need_externs{$_}},@modules; + my @myjars = map @{$need_jars{$_}},@modules; + undef %aux; @aux{@ext} = 1; + @ext = keys %aux; + undef %aux; @aux{@myjars} = (1) x ($#myjars+1); + @myjars = keys %aux; + + print "\nRequired externals:\n"; + print "\t$_: $extern_prefix{$_}\n" for @ext; + print "\t$_: $jar{$_}\n" for @myjars; + print "\nThis is a poor-man configure, it's up to you to have sources and externals there\n\n"; + + mkinc($_) for @modules; + + print "Creating Makefile\n"; + + open MAK,">Makefile" or die "Makefile: $!\n"; + + print MAK "all: @modules\n\nclean:\n"; + + for (@modules) { + my $full = full($_); + my $build = $topbuild{$_} ? '': '/build'; + print MAK "\tcd $full$build && \${MAKE} clean\n" + } + + print MAK "\ndistclean:\n"; + + for (@modules) { + my $full = full($_); + print MAK $topbuild{$_} ? + "\tcd $full$build && \${MAKE} distclean\n" : + "\trm -rf $full$build\n" + } + + print MAK "\n"; + + for (@modules) { + my %ldeps; undef %ldeps; + @ldeps{@{$deps{$_}}} = 1; + for my $x (split /,/,$staged) { delete $ldeps{$x}; } + my @dnames = $module ? () : keys %ldeps; + + my $full = full($_); + my $build = $topbuild{$_} ? 
'': '/build'; + + print MAK "$_: @dnames\n\tcd $full$build && \${MAKE} && \${MAKE} install\n\n"; + } + + close MAK; +} + +sub mode_checkout() { + for (@modules) { + my $module = $_; + my $tag = ""; + if ($lb_tag){ + for (@{$lbmodules{lb}}){ + if ("lb.".$_ eq $module){ + $tag = '-r '.$lb_tag; + } + } + } + if ($lbjp_tag){ + for (@{$lbmodules{'lbjp-common'}}){ + if ("lbjp-common.".$_ eq $module){ + $tag = '-r '.$lbjp_tag; + } + } + } + if ($jp_tag){ + for (@{$lbmodules{'jp'}}){ + if ("jp.".$_ eq $module){ + $tag = '-r '.$jp_tag; + } + } + } + if ($sec_tag){ + for (@{$lbmodules{security}}){ + if ("security.".$_ eq $module){ + $tag = '-r '.$sec_tag; + } + } + } + if ($jobid_tag){ + for (@{$lbmodules{jobid}}){ + if ("jobid.".$_ eq $module){ + $tag = '-r '.$jobid_tag; + } + } + } + #if (grep {"lb.".$_ eq $module} @{$lbmodules{lb}}){ + # print "found"; + #} + $_ = full($_); + print "\n*** Checking out $_\n"; + system("cvs checkout $tag $_") == 0 or die "cvs checkout $tag $_: $?\n"; + } +} + +BEGIN{ +%need_externs_aux = ( + 'lb.client' => [ qw/cppunit:B classads/ ], + 'lb.client-java' => [ qw/ant:B jdk:B axis:B trustmanager/ ], + 'lb.common' => [ qw/expat cppunit:B classads/ ], + 'lb.doc' => [], + 'lb.logger' => [ qw/cppunit:B log4c/ ], + 'lb.server' => [ qw/globus_essentials:R globus:B expat cares mysql:R mysql-server:R mysql-devel:B cppunit:B gsoap:B classads voms lcas gridsite log4c/ ], + 'lb.state-machine' => [ qw/classads/ ], + 'lb.utils' => [ qw/cppunit:B/ ], + 'lb.ws-interface' => [], + 'lb.ws-test' => [ qw/gsoap:B/ ], + 'lb.types' => [ qw// ], + 'lb.harvester' => [ qw/postgresql:R/ ], + 'lbjp-common.db' => [ qw/mysql:B mysql-devel:B postgresql:B/ ], + 'lbjp-common.log' => [ qw// ], + 'lbjp-common.maildir' => [ qw// ], + 'lbjp-common.server-bones' => [ qw// ], + 'lbjp-common.trio' => [ qw/cppunit:B/ ], + 'lbjp-common.jp-interface' => [ qw/cppunit:B/ ], + 'security.gss' => [ qw/globus_essentials:R globus:B cares cppunit:B/ ], + 'security.gsoap-plugin' => [ 
qw/cppunit:B globus_essentials:R globus:B cares:B gsoap:B/ ], + 'jobid.api-c' => [ qw/cppunit:B/ ], + 'jobid.api-cpp' => [ qw/cppunit:B/ ], + 'jobid.api-java' => [ qw/ant:B jdk:B/ ], + 'jp.client' => [ qw/gsoap libtar globus_essentials:R globus:B/ ], + 'jp.doc' => [], + 'jp.index' => [ qw/gsoap globus_essentials:R globus:B/ ], + 'jp.primary' => [ qw/classads gsoap libtar globus_essentials:R globus:B/ ], + 'jp.server-common' => [], + 'jp.ws-interface' => [], +); + +for my $ext (keys %need_externs_aux) { + for (@{$need_externs_aux{$ext}}) { + /([^:]*)(?::(.*))?/; + push @{$need_externs{$ext}},$1; + my $type = $2 ? $2 : 'BR'; + $need_externs_type{$ext}->{$1} = $type; + } +} + +%need_jars = ( + 'jobid.api-java' => [ qw/commons-codec/ ], + 'lb.client-java' => [ qw/commons-lang/ ], +); + +for my $jar (keys %need_jars) { + for (@{$need_jars{$jar}}) { + $need_externs_type{$jar}->{$_} = 'BR'; # XXX + } +} + +%deps_aux = ( + 'lb.client' => [ qw/ + lb.types:B lb.common + lbjp-common.trio + jobid.api-cpp jobid.api-c + security.gss + / ], + 'lb.client-java' => [ qw/ + lb.types:B + lb.ws-interface:B + jobid.api-java + / ], + 'lb.common' => [ qw/ + jobid.api-cpp jobid.api-c + lb.types:B lbjp-common.trio security.gss + / ], + 'lb.doc' => [ qw/lb.types:B/ ], + 'lb.logger' => [ qw/ + lbjp-common.trio + lbjp-common.log + jobid.api-c + lb.common + security.gss + / ], + 'lb.server' => [ qw/ + lb.ws-interface lb.types:B lb.common lb.state-machine + lbjp-common.db lbjp-common.server-bones lbjp-common.trio lbjp-common.maildir lbjp-common.log + jobid.api-c + security.gsoap-plugin security.gss + / ], + 'lb.state-machine' => [ qw/lb.types:B lb.common lbjp-common.jp-interface security.gss/ ], + 'lb.utils' => [ qw/ + lbjp-common.jp-interface + jobid.api-c + lbjp-common.trio lbjp-common.maildir + lb.client lb.state-machine + / ], + 'lb.ws-test' => [ qw/security.gsoap-plugin lb.ws-interface/ ], + 'lb.ws-interface' => [ qw/lb.types:B/ ], + 'lb.types' => [ qw// ], + 'lb.harvester' => [ qw/ + 
jobid.api-c lbjp-common.trio lbjp-common.db lb.common lb.client + security.gss + / ], + 'lbjp-common.db' => [ qw/lbjp-common.trio/ ], + 'lbjp-common.maildir' => [ qw// ], + 'lbjp-common.server-bones' => [ qw/lbjp-common.log/ ], + 'lbjp-common.trio' => [ qw// ], + 'security.gss' => [ qw// ], + 'security.gsoap-plugin' => [ qw/security.gss/ ], + 'jobid.api-c' => [ qw// ], + 'jobid.api-cpp' => [ qw/jobid.api-c/ ], + 'jobid.api-java' => [ qw// ], + + 'lbjp-common.jp-interface' => [ qw/lbjp-common.db jobid.api-c/ ], + + 'jp.client' => [ qw/ + jp.ws-interface + lbjp-common.jp-interface lbjp-common.maildir + jobid.api-c + security.gsoap-plugin + / ], + 'jp.doc' => [ qw// ], + 'jp.index' => [ qw/ + jp.server-common jp.ws-interface + lbjp-common.jp-interface lbjp-common.trio lbjp-common.db lbjp-common.server-bones + security.gsoap-plugin + / ], + 'jp.primary' => [ qw/ + jobid.api-c + jp.server-common jp.ws-interface + lb.state-machine + lbjp-common.jp-interface lbjp-common.trio lbjp-common.db lbjp-common.server-bones + security.gsoap-plugin + / ], + 'jp.server-common' => [ qw/ + lbjp-common.jp-interface lbjp-common.db + / ], + 'jp.ws-interface' => [ qw// ], +); + +for my $ext (keys %deps_aux) { + for (@{$deps_aux{$ext}}) { + /([^:]*)(?::(.*))?/; + push @{$deps{$ext}},$1; + my $type = $2 ? $2 : 'BR'; + $deps_type{$ext}->{$1} = $type; + } +} + + +%extrafull = ( gridsite=>'org.gridsite.core'); + +#( java => 'client-java' ); +%extranodmod = ( + db => 'lbjp-common.db', + jpprimary => 'jp.primary', + jpindex => 'jp.index', + jpclient => 'jp.client', +); + +my @t = qw/lb.client-java jobid.api-java lb.types lbjp-common.log/; +@topbuild{@t} = (1) x ($#t+1); +} + +sub full +{ + my $short = shift; + return $extrafull{$short} ? 
$extrafull{$short} : 'org.glite.'.$short; +} + +sub mkinc +{ + my %aux; + undef %aux; + my @m=qw/ +lb.client lb.doc lb.state-machine lb.ws-interface lb.logger lb.types lb.common lb.server lb.utils lb.ws-test lb.client-java lb.harvester +security.gss security.gsoap-plugin +jobid.api-c jobid.api-cpp jobid.api-java +lbjp-common.db lbjp-common.log lbjp-common.maildir lbjp-common.server-bones lbjp-common.trio lbjp-common.jp-interface +jp.client jp.doc jp.index jp.primary jp.server-common jp.ws-interface +/; + @aux{@m} = (1) x ($#m+1); + + my $short = shift; + my $full = full $short; + + unless ($aux{$short}) { + print "Makefile.inc not needed in $full\n"; + return; + } + + my $build = ''; + + unless ($topbuild{$_}) { + $build = '/build'; + unless (-d "$full/build") { + mkdir "$full/build" or die "mkdir $full/build: $!\n"; + } + unlink "$full/build/Makefile"; + symlink "../Makefile","$full/build/Makefile" or die "symlink ../Makefile $full/build/Makefile: $!\n"; + } + + open MKINC,">$full$build/Makefile.inc" + or die "$full$build/Makefile.inc: $!\n"; + + print "Creating $full$build/Makefile.inc\n"; + + print MKINC qq{ +PREFIX = $prefix +stagedir = $stagedir +thrflavour = $thrflavour +nothrflavour = $nothrflavour +libdir = $libdir +}; + + for (@{$need_externs{$short}}) { + print MKINC "${_}_prefix = $extern_prefix{$_}\n" + } + + for (@{$need_jars{$short}}) { + print MKINC "${_}_jar = $jar{$_}\n" + } + + my $need_gsoap = 0; + for (@{$need_externs{$short}}) { $need_gsoap = 1 if $_ eq 'gsoap'; } + + print MKINC "gsoap_default_version=".gsoap_version()."\n" if $need_gsoap; + + close MKINC; +} + +my %etics_externs; +my %etics_projects; +BEGIN{ + %etics_externs = ( + globus_essentials=>'vdt_globus_essentials', + globus=>'globus', + cares=>'c-ares', + voms=>'org.glite.security.voms-api-cpp', + gridsite=>'org.gridsite.shared', + lcas=>'org.glite.security.lcas', + trustmanager=>'org.glite.security.trustmanager', + ); + %etics_projects = ( + vdt=>[qw/globus globus_essentials/], + 
'org.glite'=>[qw/voms gridsite lcas/], + ); +}; + +sub mode_etics { + $fmod = shift; + + die "$0: --module required with --etics\n" unless $fmod; + + my ($subsys,$module) = split /\./,$fmod; + + my ($major,$minor,$rev,$age); + + if ($version) { + $version =~ /([[:digit:]]+)\.([[:digit:]]+)\.([[:digit:]]+)-(.+)/; + ($major,$minor,$rev,$age) = ($1,$2,$3,$4); + } + else { + open V,"org.glite.$subsys.$module/project/version.properties" + or die "org.glite.$subsys.$module/project/version.properties: $!\n"; + + while ($_ = ) { + chomp; + ($major,$minor,$rev) = ($1,$2,$3) if /module\.version\s*=\s*([[:digit:]]+)\.([[:digit:]]+)\.([[:digit:]]+)/; + $age = $1 if /module\.age\s*=\s*([[:digit:]]+)/; + } + close V; + } + + my @copts = (); + my %ge; + @ge{@{$etics_projects{'org.glite'}}} = (1) x ($#{$etics_projects{'org.glite'}}+1); + + for (@{$need_externs{"$subsys.$module"}}) { + if ($need_externs_type{"$subsys.$module"}->{$_}=~/B/) { + my $eext = $etics_externs{$_} ? $etics_externs{$_} : $_; + push @copts,$ge{$_} ? "--with-$_=\${stageDir}" : "--with-$_=\${$eext.location}"; + } + } + + for (@{$need_jars{"$subsys.$module"}}) { + my $eext = $etics_externs{$_} ? $etics_externs{$_} : $_; + + push @copts,"--with-$_ \${$eext.location}/$_*.jar"; + } + + + my $conf = "glite-$subsys-${module}_R_${major}_${minor}_${rev}_${age}"; + my $file = $output ? $output : "$conf.ini"; + open C,">$file" or die "$file: $!\n"; + + my $buildroot = $topbuild{"$subsys.$module"} ? '' : "build.root = build\n"; + + my $confdir = $topbuild{"$subsys.$module"} ? '..' 
: '../..'; + + print STDERR "Writing $file\n"; + print C qq{ +[Configuration-$conf] +profile = None +moduleName = org.glite.$subsys.$module +displayName = $conf +description = org.glite.$subsys.$module +projectName = org.glite +age = $age +deploymentType = None +tag = $conf +version = $major.$minor.$rev +path = \${projectName}/\${moduleName}/\${version}/\${platformName}/\${packageName}-\${version}-\${age}.tar.gz + +[Platform-default:VcsCommand] +displayName = None +description = None +tag = cvs -d \${vcsroot} tag -R \${tag} \${moduleName} +branch = None +commit = None +checkout = cvs -d \${vcsroot} co -r \${tag} \${moduleName} + +[Platform-default:BuildCommand] +postpublish = None +packaging = None +displayName = None +description = None +doc = None +prepublish = None +publish = None +compile = make +init = None +install = make install +clean = make clean +test = make check +configure = cd $confdir && \${moduleName}/configure --thrflavour=\${globus.thr.flavor} --nothrflavour=\${globus.nothr.flavor} --prefix=\${prefix} --stage=\${stageDir} --libdir=\${libdir} --module $subsys.$module @copts +checkstyle = None + +[Platform-default:Property] +$buildroot + +[Platform-default:DynamicDependency] + +}; + for (@{$need_externs{"$subsys.$module"}},@{$need_jars{"$subsys.$module"}}) { + my $eext = $etics_externs{$_} ? 
$etics_externs{$_} : $_; + + my $proj = 'externals'; + for my $p (keys %etics_projects) { + for $m (@{$etics_projects{$p}}) { + $proj = $p if $m eq $_; + } + } + + my $type = $need_externs_type{"$subsys.$module"}->{$_}; + print C "$proj|$eext = $type\n"; + } + + for (@{$deps{"$subsys.$module"}}) { + my $type = $deps_type{"$subsys.$module"}->{$_}; + print C "org.glite|org.glite.$_ = $type\n"; + } + + close C; +} + +sub gsoap_version { + local $_; + my $gsoap_version; + open S,"$extern_prefix{gsoap}/bin/soapcpp2 -v 2>&1 |" or die "$extern_prefix{gsoap}/bin/soapcpp2: $!\n"; + + while ($_ = ) { + chomp; + + $gsoap_version = $1 if /The gSOAP Stub and Skeleton Compiler for C and C\+\+ ([.[:digit:][:alpha:]]+)$/; + } + close S; + return $gsoap_version; +} + + +sub usage { + my @ext = keys %extern_prefix; + my @myjars, keys %jar; + + print STDERR qq{ +usage: $0 options + +General options (defaults in []): + --prefix=PREFIX destination directory [./stage] + --staged=module,module,... what is already in PREFIX (specify without org.glite.) + --thrflavour=flavour + --nothrflavour=flavour threaded and non-treaded flavours [gcc64dbgpthr,gcc64dbg] + --listmodules=subsys list modules of a subsystem + --libdir=libdir typically [lib,lib64] postfix + +Mode of operation: + --mode={checkout|build|etics} what to do [build] + +What to build: + --module=module build this module only (mostly in-Etics operation) + --enable-NODE build this "node" (set of modules) only. Available nodes are + @{$lbmodules{lb}},@{$lbmodules{security}} + --disable-NODE don't build this node + --lb-tag=tag checkout LB modules with specific tag + --jp-tag=tag checkout JP modules with specific tag + --lbjp-common-tag=tag checkout lbjp-common modules with specific tag + --security-tag=tag checkout security modules with specific tag + --jobid-tag=tag checkout jobid modules with specific tag + +Dependencies: + --with-EXTERNAL=PATH where to look for an external. 
Required externals + (not all for all modules) are: + @ext + --with-JAR=JAR where to look for jars. Required jars are: + @myjars + Summary of what will be used is always printed + +}; + +} diff --git a/org.glite.lb.harvester/doc/INSTALL b/org.glite.lb.harvester/doc/INSTALL new file mode 100644 index 0000000..f5ff9c9 --- /dev/null +++ b/org.glite.lb.harvester/doc/INSTALL @@ -0,0 +1,42 @@ +Requirements +============ + +1) gLite +- client L&B libraries: + - glite-jobid-api-c + - glite-lb-common + - glite-lb-client + - glite-security-gss + - globus essential libraries (threaded flavour), + use the version with the external SSL, not with bundled SSL (!) + - glite-lbjp-common-db (build only) + - mysql-devel (build only) +2) postgresql-devel + + +Steps +===== + +./configure +make +make install + +Use './configure --help' for the options. + + +Manual way +========== + +configure is a simple script generating Makefile.inc. You can build harvester +straight away by make defining the variables manually. For example with gLite +installed in ~/glite/stage: + +(rm Makefile.inc) +make stagedir=$HOME/glite/stage + + +Testing +======= + +Test for basic functionality covered by 'test.sh' script in sources. +See './test.sh --help'. diff --git a/org.glite.lb.harvester/doc/README b/org.glite.lb.harvester/doc/README new file mode 100644 index 0000000..f1c393d --- /dev/null +++ b/org.glite.lb.harvester/doc/README @@ -0,0 +1,81 @@ +Introduction +============ + +L&B Harvester gathers information about jobs from L&B servers using efficient +L&B notification mechanism. It manages notifications and keeps them in +a persistent storage (file or database table) to reuse later on next launch. +It takes care about refreshing notifications and queries L&B servers back when +some notification expires. + +The tool was initially written for Real Time Monitor (project at Imperial +College in London), later was extended with messaging mechanism for WLCG. 
+ + +Requirements +============ + +- lastUpdateTime index on L&B servers +- harvester identity in super users file on L&B servers + + +Launch (with msg-publish sending messages) +========================================= + +Harvester is sending notifications via msg-publish infrastructure. List of the +L&B servers to harvest is specified via -c option. + +1) with newer LB 2.0 servers: + + glite-lb-harvester -c servers.txt -C certfile -K keyfile --wlcg + +2) with older LB servers (backward compatible but greedy notifications): + + glite-lb-harvester -c servers.txt -C certfile -K keyfile --wlcg --old + +Custom configuration of messaging: + --wlcg-binary $HOME/bin/msg-publish + --wlcg-topic org.wlcg.usage.JobStatus2 + --wlcg-config $HOME/etc/msg-publish.conf.wlcg + + +Launch (Real Time Monitor and storing to the database) +====================================================== + +Harvester is using postgres database. Table 'lb20' with L&B servers to +harvest (read-only), table 'jobs' with result job states (read/write). It's +possible to specify L&B servers list by file instead of 'lb20' table, +via -c option. + + glite-lb-harvester -C certfile -K keyfile --pg rtm/@:rtm + +The connection string after '--pg' is in format: + USER/PASSWORD@HOST:DATABASE +Database schema in 'test.sql'. + + +Other recommended options +========================= + +Use 'glite-lb-harvester --help' for additional options. + +For example: + - daemonizing and using syslog: + '--daemonize --pidfile /var/run/glite-lb-harvester.pid' + - decreasing verbosity: + '-d 2' (2 for errors and warnings only) + + +Stop +==== + +In non-daemon mode CTRL-C can be used, in daemon mode using specified +pidfile: + + kill `cat /var/run/glite-lb-harvester.pid` + +pidfile will vanish after exit. + +All notifications are preserved on LB servers, and will expire later. 
You can +purge them now, if they won't be needed: + + glite-lb-harvester --cleanup diff --git a/org.glite.lb.harvester/doc/glite-lb-harvester.sgml b/org.glite.lb.harvester/doc/glite-lb-harvester.sgml new file mode 100644 index 0000000..5d9f75a --- /dev/null +++ b/org.glite.lb.harvester/doc/glite-lb-harvester.sgml @@ -0,0 +1,480 @@ + + + + + + glite-lb-harvester + 1 + EU EGEE Project + + + + glite-lb-harvester + daemon for processing L&B notifications + + + + + glite-lb-harvester + + + -h + --help + + + + -v + --version + + + + -d + --debug + LEVEL + + + -D + --daemon + + + + -i + --pidfile + PIDFILE + + + -s + --threads + N + + + -t + --ttl + TIME + + + -H + --history + TIME + + + -c + --config + + + + -m + --pg + USER/PWD@SERVER:DBNAME + + + -n + --notifs + FILE + + + -p + --port + PORT + + + -C + --cert + FILE + + + -K + --key + FILE + + + -o + --old + + + + -l + --cleanup + + + + -u + --no-purge + + + + -w + --wlcg + + + + --wlcg-binary + EXECUTABLE + + + --wlcg-topic + TOPIC + + + --wlcg-config + FILENAME + + + --wlcg-flush + + + + + + DESCRIPTION + +L&B Harvester gathers information about jobs from L&B servers using efficient +L&B notification mechanism. It manages notifications and keeps them in +a persistent storage (file or database table) to reuse later on next launch. +It takes care about refreshing notifications and queries L&B servers back when +some notification expires. + +The tool was initially written for Real Time Monitor (project at Imperial +College in London), later was extended by MSG publish messaging mechanism for WLCG. + + + + + Requirements + +It is required on L&B servers side: + + +lastUpdateTime index, see "Changing Index Configuration" section in L&B Admin Guide + + +L&B harvester identity (certification subject) in super users file + + + + + + + OPTIONS + + + + | + +Print short usage. + + + + + + + | + +Print harvester version identifier. 
+ + + + + | + + Verbosity level: + + 0error only + 1warnings + 2info/progress + 3debug + 4insane + +8 (8,9,10,11,12)don't fork and no preventive restarts + + + + + + | + +Daemonize and detach from console. Error messages are directed to syslog. + + + + + | + +The file with process ID. Automatically removed on shutdown. + + + + + | + +Number of threads (slaves). Configured L&B servers are equally distributed between threads. + + + + + | + +Validity (time to live) of the notifications. Daemon regularly refreshes notification in advance as needed. + + + + + | + +Historic dive limit in seconds. <= means unlimited. + + When starting, the L&B harvester queries the L&B servers for existing jobs. It queries L&B server when notification expires too and can't be refreshed on time. This parameter limits how deep into history L&B harvester should go. + + Another usage of this parameter is for derivation of the maximal time of retries. When some L&B server is inaccessible or it is in error condition, harvester linearly increases retry time. The maximal retry time is half of this parameter. + + + + + | + + Config file name with list of L&B servers. When used together with database option (), this parameter has precedence before lb20 table. + + + + + | + +Database connection string in the USER/PWD@SERVER:DBNAME form. There are used following tables in database: + + + lb20 - the list of L&B servers is taken from this table. But when is specified option () too, the file has precedence before this table. + +There is kept a column monitored in too: if there is any inactive notification because of errors on given L&B server (one expired or it was unable to create a new one), the false value is set. After refreshing or creating the notification, the value is set back to true. + + + jobs - table for storing job states. Each record is updated for each incoming notification - when state of the job changes in L&B server. 
+ + +Database schema can be found in source code of org.glite.lb.harvester: examples/test.sql + +Developer note: information about notifications is kept in a file. It is possible to compile a binary keeping states in the database. It is used in the test in examples source directory. + + + + + | + +File for internal usage in L&B harvester. There is kept persistent information about active notifications or errors on L&B servers. Default is /var/tmp/notifs.txt. + + + + + | + +Specifies the port for listening and requests L&B nodes to send notification messages only to this port. May be needed for networks behind NAT or behind firewalls. + + + + + | + +X509 certificate file. + + + + + | + +X509 key file. + + + + + | + +"silly" mode for L&B servers < 2.0. In this mode transfer of the notification is not optimized at all. On the other hand it will work with older L&B servers. + + + + + | + +Cleans up all active notifications and quits. + +Each notification automatically expires. But if you know that notifications used in the previous run of L&B harvester won't be needed, it is recommended to clean up the notifications and spare the resources on L&B servers (queue with undelivered notification messages and matching rules). + + + + + | + +By default jobs are purged from local database when purged on L&B server. This option forces keeping all jobs in database, only with changed state to 'Purged'. + +For using together with (). + + + + + | + +Enables delivery to MSG publish. Messages are sent by executing a binary with proper parameters. + + + + + + +Full path to msg-publish binary executable, which is called for sending messages. Default is /usr/bin/msg-publish. + + + + + + +Topic used in MSG publish messages. Default is org.wlcg.usage.jobStatus. + + + + + + +Config file used in MSG publish. Default is /etc/msg-publish/msg-publish.conf. + + + + + + +Messages are sent to MSG publish in batches by default. 
This option enforce sending the messages one by one on each notification from L&B server - for each job state change. + + + + + + + + ENVIRONMENT + + + + GLITE_LB_HARVESTER_NO_REMOVE + +0 or false instructs L&B harvester to not remove temporary files with sent messages for MSG publish. By default temporary files with successfully sent messages are removed. Files with failed messages are always preserved. + +Intended for debugging purposes. + + + + + + + EXAMPLES + + + MSG publish infrastructure + +Harvester will send notifications using msg-publish infrastructure. List of the L&B servers to harvest is specified in config file specified by option: + + + + glite-lb-harvester -c servers.txt -C certfile -K keyfile --wlcg + +With newer L&B servers >= 2.0. + + + + glite-lb-harvester -c servers.txt -C certfile -K keyfile --wlcg --old + +With older L&B servers < 2.0 (backward compatible but greedy notifications). + + + + + +Custom configuration examples for MSG publish: + + + $HOME/bin/msg-publish + + org.wlcg.usage.JobStatus2 + + $HOME/etc/msg-publish.conf.wlcg + + + + + + + Real Time Monitor + +Harvester will use postgres database. Table lb20 with L&B servers to harvest (read-only), table jobs for result job states (read/write): + + + + glite-lb-harvester -C certfile -K keyfile --pg rtm/@:rtm -p 9004 + +In this case the L&B harvester will connect to database rtm on localhost as user rtm. For incoming notification it will request and listen only on port 9004. + + + + + + + Other recommended options + +Use glite-lb-harvester --help for the whole summary. + +For example: + + + + +Daemonizing and using syslog. + + + + + + +Decreasing verbosity (2 for errors and warnings only). + + + + + + + + + EXIT + +In non-daemon mode CTRL-C can be used. + +Use the pidfile in daemon mode (pidfile will vanish after exit): + +kill `cat /var/run/glite-lb-harvester.pid` + +All notifications are preserved on LB servers, and will expire later. 
You can +purge them at once, if they won't be needed: + +glite-lb-harvester --cleanup + + + + + EXIT STATUS + + + 0 + Success. + + + 1 + Reloading, used only internally for preventive restarts. + + + 2 + Error occurred. Messages go on console (foreground run) or into syslog (daemon run), depending on verbosity. + + + + + + AUTHOR + gLite L&B product team, CESNET. + + + diff --git a/org.glite.lb.harvester/examples/test.sh b/org.glite.lb.harvester/examples/test.sh new file mode 100755 index 0000000..4bfba32 --- /dev/null +++ b/org.glite.lb.harvester/examples/test.sh @@ -0,0 +1,876 @@ +#! /bin/sh + + +usage() { +cat <&1| \ + grep timeleft| sed 's/^.* //'` + if [ "$timeleft" = "0:00:00" -o -z "$timeleft" ]; then + echo "Proxy certificate check failed."\ + " Aborting." + exit 1 + fi + else + echo "Can't check proxy cert (grid-proxy-info not found). If you do not have valid proxy certificate, set GLITE_HOST_KEY/GLITE_HOST_KEY - otherwise tests will fail!" + fi +# fi + identity=`X509_USER_KEY=${X509_USER_KEY} X509_USER_CERT=${X509_USER_CERT} $GLOBUS_LOCATION/bin/grid-proxy-info 2>&1| \ + grep identity| sed 's/^[^/]*//'` + + if [ -z "$GLITE_LB_TEST_DB" ]; then + GLITE_LB_TEST_DB="lbserver/@localhost:lbserver20test" + need_new_lb_db=1; + fi + DB_USER=`echo $GLITE_LB_TEST_DB| sed 's!/.*$!!'` + DB_HOST=`echo $GLITE_LB_TEST_DB| sed 's!^.*@!!' | sed 's!:.*!!'` + DB_NAME=`echo $GLITE_LB_TEST_DB| sed 's!^.*:!!'` + MYSQL_ARGS="-u ${GLITE_MYSQL_ROOT_USER:-root}" + [ -z "$GLITE_MYSQL_ROOT_PASSWORD" ] || ARGS="--password=${GLITE_MYSQL_ROOT_PASSWORD} $MYSQL_ARGS" + + if [ -z "$GLITE_RTM_TEST_DB" ]; then + GLITE_RTM_TEST_DB="rtm/@localhost:rtmtest" + need_new_rtm_db=1; + fi + RTM_USER=`echo $GLITE_RTM_TEST_DB| sed 's!/.*$!!'` + RTM_HOST=`echo $GLITE_RTM_TEST_DB| sed 's!^.*@!!' 
| sed 's!:.*!!'` + RTM_NAME=`echo $GLITE_RTM_TEST_DB| sed 's!^.*:!!'` + PG_ARGS="-U ${GLITE_PG_ROOT_USER:-postgres}" + + #other stuff + GLITE_LB_TEST_SERVER_PORT=${GLITE_LB_TEST_SERVER_PORT:-"10000"} + GLITE_LB_TEST_PIDFILE=${GLITE_LB_TEST_PIDFILE:-"/tmp/glite-lb-test.pid"} + GLITE_RTM_TEST_PIDFILE=${GLITE_RTM_TEST_PIDFILE:-"/tmp/glite-rtm-test.pid"} + GLITE_RTM_TEST_TTL=${GLITE_RTM_TEST_TTL:-"60"} + + jobreg="$GLITE_LOCATION/examples/glite-lb-job_reg -m `hostname -f`:${GLITE_LB_TEST_SERVER_PORT} -s UserInterface" + logev="$GLITE_LOCATION/bin/glite-lb-logevent -x -S `pwd`/LB/proxy.sockstore.sock -U localhost" + purge="$GLITE_LOCATION/bin/glite-lb-purge" + [ -x "$purge" ] || purge="$GLITE_LOCATION/sbin/glite-lb-purge" + # look for the debug harvester binary: installed examples dir first, then the build and current dirs (the last match wins) + for dir in "$GLITE_LOCATION/examples" "`pwd`/../build" "`pwd`"; do + if [ -x "$dir/glite-lb-harvester-dbg" ]; then + rtm="$dir/glite-lb-harvester-dbg" + fi + if [ -x "$dir/harvester-dbg" ]; then + rtm="$dir/harvester-dbg" + fi + done + if [ -z "$rtm" ]; then + echo "glite-lb-harvester-dbg not found" + return 1 + fi + + if echo "$GLITE_RTM_TEST_ADDITIONAL_ARGS" | grep -- '[^-]\?\(--old\>\|-o\>\)' >/dev/null; then + n_notifs=1 + else + n_notifs=2 + fi + + rm -f log +} + + +drop_db() { +return 0 + [ -z "$lb_db_created" ] || mysqladmin -f $MYSQL_ARGS drop "$DB_NAME" + [ -z "$rtm_db_created" ] || dropdb $PG_ARGS "$RTM_NAME" +} + + +create_db() { + echo -n "mysql." + # create database when needed + if [ "x$need_new_lb_db" = "x1" ]; then + mysqladmin -f $MYSQL_ARGS drop $DB_NAME > /dev/null 2>&1 + echo -n "." + mysqladmin -f $MYSQL_ARGS create $DB_NAME && \ + echo -n "." + mysql $MYSQL_ARGS -e "GRANT ALL on $DB_NAME.* to $DB_USER@$DB_HOST" && \ + echo -n "." + mysql -u $DB_USER $DB_NAME -h $DB_HOST < $GLITE_LOCATION/etc/glite-lb-dbsetup.sql || return $? + echo -n "." 
+ mkdir -p `pwd`/LB + cat > `pwd`/LB/glite-lb-index.conf << EOF +[ + JobIndices = { + [ type = "system"; name = "lastUpdateTime" ] + } +] +EOF + LBDB="$GLITE_LB_TEST_DB" $GLITE_LOCATION/bin/glite-lb-bkindex -r `pwd`/LB/glite-lb-index.conf || return $? + lb_db_created="1" + echo -n "." + else + cleanup_mysql || return $? + fi + echo -n "OK psql." + if [ "x$need_new_rtm_db" = "x1" ]; then + dropdb $PG_ARGS "$RTM_NAME" >/dev/null 2>&1 + echo -n "." +# createuser $PG_ARGS -A -D "$RTM_NAME" >/dev/null 2>&1 +# echo -n "." + createdb $PG_ARGS --encoding "UTF-8" --owner "$RTM_USER" "$RTM_NAME" >psql-create.log 2>&1 || return $? + rm psql-create.log + echo -n "." + rtm_db_created="1" + echo "\i test.sql" | psql -AtF ',' -U "$RTM_USER" "$RTM_NAME" >/dev/null || return $? + echo -n "." + else + cleanup_pg || return $? + fi + echo "OK" +} + + +cleanup_mysql() { + cat << EOF | mysql -u $DB_USER $DB_NAME -h $DB_HOST || return $? +DELETE FROM acls; +DELETE FROM events; +DELETE FROM events_flesh; +DELETE FROM jobs; +DELETE FROM long_fields; +DELETE FROM notif_jobs; +DELETE FROM notif_registrations; +DELETE FROM server_state; +DELETE FROM short_fields; +DELETE FROM states; +DELETE FROM status_tags; +DELETE FROM users; +DELETE FROM zombie_jobs; +DELETE FROM zombie_prefixes; +DELETE FROM zombie_suffixes; +EOF + echo -n "." +} + + +cleanup_pg() { + cat << EOF | psql -AtF ',' -U "$RTM_USER" "$RTM_NAME" >/dev/null || return $? +DELETE FROM jobs; +DELETE FROM notifs; +EOF + echo -n "." 
+} + + +run_daemons() { + mkdir -p LB/dump LB/purge LB/voms 2>/dev/null + + # checks + if [ -f "${GLITE_LB_TEST_PIDFILE}" ]; then + echo "L&B server already running (${GLITE_LB_TEST_PIDFILE}, `cat ${GLITE_LB_TEST_PIDFILE}`)" + quit=1 + fi + if [ -f "${GLITE_RTM_TEST_PIDFILE}" ]; then + echo "L&B harvester already running (${GLITE_RTM_TEST_PIDFILE}, `cat ${GLITE_RTM_TEST_PIDFILE}`)" + quit=1 + fi + if [ -e "`pwd`/LB/notif.sock" ]; then + if [ "`lsof -t $(pwd)/LB/notif.sock | wc -l`" != "0" ]; then + echo "Notification interlogger already running (using LB/notif.sock, `lsof -t $(pwd)/LB/notif.sock`)" + quit=1 + fi + fi + if [ -e "`pwd`/LB/proxy-il.sock" ]; then + if [ "`lsof -t $(pwd)/LB/proxy-il.sock | wc -l`" != "0" ]; then + echo "Proxy interlogger already running (using LB/proxy-il.sock, `lsof -t $(pwd)/LB/proxy-il.sock`)" + quit=1 + fi + fi + [ -z "$quit" ] || exit 1 + + # run L&B server + echo -n "L" + X509_USER_KEY=${X509_USER_KEY} X509_USER_CERT=${X509_USER_CERT} \ + $GLITE_LOCATION/bin/glite-lb-bkserverd \ + -m $GLITE_LB_TEST_DB \ + -p $GLITE_LB_TEST_SERVER_PORT -w $(($GLITE_LB_TEST_SERVER_PORT + 3))\ + -i ${GLITE_LB_TEST_PIDFILE} \ + --withproxy -o `pwd`/LB/proxy.sock\ + --proxy-il-sock `pwd`/LB/proxy-il.sock --proxy-il-fprefix `pwd`/LB/proxy-data \ + -D `pwd`/LB/dump -S `pwd`/LB/purge \ + -V `pwd`/LB/voms \ + --notif-il-sock `pwd`/LB/notif.sock --notif-il-fprefix `pwd`/LB/notif-data \ + --super-user "$identity" > `pwd`/LB/glite-lb-test-pre.log 2>&1 + if [ x"$?" != x"0" ]; then + cat `pwd`/LB/glite-lb-test-pre.log + echo FAILED + drop_db; + exit 1 + fi + echo -n "B " + + # run L&B interlogger + echo -n "L" + X509_USER_KEY=${X509_USER_KEY} X509_USER_CERT=${X509_USER_CERT} \ + $GLITE_LOCATION/bin/glite-lb-interlogd \ + --file-prefix `pwd`/LB/proxy-data --socket `pwd`/LB/proxy-il.sock > `pwd`/LB/glite-interlog-test-pre.log 2>&1 + if [ x"$?" 
!= x"0" ]; then + cat `pwd`/LB/glite-interlog-test-pre.log + echo FAILED + kill_bkserver + drop_db; + exit 1 + fi + echo -n "I " + + # run L&B notification interlogger + echo -n "N" + X509_USER_KEY=${X509_USER_KEY} X509_USER_CERT=${X509_USER_CERT} \ + $GLITE_LOCATION/bin/glite-lb-notif-interlogd \ + --file-prefix `pwd`/LB/notif-data --socket `pwd`/LB/notif.sock > `pwd`/LB/glite-notif-test-pre.log 2>&1 + if [ x"$?" != x"0" ]; then + cat `pwd`/LB/glite-notif-test-pre.log + echo FAILED + kill_daemons + drop_db; + exit 1 + fi + echo -n "I " + + if ! start_harvester; then + kill_daemons; + drop_db; + exit 1 + fi + + # wait for pidfiles + i=0 + while [ ! -s "${GLITE_LB_TEST_PIDFILE}" -a $i -lt 20 ]; do + sleep 0.1 + i=$(($i+1)) + done + if [ ! -s "${GLITE_LB_TEST_PIDFILE}" ]; then + echo "Can't startup L&B server." + kill_daemons; + drop_db; + exit 1 + fi + + echo -n "notifs." + pg_wait 20 "SELECT refresh FROM notifs WHERE notifid IS NOT NULL" $n_notifs || return $? + refresh=`echo "$result" | head -n 1` + if [ -z "$refresh" ]; then + echo "FAIL" + return 1 + fi +} + + +start_harvester() { + # run L&B harvester server + echo -n "R" + rm -Rf RTM + mkdir RTM 2>/dev/null + echo "`hostname -f`:${GLITE_LB_TEST_SERVER_PORT}" > `pwd`/RTM/config.txt + X509_USER_KEY=${X509_USER_KEY} X509_USER_CERT=${X509_USER_CERT} \ + ${rtm} \ + -m $GLITE_RTM_TEST_DB \ + --pidfile ${GLITE_RTM_TEST_PIDFILE} \ + --ttl ${GLITE_RTM_TEST_TTL} \ + --history $((GLITE_RTM_TEST_TTL / 2)) \ + --debug 12 \ + --config `pwd`/RTM/config.txt \ + --daemonize ${GLITE_RTM_TEST_ADDITIONAL_ARGS} 2>`pwd`/RTM/glite-rtm-test-pre.log >`pwd`/RTM/notifs.log + if [ x"$?" != x"0" ]; then + cat `pwd`/RTM/glite-rtm-test-pre.log + echo FAILED + return 1 + fi + + i=0 + while [ ! -s "${GLITE_RTM_TEST_PIDFILE}" -a $i -lt 20 ]; do + sleep 0.1 + i=$(($i+1)) + done + if [ ! -s "${GLITE_RTM_TEST_PIDFILE}" ]; then + echo "Can't startup L&B harvester." 
+ kill_daemons; + drop_db; + exit 1 + fi + + echo -n "M " +} + + +cleanup_harvester() { + echo -n "cleaning up..." + X509_USER_KEY=${X509_USER_KEY} X509_USER_CERT=${X509_USER_CERT} \ + ${rtm} \ + -m $GLITE_RTM_TEST_DB \ + --cleanup \ + --debug 12 ${GLITE_RTM_TEST_ADDITIONAL_ARGS} >`pwd`/RTM/glite-rtm-test-cleanup.log 2>&1 + if [ x"$?" != x"0" ]; then + cat `pwd`/RTM/glite-rtm-test-cleanup.log + echo FAILED + return 1 + fi + echo -n "OK " +} + + +kill_daemons() { + pid1=`cat ${GLITE_LB_TEST_PIDFILE} 2>/dev/null` + [ -f "${GLITE_RTM_TEST_PIDFILE}" ] && pid2=`cat ${GLITE_RTM_TEST_PIDFILE}` + pid3=`lsof -t $(pwd)/LB/notif.sock 2>/dev/null` + pid4=`lsof -t $(pwd)/LB/proxy-il.sock 2>/dev/null` + [ ! -z "$pid1" ] && kill $pid1 + [ ! -z "$pid2" ] && kill -2 $pid2 + [ ! -z "$pid3" ] && kill $pid3 + [ ! -z "$pid4" ] && kill $pid4 + sleep 1; + [ ! -z "$pid1" ] && kill -9 $pid1 2>/dev/null + [ ! -z "$pid2" ] && kill -9 $pid2 2>/dev/null + [ ! -z "$pid3" ] && kill -9 $pid3 2>/dev/null + [ ! -z "$pid4" ] && kill -9 $pid4 2>/dev/null + rm -f "${GLITE_LB_TEST_PIDFILE}" "${GLITE_RTM_TEST_PIDFILE}" + rm -f `pwd`/LB/*.sock +} + + +# stop only the L&B server: polite kill, then SIGKILL after a second, then remove its pidfile +kill_bkserver() { + pid=`cat ${GLITE_LB_TEST_PIDFILE} 2>/dev/null` + if [ ! -z "$pid" ]; then + kill $pid; + sleep 1; + kill -9 $pid 2>/dev/null + fi + rm -f "${GLITE_LB_TEST_PIDFILE}" +} + + +# stop only the L&B harvester: polite kill, then SIGKILL after a second, then remove its pidfile +kill_harvester() { + pid=`cat ${GLITE_RTM_TEST_PIDFILE} 2>/dev/null` + if [ ! -z "$pid" ]; then + kill $pid + sleep 1; + kill -9 $pid 2>/dev/null + fi + rm -f "${GLITE_RTM_TEST_PIDFILE}" +} + + +reg() { + echo -n "R" + echo $jobreg $@ >> log + $jobreg $@ > jobreg.tmp + if [ $? -ne 0 ]; then + cat jobreg.tmp + rm -f jobreg.tmp + echo " FAIL!" 
+ return 1; + fi + script=`cat jobreg.tmp | tail -n 2` + rm -f jobreg.tmp + EDG_JOBID= + EDG_WL_SEQUENCE= + eval $script + if [ -z "$EDG_JOBID" -o -z "$EDG_WL_SEQUENCE" ]; then + echo " FAIL!" 
+ return 1; + fi + echo -n "G " +} + + +ev() { + echo -n "E" + echo $logev -j "$EDG_JOBID" -c "$EDG_WL_SEQUENCE" "$@" >> log + $logev -j "$EDG_JOBID" -c "$EDG_WL_SEQUENCE" "$@" 2> logev-err.tmp >logev.tmp + if [ $? -ne 0 ]; then + echo " FAIL!" + return 2; + fi + EDG_WL_SEQUENCE=`cat logev.tmp` + rm logev.tmp logev-err.tmp + echo -n "V " +} + + +# run SQL statement $1 in the harvester (postgres) database; sets $result (raw output) and $lines (row count) +pg_get() { + result= + lines= + # return right at the pipeline: once a test command has run, $? no longer holds the psql status + echo "$1" | psql -AtF ',' -U "$RTM_USER" "$RTM_NAME" > psql.tmp || return $? + result="`cat psql.tmp`" + lines=`wc -l psql.tmp | sed 's/^[ ]*//' | cut -f1 -d' '` +# rm psql.tmp + return 0 +} + + +pg_wait() { + timeout=$(($1*2)) + sql="$2" + n="$3" + + i=0 + found=0 + result= + echo -n "S" + echo "`date '+%Y-%m-%d %H:%M:%S'` $sql" >> log + while [ "$found" = "0" -a $i -lt $timeout ]; do + pg_get "$sql" || return $? + echo -n "." + if [ -z "$n" ]; then + if [ "$lines" != "0" ]; then found=1; fi + else + if [ "$lines" = "$n" ]; then found=1; fi + fi + if [ "$found" = "0" ]; then sleep 0.5; fi + i=$(($i+1)) + done + echo -n "Q " + result="$result" + echo "`date '+%Y-%m-%d %H:%M:%S'` $lines lines" >> log + if [ ! -z "$result" ]; then + echo "$result" | sed -e 's/\(.*\)/\t\1/' >> log + fi + return 0 +} + + +# run SQL statement $1 in the L&B server (mysql) database; sets $result (rows without header) and $lines (row count) +my_get() { + result= + lines= + echo "`date '+%Y-%m-%d %H:%M:%S'` $1" >> log + # return right at the pipeline: once a test command has run, $? no longer holds the mysql status + echo "$1" | mysql -B -u "$DB_USER" "$DB_NAME" > mysql.tmp || return $? + result=`cat mysql.tmp | tail -n +2` + lines=`echo "$result" | grep -v '^$' | wc -l | sed 's/^[ ]*//'` + echo "`date '+%Y-%m-%d %H:%M:%S'` $lines lines" >> log + if [ ! 
-z "$result" ]; then + echo "$result" | sed -e 's/\(.*\)/\t\1/' >> log + fi +# rm -f mysql.tmp + return 0 +} + + +# notif propagation +test_basic() { + ok=0 + + # submited + echo -n "submitted..." + reg || return $? + pg_wait 10 "SELECT jobid, state FROM jobs WHERE state='Submitted'" || return $? + if [ -z "$result" ]; then + echo "FAIL" + return 0 + fi + + # waiting + echo -n "waiting..." 
+ ev -s NetworkServer -e Accepted --from='UserInterface' --from_host=`hostname -f` --from_instance="pid$$" || return $? + pg_wait 10 "SELECT jobid, state FROM jobs WHERE state='Waiting'" || return $? + if [ -z "$result" ]; then + echo "FAIL" + return 0 + fi + + # running + echo -n "running..." + ev -s LogMonitor -e Running --node="worker node" || return $? + pg_wait 10 "SELECT jobid, state FROM jobs WHERE state='Running'" || return $? + if [ -z "$result" ]; then + echo "FAIL" + return 0 + fi + + ok=1 + echo "OK" +} + + +# proper notif registration cleanup +test_rebind() { + ok=0 + + # ---- active --- + echo -n "$n_notifs notifications " + my_get "SELECT notifid FROM notif_registrations" || return $? + # STATUS and JDL + if [ "$lines" != "$n_notifs" ]; then + echo "FAIL" + return 0 + fi + echo -n "OK " + + # ---- store & stop --- + echo -n "store&quit" + pid=`cat ${GLITE_RTM_TEST_PIDFILE}` + kill $pid + i=0 + while [ -s "${GLITE_RTM_TEST_PIDFILE}" -a $i -lt 200 ]; do + echo -n "." + sleep 0.5 + i=$(($i+1)) + done + if [ -s "${GLITE_RTM_TEST_PIDFILE}" ]; then + echo "FAIL" + return 0 + fi + echo -n "OK notifs " + my_get "SELECT notifid FROM notif_registrations" || return $? + if [ "$lines" != "$n_notifs" ]; then + echo "FAIL" + return 0 + fi + echo -n "OK " + + # ---- launch & rebind --- + if ! start_harvester; then + kill_daemons; + drop_db; + exit 1 + fi + + echo -n "bind" + pg_wait 20 "SELECT notifid FROM notifs WHERE notifid IS NOT NULL" $n_notifs || return $? + if [ x"$lines" != x"$n_notifs" ]; then + echo "FAIL" + return 0 + fi + + echo -n "Done " + ev -s LogMonitor -e Done --status_code=OK --reason="Finished, yeah!" --exit_code=0 || return $? 
+ pg_wait 20 "SELECT jobid, state FROM jobs WHERE state='Done'" || return $? + if [ -z "$result" ]; then + echo "FAIL" + return 0 + fi + + ok=1 + echo "OK" +} + + +# harvester shutdown with SIGINT, explicit --cleanup of the notifications, then a fresh start +test_cleanup() { + ok=0 + + # ---- deep stop --- + echo -n "deep quit" + pid=`cat ${GLITE_RTM_TEST_PIDFILE}` + kill -2 $pid + i=0 + while [ -s "${GLITE_RTM_TEST_PIDFILE}" -a $i -lt 200 ]; do + echo -n "." + sleep 0.5 + i=$(($i+1)) + done + # pidfile still present after the wait loop: the harvester did not quit + if [ -s "${GLITE_RTM_TEST_PIDFILE}" ]; then + echo "FAIL" + return 0 + fi + + echo -n "$n_notifs notifications..." + my_get "SELECT notifid FROM notif_registrations" || return 1 + if [ "$lines" != "$n_notifs" ]; then + echo "FAIL" + return 0 + fi + + cleanup_harvester || return $? + echo -n "0 notifications..." + my_get "SELECT notifid FROM notif_registrations" || return 1 + if [ "$lines" != "0" ]; then + echo "FAIL" + return 0 + fi + + echo -n "cleandb." + cleanup_pg || return $? + start_harvester || return $? + + echo -n "notifs." + pg_wait 20 "SELECT refresh FROM notifs WHERE notifid IS NOT NULL" $n_notifs || return $? + refresh=`echo "$result" | head -n 1` + if [ -z "$refresh" ]; then + echo "FAIL" + return 0 + fi + + ok=1 + echo "OK" +} + + +test_refresh() { + ok=0 + + echo -n "refresh." + pg_wait $((GLITE_RTM_TEST_TTL * 3 / 4)) "SELECT notifid FROM notifs WHERE notifid IS NOT NULL AND refresh>'$refresh'" $n_notifs || return $? + if [ -z "$result" ]; then + echo "FAIL" + return 0 + fi + + ok=1 + echo "OK" +} + + +test_jdl() { + ok=0 + +# kill_daemons +# cleanup_mysql && cleanup_pg || return $? +# run_daemons || return $? + + # need to wait for notifications to avoid bootstrap + echo -n "notifs." + pg_wait 20 "SELECT refresh FROM notifs WHERE notifid IS NOT NULL" $n_notifs || return $? 
+ refresh=`echo "$result" | head -n 1` + if [ -z "$refresh" ]; then + echo "FAIL" + return 0 + fi + echo -n "OK " + + echo -n "submitted..." + reg || return $? + pg_wait 10 "SELECT jobid, state FROM jobs WHERE state='Submitted'" || return $? 
+ if [ -z "$result" ]; then + echo "FAIL" + return 0 + fi + echo -n "OK " + + echo -n "waiting..." + cat > jdl.txt << EOF +[ + VirtualOrganisation = "TestingVO"; +] +EOF + ev -s NetworkServer -e Accepted --from='UserInterface' --from_host=`hostname -f` --from_instance="pid$$" || return $? + ev -s NetworkServer -e EnQueued --queue "very long and chaotic queue" --job=`pwd`/jdl.txt --result START || return $? + ev -s NetworkServer -e EnQueued --queue "very long and chaotic queue" --job="`cat jdl.txt`" --result OK || return $? + pg_wait 10 "SELECT jobid, state FROM jobs WHERE state='Waiting'" || return $? + if [ -z "$result" ]; then + echo "FAIL" + return 0 + fi + echo -n "OK " + + echo -n "waiting and VO..." + pg_wait 10 "SELECT jobid, state FROM jobs WHERE state='Waiting' AND vo='TestingVO'" || return $? + if [ -z "$result" ]; then + echo "FAIL" + return 0 + fi + echo -n "OK " + + # + # test JDL via VO change + # + # never do it at home ;-) + # + + echo -n "changed JDL..." + ev -s NetworkServer -e EnQueued --queue "very long and chaotic queue" --job="[ VirtualOrganisation=\"TestingVO2\";]" --result OK || return $? + pg_wait 10 "SELECT jobid, state FROM jobs WHERE state='Waiting' AND vo='TestingVO2'" || return $? + if [ -z "$result" ]; then + echo "FAIL" + return 0 + fi + echo -n "OK " + + echo -n "changed after waiting..." + ev -s WorkloadManager -e EnQueued --queue "very long and chaotic queue" --destination LogMonitor --dest_host localhost --dest_instance pid$$ --job "(car 'testing=true)" --result=OK || return $? + pg_wait 10 "SELECT jobid, state FROM jobs WHERE state='Ready' AND vo='TestingVO2'" || return $? + if [ -z "$result" ]; then + echo "FAIL" + return 0 + fi + echo -n "ready..." + ev -s NetworkServer -e EnQueued --queue "very long and chaotic queue" --job="[ VirtualOrganisation=\"TestingVO3\";]" --result OK || return $? + pg_wait 10 "SELECT jobid, state FROM jobs WHERE state='Waiting' AND vo='TestingVO3'" || return $? 
+ if [ -z "$result" ]; then + echo "FAIL" + return 0 + fi + + ok=1 + echo "OK" +} + + +test_purge() { + ok=0 + + echo -n "purge." + pg_get "SELECT jobid FROM jobs" || return $? + if [ -z "$lines" -o $lines -le 0 ]; then + echo "no jobs! FAIL" + return 0 + fi + echo -n "P" + jobunique=`echo "$result" | head -n 1 | tr -d '\n'` + jobid="https://`hostname -f`:${GLITE_LB_TEST_SERVER_PORT}/$jobunique" + echo $jobid > jobs + echo "${purge} -a1 -c1 -n1 -e1 -o1 -m "`hostname -f`:${GLITE_LB_TEST_SERVER_PORT}" -j jobs" >> log + echo " jobs = `cat jobs` | tr -d '\n'" >> log + X509_USER_KEY=${X509_USER_KEY} X509_USER_CERT=${X509_USER_CERT} ${purge} -l -a1 -c1 -n1 -e1 -o1 -m "`hostname -f`:${GLITE_LB_TEST_SERVER_PORT}" -j jobs 2> purge-err.tmp >purge.tmp + if [ $? -ne 0 ]; then + echo " FAIL!" + return 2; + fi + rm -f jobs + echo -n "R " + + pg_wait 10 "SELECT * FROM jobs WHERE jobid='$jobunique'" 0 || return $? + if [ x"$lines" != x"0" ]; then + echo "FAIL" + return 0 + fi + + ok=1 + echo "OK" +} + + +quit() { + if [ x"$started" = x"" ]; then + kill_daemons + drop_db + fi + exit 1 +} + + +fatal() { + echo "Fatal error, end" + quit +} + + +start() { + echo -n "Launch: " + create_db || fatal + run_daemons || fatal + echo "OK" + started=1 +} + + +stop() { + kill_daemons + drop_db +} + + +test() { + echo -n "Basic: " + test_basic || fatal + if [ $ok != 1 ]; then quit; fi + + echo -n "Rebind: " + test_rebind || fatal + if [ $ok != 1 ]; then quit; fi + + echo -n "Cleanup: " + test_cleanup || fatal + if [ $ok != 1 ]; then quit; fi + + echo -n "Refresh: " + test_refresh || fatal + if [ $ok != 1 ]; then quit; fi + + echo -n "JDL: " + test_jdl || fatal + if [ $ok != 1 ]; then quit; fi + +# echo -n "Purge: " +# test_purge || fatal +# if [ $ok != 1]; then quit; fi +} + + +case x"$1" in +xstart) + init + start + ;; + +xstop) + init + stop + ;; + +xtest) + init + test + ;; + +x) + init + start + test + stop + ;; + +*) + usage + exit 1 +esac diff --git 
a/org.glite.lb.harvester/examples/test.sql b/org.glite.lb.harvester/examples/test.sql new file mode 100644 index 0000000..15736f6 --- /dev/null +++ b/org.glite.lb.harvester/examples/test.sql @@ -0,0 +1,55 @@ +-- +-- Initialization (replace pgsql by actual postgres superuser): +-- +-- 1) grant privileges, something like this in $data/pg_hba.conf: +-- local all all trust +-- +-- 2) create user: +-- createuser -U pgsql rtm +-- +-- 3) create database: +-- createdb -U pgsql --owner rtm rtm +-- +-- 4) create tables: +-- psql -U rtm rtm < test.sql +-- + +CREATE TABLE "jobs" ( + jobid VARCHAR PRIMARY KEY, + lb VARCHAR, + ce VARCHAR, + queue VARCHAR, + rb VARCHAR, + ui VARCHAR, + state VARCHAR, + state_entered TIMESTAMP, + rtm_timestamp TIMESTAMP, + active BOOLEAN, + state_changed BOOLEAN, + registered TIMESTAMP, + vo VARCHAR +); + +CREATE TABLE "lb20" ( + ip TEXT NOT NULL, + branch TEXT NOT NULL, + serv_version TEXT NOT NULL, + monitored BOOLEAN DEFAULT FALSE, + last_seen DATE, + first_seen DATE, + + PRIMARY KEY(ip) +); + +CREATE TABLE "notifs" ( + lb VARCHAR, + port INTEGER, + notifid VARCHAR, + notiftype VARCHAR, + valid TIMESTAMP, + refresh TIMESTAMP, + last_update TIMESTAMP, + errors INTEGER, + + PRIMARY KEY(lb, port, notiftype) +); diff --git a/org.glite.lb.harvester/project/ChangeLog b/org.glite.lb.harvester/project/ChangeLog new file mode 100644 index 0000000..46d50c2 --- /dev/null +++ b/org.glite.lb.harvester/project/ChangeLog @@ -0,0 +1,17 @@ +1.0.0-1 +- Initial version + +1.0.1-1 +- Changes for Real Time Monitor +- Workaround for older Globus +- Minor memleak fixes + +1.0.2-1 +- Less verbosity in reporting single notifications +- Fixed postgres dependency + +1.0.3-1 +- Extended documentation +- Fixed purging +- Fixed build issues + diff --git a/org.glite.lb.harvester/project/package.description b/org.glite.lb.harvester/project/package.description new file mode 100644 index 0000000..c969e8c --- /dev/null +++ 
b/org.glite.lb.harvester/project/package.description @@ -0,0 +1,3 @@ 
+L&B Harvester gathers information about jobs from L&B servers using efficient L&B notification mechanism. It manages notifications and keeps them in a persistent storage (file or database table) to reuse later on next launch. It takes care about refreshing notifications and queries L&B servers back when some notification expires. + +The tool was initially written for Real Time Monitor (project at Imperial College in London), later was extended by MSG publish messaging mechanism for WLCG. diff --git a/org.glite.lb.harvester/project/package.summary b/org.glite.lb.harvester/project/package.summary new file mode 100644 index 0000000..062972a --- /dev/null +++ b/org.glite.lb.harvester/project/package.summary @@ -0,0 +1 @@ +Enhanced L&B notification client. diff --git a/org.glite.lb.harvester/project/version.properties b/org.glite.lb.harvester/project/version.properties new file mode 100644 index 0000000..38e2f93 --- /dev/null +++ b/org.glite.lb.harvester/project/version.properties @@ -0,0 +1,2 @@ +module.version=1.0.3 +module.age=1 diff --git a/org.glite.lb.harvester/src/harvester.c b/org.glite.lb.harvester/src/harvester.c new file mode 100644 index 0000000..c105189 --- /dev/null +++ b/org.glite.lb.harvester/src/harvester.c @@ -0,0 +1,2691 @@ +#ident "$Header$" + +/* + * Real time monitor. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef WITH_LBU_DB +#include +#include +#endif +#include +#ifndef WITH_OLD_LB +#include +#endif +#include +#include + + +// default number of the threads/sockets +#define RTM_THREADS 5 +// requested notification life in seconds +#define RTM_NOTIF_TTL 86400 +// consider end of the notification life sooner +#define RTM_NOTIF_TTL_TO_DEAD 2 +// poll timeout in seconds +#define RTM_NOTIF_READ_TIMEOUT 5 +// recheck LB server after error in seconds +#define RTM_ERROR_REPEAT_RATE 120 +// initial read loop time (can be infinity) +#define RTM_NOTIF_LOOP_MAX_TIME 1800 +// idle "quit" poll +#define RTM_IDLE_POLL_TIME 0.5 +// purge & summary jobs poll time +#define RTM_SUMMARY_POLL_TIME 600 +// preventive suicide against memleaks and ugly things (12 h) +#define RTM_SUICIDE_TIME 43200 + +#define RTM_SUMMARY_JOBS 100 + +#define RTM_DB_TABLE_JOBS "jobs" +#define RTM_DB_TABLE_LBS "lb20" +#define DBPAR(N) ("$" (N)) +#define DBAMP "\"" + +// debug message level: insane, debug, progress, warning, error +#define INS 4 +#define DBG 3 +#define INF 2 +#define WRN 1 +#define ERR 0 +#define DEBUG_LEVEL_MASK 7 +#define DEBUG_GUARD_MASK 8 + +// internal quit codes +#define RTM_QUIT_RUN 0 +#define RTM_QUIT_CLEANUP 1 +#define RTM_QUIT_PRESERVE 2 +#define RTM_QUIT_RELOAD 3 + +// exit codes +#define RTM_EXIT_OK 0 +#define RTM_EXIT_RELOAD 1 +#define RTM_EXIT_ERROR 2 + +#define RTM_NOTIF_TYPE_STATUS 1 +#define RTM_NOTIF_TYPE_JDL 2 +#define RTM_NOTIF_TYPE_OLD 3 +#define RTM_NOTIF_TYPE_DONE 4 + +#ifdef RTM_NO_COLORS +#define RTM_TTY_RED "" +#define RTM_TTY_GREEN "" +#define RTM_TTY_RST "" +#else +#define RTM_TTY_RED "\e[1;31m" +#define RTM_TTY_GREEN "\e[1;32m" +#define RTM_TTY_RST "\e[0;39m" +#endif + +#ifndef LINE_MAX +#define LINE_MAX 1023 +#endif + +#define RTM_FILE_NOTIFS 
"/var/tmp/notifs.txt" +#define RTM_FILE_NOTIF_PRINTF "%s\t%s\t%s\t%s\t%s\t%d\n" +#define RTM_FILE_NOTIF_SCANF "%511[^\t]\t%511[^\t]\t%511[^\t]\t%511[^\t]\t%511[^\t]\t%511[^\t\r\n]\n" +#define RTM_FILE_NOTIF_NUM 6 + +#define WLCG_FILENAME_TEMPLATE "/tmp/wlcg_%02d_XXXXXX" +#define WLCG_COMMAND_MESSAGE "/opt/lcg/bin/msg-publish -c /opt/lcg/etc/msg-publish.conf org.wlcg.usage.jobStatus %s" +#define WLCG_BINARY "/usr/bin/msg-publish" +#define WLCG_CONFIG "/etc/msg-publish/msg-publish.conf" +#define WLCG_TOPIC "org.wlcg.usage.jobStatus" + + +#ifdef WITH_OLD_LB +#define glite_jobid_t edg_wlc_JobId +#define glite_jobid_create edg_wlc_JobIdCreate +#define glite_jobid_recreate edg_wlc_JobIdRecreate +#define glite_jobid_dup edg_wlc_JobIdDup +#define glite_jobid_free edg_wlc_JobIdFree +#define glite_jobid_parse edg_wlc_JobIdParse +#define glite_jobid_unparse edg_wlc_JobIdUnparse +#define glite_jobid_getServer edg_wlc_JobIdGetServer +#define glite_jobid_getServerParts edg_wlc_JobIdGetServerParts +#define glite_jobid_getUnique edg_wlc_JobIdGetUnique +#define edg_wll_NotifNew(CTX, CONDS, FLAGS, SOCK, LADDR, ID, VALID) edg_wll_NotifNew((CTX), (CONDS), (SOCK), (LADDR), (ID), (VALID)) +#define edg_wll_JDLField(STAT, NAME) NULL +#ifndef GLITE_JOBID_DEFAULT_PORT +#define GLITE_JOBID_DEFAULT_PORT GLITE_WMSC_JOBID_DEFAULT_PORT +#endif +#endif + +// TODO: ipv6? 
:-) + +typedef struct { + edg_wll_NotifId id; // notification context (after bootstrap/rebind) + char *id_str; // notification id string + int type; // for distinguish various notifications on one LB + char *server; // LB server hostname + unsigned int port; // LB server port + time_t valid; // maximal validity of the notification + time_t refresh; // when try to refresh (before expiration), + // used for retry time after error too + double last_update; // last change from the server + int active; // helper (compare LB servers and notifications, + // if to save to the persistent storage) + int error; // errors counter +} notif_t; + +typedef struct { + int id; + pthread_t thread; + notif_t *notifs; + int nservers; + time_t next_refresh; + char time_s[100]; + char *dash_filename; + int dash_fd; +#ifdef WITH_LBU_DB + glite_lbu_DBContext dbctx; + glite_lbu_Statement insertcmd, updatecmd, updatecmd_vo, updatecmd_mon, deletecmd; + int dbcaps; +#endif +} thread_t; + +typedef struct { + char *local_address; + int nthreads; + char *config_file; + char *notif_file; + int debug; + int guard; + int daemonize; + char *pidfile; + int dive; + char *dbcs; // DB connection string + char *cert, *key; + int ttl; // requested time to live (validity) of the notifications + int cleanup; // if to clean up notifications on LB servers + int wlcg; // dashboard messaging + int wlcg_no_remove; // don't remove temporary files (for debugging) + char *wlcg_binary; // path msg-publish binary + char *wlcg_config; // msg config file + char *wlcg_topic; // msg topic + int wlcg_flush; // send message for eachnotification + int silly; // old LB 1.9 mode + int no_purge; // disabled reaction on purge state + + int nservers; + notif_t *notifs; +} config_t; + +typedef struct { + notif_t *notifs; + int n, maxn; + pthread_mutex_t lock; + double last_check; + int was_summary; // flag for debugging +#ifdef WITH_LBU_DB + glite_lbu_DBContext dbctx; +#endif +} db_t; + + +static const char rcsid[] = "@(#)$Id$"; + 
+static int rtm2syslog[] = { + LOG_ERR, + LOG_WARNING, + LOG_INFO, + LOG_DEBUG, + LOG_DEBUG, +}; + +static const struct option opts[] = { + { "wlcg-binary", required_argument, NULL, 0}, + { "wlcg-config", required_argument, NULL, 0}, + { "wlcg-topic", required_argument, NULL, 0}, + { "wlcg-flush", no_argument, NULL, 0}, + { "help", no_argument, NULL, 'h'}, + { "version", no_argument, NULL, 'v'}, + { "threads", required_argument, NULL, 's'}, + { "debug", required_argument, NULL, 'd'}, + { "daemonize", no_argument, NULL, 'D'}, + { "pidfile", required_argument, NULL, 'i'}, + { "ttl", required_argument, NULL, 't'}, + { "history", required_argument, NULL, 'H'}, + { "config", required_argument, NULL, 'c'}, + { "notifs", required_argument, NULL, 'n'}, + { "port", required_argument, NULL, 'p'}, + { "pg", required_argument, NULL, 'm'}, + { "cert", required_argument, NULL, 'C'}, + { "key", required_argument, NULL, 'K'}, + { "wlcg", no_argument, NULL, 'w'}, + { "old", no_argument, NULL, 'o'}, + { "cleanup", no_argument, NULL, 'l'}, + { "no-purge", no_argument, NULL, 'u'}, + { NULL, no_argument, NULL, 0} +}; + +static const char *opts_line = "hvs:d:Di:t:H:c:n:p:m:C:K:wolu"; + +config_t config = { + local_address: NULL, + nthreads: RTM_THREADS, + config_file: NULL, + notif_file: NULL, + debug: DBG, + guard: 1, + dive: 10800, + dbcs: NULL, + cert: NULL, + key: NULL, + ttl: RTM_NOTIF_TTL, + cleanup: 0, + wlcg: 0, + silly: 0, + no_purge: 0, + + nservers: 0, + notifs: NULL, +}; +db_t db = { + notifs: NULL, + n: 0, + maxn: 0, + lock: PTHREAD_MUTEX_INITIALIZER, +#ifdef WITH_LBU_DB + dbctx: NULL +#endif +}; +thread_t *threads = NULL; +volatile sig_atomic_t quit = RTM_QUIT_RUN; + +static int listen_port = 0; + +#define lprintf(T, LEVEL, FMT, ARGS...) \ + if ((LEVEL) <= config.debug) lprintf_func((T), (LEVEL), (FMT), ##ARGS) +#define lprintf_ctx(T, LEVEL, CTX, FMT, ARGS...) 
\ + if ((LEVEL) <= config.debug) lprintf_ctx_func((T), (CTX), (LEVEL), (FMT), ##ARGS) +#define lprintf_dbctx(T, LEVEL, FMT, ARGS...) \ + if ((LEVEL) <= config.debug) lprintf_dbctx_func((T), (LEVEL), (FMT), ##ARGS) + +#ifdef WITH_OLD_LB +int edg_wll_gss_initialize() { + if (globus_module_activate(GLOBUS_GSI_GSSAPI_MODULE) != GLOBUS_SUCCESS) return EINVAL; + return 0; +} +#endif + +void lvprintf_func(thread_t *t, const char *description, int level, const char *fmt, va_list ap) { + char prefix[10]; + char *msg, *line; + + if (t) snprintf(prefix, sizeof prefix, "[%02d]", t->id); + else memcpy(prefix, "[main]", 8); + vasprintf(&msg, fmt, ap); + if (description) asprintf(&line, "%s %s, %s\n", prefix, msg, description); + else asprintf(&line, "%s %s\n", prefix, msg); + free(msg); + + if (level <= WRN && !config.daemonize) fprintf(stderr, RTM_TTY_RED); + if (config.daemonize) { + openlog(NULL, LOG_PID | LOG_CONS, LOG_DAEMON); + syslog(rtm2syslog[level], "%s", line); + closelog(); + } else { + fputs(line, stderr); + } + if (level <= WRN && !config.daemonize) fprintf(stderr, RTM_TTY_RST); + + free(line); +} + + +void lprintf_func(thread_t *t, int level, const char *fmt, ...) { + va_list ap; + + va_start(ap, fmt); + lvprintf_func(t, NULL, level, fmt, ap); + va_end(ap); +} + + +void lprintf_ctx_func(thread_t *t, edg_wll_Context ctx, int level, const char *fmt, ...) { + va_list ap; + char *errText, *errDesc, *s; + + va_start(ap, fmt); + edg_wll_Error(ctx, &errText, &errDesc); + asprintf(&s, "%s: %s", errText, errDesc); + lvprintf_func(t, s, level, fmt, ap); + free(errText); + free(errDesc); + free(s); + va_end(ap); +} + + +#ifdef WITH_LBU_DB +void lprintf_dbctx_func(thread_t *t, int level, const char *fmt, ...) { + va_list ap; + char *errText = NULL, *errDesc = NULL, *s = NULL; + glite_lbu_DBContext dbctx = t ? 
t->dbctx : db.dbctx; + + va_start(ap, fmt); + if (dbctx) { + glite_lbu_DBError(dbctx, &errText, &errDesc); + asprintf(&s, "%s: %s", errText, errDesc); + } + lvprintf_func(t, s, level, fmt, ap); + free(errText); + free(errDesc); + free(s); + va_end(ap); +} +#endif + +#ifndef WITH_LBU_DB +time_t glite_lbu_StrToTime(const char *str) { + struct tm tm; + + memset(&tm,0,sizeof(tm)); + putenv("TZ=UTC"); tzset(); + sscanf(str,"%4d-%02d-%02d %02d:%02d:%02d", + &tm.tm_year,&tm.tm_mon,&tm.tm_mday, + &tm.tm_hour,&tm.tm_min,&tm.tm_sec); + tm.tm_year -= 1900; + tm.tm_mon--; + + return mktime(&tm); +} + +double glite_lbu_StrToTimestamp(const char *str) { + struct tm tm; + double sec; + + memset(&tm,0,sizeof(tm)); + putenv("TZ=UTC"); tzset(); + sscanf(str,"%4d-%02d-%02d %02d:%02d:%lf", + &tm.tm_year,&tm.tm_mon,&tm.tm_mday, + &tm.tm_hour,&tm.tm_min,&sec); + tm.tm_year -= 1900; + tm.tm_mon--; + tm.tm_sec = sec; + + return (sec - tm.tm_sec) + mktime(&tm); +} +#endif + + +// hacky time->string conversion +char *time2str(thread_t *t, time_t time) { + struct tm tm; + + if ((int)time <= 0) memcpy(t->time_s, "-", sizeof("-")); + else { + localtime_r(&time, &tm); + strftime(t->time_s, sizeof(t->time_s), "%F %T", &tm); + } + return t->time_s; +} + + +double rtm_gettimeofday() { + struct timeval tv; + + gettimeofday(&tv, NULL); + return tv.tv_sec + tv.tv_usec / 1000000.0; +} + + +void rtm_time2str(time_t t, char **str) { + struct tm *tm; + + if (t) { + tm = gmtime(&t); + asprintf(str,"%4d-%02d-%02d %02d:%02d:%02d",tm->tm_year+1900,tm->tm_mon+1, + tm->tm_mday,tm->tm_hour,tm->tm_min,tm->tm_sec); + } else + *str = strdup("-"); +} + + +void rtm_timestamp2str(double t, char **str) { + time_t tsec = t; + struct tm *tm = gmtime(&tsec); + + if (t) { + t = t - tsec + tm->tm_sec; + asprintf(str,"%4d-%02d-%02d %02d:%02d:%02.09f",tm->tm_year+1900,tm->tm_mon+1, + tm->tm_mday,tm->tm_hour,tm->tm_min,t); + } else + *str = strdup("-"); +} + + +int rtm_str2time(const char *s) { + time_t t; + + if (s && 
memcmp(s, "-", 2) != 0) { + t = glite_lbu_StrToTime(s); + if (t == (time_t)-1) return 0; + } else + t = 0; + + return t; +} + + +int rtm_str2timestamp(const char *s) { + double t; + + if (s && memcmp(s, "-", 2) != 0) { + t = glite_lbu_StrToTimestamp(s); + if (t <= 0.5) return 0.0; + } else + t = 0.0; + + return t; +} + + +int rtm_str2notiftype(const char *str) { + if (strcasecmp(str, "STATUS") == 0) return RTM_NOTIF_TYPE_STATUS; + if (strcasecmp(str, "DONE") == 0) return RTM_NOTIF_TYPE_DONE; + if (strcasecmp(str, "JDL") == 0) return RTM_NOTIF_TYPE_JDL; + if (strcasecmp(str, "OLD") == 0) return RTM_NOTIF_TYPE_OLD; + return -1; +} + + +const char *rtm_notiftype2str(int type) { + switch (type) { + case RTM_NOTIF_TYPE_STATUS: return "STATUS"; + case RTM_NOTIF_TYPE_DONE: return "DONE"; + case RTM_NOTIF_TYPE_JDL: return "JDL"; + case RTM_NOTIF_TYPE_OLD: return "OLD"; + default: return NULL; + } +} + + +/** + * Cut the network server hostname from the full URL (got from RegJob event). + * + * Formats (only the first one should be in the wild): + * https://wms2.egee.cesnet.cz:7443/glite_wms_wmproxy_server + * wms2.egee.cesnet.cz + * 147.228.1.129 + * HTTPS://[2001:0f68:0000:0000:0000:0000:1986:69af]:80/ + * 2001:0f68::1986:69af + */ +char* rtm_ns2hostname(const char *network_server) { + char *ns, *pos; + size_t len; + + if (strncasecmp(network_server, "https://", 8) == 0) { + ns = strdup(network_server + 8); + // first backslash - path + pos = strchr(ns, '/'); + if (pos) pos[0] = '\0'; + // last colon - port separator + pos = strrchr(ns, ':'); + if (pos) pos[0] = '\0'; + // brackets - IPv6 address + len = strlen(ns); + if (len >= 2 && ns[0] == '[' && ns[len - 1] == ']') { + pos = strndup(ns + 1, len - 2); + free(ns); + ns = pos; + } + return ns; + } else + return strdup(network_server); +} + + +void wlcg_timeval2str(struct timeval *t, char **str) { + struct tm *tm; + + tm = gmtime(&t->tv_sec); + asprintf(str,"%4d-%02d-%02dT%02d:%02d:%02dZ",tm->tm_year+1900,tm->tm_mon+1, + 
tm->tm_mday,tm->tm_hour,tm->tm_min,tm->tm_sec); +} + + +int wlcg_store_message(thread_t *t, __attribute((unused))notif_t *notif, edg_wll_JobStat *stat) { + unsigned int port; + int status = 0; + char *jobid_str = NULL, *state_str = NULL, *vo = NULL, *lbhost = NULL, *network_host = NULL; + char *wlcg_last_update_time_str = NULL, *wlcg_state_start_time_str = NULL; + + jobid_str = stat->jobId ? glite_jobid_unparse(stat->jobId) : strdup("Unknown"); + glite_jobid_getServerParts(stat->jobId, &lbhost, &port); + state_str = edg_wll_StatToString(stat->state); + vo = edg_wll_JDLField(stat,"VirtualOrganisation") ? : strdup("Unknown"); + network_host = stat->network_server ? rtm_ns2hostname(stat->network_server) : NULL; + + if (!t->dash_filename || !t->dash_fd) { + free(t->dash_filename); + asprintf(&t->dash_filename, WLCG_FILENAME_TEMPLATE, t->id); + if ((t->dash_fd = mkstemp(t->dash_filename)) == -1) { + status = errno; + lprintf(t, ERR, "can't create temporary file '%s': %s", t->dash_filename, strerror(status)); + free(t->dash_filename); + t->dash_filename = NULL; + goto quit; + } + } + + wlcg_timeval2str(&stat->lastUpdateTime, &wlcg_last_update_time_str); + wlcg_timeval2str(&stat->stateEnterTime, &wlcg_state_start_time_str); + + dprintf(t->dash_fd, "jobId: %s\n\ +stateName: %s\n\ +ownerDN: %s\n\ +voname: %s\n\ +bkHost: %s:%d\n\ +networkHost: %s\n\ +lastUpdateTime: %s\n\ +stateStartTime: %s\n\ +exitCode: %d\n\ +DoneCode: %d\n\ +destSite: %s\n\ +condorId: %s\n\ +StatusReason: %s\n\ +EOT\n", jobid_str, state_str, stat->owner, vo, lbhost, port, network_host ? : "unknown", wlcg_last_update_time_str, wlcg_state_start_time_str, stat->exit_code, stat->done_code, stat->destination ? : "NULLByPublisher", stat->condorId ? : "0", stat->reason && stat->reason[strspn(stat->reason, " \t\n\r")] != '\0' ? 
stat->reason : "UNAVAILABLE By Publisher"); + + free(wlcg_last_update_time_str); + free(wlcg_state_start_time_str); +quit: + free(jobid_str); + free(lbhost); + free(network_host); + free(state_str); + free(vo); + return status; +} + + +int wlcg_send_message(thread_t *t) { + int status = 0; + char *command; + + // WLCG message + if (t->dash_fd) { // send only if anything to send + close(t->dash_fd); + asprintf(&command, "'%s' -c '%s' '%s' '%s'", config.wlcg_binary, config.wlcg_config, config.wlcg_topic, t->dash_filename); + lprintf(t, DBG, "calling %s", command); + switch (vfork()) { + case 0: + if (execlp("/bin/sh", "/bin/sh", "-c", command, NULL) == -1) { + lprintf(t, ERR, "can't exec '%s':%s", command, strerror(errno)); + } + _exit(1); + break; + case -1: + lprintf(t, ERR, "can't fork: %s", strerror(errno)); + break; + default: + break; + } + wait(&status); + free(command); + if (WIFEXITED(status)) { + status = WEXITSTATUS(status); + if (status) { + lprintf(t, WRN, "%s exited with %d", config.wlcg_binary, status); + } else { + lprintf(t, DBG, "%s exited successfully", config.wlcg_binary); + if (!config.wlcg_no_remove) remove(t->dash_filename); + } + } else { + lprintf(t, ERR, "%s not exited normally", config.wlcg_binary); + status = -1; + } + free(t->dash_filename); + t->dash_filename = NULL; + t->dash_fd = 0; + } + + return status; +} + + +void notif_free(notif_t *notif) { + edg_wll_NotifIdFree(notif->id); + free(notif->id_str); + free(notif->server); + memset(notif, 0, sizeof(notif_t)); +} + + +void notif_invalidate(notif_t *notif) { + edg_wll_NotifIdFree(notif->id); + free(notif->id_str); + notif->id = NULL; + notif->id_str = NULL; + notif->error = 0; +} + + +int notif_copy(notif_t *dest, notif_t *src) { + if (!src || !dest) return EINVAL; + memset(dest, 0, sizeof(notif_t)); + if (src->id) dest->id = edg_wll_NotifIdDup(src->id); + if (src->id_str) dest->id_str = strdup(src->id_str); + dest->type = src->type; + if (src->server) dest->server = 
strdup(src->server); + dest->port = src->port; + dest->valid = src->valid; + dest->refresh = src->refresh; + dest->last_update = src->last_update; + dest->active = src->active; + dest->error = src->error; + return 0; +} + + +#ifdef WITH_LBU_DB +static int db_init(thread_t *t, glite_lbu_DBContext *dbctx) { + int err, dbcaps; + + if (config.dbcs) { + if ((err = glite_lbu_InitDBContext(dbctx, GLITE_LBU_DB_BACKEND_PSQL)) != 0) { + lprintf_dbctx(t, ERR, "can't initialize DB context"); + return err; + } + while ((err = glite_lbu_DBConnect(*dbctx, config.dbcs)) != 0 && !quit) { + lprintf_dbctx(t, ERR, "can't connect to '%s'", config.dbcs); + lprintf(t, INF, "still trying..."); + sleep(5); + } + if (err == 0) { + if ((dbcaps = glite_lbu_DBQueryCaps(*dbctx)) == -1) { + lprintf_dbctx(t, ERR, "can't get database capabilities"); + dbcaps = 0; + } + lprintf(t, INF, "DB connected, cs: %s, capabilities: %d", config.dbcs, dbcaps); + if (t == NULL && (dbcaps & GLITE_LBU_DB_CAP_PREPARED) == 0) { + lprintf(NULL, WRN, "postgresql server doesn't support SQL prepared commands, recommended version >= 8.2"); + } + if (t) t->dbcaps = dbcaps; + return 0; + } else { + glite_lbu_FreeDBContext(*dbctx); + return err; + } + } else { + lprintf(t, DBG, "no DB configured (--pg option)"); + return -1; + } +} + + +static void db_free(__attribute((unused))thread_t *t, glite_lbu_DBContext dbctx) { + if (dbctx) { + glite_lbu_DBClose(dbctx); + glite_lbu_FreeDBContext(dbctx); + } +} +#endif + + +static notif_t *db_add_notif(char *notifid, int type, time_t valid, time_t refresh, double last_update, char *server, int port, int active, int errors) { + void *tmp; + notif_t *notif; + + if (db.n >= db.maxn) { + db.maxn = db.n + 20; + if ((tmp = realloc(db.notifs, db.maxn * sizeof(notif_t))) == NULL) return NULL; + db.notifs = (notif_t *)tmp; + memset(db.notifs + db.n, 0, (db.maxn - db.n) * sizeof(notif_t)); + } + notif = db.notifs + db.n; + notif->id_str = notifid; + notif->type = type; + notif->valid = valid; 
+ notif->refresh = refresh; + notif->last_update = last_update; + notif->server = server; + notif->port = port; + notif->active = active; + notif->error = errors; + db.n++; + + return notif; +} + + +static int db_save_notifs_file(thread_t *t) { + FILE *f; + char *filename = NULL; + int retval = 1; + notif_t *notif; + int i; + char *valid_str = NULL, *refresh_str = NULL, *last_update_str = NULL, *id_str = NULL; + + asprintf(&filename, "%s-new", config.notif_file); + if ((f = fopen(filename, "wt")) == NULL) { + lprintf(t, ERR, "can't write '%s': %s", filename, strerror(errno)); + goto quit; + } + + for (i = 0; i < db.n; i++) { + notif = db.notifs + i; + if (!notif->active) { + lprintf(t, DBG, "not saving inactive notif %s (%s), server %s:%d", notif->id_str, rtm_notiftype2str(notif->type), notif->server, notif->port); + continue; + } + + if (notif->id_str) id_str = strdup(notif->id_str); + else if (notif->error) asprintf(&id_str, "%s:%d", notif->server, notif->port); + if (id_str) { + rtm_time2str(notif->valid, &valid_str); + rtm_time2str(notif->refresh, &refresh_str); + rtm_timestamp2str(notif->last_update, &last_update_str); + + fprintf(f, RTM_FILE_NOTIF_PRINTF, id_str, rtm_notiftype2str(notif->type), valid_str, refresh_str, last_update_str, notif->error); + + free(valid_str); valid_str = NULL; + free(refresh_str); refresh_str = NULL; + free(last_update_str); last_update_str = NULL; + } + free(id_str); + id_str = NULL; + } + fclose(f); + if (rename(filename, config.notif_file) != 0) { + lprintf(t, ERR, "can't move new notification file '%s' to '%s': %s", filename, config.notif_file, strerror(errno)); + goto quit; + } + retval = 0; +quit: + free(filename); + free(valid_str); + free(refresh_str); + free(last_update_str); + return 0; +} + + +#if defined(WITH_RTM_SQL_STORAGE) && defined(WITH_LBU_DB) +static int db_save_notifs_sql(thread_t *t) { + int retval = 1; + notif_t *notif; + int i; + char *sql = NULL, *valid_str = NULL, *refresh_str = NULL, *last_update_str = 
NULL; + const char *type_str, *amp; + + for (i = 0; i < db.n; i++) { + notif = db.notifs + i; +/* + if (!notif->active) { + lprintf(t, INS, "not saving inactive notif %s (%s:%d)", notif->id_str, notif->server, notif->port); + continue; + } +*/ + type_str = rtm_notiftype2str(notif->type); + if (notif->id_str || notif->error) { + if (notif->valid) glite_lbu_TimeToDB(db.dbctx, notif->valid, &valid_str); + else valid_str = strdup("NULL"); + if (notif->refresh) glite_lbu_TimeToDB(db.dbctx, notif->refresh, &refresh_str); + else refresh_str = strdup("NULL"); + if (notif->last_update) glite_lbu_TimestampToDB(db.dbctx, notif->last_update, &last_update_str); + else last_update_str = strdup("NULL"); + amp = notif->id_str ? "'" : " "; + trio_asprintf(&sql, "UPDATE notifs SET notifid=%s%|Ss%s, valid=%s, refresh=%s, last_update=%s, errors=%d WHERE lb='%|Ss' AND port=%d AND notiftype='%|Ss'", amp, notif->id_str ? : "NULL", amp, valid_str, refresh_str, last_update_str, notif->error, notif->server, notif->port, type_str); + switch (glite_lbu_ExecSQL(db.dbctx, sql, NULL)) { + case 0: + // not found - insert + // can be handy when using file as input of LBs + free(sql); + trio_asprintf(&sql, "INSERT INTO notifs (lb, port, notifid, notiftype, valid, refresh, last_update, errors) VALUES ('%|Ss', %d, %s%|Ss%s, '%|Ss', %s, %s, %s, %d)", notif->server, notif->port, amp, notif->id_str ? 
: "NULL", amp, type_str, valid_str, refresh_str, last_update_str, notif->error); + switch (glite_lbu_ExecSQL(db.dbctx, sql, NULL)) { + case -1: + lprintf_dbctx(t, ERR, "notif '%s' (%s) insert failed", notif->id_str, type_str); + goto quit; + case 0: + lprintf(t, ERR, "notif '%s' (%s) not inserted for unknown reason", type_str); + break; + default: + lprintf(t, INS, "notif '%s' (%s) inserted", notif->id_str, type_str); + break; + } + break; + case -1: + lprintf_dbctx(t, ERR, "notif '%s' (%s) update failed", notif->id_str, type_str); + goto quit; + default: + lprintf(t, INS, "notif '%s' updated", notif->id_str); + break; + } + } else { + trio_asprintf(&sql, "UPDATE notifs SET notifid=NULL, valid=NULL, refresh=NULL, last_update=NULL WHERE lb='%|Ss' AND port=%d AND notiftype='%|Ss'", notif->server, notif->port, type_str); + switch (glite_lbu_ExecSQL(db.dbctx, sql, NULL)) { + case 0: + lprintf(t, INS, "cleared %s notif for %s:%d not found, ok", type_str, notif->server, notif->port); + break; + case -1: + lprintf_dbctx(t, ERR, "clearing notif %s for %s:%d failed", type_str, notif->server, notif->port); + goto quit; + default: + lprintf(t, INS, "cleared notif %s for %s:%d", type_str, notif->server, notif->port); + break; + } + } + free(sql); sql = NULL; + free(valid_str); valid_str = NULL; + free(refresh_str); refresh_str = NULL; + free(last_update_str); last_update_str = NULL; + } + retval = 0; +quit: + free(sql); + free(valid_str); + free(refresh_str); + free(last_update_str); + return 0; +} +#endif + + +static int db_save_notifs(thread_t *t) { +#if 0 + int i; + + for (i = 0; i < db.n; i++) { + notif_t *notif = db.notifs + i; + lprintf(NULL, DBG, "save: %s (%s), server: %s:%d, active: %d", notif->id_str, rtm_notiftype2str(notif->type), notif->server, notif->port, notif->active); + } +#endif + +#if defined(WITH_LBU_DB) + int i, ret; + notif_t *notif; + + // + // Keep monitored flag when: + // 1) used and opened DB + // 2) LB servers not from config file + // + if (t && 
t->dbctx && !config.config_file) { + for (i = 0; i < t->nservers; i++) { + notif = t->notifs + i; + + if (notif->type == RTM_NOTIF_TYPE_OLD || notif->type == RTM_NOTIF_TYPE_JDL) { + lprintf(t, DBG, "changing monitored flag of %d. notification for %s:%d to %d", i, notif->server, notif->port, notif->error ? 0 : 1); + if ((t->dbcaps & GLITE_LBU_DB_CAP_PREPARED) == 0) { + char *sql; + + trio_asprintf(&sql, "UPDATE " DBAMP RTM_DB_TABLE_LBS DBAMP " SET monitored=%s WHERE ip='%|Ss'", notif->error ? "false" : "true", notif->server); + ret = glite_lbu_ExecSQL(t->dbctx, sql, NULL); + free(sql); + } else { + ret = glite_lbu_ExecPreparedStmt(t->updatecmd_mon, 2, + GLITE_LBU_DB_TYPE_BOOLEAN, notif->error ? 0 : 1, + GLITE_LBU_DB_TYPE_VARCHAR, notif->server + ); + } + if (ret == -1) { + lprintf_dbctx(t, ERR, "can't update monitored flag in " RTM_DB_TABLE_LBS " table"); + return 1; + } + } + } + } +#endif + +#if defined(WITH_RTM_SQL_STORAGE) && defined(WITH_LBU_DB) + if (!db.dbctx) return db_save_notifs_file(t); + else return db_save_notifs_sql(t); +#else + return db_save_notifs_file(t); +#endif +} + + +static notif_t *db_search_notif(notif_t *notifs, int n, const char *notifid) { + int i; + + for (i = 0; i < n && (!notifs[i].id_str || strcmp(notifs[i].id_str, notifid) != 0); i++); + return i == n ? NULL : notifs + i; +} + + +static notif_t *db_search_notif_by_server(notif_t *notifs, int n, const char *server, unsigned int port, int type) { + int i; + + for (i = 0; i < n; i++) { + if (strcmp(notifs[i].server, server) == 0 && notifs[i].port == port && notifs[i].type == type) break; + } + + return i == n ? 
NULL : notifs + i; +} + + +#ifdef WITH_LBU_DB +typedef struct { + char *lb; + char *jobid; + char *unique_str; + char *ce; + char *queue; + char *rb; + char *ui; + char *state; + double state_entered; + double rtm_timestamp; + int registered; + char * vo; +} db_job_t; + + +// +// store state into dababase +// on purged status deletes the record +// +static void db_store_change_perform_sql(thread_t *t, edg_wll_JobStatCode state, db_job_t *rec) { + char *state_entered_str = NULL, *rtm_timestamp_str = NULL, *regtime_str = NULL; + char *sql = NULL, *sql2 = NULL, *sql_part = NULL; + const char *active = "true", *state_changed = "true"; + + if (state == EDG_WLL_JOB_PURGED) { + if (!config.no_purge) { + lprintf(t, DBG, "purge %s", rec->jobid); + if ((t->dbcaps & GLITE_LBU_DB_CAP_PREPARED) == 0) { + trio_asprintf(&sql, "DELETE FROM " RTM_DB_TABLE_JOBS " WHERE jobid='%|Ss' AND lb='%|Ss'", rec->unique_str, rec->lb); + lprintf(t, INS, "delete: %s", sql); + if (glite_lbu_ExecSQL(t->dbctx, sql, NULL) == -1) { + lprintf_dbctx(t, WRN, "can't delete job %s", rec->jobid); + goto quit; + } + } else { + if (glite_lbu_ExecPreparedStmt(t->deletecmd, 2, + GLITE_LBU_DB_TYPE_VARCHAR, rec->unique_str, + GLITE_LBU_DB_TYPE_VARCHAR, rec->lb + ) == -1) { + lprintf_dbctx(t, WRN, "can't delete job %s", rec->jobid); + goto quit; + } + } + } + } else { + if ((t->dbcaps & GLITE_LBU_DB_CAP_PREPARED) == 0) { + glite_lbu_TimestampToDB(t->dbctx, rec->state_entered, &state_entered_str); + glite_lbu_TimestampToDB(t->dbctx, rec->rtm_timestamp, &rtm_timestamp_str); + glite_lbu_TimeToDB(t->dbctx, rec->registered, ®time_str); + + if (rec->vo) trio_asprintf(&sql_part, ", vo='%|Ss' ", rec->vo); + else sql_part = strdup(""); + trio_asprintf(&sql, "UPDATE " RTM_DB_TABLE_JOBS " SET ce='%|Ss', queue='%|Ss', rb='%|Ss', ui='%|Ss', state='%|Ss', state_entered=%s, rtm_timestamp=%s, active=%s, state_changed=%s, registered=%s%sWHERE jobid='%|Ss' AND lb='%|Ss'", rec->ce, rec->queue, rec->rb, rec->ui, rec->state, 
state_entered_str, rtm_timestamp_str, active, state_changed, regtime_str, sql_part, rec->unique_str, rec->lb); + lprintf(t, INS, "update: %s", sql); + switch (glite_lbu_ExecSQL(t->dbctx, sql, NULL)) { + case -1: + lprintf_dbctx(t, ERR, "can't get jobs"); + goto quit; + case 0: + trio_asprintf(&sql2, "INSERT INTO " RTM_DB_TABLE_JOBS " " + "(ce, queue, rb, ui, state, state_entered, rtm_timestamp, jobid, lb, active, state_changed, registered, vo) VALUES " + "('%|Ss', '%|Ss', '%|Ss', '%|Ss', '%|Ss', %s, %s, '%|Ss', '%|Ss', %s, %s, %s, '%|Ss')", rec->ce, rec->queue, rec->rb, rec->ui, rec->state, state_entered_str, rtm_timestamp_str, rec->unique_str, rec->lb, active, state_changed, regtime_str, rec->vo ? : "unknown"); + lprintf(t, INS, "insert: %s", sql2); + if (glite_lbu_ExecSQL(t->dbctx, sql2, NULL) == -1) { + lprintf_dbctx(t, ERR, "can't insert job"); + goto quit; + } + break; + default: + break; + } + } else { // prepared commands + int ret; + + if (rec->vo) { + ret = glite_lbu_ExecPreparedStmt(t->updatecmd_vo, 13, + GLITE_LBU_DB_TYPE_VARCHAR, rec->ce, + GLITE_LBU_DB_TYPE_VARCHAR, rec->queue, + GLITE_LBU_DB_TYPE_VARCHAR, rec->rb, + GLITE_LBU_DB_TYPE_VARCHAR, rec->ui, + GLITE_LBU_DB_TYPE_VARCHAR, rec->state, + GLITE_LBU_DB_TYPE_TIMESTAMP, rec->state_entered, + GLITE_LBU_DB_TYPE_TIMESTAMP, rec->rtm_timestamp, + GLITE_LBU_DB_TYPE_BOOLEAN, 1, // active + GLITE_LBU_DB_TYPE_BOOLEAN, 1, // state_changed + GLITE_LBU_DB_TYPE_TIMESTAMP, (double)rec->registered, + GLITE_LBU_DB_TYPE_VARCHAR, rec->vo, // VO + + GLITE_LBU_DB_TYPE_VARCHAR, rec->unique_str, // jobid + GLITE_LBU_DB_TYPE_VARCHAR, rec->lb // L&B server + ); + } else { + ret = glite_lbu_ExecPreparedStmt(t->updatecmd, 12, + GLITE_LBU_DB_TYPE_VARCHAR, rec->ce, + GLITE_LBU_DB_TYPE_VARCHAR, rec->queue, + GLITE_LBU_DB_TYPE_VARCHAR, rec->rb, + GLITE_LBU_DB_TYPE_VARCHAR, rec->ui, + GLITE_LBU_DB_TYPE_VARCHAR, rec->state, + GLITE_LBU_DB_TYPE_TIMESTAMP, rec->state_entered, + GLITE_LBU_DB_TYPE_TIMESTAMP, rec->rtm_timestamp, + 
GLITE_LBU_DB_TYPE_BOOLEAN, 1, // active + GLITE_LBU_DB_TYPE_BOOLEAN, 1, // state_changed + GLITE_LBU_DB_TYPE_TIMESTAMP, (double)rec->registered, + + GLITE_LBU_DB_TYPE_VARCHAR, rec->unique_str, // jobid + GLITE_LBU_DB_TYPE_VARCHAR, rec->lb // L&B server + ); + } + + switch (ret) { + case -1: + lprintf_dbctx(t, ERR, "can't update " RTM_DB_TABLE_JOBS " table"); + goto quit; + case 0: + if (glite_lbu_ExecPreparedStmt(t->insertcmd, 13, + GLITE_LBU_DB_TYPE_VARCHAR, rec->ce, + GLITE_LBU_DB_TYPE_VARCHAR, rec->queue, + GLITE_LBU_DB_TYPE_VARCHAR, rec->rb, + GLITE_LBU_DB_TYPE_VARCHAR, rec->ui, + GLITE_LBU_DB_TYPE_VARCHAR, rec->state, + GLITE_LBU_DB_TYPE_TIMESTAMP, rec->state_entered, + GLITE_LBU_DB_TYPE_TIMESTAMP, rec->rtm_timestamp, + GLITE_LBU_DB_TYPE_VARCHAR, rec->unique_str, // jobid + GLITE_LBU_DB_TYPE_VARCHAR, rec->lb, // L&B server + GLITE_LBU_DB_TYPE_BOOLEAN, 1, // active + GLITE_LBU_DB_TYPE_BOOLEAN, 1, // state_changed + GLITE_LBU_DB_TYPE_TIMESTAMP, (double)rec->registered, + GLITE_LBU_DB_TYPE_VARCHAR, rec->vo ? : "unknown" // VO + ) == -1) { + lprintf_dbctx(t, ERR, "can't insert to " RTM_DB_TABLE_JOBS " table"); + goto quit; + } + break; + default: + break; + } + } // prepare commands + } + +quit: + free(sql); + free(sql2); + free(sql_part); + free(state_entered_str); + free(rtm_timestamp_str); + free(regtime_str); +} +#endif + + +static int db_store_change(thread_t *t, notif_t *notif, __attribute((unused))int index, edg_wll_JobStat *stat) { + char *jobid_str = NULL, *state_str = NULL, *vo = NULL, *lbhost = NULL; + unsigned int port; + + jobid_str = stat->jobId ? 
glite_jobid_unparse(stat->jobId) : strdup("unknown");
	glite_jobid_getServerParts(stat->jobId, &lbhost, &port);
	state_str = edg_wll_StatToString(stat->state);
	vo = edg_wll_JDLField(stat, "VirtualOrganisation");
	// NOTE(review): vo and state_str are printed without a NULL check
	printf(RTM_TTY_GREEN "notifid: %s (%s), jobid: %s, state: %s, vo: %s, last time: %lf" RTM_TTY_RST "\n", notif->id_str, rtm_notiftype2str(notif->type), jobid_str, state_str, vo, notif->last_update);

#ifdef WITH_LBU_DB
	if (config.dbcs && t->dbctx) {
		db_job_t rec;
		char *colon;
		char *unique_str = NULL, *network_server = NULL;

		memset(&rec, 0, sizeof rec);
		// L&B server
		rec.lb = lbhost;
		// jobid + unique
		unique_str = glite_jobid_getUnique(stat->jobId);
		rec.unique_str = unique_str;
		rec.jobid = jobid_str;
		// CE
		rec.ce = stat->destination ? : "unknown";
		// queue
		// (the destination string is split in place at the first '/',
		// the CE part is then cut at the first ':')
		rec.queue = strchr(rec.ce, '/');
		if (rec.queue) *rec.queue++='\0';
		else rec.queue = "unknown";
		colon = strchr(rec.ce, ':');
		if (colon) colon[0] = '\0';
		// Virtual Organization
		rec.vo = vo;
		// Resource Broker
		network_server = stat->network_server ? rtm_ns2hostname(stat->network_server) : strdup("unknown");
		rec.rb = network_server;
		// UI
		rec.ui = stat->ui_host ? : "unknown";
		// state
		rec.state = state_str ? : "unknown";
		// state time
		rec.state_entered = stat->stateEnterTime.tv_sec + stat->stateEnterTime.tv_usec / 1000000.0;
		// notification time
		rec.rtm_timestamp = rtm_gettimeofday();
		// registration time
		rec.registered = stat->stateEnterTimes[1 + EDG_WLL_JOB_SUBMITTED];

		// store!
		db_store_change_perform_sql(t, stat->state, &rec);

		free(unique_str);
		free(network_server);
	}
#endif

	// store message
	// (sent right away only with wlcg_flush, otherwise left in the file)
	if (config.wlcg) {
		if (wlcg_store_message(t, notif, stat) != 0) goto quit;
		if (config.wlcg_flush) wlcg_send_message(t);
	}

quit:
	free(jobid_str);
	free(state_str);
	free(lbhost);
	free(vo);

	return 0;
}


// Stub: reports no jobs for the summary (*n = 0) and returns 0.
// The commented-out body is a debugging example with fixed job IDs.
static int db_summary_getjobids(__attribute((unused))db_t *db, __attribute((unused))int maxn, __attribute((unused))char **jobids, int *n) {
/*
	switch (db->was_summary) {
	case 0:
		*n = 3;
		jobids[0] = strdup("https://skurut68-2.cesnet.cz:9000/FJldtiAR2EHC12C3Zz8WjQ");
		jobids[1] = strdup("https://skurut68-2.cesnet.cz:9000/AWTCWrUCr3uUh6cuRFaENQ");
		jobids[2] = strdup("https://skurut68-1.cesnet.cz:9000/o73CG2wrNdEQ909mG0Ac1g");
		break;
	case 1:
		*n = 1;
		jobids[0] = strdup("https://skurut68-2.cesnet.cz:9000/-46Qa2ag4gLsA_Ki-3bSLw");

		break;
	default: *n = 0; break;
	}
	db->was_summary = (db->was_summary + 1) % 3;
	return 0;
*/
	*n = 0;
	return 0;
}


// Prints the job ID of one summary result to stdout; always returns 0.
static int db_summary_setinfo(__attribute((unused))db_t *db, edg_wll_JobStat *stat) {
	char *jobidstr;

	jobidstr = stat->jobId ? glite_jobid_unparse(stat->jobId) : NULL;
	printf(RTM_TTY_GREEN "summary: jobid='%s'" RTM_TTY_RST "\n", jobidstr);
	free(jobidstr);
	return 0;
}


/*
 * Queries the states of the jobs returned by db_summary_getjobids() and
 * passes each result to db_summary_setinfo().
 *
 * Job IDs are collected into one query per LB server; the query is sent
 * when it holds RTM_SUMMARY_JOBS IDs, when the server of the next job
 * differs, or when no more jobs are available.
 *
 * \param ctx	L&B context, its query server and port are changed here
 * \return 0, or the last error code
 */
int rtm_summary(edg_wll_Context ctx, db_t *db) {
	char *jobids[RTM_SUMMARY_JOBS];
	edg_wll_QueryRec lbquery[RTM_SUMMARY_JOBS + 1], *qr;
	const edg_wll_QueryRec *lbqueryext[2];
	edg_wll_JobStat *jobstates = NULL;
	int err = 0, ijob = 0, njobs = 0, iquery = 0, k, server_changed = 0;
	glite_jobid_t jid = NULL;
	char *server = NULL, *new_server = NULL;
	unsigned int port = 0, new_port = 0;

	lprintf(NULL, INS, "Summary");

	lbqueryext[0] = lbquery;
	lbqueryext[1] = NULL;
	memset(lbquery, 0, sizeof(lbquery));

	do {
		if (server) {

		// flush the collected query, or push the current job into it
		if ((iquery >= RTM_SUMMARY_JOBS || server_changed || !njobs) && iquery) {
			if ((err = edg_wll_QueryJobsExt(ctx, lbqueryext, 0, NULL, &jobstates)) != 0) {
				lprintf_ctx(NULL, ERR, ctx, "query to '%s:%u' failed: %s", server, port, strerror(err));
				// report error jobids and skip the job (do nothing)
				// TODO
			}
			for (k = 0; k < iquery; k++) glite_jobid_free((glite_jobid_t)lbquery[k].value.j);

			if (err == 0) {
				for (k = 0; jobstates[k].state != EDG_WLL_JOB_UNDEF; k++) {
					if ((err = db_summary_setinfo(db, jobstates + k)) != 0) lprintf(NULL, ERR, "Can't store %d. summary info for %s:%u", k, server, port);
					edg_wll_FreeStatus(jobstates + k);
				}
				free(jobstates);
				lprintf(NULL, DBG, "query to '%s:%u' succeed", server, port);
			}

			iquery = 0;
			memset(lbquery, 0, sizeof(lbquery));
			if (!njobs) break; // not needed, just spare summary select

			server_changed = 0;
		} else {
			lprintf(NULL, DBG, "summary pushed %d. %s\n", iquery, jobids[ijob]);
			qr = lbquery + iquery;
			iquery++;
			qr->attr = EDG_WLL_QUERY_ATTR_JOBID;
			qr->op = EDG_WLL_QUERY_OP_EQUAL;
			glite_jobid_parse(jobids[ijob], (glite_jobid_t *)&qr->value.j);
			free(jobids[ijob]); jobids[ijob] = NULL;
			ijob++;
		}

		} // server

		// fetch the next batch of job IDs when the current one is used up
		if (ijob >= njobs) {
			ijob = 0;
			memset(jobids, 0, sizeof(jobids));
			njobs = 0;
			if ((err = db_summary_getjobids(db, RTM_SUMMARY_JOBS, jobids, &njobs)) != 0) {
				lprintf(NULL, ERR, "Can't get jobs for the summary");
				// NOTE(review): server and new_server are not freed on this path
				return err;
			}
			lprintf(NULL, DBG, "summary for %d jobs", njobs);
			if (!njobs) {
				if (iquery) continue; // do the last query
				else break;
			}
		}

		if ((err = glite_jobid_parse(jobids[ijob], &jid)) != 0) {
			lprintf(NULL, ERR, "Can't parse jobid '%s': %s", jobids[ijob], strerror(err));
			// report error jobid and skip the job
			// TODO
			glite_jobid_free(jid); jid = NULL;
			free(jobids[ijob]); jobids[ijob] = NULL;
			ijob++;
			continue;
		}
		free(new_server);
		glite_jobid_getServerParts(jid, &new_server, &new_port);
		glite_jobid_free(jid); jid = NULL;

		// first or different LB server
		if (new_server && (!server || strcmp(server, new_server) != 0 || port != new_port)) {
			if (server) server_changed = 1;

			free(server);
			server = new_server;
			port = new_port;

			new_server = NULL;
			new_port = 0;

			edg_wll_SetParam(ctx, EDG_WLL_PARAM_QUERY_SERVER, server);
			edg_wll_SetParam(ctx, EDG_WLL_PARAM_QUERY_SERVER_PORT, port);
			lprintf(NULL, INF, "summary LB server '%s:%u'", server, port);
		}
	} while (njobs || iquery);

	free(server);
	free(new_server);

	return err;
}


/*
 * Updates error counter and retry times on the notification.
 *
 * On errors it linearly increases delay. Minimum delay is
 * RTM_ERROR_REPEAT_RATE, maximum is half of the configured
 * bootstrap time.
+ * + * \param t thread context + * \param notif updated notification + * \param[IN] index notification order (for debug printing) + * \param is_error[IN] error state (to reset or increment error counter) + * + */ +static int rtm_update_error_state(thread_t *t, notif_t *notif, int index, int is_error) { + int old_error, max_count; + + old_error = notif->error; + if (is_error) { + if (!notif->error++ || !notif->refresh) notif->refresh = time(NULL); + max_count = config.dive / RTM_ERROR_REPEAT_RATE / 2; + if (max_count <= 0) max_count = 1; + notif->refresh += (notif->error <= max_count ? notif->error : max_count) * RTM_ERROR_REPEAT_RATE; + lprintf(t, DBG, "planned to retry at %s", time2str(t, notif->refresh)); + } else { + notif->error = 0; + } + if (old_error != notif->error) { + lprintf(t, DBG, "error count of %d. server %s:%d changed from %d to %d", index, notif->server, notif->port, old_error, notif->error); + } + + return 0; +} + + +/** + * Updates notifications in persistent storage. Used to send WLCG messages too. 
+ * + * \param t thread context + * \param[IN] new_notif updating notification, NULL = no change in shared memory + * \param[IN] store 0=light (just shared memory), 1=save (flush, really store) + * \retval 0 if OK + */ +int rtm_update_notif(thread_t *t, notif_t *new_notif, int store) { + notif_t *notif; + int retval = 1; + + pthread_mutex_lock(&db.lock); + + if (new_notif) { + if ((notif = db_search_notif_by_server(db.notifs, db.n, new_notif->server, new_notif->port, new_notif->type)) == NULL) { + if (db_add_notif(strdup(new_notif->id_str), new_notif->type, new_notif->valid, new_notif->refresh, new_notif->last_update, strdup(new_notif->server), new_notif->port, 1, 0) == NULL) { + lprintf(t, ERR, "can't realloc"); + goto quit; + } + } else { + notif_free(notif); + notif_copy(notif, new_notif); + } + } + + wlcg_send_message(t); + + if (store) { + if (db_save_notifs(t) != 0) goto quit; + } + retval = 0; + +quit: + pthread_mutex_unlock(&db.lock); + return retval; +} + + +int rtm_drop_notif(thread_t *t, char *notifid, int store) { + notif_t *notif; + int retval = 1; + + pthread_mutex_lock(&db.lock); + if ((notif = db_search_notif(db.notifs, db.n, notifid)) != NULL) { + notif_invalidate(notif); + if (store) + if (db_save_notifs(t) != 0) goto quit; + } + retval = 0; +quit: + pthread_mutex_unlock(&db.lock); + return retval; +} + + +int load_notifs_file() { + FILE *f; + char *results[5]; + notif_t *new_notif; + int err; + char *notifidstr; + time_t valid, refresh; + double last_update; + edg_wll_NotifId id; + int type, i, errcnt, port; + int retval = 1; + + if ((f = fopen(config.notif_file, "rt")) == NULL) { + lprintf(NULL, WRN, "WARNING: can't open notification file '%s'", config.notif_file); + return 0; + } + + results[0] = malloc(RTM_FILE_NOTIF_NUM * 512); + for (i = 1; i < RTM_FILE_NOTIF_NUM; i++) { + results[i] = results[0] + i * 512; + } + while ((err = fscanf(f, RTM_FILE_NOTIF_SCANF, results[0], results[1], results[2], results[3], results[4], results[5])) == 
RTM_FILE_NOTIF_NUM) { + notifidstr = results[0]; + if ((type = rtm_str2notiftype(results[1])) == -1) { + lprintf(NULL, ERR, "unknown notification type '%s' in '%s'", results[1], notifidstr); + continue; + } + + valid = rtm_str2time(results[2]); + refresh = rtm_str2time(results[3]); + last_update = rtm_str2timestamp(results[4]); + + errcnt = 0; + if (results[5] && strcasecmp(results[5], "-") != 0) { + errcnt = atoi(results[5]); + } + + if (errcnt) { + if (sscanf(notifidstr, "%511[^:]:%d", results[1], &port) != 2) { + lprintf(NULL, WRN, "can't parse server specification '%s'", notifidstr); + continue; + } + if ((new_notif = db_add_notif(NULL, type, valid, refresh, last_update, strdup(results[1]), port, 0, errcnt)) == NULL) { + lprintf(NULL, ERR, "can't alloc"); + goto quit; + } + } else { + if (edg_wll_NotifIdParse(notifidstr, &id) != 0) { + lprintf(NULL, WRN, "can't parse notification ID '%s'", notifidstr); + continue; + } + if ((new_notif = db_add_notif(strdup(notifidstr), type, valid, refresh, last_update, NULL, 0, 0, errcnt)) == NULL) { + lprintf(NULL, ERR, "can't alloc"); + goto quit; + } + edg_wll_NotifIdGetServerParts(id, &new_notif->server, &new_notif->port); + edg_wll_NotifIdFree(id); + } + } + if (err == EOF) retval = 0; + else lprintf(NULL, ERR, "can't parse notification file '%s'", config.notif_file); +quit: + fclose(f); + free(results[0]); + return retval; +} + + +#if defined(WITH_RTM_SQL_STORAGE) && defined(WITH_LBU_DB) +int load_notifs_sql() { + notif_t *new_notif; + int err; + char *notifidstr; + time_t valid, refresh; + double last_update; + edg_wll_NotifId id; + int type, i, errcnt; + int retval = 1; + glite_lbu_Statement stmt = NULL; + char *results[8]; + + if (glite_lbu_ExecSQL(db.dbctx, "SELECT notifid, notiftype, valid, refresh, last_update, errors, lb, port FROM notifs", &stmt) == -1) { + lprintf_dbctx(NULL, ERR, "fetching notification failed"); + goto quit; + } + while ((err = glite_lbu_FetchRow(stmt, 8, NULL, results)) > 0) { + if (results[0] 
&& results[0][0]) notifidstr = strdup(results[0]); + else notifidstr = NULL; + free(results[0]); + results[0] = NULL; + + if ((type = rtm_str2notiftype(results[1])) == -1) { + lprintf(NULL, ERR, "unknown notification type '%s' in '%s'", results[1], notifidstr); + for (i = 0; i < 8; i++) free(results[i]); + free(notifidstr); + continue; + } + free(results[1]); + + valid = 0; + if (results[2] && results[2][0]) { + valid = glite_lbu_DBToTime(db.dbctx, results[2]); + } + free(results[2]); + + refresh = 0; + if (results[3] && results[3][0]) { + refresh = glite_lbu_DBToTime(db.dbctx, results[3]); + } + free(results[3]); + + last_update = 0; + if (results[4] && results[4][0]) { + last_update = glite_lbu_DBToTimestamp(db.dbctx, results[4]); + } + free(results[4]); + + errcnt = 0; + if (results[5] && results[5][0]) errcnt = atoi(results[5]); + free(results[5]); + + if ((new_notif = db_add_notif(notifidstr, type, valid, refresh, last_update, (results[6] && !notifidstr) ? strdup(results[6]) : NULL, atoi(results[7]), 0, errcnt)) == NULL) { + free(notifidstr); + free(results[6]); + free(results[7]); + lprintf(NULL, ERR, "can't alloc"); + goto quit; + } + free(results[6]); + free(results[7]); + if (notifidstr) { + if (edg_wll_NotifIdParse(notifidstr, &id) != 0) { + lprintf(NULL, WRN, "can't parse notification IDs '%s'", notifidstr); + notif_free(new_notif); + db.n--; + continue; + } + edg_wll_NotifIdGetServerParts(id, &new_notif->server, &new_notif->port); + edg_wll_NotifIdFree(id); + } + } + if (err == 0) retval = 0; + else lprintf_dbctx(NULL, ERR, "fetching failed"); +quit: + if (stmt) glite_lbu_FreeStmt(&stmt); + return retval; +} +#endif + + +int load_notifs() { + int i, ret; + + pthread_mutex_lock(&db.lock); + +#if defined(WITH_RTM_SQL_STORAGE) && defined(WITH_LBU_DB) + if (!db.dbctx) ret = load_notifs_file(); + else ret = load_notifs_sql(); +#else + ret = load_notifs_file(); +#endif + // try to reconnect on bad notifications immediately + for (i = 0; i < db.n; i++) + if 
(db.notifs[i].error) db.notifs[i].refresh = 0; + + pthread_mutex_unlock(&db.lock); + + return ret; +} + + +void db_free_notifs() { + int i; + + for (i = 0; i < db.n; i++) notif_free(db.notifs + i); + free(db.notifs); + db.notifs = NULL; + db.n = db.maxn = 0; +} + + +void *notify_thread(void *thread_data) { + struct sockaddr_in addr; + int i, j, err; + time_t now, bootstrap; + edg_wll_NotifId notifid; + struct timeval to; + edg_wll_JobStat jobstat, *jobstates; + notif_t *notif, *notif_jdl; + edg_wll_QueryRec *conditions[3] = { NULL, NULL, NULL }, condition[2], condition2[2]; + int sock = -1, updated = 0, error = 0, received = 0; + thread_t *t = (thread_t *)thread_data; + edg_wll_Context ctx = NULL; + int flags = 0; + + const int one = 1; + + lprintf(t, DBG, "thread started"); + + if (!t->nservers) goto exit; + + // LB + if (edg_wll_InitContext(&ctx) != 0) { + lprintf(t, ERR, "can't init LB context: %s", strerror(errno)); + goto exit; + } + if (config.cert) edg_wll_SetParam(ctx, EDG_WLL_PARAM_X509_CERT, config.cert); + if (config.key) edg_wll_SetParam(ctx, EDG_WLL_PARAM_X509_KEY, config.key); + + // socket + if ((sock = socket(PF_INET, SOCK_STREAM, 0)) < 0) { + lprintf(t, ERR, "can't create socket: %s", strerror(errno)); + goto exit; + } + lprintf(t, DBG, "socket created: %d", sock); + + setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)); + + memset(&addr, 0, sizeof addr); + addr.sin_family = AF_INET; + if (listen_port) addr.sin_port = htons(listen_port + t->id); + addr.sin_addr.s_addr = INADDR_ANY; + if (bind(sock, (const struct sockaddr*)&addr, sizeof addr) != 0) { + lprintf(t, ERR, "can't bind socket: %s, port = %d", strerror(errno), listen_port ? 
listen_port + t->id : -1); + goto exit; + } + if (listen(sock, 10) != 0) { + lprintf(t, ERR, "can't listen on socket: %s", strerror(errno)); + goto exit; + } + +#ifdef WITH_LBU_DB + if (db_init(t, &t->dbctx) == 0) + if ((t->dbcaps & GLITE_LBU_DB_CAP_PREPARED) != 0) { + if (glite_lbu_PrepareStmt(t->dbctx, "INSERT INTO " DBAMP RTM_DB_TABLE_JOBS DBAMP " " + "(ce, queue, rb, ui, state, state_entered, rtm_timestamp, jobid, lb, active, state_changed, registered, vo)" + " VALUES " + "($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13)", + &t->insertcmd) != 0 || glite_lbu_PrepareStmt(t->dbctx, "UPDATE " DBAMP RTM_DB_TABLE_JOBS DBAMP " " + "SET ce=$1, queue=$2, rb=$3, ui=$4, state=$5, state_entered=$6, rtm_timestamp=$7, active=$8, state_changed=$9, registered=$10 WHERE jobid=$11 AND lb=$12", + &t->updatecmd) != 0 || glite_lbu_PrepareStmt(t->dbctx, "UPDATE " DBAMP RTM_DB_TABLE_JOBS DBAMP " " + "SET ce=$1, queue=$2, rb=$3, ui=$4, state=$5, state_entered=$6, rtm_timestamp=$7, active=$8, state_changed=$9, registered=$10, vo=$11 WHERE jobid=$12 AND lb=$13", + &t->updatecmd_vo) != 0 || glite_lbu_PrepareStmt(t->dbctx, "UPDATE " DBAMP RTM_DB_TABLE_LBS DBAMP " " + "SET monitored=$1 WHERE ip=$2", + &t->updatecmd_mon) != 0 || glite_lbu_PrepareStmt(t->dbctx, "DELETE FROM " DBAMP RTM_DB_TABLE_JOBS DBAMP " WHERE jobid=$1 AND lb=$2", + &t->deletecmd) != 0) { + lprintf_dbctx(t, ERR, "can't create prepare commands"); + lprintf(t, DBG, "insertcmd=%p, updatecmd=%p, updatecmd_vo=%p, updatecmd_mon=%p, deletecmd=%p", t->insertcmd, t->updatecmd, t->updatecmd_vo, t->updatecmd_mon, t->deletecmd); + quit = RTM_QUIT_PRESERVE; + } + } +#endif + + // + // notifications loop: + // - refresh/create with bootstrap + // - receive & store changes + // + while (!quit) { + now = time(NULL); + t->next_refresh = now + RTM_NOTIF_LOOP_MAX_TIME; + for (i = 0; i < t->nservers; i++) { + notif = t->notifs + i; + if (!notif->active) { + lprintf(t, INS, "inactive %d. 
notification '%s' (%s)", i, notif->id_str, rtm_notiftype2str(notif->type)); + continue; + } + // skip invalid LBs if not planned yet + if (notif->error) { + if (notif->refresh > now) { + lprintf(t, INS, "not planned to retry previously failed %d. notification '%s' (%s), plan %s", i, notif->id_str, rtm_notiftype2str(notif->type), time2str(t, notif->refresh)); + if (t->next_refresh > notif->refresh) t->next_refresh = notif->refresh; + continue; + } + lprintf(t, DBG, "retry previously failed %d. notification '%s' (%s)", i, notif->id_str, rtm_notiftype2str(notif->type)); + } + edg_wll_SetParam(ctx, EDG_WLL_PARAM_NOTIF_SERVER, notif->server); + edg_wll_SetParam(ctx, EDG_WLL_PARAM_NOTIF_SERVER_PORT, notif->port); + now = time(NULL); + if (!notif->valid || notif->valid - RTM_NOTIF_TTL_TO_DEAD <= now || !notif->id_str) { + // new notification + lprintf(t, DBG, "host %s:%d, valid %s, notifstr '%s', notifid %p", notif->server, notif->port, time2str(t, notif->valid), notif->id_str, notif->id); + + // crazy inter-notif interactions + switch (notif->type) { + case RTM_NOTIF_TYPE_STATUS: + // STATUS must wait for existing JDL notification + notif_jdl = db_search_notif_by_server(t->notifs, t->nservers, notif->server, notif->port, RTM_NOTIF_TYPE_JDL); + if (!notif_jdl || !notif_jdl->valid || notif_jdl->valid - RTM_NOTIF_TTL_TO_DEAD <= now || !notif_jdl->id_str) { + lprintf(t, DBG, "not created %d. notification for %s:%d (%s), waiting for %d. (JDL)", i, notif->server, notif->port, rtm_notiftype2str(notif->type), i + RTM_NOTIF_TYPE_JDL - RTM_NOTIF_TYPE_STATUS); + // next retry of STATUS stright before the JDL + if (notif_jdl) { + notif->refresh = notif_jdl->refresh; + if (t->next_refresh > notif->refresh) t->next_refresh = notif->refresh; + } + continue; + } + break; + default: + break; + } + bootstrap = notif->valid > RTM_NOTIF_TTL_TO_DEAD ? 
notif->valid - RTM_NOTIF_TTL_TO_DEAD : 0; + if (config.dive > 0 && now - bootstrap > config.dive) { + bootstrap = now - config.dive; + lprintf(t, INS, "dive from %s:%d cut to %s (max. dive %d)", notif->server, notif->port, time2str(t, bootstrap), config.dive); + } + // explicitly drop old (failed) notification, if any + if (notif->id_str) { + if (notif->id) { + if (edg_wll_NotifDrop(ctx, notif->id)) lprintf_ctx(t, WRN, ctx, "dropping %d. notification '%s' (%s) failed", i, notif->id_str, rtm_notiftype2str(notif->type)); + } + // remove from the persistent storage now, + // invalidate && update + rtm_drop_notif(t, notif->id_str, 1); + // free the notification in the current thread + notif_invalidate(notif); + now = time(NULL); + } + // create the new notification + notif->valid = now + config.ttl; + + memset(conditions, 0, sizeof(conditions)); + memset(condition, 0, sizeof(condition)); + memset(condition2, 0, sizeof(condition2)); + flags = 0; + switch(notif->type) { +#ifndef WITH_OLD_LB + case RTM_NOTIF_TYPE_STATUS: + conditions[0] = condition; + condition[0].attr = EDG_WLL_QUERY_ATTR_STATUS; + condition[0].op = EDG_WLL_QUERY_OP_CHANGED; + break; + case RTM_NOTIF_TYPE_JDL: + conditions[0] = condition; + conditions[1] = condition2; + condition[0].attr = EDG_WLL_QUERY_ATTR_STATUS; + condition[0].op = EDG_WLL_QUERY_OP_EQUAL; + condition[0].value.i = EDG_WLL_JOB_WAITING; + condition2[0].attr = EDG_WLL_QUERY_ATTR_JDL_ATTR; + condition2[0].op = EDG_WLL_QUERY_OP_CHANGED; + flags = EDG_WLL_STAT_CLASSADS; + break; +#endif + case RTM_NOTIF_TYPE_OLD: + flags = EDG_WLL_STAT_CLASSADS; + break; + case RTM_NOTIF_TYPE_DONE: + conditions[0] = condition; + condition[0].attr = EDG_WLL_QUERY_ATTR_STATUS; + condition[0].op = EDG_WLL_QUERY_OP_EQUAL; + condition[0].value.i = EDG_WLL_JOB_DONE; + flags = EDG_WLL_STAT_CHILDREN; + break; + default: + assert(notif->type != notif->type); // unknown type + break; + } + if (edg_wll_NotifNew(ctx, (edg_wll_QueryRec const * const *) conditions, 
flags, sock, config.local_address, ¬if->id, ¬if->valid)) { + memset(condition,0,sizeof condition); + lprintf_ctx(t, ERR, ctx, "can't create notification on %s:%d", notif->server, notif->port); + notif->valid = 0; + notif->id = NULL; + rtm_update_error_state(t, notif, i, 1); + error = 1; + goto cont; + } + notif->id_str = edg_wll_NotifIdUnparse(notif->id); + lprintf(t, INF, "created %d. notification '%s' (%s), valid: %s", i, notif->id_str, rtm_notiftype2str(notif->type), time2str(t, notif->valid)); + + // bootstrap + memset(condition, 0, sizeof(condition)); + flags = 0; + switch (notif->type) { + case RTM_NOTIF_TYPE_STATUS: + condition[0].attr = EDG_WLL_QUERY_ATTR_LASTUPDATETIME; + condition[0].op = EDG_WLL_QUERY_OP_WITHIN; + condition[0].value.t.tv_sec = bootstrap; + condition[0].value2.t.tv_sec = now; + flags = EDG_WLL_STAT_CLASSADS; + break; + case RTM_NOTIF_TYPE_OLD: + break; + case RTM_NOTIF_TYPE_JDL: + break; + case RTM_NOTIF_TYPE_DONE: + break; + default: + assert(notif->type != notif->type); // unknown type + break; + } + + if (condition[0].attr) { + + lprintf(t, INF, "bootstrap %s:%d (%d), time %s..%d(now)", notif->server, notif->port, i, time2str(t, bootstrap), now); + edg_wll_SetParam(ctx, EDG_WLL_PARAM_QUERY_SERVER, notif->server); + edg_wll_SetParam(ctx, EDG_WLL_PARAM_QUERY_SERVER_PORT, notif->port); + if ((err = edg_wll_QueryJobs(ctx, condition, flags, NULL, &jobstates)) != 0 && err != ENOENT) { + lprintf_ctx(t, ERR, ctx, "can't bootstrap jobs on %s:%d, time %s..%d(now)", notif->server, notif->port, time2str(t, bootstrap), now); + // + // destroy the notification after failed bootstrap + // + // This error means there is something nasty on the remote LB server. + // It could lost some messages between recreating notification, + // so destroy this notification now. + // + if (edg_wll_NotifDrop(ctx, notif->id)) { + lprintf_ctx(t, WRN, ctx, "dropping %d. 
notification '%s' (%s) after failed bootstrap failed", i, notif->id_str, rtm_notiftype2str(notif->type)); + } else { + lprintf(t, INF, "dropped %d. notification '%s' (%s) after failed bootstrap", i, notif->id_str, rtm_notiftype2str(notif->type)); + } + // free the notification instance in the current thread + // (not propagated to the persistent storage yet) + edg_wll_NotifIdFree(notif->id); + notif->id = NULL; + free(notif->id_str); + notif->id_str = NULL; + notif->valid = 0; + rtm_update_error_state(t, notif, i, 1); + error = 1; + if (t->next_refresh > notif->refresh) t->next_refresh = notif->refresh; + goto cont; + } else { + for (j = 0; jobstates[j].state != EDG_WLL_JOB_UNDEF; j++) { + notif->last_update = jobstates[j].lastUpdateTime.tv_sec + jobstates[j].lastUpdateTime.tv_usec / 1000000.0; + db_store_change(t, notif, i, jobstates + j); + edg_wll_FreeStatus(jobstates + j); + } + free(jobstates); + lprintf(t, INF, "bootstrap %s:%d (%d), found %d jobs", notif->server, notif->port, i, j); + rtm_update_error_state(t, notif, i, 0); + updated = 1; + } + + } else { + rtm_update_error_state(t, notif, i, 0); + updated = 1; + } + } else if (!notif->id) { + // rebind existing still valid notification + if (edg_wll_NotifIdParse(notif->id_str, ¬if->id)) { + lprintf_ctx(t, WRN, ctx, "can't parse %d. notification '%s' (%s)", i, notif->id_str, rtm_notiftype2str(notif->type)); + notif->valid = 0; + notif->id = NULL; + i--; + continue; + } + notif->valid = now + config.ttl; + if (edg_wll_NotifBind(ctx, notif->id, sock, config.local_address, ¬if->valid)) { + lprintf_ctx(t, WRN, ctx, "can't rebind %d. notification '%s' (%s)", i, notif->id_str, rtm_notiftype2str(notif->type)); + notif->valid = 0; + edg_wll_NotifIdFree(notif->id); + notif->id = NULL; + i--; + continue; + } + lprintf(t, INF, "bound %d. 
notification '%s' (%s), valid: %s", i, notif->id_str, rtm_notiftype2str(notif->type), time2str(t, notif->valid)); + rtm_update_error_state(t, notif, i, 0); + // no bootstrap here, reliable delivery will send changes + updated = 1; + } else if (!notif->refresh || notif->refresh <= now) { + // refresh notification + time_t valid; + + valid = now + config.ttl; + if (edg_wll_NotifRefresh(ctx, notif->id, &valid)) { + lprintf_ctx(t, WRN, ctx, "can't refresh %d. notification '%s' (%s), will try up to %s...", i, notif->id_str, rtm_notiftype2str(notif->type), time2str(t, notif->valid - RTM_NOTIF_TTL_TO_DEAD)); + // refresh failed, just move the refresh time... + updated = 1; + } else { + notif->valid = valid; + lprintf(t, INF, "refreshed %d. notification '%s' (%s), valid: %s", i, notif->id_str, rtm_notiftype2str(notif->type), time2str(t, notif->valid)); + rtm_update_error_state(t, notif, i, 0); + updated = 1; + } + } else { + lprintf(t, INS, "no change in %d. notification '%s' (%s)", i, notif->id_str, rtm_notiftype2str(notif->type)); + } + +cont: + if (updated || error) { + if (!error) { + assert(notif->valid); + notif->refresh = notif->valid ? (now + ((notif->valid - now) >> 1)) : 0; + + } + // create or refresh OK, bootstrap if needed OK, store the new notification + updated = 0; + error = 0; + + // quicker refresh (or recreate) if needed + now = time(NULL); + if (notif->valid && now >= notif->refresh) { + lprintf(t, WRN, "operation not in time, refreshing/recreating the notification '%s' (%s) now", notif->id_str, rtm_notiftype2str(notif->type)); + i--; + continue; + } + rtm_update_notif(t, notif, 1); + } + + // compute time of the next event from the new refresh on notification + if (t->next_refresh > notif->refresh) t->next_refresh = notif->refresh; + } + + // receive + // + // cycle here locally around NotifReceive, we know about next + // refresh time + // + lprintf(t, DBG, "waiting for the notifications up to %s...", t->next_refresh ? 
time2str(t, t->next_refresh) : "0 (no wait)"); + while (t->next_refresh > now && !quit) { + to.tv_sec = t->next_refresh - now; + if (to.tv_sec > RTM_NOTIF_READ_TIMEOUT) to.tv_sec = RTM_NOTIF_READ_TIMEOUT; + to.tv_usec = 0; + memset(&jobstat, 0, sizeof(jobstat)); + notifid = NULL; + err = edg_wll_NotifReceive(ctx, sock, &to, &jobstat, &notifid); + lprintf(t, INS, "received, err=%d%s", err, err == ETIMEDOUT ? " (timeout)":""); + if (err != 0) { + if (err != ETIMEDOUT) { + lprintf_ctx(t, ERR, ctx, "can't receive notifications"); + // don't cycle too quick... + sleep(1); + } + // lazily refresh persistent storage here, only after timeouts + if (received) { + lprintf(t, DBG, "storing notification times"); + rtm_update_notif(t, NULL, 1); + received = 0; + } + } else { + char *jobidstr, *notifidstr; + double last_update; + + if (notifid) { + jobidstr = jobstat.jobId ? glite_jobid_unparse(jobstat.jobId) : NULL; + notifidstr = notifid ? edg_wll_NotifIdUnparse(notifid) : NULL; + for (i = 0; i < t->nservers && (!t->notifs[i].id_str || strcmp(notifidstr, t->notifs[i].id_str) != 0); i++); + if (i == t->nservers) { + lprintf(t, ERR, "received notify '%s' not found", notifidstr); + } else { + received = 1; + notif = t->notifs + i; + // + // last changed time from the arrived notification + // + last_update = jobstat.lastUpdateTime.tv_sec + jobstat.lastUpdateTime.tv_usec / 1000000.0; + if (last_update > notif->last_update) notif->last_update = last_update; + db_store_change(t, notif, i, &jobstat); + rtm_update_notif(t, notif, 0); + } + free(jobidstr); + free(notifidstr); + } + } + if (jobstat.state != EDG_WLL_JOB_UNDEF) edg_wll_FreeStatus(&jobstat); + if (notifid) edg_wll_NotifIdFree(notifid); + + now = time(NULL); + } // receive + } // main loop + +exit: + if (sock != -1) close(sock); +// for (i = 0; conditions[i]; i++) free(conditions[i]); + if (t->nservers && quit != RTM_QUIT_PRESERVE && quit != RTM_QUIT_RELOAD) { + for (i = 0; i < t->nservers; i++) { + if (t->notifs[i].id) { + 
char *notifidstr; + + notifidstr = edg_wll_NotifIdUnparse(t->notifs[i].id); + edg_wll_SetParam(ctx, EDG_WLL_PARAM_NOTIF_SERVER, t->notifs[i].server); + edg_wll_SetParam(ctx, EDG_WLL_PARAM_NOTIF_SERVER_PORT, t->notifs[i].port); + if (edg_wll_NotifDrop(ctx, t->notifs[i].id)) { + lprintf_ctx(t, WRN, ctx, "can't drop %s (%s)", notifidstr, rtm_notiftype2str(t->notifs[i].type)); + } else { + lprintf(t, INF, "notification %s (%s) dropped", notifidstr, rtm_notiftype2str(t->notifs[i].type)); + } + rtm_drop_notif(t, t->notifs[i].id_str, 0); + free(notifidstr); + } + } + rtm_update_notif(t, NULL, 1); + } +#ifdef WITH_LBU_DB + if (t->insertcmd) glite_lbu_FreeStmt(&t->insertcmd); + if (t->updatecmd) glite_lbu_FreeStmt(&t->updatecmd); + if (t->updatecmd_vo) glite_lbu_FreeStmt(&t->updatecmd_vo); + if (t->updatecmd_mon) glite_lbu_FreeStmt(&t->updatecmd_mon); + if (t->deletecmd) glite_lbu_FreeStmt(&t->deletecmd); + db_free(t, t->dbctx); +#endif + if (ctx) edg_wll_FreeContext(ctx); + lprintf(t, DBG, "thread ended"); + pthread_exit(NULL); + return NULL; +} + + +int reconcile_threads() { + int iserver, ithread, inotif, gran, mod, nnotifs; + int i, j, oldn, type, typestart, typeend; + notif_t *a, *b; + edg_wll_Context ctx = NULL; + edg_wll_NotifId notifid; + thread_t *t; + + if (!config.cleanup) { + if (config.silly) { + typestart = RTM_NOTIF_TYPE_OLD; + typeend = RTM_NOTIF_TYPE_OLD; + nnotifs = 1; + } else { + typestart = RTM_NOTIF_TYPE_STATUS; + typeend = RTM_NOTIF_TYPE_JDL; + nnotifs = 2; + } + + oldn = db.n; + + // distribute LB servers between threads + // (always use existing loaded notification when found) + threads = (thread_t *)calloc(config.nthreads, sizeof(thread_t)); + gran = config.nservers / config.nthreads, mod = config.nservers % config.nthreads; + t = NULL; + ithread = 0; + inotif = 0; + for (iserver = 0; iserver < config.nservers; iserver++) { + // new thread + if (!t || inotif + nnotifs > t->nservers) { + assert(ithread < config.nthreads); // proper number of threads 
+ assert(!t || inotif == t->nservers); // start or exactly distributed + t = threads + ithread; + t->nservers = nnotifs * ((ithread < mod) ? gran + 1 : gran); + t->notifs = (notif_t *)calloc(t->nservers, sizeof(notif_t)); + lprintf(NULL, DBG, "%d. thread: %d notifications", ithread, t->nservers); + ithread++; + inotif = 0; + } + + // next configured server + a = config.notifs + iserver; + for (type = typestart; type <= typeend; type++) { + // find or create all notification types + b = db_search_notif_by_server(db.notifs, oldn, a->server, a->port, type); + if (!b) b = db_add_notif(NULL, type, 0, 0, 0, strdup(a->server), a->port, 1, 0); + else { + if (b->id_str) { + lprintf(NULL, INF, "found previous notification '%s' (%s)", b->id_str, rtm_notiftype2str(b->type)); + } else { + lprintf(NULL, INF, "found previous server %s:%d (%s), %d errors", b->server, b->port, rtm_notiftype2str(b->type), b->error); + } + b->active = 1; + } + // and add each to the thread + notif_copy(t->notifs + inotif, b); + lprintf(NULL, INS, "thread[%d][%d] <- %s:%d (%s), id %s", ithread-1, inotif, b->server, b->port, rtm_notiftype2str(b->type), b->id_str); + inotif++; + } + } + j = 0; + for (i = 0; i < db.n; i++) + if (db.notifs[i].active) j++; + assert(j % nnotifs == 0); // each server all notifs + } + + if (edg_wll_InitContext(&ctx) != 0) { + lprintf(NULL, ERR, "can't init LB context: %s", strerror(errno)); + return 1; + } + if (config.cert) edg_wll_SetParam(ctx, EDG_WLL_PARAM_X509_CERT, config.cert); + if (config.key) edg_wll_SetParam(ctx, EDG_WLL_PARAM_X509_KEY, config.key); + for (j = 0; j < db.n; j++) { + if (!db.notifs[j].active) { + if (db.notifs[j].id_str) { + lprintf(NULL, INF, "dropping previous notification '%s' (%s)", db.notifs[j].id_str, rtm_notiftype2str(db.notifs[j].type)); + if (edg_wll_NotifIdParse(db.notifs[j].id_str, ¬ifid)) { + lprintf(NULL, WRN, "can't parse notification ID '%s'", db.notifs[j].id_str); + continue; + } + edg_wll_SetParam(ctx, EDG_WLL_PARAM_NOTIF_SERVER, 
db.notifs[j].server); + edg_wll_SetParam(ctx, EDG_WLL_PARAM_NOTIF_SERVER_PORT, db.notifs[j].port); + if (edg_wll_NotifDrop(ctx, notifid) != 0) { + lprintf_ctx(NULL, WRN, ctx, "can't drop %s (%s)", db.notifs[j].id_str, rtm_notiftype2str(db.notifs[j].type)); + } + edg_wll_NotifIdFree(notifid); + notif_invalidate(db.notifs + j); + } + } + } + edg_wll_FreeContext(ctx); + + return db_save_notifs(NULL); +} + + +void usage(const char *prog) { + fprintf(stderr, "Usage: %s [options]\n" + " -h, --help display this help\n" + " -v, --version display version\n" + " -d, --debug LEVEL debug level (0=error,1=warn,2=info,3=debug,4=insane,\n" + " +8=not fork)\n" + " -D, --daemonize daemonize\n" + " -i, --pidfile the file with process ID\n" + " -s, --threads N number of slave threads\n" + " -t, --ttl TIME time to live (validity) of the notifications\n" + " in seconds (%d)\n" + " -H, --history historic dive in seconds (<=0 is unlimited)\n" + " -c, --config config file name (list of LB servers), precedence before " RTM_DB_TABLE_LBS " table\n" +#ifdef WITH_LBU_DB + " -m, --pg db connection string (user/pwd@server:dbname) to " RTM_DB_TABLE_LBS " table\n" +#endif + " -n, --notifs file for persistent information about active\n" + " notifications\n" + " -p, --port listen only on this port (default: use any)\n" + " -C, --cert X509 certificate file\n" + " -K, --key X509 key file\n" + " -o, --old \"silly\" mode for old L&B 1.9 servers\n" + " -l, --cleanup clean up the notifications and exit\n" + " -u, --no-purge disable purging from RTM database\n" + " -w, --wlcg enable messaging for dashboard\n" + " --wlcg-binary full path to msg-publish binary\n" + " --wlcg-topic topic for msg-publish\n" + " --wlcg-config config file for msg-publish\n" + " --wlcg-flush send message on each notification\n" + , prog, RTM_NOTIF_TTL); + fprintf(stderr, "\n"); + fprintf(stderr, "List of L&B servers: first it's read the config file if specified (-c option). 
When config file is not used and connection to database is specified, it's tried DB table " RTM_DB_TABLE_LBS ".\n"); + fprintf(stderr, "\n"); +} + + +int config_preload(int argn, char *argv[]) { + int opt, intval, index; + char *err, *s; + + while ((opt = getopt_long(argn, argv, opts_line, opts, &index)) != EOF) { + switch (opt) { + case 'h': + case '?': + usage(argv[0]); + return 1; + case 'v': + fprintf(stderr, "%s: %s\n", argv[0], rcsid); + return 1; + case 'd': + intval = strtol(optarg, &err, 10); + if (err && err[0]) { + lprintf(NULL, ERR, "debug level number required"); + return 2; + } + config.debug = (intval & DEBUG_LEVEL_MASK); + config.guard = !(intval & DEBUG_GUARD_MASK); + break; + case 'D': + config.daemonize = 1; + break; + case 'i': + config.pidfile = strdup(optarg); + break; + case 's': + intval = strtol(optarg, &err, 10); + if (err && err[0]) { + lprintf(NULL, ERR, "number of threads required"); + return 2; + } + config.nthreads = intval; + break; + case 't': + intval = strtol(optarg, &err, 10); + if (err && err[0]) { + lprintf(NULL, ERR, "requested validity in seconds required"); + return 2; + } + config.ttl = intval; + break; + case 'H': + intval = strtol(optarg, &err, 10); + if (err && err[0]) { + lprintf(NULL, ERR, "historic dive in seconds required"); + return 2; + } + config.dive = intval; + break; + case 'c': + free(config.config_file); + config.config_file = strdup(optarg); + break; + case 'n': + free(config.notif_file); + config.notif_file = strdup(optarg); + break; + case 'p': + listen_port = atoi(optarg); + break; + case 'm': + free(config.dbcs); + config.dbcs = strdup(optarg); + break; + case 'C': + free(config.cert); + config.cert = strdup(optarg); + break; + case 'K': + free(config.key); + config.key = strdup(optarg); + break; + case 'l': + config.cleanup = 1; + break; + case 'w': + config.wlcg = 1; + break; + case 'o': + config.silly = 1; + break; + case 'u': + config.no_purge = 1; + break; + case 0: + switch(index) { + case 0: + 
config.wlcg_binary = strdup(optarg); + break; + case 1: + config.wlcg_config = strdup(optarg); + break; + case 2: + config.wlcg_topic = strdup(optarg); + break; + case 3: + config.wlcg_flush = 1; + break; + default: + lprintf(NULL, ERR, "crazy option, index %d", index); + break; + } + break; + } + } + if (!config.notif_file) config.notif_file = strdup(RTM_FILE_NOTIFS); + if (config.wlcg) { + if (!config.wlcg_binary) config.wlcg_binary = strdup(WLCG_BINARY); + if (!config.wlcg_config) config.wlcg_config = strdup(WLCG_CONFIG); + if (!config.wlcg_topic) config.wlcg_topic = strdup(WLCG_TOPIC); + } +#ifdef WITH_OLD_LB + if (!config.silly) { + lprintf(NULL, WRN, "compiled with older LB library, switching on silly mode"); + config.silly = 1; + } +#endif + + if ((s = getenv("GLITE_LB_HARVESTER_NO_REMOVE")) != NULL) { + if (s[0] != '0' && strcasecmp(s, "false") != 0) config.wlcg_no_remove = 1; + } + + if (INF <= config.debug) { + lprintf(NULL, INF, "threads: %d", config.nthreads); + lprintf(NULL, INF, "notifs ttl: %d", config.ttl); + lprintf(NULL, INF, "historic dive: %d", config.dive); + if (config.dbcs) { + lprintf(NULL, INF, "database storage: '%s'", config.dbcs); + } else { + lprintf(NULL, INF, "file storage: '%s'", config.notif_file); + } + lprintf(NULL, INF, "WLCG messaging: %s%s", config.wlcg ? "enabled" : "disabled", config.wlcg_no_remove ? " (not removing tmp files)" : ""); + lprintf(NULL, INF, "debug level: %d", config.debug); + lprintf(NULL, INF, "daemonize: %s", config.daemonize ? "enabled" : "disabled"); + lprintf(NULL, INF, "fork guard: %s", config.guard ? "enabled" : "disabled"); + lprintf(NULL, INF, "silly compatibility mode: %s", config.silly ? "enabled" : "disabled"); + lprintf(NULL, INF, "purge: %s", !config.no_purge ? 
"enabled" : "disabled"); + } + + return 0; +} + + +int config_load() { + char line[LINE_MAX], *port, *s; + FILE *f; + void *tmp; + int i, n; +#ifdef WITH_LBU_DB + int major, minor, sub, version; + char *results[2]; + char *result = NULL; + glite_lbu_Statement stmt = NULL; + int err = 0; +#endif + + if (config.config_file) { + if ((f = fopen(config.config_file, "rt")) == NULL) { + lprintf(NULL, ERR, "can't open config file '%s': %s", config.config_file, strerror(errno)); + return 1; + } + + n = 10; + while (fgets(line, sizeof(line), f) != NULL) { + if ((s = strpbrk(line, "\n\r")) != NULL) s[0] = '\0'; + if (line[0] == '\0' || line[0] == '#') continue; + if (config.nservers >= n || !config.notifs) { + n = 2 * n; + if ((tmp = (notif_t *)realloc(config.notifs, n * sizeof(notif_t))) == NULL) { + lprintf(NULL, ERR, "insufficient memory"); + return 1; + } + config.notifs = tmp; + memset(config.notifs + config.nservers, 0, (n - config.nservers) * sizeof(notif_t)); + } + if ((port = strrchr(line, ':')) != NULL) { port[0] = '\0'; port++; } + config.notifs[config.nservers].server = strdup(line); + config.notifs[config.nservers++].port = (port && port[0]) ? 
atoi(port) : GLITE_JOBID_DEFAULT_PORT; + } + + fclose(f); + } else +#ifdef WITH_LBU_DB + if (db.dbctx) { + if ((err = glite_lbu_ExecSQL(db.dbctx, "SELECT COUNT(*) FROM " RTM_DB_TABLE_LBS, &stmt)) < 0 || + (err = glite_lbu_FetchRow(stmt, 1, NULL, &result)) < 0) { + goto err; + } + if (err == 0) { + lprintf(NULL, ERR, "can't count LB servers"); + goto err; + } + n = atoi(result); + free(result); + /* the err: label frees result again when it is non-NULL */ + result = NULL; + glite_lbu_FreeStmt(&stmt); + + config.notifs = calloc(n, sizeof(notif_t)); + config.nservers = 0; + if ((err = glite_lbu_ExecSQL(db.dbctx, "SELECT DISTINCT ip, serv_version FROM " RTM_DB_TABLE_LBS, &stmt)) < 0) { + goto err; + } + while (config.nservers < n && (err = glite_lbu_FetchRow(stmt, 2, NULL, results)) > 0) { + if (sscanf(results[1], "%d.%d.%d", &major, &minor, &sub) != 3) { + lprintf(NULL, ERR, "can't parse LB server version '%s'", results[1]); + /* both fetched columns belong to us, release them before leaving the loop */ + free(results[0]); + free(results[1]); + break; + } + version = 10000 * major + 100 * minor + sub; + if (version >= 20000 || config.silly) { + config.notifs[config.nservers].server = strdup(results[0]); + config.notifs[config.nservers++].port = GLITE_JOBID_DEFAULT_PORT; + } else { + lprintf(NULL, INF, "skipped older LB server %s (version '%s')", results[0], results[1]); + } + free(results[0]); + free(results[1]); + } + if (err < 0) goto err; + glite_lbu_FreeStmt(&stmt); + } +#endif + + if (INF <= config.debug) { + lprintf(NULL, INF, "servers: %d", config.nservers); + for (i = 0; i < config.nservers; i++) lprintf(NULL, INF, " %s:%d", config.notifs[i].server, config.notifs[i].port); + } + + return 0; +#ifdef WITH_LBU_DB +err: + if (err) lprintf_dbctx(NULL, ERR, "can't get LB servers"); + if (stmt) glite_lbu_FreeStmt(&stmt); + if (result) free(result); +#endif + return 1; +} + + +void config_free() { + int i; + + for (i = 0; i < config.nservers; i++) free(config.notifs[i].server); + free(config.config_file); + free(config.notif_file); + free(config.pidfile); + free(config.dbcs); + free(config.notifs); + free(config.cert); + free(config.key); + 
free(config.wlcg_binary); + free(config.wlcg_config); + free(config.wlcg_topic); +} + + +// on keyboard cleanup notification, on termination signal break with +// notification preserved +void handle_signal(int num) { + lprintf(NULL, INF, "received signal %d", num); + switch (num) { + case SIGINT: + case SIGTERM: + default: + quit = RTM_QUIT_PRESERVE; + break; + } +} + + +int main(int argn, char *argv[]) { + struct sigaction sa; + sigset_t sset; + int i, j; + double t1, t2, last_summary = 0, start_time; + thread_t *t; + struct stat pstat; + pid_t watched; + int status; + edg_wll_Context ctx = NULL; + int retval = RTM_EXIT_ERROR; + int cert_mtime = 0; + + // load basic configurations + switch (config_preload(argn, argv)) { + case 0: + break; + case 1: + retval = RTM_EXIT_OK; + goto quit_guard0; + break; + default: + retval = RTM_EXIT_ERROR; + goto quit_guard0; + } + + // daemonize + if (config.pidfile) { + FILE *f; + char s[256]; + + if ((f = fopen(config.pidfile, "rt"))) { + if (fscanf(f, "%255[^\n\r]", s) == 1) { + if (kill(atoi(s),0)) { + lprintf(NULL, WRN, "stale pidfile, pid = %s, pidfile '%s'", s, config.pidfile); + fclose(f); + } + else { + lprintf(NULL, ERR, "another instance running, pid = %s, pidfile '%s'", s, config.pidfile); + fclose(f); + goto quit_guard0; + } + } else { + lprintf(NULL, ERR, "another instance possibly running, can't read pidfile '%s': %s", config.pidfile, strerror(errno)); + fclose(f); + goto quit_guard0; + } + } else if (errno != ENOENT) { + lprintf(NULL, ERR, "error opening pidfile '%s': %s", config.pidfile, strerror(errno)); + goto quit_guard0; + } + } + if (config.daemonize) { + if (daemon(0, 0) == -1) { + lprintf(NULL, ERR, "can't daemonize: %s", strerror(errno)); + goto quit_guard0; + } + } + + // disable signals to the guardian + sigemptyset(&sset); + sigaddset(&sset, SIGABRT); + sigaddset(&sset, SIGTERM); + sigaddset(&sset, SIGINT); + pthread_sigmask(SIG_BLOCK, &sset, NULL); + + if (!config.guard) { + // not guard + if 
(config.pidfile) { + FILE *f; + + if ((f = fopen(config.pidfile, "wt")) == NULL) { + lprintf(NULL, ERR, "can't create pidfile '%s': %s", config.pidfile, strerror(errno)); + goto quit_guard0; + } + fprintf(f, "%d", getpid()); + fclose(f); + } + } else + // guard + while ((watched = fork()) != 0) { + if (watched == -1) { + lprintf(NULL, ERR, "fork() failed: %s", strerror(errno)); + goto quit_guard; + } + if (config.pidfile) { + FILE *f; + + if ((f = fopen(config.pidfile, "wt")) == NULL) { + lprintf(NULL, ERR, "can't create pidfile '%s': %s", config.pidfile, strerror(errno)); + goto quit_guard0; + } + fprintf(f, "%d", watched); + fclose(f); + } + if (waitpid(watched, &status, 0) == -1) { + lprintf(NULL, ERR, "waitpid() failed: %s", strerror(errno)); + // orpaned child will restart later anyway, + // better to end the child process just now + kill(watched, SIGTERM); + goto quit_guard; + } + if (WIFSIGNALED(status)) { + switch (WTERMSIG(status)) { + case SIGSEGV: + case SIGILL: + case SIGABRT: +#ifdef SIGBUS + case SIGBUS: +#endif + lprintf(NULL, ERR, "caught signal %d from process %d, resurrecting...", WTERMSIG(status), watched); + // slow down the core generator ;-) + // disabled signals and ended child in pidfile, live with it + pthread_sigmask(SIG_UNBLOCK, &sset, NULL); + if (config.pidfile) { + if (remove(config.pidfile) == -1) lprintf(NULL, WRN, "can't remove pidfile '%s': %s", config.pidfile, strerror(errno)); + } + sleep(2); + pthread_sigmask(SIG_BLOCK, &sset, NULL); + break; + default: + lprintf(NULL, WRN, "ended with signal %d", WTERMSIG(status)); + goto quit_guard; + } + } else if (WIFEXITED(status)) { + retval = WEXITSTATUS(status); + switch(retval) { + case RTM_EXIT_OK: + lprintf(NULL, INF, "exit with status %d, OK", retval); + goto quit_guard; + case RTM_EXIT_RELOAD: + lprintf(NULL, INF, "exit with status %d, reloading", retval); + break; + default: + lprintf(NULL, WRN, "exit with status %d, error", retval); + goto quit_guard; + } + } else { + 
lprintf(NULL, ERR, "unknown child status"); + goto quit_guard; + } + } + + // child continues... + + // threads && Globus + if (edg_wll_gss_initialize()) { + lprintf(NULL, ERR, "can't initialize GSS"); + goto quit_guard; + } + +#ifndef WITH_OLD_LB + // connection pool manually (just for tuning memory leaks) + if (!edg_wll_initConnections()) { + lprintf(NULL, ERR, "can't initialize LB connections"); + goto quit_guard; + } +#endif + +#ifdef WITH_LBU_DB + // database + switch(db_init(NULL, &db.dbctx)) { + case 0: + break; + case -1: + // no db + break; + default: + // error + goto quit; + } +#endif + + // load configurations + if (config_load()) goto quit; +#ifdef WITH_OLD_LB + // other client certificate settings ignored by older globus, + // using environment (certificate the same for all threads) + { + char *s; + + if (config.cert) { + asprintf(&s, "X509_USER_CERT=%s", config.cert); + putenv(s); + } + if (config.key) { + asprintf(&s, "X509_USER_KEY=%s", config.key); + putenv(s); + } + } +#endif + + // load previous notifications + if (load_notifs()) goto quit; + // compare lb servers from configuration and notifications, + // or clean up and exit if specified + if (reconcile_threads()) goto quit; + if (config.cleanup) { + retval = RTM_EXIT_OK; + goto quit; + } + + // signal handler + sa.sa_handler = handle_signal; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_RESETHAND; + if (sigaction(SIGABRT, &sa, NULL) == -1 + || sigaction(SIGTERM, &sa, NULL) == -1 + || sigaction(SIGINT, &sa, NULL) == -1) { + lprintf(NULL, ERR, "can't handle signal: %s", strerror(errno)); + goto quit; + } + // enable signals in main + pthread_sigmask(SIG_UNBLOCK, &sset, NULL); + + // launch the threads + for (i = 0; i < config.nthreads; i++) { + t = threads + i; + t->id = i; + if (pthread_create(&threads[i].thread, NULL, notify_thread, t) != 0) { + lprintf(NULL, ERR, "[main] can't create %d. 
thread: %s\n", i, strerror(errno)); + goto quit; + } + } + + edg_wll_InitContext(&ctx); + if (config.cert) edg_wll_SetParam(ctx, EDG_WLL_PARAM_X509_CERT, config.cert); + if (config.key) edg_wll_SetParam(ctx, EDG_WLL_PARAM_X509_KEY, config.key); + last_summary = 0; + start_time = rtm_gettimeofday(); + while (!quit) { + t1 = rtm_gettimeofday(); + if (t1 - last_summary > RTM_SUMMARY_POLL_TIME) { + last_summary = t1; + rtm_summary(ctx, &db); + } + if (config.guard) { + if (t1 - start_time > RTM_SUICIDE_TIME) { + quit = RTM_QUIT_RELOAD; + lprintf(NULL, INF, "preventive suicide"); + break; + } + if (config.cert) { + if (stat(config.cert, &pstat) == 0) { + if (!cert_mtime) cert_mtime = pstat.st_mtime; + if (cert_mtime < pstat.st_mtime) { + lprintf(NULL, INF, "certificate '%s' changed, reloading", config.cert); + quit = RTM_QUIT_RELOAD; + break; + } + } else { + lprintf(NULL, ERR, "can't check certificate file '%s'", config.cert, strerror(errno)); + } + } + } + t2 = rtm_gettimeofday(); + if (t2 - t1 < RTM_IDLE_POLL_TIME) usleep((RTM_IDLE_POLL_TIME + t1 - t2) * 1000000); + } + retval = quit == RTM_QUIT_RELOAD ? 
RTM_EXIT_RELOAD : RTM_EXIT_OK; +quit: + // cleanup on error + if (!quit) quit = RTM_QUIT_CLEANUP; + if (threads) { + for (i = 0; i < config.nthreads; i++) { + t = threads + i; + if (t->thread) pthread_join(t->thread, NULL); + for (j = 0; j < t->nservers; j++) notif_free(t->notifs + j); + free(t->notifs); + } + free(threads); + } + + if (config.pidfile && !config.guard) { + if (remove(config.pidfile) == -1) lprintf(NULL, WRN, "can't remove pidfile '%s': %s", config.pidfile, strerror(errno)); + } + +#ifdef WITH_LBU_DB + db_free(NULL, db.dbctx); +#endif + edg_wll_FreeContext(ctx); + db_free_notifs(); + config_free(); +#ifndef WITH_OLD_LB + edg_wll_poolFree(); +#endif + + return retval; + +quit_guard: + if (config.pidfile) { + if (remove(config.pidfile) == -1) lprintf(NULL, WRN, "can't remove pidfile '%s': %s", config.pidfile, strerror(errno)); + } +quit_guard0: + config_free(); + return retval; +} diff --git a/org.glite.lb.logger/configure b/org.glite.lb.logger/configure new file mode 100755 index 0000000..c289773 --- /dev/null +++ b/org.glite.lb.logger/configure @@ -0,0 +1,691 @@ +#!/usr/bin/perl + +# WARNING: Don't edit this file unless it is the master copy in org.glite.lb +# +# For the purpose of standalone builds of lb/jobid/lbjp-common components +# it is copied on tagging + +# $Header$ + +use Getopt::Long; + +my $pwd = `pwd`; chomp $pwd; +my $prefix = $pwd.'/stage'; +my $stagedir; +my $staged; +my $module; +my $thrflavour = 'gcc64dbgpthr'; +my $nothrflavour = 'gcc64dbg'; +my $mode = 'build'; +my $help = 0; +my $listmodules; +my $version; +my $output; +my $lb_tag = ''; +my $lbjp_tag = ''; +my $jp_tag = ''; +my $sec_tag = ''; +my $jobid_tag = ''; + +my @nodes = qw/client server logger utils client-java doc ws-test db jpprimary jpindex jpclient/; +my %enable_nodes; +my %disable_nodes; + +my %extern_prefix = ( + cares => '/opt/c-ares', + classads => '/opt/classads', + cppunit => '/usr', + expat => '/usr', + globus => '/opt/globus', + gsoap => '/usr', + mysql => 
'/usr', + 'mysql-devel' => '', + voms => '/opt/glite', + gridsite => '/opt/glite', + lcas => '/opt/glite', + ant => '/usr', + jdk => '/usr', + libtar => '/usr', +); + +my %jar = ( + 'commons-codec' => '/usr/share/java/commons-codec-1.3.jar', +); + + +my %glite_prefix; +my %need_externs; +my %need_externs_type; +my %need_jars; +my %extrafull; +my %extranodmod; +my %deps; +my %deps_type; +my %topbuild; + +my %lbmodules = ( + 'lb' => [ qw/client client-java common doc logger server state-machine types utils ws-interface ws-test/], + 'security' => [qw/gss gsoap-plugin/], + 'lbjp-common' => [qw/db maildir server-bones trio jp-interface/], + 'jobid' => [qw/api-c api-cpp api-java/], + 'jp' => [ qw/client doc index primary server-common ws-interface/ ], + ); + + +my @opts = ( + 'prefix=s' => \$prefix, + 'staged=s' => \$staged, + 'module=s' => \$module, + 'thrflavour=s' => \$thrflavour, + 'nothrflavour=s' => \$nothrflavour, + 'mode=s' => \$mode, + 'listmodules=s' => \$listmodules, + 'version=s' => \$version, + 'output=s' => \$output, + 'stage=s' => \$stagedir, + 'lb-tag=s' => \$lb_tag, + 'lbjp-common-tag=s' => \$lbjp_tag, + 'jp-tag=s' => \$jp_tag, + 'security-tag=s' => \$sec_tag, + 'jobid-tag=s' => \$jobid_tag, + 'help' => \$help, +); + +for (@nodes) { + $enable_nodes{$_} = 0; + $disable_nodes{$_} = 0; + + push @opts,"disable-$_",\$disable_nodes{$_}; + push @opts,"enable-$_",\$enable_nodes{$_}; +} + +push @opts,"with-$_=s",\$extern_prefix{$_} for keys %extern_prefix; +push @opts,"with-$_=s",\$jar{$_} for keys %jar; + +my @keeparg = @ARGV; + +GetOptions @opts or die "Errors parsing command line\n"; + +$extern_prefix{'mysql-devel'}=$extern_prefix{mysql} if $extern_prefix{'mysql-devel'} eq ''; + +if ($help) { usage(); exit 0; } + +if ($listmodules) { + my @m = map "org.glite.$listmodules.$_",@{$lbmodules{$listmodules}}; + print "@m\n"; + exit 0; +} + +warn "$0: --version and --output make sense only in --mode=etics\n" + if ($version || $output) && $mode ne 'etics'; + +my $en; 
for (keys %enable_nodes) { $en = 1 if $enable_nodes{$_}; }

my $dis;
for (keys %disable_nodes) { $dis = 1 if $disable_nodes{$_}; }

die "--enable-* and --disable-* are mutually exclusive\n"
	if $en && $dis;

die "--module cannot be used with --enable-* or --disable-*\n"
	if $module && ($en || $dis);

# --module takes a "subsystem.name" value (e.g. lb.client); compare it
# against every qualified name built from %lbmodules.  Short names with an
# explicit mapping in %extrafull are accepted as well.
if ($module) {
	my @known = map { my $subsys = $_; map "$subsys.$_", @{$lbmodules{$subsys}} } keys %lbmodules;
	die "$module: unknown module\n"
		unless exists $extrafull{$module} || grep { $_ eq $module } @known;
}

# --disable-X means "everything except X"
if ($dis) {
	for (@nodes) {
		$enable_nodes{$_} = 1 unless $disable_nodes{$_};
	}
}

# no selection at all means "everything"
if (!$en && !$dis) { $enable_nodes{$_} = 1 for (@nodes) } ;

for (keys %enable_nodes) { delete $enable_nodes{$_} unless $enable_nodes{$_}; }

$stagedir = $prefix unless $stagedir;

if ($mode eq 'build') {
	print "Writing config.status\n";
	open CONF,">config.status" or die "config.status: $!\n";
	print CONF "$0 @keeparg\n";
	close CONF;
}


my @modules;
my %aux;

if ($module) {
#	push @modules,split(/[,.]+/,$module);
	push @modules,$module;
}
else {
	@modules = map(($extranodmod{$_} ?
$extranodmod{$_} : 'lb.'.$_),(keys %enable_nodes)); + + my $n; + + do { + local $"="\n"; + $n = $#modules; + push @modules,(map @{$deps{$_}},@modules); + + undef %aux; @aux{@modules} = (1) x ($#modules+1); + @modules = keys %aux; + } while ($#modules > $n); +} + +@aux{@modules} = (1) x ($#modules+1); +delete $aux{$_} for (split /,/,$staged); +@modules = keys %aux; + +mode_build() if $mode eq 'build'; +mode_checkout() if $mode eq 'checkout'; +mode_etics($module) if $mode eq 'etics'; + +sub mode_build { + print "\nBuilding modules: @modules\n"; + + my @ext = map @{$need_externs{$_}},@modules; + my @myjars = map @{$need_jars{$_}},@modules; + undef %aux; @aux{@ext} = 1; + @ext = keys %aux; + undef %aux; @aux{@myjars} = (1) x ($#myjars+1); + @myjars = keys %aux; + + print "\nRequired externals:\n"; + print "\t$_: $extern_prefix{$_}\n" for @ext; + print "\t$_: $jar{$_}\n" for @myjars; + print "\nThis is a poor-man configure, it's up to you to have sources and externals there\n\n"; + + mkinc($_) for @modules; + + print "Creating Makefile\n"; + + open MAK,">Makefile" or die "Makefile: $!\n"; + + print MAK "all: @modules\n\nclean:\n"; + + for (@modules) { + my $full = full($_); + my $build = $topbuild{$_} ? '': '/build'; + print MAK "\tcd $full$build && \${MAKE} clean\n" + } + + print MAK "\ndistclean:\n"; + + for (@modules) { + my $full = full($_); + print MAK $topbuild{$_} ? + "\tcd $full$build && \${MAKE} distclean\n" : + "\trm -rf $full$build\n" + } + + print MAK "\n"; + + for (@modules) { + my %ldeps; undef %ldeps; + @ldeps{@{$deps{$_}}} = 1; + for my $x (split /,/,$staged) { delete $ldeps{$x}; } + my @dnames = $module ? () : keys %ldeps; + + my $full = full($_); + my $build = $topbuild{$_} ? 
'': '/build'; + + print MAK "$_: @dnames\n\tcd $full$build && \${MAKE} && \${MAKE} install\n\n"; + } + + close MAK; +} + +sub mode_checkout() { + for (@modules) { + my $module = $_; + my $tag = ""; + if ($lb_tag){ + for (@{$lbmodules{lb}}){ + if ("lb.".$_ eq $module){ + $tag = '-r '.$lb_tag; + } + } + } + if ($lbjp_tag){ + for (@{$lbmodules{'lbjp-common'}}){ + if ("lbjp-common.".$_ eq $module){ + $tag = '-r '.$lbjp_tag; + } + } + } + if ($jp_tag){ + for (@{$lbmodules{'jp'}}){ + if ("jp.".$_ eq $module){ + $tag = '-r '.$jp_tag; + } + } + } + if ($sec_tag){ + for (@{$lbmodules{security}}){ + if ("security.".$_ eq $module){ + $tag = '-r '.$sec_tag; + } + } + } + if ($jobid_tag){ + for (@{$lbmodules{jobid}}){ + if ("jobid.".$_ eq $module){ + $tag = '-r '.$jobid_tag; + } + } + } + #if (grep {"lb.".$_ eq $module} @{$lbmodules{lb}}){ + # print "found"; + #} + $_ = full($_); + print "\n*** Checking out $_\n"; + system("cvs checkout $tag $_") == 0 or die "cvs checkout $tag $_: $?\n"; + } +} + +BEGIN{ +%need_externs_aux = ( + 'lb.client' => [ qw/cppunit:B classads/ ], + 'lb.client-java' => [ qw/ant:B/ ], + 'lb.common' => [ qw/expat cppunit:B classads/ ], + 'lb.doc' => [], + 'lb.logger' => [ qw/cppunit:B/ ], + 'lb.server' => [ qw/globus_essentials:R globus:B expat cares mysql cppunit:B gsoap:B classads voms lcas gridsite/ ], + 'lb.state-machine' => [ qw/classads/ ], + 'lb.utils' => [ qw/cppunit:B/ ], + 'lb.ws-interface' => [], + 'lb.ws-test' => [ qw/gsoap:B/ ], + 'lb.types' => [ qw// ], + 'lbjp-common.db' => [ qw/mysql:R mysql-devel:B/ ], + 'lbjp-common.maildir' => [ qw// ], + 'lbjp-common.server-bones' => [ qw// ], + 'lbjp-common.trio' => [ qw/cppunit:B/ ], + 'lbjp-common.jp-interface' => [ qw/cppunit:B/ ], + 'security.gss' => [ qw/globus_essentials:R globus:B cares cppunit:B/ ], + 'security.gsoap-plugin' => [ qw/cppunit:B globus_essentials:R globus:B cares gsoap:B/ ], + 'jobid.api-c' => [ qw/cppunit:B/ ], + 'jobid.api-cpp' => [ qw/cppunit:B/ ], + 'jobid.api-java' => [ 
qw/ant:B jdk:B/ ], + 'jp.client' => [ qw/gsoap libtar globus_essentials:R globus:B/ ], + 'jp.doc' => [], + 'jp.index' => [ qw/gsoap globus_essentials:R globus:B/ ], + 'jp.primary' => [ qw/classads gsoap libtar globus_essentials:R globus:B/ ], + 'jp.server-common' => [], + 'jp.ws-interface' => [], +); + +for my $ext (keys %need_externs_aux) { + for (@{$need_externs_aux{$ext}}) { + /([^:]*)(?::(.*))?/; + push @{$need_externs{$ext}},$1; + my $type = $2 ? $2 : 'BR'; + $need_externs_type{$ext}->{$1} = $type; + } +} + +%need_jars = ( + 'jobid.api-java' => [ qw/commons-codec/ ], +); + +for my $jar (keys %need_jars) { + for (@{$need_jars{$jar}}) { + $need_externs_type{$jar}->{$_} = 'BR'; # XXX + } +} + +%deps_aux = ( + 'lb.client' => [ qw/ + lb.types:B lb.common + lbjp-common.trio + jobid.api-cpp jobid.api-c + security.gss + / ], + 'lb.client-java' => [ qw/ + lb.types:B + jobid.api-java + / ], + 'lb.common' => [ qw/ + jobid.api-cpp jobid.api-c + lb.types:B lbjp-common.trio security.gss + / ], + 'lb.doc' => [ qw/lb.types:B/ ], + 'lb.logger' => [ qw/ + lbjp-common.trio + jobid.api-c + lb.common + security.gss + / ], + 'lb.server' => [ qw/ + lb.ws-interface lb.types:B lb.common lb.state-machine + lbjp-common.db lbjp-common.server-bones lbjp-common.trio lbjp-common.maildir + jobid.api-c + security.gsoap-plugin security.gss + / ], + 'lb.state-machine' => [ qw/lb.common lbjp-common.jp-interface security.gss/ ], + 'lb.utils' => [ qw/ + lbjp-common.jp-interface + jobid.api-c + lbjp-common.trio lbjp-common.maildir + lb.client lb.state-machine + / ], + 'lb.ws-test' => [ qw/security.gsoap-plugin lb.ws-interface/ ], + 'lb.ws-interface' => [ qw/lb.types:B/ ], + 'lb.types' => [ qw// ], + 'lbjp-common.db' => [ qw/lbjp-common.trio/ ], + 'lbjp-common.maildir' => [ qw// ], + 'lbjp-common.server-bones' => [ qw// ], + 'lbjp-common.trio' => [ qw// ], + 'security.gss' => [ qw// ], + 'security.gsoap-plugin' => [ qw/security.gss/ ], + 'jobid.api-c' => [ qw// ], + 'jobid.api-cpp' => [ 
qw/jobid.api-c/ ], + 'jobid.api-java' => [ qw// ], + + 'lbjp-common.jp-interface' => [ qw/lbjp-common.db jobid.api-c/ ], + + 'jp.client' => [ qw/ + jp.ws-interface + lbjp-common.jp-interface lbjp-common.maildir + jobid.api-c + security.gsoap-plugin + / ], + 'jp.doc' => [ qw// ], + 'jp.index' => [ qw/ + jp.server-common jp.ws-interface + lbjp-common.jp-interface lbjp-common.trio lbjp-common.db lbjp-common.server-bones + security.gsoap-plugin + / ], + 'jp.primary' => [ qw/ + jobid.api-c + jp.server-common jp.ws-interface + lb.state-machine + lbjp-common.jp-interface lbjp-common.trio lbjp-common.db lbjp-common.server-bones + security.gsoap-plugin + / ], + 'jp.server-common' => [ qw/ + lbjp-common.jp-interface lbjp-common.db + / ], + 'jp.ws-interface' => [ qw// ], +); + +for my $ext (keys %deps_aux) { + for (@{$deps_aux{$ext}}) { + /([^:]*)(?::(.*))?/; + push @{$deps{$ext}},$1; + my $type = $2 ? $2 : 'BR'; + $deps_type{$ext}->{$1} = $type; + } +} + + +%extrafull = ( gridsite=>'org.gridsite.core'); + +#( java => 'client-java' ); +%extranodmod = ( + db => 'lbjp-common.db', + jpprimary => 'jp.primary', + jpindex => 'jp.index', + jpclient => 'jp.client', +); + +my @t = qw/lb.client-java jobid.api-java lb.types/; +@topbuild{@t} = (1) x ($#t+1); +} + +sub full +{ + my $short = shift; + return $extrafull{$short} ? 
$extrafull{$short} : 'org.glite.'.$short; +} + +sub mkinc +{ + my %aux; + undef %aux; + my @m=qw/ +lb.client lb.doc lb.state-machine lb.ws-interface lb.logger lb.types lb.common lb.server lb.utils lb.ws-test lb.client-java +security.gss security.gsoap-plugin +jobid.api-c jobid.api-cpp jobid.api-java +lbjp-common.db lbjp-common.maildir lbjp-common.server-bones lbjp-common.trio lbjp-common.jp-interface +jp.client jp.doc jp.index jp.primary jp.server-common jp.ws-interface +/; + @aux{@m} = (1) x ($#m+1); + + my $short = shift; + my $full = full $short; + + unless ($aux{$short}) { + print "Makefile.inc not needed in $full\n"; + return; + } + + my $build = ''; + + unless ($topbuild{$_}) { + $build = '/build'; + unless (-d "$full/build") { + mkdir "$full/build" or die "mkdir $full/build: $!\n"; + } + unlink "$full/build/Makefile"; + symlink "../Makefile","$full/build/Makefile" or die "symlink ../Makefile $full/build/Makefile: $!\n"; + } + + open MKINC,">$full$build/Makefile.inc" + or die "$full$build/Makefile.inc: $!\n"; + + print "Creating $full$build/Makefile.inc\n"; + + print MKINC qq{ +PREFIX = $prefix +stagedir = $stagedir +thrflavour = $thrflavour +nothrflavour = $nothrflavour +}; + + for (@{$need_externs{$short}}) { + print MKINC "${_}_prefix = $extern_prefix{$_}\n" + } + + for (@{$need_jars{$short}}) { + print MKINC "${_}_jar = $jar{$_}\n" + } + + my $need_gsoap = 0; + for (@{$need_externs{$short}}) { $need_gsoap = 1 if $_ eq 'gsoap'; } + + print MKINC "gsoap_default_version=".gsoap_version()."\n" if $need_gsoap; + + close MKINC; +} + +my %etics_externs; +my %etics_projects; +BEGIN{ + %etics_externs = ( + globus_essentials=>'vdt_globus_essentials', + globus=>'globus', + cares=>'c-ares', + voms=>'org.glite.security.voms-api-cpp', + gridsite=>'org.gridsite.shared', + lcas=>'org.glite.security.lcas', + ); + %etics_projects = ( + vdt=>[qw/globus globus_essentials/], + 'org.glite'=>[qw/voms gridsite lcas/], + ); +}; + +sub mode_etics { + $fmod = shift; + + die "$0: 
--module required with --etics\n" unless $fmod; + + my ($subsys,$module) = split /\./,$fmod; + + my ($major,$minor,$rev,$age); + + if ($version) { + $version =~ /([[:digit:]]+)\.([[:digit:]]+)\.([[:digit:]]+)-(.+)/; + ($major,$minor,$rev,$age) = ($1,$2,$3,$4); + } + else { + open V,"org.glite.$subsys.$module/project/version.properties" + or die "org.glite.$subsys.$module/project/version.properties: $!\n"; + + while ($_ = ) { + chomp; + ($major,$minor,$rev) = ($1,$2,$3) if /module\.version\s*=\s*([[:digit:]]+)\.([[:digit:]]+)\.([[:digit:]]+)/; + $age = $1 if /module\.age\s*=\s*([[:digit:]]+)/; + } + close V; + } + + my @copts = (); + my %ge; + @ge{@{$etics_projects{'org.glite'}}} = (1) x ($#{$etics_projects{'org.glite'}}+1); + + for (@{$need_externs{"$subsys.$module"}}) { + if ($need_externs_type{"$subsys.$module"}->{$_}=~/B/) { + my $eext = $etics_externs{$_} ? $etics_externs{$_} : $_; + push @copts,$ge{$_} ? "--with-$_=\${stageDir}" : "--with-$_=\${$eext.location}"; + } + } + + for (@{$need_jars{"$subsys.$module"}}) { + my $eext = $etics_externs{$_} ? $etics_externs{$_} : $_; + + push @copts,"--with-$_ \${$eext.location}/$_*.jar"; + } + + + my $conf = "glite-$subsys-${module}_R_${major}_${minor}_${rev}_${age}"; + my $file = $output ? $output : "$conf.ini"; + open C,">$file" or die "$file: $!\n"; + + my $buildroot = $topbuild{"$subsys.$module"} ? '' : "build.root = build\n"; + + my $confdir = $topbuild{"$subsys.$module"} ? '..' 
: '../..'; + + print STDERR "Writing $file\n"; + print C qq{ +[Configuration-$conf] +profile = None +moduleName = org.glite.$subsys.$module +displayName = $conf +description = org.glite.$subsys.$module +projectName = org.glite +age = $age +deploymentType = None +tag = $conf +version = $major.$minor.$rev +path = \${projectName}/\${moduleName}/\${version}/\${platformName}/\${packageName}-\${version}-\${age}.tar.gz + +[Platform-default:VcsCommand] +displayName = None +description = None +tag = cvs -d \${vcsroot} tag -R \${tag} \${moduleName} +branch = None +commit = None +checkout = cvs -d \${vcsroot} co -r \${tag} \${moduleName} + +[Platform-default:BuildCommand] +postpublish = None +packaging = None +displayName = None +description = None +doc = None +prepublish = None +publish = None +compile = make +init = None +install = make install +clean = make clean +test = make check +configure = cd $confdir && \${moduleName}/configure --thrflavour=\${globus.thr.flavor} --nothrflavour=\${globus.nothr.flavor} --prefix=\${prefix} --stage=\${stageDir} --module $subsys.$module @copts +checkstyle = None + +[Platform-default:Property] +$buildroot + +[Platform-default:DynamicDependency] + +}; + for (@{$need_externs{"$subsys.$module"}},@{$need_jars{"$subsys.$module"}}) { + my $eext = $etics_externs{$_} ? 
$etics_externs{$_} : $_; + + my $proj = 'externals'; + for my $p (keys %etics_projects) { + for $m (@{$etics_projects{$p}}) { + $proj = $p if $m eq $_; + } + } + + my $type = $need_externs_type{"$subsys.$module"}->{$_}; + print C "$proj|$eext = $type\n"; + } + + for (@{$deps{"$subsys.$module"}}) { + my $type = $deps_type{"$subsys.$module"}->{$_}; + print C "org.glite|org.glite.$_ = $type\n"; + } + + close C; +} + +sub gsoap_version { + local $_; + my $gsoap_version; + open S,"$extern_prefix{gsoap}/bin/soapcpp2 -v 2>&1 |" or die "$extern_prefix{gsoap}/bin/soapcpp2: $!\n"; + + while ($_ = ) { + chomp; + + $gsoap_version = $1 if /The gSOAP Stub and Skeleton Compiler for C and C\+\+ ([.[:digit:][:alpha:]]+)$/; + } + close S; + return $gsoap_version; +} + + +sub usage { + my @ext = keys %extern_prefix; + my @myjars, keys %jar; + + print STDERR qq{ +usage: $0 options + +General options (defaults in []): + --prefix=PREFIX destination directory [./stage] + --staged=module,module,... what is already in PREFIX (specify without org.glite.) + --thrflavour=flavour + --nothrflavour=flavour threaded and non-treaded flavours [gcc64dbgpthr,gcc64dbg] + --listmodules=subsys list modules of a subsystem + +Mode of operation: + --mode={checkout|build|etics} what to do [build] + +What to build: + --module=module build this module only (mostly in-Etics operation) + --enable-NODE build this "node" (set of modules) only. Available nodes are + @{$lbmodules{lb}},@{$lbmodules{security}} + --disable-NODE don't build this node + --lb-tag=tag checkout LB modules with specific tag + --jp-tag=tag checkout JP modules with specific tag + --lbjp-common-tag=tag checkout lbjp-common modules with specific tag + --security-tag=tag checkout security modules with specific tag + --jobid-tag=tag checkout jobid modules with specific tag + +Dependencies: + --with-EXTERNAL=PATH where to look for an external. Required externals + (not all for all modules) are: + @ext + --with-JAR=JAR where to look for jars. 
Required jars are: + @myjars + Summary of what will be used is always printed + +}; + +} diff --git a/org.glite.lb.logger/project/package.description b/org.glite.lb.logger/project/package.description new file mode 100644 index 0000000..cd0621b --- /dev/null +++ b/org.glite.lb.logger/project/package.description @@ -0,0 +1 @@ +glite-lb-logger is the gLite LB local-logger and inter-logger. This package contains the local-logger (glite-lb-logd), inter-logger (glite-lb-interlogd) and notification inter-logger (glite-lb-notif-interlogd) daemons. diff --git a/org.glite.lb.logger/project/package.summary b/org.glite.lb.logger/project/package.summary new file mode 100644 index 0000000..089b630 --- /dev/null +++ b/org.glite.lb.logger/project/package.summary @@ -0,0 +1 @@ +gLite Logging and Bookkeeping local-logger and inter-logger diff --git a/org.glite.lb.logger/src-nt/Connection.cpp b/org.glite.lb.logger/src-nt/Connection.cpp new file mode 100644 index 0000000..7cac943 --- /dev/null +++ b/org.glite.lb.logger/src-nt/Connection.cpp @@ -0,0 +1,4 @@ +#include "Connection.H" + +Connection::Factory::~Factory() { +} diff --git a/org.glite.lb.logger/src-nt/EventManager.cpp b/org.glite.lb.logger/src-nt/EventManager.cpp new file mode 100644 index 0000000..91efb12 --- /dev/null +++ b/org.glite.lb.logger/src-nt/EventManager.cpp @@ -0,0 +1,23 @@ +#include "EventManager.H" + +int +EventManager::postEvent(Event* &e) +{ + for(std::list::iterator i = handlers.begin(); + i != handlers.end(); + i++) { + (*i)->handleEvent(e); + } + return 0; +} + +void +EventManager::addHandler(EventHandler *handler) +{ + handlers.push_back(handler); +} + +void +EventManager::removeHandler(EventHandler *handler) +{ +} diff --git a/org.glite.lb.logger/src-nt/InputChannel.H b/org.glite.lb.logger/src-nt/InputChannel.H new file mode 100644 index 0000000..2bac262 --- /dev/null +++ b/org.glite.lb.logger/src-nt/InputChannel.H @@ -0,0 +1,29 @@ +#ifndef _INPUT_CHANNEL_H_ +#define _INPUT_CHANNEL_H_ + +#include 
"ThreadPool.H" +#include "Connection.H" +#include "Transport.H" + +class InputChannel + : public ThreadPool::WorkDescription { +public: + + InputChannel(Connection *conn, Transport *trans) + : ThreadPool::WorkDescription(conn->getFD()), + m_connection(conn), m_transport(trans) + {} + + void start(); + +protected: + virtual void onReady(); + virtual void onTimeout(); + virtual void onError(); + +private: + Connection *m_connection; + Transport *m_transport; +}; + +#endif diff --git a/org.glite.lb.logger/src-nt/InputChannel.cpp b/org.glite.lb.logger/src-nt/InputChannel.cpp new file mode 100644 index 0000000..e3ac9c8 --- /dev/null +++ b/org.glite.lb.logger/src-nt/InputChannel.cpp @@ -0,0 +1,37 @@ +#include "InputChannel.H" +#include "ThreadPool.H" +#include "EventManager.H" + +extern EventManager theEventManager; + +void +InputChannel::start() +{ + ThreadPool::instance()->queueWorkRead(this); +} + +void +InputChannel::onReady() +{ + Transport::Message *msg = NULL; + int ret = m_transport->receive(m_connection, msg); + if(ret <= 0) { + // no new data read + } else if(msg) { + // we have a new message + + } else { + // still need more data + ThreadPool::instance()->queueWorkRead(this); + } +} + +void +InputChannel::onTimeout() +{ +} + +void +InputChannel::onError() +{ +} diff --git a/org.glite.lb.logger/src-nt/Message.H b/org.glite.lb.logger/src-nt/Message.H new file mode 100644 index 0000000..725966a --- /dev/null +++ b/org.glite.lb.logger/src-nt/Message.H @@ -0,0 +1,66 @@ +#ifndef _MESSAGE_H_ +#define _MESSAGE_H + +#include "Properties.H" +#include "MessageStore.H" + +#include + +class Message: public MessageStore::Storable { +public: + + /** class that holds message state + * + */ + class State : public MessageStore::Storable { + public: + + /** Get size needed for storage (from Storable). 
+ */ + virtual int getStorageSize() const; + + /** Save State (from Storable) + */ + virtual int save(void* data, int len) const; + + /** Load State (from Storable) + */ + virtual int load(void* data, int len); + }; + + + Message(); + + Message(void * data, unsigned int length) + : m_length(length), + m_data(data) + {} + + + int getContent(void* &data) const + { data = m_data; return m_length; } + + int getContentLength() const + { return m_length; } + + std::string getProperty(const std::string &name, std::string &val) + { return m_properties.getProperty(name); } + + void setProperty(const std::string &name, std::string &val) + { m_properties.setProperty(name, val); } + + Properties& getProperties() + { return m_properties; } + + void setProperties(Properties &) + {} + +private: + MessageStore::ID m_id; + unsigned int m_length; + void * m_data; + Properties m_properties; +}; + + +#endif diff --git a/org.glite.lb.logger/src-nt/MessageStore.H b/org.glite.lb.logger/src-nt/MessageStore.H new file mode 100644 index 0000000..ff03a9b --- /dev/null +++ b/org.glite.lb.logger/src-nt/MessageStore.H @@ -0,0 +1,84 @@ +#ifndef _MESSAGE_STORE_H_ +#define _MESSAGE_STORE_H_ + +#include + +/** Permanent storage for messages and their states. + */ + +class MessageStore { +public: + + /** Base class for everything that can be stored here. + */ + class Storable { + public: + /** Get size needed for object storage. + */ + virtual int getStorageSize() const = 0; + + /** Save state of object into binary data. + */ + virtual int save(void* data, int len) const = 0; + + /** Load state of object from binary data. + */ + virtual int load(void* data, int len) = 0; + + virtual ~Storable() {} + }; + + + /** Class that uniquely identifies stored content. + */ + class ID: public Storable { + public: + /** Default constructor. + * + * Creates new unique ID. + */ + ID(); + + /** Copy constructor. + */ + ID(const ID& src); + + /** Destructor. + */ + ~ID() {}; + + /** Assignment operator. 
+ */ + ID& operator=(const ID& src); + + /** Return the string suitable for printing. + */ + std::string toString() const; + + /** Comparison operator + */ + int operator==(const ID& second); + + /** Get size needed for storage (from Storable). + */ + virtual int getStorageSize() const; + + /** Save ID (from Storable) + */ + virtual int save(void* data, int len) const; + + /** Load ID (from Storable) + */ + virtual int load(void* data, int len); + + protected: + unsigned long long getID() {return id;} + + private: + static pthread_mutex_t counterLock; + static unsigned counter; + unsigned long long id; + }; +}; + +#endif diff --git a/org.glite.lb.logger/src-nt/MessageStore.cpp b/org.glite.lb.logger/src-nt/MessageStore.cpp new file mode 100644 index 0000000..eb9de7a --- /dev/null +++ b/org.glite.lb.logger/src-nt/MessageStore.cpp @@ -0,0 +1,24 @@ +#include +#include +#include + +#include "MessageStore.H" + +pthread_mutex_t MessageStore::ID::counterLock = PTHREAD_MUTEX_INITIALIZER; +unsigned MessageStore::ID::counter = 0; + +MessageStore::ID::ID(){ + time_t t; + time(&t); + pthread_mutex_lock(&counterLock); + counter++; + id = ((unsigned long long) counter << 32) + t; + pthread_mutex_unlock(&counterLock); +} + +std::string MessageStore::ID::toString() const{ + std::ostringstream oss; + oss << id; + return oss.str(); +} + diff --git a/org.glite.lb.logger/src-nt/Properties.H b/org.glite.lb.logger/src-nt/Properties.H new file mode 100644 index 0000000..77d216d --- /dev/null +++ b/org.glite.lb.logger/src-nt/Properties.H @@ -0,0 +1,36 @@ +#ifndef _PROPERTIES_H_ +#define _PROPERTIES_H_ + +#include +#include + +class Properties { +public: + + // default constructor + Properties() + : properties() + {} + + // accessors + std::string& getProperty(const std::string &key) + { return properties[key]; } + + void setProperty(const std::string &key, std::string &val) + { properties[key] = val; } + + // iterators + typedef std::map::iterator iterator; + + iterator begin() + { return 
properties.begin(); } + + iterator end() + { return properties.end(); } + + +private: + std::map properties; +}; + +#endif diff --git a/org.glite.lb.logger/src-nt/Transport.cpp b/org.glite.lb.logger/src-nt/Transport.cpp new file mode 100644 index 0000000..2544997 --- /dev/null +++ b/org.glite.lb.logger/src-nt/Transport.cpp @@ -0,0 +1,5 @@ +#include "Transport.H" + +Transport::~Transport() +{ +} diff --git a/org.glite.lb.logger/src/event_store_http.c b/org.glite.lb.logger/src/event_store_http.c new file mode 100644 index 0000000..ebd5523 --- /dev/null +++ b/org.glite.lb.logger/src/event_store_http.c @@ -0,0 +1,1113 @@ +#ident "$Header$" + +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef HAVE_UNISTD_H +#include +#endif +#include +#include + +#include "glite/lb/events_parse.h" + +#include "interlogd.h" + +#ifdef __GNUC__ +#define UNUSED_VAR __attribute__((unused)) +#else +#define UNUSED_VAR +#endif + +static char *file_prefix = NULL; + + +struct event_store_list { + struct event_store *es; + struct event_store_list *next; +}; + + +static struct event_store_list *store_list; +static pthread_rwlock_t store_list_lock = PTHREAD_RWLOCK_INITIALIZER; + + +/* ---------------- + * helper functions + * ---------------- + */ +static +char * +jobid2eventfile(const char *job_id_s) +{ + char *buffer; + + if(job_id_s) { + asprintf(&buffer, "%s.%s", file_prefix, job_id_s); + } else + asprintf(&buffer, "%s.default", file_prefix); + + return(buffer); +} + + +static +char * +jobid2controlfile(char *job_id_s) +{ + char *buffer; + char *hash; + + if(job_id_s) { + asprintf(&buffer, "%s.%s.ctl", file_prefix, job_id_s); + } else + asprintf(&buffer, "%s.default.ctl", file_prefix); + + return(buffer); +} + +static +int +file_reader(void *user_data, char *buffer, const int len) +{ + size_t ret = 0; + + if(len > 0) { + ret = fread(buffer, 1, len, (FILE*)user_data); + if(ret == 0 && ferror((FILE*)user_data)) { + return -1; + } + } + return ret; +} + + 
+static +int +read_event_string(FILE *file, il_http_message_t *msg) +{ + int len, ret; + int fd = fileno(file); + long start; + + /* remember the start position */ + start = ftell(file); + ret = receive_http(file, file_reader, msg); + if(ret < 0) return ret; + /* seek at the end of message in case the reader read ahead */ + len = fseek(file, start + msg->len, SEEK_SET); + len = fgetc(file); + if(len != '\n') { + il_log(LOG_ERR, "error reading event from file, missing terminator character at %d, found %c(%d))\n", + start+msg->len, len, len); + if(msg->data) { free(msg->data); msg->data = NULL; } + if(msg->host) { free(msg->host); msg->host = NULL; } + return EINVAL; + } + return ret; +} + + + +/* ------------------------------ + * event_store 'member' functions + * ------------------------------ + */ +static +int +event_store_free(struct event_store *es) +{ + assert(es != NULL); + + if(es->job_id_s) free(es->job_id_s); + if(es->event_file_name) free(es->event_file_name); + if(es->control_file_name) free(es->control_file_name); + pthread_rwlock_destroy(&es->use_lock); + pthread_rwlock_destroy(&es->commit_lock); + free(es); + + return(0); +} + + +static +struct event_store * +event_store_create(char *job_id_s) +{ + struct event_store *es; + + es = malloc(sizeof(*es)); + if(es == NULL) { + set_error(IL_NOMEM, ENOMEM, "event_store_create: error allocating room for structure"); + return(NULL); + } + + memset(es, 0, sizeof(*es)); + + il_log(LOG_DEBUG, " creating event store for id %s\n", job_id_s); + + es->job_id_s = strdup(job_id_s); + es->event_file_name = jobid2eventfile(job_id_s); + es->control_file_name = jobid2controlfile(job_id_s); + + if(pthread_rwlock_init(&es->commit_lock, NULL)) + abort(); + if(pthread_rwlock_init(&es->use_lock, NULL)) + abort(); + + return(es); +} + + +static +int +event_store_lock_ro(struct event_store *es) +{ + assert(es != NULL); + + if(pthread_rwlock_rdlock(&es->commit_lock)) + abort(); + + return(0); +} + + +static +int 
+event_store_lock(struct event_store *es) +{ + assert(es != NULL); + + if(pthread_rwlock_wrlock(&es->commit_lock)) + abort(); + + return(0); +} + + +static +int +event_store_unlock(struct event_store *es) +{ + assert(es != NULL); + + if(pthread_rwlock_unlock(&es->commit_lock)) + abort(); + return(0); +} + + +static +int +event_store_read_ctl(struct event_store *es) +{ + FILE *ctl_file; + + assert(es != NULL); + + event_store_lock(es); + if((ctl_file = fopen(es->control_file_name, "r")) == NULL) { + /* no control file, new event file */ + es->last_committed_ls = 0; + es->last_committed_bs = 0; + } else { + /* read last seen and last committed counts */ + fscanf(ctl_file, "%*s\n%ld\n%ld\n", + &es->last_committed_ls, + &es->last_committed_bs); + fclose(ctl_file); + } + event_store_unlock(es); + + return(0); +} + + +static +int +event_store_write_ctl(struct event_store *es) +{ + FILE *ctl; + + assert(es != NULL); + + ctl = fopen(es->control_file_name, "w"); + if(ctl == NULL) { + set_error(IL_SYS, errno, "event_store_write_ctl: error opening control file"); + return(-1); + } + + if(fprintf(ctl, "%s\n%ld\n%ld\n", + es->job_id_s, + es->last_committed_ls, + es->last_committed_bs) < 0) { + set_error(IL_SYS, errno, "event_store_write_ctl: error writing control record"); + return(-1); + } + + if(fclose(ctl) < 0) { + set_error(IL_SYS, errno, "event_store_write_ctl: error closing control file"); + return(-1); + } + + return(0); +} + + +/* + * event_store_qurantine() + * - rename damaged event store file + * - essentially does the same actions as cleanup, but the event store + * does not have to be empty + * returns 0 on success, -1 on error + */ +static +int +event_store_quarantine(struct event_store *es) +{ + int num; + char newname[MAXPATHLEN+1]; + + /* find available qurantine name */ + /* we give it at most 1024 tries */ + for(num = 0; num < 1024; num++) { + struct stat st; + + snprintf(newname, MAXPATHLEN, "%s.quarantine.%d", es->event_file_name, num); + 
newname[MAXPATHLEN] = 0; + if(stat(newname, &st) < 0) { + if(errno == ENOENT) { + /* file not found */ + break; + } else { + /* some other error with name, probably permanent */ + set_error(IL_SYS, errno, "event_store_qurantine: error looking for qurantine filename"); + return(-1); + + } + } else { + /* the filename is used already */ + } + } + if(num >= 1024) { + /* new name not found */ + /* XXX - is there more suitable error? */ + set_error(IL_SYS, ENOSPC, "event_store_quarantine: exhausted number of retries looking for quarantine filename"); + return(-1); + } + + /* actually rename the file */ + il_log(LOG_DEBUG, " renaming damaged event file from %s to %s\n", + es->event_file_name, newname); + if(rename(es->event_file_name, newname) < 0) { + set_error(IL_SYS, errno, "event_store_quarantine: error renaming event file"); + return(-1); + } + + /* clear the counters */ + es->last_committed_ls = 0; + es->last_committed_bs = 0; + es->offset = 0; + + return(0); +} + + +/* + * event_store_recover() + * - recover after restart or catch up when events missing in IPC + * - if offset > 0, read everything behind it + * - if offset == 0, read everything behind min(last_committed_bs, last_committed_es) + */ +int +event_store_recover(struct event_store *es) +{ + struct event_queue *eq_l = NULL, *eq_b = NULL; + struct server_msg *msg; + il_http_message_t hmsg; + char *event_s; + int fd, ret; + long last; + FILE *ef; + struct flock efl; + char err_msg[128]; + struct stat stbuf; + + assert(es != NULL); + +#if defined(IL_NOTIFICATIONS) + /* destination queue has to be found for each message separately */ +#else + /* find bookkepping server queue */ + eq_b = queue_list_get(es->job_id_s); + if(eq_b == NULL) + return(-1); +#endif + +#if !defined(IL_NOTIFICATIONS) + /* get log server queue */ + eq_l = queue_list_get(NULL); +#endif + + event_store_lock(es); + + il_log(LOG_DEBUG, " reading events from %s\n", es->event_file_name); + + /* open event file */ + ef = 
fopen(es->event_file_name, "r"); + if(ef == NULL) { + snprintf(err_msg, sizeof(err_msg), + "event_store_recover: error opening event file %s", + es->event_file_name); + set_error(IL_SYS, errno, err_msg); + event_store_unlock(es); + return(-1); + } + + /* lock the file for reading (we should not read while dglogd is writing) */ + fd = fileno(ef); + efl.l_type = F_RDLCK; + efl.l_whence = SEEK_SET; + efl.l_start = 0; + efl.l_len = 0; + if(fcntl(fd, F_SETLKW, &efl) < 0) { + snprintf(err_msg, sizeof(err_msg), + "event_store_recover: error locking event file %s", + es->event_file_name); + set_error(IL_SYS, errno, err_msg); + event_store_unlock(es); + fclose(ef); + return(-1); + } + + /* check the file modification time and size to avoid unnecessary operations */ + memset(&stbuf, 0, sizeof(stbuf)); + if(fstat(fd, &stbuf) < 0) { + il_log(LOG_ERR, " could not stat event file %s: %s\n", es->event_file_name, strerror(errno)); + fclose(ef); + event_store_unlock(es); + return -1; + } else { + if((es->offset == stbuf.st_size) && (es->last_modified == stbuf.st_mtime)) { + il_log(LOG_DEBUG, " event file not modified since last visit, skipping\n"); + fclose(ef); + event_store_unlock(es); + return(0); + } + } + + while(1) { /* try, try, try */ + + /* get the position in file to be sought */ + if(es->offset) + last = es->offset; + else { + last = es->last_committed_bs; + } + + il_log(LOG_DEBUG, " setting starting file position to %ld\n", last); + il_log(LOG_DEBUG, " bytes sent to destination: %d\n", es->last_committed_bs); + + if(last > 0) { + int c; + + /* skip all committed or already enqueued events */ + /* be careful - check, if the offset really points to the + beginning of event string */ + if(fseek(ef, last - 1, SEEK_SET) < 0) { + set_error(IL_SYS, errno, "event_store_recover: error setting position for read"); + event_store_unlock(es); + fclose(ef); + return(-1); + } + /* the last enqueued event MUST end with \n */ + if((c=fgetc(ef)) != '\n') { + /* Houston, we have got a 
problem */ + il_log(LOG_WARNING, + " file position %ld does not point at the beginning of event string, backing off!\n", + last); + /* now, where were we? */ + if(es->offset) { + /* next try will be with + last_commited_bs */ + es->offset = 0; + } else { + /* this is really weird... back off completely */ + es->last_committed_ls = es->last_committed_bs = 0; + } + } else { + /* OK, break out of the loop */ + break; + } + } else { + /* this breaks out of the loop, we are starting at + * the beginning of file + */ + if(fseek(ef, 0, SEEK_SET) < 0) { + set_error(IL_SYS, errno, "event_store_recover: error setting position for read"); + event_store_unlock(es); + fclose(ef); + return(-1); + } + break; + } + } + + /* enqueue all remaining events */ + ret = 1; + msg = NULL; + while(read_event_string(ef, &hmsg) >= 0) { + + /* last holds the starting position of event_s in file */ + il_log(LOG_DEBUG, " reading event at %ld\n", last); + + /* break from now on means there was some error */ + ret = -1; + + /* create message for server */ + msg = server_msg_create((il_octet_string_t*)&hmsg, last); + if(msg == NULL) { + il_log(LOG_ALERT, " event file corrupted! I will try to move it to quarantine (ie. 
rename it).\n"); + /* actually do not bother if quarantine succeeded or not - we could not do more */ + event_store_quarantine(es); + fclose(ef); + event_store_unlock(es); + return(-1); + } + msg->es = es; + + /* first enqueue to the LS */ + if(!bs_only && (last >= es->last_committed_ls)) { + + il_log(LOG_DEBUG, " queueing event at %ld to server %s\n", last, eq_l->dest_name); + +#if !defined(IL_NOTIFICATIONS) + if(enqueue_msg(eq_l, msg) < 0) + break; +#endif + } + +#ifdef IL_NOTIFICATIONS + eq_b = queue_list_get(msg->dest); +#endif + + /* now enqueue to the BS, if neccessary */ + if((eq_b != eq_l) && + (last >= es->last_committed_bs)) { + + il_log(LOG_DEBUG, " queueing event at %ld to server %s\n", last, eq_b->dest_name); + + if(enqueue_msg(eq_b, msg) < 0) + break; + } + server_msg_free(msg); + msg = NULL; + + /* now last is also the offset behind the last successfully queued event */ + last = ftell(ef); + + /* ret == 0 means EOF or incomplete event found */ + ret = 0; + + } /* while */ + + /* due to this little assignment we had to lock the event_store for writing */ + es->offset = last; + es->last_modified = stbuf.st_mtime; + il_log(LOG_DEBUG, " event store offset set to %ld\n", last); + + if(msg) + server_msg_free(msg); + + fclose(ef); + il_log(LOG_DEBUG, " finished reading events with %d\n", ret); + + event_store_unlock(es); + return(ret); +} + + +/* + * event_store_sync() + * - check the position of event and fill holes from file + * - return 1 if the event is new, + * 0 if it was seen before, + * -1 if there was an error + */ +int +event_store_sync(struct event_store *es, long offset) +{ + int ret; + + assert(es != NULL); + + /* all events are actually read from file, the event on socket + * is ignored and serves just to notify us about file change + */ + ret = event_store_recover(es); + ret = (ret < 0) ? 
ret : 0; + return(ret); +} + + +int +event_store_next(struct event_store *es, long offset, int len) +{ + assert(es != NULL); + + /* offsets are good only to detect losses (differences between socket and file), + which is not possible now */ + return 0; +} + + +/* + * event_store_commit() + * + */ +int +event_store_commit(struct event_store *es, int len, int ls) +{ + assert(es != NULL); + + event_store_lock(es); + + if(ls) + es->last_committed_ls += len; + else { + es->last_committed_bs += len; + if (bs_only) es->last_committed_ls += len; + } + + if(event_store_write_ctl(es) < 0) { + event_store_unlock(es); + return(-1); + } + + event_store_unlock(es); + + + return(0); +} + + +/* + * event_store_clean() + * - remove the event files (event and ctl), if they are not needed anymore + * - returns 0 if event_store is in use, 1 if it was removed and -1 on error + * + * Q: How do we know that we can safely remove the files? + * A: When all events from file have been committed both by LS and BS. + */ +static +int +event_store_clean(struct event_store *es) +{ + long last; + int fd; + FILE *ef; + struct flock efl; + + assert(es != NULL); + + /* prevent sender threads from updating */ + event_store_lock(es); + + il_log(LOG_DEBUG, " trying to cleanup event store %s\n", es->job_id_s); + il_log(LOG_DEBUG, " bytes sent to logging server: %d\n", es->last_committed_ls); + il_log(LOG_DEBUG, " bytes sent to bookkeeping server: %d\n", es->last_committed_bs); + + /* preliminary check to avoid opening event file */ + /* if the positions differ, some events still have to be sent */ + if(es->last_committed_ls != es->last_committed_bs) { + event_store_unlock(es); + il_log(LOG_DEBUG, " not all events sent, cleanup aborted\n"); + return(0); + } + + /* the file can only be removed when all the events were succesfully sent + (ie. committed both by LS and BS */ + /* That also implies that the event queues are 'empty' at the moment. 
*/ + ef = fopen(es->event_file_name, "r+"); + if(ef == NULL) { + /* if we can not open the event store, it is an error and the struct should be removed */ + /* XXX - is it true? */ + event_store_unlock(es); + il_log(LOG_ERR, " event_store_clean: error opening event file: %s\n", strerror(errno)); + return(1); + } + + fd = fileno(ef); + + /* prevent local-logger from writing into event file */ + efl.l_type = F_WRLCK; + efl.l_whence = SEEK_SET; + efl.l_start = 0; + efl.l_len = 0; + if(fcntl(fd, F_SETLK, &efl) < 0) { + il_log(LOG_DEBUG, " could not lock event file, cleanup aborted\n"); + fclose(ef); + event_store_unlock(es); + if(errno != EACCES && + errno != EAGAIN) { + set_error(IL_SYS, errno, "event_store_clean: error locking event file"); + return(-1); + } + return(0); + } + + /* now the file should not contain partially written event, so it is safe + to get offset behind last event by seeking the end of file */ + if(fseek(ef, 0, SEEK_END) < 0) { + set_error(IL_SYS, errno, "event_store_clean: error seeking the end of file"); + event_store_unlock(es); + fclose(ef); + return(-1); + } + + last = ftell(ef); + il_log(LOG_DEBUG, " total bytes in file: %d\n", last); + + if(es->last_committed_ls < last) { + fclose(ef); + event_store_unlock(es); + il_log(LOG_DEBUG, " events still waiting in queue, cleanup aborted\n"); + return(0); + } else if( es->last_committed_ls > last) { + il_log(LOG_WARNING, " warning: event file seems to shrink!\n"); + /* XXX - in that case we can not continue because there may be + some undelivered events referring to that event store */ + fclose(ef); + event_store_unlock(es); + return(0); + } + + /* now we are sure that all events were sent and the event queues are empty */ + il_log(LOG_INFO, " removing event file %s\n", es->event_file_name); + + /* remove the event file */ + unlink(es->event_file_name); + unlink(es->control_file_name); + + /* clear the counters */ + es->last_committed_ls = 0; + es->last_committed_bs = 0; + es->offset = 0; + + /* 
unlock the event_store even if it is going to be removed */ + event_store_unlock(es); + + /* close the event file (that unlocks it as well) */ + fclose(ef); + + /* indicate that it is safe to remove this event_store */ + return(1); +} + + +/* -------------------------------- + * event store management functions + * -------------------------------- + */ +struct event_store * +event_store_find(char *job_id_s) +{ + struct event_store_list *q, *p; + struct event_store *es; + + if(pthread_rwlock_wrlock(&store_list_lock)) { + abort(); + } + + es = NULL; + + q = NULL; + p = store_list; + + while(p) { + if(strcmp(p->es->job_id_s, job_id_s) == 0) { + es = p->es; + if(pthread_rwlock_rdlock(&es->use_lock)) + abort(); + if(pthread_rwlock_unlock(&store_list_lock)) + abort(); + return(es); + } + + q = p; + p = p->next; + } + + es = event_store_create(job_id_s); + if(es == NULL) { + if(pthread_rwlock_unlock(&store_list_lock)) + abort(); + return(NULL); + } + + p = malloc(sizeof(*p)); + if(p == NULL) { + set_error(IL_NOMEM, ENOMEM, "event_store_find: no room for new event store"); + if(pthread_rwlock_unlock(&store_list_lock)) + abort(); + return(NULL); + } + + p->next = store_list; + store_list = p; + + p->es = es; + + if(pthread_rwlock_rdlock(&es->use_lock)) + abort(); + + if(pthread_rwlock_unlock(&store_list_lock)) + abort(); + + return(es); +} + + +int +event_store_release(struct event_store *es) +{ + assert(es != NULL); + + if(pthread_rwlock_unlock(&es->use_lock)) + abort(); + il_log(LOG_DEBUG, " released lock on %s\n", es->job_id_s); + return(0); +} + + +event_store_from_file(char *filename) +{ + struct event_store *es; + FILE *event_file; + char *job_id_s = NULL, *p; + il_http_message_t hmsg; + int ret; + + il_log(LOG_INFO, " attaching to event file: %s\n", filename); + + if(strstr(filename, "quarantine") != NULL) { + il_log(LOG_INFO, " file name belongs to quarantine, not touching that.\n"); + return(0); + } + + event_file = fopen(filename, "r"); + if(event_file == NULL) { 
+ set_error(IL_SYS, errno, "event_store_from_file: error opening event file"); + return(-1); + } + ret = read_event_string(event_file, &hmsg); + fclose(event_file); + if(ret < 0) + return(0); + + /* get id aka dest */ + job_id_s = hmsg.host; + + il_log(LOG_DEBUG, " message dest: '%s'\n", job_id_s); + if(job_id_s == NULL) { + il_log(LOG_NOTICE, " skipping file, could not parse event\n"); + ret = 0; + goto out; + } + + es=event_store_find(job_id_s); + + if(es == NULL) { + ret = -1; + goto out; + } + + if((es->last_committed_ls == 0) && + (es->last_committed_bs == 0) && + (es->offset == 0)) { + ret = event_store_read_ctl(es); + } else + ret = 0; + + event_store_release(es); + +out: + if(hmsg.data) free(hmsg.data); + if(job_id_s) free(job_id_s); + return(ret); +} + + +int +event_store_init(char *prefix) +{ + if(file_prefix == NULL) { + file_prefix = strdup(prefix); + store_list = NULL; + } + + /* read directory and get a list of event files */ + { + int len; + + char *p, *dir; + DIR *event_dir; + struct dirent *entry; + + + /* get directory name */ + p = strrchr(file_prefix, '/'); + if(p == NULL) { + dir = strdup("."); + p = ""; + len = 0; + } else { + *p = '\0'; + dir = strdup(file_prefix); + *p++ = '/'; + len = strlen(p); + } + + event_dir = opendir(dir); + if(event_dir == NULL) { + free(dir); + set_error(IL_SYS, errno, "event_store_init: error opening event directory"); + return(-1); + } + + while((entry=readdir(event_dir))) { + char *s; + + /* skip all files that do not match prefix */ + if(strncmp(entry->d_name, p, len) != 0) + continue; + + /* skip all control files */ + if((s=strstr(entry->d_name, ".ctl")) != NULL && + s[4] == '\0') + continue; + + s = malloc(strlen(dir) + strlen(entry->d_name) + 2); + if(s == NULL) { + free(dir); + set_error(IL_NOMEM, ENOMEM, "event_store_init: no room for file name"); + return(-1); + } + + *s = '\0'; + strcat(s, dir); + strcat(s, "/"); + strcat(s, entry->d_name); + + if(event_store_from_file(s) < 0) { + free(dir); + free(s); + 
closedir(event_dir); + return(-1); + } + + free(s); + } + closedir(event_dir); + + /* one more pass - this time remove stale .ctl files */ + event_dir = opendir(dir); + if(event_dir == NULL) { + free(dir); + set_error(IL_SYS, errno, "event_store_init: error opening event directory"); + return(-1); + } + + while((entry=readdir(event_dir))) { + char *s; + + /* skip all files that do not match prefix */ + if(strncmp(entry->d_name, p, len) != 0) + continue; + + /* find all control files */ + if((s=strstr(entry->d_name, ".ctl")) != NULL && + s[4] == '\0') { + char *ef; + struct stat st; + + /* is there corresponding event file? */ + ef = malloc(strlen(dir) + strlen(entry->d_name) + 2); + if(ef == NULL) { + free(dir); + set_error(IL_NOMEM, ENOMEM, "event_store_init: no room for event file name"); + return(-1); + } + + s[0] = 0; + *ef = '\0'; + strcat(ef, dir); + strcat(ef, "/"); + strcat(ef, entry->d_name); + s[0] = '.'; + + if(stat(ef, &st) == 0) { + /* something is there */ + /* XXX - it could be something else than event file, but do not bother now */ + } else { + /* could not stat file, remove ctl */ + strcat(ef, s); + il_log(LOG_DEBUG, " removing stale file %s\n", ef); + if(unlink(ef)) + il_log(LOG_ERR, " could not remove file %s: %s\n", ef, strerror(errno)); + + } + free(ef); + + } + } + closedir(event_dir); + free(dir); + } + + return(0); +} + + +int +event_store_recover_all() +{ + struct event_store_list *sl; + + + if(pthread_rwlock_rdlock(&store_list_lock)) + abort(); + + /* recover all event stores */ + sl = store_list; + while(sl != NULL) { + + /* recover this event store */ + /* no need to lock use_lock in event_store, the store_list_lock is in place */ + if(event_store_recover(sl->es) < 0) { + il_log(LOG_ERR, " error recovering event store %s:\n %s\n", sl->es->event_file_name, error_get_msg()); + clear_error(); + } + sl = sl->next; + } + + if(pthread_rwlock_unlock(&store_list_lock)) + abort(); + + return(0); +} + + +#if 0 +int +event_store_remove(struct 
event_store *es) +{ + struct event_store_list *p, **q; + + assert(es != NULL); + + switch(event_store_clean(es)) { + case 0: + il_log(LOG_DEBUG, " event store not removed, still used\n"); + return(0); + + case 1: + if(pthread_rwlock_wrlock(&store_list_lock) < 0) { + set_error(IL_SYS, errno, " event_store_remove: error locking event store list"); + return(-1); + } + + p = store_list; + q = &store_list; + + while(p) { + if(p->es == es) { + (*q) = p->next; + event_store_free(es); + free(p); + break; + } + q = &(p->next); + p = p->next; + } + + if(pthread_rwlock_unlock(&store_list_lock) < 0) { + set_error(IL_SYS, errno, " event_store_remove: error unlocking event store list"); + return(-1); + } + return(1); + + default: + return(-1); + } + /* not reached */ + return(0); +} +#endif + +int +event_store_cleanup() +{ + struct event_store_list *sl; + struct event_store_list *slnext; + struct event_store_list **prev; + + /* try to remove event files */ + + if(pthread_rwlock_wrlock(&store_list_lock)) + abort(); + + sl = store_list; + prev = &store_list; + + while(sl != NULL) { + int ret; + + slnext = sl->next; + + /* one event store at time */ + ret = pthread_rwlock_trywrlock(&sl->es->use_lock); + if(ret == EBUSY) { + il_log(LOG_DEBUG, " event_store %s is in use by another thread\n", + sl->es->job_id_s); + sl = slnext; + continue; + } else if (ret < 0) + abort(); + + switch(event_store_clean(sl->es)) { + + case 1: + /* remove this event store */ + (*prev) = slnext; + event_store_free(sl->es); + free(sl); + break; + + case -1: + il_log(LOG_ERR, " error removing event store %s (file %s):\n %s\n", + sl->es->job_id_s, sl->es->event_file_name, error_get_msg()); + /* event_store_release(sl->es); */ + clear_error(); + /* go on to the next */ + + default: + event_store_release(sl->es); + prev = &(sl->next); + break; + } + + sl = slnext; + } + + if(pthread_rwlock_unlock(&store_list_lock)) + abort(); + + return(0); +} + diff --git a/org.glite.lb.logger/src/http.c 
b/org.glite.lb.logger/src/http.c new file mode 100644 index 0000000..c9fb89b --- /dev/null +++ b/org.glite.lb.logger/src/http.c @@ -0,0 +1,197 @@ +#ident "$Header$" + +#include +#include + +#include "interlogd.h" + + +int +parse_request(const char *s, il_http_message_t *msg) +{ + if(!strncasecmp(s, "HTTP", 4)) { + msg->msg_type = IL_HTTP_REPLY; + } else if(!strncasecmp(s, "POST", 4)) { + msg->msg_type = IL_HTTP_POST; + } else if(!strncasecmp(s, "GET", 3)) { + msg->msg_type = IL_HTTP_GET; + } else { + msg->msg_type = IL_HTTP_OTHER; + } + if(msg->msg_type == IL_HTTP_REPLY) { + char *p = strchr(s, ' '); + + if(!p) goto parse_end; + p++; + msg->reply_code=atoi(p); + p = strchr(p, ' '); + if(!p) goto parse_end; + p++; + msg->reply_string = strdup(p); + + parse_end: + ; + } +} + + +int +parse_header(const char *s, il_http_message_t *msg) +{ + if(!strncasecmp(s, "Content-Length:", 15)) { + msg->content_length = atoi(s + 15); + } else if(!strncasecmp(s, "Host:", 5)) { + const char *p = s + 4; + while(*++p == ' '); /* skip spaces */ + msg->host = strdup(p); + } + return(0); +} + + +#define DEFAULT_CHUNK_SIZE 1024 + +// read what is available and parse what can be parsed +// returns the result of read operation of the underlying connection, +// ie. 
the number of bytes read or error code +int +receive_http(void *user_data, int (*reader)(void *, char *, const int), il_http_message_t *msg) +{ + static enum { NONE, IN_REQUEST, IN_HEADERS, IN_BODY } state = NONE; + int len, alen, clen, i, buffer_free, min_buffer_free = DEFAULT_CHUNK_SIZE; + char *buffer, *p, *s, *cr; + + memset(msg, 0, sizeof(*msg)); + // msg->data = NULL; + // msg->len = 0; + state = IN_REQUEST; + alen = 0; + buffer = NULL; + buffer_free = 0; + p = NULL; + s = NULL; + + do { + /* p - first empty position in buffer + alen - size of allocated buffer + len - number of bytes received in last read + s - points behind last scanned CRLF or at buffer start + buffer_free = alen - (p - buffer) + */ + + /* prepare at least chunk_size bytes for next data */ + if(buffer_free < min_buffer_free) { + char *n; + + alen += min_buffer_free; + n = realloc(buffer, alen); + if(n == NULL) { + free(buffer); + set_error(IL_NOMEM, ENOMEM, "read_event: no room for event"); + return(-1); + } + buffer_free += min_buffer_free; + p = n + (p - buffer); + s = n + (s - buffer); + buffer = n; + } + + if(buffer_free > 0) { + len = (*reader)(user_data, p, buffer_free); + if(len < 0) { + // error + free(buffer); + // set_error(IL_SYS, errno, "receive_http: error reading data"); + return -1; + } else if(len == 0) { + // EOF + free(buffer); + set_error(IL_PROTO, errno, "receive_http: error reading data - premature EOF"); + return -1; + } + buffer_free -= len; + p+= len; + } + + + switch(state) { + + // parse buffer, look for CRLFs + // s - start scan position + // p - start of current token + // cr - current CRLF position + + case IN_REQUEST: + if((s < p - 1) && + (cr = (char*)memchr(s, '\r', p - s - 1)) && + (cr[1] == '\n')) { + *cr = 0; + parse_request(s, msg); + *cr = '\r'; + // change state + state = IN_HEADERS; + // start new tokens (cr < p - 1 -> s < p + 1 <-> s <= p) + s = cr + 2; + } else { + break; + } + + case IN_HEADERS: + while((state != IN_BODY) && + (s < p - 1) && + (cr = 
(char*)memchr(s, '\r', p - s - 1)) && + (cr[1] == '\n')) { + if(s == cr) { /* do not consider request starting with CRLF */ + // found CRLFCRLF + state = IN_BODY; + } else { + *cr = 0; + parse_header(s, msg); + *cr = '\r'; + } + // next scan starts after CRLF + s = cr + 2; + } + if(state == IN_BODY) { + // we found body + // content-length should be set at the moment + if(msg->content_length > 0) { + int need_free = msg->content_length - (p - s); + char *n; + + alen += need_free - buffer_free + 1; + n = realloc(buffer, alen); + if(n == NULL) { + free(buffer); + set_error(IL_NOMEM, ENOMEM, "read_event: no room for event"); + return(-1); + } + buffer_free = need_free; + min_buffer_free = 0; + p = n + (p - buffer); + s = n + (s - buffer); + buffer = n; + } else { + // report error + free(buffer); + set_error(IL_PROTO, EINVAL, "receive_http: error reading data - no content length specified\n"); + return -1; + } + } + break; + + case IN_BODY: + if(buffer_free == 0) { + // finished reading + *p = 0; + state = NONE; + } + break; + } + } while(state != NONE); + + msg->data = buffer; + msg->len = p - buffer; + + return 0; +} diff --git a/org.glite.lb.logger/src/input_queue_socket_http.c b/org.glite.lb.logger/src/input_queue_socket_http.c new file mode 100644 index 0000000..939c45f --- /dev/null +++ b/org.glite.lb.logger/src/input_queue_socket_http.c @@ -0,0 +1,167 @@ +#ident "$Header$" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "interlogd.h" + +static const int SOCK_QUEUE_MAX = 50; +extern char *socket_path; +extern char *file_prefix; + +static int sock; +static int accepted; + +static +int plain_reader(void *user_data, char *buffer, const int len) +{ + return (recv(*(int*)user_data, buffer, len, MSG_NOSIGNAL)); +} + + +int +input_queue_attach() +{ + struct sockaddr_un saddr; + + if((sock=socket(PF_UNIX, SOCK_STREAM, 0)) < 0) { + set_error(IL_SYS, errno, "input_queue_attach: error creating socket"); + return(-1); + } + + 
memset(&saddr, 0, sizeof(saddr)); + saddr.sun_family = AF_UNIX; + strcpy(saddr.sun_path, socket_path); + + /* test for the presence of the socket and another instance + of interlogger listening */ + if(connect(sock, (struct sockaddr *)&saddr, sizeof(saddr.sun_path)) < 0) { + if(errno == ECONNREFUSED) { + /* socket present, but no one at the other end; remove it */ + il_log(LOG_WARNING, " removing stale input socket %s\n", socket_path); + unlink(socket_path); + } + /* ignore other errors for now */ + } else { + /* connection was successful, so bail out - there is + another interlogger running */ + set_error(IL_SYS, EADDRINUSE, "input_queue_attach: another instance of interlogger is running"); + return(-1); + } + + if(bind(sock, (struct sockaddr *)&saddr, sizeof(saddr)) < 0) { + set_error(IL_SYS, errno, "input_queue_attach: error binding socket"); + return(-1); + } + + if (listen(sock, SOCK_QUEUE_MAX)) { + set_error(IL_SYS, errno, "input_queue_attach: error listening on socket"); + return -1; + } + + return(0); +} + + +void input_queue_detach() +{ + if (sock >= 0) + close(sock); + unlink(socket_path); +} + + + +/* + * Returns: -1 on error, 0 if no message available, message length otherwise + * + */ +#ifdef PERF_EVENTS_INLINE +int +input_queue_get(il_octet_string_t **buffer, long *offset, int timeout) +{ + static long o = 0; + int len; + char *jobid; + static il_octet_string_t my_buffer; + + assert(buffer != NULL); + + *buffer = &my_buffer; + + len = glite_wll_perftest_produceEventString(&my_buffer.data, &jobid); + my_buffer.len = len; + if(len) { + o += len; + *offset = o; + } else if (len == 0) { + sleep(timeout); + } + return(len); +} +#else +int +input_queue_get(il_octet_string_t **buffer, long *offset, int timeout) +{ + fd_set fds; + struct timeval tv; + int msg_len; + static il_http_message_t msg; + + assert(buffer != NULL); + + *buffer = (il_octet_string_t *)&msg; + + FD_ZERO(&fds); + FD_SET(sock, &fds); + + tv.tv_sec = timeout; + tv.tv_usec = 0; + + msg_len = 
select(sock + 1, &fds, NULL, NULL, timeout >= 0 ? &tv : NULL); + switch(msg_len) { + + case 0: /* timeout */ + return(0); + + case -1: /* error */ + switch(errno) { + case EINTR: + il_log(LOG_DEBUG, " interrupted while waiting for event!\n"); + return(0); + + default: + set_error(IL_SYS, errno, "input_queue_get: error waiting for event"); + return(-1); + } + default: + break; + } + + if((accepted=accept(sock, NULL, NULL)) < 0) { + set_error(IL_SYS, errno, "input_queue_get: error accepting connection"); + return(-1); + } + + msg_len = receive_http(&accepted, plain_reader, &msg); + + if(msg_len < 0) { + close(accepted); + if(error_get_maj() != IL_OK) + return -1; + else + return 0; + } + + close(accepted); + *offset = -1; + return(msg.len); +} +#endif + diff --git a/org.glite.lb.logger/src/queue_mgr_http.c b/org.glite.lb.logger/src/queue_mgr_http.c new file mode 100644 index 0000000..777e620 --- /dev/null +++ b/org.glite.lb.logger/src/queue_mgr_http.c @@ -0,0 +1,164 @@ +#ident "$Header$" + +#include +#include +#include + +#include "glite/jobid/cjobid.h" +#include "glite/lb/context.h" + +#include "interlogd.h" + +struct queue_list { + struct event_queue *queue; + char *dest; + struct queue_list *next; + time_t expires; +}; + +static struct event_queue *log_queue; +static struct queue_list *queues; + + +static +int +queue_list_create() +{ + queues = NULL; + + return(0); +} + + +static +int +queue_list_find(struct queue_list *ql, const char *dest, struct queue_list **el, struct queue_list **prev) +{ + struct queue_list *q, *p; + + assert(el != NULL); + + *el = NULL; + if(prev) + *prev = NULL; + + if(ql == NULL) + return(0); + + q = NULL; + p = ql; + + while(p) { + if(strcmp(p->dest, dest) == 0) { + *el = p; + if(prev) + *prev = q; + return(1); + } + + q = p; + p = p->next; + }; + + return(0); +} + + +static +int +queue_list_add(struct queue_list **ql, const char *dest, struct event_queue *eq) +{ + struct queue_list *el; + + assert(dest != NULL); + assert(eq != NULL); + 
assert(ql != NULL); + + el = malloc(sizeof(*el)); + if(el == NULL) { + set_error(IL_NOMEM, ENOMEM, "queue_list_add: not enough room for new queue"); + return(-1); + } + + el->dest = strdup(dest); + if(el->dest == NULL) { + free(el); + set_error(IL_NOMEM, ENOMEM, "queue_list_add: not enough memory for new queue"); + return(-1); + } + el->queue = eq; + el->next = *ql; + *ql = el; + return 0; +} + + +struct event_queue * +queue_list_get(char *job_id_s) +{ + char *dest; + struct queue_list *q; + struct event_queue *eq; + dest = job_id_s; + + if(dest == NULL) + return(NULL); + + if(queue_list_find(queues, dest, &q, NULL)) { + return(q->queue); + } else { + eq = event_queue_create(dest); + if(eq) + queue_list_add(&queues, dest, eq); + return(eq); + } +} + + +int +queue_list_is_log(struct event_queue *eq) +{ + return(eq == queue_list_get(NULL)); +} + + +int +queue_list_init(char *ls) +{ + return(queue_list_create()); +} + + +static struct queue_list *current; + + +struct event_queue * +queue_list_first() +{ + current = queues; + return(current ? current->queue : NULL); +} + + +struct event_queue * +queue_list_next() +{ + current = current ? current->next : NULL; + return(current ? 
current->queue : NULL); +} + + +int +queue_list_remove_queue(struct event_queue *eq) +{ + assert(eq != NULL); + + free(eq); + return(1); +} + + + +/* Local Variables: */ +/* c-indentation-style: gnu */ +/* End: */ diff --git a/org.glite.lb.logger/src/send_event_http.c b/org.glite.lb.logger/src/send_event_http.c new file mode 100644 index 0000000..3c90562 --- /dev/null +++ b/org.glite.lb.logger/src/send_event_http.c @@ -0,0 +1,282 @@ +#ident "$Header$" + +#include +#include +#include +#ifdef HAVE_UNISTD_H +#include +#endif +#include +#include +#include +#include + + +/* + * - L/B server protocol handling routines + */ + +#include "glite/jobid/cjobid.h" +#include "glite/lb/il_string.h" +#include "glite/lb/context.h" + +#include "interlogd.h" + +struct reader_data { + edg_wll_GssConnection *gss; + struct timeval *timeout; +}; + + +static +int +gss_reader(void *user_data, char *buffer, int max_len) +{ + int ret; + struct reader_data *data = (struct reader_data *)user_data; + edg_wll_GssStatus gss_stat; + + ret = edg_wll_gss_read(data->gss, buffer, max_len, data->timeout, &gss_stat); + if(ret < 0) { + char *gss_err = NULL; + + if(ret == EDG_WLL_GSS_ERROR_GSS) { + edg_wll_gss_get_error(&gss_stat, "get_reply", &gss_err); + set_error(IL_DGGSS, ret, gss_err); + free(gss_err); + } else + set_error(IL_DGGSS, ret, "get_reply"); + } + return(ret); +} + + +/* + * Read reply from server. 
+ * Returns: -1 - error reading message, + * code > 0 - http status code from server + */ +static +int +get_reply(struct event_queue *eq, char **buf, int *code_min) +{ + int ret, code; + int len; + struct timeval tv; + struct reader_data data; + il_http_message_t msg; + + tv.tv_sec = TIMEOUT; + tv.tv_usec = 0; + data.gss = &eq->gss; + data.timeout = &tv; + len = receive_http(&data, gss_reader, &msg); + if(len < 0) { + set_error(IL_PROTO, LB_PROTO, "get_reply: error reading server reply"); + return(-1); + } + if(msg.data) free(msg.data); + if(msg.reply_string) *buf = msg.reply_string; + *code_min = 0; /* XXX fill in flag for fault */ + return(msg.reply_code); +} + + + +/* + * Returns: 0 - not connected, timeout set, 1 - OK + */ +int +event_queue_connect(struct event_queue *eq) +{ + int ret; + struct timeval tv; + edg_wll_GssStatus gss_stat; + cred_handle_t *local_cred_handle; + + assert(eq != NULL); + +#ifdef LB_PERF + if(!nosend) { +#endif + + if(eq->gss.context == NULL) { + + tv.tv_sec = TIMEOUT; + tv.tv_usec = 0; + + /* get pointer to the credentials */ + if(pthread_mutex_lock(&cred_handle_lock) < 0) + abort(); + local_cred_handle = cred_handle; + local_cred_handle->counter++; + if(pthread_mutex_unlock(&cred_handle_lock) < 0) + abort(); + + il_log(LOG_DEBUG, " trying to connect to %s:%d\n", eq->dest_name, eq->dest_port); + ret = edg_wll_gss_connect(local_cred_handle->creds, eq->dest_name, eq->dest_port, &tv, &eq->gss, &gss_stat); + if(pthread_mutex_lock(&cred_handle_lock) < 0) + abort(); + /* check if we need to release the credentials */ + --local_cred_handle->counter; + if(local_cred_handle != cred_handle && local_cred_handle->counter == 0) { + edg_wll_gss_release_cred(&local_cred_handle->creds, NULL); + free(local_cred_handle); + il_log(LOG_DEBUG, " freed credentials, not used anymore\n"); + } + if(pthread_mutex_unlock(&cred_handle_lock) < 0) + abort(); + + if(ret < 0) { + char *gss_err = NULL; + + if (ret == EDG_WLL_GSS_ERROR_GSS) + 
edg_wll_gss_get_error(&gss_stat, "event_queue_connect: edg_wll_gss_connect", &gss_err); + set_error(IL_DGGSS, ret, + (ret == EDG_WLL_GSS_ERROR_GSS) ? gss_err : "event_queue_connect: edg_wll_gss_connect"); + if (gss_err) free(gss_err); + eq->gss.context = NULL; + eq->timeout = TIMEOUT; + return(0); + } + } + +#ifdef LB_PERF + } +#endif + + return(1); +} + + +int +event_queue_close(struct event_queue *eq) +{ + assert(eq != NULL); + +#ifdef LB_PERF + if(!nosend) { +#endif + + if(eq->gss.context != NULL) { + edg_wll_gss_close(&eq->gss, NULL); + eq->gss.context = NULL; + } +#ifdef LB_PERF + } +#endif + return(0); +} + + +/* + * Send all events from the queue. + * Returns: -1 - system error, 0 - not sent, 1 - queue empty + */ +int +event_queue_send(struct event_queue *eq) +{ + int events_sent = 0; + assert(eq != NULL); + +#ifdef LB_PERF + if(!nosend) { +#endif + if(eq->gss.context == NULL) + return(0); +#ifdef LB_PERF + } +#endif + + /* feed the server with events */ + while (!event_queue_empty(eq)) { + struct server_msg *msg; + char *rep; + int ret, code, code_min; + size_t bytes_sent; + struct timeval tv; + edg_wll_GssStatus gss_stat; + + clear_error(); + + if(event_queue_get(eq, &msg) < 0) + return(-1); + + il_log(LOG_DEBUG, " trying to deliver event at offset %d for job %s\n", msg->offset, msg->job_id_s); + +#ifdef LB_PERF + if(!nosend) { +#endif + /* XXX: ljocha -- does it make sense to send empty messages ? 
*/ + if (msg->len) { + tv.tv_sec = TIMEOUT; + tv.tv_usec = 0; + ret = edg_wll_gss_write_full(&eq->gss, msg->msg, msg->len, &tv, &bytes_sent, &gss_stat); + if(ret < 0) { + if (ret == EDG_WLL_GSS_ERROR_ERRNO && errno == EPIPE && events_sent > 0) { + eq->timeout = 0; + } else { + il_log(LOG_ERR, "send_event: %s\n", error_get_msg()); + eq->timeout = TIMEOUT; + } + return(0); + } + if((code = get_reply(eq, &rep, &code_min)) < 0) { + /* could not get the reply properly, so try again later */ + if (events_sent>0) + eq->timeout = 1; + else { + eq->timeout = TIMEOUT; + il_log(LOG_ERR, " error reading server %s reply:\n %s\n", eq->dest_name, error_get_msg()); + } + return(0); + } + } + else { code = 200; code_min = 0; rep = strdup("not sending empty message"); } +#ifdef LB_PERF + } else { + glite_wll_perftest_consumeEventIlMsg(msg->msg+17); + code = 200; + rep = strdup("OK"); + } +#endif + + il_log(LOG_DEBUG, " event sent, server %s replied with %d, %s\n", eq->dest_name, code, rep); + free(rep); + + /* the reply is back here, decide what to do with message */ + /* HTTP error codes: + 1xx - informational (eg. 100 Continue) + 2xx - successful (eg. 200 OK) + 3xx - redirection (eg. 301 Moved Permanently) + 4xx - client error (eq. 400 Bad Request) + 5xx - server error (eq. 
500 Internal Server Error) + */ + if(code >= 100 && code < 200) { + + /* non fatal errors (for us), try to deliver later */ + eq->timeout = TIMEOUT; + return(0); + } + + /* the message was consumed (successfully or not) */ + /* update the event pointer */ + if(event_store_commit(msg->es, msg->ev_len, queue_list_is_log(eq)) < 0) + /* failure committing message, this is bad */ + return(-1); + + event_queue_remove(eq); + events_sent++; + } /* while */ + + return(1); + +} /* send_events */ + + +/* this is just not used */ +int +send_confirmation(long lllid, int code) +{ + return 0; +} diff --git a/org.glite.lb.logger/src/server_msg_http.c b/org.glite.lb.logger/src/server_msg_http.c new file mode 100644 index 0000000..8bd3623 --- /dev/null +++ b/org.glite.lb.logger/src/server_msg_http.c @@ -0,0 +1,128 @@ +#ident "$Header$" + +#include +#include +#include + +#include "interlogd.h" +#include "glite/lb/il_msg.h" +#include "glite/lb/events_parse.h" +#include "glite/lb/context.h" + +static +int +create_msg(il_http_message_t *ev, char **buffer, long *receipt, time_t *expires) +{ + char *event = ev->data; + + *receipt = 0; + *expires = 0; + + *buffer = ev->data; + return ev->len;; +} + + +struct server_msg * +server_msg_create(il_octet_string_t *event, long offset) +{ + struct server_msg *msg; + + msg = malloc(sizeof(*msg)); + if(msg == NULL) { + set_error(IL_NOMEM, ENOMEM, "server_msg_create: out of memory allocating message"); + return(NULL); + } + + if(server_msg_init(msg, event) < 0) { + server_msg_free(msg); + return(NULL); + } + msg->offset = offset; + + return(msg); +} + + +struct server_msg * +server_msg_copy(struct server_msg *src) +{ + struct server_msg *msg; + + msg = malloc(sizeof(*msg)); + if(msg == NULL) { + set_error(IL_NOMEM, ENOMEM, "server_msg_copy: out of memory allocating message"); + return(NULL); + } + + msg->msg = malloc(src->len); + if(msg->msg == NULL) { + set_error(IL_NOMEM, ENOMEM, "server_msg_copy: out of memory allocating server message"); + 
server_msg_free(msg); + return(NULL); + } + msg->len = src->len; + memcpy(msg->msg, src->msg, src->len); + + msg->job_id_s = strdup(src->job_id_s); + msg->ev_len = src->ev_len; + msg->es = src->es; + msg->receipt_to = src->receipt_to; + msg->offset = src->offset; +#if defined(IL_NOTIFICATIONS) + msg->dest_name = strdup(src->dest_name); + msg->dest_port = src->dest_port; + msg->dest = strdup(src->dest); +#endif + msg->expires = src->expires; + return(msg); +} + + +int +server_msg_init(struct server_msg *msg, il_octet_string_t *event) +{ + il_http_message_t *hmsg = (il_http_message_t *)event; + + assert(msg != NULL); + assert(event != NULL); + + memset(msg, 0, sizeof(*msg)); + + + msg->job_id_s = hmsg->host; + if(msg->job_id_s == NULL) { + set_error(IL_LBAPI, EDG_WLL_ERROR_PARSE_BROKEN_ULM, "server_msg_init: error getting id"); + return -1; + } + msg->len = create_msg(hmsg, &msg->msg, &msg->receipt_to, &msg->expires); + if(msg->len < 0) + return -1; + /* set this to indicate new data owner */ + hmsg->data = NULL; + hmsg->host = NULL; + msg->ev_len = hmsg->len + 1; /* must add separator size too */ + return 0; + +} + + +int +server_msg_is_priority(struct server_msg *msg) +{ + assert(msg != NULL); + + return(msg->receipt_to != 0); +} + + +int +server_msg_free(struct server_msg *msg) +{ + assert(msg != NULL); + + if(msg->msg) free(msg->msg); + if(msg->job_id_s) free(msg->job_id_s); + free(msg); + return 0; +}