From: cvs2svn Date: Wed, 18 Nov 2009 14:12:38 +0000 (+0000) Subject: This commit was manufactured by cvs2svn to create tag 'glite-lb_R_1_9_3_1'. X-Git-Tag: glite-lb_R_1_9_3_1 X-Git-Url: http://scientific.zcu.cz/git/?a=commitdiff_plain;h=9856ff32f628933318c955f47607370689b5ec5f;p=jra1mw.git This commit was manufactured by cvs2svn to create tag 'glite-lb_R_1_9_3_1'. Sprout from branch_RC31_3 2009-10-15 14:24:23 UTC Zdeněk Šustr 'Module repacked' Cherrypick from branch_RC31_3 2008-06-10 10:50:05 UTC Zdeněk Šustr 'push minor version': org.glite.lb.utils/project/version.properties Cherrypick from branch_RC31_3 2009-11-18 14:12:37 UTC Zdeněk Šustr 'New LB version': org.glite.lb/project/version.properties Cherrypick from master 2009-11-14 14:50:42 UTC František Dvořák 'Finalize build.': org.glite.lb.harvester/Makefile org.glite.lb.harvester/configure org.glite.lb.harvester/doc/INSTALL org.glite.lb.harvester/doc/README org.glite.lb.harvester/examples/test.sh org.glite.lb.harvester/examples/test.sql org.glite.lb.harvester/project/ChangeLog org.glite.lb.harvester/project/package.description org.glite.lb.harvester/project/package.summary org.glite.lb.harvester/project/version.properties org.glite.lb.harvester/src/harvester.c org.glite.lb.logger/config/startup org.glite.lb.logger/configure org.glite.lb.logger/project/ChangeLog org.glite.lb.logger/project/package.description org.glite.lb.logger/project/package.summary org.glite.lb.logger/src-nt/Connection.H org.glite.lb.logger/src-nt/Connection.cpp org.glite.lb.logger/src-nt/EventManager.H org.glite.lb.logger/src-nt/EventManager.cpp org.glite.lb.logger/src-nt/Exception.H org.glite.lb.logger/src-nt/HTTPTransport.H org.glite.lb.logger/src-nt/HTTPTransport.cpp org.glite.lb.logger/src-nt/InputChannel.H org.glite.lb.logger/src-nt/InputChannel.cpp org.glite.lb.logger/src-nt/Makefile org.glite.lb.logger/src-nt/Message.H org.glite.lb.logger/src-nt/MessageStore.H org.glite.lb.logger/src-nt/MessageStore.cpp 
org.glite.lb.logger/src-nt/PlainConnection.H org.glite.lb.logger/src-nt/PlainConnection.cpp org.glite.lb.logger/src-nt/PluginManager.H org.glite.lb.logger/src-nt/PluginManager.cpp org.glite.lb.logger/src-nt/Properties.H org.glite.lb.logger/src-nt/SocketInput.cpp org.glite.lb.logger/src-nt/ThreadPool.H org.glite.lb.logger/src-nt/ThreadPool.cpp org.glite.lb.logger/src-nt/Transport.H org.glite.lb.logger/src-nt/Transport.cpp org.glite.lb.logger/src-nt/main.cpp org.glite.lb.logger/src-nt/test/EventManagerTest.cpp org.glite.lb.logger/src-nt/test/ThreadPoolTest.cpp org.glite.lb.logger/src/event_store_http.c org.glite.lb.logger/src/http.c org.glite.lb.logger/src/input_queue_socket_http.c org.glite.lb.logger/src/queue_mgr_http.c org.glite.lb.logger/src/send_event_http.c org.glite.lb.logger/src/server_msg_http.c Cherrypick from master 2009-03-25 15:09:32 UTC Michal Voců 'set socket option': org.glite.lb.logger/doc/glite-lb-interlogd.8 org.glite.lb.logger/doc/glite-lb-logd.8 org.glite.lb.logger/src/il_error.c org.glite.lb.logger/src/il_error.h org.glite.lb.logger/src/input_queue_socket.c org.glite.lb.logger/src/logd.c org.glite.lb.logger/src/logd_proto.c org.glite.lb.logger/src/perftest_il.sh org.glite.lb.logger/src/queue_thread.c Cherrypick from branch_2_0_notif_GL31 2009-04-14 11:19:19 UTC Aleš Křenek 'Handle copying messages with empty destination': org.glite.lb.logger/Makefile org.glite.lb.logger/src/event_queue.c org.glite.lb.logger/src/event_store.c org.glite.lb.logger/src/il_master.c org.glite.lb.logger/src/interlogd.c org.glite.lb.logger/src/interlogd.h org.glite.lb.logger/src/queue_mgr.c org.glite.lb.logger/src/recover.c org.glite.lb.logger/src/send_event.c org.glite.lb.logger/src/server_msg.c --- diff --git a/org.glite.lb.harvester/Makefile b/org.glite.lb.harvester/Makefile new file mode 100644 index 0000000..63e3886 --- /dev/null +++ b/org.glite.lb.harvester/Makefile @@ -0,0 +1,81 @@ +top_srcdir=.. +stagedir=. 
+package=glite-lb-harvester +module.version=0.0.0 +PREFIX=/opt/glite +globus_prefix=/opt/globus + +archlib:=lib +thrflavour:=gcc32dbgpthr +host_cpu:=${shell uname -m} +ifeq (${host_cpu},x86_64) + archlib:=lib64 + thrflavour:=gcc64dbgpthr +endif + +-include Makefile.inc +-include ../project/version.properties +version:=${module.version} + +CC=gcc +VPATH=${top_srcdir}/src + +GLOBUS_CPPFLAGS:=-I${globus_prefix}/include/${thrflavour} +CPPFLAGS:=-I${stagedir}/include -D_GNU_SOURCE -D_REENTRANT ${CPPFLAGS} +CFLAGS:=-W -Wall -g -O2 ${CFLAGS} +LDFLAGS:=${LDFLAGS} +LIBS:=-L${stagedir}/${archlib} -L${stagedir}/lib \ + -lglite_lb_common_${thrflavour} \ + -lglite_lb_client_${thrflavour} \ + -lpthread -lglite_security_gss_${thrflavour} + +ifneq ($(GLITE_LB_HARVESTER_WITH_LBU_DB),no) +CPPFLAGS:=$(CPPFLAGS) -DWITH_LBU_DB=1 +LIBS:=$(LIBS) -lglite_lbu_db +endif +ifeq ($(GLITE_LB_HARVESTER_WITH_OLD_LB),yes) +CPPFLAGS:=${GLOBUS_CPPFLAGS} $(CPPFLAGS) -DWITH_OLD_LB=1 +LIBS:=$(LIBS) -lglite_wmsutils_cjobid +else +LIBS:=$(LIBS) -lglite_jobid +endif + +COMPILE:=libtool --mode=compile ${CC} ${CPPFLAGS} ${CFLAGS} +LINK:=libtool --mode=link ${CC} ${LDFLAGS} +INSTALL:=libtool --mode=install install + +default: all + +compile all: harvester + +check: + +debug: harvester-dbg + +doc: + +stage: compile + $(MAKE) install PREFIX=${stagedir} + +install: compile + -mkdir -p ${PREFIX}/bin ${PREFIX}/share/doc/${package}-${version} + ${INSTALL} -m 755 harvester ${PREFIX}/bin/glite-lb-harvester + ${INSTALL} -m 444 ../doc/README ${PREFIX}/share/doc/${package}-${version} + +clean: + rm -rfv *.o *.lo .libs/ harvester harvester-dbg + rm -rvf log.xml project/ rpmbuild/ RPMS/ tgz/ + +harvester: harvester.o + ${LINK} -o $@ $+ ${LIBS} + +harvester-dbg: harvester-dbg.o + ${LINK} -o $@ $+ ${LIBS} + +harvester-dbg.o: harvester.c + ${COMPILE} -Werror -DLOG=1 -DWITH_RTM_SQL_STORAGE=1 -c $< -o $@ + +%.o: %.c + ${COMPILE} -c $< + +.PHONY: default all compike debug check doc stage install clean diff --git 
a/org.glite.lb.harvester/configure b/org.glite.lb.harvester/configure new file mode 100755 index 0000000..ff59db3 --- /dev/null +++ b/org.glite.lb.harvester/configure @@ -0,0 +1,703 @@ +#!/usr/bin/perl + +# WARNING: Don't edit this file unless it is the master copy in org.glite.lb +# +# For the purpose of standalone builds of lb/jobid/lbjp-common components +# it is copied on tagging + +# $Header$ + +use Getopt::Long; + +my $pwd = `pwd`; chomp $pwd; +my $prefix = $pwd.'/stage'; +my $stagedir; +my $staged; +my $module; +my $thrflavour = 'gcc64dbgpthr'; +my $nothrflavour = 'gcc64dbg'; +my $mode = 'build'; +my $help = 0; +my $listmodules; +my $version; +my $output; +my $lb_tag = ''; +my $lbjp_tag = ''; +my $jp_tag = ''; +my $sec_tag = ''; +my $jobid_tag = ''; + +my @nodes = qw/client server logger utils client-java doc ws-test db jpprimary jpindex jpclient harvester/; +my %enable_nodes; +my %disable_nodes; + +my %extern_prefix = ( + cares => '/opt/c-ares', + classads => '/opt/classads', + cppunit => '/usr', + expat => '/usr', + globus => '/opt/globus', + gsoap => '/usr', + mysql => '/usr', + 'mysql-devel' => '', + voms => '/opt/glite', + gridsite => '/opt/glite', + lcas => '/opt/glite', + trustmanager => '/opt/glite', + ant => '/usr', + jdk => '/usr', + libtar => '/usr', + axis => '/usr', + log4c => '/usr', + postgresql => '/usr' +); + +my %jar = ( + 'commons-codec' => '/usr/share/java/commons-codec.jar', + 'commons-lang' => '/usr/share/java/commons-lang.jar', +); + + +my %glite_prefix; +my %need_externs; +my %need_externs_type; +my %need_jars; +my %extrafull; +my %extranodmod; +my %deps; +my %deps_type; +my %topbuild; + +my %lbmodules = ( + 'lb' => [ qw/client client-java common doc logger server state-machine types utils ws-interface ws-test harvester/], + 'security' => [qw/gss gsoap-plugin/], + 'lbjp-common' => [qw/db log maildir server-bones trio jp-interface/], + 'jobid' => [qw/api-c api-cpp api-java/], + 'jp' => [ qw/client doc index primary server-common 
ws-interface/ ], + ); + + +my @opts = ( + 'prefix=s' => \$prefix, + 'staged=s' => \$staged, + 'module=s' => \$module, + 'thrflavour=s' => \$thrflavour, + 'nothrflavour=s' => \$nothrflavour, + 'mode=s' => \$mode, + 'listmodules=s' => \$listmodules, + 'version=s' => \$version, + 'output=s' => \$output, + 'stage=s' => \$stagedir, + 'lb-tag=s' => \$lb_tag, + 'lbjp-common-tag=s' => \$lbjp_tag, + 'jp-tag=s' => \$jp_tag, + 'security-tag=s' => \$sec_tag, + 'jobid-tag=s' => \$jobid_tag, + 'help' => \$help, +); + +for (@nodes) { + $enable_nodes{$_} = 0; + $disable_nodes{$_} = 0; + + push @opts,"disable-$_",\$disable_nodes{$_}; + push @opts,"enable-$_",\$enable_nodes{$_}; +} + +push @opts,"with-$_=s",\$extern_prefix{$_} for keys %extern_prefix; +push @opts,"with-$_=s",\$jar{$_} for keys %jar; + +my @keeparg = @ARGV; + +GetOptions @opts or die "Errors parsing command line\n"; + +$extern_prefix{'mysql-devel'}=$extern_prefix{mysql} if $extern_prefix{'mysql-devel'} eq ''; + +if ($help) { usage(); exit 0; } + +if ($listmodules) { + my @m = map "org.glite.$listmodules.$_",@{$lbmodules{$listmodules}}; + print "@m\n"; + exit 0; +} + +warn "$0: --version and --output make sense only in --mode=etics\n" + if ($version || $output) && $mode ne 'etics'; + +my $en; +for (keys %enable_nodes) { $en = 1 if $enable_nodes{$_}; } + +my $dis; +for (keys %disable_nodes) { $dis = 1 if $disable_nodes{$_}; } + +die "--enable-* and --disable-* are mutually exclusive\n" + if $en && $dis; + +die "--module cannot be used with --enable-* or --disable-*\n" + if $module && ($en || $dis); + +die "$module: unknown module\n" if $module && ! 
grep $module,@{$lbmodules{lb}},@{$lbmodules{security}},{$lbmodules{jp}}; + +if ($dis) { + for (@nodes) { + $enable_nodes{$_} = 1 unless $disable_nodes{$_}; + } +} + +if (!$en && !$dis) { $enable_nodes{$_} = 1 for (@nodes) } ; + +for (keys %enable_nodes) { delete $enable_nodes{$_} unless $enable_nodes{$_}; } + +$stagedir = $prefix unless $stagedir; + +if ($mode eq 'build') { + print "Writing config.status\n"; + open CONF,">config.status" or die "config.status: $!\n"; + print CONF "$0 @keeparg\n"; + close CONF; +} + + +my @modules; +my %aux; + +if ($module) { +# push @modules,split(/[,.]+/,$module); + push @modules,$module; +} +else { + @modules = map(($extranodmod{$_} ? $extranodmod{$_} : 'lb.'.$_),(keys %enable_nodes)); + + my $n; + + do { + local $"="\n"; + $n = $#modules; + push @modules,(map @{$deps{$_}},@modules); + + undef %aux; @aux{@modules} = (1) x ($#modules+1); + @modules = keys %aux; + } while ($#modules > $n); +} + +@aux{@modules} = (1) x ($#modules+1); +delete $aux{$_} for (split /,/,$staged); +@modules = keys %aux; + +mode_build() if $mode eq 'build'; +mode_checkout() if $mode eq 'checkout'; +mode_etics($module) if $mode eq 'etics'; + +sub mode_build { + print "\nBuilding modules: @modules\n"; + + my @ext = map @{$need_externs{$_}},@modules; + my @myjars = map @{$need_jars{$_}},@modules; + undef %aux; @aux{@ext} = 1; + @ext = keys %aux; + undef %aux; @aux{@myjars} = (1) x ($#myjars+1); + @myjars = keys %aux; + + print "\nRequired externals:\n"; + print "\t$_: $extern_prefix{$_}\n" for @ext; + print "\t$_: $jar{$_}\n" for @myjars; + print "\nThis is a poor-man configure, it's up to you to have sources and externals there\n\n"; + + mkinc($_) for @modules; + + print "Creating Makefile\n"; + + open MAK,">Makefile" or die "Makefile: $!\n"; + + print MAK "all: @modules\n\nclean:\n"; + + for (@modules) { + my $full = full($_); + my $build = $topbuild{$_} ? 
'': '/build'; + print MAK "\tcd $full$build && \${MAKE} clean\n" + } + + print MAK "\ndistclean:\n"; + + for (@modules) { + my $full = full($_); + print MAK $topbuild{$_} ? + "\tcd $full$build && \${MAKE} distclean\n" : + "\trm -rf $full$build\n" + } + + print MAK "\n"; + + for (@modules) { + my %ldeps; undef %ldeps; + @ldeps{@{$deps{$_}}} = 1; + for my $x (split /,/,$staged) { delete $ldeps{$x}; } + my @dnames = $module ? () : keys %ldeps; + + my $full = full($_); + my $build = $topbuild{$_} ? '': '/build'; + + print MAK "$_: @dnames\n\tcd $full$build && \${MAKE} && \${MAKE} install\n\n"; + } + + close MAK; +} + +sub mode_checkout() { + for (@modules) { + my $module = $_; + my $tag = ""; + if ($lb_tag){ + for (@{$lbmodules{lb}}){ + if ("lb.".$_ eq $module){ + $tag = '-r '.$lb_tag; + } + } + } + if ($lbjp_tag){ + for (@{$lbmodules{'lbjp-common'}}){ + if ("lbjp-common.".$_ eq $module){ + $tag = '-r '.$lbjp_tag; + } + } + } + if ($jp_tag){ + for (@{$lbmodules{'jp'}}){ + if ("jp.".$_ eq $module){ + $tag = '-r '.$jp_tag; + } + } + } + if ($sec_tag){ + for (@{$lbmodules{security}}){ + if ("security.".$_ eq $module){ + $tag = '-r '.$sec_tag; + } + } + } + if ($jobid_tag){ + for (@{$lbmodules{jobid}}){ + if ("jobid.".$_ eq $module){ + $tag = '-r '.$jobid_tag; + } + } + } + #if (grep {"lb.".$_ eq $module} @{$lbmodules{lb}}){ + # print "found"; + #} + $_ = full($_); + print "\n*** Checking out $_\n"; + system("cvs checkout $tag $_") == 0 or die "cvs checkout $tag $_: $?\n"; + } +} + +BEGIN{ +%need_externs_aux = ( + 'lb.client' => [ qw/cppunit:B classads/ ], + 'lb.client-java' => [ qw/ant:B jdk:B axis:B trustmanager/ ], + 'lb.common' => [ qw/expat cppunit:B classads/ ], + 'lb.doc' => [], + 'lb.logger' => [ qw/cppunit:B log4c/ ], + 'lb.server' => [ qw/globus_essentials:R globus:B expat cares mysql cppunit:B gsoap:B classads voms lcas gridsite log4c/ ], + 'lb.state-machine' => [ qw/classads/ ], + 'lb.utils' => [ qw/cppunit:B/ ], + 'lb.ws-interface' => [], + 'lb.ws-test' => [ 
qw/gsoap:B/ ], + 'lb.types' => [ qw// ], + 'lb.harvester' => [ qw/postgresql:R/ ], + 'lbjp-common.db' => [ qw/mysql:B mysql-devel:B postgresql:B/ ], + 'lbjp-common.log' => [ qw// ], + 'lbjp-common.maildir' => [ qw// ], + 'lbjp-common.server-bones' => [ qw// ], + 'lbjp-common.trio' => [ qw/cppunit:B/ ], + 'lbjp-common.jp-interface' => [ qw/cppunit:B/ ], + 'security.gss' => [ qw/globus_essentials:R globus:B cares cppunit:B/ ], + 'security.gsoap-plugin' => [ qw/cppunit:B globus_essentials:R globus:B cares gsoap:B/ ], + 'jobid.api-c' => [ qw/cppunit:B/ ], + 'jobid.api-cpp' => [ qw/cppunit:B/ ], + 'jobid.api-java' => [ qw/ant:B jdk:B/ ], + 'jp.client' => [ qw/gsoap libtar globus_essentials:R globus:B/ ], + 'jp.doc' => [], + 'jp.index' => [ qw/gsoap globus_essentials:R globus:B/ ], + 'jp.primary' => [ qw/classads gsoap libtar globus_essentials:R globus:B/ ], + 'jp.server-common' => [], + 'jp.ws-interface' => [], +); + +for my $ext (keys %need_externs_aux) { + for (@{$need_externs_aux{$ext}}) { + /([^:]*)(?::(.*))?/; + push @{$need_externs{$ext}},$1; + my $type = $2 ? 
$2 : 'BR'; + $need_externs_type{$ext}->{$1} = $type; + } +} + +%need_jars = ( + 'jobid.api-java' => [ qw/commons-codec/ ], + 'lb.client-java' => [ qw/commons-lang/ ], +); + +for my $jar (keys %need_jars) { + for (@{$need_jars{$jar}}) { + $need_externs_type{$jar}->{$_} = 'BR'; # XXX + } +} + +%deps_aux = ( + 'lb.client' => [ qw/ + lb.types:B lb.common + lbjp-common.trio + jobid.api-cpp jobid.api-c + security.gss + / ], + 'lb.client-java' => [ qw/ + lb.types:B + lb.ws-interface:B + jobid.api-java + / ], + 'lb.common' => [ qw/ + jobid.api-cpp jobid.api-c + lb.types:B lbjp-common.trio security.gss + / ], + 'lb.doc' => [ qw/lb.types:B/ ], + 'lb.logger' => [ qw/ + lbjp-common.trio + lbjp-common.log + jobid.api-c + lb.common + security.gss + / ], + 'lb.server' => [ qw/ + lb.ws-interface lb.types:B lb.common lb.state-machine + lbjp-common.db lbjp-common.server-bones lbjp-common.trio lbjp-common.maildir lbjp-common.log + jobid.api-c + security.gsoap-plugin security.gss + / ], + 'lb.state-machine' => [ qw/lb.common lbjp-common.jp-interface security.gss/ ], + 'lb.utils' => [ qw/ + lbjp-common.jp-interface + jobid.api-c + lbjp-common.trio lbjp-common.maildir + lb.client lb.state-machine + / ], + 'lb.ws-test' => [ qw/security.gsoap-plugin lb.ws-interface/ ], + 'lb.ws-interface' => [ qw/lb.types:B/ ], + 'lb.types' => [ qw// ], + 'lb.harvester' => [ qw/jobid.api-c lbjp-common.trio lbjp-common.db lb.common lb.client/ ], + 'lbjp-common.db' => [ qw/lbjp-common.trio/ ], + 'lbjp-common.maildir' => [ qw// ], + 'lbjp-common.server-bones' => [ qw// ], + 'lbjp-common.trio' => [ qw// ], + 'security.gss' => [ qw// ], + 'security.gsoap-plugin' => [ qw/security.gss/ ], + 'jobid.api-c' => [ qw// ], + 'jobid.api-cpp' => [ qw/jobid.api-c/ ], + 'jobid.api-java' => [ qw// ], + + 'lbjp-common.jp-interface' => [ qw/lbjp-common.db jobid.api-c/ ], + + 'jp.client' => [ qw/ + jp.ws-interface + lbjp-common.jp-interface lbjp-common.maildir + jobid.api-c + security.gsoap-plugin + / ], + 'jp.doc' => [ qw// 
], + 'jp.index' => [ qw/ + jp.server-common jp.ws-interface + lbjp-common.jp-interface lbjp-common.trio lbjp-common.db lbjp-common.server-bones + security.gsoap-plugin + / ], + 'jp.primary' => [ qw/ + jobid.api-c + jp.server-common jp.ws-interface + lb.state-machine + lbjp-common.jp-interface lbjp-common.trio lbjp-common.db lbjp-common.server-bones + security.gsoap-plugin + / ], + 'jp.server-common' => [ qw/ + lbjp-common.jp-interface lbjp-common.db + / ], + 'jp.ws-interface' => [ qw// ], +); + +for my $ext (keys %deps_aux) { + for (@{$deps_aux{$ext}}) { + /([^:]*)(?::(.*))?/; + push @{$deps{$ext}},$1; + my $type = $2 ? $2 : 'BR'; + $deps_type{$ext}->{$1} = $type; + } +} + + +%extrafull = ( gridsite=>'org.gridsite.core'); + +#( java => 'client-java' ); +%extranodmod = ( + db => 'lbjp-common.db', + jpprimary => 'jp.primary', + jpindex => 'jp.index', + jpclient => 'jp.client', +); + +my @t = qw/lb.client-java jobid.api-java lb.types lbjp-common.log/; +@topbuild{@t} = (1) x ($#t+1); +} + +sub full +{ + my $short = shift; + return $extrafull{$short} ? 
$extrafull{$short} : 'org.glite.'.$short; +} + +sub mkinc +{ + my %aux; + undef %aux; + my @m=qw/ +lb.client lb.doc lb.state-machine lb.ws-interface lb.logger lb.types lb.common lb.server lb.utils lb.ws-test lb.client-java lb.harvester +security.gss security.gsoap-plugin +jobid.api-c jobid.api-cpp jobid.api-java +lbjp-common.db lbjp-common.log lbjp-common.maildir lbjp-common.server-bones lbjp-common.trio lbjp-common.jp-interface +jp.client jp.doc jp.index jp.primary jp.server-common jp.ws-interface +/; + @aux{@m} = (1) x ($#m+1); + + my $short = shift; + my $full = full $short; + + unless ($aux{$short}) { + print "Makefile.inc not needed in $full\n"; + return; + } + + my $build = ''; + + unless ($topbuild{$_}) { + $build = '/build'; + unless (-d "$full/build") { + mkdir "$full/build" or die "mkdir $full/build: $!\n"; + } + unlink "$full/build/Makefile"; + symlink "../Makefile","$full/build/Makefile" or die "symlink ../Makefile $full/build/Makefile: $!\n"; + } + + open MKINC,">$full$build/Makefile.inc" + or die "$full$build/Makefile.inc: $!\n"; + + print "Creating $full$build/Makefile.inc\n"; + + print MKINC qq{ +PREFIX = $prefix +stagedir = $stagedir +thrflavour = $thrflavour +nothrflavour = $nothrflavour +}; + + for (@{$need_externs{$short}}) { + print MKINC "${_}_prefix = $extern_prefix{$_}\n" + } + + for (@{$need_jars{$short}}) { + print MKINC "${_}_jar = $jar{$_}\n" + } + + my $need_gsoap = 0; + for (@{$need_externs{$short}}) { $need_gsoap = 1 if $_ eq 'gsoap'; } + + print MKINC "gsoap_default_version=".gsoap_version()."\n" if $need_gsoap; + + close MKINC; +} + +my %etics_externs; +my %etics_projects; +BEGIN{ + %etics_externs = ( + globus_essentials=>'vdt_globus_essentials', + globus=>'globus', + cares=>'c-ares', + voms=>'org.glite.security.voms-api-cpp', + gridsite=>'org.gridsite.shared', + lcas=>'org.glite.security.lcas', + trustmanager=>'org.glite.security.trustmanager', + ); + %etics_projects = ( + vdt=>[qw/globus globus_essentials/], + 
'org.glite'=>[qw/voms gridsite lcas/], + ); +}; + +sub mode_etics { + $fmod = shift; + + die "$0: --module required with --etics\n" unless $fmod; + + my ($subsys,$module) = split /\./,$fmod; + + my ($major,$minor,$rev,$age); + + if ($version) { + $version =~ /([[:digit:]]+)\.([[:digit:]]+)\.([[:digit:]]+)-(.+)/; + ($major,$minor,$rev,$age) = ($1,$2,$3,$4); + } + else { + open V,"org.glite.$subsys.$module/project/version.properties" + or die "org.glite.$subsys.$module/project/version.properties: $!\n"; + + while ($_ = ) { + chomp; + ($major,$minor,$rev) = ($1,$2,$3) if /module\.version\s*=\s*([[:digit:]]+)\.([[:digit:]]+)\.([[:digit:]]+)/; + $age = $1 if /module\.age\s*=\s*([[:digit:]]+)/; + } + close V; + } + + my @copts = (); + my %ge; + @ge{@{$etics_projects{'org.glite'}}} = (1) x ($#{$etics_projects{'org.glite'}}+1); + + for (@{$need_externs{"$subsys.$module"}}) { + if ($need_externs_type{"$subsys.$module"}->{$_}=~/B/) { + my $eext = $etics_externs{$_} ? $etics_externs{$_} : $_; + push @copts,$ge{$_} ? "--with-$_=\${stageDir}" : "--with-$_=\${$eext.location}"; + } + } + + for (@{$need_jars{"$subsys.$module"}}) { + my $eext = $etics_externs{$_} ? $etics_externs{$_} : $_; + + push @copts,"--with-$_ \${$eext.location}/$_*.jar"; + } + + + my $conf = "glite-$subsys-${module}_R_${major}_${minor}_${rev}_${age}"; + my $file = $output ? $output : "$conf.ini"; + open C,">$file" or die "$file: $!\n"; + + my $buildroot = $topbuild{"$subsys.$module"} ? '' : "build.root = build\n"; + + my $confdir = $topbuild{"$subsys.$module"} ? '..' 
: '../..'; + + print STDERR "Writing $file\n"; + print C qq{ +[Configuration-$conf] +profile = None +moduleName = org.glite.$subsys.$module +displayName = $conf +description = org.glite.$subsys.$module +projectName = org.glite +age = $age +deploymentType = None +tag = $conf +version = $major.$minor.$rev +path = \${projectName}/\${moduleName}/\${version}/\${platformName}/\${packageName}-\${version}-\${age}.tar.gz + +[Platform-default:VcsCommand] +displayName = None +description = None +tag = cvs -d \${vcsroot} tag -R \${tag} \${moduleName} +branch = None +commit = None +checkout = cvs -d \${vcsroot} co -r \${tag} \${moduleName} + +[Platform-default:BuildCommand] +postpublish = None +packaging = None +displayName = None +description = None +doc = None +prepublish = None +publish = None +compile = make +init = None +install = make install +clean = make clean +test = make check +configure = cd $confdir && \${moduleName}/configure --thrflavour=\${globus.thr.flavor} --nothrflavour=\${globus.nothr.flavor} --prefix=\${prefix} --stage=\${stageDir} --module $subsys.$module @copts +checkstyle = None + +[Platform-default:Property] +$buildroot + +[Platform-default:DynamicDependency] + +}; + for (@{$need_externs{"$subsys.$module"}},@{$need_jars{"$subsys.$module"}}) { + my $eext = $etics_externs{$_} ? 
$etics_externs{$_} : $_; + + my $proj = 'externals'; + for my $p (keys %etics_projects) { + for $m (@{$etics_projects{$p}}) { + $proj = $p if $m eq $_; + } + } + + my $type = $need_externs_type{"$subsys.$module"}->{$_}; + print C "$proj|$eext = $type\n"; + } + + for (@{$deps{"$subsys.$module"}}) { + my $type = $deps_type{"$subsys.$module"}->{$_}; + print C "org.glite|org.glite.$_ = $type\n"; + } + + close C; +} + +sub gsoap_version { + local $_; + my $gsoap_version; + open S,"$extern_prefix{gsoap}/bin/soapcpp2 -v 2>&1 |" or die "$extern_prefix{gsoap}/bin/soapcpp2: $!\n"; + + while ($_ = ) { + chomp; + + $gsoap_version = $1 if /The gSOAP Stub and Skeleton Compiler for C and C\+\+ ([.[:digit:][:alpha:]]+)$/; + } + close S; + return $gsoap_version; +} + + +sub usage { + my @ext = keys %extern_prefix; + my @myjars, keys %jar; + + print STDERR qq{ +usage: $0 options + +General options (defaults in []): + --prefix=PREFIX destination directory [./stage] + --staged=module,module,... what is already in PREFIX (specify without org.glite.) + --thrflavour=flavour + --nothrflavour=flavour threaded and non-treaded flavours [gcc64dbgpthr,gcc64dbg] + --listmodules=subsys list modules of a subsystem + +Mode of operation: + --mode={checkout|build|etics} what to do [build] + +What to build: + --module=module build this module only (mostly in-Etics operation) + --enable-NODE build this "node" (set of modules) only. Available nodes are + @{$lbmodules{lb}},@{$lbmodules{security}} + --disable-NODE don't build this node + --lb-tag=tag checkout LB modules with specific tag + --jp-tag=tag checkout JP modules with specific tag + --lbjp-common-tag=tag checkout lbjp-common modules with specific tag + --security-tag=tag checkout security modules with specific tag + --jobid-tag=tag checkout jobid modules with specific tag + +Dependencies: + --with-EXTERNAL=PATH where to look for an external. Required externals + (not all for all modules) are: + @ext + --with-JAR=JAR where to look for jars. 
Required jars are: + @myjars + Summary of what will be used is always printed + +}; + +} diff --git a/org.glite.lb.harvester/doc/INSTALL b/org.glite.lb.harvester/doc/INSTALL new file mode 100644 index 0000000..f5ff9c9 --- /dev/null +++ b/org.glite.lb.harvester/doc/INSTALL @@ -0,0 +1,42 @@ +Requirements +============ + +1) gLite +- client L&B libraries: + - glite-jobid-api-c + - glite-lb-common + - glite-lb-client + - glite-security-gss + - globus essential libraries (threaded flavour), + use the vesion with the external SSL, not with bundled SSL (!) + - glite-lbjp-common-db (build only) + - mysql-devel (build only) +2) postgresql-devel + + +Steps +===== + +./configure +make +make install + +Use './configure --help' for the options. + + +Manual way +========== + +configure is simple script generating Makefile.inc. You can build harvester +straight away by make defining the variables manually. For example with gLite +installed in ~/glite/stage: + +(rm Makefile.inc) +make stagedir=$HOME/glite/stage + + +Testing +======= + +Test for basic functionality covered by 'test.sh' script in sources. +See './test.sh --help'. diff --git a/org.glite.lb.harvester/doc/README b/org.glite.lb.harvester/doc/README new file mode 100644 index 0000000..f1c393d --- /dev/null +++ b/org.glite.lb.harvester/doc/README @@ -0,0 +1,81 @@ +Introduction +============ + +L&B Harvester gathers information about jobs from L&B servers using effective +L&B notification mechanism. It manages notifications and keeps them in +a persistent storage (file or database table) to reuse later on next launch. +It takes care about refreshing notifications and queries L&B servers back when +some notification on expires. + +The tool was initially written for Real Time Monitor (project at Imperial +College in London), later was extended with messaging mechanism for WLCG. 
+ + +Requirements +============ + +- lastUpdateTime index on L&B servers +- harvester identity in super users file on L&B servers + + +Launch (with msg-publish sending messages) +========================================= + +Harvester is sending notifications via msg-publish infrastructure. The list of +L&B servers to harvest is specified via the -c option. + +1) with newer LB 2.0 servers: + + glite-lb-harvester -c servers.txt -C certfile -K keyfile --wlcg + +2) with older LB servers (backward compatible but greedy notifications): + + glite-lb-harvester -c servers.txt -C certfile -K keyfile --wlcg --old + +Custom configuration of messaging: + --wlcg-binary $HOME/bin/msg-publish + --wlcg-topic org.wlcg.usage.JobStatus2 + --wlcg-config $HOME/etc/msg-publish.conf.wlcg + + +Launch (Real Time Monitor and storing to the database) +====================================================== + +Harvester is using postgres database. Table 'lb20' with L&B servers to +harvest (read-only), table 'jobs' with result job states (read/write). It's +possible to specify L&B servers list by file instead of 'lb20' table, +via -c option. + + glite-lb-harvester -C certfile -K keyfile --pg rtm/@:rtm + +The connection string after '--pg' is in the format: + USER/PASSWORD@HOST:DATABASE +The database schema is in 'test.sql'. + + +Other recommended options +========================= + +Use 'glite-lb-harvester --help' for additional options. + +For example: + - daemonizing and using syslog: + '--daemonize --pidfile /var/run/glite-lb-harvester.pid' + - decreasing verbosity: + '-d 2' (2 for errors and warnings only) + + +Stop +==== + +In non-daemon mode CTRL-C can be used, in daemon mode using specified +pidfile: + + kill `cat /var/run/glite-lb-harvester.pid` + +pidfile will vanish after exit. + +All notifications are preserved on LB servers, and will expire later. 
You can +purge them now, if they won't be needed: + + glite-lb-harvester --cleanup diff --git a/org.glite.lb.harvester/examples/test.sh b/org.glite.lb.harvester/examples/test.sh new file mode 100755 index 0000000..7105ee1 --- /dev/null +++ b/org.glite.lb.harvester/examples/test.sh @@ -0,0 +1,836 @@ +#! /bin/sh + + +usage() { +cat <&1| \ + grep timeleft| sed 's/^.* //'` + if [ "$timeleft" = "0:00:00" -o -z "$timeleft" ]; then + echo "Proxy certificate check failed."\ + " Aborting." + exit 1 + fi + else + echo "Can't check proxy cert (grid-proxy-info not found). If you do not have valid proxy certificate, set GLITE_HOST_KEY/GLITE_HOST_KEY - otherwise tests will fail!" + fi +# fi + identity=`X509_USER_KEY=${X509_USER_KEY} X509_USER_CERT=${X509_USER_CERT} $GLOBUS_LOCATION/bin/grid-proxy-info 2>&1| \ + grep identity| sed 's/^[^/]*//'` + + if [ -z "$GLITE_LB_TEST_DB" ]; then + GLITE_LB_TEST_DB="lbserver/@localhost:lbserver20test" + need_new_lb_db=1; + fi + DB_USER=`echo $GLITE_LB_TEST_DB| sed 's!/.*$!!'` + DB_HOST=`echo $GLITE_LB_TEST_DB| sed 's!^.*@!!' | sed 's!:.*!!'` + DB_NAME=`echo $GLITE_LB_TEST_DB| sed 's!^.*:!!'` + MYSQL_ARGS="-u ${GLITE_MYSQL_ROOT_USER:-root}" + [ -z "$GLITE_MYSQL_ROOT_PASSWORD" ] || ARGS="--password=${GLITE_MYSQL_ROOT_PASSWORD} $MYSQL_ARGS" + + if [ -z "$GLITE_RTM_TEST_DB" ]; then + GLITE_RTM_TEST_DB="rtm/@localhost:rtmtest" + need_new_rtm_db=1; + fi + RTM_USER=`echo $GLITE_RTM_TEST_DB| sed 's!/.*$!!'` + RTM_HOST=`echo $GLITE_RTM_TEST_DB| sed 's!^.*@!!' 
| sed 's!:.*!!'` + RTM_NAME=`echo $GLITE_RTM_TEST_DB| sed 's!^.*:!!'` + PG_ARGS="-U ${GLITE_PG_ROOT_USER:-postgres}" + + #other stuff + GLITE_LB_TEST_SERVER_PORT=${GLITE_LB_TEST_SERVER_PORT:-"10000"} + GLITE_LB_TEST_PIDFILE=${GLITE_LB_TEST_PIDFILE:-"/tmp/glite-lb-test.pid"} + GLITE_RTM_TEST_PIDFILE=${GLITE_RTM_TEST_PIDFILE:-"/tmp/glite-rtm-test.pid"} + GLITE_RTM_TEST_TTL=${GLITE_RTM_TEST_TTL:-"60"} + + jobreg="$GLITE_LOCATION/examples/glite-lb-job_reg -m `hostname -f`:${GLITE_LB_TEST_SERVER_PORT} -s UserInterface" + logev="$GLITE_LOCATION/bin/glite-lb-logevent -x -S `pwd`/LB/proxy.sockstore.sock -U localhost" + for dir in "$GLITE_LOCATION/bin" "`pwd`/../build" "`pwd`"; do + if [ -x "$dir/glite-lb-harvester-dbg" ]; then + rtm="$dir/glite-lb-harvester-dbg" + fi + if [ -x "$dir/harvester-dbg" ]; then + rtm="$dir/harvester-dbg" + fi + done + if [ -z "$rtm" ]; then + echo "glite-lb-harvester-dbg not found" + return 1 + fi + + if echo "$GLITE_RTM_TEST_ADDITIONAL_ARGS" | grep -- '[^-]\?\(--old\>\|-o\>\)' >/dev/null; then + n_notifs=1 + else + n_notifs=2 + fi + + rm -f log +} + + +drop_db() { +return 0 + [ -z "$lb_db_created" ] || mysqladmin -f $MYSQL_ARGS drop "$DB_NAME" + [ -z "$rtm_db_created" ] || dropdb $PG_ARGS "$RTM_NAME" +} + + +create_db() { + echo -n "mysql." + # create database when needed + if [ "x$need_new_lb_db" = "x1" ]; then + mysqladmin -f $MYSQL_ARGS drop $DB_NAME > /dev/null 2>&1 + echo -n "." + mysqladmin -f $MYSQL_ARGS create $DB_NAME && \ + echo -n "." + mysql $MYSQL_ARGS -e "GRANT ALL on $DB_NAME.* to $DB_USER@$DB_HOST" && \ + echo -n "." + mysql -u $DB_USER $DB_NAME -h $DB_HOST < $GLITE_LOCATION/etc/glite-lb-dbsetup.sql || return $? + echo -n "." + mkdir -p `pwd`/LB + cat > `pwd`/LB/glite-lb-index.conf << EOF +[ + JobIndices = { + [ type = "system"; name = "lastUpdateTime" ] + } +] +EOF + LBDB="$GLITE_LB_TEST_DB" $GLITE_LOCATION/bin/glite-lb-bkindex -r `pwd`/LB/glite-lb-index.conf || return $? + lb_db_created="1" + echo -n "." 
+ else + cleanup_mysql || return $? + fi + echo -n "OK psql." + if [ "x$need_new_rtm_db" = "x1" ]; then + dropdb $PG_ARGS "$RTM_NAME" >/dev/null 2>&1 + echo -n "." +# createuser $PG_ARGS -A -D "$RTM_NAME" >/dev/null 2>&1 +# echo -n "." + createdb $PG_ARGS --encoding "UTF-8" --owner "$RTM_USER" "$RTM_NAME" >psql-create.log 2>&1 || return $? + rm psql-create.log + echo -n "." + rtm_db_created="1" + echo "\i test.sql" | psql -AtF ',' -U "$RTM_USER" "$RTM_NAME" >/dev/null || return $? + echo -n "." + else + cleanup_pg || return $? + fi + echo "OK" +} + + +cleanup_mysql() { + cat << EOF | mysql -u $DB_USER $DB_NAME -h $DB_HOST || return $? +DELETE FROM acls; +DELETE FROM events; +DELETE FROM events_flesh; +DELETE FROM jobs; +DELETE FROM long_fields; +DELETE FROM notif_jobs; +DELETE FROM notif_registrations; +DELETE FROM server_state; +DELETE FROM short_fields; +DELETE FROM states; +DELETE FROM status_tags; +DELETE FROM users; +DELETE FROM zombie_jobs; +DELETE FROM zombie_prefixes; +DELETE FROM zombie_suffixes; +EOF + echo -n "." +} + + +cleanup_pg() { + cat << EOF | psql -AtF ',' -U "$RTM_USER" "$RTM_NAME" >/dev/null || return $? +DELETE FROM jobs; +DELETE FROM notifs; +EOF + echo -n "." 
+} + + +run_daemons() { + mkdir -p LB/dump LB/purge LB/voms 2>/dev/null + + # checks + if [ -f "${GLITE_LB_TEST_PIDFILE}" ]; then + echo "L&B server already running (${GLITE_LB_TEST_PIDFILE}, `cat ${GLITE_LB_TEST_PIDFILE}`)" + quit=1 + fi + if [ -f "${GLITE_RTM_TEST_PIDFILE}" ]; then + echo "L&B harvester already running (${GLITE_RTM_TEST_PIDFILE}, `cat ${GLITE_RTM_TEST_PIDFILE}`)" + quit=1 + fi + if [ -e "`pwd`/LB/notif.sock" ]; then + if [ "`lsof -t $(pwd)/LB/notif.sock | wc -l`" != "0" ]; then + echo "Notification interlogger already running (using LB/notif.sock, `lsof -t $(pwd)/LB/notif.sock`)" + quit=1 + fi + fi + if [ -e "`pwd`/LB/proxy-il.sock" ]; then + if [ "`lsof -t $(pwd)/LB/proxy-il.sock | wc -l`" != "0" ]; then + echo "Proxy interlogger already running (using LB/proxy-il.sock, `lsof -t $(pwd)/LB/proxy-il.sock`)" + quit=1 + fi + fi + [ -z "$quit" ] || exit 1 + + # run L&B server + echo -n "L" + X509_USER_KEY=${X509_USER_KEY} X509_USER_CERT=${X509_USER_CERT} \ + $GLITE_LOCATION/bin/glite-lb-bkserverd \ + -m $GLITE_LB_TEST_DB \ + -p $GLITE_LB_TEST_SERVER_PORT -w $(($GLITE_LB_TEST_SERVER_PORT + 3))\ + -i ${GLITE_LB_TEST_PIDFILE} \ + --withproxy -o `pwd`/LB/proxy.sock\ + --proxy-il-sock `pwd`/LB/proxy-il.sock --proxy-il-fprefix `pwd`/LB/proxy-data \ + -D `pwd`/LB/dump -S `pwd`/LB/purge \ + -V `pwd`/LB/voms \ + --notif-il-sock `pwd`/LB/notif.sock --notif-il-fprefix `pwd`/LB/notif-data \ + --super-user "$identity" > `pwd`/LB/glite-lb-test-pre.log 2>&1 + if [ x"$?" != x"0" ]; then + cat `pwd`/LB/glite-lb-test-pre.log + echo FAILED + drop_db; + exit 1 + fi + echo -n "B " + + # run L&B interlogger + echo -n "L" + X509_USER_KEY=${X509_USER_KEY} X509_USER_CERT=${X509_USER_CERT} \ + $GLITE_LOCATION/bin/glite-lb-interlogd \ + --file-prefix `pwd`/LB/proxy-data --socket `pwd`/LB/proxy-il.sock > `pwd`/LB/glite-interlog-test-pre.log 2>&1 + if [ x"$?" 
!= x"0" ]; then + cat `pwd`/LB/glite-interlog-test-pre.log + echo FAILED + kill_bkserver + drop_db; + exit 1 + fi + echo -n "I " + + # run L&B notification interlogger + echo -n "N" + X509_USER_KEY=${X509_USER_KEY} X509_USER_CERT=${X509_USER_CERT} \ + $GLITE_LOCATION/bin/glite-lb-notif-interlogd \ + --file-prefix `pwd`/LB/notif-data --socket `pwd`/LB/notif.sock > `pwd`/LB/glite-notif-test-pre.log 2>&1 + if [ x"$?" != x"0" ]; then + cat `pwd`/LB/glite-notif-test-pre.log + echo FAILED + kill_daemons + drop_db; + exit 1 + fi + echo -n "I " + + if ! start_harvester; then + kill_daemons; + drop_db; + exit 1 + fi + + # wait for pidfiles + i=0 + while [ ! -s "${GLITE_LB_TEST_PIDFILE}" -a $i -lt 20 ]; do + sleep 0.1 + i=$(($i+1)) + done + if [ ! -s "${GLITE_LB_TEST_PIDFILE}" ]; then + echo "Can't startup L&B server." + kill_daemons; + drop_db; + exit 1 + fi + + echo -n "notifs." + pg_wait 20 "SELECT refresh FROM notifs WHERE notifid IS NOT NULL" $n_notifs || return $? + refresh=`echo "$result" | head -n 1` + if [ -z "$refresh" ]; then + echo "FAIL" + return 1 + fi +} + + +start_harvester() { + # run L&B harvester server + echo -n "R" + rm -Rf RTM + mkdir RTM 2>/dev/null + echo "`hostname -f`:${GLITE_LB_TEST_SERVER_PORT}" > `pwd`/RTM/config.txt + X509_USER_KEY=${X509_USER_KEY} X509_USER_CERT=${X509_USER_CERT} \ + ${rtm} \ + -m $GLITE_RTM_TEST_DB \ + --pidfile ${GLITE_RTM_TEST_PIDFILE} \ + --ttl ${GLITE_RTM_TEST_TTL} \ + --history $((GLITE_RTM_TEST_TTL / 2)) \ + --debug 12 \ + --config `pwd`/RTM/config.txt \ + --daemonize ${GLITE_RTM_TEST_ADDITIONAL_ARGS} 2>`pwd`/RTM/glite-rtm-test-pre.log >`pwd`/RTM/notifs.log + if [ x"$?" != x"0" ]; then + cat `pwd`/RTM/glite-rtm-test-pre.log + echo FAILED + return 1 + fi + + i=0 + while [ ! -s "${GLITE_RTM_TEST_PIDFILE}" -a $i -lt 20 ]; do + sleep 0.1 + i=$(($i+1)) + done + if [ ! -s "${GLITE_RTM_TEST_PIDFILE}" ]; then + echo "Can't startup L&B harvester." 
+		kill_daemons;
+		drop_db;
+		exit 1
+	fi
+
+	echo -n "M "
+}
+
+
+# Run the harvester once with --cleanup. Its output is collected in
+# RTM/glite-rtm-test-cleanup.log and printed when the run fails.
+cleanup_harvester() {
+	echo -n "cleaning up..."
+	X509_USER_KEY=${X509_USER_KEY} X509_USER_CERT=${X509_USER_CERT} \
+	${rtm} \
+		-m $GLITE_RTM_TEST_DB \
+		--cleanup \
+		--debug 12 ${GLITE_RTM_TEST_ADDITIONAL_ARGS} >`pwd`/RTM/glite-rtm-test-cleanup.log 2>&1
+	if [ x"$?" != x"0" ]; then
+		cat `pwd`/RTM/glite-rtm-test-cleanup.log
+		echo FAILED
+		return 1
+	fi
+	echo -n "OK "
+}
+
+
+# Stop all test daemons. PIDs come from the two pidfiles and from lsof on
+# the interlogger sockets. Every process is signalled first (signal 2 for
+# the harvester), then sent SIGKILL one second later; pidfiles and sockets
+# are removed at the end.
+kill_daemons() {
+	pid1=`cat ${GLITE_LB_TEST_PIDFILE} 2>/dev/null`
+	[ -f "${GLITE_RTM_TEST_PIDFILE}" ] && pid2=`cat ${GLITE_RTM_TEST_PIDFILE}`
+	pid3=`lsof -t $(pwd)/LB/notif.sock 2>/dev/null`
+	pid4=`lsof -t $(pwd)/LB/proxy-il.sock 2>/dev/null`
+	[ ! -z "$pid1" ] && kill $pid1
+	[ ! -z "$pid2" ] && kill -2 $pid2
+	[ ! -z "$pid3" ] && kill $pid3
+	[ ! -z "$pid4" ] && kill $pid4
+	sleep 1;
+	[ ! -z "$pid1" ] && kill -9 $pid1 2>/dev/null
+	[ ! -z "$pid2" ] && kill -9 $pid2 2>/dev/null
+	[ ! -z "$pid3" ] && kill -9 $pid3 2>/dev/null
+	[ ! -z "$pid4" ] && kill -9 $pid4 2>/dev/null
+	rm -f "${GLITE_LB_TEST_PIDFILE}" "${GLITE_RTM_TEST_PIDFILE}"
+	rm -f `pwd`/LB/*.sock
+}
+
+
+# Stop only the L&B server whose PID is stored in GLITE_LB_TEST_PIDFILE:
+# plain kill first, SIGKILL one second later, then remove the pidfile.
+kill_bkserver() {
+	pid=`cat ${GLITE_LB_TEST_PIDFILE} 2>/dev/null`
+	# test the PID read above; "$pid1" is set only by kill_daemons, so
+	# checking it here skipped the kill whenever kill_daemons had not run
+	if [ ! -z "$pid" ]; then
+		kill $pid;
+		sleep 1;
+		kill -9 $pid 2>/dev/null
+	fi
+	rm -f "${GLITE_LB_TEST_PIDFILE}"
+}
+
+
+# Stop only the harvester whose PID is stored in GLITE_RTM_TEST_PIDFILE:
+# plain kill first, SIGKILL one second later, then remove the pidfile.
+kill_harvester() {
+	pid=`cat ${GLITE_RTM_TEST_PIDFILE} 2>/dev/null`
+	# same correction as in kill_bkserver: check "$pid", not "$pid1"
+	if [ ! -z "$pid" ]; then
+		kill $pid
+		sleep 1;
+		kill -9 $pid 2>/dev/null
+	fi
+	rm -f "${GLITE_RTM_TEST_PIDFILE}"
+}
+
+
+# Register a new job with $jobreg and eval the last two lines of its output;
+# the call fails unless they set both EDG_JOBID and EDG_WL_SEQUENCE.
+reg() {
+	echo -n "R"
+	echo $jobreg $@ >> log
+	$jobreg $@ > jobreg.tmp
+	if [ $? -ne 0 ]; then
+		cat jobreg.tmp
+		rm -f jobreg.tmp
+		echo " FAIL!"
+		return 1;
+	fi
+	script=`cat jobreg.tmp | tail -n 2`
+	rm -f jobreg.tmp
+	EDG_JOBID=
+	EDG_WL_SEQUENCE=
+	eval $script
+	if [ -z "$EDG_JOBID" -o -z "$EDG_WL_SEQUENCE" ]; then
+		echo " FAIL!"
+		return 1;
+	fi
+	echo -n "G "
+}
+
+
+# Log one event for the current job with $logev. On success the tool's
+# standard output becomes the new EDG_WL_SEQUENCE; returns 2 on failure.
+ev() {
+	echo -n "E"
+	echo $logev -j "$EDG_JOBID" -c "$EDG_WL_SEQUENCE" "$@" >> log
+	$logev -j "$EDG_JOBID" -c "$EDG_WL_SEQUENCE" "$@" 2> logev-err.tmp >logev.tmp
+	if [ $? -ne 0 ]; then
+		echo " FAIL!"
+		return 2;
+	fi
+	EDG_WL_SEQUENCE=`cat logev.tmp`
+	rm logev.tmp logev-err.tmp
+	echo -n "V "
+}
+
+
+# Run the SQL statement $1 through psql.
+# Sets $result to the output and $lines to the number of output lines.
+# Returns psql's exit status when the query fails, 0 otherwise.
+pg_get() {
+	result=
+	lines=
+	# "|| return $?" forwards psql's own status. The former
+	# "if [ $? != 0 ]; then return $?; fi" returned the status of the
+	# "[" test instead, i.e. always 0, hiding every query failure.
+	echo "$1" | psql -AtF ',' -U "$RTM_USER" "$RTM_NAME" > psql.tmp || return $?
+	result="`cat psql.tmp`"
+	lines=`wc -l psql.tmp | sed 's/^[ ]*//' | cut -f1 -d' '`
+#	rm psql.tmp
+	return 0
+}
+
+
+# Poll the SQL statement $2 with pg_get every 0.5 s, for at most $1 seconds,
+# until it yields exactly $3 lines (or any line at all when $3 is empty).
+# The query and its last result are appended to the "log" file.
+pg_wait() {
+	timeout=$(($1*2))
+	sql="$2"
+	n="$3"
+
+	i=0
+	found=0
+	result=
+	echo -n "S"
+	echo "`date '+%Y-%m-%d %H:%M:%S'` $sql" >> log
+	while [ "$found" = "0" -a $i -lt $timeout ]; do
+		pg_get "$sql" || return $?
+		echo -n "."
+		if [ -z "$n" ]; then
+			if [ "$lines" != "0" ]; then found=1; fi
+		else
+			if [ "$lines" = "$n" ]; then found=1; fi
+		fi
+		if [ "$found" = "0" ]; then sleep 0.5; fi
+		i=$(($i+1))
+	done
+	echo -n "Q "
+	result="$result"
+	echo "`date '+%Y-%m-%d %H:%M:%S'` $lines lines" >> log
+	if [ ! -z "$result" ]; then
+		echo "$result" | sed -e 's/\(.*\)/\t\1/' >> log
+	fi
+	return 0
+}
+
+
+# Run the SQL statement $1 through mysql in batch mode.
+# Sets $result to the output without its first (header) line and $lines to
+# the number of non-empty result lines; both are appended to the "log" file.
+# Returns mysql's exit status when the query fails, 0 otherwise.
+my_get() {
+	result=
+	lines=
+	echo "`date '+%Y-%m-%d %H:%M:%S'` $1" >> log
+	# forward mysql's own status (see pg_get for why "return $?" inside
+	# an "if [ $? != 0 ]" block always returned 0)
+	echo "$1" | mysql -B -u "$DB_USER" "$DB_NAME" > mysql.tmp || return $?
+	result=`cat mysql.tmp | tail -n +2`
+	lines=`echo "$result" | grep -v '^$' | wc -l | sed 's/^[ ]*//'`
+	echo "`date '+%Y-%m-%d %H:%M:%S'` $lines lines" >> log
+	if [ ! -z "$result" ]; then
+		echo "$result" | sed -e 's/\(.*\)/\t\1/' >> log
+	fi
+#	rm -f mysql.tmp
+	return 0
+}
+
+
+# notif propagation
+test_basic() {
+	ok=0
+
+	# submitted
+	echo -n "submitted..."
+	reg || return $?
+	pg_wait 10 "SELECT jobid, state FROM jobs WHERE state='Submitted'" || return $?
+	if [ -z "$result" ]; then
+		echo "FAIL"
+		return 0
+	fi
+
+	# waiting
+	echo -n "waiting..."
+ ev -s NetworkServer -e Accepted --from='UserInterface' --from_host=`hostname -f` --from_instance="pid$$" || return $? + pg_wait 10 "SELECT jobid, state FROM jobs WHERE state='Waiting'" || return $? + if [ -z "$result" ]; then + echo "FAIL" + return 0 + fi + + # running + echo -n "running..." + ev -s LogMonitor -e Running --node="worker node" || return $? + pg_wait 10 "SELECT jobid, state FROM jobs WHERE state='Running'" || return $? + if [ -z "$result" ]; then + echo "FAIL" + return 0 + fi + + ok=1 + echo "OK" +} + + +# proper notif registration cleanup +test_rebind() { + ok=0 + + # ---- active --- + echo -n "$n_notifs notifications " + my_get "SELECT notifid FROM notif_registrations" || return $? + # STATUS and JDL + if [ "$lines" != "$n_notifs" ]; then + echo "FAIL" + return 0 + fi + echo -n "OK " + + # ---- store & stop --- + echo -n "store&quit" + pid=`cat ${GLITE_RTM_TEST_PIDFILE}` + kill $pid + i=0 + while [ -s "${GLITE_RTM_TEST_PIDFILE}" -a $i -lt 200 ]; do + echo -n "." + sleep 0.5 + i=$(($i+1)) + done + if [ -s "${GLITE_RTM_TEST_PIDFILE}" ]; then + echo "FAIL" + return 0 + fi + echo -n "OK notifs " + my_get "SELECT notifid FROM notif_registrations" || return $? + if [ "$lines" != "$n_notifs" ]; then + echo "FAIL" + return 0 + fi + echo -n "OK " + + # ---- launch & rebind --- + if ! start_harvester; then + kill_daemons; + drop_db; + exit 1 + fi + + echo -n "bind" + pg_wait 20 "SELECT notifid FROM notifs WHERE notifid IS NOT NULL" $n_notifs || return $? + if [ x"$lines" != x"$n_notifs" ]; then + echo "FAIL" + return 0 + fi + + echo -n "Done " + ev -s LogMonitor -e Done --status_code=OK --reason="Finished, yeah!" --exit_code=0 || return $? 
+	pg_wait 20 "SELECT jobid, state FROM jobs WHERE state='Done'"
+	if [ -z "$result" ]; then
+		echo "FAIL"
+		return 0
+	fi
+
+	ok=1
+	echo "OK"
+}
+
+
+# Notification cleanup test: stop the harvester with signal 2, check that
+# the registrations are still on the L&B server, run the harvester in
+# cleanup mode, check that they are gone, then restart the harvester on an
+# emptied PostgreSQL database and wait for fresh notifications.
+# Sets ok=1 on success; a failed check prints FAIL and returns 0 with ok=0.
+test_cleanup() {
+	ok=0
+
+	# ---- deep stop ---
+	echo -n "deep quit"
+	pid=`cat ${GLITE_RTM_TEST_PIDFILE}`
+	kill -2 $pid
+	i=0
+	while [ -s "${GLITE_RTM_TEST_PIDFILE}" -a $i -lt 200 ]; do
+		echo -n "."
+		sleep 0.5
+		i=$(($i+1))
+	done
+	# must test the same pidfile the loop above waits for; the misspelled
+	# GLITE_RTMTESTPIDFILE was always empty, so a harvester that never
+	# exited went unnoticed
+	if [ -s "${GLITE_RTM_TEST_PIDFILE}" ]; then
+		echo "FAIL"
+		return 0
+	fi
+
+	echo -n "$n_notifs notifications..."
+	my_get "SELECT notifid FROM notif_registrations" || return 1
+	if [ "$lines" != "$n_notifs" ]; then
+		echo "FAIL"
+		return 0
+	fi
+
+	cleanup_harvester || return $?
+	echo -n "0 notifications..."
+	my_get "SELECT notifid FROM notif_registrations" || return 1
+	if [ "$lines" != "0" ]; then
+		echo "FAIL"
+		return 0
+	fi
+
+	echo -n "cleandb."
+	cleanup_pg || return $?
+	start_harvester || return $?
+
+	echo -n "notifs."
+	pg_wait 20 "SELECT refresh FROM notifs WHERE notifid IS NOT NULL" $n_notifs || return $?
+	refresh=`echo "$result" | head -n 1`
+	if [ -z "$refresh" ]; then
+		echo "FAIL"
+		return 0
+	fi
+
+	ok=1
+	echo "OK"
+}
+
+
+# Notification refresh test: wait up to 3/4 of the configured TTL for all
+# notifications to get a refresh time later than the one remembered in
+# $refresh. Sets ok=1 on success.
+test_refresh() {
+	ok=0
+
+	echo -n "refresh."
+	pg_wait $((GLITE_RTM_TEST_TTL * 3 / 4)) "SELECT notifid FROM notifs WHERE notifid IS NOT NULL AND refresh>'$refresh'" $n_notifs || return $?
+	if [ -z "$result" ]; then
+		echo "FAIL"
+		return 0
+	fi
+
+	ok=1
+	echo "OK"
+}
+
+
+test_jdl() {
+	ok=0
+
+#	kill_daemons
+#	cleanup_mysql && cleanup_pg || return $?
+#	run_daemons || return $?
+
+	# need to wait for notifications to avoid bootstrap
+	echo -n "notifs."
+	pg_wait 20 "SELECT refresh FROM notifs WHERE notifid IS NOT NULL" $n_notifs || return $?
+	refresh=`echo "$result" | head -n 1`
+	if [ -z "$refresh" ]; then
+		echo "FAIL"
+		return 0
+	fi
+	echo -n "OK "
+
+	echo -n "submitted..."
+	reg || return $?
+	pg_wait 10 "SELECT jobid, state FROM jobs WHERE state='Submitted'" || return $?
+ if [ -z "$result" ]; then + echo "FAIL" + return 0 + fi + echo -n "OK " + + echo -n "waiting..." + cat > jdl.txt << EOF +[ + VirtualOrganisation = "TestingVO"; +] +EOF + ev -s NetworkServer -e Accepted --from='UserInterface' --from_host=`hostname -f` --from_instance="pid$$" || return $? + ev -s NetworkServer -e EnQueued --queue "very long and chaotic queue" --job=`pwd`/jdl.txt --result START || return $? + ev -s NetworkServer -e EnQueued --queue "very long and chaotic queue" --job="`cat jdl.txt`" --result OK || return $? + pg_wait 10 "SELECT jobid, state FROM jobs WHERE state='Waiting'" || return $? + if [ -z "$result" ]; then + echo "FAIL" + return 0 + fi + echo -n "OK " + + echo -n "waiting and VO..." + pg_wait 10 "SELECT jobid, state FROM jobs WHERE state='Waiting' AND vo='TestingVO'" || return $? + if [ -z "$result" ]; then + echo "FAIL" + return 0 + fi + echo -n "OK " + + # + # test JDL via VO change + # + # never do it at home ;-) + # + + echo -n "changed JDL..." + ev -s NetworkServer -e EnQueued --queue "very long and chaotic queue" --job="[ VirtualOrganisation=\"TestingVO2\";]" --result OK || return $? + pg_wait 10 "SELECT jobid, state FROM jobs WHERE state='Waiting' AND vo='TestingVO2'" || return $? + if [ -z "$result" ]; then + echo "FAIL" + return 0 + fi + echo -n "OK" + + echo -n "changed after waiting..." + ev -s WorkloadManager -e EnQueued --queue "very long and chaotic queue" --destination LogMonitor --dest_host localhost --dest_instance pid$$ --job "(car 'testing=true)" --result=OK || return $? + pg_wait 10 "SELECT jobid, state FROM jobs WHERE state='Ready' AND vo='TestingVO2'" || return $? + if [ -z "$result" ]; then + echo "FAIL" + return 0 + fi + echo -n "ready..." + ev -s NetworkServer -e EnQueued --queue "very long and chaotic queue" --job="[ VirtualOrganisation=\"TestingVO3\";]" --result OK || return $? + pg_wait 10 "SELECT jobid, state FROM jobs WHERE state='Waiting' AND vo='TestingVO3'" || return $? 
+ if [ -z "$result" ]; then + echo "FAIL" + return 0 + fi + + ok=1 + echo "OK" +} + + +quit() { + if [ x"$started" = x"" ]; then + kill_daemons + drop_db + fi + exit 1 +} + + +fatal() { + echo "Fatal error, end" + quit +} + + +start() { + echo -n "Launch: " + create_db || fatal + run_daemons || fatal + echo "OK" + started=1 +} + + +stop() { + kill_daemons + drop_db +} + + +test() { + echo -n "Basic: " + test_basic || fatal + if [ $ok != 1 ]; then quit; fi + + echo -n "Rebind: " + test_rebind || fatal + if [ $ok != 1 ]; then quit; fi + + echo -n "Cleanup: " + test_cleanup || fatal + if [ $ok != 1 ]; then quit; fi + + echo -n "Refresh: " + test_refresh || fatal + if [ $ok != 1 ]; then quit; fi + + echo -n "JDL: " + test_jdl || fatal + if [ $ok != 1 ]; then quit; fi +} + + +case x"$1" in +xstart) + init + start + ;; + +xstop) + init + stop + ;; + +xtest) + init + test + ;; + +x) + init + start + test + stop + ;; + +*) + usage + exit 1 +esac diff --git a/org.glite.lb.harvester/examples/test.sql b/org.glite.lb.harvester/examples/test.sql new file mode 100644 index 0000000..7bf81b5 --- /dev/null +++ b/org.glite.lb.harvester/examples/test.sql @@ -0,0 +1,35 @@ +CREATE TABLE "jobs" ( + jobid VARCHAR PRIMARY KEY, + lb VARCHAR, + ce VARCHAR, + queue VARCHAR, + rb VARCHAR, + ui VARCHAR, + state VARCHAR, + state_entered TIMESTAMP, + rtm_timestamp TIMESTAMP, + active BOOLEAN, + state_changed BOOLEAN, + registered TIMESTAMP, + vo VARCHAR +); + +CREATE TABLE "lb20" ( + lb VARCHAR, + port INTEGER, + + PRIMARY KEY(lb, port) +); + +CREATE TABLE "notifs" ( + lb VARCHAR, + port INTEGER, + notifid VARCHAR, + notiftype VARCHAR, + valid TIMESTAMP, + refresh TIMESTAMP, + last_update TIMESTAMP, + errors INTEGER, + + PRIMARY KEY(lb, port, notiftype) +); diff --git a/org.glite.lb.harvester/project/ChangeLog b/org.glite.lb.harvester/project/ChangeLog new file mode 100644 index 0000000..5dba172 --- /dev/null +++ b/org.glite.lb.harvester/project/ChangeLog @@ -0,0 +1,3 @@ +1.0.0-1 +- Initial 
version + diff --git a/org.glite.lb.harvester/project/package.description b/org.glite.lb.harvester/project/package.description new file mode 100644 index 0000000..addf934 --- /dev/null +++ b/org.glite.lb.harvester/project/package.description @@ -0,0 +1,3 @@ +L&B Harvester gathers information about jobs from L&B servers using the efficient L&B notification mechanism. It manages notifications and keeps them in persistent storage (a file or a database table) for reuse on the next launch. It takes care of refreshing notifications and queries the L&B servers again when a notification expires. + +The tool was initially written for the Real Time Monitor (a project at Imperial College in London) and was later extended with a messaging mechanism for WLCG. diff --git a/org.glite.lb.harvester/project/package.summary b/org.glite.lb.harvester/project/package.summary new file mode 100644 index 0000000..062972a --- /dev/null +++ b/org.glite.lb.harvester/project/package.summary @@ -0,0 +1 @@ +Enhanced L&B notification client. diff --git a/org.glite.lb.harvester/project/version.properties b/org.glite.lb.harvester/project/version.properties new file mode 100644 index 0000000..cd1e9e7 --- /dev/null +++ b/org.glite.lb.harvester/project/version.properties @@ -0,0 +1,2 @@ +module.version=1.0.0 +module.age=1 diff --git a/org.glite.lb.harvester/src/harvester.c b/org.glite.lb.harvester/src/harvester.c new file mode 100644 index 0000000..7af1b5e --- /dev/null +++ b/org.glite.lb.harvester/src/harvester.c @@ -0,0 +1,2423 @@ +#ident "$Header$" + +/* + * Real time monitor.
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef WITH_LBU_DB +#include +#include +#endif +#include +#ifndef WITH_OLD_LB +#include +#endif +#include +#include + + +// default number of the threads/sockets +#define RTM_THREADS 5 +// requested notification life in seconds +#define RTM_NOTIF_TTL 86400 +// consider end of the notification life sooner +#define RTM_NOTIF_TTL_TO_DEAD 2 +// poll timeout in seconds +#define RTM_NOTIF_READ_TIMEOUT 5 +// recheck LB server after error in seconds +#define RTM_ERROR_REPEAT_RATE 120 +// initial read loop time (can be infinity) +#define RTM_NOTIF_LOOP_MAX_TIME 1800 +// idle "quit" poll +#define RTM_IDLE_POLL_TIME 0.5 +// purge & summary jobs poll time +#define RTM_SUMMARY_POLL_TIME 600 +// preventive suicide against memleaks and ugly things (12 h) +#define RTM_SUICIDE_TIME 43200 + +#define RTM_SUMMARY_JOBS 100 + +#define RTM_DB_TABLE_JOBS "jobs" +#define RTM_DB_TABLE_LBS "lb20" +#define DBPAR(N) ("$" (N)) +#define DBAMP "\"" + +// debug message level: insane, debug, progress, warning, error +#define INS 4 +#define DBG 3 +#define INF 2 +#define WRN 1 +#define ERR 0 +#define DEBUG_LEVEL_MASK 7 +#define DEBUG_GUARD_MASK 8 + +// internal quit codes +#define RTM_QUIT_RUN 0 +#define RTM_QUIT_CLEANUP 1 +#define RTM_QUIT_PRESERVE 2 +#define RTM_QUIT_RELOAD 3 + +// exit codes +#define RTM_EXIT_OK 0 +#define RTM_EXIT_RELOAD 1 +#define RTM_EXIT_ERROR 2 + +#define RTM_NOTIF_TYPE_STATUS 1 +#define RTM_NOTIF_TYPE_JDL 2 +#define RTM_NOTIF_TYPE_OLD 3 +#define RTM_NOTIF_TYPE_DONE 4 + +#ifdef RTM_NO_COLORS +#define RTM_TTY_RED "" +#define RTM_TTY_GREEN "" +#define RTM_TTY_RST "" +#else +#define RTM_TTY_RED "\e[1;31m" +#define RTM_TTY_GREEN "\e[1;32m" +#define RTM_TTY_RST "\e[0;39m" +#endif + +#ifndef LINE_MAX +#define LINE_MAX 1023 +#endif + +#define RTM_FILE_NOTIFS 
"/var/tmp/notifs.txt" +#define WLCG_FILENAME_TEMPLATE "/tmp/wlcg_%02d_XXXXXX" +#define WLCG_COMMAND_MESSAGE "/opt/lcg/bin/msg-publish -c /opt/lcg/etc/msg-publish.conf org.wlcg.usage.jobStatus %s" +#define WLCG_BINARY "/opt/lcg/bin/msg-publish" +#define WLCG_CONFIG "/opt/lcg/etc/msg/msg-publish.conf.wlcg" +#define WLCG_TOPIC "org.wlcg.usage.jobStatus" + + +#ifdef WITH_OLD_LB +#define glite_jobid_t edg_wlc_JobId +#define glite_jobid_create edg_wlc_JobIdCreate +#define glite_jobid_recreate edg_wlc_JobIdRecreate +#define glite_jobid_dup edg_wlc_JobIdDup +#define glite_jobid_free edg_wlc_JobIdFree +#define glite_jobid_parse edg_wlc_JobIdParse +#define glite_jobid_unparse edg_wlc_JobIdUnparse +#define glite_jobid_getServer edg_wlc_JobIdGetServer +#define glite_jobid_getServerParts edg_wlc_JobIdGetServerParts +#define glite_jobid_getUnique edg_wlc_JobIdGetUnique +#endif +#ifndef GLITE_JOBID_DEFAULT_PORT +#define GLITE_JOBID_DEFAULT_PORT GLITE_WMSC_JOBID_DEFAULT_PORT +#define edg_wll_NotifNew(CTX, CONDS, FLAGS, SOCK, LADDR, ID, VALID) edg_wll_NotifNew((CTX), (CONDS), (SOCK), (LADDR), (ID), (VALID)) +#define edg_wll_JDLField(STAT, NAME) NULL +#endif + +// TODO: ipv6? 
:-) + +typedef struct { + edg_wll_NotifId id; // notification context (after bootstrap/rebind) + char *id_str; // notification id string + int type; // for distinguish various notifications on one LB + char *server; // LB server hostname + unsigned int port; // LB server port + time_t valid; // maximal validity of the notification + time_t refresh; // when try to refresh (before expiration), + // used for retry time after error too + double last_update; // last change from the server + int active; // helper (compare LB servers and notifications, + // if to save to the persistent storage) + int error; // errors counter +} notif_t; + +typedef struct { + int id; + pthread_t thread; + notif_t *notifs; + int nservers; + time_t first_refresh; + char time_s[100]; + char *dash_filename; + int dash_fd; +#ifdef WITH_LBU_DB + glite_lbu_DBContext dbctx; + glite_lbu_Statement insertcmd, updatecmd, updatecmd_vo; + int dbcaps; +#endif +} thread_t; + +typedef struct { + char *local_address; + int nthreads; + char *config_file; + char *notif_file; + int debug; + int guard; + int daemonize; + char *pidfile; + int dive; + char *dbcs; // DB connection string + char *cert, *key; + int ttl; // requested time to live (validity) of the notifications + int cleanup; // if to clean up notifications on LB servers + int wlcg; // dashboard messaging + int wlcg_no_remove; // don't remove temporary files (for debugging) + char *wlcg_binary; // path msg-publish binary + char *wlcg_config; // msg config file + char *wlcg_topic; // msg topic + int wlcg_flush; // send message for eachnotification + int silly; // old LB 3.1 mode + + int nservers; + notif_t *notifs; +} config_t; + +typedef struct { + notif_t *notifs; + int n, maxn; + pthread_mutex_t lock; + double last_check; + int was_summary; // flag for debugging +#ifdef WITH_LBU_DB + glite_lbu_DBContext dbctx; +#endif +} db_t; + + +static const char rcsid[] = "@(#)$Id$"; + +static int rtm2syslog[] = { + LOG_ERR, + LOG_WARNING, + LOG_INFO, + 
LOG_DEBUG, + LOG_DEBUG, +}; + +static const struct option opts[] = { + { "wlcg-binary", required_argument, NULL, 0}, + { "wlcg-config", required_argument, NULL, 0}, + { "wlcg-topic", required_argument, NULL, 0}, + { "wlcg-flush", no_argument, NULL, 0}, + { "help", no_argument, NULL, 'h'}, + { "version", no_argument, NULL, 'v'}, + { "threads", required_argument, NULL, 's'}, + { "debug", required_argument, NULL, 'd'}, + { "daemonize", no_argument, NULL, 'D'}, + { "pidfile", required_argument, NULL, 'i'}, + { "ttl", required_argument, NULL, 't'}, + { "history", required_argument, NULL, 'H'}, + { "config", required_argument, NULL, 'c'}, + { "notifs", required_argument, NULL, 'n'}, + { "port", required_argument, NULL, 'p'}, + { "pg", required_argument, NULL, 'm'}, + { "cert", required_argument, NULL, 'C'}, + { "key", required_argument, NULL, 'K'}, + { "wlcg", no_argument, NULL, 'w'}, + { "old", no_argument, NULL, 'o'}, + { "cleanup", no_argument, NULL, 'l'}, + { NULL, no_argument, NULL, 0} +}; + +static const char *opts_line = "hvs:d:Di:t:H:c:n:p:m:C:K:wo"; + +config_t config = { + local_address: NULL, + nthreads: RTM_THREADS, + config_file: NULL, + notif_file: NULL, + debug: DBG, + guard: 1, + dive: 10800, + dbcs: NULL, + cert: NULL, + key: NULL, + ttl: RTM_NOTIF_TTL, + cleanup: 0, + wlcg: 0, + silly: 0, + + nservers: 0, + notifs: NULL, +}; +db_t db = { + notifs: NULL, + n: 0, + maxn: 0, + lock: PTHREAD_MUTEX_INITIALIZER, +#ifdef WITH_LBU_DB + dbctx: NULL +#endif +}; +thread_t *threads = NULL; +volatile sig_atomic_t quit = RTM_QUIT_RUN; + +static int listen_port = 0; + +#define lprintf(T, LEVEL, FMT, ARGS...) \ + if ((LEVEL) <= config.debug) lprintf_func((T), (LEVEL), (FMT), ##ARGS) +#define lprintf_ctx(T, LEVEL, CTX, FMT, ARGS...) \ + if ((LEVEL) <= config.debug) lprintf_ctx_func((T), (CTX), (LEVEL), (FMT), ##ARGS) +#define lprintf_dbctx(T, LEVEL, FMT, ARGS...) 
\
+	if ((LEVEL) <= config.debug) lprintf_dbctx_func((T), (LEVEL), (FMT), ##ARGS)
+
+#ifdef WITH_OLD_LB
+int edg_wll_gss_initialize() {
+	if (globus_module_activate(GLOBUS_GSI_GSSAPI_MODULE) != GLOBUS_SUCCESS) return EINVAL;
+	return 0;
+}
+#endif
+
+/*
+ * Format one log message and emit it.
+ *
+ * t           - calling thread, NULL for the main thread (selects the prefix)
+ * description - optional text appended after the message, may be NULL
+ * level       - message level (ERR..INS), used as index into rtm2syslog
+ * fmt, ap     - printf-style format and its arguments
+ *
+ * The message goes to syslog when config.daemonize is set, otherwise to
+ * stderr (wrapped in the RTM_TTY_RED/RTM_TTY_RST sequences for levels up to
+ * WRN). The message is dropped silently when it can't be allocated.
+ */
+void lvprintf_func(thread_t *t, const char *description, int level, const char *fmt, va_list ap) {
+	char prefix[10];
+	char *msg = NULL, *line = NULL;
+	int len;
+
+	if (t) snprintf(prefix, sizeof prefix, "[%02d]", t->id);
+	/* the former memcpy(prefix, "[main]", 8) read one byte past the literal */
+	else snprintf(prefix, sizeof prefix, "[main]");
+
+	/* on failure the asprintf family leaves the output pointer undefined */
+	if (vasprintf(&msg, fmt, ap) == -1) return;
+	if (description) len = asprintf(&line, "%s %s, %s\n", prefix, msg, description);
+	else len = asprintf(&line, "%s %s\n", prefix, msg);
+	free(msg);
+	if (len == -1) return;
+
+	if (level <= WRN && !config.daemonize) fputs(RTM_TTY_RED, stderr);
+	if (config.daemonize) {
+		openlog(NULL, LOG_PID | LOG_CONS, LOG_DAEMON);
+		syslog(rtm2syslog[level], "%s", line);
+		closelog();
+	} else {
+		fputs(line, stderr);
+	}
+	if (level <= WRN && !config.daemonize) fputs(RTM_TTY_RST, stderr);
+
+	free(line);
+}
+
+
+/* printf-style front end of lvprintf_func() without a description. */
+void lprintf_func(thread_t *t, int level, const char *fmt, ...) {
+	va_list ap;
+
+	va_start(ap, fmt);
+	lvprintf_func(t, NULL, level, fmt, ap);
+	va_end(ap);
+}
+
+
+/*
+ * Like lprintf_func(), with the current error of the L&B context appended
+ * as "<error text>: <error description>".
+ */
+void lprintf_ctx_func(thread_t *t, edg_wll_Context ctx, int level, const char *fmt, ...) {
+	va_list ap;
+	char *errText = NULL, *errDesc = NULL, *s = NULL;
+
+	va_start(ap, fmt);
+	edg_wll_Error(ctx, &errText, &errDesc);
+	/* log without the description rather than pass an undefined pointer */
+	if (asprintf(&s, "%s: %s", errText, errDesc) == -1) s = NULL;
+	lvprintf_func(t, s, level, fmt, ap);
+	free(errText);
+	free(errDesc);
+	free(s);
+	va_end(ap);
+}
+
+
+#ifdef WITH_LBU_DB
+void lprintf_dbctx_func(thread_t *t, int level, const char *fmt, ...) {
+	va_list ap;
+	char *errText = NULL, *errDesc = NULL, *s = NULL;
+	glite_lbu_DBContext dbctx = t ?
t->dbctx : db.dbctx; + + va_start(ap, fmt); + if (dbctx) { + glite_lbu_DBError(dbctx, &errText, &errDesc); + asprintf(&s, "%s: %s", errText, errDesc); + } + lvprintf_func(t, s, level, fmt, ap); + free(errText); + free(errDesc); + free(s); + va_end(ap); +} +#endif + +#ifndef WITH_LBU_DB +time_t glite_lbu_StrToTime(const char *str) { + struct tm tm; + + memset(&tm,0,sizeof(tm)); + putenv("TZ=UTC"); tzset(); + sscanf(str,"%4d-%02d-%02d %02d:%02d:%02d", + &tm.tm_year,&tm.tm_mon,&tm.tm_mday, + &tm.tm_hour,&tm.tm_min,&tm.tm_sec); + tm.tm_year -= 1900; + tm.tm_mon--; + + return mktime(&tm); +} + +double glite_lbu_StrToTimestamp(const char *str) { + struct tm tm; + double sec; + + memset(&tm,0,sizeof(tm)); + putenv("TZ=UTC"); tzset(); + sscanf(str,"%4d-%02d-%02d %02d:%02d:%lf", + &tm.tm_year,&tm.tm_mon,&tm.tm_mday, + &tm.tm_hour,&tm.tm_min,&sec); + tm.tm_year -= 1900; + tm.tm_mon--; + tm.tm_sec = sec; + + return (sec - tm.tm_sec) + mktime(&tm); +} +#endif + + +// hacky time->string conversion +char *time2str(thread_t *t, time_t time) { + struct tm tm; + + if ((int)time <= 0) memcpy(t->time_s, "-", sizeof("-")); + else { + localtime_r(&time, &tm); + strftime(t->time_s, sizeof(t->time_s), "%F %T", &tm); + } + return t->time_s; +} + + +double rtm_gettimeofday() { + struct timeval tv; + + gettimeofday(&tv, NULL); + return tv.tv_sec + tv.tv_usec / 1000000.0; +} + + +void rtm_time2str(time_t t, char **str) { + struct tm *tm; + + if (t) { + tm = gmtime(&t); + asprintf(str,"%4d-%02d-%02d %02d:%02d:%02d",tm->tm_year+1900,tm->tm_mon+1, + tm->tm_mday,tm->tm_hour,tm->tm_min,tm->tm_sec); + } else + *str = strdup("-"); +} + + +void rtm_timestamp2str(double t, char **str) { + time_t tsec = t; + struct tm *tm = gmtime(&tsec); + + if (t) { + t = t - tsec + tm->tm_sec; + asprintf(str,"%4d-%02d-%02d %02d:%02d:%02.09f",tm->tm_year+1900,tm->tm_mon+1, + tm->tm_mday,tm->tm_hour,tm->tm_min,t); + } else + *str = strdup("-"); +} + + +int rtm_str2notiftype(const char *str) { + if (strcasecmp(str, 
"STATUS") == 0) return RTM_NOTIF_TYPE_STATUS; + if (strcasecmp(str, "DONE") == 0) return RTM_NOTIF_TYPE_DONE; + if (strcasecmp(str, "JDL") == 0) return RTM_NOTIF_TYPE_JDL; + if (strcasecmp(str, "OLD") == 0) return RTM_NOTIF_TYPE_OLD; + return -1; +} + + +const char *rtm_notiftype2str(int type) { + switch (type) { + case RTM_NOTIF_TYPE_STATUS: return "STATUS"; + case RTM_NOTIF_TYPE_DONE: return "DONE"; + case RTM_NOTIF_TYPE_JDL: return "JDL"; + case RTM_NOTIF_TYPE_OLD: return "OLD"; + default: return NULL; + } +} + + +void wlcg_timeval2str(struct timeval *t, char **str) { + struct tm *tm; + + tm = gmtime(&t->tv_sec); + asprintf(str,"%4d-%02d-%02dT%02d:%02d:%02dZ",tm->tm_year+1900,tm->tm_mon+1, + tm->tm_mday,tm->tm_hour,tm->tm_min,tm->tm_sec); +} + + +int wlcg_store_message(thread_t *t, __attribute((unused))notif_t *notif, edg_wll_JobStat *stat) { + unsigned int port; + int status = 0; + char *jobid_str = NULL, *state_str = NULL, *vo = NULL, *lbhost = NULL; + char *wlcg_last_update_time_str = NULL, *wlcg_state_start_time_str = NULL; + + jobid_str = stat->jobId ? glite_jobid_unparse(stat->jobId) : strdup("Unknown"); + glite_jobid_getServerParts(stat->jobId, &lbhost, &port); + state_str = edg_wll_StatToString(stat->state); + vo = edg_wll_JDLField(stat,"VirtualOrganisation") ? 
: strdup("Unknown"); + + if (!t->dash_filename || !t->dash_fd) { + free(t->dash_filename); + asprintf(&t->dash_filename, WLCG_FILENAME_TEMPLATE, t->id); + if ((t->dash_fd = mkstemp(t->dash_filename)) == -1) { + status = errno; + lprintf(t, ERR, "can't create temporary file '%s': %s", t->dash_filename, strerror(status)); + free(t->dash_filename); + t->dash_filename = NULL; + goto quit; + } + } + + wlcg_timeval2str(&stat->lastUpdateTime, &wlcg_last_update_time_str); + wlcg_timeval2str(&stat->stateEnterTime, &wlcg_state_start_time_str); + + dprintf(t->dash_fd, "jobId: %s\n\ +stateName: %s\n\ +ownerDN: %s\n\ +voname: %s\n\ +bkHost: %s:%d\n\ +networkHost: %s\n\ +lastUpdateTime: %s\n\ +stateStartTime: %s\n\ +exitCode: %d\n\ +DoneCode: %d\n\ +destSite: %s\n\ +condorId: %s\n\ +StatusReason: %s\n\ +EOT\n", jobid_str, state_str, stat->owner, vo, lbhost, port, stat->network_server ? : "unknown", wlcg_last_update_time_str, wlcg_state_start_time_str, stat->exit_code, stat->done_code, stat->destination ? : "NULLByPublisher", stat->condorId ? : "0", stat->reason && stat->reason[strspn(stat->reason, " \t\n\r")] != '\0' ? 
stat->reason : "UNAVAILABLE By Publisher");
+
+	free(wlcg_last_update_time_str);
+	free(wlcg_state_start_time_str);
+quit:
+	free(jobid_str);
+	free(lbhost);
+	free(state_str);
+	free(vo);
+	return status;
+}
+
+
+/*
+ * Publish the messages collected in the thread's temporary file by running
+ * the configured msg-publish binary through /bin/sh.
+ *
+ * Does nothing and returns 0 when the thread has no open message file.
+ * Otherwise returns the exit code of the command (0 on success), or -1 when
+ * the command could not be started, waited for, or did not exit normally.
+ * The file is removed only after a successful run (unless
+ * config.wlcg_no_remove is set); the file name and descriptor stored in the
+ * thread are always reset.
+ */
+int wlcg_send_message(thread_t *t) {
+	int status = 0;
+	char *command = NULL;
+	pid_t pid, reaped;
+
+	// WLCG message
+	if (!t->dash_fd) return 0;
+
+	close(t->dash_fd);
+	if (asprintf(&command, "'%s' -c '%s' '%s' '%s'", config.wlcg_binary, config.wlcg_config, config.wlcg_topic, t->dash_filename) == -1) {
+		/* asprintf leaves the pointer undefined on failure */
+		command = NULL;
+		lprintf(t, ERR, "can't build command line for '%s'", config.wlcg_binary);
+		status = -1;
+		goto cleanup;
+	}
+	lprintf(t, DBG, "calling %s", command);
+
+	pid = vfork();
+	if (pid == 0) {
+		/* NOTE(review): logging in a vfork() child is outside what POSIX
+		 * allows there (only exec*() and _exit()); kept as it was */
+		if (execlp("/bin/sh", "/bin/sh", "-c", command, NULL) == -1) {
+			lprintf(t, ERR, "can't exec '%s':%s", command, strerror(errno));
+		}
+		_exit(1);
+	}
+	if (pid == -1) {
+		/* nothing was started: don't wait, and don't report success */
+		lprintf(t, ERR, "can't fork: %s", strerror(errno));
+		status = -1;
+		goto cleanup;
+	}
+
+	/* wait for our own child only, other threads may have children too */
+	do {
+		reaped = waitpid(pid, &status, 0);
+	} while (reaped == -1 && errno == EINTR);
+
+	if (reaped == -1) {
+		lprintf(t, ERR, "can't wait for %s: %s", config.wlcg_binary, strerror(errno));
+		status = -1;
+	} else if (WIFEXITED(status)) {
+		status = WEXITSTATUS(status);
+		if (status) {
+			lprintf(t, WRN, "%s exited with %d", config.wlcg_binary, status);
+		} else {
+			lprintf(t, INF, "%s exited successfully", config.wlcg_binary);
+			if (!config.wlcg_no_remove) remove(t->dash_filename);
+		}
+	} else {
+		lprintf(t, ERR, "%s not exited normally", config.wlcg_binary);
+		status = -1;
+	}
+
+cleanup:
+	free(command);
+	free(t->dash_filename);
+	t->dash_filename = NULL;
+	t->dash_fd = 0;
+
+	return status;
+}
+
+
+/* Release everything owned by the notification and zero the structure. */
+void notif_free(notif_t *notif) {
+	edg_wll_NotifIdFree(notif->id);
+	free(notif->id_str);
+	free(notif->server);
+	memset(notif, 0, sizeof(notif_t));
+}
+
+
+/*
+ * Forget the notification ID (both forms) and reset the error counter;
+ * the other fields, including the server, are left untouched.
+ */
+void notif_invalidate(notif_t *notif) {
+	edg_wll_NotifIdFree(notif->id);
+	free(notif->id_str);
+	notif->id = NULL;
+	notif->id_str = NULL;
+	notif->error = 0;
+}
+
+
+int notif_copy(notif_t *dest, notif_t *src) {
+	if (!src || !dest) return EINVAL;
+	memset(dest, 0, sizeof(notif_t));
+	if (src->id) dest->id = edg_wll_NotifIdDup(src->id);
+	if (src->id_str) dest->id_str = strdup(src->id_str);
+	dest->type = src->type;
+	if (src->server) dest->server = strdup(src->server);
+	dest->port = src->port;
+	dest->valid =
src->valid; + dest->refresh = src->refresh; + dest->last_update = src->last_update; + dest->active = src->active; + dest->error = src->error; + return 0; +} + + +#ifdef WITH_LBU_DB +static int db_init(thread_t *t, glite_lbu_DBContext *dbctx) { + int err, dbcaps; + + if (config.dbcs) { + if ((err = glite_lbu_InitDBContext(dbctx, GLITE_LBU_DB_BACKEND_PSQL)) != 0) { + lprintf_dbctx(t, ERR, "can't initialize DB context"); + return err; + } + while ((err = glite_lbu_DBConnect(*dbctx, config.dbcs)) != 0 && !quit) { + lprintf_dbctx(t, ERR, "can't connect to '%s'", config.dbcs); + lprintf(t, INF, "still trying..."); + sleep(5); + } + if (err == 0) { + if ((dbcaps = glite_lbu_DBQueryCaps(*dbctx)) == -1) { + lprintf_dbctx(t, ERR, "can't get database capabilities"); + dbcaps = 0; + } + lprintf(t, INF, "DB connected, cs: %s, capabilities: %d", config.dbcs, dbcaps); + if (t == NULL && (dbcaps & GLITE_LBU_DB_CAP_PREPARED) == 0) { + lprintf(NULL, WRN, "postgresql server doesn't support SQL prepared commands, recommended version >= 8.2"); + } + if (t) t->dbcaps = dbcaps; + return 0; + } else { + glite_lbu_FreeDBContext(*dbctx); + return err; + } + } else { + lprintf(t, DBG, "no DB configured (--pg option)"); + return -1; + } +} + + +static void db_free(__attribute((unused))thread_t *t, glite_lbu_DBContext dbctx) { + if (dbctx) { + glite_lbu_DBClose(dbctx); + glite_lbu_FreeDBContext(dbctx); + } +} +#endif + + +static notif_t *db_add_notif(char *notifid, int type, time_t valid, time_t refresh, double last_update, char *server, int port, int active) { + void *tmp; + notif_t *notif; + + if (db.n >= db.maxn) { + db.maxn = db.n + 20; + if ((tmp = realloc(db.notifs, db.maxn * sizeof(notif_t))) == NULL) return NULL; + db.notifs = (notif_t *)tmp; + memset(db.notifs + db.n, 0, (db.maxn - db.n) * sizeof(notif_t)); + } + notif = db.notifs + db.n; + notif->id_str = notifid; + notif->type = type; + notif->valid = valid; + notif->refresh = refresh; + notif->last_update = last_update; + 
notif->server = server; + notif->port = port; + notif->active = active; + db.n++; + + return notif; +} + + +static int db_save_notifs_file(thread_t *t) { + FILE *f; + char *filename = NULL; + int retval = 1; + notif_t *notif; + int i; + char *valid_str = NULL, *refresh_str = NULL, *last_update_str = NULL; + + asprintf(&filename, "%s-new", config.notif_file); + if ((f = fopen(filename, "wt")) == NULL) { + lprintf(t, ERR, "can't write '%s': %s", filename, strerror(errno)); + goto quit; + } + + for (i = 0; i < db.n; i++) { + notif = db.notifs + i; + if (!notif->active) { + lprintf(t, DBG, "not saving inactive notif %s (%s), server %s:%d", notif->id_str, rtm_notiftype2str(notif->type), notif->server, notif->port); + continue; + } + if (notif->id_str) { + rtm_time2str(notif->valid, &valid_str); + rtm_time2str(notif->refresh, &refresh_str); + rtm_timestamp2str(notif->last_update, &last_update_str); + + fprintf(f, "%s\t%s\t%s\t%s\t%s\n", notif->id_str, rtm_notiftype2str(notif->type), valid_str, refresh_str, last_update_str); + + free(valid_str); valid_str = NULL; + free(refresh_str); refresh_str = NULL; + free(last_update_str); last_update_str = NULL; + } + } + fclose(f); + if (rename(filename, config.notif_file) != 0) { + lprintf(t, ERR, "can't move new notification file '%s' to '%s': %s", filename, config.notif_file, strerror(errno)); + goto quit; + } + retval = 0; +quit: + free(filename); + free(valid_str); + free(refresh_str); + free(last_update_str); + return 0; +} + + +#if defined(WITH_RTM_SQL_STORAGE) && defined(WITH_LBU_DB) +static int db_save_notifs_sql(thread_t *t) { + int retval = 1; + notif_t *notif; + int i; + char *sql = NULL, *valid_str = NULL, *refresh_str = NULL, *last_update_str = NULL; + const char *type_str; + + for (i = 0; i < db.n; i++) { + notif = db.notifs + i; +/* + if (!notif->active) { + lprintf(t, INS, "not saving inactive notif %s (%s:%d)", notif->id_str, notif->server, notif->port); + continue; + } +*/ + type_str = 
rtm_notiftype2str(notif->type); + if (notif->id_str) { + if (notif->valid) glite_lbu_TimeToDB(db.dbctx, notif->valid, &valid_str); + else valid_str = strdup("NULL"); + if (notif->refresh) glite_lbu_TimeToDB(db.dbctx, notif->refresh, &refresh_str); + else refresh_str = strdup("NULL"); + if (notif->last_update) glite_lbu_TimestampToDB(db.dbctx, notif->last_update, &last_update_str); + else last_update_str = strdup("NULL"); + trio_asprintf(&sql, "UPDATE notifs SET notifid='%|Ss', valid=%s, refresh=%s, last_update=%s WHERE lb='%|Ss' AND port=%d AND notiftype='%|Ss'", notif->id_str, valid_str, refresh_str, last_update_str, notif->server, notif->port, type_str); + switch (glite_lbu_ExecSQL(db.dbctx, sql, NULL)) { + case 0: + // not found - insert + // can be handy when using file as input of LBs + free(sql); + trio_asprintf(&sql, "INSERT INTO notifs (lb, port, notifid, notiftype, valid, refresh, last_update) VALUES ('%|Ss', %d, '%|Ss', '%|Ss', %s, %s, %s)", notif->server, notif->port, notif->id_str, type_str, valid_str, refresh_str, last_update_str); + switch (glite_lbu_ExecSQL(db.dbctx, sql, NULL)) { + case -1: + lprintf_dbctx(t, ERR, "notif '%s' (%s) insert failed", notif->id_str, type_str); + goto quit; + case 0: + lprintf(t, ERR, "notif '%s' (%s) not inserted for unknown reason", type_str); + break; + default: + lprintf(t, INS, "notif '%s' (%s) inserted", notif->id_str, type_str); + break; + } + break; + case -1: + lprintf_dbctx(t, ERR, "notif '%s' (%s) update failed", notif->id_str, type_str); + goto quit; + default: + lprintf(t, INS, "notif '%s' updated", notif->id_str); + break; + } + } else { + trio_asprintf(&sql, "UPDATE notifs SET notifid=NULL, valid=NULL, refresh=NULL, last_update=NULL WHERE lb='%|Ss' AND port=%d AND notiftype='%|Ss'", notif->server, notif->port, type_str); + switch (glite_lbu_ExecSQL(db.dbctx, sql, NULL)) { + case 0: + lprintf(t, INS, "cleared %s notif for %s:%d not found, ok", type_str, notif->server, notif->port); + break; + case -1: + 
lprintf_dbctx(t, ERR, "clearing notif %s for %s:%d failed", type_str, notif->server, notif->port); + goto quit; + default: + lprintf(t, INS, "cleared notif %s for %s:%d", type_str, notif->server, notif->port); + break; + } + } + free(sql); sql = NULL; + free(valid_str); valid_str = NULL; + free(refresh_str); refresh_str = NULL; + free(last_update_str); last_update_str = NULL; + } + retval = 0; +quit: + free(sql); + free(valid_str); + free(refresh_str); + free(last_update_str); + return 0; +} +#endif + + +static int db_save_notifs(thread_t *t) { +#if 0 + int i; + + for (i = 0; i < db.n; i++) { + notif_t *notif = db.notifs + i; + lprintf(NULL, DBG, "save: %s (%s), server: %s:%d, active: %d", notif->id_str, rtm_notiftype2str(notif->type), notif->server, notif->port, notif->active); + } +#endif + +#if defined(WITH_RTM_SQL_STORAGE) && defined(WITH_LBU_DB) + if (!db.dbctx) return db_save_notifs_file(t); + else return db_save_notifs_sql(t); +#else + return db_save_notifs_file(t); +#endif +} + + +static notif_t *db_search_notif(notif_t *notifs, int n, const char *notifid) { + int i; + + for (i = 0; i < n && (!notifs[i].id_str || strcmp(notifs[i].id_str, notifid) != 0); i++); + return i == n ? NULL : notifs + i; +} + + +static notif_t *db_search_notif_by_server(notif_t *notifs, int n, const char *server, unsigned int port, int type) { + int i; + + for (i = 0; i < n; i++) { + if (strcmp(notifs[i].server, server) == 0 && notifs[i].port == port && notifs[i].type == type) break; + } + + return i == n ? NULL : notifs + i; +} + + +static int db_store_change(__attribute((unused))thread_t *t, notif_t *notif, __attribute((unused))int index, edg_wll_JobStat *stat) { + char *jobid_str = NULL, *state_str = NULL, *sql = NULL, *sql2 = NULL, *state_entered_str = NULL, *rtm_timestamp_str = NULL, *lbhost = NULL, *unique_str = NULL, *regtime_str = NULL,*vo = NULL; + unsigned int port; + + jobid_str = stat->jobId ? 
glite_jobid_unparse(stat->jobId) : strdup("unknown"); + glite_jobid_getServerParts(stat->jobId, &lbhost, &port); + unique_str = glite_jobid_getUnique(stat->jobId); + state_str = edg_wll_StatToString(stat->state); + vo = edg_wll_JDLField(stat,"VirtualOrganisation"); + printf(RTM_TTY_GREEN "notifid: %s (%s), jobid: %s, state: %s, vo: %s, last time: %lf" RTM_TTY_RST "\n", notif->id_str, rtm_notiftype2str(notif->type), jobid_str, state_str, vo, notif->last_update); + +#ifdef WITH_LBU_DB + if (config.dbcs && t->dbctx) { + double state_entered, rtm_timestamp; + char *ce, *queue, *colon, *sql_part; + const char *rb, *ui, *state, *active, *state_changed, *lb; + time_t registered; + + ce = stat->destination ? : "unknown"; + queue = strchr(ce, '/'); + if (queue) *queue++='\0'; + else queue = "unknown"; + colon = strchr(ce, ':'); + if (colon) colon[0] = '\0'; + rb = stat->network_server ? : "unknown"; + ui = stat->ui_host ? : "unknown"; + state = state_str ? : "unknown"; + state_entered = stat->stateEnterTime.tv_sec + stat->stateEnterTime.tv_usec / 1000000.0; + rtm_timestamp = rtm_gettimeofday(); + registered = stat->stateEnterTimes[1 + EDG_WLL_JOB_SUBMITTED]; + lb = lbhost; + active = "true"; + state_changed = "true"; + + if ((t->dbcaps & GLITE_LBU_DB_CAP_PREPARED) == 0) { + + glite_lbu_TimestampToDB(t->dbctx, state_entered, &state_entered_str); + glite_lbu_TimestampToDB(t->dbctx, rtm_timestamp, &rtm_timestamp_str); + glite_lbu_TimeToDB(t->dbctx, registered, ®time_str); + + if (vo) trio_asprintf(&sql_part, ", vo='%|Ss' ", vo); + else sql_part = strdup(""); + trio_asprintf(&sql, "UPDATE " RTM_DB_TABLE_JOBS " SET ce='%|Ss', queue='%|Ss', rb='%|Ss', ui='%|Ss', state='%|Ss', state_entered=%s, rtm_timestamp=%s, active=%s, state_changed=%s, registered=%s%sWHERE jobid='%|Ss' AND lb='%|Ss'", ce, queue, rb, ui, state, state_entered_str, rtm_timestamp_str, active, state_changed, regtime_str, sql_part, unique_str, lb); + free(sql_part); + lprintf(t, INS, "update: %s", sql); + switch 
(glite_lbu_ExecSQL(t->dbctx, sql, NULL)) { + case -1: + lprintf_dbctx(t, ERR, "can't get jobs"); + goto quit; + case 0: + trio_asprintf(&sql2, "INSERT INTO " RTM_DB_TABLE_JOBS " " + "(ce, queue, rb, ui, state, state_entered, rtm_timestamp, jobid, lb, active, state_changed, registered, vo) VALUES " + "('%|Ss', '%|Ss', '%|Ss', '%|Ss', '%|Ss', %s, %s, '%|Ss', '%|Ss', %s, %s, %s, '%|Ss')", ce, queue, rb, ui, state, state_entered_str, rtm_timestamp_str, unique_str, lb, active, state_changed, regtime_str, vo ? : "unknown"); + lprintf(t, INS, "insert: %s", sql2); + if (glite_lbu_ExecSQL(t->dbctx, sql2, NULL) == -1) { + lprintf_dbctx(t, ERR, "can't insert job"); + goto quit; + } + break; + default: + break; + } + + } else { // prepared commands + int ret; + + if (vo) { + ret = glite_lbu_ExecPreparedStmt(t->updatecmd_vo, 13, + GLITE_LBU_DB_TYPE_VARCHAR, ce, + GLITE_LBU_DB_TYPE_VARCHAR, queue, + GLITE_LBU_DB_TYPE_VARCHAR, rb, + GLITE_LBU_DB_TYPE_VARCHAR, ui, + GLITE_LBU_DB_TYPE_VARCHAR, state, + GLITE_LBU_DB_TYPE_TIMESTAMP, state_entered, + GLITE_LBU_DB_TYPE_TIMESTAMP, rtm_timestamp, + GLITE_LBU_DB_TYPE_BOOLEAN, 1, // active + GLITE_LBU_DB_TYPE_BOOLEAN, 1, // state_changed + GLITE_LBU_DB_TYPE_TIMESTAMP, (double)registered, + GLITE_LBU_DB_TYPE_VARCHAR, vo, // VO + + GLITE_LBU_DB_TYPE_VARCHAR, unique_str, // jobid + GLITE_LBU_DB_TYPE_VARCHAR, lb // L&B server + ); + } else { + ret = glite_lbu_ExecPreparedStmt(t->updatecmd, 12, + GLITE_LBU_DB_TYPE_VARCHAR, ce, + GLITE_LBU_DB_TYPE_VARCHAR, queue, + GLITE_LBU_DB_TYPE_VARCHAR, rb, + GLITE_LBU_DB_TYPE_VARCHAR, ui, + GLITE_LBU_DB_TYPE_VARCHAR, state, + GLITE_LBU_DB_TYPE_TIMESTAMP, state_entered, + GLITE_LBU_DB_TYPE_TIMESTAMP, rtm_timestamp, + GLITE_LBU_DB_TYPE_BOOLEAN, 1, // active + GLITE_LBU_DB_TYPE_BOOLEAN, 1, // state_changed + GLITE_LBU_DB_TYPE_TIMESTAMP, (double)registered, + + GLITE_LBU_DB_TYPE_VARCHAR, unique_str, // jobid + GLITE_LBU_DB_TYPE_VARCHAR, lb // L&B server + ); + } + + switch (ret) { + case -1: + lprintf_dbctx(t, 
ERR, "can't update " RTM_DB_TABLE_JOBS " table"); + goto quit; + case 0: + if (glite_lbu_ExecPreparedStmt(t->insertcmd, 13, + GLITE_LBU_DB_TYPE_VARCHAR, ce, + GLITE_LBU_DB_TYPE_VARCHAR, queue, + GLITE_LBU_DB_TYPE_VARCHAR, rb, + GLITE_LBU_DB_TYPE_VARCHAR, ui, + GLITE_LBU_DB_TYPE_VARCHAR, state, + GLITE_LBU_DB_TYPE_TIMESTAMP, state_entered, + GLITE_LBU_DB_TYPE_TIMESTAMP, rtm_timestamp, + GLITE_LBU_DB_TYPE_VARCHAR, unique_str, // jobid + GLITE_LBU_DB_TYPE_VARCHAR, lb, // L&B server + GLITE_LBU_DB_TYPE_BOOLEAN, 1, // active + GLITE_LBU_DB_TYPE_BOOLEAN, 1, // state_changed + GLITE_LBU_DB_TYPE_TIMESTAMP, (double)registered, + GLITE_LBU_DB_TYPE_VARCHAR, vo ? : "unknown" // VO + ) == -1) { + lprintf_dbctx(t, ERR, "can't insert to " RTM_DB_TABLE_JOBS " table"); + goto quit; + } + break; + default: + break; + } + } // prepare commands + + } +#endif + + // store message + if (config.wlcg) { + if (wlcg_store_message(t, notif, stat) != 0) goto quit; + if (config.wlcg_flush) wlcg_send_message(t); + } + +quit: + free(jobid_str); + free(state_str); + free(sql); + free(sql2); + free(state_entered_str); + free(rtm_timestamp_str); + free(lbhost); + free(unique_str); + free(regtime_str); + free(vo); + + return 0; +} + + +static int db_summary_getjobids(__attribute((unused))db_t *db, __attribute((unused))int maxn, __attribute((unused))char **jobids, int *n) { +/* + switch (db->was_summary) { + case 0: + *n = 3; + jobids[0] = strdup("https://skurut68-2.cesnet.cz:9000/FJldtiAR2EHC12C3Zz8WjQ"); + jobids[1] = strdup("https://skurut68-2.cesnet.cz:9000/AWTCWrUCr3uUh6cuRFaENQ"); + jobids[2] = strdup("https://skurut68-1.cesnet.cz:9000/o73CG2wrNdEQ909mG0Ac1g"); + break; + case 1: + *n = 1; + jobids[0] = strdup("https://skurut68-2.cesnet.cz:9000/-46Qa2ag4gLsA_Ki-3bSLw"); + + break; + default: *n = 0; break; + } + db->was_summary = (db->was_summary + 1) % 3; + return 0; +*/ + *n = 0; + return 0; +} + + +static int db_summary_setinfo(__attribute((unused))db_t *db, edg_wll_JobStat *stat) { + char 
*jobidstr; + + jobidstr = stat->jobId ? glite_jobid_unparse(stat->jobId) : NULL; + printf(RTM_TTY_GREEN "summary: jobid='%s'" RTM_TTY_RST "\n", jobidstr); + free(jobidstr); + return 0; +} + + +int rtm_summary(edg_wll_Context ctx, db_t *db) { + char *jobids[RTM_SUMMARY_JOBS]; + edg_wll_QueryRec lbquery[RTM_SUMMARY_JOBS + 1], *qr; + const edg_wll_QueryRec *lbqueryext[2]; + edg_wll_JobStat *jobstates = NULL; + int err = 0, ijob = 0, njobs = 0, iquery = 0, k, server_changed = 0; + glite_jobid_t jid = NULL; + char *server = NULL, *new_server = NULL; + unsigned int port = 0, new_port = 0; + + lprintf(NULL, INS, "Summary"); + + lbqueryext[0] = lbquery; + lbqueryext[1] = NULL; + memset(lbquery, 0, sizeof(lbquery)); + + do { + if (server) { + + if ((iquery >= RTM_SUMMARY_JOBS || server_changed || !njobs) && iquery) { + if ((err = edg_wll_QueryJobsExt(ctx, lbqueryext, 0, NULL, &jobstates)) != 0) { + lprintf_ctx(NULL, ERR, ctx, "query to '%s:%u' failed: %s", server, port, strerror(err)); + // report error jobids and skip the job (do nothing) + // TODO + } + for (k = 0; k < iquery; k++) glite_jobid_free(lbquery[k].value.j); + + if (err == 0) { + for (k = 0; jobstates[k].state != EDG_WLL_JOB_UNDEF; k++) { + if ((err = db_summary_setinfo(db, jobstates + k)) != 0) lprintf(NULL, ERR, "Can't store %d. summary info for %s:%u", k, server, port); + edg_wll_FreeStatus(jobstates + k); + } + free(jobstates); + lprintf(NULL, DBG, "query to '%s:%u' succeed", server, port); + } + + iquery = 0; + memset(lbquery, 0, sizeof(lbquery)); + if (!njobs) break; // not needed, just spare summary select + + server_changed = 0; + } else { + lprintf(NULL, DBG, "summary pushed %d. 
%s\n", iquery, jobids[ijob]); + qr = lbquery + iquery; + iquery++; + qr->attr = EDG_WLL_QUERY_ATTR_JOBID; + qr->op = EDG_WLL_QUERY_OP_EQUAL; + glite_jobid_parse(jobids[ijob], &qr->value.j); + free(jobids[ijob]); jobids[ijob] = NULL; + ijob++; + } + + } // server + + if (ijob >= njobs) { + ijob = 0; + memset(jobids, 0, sizeof(jobids)); + njobs = 0; + if ((err = db_summary_getjobids(db, RTM_SUMMARY_JOBS, jobids, &njobs)) != 0) { + lprintf(NULL, ERR, "Can't get jobs for the summary"); + return err; + } + lprintf(NULL, DBG, "summary for %d jobs", njobs); + if (!njobs) { + if (iquery) continue; // do the last query + else break; + } + } + + if ((err = glite_jobid_parse(jobids[ijob], &jid)) != 0) { + lprintf(NULL, ERR, "Can't parse jobid '%s': %s", jobids[ijob], strerror(err)); + // report error jobid and skip the job + // TODO + glite_jobid_free(jid); jid = NULL; + free(jobids[ijob]); jobids[ijob] = NULL; + ijob++; + continue; + } + free(new_server); + glite_jobid_getServerParts(jid, &new_server, &new_port); + glite_jobid_free(jid); jid = NULL; + + // first or different LB server + if (new_server && (!server || strcmp(server, new_server) != 0 || port != new_port)) { + if (server) server_changed = 1; + + free(server); + server = new_server; + port = new_port; + + new_server = NULL; + new_port = 0; + + edg_wll_SetParam(ctx, EDG_WLL_PARAM_QUERY_SERVER, server); + edg_wll_SetParam(ctx, EDG_WLL_PARAM_QUERY_SERVER_PORT, port); + lprintf(NULL, INF, "summary LB server '%s:%u'", server, port); + } + } while (njobs || iquery); + + free(server); + free(new_server); + + return err; +} + + +/* + * Updates error counter and retry times on the notification. + * + * On errors it lineary increases delay. Minimum delay is + * RTM_ERROR_REPEAT_RATE, maximum is half of the configured + * bootstrap time. 
+ * + * \param t thread context + * \param notif updated notification + * \param[IN] index notification order (for debug printing) + * \param is_error[IN] error state (to reset or increment error counter) + * + */ +static int rtm_update_error_state(thread_t *t, notif_t *notif, int index, int is_error) { + int old_error, max_count; + + old_error = notif->error; + if (is_error) { + if (!notif->error++) notif->refresh = time(NULL); + max_count = config.dive / RTM_ERROR_REPEAT_RATE / 2; + if (max_count <= 0) max_count = 1; + notif->refresh += (notif->error <= max_count ? notif->error : max_count) * RTM_ERROR_REPEAT_RATE; + lprintf(t, DBG, "planned to retry at %s", time2str(t, notif->refresh)); + } else { + notif->error = 0; + } + if (old_error != notif->error) { + lprintf(t, DBG, "error count of %d. server %s:%d changed from %d to %d", index, notif->server, notif->port, old_error, notif->error); + } + + return 0; +} + + +/** + * Updates notifications in persistent storage. Used to send WLCG messages too. 
+ * + * \param t thread context + * \param[IN] new_notif updating notification, NULL = no change in shared memory + * \param[IN] store 0=light (just shared memory), 1=save (flush, really store) + * \retval 0 if OK + */ +int rtm_update_notif(thread_t *t, notif_t *new_notif, int store) { + notif_t *notif; + int retval = 1; + + pthread_mutex_lock(&db.lock); + + if (new_notif) { + if ((notif = db_search_notif_by_server(db.notifs, db.n, new_notif->server, new_notif->port, new_notif->type)) == NULL) { + if (db_add_notif(strdup(new_notif->id_str), new_notif->type, new_notif->valid, new_notif->refresh, new_notif->last_update, strdup(new_notif->server), new_notif->port, 1) == NULL) { + lprintf(t, ERR, "can't realloc"); + goto quit; + } + } else { + notif_free(notif); + notif_copy(notif, new_notif); + } + } + + wlcg_send_message(t); + + if (store) { + if (db_save_notifs(t) != 0) goto quit; + } + retval = 0; + +quit: + pthread_mutex_unlock(&db.lock); + return retval; +} + + +int rtm_drop_notif(thread_t *t, char *notifid, int store) { + notif_t *notif; + int retval = 1; + + pthread_mutex_lock(&db.lock); + if ((notif = db_search_notif(db.notifs, db.n, notifid)) != NULL) { + notif_invalidate(notif); + if (store) + if (db_save_notifs(t) != 0) goto quit; + } + retval = 0; +quit: + pthread_mutex_unlock(&db.lock); + return retval; +} + + +int load_notifs_file() { + FILE *f; + char *results[5]; + notif_t *new_notif; + int err; + char *notifidstr; + time_t valid, refresh; + double last_update; + edg_wll_NotifId id; + int type; + int retval = 1; + + if ((f = fopen(config.notif_file, "rt")) == NULL) { + lprintf(NULL, WRN, "WARNING: can't open notification file '%s'", config.notif_file); + return 0; + } + + results[0] = malloc(5 * 512); + results[1] = results[0] + 512; + results[2] = results[0] + 1024; + results[3] = results[0] + 1536; + results[4] = results[0] + 2048; + while ((err = fscanf(f, "%511[^\t]\t%511[^\t]\t%511[^\t]\t%511[^\t]\t%511[^\t\r\n]\n", results[0], results[1], 
results[2], results[3], results[4])) == 5) { + notifidstr = results[0]; + if ((type = rtm_str2notiftype(results[1])) == -1) { + lprintf(NULL, ERR, "unknown notification type '%s' in '%s'", results[1], notifidstr); + continue; + } + + valid = 0; + if (results[2] && strcasecmp(results[2], "-") != 0) { + valid = glite_lbu_StrToTime(results[2]); + } + + refresh = 0; + if (results[3] && strcasecmp(results[3], "-") != 0) { + refresh = glite_lbu_StrToTime(results[2]); + } + + last_update = 0; + if (results[4] && strcasecmp(results[4], "-") != 0) { + last_update = glite_lbu_StrToTimestamp(results[4]); + } + + if ((new_notif = db_add_notif(strdup(notifidstr), type, valid, refresh, last_update, NULL, 0, 0)) == NULL) { + lprintf(NULL, ERR, "can't alloc"); + goto quit; + } + if (edg_wll_NotifIdParse(notifidstr, &id) != 0) { + lprintf(NULL, WRN, "can't parse notification ID '%s'", notifidstr); + notif_free(new_notif); + db.n--; + continue; + } + edg_wll_NotifIdGetServerParts(id, &new_notif->server, &new_notif->port); + edg_wll_NotifIdFree(id); + } + if (err == EOF) retval = 0; + else lprintf(NULL, ERR, "can't parse notification file '%s'", config.notif_file); +quit: + fclose(f); + free(results[0]); + return retval; +} + + +#if defined(WITH_RTM_SQL_STORAGE) && defined(WITH_LBU_DB) +int load_notifs_sql() { + notif_t *new_notif; + int err; + char *notifidstr; + time_t valid, refresh; + double last_update; + edg_wll_NotifId id; + int type; + int retval = 1; + glite_lbu_Statement stmt = NULL; + char *results[5]; + + if (glite_lbu_ExecSQL(db.dbctx, "SELECT notifid, notiftype, valid, refresh, last_update FROM notifs WHERE notifid IS NOT NULL", &stmt) == -1) { + lprintf_dbctx(NULL, ERR, "fetching notification failed"); + goto quit; + } + while ((err = glite_lbu_FetchRow(stmt, 5, NULL, results)) > 0) { + notifidstr = results[0]; + results[0] = NULL; + if ((type = rtm_str2notiftype(results[1])) == -1) { + lprintf(NULL, ERR, "unknown notification type '%s' in '%s'", results[1], 
notifidstr); + free(results[1]); + free(results[2]); + free(results[3]); + free(results[4]); + continue; + } + free(results[1]); + + valid = 0; + if (results[2] && results[2][0]) { + valid = glite_lbu_DBToTime(db.dbctx, results[2]); + } + free(results[2]); + + refresh = 0; + if (results[3] && results[3][0]) { + refresh = glite_lbu_DBToTime(db.dbctx, results[3]); + } + free(results[3]); + + last_update = 0; + if (results[4] && results[4][0]) { + last_update = glite_lbu_DBToTimestamp(db.dbctx, results[4]); + } + free(results[4]); + + if ((new_notif = db_add_notif(notifidstr, type, valid, refresh, last_update, NULL, 0, 0)) == NULL) { + free(notifidstr); + lprintf(NULL, ERR, "can't alloc"); + goto quit; + } + if (edg_wll_NotifIdParse(notifidstr, &id) != 0) { + lprintf(NULL, WRN, "can't parse notification IDs '%s'", notifidstr); + notif_free(new_notif); + db.n--; + continue; + } + edg_wll_NotifIdGetServerParts(id, &new_notif->server, &new_notif->port); + edg_wll_NotifIdFree(id); + } + if (err == 0) retval = 0; + else lprintf_dbctx(NULL, ERR, "fetching failed"); +quit: + if (stmt) glite_lbu_FreeStmt(&stmt); + return retval; +} +#endif + + +int load_notifs() { + int ret; + + pthread_mutex_lock(&db.lock); + +#if defined(WITH_RTM_SQL_STORAGE) && defined(WITH_LBU_DB) + if (!db.dbctx) ret = load_notifs_file(); + else ret = load_notifs_sql(); +#else + ret = load_notifs_file(); +#endif + + pthread_mutex_unlock(&db.lock); + + return ret; +} + + +void db_free_notifs() { + int i; + + for (i = 0; i < db.n; i++) notif_free(db.notifs + i); + free(db.notifs); + db.notifs = NULL; + db.n = db.maxn = 0; +} + + +void *notify_thread(void *thread_data) { + struct sockaddr_in addr; + int i, j, err; + time_t now, bootstrap; + edg_wll_NotifId notifid; + struct timeval to; + edg_wll_JobStat jobstat, *jobstates; + notif_t *notif, *notif_jdl; + edg_wll_QueryRec *conditions[3] = { NULL, NULL, NULL }, condition[2], condition2[2]; + int sock = -1, updated = 0, received = 0; + thread_t *t = (thread_t 
*)thread_data; + edg_wll_Context ctx = NULL; + int flags = 0; + + const int one = 1; + + lprintf(t, DBG, "thread started"); + + if (!t->nservers) goto exit; + + // LB + if (edg_wll_InitContext(&ctx) != 0) { + lprintf(t, ERR, "can't init LB context: %s", strerror(errno)); + goto exit; + } + if (config.cert) edg_wll_SetParam(ctx, EDG_WLL_PARAM_X509_CERT, config.cert); + if (config.key) edg_wll_SetParam(ctx, EDG_WLL_PARAM_X509_KEY, config.key); + + // socket + if ((sock = socket(PF_INET, SOCK_STREAM, 0)) < 0) { + lprintf(t, ERR, "can't create socket: %s", strerror(errno)); + goto exit; + } + lprintf(t, DBG, "socket created: %d", sock); + + setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)); + + memset(&addr, 0, sizeof addr); + addr.sin_family = AF_INET; + if (listen_port) addr.sin_port = htons(listen_port + t->id); + addr.sin_addr.s_addr = INADDR_ANY; + if (bind(sock, (const struct sockaddr*)&addr, sizeof addr) != 0) { + lprintf(t, ERR, "can't bind socket: %s, port = %d", strerror(errno), listen_port ? 
listen_port + t->id : -1); + goto exit; + } + if (listen(sock, 10) != 0) { + lprintf(t, ERR, "can't listen on socket: %s", strerror(errno)); + goto exit; + } + +#ifdef WITH_LBU_DB + if (db_init(t, &t->dbctx) == 0) + if ((t->dbcaps & GLITE_LBU_DB_CAP_PREPARED) != 0) { + if (glite_lbu_PrepareStmt(t->dbctx, "INSERT INTO " DBAMP RTM_DB_TABLE_JOBS DBAMP " " + "(ce, queue, rb, ui, state, state_entered, rtm_timestamp, jobid, lb, active, state_changed, registered, vo)" + " VALUES " + "($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13)", + &t->insertcmd) != 0 || glite_lbu_PrepareStmt(t->dbctx, "UPDATE " DBAMP RTM_DB_TABLE_JOBS DBAMP " " + "SET ce=$1, queue=$2, rb=$3, ui=$4, state=$5, state_entered=$6, rtm_timestamp=$7, active=$8, state_changed=$9, registered=$10 WHERE jobid=$11 AND lb=$12", + &t->updatecmd) != 0 || glite_lbu_PrepareStmt(t->dbctx, "UPDATE " DBAMP RTM_DB_TABLE_JOBS DBAMP " " + "SET ce=$1, queue=$2, rb=$3, ui=$4, state=$5, state_entered=$6, rtm_timestamp=$7, active=$8, state_changed=$9, registered=$10, vo=$11 WHERE jobid=$12 AND lb=$13", + &t->updatecmd_vo) != 0) { + lprintf_dbctx(t, ERR, "can't create prepare commands"); + lprintf(t, DBG, "insertcmd=%p, updatecmd=%p, updatecmd_vo=%p", t->insertcmd, t->updatecmd, t->updatecmd_vo); + quit = RTM_QUIT_PRESERVE; + } + } +#endif + + // + // notifications loop: + // - refresh/create with bootstrap + // - receive & store changes + // + while (!quit) { + now = time(NULL); + t->first_refresh = now + RTM_NOTIF_LOOP_MAX_TIME; + for (i = 0; i < t->nservers; i++) { + notif = t->notifs + i; + if (!notif->active) { + lprintf(t, INS, "inactive %d. notification '%s' (%s)", i, notif->id_str, rtm_notiftype2str(notif->type)); + continue; + } + // skip invalid LBs if not planned yet + if (notif->error) { + if (notif->refresh > now) { + lprintf(t, INS, "not planned to retry previously failed %d. 
notification '%s' (%s), plan %s", i, notif->id_str, rtm_notiftype2str(notif->type), time2str(t, notif->refresh)); + if (t->first_refresh > notif->refresh) t->first_refresh = notif->refresh; + continue; + } + lprintf(t, DBG, "retry previously failed %d. notification '%s' (%s)", i, notif->id_str, rtm_notiftype2str(notif->type)); + } + edg_wll_SetParam(ctx, EDG_WLL_PARAM_NOTIF_SERVER, notif->server); + edg_wll_SetParam(ctx, EDG_WLL_PARAM_NOTIF_SERVER_PORT, notif->port); + now = time(NULL); + if (!notif->valid || notif->valid - RTM_NOTIF_TTL_TO_DEAD <= now || !notif->id_str) { + // new notification + lprintf(t, DBG, "host %s:%d, valid %s, notifstr '%s', notifid %p", notif->server, notif->port, time2str(t, notif->valid), notif->id_str, notif->id); + + // crazy inter-notif interactions + switch (notif->type) { + case RTM_NOTIF_TYPE_STATUS: + // STATUS must wait for existing JDL notification + notif_jdl = db_search_notif_by_server(t->notifs, t->nservers, notif->server, notif->port, RTM_NOTIF_TYPE_JDL); + if (!notif_jdl || !notif_jdl->valid || notif_jdl->valid - RTM_NOTIF_TTL_TO_DEAD <= now || !notif_jdl->id_str) { + lprintf(t, DBG, "not created %d. notification for %s:%d (%s), waiting for %d. (JDL)", i, notif->server, notif->port, rtm_notiftype2str(notif->type), i + RTM_NOTIF_TYPE_JDL - RTM_NOTIF_TYPE_STATUS); + // next retry of STATUS stright before the JDL + if (notif_jdl) { + notif->refresh = notif_jdl->refresh; + if (t->first_refresh > notif->refresh) t->first_refresh = notif->refresh; + } + continue; + } + break; + default: + break; + } + bootstrap = notif->valid > RTM_NOTIF_TTL_TO_DEAD ? notif->valid - RTM_NOTIF_TTL_TO_DEAD : 0; + if (config.dive > 0 && now - bootstrap > config.dive) { + bootstrap = now - config.dive; + lprintf(t, INS, "dive from %s:%d cut to %s (max. 
dive %d)", notif->server, notif->port, time2str(t, bootstrap), config.dive); + } + // explicitly drop old (failed) notification, if any + if (notif->id_str) { + if (notif->id) { + if (edg_wll_NotifDrop(ctx, notif->id)) lprintf_ctx(t, WRN, ctx, "dropping %d. notification '%s' (%s) failed", i, notif->id_str, rtm_notiftype2str(notif->type)); + } + // remove from the persistent storage now, + // invalidate && update + rtm_drop_notif(t, notif->id_str, 1); + // free the notification in the current thread + notif_invalidate(notif); + now = time(NULL); + } + // create the new notification + notif->valid = now + config.ttl; + + memset(conditions, 0, sizeof(conditions)); + memset(condition, 0, sizeof(condition)); + memset(condition2, 0, sizeof(condition2)); + flags = 0; + switch(notif->type) { +#ifndef WITH_OLD_LB + case RTM_NOTIF_TYPE_STATUS: + conditions[0] = condition; + condition[0].attr = EDG_WLL_QUERY_ATTR_STATUS; + condition[0].op = EDG_WLL_QUERY_OP_CHANGED; + break; + case RTM_NOTIF_TYPE_JDL: + conditions[0] = condition; + conditions[1] = condition2; + condition[0].attr = EDG_WLL_QUERY_ATTR_STATUS; + condition[0].op = EDG_WLL_QUERY_OP_EQUAL; + condition[0].value.i = EDG_WLL_JOB_WAITING; + condition2[0].attr = EDG_WLL_QUERY_ATTR_JDL_ATTR; + condition2[0].op = EDG_WLL_QUERY_OP_CHANGED; + flags = EDG_WLL_STAT_CLASSADS; + break; +#endif + case RTM_NOTIF_TYPE_OLD: + flags = EDG_WLL_STAT_CLASSADS; + break; + case RTM_NOTIF_TYPE_DONE: + conditions[0] = condition; + condition[0].attr = EDG_WLL_QUERY_ATTR_STATUS; + condition[0].op = EDG_WLL_QUERY_OP_EQUAL; + condition[0].value.i = EDG_WLL_JOB_DONE; + flags = EDG_WLL_STAT_CHILDREN; + break; + default: + assert(notif->type != notif->type); // unknown type + break; + } + if (edg_wll_NotifNew(ctx, (edg_wll_QueryRec const * const *) conditions, flags, sock, config.local_address, ¬if->id, ¬if->valid)) { + memset(condition,0,sizeof condition); + lprintf_ctx(t, ERR, ctx, "can't create notification on %s:%d", notif->server, 
notif->port); + notif->valid = 0; + notif->id = NULL; + rtm_update_error_state(t, notif, i, 1); + if (t->first_refresh > notif->refresh) t->first_refresh = notif->refresh; + continue; + } + notif->id_str = edg_wll_NotifIdUnparse(notif->id); + lprintf(t, INF, "created %d. notification '%s' (%s), valid: %s", i, notif->id_str, rtm_notiftype2str(notif->type), time2str(t, notif->valid)); + + // bootstrap + memset(condition, 0, sizeof(condition)); + flags = 0; + switch (notif->type) { + case RTM_NOTIF_TYPE_STATUS: + condition[0].attr = EDG_WLL_QUERY_ATTR_LASTUPDATETIME; + condition[0].op = EDG_WLL_QUERY_OP_WITHIN; + condition[0].value.t.tv_sec = bootstrap; + condition[0].value2.t.tv_sec = now; + flags = EDG_WLL_STAT_CLASSADS; + break; + case RTM_NOTIF_TYPE_OLD: + break; + case RTM_NOTIF_TYPE_JDL: + break; + case RTM_NOTIF_TYPE_DONE: + break; + default: + assert(notif->type != notif->type); // unknown type + break; + } + + if (condition[0].attr) { + + lprintf(t, INF, "bootstrap %s:%d (%d), time %s..%d(now)", notif->server, notif->port, i, time2str(t, bootstrap), now); + edg_wll_SetParam(ctx, EDG_WLL_PARAM_QUERY_SERVER, notif->server); + edg_wll_SetParam(ctx, EDG_WLL_PARAM_QUERY_SERVER_PORT, notif->port); + if ((err = edg_wll_QueryJobs(ctx, condition, flags, NULL, &jobstates)) != 0 && err != ENOENT) { + lprintf_ctx(t, ERR, ctx, "can't bootstrap jobs on %s:%d, time %s..%d(now)", notif->server, notif->port, time2str(t, bootstrap), now); + // + // destroy the notification after failed bootstrap + // + // This error means there is something nasty on the remote LB server. + // It could lost some messages between recreating notification, + // so destroy this notification now. + // + if (edg_wll_NotifDrop(ctx, notif->id)) { + lprintf_ctx(t, WRN, ctx, "dropping %d. notification '%s' (%s) after failed bootstrap failed", i, notif->id_str, rtm_notiftype2str(notif->type)); + } else { + lprintf(t, INF, "dropped %d. 
notification '%s' (%s) after failed bootstrap", i, notif->id_str, rtm_notiftype2str(notif->type)); + } + // free the notification instance in the current thread + // (not propagated to the persistent storage yet) + edg_wll_NotifIdFree(notif->id); + notif->id = NULL; + free(notif->id_str); + notif->id_str = NULL; + notif->valid = 0; + rtm_update_error_state(t, notif, i, 1); + if (t->first_refresh > notif->refresh) t->first_refresh = notif->refresh; + continue; + } else { + for (j = 0; jobstates[j].state != EDG_WLL_JOB_UNDEF; j++) { + notif->last_update = jobstates[j].lastUpdateTime.tv_sec + jobstates[j].lastUpdateTime.tv_usec / 1000000.0; + db_store_change(t, notif, i, jobstates + j); + edg_wll_FreeStatus(jobstates + j); + } + free(jobstates); + lprintf(t, INF, "bootstrap %s:%d (%d), found %d jobs", notif->server, notif->port, i, j); + rtm_update_error_state(t, notif, i, 0); + updated = 1; + } + + } else { + rtm_update_error_state(t, notif, i, 0); + updated = 1; + } + } else if (!notif->id) { + // rebind existing still valid notification + if (edg_wll_NotifIdParse(notif->id_str, ¬if->id)) { + lprintf_ctx(t, WRN, ctx, "can't parse %d. notification '%s' (%s)", i, notif->id_str, rtm_notiftype2str(notif->type)); + notif->valid = 0; + notif->id = NULL; + i--; + continue; + } + notif->valid = now + config.ttl; + if (edg_wll_NotifBind(ctx, notif->id, sock, config.local_address, ¬if->valid)) { + lprintf_ctx(t, WRN, ctx, "can't rebind %d. notification '%s' (%s)", i, notif->id_str, rtm_notiftype2str(notif->type)); + notif->valid = 0; + edg_wll_NotifIdFree(notif->id); + notif->id = NULL; + i--; + continue; + } + lprintf(t, INF, "bound %d. 
notification '%s' (%s), valid: %s", i, notif->id_str, rtm_notiftype2str(notif->type), time2str(t, notif->valid)); + rtm_update_error_state(t, notif, i, 0); + // no bootstrap here, reliable delivery will send changes + updated = 1; + } else if (!notif->refresh || notif->refresh <= now) { + // refresh notification + time_t valid; + + valid = now + config.ttl; + if (edg_wll_NotifRefresh(ctx, notif->id, &valid)) { + lprintf_ctx(t, WRN, ctx, "can't refresh %d. notification '%s' (%s), will try up to %s...", i, notif->id_str, rtm_notiftype2str(notif->type), time2str(t, notif->valid - RTM_NOTIF_TTL_TO_DEAD)); + // refresh failed, just move the refresh time... + updated = 1; + } else { + notif->valid = valid; + lprintf(t, INF, "refreshed %d. notification '%s' (%s), valid: %s", i, notif->id_str, rtm_notiftype2str(notif->type), time2str(t, notif->valid)); + rtm_update_error_state(t, notif, i, 0); + updated = 1; + } + } else { + lprintf(t, INS, "no change in %d. notification '%s' (%s)", i, notif->id_str, rtm_notiftype2str(notif->type)); + } + + if (updated) { + assert(notif->valid); + // create or refresh OK, bootstrap if needed OK, store the new notification + updated = 0; + notif->refresh = notif->valid ? (now + ((notif->valid - now) >> 1)) : 0; + // quicker refresh (or recreate) if needed + now = time(NULL); + if (notif->valid && now >= notif->refresh) { + lprintf(t, WRN, "operation not in time, refreshing/recreating the notification '%s' (%s) now", notif->id_str, rtm_notiftype2str(notif->type)); + i--; + continue; + } + rtm_update_notif(t, notif, 1); + } + + // compute time of the next event from the new refresh on notification + if (t->first_refresh > notif->refresh) t->first_refresh = notif->refresh; + } + + // receive + // + // cycle here locally around NotifReceive, we know about next + // refresh time + // + lprintf(t, DBG, "waiting for the notifications up to %s...", t->first_refresh ? 
time2str(t, t->first_refresh) : "0 (no wait)"); + while (t->first_refresh > now && !quit) { + to.tv_sec = t->first_refresh - now; + if (to.tv_sec > RTM_NOTIF_READ_TIMEOUT) to.tv_sec = RTM_NOTIF_READ_TIMEOUT; + to.tv_usec = 0; + memset(&jobstat, 0, sizeof(jobstat)); + notifid = NULL; + err = edg_wll_NotifReceive(ctx, sock, &to, &jobstat, ¬ifid); + lprintf(t, INS, "received, err=%d%s", err, err == ETIMEDOUT ? " (timeout)":""); + if (err != 0) { + if (err != ETIMEDOUT) { + lprintf_ctx(t, ERR, ctx, "can't receive notifications"); + // don't cycle too quick... + sleep(1); + } + // lazily refresh persistent storage here, only after timeouts + if (received) { + lprintf(t, DBG, "storing notification times"); + rtm_update_notif(t, NULL, 1); + received = 0; + } + } else { + char *jobidstr, *notifidstr; + double last_update; + + if (notifid) { + jobidstr = jobstat.jobId ? glite_jobid_unparse(jobstat.jobId) : NULL; + notifidstr = notifid ? edg_wll_NotifIdUnparse(notifid) : NULL; + for (i = 0; i < t->nservers && (!t->notifs[i].id_str || strcmp(notifidstr, t->notifs[i].id_str) != 0); i++); + if (i == t->nservers) { + lprintf(t, ERR, "received notify '%s' not found", notifidstr); + } else { + received = 1; + notif = t->notifs + i; + // + // last changed time from the arrived notification + // + last_update = jobstat.lastUpdateTime.tv_sec + jobstat.lastUpdateTime.tv_usec / 1000000.0; + if (last_update > notif->last_update) notif->last_update = last_update; + db_store_change(t, notif, i, &jobstat); + rtm_update_notif(t, notif, 0); + } + free(jobidstr); + free(notifidstr); + } + } + if (jobstat.state != EDG_WLL_JOB_UNDEF) edg_wll_FreeStatus(&jobstat); + if (notifid) edg_wll_NotifIdFree(notifid); + + now = time(NULL); + } // receive + } // main loop + +exit: + if (sock != -1) close(sock); +// for (i = 0; conditions[i]; i++) free(conditions[i]); + if (t->nservers && quit != RTM_QUIT_PRESERVE && quit != RTM_QUIT_RELOAD) { + for (i = 0; i < t->nservers; i++) { + if (t->notifs[i].id) { + 
char *notifidstr; + + notifidstr = edg_wll_NotifIdUnparse(t->notifs[i].id); + edg_wll_SetParam(ctx, EDG_WLL_PARAM_NOTIF_SERVER, t->notifs[i].server); + edg_wll_SetParam(ctx, EDG_WLL_PARAM_NOTIF_SERVER_PORT, t->notifs[i].port); + if (edg_wll_NotifDrop(ctx, t->notifs[i].id)) { + lprintf_ctx(t, WRN, ctx, "can't drop %s (%s)", notifidstr, rtm_notiftype2str(t->notifs[i].type)); + } else { + lprintf(t, INF, "notification %s (%s) dropped", notifidstr, rtm_notiftype2str(t->notifs[i].type)); + } + rtm_drop_notif(t, t->notifs[i].id_str, 0); + free(notifidstr); + } + } + rtm_update_notif(t, NULL, 1); + } +#ifdef WITH_LBU_DB + if (t->insertcmd) glite_lbu_FreeStmt(&t->insertcmd); + if (t->updatecmd) glite_lbu_FreeStmt(&t->updatecmd); + if (t->updatecmd_vo) glite_lbu_FreeStmt(&t->updatecmd_vo); + db_free(t, t->dbctx); +#endif + if (ctx) edg_wll_FreeContext(ctx); + lprintf(t, DBG, "thread ended"); + pthread_exit(NULL); + return NULL; +} + + +int reconcile_config_db() { + int i, j, n, type, typestart, typeend; + notif_t *a, *b; + edg_wll_Context ctx = NULL; + edg_wll_NotifId notifid; + + if (!config.cleanup) { + if (config.silly) { + typestart = RTM_NOTIF_TYPE_OLD; + typeend = RTM_NOTIF_TYPE_OLD; + } else { + typestart = RTM_NOTIF_TYPE_STATUS; + typeend = RTM_NOTIF_TYPE_JDL; + } + n = db.n; + for (i = 0; i < config.nservers; i++) { + a = config.notifs + i; + for (type = typestart; type <= typeend; type++) + { + b = db_search_notif_by_server(db.notifs, n, a->server, a->port, type); + if (!b) b = db_add_notif(NULL, type, 0, 0, 0, strdup(a->server), a->port, 1); + else lprintf(NULL, INF, "found previous notification '%s' (%s)", b->id_str, rtm_notiftype2str(b->type)); + b->active = 1; + } + } + } + + if (edg_wll_InitContext(&ctx) != 0) { + lprintf(NULL, ERR, "can't init LB context: %s", strerror(errno)); + return 1; + } + if (config.cert) edg_wll_SetParam(ctx, EDG_WLL_PARAM_X509_CERT, config.cert); + if (config.key) edg_wll_SetParam(ctx, EDG_WLL_PARAM_X509_KEY, config.key); + for (j = 
0; j < db.n; j++) { + if (!db.notifs[j].active) { + if (db.notifs[j].id_str) { + lprintf(NULL, INF, "dropping previous notification '%s' (%s)", db.notifs[j].id_str, rtm_notiftype2str(db.notifs[j].type)); + if (edg_wll_NotifIdParse(db.notifs[j].id_str, ¬ifid)) { + lprintf(NULL, WRN, "can't parse notification ID '%s'", db.notifs[j].id_str); + continue; + } + edg_wll_SetParam(ctx, EDG_WLL_PARAM_NOTIF_SERVER, db.notifs[j].server); + edg_wll_SetParam(ctx, EDG_WLL_PARAM_NOTIF_SERVER_PORT, db.notifs[j].port); + if (edg_wll_NotifDrop(ctx, notifid) != 0) { + lprintf_ctx(NULL, WRN, ctx, "can't drop %s (%s)", db.notifs[j].id_str, rtm_notiftype2str(db.notifs[j].type)); + } + edg_wll_NotifIdFree(notifid); + notif_invalidate(db.notifs + j); + } + } + } + edg_wll_FreeContext(ctx); + + return db_save_notifs(NULL); +} + + +void usage(const char *prog) { + fprintf(stderr, "Usage: %s [options]\n" + " -h, --help display this help\n" + " -v, --version display version\n" + " -d, --debug LEVEL debug level (0=error,1=warn,2=info,3=debug,4=insane,\n" + " +8=not fork)\n" + " -D, --daemonize daemonize\n" + " -i, --pidfile the file with process ID\n" + " -s, --threads N number of slave threads\n" + " -t, --ttl TIME time to live (validity) of the notifications\n" + " in seconds (%d)\n" + " -H, --history historic dive in seconds (<=0 is unlimited)\n" + " -c, --config config file name (list of LB servers), precedence before " RTM_DB_TABLE_LBS " table\n" + " -n, --notifs file for persistent information about active\n" + " notifications\n" +#ifdef WITH_LBU_DB + " -m, --pg db connection string (user/pwd@server:dbname)\n" +#endif + " -C, --cert X509 certificate file\n" + " -K, --key X509 key file\n" + " -o, --old \"silly\" mode for old L&B 3.1 servers\n" + " -l, --cleanup clean up the notifications and exit\n" + " -w, --wlcg enable messaging for dashboard\n" + " --wlcg-binary full path to msg-publish binary\n" + " --wlcg-topic topic for msg-publish\n" + " --wlcg-config config file for msg-publish\n" 
+ " --wlcg-flush send message on each notification\n" + , prog, RTM_NOTIF_TTL); + fprintf(stderr, "\n"); + fprintf(stderr, "List of L&B servers: first it's read the config file if specified (-c option). When config file is not used and connection to database is specified, it's tried DB table " RTM_DB_TABLE_LBS ".\n"); + fprintf(stderr, "\n"); +} + + +int config_preload(int argn, char *argv[]) { + int opt, intval, index; + char *err, *s; + + while ((opt = getopt_long(argn, argv, opts_line, opts, &index)) != EOF) { + switch (opt) { + case 'h': + case '?': + usage(argv[0]); + return 1; + case 'v': + fprintf(stderr, "%s: %s\n", argv[0], rcsid); + return 1; + case 'd': + intval = strtol(optarg, &err, 10); + if (err && err[0]) { + lprintf(NULL, ERR, "debug level number required"); + return 2; + } + config.debug = (intval & DEBUG_LEVEL_MASK); + config.guard = !(intval & DEBUG_GUARD_MASK); + break; + case 'D': + config.daemonize = 1; + break; + case 'i': + config.pidfile = strdup(optarg); + break; + case 's': + intval = strtol(optarg, &err, 10); + if (err && err[0]) { + lprintf(NULL, ERR, "number of threads required"); + return 2; + } + config.nthreads = intval; + break; + case 't': + intval = strtol(optarg, &err, 10); + if (err && err[0]) { + lprintf(NULL, ERR, "requested validity in seconds required"); + return 2; + } + config.ttl = intval; + break; + case 'H': + intval = strtol(optarg, &err, 10); + if (err && err[0]) { + lprintf(NULL, ERR, "historic dive in seconds required"); + return 2; + } + config.dive = intval; + break; + case 'c': + free(config.config_file); + config.config_file = strdup(optarg); + break; + case 'n': + free(config.notif_file); + config.notif_file = strdup(optarg); + break; + case 'p': + listen_port = atoi(optarg); + break; + case 'm': + free(config.dbcs); + config.dbcs = strdup(optarg); + break; + case 'C': + free(config.cert); + config.cert = strdup(optarg); + break; + case 'K': + free(config.key); + config.key = strdup(optarg); + break; + case 
'l': + config.cleanup = 1; + break; + case 'w': + config.wlcg = 1; + break; + case 'o': + config.silly = 1; + break; + case 0: + switch(index) { + case 0: + config.wlcg_binary = strdup(optarg); + break; + case 1: + config.wlcg_config = strdup(optarg); + break; + case 2: + config.wlcg_topic = strdup(optarg); + break; + case 3: + config.wlcg_flush = 1; + break; + default: + lprintf(NULL, ERR, "crazy option, index %d", index); + break; + } + break; + } + } + if (!config.notif_file) config.notif_file = strdup(RTM_FILE_NOTIFS); + if (config.wlcg) { + if (!config.wlcg_binary) config.wlcg_binary = strdup(WLCG_BINARY); + if (!config.wlcg_config) config.wlcg_config = strdup(WLCG_CONFIG); + if (!config.wlcg_topic) config.wlcg_topic = strdup(WLCG_TOPIC); + } +#ifdef WITH_OLD_LB + if (!config.silly) { + lprintf(NULL, WRN, "compiled with older LB library, switching on silly mode"); + config.silly = 1; + } +#endif + + if ((s = getenv("GLITE_LB_HARVESTER_NO_REMOVE")) != NULL) { + if (s[0] != '0' && strcasecmp(s, "false") != 0) config.wlcg_no_remove = 1; + } + + if (INF <= config.debug) { + lprintf(NULL, INF, "threads: %d", config.nthreads); + lprintf(NULL, INF, "notifs ttl: %d", config.ttl); + lprintf(NULL, INF, "historic dive: %d", config.dive); + if (config.dbcs) { + lprintf(NULL, INF, "database storage: '%s'", config.dbcs); + } else { + lprintf(NULL, INF, "file storage: '%s'", config.notif_file); + } + lprintf(NULL, INF, "WLCG messaging: %s%s", config.wlcg ? "enabled" : "disabled", config.wlcg_no_remove ? " (not removing tmp files)" : ""); + lprintf(NULL, INF, "debug level: %d", config.debug); + lprintf(NULL, INF, "daemonize: %s", config.daemonize ? "enabled" : "disabled"); + lprintf(NULL, INF, "fork guard: %s", config.guard ? "enabled" : "disabled"); + lprintf(NULL, INF, "silly compatibility mode: %s", config.silly ? 
"enabled" : "disabled"); + } + + return 0; +} + + +int config_load() { + char line[LINE_MAX], *port, *s; + FILE *f; + void *tmp; + int i, n; +#ifdef WITH_LBU_DB + char *results[2]; + char *result = NULL; + glite_lbu_Statement stmt = NULL; + int err = 0; +#endif + + if (config.config_file) { + if ((f = fopen(config.config_file, "rt")) == NULL) { + lprintf(NULL, ERR, "can't open config file '%s': %s", config.config_file, strerror(errno)); + return 1; + } + + n = 10; + while (fgets(line, sizeof(line), f) != NULL) { + if ((s = strpbrk(line, "\n\r")) != NULL) s[0] = '\0'; + if (line[0] == '\0' || line[0] == '#') continue; + if (config.nservers >= n || !config.notifs) { + n = 2 * n; + if ((tmp = (notif_t *)realloc(config.notifs, n * sizeof(notif_t))) == NULL) { + lprintf(NULL, ERR, "insufficient memory"); + return 1; + } + config.notifs = tmp; + memset(config.notifs + config.nservers, 0, (n - config.nservers) * sizeof(notif_t)); + } + if ((port = strrchr(line, ':')) != NULL) { port[0] = '\0'; port++; } + config.notifs[config.nservers].server = strdup(line); + config.notifs[config.nservers++].port = (port && port[0]) ? 
atoi(port) : GLITE_JOBID_DEFAULT_PORT; + } + + fclose(f); + } else +#ifdef WITH_LBU_DB + if (db.dbctx) { + if ((err = glite_lbu_ExecSQL(db.dbctx, "SELECT COUNT(*) FROM " RTM_DB_TABLE_LBS, &stmt)) < 0 || + (err = glite_lbu_FetchRow(stmt, 1, NULL, &result)) < 0) { + goto err; + } + if (err == 0) { + lprintf(NULL, ERR, "can't count LB servers"); + goto err; + } + n = atoi(result); + free(result); + glite_lbu_FreeStmt(&stmt); + + config.notifs = calloc(n, sizeof(notif_t)); + config.nservers = 0; + if ((err = glite_lbu_ExecSQL(db.dbctx, "SELECT DISTINCT lb, port FROM " RTM_DB_TABLE_LBS, &stmt)) < 0) { + goto err; + } + while (config.nservers < n && (err = glite_lbu_FetchRow(stmt, 2, NULL, results)) > 0) { + config.notifs[config.nservers].server = strdup(results[0]); + config.notifs[config.nservers++].port = atoi(results[1]); + free(results[0]); + free(results[1]); + } + if (err < 0) goto err; + glite_lbu_FreeStmt(&stmt); + } +#endif + + if (INF <= config.debug) { + lprintf(NULL, INF, "servers: %d", config.nservers); + for (i = 0; i < config.nservers; i++) lprintf(NULL, INF, " %s:%d", config.notifs[i].server, config.notifs[i].port); + } + + return 0; +#ifdef WITH_LBU_DB +err: + if (err) lprintf_dbctx(NULL, ERR, "can't get LB servers"); + if (stmt) glite_lbu_FreeStmt(&stmt); + if (result) free(result); +#endif + return 1; +} + + +void config_free() { + int i; + + for (i = 0; i < config.nservers; i++) free(config.notifs[i].server); + free(config.config_file); + free(config.notif_file); + free(config.pidfile); + free(config.dbcs); + free(config.notifs); + free(config.cert); + free(config.key); + free(config.wlcg_binary); + free(config.wlcg_config); + free(config.wlcg_topic); +} + + +// on keyboard cleanup notification, on termination signal break with +// notification preserved +void handle_signal(int num) { + lprintf(NULL, INF, "received signal %d", num); + switch (num) { + case SIGINT: + case SIGTERM: + default: + quit = RTM_QUIT_PRESERVE; + break; + } +} + + +int 
main(int argn, char *argv[]) { + struct sigaction sa; + sigset_t sset; + int i, j, k, gran, mod, nnotifs; + double t1, t2, last_summary = 0, start_time; + thread_t *t; + struct stat pstat; + pid_t watched; + int status; + edg_wll_Context ctx = NULL; + int retval = RTM_EXIT_ERROR; + int cert_mtime = 0; + + // load basic configurations + switch (config_preload(argn, argv)) { + case 0: + break; + case 1: + retval = RTM_EXIT_OK; + goto quit_guard0; + break; + default: + retval = RTM_EXIT_ERROR; + goto quit_guard0; + } + + // daemonize + if (config.pidfile) { + FILE *f; + char s[256]; + + if ((f = fopen(config.pidfile, "rt"))) { + if (fscanf(f, "%255[^\n\r]", s) == 1) { + if (kill(atoi(s),0)) { + lprintf(NULL, WRN, "stale pidfile, pid = %s, pidfile '%s'", s, config.pidfile); + rewind(f); + } + else { + lprintf(NULL, ERR, "another instance running, pid = %s, pidfile '%s'", s, config.pidfile); + fclose(f); + goto quit_guard0; + } + } else { + lprintf(NULL, ERR, "another instance possibly running, can't read pidfile '%s': %s", config.pidfile, strerror(errno)); + fclose(f); + goto quit_guard0; + } + } else if (errno != ENOENT) { + lprintf(NULL, ERR, "error opening pidfile '%s': %s", config.pidfile, strerror(errno)); + goto quit_guard0; + } + } + if (config.daemonize) { + if (daemon(0, 0) == -1) { + lprintf(NULL, ERR, "can't daemonize: %s", strerror(errno)); + goto quit_guard0; + } + } + + // disable signals to the guardian + sigemptyset(&sset); + sigaddset(&sset, SIGABRT); + sigaddset(&sset, SIGTERM); + sigaddset(&sset, SIGINT); + pthread_sigmask(SIG_BLOCK, &sset, NULL); + + if (!config.guard) { + // not guard + if (config.pidfile) { + FILE *f; + + if ((f = fopen(config.pidfile, "wt")) == NULL) { + lprintf(NULL, ERR, "can't create pidfile '%s': %s", config.pidfile, strerror(errno)); + goto quit_guard0; + } + fprintf(f, "%d", getpid()); + fclose(f); + } + } else + // guard + while ((watched = fork()) != 0) { + if (watched == -1) { + lprintf(NULL, ERR, "fork() failed: %s", 
strerror(errno)); + goto quit_guard; + } + if (config.pidfile) { + FILE *f; + + if ((f = fopen(config.pidfile, "wt")) == NULL) { + lprintf(NULL, ERR, "can't create pidfile '%s': %s", config.pidfile, strerror(errno)); + goto quit_guard0; + } + fprintf(f, "%d", watched); + fclose(f); + } + if (waitpid(watched, &status, 0) == -1) { + lprintf(NULL, ERR, "waitpid() failed: %s", strerror(errno)); + // orpaned child will restart later anyway, + // better to end the child process just now + kill(watched, SIGTERM); + goto quit_guard; + } + if (WIFSIGNALED(status)) { + switch (WTERMSIG(status)) { + case SIGSEGV: + case SIGILL: + case SIGABRT: +#ifdef SIGBUS + case SIGBUS: +#endif + lprintf(NULL, ERR, "caught signal %d from process %d, resurrecting...", WTERMSIG(status), watched); + // slow down the core generator ;-) + // disabled signals and ended child in pidfile, live with it + sleep(2); + break; + default: + lprintf(NULL, WRN, "ended with signal %d", WTERMSIG(status)); + goto quit_guard; + } + } else if (WIFEXITED(status)) { + retval = WEXITSTATUS(status); + switch(retval) { + case RTM_EXIT_OK: + lprintf(NULL, INF, "exit with status %d, OK", retval); + goto quit_guard; + case RTM_EXIT_RELOAD: + lprintf(NULL, INF, "exit with status %d, reloading", retval); + break; + default: + lprintf(NULL, WRN, "exit with status %d, error", retval); + goto quit_guard; + } + } else { + lprintf(NULL, ERR, "unknown child status"); + goto quit_guard; + } + } + + // threads && Globus + if (edg_wll_gss_initialize()) { + lprintf(NULL, ERR, "can't initialize GSS"); + goto quit_guard; + } + +#ifndef WITH_OLD_LB + // connection pool manually (just for tuning memory leaks) + if (!edg_wll_initConnections()) { + lprintf(NULL, ERR, "can't initialize LB connections"); + goto quit_guard; + } +#endif + +#ifdef WITH_LBU_DB + // database + switch(db_init(NULL, &db.dbctx)) { + case 0: + break; + case -1: + // no db + break; + default: + // error + goto quit; + } +#endif + + // load configurations + if 
(config_load()) goto quit; + + // load previous notifications + if (load_notifs()) goto quit; + // compare lb servers from configuration and notifications, + // or clean up and exit if specified + if (reconcile_config_db()) goto quit; + if (config.cleanup) { + retval = RTM_EXIT_OK; + goto quit; + } + + // signal handler + sa.sa_handler = handle_signal; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_RESETHAND; + if (sigaction(SIGABRT, &sa, NULL) == -1 + || sigaction(SIGTERM, &sa, NULL) == -1 + || sigaction(SIGINT, &sa, NULL) == -1) { + lprintf(NULL, ERR, "can't handle signal: %s", strerror(errno)); + goto quit; + } + // enable signals in main + pthread_sigmask(SIG_UNBLOCK, &sset, NULL); + + // distribution LB servers between threads + nnotifs = config.silly ? 1 : 2; + threads = (thread_t *)calloc(config.nthreads, sizeof(thread_t)); + assert(db.n % nnotifs == 0); // each server RTM_NOTIF_TYPE_LAST notification types + gran = (db.n / nnotifs) / config.nthreads, mod = (db.n / nnotifs) % config.nthreads; + i = 0; + j = 0; + do { + assert(j < config.nthreads); + t = threads + j; + t->nservers = nnotifs * ((j < mod) ? gran + 1 : gran); + lprintf(NULL, DBG, "%d thread: %d notifications", j, t->nservers); + if (t->nservers) { + t->notifs = (notif_t *)calloc(t->nservers, sizeof(notif_t)); + for (k = 0; k < t->nservers; k++) { + notif_copy(t->notifs + k, db.notifs + i); + i++; + } + } + j++; + } while (i < db.n); + // launch the threads + for (j = 0; j < config.nthreads; j++) { + t = threads + j; + t->id = j; + if (pthread_create(&threads[j].thread, NULL, notify_thread, t) != 0) { + lprintf(NULL, ERR, "[main] can't create %d. 
thread: %s\n", j, strerror(errno)); + goto quit; + } + } + + edg_wll_InitContext(&ctx); + if (config.cert) edg_wll_SetParam(ctx, EDG_WLL_PARAM_X509_CERT, config.cert); + if (config.key) edg_wll_SetParam(ctx, EDG_WLL_PARAM_X509_KEY, config.key); + last_summary = 0; + start_time = rtm_gettimeofday(); + while (!quit) { + t1 = rtm_gettimeofday(); + if (t1 - last_summary > RTM_SUMMARY_POLL_TIME) { + last_summary = t1; + rtm_summary(ctx, &db); + } + if (config.guard) { + if (t1 - start_time > RTM_SUICIDE_TIME) { + quit = RTM_QUIT_RELOAD; + lprintf(NULL, INF, "preventive suicide"); + break; + } + if (config.cert) { + if (stat(config.cert, &pstat) == 0) { + if (!cert_mtime) cert_mtime = pstat.st_mtime; + if (cert_mtime < pstat.st_mtime) { + lprintf(NULL, INF, "certificate '%s' changed, reloading", config.cert); + quit = RTM_QUIT_RELOAD; + break; + } + } else { + lprintf(NULL, ERR, "can't check certificate file '%s'", config.cert, strerror(errno)); + } + } + } + t2 = rtm_gettimeofday(); + if (t2 - t1 < RTM_IDLE_POLL_TIME) usleep((RTM_IDLE_POLL_TIME + t1 - t2) * 1000000); + } + retval = quit == RTM_QUIT_RELOAD ? 
RTM_EXIT_RELOAD : RTM_EXIT_OK; +quit: + // cleanup on error + if (!quit) quit = RTM_QUIT_CLEANUP; + if (threads) { + for (i = 0; i < config.nthreads; i++) { + t = threads + i; + if (t->thread) pthread_join(t->thread, NULL); + for (j = 0; j < t->nservers; j++) notif_free(t->notifs + j); + free(t->notifs); + } + free(threads); + } + + if (config.pidfile && !config.guard) { + if (remove(config.pidfile) == -1) lprintf(NULL, WRN, "can't remove pidfile '%s': %s", config.pidfile, strerror(errno)); + } + +#ifdef WITH_LBU_DB + db_free(NULL, db.dbctx); +#endif + edg_wll_FreeContext(ctx); + db_free_notifs(); + config_free(); +#ifndef WITH_OLD_LB + edg_wll_poolFree(); +#endif + + return retval; + +quit_guard: + if (config.pidfile) { + if (remove(config.pidfile) == -1) lprintf(NULL, WRN, "can't remove pidfile '%s': %s", config.pidfile, strerror(errno)); + } +quit_guard0: + config_free(); + return retval; +} diff --git a/org.glite.lb.logger/Makefile b/org.glite.lb.logger/Makefile index a759033..733ff60 100644 --- a/org.glite.lb.logger/Makefile +++ b/org.glite.lb.logger/Makefile @@ -1,15 +1,12 @@ # defaults -top_srcdir=. +top_srcdir=.. stagedir=. -distdir=. 
globalprefix=glite lbprefix=lb package=glite-lb-client version=0.0.0 PREFIX=/opt/glite -glite_location=/opt/glite -globus_prefix=/opt/globus nothrflavour=gcc32 thrflavour=gcc32pthr @@ -26,10 +23,6 @@ VERSION=-DVERSION=\"GLite-${version}\" SUFFIXES=.no -GLOBUSINC=-I${globus_prefix}/include/${nothrflavour} - -GLOBUSTHRINC=-I${globus_prefix}/include/${thrflavour} - ifdef LB_STANDALONE LB_STANDALONE_FLAGS:=-DLB_STANDALONE endif @@ -45,8 +38,11 @@ else LOGD:=glite-lb-logd INTERLOGD:=glite-lb-interlogd NOTIF_INTERLOGD:=glite-lb-notif-interlogd + WS_INTERLOGD:=glite-lb-ws-interlogd endif +GLOBUSTHRINC=-I${globus_prefix}/include/${thrflavour} + DEBUG:=-g -O0 CFLAGS:=${DEBUG} \ -I${stagedir}/include -I${top_srcdir}/src \ @@ -60,14 +56,6 @@ LINK:=libtool --mode=link ${CC} ${LDFLAGS} LINKXX:=libtool --mode=link ${CXX} -rpath ${stagedir}/lib ${LDFLAGS} INSTALL:=libtool --mode=install install -GLOBUS_LIBS:= -L${globus_prefix}/lib \ - -lglobus_common_${nothrflavour} \ - -lglobus_gssapi_gsi_${nothrflavour} - -GLOBUS_THRLIBS:= -L${globus_prefix}/lib \ - -lglobus_common_${thrflavour} \ - -lglobus_gssapi_gsi_${thrflavour} - #ifneq (${expat_prefix},/usr) # EXPAT_LIBS:=-L${expat_prefix}/lib #endif @@ -77,6 +65,8 @@ GLOBUS_THRLIBS:= -L${globus_prefix}/lib \ COMMON_LIB:=-lglite_lb_common +TRIO_LIB:=-lglite_lbu_trio + GLITE_GSS_LIB:=-lglite_security_gss TEST_LIBS:=-L${cppunit_prefix}/lib -lcppunit @@ -91,7 +81,13 @@ INTERLOG_OBJS:=il_error.o input_queue_socket.o \ event_queue.o event_store.o il_master.o interlogd.o \ queue_mgr.o server_msg.o queue_thread.o +WS_INTERLOG_OBJS:=il_error.o input_queue_socket_http.o \ + recover.o http.o send_event_http.o \ + event_queue.o event_store_http.o il_master.o interlogd.o \ + queue_mgr_http.o server_msg_http.o queue_thread.o + INTERLOG_NOBJS:=${INTERLOG_OBJS:.o=.no} +INTERLOG_WOBJS:=${WS_INTERLOG_OBJS:.o=.wo} INTERLOG_PERF_OBJS:=${INTERLOG_OBJS:.o=.perf.o} INTERLOG_EMPTY_OBJS:=${INTERLOG_OBJS:.o=.empty.o} 
#INTERLOG_INLINE_EMPTY_OBJS:=${INTERLOG_OBJS:.o=.io} @@ -118,13 +114,13 @@ MAN = $(MAN_GZ:.gz=) default: all -all compile: $(LOGD) $(INTERLOGD) ${MAN_GZ} +all compile: $(NOTIF_INTERLOGD) glite-lb-logd: ${LOGD_OBJS} - ${LINK} -o $@ ${LOGD_OBJS} ${COMMON_LIB}_${nothrflavour} + ${LINK} -o $@ ${LOGD_OBJS} ${COMMON_LIB}_${nothrflavour} ${TRIO_LIB} glite-lb-logd-perf: ${LOGD_OBJS} - ${LINK} -o $@ ${LOGD_OBJS} ${COMMON_LIB}_${nothrflavour} + ${LINK} -o $@ ${LOGD_OBJS} ${COMMON_LIB}_${nothrflavour} ${TRIO_LIB} glite-lb-logd-nofile: ${LOGD_NOBJS} ${LINK} -o $@ ${LOGD_NOBJS} ${COMMON_LIB}_${nothrflavour} @@ -144,25 +140,29 @@ glite-lb-interlogd-perf: ${INTERLOG_PERF_OBJS} glite-lb-interlogd-perf-empty: ${INTERLOG_EMPTY_OBJS} ${LINK} -o $@ ${INTERLOG_EMPTY_OBJS} ${COMMON_LIB}_${thrflavour} +glite-lb-ws-interlogd: ${INTERLOG_WOBJS} + ${LINK} -o $@ ${INTERLOG_WOBJS} ${COMMON_LIB}_${thrflavour} + #glite-lb-interlogd-perf-inline-empty: ${INTERLOG_INLINE_EMPTY_OBJS} # ${LINK} -o $@ ${INTERLOG_INLINE_EMPTY_OBJS} \ # ${COMMON_LIB}_${thrflavour} ${MAN_GZ}: ${MAN} + rm -f ${MAN_GZ} ${MAN} cp $? . - gzip $(notdir $?) + gzip -f $(notdir $?) 
man: ${MAN_GZ} stage: compile - $(MAKE) install PREFIX=${stagedir} DOSTAGE=yes + $(MAKE) install PREFIX=${stagedir} check: # do nothing until test/ is really added to CVS # check.ll check.il #check.ll: logd_proto_test.o ll_test.o -# ${LINKXX} -o $@ ${COMMON_LIB}_${nothrflavour} ${EXT_LIBS} ${GLOBUS_LIBS} ${TEST_LIBS} $+ +# ${LINKXX} -o $@ ${COMMON_LIB}_${nothrflavour} ${EXT_LIBS} ${TEST_LIBS} $+ # ./check.ll check.ll: @@ -171,66 +171,48 @@ check.ll: check.il: ${INTERLOG_TEST_OBJS} ${LINKXX} -o $@ ${COMMON_LIB}_${thrflavour} ${GLITE_GSS_LIB}_${nothrflavour} ${TEST_LIBS} -lpthread $+ -dist: distsrc distbin - -distsrc: - mkdir -p ${top_srcdir}/${package}-${version} - cd ${top_srcdir} && GLOBIGNORE="${package}-${version}" && cp -Rf * ${package}-${version} - cd ${top_srcdir} && tar -czf ${distdir}/${package}-${version}_src.tar.gz --exclude-from=project/tar_exclude ${package}-${version} - rm -rf ${top_srcdir}/${package}-${version} - -distbin: - $(MAKE) install PREFIX=`pwd`/tmpbuilddir${stagedir} - save_dir=`pwd`; cd tmpbuilddir${stagedir} && tar -czf $$save_dir/${top_srcdir}/${distdir}/${package}-${version}_bin.tar.gz *; cd $$save_dir - rm -rf tmpbuilddir - install: -mkdir -p ${PREFIX}/bin -mkdir -p ${PREFIX}/etc/init.d -mkdir -p ${PREFIX}/share/doc/${package}-${version} -mkdir -p ${PREFIX}/share/man/man8 - ${INSTALL} -m 755 ${LOGD} ${PREFIX}/bin - ${INSTALL} -m 755 ${INTERLOGD} ${PREFIX}/bin -# if [ x${DOSTAGE} = xyes ]; then \ -# ${INSTALL} -m 755 ${NOTIF_INTERLOGD} ${PREFIX}/bin; \ -# fi -ifdef LB_PERF - ${INSTALL} -m 755 ${top_srcdir}/src/perftest_ll.sh ${PREFIX}/sbin - ${INSTALL} -m 755 ${top_srcdir}/src/perftest_il.sh ${PREFIX}/sbin -endif - ${INSTALL} -m 755 ${top_srcdir}/config/startup ${PREFIX}/etc/init.d/glite-lb-locallogger - ${INSTALL} -m 644 ${top_srcdir}/LICENSE ${PREFIX}/share/doc/${package}-${version} - ${INSTALL} -m 644 ${MAN_GZ} ${PREFIX}/share/man/man8 +# ${INSTALL} -m 755 ${LOGD} ${PREFIX}/bin +# ${INSTALL} -m 755 ${INTERLOGD} ${PREFIX}/bin + 
${INSTALL} -m 755 ${NOTIF_INTERLOGD} ${PREFIX}/bin ${INTERLOG_NOBJS}: %.no: %.c ${CC} ${CFLAGS} ${GLOBUSTHRINC} -DIL_NOTIFICATIONS -c $< -o $@ ${INTERLOG_OBJS}: %.o: %.c - ${CC} ${CFLAGS} ${GLOBUSTHRINC} -c $< -o $@ + ${CC} ${CFLAGS} -c $< -o $@ + +${INTERLOG_WOBJS}: %.wo: %.c + ${CC} ${CFLAGS} -DIL_WS -c $< -o $@ ${INTERLOG_EMPTY_OBJS}: %.empty.o: %.c - ${CC} ${CFLAGS} ${GLOBUSTHRINC} -DPERF_EMPTY -c $< -o $@ + ${CC} ${CFLAGS} -DPERF_EMPTY -c $< -o $@ ${INTERLOG_PERF_OBJS}: %.perf.o: %.c - ${CC} ${CFLAGS} ${GLOBUSTHRINC} -c $< -o $@ + ${CC} ${CFLAGS} -c $< -o $@ #${INTERLOG_INLINE_EMPTY_OBJS}: %.io: %.c -# ${CC} ${CFLAGS} ${GLOBUSTHRINC} -DLB_PERF -DPERF_EMPTY -DPERF_EVENTS_INLINE -c $< -o $@ +# ${CC} ${CFLAGS} -DLB_PERF -DPERF_EMPTY -DPERF_EVENTS_INLINE -c $< -o $@ ${LOGD_NOBJS}: %.no: %.c - ${CC} ${CFLAGS} ${GLOBUSINC} -DLOGD_NOFILE -c $< -o $@ + ${CC} ${CFLAGS} -DLOGD_NOFILE -c $< -o $@ ${LOGD_OBJS}: %.o: %.c - ${CC} ${CFLAGS} ${GLOBUSINC} -c $< -o $@ + ${CC} ${CFLAGS} -c $< -o $@ logd_proto_test.o: %.o: %.c - ${CC} ${CFLAGS} ${GLOBUSINC} -c $< -o $@ + ${CC} ${CFLAGS} -c $< -o $@ ll_test.o: %.o: %.cpp ${CXX} ${CFLAGS} ${TEST_INC} -c $< -o $@ il_test.o IlTestBase.o server_msgTest.o event_queueTest.o input_queue_socketTest.o event_storeTest.o: %.o: %.cpp - ${CXX} ${CFLAGS} ${GLOBUSTHRINC} ${TEST_INC} -c $< -o $@ + ${CXX} ${CFLAGS} ${TEST_INC} -c $< -o $@ clean: - rm -rf .libs/ *.o *.no ${LOGD} ${INTERLOGD} ${NOTIF_INTERLOGD} {MAN_GZ} + rm -rvf .libs/ *.o *.no ${LOGD} ${INTERLOGD} ${NOTIF_INTERLOGD} ${MAN_GZ} + rm -rvf log.xml project/ rpmbuild/ RPMS/ tgz/ diff --git a/org.glite.lb.logger/configure b/org.glite.lb.logger/configure new file mode 100755 index 0000000..c289773 --- /dev/null +++ b/org.glite.lb.logger/configure @@ -0,0 +1,691 @@ +#!/usr/bin/perl + +# WARNING: Don't edit this file unless it is the master copy in org.glite.lb +# +# For the purpose of standalone builds of lb/jobid/lbjp-common components +# it is copied on tagging + +# $Header$ + +use 
Getopt::Long; + +my $pwd = `pwd`; chomp $pwd; +my $prefix = $pwd.'/stage'; +my $stagedir; +my $staged; +my $module; +my $thrflavour = 'gcc64dbgpthr'; +my $nothrflavour = 'gcc64dbg'; +my $mode = 'build'; +my $help = 0; +my $listmodules; +my $version; +my $output; +my $lb_tag = ''; +my $lbjp_tag = ''; +my $jp_tag = ''; +my $sec_tag = ''; +my $jobid_tag = ''; + +my @nodes = qw/client server logger utils client-java doc ws-test db jpprimary jpindex jpclient/; +my %enable_nodes; +my %disable_nodes; + +my %extern_prefix = ( + cares => '/opt/c-ares', + classads => '/opt/classads', + cppunit => '/usr', + expat => '/usr', + globus => '/opt/globus', + gsoap => '/usr', + mysql => '/usr', + 'mysql-devel' => '', + voms => '/opt/glite', + gridsite => '/opt/glite', + lcas => '/opt/glite', + ant => '/usr', + jdk => '/usr', + libtar => '/usr', +); + +my %jar = ( + 'commons-codec' => '/usr/share/java/commons-codec-1.3.jar', +); + + +my %glite_prefix; +my %need_externs; +my %need_externs_type; +my %need_jars; +my %extrafull; +my %extranodmod; +my %deps; +my %deps_type; +my %topbuild; + +my %lbmodules = ( + 'lb' => [ qw/client client-java common doc logger server state-machine types utils ws-interface ws-test/], + 'security' => [qw/gss gsoap-plugin/], + 'lbjp-common' => [qw/db maildir server-bones trio jp-interface/], + 'jobid' => [qw/api-c api-cpp api-java/], + 'jp' => [ qw/client doc index primary server-common ws-interface/ ], + ); + + +my @opts = ( + 'prefix=s' => \$prefix, + 'staged=s' => \$staged, + 'module=s' => \$module, + 'thrflavour=s' => \$thrflavour, + 'nothrflavour=s' => \$nothrflavour, + 'mode=s' => \$mode, + 'listmodules=s' => \$listmodules, + 'version=s' => \$version, + 'output=s' => \$output, + 'stage=s' => \$stagedir, + 'lb-tag=s' => \$lb_tag, + 'lbjp-common-tag=s' => \$lbjp_tag, + 'jp-tag=s' => \$jp_tag, + 'security-tag=s' => \$sec_tag, + 'jobid-tag=s' => \$jobid_tag, + 'help' => \$help, +); + +for (@nodes) { + $enable_nodes{$_} = 0; + $disable_nodes{$_} = 0; + + 
push @opts,"disable-$_",\$disable_nodes{$_}; + push @opts,"enable-$_",\$enable_nodes{$_}; +} + +push @opts,"with-$_=s",\$extern_prefix{$_} for keys %extern_prefix; +push @opts,"with-$_=s",\$jar{$_} for keys %jar; + +my @keeparg = @ARGV; + +GetOptions @opts or die "Errors parsing command line\n"; + +$extern_prefix{'mysql-devel'}=$extern_prefix{mysql} if $extern_prefix{'mysql-devel'} eq ''; + +if ($help) { usage(); exit 0; } + +if ($listmodules) { + my @m = map "org.glite.$listmodules.$_",@{$lbmodules{$listmodules}}; + print "@m\n"; + exit 0; +} + +warn "$0: --version and --output make sense only in --mode=etics\n" + if ($version || $output) && $mode ne 'etics'; + +my $en; +for (keys %enable_nodes) { $en = 1 if $enable_nodes{$_}; } + +my $dis; +for (keys %disable_nodes) { $dis = 1 if $disable_nodes{$_}; } + +die "--enable-* and --disable-* are mutually exclusive\n" + if $en && $dis; + +die "--module cannot be used with --enable-* or --disable-*\n" + if $module && ($en || $dis); + +die "$module: unknown module\n" if $module && ! grep $module,@{$lbmodules{lb}},@{$lbmodules{security}},{$lbmodules{jp}}; + +if ($dis) { + for (@nodes) { + $enable_nodes{$_} = 1 unless $disable_nodes{$_}; + } +} + +if (!$en && !$dis) { $enable_nodes{$_} = 1 for (@nodes) } ; + +for (keys %enable_nodes) { delete $enable_nodes{$_} unless $enable_nodes{$_}; } + +$stagedir = $prefix unless $stagedir; + +if ($mode eq 'build') { + print "Writing config.status\n"; + open CONF,">config.status" or die "config.status: $!\n"; + print CONF "$0 @keeparg\n"; + close CONF; +} + + +my @modules; +my %aux; + +if ($module) { +# push @modules,split(/[,.]+/,$module); + push @modules,$module; +} +else { + @modules = map(($extranodmod{$_} ? 
$extranodmod{$_} : 'lb.'.$_),(keys %enable_nodes));
+
+	my $n;
+
+	# Add the dependencies of everything selected so far, drop duplicates via
+	# the %aux keys, and repeat until the list stops growing.
+	do {
+		local $"="\n";
+		$n = $#modules;
+		push @modules,(map @{$deps{$_}},@modules);
+
+		undef %aux; @aux{@modules} = (1) x ($#modules+1);
+		@modules = keys %aux;
+	} while ($#modules > $n);
+}
+
+# Remove modules the user declared as already staged (--staged=a,b,...).
+@aux{@modules} = (1) x ($#modules+1);
+delete $aux{$_} for (split /,/,$staged);
+@modules = keys %aux;
+
+mode_build() if $mode eq 'build';
+mode_checkout() if $mode eq 'checkout';
+mode_etics($module) if $mode eq 'etics';
+
+# Build mode: print the required externals and jars, create Makefile.inc for
+# each selected module (mkinc) and write a top-level Makefile with the targets
+# all, clean, distclean and one target per module.
+sub mode_build {
+	print "\nBuilding modules: @modules\n";
+
+	# Only the hash keys are used below, to remove duplicates.
+	my @ext = map @{$need_externs{$_}},@modules;
+	my @myjars = map @{$need_jars{$_}},@modules;
+	undef %aux; @aux{@ext} = 1;
+	@ext = keys %aux;
+	undef %aux; @aux{@myjars} = (1) x ($#myjars+1);
+	@myjars = keys %aux;
+
+	print "\nRequired externals:\n";
+	print "\t$_: $extern_prefix{$_}\n" for @ext;
+	print "\t$_: $jar{$_}\n" for @myjars;
+	print "\nThis is a poor-man configure, it's up to you to have sources and externals there\n\n";
+
+	mkinc($_) for @modules;
+
+	print "Creating Makefile\n";
+
+	open MAK,">Makefile" or die "Makefile: $!\n";
+
+	print MAK "all: @modules\n\nclean:\n";
+
+	for (@modules) {
+		my $full = full($_);
+		my $build = $topbuild{$_} ? '': '/build';
+		print MAK "\tcd $full$build && \${MAKE} clean\n"
+	}
+
+	print MAK "\ndistclean:\n";
+
+	for (@modules) {
+		my $full = full($_);
+		# $build has to be computed in this loop too: the variable declared in
+		# the "clean" loop above is out of scope here, and an empty value
+		# turns the rule into "rm -rf $full", i.e. removal of the whole module
+		# directory instead of its build subdirectory.
+		my $build = $topbuild{$_} ? '': '/build';
+		print MAK $topbuild{$_} ?
+			"\tcd $full$build && \${MAKE} distclean\n" :
+			"\trm -rf $full$build\n"
+	}
+
+	print MAK "\n";
+
+	# One target per module; its prerequisites are the module's dependencies
+	# minus the staged ones, or nothing at all when --module was given.
+	for (@modules) {
+		my %ldeps; undef %ldeps;
+		@ldeps{@{$deps{$_}}} = 1;
+		for my $x (split /,/,$staged) { delete $ldeps{$x}; }
+		my @dnames = $module ? () : keys %ldeps;
+
+		my $full = full($_);
+		my $build = $topbuild{$_} ? 
'': '/build'; + + print MAK "$_: @dnames\n\tcd $full$build && \${MAKE} && \${MAKE} install\n\n"; + } + + close MAK; +} + +sub mode_checkout() { + for (@modules) { + my $module = $_; + my $tag = ""; + if ($lb_tag){ + for (@{$lbmodules{lb}}){ + if ("lb.".$_ eq $module){ + $tag = '-r '.$lb_tag; + } + } + } + if ($lbjp_tag){ + for (@{$lbmodules{'lbjp-common'}}){ + if ("lbjp-common.".$_ eq $module){ + $tag = '-r '.$lbjp_tag; + } + } + } + if ($jp_tag){ + for (@{$lbmodules{'jp'}}){ + if ("jp.".$_ eq $module){ + $tag = '-r '.$jp_tag; + } + } + } + if ($sec_tag){ + for (@{$lbmodules{security}}){ + if ("security.".$_ eq $module){ + $tag = '-r '.$sec_tag; + } + } + } + if ($jobid_tag){ + for (@{$lbmodules{jobid}}){ + if ("jobid.".$_ eq $module){ + $tag = '-r '.$jobid_tag; + } + } + } + #if (grep {"lb.".$_ eq $module} @{$lbmodules{lb}}){ + # print "found"; + #} + $_ = full($_); + print "\n*** Checking out $_\n"; + system("cvs checkout $tag $_") == 0 or die "cvs checkout $tag $_: $?\n"; + } +} + +BEGIN{ +%need_externs_aux = ( + 'lb.client' => [ qw/cppunit:B classads/ ], + 'lb.client-java' => [ qw/ant:B/ ], + 'lb.common' => [ qw/expat cppunit:B classads/ ], + 'lb.doc' => [], + 'lb.logger' => [ qw/cppunit:B/ ], + 'lb.server' => [ qw/globus_essentials:R globus:B expat cares mysql cppunit:B gsoap:B classads voms lcas gridsite/ ], + 'lb.state-machine' => [ qw/classads/ ], + 'lb.utils' => [ qw/cppunit:B/ ], + 'lb.ws-interface' => [], + 'lb.ws-test' => [ qw/gsoap:B/ ], + 'lb.types' => [ qw// ], + 'lbjp-common.db' => [ qw/mysql:R mysql-devel:B/ ], + 'lbjp-common.maildir' => [ qw// ], + 'lbjp-common.server-bones' => [ qw// ], + 'lbjp-common.trio' => [ qw/cppunit:B/ ], + 'lbjp-common.jp-interface' => [ qw/cppunit:B/ ], + 'security.gss' => [ qw/globus_essentials:R globus:B cares cppunit:B/ ], + 'security.gsoap-plugin' => [ qw/cppunit:B globus_essentials:R globus:B cares gsoap:B/ ], + 'jobid.api-c' => [ qw/cppunit:B/ ], + 'jobid.api-cpp' => [ qw/cppunit:B/ ], + 'jobid.api-java' => [ 
qw/ant:B jdk:B/ ], + 'jp.client' => [ qw/gsoap libtar globus_essentials:R globus:B/ ], + 'jp.doc' => [], + 'jp.index' => [ qw/gsoap globus_essentials:R globus:B/ ], + 'jp.primary' => [ qw/classads gsoap libtar globus_essentials:R globus:B/ ], + 'jp.server-common' => [], + 'jp.ws-interface' => [], +); + +for my $ext (keys %need_externs_aux) { + for (@{$need_externs_aux{$ext}}) { + /([^:]*)(?::(.*))?/; + push @{$need_externs{$ext}},$1; + my $type = $2 ? $2 : 'BR'; + $need_externs_type{$ext}->{$1} = $type; + } +} + +%need_jars = ( + 'jobid.api-java' => [ qw/commons-codec/ ], +); + +for my $jar (keys %need_jars) { + for (@{$need_jars{$jar}}) { + $need_externs_type{$jar}->{$_} = 'BR'; # XXX + } +} + +%deps_aux = ( + 'lb.client' => [ qw/ + lb.types:B lb.common + lbjp-common.trio + jobid.api-cpp jobid.api-c + security.gss + / ], + 'lb.client-java' => [ qw/ + lb.types:B + jobid.api-java + / ], + 'lb.common' => [ qw/ + jobid.api-cpp jobid.api-c + lb.types:B lbjp-common.trio security.gss + / ], + 'lb.doc' => [ qw/lb.types:B/ ], + 'lb.logger' => [ qw/ + lbjp-common.trio + jobid.api-c + lb.common + security.gss + / ], + 'lb.server' => [ qw/ + lb.ws-interface lb.types:B lb.common lb.state-machine + lbjp-common.db lbjp-common.server-bones lbjp-common.trio lbjp-common.maildir + jobid.api-c + security.gsoap-plugin security.gss + / ], + 'lb.state-machine' => [ qw/lb.common lbjp-common.jp-interface security.gss/ ], + 'lb.utils' => [ qw/ + lbjp-common.jp-interface + jobid.api-c + lbjp-common.trio lbjp-common.maildir + lb.client lb.state-machine + / ], + 'lb.ws-test' => [ qw/security.gsoap-plugin lb.ws-interface/ ], + 'lb.ws-interface' => [ qw/lb.types:B/ ], + 'lb.types' => [ qw// ], + 'lbjp-common.db' => [ qw/lbjp-common.trio/ ], + 'lbjp-common.maildir' => [ qw// ], + 'lbjp-common.server-bones' => [ qw// ], + 'lbjp-common.trio' => [ qw// ], + 'security.gss' => [ qw// ], + 'security.gsoap-plugin' => [ qw/security.gss/ ], + 'jobid.api-c' => [ qw// ], + 'jobid.api-cpp' => [ 
qw/jobid.api-c/ ], + 'jobid.api-java' => [ qw// ], + + 'lbjp-common.jp-interface' => [ qw/lbjp-common.db jobid.api-c/ ], + + 'jp.client' => [ qw/ + jp.ws-interface + lbjp-common.jp-interface lbjp-common.maildir + jobid.api-c + security.gsoap-plugin + / ], + 'jp.doc' => [ qw// ], + 'jp.index' => [ qw/ + jp.server-common jp.ws-interface + lbjp-common.jp-interface lbjp-common.trio lbjp-common.db lbjp-common.server-bones + security.gsoap-plugin + / ], + 'jp.primary' => [ qw/ + jobid.api-c + jp.server-common jp.ws-interface + lb.state-machine + lbjp-common.jp-interface lbjp-common.trio lbjp-common.db lbjp-common.server-bones + security.gsoap-plugin + / ], + 'jp.server-common' => [ qw/ + lbjp-common.jp-interface lbjp-common.db + / ], + 'jp.ws-interface' => [ qw// ], +); + +for my $ext (keys %deps_aux) { + for (@{$deps_aux{$ext}}) { + /([^:]*)(?::(.*))?/; + push @{$deps{$ext}},$1; + my $type = $2 ? $2 : 'BR'; + $deps_type{$ext}->{$1} = $type; + } +} + + +%extrafull = ( gridsite=>'org.gridsite.core'); + +#( java => 'client-java' ); +%extranodmod = ( + db => 'lbjp-common.db', + jpprimary => 'jp.primary', + jpindex => 'jp.index', + jpclient => 'jp.client', +); + +my @t = qw/lb.client-java jobid.api-java lb.types/; +@topbuild{@t} = (1) x ($#t+1); +} + +sub full +{ + my $short = shift; + return $extrafull{$short} ? 
$extrafull{$short} : 'org.glite.'.$short;
+}
+
+# Write Makefile.inc for one module, given by its short name (e.g. "lb.client").
+# Modules not in the list below are skipped with a message. Unless the module
+# is flagged in %topbuild, the file goes to a build/ subdirectory, which is
+# created if missing and gets a symlink to ../Makefile.
+sub mkinc
+{
+	my %aux;
+	undef %aux;
+	my @m=qw/
+lb.client lb.doc lb.state-machine lb.ws-interface lb.logger lb.types lb.common lb.server lb.utils lb.ws-test lb.client-java
+security.gss security.gsoap-plugin
+jobid.api-c jobid.api-cpp jobid.api-java
+lbjp-common.db lbjp-common.maildir lbjp-common.server-bones lbjp-common.trio lbjp-common.jp-interface
+jp.client jp.doc jp.index jp.primary jp.server-common jp.ws-interface
+/;
+	@aux{@m} = (1) x ($#m+1);
+
+	my $short = shift;
+	my $full = full $short;
+
+	unless ($aux{$short}) {
+		print "Makefile.inc not needed in $full\n";
+		return;
+	}
+
+	my $build = '';
+
+	# Look the module up by the argument, not by $_: the old "$topbuild{$_}"
+	# was right only while the caller's loop variable aliased the same name.
+	unless ($topbuild{$short}) {
+		$build = '/build';
+		unless (-d "$full/build") {
+			mkdir "$full/build" or die "mkdir $full/build: $!\n";
+		}
+		unlink "$full/build/Makefile";
+		symlink "../Makefile","$full/build/Makefile" or die "symlink ../Makefile $full/build/Makefile: $!\n";
+	}
+
+	open MKINC,">$full$build/Makefile.inc"
+		or die "$full$build/Makefile.inc: $!\n";
+
+	print "Creating $full$build/Makefile.inc\n";
+
+	print MKINC qq{
+PREFIX = $prefix
+stagedir = $stagedir
+thrflavour = $thrflavour
+nothrflavour = $nothrflavour
+};
+
+	# One NAME_prefix line per required external, one NAME_jar line per jar.
+	for (@{$need_externs{$short}}) {
+		print MKINC "${_}_prefix = $extern_prefix{$_}\n"
+	}
+
+	for (@{$need_jars{$short}}) {
+		print MKINC "${_}_jar = $jar{$_}\n"
+	}
+
+	# The gsoap version is recorded only for modules that list gsoap.
+	my $need_gsoap = 0;
+	for (@{$need_externs{$short}}) { $need_gsoap = 1 if $_ eq 'gsoap'; }
+
+	print MKINC "gsoap_default_version=".gsoap_version()."\n" if $need_gsoap;
+
+	close MKINC;
+}
+
+my %etics_externs;
+my %etics_projects;
+BEGIN{
+	%etics_externs = (
+		globus_essentials=>'vdt_globus_essentials',
+		globus=>'globus',
+		cares=>'c-ares',
+		voms=>'org.glite.security.voms-api-cpp',
+		gridsite=>'org.gridsite.shared',
+		lcas=>'org.glite.security.lcas',
+	);
+	%etics_projects = (
+		vdt=>[qw/globus globus_essentials/],
+		'org.glite'=>[qw/voms gridsite lcas/],
+	);
+};
+
+sub mode_etics {
+	$fmod = shift;
+
+	die "$0: 
--module required with --etics\n" unless $fmod;
+
+	my ($subsys,$module) = split /\./,$fmod;
+
+	my ($major,$minor,$rev,$age);
+
+	if ($version) {
+		# Stop on a malformed --version; otherwise $1..$4 would carry values
+		# left over from an earlier, unrelated match.
+		$version =~ /([[:digit:]]+)\.([[:digit:]]+)\.([[:digit:]]+)-(.+)/
+			or die "$0: --version=$version: expected MAJOR.MINOR.REVISION-AGE\n";
+		($major,$minor,$rev,$age) = ($1,$2,$3,$4);
+	}
+	else {
+		# No --version: take version and age from the module's version.properties.
+		open V,"org.glite.$subsys.$module/project/version.properties"
+			or die "org.glite.$subsys.$module/project/version.properties: $!\n";
+
+		# readline(*V) names the handle explicitly; the angle-bracket form of
+		# this loop had lost its filehandle and read nothing from V.
+		while (defined($_ = readline(*V))) {
+			chomp;
+			($major,$minor,$rev) = ($1,$2,$3) if /module\.version\s*=\s*([[:digit:]]+)\.([[:digit:]]+)\.([[:digit:]]+)/;
+			$age = $1 if /module\.age\s*=\s*([[:digit:]]+)/;
+		}
+		close V;
+	}
+
+	my @copts = ();
+	my %ge;
+	@ge{@{$etics_projects{'org.glite'}}} = (1) x ($#{$etics_projects{'org.glite'}}+1);
+
+	# Externals whose type contains B get a --with option: those listed under
+	# 'org.glite' in %etics_projects point to the stage dir, the others to
+	# their ETICS location.
+	for (@{$need_externs{"$subsys.$module"}}) {
+		if ($need_externs_type{"$subsys.$module"}->{$_}=~/B/) {
+			my $eext = $etics_externs{$_} ? $etics_externs{$_} : $_;
+			push @copts,$ge{$_} ? "--with-$_=\${stageDir}" : "--with-$_=\${$eext.location}";
+		}
+	}
+
+	for (@{$need_jars{"$subsys.$module"}}) {
+		my $eext = $etics_externs{$_} ? $etics_externs{$_} : $_;
+
+		push @copts,"--with-$_ \${$eext.location}/$_*.jar";
+	}
+
+
+	my $conf = "glite-$subsys-${module}_R_${major}_${minor}_${rev}_${age}";
+	my $file = $output ? $output : "$conf.ini";
+	open C,">$file" or die "$file: $!\n";
+
+	my $buildroot = $topbuild{"$subsys.$module"} ? '' : "build.root = build\n";
+
+	my $confdir = $topbuild{"$subsys.$module"} ? '..' 
: '../..'; + + print STDERR "Writing $file\n"; + print C qq{ +[Configuration-$conf] +profile = None +moduleName = org.glite.$subsys.$module +displayName = $conf +description = org.glite.$subsys.$module +projectName = org.glite +age = $age +deploymentType = None +tag = $conf +version = $major.$minor.$rev +path = \${projectName}/\${moduleName}/\${version}/\${platformName}/\${packageName}-\${version}-\${age}.tar.gz + +[Platform-default:VcsCommand] +displayName = None +description = None +tag = cvs -d \${vcsroot} tag -R \${tag} \${moduleName} +branch = None +commit = None +checkout = cvs -d \${vcsroot} co -r \${tag} \${moduleName} + +[Platform-default:BuildCommand] +postpublish = None +packaging = None +displayName = None +description = None +doc = None +prepublish = None +publish = None +compile = make +init = None +install = make install +clean = make clean +test = make check +configure = cd $confdir && \${moduleName}/configure --thrflavour=\${globus.thr.flavor} --nothrflavour=\${globus.nothr.flavor} --prefix=\${prefix} --stage=\${stageDir} --module $subsys.$module @copts +checkstyle = None + +[Platform-default:Property] +$buildroot + +[Platform-default:DynamicDependency] + +}; + for (@{$need_externs{"$subsys.$module"}},@{$need_jars{"$subsys.$module"}}) { + my $eext = $etics_externs{$_} ? 
$etics_externs{$_} : $_;
+
+		my $proj = 'externals';
+		for my $p (keys %etics_projects) {
+			for $m (@{$etics_projects{$p}}) {
+				$proj = $p if $m eq $_;
+			}
+		}
+
+		my $type = $need_externs_type{"$subsys.$module"}->{$_};
+		print C "$proj|$eext = $type\n";
+	}
+
+	for (@{$deps{"$subsys.$module"}}) {
+		my $type = $deps_type{"$subsys.$module"}->{$_};
+		print C "org.glite|org.glite.$_ = $type\n";
+	}
+
+	close C;
+}
+
+# Run "soapcpp2 -v" from the configured gsoap prefix and return the version
+# string found in its banner line; returns undef when no line matches.
+sub gsoap_version {
+	local $_;
+	my $gsoap_version;
+	open S,"$extern_prefix{gsoap}/bin/soapcpp2 -v 2>&1 |" or die "$extern_prefix{gsoap}/bin/soapcpp2: $!\n";
+
+	# readline(*S) names the handle explicitly; the angle-bracket form of this
+	# loop had lost its filehandle and read nothing from the pipe.
+	while (defined($_ = readline(*S))) {
+		chomp;
+
+		$gsoap_version = $1 if /The gSOAP Stub and Skeleton Compiler for C and C\+\+ ([.[:digit:][:alpha:]]+)$/;
+	}
+	close S;
+	return $gsoap_version;
+}
+
+
+# Print the command-line help to STDERR.
+sub usage {
+	my @ext = keys %extern_prefix;
+	# Was "my @myjars, keys %jar;", which declared the array but never filled
+	# it, so the "Required jars are:" list in the help stayed empty.
+	my @myjars = keys %jar;
+
+	print STDERR qq{
+usage: $0 options
+
+General options (defaults in []):
+  --prefix=PREFIX               destination directory [./stage]
+  --staged=module,module,...    what is already in PREFIX (specify without org.glite.)
+  --thrflavour=flavour
+  --nothrflavour=flavour        threaded and non-treaded flavours [gcc64dbgpthr,gcc64dbg]
+  --listmodules=subsys          list modules of a subsystem
+
+Mode of operation:
+  --mode={checkout|build|etics} what to do [build]
+
+What to build:
+  --module=module               build this module only (mostly in-Etics operation)
+  --enable-NODE                 build this "node" (set of modules) only. Available nodes are
+                                        @{$lbmodules{lb}},@{$lbmodules{security}}
+  --disable-NODE                don't build this node
+  --lb-tag=tag                  checkout LB modules with specific tag
+  --jp-tag=tag                  checkout JP modules with specific tag
+  --lbjp-common-tag=tag         checkout lbjp-common modules with specific tag
+  --security-tag=tag            checkout security modules with specific tag
+  --jobid-tag=tag               checkout jobid modules with specific tag
+
+Dependencies:
+  --with-EXTERNAL=PATH          where to look for an external. Required externals
+                                (not all for all modules) are:
+                                        @ext
+  --with-JAR=JAR                where to look for jars. 
Required jars are: + @myjars + Summary of what will be used is always printed + +}; + +} diff --git a/org.glite.lb.logger/doc/glite-lb-interlogd.8 b/org.glite.lb.logger/doc/glite-lb-interlogd.8 index 20732a0..25f9615 100644 --- a/org.glite.lb.logger/doc/glite-lb-interlogd.8 +++ b/org.glite.lb.logger/doc/glite-lb-interlogd.8 @@ -1,4 +1,4 @@ -.TH EDG-WL-INTERLOGD 8 "May 2003" "EU DataGrid Project" "Logging&Bookkeeping" +.TH GLITE-LB-INTERLOGD 8 "April 2008" "EU EGEE Project" "Logging&Bookkeeping" .SH NAME glite-lb-interlogd - interlogger daemon @@ -166,10 +166,10 @@ If is set, it is used to locate proxy certificate file. .SH REPORTING BUGS -Please, report all bugs to EU DataGrid Bug Tracking System located at http://marianne.in2p3.fr/datagrid/bugzilla +Please, report all bugs to EU EGEE Bug Tracking System located at https://savannah.cern.ch .SH SEE ALSO .B te-lb-bkserverd\fR(8),\fP glite-lb-logd\fR(8),\fP glite-lb-logevent\fR(1),\fP .SH AUTHOR -EU DataGrid Work Package 1, CESNET group. +EU EGEE, JRA1. diff --git a/org.glite.lb.logger/doc/glite-lb-logd.8 b/org.glite.lb.logger/doc/glite-lb-logd.8 index c9bf108..7ea5db3 100644 --- a/org.glite.lb.logger/doc/glite-lb-logd.8 +++ b/org.glite.lb.logger/doc/glite-lb-logd.8 @@ -1,4 +1,4 @@ -.TH EDG-WL-LOGD 8 "May 2003" "EU DataGrid Project" "Logging&Bookkeeping" +.TH GLITE-LB-LOGD 8 "April 2008" "EU EGEE Project" "Logging&Bookkeeping" .SH NAME glite-lb-logd - local logger daemon @@ -153,10 +153,10 @@ Decrease verbosity of the program. .SH REPORTING BUGS -Please, report all bugs to DataGrid Bug Tracking System located at http://marianne.in2p3.fr/datagrid/bugzilla +Please, report all bugs to EGEE Bug Tracking System located at https://savannah.cern.ch .SH SEE ALSO .B glite-lb-bkserverd\fR(8),\fP glite-lb-interlogd\fR(8),\fP glite-lb-logevent\fR(1),\fP .SH AUTHOR -EU DataGrid Work Package 1, CESNET group. +EU EGEE, JRA1. 
diff --git a/org.glite.lb.logger/project/ChangeLog b/org.glite.lb.logger/project/ChangeLog index 744d95a..1585ac6 100644 --- a/org.glite.lb.logger/project/ChangeLog +++ b/org.glite.lb.logger/project/ChangeLog @@ -22,13 +22,22 @@ - fixed sending empty events - do not syslog "error reading server reply" unnecessarily -1.4.11-1 -- Support chconfig in startup scripts (#27055) -- Proper kill signal handling (#36470) +2.0.0-1 +- LB 2.0 release -1.4.11-2 -- L&B server export to MSG +2.0.0-2 +- fixed configure to work in etics -1.4.11-3 -- Module Repacked +2.0.0-3 +- Fixed typos in the Makefile +2.0.0-4 +- configure script update (globus flavors added to configure call) + +2.0.1-1 +- (from 1.4.11-1) Support chconfig in startup scripts (#27055) +- (from 1.4.11-1) Proper kill signal handling (#36470) + +2.0.2-1 +- implemented multi-file event store, avoiding ever-growing files + in the case of heavy traffic notifications diff --git a/org.glite.lb.logger/project/package.description b/org.glite.lb.logger/project/package.description new file mode 100644 index 0000000..cd0621b --- /dev/null +++ b/org.glite.lb.logger/project/package.description @@ -0,0 +1 @@ +glite-lb-logger is the gLite LB local-logger and inter-logger. This package contains the local-logger (glite-lb-logd), inter-logger (glite-lb-interlogd) and notification inter-logger (glite-lb-notif-interlogd) daemons. 
diff --git a/org.glite.lb.logger/project/package.summary b/org.glite.lb.logger/project/package.summary new file mode 100644 index 0000000..089b630 --- /dev/null +++ b/org.glite.lb.logger/project/package.summary @@ -0,0 +1 @@ +gLite Logging and Bookkeeping local-logger and inter-logger diff --git a/org.glite.lb.logger/src-nt/Connection.H b/org.glite.lb.logger/src-nt/Connection.H index 692c019..808b788 100644 --- a/org.glite.lb.logger/src-nt/Connection.H +++ b/org.glite.lb.logger/src-nt/Connection.H @@ -8,6 +8,8 @@ public: public: virtual Connection *newConnection(int fd) const = 0; virtual Connection *accept(int fd) const = 0; + + virtual ~Factory() {} }; class Endpoint { diff --git a/org.glite.lb.logger/src-nt/Connection.cpp b/org.glite.lb.logger/src-nt/Connection.cpp new file mode 100644 index 0000000..7cac943 --- /dev/null +++ b/org.glite.lb.logger/src-nt/Connection.cpp @@ -0,0 +1,4 @@ +#include "Connection.H" + +Connection::Factory::~Factory() { +} diff --git a/org.glite.lb.logger/src-nt/EventManager.H b/org.glite.lb.logger/src-nt/EventManager.H index 1fa4cab..b1f16ee 100644 --- a/org.glite.lb.logger/src-nt/EventManager.H +++ b/org.glite.lb.logger/src-nt/EventManager.H @@ -1,58 +1,91 @@ #ifndef _EVENT_MANAGER_H #define _EVENT_MANAGER_H +#include -class EventManager { -public: - // type for return code of event handler - typedef enum { - NOT_HANDLED, // the event was not handled at all - HANDLED, // the event was handled succesfully - HANDLED_FINAL // the event was handled, - // no other handlers should be called - } eventstatus_t; - - static EventManager* getEventManager() { return &theEventManager; }; +// interface - class Event { - public: - }; - - template - class EventHandler { - public: +/** + * Base class of event hierarchy. 
+ */ +class Event { +public: + virtual ~Event() {} +}; - virtual eventstatus_t handleEvent(T *&e); - eventstatus_t dispatchEvent(Event *&e) { - T *event = dynamic_cast(e); - - if(event) - return(handleEvent(event)); - else - return(NOT_HANDLED); - } - }; +/** + * Base class for event handler address. + */ +class EventHandler { +public: + // constants for event handler return codes + static const int NOT_HANDLED = 0; + static const int HANDLED = 1; + static const int HANDLED_FINAL = 2; + static const int HANDLED_NEW = 3; + + virtual int handleEvent(Event* &e) { return NOT_HANDLED; } + virtual ~EventHandler() {} +}; - void postEvent(Event &); +/** + * Holds addres of event handler, ie. pointer to member function of T + * that takes E* & as an argument. + */ +template +class TypedEventHandler: public EventHandler { +public: + typedef int (T::*handlerType)(E* &); + + TypedEventHandler(T *handler, handlerType method) + : m_handler(handler), m_handleEvent(method) { + } + + virtual int handleEvent(Event* &e) { + E *ne = dynamic_cast(e); + int result = EventHandler::NOT_HANDLED; + if(ne) { + result = (m_handler->*m_handleEvent)(ne); + if((result == EventHandler::HANDLED_NEW) && + !(ne == e)) { + delete e; + e = ne; + } + } + return result; + } + +private: + T *m_handler; + handlerType m_handleEvent; +}; - template - bool registerHandler(EventHandler *); - template - bool registerHandlerFirst(EventHandler *); +class EventManager { +public: + + int postEvent(Event* &event); + + template + EventHandler& registerHandler(T *handler, int (T::*method)(E* &)) { + EventHandler *h = new TypedEventHandler(handler, method); + addHandler(h); + return *h; + } + + template + EventHandler& registerHandler(T *handler) { + return registerHandler(handler, &T::handleEvent); + } private: - - // the event manager - static EventManager theEventManager; - - // private default constructor for singleton instance - EventManager() - {}; + std::list handlers; + void addHandler(EventHandler*); + void 
removeHandler(EventHandler *); }; diff --git a/org.glite.lb.logger/src-nt/EventManager.cpp b/org.glite.lb.logger/src-nt/EventManager.cpp new file mode 100644 index 0000000..91efb12 --- /dev/null +++ b/org.glite.lb.logger/src-nt/EventManager.cpp @@ -0,0 +1,23 @@ +#include "EventManager.H" + +int +EventManager::postEvent(Event* &e) +{ + for(std::list::iterator i = handlers.begin(); + i != handlers.end(); + i++) { + (*i)->handleEvent(e); + } + return 0; +} + +void +EventManager::addHandler(EventHandler *handler) +{ + handlers.push_back(handler); +} + +void +EventManager::removeHandler(EventHandler *handler) +{ +} diff --git a/org.glite.lb.logger/src-nt/Exception.H b/org.glite.lb.logger/src-nt/Exception.H index 0fbac3e..a2c921d 100644 --- a/org.glite.lb.logger/src-nt/Exception.H +++ b/org.glite.lb.logger/src-nt/Exception.H @@ -4,4 +4,10 @@ class Exception { }; +class FatalException { +}; + + +#define E_ASSERT(a) if(!(a)) { throw new FatalException; } + #endif diff --git a/org.glite.lb.logger/src-nt/HTTPTransport.H b/org.glite.lb.logger/src-nt/HTTPTransport.H index 417deea..05e928b 100644 --- a/org.glite.lb.logger/src-nt/HTTPTransport.H +++ b/org.glite.lb.logger/src-nt/HTTPTransport.H @@ -3,6 +3,7 @@ #include "ThreadPool.H" #include "Transport.H" +#include "Singleton.H" #include @@ -10,20 +11,18 @@ class HTTPTransport: public Transport { public: - class Factory: public Transport::Factory { + + // factory class + class Factory: public Transport::Factory, + public Singleton { public: - virtual Transport *newTransport(Connection *conn) const { - if(conn) - return(new HTTPTransport(conn)); - else - return NULL; + virtual Transport *newTransport() const { + return(new HTTPTransport()); } }; - static Factory theFactory; - - HTTPTransport(Connection *conn) - : Transport(conn), + HTTPTransport() + : Transport(), state(NONE), request(), headers(), body(NULL), pos(NULL), content_length(0) @@ -31,12 +30,9 @@ public: virtual ~HTTPTransport(); - -protected: - // from 
ThreadPool::WorkDescription - virtual void onReady(); - virtual void onTimeout(); - virtual void onError(); + virtual int receive(Connection *conn, Message* &msg); + virtual int send(Connection *conn, Message* msg); + virtual void reset(); private: enum { NONE, @@ -51,6 +47,7 @@ private: unsigned int content_length; int parseHeader(const char *s, unsigned int len); + void serializeHeaders(Message *msg); }; diff --git a/org.glite.lb.logger/src-nt/HTTPTransport.cpp b/org.glite.lb.logger/src-nt/HTTPTransport.cpp index 8f495c3..9ed4e16 100644 --- a/org.glite.lb.logger/src-nt/HTTPTransport.cpp +++ b/org.glite.lb.logger/src-nt/HTTPTransport.cpp @@ -1,21 +1,22 @@ #include "HTTPTransport.H" #include "Exception.H" +#include "EventManager.H" #include #include -HTTPTransport::Factory HTTPTransport::theFactory; - - HTTPTransport::~HTTPTransport() { if(body) free(body); } -void -HTTPTransport::onReady() +// read what is available and parse what can be parsed +// returns the result of read operation of the underlying connection, +// ie. 
the number of bytes read or error code +int +HTTPTransport::receive(Connection *conn, Message* &msg) { int len; @@ -29,10 +30,12 @@ HTTPTransport::onReady() len = conn->read(pos, sizeof(buffer) - (pos - buffer)); if(len < 0) { // error during request - state = NONE; + // state = NONE; + return len; } else if(len == 0) { // other side closed connection - state = NONE; + // state = NONE; + return len; } else { char *cr = NULL, *p = buffer, *s = buffer; bool crlf_seen = false; @@ -114,6 +117,7 @@ HTTPTransport::onReady() } } else { // report error + // XXX - this may happen, do not handle using exceptions std::cout << "Wrong content length" << std::endl; throw new Exception(); } @@ -131,13 +135,15 @@ HTTPTransport::onReady() len = conn->read(pos, content_length - (pos - body)); if(len < 0) { // error reading - state = NONE; + // state = NONE; + return len; } else if(len == 0) { // no more data - state = NONE; + // state = NONE; + return len; } else { pos += len; - if(pos - body == content_length) { + if(pos == content_length + body) { // finished reading state = NONE; } @@ -146,40 +152,104 @@ HTTPTransport::onReady() } if(state != NONE) - ThreadPool::instance()->queueWorkRead(this); + msg = NULL; else { - std::cout << request << std::endl << headers << std::endl; - std::cout.write(body, content_length); - std::cout.flush(); + // we have a new message + // XXX - or we have an error, must handle it + msg = new Message(body, content_length); + msg->setProperties( } - + return len; } -void -HTTPTransport::onTimeout() +int +HTTPTransport::parseHeader(const char *s, unsigned int len) { + char *p; + + p = (char*)memccpy((void*)s, (void*)s, ':', len); + + if(!strncasecmp(s, "Content-Length", 14)) { + content_length = p ? 
atoi(p) : 0 ; + } + return(0); } -void -HTTPTransport::onError() +int +HTTPTransport::send(Connection *conn, Message* msg) { + int len; + switch(state) { + case NONE: + state = IN_REQUEST; + request = "POST " + msg->path() + "HTTP/1.1\r\n"; + pos = request.c_str(); + content_length = msg->getContent(body); + + case IN_REQUEST: + len = conn->send(pos, request.length() - pos + request.c_str()); + if(len < 0) { + return len; + } + pos += len; + if(request.c_str() + request.length() == pos) { + state = IN_HEADERS; + prepareHeaders(msg); + pos = headers.c_str(); + } else { + break; + } + + case IN_HEADERS: + len = conn->send(pos, headers.length() - pos + headers.c_str()); + if(len < 0) { + return len; + } + pos += len; + if(headers.c_str() + headers.length() == pos) { + state = IN_BODY; + pos = body; + } else { + break; + } + + case IN_BODY: + len = conn->send(pos, body, content_length - pos + body); + if(len < 0) { + return len; + } + pos += len; + if(body + content_length == pos) { + state = NONE; + return 0; + } + break; + + default: + } + return len; } -int -HTTPTransport::parseHeader(const char *s, unsigned int len) +void +HTTPTransport::reset() { - char *p; - - std::cout << "header: "; - std::cout.write(s, len); - std::cout << std::endl; - std::cout.flush(); - if(!strncasecmp(s, "Content-Length", 14)) { - p = (char*)memccpy((void*)s, (void*)s, ':', len); - content_length = p ? 
atoi(p) : 0 ; + state = NONE; + request.clear(); + headers.clear(); + if(body) { + free(body); + body = NULL; } - return(0); + content_length = 0; + pos = buffer; +} + + +void +HTTPTransport::serializeHeaders(Message *msg); +{ + for(Properties::iterator i = msg-> } diff --git a/org.glite.lb.logger/src-nt/InputChannel.H b/org.glite.lb.logger/src-nt/InputChannel.H new file mode 100644 index 0000000..2bac262 --- /dev/null +++ b/org.glite.lb.logger/src-nt/InputChannel.H @@ -0,0 +1,29 @@ +#ifndef _INPUT_CHANNEL_H_ +#define _INPUT_CHANNEL_H_ + +#include "ThreadPool.H" +#include "Connection.H" +#include "Transport.H" + +class InputChannel + : public ThreadPool::WorkDescription { +public: + + InputChannel(Connection *conn, Transport *trans) + : ThreadPool::WorkDescription(conn->getFD()), + m_connection(conn), m_transport(trans) + {} + + void start(); + +protected: + virtual void onReady(); + virtual void onTimeout(); + virtual void onError(); + +private: + Connection *m_connection; + Transport *m_transport; +}; + +#endif diff --git a/org.glite.lb.logger/src-nt/InputChannel.cpp b/org.glite.lb.logger/src-nt/InputChannel.cpp new file mode 100644 index 0000000..e3ac9c8 --- /dev/null +++ b/org.glite.lb.logger/src-nt/InputChannel.cpp @@ -0,0 +1,37 @@ +#include "InputChannel.H" +#include "ThreadPool.H" +#include "EventManager.H" + +extern EventManager theEventManager; + +void +InputChannel::start() +{ + ThreadPool::instance()->queueWorkRead(this); +} + +void +InputChannel::onReady() +{ + Transport::Message *msg = NULL; + int ret = m_transport->receive(m_connection, msg); + if(ret <= 0) { + // no new data read + } else if(msg) { + // we have a new message + + } else { + // still need more data + ThreadPool::instance()->queueWorkRead(this); + } +} + +void +InputChannel::onTimeout() +{ +} + +void +InputChannel::onError() +{ +} diff --git a/org.glite.lb.logger/src-nt/Makefile b/org.glite.lb.logger/src-nt/Makefile index 2ff9420..8ebe707 100644 --- a/org.glite.lb.logger/src-nt/Makefile 
+++ b/org.glite.lb.logger/src-nt/Makefile @@ -10,9 +10,9 @@ LINK = libtool --mode=link g++ $(LDFLAGS) THREAD_LIB = -lpthread -CPPUNIT_ROOT = /afs/ruk.cuni.cz/home/michal/egee/repository/externals/cppunit/1.10.2/slc3_ia32_gcc323 -CPPUNIT_LIB = -L$(CPPUNIT_ROOT)/lib -lcppunit -ldl -CPPUNIT_INCLUDE = -I$(CPPUNIT_ROOT)/include +CPPUNIT_ROOT = +CPPUNIT_LIB = -lcppunit -ldl +CPPUNIT_INCLUDE = TEST_OBJS= \ test/ThreadPoolTest.o \ @@ -21,13 +21,24 @@ TEST_OBJS= \ test/SingletonTest.o \ test/test_main.o -plain: SocketInput.o PlainConnection.o HTTPTransport.o ThreadPool.o main.o +OBJS = \ + PluginManager.o \ + SocketInput.o \ + Connection.o \ + PlainConnection.o \ + Transport.o \ + HTTPTransport.o \ + ThreadPool.o \ + EventManager.o \ + InputChannel.cpp + +plain: main.o $(OBJS) $(LINK) -o $@ $+ $(THREAD_LIB) utest: ThreadPool.o PluginManager.o EventManager.o $(TEST_OBJS) $(LINK) -o $@ $+ $(CPPUNIT_LIB) $(THREAD_LIB) -stest: test/SingletonTest.o test/test_main.o +stest: EventManager.o test/EventManagerTest.o test/test_main.o $(LINK) -o $@ $+ $(CPPUNIT_LIB) $(THREAD_LIB) $(TEST_OBJS): %.o: %.cpp diff --git a/org.glite.lb.logger/src-nt/Message.H b/org.glite.lb.logger/src-nt/Message.H new file mode 100644 index 0000000..725966a --- /dev/null +++ b/org.glite.lb.logger/src-nt/Message.H @@ -0,0 +1,66 @@ +#ifndef _MESSAGE_H_ +#define _MESSAGE_H + +#include "Properties.H" +#include "MessageStore.H" + +#include + +class Message: public MessageStore::Storable { +public: + + /** class that holds message state + * + */ + class State : public MessageStore::Storable { + public: + + /** Get size needed for storage (from Storable). 
+ */ + virtual int getStorageSize() const; + + /** Save State (from Storable) + */ + virtual int save(void* data, int len) const; + + /** Load State (from Storable) + */ + virtual int load(void* data, int len); + }; + + + Message(); + + Message(void * data, unsigned int length) + : m_length(length), + m_data(data) + {} + + + int getContent(void* &data) const + { data = m_data; return m_length; } + + int getContentLength() const + { return m_length; } + + std::string getProperty(const std::string &name, std::string &val) + { return m_properties.getProperty(name); } + + void setProperty(const std::string &name, std::string &val) + { m_properties.setProperty(name, val); } + + Properties& getProperties() + { return m_properties; } + + void setProperties(Properties &) + {} + +private: + MessageStore::ID m_id; + unsigned int m_length; + void * m_data; + Properties m_properties; +}; + + +#endif diff --git a/org.glite.lb.logger/src-nt/MessageStore.H b/org.glite.lb.logger/src-nt/MessageStore.H new file mode 100644 index 0000000..ff03a9b --- /dev/null +++ b/org.glite.lb.logger/src-nt/MessageStore.H @@ -0,0 +1,84 @@ +#ifndef _MESSAGE_STORE_H_ +#define _MESSAGE_STORE_H_ + +#include + +/** Permanent storage for messages and their states. + */ + +class MessageStore { +public: + + /** Base class for everything that can be stored here. + */ + class Storable { + public: + /** Get size needed for object storage. + */ + virtual int getStorageSize() const = 0; + + /** Save state of object into binary data. + */ + virtual int save(void* data, int len) const = 0; + + /** Load state of object from binary data. + */ + virtual int load(void* data, int len) = 0; + + virtual ~Storable() {} + }; + + + /** Class that uniquely identifies stored content. + */ + class ID: public Storable { + public: + /** Default constructor. + * + * Creates new unique ID. + */ + ID(); + + /** Copy constructor. + */ + ID(const ID& src); + + /** Destructor. + */ + ~ID() {}; + + /** Assignment operator. 
+ */ + ID& operator=(const ID& src); + + /** Return the string suitable for printing. + */ + std::string toString() const; + + /** Comparison operator + */ + int operator==(const ID& second); + + /** Get size needed for storage (from Storable). + */ + virtual int getStorageSize() const; + + /** Save ID (from Storable) + */ + virtual int save(void* data, int len) const; + + /** Load ID (from Storable) + */ + virtual int load(void* data, int len); + + protected: + unsigned long long getID() {return id;} + + private: + static pthread_mutex_t counterLock; + static unsigned counter; + unsigned long long id; + }; +}; + +#endif diff --git a/org.glite.lb.logger/src-nt/MessageStore.cpp b/org.glite.lb.logger/src-nt/MessageStore.cpp new file mode 100644 index 0000000..eb9de7a --- /dev/null +++ b/org.glite.lb.logger/src-nt/MessageStore.cpp @@ -0,0 +1,24 @@ +#include +#include +#include + +#include "MessageStore.H" + +pthread_mutex_t MessageStore::ID::counterLock = PTHREAD_MUTEX_INITIALIZER; +unsigned MessageStore::ID::counter = 0; + +MessageStore::ID::ID(){ + time_t t; + time(&t); + pthread_mutex_lock(&counterLock); + counter++; + id = ((unsigned long long) counter << 32) + t; + pthread_mutex_unlock(&counterLock); +} + +std::string MessageStore::ID::toString() const{ + std::ostringstream oss; + oss << id; + return oss.str(); +} + diff --git a/org.glite.lb.logger/src-nt/PlainConnection.H b/org.glite.lb.logger/src-nt/PlainConnection.H index 3551286..5536095 100644 --- a/org.glite.lb.logger/src-nt/PlainConnection.H +++ b/org.glite.lb.logger/src-nt/PlainConnection.H @@ -2,22 +2,23 @@ #define _PLAIN_CONNECTION_H #include "Connection.H" - +#include "Singleton.H" class PlainConnection: public Connection { public: - class Factory: public Connection::Factory { + class Factory: public Connection::Factory, + public Singleton { public: virtual Connection *newConnection(int fd) const { return new PlainConnection(fd); } virtual Connection *accept(int fd) const; - }; - static Factory 
theFactory; + virtual ~Factory() {} + }; PlainConnection(int a_fd): Connection(a_fd) {} diff --git a/org.glite.lb.logger/src-nt/PlainConnection.cpp b/org.glite.lb.logger/src-nt/PlainConnection.cpp index 156ec6e..31ce792 100644 --- a/org.glite.lb.logger/src-nt/PlainConnection.cpp +++ b/org.glite.lb.logger/src-nt/PlainConnection.cpp @@ -4,9 +4,6 @@ #include #include -PlainConnection::Factory PlainConnection::theFactory; - - PlainConnection::~PlainConnection() { } @@ -35,4 +32,8 @@ PlainConnection::read(char *buf, unsigned int len) int PlainConnection::write(char *buf, unsigned int len) { + int ret; + + ret = ::write(fd, buf, len); + return ret; } diff --git a/org.glite.lb.logger/src-nt/PluginManager.H b/org.glite.lb.logger/src-nt/PluginManager.H index 7b5aa74..1840536 100644 --- a/org.glite.lb.logger/src-nt/PluginManager.H +++ b/org.glite.lb.logger/src-nt/PluginManager.H @@ -21,6 +21,8 @@ public: virtual bool initialize() = 0; virtual bool cleanup () = 0; + + virtual ~Plugin(); }; // add plugin with given name to the list of registered plugins diff --git a/org.glite.lb.logger/src-nt/PluginManager.cpp b/org.glite.lb.logger/src-nt/PluginManager.cpp index df53043..0200737 100644 --- a/org.glite.lb.logger/src-nt/PluginManager.cpp +++ b/org.glite.lb.logger/src-nt/PluginManager.cpp @@ -1,3 +1,6 @@ #include "PluginManager.H" +PluginManager::Plugin::~Plugin() { +} + diff --git a/org.glite.lb.logger/src-nt/Properties.H b/org.glite.lb.logger/src-nt/Properties.H new file mode 100644 index 0000000..77d216d --- /dev/null +++ b/org.glite.lb.logger/src-nt/Properties.H @@ -0,0 +1,36 @@ +#ifndef _PROPERTIES_H_ +#define _PROPERTIES_H_ + +#include +#include + +class Properties { +public: + + // default constructor + Properties() + : properties() + {} + + // accessors + std::string& getProperty(const std::string &key) + { return properties[key]; } + + void setProperty(const std::string &key, std::string &val) + { properties[key] = val; } + + // iterators + typedef std::map::iterator 
iterator; + + iterator begin() + { return properties.begin(); } + + iterator end() + { return properties.end(); } + + +private: + std::map properties; +}; + +#endif diff --git a/org.glite.lb.logger/src-nt/SocketInput.cpp b/org.glite.lb.logger/src-nt/SocketInput.cpp index f38cabb..e553ec8 100644 --- a/org.glite.lb.logger/src-nt/SocketInput.cpp +++ b/org.glite.lb.logger/src-nt/SocketInput.cpp @@ -6,7 +6,8 @@ #include "ThreadPool.H" #include "SocketInput.H" - +#include "InputChannel.H" +#include "Exception.H" // create unix domain socket for input @@ -21,6 +22,7 @@ SocketInput::SocketInput(const char *path, saddr.sun_family = AF_UNIX; strcpy(saddr.sun_path, path); fd = socket(PF_UNIX, SOCK_STREAM, 0); + if(fd < 0) throw new Exception; if(connect(fd, (struct sockaddr*)&saddr, sizeof(saddr.sun_path)) < 0) { if(errno == ECONNREFUSED) { unlink(saddr.sun_path); @@ -29,9 +31,10 @@ SocketInput::SocketInput(const char *path, // another instance running // throw new Exception } - bind(fd, (struct sockaddr *)&saddr, sizeof(saddr)); - listen(fd, SOCK_QUEUE_MAX); - ThreadPool::instance()->setWorkAccept(this); + if(bind(fd, (struct sockaddr *)&saddr, sizeof(saddr)) < 0) + throw new Exception; + if(listen(fd, SOCK_QUEUE_MAX) < 0) + throw new Exception; } @@ -48,8 +51,9 @@ void SocketInput::onReady() { Connection *conn = cFactory->accept(fd); - Transport *trans = tFactory->newTransport(conn); - ThreadPool::instance()->queueWorkRead(trans); + Transport *trans = tFactory->newTransport(); + InputChannel *channel = new InputChannel(conn, trans); + channel->start(); } diff --git a/org.glite.lb.logger/src-nt/ThreadPool.H b/org.glite.lb.logger/src-nt/ThreadPool.H index b03d7e5..1a3b43c 100644 --- a/org.glite.lb.logger/src-nt/ThreadPool.H +++ b/org.glite.lb.logger/src-nt/ThreadPool.H @@ -23,6 +23,8 @@ public: : fd(afd), event(NONE) {} + virtual ~WorkDescription(); + protected: enum Event { NONE, READY, TIMEOUT, ERROR } event; void doWork(); diff --git 
a/org.glite.lb.logger/src-nt/ThreadPool.cpp b/org.glite.lb.logger/src-nt/ThreadPool.cpp index 763e930..be4045c 100644 --- a/org.glite.lb.logger/src-nt/ThreadPool.cpp +++ b/org.glite.lb.logger/src-nt/ThreadPool.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -69,8 +70,12 @@ ThreadPool::WaitDesc::adjustTimeout(const struct timeval &delta) } +ThreadPool::WorkDescription::~WorkDescription() { +} + + ThreadPool::ThreadPool() - : work_count(0), wait_count(0), ufds_size(0), ufds(NULL), f_exit(false) + : f_exit(false), work_count(0), wait_count(0), ufds_size(0), ufds(NULL) { pthread_mutex_init(&wait_queue_mutex, NULL); pthread_mutex_init(&work_queue_mutex, NULL); @@ -79,6 +84,9 @@ ThreadPool::ThreadPool() pthread_cond_init(&wait_queue_cond_ready, NULL); pipe(pd); ufds = static_cast(malloc(sizeof(struct pollfd))); + if(ufds == NULL) { + throw new Exception; + } ufds->fd = pd[0]; ufds->events = POLLIN; ufds_size = 1; @@ -102,6 +110,7 @@ ThreadPool::startWorkers(unsigned int n) num_workers = n; for(unsigned int i = 0; i < n; i++) { + // XXX check return pthread_create(&workers[i], NULL, ThreadPool::threadMain, NULL); } } @@ -121,11 +130,11 @@ ThreadPool::stopWorkers() void ThreadPool::postWork(WorkDescription *work_unit) { - pthread_mutex_lock(&work_queue_mutex); + E_ASSERT(pthread_mutex_lock(&work_queue_mutex) >= 0); work_queue.push_back(work_unit); work_count++; - pthread_cond_signal(&work_queue_cond_ready); - pthread_mutex_unlock(&work_queue_mutex); + E_ASSERT(pthread_cond_signal(&work_queue_cond_ready) >= 0); + E_ASSERT(pthread_mutex_unlock(&work_queue_mutex) >= 0); } @@ -133,12 +142,14 @@ inline void ThreadPool::queueWork(WaitDesc *wd) { - pthread_mutex_lock(&wait_queue_mutex); + E_ASSERT(pthread_mutex_lock(&wait_queue_mutex) >= 0); wait_queue.push_back(wd); wait_count++; - pthread_cond_signal(&wait_queue_cond_ready); - pthread_mutex_unlock(&wait_queue_mutex); - write(pd[1], "1", 1); + E_ASSERT(pthread_cond_signal(&wait_queue_cond_ready) >= 0); + 
E_ASSERT(pthread_mutex_unlock(&wait_queue_mutex) >= 0); + if(write(pd[1], "1", 1) != 1) { + throw new Exception; + } } @@ -211,19 +222,19 @@ ThreadPool::getWork() WorkDescription *work_unit = NULL; struct timespec timeout; - pthread_mutex_lock(&work_queue_mutex); + E_ASSERT(pthread_mutex_lock(&work_queue_mutex) >= 0); if(work_count == 0) { timeout.tv_sec = 1; timeout.tv_nsec = 0; // pthread_cond_timedwait(&work_queue_cond_ready, &work_queue_mutex, &timeout); - pthread_cond_wait(&work_queue_cond_ready, &work_queue_mutex); + E_ASSERT(pthread_cond_wait(&work_queue_cond_ready, &work_queue_mutex) == 0); } if(work_count > 0) { work_count--; work_unit = work_queue.front(); work_queue.pop_front(); } - pthread_mutex_unlock(&work_queue_mutex); + E_ASSERT(pthread_mutex_unlock(&work_queue_mutex) >= 0); return work_unit; } @@ -232,7 +243,7 @@ ThreadPool::threadCleanup(void *data) { ThreadPool *pool = ThreadPool::instance(); - pthread_mutex_unlock(&(pool->work_queue_mutex)); + E_ASSERT(pthread_mutex_unlock(&(pool->work_queue_mutex)) >= 0); } @@ -263,12 +274,12 @@ ThreadPool::removeWaitDesc(std::list::iterator &i) std::list::iterator j = i; // actually this is safe even for the first element - pthread_mutex_lock(&wait_queue_mutex); + E_ASSERT(pthread_mutex_lock(&wait_queue_mutex) >= 0); j--; wait_queue.erase(i); wait_count--; i = j; - pthread_mutex_unlock(&wait_queue_mutex); + E_ASSERT(pthread_mutex_unlock(&wait_queue_mutex) >= 0); } @@ -278,18 +289,18 @@ ThreadPool::prepareDescriptorArray() std::list::iterator theIterator; struct pollfd *p; - pthread_mutex_lock(&wait_queue_mutex); + E_ASSERT(pthread_mutex_lock(&wait_queue_mutex) >= 0); if(wait_count == 0) { - pthread_cond_wait(&wait_queue_cond_ready, &wait_queue_mutex); + E_ASSERT(pthread_cond_wait(&wait_queue_cond_ready, &wait_queue_mutex) != 0); } if(wait_count == 0) { - pthread_mutex_unlock(&wait_queue_mutex); + E_ASSERT(pthread_mutex_unlock(&wait_queue_mutex) >= 0); return; } if(ufds_size != wait_count + 1) { ufds = 
static_cast(realloc(ufds, (1 + wait_count) * sizeof(struct pollfd))); if(ufds == NULL) { -// throw new Exception(); + throw new Exception(); } ufds_size = wait_count + 1; } @@ -305,7 +316,7 @@ ThreadPool::prepareDescriptorArray() min_timeout = w->timeout; } } - pthread_mutex_unlock(&wait_queue_mutex); + E_ASSERT(pthread_mutex_unlock(&wait_queue_mutex) >= 0); } @@ -338,9 +349,9 @@ ThreadPool::run() } // at least we have to adjust timeouts - pthread_mutex_lock(&wait_queue_mutex); + E_ASSERT(pthread_mutex_lock(&wait_queue_mutex) >= 0); i = wait_queue.begin(); - pthread_mutex_unlock(&wait_queue_mutex); + E_ASSERT(pthread_mutex_unlock(&wait_queue_mutex) >= 0); // the wait queue mutex is unlocked inside the loop // to allow handlers to add queue new // WorkDescriptions - these are added at the @@ -352,7 +363,7 @@ ThreadPool::run() // check for consistency if(p->fd != w->get_fd()) { // mismatch, what shall we do? - abort(); + throw new Exception; } // subtract the time passed from timeout @@ -389,12 +400,13 @@ ThreadPool::run() w->timeout.tv_usec = 0; } } - pthread_mutex_lock(&wait_queue_mutex); + E_ASSERT(pthread_mutex_lock(&wait_queue_mutex) >= 0); i++; - pthread_mutex_unlock(&wait_queue_mutex); + E_ASSERT(pthread_mutex_unlock(&wait_queue_mutex) >= 0); } } else { // some nasty error + throw new Exception; } } } diff --git a/org.glite.lb.logger/src-nt/Transport.H b/org.glite.lb.logger/src-nt/Transport.H index 974b8ac..1b6516d 100644 --- a/org.glite.lb.logger/src-nt/Transport.H +++ b/org.glite.lb.logger/src-nt/Transport.H @@ -2,25 +2,32 @@ #define _TRANSPORT_H #include "Connection.H" +#include "Message.H" - -class Transport: public ThreadPool::WorkDescription { +// Transport implements transport protocol +// - reads/writes messages using Connection interface +// - +class Transport { public: + + // class Factory { public: - virtual Transport *newTransport(Connection *conn) const = 0; + virtual Transport *newTransport() const = 0; + + virtual ~Factory() + {} }; - 
Transport(Connection *a_conn) - : conn(a_conn), - ThreadPool::WorkDescription(a_conn ? a_conn->getFD() : -1) + // + Transport() {} - virtual ~Transport() - { if(conn) delete conn; } - -protected: - Connection *conn; + virtual ~Transport(); + // + virtual int receive(Connection *conn, Message* &msg) = 0; + virtual int send(Connection *conn, Message* msg) = 0; + virtual void reset() = 0; }; #endif diff --git a/org.glite.lb.logger/src-nt/Transport.cpp b/org.glite.lb.logger/src-nt/Transport.cpp new file mode 100644 index 0000000..2544997 --- /dev/null +++ b/org.glite.lb.logger/src-nt/Transport.cpp @@ -0,0 +1,5 @@ +#include "Transport.H" + +Transport::~Transport() +{ +} diff --git a/org.glite.lb.logger/src-nt/main.cpp b/org.glite.lb.logger/src-nt/main.cpp index aa7ac45..b2fed11 100644 --- a/org.glite.lb.logger/src-nt/main.cpp +++ b/org.glite.lb.logger/src-nt/main.cpp @@ -3,10 +3,13 @@ #include "SocketInput.H" #include "PlainConnection.H" #include "HTTPTransport.H" +#include "EventManager.H" const int num_threads = 2; const char *sock_path = "/tmp/il_sock"; +EventManager theEventManager(); + int main(int argc, char *argv[]) { SocketInput *input; @@ -16,8 +19,10 @@ int main(int argc, char *argv[]) // create unix socket with plain IO and HTTP transport input = new SocketInput(sock_path, - &PlainConnection::theFactory, - &HTTPTransport::theFactory); + PlainConnection::Factory::instance(), + HTTPTransport::Factory::instance()); + // and add the socket to pool + ThreadPool::instance()->setWorkAccept(input); // start worker threads ThreadPool::instance()->startWorkers(num_threads); diff --git a/org.glite.lb.logger/src-nt/test/EventManagerTest.cpp b/org.glite.lb.logger/src-nt/test/EventManagerTest.cpp index bedaa03..2d55586 100644 --- a/org.glite.lb.logger/src-nt/test/EventManagerTest.cpp +++ b/org.glite.lb.logger/src-nt/test/EventManagerTest.cpp @@ -2,17 +2,44 @@ #include "EventManager.H" +class EventA : public Event { +}; + +class EventB : public Event { +}; + +class EventAA 
: public EventA { +}; + + class EventManagerTest: public CppUnit::TestFixture { CPPUNIT_TEST_SUITE(EventManagerTest); + CPPUNIT_TEST(handleEventTest); CPPUNIT_TEST_SUITE_END(); public: void setUp() { + handled = false; + manager.registerHandler(this); } void tearDown() { } + void handleEventTest() { + Event *e = new EventAA(); + manager.postEvent(e); + CPPUNIT_ASSERT(handled); + } + + int handleEvent(EventA* &e) { + handled = true; + return 0; + } + +private: + EventManager manager; + bool handled; }; CPPUNIT_TEST_SUITE_REGISTRATION( EventManagerTest ); diff --git a/org.glite.lb.logger/src-nt/test/ThreadPoolTest.cpp b/org.glite.lb.logger/src-nt/test/ThreadPoolTest.cpp index a4be75e..4a205ec 100644 --- a/org.glite.lb.logger/src-nt/test/ThreadPoolTest.cpp +++ b/org.glite.lb.logger/src-nt/test/ThreadPoolTest.cpp @@ -134,7 +134,7 @@ public: class ThreadPoolTest: public CppUnit::TestFixture { CPPUNIT_TEST_SUITE( ThreadPoolTest ); -// CPPUNIT_TEST( testWorkQueue ); + CPPUNIT_TEST( testWorkQueue ); CPPUNIT_TEST( testPoll ); CPPUNIT_TEST( testAccept ); CPPUNIT_TEST_SUITE_END(); diff --git a/org.glite.lb.logger/src/event_queue.c b/org.glite.lb.logger/src/event_queue.c index fcfcfbc..702e614 100644 --- a/org.glite.lb.logger/src/event_queue.c +++ b/org.glite.lb.logger/src/event_queue.c @@ -44,10 +44,10 @@ event_queue_create(char *server_name) if(p) *(p-1) = ':'; -#if defined(IL_NOTIFICATIONS) +#if defined(IL_NOTIFICATIONS) || defined(IL_WS) eq->dest_port = atoi(p); #else - eq->dest_port = p ? atoi(p)+1 : GLITE_WMSC_JOBID_DEFAULT_PORT+1; + eq->dest_port = p ? 
atoi(p)+1 : GLITE_JOBID_DEFAULT_PORT+1; #endif /* create all necessary locks */ if(pthread_rwlock_init(&eq->update_lock, NULL)) { @@ -291,10 +291,10 @@ event_queue_move_events(struct event_queue *eq_s, eq_s->tail = NULL; while(p) { if((*cmp_func)(p->msg, data)) { - il_log(LOG_DEBUG, " moving event at offset %d from %s:%d to %s:%d\n", - p->msg->offset, eq_s->dest_name, eq_s->dest_port, + il_log(LOG_DEBUG, " moving event at offset %d(%d) from %s:%d to %s:%d\n", + p->msg->offset, p->msg->generation, eq_s->dest_name, eq_s->dest_port, eq_d ? eq_d->dest_name : "trash", eq_d ? eq_d->dest_port : -1); - il_log(LOG_DEBUG, " current: %x, next: %x\n", p, p->prev); + /* il_log(LOG_DEBUG, " current: %x, next: %x\n", p, p->prev); */ /* remove the message from the source list */ *source_prev = p->prev; if(eq_d) { @@ -304,6 +304,9 @@ event_queue_move_events(struct event_queue *eq_s, dest_tail = &(p->prev); eq_d->tail = p; } else { + /* signal that the message was 'delivered' */ + event_store_commit(p->msg->es, p->msg->ev_len, queue_list_is_log(eq_s), + p->msg->generation); /* free the message */ server_msg_free(p->msg); free(p); diff --git a/org.glite.lb.logger/src/event_store.c b/org.glite.lb.logger/src/event_store.c index d919945..e584e89 100644 --- a/org.glite.lb.logger/src/event_store.c +++ b/org.glite.lb.logger/src/event_store.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -12,8 +13,8 @@ #include #endif #include +#include -#include "glite/lb/consumer.h" #include "glite/lb/events_parse.h" #include "interlogd.h" @@ -29,7 +30,9 @@ static char *file_prefix = NULL; struct event_store_list { struct event_store *es; - struct event_store_list *next; + struct event_store_list *next; // LL of event_store's + struct event_store_list *jobid_next; /* double LL of rotated stores - forward */ + struct event_store_list *jobid_prev; /* double LL of rotated stores - backward */ }; @@ -43,6 +46,20 @@ static pthread_rwlock_t store_list_lock = 
PTHREAD_RWLOCK_INITIALIZER; */ static char * +astrcat(const char *s1, const char *s2) +{ + char *s = malloc(strlen(s1) + strlen(s2) + 1); + if(s == NULL) + return NULL; + *s = 0; + strcat(s, s1); + strcat(s, s2); + return s; +} + + +static +char * jobid2eventfile(IL_EVENT_ID_T job_id) { char *buffer; @@ -52,9 +69,9 @@ jobid2eventfile(IL_EVENT_ID_T job_id) hash = IL_EVENT_GET_UNIQUE(job_id); asprintf(&buffer, "%s.%s", file_prefix, hash); free(hash); - } else + } else asprintf(&buffer, "%s.default", file_prefix); - + return(buffer); } @@ -70,12 +87,33 @@ jobid2controlfile(IL_EVENT_ID_T job_id) hash = IL_EVENT_GET_UNIQUE(job_id); snprintf(buffer, 256, "%s.%s.ctl", file_prefix, hash); free(hash); - } else + } else snprintf(buffer, 256, "%s.default.ctl", file_prefix); - + return(strdup(buffer)); } +static +long long +fname2index(const char *filename) +{ + char *p = rindex(filename, '.'); + char *s; + long long ret; + + if(p == NULL) + return 0; + + for(s = p+1; *s != 0; s++) { + if(*s < '0' || *s > '9') { + return 0; + } + } + + sscanf(p+1,"%lld",&ret); + return ret+1; +} + static char * @@ -93,7 +131,7 @@ read_event_string(FILE *file) len = 1024; while((c=fgetc(file)) != EOF) { - + /* we have to have free room for one byte */ /* if(len - (p - buffer) < 1) */ if(p - buffer >= len) { @@ -112,7 +150,7 @@ read_event_string(FILE *file) *p++ = 0; break; } else - *p++ = (char) c; + *p++ = (char) c; } if(c != EVENT_SEPARATOR) { @@ -139,7 +177,8 @@ event_store_free(struct event_store *es) if(es->event_file_name) free(es->event_file_name); if(es->control_file_name) free(es->control_file_name); pthread_rwlock_destroy(&es->use_lock); - pthread_rwlock_destroy(&es->update_lock); + pthread_rwlock_destroy(&es->commit_lock); + pthread_rwlock_destroy(&es->offset_lock); free(es); return(0); @@ -148,7 +187,7 @@ event_store_free(struct event_store *es) static struct event_store * -event_store_create(char *job_id_s) +event_store_create(char *job_id_s, const char *filename) { struct 
event_store *es; IL_EVENT_ID_T job_id; @@ -161,8 +200,6 @@ event_store_create(char *job_id_s) memset(es, 0, sizeof(*es)); - il_log(LOG_DEBUG, " creating event store for id %s\n", job_id_s); - job_id = NULL; if(strcmp(job_id_s, "default") && IL_EVENT_ID_PARSE(job_id_s, &job_id)) { set_error(IL_LBAPI, EDG_WLL_ERROR_PARSE_BROKEN_ULM, "event_store_create: error parsing id"); @@ -171,13 +208,19 @@ event_store_create(char *job_id_s) } es->job_id_s = strdup(job_id_s); - es->event_file_name = jobid2eventfile(job_id); - es->control_file_name = jobid2controlfile(job_id); + es->event_file_name = filename ? strdup(filename) : jobid2eventfile(job_id); + es->control_file_name = filename ? astrcat(filename, ".ctl") : jobid2controlfile(job_id); + es->rotate_index = filename ? fname2index(filename) : 0; IL_EVENT_ID_FREE(job_id); - if(pthread_rwlock_init(&es->update_lock, NULL)) + il_log(LOG_DEBUG, " creating event store for id %s, filename %s, rotate index %lld\n", + job_id_s, es->event_file_name, es->rotate_index); + + if(pthread_rwlock_init(&es->commit_lock, NULL)) + abort(); + if(pthread_rwlock_init(&es->offset_lock, NULL)) abort(); - if(pthread_rwlock_init(&es->use_lock, NULL)) + if(pthread_rwlock_init(&es->use_lock, NULL)) abort(); return(es); @@ -190,7 +233,7 @@ event_store_lock_ro(struct event_store *es) { assert(es != NULL); - if(pthread_rwlock_rdlock(&es->update_lock)) + if(pthread_rwlock_rdlock(&es->commit_lock)) abort(); return(0); @@ -203,7 +246,7 @@ event_store_lock(struct event_store *es) { assert(es != NULL); - if(pthread_rwlock_wrlock(&es->update_lock)) + if(pthread_rwlock_wrlock(&es->commit_lock)) abort(); return(0); @@ -216,7 +259,7 @@ event_store_unlock(struct event_store *es) { assert(es != NULL); - if(pthread_rwlock_unlock(&es->update_lock)) + if(pthread_rwlock_unlock(&es->commit_lock)) abort(); return(0); } @@ -262,8 +305,8 @@ event_store_write_ctl(struct event_store *es) return(-1); } - if(fprintf(ctl, "%s\n%ld\n%ld\n", - es->job_id_s, + if(fprintf(ctl, 
"%s\n%ld\n%ld\n", + es->job_id_s, es->last_committed_ls, es->last_committed_bs) < 0) { set_error(IL_SYS, errno, "event_store_write_ctl: error writing control record"); @@ -280,20 +323,23 @@ event_store_write_ctl(struct event_store *es) /* - * event_store_qurantine() - * - rename damaged event store file - * - essentially does the same actions as cleanup, but the event store + * event_store_qurantine() + * - rename damaged event store file + * - essentially does the same actions as cleanup, but the event store * does not have to be empty * returns 0 on success, -1 on error */ static int -event_store_quarantine(struct event_store *es) +event_store_quarantine(struct event_store *es) { + // TODO enable cleanup of quarantined event_store struct + // TODO handle file rotation + int num; char newname[MAXPATHLEN+1]; - /* find available qurantine name */ + /* find available quarantine name */ /* we give it at most 1024 tries */ for(num = 0; num < 1024; num++) { struct stat st; @@ -306,9 +352,9 @@ event_store_quarantine(struct event_store *es) break; } else { /* some other error with name, probably permanent */ - set_error(IL_SYS, errno, "event_store_qurantine: error looking for qurantine filename"); + set_error(IL_SYS, errno, "event_store_qurantine: error looking for quarantine filename"); return(-1); - + } } else { /* the filename is used already */ @@ -334,11 +380,109 @@ event_store_quarantine(struct event_store *es) es->last_committed_bs = 0; es->offset = 0; + /* increase cleanup count, this will invalidate all commits from previous generation */ + es->generation++; + return(0); } /* + * event_store_rotate_file() + * returns 0 on success, -1 on error + */ +static +int +event_store_rotate_file(struct event_store *es) +{ + int num; + time_t timestamp = time(NULL); + char newname[MAXPATHLEN+1]; + + /* do not rotate already rotated files */ + if(es->rotate_index > 0) + return 0; + + /* find available name */ + /* we give it at most 256 tries */ + for(num = 0; num < 256; 
num++) { + struct stat st; + + snprintf(newname, MAXPATHLEN, "%s.%d%03d", es->event_file_name, timestamp, num); + newname[MAXPATHLEN] = 0; + if(stat(newname, &st) < 0) { + if(errno == ENOENT) { + /* file not found */ + break; + } else { + /* some other error with name, probably permanent */ + set_error(IL_SYS, errno, "event_store_rotate_file: error looking for available filename"); + return(-1); + + } + } else { + /* the filename is used already */ + } + } + if(num >= 1024) { + /* new name not found */ + /* XXX - is there more suitable error? */ + set_error(IL_SYS, ENOSPC, "event_store_quarantine: exhausted number of retries looking for quarantine filename"); + return(-1); + } + + /* actually rename the file */ + il_log(LOG_DEBUG, " renaming too large event file from %s to %s\n", + es->event_file_name, newname); + if(rename(es->event_file_name, newname) < 0) { + set_error(IL_SYS, errno, "event_store_rotate_file: error renaming event file"); + return(-1); + } + + /* change names in event_store */ + es->event_file_name = strdup(newname); + es->control_file_name = astrcat(newname, ".ctl"); + es->rotate_index = 1000*timestamp + num + 1; + + return(0); +} + + +/* + * event_store_recover_jobid() + * - recover all event stores for given jobid + */ +static +int +event_store_recover_jobid(struct event_store *es) +{ + // es is locked for use already + struct event_store_list *p = es->le; + + do { + event_store_recover(p->es); + if(p != es->le ) { + event_store_release(p->es); + } + + if(pthread_rwlock_rdlock(&store_list_lock)) + abort(); + p = p->jobid_next; + if(p != es->le) { + if(pthread_rwlock_rdlock(&p->es->use_lock)) + abort(); + } + if(pthread_rwlock_unlock(&store_list_lock)) + abort(); + + + } while(p != es->le); + + return 0; +} + + +/* * event_store_recover() * - recover after restart or catch up when events missing in IPC * - if offset > 0, read everything behind it @@ -358,13 +502,13 @@ event_store_recover(struct event_store *es) struct stat stbuf; assert(es != 
NULL); - + #if defined(IL_NOTIFICATIONS) /* destination queue has to be found for each message separately */ #else - /* find bookkepping server queue */ + /* find bookkeeping server queue */ eq_b = queue_list_get(es->job_id_s); - if(eq_b == NULL) + if(eq_b == NULL) return(-1); #endif @@ -373,18 +517,23 @@ event_store_recover(struct event_store *es) eq_l = queue_list_get(NULL); #endif + /* lock the event_store and offset locks */ event_store_lock(es); + if(pthread_rwlock_wrlock(&es->offset_lock)) + abort(); il_log(LOG_DEBUG, " reading events from %s\n", es->event_file_name); /* open event file */ ef = fopen(es->event_file_name, "r"); if(ef == NULL) { - snprintf(err_msg, sizeof(err_msg), + snprintf(err_msg, sizeof(err_msg), "event_store_recover: error opening event file %s", es->event_file_name); set_error(IL_SYS, errno, err_msg); event_store_unlock(es); + if(pthread_rwlock_unlock(&es->offset_lock)) + abort(); return(-1); } @@ -395,11 +544,13 @@ event_store_recover(struct event_store *es) efl.l_start = 0; efl.l_len = 0; if(fcntl(fd, F_SETLKW, &efl) < 0) { - snprintf(err_msg, sizeof(err_msg), + snprintf(err_msg, sizeof(err_msg), "event_store_recover: error locking event file %s", es->event_file_name); set_error(IL_SYS, errno, err_msg); event_store_unlock(es); + if(pthread_rwlock_unlock(&es->offset_lock)) + abort(); fclose(ef); return(-1); } @@ -410,16 +561,25 @@ event_store_recover(struct event_store *es) il_log(LOG_ERR, " could not stat event file %s: %s\n", es->event_file_name, strerror(errno)); fclose(ef); event_store_unlock(es); + if(pthread_rwlock_unlock(&es->offset_lock)) + abort(); return -1; } else { if((es->offset == stbuf.st_size) && (es->last_modified == stbuf.st_mtime)) { il_log(LOG_DEBUG, " event file not modified since last visit, skipping\n"); fclose(ef); event_store_unlock(es); + if(pthread_rwlock_unlock(&es->offset_lock)) + abort(); return(0); } } + /* check the file size, rename it if it is bigger than max_store_size */ + if(max_store_size > 0 && 
stbuf.st_size > max_store_size) { + event_store_rotate_file(es); + } + while(1) { /* try, try, try */ /* get the position in file to be sought */ @@ -427,7 +587,7 @@ event_store_recover(struct event_store *es) last = es->offset; else { #if !defined(IL_NOTIFICATIONS) - if(eq_b == eq_l) + if(eq_b == eq_l) last = es->last_committed_ls; else #endif @@ -454,13 +614,15 @@ event_store_recover(struct event_store *es) set_error(IL_SYS, errno, "event_store_recover: error setting position for read"); event_store_unlock(es); fclose(ef); + if(pthread_rwlock_unlock(&es->offset_lock)) + abort(); return(-1); } /* the last enqueued event MUST end with EVENT_SEPARATOR, even if the offset points at EOF */ if((c=fgetc(ef)) != EVENT_SEPARATOR) { /* Houston, we have got a problem */ - il_log(LOG_WARNING, + il_log(LOG_WARNING, " file position %ld does not point at the beginning of event string, backing off!\n", last); /* now, where were we? */ @@ -484,20 +646,36 @@ event_store_recover(struct event_store *es) set_error(IL_SYS, errno, "event_store_recover: error setting position for read"); event_store_unlock(es); fclose(ef); + if(pthread_rwlock_unlock(&es->offset_lock)) + abort(); return(-1); } break; } } + /* now we have: + * - event file opened at position 'last' + * - offset and last_committed_* potentially reset to zero + */ + + /* release lock on commits, offset remains locked; + * other threads are allowed to send/remove events, but not insert + */ + event_store_unlock(es); + /* enqueue all remaining events */ ret = 1; msg = NULL; while((event_s=read_event_string(ef)) != NULL) { - + long last_ls, last_bs; + /* last holds the starting position of event_s in file */ il_log(LOG_DEBUG, " reading event at %ld\n", last); + last_ls = es->last_committed_ls; + last_bs = es->last_committed_bs; + /* break from now on means there was some error */ ret = -1; @@ -515,34 +693,51 @@ event_store_recover(struct event_store *es) /* actually do not bother if quarantine succeeded or not - we could not 
do more */ event_store_quarantine(es); fclose(ef); - event_store_unlock(es); + if(pthread_rwlock_unlock(&es->offset_lock)) + abort(); return(-1); } msg->es = es; + msg->generation = es->generation; /* first enqueue to the LS */ - if(!bs_only && (last >= es->last_committed_ls)) { - - il_log(LOG_DEBUG, " queueing event at %ld to logging server\n", last); + if(!bs_only && (last >= last_ls)) { + + il_log(LOG_DEBUG, " queuing event at %ld to logging server\n", last); #if !defined(IL_NOTIFICATIONS) if(enqueue_msg(eq_l, msg) < 0) break; #endif - } + } #ifdef IL_NOTIFICATIONS eq_b = queue_list_get(msg->dest); + /* if the message does not have destination itself, use destination cached for notification id */ + if(eq_b == NULL) { + eq_b = notifid_map_get_dest(msg->job_id_s); + if(eq_b == NULL) { + /* message has no destination and no destination is known for notification id, + * commit it immediately + */ + il_log(LOG_DEBUG, " message has no known destination, will not be sent\n"); + event_store_commit(es, msg->ev_len, 0, msg->generation); + /* if the expiration changed, set new one now, message will be discarded soon */ + if(msg->expires != notifid_map_get_expiration(msg->job_id_s)) { + notifid_map_set_expiration(msg->job_id_s, msg->expires); + } + } + } #endif /* now enqueue to the BS, if neccessary */ - if((eq_b != eq_l) && - (last >= es->last_committed_bs)) { - + if((eq_b != eq_l) && + (last >= last_bs)) { + il_log(LOG_DEBUG, " queueing event at %ld to bookkeeping server\n", last); - + if(enqueue_msg(eq_b, msg) < 0) - break; + break; } server_msg_free(msg); msg = NULL; @@ -555,18 +750,19 @@ event_store_recover(struct event_store *es) } /* while */ - /* due to this little assignment we had to lock the event_store for writing */ es->offset = last; es->last_modified = stbuf.st_mtime; il_log(LOG_DEBUG, " event store offset set to %ld\n", last); - if(msg) + if(msg) server_msg_free(msg); fclose(ef); il_log(LOG_DEBUG, " finished reading events with %d\n", ret); - 
event_store_unlock(es); + if(pthread_rwlock_unlock(&es->offset_lock)) + abort(); + return(ret); } @@ -590,13 +786,13 @@ event_store_sync(struct event_store *es, long offset) * event will be read from file, socket now serves only to notify * about possible event file change. */ - ret = event_store_recover(es); + ret = event_store_recover_jobid(es); ret = (ret < 0) ? ret : 0; return(ret); #if 0 event_store_lock_ro(es); - if(es->offset == offset) + if(es->offset == offset) /* we are up to date */ ret = 1; else if(es->offset > offset) @@ -606,9 +802,9 @@ event_store_sync(struct event_store *es, long offset) /* es->offset < offset, i.e. we have missed some events */ event_store_unlock(es); ret = event_store_recover(es); - /* XXX possible room for intervention by another thread - is there - * any other thread messing with us? - * 1) After recover() es->offset is set at the end of file. + /* XXX possible room for intervention by another thread - is there + * any other thread messing with us? + * 1) After recover() es->offset is set at the end of file. * 2) es->offset is set only by recover() and next(). * 3) Additional recover can not do much harm. * 4) And next() is only called by the same thread as sync(). @@ -617,7 +813,7 @@ event_store_sync(struct event_store *es, long offset) event_store_lock_ro(es); if(ret < 0) ret = -1; - else + else if(es->offset <= offset) { /* Apparently there is something wrong - we are receiving an event * which is beyond the end of file. Someone must have removed the file @@ -642,16 +838,16 @@ int event_store_next(struct event_store *es, long offset, int len) { assert(es != NULL); - + /* Commented out due to the fact that offset as received on socket * has little to do with real event file at the moment. es->offset * handling is left solely to the event_store_recover(). */ - + #if 0 event_store_lock(es); /* Whoa, be careful now. 
The es->offset points right after the last enqueued event, - * but it may not be the offset of the event WE have just enqueued, because:! + * but it may not be the offset of the event WE have just enqueued, because:! * 1) someone could have removed the event file behind our back * 2) the file could have been recover()ed and more events read * In either case the offset should not be moved. @@ -666,15 +862,22 @@ event_store_next(struct event_store *es, long offset, int len) } -/* +/* * event_store_commit() * */ int -event_store_commit(struct event_store *es, int len, int ls) +event_store_commit(struct event_store *es, int len, int ls, int generation) { assert(es != NULL); + /* do not move counters if event store with this message was cleaned up + * (this can happen only when moving to quarantine) + */ + /* XXX - assume int access is atomic */ + if(generation != es->generation) + return 0; + event_store_lock(es); if(ls) @@ -704,7 +907,7 @@ event_store_commit(struct event_store *es, int len, int ls) * Q: How do we know that we can safely remove the files? * A: When all events from file have been committed both by LS and BS. */ -static +static int event_store_clean(struct event_store *es) { @@ -717,7 +920,7 @@ event_store_clean(struct event_store *es) /* prevent sender threads from updating */ event_store_lock(es); - + il_log(LOG_DEBUG, " trying to cleanup event store %s\n", es->job_id_s); il_log(LOG_DEBUG, " bytes sent to logging server: %d\n", es->last_committed_ls); il_log(LOG_DEBUG, " bytes sent to bookkeeping server: %d\n", es->last_committed_bs); @@ -730,7 +933,12 @@ event_store_clean(struct event_store *es) return(0); } - /* the file can only be removed when all the events were succesfully sent + if(fd = pthread_rwlock_wrlock(&es->offset_lock)) { + fprintf(stderr, "Fatal locking error: %s\n", strerror(fd)); + abort(); + } + + /* the file can only be removed when all the events were succesfully sent (ie. 
committed both by LS and BS */ /* That also implies that the event queues are 'empty' at the moment. */ ef = fopen(es->event_file_name, "r+"); @@ -738,12 +946,14 @@ event_store_clean(struct event_store *es) /* if we can not open the event store, it is an error and the struct should be removed */ /* XXX - is it true? */ event_store_unlock(es); + if(pthread_rwlock_unlock(&es->offset_lock)) + abort(); il_log(LOG_ERR, " event_store_clean: error opening event file: %s\n", strerror(errno)); return(1); } - + fd = fileno(ef); - + /* prevent local-logger from writing into event file */ efl.l_type = F_WRLCK; efl.l_whence = SEEK_SET; @@ -753,6 +963,8 @@ event_store_clean(struct event_store *es) il_log(LOG_DEBUG, " could not lock event file, cleanup aborted\n"); fclose(ef); event_store_unlock(es); + if(pthread_rwlock_unlock(&es->offset_lock)) + abort(); if(errno != EACCES && errno != EAGAIN) { set_error(IL_SYS, errno, "event_store_clean: error locking event file"); @@ -760,22 +972,26 @@ event_store_clean(struct event_store *es) } return(0); } - + /* now the file should not contain partially written event, so it is safe to get offset behind last event by seeking the end of file */ if(fseek(ef, 0, SEEK_END) < 0) { set_error(IL_SYS, errno, "event_store_clean: error seeking the end of file"); event_store_unlock(es); + if(pthread_rwlock_unlock(&es->offset_lock)) + abort(); fclose(ef); return(-1); } - + last = ftell(ef); il_log(LOG_DEBUG, " total bytes in file: %d\n", last); if(es->last_committed_ls < last) { fclose(ef); event_store_unlock(es); + if(pthread_rwlock_unlock(&es->offset_lock)) + abort(); il_log(LOG_DEBUG, " events still waiting in queue, cleanup aborted\n"); return(0); } else if( es->last_committed_ls > last) { @@ -784,23 +1000,31 @@ event_store_clean(struct event_store *es) some undelivered events referring to that event store */ fclose(ef); event_store_unlock(es); + if(pthread_rwlock_unlock(&es->offset_lock)) + abort(); return(0); } - + /* now we are sure that all 
events were sent and the event queues are empty */ il_log(LOG_INFO, " removing event file %s\n", es->event_file_name); - + /* remove the event file */ unlink(es->event_file_name); unlink(es->control_file_name); - + /* clear the counters */ es->last_committed_ls = 0; es->last_committed_bs = 0; es->offset = 0; + /* increasing the generation count is rather pointless here, because there + are no messages waiting in the queue that would be invalidated */ + /* es->generation++ */ + /* unlock the event_store even if it is going to be removed */ event_store_unlock(es); + if(pthread_rwlock_unlock(&es->offset_lock)) + abort(); /* close the event file (that unlocks it as well) */ fclose(ef); @@ -815,9 +1039,9 @@ event_store_clean(struct event_store *es) * -------------------------------- */ struct event_store * -event_store_find(char *job_id_s) +event_store_find(char *job_id_s, const char *filename) { - struct event_store_list *q, *p; + struct event_store_list *q, *p, *d; struct event_store *es; if(pthread_rwlock_wrlock(&store_list_lock)) { @@ -825,27 +1049,31 @@ event_store_find(char *job_id_s) } es = NULL; - - q = NULL; + + d = NULL; p = store_list; - + while(p) { if(strcmp(p->es->job_id_s, job_id_s) == 0) { - es = p->es; - if(pthread_rwlock_rdlock(&es->use_lock)) - abort(); - if(pthread_rwlock_unlock(&store_list_lock)) - abort(); - return(es); + es = p->es; + d = p; + // if filename was given, compare it as well + if((filename == NULL && p->es->rotate_index == 0) || + (filename != NULL && strcmp(p->es->event_file_name, filename) == 0)) { + if(pthread_rwlock_rdlock(&es->use_lock)) + abort(); + if(pthread_rwlock_unlock(&store_list_lock)) + abort(); + return(es); + } } - - q = p; p = p->next; } - es = event_store_create(job_id_s); + // event store for given jobid and filename was not found, create one + es = event_store_create(job_id_s, filename); if(es == NULL) { - if(pthread_rwlock_unlock(&store_list_lock)) + if(pthread_rwlock_unlock(&store_list_lock)) abort(); 
return(NULL); } @@ -853,20 +1081,66 @@ event_store_find(char *job_id_s) p = malloc(sizeof(*p)); if(p == NULL) { set_error(IL_NOMEM, ENOMEM, "event_store_find: no room for new event store"); - if(pthread_rwlock_unlock(&store_list_lock)) + if(pthread_rwlock_unlock(&store_list_lock)) abort(); return(NULL); } - - p->next = store_list; - store_list = p; - p->es = es; + p->jobid_next = p; + p->jobid_prev = p; + es->le = p; + + if(filename != NULL && d != NULL) { + // there is another event store for this jobid; + // d points to the last event store for this jobid in LL + // find proper place to insert new event store + if(p->es->rotate_index == 0) { + // insert behind d in LL + p->next = d->next; + d->next = p; + // insert behind d in jobid LL + p->jobid_next = d->jobid_next; + p->jobid_prev = d; + d->jobid_next->jobid_prev = p; + d->jobid_next = p; + } else { + struct event_store_list *r; + q = NULL; + for(r = d->jobid_next; r != d->jobid_next; r = r->jobid_next) { + if(p->es->rotate_index < r->es->rotate_index) + break; + if(r->es->rotate_index > 0) + q = r; + } + // q has the last lesser non-zero index than p + if(q == NULL) { + p->next = store_list; + store_list = p; + // insert behind d + p->jobid_next = d->jobid_next; + p->jobid_prev = d; + d->jobid_next->jobid_prev = p; + d->jobid_next = p; + } else { + p->next = q->next; + q->next = p; + // insert behind q + p->jobid_next = q->jobid_next; + p->jobid_prev = q; + q->jobid_next->jobid_prev = p; + q->jobid_next = p; + } + } + } else { + // insert at the beginning + p->next = store_list; + store_list = p; + } if(pthread_rwlock_rdlock(&es->use_lock)) abort(); - if(pthread_rwlock_unlock(&store_list_lock)) + if(pthread_rwlock_unlock(&store_list_lock)) abort(); return(es); @@ -880,7 +1154,7 @@ event_store_release(struct event_store *es) if(pthread_rwlock_unlock(&es->use_lock)) abort(); - il_log(LOG_DEBUG, " released lock on %s\n", es->job_id_s); + il_log(LOG_DEBUG, " released lock on %s (%s)\n", es->job_id_s, 
es->event_file_name); return(0); } @@ -898,11 +1172,10 @@ event_store_from_file(char *filename) edg_wll_Context context; char *dest_name = NULL; - edg_wll_InitContext(&context); #endif - + il_log(LOG_INFO, " attaching to event file: %s\n", filename); - + if(strstr(filename, "quarantine") != NULL) { il_log(LOG_INFO, " file name belongs to quarantine, not touching that.\n"); return(0); @@ -915,17 +1188,20 @@ event_store_from_file(char *filename) } event_s = read_event_string(event_file); fclose(event_file); - if(event_s == NULL) + if(event_s == NULL) return(0); - + #if defined(IL_NOTIFICATIONS) - if((ret=edg_wll_ParseNotifEvent(context, event_s, ¬if_event))) { + edg_wll_InitContext(&context); + ret=edg_wll_ParseNotifEvent(context, event_s, ¬if_event); + edg_wll_FreeContext(context); + if(ret) { set_error(IL_LBAPI, ret, "event_store_from_file: could not parse event"); ret = -1; goto out; } if(notif_event->notification.notifId == NULL) { - set_error(IL_LBAPI, EDG_WLL_ERROR_PARSE_BROKEN_ULM, + set_error(IL_LBAPI, EDG_WLL_ERROR_PARSE_BROKEN_ULM, "event_store_from_file: parse error - no notif id"); ret = -1; goto out; @@ -935,11 +1211,13 @@ event_store_from_file(char *filename) ret = -1; goto out; } - if(notif_event->notification.dest_host && + /* XXX: what was that good for? 
+ if(notif_event->notification.dest_host && (strlen(notif_event->notification.dest_host) > 0)) { asprintf(&dest_name, "%s:%d", notif_event->notification.dest_host, notif_event->notification.dest_port); } - + */ + #else job_id_s = edg_wll_GetJobId(event_s); #endif @@ -949,9 +1227,9 @@ event_store_from_file(char *filename) ret = 0; goto out; } - - es=event_store_find(job_id_s); - + + es = event_store_find(job_id_s, filename); + if(es == NULL) { ret = -1; goto out; @@ -961,9 +1239,9 @@ event_store_from_file(char *filename) (es->last_committed_bs == 0) && (es->offset == 0)) { ret = event_store_read_ctl(es); - } else + } else ret = 0; - + event_store_release(es); out: @@ -973,7 +1251,7 @@ out: free(notif_event); } #endif - if(event_s) free(event_s); + if(event_s) free(event_s); if(job_id_s) free(job_id_s); return(ret); } @@ -1015,12 +1293,12 @@ event_store_init(char *prefix) set_error(IL_SYS, errno, "event_store_init: error opening event directory"); return(-1); } - + while((entry=readdir(event_dir))) { char *s; /* skip all files that do not match prefix */ - if(strncmp(entry->d_name, p, len) != 0) + if(strncmp(entry->d_name, p, len) != 0) continue; /* skip all control files */ @@ -1058,12 +1336,12 @@ event_store_init(char *prefix) set_error(IL_SYS, errno, "event_store_init: error opening event directory"); return(-1); } - + while((entry=readdir(event_dir))) { char *s; /* skip all files that do not match prefix */ - if(strncmp(entry->d_name, p, len) != 0) + if(strncmp(entry->d_name, p, len) != 0) continue; /* find all control files */ @@ -1094,9 +1372,9 @@ event_store_init(char *prefix) /* could not stat file, remove ctl */ strcat(ef, s); il_log(LOG_DEBUG, " removing stale file %s\n", ef); - if(unlink(ef)) + if(unlink(ef)) il_log(LOG_ERR, " could not remove file %s: %s\n", ef, strerror(errno)); - + } free(ef); @@ -1116,7 +1394,7 @@ event_store_recover_all() struct event_store_list *sl; - if(pthread_rwlock_rdlock(&store_list_lock)) + 
if(pthread_rwlock_rdlock(&store_list_lock)) abort(); /* recover all event stores */ @@ -1131,15 +1409,15 @@ event_store_recover_all() } sl = sl->next; } - - if(pthread_rwlock_unlock(&store_list_lock)) + + if(pthread_rwlock_unlock(&store_list_lock)) abort(); return(0); } -#if 0 +#if 0 int event_store_remove(struct event_store *es) { @@ -1151,7 +1429,7 @@ event_store_remove(struct event_store *es) case 0: il_log(LOG_DEBUG, " event store not removed, still used\n"); return(0); - + case 1: if(pthread_rwlock_wrlock(&store_list_lock) < 0) { set_error(IL_SYS, errno, " event_store_remove: error locking event store list"); @@ -1195,7 +1473,7 @@ event_store_cleanup() /* try to remove event files */ - if(pthread_rwlock_wrlock(&store_list_lock)) + if(pthread_rwlock_wrlock(&store_list_lock)) abort(); sl = store_list; @@ -1205,11 +1483,11 @@ event_store_cleanup() int ret; slnext = sl->next; - + /* one event store at time */ ret = pthread_rwlock_trywrlock(&sl->es->use_lock); if(ret == EBUSY) { - il_log(LOG_DEBUG, " event_store %s is in use by another thread\n", + il_log(LOG_DEBUG, " event_store %s is in use by another thread\n", sl->es->job_id_s); sl = slnext; continue; @@ -1217,33 +1495,38 @@ event_store_cleanup() abort(); switch(event_store_clean(sl->es)) { - + case 1: - /* remove this event store */ + /* remove this event store from LL */ (*prev) = slnext; + /* remove this event store from jobid's LL */ + if(sl->jobid_next != sl) { + sl->jobid_prev->jobid_next = sl->jobid_next; + sl->jobid_next->jobid_prev = sl->jobid_prev; + } event_store_free(sl->es); free(sl); break; - + case -1: - il_log(LOG_ERR, " error removing event store %s (file %s):\n %s\n", + il_log(LOG_ERR, " error removing event store %s (file %s):\n %s\n", sl->es->job_id_s, sl->es->event_file_name, error_get_msg()); /* event_store_release(sl->es); */ clear_error(); /* go on to the next */ - + default: event_store_release(sl->es); prev = &(sl->next); break; } - + sl = slnext; } - - 
/*
 * file_reader()
 *   - read callback handed to receive_http(); pulls up to len bytes
 *     from the stdio stream passed in user_data
 *   - returns the number of bytes read (0 at end of file or when len <= 0),
 *     -1 on a stream error or when the stream/buffer pointer is missing
 */
static
int
file_reader(void *user_data, char *buffer, const int len)
{
	FILE *stream = (FILE *)user_data;
	size_t got;

	/* nothing requested, nothing touched */
	if(len <= 0)
		return 0;

	/* refuse to hand NULL pointers to fread() */
	if(stream == NULL || buffer == NULL)
		return -1;

	got = fread(buffer, 1, (size_t)len, stream);
	/* zero bytes is an error only if the stream says so, otherwise it is EOF */
	if(got == 0 && ferror(stream))
		return -1;

	return (int)got;
}
message in case the reader read ahead */ + len = fseek(file, start + msg->len, SEEK_SET); + len = fgetc(file); + if(len != '\n') { + il_log(LOG_ERR, "error reading event from file, missing terminator character at %d, found %c(%d))\n", + start+msg->len, len, len); + if(msg->data) { free(msg->data); msg->data = NULL; } + if(msg->host) { free(msg->host); msg->host = NULL; } + return EINVAL; + } + return ret; +} + + + +/* ------------------------------ + * event_store 'member' functions + * ------------------------------ + */ +static +int +event_store_free(struct event_store *es) +{ + assert(es != NULL); + + if(es->job_id_s) free(es->job_id_s); + if(es->event_file_name) free(es->event_file_name); + if(es->control_file_name) free(es->control_file_name); + pthread_rwlock_destroy(&es->use_lock); + pthread_rwlock_destroy(&es->commit_lock); + free(es); + + return(0); +} + + +static +struct event_store * +event_store_create(char *job_id_s) +{ + struct event_store *es; + + es = malloc(sizeof(*es)); + if(es == NULL) { + set_error(IL_NOMEM, ENOMEM, "event_store_create: error allocating room for structure"); + return(NULL); + } + + memset(es, 0, sizeof(*es)); + + il_log(LOG_DEBUG, " creating event store for id %s\n", job_id_s); + + es->job_id_s = strdup(job_id_s); + es->event_file_name = jobid2eventfile(job_id_s); + es->control_file_name = jobid2controlfile(job_id_s); + + if(pthread_rwlock_init(&es->commit_lock, NULL)) + abort(); + if(pthread_rwlock_init(&es->use_lock, NULL)) + abort(); + + return(es); +} + + +static +int +event_store_lock_ro(struct event_store *es) +{ + assert(es != NULL); + + if(pthread_rwlock_rdlock(&es->commit_lock)) + abort(); + + return(0); +} + + +static +int +event_store_lock(struct event_store *es) +{ + assert(es != NULL); + + if(pthread_rwlock_wrlock(&es->commit_lock)) + abort(); + + return(0); +} + + +static +int +event_store_unlock(struct event_store *es) +{ + assert(es != NULL); + + if(pthread_rwlock_unlock(&es->commit_lock)) + abort(); + return(0); 
+} + + +static +int +event_store_read_ctl(struct event_store *es) +{ + FILE *ctl_file; + + assert(es != NULL); + + event_store_lock(es); + if((ctl_file = fopen(es->control_file_name, "r")) == NULL) { + /* no control file, new event file */ + es->last_committed_ls = 0; + es->last_committed_bs = 0; + } else { + /* read last seen and last committed counts */ + fscanf(ctl_file, "%*s\n%ld\n%ld\n", + &es->last_committed_ls, + &es->last_committed_bs); + fclose(ctl_file); + } + event_store_unlock(es); + + return(0); +} + + +static +int +event_store_write_ctl(struct event_store *es) +{ + FILE *ctl; + + assert(es != NULL); + + ctl = fopen(es->control_file_name, "w"); + if(ctl == NULL) { + set_error(IL_SYS, errno, "event_store_write_ctl: error opening control file"); + return(-1); + } + + if(fprintf(ctl, "%s\n%ld\n%ld\n", + es->job_id_s, + es->last_committed_ls, + es->last_committed_bs) < 0) { + set_error(IL_SYS, errno, "event_store_write_ctl: error writing control record"); + return(-1); + } + + if(fclose(ctl) < 0) { + set_error(IL_SYS, errno, "event_store_write_ctl: error closing control file"); + return(-1); + } + + return(0); +} + + +/* + * event_store_qurantine() + * - rename damaged event store file + * - essentially does the same actions as cleanup, but the event store + * does not have to be empty + * returns 0 on success, -1 on error + */ +static +int +event_store_quarantine(struct event_store *es) +{ + int num; + char newname[MAXPATHLEN+1]; + + /* find available qurantine name */ + /* we give it at most 1024 tries */ + for(num = 0; num < 1024; num++) { + struct stat st; + + snprintf(newname, MAXPATHLEN, "%s.quarantine.%d", es->event_file_name, num); + newname[MAXPATHLEN] = 0; + if(stat(newname, &st) < 0) { + if(errno == ENOENT) { + /* file not found */ + break; + } else { + /* some other error with name, probably permanent */ + set_error(IL_SYS, errno, "event_store_qurantine: error looking for qurantine filename"); + return(-1); + + } + } else { + /* the filename is 
used already */ + } + } + if(num >= 1024) { + /* new name not found */ + /* XXX - is there more suitable error? */ + set_error(IL_SYS, ENOSPC, "event_store_quarantine: exhausted number of retries looking for quarantine filename"); + return(-1); + } + + /* actually rename the file */ + il_log(LOG_DEBUG, " renaming damaged event file from %s to %s\n", + es->event_file_name, newname); + if(rename(es->event_file_name, newname) < 0) { + set_error(IL_SYS, errno, "event_store_quarantine: error renaming event file"); + return(-1); + } + + /* clear the counters */ + es->last_committed_ls = 0; + es->last_committed_bs = 0; + es->offset = 0; + + return(0); +} + + +/* + * event_store_recover() + * - recover after restart or catch up when events missing in IPC + * - if offset > 0, read everything behind it + * - if offset == 0, read everything behind min(last_committed_bs, last_committed_es) + */ +int +event_store_recover(struct event_store *es) +{ + struct event_queue *eq_l = NULL, *eq_b = NULL; + struct server_msg *msg; + il_http_message_t hmsg; + char *event_s; + int fd, ret; + long last; + FILE *ef; + struct flock efl; + char err_msg[128]; + struct stat stbuf; + + assert(es != NULL); + +#if defined(IL_NOTIFICATIONS) + /* destination queue has to be found for each message separately */ +#else + /* find bookkepping server queue */ + eq_b = queue_list_get(es->job_id_s); + if(eq_b == NULL) + return(-1); +#endif + +#if !defined(IL_NOTIFICATIONS) + /* get log server queue */ + eq_l = queue_list_get(NULL); +#endif + + event_store_lock(es); + + il_log(LOG_DEBUG, " reading events from %s\n", es->event_file_name); + + /* open event file */ + ef = fopen(es->event_file_name, "r"); + if(ef == NULL) { + snprintf(err_msg, sizeof(err_msg), + "event_store_recover: error opening event file %s", + es->event_file_name); + set_error(IL_SYS, errno, err_msg); + event_store_unlock(es); + return(-1); + } + + /* lock the file for reading (we should not read while dglogd is writing) */ + fd = 
fileno(ef); + efl.l_type = F_RDLCK; + efl.l_whence = SEEK_SET; + efl.l_start = 0; + efl.l_len = 0; + if(fcntl(fd, F_SETLKW, &efl) < 0) { + snprintf(err_msg, sizeof(err_msg), + "event_store_recover: error locking event file %s", + es->event_file_name); + set_error(IL_SYS, errno, err_msg); + event_store_unlock(es); + fclose(ef); + return(-1); + } + + /* check the file modification time and size to avoid unnecessary operations */ + memset(&stbuf, 0, sizeof(stbuf)); + if(fstat(fd, &stbuf) < 0) { + il_log(LOG_ERR, " could not stat event file %s: %s\n", es->event_file_name, strerror(errno)); + fclose(ef); + event_store_unlock(es); + return -1; + } else { + if((es->offset == stbuf.st_size) && (es->last_modified == stbuf.st_mtime)) { + il_log(LOG_DEBUG, " event file not modified since last visit, skipping\n"); + fclose(ef); + event_store_unlock(es); + return(0); + } + } + + while(1) { /* try, try, try */ + + /* get the position in file to be sought */ + if(es->offset) + last = es->offset; + else { + last = es->last_committed_bs; + } + + il_log(LOG_DEBUG, " setting starting file position to %ld\n", last); + il_log(LOG_DEBUG, " bytes sent to destination: %d\n", es->last_committed_bs); + + if(last > 0) { + int c; + + /* skip all committed or already enqueued events */ + /* be careful - check, if the offset really points to the + beginning of event string */ + if(fseek(ef, last - 1, SEEK_SET) < 0) { + set_error(IL_SYS, errno, "event_store_recover: error setting position for read"); + event_store_unlock(es); + fclose(ef); + return(-1); + } + /* the last enqueued event MUST end with \n */ + if((c=fgetc(ef)) != '\n') { + /* Houston, we have got a problem */ + il_log(LOG_WARNING, + " file position %ld does not point at the beginning of event string, backing off!\n", + last); + /* now, where were we? */ + if(es->offset) { + /* next try will be with + last_commited_bs */ + es->offset = 0; + } else { + /* this is really weird... 
back off completely */ + es->last_committed_ls = es->last_committed_bs = 0; + } + } else { + /* OK, break out of the loop */ + break; + } + } else { + /* this breaks out of the loop, we are starting at + * the beginning of file + */ + if(fseek(ef, 0, SEEK_SET) < 0) { + set_error(IL_SYS, errno, "event_store_recover: error setting position for read"); + event_store_unlock(es); + fclose(ef); + return(-1); + } + break; + } + } + + /* enqueue all remaining events */ + ret = 1; + msg = NULL; + while(read_event_string(ef, &hmsg) >= 0) { + + /* last holds the starting position of event_s in file */ + il_log(LOG_DEBUG, " reading event at %ld\n", last); + + /* break from now on means there was some error */ + ret = -1; + + /* create message for server */ + msg = server_msg_create((il_octet_string_t*)&hmsg, last); + if(msg == NULL) { + il_log(LOG_ALERT, " event file corrupted! I will try to move it to quarantine (ie. rename it).\n"); + /* actually do not bother if quarantine succeeded or not - we could not do more */ + event_store_quarantine(es); + fclose(ef); + event_store_unlock(es); + return(-1); + } + msg->es = es; + + /* first enqueue to the LS */ + if(!bs_only && (last >= es->last_committed_ls)) { + + il_log(LOG_DEBUG, " queueing event at %ld to server %s\n", last, eq_l->dest_name); + +#if !defined(IL_NOTIFICATIONS) + if(enqueue_msg(eq_l, msg) < 0) + break; +#endif + } + +#ifdef IL_NOTIFICATIONS + eq_b = queue_list_get(msg->dest); +#endif + + /* now enqueue to the BS, if neccessary */ + if((eq_b != eq_l) && + (last >= es->last_committed_bs)) { + + il_log(LOG_DEBUG, " queueing event at %ld to server %s\n", last, eq_b->dest_name); + + if(enqueue_msg(eq_b, msg) < 0) + break; + } + server_msg_free(msg); + msg = NULL; + + /* now last is also the offset behind the last successfully queued event */ + last = ftell(ef); + + /* ret == 0 means EOF or incomplete event found */ + ret = 0; + + } /* while */ + + /* due to this little assignment we had to lock the event_store for 
writing */ + es->offset = last; + es->last_modified = stbuf.st_mtime; + il_log(LOG_DEBUG, " event store offset set to %ld\n", last); + + if(msg) + server_msg_free(msg); + + fclose(ef); + il_log(LOG_DEBUG, " finished reading events with %d\n", ret); + + event_store_unlock(es); + return(ret); +} + + +/* + * event_store_sync() + * - check the position of event and fill holes from file + * - return 1 if the event is new, + * 0 if it was seen before, + * -1 if there was an error + */ +int +event_store_sync(struct event_store *es, long offset) +{ + int ret; + + assert(es != NULL); + + /* all events are actually read from file, the event on socket + * is ignored and serves just to notify us about file change + */ + ret = event_store_recover(es); + ret = (ret < 0) ? ret : 0; + return(ret); +} + + +int +event_store_next(struct event_store *es, long offset, int len) +{ + assert(es != NULL); + + /* offsets are good only to detect losses (differences between socket and file), + which is not possible now */ + return 0; +} + + +/* + * event_store_commit() + * + */ +int +event_store_commit(struct event_store *es, int len, int ls) +{ + assert(es != NULL); + + event_store_lock(es); + + if(ls) + es->last_committed_ls += len; + else { + es->last_committed_bs += len; + if (bs_only) es->last_committed_ls += len; + } + + if(event_store_write_ctl(es) < 0) { + event_store_unlock(es); + return(-1); + } + + event_store_unlock(es); + + + return(0); +} + + +/* + * event_store_clean() + * - remove the event files (event and ctl), if they are not needed anymore + * - returns 0 if event_store is in use, 1 if it was removed and -1 on error + * + * Q: How do we know that we can safely remove the files? + * A: When all events from file have been committed both by LS and BS. 
+ */ +static +int +event_store_clean(struct event_store *es) +{ + long last; + int fd; + FILE *ef; + struct flock efl; + + assert(es != NULL); + + /* prevent sender threads from updating */ + event_store_lock(es); + + il_log(LOG_DEBUG, " trying to cleanup event store %s\n", es->job_id_s); + il_log(LOG_DEBUG, " bytes sent to logging server: %d\n", es->last_committed_ls); + il_log(LOG_DEBUG, " bytes sent to bookkeeping server: %d\n", es->last_committed_bs); + + /* preliminary check to avoid opening event file */ + /* if the positions differ, some events still have to be sent */ + if(es->last_committed_ls != es->last_committed_bs) { + event_store_unlock(es); + il_log(LOG_DEBUG, " not all events sent, cleanup aborted\n"); + return(0); + } + + /* the file can only be removed when all the events were succesfully sent + (ie. committed both by LS and BS */ + /* That also implies that the event queues are 'empty' at the moment. */ + ef = fopen(es->event_file_name, "r+"); + if(ef == NULL) { + /* if we can not open the event store, it is an error and the struct should be removed */ + /* XXX - is it true? 
*/ + event_store_unlock(es); + il_log(LOG_ERR, " event_store_clean: error opening event file: %s\n", strerror(errno)); + return(1); + } + + fd = fileno(ef); + + /* prevent local-logger from writing into event file */ + efl.l_type = F_WRLCK; + efl.l_whence = SEEK_SET; + efl.l_start = 0; + efl.l_len = 0; + if(fcntl(fd, F_SETLK, &efl) < 0) { + il_log(LOG_DEBUG, " could not lock event file, cleanup aborted\n"); + fclose(ef); + event_store_unlock(es); + if(errno != EACCES && + errno != EAGAIN) { + set_error(IL_SYS, errno, "event_store_clean: error locking event file"); + return(-1); + } + return(0); + } + + /* now the file should not contain partially written event, so it is safe + to get offset behind last event by seeking the end of file */ + if(fseek(ef, 0, SEEK_END) < 0) { + set_error(IL_SYS, errno, "event_store_clean: error seeking the end of file"); + event_store_unlock(es); + fclose(ef); + return(-1); + } + + last = ftell(ef); + il_log(LOG_DEBUG, " total bytes in file: %d\n", last); + + if(es->last_committed_ls < last) { + fclose(ef); + event_store_unlock(es); + il_log(LOG_DEBUG, " events still waiting in queue, cleanup aborted\n"); + return(0); + } else if( es->last_committed_ls > last) { + il_log(LOG_WARNING, " warning: event file seems to shrink!\n"); + /* XXX - in that case we can not continue because there may be + some undelivered events referring to that event store */ + fclose(ef); + event_store_unlock(es); + return(0); + } + + /* now we are sure that all events were sent and the event queues are empty */ + il_log(LOG_INFO, " removing event file %s\n", es->event_file_name); + + /* remove the event file */ + unlink(es->event_file_name); + unlink(es->control_file_name); + + /* clear the counters */ + es->last_committed_ls = 0; + es->last_committed_bs = 0; + es->offset = 0; + + /* unlock the event_store even if it is going to be removed */ + event_store_unlock(es); + + /* close the event file (that unlocks it as well) */ + fclose(ef); + + /* indicate that it 
is safe to remove this event_store */ + return(1); +} + + +/* -------------------------------- + * event store management functions + * -------------------------------- + */ +struct event_store * +event_store_find(char *job_id_s) +{ + struct event_store_list *q, *p; + struct event_store *es; + + if(pthread_rwlock_wrlock(&store_list_lock)) { + abort(); + } + + es = NULL; + + q = NULL; + p = store_list; + + while(p) { + if(strcmp(p->es->job_id_s, job_id_s) == 0) { + es = p->es; + if(pthread_rwlock_rdlock(&es->use_lock)) + abort(); + if(pthread_rwlock_unlock(&store_list_lock)) + abort(); + return(es); + } + + q = p; + p = p->next; + } + + es = event_store_create(job_id_s); + if(es == NULL) { + if(pthread_rwlock_unlock(&store_list_lock)) + abort(); + return(NULL); + } + + p = malloc(sizeof(*p)); + if(p == NULL) { + set_error(IL_NOMEM, ENOMEM, "event_store_find: no room for new event store"); + if(pthread_rwlock_unlock(&store_list_lock)) + abort(); + return(NULL); + } + + p->next = store_list; + store_list = p; + + p->es = es; + + if(pthread_rwlock_rdlock(&es->use_lock)) + abort(); + + if(pthread_rwlock_unlock(&store_list_lock)) + abort(); + + return(es); +} + + +int +event_store_release(struct event_store *es) +{ + assert(es != NULL); + + if(pthread_rwlock_unlock(&es->use_lock)) + abort(); + il_log(LOG_DEBUG, " released lock on %s\n", es->job_id_s); + return(0); +} + + +event_store_from_file(char *filename) +{ + struct event_store *es; + FILE *event_file; + char *job_id_s = NULL, *p; + il_http_message_t hmsg; + int ret; + + il_log(LOG_INFO, " attaching to event file: %s\n", filename); + + if(strstr(filename, "quarantine") != NULL) { + il_log(LOG_INFO, " file name belongs to quarantine, not touching that.\n"); + return(0); + } + + event_file = fopen(filename, "r"); + if(event_file == NULL) { + set_error(IL_SYS, errno, "event_store_from_file: error opening event file"); + return(-1); + } + ret = read_event_string(event_file, &hmsg); + fclose(event_file); + if(ret < 0) + 
return(0); + + /* get id aka dest */ + job_id_s = hmsg.host; + + il_log(LOG_DEBUG, " message dest: '%s'\n", job_id_s); + if(job_id_s == NULL) { + il_log(LOG_NOTICE, " skipping file, could not parse event\n"); + ret = 0; + goto out; + } + + es=event_store_find(job_id_s); + + if(es == NULL) { + ret = -1; + goto out; + } + + if((es->last_committed_ls == 0) && + (es->last_committed_bs == 0) && + (es->offset == 0)) { + ret = event_store_read_ctl(es); + } else + ret = 0; + + event_store_release(es); + +out: + if(hmsg.data) free(hmsg.data); + if(job_id_s) free(job_id_s); + return(ret); +} + + +int +event_store_init(char *prefix) +{ + if(file_prefix == NULL) { + file_prefix = strdup(prefix); + store_list = NULL; + } + + /* read directory and get a list of event files */ + { + int len; + + char *p, *dir; + DIR *event_dir; + struct dirent *entry; + + + /* get directory name */ + p = strrchr(file_prefix, '/'); + if(p == NULL) { + dir = strdup("."); + p = ""; + len = 0; + } else { + *p = '\0'; + dir = strdup(file_prefix); + *p++ = '/'; + len = strlen(p); + } + + event_dir = opendir(dir); + if(event_dir == NULL) { + free(dir); + set_error(IL_SYS, errno, "event_store_init: error opening event directory"); + return(-1); + } + + while((entry=readdir(event_dir))) { + char *s; + + /* skip all files that do not match prefix */ + if(strncmp(entry->d_name, p, len) != 0) + continue; + + /* skip all control files */ + if((s=strstr(entry->d_name, ".ctl")) != NULL && + s[4] == '\0') + continue; + + s = malloc(strlen(dir) + strlen(entry->d_name) + 2); + if(s == NULL) { + free(dir); + set_error(IL_NOMEM, ENOMEM, "event_store_init: no room for file name"); + return(-1); + } + + *s = '\0'; + strcat(s, dir); + strcat(s, "/"); + strcat(s, entry->d_name); + + if(event_store_from_file(s) < 0) { + free(dir); + free(s); + closedir(event_dir); + return(-1); + } + + free(s); + } + closedir(event_dir); + + /* one more pass - this time remove stale .ctl files */ + event_dir = opendir(dir); + 
if(event_dir == NULL) { + free(dir); + set_error(IL_SYS, errno, "event_store_init: error opening event directory"); + return(-1); + } + + while((entry=readdir(event_dir))) { + char *s; + + /* skip all files that do not match prefix */ + if(strncmp(entry->d_name, p, len) != 0) + continue; + + /* find all control files */ + if((s=strstr(entry->d_name, ".ctl")) != NULL && + s[4] == '\0') { + char *ef; + struct stat st; + + /* is there corresponding event file? */ + ef = malloc(strlen(dir) + strlen(entry->d_name) + 2); + if(ef == NULL) { + free(dir); + set_error(IL_NOMEM, ENOMEM, "event_store_init: no room for event file name"); + return(-1); + } + + s[0] = 0; + *ef = '\0'; + strcat(ef, dir); + strcat(ef, "/"); + strcat(ef, entry->d_name); + s[0] = '.'; + + if(stat(ef, &st) == 0) { + /* something is there */ + /* XXX - it could be something else than event file, but do not bother now */ + } else { + /* could not stat file, remove ctl */ + strcat(ef, s); + il_log(LOG_DEBUG, " removing stale file %s\n", ef); + if(unlink(ef)) + il_log(LOG_ERR, " could not remove file %s: %s\n", ef, strerror(errno)); + + } + free(ef); + + } + } + closedir(event_dir); + free(dir); + } + + return(0); +} + + +int +event_store_recover_all() +{ + struct event_store_list *sl; + + + if(pthread_rwlock_rdlock(&store_list_lock)) + abort(); + + /* recover all event stores */ + sl = store_list; + while(sl != NULL) { + + /* recover this event store */ + /* no need to lock use_lock in event_store, the store_list_lock is in place */ + if(event_store_recover(sl->es) < 0) { + il_log(LOG_ERR, " error recovering event store %s:\n %s\n", sl->es->event_file_name, error_get_msg()); + clear_error(); + } + sl = sl->next; + } + + if(pthread_rwlock_unlock(&store_list_lock)) + abort(); + + return(0); +} + + +#if 0 +int +event_store_remove(struct event_store *es) +{ + struct event_store_list *p, **q; + + assert(es != NULL); + + switch(event_store_clean(es)) { + case 0: + il_log(LOG_DEBUG, " event store not removed, 
still used\n"); + return(0); + + case 1: + if(pthread_rwlock_wrlock(&store_list_lock) < 0) { + set_error(IL_SYS, errno, " event_store_remove: error locking event store list"); + return(-1); + } + + p = store_list; + q = &store_list; + + while(p) { + if(p->es == es) { + (*q) = p->next; + event_store_free(es); + free(p); + break; + } + q = &(p->next); + p = p->next; + } + + if(pthread_rwlock_unlock(&store_list_lock) < 0) { + set_error(IL_SYS, errno, " event_store_remove: error unlocking event store list"); + return(-1); + } + return(1); + + default: + return(-1); + } + /* not reached */ + return(0); +} +#endif + +int +event_store_cleanup() +{ + struct event_store_list *sl; + struct event_store_list *slnext; + struct event_store_list **prev; + + /* try to remove event files */ + + if(pthread_rwlock_wrlock(&store_list_lock)) + abort(); + + sl = store_list; + prev = &store_list; + + while(sl != NULL) { + int ret; + + slnext = sl->next; + + /* one event store at time */ + ret = pthread_rwlock_trywrlock(&sl->es->use_lock); + if(ret == EBUSY) { + il_log(LOG_DEBUG, " event_store %s is in use by another thread\n", + sl->es->job_id_s); + sl = slnext; + continue; + } else if (ret < 0) + abort(); + + switch(event_store_clean(sl->es)) { + + case 1: + /* remove this event store */ + (*prev) = slnext; + event_store_free(sl->es); + free(sl); + break; + + case -1: + il_log(LOG_ERR, " error removing event store %s (file %s):\n %s\n", + sl->es->job_id_s, sl->es->event_file_name, error_get_msg()); + /* event_store_release(sl->es); */ + clear_error(); + /* go on to the next */ + + default: + event_store_release(sl->es); + prev = &(sl->next); + break; + } + + sl = slnext; + } + + if(pthread_rwlock_unlock(&store_list_lock)) + abort(); + + return(0); +} + diff --git a/org.glite.lb.logger/src/http.c b/org.glite.lb.logger/src/http.c new file mode 100644 index 0000000..c9fb89b --- /dev/null +++ b/org.glite.lb.logger/src/http.c @@ -0,0 +1,197 @@ +#ident "$Header$" + +#include +#include + 
+#include "interlogd.h" + + +int +parse_request(const char *s, il_http_message_t *msg) +{ + if(!strncasecmp(s, "HTTP", 4)) { + msg->msg_type = IL_HTTP_REPLY; + } else if(!strncasecmp(s, "POST", 4)) { + msg->msg_type = IL_HTTP_POST; + } else if(!strncasecmp(s, "GET", 3)) { + msg->msg_type = IL_HTTP_GET; + } else { + msg->msg_type = IL_HTTP_OTHER; + } + if(msg->msg_type == IL_HTTP_REPLY) { + char *p = strchr(s, ' '); + + if(!p) goto parse_end; + p++; + msg->reply_code=atoi(p); + p = strchr(p, ' '); + if(!p) goto parse_end; + p++; + msg->reply_string = strdup(p); + + parse_end: + ; + } +} + + +int +parse_header(const char *s, il_http_message_t *msg) +{ + if(!strncasecmp(s, "Content-Length:", 15)) { + msg->content_length = atoi(s + 15); + } else if(!strncasecmp(s, "Host:", 5)) { + const char *p = s + 4; + while(*++p == ' '); /* skip spaces */ + msg->host = strdup(p); + } + return(0); +} + + +#define DEFAULT_CHUNK_SIZE 1024 + +// read what is available and parse what can be parsed +// returns the result of read operation of the underlying connection, +// ie. 
the number of bytes read or error code +int +receive_http(void *user_data, int (*reader)(void *, char *, const int), il_http_message_t *msg) +{ + static enum { NONE, IN_REQUEST, IN_HEADERS, IN_BODY } state = NONE; + int len, alen, clen, i, buffer_free, min_buffer_free = DEFAULT_CHUNK_SIZE; + char *buffer, *p, *s, *cr; + + memset(msg, 0, sizeof(*msg)); + // msg->data = NULL; + // msg->len = 0; + state = IN_REQUEST; + alen = 0; + buffer = NULL; + buffer_free = 0; + p = NULL; + s = NULL; + + do { + /* p - first empty position in buffer + alen - size of allocated buffer + len - number of bytes received in last read + s - points behind last scanned CRLF or at buffer start + buffer_free = alen - (p - buffer) + */ + + /* prepare at least chunk_size bytes for next data */ + if(buffer_free < min_buffer_free) { + char *n; + + alen += min_buffer_free; + n = realloc(buffer, alen); + if(n == NULL) { + free(buffer); + set_error(IL_NOMEM, ENOMEM, "read_event: no room for event"); + return(-1); + } + buffer_free += min_buffer_free; + p = n + (p - buffer); + s = n + (s - buffer); + buffer = n; + } + + if(buffer_free > 0) { + len = (*reader)(user_data, p, buffer_free); + if(len < 0) { + // error + free(buffer); + // set_error(IL_SYS, errno, "receive_http: error reading data"); + return -1; + } else if(len == 0) { + // EOF + free(buffer); + set_error(IL_PROTO, errno, "receive_http: error reading data - premature EOF"); + return -1; + } + buffer_free -= len; + p+= len; + } + + + switch(state) { + + // parse buffer, look for CRLFs + // s - start scan position + // p - start of current token + // cr - current CRLF position + + case IN_REQUEST: + if((s < p - 1) && + (cr = (char*)memchr(s, '\r', p - s - 1)) && + (cr[1] == '\n')) { + *cr = 0; + parse_request(s, msg); + *cr = '\r'; + // change state + state = IN_HEADERS; + // start new tokens (cr < p - 1 -> s < p + 1 <-> s <= p) + s = cr + 2; + } else { + break; + } + + case IN_HEADERS: + while((state != IN_BODY) && + (s < p - 1) && + (cr = 
(char*)memchr(s, '\r', p - s - 1)) && + (cr[1] == '\n')) { + if(s == cr) { /* do not consider request starting with CRLF */ + // found CRLFCRLF + state = IN_BODY; + } else { + *cr = 0; + parse_header(s, msg); + *cr = '\r'; + } + // next scan starts after CRLF + s = cr + 2; + } + if(state == IN_BODY) { + // we found body + // content-length should be set at the moment + if(msg->content_length > 0) { + int need_free = msg->content_length - (p - s); + char *n; + + alen += need_free - buffer_free + 1; + n = realloc(buffer, alen); + if(n == NULL) { + free(buffer); + set_error(IL_NOMEM, ENOMEM, "read_event: no room for event"); + return(-1); + } + buffer_free = need_free; + min_buffer_free = 0; + p = n + (p - buffer); + s = n + (s - buffer); + buffer = n; + } else { + // report error + free(buffer); + set_error(IL_PROTO, EINVAL, "receive_http: error reading data - no content length specified\n"); + return -1; + } + } + break; + + case IN_BODY: + if(buffer_free == 0) { + // finished reading + *p = 0; + state = NONE; + } + break; + } + } while(state != NONE); + + msg->data = buffer; + msg->len = p - buffer; + + return 0; +} diff --git a/org.glite.lb.logger/src/il_error.c b/org.glite.lb.logger/src/il_error.c index 1fe9bb9..630bd75 100644 --- a/org.glite.lb.logger/src/il_error.c +++ b/org.glite.lb.logger/src/il_error.c @@ -13,9 +13,6 @@ extern void _start (void), etext (void); #endif -/* XXX DK: */ -#include // SSL header file - #include "glite/security/glite_gss.h" #include "il_error.h" @@ -31,6 +28,8 @@ static void error_key_delete(void *err) { + if(((struct error_inf*)err)->msg) + free(((struct error_inf*)err)->msg); free(err); } @@ -110,11 +109,6 @@ set_error(int code, long minor, char *msg) snprintf(err->msg, IL_ERR_MSG_LEN, "%s: %s", msg, hstrerror(err->code_min)); break; - /* XXX DK: je tahle hodnota k necemu potreba? 
*/ - case IL_AUTH: - snprintf(err->msg, IL_ERR_MSG_LEN, "%s: %s", msg, ERR_error_string(err->code_min, NULL)); - break; - case IL_DGGSS: switch(err->code_min) { diff --git a/org.glite.lb.logger/src/il_error.h b/org.glite.lb.logger/src/il_error.h index 120e7ed..09c593e 100644 --- a/org.glite.lb.logger/src/il_error.h +++ b/org.glite.lb.logger/src/il_error.h @@ -9,7 +9,6 @@ enum err_code_maj { /* minor = */ IL_OK, /* 0 */ IL_SYS, /* errno */ IL_NOMEM, /* ENOMEM */ - IL_AUTH, /* 0 (SSL error) */ IL_PROTO, /* LB_* */ IL_LBAPI, /* dgLBErrCode */ IL_DGGSS, /* EDG_WLL_GSS_* */ diff --git a/org.glite.lb.logger/src/il_master.c b/org.glite.lb.logger/src/il_master.c index e394e5a..47808e8 100644 --- a/org.glite.lb.logger/src/il_master.c +++ b/org.glite.lb.logger/src/il_master.c @@ -180,7 +180,7 @@ handle_cmd(il_octet_string_t *event, long offset) /* catchup with all neccessary event files */ if(job_id_s) { - struct event_store *es = event_store_find(job_id_s); + struct event_store *es = event_store_find(job_id_s, NULL); if(es == NULL) { goto cmd_error; @@ -356,17 +356,19 @@ handle_msg(il_octet_string_t *event, long offset) /* sync event store with IPC (if neccessary) * This MUST be called before inserting event into output queue! */ - if((es = event_store_find(msg->job_id_s)) == NULL) + if((es = event_store_find(msg->job_id_s, NULL)) == NULL) return(-1); msg->es = es; - + #ifdef LB_PERF if(nosync) ret = 1; else #endif ret = event_store_sync(es, offset); + /* no longer informative: il_log(LOG_DEBUG, " syncing event store at %d with event at %d, result %d\n", es->offset, offset, ret); + */ if(ret < 0) { il_log(LOG_ERR, " handle_msg: error syncing event store:\n %s\n", error_get_msg()); /* XXX should error during event store recovery cause us to drop the message? 
*/ @@ -431,7 +433,7 @@ loop() { /* receive events */ while(1) { - il_octet_string_t msg; + il_octet_string_t *msg; long offset; int ret; @@ -456,17 +458,17 @@ loop() } #ifdef PERF_EMPTY - glite_wll_perftest_consumeEventString(msg.data); - free(msg.data); + glite_wll_perftest_consumeEventString(msg->data); + free(msg->data); continue; #endif #ifdef INTERLOGD_HANDLE_CMD - ret = handle_cmd(&msg, offset); + ret = handle_cmd(msg, offset); if(ret == 0) #endif - ret = handle_msg(&msg, offset); - free(msg.data); + ret = handle_msg(msg, offset); + if(msg->data) free(msg->data); if(ret < 0) switch (error_get_maj()) { case IL_SYS: diff --git a/org.glite.lb.logger/src/input_queue_socket.c b/org.glite.lb.logger/src/input_queue_socket.c index bf3c7a2..05a0034 100644 --- a/org.glite.lb.logger/src/input_queue_socket.c +++ b/org.glite.lb.logger/src/input_queue_socket.c @@ -243,14 +243,19 @@ read_event(int sock, long *offset, il_octet_string_t *msg) */ #ifdef PERF_EVENTS_INLINE int -input_queue_get(il_octet_string *buffer, long *offset, int timeout) +input_queue_get(il_octet_string **buffer, long *offset, int timeout) { static long o = 0; int len; char *jobid; + static il_octet_string_t my_buffer; - len = glite_wll_perftest_produceEventString(&buffer->data, &jobid); - buffer->len = len; + assert(buffer != NULL); + + *buffer = &my_buffer; + + len = glite_wll_perftest_produceEventString(&my_buffer.data, &jobid); + my_buffer.len = len; if(len) { o += len; *offset = o; @@ -261,14 +266,17 @@ input_queue_get(il_octet_string *buffer, long *offset, int timeout) } #else int -input_queue_get(il_octet_string_t *buffer, long *offset, int timeout) +input_queue_get(il_octet_string_t **buffer, long *offset, int timeout) { fd_set fds; struct timeval tv; int msg_len; + static il_octet_string_t my_buffer; assert(buffer != NULL); + *buffer = &my_buffer; + FD_ZERO(&fds); FD_SET(sock, &fds); @@ -300,16 +308,16 @@ input_queue_get(il_octet_string_t *buffer, long *offset, int timeout) return(-1); } - 
read_event(accepted, offset, buffer); + read_event(accepted, offset, &my_buffer); close(accepted); - if(buffer->data == NULL) { + if(my_buffer.data == NULL) { if(error_get_maj() != IL_OK) return(-1); else return(0); } - return(buffer->len); + return(my_buffer.len); } #endif diff --git a/org.glite.lb.logger/src/input_queue_socket_http.c b/org.glite.lb.logger/src/input_queue_socket_http.c new file mode 100644 index 0000000..939c45f --- /dev/null +++ b/org.glite.lb.logger/src/input_queue_socket_http.c @@ -0,0 +1,167 @@ +#ident "$Header$" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "interlogd.h" + +static const int SOCK_QUEUE_MAX = 50; +extern char *socket_path; +extern char *file_prefix; + +static int sock; +static int accepted; + +static +int plain_reader(void *user_data, char *buffer, const int len) +{ + return (recv(*(int*)user_data, buffer, len, MSG_NOSIGNAL)); +} + + +int +input_queue_attach() +{ + struct sockaddr_un saddr; + + if((sock=socket(PF_UNIX, SOCK_STREAM, 0)) < 0) { + set_error(IL_SYS, errno, "input_queue_attach: error creating socket"); + return(-1); + } + + memset(&saddr, 0, sizeof(saddr)); + saddr.sun_family = AF_UNIX; + strcpy(saddr.sun_path, socket_path); + + /* test for the presence of the socket and another instance + of interlogger listening */ + if(connect(sock, (struct sockaddr *)&saddr, sizeof(saddr.sun_path)) < 0) { + if(errno == ECONNREFUSED) { + /* socket present, but no one at the other end; remove it */ + il_log(LOG_WARNING, " removing stale input socket %s\n", socket_path); + unlink(socket_path); + } + /* ignore other errors for now */ + } else { + /* connection was successful, so bail out - there is + another interlogger running */ + set_error(IL_SYS, EADDRINUSE, "input_queue_attach: another instance of interlogger is running"); + return(-1); + } + + if(bind(sock, (struct sockaddr *)&saddr, sizeof(saddr)) < 0) { + set_error(IL_SYS, errno, "input_queue_attach: error binding socket"); + 
return(-1); + } + + if (listen(sock, SOCK_QUEUE_MAX)) { + set_error(IL_SYS, errno, "input_queue_attach: error listening on socket"); + return -1; + } + + return(0); +} + + +void input_queue_detach() +{ + if (sock >= 0) + close(sock); + unlink(socket_path); +} + + + +/* + * Returns: -1 on error, 0 if no message available, message length otherwise + * + */ +#ifdef PERF_EVENTS_INLINE +int +input_queue_get(il_octet_string_t **buffer, long *offset, int timeout) +{ + static long o = 0; + int len; + char *jobid; + static il_octet_string_t my_buffer; + + assert(buffer != NULL); + + *buffer = &my_buffer; + + len = glite_wll_perftest_produceEventString(&my_buffer.data, &jobid); + my_buffer.len = len; + if(len) { + o += len; + *offset = o; + } else if (len == 0) { + sleep(timeout); + } + return(len); +} +#else +int +input_queue_get(il_octet_string_t **buffer, long *offset, int timeout) +{ + fd_set fds; + struct timeval tv; + int msg_len; + static il_http_message_t msg; + + assert(buffer != NULL); + + *buffer = (il_octet_string_t *)&msg; + + FD_ZERO(&fds); + FD_SET(sock, &fds); + + tv.tv_sec = timeout; + tv.tv_usec = 0; + + msg_len = select(sock + 1, &fds, NULL, NULL, timeout >= 0 ? 
&tv : NULL); + switch(msg_len) { + + case 0: /* timeout */ + return(0); + + case -1: /* error */ + switch(errno) { + case EINTR: + il_log(LOG_DEBUG, " interrupted while waiting for event!\n"); + return(0); + + default: + set_error(IL_SYS, errno, "input_queue_get: error waiting for event"); + return(-1); + } + default: + break; + } + + if((accepted=accept(sock, NULL, NULL)) < 0) { + set_error(IL_SYS, errno, "input_queue_get: error accepting connection"); + return(-1); + } + + msg_len = receive_http(&accepted, plain_reader, &msg); + + if(msg_len < 0) { + close(accepted); + if(error_get_maj() != IL_OK) + return -1; + else + return 0; + } + + close(accepted); + *offset = -1; + return(msg.len); +} +#endif + diff --git a/org.glite.lb.logger/src/interlogd.c b/org.glite.lb.logger/src/interlogd.c index a66d7f4..4b8b405 100644 --- a/org.glite.lb.logger/src/interlogd.c +++ b/org.glite.lb.logger/src/interlogd.c @@ -4,15 +4,14 @@ interlogger - collect events from local-logger and send them to logging and bookkeeping servers */ +#include #include #include #include #include - -#include +#include #include "interlogd.h" -#include "glite/lb/consumer.h" #include "glite/lb/log_proto.h" #include "glite/security/glite_gss.h" #ifdef LB_PERF @@ -35,7 +34,7 @@ int killflg = 0; int TIMEOUT = DEFAULT_TIMEOUT; -gss_cred_id_t cred_handle = GSS_C_NO_CREDENTIAL; +cred_handle_t *cred_handle = NULL; pthread_mutex_t cred_handle_lock = PTHREAD_MUTEX_INITIALIZER; time_t key_mtime = 0, cert_mtime = 0; @@ -58,6 +57,7 @@ static void usage (int status) " -l, --log-server specify address of log server\n" " -s, --socket non-default path of local socket\n" " -L, --lazy [] be lazy when closing connections to servers (default, timeout==0 means turn lazy off)\n" + " -p, --parallel [] use parallel streams to the same server\n" #ifdef LB_PERF " -n, --nosend PERFTEST: consume events instead of sending\n" " -S, --nosync PERFTEST: do not check logd files for lost events\n" @@ -80,6 +80,8 @@ char *file_prefix = 
DEFAULT_PREFIX; int bs_only = 0; int lazy_close = 1; int default_close_timeout; +size_t max_store_size; +int parallel = 0; #ifdef LB_PERF int nosend = 0, norecover=0, nosync=0, noparse=0; char *event_source = NULL; @@ -106,6 +108,8 @@ static struct option const long_options[] = {"log-server", required_argument, 0, 'l'}, {"socket", required_argument, 0, 's'}, {"lazy", optional_argument, 0, 'L'}, + {"max-store", required_argument, 0, 'M'}, + {"parallel", optional_argument, 0, 'p'}, #ifdef LB_PERF {"nosend", no_argument, 0, 'n'}, {"nosync", no_argument, 0, 'S'}, @@ -139,8 +143,9 @@ decode_switches (int argc, char **argv) "k:" /* key */ "C:" /* CA dir */ "b" /* only bookeeping */ - "l:" /* log server */ + "l:" /* log server */ "d" /* debug */ + "p" /* parallel */ #ifdef LB_PERF "n" /* nosend */ "S" /* nosync */ @@ -150,9 +155,10 @@ decode_switches (int argc, char **argv) "e:" /* event file */ "j:" /* num jobs */ #endif -#endif +#endif "L::" /* lazy */ - "s:", /* socket */ + "s:" /* socket */ + "M:" /* max-store */, long_options, (int *) 0)) != EOF) { switch (c) @@ -202,7 +208,7 @@ decode_switches (int argc, char **argv) case 'L': lazy_close = 1; - if(optarg) + if(optarg) default_close_timeout = atoi(optarg); if(default_close_timeout == 0) { default_close_timeout = TIMEOUT; @@ -212,6 +218,17 @@ decode_switches (int argc, char **argv) default_close_timeout = TIMEOUT; break; + case 'M': + max_store_size = atoi(optarg); + break; + + case 'p': + if(optarg) + parallel = atoi(optarg); + else + parallel = 4; + break; + #ifdef LB_PERF case 'n': nosend = 1; @@ -254,6 +271,7 @@ void handle_signal(int num) { killflg++; } + int main (int argc, char **argv) { @@ -321,11 +339,18 @@ main (int argc, char **argv) il_log(LOG_DEBUG, " using lazy mode when closing connections, timeout %d\n", default_close_timeout); + /* get credentials */ if (CAcert_dir) setenv("X509_CERT_DIR", CAcert_dir, 1); - edg_wll_gss_watch_creds(cert_file,&cert_mtime); - ret = edg_wll_gss_acquire_cred_gsi(cert_file, 
key_file, &cred_handle, NULL, &gss_stat); + cred_handle = malloc(sizeof(*cred_handle)); + if(cred_handle == NULL) { + il_log(LOG_CRIT, "Failed to allocate structure for credentials.\n"); + exit(EXIT_FAILURE); + } + cred_handle->creds = NULL; + cred_handle->counter = 0; + ret = edg_wll_gss_acquire_cred_gsi(cert_file, key_file, &cred_handle->creds, NULL, &gss_stat); if (ret) { char *gss_err = NULL; char *str; @@ -333,18 +358,13 @@ main (int argc, char **argv) if (ret == EDG_WLL_GSS_ERROR_GSS) edg_wll_gss_get_error(&gss_stat, "edg_wll_gss_acquire_cred_gsi()", &gss_err); asprintf(&str, "Failed to load GSI credential: %s\n", - (gss_err) ? gss_err : "edg_wll_gss_acquire_cred_gsi() failed"); + (gss_err) ? gss_err : "edg_wll_gss_acquire_cred_gsi() failed"); il_log(LOG_CRIT, str); free(str); if (gss_err) free(gss_err); exit(EXIT_FAILURE); } - - if (globus_module_activate(GLOBUS_COMMON_MODULE) != GLOBUS_SUCCESS) { - il_log(LOG_CRIT, "Failed to initialize Globus common module\n"); - exit(EXIT_FAILURE); - } #ifndef PERF_EMPTY /* find all unsent events waiting in files */ @@ -356,7 +376,7 @@ main (int argc, char **argv) } } else #endif - { + { pthread_t rid; il_log(LOG_INFO, "Starting recovery thread...\n"); diff --git a/org.glite.lb.logger/src/interlogd.h b/org.glite.lb.logger/src/interlogd.h index cf4d634..3b038ee 100644 --- a/org.glite.lb.logger/src/interlogd.h +++ b/org.glite.lb.logger/src/interlogd.h @@ -63,8 +63,14 @@ extern int TIMEOUT; #define INPUT_TIMEOUT (60) #define EXIT_TIMEOUT (1*60) -extern gss_cred_id_t cred_handle; +typedef struct cred_handle { + gss_cred_id_t creds; + int counter; +} cred_handle_t; +extern cred_handle_t *cred_handle; + extern pthread_mutex_t cred_handle_lock; +extern pthread_key_t cred_handle_key; extern char *cert_file; extern char *key_file; extern char *CAcert_dir; @@ -72,6 +78,8 @@ extern int bs_only; extern int killflg; extern int lazy_close; extern int default_close_timeout; +extern size_t max_store_size; +extern int parallel; #ifdef 
LB_PERF extern int nosend, nosync, norecover, noparse; #ifdef PERF_EVENTS_INLINE @@ -84,7 +92,30 @@ extern char *event_source; extern pthread_mutex_t flush_lock; extern pthread_cond_t flush_cond; #endif - + +typedef struct { + /* il_octet_string_t */ + int len; + char *data; + /* http message specific */ + enum { IL_HTTP_OTHER, + IL_HTTP_GET, + IL_HTTP_POST, + IL_HTTP_REPLY + } msg_type; + int reply_code; + char *reply_string; + size_t content_length; + char *host; +} il_http_message_t; + +/* this struct can be passed instead of il_octet_string as parameter */ +typedef union { + il_octet_string_t bin_msg; + il_http_message_t http_msg; +} il_message_t; + + struct event_store { char *event_file_name; /* file with events from local logger */ char *control_file_name; /* file with control information */ @@ -93,8 +124,11 @@ struct event_store { long last_committed_ls; /* -"- LS */ long offset; /* expected file position of next event */ time_t last_modified; /* time of the last file modification */ - int recovering; /* flag for recovery mode */ - pthread_rwlock_t update_lock; /* lock to prevent simultaneous updates */ + int generation; /* cleanup counter, scopes the offset */ + long long rotate_index; /* rotation counter */ + struct event_store_list *le; /* points back to the list */ + pthread_rwlock_t commit_lock; /* lock to prevent simultaneous updates to last_committed_* */ + pthread_rwlock_t offset_lock; /* lock to prevent simultaneous updates offset */ pthread_rwlock_t use_lock; /* lock to prevent struct deallocation */ #if defined(IL_NOTIFICATIONS) char *dest; /* host:port destination */ @@ -109,6 +143,7 @@ struct server_msg { int len; int ev_len; struct event_store *es; /* cache for corresponding event store */ + int generation; /* event store genereation */ long receipt_to; /* receiver (long local-logger id - LLLID) of delivery confirmation (for priority messages) */ #if defined(IL_NOTIFICATIONS) char *dest_name; @@ -147,6 +182,9 @@ struct event_queue { }; +/* 
credential destructor */ +void cred_handle_destroy(void *); + /* server msg methods */ struct server_msg *server_msg_create(il_octet_string_t *, long); struct server_msg *server_msg_copy(struct server_msg *); @@ -166,7 +204,7 @@ int event_queue_remove(struct event_queue *); int event_queue_enqueue(struct event_queue *, char *); /* helper */ int enqueue_msg(struct event_queue *, struct server_msg *); -int event_queue_move_events(struct event_queue *, struct event_queue *, int (*)(struct server_msg *, void *), void *); +int event_queue_move_events(struct event_queue *, struct event_queue *, int (*)(struct server_msg *, void *), void *); /* protocol event queue methods */ int event_queue_connect(struct event_queue *); @@ -189,7 +227,7 @@ int event_queue_cond_unlock(struct event_queue *); /* input queue */ int input_queue_attach(); void input_queue_detach(); -int input_queue_get(il_octet_string_t *, long *, int); +int input_queue_get(il_octet_string_t **, long *, int); /* queue management functions */ int queue_list_init(char *); @@ -209,14 +247,20 @@ int notifid_map_set_expiration(const char *, time_t); int event_store_init(char *); int event_store_cleanup(); int event_store_recover_all(void); -struct event_store *event_store_find(char *); +struct event_store *event_store_find(char *, const char *); int event_store_sync(struct event_store *, long); int event_store_next(struct event_store *, long, int); -int event_store_commit(struct event_store *, int, int); +int event_store_commit(struct event_store *, int, int, int); int event_store_recover(struct event_store *); int event_store_release(struct event_store *); /* int event_store_remove(struct event_store *); */ +#if defined(IL_WS) +/* http functions */ +int parse_header(const char *, il_http_message_t *); +int receive_http(void *, int (*)(void *, char *, const int), il_http_message_t *); +#endif + /* master main loop */ int loop(); diff --git a/org.glite.lb.logger/src/logd.c b/org.glite.lb.logger/src/logd.c index 
36489db..9197bad 100644 --- a/org.glite.lb.logger/src/logd.c +++ b/org.glite.lb.logger/src/logd.c @@ -1,22 +1,23 @@ #ident "$Header$" +#include #include #include #include #include #include #include +#include #include #include #include #include #include - -#include +#include #include "glite/lb/context-int.h" +#include "glite/lb/timeouts.h" #include "logd_proto.h" -#include "glite/lb/consumer.h" #include "glite/security/glite_gss.h" #ifdef LB_PERF #include "glite/lb/lb_perftest.h" @@ -146,17 +147,14 @@ void handle_signal(int num) { *---------------------------------------------------------------------- */ static int -doit(int socket, gss_cred_id_t cred_handle, char *file_name_prefix, int noipc, int noparse) +doit(int socket, edg_wll_GssCred cred_handle, char *file_name_prefix, int noipc, int noparse) { char *subject; int ret,fd,count; struct timeval timeout; edg_wll_GssConnection con; edg_wll_GssStatus gss_stat; - gss_buffer_desc gss_token = GSS_C_EMPTY_BUFFER; - gss_name_t client_name = GSS_C_NO_NAME; - OM_uint32 min_stat; - gss_OID name_type = GSS_C_NO_OID; + edg_wll_GssPrincipal client = NULL; fd_set fdset; struct sockaddr_in peer; socklen_t alen = sizeof peer; @@ -179,34 +177,24 @@ doit(int socket, gss_cred_id_t cred_handle, char *file_name_prefix, int noipc, i /* authenticate */ edg_wll_ll_log(LOG_INFO,"Processing authentication:\n"); - gss_stat.major_status = gss_inquire_context(&gss_stat.minor_status, con.context, - &client_name, NULL, NULL, NULL, NULL, - NULL, NULL); - if (GSS_ERROR(gss_stat.major_status)) { - char *gss_err; - edg_wll_gss_get_error(&gss_stat, "Cannot read client identification", &gss_err); - edg_wll_ll_log(LOG_WARNING, "%s: %s\n", inet_ntoa(peer.sin_addr),gss_err); - free(gss_err); - } else { - gss_stat.major_status = gss_display_name(&gss_stat.minor_status, client_name, - &gss_token, &name_type); - if (GSS_ERROR(gss_stat.major_status)) { - char *gss_err; - edg_wll_gss_get_error(&gss_stat, "Cannot process client identification", &gss_err); - 
edg_wll_ll_log(LOG_WARNING, "%s: %s\n",inet_ntoa(peer.sin_addr),gss_err); - free(gss_err); - } + ret = edg_wll_gss_get_client_conn(&con, &client, &gss_stat); + if (ret) { + char *gss_err; + edg_wll_gss_get_error(&gss_stat, "Cannot read client identification", &gss_err); + edg_wll_ll_log(LOG_WARNING, "%s: %s\n", inet_ntoa(peer.sin_addr),gss_err); + free(gss_err); } - if (GSS_ERROR(gss_stat.major_status) || edg_wll_gss_oid_equal(name_type, GSS_C_NT_ANONYMOUS)) { + if (ret || client->flags & EDG_WLL_GSS_FLAG_ANON) { edg_wll_ll_log(LOG_INFO," User not authenticated, setting as \"%s\". \n",EDG_WLL_LOG_USER_DEFAULT); subject=strdup(EDG_WLL_LOG_USER_DEFAULT); } else { edg_wll_ll_log(LOG_INFO," User successfully authenticated as:\n"); - edg_wll_ll_log(LOG_INFO, " %s\n", (char *)gss_token.value); - subject=gss_token.value; - memset(&gss_token.value, 0, sizeof(gss_token.value)); + edg_wll_ll_log(LOG_INFO, " %s\n", client->name); + subject=strdup(client->name); } + if (client) + edg_wll_gss_free_princ(client); /* get and process the data */ timeout.tv_sec = CONNECTION_TIMEOUT; @@ -266,10 +254,6 @@ doit_end: if (con.sock == -1) edg_wll_ll_log(LOG_DEBUG, "o.k.\n"); if (subject) free(subject); - if (gss_token.length) - gss_release_buffer(&min_stat, &gss_token); - if (client_name != GSS_C_NO_NAME) - gss_release_name(&min_stat, &client_name); return ret; } @@ -291,12 +275,9 @@ int main(int argc, char *argv[]) struct sockaddr_in client_addr; int client_addr_len; - char *my_subject_name = NULL; - time_t cert_mtime = 0, key_mtime = 0; - OM_uint32 min_stat; edg_wll_GssStatus gss_stat; - gss_cred_id_t cred = GSS_C_NO_CREDENTIAL; + edg_wll_GssCred cred = NULL; setlinebuf(stdout); @@ -373,18 +354,6 @@ This is LocalLogger, part of Workload Management System in EU DataGrid & EGEE.\n if (CAcert_dir) setenv("X509_CERT_DIR", CAcert_dir, 1); - /* initialize Globus common module */ -/* XXX: obsolete? 
- edg_wll_ll_log(LOG_INFO,"Initializing Globus common module..."); - if (globus_module_activate(GLOBUS_COMMON_MODULE) != GLOBUS_SUCCESS) { - edg_wll_ll_log(LOG_NOTICE,"no.\n"); - edg_wll_ll_log(LOG_CRIT, "Failed to initialize Globus common module. Exiting.\n"); - exit(1); - } else { - edg_wll_ll_log(LOG_INFO,"yes.\n"); - } -*/ - /* initialize signal handling */ if (mysignal(SIGUSR1, handle_signal) == SIG_ERR) { perror("signal"); exit(1); } if (mysignal(SIGUSR2, handle_signal) == SIG_ERR) { perror("signal"); exit(1); } @@ -401,25 +370,17 @@ This is LocalLogger, part of Workload Management System in EU DataGrid & EGEE.\n edg_wll_gss_watch_creds(cert_file,&cert_mtime); /* XXX DK: support noAuth */ - ret = edg_wll_gss_acquire_cred_gsi(cert_file, key_file, &cred, &my_subject_name, - &gss_stat); + ret = edg_wll_gss_acquire_cred_gsi(cert_file, key_file, &cred, &gss_stat); if (ret) { /* XXX DK: call edg_wll_gss_get_error() */ edg_wll_ll_log(LOG_CRIT,"Failed to get GSI credentials. Exiting.\n"); exit(1); } - if (my_subject_name!=NULL) { - edg_wll_ll_log(LOG_INFO,"Server running with certificate: %s\n",my_subject_name); - free(my_subject_name); + if (cred->name!=NULL) { + edg_wll_ll_log(LOG_INFO,"Server running with certificate: %s\n",cred->name); } else if (noAuth) { edg_wll_ll_log(LOG_INFO,"Server running without certificate\n"); -#if 0 - /* XXX DK: */ - } else { - edg_wll_ll_log(LOG_CRIT,"No server credential found. 
Exiting.\n"); - exit(1); -#endif } /* do listen */ @@ -427,7 +388,7 @@ This is LocalLogger, part of Workload Management System in EU DataGrid & EGEE.\n listener_fd = do_listen(port); if (listener_fd == -1) { edg_wll_ll_log(LOG_CRIT,"Failed to listen on port %d\n",port); - gss_release_cred(&min_stat, &cred); + edg_wll_gss_release_cred(&cred, NULL); exit(-1); } else { edg_wll_ll_log(LOG_DEBUG,"Listener's socket descriptor is '%d'\n",listener_fd); @@ -452,6 +413,8 @@ This is LocalLogger, part of Workload Management System in EU DataGrid & EGEE.\n * Main loop */ while (1) { + int opt; + edg_wll_ll_log(LOG_INFO,"Accepting incomming connections...\n"); client_fd = accept(listener_fd, (struct sockaddr *) &client_addr, &client_addr_len); @@ -459,22 +422,31 @@ This is LocalLogger, part of Workload Management System in EU DataGrid & EGEE.\n close(listener_fd); edg_wll_ll_log(LOG_CRIT,"Failed to accept incomming connections\n"); SYSTEM_ERROR("accept"); - gss_release_cred(&min_stat, &cred); + edg_wll_gss_release_cred(&cred, NULL); exit(-1); } else { edg_wll_ll_log(LOG_DEBUG,"Incomming connection on socket '%d'\n",client_fd); } + opt = 0; + if (setsockopt(client_fd,IPPROTO_TCP,TCP_CORK,(const void *) &opt,sizeof opt)) { + edg_wll_ll_log(LOG_WARNING,"Can't reset TCP_CORK\n"); + } + opt = 1; + if (setsockopt(client_fd,IPPROTO_TCP,TCP_NODELAY,(const void *) &opt,sizeof opt)) { + edg_wll_ll_log(LOG_WARNING,"Can't set TCP_NODELAY\n"); + } + switch (edg_wll_gss_watch_creds(cert_file,&cert_mtime)) { - gss_cred_id_t newcred; + edg_wll_GssCred newcred; case 0: break; case 1: - ret = edg_wll_gss_acquire_cred_gsi(cert_file,key_file,&newcred,NULL,&gss_stat); + ret = edg_wll_gss_acquire_cred_gsi(cert_file,key_file,&newcred,&gss_stat); if (ret) { edg_wll_ll_log(LOG_WARNING,"Reloading credentials failed, continue with older\n"); } else { edg_wll_ll_log(LOG_DEBUG,"Reloading credentials succeeded\n"); - gss_release_cred(&min_stat, &cred); + edg_wll_gss_release_cred(&cred, NULL); cred = newcred; 
} break; @@ -509,6 +481,6 @@ This is LocalLogger, part of Workload Management System in EU DataGrid & EGEE.\n end: if (listener_fd) close(listener_fd); - gss_release_cred(&min_stat, &cred); + edg_wll_gss_release_cred(&cred, NULL); exit(ret); } diff --git a/org.glite.lb.logger/src/logd_proto.c b/org.glite.lb.logger/src/logd_proto.c index 4e1a1e3..6c4015d 100644 --- a/org.glite.lb.logger/src/logd_proto.c +++ b/org.glite.lb.logger/src/logd_proto.c @@ -1,5 +1,6 @@ #ident "$Header$" +#include #include #include #include @@ -9,9 +10,11 @@ #include #include #include +#include +#include +#include "glite/lbu/escape.h" #include "glite/lb/context-int.h" -#include "glite/lb/escape.h" #include "glite/lb/events_parse.h" #include "logd_proto.h" @@ -376,7 +379,7 @@ int edg_wll_log_proto_server(edg_wll_GssConnection *con, struct timeval *timeout dglllid_size = strlen(dglllid); /* format the DG.USER string */ - name_esc = edg_wll_LogEscape(name); + name_esc = glite_lbu_EscapeULM(name); if (asprintf(&dguser,"DG.USER=\"%s\" ",name_esc) == -1) { SYSTEM_ERROR("asprintf"); edg_wll_ll_log(LOG_ERR,"edg_wll_log_proto_server(): nomem for DG.USER\n"); @@ -493,7 +496,7 @@ int edg_wll_log_proto_server(edg_wll_GssConnection *con, struct timeval *timeout #endif /* if not priority send now the answer back to client */ - if (!event->any.priority) { + if (!(event->any.priority & (EDG_WLL_LOGFLAG_SYNC|EDG_WLL_LOGFLAG_SYNC_COMPAT))) { if (!send_answer_back(con,answer,timeout)) { answer_sent = 1; } @@ -501,7 +504,7 @@ int edg_wll_log_proto_server(edg_wll_GssConnection *con, struct timeval *timeout /* send message via IPC (UNIX socket) */ if (!noipc) { - if (event->any.priority) { + if (event->any.priority & (EDG_WLL_LOGFLAG_SYNC|EDG_WLL_LOGFLAG_SYNC_COMPAT)) { edg_wll_ll_log(LOG_DEBUG,"Initializing 2nd UNIX socket (%s) for priority messages confirmation...",confirm_sock_name); if(init_confirmation() < 0) { edg_wll_ll_log(LOG_DEBUG,"error.\n"); @@ -525,7 +528,7 @@ int 
edg_wll_log_proto_server(edg_wll_GssConnection *con, struct timeval *timeout goto edg_wll_log_proto_server_end_1; } else edg_wll_ll_log(LOG_DEBUG,"o.k.\n"); - if (event->any.priority) { + if (event->any.priority & (EDG_WLL_LOGFLAG_SYNC|EDG_WLL_LOGFLAG_SYNC_COMPAT)) { edg_wll_ll_log(LOG_INFO,"Waiting for confirmation..."); if ((count = wait_for_confirmation(timeout, &answer)) < 0) { edg_wll_ll_log(LOG_INFO,"error.\n"); diff --git a/org.glite.lb.logger/src/perftest_il.sh b/org.glite.lb.logger/src/perftest_il.sh index 6121555..e9d2ee5 100644 --- a/org.glite.lb.logger/src/perftest_il.sh +++ b/org.glite.lb.logger/src/perftest_il.sh @@ -15,7 +15,7 @@ DEBUG=${DEBUG:-0} # CONSUMER_ARGS= # PERFTEST_COMPONENT= # COMPONENT_ARGS= -#LOGJOBS_ARGS="" +LOGJOBS_ARGS="-s /tmp/interlogger.perftest" check_test_files || exit 1 @@ -254,9 +254,9 @@ echo "" fi PERFTEST_CONSUMER=$STAGEDIR/bin/glite-lb-bkserverd -CONSUMER_ARGS="-d --perf-sink=1" +CONSUMER_ARGS="-d --perf-sink=1 -p 10500 -w 10503" PERFTEST_COMPONENT=$STAGEDIR/bin/glite-lb-interlogd-perf -LOGJOBS_ARGS=" $COMM_ARGS" +LOGJOBS_ARGS=" -m localhost:10500 $COMM_ARGS" } group_c_test_a () diff --git a/org.glite.lb.logger/src/queue_mgr.c b/org.glite.lb.logger/src/queue_mgr.c index b76c397..f2c9b3b 100644 --- a/org.glite.lb.logger/src/queue_mgr.c +++ b/org.glite.lb.logger/src/queue_mgr.c @@ -4,7 +4,8 @@ #include #include -#include "glite/lb/consumer.h" +#include "glite/wmsutils/jobid/cjobid.h" +#include "glite/lb/context.h" #include "interlogd.h" @@ -88,7 +89,7 @@ queue_list_add(struct queue_list **ql, const char *dest, struct event_queue *eq) return(-1); } el->queue = eq; - el->next = queues; + el->next = *ql; *ql = el; return 0; } diff --git a/org.glite.lb.logger/src/queue_mgr_http.c b/org.glite.lb.logger/src/queue_mgr_http.c new file mode 100644 index 0000000..777e620 --- /dev/null +++ b/org.glite.lb.logger/src/queue_mgr_http.c @@ -0,0 +1,164 @@ +#ident "$Header$" + +#include +#include +#include + +#include "glite/jobid/cjobid.h" 
+#include "glite/lb/context.h" + +#include "interlogd.h" + +struct queue_list { + struct event_queue *queue; + char *dest; + struct queue_list *next; + time_t expires; +}; + +static struct event_queue *log_queue; +static struct queue_list *queues; + + +static +int +queue_list_create() +{ + queues = NULL; + + return(0); +} + + +static +int +queue_list_find(struct queue_list *ql, const char *dest, struct queue_list **el, struct queue_list **prev) +{ + struct queue_list *q, *p; + + assert(el != NULL); + + *el = NULL; + if(prev) + *prev = NULL; + + if(ql == NULL) + return(0); + + q = NULL; + p = ql; + + while(p) { + if(strcmp(p->dest, dest) == 0) { + *el = p; + if(prev) + *prev = q; + return(1); + } + + q = p; + p = p->next; + }; + + return(0); +} + + +static +int +queue_list_add(struct queue_list **ql, const char *dest, struct event_queue *eq) +{ + struct queue_list *el; + + assert(dest != NULL); + assert(eq != NULL); + assert(ql != NULL); + + el = malloc(sizeof(*el)); + if(el == NULL) { + set_error(IL_NOMEM, ENOMEM, "queue_list_add: not enough room for new queue"); + return(-1); + } + + el->dest = strdup(dest); + if(el->dest == NULL) { + free(el); + set_error(IL_NOMEM, ENOMEM, "queue_list_add: not enough memory for new queue"); + return(-1); + } + el->queue = eq; + el->next = *ql; + *ql = el; + return 0; +} + + +struct event_queue * +queue_list_get(char *job_id_s) +{ + char *dest; + struct queue_list *q; + struct event_queue *eq; + dest = job_id_s; + + if(dest == NULL) + return(NULL); + + if(queue_list_find(queues, dest, &q, NULL)) { + return(q->queue); + } else { + eq = event_queue_create(dest); + if(eq) + queue_list_add(&queues, dest, eq); + return(eq); + } +} + + +int +queue_list_is_log(struct event_queue *eq) +{ + return(eq == queue_list_get(NULL)); +} + + +int +queue_list_init(char *ls) +{ + return(queue_list_create()); +} + + +static struct queue_list *current; + + +struct event_queue * +queue_list_first() +{ + current = queues; + return(current ? 
current->queue : NULL); +} + + +struct event_queue * +queue_list_next() +{ + current = current ? current->next : NULL; + return(current ? current->queue : NULL); +} + + +int +queue_list_remove_queue(struct event_queue *eq) +{ + assert(eq != NULL); + + free(eq); + return(1); +} + + + +/* Local Variables: */ +/* c-indentation-style: gnu */ +/* End: */ diff --git a/org.glite.lb.logger/src/recover.c b/org.glite.lb.logger/src/recover.c index 981b2b6..e69398d 100644 --- a/org.glite.lb.logger/src/recover.c +++ b/org.glite.lb.logger/src/recover.c @@ -33,23 +33,34 @@ recover_thread(void *q) exit(1); } il_log(LOG_INFO, "Reloading certificate...\n"); - if(pthread_mutex_lock(&cred_handle_lock) < 0) - abort(); - { - gss_cred_id_t new_cred_handle = GSS_C_NO_CREDENTIAL; - OM_uint32 min_stat; + if (edg_wll_gss_watch_creds(cert_file, &cert_mtime) > 0) { + gss_cred_id_t new_creds = GSS_C_NO_CREDENTIAL; int ret; ret = edg_wll_gss_acquire_cred_gsi(cert_file,key_file, - &new_cred_handle, NULL, NULL); - if (new_cred_handle != GSS_C_NO_CREDENTIAL) { - gss_release_cred(&min_stat, &cred_handle); - cred_handle = new_cred_handle; + &new_creds, NULL, NULL); + if (new_creds != GSS_C_NO_CREDENTIAL) { + if(pthread_mutex_lock(&cred_handle_lock) < 0) + abort(); + /* if no one is using the old credentials, release them */ + if(cred_handle && cred_handle->counter == 0) { + OM_uint32 min_stat; + gss_release_cred(&min_stat,&cred_handle->creds); + free(cred_handle); + il_log(LOG_DEBUG, " freed old credentials\n"); + } + cred_handle = malloc(sizeof(*cred_handle)); + if(cred_handle == NULL) { + il_log(LOG_CRIT, "Failed to allocate structure for credentials.\n"); + exit(EXIT_FAILURE); + } + cred_handle->creds = new_creds; + cred_handle->counter = 0; + if(pthread_mutex_unlock(&cred_handle_lock) < 0) + abort(); il_log(LOG_INFO, "New certificate found and deployed.\n"); } } - if(pthread_mutex_unlock(&cred_handle_lock) < 0) - abort(); sleep(INPUT_TIMEOUT); } } diff --git a/org.glite.lb.logger/src/send_event.c 
b/org.glite.lb.logger/src/send_event.c index 761b300..0afd62c 100644 --- a/org.glite.lb.logger/src/send_event.c +++ b/org.glite.lb.logger/src/send_event.c @@ -2,6 +2,7 @@ #include #include +#include #ifdef HAVE_UNISTD_H #include #endif @@ -102,7 +103,8 @@ static int gss_reader(void *user_data, char *buffer, int max_len) { - int ret, len; + int ret; + size_t len; struct reader_data *data = (struct reader_data *)user_data; edg_wll_GssStatus gss_stat; @@ -165,6 +167,7 @@ event_queue_connect(struct event_queue *eq) int ret; struct timeval tv; edg_wll_GssStatus gss_stat; + cred_handle_t *local_cred_handle; assert(eq != NULL); @@ -172,16 +175,34 @@ event_queue_connect(struct event_queue *eq) if(!nosend) { #endif - if(eq->gss.context == GSS_C_NO_CONTEXT) { + if(eq->gss.context == NULL) { tv.tv_sec = TIMEOUT; tv.tv_usec = 0; + + /* get pointer to the credentials */ if(pthread_mutex_lock(&cred_handle_lock) < 0) abort(); - il_log(LOG_DEBUG, " trying to connect to %s:%d\n", eq->dest_name, eq->dest_port); - ret = edg_wll_gss_connect(cred_handle, eq->dest_name, eq->dest_port, &tv, &eq->gss, &gss_stat); + local_cred_handle = cred_handle; + local_cred_handle->counter++; if(pthread_mutex_unlock(&cred_handle_lock) < 0) abort(); + + il_log(LOG_DEBUG, " trying to connect to %s:%d\n", eq->dest_name, eq->dest_port); + ret = edg_wll_gss_connect(local_cred_handle->creds, eq->dest_name, eq->dest_port, &tv, &eq->gss, &gss_stat); + if(pthread_mutex_lock(&cred_handle_lock) < 0) + abort(); + /* check if we need to release the credentials */ + --local_cred_handle->counter; + if(local_cred_handle != cred_handle && local_cred_handle->counter == 0) { + OM_uint32 min_stat; + gss_release_cred(&min_stat,&local_cred_handle->creds); + free(local_cred_handle); + il_log(LOG_DEBUG, " freed credentials, not used anymore\n"); + } + if(pthread_mutex_unlock(&cred_handle_lock) < 0) + abort(); + if(ret < 0) { char *gss_err = NULL; @@ -190,7 +211,7 @@ event_queue_connect(struct event_queue *eq) 
set_error(IL_DGGSS, ret, (ret == EDG_WLL_GSS_ERROR_GSS) ? gss_err : "event_queue_connect: edg_wll_gss_connect"); if (gss_err) free(gss_err); - eq->gss.context = GSS_C_NO_CONTEXT; + eq->gss.context = NULL; eq->timeout = TIMEOUT; return(0); } @@ -213,9 +234,9 @@ event_queue_close(struct event_queue *eq) if(!nosend) { #endif - if(eq->gss.context != GSS_C_NO_CONTEXT) { + if(eq->gss.context != NULL) { edg_wll_gss_close(&eq->gss, NULL); - eq->gss.context = GSS_C_NO_CONTEXT; + eq->gss.context = NULL; } #ifdef LB_PERF } @@ -237,7 +258,7 @@ event_queue_send(struct event_queue *eq) #ifdef LB_PERF if(!nosend) { #endif - if(eq->gss.context == GSS_C_NO_CONTEXT) + if(eq->gss.context == NULL) return(0); #ifdef LB_PERF } @@ -262,16 +283,10 @@ event_queue_send(struct event_queue *eq) #ifdef LB_PERF if(!nosend) { #endif - if(msg->len) { + if (msg->len) { tv.tv_sec = TIMEOUT; tv.tv_usec = 0; ret = edg_wll_gss_write_full(&eq->gss, msg->msg, msg->len, &tv, &bytes_sent, &gss_stat); - /* commented out due to the conflict with following ljocha's code - if(ret < 0) { - eq->timeout = TIMEOUT; - return(0); - } - */ if(ret < 0) { if (ret == EDG_WLL_GSS_ERROR_ERRNO && errno == EPIPE && events_sent > 0) eq->timeout = 0; @@ -292,8 +307,8 @@ event_queue_send(struct event_queue *eq) } return(0); } - } - else { code = LB_OK; code_min = 0; rep = strdup("not sending empty message"); } + } + else { code = LB_OK; code_min = 0; rep = strdup("not sending empty message"); } #ifdef LB_PERF } else { glite_wll_perftest_consumeEventIlMsg(msg->msg+17); @@ -322,7 +337,7 @@ event_queue_send(struct event_queue *eq) default: /* LB_DBERR, LB_PROTO */ /* the event was not accepted by the server */ /* update the event pointer */ - if(event_store_commit(msg->es, msg->ev_len, queue_list_is_log(eq)) < 0) + if(event_store_commit(msg->es, msg->ev_len, queue_list_is_log(eq), msg->generation) < 0) /* failure committing message, this is bad */ return(-1); /* if we have just delivered priority message from the queue, send 
confirmation */ diff --git a/org.glite.lb.logger/src/send_event_http.c b/org.glite.lb.logger/src/send_event_http.c new file mode 100644 index 0000000..3c90562 --- /dev/null +++ b/org.glite.lb.logger/src/send_event_http.c @@ -0,0 +1,282 @@ +#ident "$Header$" + +#include +#include +#include +#ifdef HAVE_UNISTD_H +#include +#endif +#include +#include +#include +#include + + +/* + * - L/B server protocol handling routines + */ + +#include "glite/jobid/cjobid.h" +#include "glite/lb/il_string.h" +#include "glite/lb/context.h" + +#include "interlogd.h" + +struct reader_data { + edg_wll_GssConnection *gss; + struct timeval *timeout; +}; + + +static +int +gss_reader(void *user_data, char *buffer, int max_len) +{ + int ret; + struct reader_data *data = (struct reader_data *)user_data; + edg_wll_GssStatus gss_stat; + + ret = edg_wll_gss_read(data->gss, buffer, max_len, data->timeout, &gss_stat); + if(ret < 0) { + char *gss_err = NULL; + + if(ret == EDG_WLL_GSS_ERROR_GSS) { + edg_wll_gss_get_error(&gss_stat, "get_reply", &gss_err); + set_error(IL_DGGSS, ret, gss_err); + free(gss_err); + } else + set_error(IL_DGGSS, ret, "get_reply"); + } + return(ret); +} + + +/* + * Read reply from server. 
+ * Returns: -1 - error reading message, + * code > 0 - http status code from server + */ +static +int +get_reply(struct event_queue *eq, char **buf, int *code_min) +{ + int ret, code; + int len; + struct timeval tv; + struct reader_data data; + il_http_message_t msg; + + tv.tv_sec = TIMEOUT; + tv.tv_usec = 0; + data.gss = &eq->gss; + data.timeout = &tv; + len = receive_http(&data, gss_reader, &msg); + if(len < 0) { + set_error(IL_PROTO, LB_PROTO, "get_reply: error reading server reply"); + return(-1); + } + if(msg.data) free(msg.data); + if(msg.reply_string) *buf = msg.reply_string; + *code_min = 0; /* XXX fill in flag for fault */ + return(msg.reply_code); +} + + + +/* + * Returns: 0 - not connected, timeout set, 1 - OK + */ +int +event_queue_connect(struct event_queue *eq) +{ + int ret; + struct timeval tv; + edg_wll_GssStatus gss_stat; + cred_handle_t *local_cred_handle; + + assert(eq != NULL); + +#ifdef LB_PERF + if(!nosend) { +#endif + + if(eq->gss.context == NULL) { + + tv.tv_sec = TIMEOUT; + tv.tv_usec = 0; + + /* get pointer to the credentials */ + if(pthread_mutex_lock(&cred_handle_lock) < 0) + abort(); + local_cred_handle = cred_handle; + local_cred_handle->counter++; + if(pthread_mutex_unlock(&cred_handle_lock) < 0) + abort(); + + il_log(LOG_DEBUG, " trying to connect to %s:%d\n", eq->dest_name, eq->dest_port); + ret = edg_wll_gss_connect(local_cred_handle->creds, eq->dest_name, eq->dest_port, &tv, &eq->gss, &gss_stat); + if(pthread_mutex_lock(&cred_handle_lock) < 0) + abort(); + /* check if we need to release the credentials */ + --local_cred_handle->counter; + if(local_cred_handle != cred_handle && local_cred_handle->counter == 0) { + edg_wll_gss_release_cred(&local_cred_handle->creds, NULL); + free(local_cred_handle); + il_log(LOG_DEBUG, " freed credentials, not used anymore\n"); + } + if(pthread_mutex_unlock(&cred_handle_lock) < 0) + abort(); + + if(ret < 0) { + char *gss_err = NULL; + + if (ret == EDG_WLL_GSS_ERROR_GSS) + 
edg_wll_gss_get_error(&gss_stat, "event_queue_connect: edg_wll_gss_connect", &gss_err); + set_error(IL_DGGSS, ret, + (ret == EDG_WLL_GSS_ERROR_GSS) ? gss_err : "event_queue_connect: edg_wll_gss_connect"); + if (gss_err) free(gss_err); + eq->gss.context = NULL; + eq->timeout = TIMEOUT; + return(0); + } + } + +#ifdef LB_PERF + } +#endif + + return(1); +} + + +int +event_queue_close(struct event_queue *eq) +{ + assert(eq != NULL); + +#ifdef LB_PERF + if(!nosend) { +#endif + + if(eq->gss.context != NULL) { + edg_wll_gss_close(&eq->gss, NULL); + eq->gss.context = NULL; + } +#ifdef LB_PERF + } +#endif + return(0); +} + + +/* + * Send all events from the queue. + * Returns: -1 - system error, 0 - not sent, 1 - queue empty + */ +int +event_queue_send(struct event_queue *eq) +{ + int events_sent = 0; + assert(eq != NULL); + +#ifdef LB_PERF + if(!nosend) { +#endif + if(eq->gss.context == NULL) + return(0); +#ifdef LB_PERF + } +#endif + + /* feed the server with events */ + while (!event_queue_empty(eq)) { + struct server_msg *msg; + char *rep; + int ret, code, code_min; + size_t bytes_sent; + struct timeval tv; + edg_wll_GssStatus gss_stat; + + clear_error(); + + if(event_queue_get(eq, &msg) < 0) + return(-1); + + il_log(LOG_DEBUG, " trying to deliver event at offset %d for job %s\n", msg->offset, msg->job_id_s); + +#ifdef LB_PERF + if(!nosend) { +#endif + /* XXX: ljocha -- does it make sense to send empty messages ? 
*/ + if (msg->len) { + tv.tv_sec = TIMEOUT; + tv.tv_usec = 0; + ret = edg_wll_gss_write_full(&eq->gss, msg->msg, msg->len, &tv, &bytes_sent, &gss_stat); + if(ret < 0) { + if (ret == EDG_WLL_GSS_ERROR_ERRNO && errno == EPIPE && events_sent > 0) { + eq->timeout = 0; + } else { + il_log(LOG_ERR, "send_event: %s\n", error_get_msg()); + eq->timeout = TIMEOUT; + } + return(0); + } + if((code = get_reply(eq, &rep, &code_min)) < 0) { + /* could not get the reply properly, so try again later */ + if (events_sent>0) + eq->timeout = 1; + else { + eq->timeout = TIMEOUT; + il_log(LOG_ERR, " error reading server %s reply:\n %s\n", eq->dest_name, error_get_msg()); + } + return(0); + } + } + else { code = 200; code_min = 0; rep = strdup("not sending empty message"); } +#ifdef LB_PERF + } else { + glite_wll_perftest_consumeEventIlMsg(msg->msg+17); + code = 200; + rep = strdup("OK"); + } +#endif + + il_log(LOG_DEBUG, " event sent, server %s replied with %d, %s\n", eq->dest_name, code, rep); + free(rep); + + /* the reply is back here, decide what to do with message */ + /* HTTP error codes: + 1xx - informational (eg. 100 Continue) + 2xx - successful (eg. 200 OK) + 3xx - redirection (eg. 301 Moved Permanently) + 4xx - client error (eq. 400 Bad Request) + 5xx - server error (eq. 
500 Internal Server Error) + */ + if(code >= 100 && code < 200) { + + /* non fatal errors (for us), try to deliver later */ + eq->timeout = TIMEOUT; + return(0); + } + + /* the message was consumed (successfully or not) */ + /* update the event pointer */ + if(event_store_commit(msg->es, msg->ev_len, queue_list_is_log(eq)) < 0) + /* failure committing message, this is bad */ + return(-1); + + event_queue_remove(eq); + events_sent++; + } /* while */ + + return(1); + +} /* send_events */ + + +/* this is just not used */ +int +send_confirmation(long lllid, int code) +{ + return 0; +} diff --git a/org.glite.lb.logger/src/server_msg.c b/org.glite.lb.logger/src/server_msg.c index e9578a1..84ae833 100644 --- a/org.glite.lb.logger/src/server_msg.c +++ b/org.glite.lb.logger/src/server_msg.c @@ -7,7 +7,6 @@ #include "interlogd.h" #include "glite/lb/il_msg.h" #include "glite/lb/events_parse.h" -#include "glite/lb/consumer.h" #include "glite/lb/context.h" static @@ -17,7 +16,7 @@ create_msg(il_octet_string_t *ev, char **buffer, long *receipt, time_t *expires) char *p; int len; char *event = ev->data; - *receipt = 0; + *receipt = 0L; #if defined(INTERLOGD_EMS) /* find DG.LLLID */ @@ -42,18 +41,19 @@ create_msg(il_octet_string_t *ev, char **buffer, long *receipt, time_t *expires) int n; p += 12; /* skip the key and = */ - if((n = atoi(p)) == 0) { + n = atoi(p); + if((n & (EDG_WLL_LOGFLAG_SYNC|EDG_WLL_LOGFLAG_SYNC_COMPAT)) == 0) { /* normal asynchronous message */ - *receipt = 0; + *receipt = 0L; } } else { /* could not find priority key */ - *receipt = 0; + *receipt = 0L; } } else { /* could not find local logger PID, confirmation can not be sent */ - *receipt = 0; + *receipt = 0L; } #endif @@ -119,11 +119,12 @@ server_msg_copy(struct server_msg *src) msg->receipt_to = src->receipt_to; msg->offset = src->offset; #if defined(IL_NOTIFICATIONS) - msg->dest_name = strdup(src->dest_name); + msg->dest_name = src->dest_name ? 
strdup(src->dest_name) : NULL; msg->dest_port = src->dest_port; - msg->dest = strdup(src->dest); + msg->dest = src->dest ? strdup(src->dest) : NULL; #endif msg->expires = src->expires; + msg->generation = src->generation; return(msg); } @@ -144,13 +145,16 @@ server_msg_init(struct server_msg *msg, il_octet_string_t *event) #if defined(IL_NOTIFICATIONS) - edg_wll_InitContext(&context); /* parse the notification event */ - if((ret=edg_wll_ParseNotifEvent(context, event->data, ¬if_event))) { + edg_wll_InitContext(&context); + ret=edg_wll_ParseNotifEvent(context, event->data, ¬if_event); + edg_wll_FreeContext(context); + if(ret) { set_error(IL_LBAPI, ret, "server_msg_init: error parsing notification event"); return(-1); } + /* FIXME: check for allocation error */ if(notif_event->notification.dest_host && (strlen(notif_event->notification.dest_host) > 0)) { diff --git a/org.glite.lb.logger/src/server_msg_http.c b/org.glite.lb.logger/src/server_msg_http.c new file mode 100644 index 0000000..8bd3623 --- /dev/null +++ b/org.glite.lb.logger/src/server_msg_http.c @@ -0,0 +1,128 @@ +#ident "$Header$" + +#include +#include +#include + +#include "interlogd.h" +#include "glite/lb/il_msg.h" +#include "glite/lb/events_parse.h" +#include "glite/lb/context.h" + +static +int +create_msg(il_http_message_t *ev, char **buffer, long *receipt, time_t *expires) +{ + char *event = ev->data; + + *receipt = 0; + *expires = 0; + + *buffer = ev->data; + return ev->len;; +} + + +struct server_msg * +server_msg_create(il_octet_string_t *event, long offset) +{ + struct server_msg *msg; + + msg = malloc(sizeof(*msg)); + if(msg == NULL) { + set_error(IL_NOMEM, ENOMEM, "server_msg_create: out of memory allocating message"); + return(NULL); + } + + if(server_msg_init(msg, event) < 0) { + server_msg_free(msg); + return(NULL); + } + msg->offset = offset; + + return(msg); +} + + +struct server_msg * +server_msg_copy(struct server_msg *src) +{ + struct server_msg *msg; + + msg = malloc(sizeof(*msg)); + 
if(msg == NULL) { + set_error(IL_NOMEM, ENOMEM, "server_msg_copy: out of memory allocating message"); + return(NULL); + } + + msg->msg = malloc(src->len); + if(msg->msg == NULL) { + set_error(IL_NOMEM, ENOMEM, "server_msg_copy: out of memory allocating server message"); + server_msg_free(msg); + return(NULL); + } + msg->len = src->len; + memcpy(msg->msg, src->msg, src->len); + + msg->job_id_s = strdup(src->job_id_s); + msg->ev_len = src->ev_len; + msg->es = src->es; + msg->receipt_to = src->receipt_to; + msg->offset = src->offset; +#if defined(IL_NOTIFICATIONS) + msg->dest_name = strdup(src->dest_name); + msg->dest_port = src->dest_port; + msg->dest = strdup(src->dest); +#endif + msg->expires = src->expires; + return(msg); +} + + +int +server_msg_init(struct server_msg *msg, il_octet_string_t *event) +{ + il_http_message_t *hmsg = (il_http_message_t *)event; + + assert(msg != NULL); + assert(event != NULL); + + memset(msg, 0, sizeof(*msg)); + + + msg->job_id_s = hmsg->host; + if(msg->job_id_s == NULL) { + set_error(IL_LBAPI, EDG_WLL_ERROR_PARSE_BROKEN_ULM, "server_msg_init: error getting id"); + return -1; + } + msg->len = create_msg(hmsg, &msg->msg, &msg->receipt_to, &msg->expires); + if(msg->len < 0) + return -1; + /* set this to indicate new data owner */ + hmsg->data = NULL; + hmsg->host = NULL; + msg->ev_len = hmsg->len + 1; /* must add separator size too */ + return 0; + +} + + +int +server_msg_is_priority(struct server_msg *msg) +{ + assert(msg != NULL); + + return(msg->receipt_to != 0); +} + + +int +server_msg_free(struct server_msg *msg) +{ + assert(msg != NULL); + + if(msg->msg) free(msg->msg); + if(msg->job_id_s) free(msg->job_id_s); + free(msg); + return 0; +} diff --git a/org.glite.lb/project/version.properties b/org.glite.lb/project/version.properties index 7cf8901..3649e48 100644 --- a/org.glite.lb/project/version.properties +++ b/org.glite.lb/project/version.properties @@ -1,2 +1,2 @@ -module.version=1.9.2 -module.age=2 +module.version=1.9.3 
+module.age=1