From: Miloš Mulač Date: Thu, 13 Mar 2008 08:13:41 +0000 (+0000) Subject: merge of transactions (merge_transactions_src -> merge_transactions_dst) X-Git-Tag: merge_transactions_after~1 X-Git-Url: http://scientific.zcu.cz/git/?a=commitdiff_plain;h=1a7d22e407ebf627bf23a89c898ab2724968b5bc;p=jra1mw.git Merge ... transactions (merge_transactions_src -> merge_transactions_dst) --- diff --git a/org.glite.lb.client/examples/stresslog.c b/org.glite.lb.client/examples/stresslog.c index e6d75f2..9dc276c 100644 --- a/org.glite.lb.client/examples/stresslog.c +++ b/org.glite.lb.client/examples/stresslog.c @@ -16,20 +16,22 @@ extern int opterr,optind; extern int edg_wll_DoLogEvent(edg_wll_Context context, edg_wll_LogLine logline); extern int edg_wll_DoLogEventProxy(edg_wll_Context context, edg_wll_LogLine logline); +extern int edg_wll_DoLogEventDirect(edg_wll_Context context, edg_wll_LogLine logline); static const char *me; static void usage() { - fprintf(stderr,"usage: %s -m bkserver [-x] [-N numjobs] [-n subjobs (each)] -f file_name \n", me); + fprintf(stderr,"usage: %s -m bkserver [-x|-y] [-N numjobs] [-n subjobs (each)] -f file_name \n", me); + fprintf(stderr," - event file not containing DG.JOBID and without -x or -z not containing DG.USER\n"); } int main(int argc, char *argv[]) { char *job = NULL,*server = NULL,*seq = NULL,*filename = NULL; char buf[MAXMSGSIZE]; - int lbproxy = 0, num_subjobs = 0; - int done = 0, njobs = 1,i; + int lbproxy = 0, num_subjobs = 0, lbdirect = 0; + int done = 0, njobs = 1,i,j; edg_wll_Context ctx; edg_wlc_JobId jobid,*subjobs; FILE *f; @@ -41,9 +43,10 @@ int main(int argc, char *argv[]) me = strdup(argv[0]); do { - switch (getopt(argc,argv,"m:xN:n:f:")) { + switch (getopt(argc,argv,"m:xyN:n:f:")) { case 'm': server = strdup(optarg); break; case 'x': lbproxy = 1; break; + case 'y': lbdirect = 1; break; case 'N': njobs = atoi(optarg); break; case 'n': num_subjobs = atoi(optarg); break; case 'f': filename = (char *) strdup(optarg); break; @@ -51,6 +54,12 @@ int main(int argc, char *argv[]) case -1: done = 1; break; } } while (!done); + + if (lbproxy && lbdirect) { + fprintf(stderr,"%s: only one of -x or -y options may be specified \n",me); + usage(); + exit(1); + } if (!server) { fprintf(stderr,"%s: -m required\n",me); @@ -58,7 +67,7 @@ int main(int argc, char *argv[]) exit(1); } - if ((njobs <= 0) || (num_subjobs)) { + if ((njobs <= 0) || (num_subjobs <= 0)) { fprintf(stderr,"%s: wrong number of jobs\n",me); usage(); exit(1); @@ -76,12 +85,13 @@ int main(int argc, char *argv[]) } /* MAIN LOOP */ -for (i = 1; i #include #include +#include #include "glite/lb/context-int.h" #include "glite/lb/lb_perftest.h" #include "glite/lb/log_proto.h" diff --git a/org.glite.lb.client/src/prod_proto.c b/org.glite.lb.client/src/prod_proto.c index 00d102a..1a2a3f8 100644 --- a/org.glite.lb.client/src/prod_proto.c +++ b/org.glite.lb.client/src/prod_proto.c @@ -646,7 +646,6 @@ int edg_wll_log_proxy_read(edg_wll_Context ctx, edg_wll_PlainConnection *conn) int edg_wll_log_direct_connect(edg_wll_Context ctx, edg_wll_GssConnection *conn) { int ret,answer; - char *my_subject_name = NULL; edg_wll_GssStatus gss_stat; edg_wll_GssCred cred = NULL; char *host; @@ -673,14 +672,13 @@ int edg_wll_log_direct_connect(edg_wll_Context ctx, edg_wll_GssConnection *conn) answer = edg_wll_SetErrorGss(ctx, "edg_wll_gss_acquire_cred_gsi(): failed to load GSI credentials", &gss_stat); goto edg_wll_log_direct_connect_end; } - my_subject_name = cred->name; #ifdef EDG_WLL_LOG_STUB - if (my_subject_name) { + if (cred && cred->name) { /* TODO: merge - shouldn't be probably ctx->p_user_lbproxy but some new parameter, eg. ctx->p_user related to the change in producer.c */ - edg_wll_SetParamString(ctx, EDG_WLL_PARAM_LBPROXY_USER, my_subject_name); - fprintf(stderr,"edg_wll_log_direct_connect: using certificate: %s\n",my_subject_name); + edg_wll_SetParamString(ctx, EDG_WLL_PARAM_LBPROXY_USER, cred->name); + fprintf(stderr,"edg_wll_log_direct_connect: using certificate: %s\n", cred->name); } else { fprintf(stderr,"edg_wll_log_direct_connect: going on anonymously\n"); } diff --git a/org.glite.lb.client/src/producer.c b/org.glite.lb.client/src/producer.c index 497b5d7..6c1be90 100644 --- a/org.glite.lb.client/src/producer.c +++ b/org.glite.lb.client/src/producer.c @@ -642,8 +642,13 @@ int edg_wll_SetLoggingJob( ctx->p_proxy_filename ? ctx->p_proxy_filename : ctx->p_key_filename, &cred, &gss_stat); /* give up if unable to acquire prescribed credentials */ - if (err && ctx->p_proxy_filename) { + if (err) { edg_wll_SetErrorGss(ctx, "failed to load GSI credentials", &gss_stat); + + // XXX: stop here - further changes need to be done in + // edg_wll_gss_connect() to support annonymous connetion + return edg_wll_Error(ctx,NULL,NULL); + edg_wll_SetParamString(ctx, EDG_WLL_PARAM_LBPROXY_USER, EDG_WLL_LOG_USER_DEFAULT); } else { edg_wll_SetParamString(ctx, EDG_WLL_PARAM_LBPROXY_USER, cred->name); @@ -694,8 +699,13 @@ int edg_wll_SetLoggingJobProxy( ctx->p_proxy_filename ? ctx->p_proxy_filename : ctx->p_key_filename, &cred, &gss_stat); /* give up if unable to acquire prescribed credentials */ - if (err && ctx->p_proxy_filename) { + if (err) { edg_wll_SetErrorGss(ctx, "failed to load GSI credentials", &gss_stat); + + // XXX: stop here - further changes need to be done in + // edg_wll_gss_connect() to support annonymous connetion + return edg_wll_SetError(ctx, ENOENT, "No credentials found."); + edg_wll_SetParamString(ctx, EDG_WLL_PARAM_LBPROXY_USER, EDG_WLL_LOG_USER_DEFAULT); } else { edg_wll_SetParamString(ctx, EDG_WLL_PARAM_LBPROXY_USER, cred->name); diff --git a/org.glite.lb.common/interface/context.h b/org.glite.lb.common/interface/context.h index ad366de..02fe955 100644 --- a/org.glite.lb.common/interface/context.h +++ b/org.glite.lb.common/interface/context.h @@ -193,6 +193,7 @@ typedef enum _edg_wll_ErrorCode { EDG_WLL_IL_SYS, /**< Interlogger internal error. */ EDG_WLL_IL_EVENTS_WAITING, /**< Interlogger still has events pending delivery. */ EDG_WLL_ERROR_COMPARE_EVENTS, /**< Two compared events differ. */ + EDG_WLL_ERROR_DB_TRANS_DEADLOCK, /**< Deadlock detected during DB operation. */ } edg_wll_ErrorCode; /** diff --git a/org.glite.lb.common/src/context.c b/org.glite.lb.common/src/context.c index 535c743..3d946f4 100644 --- a/org.glite.lb.common/src/context.c +++ b/org.glite.lb.common/src/context.c @@ -171,6 +171,7 @@ static const char* const errTexts[] = { "Interlogger internal error", "Interlogger has events pending", "Compared events differ", + "DB deadlock detected", }; const char *edg_wll_GetErrorText(int code) { diff --git a/org.glite.lb.server/Makefile b/org.glite.lb.server/Makefile index b79ea9e..10d48c5 100644 --- a/org.glite.lb.server/Makefile +++ b/org.glite.lb.server/Makefile @@ -38,13 +38,6 @@ else STATIC_LIB_BK:=libglite_lb_bkserver.a endif -# Use embrionic DAG registration implicitely -LB_DAG_FLAGS:=-DLB_DAG_EMBRIONIC - -ifdef LB_BUF - LB_BUF_FLAGS:=-DLB_BUF -endif - ifeq ($(GLITE_LB_SERVER_WITH_WS),yes) WS_CFLAGS=-DGLITE_LB_SERVER_WITH_WS NSMAP=LoggingAndBookkeeping.nsmap @@ -83,8 +76,7 @@ CFLAGS:= \ -I${voms_prefix}/include \ ${COVERAGE_FLAGS} \ $(GRIDSITE_CFLAGS) \ - -D_GNU_SOURCE ${LB_STANDALONE_FLAGS} ${LB_PERF_FLAGS} ${LB_DAG_FLAGS} \ - ${LB_BUF_FLAGS} \ + -D_GNU_SOURCE ${LB_STANDALONE_FLAGS} ${LB_PERF_FLAGS} \ -I${globus_prefix}/include/${nothrflavour} #gridsite needs this ifdef LB_PROF @@ -107,6 +99,10 @@ ifdef LBS_DB_PROFILE CFLAGS:=${CFLAGS} -DLBS_DB_PROFILE endif +ifdef LB_EVENTS_BLOB + CFLAGS:=${CFLAGS} -DLB_EVENTS_BLOB +endif + TEST_LIBS:=-L${cppunit_prefix}/lib -lcppunit TEST_INC:=-I${cppunit_prefix}/include diff --git a/org.glite.lb.server/config/glite-lb-dbsetup.sql b/org.glite.lb.server/config/glite-lb-dbsetup.sql index c9e0c80..8eee601 100644 --- a/org.glite.lb.server/config/glite-lb-dbsetup.sql +++ b/org.glite.lb.server/config/glite-lb-dbsetup.sql @@ -5,22 +5,15 @@ create table jobs ( aclid char(32) binary null, proxy bool not null, server bool not null, - + grey bool not null, + zombie bool not null, + nevents int not null, primary key (jobid), unique (dg_jobid), index (userid) ) engine=innodb; -create table grey_jobs ( - jobid char(32) binary not null, - dg_jobid varchar(255) binary not null, - time_stamp datetime not null, - - primary key (jobid), - unique (dg_jobid) -) engine=innodb; - create table users ( userid char(32) binary not null, cert_subj varchar(255) binary not null, @@ -41,7 +34,8 @@ create table events ( level int null, arrived datetime not null, - + ulm mediumblob not null, -- testing (1) + seqcode varchar(255) binary not null, primary key (jobid,event), index (time_stamp), @@ -49,6 +43,16 @@ create table events ( index (arrived) ) engine=innodb; +-- testing (2) +create table events_flesh ( + jobid char(32) binary not null, + event int not null, + ulm mediumblob binary not null, + + primary key (jobid,event) +) engine=innodb; + +-- for compatibility create table short_fields ( jobid char(32) binary not null, event int not null, @@ -58,6 +62,7 @@ create table short_fields ( primary key (jobid,event,name) ) engine=innodb; +-- for compatibility create table long_fields ( jobid char(32) binary not null, event int not null, @@ -114,7 +119,7 @@ create table notif_registrations ( `STD_owner` varchar(200) null, `STD_network_server` varchar(200) null, - `JDL_VirtualOrganization` varchar(200) null, + `JDL_VirtualOrganisation` varchar(200) null, primary key (notifid), index (`STD_owner`), diff --git a/org.glite.lb.server/interface/store.h b/org.glite.lb.server/interface/store.h index bb0bb38..03a7b49 100644 --- a/org.glite.lb.server/interface/store.h +++ b/org.glite.lb.server/interface/store.h @@ -15,6 +15,7 @@ extern "C" { int edg_wll_StoreEvent( edg_wll_Context, /* INOUT */ edg_wll_Event *, /* IN */ + const char *, /* IN */ int * ); @@ -40,13 +41,14 @@ edg_wll_ErrorCode edg_wll_StepIntStateEmbriotic( edg_wll_Event *e /* IN */ ); -int db_store(edg_wll_Context,char *, char *); +int db_store(edg_wll_Context, char *); int db_parent_store(edg_wll_Context, edg_wll_Event *, intJobStat *); int handle_request(edg_wll_Context,char *); int create_reply(const edg_wll_Context,char **); -int trans_db_store(edg_wll_Context,char *,edg_wll_Event *,intJobStat *); -int is_job_local(edg_wll_Context, edg_wlc_JobId jobId); -int store_job_server_proxy(edg_wll_Context ctx, edg_wll_Event *event); +int is_job_local(edg_wll_Context, glite_jobid_const_t jobId); +int store_job_server_proxy(edg_wll_Context ctx, edg_wll_Event *event, int *register_to_JP); +int register_subjobs_embryonic(edg_wll_Context,const edg_wll_RegJobEvent *); + int edg_wll_delete_event(edg_wll_Context,const char *, int); diff --git a/org.glite.lb.server/src/bkindex.c b/org.glite.lb.server/src/bkindex.c index fa087f9..bf1b621 100644 --- a/org.glite.lb.server/src/bkindex.c +++ b/org.glite.lb.server/src/bkindex.c @@ -16,6 +16,7 @@ #include "jobstat.h" #include "db_supp.h" #include "openserver.h" +#include "db_calls.h" #ifdef LB_PERF #include "glite/lb/lb_perftest.h" @@ -101,6 +102,12 @@ int main(int argc,char **argv) edg_wll_SetError(ctx, EDG_WLL_ERROR_DB_CALL, "index capability not available"); do_exit(ctx, EX_SOFTWARE); } + if (!(ctx->dbcaps & GLITE_LBU_DB_CAP_TRANSACTIONS)) { + edg_wll_SetError(ctx, EDG_WLL_ERROR_DB_CALL, "transactions capability not available"); + do_exit(ctx, EX_SOFTWARE); + } + glite_lbu_DBSetCaps(ctx->dbctx, ctx->dbcaps); + if (edg_wll_QueryJobIndices(ctx,&old_indices,&index_names)) do_exit(ctx,EX_SOFTWARE); if (dump) { @@ -361,38 +368,52 @@ static edg_wll_ErrorCode edg_wll_RefreshIColumns(edg_wll_Context ctx, void *job_ return edg_wll_Error(ctx, NULL, NULL); } while ((ret=edg_wll_FetchRow(ctx,sh,sizeof(res)/sizeof(res[0]),NULL,res)) >0) { - if (strcmp(res[3], INTSTAT_VERSION)) { - stat = NULL; - if (!edg_wlc_JobIdParse(res[4], &jobid)) { - if ((stat = malloc(sizeof(intJobStat))) != NULL) { - if (edg_wll_intJobStatus(ctx, jobid, 0, stat, 1)) { - free(stat); - stat = NULL; + do { + if (edg_wll_Transaction(ctx)) goto rollback; + if (edg_wll_LockJobRowInShareMode(ctx, res[4])) goto rollback;; + + if (strcmp(res[3], INTSTAT_VERSION)) { + stat = NULL; + if (!edg_wlc_JobIdParse(res[4], &jobid)) { + if ((stat = malloc(sizeof(intJobStat))) != NULL) { + if (!edg_wll_LoadIntState(ctx, jobid, 0 /* DONT_LOCK */, -1 /*all events*/, &stat)) { + destroy_intJobStat_extension(stat); + free(stat); + stat = NULL; + } else { + if (edg_wll_intJobStatus(ctx, jobid, 0, stat, 1)) { + free(stat); + stat = NULL; + } + } } + edg_wlc_JobIdFree(jobid); } - edg_wlc_JobIdFree(jobid); + } else { + stat = dec_intJobStat(res[1], &rest); + if (rest == NULL) stat = NULL; + } + if (stat == NULL) { + glite_lbu_FreeStmt(&sh); + edg_wll_SetError(ctx, EDG_WLL_ERROR_SERVER_RESPONSE, + "cannot decode int_status from states DB table"); + goto rollback; } - } else { - stat = dec_intJobStat(res[1], &rest); - if (rest == NULL) stat = NULL; - } - if (stat == NULL) { - glite_lbu_FreeStmt(&sh); - return edg_wll_SetError(ctx, EDG_WLL_ERROR_SERVER_RESPONSE, - "cannot decode int_status from states DB table"); - } - edg_wll_IColumnsSQLPart(ctx, job_index_cols, &stat->pub, 0, NULL, &icvalues); - trio_asprintf(&stmt, "update states set seq=%s%s where jobid='%|Ss'", res[2], icvalues, res[0]); - ret = edg_wll_ExecSQL(ctx, stmt, NULL); + edg_wll_IColumnsSQLPart(ctx, job_index_cols, &stat->pub, 0, NULL, &icvalues); + trio_asprintf(&stmt, "update states set seq=%s%s where jobid='%|Ss'", res[2], icvalues, res[0]); + ret = edg_wll_ExecSQL(ctx, stmt, NULL); - for (i = 0; i < 5; i++) free(res[i]); - destroy_intJobStat(stat); free(stat); - free(stmt); free(icvalues); + for (i = 0; i < 5; i++) free(res[i]); + destroy_intJobStat(stat); free(stat); + free(stmt); free(icvalues); - if (ret < 0) return edg_wll_Error(ctx, NULL, NULL); - + if (ret < 0) goto rollback; +rollback:; + } while (edg_wll_TransNeedRetry(ctx)); + if (edg_wll_Error(ctx, NULL, NULL)) goto err; } +err: glite_lbu_FreeStmt(&sh); return edg_wll_Error(ctx, NULL, NULL); } diff --git a/org.glite.lb.server/src/bkserverd.c b/org.glite.lb.server/src/bkserverd.c index c0c2b7e..4075070 100644 --- a/org.glite.lb.server/src/bkserverd.c +++ b/org.glite.lb.server/src/bkserverd.c @@ -1386,6 +1386,7 @@ int bk_accept_serve(int conn, struct timeval *timeout, void *cdata) /* fallthrough */ case ENOTCONN: + case ECONNREFUSED: free(errt); free(errd); /* * "recoverable" error - return (>0) diff --git a/org.glite.lb.server/src/db_calls.c b/org.glite.lb.server/src/db_calls.c index 203ac2b..cbdb0d6 100644 --- a/org.glite.lb.server/src/db_calls.c +++ b/org.glite.lb.server/src/db_calls.c @@ -9,11 +9,11 @@ #include "glite/lb/context-int.h" #include "db_calls.h" -# include "db_supp.h" +#include "db_supp.h" /** Returns bitmask of job membership in common server/proxy database */ -int edg_wll_jobMembership(edg_wll_Context ctx, edg_wlc_JobId job) +int edg_wll_jobMembership(edg_wll_Context ctx, glite_jobid_const_t job) { char *dbjob; char *stmt = NULL; @@ -25,7 +25,7 @@ int edg_wll_jobMembership(edg_wll_Context ctx, edg_wlc_JobId job) dbjob = edg_wlc_JobIdGetUnique(job); - trio_asprintf(&stmt,"select proxy,server from jobs where jobid = '%|Ss'",dbjob); + trio_asprintf(&stmt,"select proxy,server from jobs where jobid = '%|Ss' for update",dbjob); ret = edg_wll_ExecSQL(ctx,stmt,&q); if (ret <= 0) { if (ret == 0) { @@ -53,3 +53,35 @@ clean: free(stmt); return(result); } + + +/* just lock one row corresponding to job in table jobs + * lock_mode: 0 = lock in share mode / 1 = for update + */ +int edg_wll_LockJobRow(edg_wll_Context ctx, const char *job, int lock_mode) +{ + char *stmt = NULL; + glite_lbu_Statement sh; + int nr; + + + edg_wll_ResetError(ctx); + + if (lock_mode) + trio_asprintf(&stmt, "select count(*) from jobs where jobid='%|Ss' for update", job); + else + trio_asprintf(&stmt, "select count(*) from jobs where jobid='%|Ss' lock in share mode", job); + + if ((nr = edg_wll_ExecSQL(ctx,stmt,&sh)) < 0) goto cleanup; + if (nr == 0) { + edg_wll_SetError(ctx,ENOENT,"no state in DB"); + goto cleanup; + } + +cleanup: + if (sh) glite_lbu_FreeStmt(&sh); + free(stmt); stmt = NULL; + + return edg_wll_Error(ctx, NULL, NULL); +} + diff --git a/org.glite.lb.server/src/db_calls.h b/org.glite.lb.server/src/db_calls.h index 1adcbcc..ff4910d 100644 --- a/org.glite.lb.server/src/db_calls.h +++ b/org.glite.lb.server/src/db_calls.h @@ -6,6 +6,11 @@ #define DB_PROXY_JOB 1 #define DB_SERVER_JOB 2 -int edg_wll_jobMembership(edg_wll_Context ctx, edg_wlc_JobId job); +int edg_wll_jobMembership(edg_wll_Context ctx, glite_jobid_const_t job); + +#define edg_wll_LockJobRowInShareMode(X,Y) edg_wll_LockJobRow(X,Y,0) +#define edg_wll_LockJobRowForUpdate(X,Y) edg_wll_LockJobRow(X,Y,1) +int edg_wll_LockJobRow(edg_wll_Context ctx, const char *job, int lock_mode); + #endif /* GLITE_LB_LB_CALLS_H */ diff --git a/org.glite.lb.server/src/db_store.c b/org.glite.lb.server/src/db_store.c index 36ec72e..ed8f042 100644 --- a/org.glite.lb.server/src/db_store.c +++ b/org.glite.lb.server/src/db_store.c @@ -12,9 +12,9 @@ #include "glite/lb/lb_maildir.h" #include "purge.h" #include "store.h" -#include "lock.h" #include "il_lbproxy.h" #include "jobstat.h" +#include "db_supp.h" #ifdef LB_PERF #include "glite/lb/lb_perftest.h" @@ -22,32 +22,25 @@ #endif -/* XXX */ -#define use_db 1 - extern int unset_proxy_flag(edg_wll_Context, edg_wlc_JobId); extern int edg_wll_NotifMatch(edg_wll_Context, const edg_wll_JobStat *); -static int db_store_finalize(edg_wll_Context ctx, char *event, edg_wll_Event *ev, edg_wll_JobStat *newstat, int seq, int reg_to_JP); +static int db_store_finalize(edg_wll_Context ctx, char *event, edg_wll_Event *ev, edg_wll_JobStat *newstat, int reg_to_JP); int -db_store(edg_wll_Context ctx,char *ucs, char *event) +db_store(edg_wll_Context ctx, char *event) { - edg_wll_Event *ev; - int seq, reg_to_JP = 0; - int err; - int local_job; + edg_wll_Event *ev = NULL; + int seq, reg_to_JP = 0, local_job; edg_wll_JobStat newstat; - ev = NULL; - edg_wll_ResetError(ctx); memset(&newstat,0,sizeof newstat); - if(edg_wll_ParseEvent(ctx, event, &ev)) - goto err; + if(edg_wll_ParseEvent(ctx, event, &ev)) goto err; + local_job = is_job_local(ctx, ev->any.jobId); #ifdef LB_PERF @@ -59,104 +52,74 @@ db_store(edg_wll_Context ctx,char *ucs, char *event) } #endif - if(use_db) { - char *ed; - int code; - - if (edg_wll_LockJob(ctx,ev->any.jobId)) goto err; - store_job_server_proxy(ctx, ev); - code = edg_wll_Error(ctx,NULL,&ed); - edg_wll_UnlockJob(ctx,ev->any.jobId); /* XXX: ignore error */ - if (code) { - edg_wll_SetError(ctx,code,ed); - free(ed); - goto err; - } - } + do { + if (edg_wll_Transaction(ctx)) goto err; + + if (store_job_server_proxy(ctx, ev, ®_to_JP)) goto rollback; + /* events logged to proxy and server (DIRECT flag) may be ignored on proxy + * if jobid prefix hostname matches server hostname -> they will + * sooner or later arrive to server too and are stored in common DB + */ + if (ctx->isProxy && local_job && (ev->any.priority & EDG_WLL_LOGFLAG_DIRECT)) { + goto commit; + } - /* events logged to proxy and server (DIRECT flag) may be ignored on proxy - * if jobid prefix hostname matches server hostname -> they will - * sooner or later arrive to server too and are stored in common DB - */ - if (ctx->isProxy && local_job) { - if (ev->any.priority & EDG_WLL_LOGFLAG_DIRECT) { - edg_wll_FreeEvent(ev); - free(ev); - return 0; + if (edg_wll_StoreEvent(ctx, ev, event, &seq)) goto rollback; + + if ( ev->any.type == EDG_WLL_EVENT_CHANGEACL ) { + if (edg_wll_UpdateACL(ctx, ev->any.jobId, + ev->changeACL.user_id, ev->changeACL.user_id_type, + ev->changeACL.permission, ev->changeACL.permission_type, + ev->changeACL.operation)) goto rollback; + } else { - /* these are re-registrations of subjobs on proxy */ - /* embryonic registrations does not trigger registration in JP */ - reg_to_JP = 1; +#ifdef LB_PERF + if(sink_mode == GLITE_LB_SINK_STATE) { + glite_wll_perftest_consumeEvent(ev); + goto commit; + } +#endif + + if ( newstat.state ) { /* prevent memleaks in case of transaction retry */ + edg_wll_FreeStatus(&newstat); + newstat.state = EDG_WLL_JOB_UNDEF; + } + if (edg_wll_StepIntState(ctx,ev->any.jobId, ev, seq, &newstat)) goto rollback; + + if (newstat.remove_from_proxy) + if (edg_wll_PurgeServerProxy(ctx, ev->any.jobId)) goto rollback; } - } - /* XXX: if event type is user tag, convert the tag name to lowercase! - * (not sure whether to convert a value too is reasonable - * or keep it 'case sensitive') - */ - if ( ev->any.type == EDG_WLL_EVENT_USERTAG ) - { - int i; - for ( i = 0; ev->userTag.name[i] != '\0'; i++ ) - ev->userTag.name[i] = tolower(ev->userTag.name[i]); - } - - if(ev->any.user == NULL) - ev->any.user = strdup(ucs); - - if(use_db) { - if (ctx->strict_locking && edg_wll_LockJob(ctx,ev->any.jobId)) goto err; - if(edg_wll_StoreEvent(ctx, ev,&seq)) { - edg_wll_UnlockJob(ctx,ev->any.jobId); - goto err; - } - } - if (!ctx->strict_locking && edg_wll_LockJob(ctx,ev->any.jobId)) goto err; + if (ev->any.type == EDG_WLL_EVENT_REGJOB && + (ev->regJob.jobtype == EDG_WLL_REGJOB_DAG || + ev->regJob.jobtype == EDG_WLL_REGJOB_PARTITIONED || + ev->regJob.jobtype == EDG_WLL_REGJOB_COLLECTION) && + ev->regJob.nsubjobs > 0) - if ( ev->any.type == EDG_WLL_EVENT_CHANGEACL ) { - err = edg_wll_UpdateACL(ctx, ev->any.jobId, - ev->changeACL.user_id, ev->changeACL.user_id_type, - ev->changeACL.permission, ev->changeACL.permission_type, - ev->changeACL.operation); + if (register_subjobs_embryonic(ctx,&ev->regJob)) goto rollback; - edg_wll_UnlockJob(ctx,ev->any.jobId); - } - else { -#ifdef LB_PERF - if(sink_mode == GLITE_LB_SINK_STATE) { - glite_wll_perftest_consumeEvent(ev); - edg_wll_UnlockJob(ctx,ev->any.jobId); - goto err; - } -#endif +commit: +rollback:; + } while (edg_wll_TransNeedRetry(ctx)); - err = edg_wll_StepIntState(ctx,ev->any.jobId, ev, seq, &newstat); - } + if (edg_wll_Error(ctx, NULL, NULL)) goto err; - /* XXX: in edg_wll_StepIntState() - * if (edg_wll_UnlockJob(ctx,ev->any.jobId)) goto err; - */ - if (err) goto err; - db_store_finalize(ctx, event, ev, &newstat, seq, reg_to_JP); + db_store_finalize(ctx, event, ev, &newstat, reg_to_JP); -err: - - if(ev) { - edg_wll_FreeEvent(ev); - free(ev); - } +err: + if(ev) { edg_wll_FreeEvent(ev); free(ev); } if ( newstat.state ) edg_wll_FreeStatus(&newstat); - return edg_wll_Error(ctx,NULL,NULL); } + /* Called only when CollectionStateEvent generated */ int db_parent_store(edg_wll_Context ctx, edg_wll_Event *ev, intJobStat *is) @@ -170,7 +133,7 @@ db_parent_store(edg_wll_Context ctx, edg_wll_Event *ev, intJobStat *is) edg_wll_ResetError(ctx); memset(&newstat,0,sizeof newstat); - /* Locked from load_parent_intJobStat() */ + /* Transaction opened from db_store */ #ifdef LB_PERF if (sink_mode == GLITE_LB_SINK_STORE) { @@ -184,10 +147,8 @@ db_parent_store(edg_wll_Context ctx, edg_wll_Event *ev, intJobStat *is) assert(ev->any.user); - if(use_db) { - if(edg_wll_StoreEvent(ctx, ev,&seq)) + if(edg_wll_StoreEvent(ctx, ev, NULL, &seq)) goto err; - } #ifdef LB_PERF if(sink_mode == GLITE_LB_SINK_STATE) { @@ -205,7 +166,7 @@ db_parent_store(edg_wll_Context ctx, edg_wll_Event *ev, intJobStat *is) assert(event); } - db_store_finalize(ctx, event, ev, &newstat, seq, 0); + db_store_finalize(ctx, event, ev, &newstat, 0); err: @@ -216,42 +177,33 @@ err: } - -static int db_store_finalize(edg_wll_Context ctx, char *event, edg_wll_Event *ev, edg_wll_JobStat *newstat, int seq, int reg_to_JP) +/* Send regitration to JP + */ +static int register_to_JP(edg_wll_Context ctx, edg_wll_Event *ev) { - int local_job = is_job_local(ctx, ev->any.jobId); - - -#ifdef LB_PERF - if( sink_mode == GLITE_LB_SINK_SEND ) { - glite_wll_perftest_consumeEvent(ev); - return edg_wll_Error(ctx,NULL,NULL); - } -#endif + char *jids, *msg; - /* Send regitration to JP - */ - if ( ctx->jpreg_dir && ev->any.type == EDG_WLL_EVENT_REGJOB && seq == 0 && - (!ctx->isProxy || reg_to_JP) ) { - char *jids, *msg; - - if ( !(jids = edg_wlc_JobIdUnparse(ev->any.jobId)) ) { - return edg_wll_SetError(ctx, errno, "Can't unparse jobid when registering to JP"); - } - if ( !(msg = realloc(jids, strlen(jids)+strlen(ev->any.user)+2)) ) { - free(jids); - return edg_wll_SetError(ctx, errno, "Can't allocate buffer when registering to JP"); - } - strcat(msg, "\n"); - strcat(msg, ev->any.user); - if ( edg_wll_MaildirStoreMsg(ctx->jpreg_dir, ctx->srvName, msg) ) { - free(msg); - return edg_wll_SetError(ctx, errno, lbm_errdesc); - } + if ( !(jids = edg_wlc_JobIdUnparse(ev->any.jobId)) ) { + return edg_wll_SetError(ctx, errno, "Can't unparse jobid when registering to JP"); + } + if ( !(msg = realloc(jids, strlen(jids)+strlen(ev->any.user)+2)) ) { + free(jids); + return edg_wll_SetError(ctx, errno, "Can't allocate buffer when registering to JP"); + } + strcat(msg, "\n"); + strcat(msg, ev->any.user); + if ( edg_wll_MaildirStoreMsg(ctx->jpreg_dir, ctx->srvName, msg) ) { free(msg); + return edg_wll_SetError(ctx, errno, lbm_errdesc); } + free(msg); + + return edg_wll_Error(ctx,NULL,NULL); +} +static int forward_event_to_server(edg_wll_Context ctx, char *event, edg_wll_Event *ev, int local_job) +{ if ( ctx->isProxy ) { /* * send event to the proper BK server @@ -269,32 +221,41 @@ static int db_store_finalize(edg_wll_Context ctx, char *event, edg_wll_Event *ev return edg_wll_SetError(ctx, EDG_WLL_IL_PROTO, "edg_wll_EventSendProxy() error."); } } - else { - /* event will not arrive to server, only flag was set */ - /* check whether some pending notifications are not triggered */ - if ( newstat->state ) { + } + + return edg_wll_Error(ctx,NULL,NULL); +} + + +static int db_store_finalize(edg_wll_Context ctx, char *event, edg_wll_Event *ev, edg_wll_JobStat *newstat, int reg_to_JP) +{ + int local_job = is_job_local(ctx, ev->any.jobId); + + +#ifdef LB_PERF + if( sink_mode == GLITE_LB_SINK_SEND ) { + glite_wll_perftest_consumeEvent(ev); + return edg_wll_Error(ctx,NULL,NULL); + } +#endif + + if (reg_to_JP) + if (register_to_JP(ctx,ev)) goto err; + + if (forward_event_to_server(ctx, event, ev, local_job)) goto err; + + if (newstat->state) { + if ( ctx->isProxy ) { + if ((ev->any.priority & EDG_WLL_LOGFLAG_DIRECT) || local_job) + /* event will not arrive to server, only flag was set */ + /* check whether some pending notifications are not triggered */ edg_wll_NotifMatch(ctx, newstat); } - } - - /* LB proxy purge */ - if (newstat->remove_from_proxy) { - edg_wll_PurgeServerProxy(ctx, ev->any.jobId); - } - } else - { - /* Purge proxy flag */ - if ( newstat->remove_from_proxy && local_job ) { - if (unset_proxy_flag(ctx, ev->any.jobId) < 0) { - return(edg_wll_Error(ctx,NULL,NULL)); - } - } - - if ( newstat->state ) { - edg_wll_NotifMatch(ctx, newstat); + else { + edg_wll_NotifMatch(ctx, newstat); } } - +err: return edg_wll_Error(ctx,NULL,NULL); } diff --git a/org.glite.lb.server/src/db_supp.c b/org.glite.lb.server/src/db_supp.c index 692d833..132ac20 100644 --- a/org.glite.lb.server/src/db_supp.c +++ b/org.glite.lb.server/src/db_supp.c @@ -10,6 +10,7 @@ int edg_wll_SetErrorDB(edg_wll_Context ctx) { if (ctx->dbctx) { code = glite_lbu_DBError(ctx->dbctx, NULL, &ed); + if (code == EDEADLOCK) code = EDG_WLL_ERROR_DB_TRANS_DEADLOCK; edg_wll_SetError(ctx, code, ed); free(ed); } else { @@ -77,3 +78,26 @@ int edg_wll_Rollback(edg_wll_Context ctx) { if ((retval = glite_lbu_Rollback(ctx->dbctx)) != 0) edg_wll_SetErrorDB(ctx); return retval; } + +int edg_wll_TransNeedRetry(edg_wll_Context ctx) { + int ret; + char *errd; + + ret = edg_wll_Error(ctx,NULL,NULL); + if (ret == EDG_WLL_ERROR_DB_TRANS_DEADLOCK) { + edg_wll_Rollback(ctx); + edg_wll_ResetError(ctx); + return 1; + } else if (ret==0) { + edg_wll_Commit(ctx); /* errors propagated further */ + return 0; + } else { + edg_wll_Error(ctx, NULL, &errd); + edg_wll_Rollback(ctx); + edg_wll_SetError(ctx, ret, errd); + free(errd); + return 0; + } +} + + diff --git a/org.glite.lb.server/src/db_supp.h b/org.glite.lb.server/src/db_supp.h index f53b817..2abf777 100644 --- a/org.glite.lb.server/src/db_supp.h +++ b/org.glite.lb.server/src/db_supp.h @@ -22,5 +22,6 @@ int edg_wll_bufferedInsertClose(edg_wll_Context ctx, glite_lbu_bufInsert bi); int edg_wll_Transaction(edg_wll_Context ctx); int edg_wll_Commit(edg_wll_Context ctx); int edg_wll_Rollback(edg_wll_Context ctx); +int edg_wll_TransNeedRetry(edg_wll_Context ctx); #endif diff --git a/org.glite.lb.server/src/get_events.c.T b/org.glite.lb.server/src/get_events.c.T index 5f05332..8dc5f9f 100644 --- a/org.glite.lb.server/src/get_events.c.T +++ b/org.glite.lb.server/src/get_events.c.T @@ -33,9 +33,24 @@ int edg_wll_get_event_flesh(edg_wll_Context ctx,int n,edg_wll_Event *e) glite_lbu_Statement sh; int ret,t; const char *tables[] = { "short_fields","long_fields" }; + edg_wll_Event *f; edg_wll_ResetError(ctx); + trio_asprintf(&q, "select ulm from events_flesh where jobid = '%|Ss' and event = %d", jobid, n); + if ((ret=edg_wll_ExecSQL(ctx,q,&sh)) < 0) goto cleanup; + if (edg_wll_FetchRow(ctx,sh,1,NULL,&nameval[0]) == 1) { + //fprintf(stderr, "got ulm: '%s'\n", nameval[0]); + // nasty ;-) + edg_wll_ParseEvent(ctx,nameval[0],&f); + memcpy(e, f, sizeof *e); + free(f); + ret=edg_wll_CheckEvent(ctx,e); + } else ret = ENOENT; + free(q); q = NULL; + + // old way keeped for compatibility/slow migration + if (ret != 0) { for (t=0; t<=1; t++) { trio_asprintf(&q,"select name,value from %s " "where jobid = '%|Ss' and event = %d ", @@ -56,8 +71,8 @@ int edg_wll_get_event_flesh(edg_wll_Context ctx,int n,edg_wll_Event *e) glite_lbu_FreeStmt(&sh); free(q); q=NULL; } - ret=edg_wll_CheckEvent(ctx,e); + } cleanup: if (sh) glite_lbu_FreeStmt(&sh); diff --git a/org.glite.lb.server/src/jobstat.c b/org.glite.lb.server/src/jobstat.c index 45b8369..901ab97 100644 --- a/org.glite.lb.server/src/jobstat.c +++ b/org.glite.lb.server/src/jobstat.c @@ -23,9 +23,13 @@ #include "lb_authz.h" #include "stats.h" #include "db_supp.h" +#include "db_calls.h" #define DAG_ENABLE 1 +#define DONT_LOCK 0 +#define LOCK 1 + /* TBD: share in whole logging or workload */ #ifdef __GNUC__ #define UNUSED_VAR __attribute__((unused)) @@ -67,7 +71,7 @@ static char* matched_substr(char *in, regmatch_t match) } -int edg_wll_JobStatus( +int edg_wll_JobStatusServer( edg_wll_Context ctx, glite_jobid_const_t job, int flags, @@ -75,245 +79,262 @@ int edg_wll_JobStatus( { /* Local variables */ - char *string_jobid; - char *md5_jobid; + char *string_jobid = NULL; + char *md5_jobid = NULL; intJobStat jobstat; intJobStat *ijsp; - int intErr = 0; - int lockErr; + int whole_cycle; edg_wll_Acl acl = NULL; #if DAG_ENABLE char *stmt = NULL; #endif - char *errdesc = NULL; - //The following declarations have originally been positioned in the funcion's code - //That was rather messy and lead to redeclaratios :-( char *stat_str, *s_out; intJobStat *js; char *out[1]; - glite_lbu_Statement sh; + glite_lbu_Statement sh = NULL; int num_sub, num_f, i, ii; + edg_wll_ResetError(ctx); + memset(&jobstat, 0, sizeof(jobstat)); string_jobid = edg_wlc_JobIdUnparse(job); if (string_jobid == NULL || stat == NULL) return edg_wll_SetError(ctx,EINVAL, NULL); md5_jobid = edg_wlc_JobIdGetUnique(job); - if ( !(jobstat.pub.owner = job_owner(ctx,md5_jobid)) ) { - free(md5_jobid); - free(string_jobid); - return edg_wll_Error(ctx,NULL,NULL); - } + do { + whole_cycle = 0; - intErr = edg_wll_GetACL(ctx, job, &acl); - if (intErr) { - free(md5_jobid); - free(string_jobid); - free(jobstat.pub.owner); - return edg_wll_Error(ctx,NULL,NULL); - } + if (edg_wll_Transaction(ctx)) goto rollback; + if (edg_wll_LockJobRowInShareMode(ctx, md5_jobid)) goto rollback;; - /* authorization check */ - if ( !(ctx->noAuth) && - (!(ctx->peerName) || !edg_wll_gss_equal_subj(ctx->peerName, jobstat.pub.owner))) { - intErr = (acl == NULL) || edg_wll_CheckACL(ctx, acl, EDG_WLL_PERM_READ); - if (intErr) { - free(string_jobid); - free(md5_jobid); - free(jobstat.pub.owner); jobstat.pub.owner = NULL; - if (acl) { - edg_wll_FreeAcl(acl); - return edg_wll_Error(ctx, NULL, NULL); - } else { - return edg_wll_SetError(ctx,EPERM, "not owner, no ACL is set"); - } - } - } - intErr = edg_wll_LoadIntState(ctx, job, -1 /*all events*/, &ijsp); - if (!intErr) { - *stat = ijsp->pub; - free(jobstat.pub.owner); jobstat.pub.owner = NULL; - destroy_intJobStat_extension(ijsp); - free(ijsp); + if (edg_wll_GetACL(ctx, job, &acl)) goto rollback; - } else { - lockErr = edg_wll_LockJob(ctx,job); - intErr = edg_wll_intJobStatus(ctx, job, flags,&jobstat, js_enable_store && !lockErr); - if (intErr) edg_wll_Error(ctx, NULL, &errdesc); - if (!lockErr) { - edg_wll_UnlockJob(ctx,job); + /* authorization check */ + if ( !(ctx->noAuth) && + (!(ctx->peerName) || !edg_wll_gss_equal_subj(ctx->peerName, jobstat.pub.owner))) { + if ((acl == NULL) || edg_wll_CheckACL(ctx, acl, EDG_WLL_PERM_READ)) { + if (acl) { + goto rollback; + } else { + edg_wll_SetError(ctx,EPERM, "not owner, no ACL is set"); + goto rollback; + } + } } - - *stat = jobstat.pub; - if (intErr) edg_wll_FreeStatus(&jobstat.pub); - destroy_intJobStat_extension(&jobstat); - } - - if (intErr) { - free(string_jobid); - free(md5_jobid); - if (acl) edg_wll_FreeAcl(acl); - edg_wll_SetError(ctx, intErr, errdesc); - free(errdesc); - return edg_wll_UpdateError(ctx, EDG_WLL_ERROR_SERVER_RESPONSE, "Could not compute job status from events"); - } - if (acl) { - stat->acl = strdup(acl->string); - edg_wll_FreeAcl(acl); - } + if (!edg_wll_LoadIntState(ctx, job, DONT_LOCK, -1 /*all events*/, &ijsp)) { + *stat = ijsp->pub; + free(jobstat.pub.owner); jobstat.pub.owner = NULL; + destroy_intJobStat_extension(ijsp); + free(ijsp); + } else { + if (edg_wll_intJobStatus(ctx, job, flags,&jobstat, js_enable_store)) { + char *err; - if ((flags & EDG_WLL_STAT_CLASSADS) == 0) { - char *null = NULL; + /* job has no record in states table ?? */ + asprintf(&err, "Could not compute status of job %s (corrupted DB?)\n",string_jobid); + edg_wll_UpdateError(ctx, EDG_WLL_ERROR_SERVER_RESPONSE, err); + free(err); - mov(stat->jdl, null); - mov(stat->matched_jdl, null); - mov(stat->condor_jdl, null); - mov(stat->rsl, null); - } + goto rollback; + } + *stat = jobstat.pub; + } + + if (acl) { + stat->acl = strdup(acl->string); + edg_wll_FreeAcl(acl); + } -#if DAG_ENABLE - if (stat->jobtype == EDG_WLL_STAT_DAG || stat->jobtype == EDG_WLL_STAT_COLLECTION) { + if ((flags & EDG_WLL_STAT_CLASSADS) == 0) { + char *null = NULL; -// XXX: The users does not want any histogram. What do we do about it? -// if ((!(flags & EDG_WLL_STAT_CHILDHIST_FAST))&&(!(flags & EDG_WLL_STAT_CHILDHIST_THOROUGH))) { /* No Histogram */ -// if (stat->children_hist != NULL) { /* No histogram will be sent even if there was one */ -// -// printf("\nNo Histogram required\n\n"); -// -// free(stat->children_hist); -// } -// -// } + mov(stat->jdl, null); + mov(stat->matched_jdl, null); + mov(stat->condor_jdl, null); + mov(stat->rsl, null); + } + #if DAG_ENABLE + if (stat->jobtype == EDG_WLL_STAT_DAG || stat->jobtype == EDG_WLL_STAT_COLLECTION) { - if (flags & EDG_WLL_STAT_CHILDSTAT) { + // XXX: The users does not want any histogram. What do we do about it? + // if ((!(flags & EDG_WLL_STAT_CHILDHIST_FAST))&&(!(flags & EDG_WLL_STAT_CHILDHIST_THOROUGH))) { /* No Histogram */ + // if (stat->children_hist != NULL) { /* No histogram will be sent even if there was one */ + // + // printf("\nNo Histogram required\n\n"); + // + // free(stat->children_hist); + // } + // + // } - trio_asprintf(&stmt, "SELECT int_status FROM states WHERE parent_job='%|Ss'" - " AND version='%|Ss'", - md5_jobid, INTSTAT_VERSION); - if (stmt != NULL) { - num_sub = edg_wll_ExecSQL(ctx, stmt, &sh); - if (num_sub >=0 ) { - i = 0; - stat->children_states = calloc(num_sub+1, sizeof(edg_wll_JobStat)); - if (stat->children_states == NULL) { - glite_lbu_FreeStmt(&sh); - goto dag_enomem; - } - while ((num_f = edg_wll_FetchRow(ctx, sh, 1, NULL, &stat_str)) == 1 - && i < num_sub) { - js = dec_intJobStat(stat_str, &s_out); - if (s_out != NULL && js != NULL) { - stat->children_states[i] = js->pub; - destroy_intJobStat_extension(js); - free(js); - i++; // Careful, this value will also be used further + + if (flags & EDG_WLL_STAT_CHILDSTAT) { + + trio_asprintf(&stmt, "SELECT int_status FROM states WHERE parent_job='%|Ss'" + " AND version='%|Ss'", + md5_jobid, INTSTAT_VERSION); + if (stmt != NULL) { + num_sub = edg_wll_ExecSQL(ctx, stmt, &sh); + if (num_sub >=0 ) { + i = 0; + stat->children_states = calloc(num_sub+1, sizeof(edg_wll_JobStat)); + if (stat->children_states == NULL) { + edg_wll_SetError(ctx, ENOMEM, "edg_wll_JobStatusServer() calloc children_states failed!"); + goto rollback; } - free(stat_str); + while ((num_f = edg_wll_FetchRow(ctx, sh, 1, NULL, &stat_str)) == 1 + && i < num_sub) { + js = dec_intJobStat(stat_str, &s_out); + if (s_out != NULL && js != NULL) { + stat->children_states[i] = js->pub; + destroy_intJobStat_extension(js); + free(js); + i++; // Careful, this value will also be used further + } + free(stat_str); + } + if (num_f < 0) goto rollback; + + glite_lbu_FreeStmt(&sh); sh = NULL; } - glite_lbu_FreeStmt(&sh); + else goto rollback; + + free(stmt); stmt = NULL; + } else { + edg_wll_SetError(ctx, ENOMEM, "edg_wll_JobStatusServer() trio_asprintf failed!"); + goto rollback; } - free(stmt); - } else goto dag_enomem; - } + } - if (flags & EDG_WLL_STAT_CHILDHIST_THOROUGH) { /* Full (thorough) Histogram */ + if (flags & EDG_WLL_STAT_CHILDHIST_THOROUGH) { /* Full (thorough) Histogram */ - if (stat->children_hist == NULL) { - stat->children_hist = (int*) calloc(1+EDG_WLL_NUMBER_OF_STATCODES, sizeof(int)); - stat->children_hist[0] = EDG_WLL_NUMBER_OF_STATCODES; - } - else { - /* If hist is loaded, it probably contain only incomplete histogram - * built in update_parent_status. Count it from scratch...*/ - for (ii=1; ii<=EDG_WLL_NUMBER_OF_STATCODES; ii++) - stat->children_hist[ii] = 0; - } + if (stat->children_hist == NULL) { + stat->children_hist = (int*) calloc(1+EDG_WLL_NUMBER_OF_STATCODES, sizeof(int)); + if (stat->children_hist == NULL) { + edg_wll_SetError(ctx, ENOMEM, "edg_wll_JobStatusServer() calloc children_hist failed!"); + goto rollback; + } - if (flags & EDG_WLL_STAT_CHILDSTAT) { // Job states have already been loaded - for ( ii = 0 ; ii < i ; ii++ ) { - stat->children_hist[(stat->children_states[ii].state)+1]++; + stat->children_hist[0] = EDG_WLL_NUMBER_OF_STATCODES; } - } - else { - // Get child states from the database - trio_asprintf(&stmt, "SELECT status FROM states WHERE parent_job='%|Ss' AND version='%|Ss'", - md5_jobid, INTSTAT_VERSION); - out[1] = NULL; - if (stmt != NULL) { - num_sub = edg_wll_ExecSQL(ctx, stmt, &sh); - if (num_sub >=0 ) { - while ((num_f = edg_wll_FetchRow(ctx, sh, sizeof(out)/sizeof(out[0]), NULL, out)) == 1 ) { - num_f = atoi(out[0]); - if (num_f > EDG_WLL_JOB_UNDEF && num_f < EDG_WLL_NUMBER_OF_STATCODES) - stat->children_hist[num_f+1]++; - free(out[0]); + else { + /* If hist is loaded, it probably contain only incomplete histogram + * built in update_parent_status. Count it from scratch...*/ + for (ii=1; ii<=EDG_WLL_NUMBER_OF_STATCODES; ii++) + stat->children_hist[ii] = 0; + } + + if (flags & EDG_WLL_STAT_CHILDSTAT) { // Job states have already been loaded + for ( ii = 0 ; ii < i ; ii++ ) { + stat->children_hist[(stat->children_states[ii].state)+1]++; + } + } + else { + // Get child states from the database + trio_asprintf(&stmt, "SELECT status FROM states WHERE parent_job='%|Ss' AND version='%|Ss'", + md5_jobid, INTSTAT_VERSION); + out[1] = NULL; + if (stmt != NULL) { + num_sub = edg_wll_ExecSQL(ctx, stmt, &sh); + if (num_sub >=0 ) { + while ((num_f = edg_wll_FetchRow(ctx, sh, sizeof(out)/sizeof(out[0]), NULL, out)) == 1 ) { + num_f = atoi(out[0]); + if (num_f > EDG_WLL_JOB_UNDEF && num_f < EDG_WLL_NUMBER_OF_STATCODES) + stat->children_hist[num_f+1]++; + free(out[0]); + } + if (num_f < 0) goto rollback; + + glite_lbu_FreeStmt(&sh); sh = NULL; } - glite_lbu_FreeStmt(&sh); + else goto rollback; + + free(stmt); stmt = NULL; + } else { + edg_wll_SetError(ctx, ENOMEM, "edg_wll_JobStatusServer() trio_asprintf failed!"); + goto rollback; } - free(stmt); - } else goto dag_enomem; - } - } - else { - if (flags & EDG_WLL_STAT_CHILDHIST_FAST) { /* Fast Histogram */ - - if (stat->children_hist == NULL) { - // If the histogram exists, assume that it was already filled during job state retrieval - stat->children_hist = (int*) calloc(1+EDG_WLL_NUMBER_OF_STATCODES, sizeof(int)); - edg_wll_GetSubjobHistogram(ctx, job, stat->children_hist); } } else { - if (stat->children_hist) { - free (stat->children_hist); - stat->children_hist = NULL; + if (flags & EDG_WLL_STAT_CHILDHIST_FAST) { /* Fast Histogram */ + + if (stat->children_hist == NULL) { + // If the histogram exists, assume that it was already filled during job state retrieval + stat->children_hist = (int*) calloc(1+EDG_WLL_NUMBER_OF_STATCODES, sizeof(int)); + if (stat->children_hist == NULL) { + edg_wll_SetError(ctx, ENOMEM, "edg_wll_JobStatusServer() calloc children_hist failed!"); + goto rollback; + } + + if (edg_wll_GetSubjobHistogram(ctx, job, stat->children_hist)) + goto rollback; + } } + else { + if (stat->children_hist) { + free (stat->children_hist); + stat->children_hist = NULL; + } + } + } - } + if (flags & EDG_WLL_STAT_CHILDREN) { - if (flags & EDG_WLL_STAT_CHILDREN) { + trio_asprintf(&stmt, "SELECT j.dg_jobid FROM states s,jobs j " + "WHERE s.parent_job='%|Ss' AND s.version='%|Ss' AND s.jobid=j.jobid", + md5_jobid, INTSTAT_VERSION); + if (stmt != NULL) { + num_sub = edg_wll_ExecSQL(ctx, stmt, &sh); + if (num_sub >=0 ) { + while ((num_f = edg_wll_FetchRow(ctx, sh, sizeof(out)/sizeof(out[0]), NULL, out)) == 1 ) { + add_stringlist(&stat->children, out[0]); + free(out[0]); + } + if (num_f < 0) goto rollback; - trio_asprintf(&stmt, "SELECT j.dg_jobid FROM states s,jobs j " - "WHERE s.parent_job='%|Ss' AND s.version='%|Ss' AND s.jobid=j.jobid", - md5_jobid, INTSTAT_VERSION); - if (stmt != NULL) { - num_sub = edg_wll_ExecSQL(ctx, stmt, &sh); - if (num_sub >=0 ) { - while (edg_wll_FetchRow(ctx, sh, sizeof(out)/sizeof(out[0]), NULL, out) == 1 ) { - add_stringlist(&stat->children, out[0]); - free(out[0]); + glite_lbu_FreeStmt(&sh); sh = NULL; } - glite_lbu_FreeStmt(&sh); + else goto rollback; + + free(stmt); stmt = NULL; + } else { + edg_wll_SetError(ctx, ENOMEM, "edg_wll_JobStatusServer() trio_asprintf failed!"); + goto rollback; } - free(stmt); - } else goto dag_enomem; + } + } +#endif + whole_cycle = 1; +commit: +rollback: + if (!whole_cycle) { + edg_wll_FreeStatus(&jobstat.pub); + jobstat.pub.owner = NULL; + destroy_intJobStat_extension(&jobstat); } + if (jobstat.pub.owner) { free(jobstat.pub.owner); jobstat.pub.owner = NULL; } + if (acl) { edg_wll_FreeAcl(acl); acl = NULL; } + if (stmt) { free(stmt); stmt = NULL; } + if (sh) { glite_lbu_FreeStmt(&sh); sh = NULL; } - } -#endif - free(string_jobid); - free(md5_jobid); - return edg_wll_Error(ctx, NULL, NULL); + } while (edg_wll_TransNeedRetry(ctx)); -#if DAG_ENABLE -dag_enomem: free(string_jobid); free(md5_jobid); - edg_wll_FreeStatus(stat); - free(stmt); - return edg_wll_SetError(ctx, ENOMEM, NULL); -#endif + + return edg_wll_Error(ctx, NULL, NULL); } int edg_wll_intJobStatus( @@ -370,7 +391,7 @@ int edg_wll_intJobStatus( if (edg_wll_QueryEventsServer(ctx,1, (const edg_wll_QueryRec **)jqra, NULL, &events)) { free(string_jobid); free(jqra); - free(intstat->pub.owner); + free(intstat->pub.owner); intstat->pub.owner = NULL; return edg_wll_Error(ctx, NULL, NULL); } free(jqra); @@ -380,7 +401,7 @@ int edg_wll_intJobStatus( if (num_events == 0) { free(string_jobid); - free(intstat->pub.owner); + free(intstat->pub.owner); intstat->pub.owner = NULL; return edg_wll_SetError(ctx,ENOENT,NULL); } @@ -640,11 +661,6 @@ edg_wll_ErrorCode edg_wll_StoreIntStateEmbryonic(edg_wll_Context ctx, if (ctx->rgma_export) write2rgma_status(&jobstat.pub); */ -#ifdef LB_BUF - if (edg_wll_bufferedInsert(bi, values)) - goto cleanup; -#else - trio_asprintf(&stmt, "insert into states" "(jobid,status,seq,int_status,version" @@ -653,7 +669,6 @@ edg_wll_ErrorCode edg_wll_StoreIntStateEmbryonic(edg_wll_Context ctx, icnames, values); if (edg_wll_ExecSQL(ctx,stmt,NULL) < 0) goto cleanup; -#endif cleanup: free(stmt); @@ -668,6 +683,7 @@ cleanup: edg_wll_ErrorCode edg_wll_LoadIntState(edg_wll_Context ctx, edg_wlc_JobId jobid, + int lock, int seq, intJobStat **stat) { @@ -680,6 +696,10 @@ edg_wll_ErrorCode edg_wll_LoadIntState(edg_wll_Context ctx, edg_wll_ResetError(ctx); jobid_md5 = edg_wlc_JobIdGetUnique(jobid); + if (lock) { + edg_wll_LockJobRowForUpdate(ctx,jobid_md5); + } + if (seq == -1) { /* any sequence number */ trio_asprintf(&stmt, @@ -713,7 +733,8 @@ edg_wll_ErrorCode edg_wll_LoadIntState(edg_wll_Context ctx, free(res); cleanup: free(jobid_md5); - free(stmt); glite_lbu_FreeStmt(&sh); + free(stmt); + if (sh) glite_lbu_FreeStmt(&sh); return edg_wll_Error(ctx,NULL,NULL); } @@ -756,9 +777,7 @@ static edg_wll_ErrorCode load_parent_intJobStat(edg_wll_Context ctx, intJobStat { if (*pis) return edg_wll_Error(ctx, NULL, NULL); // already loaded and locked - if (edg_wll_LockJob(ctx,cis->pub.parent_job)) goto err; - - if (edg_wll_LoadIntState(ctx, cis->pub.parent_job, - 1, pis)) + if (edg_wll_LoadIntState(ctx, cis->pub.parent_job, LOCK, - 1, pis)) goto err; assert(*pis); // deadlock would happen with next call of this function @@ -799,7 +818,7 @@ static int log_collectionState_event(edg_wll_Context ctx, edg_wll_JobStatCode st edg_wlc_JobIdDup(cis->pub.jobId, &(event->collectionState.child)); event->collectionState.child_event = edg_wll_EventToString(ce->any.type); - ret = trans_db_store(ctx, NULL, event, pis); + ret = db_parent_store(ctx, event, pis); edg_wll_FreeEvent(event); free(event); @@ -904,8 +923,6 @@ static edg_wll_ErrorCode update_parent_status(edg_wll_Context ctx, edg_wll_JobSt } err: - edg_wll_UnlockJob(ctx,cis->pub.parent_job); - if (pis) destroy_intJobStat(pis); @@ -987,7 +1004,7 @@ edg_wll_ErrorCode edg_wll_StepIntState(edg_wll_Context ctx, memset(&oldstat,0,sizeof oldstat); - if (!edg_wll_LoadIntState(ctx, job, seq - 1, &ijsp)) { + if (!edg_wll_LoadIntState(ctx, job, DONT_LOCK, seq - 1, &ijsp)) { edg_wll_CpyStatus(&ijsp->pub,&oldstat); if (ctx->rgma_export) oldstat_rgmaline = write2rgma_statline(&ijsp->pub); @@ -995,11 +1012,9 @@ edg_wll_ErrorCode edg_wll_StepIntState(edg_wll_Context ctx, res = processEvent(ijsp, e, seq, be_strict, &errstring); if (res == RET_FATAL || res == RET_INTERNAL) { /* !strict */ edg_wll_FreeStatus(&oldstat); - edg_wll_UnlockJob(ctx,job); /* XXX: error lost */ return edg_wll_SetError(ctx, EINVAL, errstring); } edg_wll_StoreIntState(ctx, ijsp, seq); - if (edg_wll_UnlockJob(ctx,job)) goto err; edg_wll_UpdateStatistics(ctx,&oldstat,e,&ijsp->pub); @@ -1029,7 +1044,6 @@ edg_wll_ErrorCode edg_wll_StepIntState(edg_wll_Context ctx, Right approach is computing parent status from scratch. */ - if (edg_wll_UnlockJob(ctx,job)) goto err; edg_wll_UpdateStatistics(ctx,NULL,e,&jobstat.pub); if (ctx->rgma_export) write2rgma_status(&jobstat.pub); @@ -1040,7 +1054,7 @@ edg_wll_ErrorCode edg_wll_StepIntState(edg_wll_Context ctx, } else destroy_intJobStat(&jobstat); } - else edg_wll_UnlockJob(ctx,job); + err: return edg_wll_Error(ctx, NULL, NULL); } diff --git a/org.glite.lb.server/src/jobstat.h b/org.glite.lb.server/src/jobstat.h index bab2fd9..128be56 100644 --- a/org.glite.lb.server/src/jobstat.h +++ b/org.glite.lb.server/src/jobstat.h @@ -6,14 +6,15 @@ #include "glite/lb/jobstat.h" #include "glite/lb/intjobstat.h" -int edg_wll_JobStatus(edg_wll_Context, glite_jobid_const_t, int, edg_wll_JobStat *); +int edg_wll_JobStatusServer(edg_wll_Context, glite_jobid_const_t, int, edg_wll_JobStat *); int edg_wll_intJobStatus( edg_wll_Context, glite_jobid_const_t, int, intJobStat *, int); edg_wll_ErrorCode edg_wll_StoreIntState(edg_wll_Context, intJobStat *, int); edg_wll_ErrorCode edg_wll_StoreIntStateEmbryonic(edg_wll_Context, edg_wlc_JobId, char *icnames, char *values, glite_lbu_bufInsert *bi); -edg_wll_ErrorCode edg_wll_LoadIntState(edg_wll_Context , edg_wlc_JobId , int, intJobStat **); +edg_wll_ErrorCode edg_wll_LoadIntState(edg_wll_Context , edg_wlc_JobId , int, int, intJobStat **); +edg_wll_ErrorCode edg_wll_StepIntState(edg_wll_Context ctx, edg_wlc_JobId job, edg_wll_Event *e, int seq, edg_wll_JobStat *stat_out); edg_wll_ErrorCode edg_wll_StepIntStateParent(edg_wll_Context,edg_wlc_JobId,edg_wll_Event *,int,intJobStat *,edg_wll_JobStat *); diff --git a/org.glite.lb.server/src/lb_proto.c b/org.glite.lb.server/src/lb_proto.c index 8a3c1ec..efaaf09 100644 --- a/org.glite.lb.server/src/lb_proto.c +++ b/org.glite.lb.server/src/lb_proto.c @@ -435,7 +435,7 @@ edg_wll_ErrorCode edg_wll_Proto(edg_wll_Context ctx, flags = (requestPTR[1]=='?') ? edg_wll_string_to_stat_flags(requestPTR + 2) : 0; // FIXME: edg_wll_UserJobs should take flags as parameter - switch (edg_wll_UserJobs(ctx,&jobsOut,NULL)) { + switch (edg_wll_UserJobsServer(ctx,&jobsOut,NULL)) { case 0: if (html) edg_wll_UserJobsToHTML(ctx, jobsOut, &message); else ret = HTTP_OK; break; @@ -479,7 +479,7 @@ edg_wll_ErrorCode edg_wll_Proto(edg_wll_Context ctx, edg_wll_SetError(ctx,EDG_WLL_ERROR_JOBID_FORMAT,fullid); ret = HTTP_BADREQ; } - else switch (edg_wll_JobStatus(ctx,jobId,0,&stat)) { + else switch (edg_wll_JobStatusServer(ctx,jobId,0,&stat)) { case 0: if (html) edg_wll_JobStatusToHTML(ctx,stat,&message); else ret = HTTP_OK; break; @@ -635,11 +635,41 @@ edg_wll_ErrorCode edg_wll_Proto(edg_wll_Context ctx, } else if (!strncmp(requestPTR,KEY_PURGE_REQUEST,sizeof(KEY_PURGE_REQUEST)-1)) { edg_wll_PurgeRequest request; + edg_wll_PurgeResult result; + int fatal = 0; ctx->p_tmp_timeout.tv_sec = 86400; - if ( !parsePurgeRequest(ctx,messageBody,(int (*)()) edg_wll_StringToStat,&request) ) - edg_wll_PurgeServer(ctx, (const edg_wll_PurgeRequest *)&request); + if ( !parsePurgeRequest(ctx,messageBody,(int (*)()) edg_wll_StringToStat,&request) ) { + switch ( edg_wll_PurgeServer(ctx, (const edg_wll_PurgeRequest *)&request, &result)) { + case 0: if (html) ret = HTTP_NOTIMPL; + else ret = HTTP_OK; + + break; + case ENOENT: ret = HTTP_NOTFOUND; break; + case EPERM: ret = HTTP_UNAUTH; break; + case EINVAL: ret = HTTP_INVALID; break; + case ENOMEM: fatal = 1; ret = HTTP_INTERNAL; break; + default: ret = HTTP_INTERNAL; break; + } + if (!html && !fatal) { + if (edg_wll_PurgeResultToXML(ctx, &result, &message)) + ret = HTTP_INTERNAL; + else + printf("%s", message); + } + + /* result is now packed in message, free it */ + if ( result.server_file ) + free(result.server_file); + if ( result.jobs ) + { + for ( i = 0; result.jobs[i]; i++ ) + free(result.jobs[i]); + free(result.jobs); + } + + } if ( request.jobs ) { @@ -649,13 +679,6 @@ edg_wll_ErrorCode edg_wll_Proto(edg_wll_Context ctx, free(request.jobs); } - /* - * response allready sent from edg_wll_PurgeServer() - return NULL results - */ - *response = NULL; - *headersOut = NULL; - *bodyOut = NULL; - return edg_wll_Error(ctx,NULL,NULL); } else if (!strncmp(requestPTR,KEY_DUMP_REQUEST,sizeof(KEY_DUMP_REQUEST)-1)) { edg_wll_DumpRequest request; diff --git a/org.glite.lb.server/src/load.c b/org.glite.lb.server/src/load.c index 6cacb0d..dbab5bc 100644 --- a/org.glite.lb.server/src/load.c +++ b/org.glite.lb.server/src/load.c @@ -32,9 +32,9 @@ int edg_wll_LoadEventsServer(edg_wll_Context ctx,const edg_wll_LoadRequest *req, { int fd, reject_fd = -1, - readret, i; + readret, i, ret; size_t maxsize; - char *line = NULL, + char *line = NULL, *errdesc, buff[30]; edg_wll_Event *event; edg_wlc_JobId jobid = NULL; @@ -48,9 +48,6 @@ int edg_wll_LoadEventsServer(edg_wll_Context ctx,const edg_wll_LoadRequest *req, if ( (fd = open(req->server_file, O_RDONLY)) == -1 ) return edg_wll_SetError(ctx, errno, "Server can not open the file"); - if (edg_wll_Transaction(ctx) != 0) - return edg_wll_Error(ctx, NULL, NULL); - memset(result,0,sizeof(*result)); i = 0; while ( 1 ) @@ -58,7 +55,6 @@ int edg_wll_LoadEventsServer(edg_wll_Context ctx,const edg_wll_LoadRequest *req, /* Read one line */ if ( (readret = read_line(&line, &maxsize, fd)) == -1 ) { - edg_wll_Rollback(ctx); return edg_wll_SetError(ctx, errno, "reading dump file"); } @@ -84,14 +80,19 @@ int edg_wll_LoadEventsServer(edg_wll_Context ctx,const edg_wll_LoadRequest *req, result->to = event->any.arrived.tv_sec; } ctx->event_load = 1; - if ( edg_wll_StoreEvent(ctx, event, NULL) ) - { - char *errdesc; + + do { + if (edg_wll_Transaction(ctx)) goto err; + + edg_wll_StoreEvent(ctx, event, line, NULL); + + } while (edg_wll_TransNeedRetry(ctx)); + + if ((ret = edg_wll_Error(ctx, NULL, &errdesc)) != 0) { int len = strlen(line), total = 0, written; - edg_wll_Error(ctx, NULL, &errdesc); fprintf(stderr, "Can't store event: %s\n", errdesc); if ( reject_fd == -1 ) { @@ -127,8 +128,7 @@ int edg_wll_LoadEventsServer(edg_wll_Context ctx,const edg_wll_LoadRequest *req, } write(reject_fd,"\n",1); } - else - { + else { result->to = event->any.arrived.tv_sec; if ( jobid ) { @@ -138,7 +138,7 @@ int edg_wll_LoadEventsServer(edg_wll_Context ctx,const edg_wll_LoadRequest *req, { edg_wll_JobStat st; - edg_wll_JobStatus(ctx, jobid, 0, &st); + edg_wll_JobStatusServer(ctx, jobid, 0, &st); edg_wll_FreeStatus(&st); edg_wlc_JobIdFree(jobid); @@ -156,11 +156,12 @@ cycle_clean: edg_wll_FreeEvent(event); } +err: if ( jobid ) { edg_wll_JobStat st; - edg_wll_JobStatus(ctx, jobid, 0, &st); + edg_wll_JobStatusServer(ctx, jobid, 0, &st); edg_wll_FreeStatus(&st); edg_wlc_JobIdFree(jobid); } @@ -168,9 +169,6 @@ cycle_clean: if ( reject_fd != -1 ) close(reject_fd); - if (edg_wll_Commit(ctx) != 0) - return edg_wll_Error(ctx, NULL, NULL); - return edg_wll_Error(ctx,NULL,NULL); } diff --git a/org.glite.lb.server/src/notification.c b/org.glite.lb.server/src/notification.c index cc22d7d..f3e4e56 100644 --- a/org.glite.lb.server/src/notification.c +++ b/org.glite.lb.server/src/notification.c @@ -5,6 +5,8 @@ #include #include #include +#include +#include #include "glite/jobid/strmd5.h" #include "glite/lbu/trio.h" @@ -106,48 +108,45 @@ int edg_wll_NotifNewServer( trio_asprintf(&addr_s, "%s:%s", ctx->connections->serverConnection->peerName, aux+1); } - /* Format DB insert statement - */ - trio_asprintf(&q, - "insert into notif_registrations(notifid,destination,valid,userid,conditions) " - "values ('%|Ss','%|Ss',%s,'%|Ss', '%|Ss')", - nid_s, addr_s? addr_s: address_override, time_s, owner, xml_conds); + do { + if (edg_wll_Transaction(ctx) != 0) goto cleanup; - if ( edg_wll_ExecSQL(ctx, q, NULL) < 0 ) - goto cleanup; + /* Format DB insert statement + */ + trio_asprintf(&q, + "insert into notif_registrations(notifid,destination,valid,userid,conditions) " + "values ('%|Ss','%|Ss',%s,'%|Ss', '%|Ss')", + nid_s, addr_s? addr_s: address_override, time_s, owner, xml_conds); - if (get_indexed_cols(ctx,nid_s,nconds,&add_index) || - (add_index && edg_wll_ExecSQL(ctx,add_index,NULL) < 0) - ) goto cleanup; + if ( edg_wll_ExecSQL(ctx, q, NULL) < 0 ) + goto rollback; + if (get_indexed_cols(ctx,nid_s,nconds,&add_index) || + (add_index && edg_wll_ExecSQL(ctx,add_index,NULL) < 0) + ) goto rollback; - if (jobs) for ( i = 0; jobs[i]; i++ ) - { - free(q); - trio_asprintf(&q, - "insert into notif_jobs(notifid,jobid) values ('%|Ss','%|Ss')", - nid_s, jobs[i]); - if ( edg_wll_ExecSQL(ctx, q, NULL) < 0 ) + + if (jobs) for ( i = 0; jobs[i]; i++ ) { - /* XXX: Remove uncoplete registration? - * Which error has to be returned? - */ free(q); - trio_asprintf(&q, "delete from notif_jobs where notifid='%|Ss'", nid_s); - edg_wll_ExecSQL(ctx, q, NULL); - free(q); - trio_asprintf(&q, "delete from notif_registrations where notifid='%|Ss'", nid_s); - edg_wll_ExecSQL(ctx, q, NULL); - goto cleanup; + trio_asprintf(&q, + "insert into notif_jobs(notifid,jobid) values ('%|Ss','%|Ss')", + nid_s, jobs[i]); + if ( edg_wll_ExecSQL(ctx, q, NULL) < 0 ) + goto rollback; } - } - else { - trio_asprintf(&q,"insert into notif_jobs(notifid,jobid) values ('%|Ss','%|Ss')", - nid_s,NOTIF_ALL_JOBS); - if ( edg_wll_ExecSQL(ctx, q, NULL) < 0 ) goto cleanup; + else { + trio_asprintf(&q,"insert into notif_jobs(notifid,jobid) values ('%|Ss','%|Ss')", + nid_s,NOTIF_ALL_JOBS); + if ( edg_wll_ExecSQL(ctx, q, NULL) < 0 ) goto rollback; - } + } + +rollback: + free(q); q= NULL; + free(add_index); add_index = NULL; + } while (edg_wll_TransNeedRetry(ctx)); cleanup: if ( q ) free(q); @@ -175,55 +174,61 @@ int edg_wll_NotifBindServer( const char *address_override, time_t *valid) { - char *time_s = NULL, - *addr_s = NULL; + char *time_s = NULL, + *addr_s = NULL; if ( !address_override ) { edg_wll_SetError(ctx, EINVAL, "Address parameter not given"); - goto cleanup; + goto err; } + + do { + if (edg_wll_Transaction(ctx) != 0) goto err; - if ( check_notif_request(ctx, nid, NULL) ) - goto cleanup; + if ( check_notif_request(ctx, nid, NULL) ) + goto rollback; - /* Format time of validity - */ + /* Format time of validity + */ /* XXX: until valid works [inout] */ - *valid = time(NULL) + ctx->notifDuration; - adjust_validity(ctx,valid); - - glite_lbu_TimeToDB(*valid, &time_s); - if ( !time_s ) - { - edg_wll_SetError(ctx, errno, "Formating validity time"); - goto cleanup; - } + *valid = time(NULL) + ctx->notifDuration; + adjust_validity(ctx,valid); - /* Format the address - */ - if ( address_override ) - { - char *aux; + glite_lbu_TimeToDB(*valid, &time_s); + if ( !time_s ) + { + edg_wll_SetError(ctx, errno, "Formating validity time"); + goto rollback; + } - if ( !(aux = strchr(address_override, ':')) ) + /* Format the address + */ + if ( address_override ) { - edg_wll_SetError(ctx, EINVAL, "Addres overrirde not in format host:port"); - goto cleanup; + char *aux; + + if ( !(aux = strchr(address_override, ':')) ) + { + edg_wll_SetError(ctx, EINVAL, "Addres overrirde not in format host:port"); + goto rollback; + } + if ( !strncmp(address_override, "0.0.0.0", aux-address_override) ) + trio_asprintf(&addr_s, "%s:%s", ctx->connections->serverConnection->peerName, aux+1); } - if ( !strncmp(address_override, "0.0.0.0", aux-address_override) ) - trio_asprintf(&addr_s, "%s:%s", ctx->connections->serverConnection->peerName, aux+1); - } - update_notif(ctx, nid, NULL, addr_s? addr_s: address_override, (const char *)(time_s)); + update_notif(ctx, nid, NULL, addr_s? addr_s: address_override, (const char *)(time_s)); -cleanup: - if ( time_s ) free(time_s); - if ( addr_s ) free(addr_s); +rollback: + free(time_s); time_s = NULL; + free(addr_s); addr_s = NULL; + + } while (edg_wll_TransNeedRetry(ctx)); +err: return edg_wll_Error(ctx, NULL, NULL); } @@ -245,89 +250,94 @@ int edg_wll_NotifChangeServer( /* Format notification ID */ if ( !(nid_s = edg_wll_NotifIdGetUnique(nid)) ) - goto cleanup; + goto err; - if ( check_notif_request(ctx, nid, NULL) ) - goto cleanup; + do { + if (edg_wll_Transaction(ctx) != 0) goto err; - switch ( op ) - { - case EDG_WLL_NOTIF_REPLACE: - /* Format conditions - * - separate all jobids - * - format new condition list without jobids - */ - if ( split_cond_list(ctx, conditions, &nconds, &jobs) ) - goto cleanup; + if ( check_notif_request(ctx, nid, NULL) ) + goto rollback; - /* - * encode new cond. list into a XML string - */ - if ( edg_wll_JobQueryRecToXML(ctx, (edg_wll_QueryRec const * const *) nconds, &xml_conds) ) + switch ( op ) { - /* XXX: edg_wll_JobQueryRecToXML() do not set errors in context! - * can't get propper error number :( + case EDG_WLL_NOTIF_REPLACE: + /* Format conditions + * - separate all jobids + * - format new condition list without jobids */ - edg_wll_SetError(ctx, errno, "Can't encode data into xml"); - goto cleanup; - } + if ( split_cond_list(ctx, conditions, &nconds, &jobs) ) + goto rollback; - /* Format DB insert statement - */ - if ( update_notif(ctx, nid, xml_conds, NULL, NULL) ) - goto cleanup; + /* + * encode new cond. list into a XML string + */ + if ( edg_wll_JobQueryRecToXML(ctx, (edg_wll_QueryRec const * const *) nconds, &xml_conds) ) + { + /* XXX: edg_wll_JobQueryRecToXML() do not set errors in context! + * can't get propper error number :( + */ + edg_wll_SetError(ctx, errno, "Can't encode data into xml"); + goto rollback; + } - if ( jobs ) - { /* Format DB insert statement */ - trio_asprintf(&q, "delete from notif_jobs where notifid='%|Ss'", nid_s); - if ( edg_wll_ExecSQL(ctx, q, NULL) < 0 ) - goto cleanup; + if ( update_notif(ctx, nid, xml_conds, NULL, NULL) ) + goto rollback; - for ( i = 0; jobs[i]; i++ ) + if ( jobs ) { - free(q); - trio_asprintf(&q, - "insert into notif_jobs(notifid,jobid) values ('%|Ss','%|Ss')", - nid_s, jobs[i]); + /* Format DB insert statement + */ + trio_asprintf(&q, "delete from notif_jobs where notifid='%|Ss'", nid_s); if ( edg_wll_ExecSQL(ctx, q, NULL) < 0 ) + goto rollback; + + for ( i = 0; jobs[i]; i++ ) { - /* XXX: Remove uncoplete registration? - * Which error has to be returned? - */ free(q); - trio_asprintf(&q, "delete from notif_jobs where notifid='%|Ss'", nid_s); - edg_wll_ExecSQL(ctx, q, NULL); - free(q); - trio_asprintf(&q,"delete from notif_registrations where notifid='%|Ss'", nid_s); - edg_wll_ExecSQL(ctx, q, NULL); - goto cleanup; + trio_asprintf(&q, + "insert into notif_jobs(notifid,jobid) values ('%|Ss','%|Ss')", + nid_s, jobs[i]); + if ( edg_wll_ExecSQL(ctx, q, NULL) < 0 ) + { + /* XXX: Remove uncoplete registration? + * Which error has to be returned? + */ + free(q); + trio_asprintf(&q, "delete from notif_jobs where notifid='%|Ss'", nid_s); + edg_wll_ExecSQL(ctx, q, NULL); + free(q); + trio_asprintf(&q,"delete from notif_registrations where notifid='%|Ss'", nid_s); + edg_wll_ExecSQL(ctx, q, NULL); + goto rollback; + } } } + break; + + case EDG_WLL_NOTIF_ADD: + break; + case EDG_WLL_NOTIF_REMOVE: + break; + default: + break; } - break; - - case EDG_WLL_NOTIF_ADD: - break; - case EDG_WLL_NOTIF_REMOVE: - break; - default: - break; - } -cleanup: - if ( q ) free(q); - if ( xml_conds ) free(xml_conds); - if ( nid_s ) free(nid_s); - if ( jobs ) - { - for ( i = 0; jobs[i]; i++ ) - free(jobs[i]); - free(jobs); - } - if ( nconds ) free(nconds); +rollback: + free(q); q = NULL; + free(xml_conds); xml_conds = NULL; + free(nid_s); nid_s = NULL; + if ( jobs ) { + for ( i = 0; jobs[i]; i++ ) + free(jobs[i]); + free(jobs); jobs = NULL; + } + free(nconds); nconds = NULL; + + } while (edg_wll_TransNeedRetry(ctx)); +err: return edg_wll_Error(ctx, NULL, NULL); } @@ -338,29 +348,34 @@ int edg_wll_NotifRefreshServer( { char *time_s = NULL; + do { + if (edg_wll_Transaction(ctx) != 0) goto err; - if ( check_notif_request(ctx, nid, NULL) ) - goto cleanup; + if ( check_notif_request(ctx, nid, NULL) ) + goto rollback; - /* Format time of validity - */ + /* Format time of validity + */ /* XXX: until valid works [inout] */ - *valid = time(NULL) + ctx->notifDuration; - adjust_validity(ctx,valid); + *valid = time(NULL) + ctx->notifDuration; + adjust_validity(ctx,valid); - glite_lbu_TimeToDB(*valid, &time_s); - if ( !time_s ) - { - edg_wll_SetError(ctx, errno, "Formating validity time"); - goto cleanup; - } + glite_lbu_TimeToDB(*valid, &time_s); + if ( !time_s ) + { + edg_wll_SetError(ctx, errno, "Formating validity time"); + goto rollback; + } - update_notif(ctx, nid, NULL, NULL, time_s); + update_notif(ctx, nid, NULL, NULL, time_s); -cleanup: - if ( time_s ) free(time_s); +rollback: + free(time_s); time_s = NULL; + + } while (edg_wll_TransNeedRetry(ctx)); +err: return edg_wll_Error(ctx, NULL, NULL); } @@ -370,27 +385,44 @@ int edg_wll_NotifDropServer( { char *nid_s = NULL, *stmt = NULL; - int ret; + + do { + if (edg_wll_Transaction(ctx) != 0) goto err; - if ( check_notif_request(ctx, nid, NULL) ) - goto cleanup; + if ( check_notif_request(ctx, nid, NULL) ) + goto rollback; - if ( !(nid_s = edg_wll_NotifIdGetUnique(nid)) ) - goto cleanup; + if ( !(nid_s = edg_wll_NotifIdGetUnique(nid)) ) + goto rollback; - trio_asprintf(&stmt, "delete from notif_registrations where notifid='%|Ss'", nid_s); - if ( (ret = edg_wll_ExecSQL(ctx, stmt, NULL)) < 0 ) - goto cleanup; - free(stmt); - trio_asprintf(&stmt, "delete from notif_jobs where notifid='%|Ss'", nid_s); - edg_wll_ExecSQL(ctx, stmt, NULL); - edg_wll_NotifCancelRegId(ctx, nid); + trio_asprintf(&stmt, "delete from notif_registrations where notifid='%|Ss'", nid_s); + if ( edg_wll_ExecSQL(ctx, stmt, NULL) < 0 ) + goto rollback; + free(stmt); + trio_asprintf(&stmt, "delete from notif_jobs where notifid='%|Ss'", nid_s); + if ( edg_wll_ExecSQL(ctx, stmt, NULL) < 0 ) + goto rollback; + edg_wll_NotifCancelRegId(ctx, nid); + if (edg_wll_Error(ctx, NULL, NULL) == ECONNREFUSED) { + /* Let notification erase from DB, + * on notif-IL side it will be autopurged later anyway */ + + fprintf(stderr,"[%d] edg_wll_NotifDropServer() - NotifID found and dropped,"\ + " however, connection to notif-IL was refused (notif-IL not running?)\n", getpid()); + syslog(LOG_INFO,"edg_wll_NotifDropServer() - NotifID found and dropped,"\ + " however, connection to notif-IL was refused (notif-IL not running?)"); -cleanup: - if ( nid_s ) free(nid_s); - if ( stmt ) free(stmt); + edg_wll_ResetError(ctx); + } + +rollback: + free(nid_s); nid_s = NULL; + free(stmt); stmt = NULL; + + } while (edg_wll_TransNeedRetry(ctx)); +err: return edg_wll_Error(ctx, NULL, NULL); } @@ -473,7 +505,7 @@ static int check_notif_request( trio_asprintf(&stmt, "select notifid from notif_registrations " - "where notifid='%|Ss' and userid='%|Ss'", + "where notifid='%|Ss' and userid='%|Ss' FOR UPDATE", nid_s, user); if ( (ret = edg_wll_ExecSQL(ctx, stmt, NULL)) < 0 ) diff --git a/org.glite.lb.server/src/openserver.c b/org.glite.lb.server/src/openserver.c index 9b043d5..5052223 100644 --- a/org.glite.lb.server/src/openserver.c +++ b/org.glite.lb.server/src/openserver.c @@ -1,11 +1,20 @@ #ident "$Header$" +#include +#include +#include + #include "glite/lb/context-int.h" #include "openserver.h" #include "db_supp.h" edg_wll_ErrorCode edg_wll_Open(edg_wll_Context ctx, char *cs) { + int ret, hit = 0, i; + char *table[1]; + char *cols[20]; + glite_lbu_Statement stmt; + if (glite_lbu_InitDBContext(&ctx->dbctx) != 0) { char *ed; @@ -14,7 +23,46 @@ edg_wll_ErrorCode edg_wll_Open(edg_wll_Context ctx, char *cs) free(ed); return EDG_WLL_ERROR_DB_INIT; } - return glite_lbu_DBConnect(ctx->dbctx,cs) ? edg_wll_SetErrorDB(ctx) : 0; + if (glite_lbu_DBConnect(ctx->dbctx,cs) != 0) return edg_wll_SetErrorDB(ctx); + + // proxy and server columns added + if (glite_lbu_ExecSQL(ctx->dbctx, "DESC jobs", &stmt) <= 0) goto err; + hit = 0; + while (hit < 2 && (ret = glite_lbu_FetchRow(stmt, 1, NULL, cols)) > 0) { + assert(ret <= (int)(sizeof cols/sizeof cols[0])); + if (strcasecmp(cols[0], "proxy") == 0 || + strcasecmp(cols[0], "server") == 0) hit++; + for (i = 0; i < ret; i++) free(cols[i]); + } + if (ret < 0) goto err; + glite_lbu_FreeStmt(&stmt); + if (hit != 2) { + ret = edg_wll_SetError(ctx, EINVAL, "old DB schema found, migration to new schema needed"); + goto close_db; + } + + // events_flesh table added + if (glite_lbu_ExecSQL(ctx->dbctx, "SHOW TABLES", &stmt) <= 0) goto err; + hit = 0; + while (hit < 1 && (ret = glite_lbu_FetchRow(stmt, 1, NULL, table)) > 0) { + if (strcasecmp(table[0], "events_flesh") == 0) hit++; + free(table[0]); + } + if (ret < 0) goto err; + glite_lbu_FreeStmt(&stmt); + if (hit != 1) { + ret = edg_wll_SetError(ctx, EINVAL, "events_flesh table not found, migration to new schema needed"); + goto close_db; + } + + return 0; + +err: + edg_wll_SetErrorDB(ctx); + glite_lbu_FreeStmt(&stmt); +close_db: + glite_lbu_DBClose(ctx->dbctx); + return ret; } edg_wll_ErrorCode edg_wll_Close(edg_wll_Context ctx) diff --git a/org.glite.lb.server/src/purge.h b/org.glite.lb.server/src/purge.h index 697acdf..97f8b82 100644 --- a/org.glite.lb.server/src/purge.h +++ b/org.glite.lb.server/src/purge.h @@ -11,7 +11,8 @@ */ int edg_wll_PurgeServer( edg_wll_Context ctx, - const edg_wll_PurgeRequest *request + const edg_wll_PurgeRequest *request, + edg_wll_PurgeResult *result ); /** LB Proxy purge implementation @@ -19,7 +20,7 @@ int edg_wll_PurgeServer( */ int edg_wll_PurgeServerProxy( edg_wll_Context ctx, - edg_wlc_JobId job + glite_jobid_const_t job ); #define FILE_TYPE_ANY "" diff --git a/org.glite.lb.server/src/query.c b/org.glite.lb.server/src/query.c index a71bb4e..c0d0b95 100644 --- a/org.glite.lb.server/src/query.c +++ b/org.glite.lb.server/src/query.c @@ -24,6 +24,7 @@ #include "store.h" #include "lb_authz.h" #include "db_supp.h" +#include "jobstat.h" #define FL_SEL_STATUS 1 #define FL_SEL_TAGS (1<<1) @@ -57,7 +58,7 @@ int edg_wll_QueryEventsServer( *event_where = NULL, *qbase = NULL, *q = NULL, - *res[11]; + *res[12]; edg_wll_Event *out = NULL; glite_lbu_Statement sh = NULL; int i = 0, @@ -106,7 +107,7 @@ int edg_wll_QueryEventsServer( * convert_event_head() called on the result */ trio_asprintf(&qbase,"SELECT e.event,j.userid,j.dg_jobid,e.code," - "e.prog,e.host,u.cert_subj,e.time_stamp,e.usec,e.level,e.arrived " + "e.prog,e.host,u.cert_subj,e.time_stamp,e.usec,e.level,e.arrived,e.seqcode " "FROM events e,users u,jobs j%s " "WHERE %se.jobid=j.jobid AND e.userid=u.userid AND e.code != %d " "%s %s %s %s %s %s", @@ -189,7 +190,7 @@ int edg_wll_QueryEventsServer( if (j_old) edg_wll_FreeStatus(&state_out); - if ( edg_wll_JobStatus(ctx, out[i].any.jobId, 0, &state_out) ) + if ( edg_wll_JobStatusServer(ctx, out[i].any.jobId, 0, &state_out) ) { edg_wll_FreeEvent(out+i); if (edg_wll_Error(ctx,NULL,NULL) == EPERM) eperm = 1; @@ -214,7 +215,7 @@ int edg_wll_QueryEventsServer( } } - // Auth checked in edg_wll_JobStatus above + // Auth checked in edg_wll_JobStatusServer above if ( !(where_flags & FL_FILTER) && !noAuth ) { if (!ctx->peerName || (strcmp(res[1],peerid) && strcmp(res[1], can_peerid))) { @@ -420,7 +421,7 @@ int edg_wll_QueryJobsServer( // if some condition hits unindexed column or states of matching jobs wanted if ((where_flags & FL_FILTER) || !(flags & EDG_WLL_STAT_NO_STATES)) { - if ( edg_wll_JobStatus(ctx, jobs_out[i], (where_flags & FL_SEL_JDL)?(flags|EDG_WLL_STAT_CLASSADS):flags, &states_out[i]) ) + if ( edg_wll_JobStatusServer(ctx, jobs_out[i], (where_flags & FL_SEL_JDL)?(flags|EDG_WLL_STAT_CLASSADS):flags, &states_out[i]) ) { edg_wlc_JobIdFree(jobs_out[i]); if (edg_wll_Error(ctx,NULL,NULL) == EPERM) eperm = 1; @@ -1290,6 +1291,9 @@ int convert_event_head(edg_wll_Context ctx,char **f,edg_wll_Event *e) e->any.arrived.tv_usec = 0; free(f[8]); f[8] = NULL; + e->any.seqcode = f[9]; + f[9] = NULL; + return 0; err: diff --git a/org.glite.lb.server/src/request.c b/org.glite.lb.server/src/request.c index de2326c..99c54e8 100644 --- a/org.glite.lb.server/src/request.c +++ b/org.glite.lb.server/src/request.c @@ -19,32 +19,8 @@ #endif -int -trans_db_store(edg_wll_Context ctx, char *event_data, edg_wll_Event *e, intJobStat *is) -{ - int ret; - char *errd = NULL; - - if ((ret = edg_wll_Transaction(ctx) != 0)) goto err; - - if (e) ret = db_parent_store(ctx, e, is); - else ret = db_store(ctx, "NOT USED", event_data); - - if (ret == 0) { - if ((ret = edg_wll_Commit(ctx)) != 0) goto err; - } else { - edg_wll_Error(ctx, NULL, &errd); - edg_wll_Rollback(ctx); - edg_wll_SetError(ctx, ret, errd); - free(errd); - } - -err: - return(ret); -} - int -handle_request(edg_wll_Context ctx,char *buf) +handle_il_message(edg_wll_Context ctx,char *buf) { il_octet_string_t event; int ret; @@ -57,7 +33,7 @@ handle_request(edg_wll_Context ctx,char *buf) return EDG_WLL_IL_PROTO; } - ret = trans_db_store(ctx, event.data, NULL, NULL); + ret = db_store(ctx, event.data); if(event.data) free(event.data); diff --git a/org.glite.lb.server/src/srv_purge.c b/org.glite.lb.server/src/srv_purge.c index ae7a4aa..d50f77d 100644 --- a/org.glite.lb.server/src/srv_purge.c +++ b/org.glite.lb.server/src/srv_purge.c @@ -42,9 +42,9 @@ static const char* const resp_headers[] = { NULL }; -static int purge_one(edg_wll_Context ctx,glite_jobid_const_t,int,int); -int unset_proxy_flag(edg_wll_Context ctx, edg_wlc_JobId job); -static int unset_server_flag(edg_wll_Context ctx, edg_wlc_JobId job); +static int purge_one(edg_wll_Context ctx,glite_jobid_const_t,int,int,int); +int unset_proxy_flag(edg_wll_Context ctx, glite_jobid_const_t job); +static int unset_server_flag(edg_wll_Context ctx, glite_jobid_const_t job); int edg_wll_CreateTmpFileStorage(edg_wll_Context ctx, char *prefix, char **fname) @@ -184,28 +184,25 @@ int edg_wll_CreateFileStorage(edg_wll_Context ctx, char *file_type, char *prefix return retfd; } -int edg_wll_PurgeServerProxy(edg_wll_Context ctx, edg_wlc_JobId job) +int edg_wll_PurgeServerProxy(edg_wll_Context ctx, glite_jobid_const_t job) { - switch ( purge_one(ctx, job, -1, 1) ) { + switch ( purge_one(ctx, job, -1, 1, 1) ) { case 0: case ENOENT: - edg_wll_ResetError(ctx); - return 0; - + return(edg_wll_ResetError(ctx)); + break; default: - return -1; + return(edg_wll_Error(ctx,NULL,NULL)); + break; } } -int edg_wll_PurgeServer(edg_wll_Context ctx,const edg_wll_PurgeRequest *request) +int edg_wll_PurgeServer(edg_wll_Context ctx,const edg_wll_PurgeRequest *request, edg_wll_PurgeResult *result) { int i,parse = 0,dumpfile = -1; edg_wlc_JobId job; - char *message = NULL, *response = NULL; char *tmpfname = NULL; - int naffected_jobs = 0; - edg_wll_PurgeResult result; - int ret = HTTP_OK; + int naffected_jobs = 0, ret; if (!ctx->noAuth) { @@ -214,7 +211,7 @@ int edg_wll_PurgeServer(edg_wll_Context ctx,const edg_wll_PurgeRequest *request) } edg_wll_ResetError(ctx); - memset(&result, 0, sizeof(edg_wll_PurgeResult)); + memset(result, 0, sizeof(*result)); if ( (request->flags & EDG_WLL_PURGE_SERVER_DUMP) && @@ -252,15 +249,15 @@ int edg_wll_PurgeServer(edg_wll_Context ctx,const edg_wll_PurgeRequest *request) parse = 1; } else { - switch (purge_one(ctx,job,dumpfile,request->flags&EDG_WLL_PURGE_REALLY_PURGE)) { + switch (purge_one(ctx,job,dumpfile,request->flags&EDG_WLL_PURGE_REALLY_PURGE,0)) { case 0: if (request->flags & EDG_WLL_PURGE_LIST_JOBS) { - result.jobs = realloc(result.jobs,(naffected_jobs+2) * sizeof(*result.jobs)); - result.jobs[naffected_jobs] = strdup(request->jobs[i]); - result.jobs[naffected_jobs+1] = NULL; + result->jobs = realloc(result->jobs,(naffected_jobs+2) * sizeof(*(result->jobs))); + result->jobs[naffected_jobs] = strdup(request->jobs[i]); + result->jobs[naffected_jobs+1] = NULL; } naffected_jobs++; break; - case ENOENT: parse = 1; + case ENOENT: /* job does not exist, consider purged and ignore */ edg_wll_ResetError(ctx); break; default: goto abort; @@ -297,7 +294,12 @@ int edg_wll_PurgeServer(edg_wll_Context ctx,const edg_wll_PurgeRequest *request) } memset(&stat,0,sizeof stat); - if (edg_wll_JobStatus(ctx,job,0,&stat)) { /* FIXME: replace by intJobStatus ?? */ + if (edg_wll_JobStatusServer(ctx,job,0,&stat)) { /* FIXME: replace by intJobStatus ?? */ + if (edg_wll_Error(ctx, NULL, NULL) == ENOENT) { + /* job purged meanwhile, ignore */ + edg_wll_ResetError(ctx); + continue; + } edg_wll_FreeStatus(&stat); goto abort; } @@ -314,16 +316,21 @@ int edg_wll_PurgeServer(edg_wll_Context ctx,const edg_wll_PurgeRequest *request) if (now-stat.lastUpdateTime.tv_sec > timeout[i] && !check_strict_jobid(ctx,job)) { - if (purge_one(ctx,job,dumpfile,request->flags&EDG_WLL_PURGE_REALLY_PURGE)) { + if (purge_one(ctx,job,dumpfile,request->flags&EDG_WLL_PURGE_REALLY_PURGE,0)) { edg_wll_FreeStatus(&stat); + if (edg_wll_Error(ctx, NULL, NULL) == ENOENT) { + /* job purged meanwhile, ignore */ + edg_wll_ResetError(ctx); + continue; + } goto abort; } /* XXX: change with the streaming interface */ if (request->flags & EDG_WLL_PURGE_LIST_JOBS) { - result.jobs = realloc(result.jobs,(naffected_jobs+2) * sizeof(*result.jobs)); - result.jobs[naffected_jobs] = job_s; - result.jobs[naffected_jobs+1] = NULL; + result->jobs = realloc(result->jobs,(naffected_jobs+2) * sizeof(*(result->jobs))); + result->jobs[naffected_jobs] = job_s; + result->jobs[naffected_jobs+1] = NULL; job_s = NULL; } naffected_jobs++; @@ -354,57 +361,15 @@ abort: } } - switch ( edg_wll_Error(ctx,NULL,NULL) ) - { - case 0: - ret = HTTP_OK; - break; - case EINVAL: - ret = HTTP_INVALID; - break; - case EPERM: - ret = HTTP_UNAUTH; - break; - case ENOENT: - ret = HTTP_NOTFOUND; - break; - - /* fatal errors */ - case ENOMEM: - /* fall through */ - default: - ret = HTTP_INTERNAL; - break; - } - - if (ret != HTTP_INTERNAL) { + ret = edg_wll_Error(ctx,NULL,NULL); + if (ret == 0 || ret == ENOENT || ret == EPERM || ret == EINVAL) { if ( request->flags & EDG_WLL_PURGE_SERVER_DUMP && tmpfname ) { - edg_wll_CreatePurgeFileFromTmp(ctx, tmpfname, &(result.server_file)); + edg_wll_CreatePurgeFileFromTmp(ctx, tmpfname, &(result->server_file)); unlink(tmpfname); } - - if ( edg_wll_PurgeResultToXML(ctx, &result, &message) ) - ret = HTTP_INTERNAL; - else - printf("%s", message); } - if ( result.server_file ) - free(result.server_file); - if ( result.jobs ) - { - for ( i = 0; result.jobs[i]; i++ ) - free(result.jobs[i]); - free(result.jobs); - } - - asprintf(&response, "HTTP/1.1 %d %s", ret, edg_wll_HTTPErrorMessage(ret)); - - edg_wll_http_send(ctx, response, resp_headers, message,ctx->connections->serverConnection); - if (response) free(response); - if (message) free(message); - return edg_wll_Error(ctx,NULL,NULL); } @@ -424,214 +389,209 @@ static void unlock_and_check(edg_wll_Context ctx,edg_wlc_JobId job) } } +static int dump_events(edg_wll_Context ctx, glite_jobid_const_t job, int dump, char **res) +{ + edg_wll_Event e; + int event; + + + event = atoi(res[0]); + free(res[0]); res[0] = NULL; + + res[0] = edg_wlc_JobIdUnparse(job); + if (convert_event_head(ctx,res,&e) || edg_wll_get_event_flesh(ctx,event,&e)) + { + char *et,*ed, *dbjob; + int i; + + + /* Most likely sort of internal inconsistency. + * Must not be fatal -- just complain + */ + edg_wll_Error(ctx,&et,&ed); + dbjob = edg_wlc_JobIdGetUnique(job); + fprintf(stderr,"%s event %d: %s (%s)\n",dbjob,event,et,ed); + syslog(LOG_WARNING,"%s event %d: %s (%s)",dbjob,event,et,ed); + free(et); free(ed); free(dbjob); + for (i=0; iisProxy) { - /* should not happen */ - return 0; - } - /* continue */ - break; - case DB_SERVER_JOB: - if (ctx->isProxy) { - /* should not happen */ - return 0; - } - /* continue */ - break; - case DB_PROXY_JOB+DB_SERVER_JOB: - if (ctx->isProxy) { - purge = 0; - if (unset_proxy_flag(ctx, job) < 0) { - return(edg_wll_Error(ctx,NULL,NULL)); + do { + if (edg_wll_Transaction(ctx)) goto err; + + switch (edg_wll_jobMembership(ctx, job)) { + case DB_PROXY_JOB: + if (!ctx->isProxy) { + /* should not happen */ + goto commit; } - } - else { - purge = 0; - if (unset_server_flag(ctx, job) < 0) { - return(edg_wll_Error(ctx,NULL,NULL)); + /* continue */ + break; + case DB_SERVER_JOB: + if (purge_from_proxy_only) { + /* no action needed */ + goto commit; } - } - break; - case 0: - // Zombie job (server=0, proxy=0)? should not happen; - // clear it to keep DB healthy - break; - default: - return 0; - break; - } + if (ctx->isProxy) { + /* should not happen */ + goto commit; + } + /* continue */ + break; + case DB_PROXY_JOB+DB_SERVER_JOB: + if (ctx->isProxy) { + purge = 0; + if (unset_proxy_flag(ctx, job) < 0) { + goto rollback; + } + } + else { + purge = 0; + /* if server&proxy DB is shared ... */ + if (is_job_local(ctx,job) && purge_from_proxy_only) { + if (unset_proxy_flag(ctx, job) < 0) { + goto rollback; + } + } + else { + if (unset_server_flag(ctx, job) < 0) { + goto rollback; + } + } + } + break; + case 0: + // Zombie job (server=0, proxy=0)? should not happen; + // clear it to keep DB healthy + break; + default: + goto rollback; + break; + } - dbjob = edg_wlc_JobIdGetUnique(job); /* XXX: strict jobid already checked */ - if (edg_wll_LockJob(ctx,job)) goto clean; + dbjob = edg_wlc_JobIdGetUnique(job); /* XXX: strict jobid already checked */ - if ( purge ) - { - trio_asprintf(&stmt,"delete from jobs where jobid = '%|Ss'",dbjob); - ret = edg_wll_ExecSQL(ctx,stmt,NULL); - if (ret <= 0) { - unlock_and_check(ctx,job); - if (ret == 0) { - fprintf(stderr,"%s: no such job\n",dbjob); - edg_wll_SetError(ctx,ENOENT,dbjob); - } - goto clean; - } - free(stmt); stmt = NULL; + if ( purge ) + { + trio_asprintf(&stmt,"delete from jobs where jobid = '%|Ss'",dbjob); + if (edg_wll_ExecSQL(ctx,stmt,NULL) < 0) goto rollback; + free(stmt); stmt = NULL; - trio_asprintf(&stmt,"delete from states where jobid = '%|Ss'",dbjob); - if (edg_wll_ExecSQL(ctx,stmt,NULL) < 0) { - unlock_and_check(ctx,job); - goto clean; + trio_asprintf(&stmt,"delete from states where jobid = '%|Ss'",dbjob); + if (edg_wll_ExecSQL(ctx,stmt,NULL) < 0) goto rollback; + free(stmt); stmt = NULL; } - free(stmt); stmt = NULL; -/* Why on earth ? - trio_asprintf(&stmt,"delete from states where jobid = '%|Ss'",dbjob); - if (edg_wll_ExecSQL(ctx,stmt,NULL) < 0) { - unlock_and_check(ctx,job); - goto clean; + if ( purge ) + { + trio_asprintf(&stmt,"delete from status_tags where jobid = '%|Ss'",dbjob); + if (edg_wll_ExecSQL(ctx,stmt,NULL) < 0) goto rollback; + free(stmt); stmt = NULL; } - free(stmt); stmt = NULL; -*/ - - } - if (!ctx->strict_locking) unlock_and_check(ctx,job); + if (dump >= 0) + trio_asprintf(&stmt, + "select event,code,prog,host,u.cert_subj,time_stamp,usec,level,arrived " + "from events e,users u " + "where e.jobid='%|Ss' " + "and u.userid=e.userid " + "order by event", dbjob); + else + trio_asprintf(&stmt,"select event from events " + "where jobid='%|Ss' " + "order by event", dbjob); - if ( purge ) - { - trio_asprintf(&stmt,"delete from status_tags where jobid = '%|Ss'",dbjob); - if (edg_wll_ExecSQL(ctx,stmt,NULL) < 0) goto unlock; + if (edg_wll_ExecSQL(ctx,stmt,&q) < 0) goto rollback; free(stmt); stmt = NULL; - } - - if (dump >= 0) - trio_asprintf(&stmt, - "select event,code,prog,host,u.cert_subj,time_stamp,usec,level,arrived " - "from events e,users u " - "where e.jobid='%|Ss' " - "and u.userid=e.userid " - "order by event", dbjob); - else - trio_asprintf(&stmt,"select event from events " - "where jobid='%|Ss' " - "order by event", dbjob); - -/* check for events repeatedly -- new one may have arrived in the meantime */ - while ((ret = edg_wll_ExecSQL(ctx,stmt,&q)) > 0) { - char *res[9]; dumped = 1; while ((ret = edg_wll_FetchRow(ctx,q,sizofa(res),NULL,res)) > 0) { int event; + event = atoi(res[0]); - free(res[0]); res[0] = NULL; if (dump >= 0) { - edg_wll_Event e; - assert(ret == 9); - res[0] = edg_wlc_JobIdUnparse(job); - if (convert_event_head(ctx,res,&e) || edg_wll_get_event_flesh(ctx,event,&e)) - { - char *et,*ed; - int i; - - /* Most likely sort of internal inconsistency. - * Must not be fatal -- just complain - */ - edg_wll_Error(ctx,&et,&ed); - fprintf(stderr,"%s event %d: %s (%s)\n",dbjob,event,et,ed); - syslog(LOG_WARNING,"%s event %d: %s (%s)",dbjob,event,et,ed); - free(et); free(ed); - for (i=0; istrict_locking) unlock_and_check(ctx,job); -clean: +err: free(dbjob); free(stmt); return edg_wll_Error(ctx,NULL,NULL); } -int unset_proxy_flag(edg_wll_Context ctx, edg_wlc_JobId job) +int unset_proxy_flag(edg_wll_Context ctx, glite_jobid_const_t job) { char *stmt = NULL; char *dbjob; @@ -646,7 +606,7 @@ int unset_proxy_flag(edg_wll_Context ctx, edg_wlc_JobId job) } -int unset_server_flag(edg_wll_Context ctx, edg_wlc_JobId job) +int unset_server_flag(edg_wll_Context ctx, glite_jobid_const_t job) { char *stmt = NULL; char *dbjob; diff --git a/org.glite.lb.server/src/store.c.T b/org.glite.lb.server/src/store.c.T index 404f1f5..0afa6b3 100644 --- a/org.glite.lb.server/src/store.c.T +++ b/org.glite.lb.server/src/store.c.T @@ -16,6 +16,7 @@ #include #include #include +#include #include "glite/jobid/strmd5.h" #include "glite/lbu/trio.h" @@ -31,37 +32,36 @@ #include "jobstat.h" #include "db_calls.h" #include "db_supp.h" +#include "index.h" static int store_user(edg_wll_Context,const char *,const char *); -static int store_job(edg_wll_Context,glite_jobid_const_t,const char *, int, int); -#ifdef LB_BUF -static int store_job_block(edg_wll_Context, glite_jobid_const_t, const char *, glite_lbu_bufInsert *, int, int); -#endif -static int store_job_grey(edg_wll_Context,glite_jobid_const_t,time_t); -static int store_flesh(edg_wll_Context,edg_wll_Event *,char *,int); -static int store_seq(edg_wll_Context,edg_wll_Event *,int); +static int store_job(edg_wll_Context,glite_jobid_const_t,const char *, int, int, int, int); +static int set_job_grey(edg_wll_Context ctx, char *jobid); +static int store_flesh(edg_wll_Context,edg_wll_Event *,const char *ulm, char *,int); static int check_dup(edg_wll_Context,edg_wll_Event *); static int check_auth(edg_wll_Context,edg_wll_Event *e); -#ifndef LB_DAG_EMBRIONIC -static int register_subjobs(edg_wll_Context,const edg_wll_RegJobEvent *); -#endif -static int register_subjobs_embryonic(edg_wll_Context,const edg_wll_RegJobEvent *, const char *); +static void lowercase_usertag(edg_wll_Event *ev); void edg_wll_StoreAnonymous(edg_wll_Context ctx,int anon) { ctx->allowAnonymous = anon; } -int edg_wll_StoreEvent(edg_wll_Context ctx,edg_wll_Event *e,int *seq) + +/* !!! to be called from OPEN TRANSACTION only !!! + */ +int edg_wll_StoreEvent(edg_wll_Context ctx,edg_wll_Event *e,const char *ulm,int *seq) { - edg_wll_ErrorCode err = 0; - char *userid = NULL,*jobid,*stmt; - char *select_max,*ssrc; + char *userid, *jobid, *stmt, *ssrc, *now_s, *stamp, *dummy, *max; glite_lbu_Statement sh = NULL; - int next = 0xDEAD; - int lbproxy_notreg = 0; - char *now_s = NULL; + int next = 0xDEAD, nr; - ssrc = jobid = stmt = select_max = NULL; + + userid = ssrc = jobid = stmt = now_s = stamp = dummy = max = NULL; + + lowercase_usertag(e); + jobid = edg_wlc_JobIdGetUnique(e->any.jobId); + glite_lbu_TimeToDB(e->any.timestamp.tv_sec, &stamp); + ssrc = edg_wll_SourceToString(e->any.source); if ( ctx->event_load ) glite_lbu_TimeToDB(e->any.arrived.tv_sec, &now_s); @@ -69,21 +69,12 @@ int edg_wll_StoreEvent(edg_wll_Context ctx,edg_wll_Event *e,int *seq) glite_lbu_TimeToDB(time(NULL), &now_s); edg_wll_ResetError(ctx); - switch (err = check_auth(ctx,e)) { + switch (check_auth(ctx,e)) { case 0: break; case ENOENT: - if ( !ctx->isProxy ) { - if (ctx->greyjobs) { - edg_wll_ResetError(ctx); - if (store_job_grey(ctx,e->any.jobId,e->any.timestamp.tv_sec)) - goto clean; - break; - } - else goto clean; - } - - edg_wll_ResetError(ctx); - lbproxy_notreg = 1; + /* job not registered */ + // should not happen, store_job_server_proxy() miscoded or going thu load? + goto clean; break; case EPERM: if (!ctx->noAuth) goto clean; @@ -92,11 +83,6 @@ int edg_wll_StoreEvent(edg_wll_Context ctx,edg_wll_Event *e,int *seq) default: goto clean; } -/* FIXME: does not work for grey jobs due to "select from jobs" -- I don't care for the time being */ - if ((err = check_dup(ctx,e))) goto clean; - - jobid = edg_wlc_JobIdGetUnique(e->any.jobId); - trio_asprintf(&stmt,"select userid from jobs where jobid='%|Ss'", jobid); if (edg_wll_ExecSQL(ctx,stmt,&sh) < 0 || edg_wll_FetchRow(ctx,sh,1,NULL,&userid) < 0) goto clean; @@ -104,131 +90,69 @@ int edg_wll_StoreEvent(edg_wll_Context ctx,edg_wll_Event *e,int *seq) free(stmt); stmt = NULL; -/* obtain next event sequence number */ - trio_asprintf(&select_max, - "select max(event) from events " - "where jobid = '%|Ss'",jobid); +/* check duplicity */ + trio_asprintf(&stmt, + "select arrived from events where jobid='%|Ss' and code='%d'" + " and prog='%|Ss' and host='%|Ss' and time_stamp=%s and usec='%d'" + " and level='%d' and userid='%|Ss' and seqcode='%|Ss'", + jobid, (int) e->any.type, + ssrc,e->any.host, + stamp,e->any.timestamp.tv_usec, + e->any.level,userid, e->any.seqcode); + + if (edg_wll_ExecSQL(ctx,stmt,&sh) < 0) goto clean; + nr = edg_wll_FetchRow(ctx,sh,1,NULL,&dummy); + if (sh) glite_lbu_FreeStmt(&sh); + free(stmt); stmt = NULL; + free(dummy); - ssrc = edg_wll_SourceToString(e->any.source); + if (nr < 0) goto clean; + if (nr > 0) { + /* possible duplicity (99%) */ + // XXX: check event flesh to be 100% sure + edg_wll_SetError(ctx,EEXIST,"duplicate event"); + goto clean; + } + /* else (nr == 0) -> unique event, continue */ + -/* try to insert (someone else may be doing the same) */ - { - char *max = NULL; +/* obtain number of stored events */ + trio_asprintf(&stmt, + "select nevents from jobs " + "where jobid = '%|Ss'",jobid); + + if (edg_wll_ExecSQL(ctx,stmt,&sh) < 0 || + edg_wll_FetchRow(ctx,sh,1,NULL,&max) < 0) goto clean; + glite_lbu_FreeStmt(&sh); + + next = (max && *max) ? atoi(max)+1 : 0; + free(max); - if (edg_wll_ExecSQL(ctx,select_max,&sh) < 0 || - edg_wll_FetchRow(ctx,sh,1,NULL,&max) < 0) - { - err = edg_wll_Error(ctx,NULL,NULL); - goto clean; - } - glite_lbu_FreeStmt(&sh); - - next = max && *max ? atoi(max)+1 : 0; - free(max); - } - - while (1) { - /* - * 1) when using transactions: - * Store the whole event right now. - * - * 2) when not using transactions: - * Store an UNDEF event first in order to prevent race condition - * with readers and update event code later. - */ - char *stamp = NULL; - - glite_lbu_TimeToDB(e->any.timestamp.tv_sec, &stamp); - trio_asprintf(&stmt, - "insert into events(jobid,event,code,prog,host,time_stamp,usec,arrived,level,userid) " - "values ('%|Ss',%d,%d,'%|Ss','%|Ss',%s,%d,%s,%d,'%|Ss')", - jobid,next, - ctx->dbcaps & GLITE_LBU_DB_CAP_TRANSACTIONS ? (int) e->any.type : EDG_WLL_EVENT_UNDEF, - ssrc,e->any.host, - stamp,e->any.timestamp.tv_usec, - now_s, e->any.level,userid); - free(stamp); - - if (edg_wll_ExecSQL(ctx,stmt,NULL) < 0) { - if ((err = edg_wll_Error(ctx,NULL,NULL)) != EEXIST) - goto clean; - } else break; /* successful insert */ - - /* we were late -- try once again */ - next++; - free(stmt); - } +/* store event */ + trio_asprintf(&stmt, + "insert into events(jobid,event,code,prog,host,time_stamp,usec,arrived,level,userid,seqcode) " + "values ('%|Ss',%d,%d,'%|Ss','%|Ss',%s,%d,%s,%d,'%|Ss','%|Ss')", + jobid,next, + (int) e->any.type, + ssrc,e->any.host, + stamp,e->any.timestamp.tv_usec, + now_s, e->any.level,userid, e->any.seqcode); + + if (edg_wll_ExecSQL(ctx,stmt,NULL) < 0) goto clean; free(stmt); stmt = NULL; - if ((err = store_seq(ctx,e,next)) || - (err = store_flesh(ctx,e,jobid,next))) { - /* attempt to cleanup, ignore new errors */ - char *desc; - edg_wll_ErrorCode oerr = edg_wll_Error(ctx,NULL,&desc); - - edg_wll_delete_event(ctx,jobid,next); - edg_wll_SetError(ctx,oerr,desc); - free(desc); - } else - if (!(ctx->dbcaps & GLITE_LBU_DB_CAP_TRANSACTIONS)) { - /* emulate commit, i.e. swith to a real event type to make - * the record valid */ - trio_asprintf(&stmt, - "update events set code=%d " - "where jobid='%|Ss' and event=%d", - (int) e->any.type,jobid,next); - switch (edg_wll_ExecSQL(ctx,stmt,NULL)) { - case 0: if (ctx->strict_locking) - err = edg_wll_SetError(ctx,ENOENT,"event disappeared on store while strict locking"); - /* purge in progres: drop the garbage, ignore errors */ - else { - edg_wll_delete_event(ctx,jobid,next); - err = edg_wll_SetError(ctx,ENOENT,"job being purged"); - } - break; - case 1: if (ctx->strict_locking) err = 0; - else { - /* check whether the job is still there to prevent garbage - * left while there is a concurrent purge - */ - free(stmt); - trio_asprintf(&stmt, - "select 'x' from jobs where jobid='%|Ss'", - jobid); - switch (edg_wll_ExecSQL(ctx,stmt,NULL)) { - case 1: break; - case 0: /* purge in progres */ - edg_wll_delete_event(ctx,jobid,next); - err = edg_wll_SetError(ctx,ENOENT,"job being purged"); - break; - default: err = edg_wll_SetError(ctx,EDG_WLL_ERROR_DB_CALL, - "more job records, what is that?"); - break; - } - } - break; - case -1: err = edg_wll_Error(ctx,NULL,NULL); - break; - - default: err = edg_wll_SetError(ctx,EDG_WLL_ERROR_DB_CALL, - "more event records, what is that?"); - break; - } - } /* if !transactions */ - if (err == 0 && - e->any.type == EDG_WLL_EVENT_REGJOB && - (e->regJob.jobtype == EDG_WLL_REGJOB_DAG || - e->regJob.jobtype == EDG_WLL_REGJOB_PARTITIONED || - e->regJob.jobtype == EDG_WLL_REGJOB_COLLECTION) && - e->regJob.nsubjobs > 0) +/* increase number of stored events */ + trio_asprintf(&stmt, + "update jobs set nevents='%d'" + "where jobid = '%|Ss'", next, jobid); -#ifdef LB_DAG_EMBRIONIC - err = register_subjobs_embryonic(ctx,&e->regJob,userid); -#else - err = register_subjobs(ctx,&e->regJob); -#endif + if (edg_wll_ExecSQL(ctx,stmt,NULL) < 0) goto clean; + free(stmt); stmt = NULL; + +/* store event record */ + if (store_flesh(ctx,e,ulm,jobid,next)) goto clean; clean: free(now_s); @@ -236,10 +160,10 @@ clean: free(jobid); free(stmt); free(ssrc); - free(select_max); if (sh) glite_lbu_FreeStmt(&sh); - if (!err && seq) *seq = next; - return err; + if (!edg_wll_Error(ctx,NULL,NULL) && seq) *seq = next; + free(stamp); + return edg_wll_Error(ctx,NULL,NULL); } static int store_user(edg_wll_Context ctx,const char *userid,const char *subj) @@ -258,7 +182,7 @@ static int store_user(edg_wll_Context ctx,const char *userid,const char *subj) return edg_wll_Error(ctx,NULL,NULL); } -static int store_job(edg_wll_Context ctx,glite_jobid_const_t job,const char *userid, int proxy, int server) +static int store_job(edg_wll_Context ctx,glite_jobid_const_t job,const char *userid, int proxy, int server,int grey, int update) { char *jobstr = edg_wlc_JobIdUnparse(job); char *jobid = edg_wlc_JobIdGetUnique(job); @@ -288,262 +212,158 @@ static int store_job(edg_wll_Context ctx,glite_jobid_const_t job,const char *use else { server = 1; } - - trio_asprintf(&stmt,"insert into jobs(jobid,dg_jobid,userid,proxy,server) " - "values ('%|Ss','%|Ss','%|Ss', '%|Sd', '%|Sd')",jobid,jobstr,userid,proxy,server); - if (edg_wll_ExecSQL(ctx,stmt,NULL) < 0) { - if (edg_wll_Error(ctx,NULL,NULL) == EEXIST) - edg_wll_ResetError(ctx); + if (update) { + trio_asprintf(&stmt,"update jobs set userid='%|Ss', proxy='%|Sd', server='%|Sd', grey='%|Sd' where jobid='%|Ss'", + userid,proxy,server,grey,jobid); + } + else { + trio_asprintf(&stmt,"insert into jobs(jobid,dg_jobid,userid,proxy,server,grey) " + "values ('%|Ss','%|Ss','%|Ss', '%|Sd', '%|Sd', '%|Sd')",jobid,jobstr,userid,proxy,server,grey); } - free(stmt); - if (ctx->greyjobs) { - trio_asprintf(&stmt,"delete from grey_jobs where jobid = '%|Ss'",jobid); - edg_wll_ExecSQL(ctx,stmt,NULL); /* XXX: error propagates */ - free(stmt); + if (edg_wll_ExecSQL(ctx,stmt,NULL) < 0) { + if (edg_wll_Error(ctx,NULL,NULL) == EEXIST && !update) + edg_wll_ResetError(ctx); + else + goto err; } + free(stmt); stmt = NULL; +err: + free(stmt); free(jobstr); free(jobid); return edg_wll_Error(ctx,NULL,NULL); } -#ifdef LB_BUF -static int store_job_block(edg_wll_Context ctx,glite_jobid_const_t job,const char *userid, glite_lbu_bufInsert *bi, int proxy, int server) -{ - char *jobstr = edg_wlc_JobIdUnparse(job); - char *jobid = edg_wlc_JobIdGetUnique(job); - char *row; - -/* debug Duplicate key on index: Duplicate entry '(nil)' for key 1 - */ - - assert(!ctx->greyjobs); /* XXX: should not happen */ - if (jobid == NULL || jobstr == NULL) - return edg_wll_SetError(ctx,EINVAL,"store_jobi_block()"); - - edg_wll_ResetError(ctx); - - trio_asprintf(&row, "'%|Ss','%|Ss','%|Ss','%|Ss','%|Ss'", jobid,jobstr,userid,proxy,server); - edg_wll_bufferedInsert(bi, row); // no need to free row - - free(jobstr); - free(jobid); - return edg_wll_Error(ctx,NULL,NULL); -} -#endif - -static int store_job_grey(edg_wll_Context ctx,glite_jobid_const_t job,time_t etime) +static int set_job_grey(edg_wll_Context ctx, char *jobid) { - char *jobstr = edg_wlc_JobIdUnparse(job); - char *jobid = edg_wlc_JobIdGetUnique(job); - char *stmt, *s_etime; - - if (jobid == NULL || jobstr == NULL) - return edg_wll_SetError(ctx,EINVAL,"store_job_grey()"); + char *stmt; - edg_wll_ResetError(ctx); - glite_lbu_TimeToDB(etime, &s_etime); - trio_asprintf(&stmt,"insert into grey_jobs(jobid,dg_jobid,time_stamp) " - "values ('%|Ss','%|Ss',%s)", - jobid,jobstr,s_etime); - free(s_etime); - - if (edg_wll_ExecSQL(ctx,stmt,NULL) < 0) { - if (edg_wll_Error(ctx,NULL,NULL) == EEXIST) - edg_wll_ResetError(ctx); - } + trio_asprintf(&stmt,"update jobs set grey='1' where jobid='%|Ss'", jobid); + edg_wll_ExecSQL(ctx,stmt,NULL); free(stmt); - free(jobstr); - free(jobid); return edg_wll_Error(ctx,NULL,NULL); } /* test whether job shares LB proxy and server DB or not */ -int is_job_local(edg_wll_Context ctx, edg_wlc_JobId jobId) +int is_job_local(edg_wll_Context ctx, glite_jobid_const_t jobId) { char *srvName = NULL; unsigned int srvPort; int ret; + if (!ctx->serverRunning) return 0; /* XXX */ edg_wlc_JobIdGetServerParts(jobId, &srvName, &srvPort); - ret = ((ctx->srvPort == srvPort) && !strcmp(srvName,ctx->srvName)); + ret = ((ctx->srvPort == srvPort) && ctx->srvName && !strcmp(srvName,ctx->srvName)); free(srvName); return(ret); } -int store_job_server_proxy(edg_wll_Context ctx, edg_wll_Event *event) -{ +int store_job_server_proxy(edg_wll_Context ctx, edg_wll_Event *event, int *register_to_JP) +{ char *unique = edg_wlc_JobIdGetUnique(event->any.jobId); - char *q = NULL, *owner = NULL, *userid = NULL; + char *q = NULL, *userid = NULL, *subj = NULL; glite_lbu_Statement stmt = NULL; - int nar; + int nar, grey = 0; char *can_peername = NULL; - int local_job = is_job_local(ctx, event->any.jobId); + int local_job = is_job_local(ctx, event->any.jobId); + char *res[3] = {NULL, NULL, NULL}; + + /* check auth */ + if (!ctx->isProxy && !ctx->peerName) + return edg_wll_SetError(ctx,EPERM,"LB server can't store using unauthenticated connection"); + if (ctx->isProxy && (!event->any.user || !strcmp(event->any.user,EDG_WLL_LOG_USER_DEFAULT)) ) + return edg_wll_SetError(ctx,EPERM,"LB proxy can't store using unauthenticated connection"); - edg_wll_ResetError(ctx); - - can_peername = edg_wll_gss_normalize_subj(ctx->peerName, 0); - if (ctx->isProxy) { - /* event arrived on proxy socket */ - if (event->any.type == EDG_WLL_EVENT_REGJOB) { - if (event->any.priority & EDG_WLL_LOGFLAG_DIRECT) { - /* first synchronous registration */ - if (local_job) { - /* we are both server and proxy for this job */ - trio_asprintf(&q,"update jobs set proxy=1 where jobid='%|Ss'", - unique); - - nar = edg_wll_ExecSQL(ctx, q, NULL); - - if (nar == 0) { - /* job isn't registered yet */ - userid = strdup(strmd5("unknown_to_proxy", NULL)); - if (store_user(ctx,userid,"unknown_to_proxy")) goto err; - - if (store_job(ctx,(glite_jobid_const_t) event->any.jobId, - userid, 1, ctx->serverRunning)) goto err; - - } - else {} /* job was registered thru GSI, no further action needed */ - /* or error occured - and will go out via return() */ - } - else { - /* we are only proxy for this job, forward it to server */ - - /* XXX - does it have any sence ?? - if (!strcmp(e->any.user,EDG_WLL_LOG_USER_DEFAULT)) { - edg_wll_SetError(ctx,EPERM,"can't register jobs anonymously"); - goto err; - } - */ - - userid = strdup(strmd5(event->any.user, NULL)); - if (store_user(ctx,userid,event->any.user)) goto err; - - if (store_job(ctx,(glite_jobid_const_t) event->any.jobId, - userid, 1, 0)) goto err; - } - } + trio_asprintf(&q,"select proxy,server,grey from jobs where jobid='%|Ss' for update", unique); + + nar = edg_wll_ExecSQL(ctx,q,&stmt); + free(q); q = NULL; + + if (nar < 0) goto err; + else if (nar == 0) { + /* Job not registered yet */ + + if (!( (event->any.type == EDG_WLL_EVENT_REGJOB) && + (event->any.priority & EDG_WLL_LOGFLAG_DIRECT) )) + { + if (ctx->greyjobs) grey = 1; else { - /* supplementary re-registration (JDL of subjob, etc.) */ - if (local_job) { - /* previous registration via GSI required */ - trio_asprintf(&q,"update jobs set jobid='%|Ss', proxy='1' where jobid='%|Ss'", - unique, unique); - /* does the job exists ? */ - if (edg_wll_ExecSQL(ctx,q,NULL) < 0) { - edg_wll_SetError(ctx, ENOENT, "job not registered"); - goto err; - } - } - else { - /* try to register job in case that first reistration */ - /* was sent to server only; ignore errors (EEXIST) */ - userid = strdup(strmd5(event->any.user, NULL)); - if (store_user(ctx,userid,event->any.user)) goto err; - - store_job(ctx,(glite_jobid_const_t) event->any.jobId, - userid, 1, 0); - edg_wll_ResetError(ctx); - - } - } - } - else { - /* any other event than JobReg */ - trio_asprintf(&q,"update jobs set jobid='%|Ss', proxy='1' where jobid='%|Ss'", - unique, unique); - /* does the job exists ? now we require registration on proxy too */ - if (edg_wll_ExecSQL(ctx,q,NULL) < 0) { edg_wll_SetError(ctx, ENOENT, "job not registered"); - goto err; + goto err; } } + + subj = strdup( ctx->isProxy ? event->any.user : ctx->peerName); + can_peername = edg_wll_gss_normalize_subj(subj, 0); + userid = strdup(strmd5(can_peername, NULL)); + if (store_user(ctx,userid,can_peername)) goto err; + if (store_job(ctx,(glite_jobid_const_t) event->any.jobId, + userid, ctx->isProxy, local_job, grey, 0 )) goto err; + *register_to_JP = local_job; } else { - /* event arrived on LB port */ - if (event->any.type == EDG_WLL_EVENT_REGJOB) { - trio_asprintf(&q,"select cert_subj from jobs,users where jobs.jobid='%|Ss'" - " AND jobs.userid=users.userid",unique); - if ( (nar = edg_wll_ExecSQL(ctx,q,&stmt)) < 0 || edg_wll_FetchRow(ctx,stmt,1,NULL,&owner) < 0 ) { - goto err; - } - free(q); q = NULL; - - if (nar) { - /* job is already registered */ - if (!strcmp(owner,"unknown_to_proxy")) { - /* proxy registration was already done */ - userid = strdup(strmd5(can_peername, NULL)); - if (store_user(ctx,userid,can_peername)) goto err; - - trio_asprintf(&q,"update jobs set server=1, userid='%|Ss' where jobid='%|Ss'", - userid, unique); - - edg_wll_ExecSQL(ctx, q, NULL); - } - else { } /* re-registration, no action needed */ - } - else { - userid = strdup(strmd5(can_peername, NULL)); - if (store_user(ctx,userid,can_peername)) goto err; + /* Job already registered */ - if (store_job(ctx,(glite_jobid_const_t) event->any.jobId, - userid, 0, 1)) goto err; - } + if (edg_wll_FetchRow(ctx,stmt,sizeof(res)/sizeof(res[0]),NULL,res) < 0) goto err; + +/* TODO: ljocha: only GSI should switch the job from grey, and specify owner + * add !ctx->isProxy to the conditions and make the rest simpler */ + + if (ctx->greyjobs && !strcmp(res[2],"1") && + (event->any.type == EDG_WLL_EVENT_REGJOB) && + (event->any.priority & EDG_WLL_LOGFLAG_DIRECT)) + { + + subj = strdup(ctx->isProxy ? event->any.user : ctx->peerName); + can_peername = edg_wll_gss_normalize_subj(subj, 0); + userid = strdup(strmd5(can_peername, NULL)); + if (store_user(ctx,userid,can_peername)) goto err; + if (store_job(ctx,(glite_jobid_const_t) event->any.jobId, + userid, (ctx->isProxy || !strcmp(res[0],"1")), + !strcmp(res[1],"1") || (local_job ? ctx->serverRunning : 0), 0, 1)) goto err; + *register_to_JP = 1; + } else { - /* any other event than JobReg */ - /* no action needed */ + // if (!strcmp(res[0],"1") && !strcmp(res[1],"1") ) /*nothing to do */; + if ( (!strcmp(res[0],"0") && ctx->isProxy) || (!strcmp(res[1],"0") && !ctx->isProxy) ) { + trio_asprintf(&q,"update jobs set server='1', proxy='1' where jobid='%|Ss'", + unique); + if (edg_wll_ExecSQL(ctx,q,NULL) < 0) goto err; + free(q); q = NULL; + } } + + /* ??? test whether user from proxy is the same as user from server ??? + be picky -- return error when not mathing? + */ } - + err: + free(res[0]); free(res[1]); free(res[2]); if (stmt) glite_lbu_FreeStmt(&stmt); - free(unique); + free(subj); free(userid); free(q); return edg_wll_Error(ctx,NULL,NULL); -} - -/* - * XXX: store it in SHORT_FIELDS for now despite it should go to dedicated - * column in EVENTS. - * - * don't want to change the database structure now, will be done anyway - * soon - */ -static int store_seq(edg_wll_Context ctx,edg_wll_Event *e,int no) -{ - int ret; - char *stmt; - char *jobid = edg_wlc_JobIdGetUnique(e->any.jobId); +} - edg_wll_ResetError(ctx); - trio_asprintf(&stmt,"insert into short_fields(jobid,event,name,value) " - "values ('%|Ss',%d,'SEQCODE','%|Ss')", - jobid,no,e->any.seqcode); - ret = edg_wll_ExecSQL(ctx,stmt,NULL); - free(stmt); - free(jobid); - - return ret>=0 ? 0 : edg_wll_Error(ctx,NULL,NULL); -} #define SHORT_LEN 255 /* short_fiels.value db column lenght */ -static int store_flesh(edg_wll_Context ctx,edg_wll_Event *e,char *jobid,int no) +static int store_flesh(edg_wll_Context ctx,edg_wll_Event *e,const char *ulm,char *jobid,int no) { struct { char *key; @@ -554,7 +374,17 @@ static int store_flesh(edg_wll_Context ctx,edg_wll_Event *e,char *jobid,int no) unsigned int i; int err = 0; + assert(ulm || e); edg_wll_ResetError(ctx); + if (!ulm) ulm = edg_wll_UnparseEvent(ctx, e); + if (!ulm) return edg_wll_Error(ctx, NULL, NULL); + +#ifndef LB_EVENTS_BLOB + // in both ways if some preparsed flesh fields in events too + if (!e) { + if (edg_wll_ParseEvent(ctx, (char *)ulm, &e) != 0) return edg_wll_Error(ctx, NULL, NULL); + } + memset(f,0,sizeof(f)); assert(f[0].key == NULL); switch (e->type) { @@ -613,6 +443,11 @@ static int store_flesh(edg_wll_Context ctx,edg_wll_Event *e,char *jobid,int no) if (edg_wll_ExecSQL(ctx,stmt,NULL) < 0) err = edg_wll_Error(ctx,NULL,NULL); free(stmt); } +#else + if (!ulm) ulm = edg_wll_UnparseEvent(ctx, e); + trio_asprintf(&stmt, "insert into events_flesh (jobid, event, ulm) values ('%|Ss', %d, '%|Ss')", jobid, no, ulm); + if (edg_wll_ExecSQL(ctx,stmt,NULL) < 0) err = edg_wll_Error(ctx,NULL,NULL); +#endif return err; } @@ -690,19 +525,12 @@ static int check_auth(edg_wll_Context ctx,edg_wll_Event *e) char *jobid = edg_wlc_JobIdGetUnique(e->any.jobId); char *q = NULL,*owner = NULL; glite_lbu_Statement stmt = NULL; - char *user; edg_wll_ResetError(ctx); if (!ctx->isProxy && !ctx->peerName) return edg_wll_SetError(ctx,EPERM,"can't store using unauthenticated connection"); -#if 0 - if (e->type == EDG_WLL_EVENT_REGJOB) - return strcmp(e->any.user,EDG_WLL_LOG_USER_DEFAULT) ? - 0 : edg_wll_SetError(ctx,EPERM,"can't register jobs anonymously"); -#endif - trio_asprintf(&q,"select u.cert_subj from jobs j, users u " "where j.jobid='%|Ss' and u.userid=j.userid",jobid); @@ -711,14 +539,7 @@ static int check_auth(edg_wll_Context ctx,edg_wll_Event *e) ) goto clean; if (!owner) { - if ( ctx->isProxy ) - edg_wll_SetError(ctx, EINVAL, "Job not registered"); - else - /* We have to let the calling function know what happened here - * even if it hapens inside the LB Proxy which shouldn't consider - * this as an error - */ - edg_wll_SetError(ctx, ENOENT, "job not registered"); + edg_wll_SetError(ctx, ENOENT, "job not registered"); goto clean; } @@ -744,95 +565,6 @@ clean: return edg_wll_Error(ctx,NULL,NULL); } -#ifndef LB_DAG_EMBRIONIC -static int register_subjobs(edg_wll_Context ctx,const edg_wll_RegJobEvent *e) -{ - int i,err; - edg_wlc_JobId *subjobs; - struct timeval now; - - edg_wll_ResetError(ctx); - if (e->nsubjobs == 0) return 0; - if (e->nsubjobs < 0) return edg_wll_SetError(ctx,EINVAL,"negative number of subjobs"); - - if ((err = edg_wll_GenerateSubjobIds(ctx,e->jobId,e->nsubjobs,e->seed,&subjobs))) - return err; - - gettimeofday(&now,NULL); - -/* XXX: increase the overall request timeout. */ - ctx->p_tmp_timeout.tv_sec += e->nsubjobs; - if (ctx->p_tmp_timeout.tv_sec > 86400) ctx->p_tmp_timeout.tv_sec = 86400; - - for (i=0; insubjobs; i++) { - edg_wll_Event e2; - int seq; - char *et,*ed,*job_s; - - memset(&e2,0,sizeof e2); - e2.type = EDG_WLL_EVENT_REGJOB; - e2.any.jobId = subjobs[i]; subjobs[i] = NULL; - memcpy(&e2.regJob.timestamp,&now,sizeof now); - e2.any.host = strdup(ctx->srvName); - e2.any.level = e->level; - e2.any.priority = e->priority; - e2.any.seqcode = strdup(EDG_WLL_SEQ_NULL); - e2.any.user = strdup(e->user); - e2.any.source = e->source; - e2.any.src_instance = strdup(ctx->isProxy ? - "L&B proxy" : "L&B server"); - e2.regJob.ns = strdup(e->ns); - edg_wlc_JobIdDup(e->jobId,&e2.regJob.parent); - e2.regJob.jobtype = EDG_WLL_REGJOB_SIMPLE; - e2.regJob.jdl = strdup(""); - - switch (edg_wll_StoreEvent(ctx,&e2,&seq)) { - - case 0: break; - /* maybe some non-ignorable errors should be handled here */ - - default: - edg_wll_Error(ctx,&et,&ed); - job_s = edg_wlc_JobIdUnparse(e2.any.jobId); - fprintf(stderr,"register subjob %s: %s (%s)\n",job_s,et,ed); - syslog(LOG_ERR,"register subjob %s: %s (%s)",job_s,et,ed); - free(job_s); free(et); free(ed); - edg_wll_FreeEvent(&e2); - edg_wll_ResetError(ctx); - continue; - } - - if (edg_wll_LockJob(ctx,e2.any.jobId)) { - job_s = edg_wlc_JobIdUnparse(e2.any.jobId); - fprintf(stderr,"lock job %s: %s (%s)\n",job_s,et,ed); - syslog(LOG_ERR,"lock job %s: %s (%s)",job_s,et,ed); - free(job_s); free(et); free(ed); - edg_wll_FreeEvent(&e2); - edg_wll_ResetError(ctx); - continue; - } - - if ((err = edg_wll_StepIntState(ctx,e2.any.jobId,&e2,seq,NULL))) - edg_wll_Error(ctx,&et,&ed); - - edg_wll_UnlockJob(ctx,e2.any.jobId); - edg_wll_ResetError(ctx); - - if (err) { - job_s = edg_wlc_JobIdUnparse(e2.any.jobId); - fprintf(stderr,"%s: %s (%s)\n",job_s,et,ed); - syslog(LOG_ERR,"%s: %s (%s)",job_s,et,ed); - free(job_s); free(et); free(ed); - edg_wll_ResetError(ctx); - } - - edg_wll_FreeEvent(&e2); - } - - free(subjobs); - return edg_wll_Error(ctx,NULL,NULL); -} -#endif /* @@ -841,7 +573,7 @@ static int register_subjobs(edg_wll_Context ctx,const edg_wll_RegJobEvent *e) static edg_wll_ErrorCode states_values_embryonic( edg_wll_Context ctx, - edg_wlc_JobId jobid, + glite_jobid_const_t jobid, const edg_wll_RegJobEvent *e, char **icnames, char **values) @@ -883,21 +615,17 @@ err: return edg_wll_Error(ctx,NULL,NULL); } -static int register_subjobs_embryonic(edg_wll_Context ctx,const edg_wll_RegJobEvent *e, const char *userid) +int register_subjobs_embryonic(edg_wll_Context ctx,const edg_wll_RegJobEvent *e) { - int i, err = 0; - edg_wlc_JobId *subjobs; + int i, j, err = 0; + edg_wlc_JobId *subjobs = NULL; struct timeval now; - char *jobid_md5, *jobid_md5_old; + char *jobid = NULL, *jobid_md5 = NULL, *jobid_md5_old = NULL; size_t jobid_len; -#ifdef LB_BUF - glite_lbu_bufInsert bi_j; - glite_lbu_bufInsert *bi_jobs = &bi_j; - char *states_cols; -#endif glite_lbu_bufInsert bi_s, *bi_states = &bi_s; - char *icnames, *values; + char *icnames = NULL, *values = NULL, *userid = NULL, *stmt = NULL; int server, proxy, membership = 0; + glite_lbu_Statement sh = NULL; edg_wll_ResetError(ctx); @@ -914,43 +642,28 @@ static int register_subjobs_embryonic(edg_wll_Context ctx,const edg_wll_RegJobEv jobid_len = strlen(jobid_md5_old); -#ifdef LB_BUF - /* init multirows insert mechanism for tables used here */ - if (edg_wll_bufferedInsertInit(ctx, bi_jobs, NULL, "jobs", 4000, 1000, - "jobid, dg_jobid, userid, proxy, server")) - { - return edg_wll_SetError(ctx, EINVAL, "edg_wll_bufferedInsertInit()"); - } - - asprintf(&states_cols,"jobid, status, seq,int_status, version, parent_job%s", icnames); - if (edg_wll_bufferedInsertInit(ctx, bi_states, NULL, "states", 4000, 1000, states_cols)) - { - return edg_wll_SetError(ctx, EINVAL, "edg_wll_bufferedInsertInit()"); - } - free(states_cols); -#endif - - gettimeofday(&now,NULL); /* increase the overall request timeout. */ ctx->p_tmp_timeout.tv_sec += e->nsubjobs/10; if (ctx->p_tmp_timeout.tv_sec > 86400) ctx->p_tmp_timeout.tv_sec = 86400; - membership = edg_wll_jobMembership(ctx, e->jobId); + if ((membership = edg_wll_jobMembership(ctx, e->jobId)) < 0) goto err; + proxy = membership & DB_PROXY_JOB; server = membership & DB_SERVER_JOB; + /* get userid of parent job */ + jobid = edg_wlc_JobIdGetUnique(e->jobId); + trio_asprintf(&stmt,"select userid from jobs where jobid='%|Ss'", jobid); + if (edg_wll_ExecSQL(ctx,stmt,&sh) < 0 || edg_wll_FetchRow(ctx,sh,1,NULL,&userid) < 0) goto err; + for (i=0; insubjobs; i++) { char *et,*ed,*job_s,*p,*p1; /* save jobid-userid relation into jobs table */ -#ifdef LB_BUF - if ((err = store_job_block(ctx, subjobs[i], userid, bi_jobs, proxy, server))) -#else - if ((err = store_job(ctx, subjobs[i], userid, proxy, server))) -#endif - edg_wll_Error(ctx,&et,&ed); + if ((err = store_job(ctx, subjobs[i], userid, proxy, server, 0, 0))) + if (edg_wll_Error(ctx,&et,&ed) == EDEADLOCK) goto err; /* interchange variable parts (jobids) in values */ /* there are only two occurences of subjob jobid */ @@ -970,10 +683,6 @@ static int register_subjobs_embryonic(edg_wll_Context ctx,const edg_wll_RegJobEv if (!err && (err = edg_wll_StoreIntStateEmbryonic(ctx, subjobs[i], icnames, values, bi_states))) edg_wll_Error(ctx,&et,&ed); -//job_s = edg_wlc_JobIdUnparse(subjobs[i]); -//printf("%s\n", job_s); -//free(job_s); - if (err) { job_s = edg_wlc_JobIdUnparse(subjobs[i]); fprintf(stderr,"%s: %s (%s)\n",job_s,et,ed); @@ -984,16 +693,15 @@ static int register_subjobs_embryonic(edg_wll_Context ctx,const edg_wll_RegJobEv edg_wlc_JobIdFree(subjobs[i]); } +err: free(jobid_md5_old); //free the last one free(icnames); free(values); + /* free the rest of subjobs if DEADLOCK occurs */ + for (j=i; jnsubjobs; j++) edg_wlc_JobIdFree(subjobs[i]); free(subjobs); - -#ifdef LB_BUF - /* commit the rest of multirows insert and clean structures */ - edg_wll_bufferedInsertClose(bi_jobs); - edg_wll_bufferedInsertClose(bi_states); -#endif + if (sh) glite_lbu_FreeStmt(&sh); + free(stmt); return edg_wll_Error(ctx,NULL,NULL); } @@ -1022,6 +730,12 @@ int edg_wll_delete_event(edg_wll_Context ctx,const char *jobid,int event) free(stmt); trio_asprintf(&stmt, + "delete from events_flesh where jobid='%|Ss' and event=%d", + jobid,event); + edg_wll_ExecSQL(ctx,stmt,NULL); + free(stmt); + + trio_asprintf(&stmt, "delete from events where jobid='%|Ss' and event=%d", jobid,event); edg_wll_ExecSQL(ctx,stmt,NULL); @@ -1029,3 +743,19 @@ int edg_wll_delete_event(edg_wll_Context ctx,const char *jobid,int event) return edg_wll_Error(ctx,NULL,NULL); } + + +/* XXX: if event type is user tag, convert the tag name to lowercase! + * (not sure whether to convert a value too is reasonable + * or keep it 'case sensitive') + */ +static void lowercase_usertag(edg_wll_Event *ev) +{ + int i; + + if ( ev->any.type == EDG_WLL_EVENT_USERTAG ) { + + for ( i = 0; ev->userTag.name[i] != '\0'; i++ ) + ev->userTag.name[i] = tolower(ev->userTag.name[i]); + } +} diff --git a/org.glite.lb.server/src/stored_master.c b/org.glite.lb.server/src/stored_master.c index 2b5b54a..4e8ecab 100644 --- a/org.glite.lb.server/src/stored_master.c +++ b/org.glite.lb.server/src/stored_master.c @@ -92,7 +92,7 @@ int edg_wll_StoreProtoServer(edg_wll_Context ctx) else #endif - handle_request(ctx,buf); + handle_il_message(ctx,buf); free(buf); if ((len = create_reply(ctx,&buf)) > 0) { @@ -145,7 +145,7 @@ int edg_wll_StoreProtoProxy(edg_wll_Context ctx) if (sink_mode == GLITE_LB_SINK_PARSE) glite_wll_perftest_consumeEventIlMsg(buf); else #endif - if ( !(ret = handle_request(ctx, buf)) ) { + if ( !(ret = handle_il_message(ctx, buf)) ) { if ( (err = edg_wll_Error(ctx, NULL, &errd)) ) edg_wll_ResetError(ctx); } free(buf); diff --git a/org.glite.lb.server/src/userjobs.c b/org.glite.lb.server/src/userjobs.c index 2cfc045..75fd992 100644 --- a/org.glite.lb.server/src/userjobs.c +++ b/org.glite.lb.server/src/userjobs.c @@ -13,7 +13,7 @@ #include "jobstat.h" #include "db_supp.h" -int edg_wll_UserJobs( +int edg_wll_UserJobsServer( edg_wll_Context ctx, edg_wlc_JobId **jobs, edg_wll_JobStat **states) @@ -21,7 +21,7 @@ int edg_wll_UserJobs( char *userid, *stmt = NULL, *res = NULL; char *can_peername; - int njobs = 0,ret,i,j; + int njobs = 0,ret,i,j,idx; edg_wlc_JobId *out = NULL; glite_lbu_Statement sth = NULL; edg_wll_ErrorCode err = 0; @@ -52,7 +52,7 @@ int edg_wll_UserJobs( free(stmt); stmt = NULL; free(res); res = NULL; - trio_asprintf(&stmt,"select dg_jobid from jobs where userid = '%|Ss'",userid); + trio_asprintf(&stmt,"select dg_jobid from jobs where userid = '%|Ss' and grey='0'",userid); switch (njobs = edg_wll_ExecSQL(ctx,stmt,&sth)) { case 0: edg_wll_SetError(ctx,ENOENT,ctx->peerName); case -1: goto err; @@ -71,14 +71,16 @@ int edg_wll_UserJobs( } if (states) *states = calloc(njobs, sizeof(**states)); + idx = 0; for (i = 0; i < njobs; i++) { - edg_wll_JobStat *stat = states ? &(*states)[i] : NULL; - - if (stat && edg_wll_JobStatus(ctx, out[i], -1, stat) != 0) { - for (j = 0; j < i; j++) edg_wll_FreeStatus(&(*states)[j]); - *states = NULL; + if (edg_wll_JobStatusServer(ctx, out[idx], -1, &(*states)[idx]) != 0) { + if (edg_wll_Error(ctx, NULL, NULL) == ENOENT) { + /* some jobs may be purged meanwhile, ignore */ + continue; + } + else break; } - break; + idx++; } err: free(res); diff --git a/org.glite.lb.server/src/ws_query.c b/org.glite.lb.server/src/ws_query.c index 9d9b092..ca5d0eb 100644 --- a/org.glite.lb.server/src/ws_query.c +++ b/org.glite.lb.server/src/ws_query.c @@ -79,7 +79,7 @@ SOAP_FMAC5 int SOAP_FMAC6 __lb__JobStatus( free(cflags); } - if ( edg_wll_JobStatus(ctx, j, flags, &s) ) + if ( edg_wll_JobStatusServer(ctx, j, flags, &s) ) { edg_wll_ErrToFault(ctx, soap); return SOAP_FAULT; @@ -166,7 +166,7 @@ SOAP_FMAC5 int SOAP_FMAC6 __lb__UserJobs( ctx = (edg_wll_Context) glite_gsplugin_get_udata(soap); memset(out, 0, sizeof *out); - if (edg_wll_UserJobs(ctx, &jobs, &states) != 0) goto fault; + if (edg_wll_UserJobsServer(ctx, &jobs, &states) != 0) goto fault; if (edg_wll_UserJobsResToSoap(soap, jobs, states, out) != SOAP_OK) { edg_wll_SetError(ctx, ENOMEM, "Couldn't create internal structures"); goto freefault; diff --git a/org.glite.lb.server/test/test_query_events.cpp b/org.glite.lb.server/test/test_query_events.cpp index e4e39e8..1a3cd2a 100644 --- a/org.glite.lb.server/test/test_query_events.cpp +++ b/org.glite.lb.server/test/test_query_events.cpp @@ -158,7 +158,7 @@ int glite_lbu_bufferedInsertClose(glite_lbu_bufInsert bi) { return 0; } int glite_lbu_QueryIndices(glite_lbu_DBContext ctx, const char *table, char ***key_names, char ****column_names) { return 0; } int glite_lbu_DBError(glite_lbu_DBContext ctx, char **s1, char **s2) { return 0; } -int edg_wll_JobStatus( +int edg_wll_JobStatusServer( edg_wll_Context ctx, const edg_wlc_JobId job, int flags, diff --git a/org.glite.lb.server/test/test_trans.sh b/org.glite.lb.server/test/test_trans.sh new file mode 100644 index 0000000..22608a8 --- /dev/null +++ b/org.glite.lb.server/test/test_trans.sh @@ -0,0 +1,190 @@ +# Simple script to test United Server Proxy behaviour +# - for internal purposes only +# - test should correspond to tests described at +# http://egee.cesnet.cz/mediawiki/index.php/LB_and_JP_cleanup#unify_lb.proxy_.2B_server + +#!/bin/bash + +QUERY_SOCK=/tmp/lb_proxy_serve.sock + + +cisti() { + echo "DB cleanup..." + + mysql -u lbserver -e "delete from users;" lbserver20trans + mysql -u lbserver -e "delete from status_tags;" lbserver20trans + mysql -u lbserver -e "delete from states;" lbserver20trans + mysql -u lbserver -e "delete from short_fields;" lbserver20trans + mysql -u lbserver -e "delete from jobs;" lbserver20trans + mysql -u lbserver -e "delete from events;" lbserver20trans + mysql -u lbserver -e "delete from server_state;" lbserver20trans + mysql -u lbserver -e "delete from notif_registrations;" lbserver20trans + mysql -u lbserver -e "delete from notif_jobs;" lbserver20trans + mysql -u lbserver -e "delete from long_fields;" lbserver20trans + mysql -u lbserver -e "delete from acls;" lbserver20trans + + echo "done." +} + +registruj() { + + echo "Registering...." + OUT=`org.glite.lb.client/build/job_reg -x -m scientific.civ.zcu.cz:7846 -s application|grep JOBID` + eval $OUT + ID1=$EDG_JOBID + OUT=`org.glite.lb.client/build/job_reg -m scientific.civ.zcu.cz:7846 -s application|grep JOBID` + eval $OUT + ID2=$EDG_JOBID + OUT=`org.glite.lb.client/build/job_reg -x -m skurut68-2.cesnet.cz:9000 -s application|grep JOBID` + eval $OUT + ID3=$EDG_JOBID + echo "done." +} + +registruj_kolekce() { + + echo "Registering...." + OUT=`org.glite.lb.client/build/job_reg -x -C -n 1 -m scientific.civ.zcu.cz:7846 -s application|grep JOBID` + eval $OUT + ID1=$EDG_WL_COLLECTION_JOBID + ID1_SUB=$EDG_WL_SUB_JOBID + OUT=`org.glite.lb.client/build/job_reg -C -n 1 -m scientific.civ.zcu.cz:7846 -s application|grep JOBID` + eval $OUT + ID2=$EDG_WL_COLLECTION_JOBID + ID2_SUB=$EDG_WL_SUB_JOBID + OUT=`org.glite.lb.client/build/job_reg -x -C -n 1 -m skurut68-2.cesnet.cz:9000 -s application|grep JOBID` + eval $OUT + ID3=$EDG_WL_COLLECTION_JOBID + ID3_SUB=$EDG_WL_SUB_JOBID + OUT=`org.glite.lb.client/build/job_reg -x -C -S -n 1 -m scientific.civ.zcu.cz:7846 -s application|grep JOBID` + eval $OUT + ID4=$EDG_WL_COLLECTION_JOBID + ID4_SUB=$EDG_WL_SUB_JOBID + OUT=`org.glite.lb.client/build/job_reg -C -S -n 1 -m scientific.civ.zcu.cz:7846 -s application|grep JOBID` + eval $OUT + ID5=$EDG_WL_COLLECTION_JOBID + ID5_SUB=$EDG_WL_SUB_JOBID + OUT=`org.glite.lb.client/build/job_reg -x -C -S -n 1 -m skurut68-2.cesnet.cz:9000 -s application|grep JOBID` + eval $OUT + ID6=$EDG_WL_COLLECTION_JOBID + ID6_SUB=$EDG_WL_SUB_JOBID + echo "done." + +} + +vypis() { + mysql -u lbserver -e "select dg_jobid,proxy,server from jobs" lbserver20trans +} + +vypis_kolekci() { + mysql -u lbserver -e "select dg_jobid, proxy,server from jobs where dg_jobid='$1'" lbserver20trans + mysql -u lbserver -e "select dg_jobid, proxy,server from jobs where dg_jobid='$2'" lbserver20trans|grep http + mysql -u lbserver -e "select dg_jobid, proxy,server from jobs where dg_jobid='$3'" lbserver20trans|grep http + mysql -u lbserver -e "select dg_jobid, proxy,server from jobs where dg_jobid='$4'" lbserver20trans|grep http + mysql -u lbserver -e "select dg_jobid, proxy,server from jobs where dg_jobid='$5'" lbserver20trans|grep http + mysql -u lbserver -e "select dg_jobid, proxy,server from jobs where dg_jobid='$6'" lbserver20trans|grep http + shift 6 + mysql -u lbserver -e "select dg_jobid, proxy,server from jobs where dg_jobid='$1'" lbserver20trans|grep http + mysql -u lbserver -e "select dg_jobid, proxy,server from jobs where dg_jobid='$2'" lbserver20trans|grep http + mysql -u lbserver -e "select dg_jobid, proxy,server from jobs where dg_jobid='$3'" lbserver20trans|grep http + mysql -u lbserver -e "select dg_jobid, proxy,server from jobs where dg_jobid='$4'" lbserver20trans|grep http + mysql -u lbserver -e "select dg_jobid, proxy,server from jobs where dg_jobid='$5'" lbserver20trans|grep http + mysql -u lbserver -e "select dg_jobid, proxy,server from jobs where dg_jobid='$6'" lbserver20trans|grep http +} + +do_stavu_cleared() { + echo "Transfering jobs to cleared state..." + stage/examples/glite-lb-cleared.sh -x -j $1 2>/dev/null + stage/examples/glite-lb-cleared.sh -j $2 2>/dev/null + stage/examples/glite-lb-cleared.sh -x -j $3 2>/dev/null + echo "done." +} + +check_states() { + echo + echo "State of job $1" + stage/examples/glite-lb-job_status -x $QUERY_SOCK $1 | grep "state : " + stage/examples/glite-lb-job_status $1 |grep "state : " + echo "State of job $2" + stage/examples/glite-lb-job_status -x $QUERY_SOCK $2 | grep "state : " + stage/examples/glite-lb-job_status $2 |grep "state : " + echo "State of job $3" + stage/examples/glite-lb-job_status -x $QUERY_SOCK $3 | grep "state : " + stage/examples/glite-lb-job_status $3 |grep "state : " +} + +test1() { + echo + echo "==================== test 1 =============================" + + cisti; + registruj; + vypis; + echo job1=$ID1 + echo job2=$ID2 + echo job3=$ID3 +} + +test2() { + echo + echo "==================== test 2 =============================" + + cisti; + registruj; + vypis; + echo job1=$ID1 + echo job2=$ID2 + echo job3=$ID3 + + do_stavu_cleared $ID1 $ID2 $ID3 + vypis; +} + +test3() { + echo + echo "==================== test 3 =============================" + + cisti; + registruj; + vypis; + echo job1=$ID1 + echo job2=$ID2 + echo job3=$ID3 + + sleep 2; + stage/bin/glite-lb-purge --cleared 1s --aborted 1s --cancelled 1s --other 1s -l -m scientific.civ.zcu.cz:7846 + vypis; +} + +test4() { + echo + echo "==================== test 4 =============================" + + cisti; + registruj; + vypis; + echo job1=$ID1 + echo job2=$ID2 + echo job3=$ID3 + + check_states $ID1 $ID2 $ID3 +} + +test5() { + echo + echo "==================== test 5 =============================" + + cisti; + registruj_kolekce; + vypis_kolekci $ID1 $ID1_SUB $ID2 $ID2_SUB $ID3 $ID3_SUB $ID4 $ID4_SUB $ID5 $ID5_SUB $ID6 $ID6_SUB +} + + + +#################################################### + +test1; +test2; +test3; +test4; +test5; diff --git a/org.glite.lb.state-machine/src/process_event.c b/org.glite.lb.state-machine/src/process_event.c index 006c8d1..0c1e739 100644 --- a/org.glite.lb.state-machine/src/process_event.c +++ b/org.glite.lb.state-machine/src/process_event.c @@ -383,7 +383,10 @@ static int processEvent_glite(intJobStat *js, edg_wll_Event *e, int ev_seq, int if (USABLE_DATA(res, strict)) { switch (e->transfer.source) { case EDG_WLL_SOURCE_USER_INTERFACE: - rep(js->pub.jdl, e->transfer.job); break; + if (!js->pub.jdl) { + rep(js->pub.jdl, e->transfer.job); + } + break; case EDG_WLL_SOURCE_JOB_SUBMISSION: rep(js->pub.condor_jdl, e->transfer.job); break; case EDG_WLL_SOURCE_LOG_MONITOR: @@ -504,7 +507,10 @@ static int processEvent_glite(intJobStat *js, edg_wll_Event *e, int ev_seq, int if (USABLE_DATA(res, strict)) { switch (e->enQueued.source) { case EDG_WLL_SOURCE_NETWORK_SERVER: - rep(js->pub.jdl, e->enQueued.job); break; + if (!js->pub.jdl) { + rep(js->pub.jdl, e->enQueued.job); + } + break; case EDG_WLL_SOURCE_WORKLOAD_MANAGER: if (USABLE_BRANCH(res)) { rep(js->pub.matched_jdl, e->enQueued.job); diff --git a/org.glite.lbjp-common.db/src/db.c b/org.glite.lbjp-common.db/src/db.c index a45a16c..66816a1 100644 --- a/org.glite.lbjp-common.db/src/db.c +++ b/org.glite.lbjp-common.db/src/db.c @@ -256,8 +256,12 @@ void glite_lbu_FreeDBContext(glite_lbu_DBContext ctx) { int glite_lbu_DBConnect(glite_lbu_DBContext ctx, const char *cs) { - if (db_connect(ctx, cs, &ctx->mysql) != 0) return STATUS(ctx); - return 0; + if (db_connect(ctx, cs, &ctx->mysql) != 0 || + glite_lbu_ExecSQL(ctx, "SET AUTOCOMMIT=1", NULL) < 0 || + glite_lbu_ExecSQL(ctx, "SET TRANSACTION ISOLATION LEVEL REPEATABLE READ", NULL) < 0) + return STATUS(ctx); + else + return 0; } @@ -422,7 +426,7 @@ int glite_lbu_QueryIndices(glite_lbu_DBContext ctx, const char *table, char ***k idx[i][j] = strdup(showcol[Column_name]); //printf("****** [%d, %d] %s\n", i, j, idx[i][j]); //FIXME: needed?idx[i][j].value.i = atoi(showcol[Sub_part]); - for (i = 0; showcol[i]; i++) free(showcol[i]); + for (i = 0; i<(sizeof(showcol)/sizeof(showcol[0])); i++) free(showcol[i]); } glite_lbu_FreeStmt(&stmt); @@ -489,6 +493,10 @@ int glite_lbu_ExecSQL(glite_lbu_DBContext ctx, const char *cmd, glite_lbu_Statem if (retry_nr <= 0) do_reconnect = 1; break; + case ER_LOCK_DEADLOCK: + ERR(ctx, EDEADLOCK, db_handle.mysql_error(ctx->mysql)); + return -1; + break; default: MY_ERR(ctx); return -1; @@ -527,7 +535,7 @@ int glite_lbu_ExecSQL(glite_lbu_DBContext ctx, const char *cmd, glite_lbu_Statem sum.tv_usec += end.tv_usec; sum.tv_sec += end.tv_sec + sum.tv_usec / 1000000; sum.tv_usec -= 1000000 * (sum.tv_usec / 1000000); - fprintf(stderr,"[%d] %s\n[%d] %3ld.%06ld (sum: %3ld.%06ld)\n",pid,txt,pid,end.tv_sec,end.tv_usec,sum.tv_sec,sum.tv_usec); + fprintf(stderr,"[%d] %s\n[%d] %3ld.%06ld (sum: %3ld.%06ld)\n",pid,cmd,pid,end.tv_sec,end.tv_usec,sum.tv_sec,sum.tv_usec); #endif return db_handle.mysql_affected_rows(ctx->mysql); @@ -1022,8 +1030,13 @@ static int transaction_test(glite_lbu_DBContext ctx) { trio_asprintf(&cmd, "SHOW CREATE TABLE %|Ss", table[0]); if (glite_lbu_ExecSQL(ctx, cmd, &stmt) <= 0 || (retval = glite_lbu_FetchRow(stmt, 2, NULL, res)) < 0 ) goto quit; - if (retval != 2 || strcmp(res[0], table[0])) ERR(ctx, EIO, "unexpected show create result"); - else ctx->caps |= GLITE_LBU_DB_CAP_TRANSACTIONS; + if (retval != 2 || strcmp(res[0], table[0])) { + ERR(ctx, EIO, "unexpected show create result"); + goto quit; + } + + if (strstr(res[1],"ENGINE=InnoDB")) + ctx->caps |= GLITE_LBU_DB_CAP_TRANSACTIONS; #ifdef LBS_DB_PROFILE fprintf(stderr, "[%d] use_transactions = %d\n", getpid(), USE_TRANS(ctx)); @@ -1031,10 +1044,10 @@ static int transaction_test(glite_lbu_DBContext ctx) { quit: glite_lbu_FreeStmt(&stmt); - if (table[0]) free(table[0]); - if (res[0]) free(res[0]); - if (res[1]) free(res[1]); - if (cmd) free(cmd); + free(table[0]); + free(res[0]); + free(res[1]); + free(cmd); return STATUS(ctx); } diff --git a/org.glite.lbjp-common.server-bones/src/srvbones.c b/org.glite.lbjp-common.server-bones/src/srvbones.c index 3fcc41c..ed052ee 100644 --- a/org.glite.lbjp-common.server-bones/src/srvbones.c +++ b/org.glite.lbjp-common.server-bones/src/srvbones.c @@ -325,10 +325,17 @@ static int dispatchit(int sock_slave, int sock, int sidx) getpeername(conn, (struct sockaddr *)&a, &alen); pom = (char *) &a.sin_addr.s_addr; - dprintf(("[master] %s connection from %d.%d.%d.%d:%d\n", - services[sidx].id? services[sidx].id: "", - (int)pom[0], (int)pom[1], (int)pom[2], (int)pom[3], - ntohs(a.sin_port))); + if (a.sin_family == PF_LOCAL) { + dprintf(("[master] %s connection from local socket\n", + services[sidx].id? services[sidx].id: "")); + } + else { + dprintf(("[master] %s connection from %d.%d.%d.%d:%d\n", + services[sidx].id? services[sidx].id: "", + (int)pom[0], (int)pom[1], (int)pom[2], (int)pom[3], + ntohs(a.sin_port))); + } + ret = 0; if ( ( clnt_dispatched < clnt_accepted /* wraparound */ @@ -481,7 +488,8 @@ static int slave(slave_data_init_hnd data_init_hnd, int sock) req_cnt++; first_request = 0; to = set_request_to; - if ((rv = services[srv].on_request_hnd(conn,to.tv_sec>=0 ? &to : NULL,clnt_data)) == ENOTCONN) { + rv = services[srv].on_request_hnd(conn,to.tv_sec>=0 ? &to : NULL,clnt_data); + if ( (rv == ENOTCONN) || (rv == ECONNREFUSED) ) { if (services[srv].on_disconnect_hnd && (rv = services[srv].on_disconnect_hnd(conn,NULL,clnt_data))) {