#!/usr/bin/perl -w

# glite-info-service-glue2: an information provider for the Service,
# Endpoint and AccessPolicy objects, in v 2.0 of the GLUE schema
# Ref: http://www.ogf.org/documents/GFD.147.pdf
#
# NB Currently this code assumes that there is only one Endpoint per Service
#
# Stephen Burke, v 1.0 August 2009
# 5/3/10:  Add the EGEE copyright and license text
#
# Arguments are:
#    -   a service-specific configuration file
#    -   the GLUE2DomainID (i.e. the site unique ID)
#    -   optionally the GLUE2ServiceID (otherwise this will be
#        automatically constructed)
#
# Copyright (c) Members of the EGEE Collaboration. 2009-2010.
# See http://www.eu-egee.org/partners/ for details on the copyright
# holders.
#
#     Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

use strict;
use FileHandle;
use POSIX qw(strftime);

my $confile;
my $SiteID;
my $UID;
my $command;

my $gis_version = "1.0";

my $host = `hostname -f`;
chomp($host);

my $bind_dn = "GLUE2GroupID=resource,o=glue";

# Hardwire the data validity period to 1 hour for now
my $validity = "3600";

# The site name (GLUE2DomainID) should be the second argument

if ($ARGV[1]){
    $SiteID = $ARGV[1];
}else{
    print STDERR "Usage: glite-info-service-glue2 <config-file> <site-ID> [<service-ID>]\n";
    exit 1;
}

# Read in the configuration file (first argument)

my %config = (
        init => '',
        service_type => '',
        get_version => '',
        get_endpoint => '',
        get_status => '',
        WSDL_URL => '',
        semantics_URL => '',
        get_starttime => '',
        get_owner => '',
        get_acbr => '',
	get_data => '',
        get_services => '',
	      );

if ($ARGV[0]){
    $confile = $ARGV[0];
    my $fh = new FileHandle $confile
        or die "Error: Can't open configuration file: $ARGV[0]\n";
    foreach (<$fh>){
        if ((! m/^\#/) & (m/=/)){
# For lines containing an = and not starting with #
            m/^(.*?)=\s*(.*)\s*$/;
# Split on the first =, no leading or trailing whitespace in the value
            my $key=$1;
            my $value=$2;
            $key=~s/\s+//g;
            $config{$key}=$value;
        }
    }
}else{
    print STDERR "Usage: glite-info-service-glue2 <config-file> <site-ID>  [<service-ID>]\n";
    exit 1;
}

# Check that all configuration items are set

my $missing = 0;
while ( my ($key, $value) = each(%config) ) {
    if ( ! $config{$key} ){
        print STDERR "Error: configuration parameter $key not set\n";
        $missing += 1;
    }
}
if ( $missing ) {
    print STDERR "Error: $missing configuration parameter(s) not set in file $confile\n";
    exit 2;
}

# Export the GLUE2DomainID as an environment variable so it can be used
# in the scripts and config items

$ENV{"GLITE_INFO_SERVICE_GLUE2_SITE_ID"} = $SiteID;

# Most of the configuration parameters are commands to be executed.
# We'll do that first in case something blows up ...
# In general we won't bother about the return code, but the output is
# error-checked below.
#
# First thing is an init command, in case anything needs to be set up.
# Any output lines are used to set environment variables (x=y).
# Also a non-zero return code here will abort the provider without
# outputting anything.

$command = $config{init};
my @init_result = `$command`;
my $init_rc = $? >> 8;
if ( $init_rc ){
    print STDERR "Error: init command $command failed\n";
    print STDERR "@init_result\n";
    exit $init_rc;
}

foreach (@init_result) {
# Should be key=value pairs
    if ( m/=/ ){
        m/^(.*?)=\s*(.*)\s*$/;
# Split on the first =, no leading or trailing whitespace in the value
        my $key=$1;
        my $value=$2;
        $key=~s/\s+//g;
	if ($key) {
	    $ENV{$key} = $value;
	}
    }
}

$command = $config{get_version};
my $Version = `$command`;

$command = $config{get_endpoint};
my $Endpoint = `$command`;

$command = $config{get_status};
my $Info = `$command`;
my $Status = $? >> 8;

$command = $config{get_starttime};
my $ST = `$command`;

# NB Owner and ACBR are multivalued, so they get read into lists

$command = $config{get_owner};
my @Owner = `$command`;

$command = $config{get_acbr};
my @ACBR = `$command`;

# OtherInfo is not yet supported
$command = "echo";
my @ServiceOtherInfo = `$command`;
my @EndpointOtherInfo = `$command`;

# Capabilities are not yet supported
$command = "echo";
my @Capabilities = `$command`;

# Quality level not settable yet
my $QualityLevel = "production";

# Serving state not settable yet
my $ServingState = "production";

# StatusInfo is not yet supported
my @StatusInfo = "";

# Implementor, ImplementationName and ImplementationVersion are not
# yet supported

my $Implementor = ""; 
my $ImplementationName = "";
my $ImplementationVersion = "";

# Not clear if this should be configurable
my $TrustedCA = "IGTF";

# ServiceData and related service items are also multivalued

$command = $config{get_data};
my @ServiceData = `$command`;

$command = $config{get_services};
my @RelatedServices = `$command`;

# Extract the service type

my $Type = $config{service_type};

# The GLUE2ServiceID needs to be unique by construction.
# Also there is a need to watch out for invalid characters as
# the ID is used to form the DN. The solution chosen is to append a
# checksum of the config file to the hostname and service type. However,
# there may be a need to preserve a persistent ID in some cases,
# hence the entire ID can be passed as an optional third argument.

if ($ARGV[2]) {
    $UID = $ARGV[2];
} else {
    my $check = `cksum $confile | cut -d\" \" -f 1`;
    chomp($check);
# Note that this may cause trouble if hostname -f does not return an fqdn,
# but I think that is normally required by other things anyway 
    $UID = $host . "_" . $Type . "_" . $check;
}

# Now start outputting LDIF lines for the Service object.
# Note that once we get here we are committed to printing a
# complete, valid object. Start with the DN ...

print "dn: GLUE2ServiceID=$UID,$bind_dn\n";

# Print the boilerplate objectclass declarations and unique ID

print "objectClass: GLUE2Entity\n";
print "objectClass: GLUE2Service\n";
print "GLUE2ServiceID: $UID\n";

# Attributes are generally dealt with in the order in which they appear
# in the GLUE 2 specification document, unless dependencies dictate
# otherwise. Comments are included for all attributes, even if this
# provider doesn't support them.

# Creation time and validity are standard attributes for all objects

# Times are mandated to be UTC only
my $TimeNow = strftime("%Y-%m-%dT%H:%M:%SZ", gmtime());
print "GLUE2EntityCreationTime: $TimeNow\n";

print "GLUE2EntityValidity: $validity\n";

# The name is just an indicative human-readable string, here of the
# form site-type (taking the last component of compound types).

my @last = split /\./, $Type;
my $Name = $SiteID . "-" . $last[$#last];

print "GLUE2EntityName: $Name\n";

# NB OtherInfo is not yet supported, but this would print it if defined above

foreach (@ServiceOtherInfo) {
# No leading or trailing spaces, and limit to a reasonable length
    s/^\s+//;
    s/\s+$//;
    if (length > 240) {
        $_ = substr($_, 0, 239) . "4444";
    }
    if ($_) {
        print "GLUE2EntityOtherInfo: $_\n";
    }
}

# Embed some metadata to help with debugging

print "GLUE2EntityOtherInfo: InfoProviderName=glite-info-service-glue2\n";
print "GLUE2EntityOtherInfo: InfoProviderVersion=$gis_version\n";
print "GLUE2EntityOtherInfo: InfoProviderHost=$host\n";

# NB Capabilities are not yet supported, but this would print them
# if defined above

foreach (@Capabilities) {
# Should be no need for validation as it will be fixed per service type,
# but still strip leading and trailing spaces
    s/^\s+//;
    s/\s+$//;
    if ($_) {
        print "GLUE2ServiceCapability: $_\n";
    }
}

# Type is already extracted above. For the simple services covered by
# this provider, assume for now that the Service and Endpoint types
# are the same.

print "GLUE2ServiceType: $Type\n";

# NB QualityLevel is currently hardwired
print "GLUE2ServiceQualityLevel: $QualityLevel\n";

# NB StatusInfo URLs are not yet supported, but this would print them
# if defined above

foreach (@StatusInfo) {
# Strip all whitespace
    s/\s+//g;
    if (m/^http/) {
	print "GlueServiceStatusInfo: $_\n";
    }
}

# This provider only supports simple services
print "GLUE2ServiceComplexity: endpointType=1, share=0, resource=0\n";

# Reference to the parent site (AdminDomain)

print "GLUE2ServiceAdminDomainForeignKey: $SiteID\n";

# Links to any related services go here.
# This could do with being supported better, but it isn't entirely
# obvious how to do it. Currently only the FTS uses this and that has
# its own info provider. Probably this should be expanded if/when there
# is a real use-case.

foreach (@RelatedServices) {
    s/\s+//g;
    if (length > 240) {
        $_ = substr($_, 0, 239) . "4444";
    }
    if ($_) {
        print "GLUE2ServiceServiceForeignKey: $_\n";
    }
}

# print a newline to finish the object
print "\n";

# That's it for the Service object, now start on the Endpoint.

# We need a unique ID for the Endpoint - for now take a simple
# solution and just append _1 to the Service ID. Currently this
# provider only supports one Endpoint per Service.

my $EUID = $UID . "_1";

# Start with the DN ...

print "dn: GLUE2EndpointID=$EUID,GLUE2ServiceID=$UID,$bind_dn\n";

# Print the boilerplate objectclass declarations and unique ID

print "objectClass: GLUE2Entity\n";
print "objectClass: GLUE2Endpoint\n";
print "GLUE2EndpointID: $EUID\n";

# Creation time and validity are standard attributes for all objects

print "GLUE2EntityCreationTime: $TimeNow\n";
print "GLUE2EntityValidity: $validity\n";

# The name is just an indicative human-readable string. For now just use
# the same name as the Service.

print "GLUE2EntityName: $Name\n";

# NB OtherInfo is not yet supported, but this would print it if defined above

foreach (@EndpointOtherInfo) {
# No leading or trailing spaces, and limit to a reasonable length
    s/^\s+//;
    s/\s+$//;
    if (length > 240) {
        $_ = substr($_, 0, 239) . "4444";
    }
    if ($_) {
        print "GLUE2EntityOtherInfo: $_\n";
    }
}

# Embed some metadata to help with debugging

print "GLUE2EntityOtherInfo: InfoProviderName=glite-info-service-glue2\n";
print "GLUE2EntityOtherInfo: InfoProviderVersion=$gis_version\n";
print "GLUE2EntityOtherInfo: InfoProviderHost=$host\n";

# We don't print a null endpoint (bad LDAP) but otherwise take it as
# it comes, aside from basic checks

# URLs don't have white space
$Endpoint =~ s/\s+//g;

# Sanity check on the length
if (length($Endpoint) > 240) {
    $Endpoint = substr($Endpoint, 0, 239) . "4444";
}

if ($Endpoint) {
    print "GLUE2EndpointURL: $Endpoint\n";
}

# NB Capabilities are not yet supported, but this would print them
# if defined above

foreach (@Capabilities) {
# Should be no need for validation as it will be fixed per service type,
# but still strip leading and trailing spaces
    s/^\s+//;
    s/\s+$//;
    if ($_) {
        print "GLUE2EndpointCapability: $_\n";
    }
}

# For Technology see WSDL below - basically assume that if there's a WSDL
# the technology is a web service, and otherwise don't print anything

# Type is already extracted above. For the simple services covered by
# this provider, assume for now that the Service and Endpoint types
# are the same.

print "GLUE2EndpointInterfaceName: $Type\n";

# The version can now have a more flexible format than we had in GLUE 1.3
# and can be multi-valued, but for now just re-use the same code.
# The Version should be the version of the endpoint interface in the form
# x.y.z. In the case of an error we return 4.4.4.4 by analogy with
# the CE provider. Two-component versions are padded with a .0 and single
# values with .0.0.

# No white space allowed here
$Version =~ s/\s+//g;

# Simple format check (NB Currently allows non-numeric components)
# RGMA defines Version as varchar(20)
my $minor;
my $patch;
my $bad;
(undef, $minor, $patch, $bad) = split /\./, $Version;
if ( defined($bad) || (length($Version) > 16) || (length($Version) == 0) ) {
    print "GLUE2EndpointInterfaceVersion: 4.4.4.4\n";
} elsif ( ! defined($minor) ) {
    print "GLUE2EndpointInterfaceVersion: $Version.0.0\n";
} elsif ( ! defined($patch) ) {
    print "GLUE2EndpointInterfaceVersion: $Version.0\n";
} else {
    print "GLUE2EndpointInterfaceVersion: $Version\n";
}

# Interface extensions and supported profiles are not currently supported

# WSDL and Semantics should be URLs starting with "http", no whitespace.
# Hopefully no need to check the length as these are hard-wired in the
# config file.
# NB These attributes are now multivalued, unlike GLUE 1.3, but for now
# only a single value is supported.

my $WSDL = $config{WSDL_URL};
$WSDL =~ s/\s+//g;
if ($WSDL =~ m/^http/) {
    print "GLUE2EndpointTechnology: webservice\n";
    print "GLUE2EndpointWSDL: $WSDL\n";
}

my $Semantics = $config{semantics_URL};
$Semantics =~ s/\s+//g;
if ($Semantics =~ m/^http/) {
    print "GLUE2EndpointSemantics: $Semantics\n";
}

# Implementor, ImplementationName and ImplementationVersion are not
# currently supported, but this would print them if defined above

$Implementor =~ s/^\s+//;
$Implementor =~ s/\s+$//;
if ($Implementor) {
    print "GLUE2EndpointImplementor: $Implementor\n";
}

$ImplementationName =~ s/^\s+//;
$ImplementationName =~ s/\s+$//;
if ($ImplementationName) {
    print "GLUE2EndpointImplementationName: $ImplementationName\n";
}

$ImplementationVersion =~ s/^\s+//;
$ImplementationVersion =~ s/\s+$//;
if ($ImplementationVersion) {
    print "GLUE2EndpointImplementationVersion: $ImplementationVersion\n";
}

# NB QualityLevel is currently hardwired
print "GLUE2EndpointQualityLevel: $QualityLevel\n";

# The status return code gets turned into the standard values:
# 0 = OK, 1 = Critical, 2 = Warning, 3 = Unknown, other = Other

my $Statcode;
if    ($Status == 0) { $Statcode = "ok" }
elsif ($Status == 1) { $Statcode = "critical" }
elsif ($Status == 2) { $Statcode = "warning" }
elsif ($Status == 3) { $Statcode = "unknown" }
else                 { $Statcode = "other" } 

print "GLUE2EndpointHealthState: $Statcode\n";

# The status command will have printed something, maybe multi-line,
# to stdout - we need to massage it to something reasonable

# Strip leading and trailing spaces and convert any newlines to spaces
$Info =~ s/^\s+//;
$Info =~ s/\s+$//;
$Info =~ s/\n+/ /g;

# Strip anything after a control character, e.g. from service xxx status
# (Could just strip the controls themselves but it can leave
# strange-looking strings)
$Info =~ s/\p{IsC}.*$//;

# Truncate if necessary
if (length($Info) > 240) {
    $Info = substr($Info, 0, 239) . " ...";
}

# Too short
if (length($Info) < 2) {
    $Info = "??";
}

print "GLUE2EndpointHealthStateInfo: $Info\n";

# ServingState is currently just hardwired above but should become
# configurable

print "GLUE2EndpointServingState: $ServingState\n";

# StartTime needs to be in the ISO8601 format, but it seems easier
# to accept a Unix timestamp (e.g. from stat on a suitable file) and
# convert it. The conversion function was shamelessly stolen from the web.
# This uses local time rather than UTC, both formats are valid and it
# might be useful to know the timezone at the server.

# No white space, should just be a number
# The error indicator here is the Unix epoch
$ST =~ s/\s+//g;
if ( !($ST =~ m/^\d+$/) ) {
    $ST = 0;
}

# We need to munge the timezone indicator to add a colon between
# the hours and minutes
my $tz = strftime("%z", localtime($ST));
$tz =~ s/(\d{2})(\d{2})/$1:$2/;
my $StartTime = strftime("%Y-%m-%dT%H:%M:%S", localtime($ST)) . $tz;

# No error checks as strftime should return something reasonable
print "GLUE2EndpointStartTime: $StartTime\n";

# This should give us the host cert issuer, if any

my $Issuer = "";
my $hostcert = "/etc/grid-security/hostcert.pem";
if ( -e $hostcert) {
#    $Issuer = `grid-cert-info -file $hostcert -issuer`;
    $Issuer = `openssl x509 -issuer -noout -in $hostcert | sed 's/^[^/]*//'`;
}

# Output whatever it gave us, if anything

$Issuer =~ s/^\s+//;
$Issuer =~ s/\s+$//;
if ($Issuer) {
    print "GLUE2EndpointIssuerCA: $Issuer\n";
}

# For now just ouputs IGTF as the trusted CA - not sure what we want here

print "GLUE2EndpointTrustedCA: $TrustedCA\n";

# Downtimes are handled by the GOC DB in EGEE, so not published here

print "GLUE2EndpointDownTimeInfo: See the GOC DB for downtimes: https://goc.gridops.org/\n";

# Finally print the upward link to the parent Service

print "GLUE2EndpointServiceForeignKey: $UID\n";

# Print a newline to finish the object
print "\n";

# Now we deal with old-style ServiceData entries. For now treat these as
# Extensions to the Endpoint as that's the closest match to the GLUE 1 format.

my %items = ();

foreach (@ServiceData) {
# Should be key=value pairs
    if ( m/=/ ){
        m/^(.*?)=\s*(.*)\s*$/;
# Split on the first =, no leading or trailing whitespace in the value
        my $key=$1;
        my $value=$2;
        $key=~s/\s+//g;
# R-GMA has varchar(100)
        if (length($key) > 99) {
            $key = substr($key, 0, 94) . "4444";
        }
        if (length($value) > 99) {
            $value = substr($value, 0, 94) . "4444";
        }
# Don't output anything if the key is null
        if (length($key) > 0) {
            $items{$key} = $value;
        }else{
            print STDERR "Extension Key has 0 length, value is: " . $value . "\n";
	}
    }
}

# Now print the Extension objects - the order is random but it doesn't matter

my $ext = 0;
my $ELID = "0";
while ( my ($key, $value) = each(%items) ) {
# Extensions only need a local ID - but note that the Key may not be unique
    $ext++;
    $ELID = $key . "_" . $ext;
    print "dn: GLUE2ExtensionLocalID=$ELID,GLUE2EndpointID=$EUID,GLUE2ServiceID=$UID,$bind_dn\n";
    print "objectClass: GLUE2Extension\n";
    print "GLUE2ExtensionLocalID: $ELID\n";
    print "GLUE2ExtensionKey: $key\n";
    if (length($value) > 0) {
        print "GLUE2ExtensionValue: $value\n";
    }else{
# We are required to print a value
        print "GLUE2ExtensionValue: UNDEFINEDVALUE\n";
    }
    print "GLUE2ExtensionEntityForeignKey: $EUID\n";
    print "\n";
}

# That's it for the Endpoint, now start on the Access Policies

# GLUE 1 ACBRs become Rules in a new AccessPolicy object, and the old
# Owner attribute becomes a reference to an Admin Domain.

# We need a unique ID for the object - for now take a simple
# solution and just append _AP to the Endpoint ID. Assume that
# we'll only publish one AP object, i.e. one scheme (the gLite scheme),
# per Endpoint.

my $APUID = $EUID . "_AP";

# Start with the DN ...

print "dn: GLUE2PolicyID=$APUID,GLUE2EndpointID=$EUID,GLUE2ServiceID=$UID,$bind_dn\n";

# Print the boilerplate objectclass declarations and unique ID

print "objectClass: GLUE2Entity\n";
print "objectClass: GLUE2Policy\n";
print "objectClass: GLUE2AccessPolicy\n";
print "GLUE2PolicyID: $APUID\n";

# Creation time and validity are standard attributes for all objects

print "GLUE2EntityCreationTime: $TimeNow\n";
print "GLUE2EntityValidity: $validity\n";

# The name is just an indicative human-readable string. For now just use
# the same name as the Service with " access control rules" appended

print "GLUE2EntityName: $Name access control rules\n";

# Probably no need for configurable OtherInfo here?

# Embed some metadata to help with debugging

print "GLUE2EntityOtherInfo: InfoProviderName=glite-info-service-glue2\n";
print "GLUE2EntityOtherInfo: InfoProviderVersion=$gis_version\n";
print "GLUE2EntityOtherInfo: InfoProviderHost=$host\n";

# The policy scheme needs a name: arbitrarily define this as org.glite.standard

print "GLUE2PolicyScheme: org.glite.standard\n";

# Now for the actual rules - note that we must have at least one.

# Strip leading and trailing white space. NB DNs may contain spaces.
# Empty lines should not be printed as they aren't valid LDAP, and make
# a basic sanity check for length.

my $rules = 0;
foreach (@ACBR) {
    s/^\s+//;
    s/\s+$//;
    if (length > 240) {
        $_ = substr($_, 0, 239) . "4444";
    }
    if ($_) {
	$rules++;
        print "GLUE2PolicyRule: $_\n";
    }
}

# "ALL" is a reserved word meaning that there is no authz
if ($rules == 0) {
    print "GLUE2PolicyRule: ALL\n";
}

# The Owner can be multivalued, and can be null - typically a list of VO names.
# Strip white space (this is supposed to be a unique ID).
# Empty lines should not be printed as they aren't valid LDAP.
# Sanity check for length.

$rules = 0;
foreach (@Owner) {
    s/\s+//g;
    if (length > 240) {
	$_ = substr($_, 0, 239) . "4444";
    }
    if ($_) {
	$rules++;
	print "GLUE2PolicyUserDomainForeignKey: $_\n";
    }
}

# The spec says that there must be at least one UserDomain reference,
# which is probably a mistake! We should try to avoid this being a valid
# VO name ...
if ($rules == 0) {
    print "GLUE2PolicyUserDomainForeignKey: \$UNDEFINED\$\n";
}

# Finally print the upward link to the parent Endpoint

print "GLUE2AccessPolicyEndpointForeignKey: $EUID\n";

# Print a newline to finish the object
print "\n";

exit 0;
