From 491d8b4696f49afbcd83c08b0af198c95f3c0eef Mon Sep 17 00:00:00 2001
From: František Dvořák
Date: Sat, 17 Jan 2015 13:13:47 +0100
Subject: [PATCH] Implement statistics gathering.

The first statistics to watch are HDFS disks and data.

---
 files/accounting/create.sql                | 126 +++++++++++++++++++++++++++++
 files/accounting/hdfs.awk                  |  64 +++++++++++++++
 manifests/accounting.pp                    |  90 +++++++++++++++++++++
 manifests/init.pp                          |   3 +
 manifests/params.pp                        |   5 ++
 templates/accounting/cron-hdfs.erb         |   5 ++
 templates/accounting/hadoop-accounting.erb |   9 +++
 templates/accounting/hdfs.sh.erb           |  27 +++++++
 8 files changed, 329 insertions(+)
 create mode 100644 files/accounting/create.sql
 create mode 100644 files/accounting/hdfs.awk
 create mode 100644 manifests/accounting.pp
 create mode 100644 templates/accounting/cron-hdfs.erb
 create mode 100644 templates/accounting/hadoop-accounting.erb
 create mode 100755 templates/accounting/hdfs.sh.erb
diff --git a/files/accounting/create.sql b/files/accounting/create.sql
new file mode 100644
index 0000000..6631f5b
--- /dev/null
+++ b/files/accounting/create.sql
@@ -0,0 +1,126 @@
+--
+-- Accounting for Hadoop
+--
+-- How to add values:
+--
+-- INSERT INTO measure (name) VALUES ('quota');
+-- INSERT INTO quota (id_measure, user, used) VALUES (last_insert_id(), 'valtri', 17);
+-- INSERT INTO quota (id_measure, user, used) VALUES (last_insert_id(), 'nemo', 1);
+--
+-- INSERT INTO hdfs (full, disk, disk_used) VALUES (1024, 1023, 10);
+-- or:
+-- INSERT INTO measure (name) VALUES ('hdfs');
+-- INSERT INTO hdfs (id_measure, full, disk, disk_used, block_under, block_corrupt, block_missing) VALUES (last_insert_id(), 10240, 10230, 100, 0, 0, 0);
+-- INSERT INTO hdfs (id_measure, hostname, state, full, disk, disk_used) VALUES (last_insert_id(), 'hador1', 1, 1024, 1023, 10);
+--
+-- INSERT INTO measure (name, start, end) VALUES ('jobstat', '2015-01-16', '2015-01-17');
+-- INSERT INTO jobstat (id_measure, user, total, fails, wait_min, wait_avg, wait_max) VALUES (last_insert_id(), 'valtri', 10, 2, 0, 50, 100);
+-- INSERT INTO jobstat (id_measure, user, total, fails, wait_min, wait_avg, wait_max) VALUES (last_insert_id(), 'nemo', 0, 0, NULL, NULL, NULL);
+--
+-- How to read values:
+--
+-- a) all history
+--
+-- SELECT * FROM view_hdfs;
+-- SELECT * FROM view_quotas;
+--
+-- b) current values
+--
+-- SELECT h.* FROM view_hdfs h, statistic s WHERE h.seq=s.last_seq;
+-- SELECT q.* FROM view_quotas q, statistic s WHERE q.seq=s.last_seq;
+--
+
+CREATE TABLE statistic (
+    name CHAR(8) NOT NULL,
+    last_id_measure INTEGER,
+    last_seq INTEGER,
+
+    INDEX (last_id_measure),
+    INDEX (last_seq)
+);
+
+CREATE TABLE measure (
+    id_measure INTEGER AUTO_INCREMENT PRIMARY KEY,
+    name CHAR(8) NOT NULL,
+    seq INTEGER NOT NULL,
+    time TIMESTAMP DEFAULT NOW(),
+    start TIMESTAMP NULL DEFAULT NULL,
+    end TIMESTAMP NULL DEFAULT NULL,
+
+    INDEX (id_measure),
+    INDEX (name),
+    INDEX (seq)
+);
+
+CREATE TABLE hdfs (
+    id_measure INTEGER NOT NULL,
+    hostname CHAR(50),
+    state INTEGER,
+    full BIGINT,
+    disk BIGINT,
+    disk_used BIGINT,
+    disk_free BIGINT,
+    block_under INTEGER,
+    block_corrupt INTEGER,
+    block_missing INTEGER,
+
+    CONSTRAINT PRIMARY KEY (id_measure, hostname),
+    INDEX(id_measure),
+    INDEX(hostname)
+);
+
+CREATE TABLE quota (
+    id_measure INTEGER NOT NULL,
+    user CHAR(20) NOT NULL,
+    used BIGINT,
+
+    CONSTRAINT PRIMARY KEY (id_measure, user),
+    INDEX(id_measure),
+    INDEX(user)
+);
+
+CREATE TABLE jobstat (
+    id_measure INTEGER NOT NULL,
+    user CHAR(20) NULL,
+    total INTEGER,
+    fails INTEGER,
+    wait_min INTEGER,
+    wait_avg INTEGER,
+    wait_max INTEGER,
+
+    CONSTRAINT PRIMARY KEY (id_measure, user),
+    INDEX(id_measure),
+    INDEX(user)
+);
+
+INSERT INTO statistic (name, last_seq) VALUES ('hdfs', 0);
+INSERT INTO statistic (name, last_seq) VALUES ('quota', 0);
+INSERT INTO statistic (name, last_seq) VALUES ('jobstat', 0);
+
+DELIMITER //
+
+CREATE TRIGGER bi_measure BEFORE INSERT ON measure
+FOR EACH ROW BEGIN
+    SET NEW.seq=(SELECT last_seq+1 FROM statistic s WHERE NEW.name=s.name);
+END; //
+
+CREATE TRIGGER ai_measure AFTER INSERT ON measure
+FOR EACH ROW BEGIN
+    UPDATE statistic s SET s.last_seq=s.last_seq+1, s.last_id_measure=NEW.id_measure WHERE s.name=NEW.name;
+END; //
+
+-- not needed, id_measure should always be specified
+CREATE TRIGGER ai_hdfs BEFORE INSERT ON hdfs
+FOR EACH ROW BEGIN
+    IF NEW.id_measure IS NULL OR NEW.id_measure=0 THEN
+        INSERT INTO measure (name) VALUES ('hdfs');
+        SET NEW.id_measure=last_insert_id();
+    END IF;
+END; //
+
+DELIMITER ;
+
+CREATE VIEW view_measures AS SELECT m.* FROM measure m, statistic s WHERE s.last_id_measure = m.id_measure;
+CREATE VIEW view_hdfs AS SELECT m.seq, m.time, h.hostname, h.full, h.disk, h.disk_used, h.disk_free, h.block_under, h.block_corrupt, h.block_missing FROM hdfs h, measure m WHERE h.id_measure=m.id_measure;
+CREATE VIEW view_quotas AS SELECT m.seq, m.time, q.user, q.used FROM quota q, measure m WHERE q.id_measure=m.id_measure;
+CREATE VIEW view_jobstat AS SELECT m.seq, m.time, m.start, m.end, j.user, j.total, j.fails, j.wait_min, j.wait_avg, j.wait_max FROM jobstat j, measure m WHERE j.id_measure=m.id_measure;
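
The measure/statistic bookkeeping above is driven entirely by the triggers: inserting a row into measure assigns the next per-name sequence number (bi_measure) and advances statistic (ai_measure), so callers never touch seq or last_seq directly. A minimal sketch of one round trip, not part of the diff itself and using only the schema above (the numbers are invented; the s.name filter is added here to pick the right statistic row, since that table holds one row per measurement type):

    -- record one HDFS snapshot; the triggers fill in seq and update statistic
    INSERT INTO measure (name) VALUES ('hdfs');
    INSERT INTO hdfs (id_measure, hostname, state, full, disk, disk_used)
        VALUES (last_insert_id(), 'hador1', 1, 1024, 1023, 10);

    -- read back only the most recent snapshot
    SELECT h.* FROM view_hdfs h, statistic s
        WHERE s.name = 'hdfs' AND h.seq = s.last_seq;
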
diff --git a/files/accounting/hdfs.awk b/files/accounting/hdfs.awk
new file mode 100644
index 0000000..94e406b
--- /dev/null
+++ b/files/accounting/hdfs.awk
@@ -0,0 +1,64 @@
+#
+# Parsing output of:
+#
+#   hdfs dfsadmin -report
+#
+
+function dbstr(s) {
+    if (s) { return "'" s "'" }
+    else { return "NULL" }
+}
+
+function dbi(i) {
+    if (i >= 0) { return i }
+    else { return "NULL" }
+}
+
+function reset() {
+    name="(none)";
+    full=-1;
+    disk=-1;
+    disk_free=-1;
+    disk_used=-1;
+    block_under=-1;
+    block_corrupt=-1;
+    block_missing=-1;
+#    cache=-1;
+#    cache_used=-1;
+#    cache_free=-1;
+}
+
+BEGIN {
+    reset();
+    name="all";
+    state=-1;
+
+    FS="[: ]+";
+    CONVFMT="%d";
+
+    print "INSERT INTO measure (name) VALUES ('hdfs');";
+}
+
+/^Live datanodes.*/ {state=1}
+/^Dead datanodes.*/ {state=2}
+/^Decommissioned .*/ {state=3}
+
+/^Hostname:.*/ {name=$2}
+/^Name:.*/ {ip=$2}
+/^Configured Capacity:.*/ {full=$3}
+/^Present Capacity:.*/ {disk=$3}
+/^DFS Remaining:.*/ {disk_free=$3}
+/^DFS Used:.*/ {disk_used=$3}
+/^Under replicated blocks:.*/ {block_under=$4}
+/^Blocks with corrupt replicas:.*/ {block_corrupt=$5}
+/^Missing blocks:.*/ {block_missing=$3}
+#/^Configured Cache Capacity:.*/{cache=$4}
+#/^Cache Used:.*/ {cache_used=$3}
+#/^Cache Remaining:.*/ {cache_free=$3}
+
+/^$/ {
+    if (name != "(none)" && ip !~ /^10\./) {
+        print "INSERT INTO hdfs (id_measure, hostname, state, full, disk, disk_free, disk_used, block_under, block_corrupt, block_missing) VALUES (last_insert_id(), " dbstr(name) ", " dbi(state) ", " dbi(full) ", IFNULL(" dbi(disk) ", " dbi(disk_free) " + " dbi(disk_used) "), " dbi(disk_free) ", " dbi(disk_used) ", " dbi(block_under) ", " dbi(block_corrupt) ", " dbi(block_missing) ");";
+    }
+    reset()
+}
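
The awk script above turns one "hdfs dfsadmin -report" run into a batch of SQL on stdout: one measure row, then one hdfs row for the cluster summary (stored under hostname 'all') and one per reported datanode, skipping nodes whose Name: address starts with 10. A sketch of that output for the summary and a single live datanode, not part of the diff itself; the hostname and byte counts are invented for illustration:

    INSERT INTO measure (name) VALUES ('hdfs');
    INSERT INTO hdfs (id_measure, hostname, state, full, disk, disk_free, disk_used, block_under, block_corrupt, block_missing)
        VALUES (last_insert_id(), 'all', NULL, 53687091200, IFNULL(53682896896, 48318382080 + 5364514816), 48318382080, 5364514816, 0, 0, 0);
    INSERT INTO hdfs (id_measure, hostname, state, full, disk, disk_free, disk_used, block_under, block_corrupt, block_missing)
        VALUES (last_insert_id(), 'hador1.example.com', 1, 10737418240, IFNULL(NULL, 9663676416 + 1072693248), 9663676416, 1072693248, NULL, NULL, NULL);
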
diff --git a/manifests/accounting.pp b/manifests/accounting.pp
new file mode 100644
index 0000000..0b4341c
--- /dev/null
+++ b/manifests/accounting.pp
@@ -0,0 +1,90 @@
+# == Class site_hadoop::accounting
+#
+# Requires:
+# * database
+# * hdfs user and group (=hadoop)
+#
+# For example using puppetlabs-mysql and cesnet-hadoop:
+#
+#   mysql::db { 'accounting':
+#     user => 'accounting',
+#     password => 'accpass',
+#     host => 'localhost',
+#     grant => ['SELECT', 'INSERT', 'UPDATE', 'DELETE'],
+#     sql => '/usr/local/share/hadoop/accounting.sql',
+#   }
+#
+#   Class['site_hadoop::accounting'] -> Mysql::Db['accounting']
+#   Class['hadoop::nameserver::install'] -> Class['site_hadoop::accounting']
+#
+# === Parameters
+#
+# [*email*] undef
+#
+#   Email address to send errors from cron.
+#
+# [*hdfs*] undef
+#
+#   Enable storing global HDFS disk and data statistics. The value is the schedule in cron format (see *man 5 crontab*).
+#
+class site_hadoop::accounting(
+  $email = undef,
+  $hdfs = undef,
+) {
+  file {'/usr/local/bin/accounting-hdfs':
+    owner => 'root',
+    group => 'root',
+    mode => '0755',
+    content => template('site_hadoop/accounting/hdfs.sh.erb'),
+  }
+
+  file{'/usr/local/share/hadoop':
+    ensure => 'directory',
+    owner => 'root',
+    group => 'root',
+    mode => '0755',
+  }
+  ->
+  file {'/usr/local/share/hadoop/accounting-hdfs.awk':
+    owner => 'root',
+    group => 'root',
+    mode => '0644',
+    source => 'puppet:///modules/site_hadoop/accounting/hdfs.awk',
+  }
+
+  file{'/usr/local/share/hadoop/accounting.sql':
+    owner => 'root',
+    group => 'root',
+    mode => '0644',
+    source => 'puppet:///modules/site_hadoop/accounting/create.sql',
+  }
+
+  $db_name = $site_hadoop::db_name
+  $db_user = $site_hadoop::db_user
+  $db_password = $site_hadoop::db_password
+  if $db_name or $db_user or $db_password {
+    file{"${site_hadoop::defaultconfdir}/hadoop-accounting":
+      owner => 'hdfs',
+      group => 'hdfs',
+      mode => '0400',
+      content => template('site_hadoop/accounting/hadoop-accounting.erb'),
+    }
+  } else {
+    file{"${site_hadoop::defaultconfdir}/hadoop-accounting":
+      ensure => 'absent',
+    }
+  }
+
+  if $hdfs {
+    file{'/etc/cron.d/accounting-hdfs':
+      owner => 'root',
+      group => 'root',
+      mode => '0644',
+      content => template('site_hadoop/accounting/cron-hdfs.erb'),
+    }
+  } else {
+    file{'/etc/cron.d/accounting-hdfs':
+      ensure => 'absent',
+    }
+  }
+}
diff --git a/manifests/init.pp b/manifests/init.pp
index 12f2c31..49be157 100644
--- a/manifests/init.pp
+++ b/manifests/init.pp
@@ -3,6 +3,9 @@
 # Basic system configurations for Hadoop cluster on Meta.
 #
 class site_hadoop (
+  $db_name = undef,
+  $db_user = undef,
+  $db_password = undef,
   $mirror = $site_hadoop::params::mirror,
 ) inherits site_hadoop::params {
   include 'site_hadoop::install'
diff --git a/manifests/params.pp b/manifests/params.pp
index f061ad9..33870ad 100644
--- a/manifests/params.pp
+++ b/manifests/params.pp
@@ -1,4 +1,9 @@
 class site_hadoop::params {
+  $defaultconfdir = $::osfamily ? {
+    debian => '/etc/default',
+    redhat => '/etc/sysconfig',
+  }
+
   case $::osfamily {
     'Debian': {
       case $::lsbdistcodename {
diff --git a/templates/accounting/cron-hdfs.erb b/templates/accounting/cron-hdfs.erb
new file mode 100644
index 0000000..88a7134
--- /dev/null
+++ b/templates/accounting/cron-hdfs.erb
@@ -0,0 +1,5 @@
+<% if @email -%>
+MAILTO='<%= @email -%>'
+
+<% end -%>
+<%= @hdfs -%> hdfs /usr/local/bin/accounting-hdfs
diff --git a/templates/accounting/hadoop-accounting.erb b/templates/accounting/hadoop-accounting.erb
new file mode 100644
index 0000000..314e728
--- /dev/null
+++ b/templates/accounting/hadoop-accounting.erb
@@ -0,0 +1,9 @@
+<% if @db_name -%>
+MYSQL_DB='<%= @db_name -%>'
+<% end -%>
+<% if @db_user -%>
+MYSQL_USER='<%= @db_user -%>'
+<% end -%>
+<% if @db_password -%>
+MYSQL_PASSWORD='<%= @db_password -%>'
+<% end -%>
diff --git a/templates/accounting/hdfs.sh.erb b/templates/accounting/hdfs.sh.erb
new file mode 100755
index 0000000..7739754
--- /dev/null
+++ b/templates/accounting/hdfs.sh.erb
@@ -0,0 +1,27 @@
+#! /bin/sh -e
+
+PREFIX='/usr/local'
+DEFAULTDIR='<%= scope.lookupvar('site_hadoop::defaultconfdir') -%>'
+export KRB5CCNAME='FILE:/tmp/krb5cc_hdfs_stat'
+KEYTAB='FILE:/etc/security/keytab/nn.service.keytab'
+PRINCIPAL="nn/`hostname -f`"
+MYSQL_DB='accounting'
+MYSQL_USER='root'
+MYSQL_PASSWORD=''
+
+if test -f ${DEFAULTDIR}/hadoop-accounting; then
+    . ${DEFAULTDIR}/hadoop-accounting
+fi
+
+if test -n "${PRINCIPAL}"; then
+    kinit -k -t ${KEYTAB} -l 5m ${PRINCIPAL}
+fi
+
+rm -f /tmp/accounting.hdfs.txt
+hdfs dfsadmin -report >/tmp/accounting.hdfs.txt
+
+if test -n "${PRINCIPAL}"; then
+    kdestroy
+fi
+
+cat /tmp/accounting.hdfs.txt | awk -f ${PREFIX}/share/hadoop/accounting-hdfs.awk | mysql --user ${MYSQL_USER} --password=${MYSQL_PASSWORD} ${MYSQL_DB}
-- 
1.8.2.3