From 491d8b4696f49afbcd83c08b0af198c95f3c0eef Mon Sep 17 00:00:00 2001
From: František Dvořák
Date: Sat, 17 Jan 2015 13:13:47 +0100
Subject: [PATCH] Implement statistics gathering.

The first statistics to watch are HDFS disks and data.

---
 files/accounting/create.sql                | 126 +++++++++++++++++++++++++++++
 files/accounting/hdfs.awk                  |  64 +++++++++++++++
 manifests/accounting.pp                    |  90 +++++++++++++++++++++
 manifests/init.pp                          |   3 +
 manifests/params.pp                        |   5 ++
 templates/accounting/cron-hdfs.erb         |   5 ++
 templates/accounting/hadoop-accounting.erb |   9 +++
 templates/accounting/hdfs.sh.erb           |  27 +++++++
 8 files changed, 329 insertions(+)
 create mode 100644 files/accounting/create.sql
 create mode 100644 files/accounting/hdfs.awk
 create mode 100644 manifests/accounting.pp
 create mode 100644 templates/accounting/cron-hdfs.erb
 create mode 100644 templates/accounting/hadoop-accounting.erb
 create mode 100755 templates/accounting/hdfs.sh.erb
diff --git a/files/accounting/create.sql b/files/accounting/create.sql
new file mode 100644
index 0000000..6631f5b
--- /dev/null
+++ b/files/accounting/create.sql
@@ -0,0 +1,126 @@
+--
+-- Accounting for Hadoop
+--
+-- How to add values:
+--
+-- INSERT INTO measure (name) VALUES ('quota');
+-- INSERT INTO quota (id_measure, user, used) VALUES (last_insert_id(), 'valtri', 17);
+-- INSERT INTO quota (id_measure, user, used) VALUES (last_insert_id(), 'nemo', 1);
+--
+-- INSERT INTO hdfs (full, disk, disk_used) VALUES (1024, 1023, 10);
+-- or:
+-- INSERT INTO measure (name) VALUES ('hdfs');
+-- INSERT INTO hdfs (id_measure, full, disk, disk_used, block_under, block_corrupt, block_missing) VALUES (last_insert_id(), 10240, 10230, 100, 0, 0, 0);
+-- INSERT INTO hdfs (id_measure, hostname, state, full, disk, disk_used) VALUES (last_insert_id(), 'hador1', 1, 1024, 1023, 10);
+--
+-- INSERT INTO measure (name, start, end) VALUES ('jobstat', '2015-01-16', '2015-01-17');
+-- INSERT INTO jobstat (id_measure, user, total, fails, wait_min, wait_avg, wait_max) VALUES (last_insert_id(), 'valtri', 10, 2, 0, 50, 100);
+-- INSERT INTO jobstat (id_measure, user, total, fails, wait_min, wait_avg, wait_max) VALUES (last_insert_id(), 'nemo', 0, 0, NULL, NULL, NULL);
+--
+-- How to read values:
+--
+-- a) all history
+--
+-- SELECT * FROM view_hdfs;
+-- SELECT * FROM view_quotas;
+--
+-- b) current values
+--
+-- SELECT h.* FROM view_hdfs h, statistic s WHERE h.seq=s.last_seq;
+-- SELECT q.* FROM view_quotas q, statistic s WHERE q.seq=s.last_seq;
+--
+
+CREATE TABLE statistic (
+    name CHAR(8) NOT NULL,
+    last_id_measure INTEGER,
+    last_seq INTEGER,
+
+    INDEX (last_id_measure),
+    INDEX (last_seq)
+);
+
+CREATE TABLE measure (
+    id_measure INTEGER AUTO_INCREMENT PRIMARY KEY,
+    name CHAR(8) NOT NULL,
+    seq INTEGER NOT NULL,
+    time TIMESTAMP DEFAULT NOW(),
+    start TIMESTAMP NULL DEFAULT NULL,
+    end TIMESTAMP NULL DEFAULT NULL,
+
+    INDEX (id_measure),
+    INDEX (name),
+    INDEX (seq)
+);
+
+CREATE TABLE hdfs (
+    id_measure INTEGER NOT NULL,
+    hostname CHAR(50),
+    state INTEGER,
+    full BIGINT,
+    disk BIGINT,
+    disk_used BIGINT,
+    disk_free BIGINT,
+    block_under INTEGER,
+    block_corrupt INTEGER,
+    block_missing INTEGER,
+
+    CONSTRAINT PRIMARY KEY (id_measure, hostname),
+    INDEX(id_measure),
+    INDEX(hostname)
+);
+
+CREATE TABLE quota (
+    id_measure INTEGER NOT NULL,
+    user CHAR(20) NOT NULL,
+    used BIGINT,
+
+    CONSTRAINT PRIMARY KEY (id_measure, user),
+    INDEX(id_measure),
+    INDEX(user)
+);
+
+CREATE TABLE jobstat (
+    id_measure INTEGER NOT NULL,
+    user CHAR(20) NULL,
+    total INTEGER,
+    fails INTEGER,
+    wait_min INTEGER,
+    wait_avg INTEGER,
+    wait_max INTEGER,
+
+    CONSTRAINT PRIMARY KEY (id_measure, user),
+    INDEX(id_measure),
+    INDEX(user)
+);
+
+INSERT INTO statistic (name, last_seq) VALUES ('hdfs', 0);
+INSERT INTO statistic (name, last_seq) VALUES ('quota', 0);
+INSERT INTO statistic (name, last_seq) VALUES ('jobstat', 0);
+
+DELIMITER //
+
+CREATE TRIGGER bi_measure BEFORE INSERT ON measure
+FOR EACH ROW BEGIN
+    SET NEW.seq=(SELECT last_seq+1 FROM statistic s WHERE NEW.name=s.name);
+END; //
+
+CREATE TRIGGER ai_measure AFTER INSERT ON measure
+FOR EACH ROW BEGIN
+    UPDATE statistic s SET s.last_seq=s.last_seq+1, s.last_id_measure=NEW.id_measure WHERE s.name=NEW.name;
+END; //
+
+-- not needed, id_measure should always be specified
+CREATE TRIGGER ai_hdfs BEFORE INSERT ON hdfs
+FOR EACH ROW BEGIN
+    IF NEW.id_measure IS NULL OR NEW.id_measure=0 THEN
+        INSERT INTO measure (name) VALUES ('hdfs');
+        SET NEW.id_measure=last_insert_id();
+    END IF;
+END; //
+
+DELIMITER ;
+
+CREATE VIEW view_measures AS SELECT m.* FROM measure m, statistic s WHERE s.last_id_measure = m.id_measure;
+CREATE VIEW view_hdfs AS SELECT m.seq, m.time, h.hostname, h.full, h.disk, h.disk_used, h.disk_free, h.block_under, h.block_corrupt, h.block_missing FROM hdfs h, measure m WHERE h.id_measure=m.id_measure;
+CREATE VIEW view_quotas AS SELECT m.seq, m.time, q.user, q.used FROM quota q, measure m WHERE q.id_measure=m.id_measure;
+CREATE VIEW view_jobstat AS SELECT m.seq, m.time, m.start, m.end, j.user, j.total, j.fails, j.wait_min, j.wait_avg, j.wait_max FROM jobstat j, measure m WHERE j.id_measure=m.id_measure;
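
The measure/statistic bookkeeping above is driven entirely by the triggers: inserting a row into measure assigns the next per-name sequence number (bi_measure) and advances statistic (ai_measure), so callers never touch seq or last_seq directly. A minimal sketch of one round trip, not part of the diff itself and using only the schema above (the numbers are invented; the s.name filter is added here to pick the right statistic row, since that table holds one row per measurement type):

    -- record one HDFS snapshot; the triggers fill in seq and update statistic
    INSERT INTO measure (name) VALUES ('hdfs');
    INSERT INTO hdfs (id_measure, hostname, state, full, disk, disk_used)
        VALUES (last_insert_id(), 'hador1', 1, 1024, 1023, 10);

    -- read back only the most recent snapshot
    SELECT h.* FROM view_hdfs h, statistic s
        WHERE s.name = 'hdfs' AND h.seq = s.last_seq;
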
diff --git a/files/accounting/hdfs.awk b/files/accounting/hdfs.awk
new file mode 100644
index 0000000..94e406b
--- /dev/null
+++ b/files/accounting/hdfs.awk
@@ -0,0 +1,64 @@
+#
+# Parsing output of:
+#
+#   hdfs dfsadmin -report
+#
+
+function dbstr(s) {
+    if (s) { return "'" s "'" }
+    else { return "NULL" }
+}
+
+function dbi(i) {
+    if (i >= 0) { return i }
+    else { return "NULL" }
+}
+
+function reset() {
+    name="(none)";
+    full=-1;
+    disk=-1;
+    disk_free=-1;
+    disk_used=-1;
+    block_under=-1;
+    block_corrupt=-1;
+    block_missing=-1;
+#    cache=-1;
+#    cache_used=-1;
+#    cache_free=-1;
+}
+
+BEGIN {
+    reset();
+    name="all";
+    state=-1;
+
+    FS="[: ]+";
+    CONVFMT="%d";
+
+    print "INSERT INTO measure (name) VALUES ('hdfs');";
+}
+
+/^Live datanodes.*/ {state=1}
+/^Dead datanodes.*/ {state=2}
+/^Decommissioned .*/ {state=3}
+
+/^Hostname:.*/ {name=$2}
+/^Name:.*/ {ip=$2}
+/^Configured Capacity:.*/ {full=$3}
+/^Present Capacity:.*/ {disk=$3}
+/^DFS Remaining:.*/ {disk_free=$3}
+/^DFS Used:.*/ {disk_used=$3}
+/^Under replicated blocks:.*/ {block_under=$4}
+/^Blocks with corrupt replicas:.*/ {block_corrupt=$5}
+/^Missing blocks:.*/ {block_missing=$3}
+#/^Configured Cache Capacity:.*/{cache=$4}
+#/^Cache Used:.*/ {cache_used=$3}
+#/^Cache Remaining:.*/ {cache_free=$3}
+
+/^$/ {
+    if (name != "(none)" && ip !~ /^10\./) {
+        print "INSERT INTO hdfs (id_measure, hostname, state, full, disk, disk_free, disk_used, block_under, block_corrupt, block_missing) VALUES (last_insert_id(), " dbstr(name) ", " dbi(state) ", " dbi(full) ", IFNULL(" dbi(disk) ", " dbi(disk_free) " + " dbi(disk_used) "), " dbi(disk_free) ", " dbi(disk_used) ", " dbi(block_under) ", " dbi(block_corrupt) ", " dbi(block_missing) ");";
+    }
+    reset()
+}
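
The awk script above turns one "hdfs dfsadmin -report" run into a batch of SQL on stdout: one measure row, then one hdfs row for the cluster summary (stored under hostname 'all') and one per reported datanode, skipping nodes whose Name: address starts with 10. A sketch of that output for the summary and a single live datanode, not part of the diff itself; the hostname and byte counts are invented for illustration:

    INSERT INTO measure (name) VALUES ('hdfs');
    INSERT INTO hdfs (id_measure, hostname, state, full, disk, disk_free, disk_used, block_under, block_corrupt, block_missing)
        VALUES (last_insert_id(), 'all', NULL, 53687091200, IFNULL(53682896896, 48318382080 + 5364514816), 48318382080, 5364514816, 0, 0, 0);
    INSERT INTO hdfs (id_measure, hostname, state, full, disk, disk_free, disk_used, block_under, block_corrupt, block_missing)
        VALUES (last_insert_id(), 'hador1.example.com', 1, 10737418240, IFNULL(NULL, 9663676416 + 1072693248), 9663676416, 1072693248, NULL, NULL, NULL);
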
diff --git a/manifests/accounting.pp b/manifests/accounting.pp
new file mode 100644
index 0000000..0b4341c
--- /dev/null
+++ b/manifests/accounting.pp
@@ -0,0 +1,90 @@
+# == Class site_hadoop::accounting
+#
+# Requires:
+# * database
+# * hdfs user and group (=hadoop)
+#
+# For example using puppetlabs-mysql and cesnet-hadoop:
+#
+#   mysql::db { 'accounting':
+#     user => 'accounting',
+#     password => 'accpass',
+#     host => 'localhost',
+#     grant => ['SELECT', 'INSERT', 'UPDATE', 'DELETE'],
+#     sql => '/usr/local/share/hadoop/accounting.sql',
+#   }
+#
+#   Class['site_hadoop::accounting'] -> Mysql::Db['accounting']
+#   Class['hadoop::nameserver::install'] -> Class['site_hadoop::accounting']
+#
+# === Parameters
+#
+# [*email*] undef
+#
+#   Email address to send errors from cron.
+#
+# [*hdfs*] undef
+#
+#   Enable storing global HDFS disk and data statistics. The value is the schedule in cron format (see *man 5 crontab*).
+#
+class site_hadoop::accounting(
+  $email = undef,
+  $hdfs = undef,
+) {
+  file {'/usr/local/bin/accounting-hdfs':
+    owner => 'root',
+    group => 'root',
+    mode => '0755',
+    content => template('site_hadoop/accounting/hdfs.sh.erb'),
+  }
+
+  file{'/usr/local/share/hadoop':
+    ensure => 'directory',
+    owner => 'root',
+    group => 'root',
+    mode => '0755',
+  }
+  ->
+  file {'/usr/local/share/hadoop/accounting-hdfs.awk':
+    owner => 'root',
+    group => 'root',
+    mode => '0644',
+    source => 'puppet:///modules/site_hadoop/accounting/hdfs.awk',
+  }
+
+  file{'/usr/local/share/hadoop/accounting.sql':
+    owner => 'root',
+    group => 'root',
+    mode => '0644',
+    source => 'puppet:///modules/site_hadoop/accounting/create.sql',
+  }
+
+  $db_name = $site_hadoop::db_name
+  $db_user = $site_hadoop::db_user
+  $db_password = $site_hadoop::db_password
+  if $db_name or $db_user or $db_password {
+    file{"${site_hadoop::defaultconfdir}/hadoop-accounting":
+      owner => 'hdfs',
+      group => 'hdfs',
+      mode => '0400',
+      content => template('site_hadoop/accounting/hadoop-accounting.erb'),
+    }
+  } else {
+    file{"${site_hadoop::defaultconfdir}/hadoop-accounting":
+      ensure => 'absent',
+    }
+  }
+
+  if $hdfs {
+    file{'/etc/cron.d/accounting-hdfs':
+      owner => 'root',
+      group => 'root',
+      mode => '0644',
+      content => template('site_hadoop/accounting/cron-hdfs.erb'),
+    }
+  } else {
+    file{'/etc/cron.d/accounting-hdfs':
+      ensure => 'absent',
+    }
+  }
+}
diff --git a/manifests/init.pp b/manifests/init.pp
index 12f2c31..49be157 100644
--- a/manifests/init.pp
+++ b/manifests/init.pp
@@ -3,6 +3,9 @@
 # Basic system configurations for Hadoop cluster on Meta.
 #
 class site_hadoop (
+  $db_name = undef,
+  $db_user = undef,
+  $db_password = undef,
   $mirror = $site_hadoop::params::mirror,
 ) inherits site_hadoop::params {
   include 'site_hadoop::install'
diff --git a/manifests/params.pp b/manifests/params.pp
index f061ad9..33870ad 100644
--- a/manifests/params.pp
+++ b/manifests/params.pp
@@ -1,4 +1,9 @@
 class site_hadoop::params {
+  $defaultconfdir = $::osfamily ? {
+    debian => '/etc/default',
+    redhat => '/etc/sysconfig',
+  }
+
   case $::osfamily {
     'Debian': {
       case $::lsbdistcodename {
diff --git a/templates/accounting/cron-hdfs.erb b/templates/accounting/cron-hdfs.erb
new file mode 100644
index 0000000..88a7134
--- /dev/null
+++ b/templates/accounting/cron-hdfs.erb
@@ -0,0 +1,5 @@
+<% if @email -%>
+MAILTO='<%= @email -%>'
+
+<% end -%>
+<%= @hdfs -%> hdfs /usr/local/bin/accounting-hdfs
diff --git a/templates/accounting/hadoop-accounting.erb b/templates/accounting/hadoop-accounting.erb
new file mode 100644
index 0000000..314e728
--- /dev/null
+++ b/templates/accounting/hadoop-accounting.erb
@@ -0,0 +1,9 @@
+<% if @db_name -%>
+MYSQL_DB='<%= @db_name -%>'
+<% end -%>
+<% if @db_user -%>
+MYSQL_USER='<%= @db_user -%>'
+<% end -%>
+<% if @db_password -%>
+MYSQL_PASSWORD='<%= @db_password -%>'
+<% end -%>
diff --git a/templates/accounting/hdfs.sh.erb b/templates/accounting/hdfs.sh.erb
new file mode 100755
index 0000000..7739754
--- /dev/null
+++ b/templates/accounting/hdfs.sh.erb
@@ -0,0 +1,27 @@
+#! /bin/sh -e
+
+PREFIX='/usr/local'
+DEFAULTDIR='<%= scope.lookupvar('site_hadoop::defaultconfdir') -%>'
+export KRB5CCNAME='FILE:/tmp/krb5cc_hdfs_stat'
+KEYTAB='FILE:/etc/security/keytab/nn.service.keytab'
+PRINCIPAL="nn/`hostname -f`"
+MYSQL_DB='accounting'
+MYSQL_USER='root'
+MYSQL_PASSWORD=''
+
+if test -f ${DEFAULTDIR}/hadoop-accounting; then
+    . ${DEFAULTDIR}/hadoop-accounting
+fi
+
+if test -n "${PRINCIPAL}"; then
+    kinit -k -t ${KEYTAB} -l 5m ${PRINCIPAL}
+fi
+
+rm -f /tmp/accounting.hdfs.txt
+hdfs dfsadmin -report >/tmp/accounting.hdfs.txt
+
+if test -n "${PRINCIPAL}"; then
+    kdestroy
+fi
+
+cat /tmp/accounting.hdfs.txt | awk -f ${PREFIX}/share/hadoop/accounting-hdfs.awk | mysql --user ${MYSQL_USER} --password=${MYSQL_PASSWORD} ${MYSQL_DB}
-- 
1.8.2.3