if header:
usages = list()
usages.append('time')
- usages.append('mapred')
+ usages.append('n')
+ usages.append('map')
+ usages.append('reduce')
for nodeid in sorted(nodes.keys()):
usages.append(nodes[nodeid])
print "\t".join(usages)
for nodeid in sorted(nodes.keys()):
nodes_usage[nodeid] = list(0 for i in range(0, imax - imin))
nodes_subjobs[nodeid] = list(0 for i in range(0, imax - imin))
+map_usage = list(0 for i in range(0, imax - imin))
+reduce_usage = list(0 for i in range(0, imax - imin))
# SELECT * FROM subjobs WHERE (finish >= %s OR finish = 0 OR finish IS NULL) AND start <= %s', (start, end)
st = db.cursor(MySQLdb.cursors.SSDictCursor)
nodes_usage[data['nodeid']][i - imin] += usage;
nodes_subjobs[data['nodeid']][i - imin] += 1;
+ if data['type'] == 'MAP':
+ map_usage[i - imin] += usage
+ elif data['type'] == 'REDUCE':
+ reduce_usage[i - imin] += usage
+
+
data = st.fetchone()
st.close()
usages.append("%s" % nodes_usage[nodeid][i])
subjobs += nodes_subjobs[nodeid][i]
#print '#%s' % (int(i * 1000.0 * interval + min))
- print '%s\t%d\t%s' % (strp(i * 1000 * interval + min), subjobs, "\t".join(usages))
+ print '%s\t%d\t%s\t%s\t%s' % (strp(i * 1000 * interval + min), subjobs, map_usage[i], reduce_usage[i], "\t".join(usages))
--- /dev/null
+#! /usr/bin/gnuplot -p
+
+set title "Hadoop: Map/Reduce Jobs Summary" font ',16'
+set xdata time
+set timefmt "%Y-%m-%d %H:%M:%S"
+set format x "%Y-%m-%d"
+
+set xtics rotate by -20
+
+#plot for [i=3:25] 'nodes.dat' using 1:(sum [col=4:i+1] column(col)) with lines notitle
+plot for [i=26:26] 'nodes.dat' using 1:(sum [col=6:i+1] column(col)) with lines title 'jobs (max 384)'
#! /usr/bin/gnuplot -p
-set title "Hadoop: Map/Reduce Jobs Summary" font ',16'
+set title "Hadoop: Map/Reduce Tasks Summary" font ',16'
set xdata time
set timefmt "%Y-%m-%d %H:%M:%S"
set format x "%Y-%m-%d"
+set yrange [0:384]
set xtics rotate by -20
-#plot for [i=3:25] 'nodes.dat' using 1:(sum [col=4:i+1] column(col)) with lines notitle
-plot for [i=26:26] 'nodes.dat' using 1:(sum [col=4:i+1] column(col)) with lines title 'jobs (max 384)'
+#plot for [i=3:4] 'nodes.dat' using 1:(sum [col=4:i+1] column(col)) with lines title 'jobs (max 384)'
+
+plot for [i=3:4] 'nodes.dat' using 1:(sum [col=4:i+1] column(col)) with lines title column(i)