488 lines
19 KiB
Python
Executable File
488 lines
19 KiB
Python
Executable File
#!/usr/bin/python2
|
|
"""
|
|
Postprocessing module for IOzone. It is able to pick up the results of an
|
|
IOzone run, calculate the geometric mean for all throughput results for
|
|
a given file size or record size, and then generate a series of 2D and 3D
|
|
graphs. The graph generation functionality depends on gnuplot, and if it
|
|
is not present, functionality degrades gracefully.
|
|
|
|
@copyright: Red Hat 2010
|
|
"""
|
|
import os, sys, optparse, logging, math, time
|
|
import common
|
|
from autotest_lib.client.common_lib import logging_config, logging_manager
|
|
from autotest_lib.client.common_lib import error
|
|
from autotest_lib.client.bin import utils, os_dep
|
|
|
|
|
|
_LABELS = ['file_size', 'record_size', 'write', 'rewrite', 'read', 'reread',
|
|
'randread', 'randwrite', 'bkwdread', 'recordrewrite', 'strideread',
|
|
'fwrite', 'frewrite', 'fread', 'freread']
|
|
|
|
|
|
def unique(list):
    """
    Return a list of the elements in list, but without duplicates.

    The result is always a real list (so callers may sort it in place) and
    keeps the elements in the order in which they first appear.

    @param list: List with values. The parameter name shadows the builtin
            inside this function; it is kept for backward compatibility.
    @return: List with non duplicate elements ([] for an empty input), or
            None if an element is unhashable.
    """
    seen = {}
    result = []
    try:
        for x in list:
            # The membership test hashes x, so unhashable elements raise
            # TypeError here.
            if x not in seen:
                seen[x] = 1
                result.append(x)
    except TypeError:
        return None
    return result
|
|
|
|
|
|
def geometric_mean(values):
    """
    Evaluates the geometric mean for a list of numeric values.

    Every value is converted with int() before the mean is computed, so
    fractional parts are truncated and numeric strings are accepted.

    @param values: List with values.
    @return: Single value representing the geometric mean for the list values.
            None is returned when the list is empty, when a value can not be
            converted to an integer or when a value is negative (the mean is
            undefined then). 0.0 is returned when any value is zero.
    @see: http://en.wikipedia.org/wiki/Geometric_mean
    """
    try:
        values = [int(value) for value in values]
    except (TypeError, ValueError):
        return None
    if not values:
        return None
    if min(values) < 0:
        return None
    if 0 in values:
        # The product of the values is zero, and so is its n-th root.
        # math.log(0) would raise ValueError, so answer directly.
        return 0.0
    # exp(mean(log(x))) equals the n-th root of the product, without
    # building a huge intermediate product.
    log_sum = sum([math.log(x) for x in values])
    return math.exp(log_sum / len(values))
|
|
|
|
|
|
def compare_matrices(matrix1, matrix2, treshold=0.05):
    """
    Compare 2 matrices nxm and return a matrix nxm with comparison data

    Each cell of the resulting matrix holds:
     * the negative percent difference (a float) when the value of matrix2
       is lower than the reference by more than the treshold (regression);
     * the percent difference as a string prefixed with "+" when it is
       higher than the reference by more than the treshold (improvement);
     * otherwise the reference value itself for the first column of a line
       and "." for any other column.

    @param matrix1: Reference Matrix with numeric data
    @param matrix2: Matrix that will be compared
    @param treshold: Any relative difference bigger than this fraction
            (0.05 means 5%) will be reported.
    @return: Tuple (comparison matrix, number of improvements, number of
            regressions, total number of compared cells).
    """
    improvements = 0
    regressions = 0
    same = 0

    new_matrix = []
    for line1, line2 in zip(matrix1, matrix2):
        new_line = []
        # The column position is tracked explicitly: looking the value up
        # with line1.index() would mistake any cell equal to the first one
        # for the first column.
        for column, (element1, element2) in enumerate(zip(line1, line2)):
            reference = float(element1)
            candidate = float(element2)
            if reference != 0:
                ratio = candidate / reference
            elif candidate == 0:
                # 0 against 0: nothing changed.
                ratio = 1.0
            elif candidate > 0:
                # Any value is an unbounded improvement over zero.
                ratio = float('inf')
            else:
                ratio = float('-inf')

            if ratio < (1 - treshold):
                regressions += 1
                new_line.append(100 * ratio - 100)
            elif ratio > (1 + treshold):
                improvements += 1
                new_line.append("+" + str(100 * ratio - 100))
            else:
                same += 1
                if column == 0:
                    new_line.append(element1)
                else:
                    new_line.append(".")
        new_matrix.append(new_line)

    total = improvements + regressions + same

    return (new_matrix, improvements, regressions, total)
|
|
|
|
|
|
class IOzoneAnalyzer(object):
    """
    Analyze an unprocessed IOzone file, and generate the following types of
    report:

    * Summary of throughput for all file and record sizes combined
    * Summary of throughput for all file sizes
    * Summary of throughput for all record sizes

    If exactly two files are provided to the analyzer object, a comparison
    between the two runs is made, searching for regressions in performance.
    """
    def __init__(self, list_files, output_dir):
        """
        @param list_files: List with the paths of the IOzone result files.
        @param output_dir: Directory where the data source files are written.
                It is created if it does not exist.
        """
        self.list_files = list_files
        if not os.path.isdir(output_dir):
            os.makedirs(output_dir)
        self.output_dir = output_dir
        logging.info("Results will be stored in %s", output_dir)


    def average_performance(self, results, size=None):
        """
        Flattens a list containing performance results.

        @param results: List of n lists containing data from performance runs.
        @param size: Numerical value of a size (say, file_size) that was used
                to filter the original results list.
        @return: List with the geometric mean of each one of the 13
                throughput columns, divided by 1024 and truncated to an
                integer. If size is given, it is the first element of the
                list.
        """
        average_line = []
        if size is not None:
            average_line.append(size)
        # Columns 0 and 1 hold the file and record sizes, throughput data
        # lives in columns 2 to 14.
        for i in range(2, 15):
            average = geometric_mean([line[i] for line in results])
            if average is None:
                # No usable samples for this column (for example an empty
                # result set): report 0 instead of failing on None / 1024.0
                average = 0
            average_line.append(int(average / 1024.0))
        return average_line


    def process_results(self, results, label=None):
        """
        Process a list of IOzone results according to label.

        @param results: A list of n x m columns with original iozone results.
        @param label: IOzone column label that we'll use to filter and compute
                geometric mean results, in practical term either 'file_size'
                or 'record_size'. If None, all results are averaged together.
        @return: A list with one line of geometric averages for each value
                of label, sorted by that value (ex, average for all
                file_sizes), or a list with a single line if label is None.
        """
        performance = []
        if label is not None:
            index = _LABELS.index(label)
            # sorted() builds a new list, so this works regardless of the
            # kind of sequence unique() returns.
            sizes = sorted(unique([line[index] for line in results]))
            for size in sizes:
                r_results = [line for line in results if line[index] == size]
                performance.append(self.average_performance(r_results, size))
        else:
            performance.append(self.average_performance(results))

        return performance


    def parse_file(self, file):
        """
        Parse an IOzone results file.

        Only lines made of exactly 15 whitespace separated integers are
        considered results, anything else is skipped.

        @param file: File object that will be parsed.
        @return: Matrix containing IOzone results extracted from the file.
        """
        lines = []
        for line in file:
            fields = line.split()
            if len(fields) != 15:
                continue
            try:
                lines.append([int(i) for i in fields])
            except ValueError:
                continue
        return lines


    def _log_drilled_header(self):
        """
        Log the column header shared by the per size tables.
        """
        logging.info("RECORD INIT RE RE RANDOM RANDOM BACKWD RECRE STRIDE F FRE F FRE ")
        logging.info("SIZE (KB) WRITE WRITE READ READ READ WRITE READ WRITE READ WRITE WRITE READ READ")
        logging.info("--------------------------------------------------------------------------------------------------------------")


    def _log_rows_to_file(self, file_name, rows):
        """
        Log result rows and mirror them to a file inside the output dir.

        A file handler is attached to the root logger only while the rows
        are logged, so the file receives the rows and nothing else.

        @param file_name: Name of the file, relative to the output dir. An
                existing file with that name is replaced.
        @param rows: Lines with 14 values each (size plus 13 averages).
        """
        path = os.path.join(self.output_dir, file_name)
        if os.path.isfile(path):
            os.unlink(path)
        logger = logging.getLogger()
        handler = logging.FileHandler(path)
        handler.setFormatter(logging.Formatter(""))
        logger.addHandler(handler)
        try:
            for result_line in rows:
                logging.info("%-10s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s" % tuple(result_line))
        finally:
            # Always detach and close the handler, otherwise later log
            # messages leak into the file and its descriptor stays open.
            logger.removeHandler(handler)
            handler.close()


    def report(self, overall_results, record_size_results, file_size_results):
        """
        Generates analysis data for IOZone run.

        Generates a report to both logs (where it goes with nice headers) and
        output files for further processing (graph generation).

        @param overall_results: Matrix with a single line holding the 13
                averages for all file and record sizes combined.
        @param record_size_results: Matrix with one line of 14 values for
                each record size tested.
        @param file_size_results: Matrix with one line of 14 values for
                each file size tested.
        """
        logging.info("")
        logging.info("TABLE: SUMMARY of ALL FILE and RECORD SIZES Results in MB/sec")
        logging.info("")
        logging.info("FILE & RECORD INIT RE RE RANDOM RANDOM BACKWD RECRE STRIDE F FRE F FRE")
        logging.info("SIZES (KB) WRITE WRITE READ READ READ WRITE READ WRITE READ WRITE WRITE READ READ")
        logging.info("-------------------------------------------------------------------------------------------------------------------")
        for result_line in overall_results:
            logging.info("ALL %-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s" % tuple(result_line))
        logging.info("")

        logging.info("DRILLED DATA:")

        logging.info("")
        logging.info("TABLE: RECORD Size against all FILE Sizes Results in MB/sec")
        logging.info("")
        self._log_drilled_header()
        # NOTE(review): the per record size rows go to '2d-datasource-file'
        # and the per file size rows to '2d-datasource-record'; the names
        # look swapped but are kept as they are - confirm with the plotter.
        self._log_rows_to_file('2d-datasource-file', record_size_results)
        logging.info("")

        logging.info("")
        logging.info("TABLE: FILE Size against all RECORD Sizes Results in MB/sec")
        logging.info("")
        self._log_drilled_header()
        self._log_rows_to_file('2d-datasource-record', file_size_results)
        logging.info("")


    @staticmethod
    def _percentage(count, total):
        """
        Return count as a percentage of total, 0.0 when total is zero.
        """
        if not total:
            return 0.0
        return 100 * count / float(total)


    def _log_comparison(self, rows, improvements, regressions, total):
        """
        Log the rows of a comparison table followed by its summary line.

        @param rows: Comparison matrix, lines with 14 values each.
        @param improvements: Number of improvements found.
        @param regressions: Number of regressions found.
        @param total: Number of compared values, may be zero.
        """
        for result_line in rows:
            logging.info("%-10s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s" % tuple(result_line))
        logging.info("REGRESSIONS: %d (%.2f%%) Improvements: %d (%.2f%%)",
                     regressions,
                     self._percentage(regressions, total),
                     improvements,
                     self._percentage(improvements, total))


    def report_comparison(self, record, file):
        """
        Generates comparison data for 2 IOZone runs.

        It logs, for record sizes and for file sizes, a table with the
        differences found between the 2 runs, followed by the number and
        percentage of regressions and improvements.

        @param record: Tuple with 4 elements containing results for record
                size: (comparison matrix, improvements, regressions, total).
        @param file: Tuple with 4 elements containing results for file size,
                with the same layout.
        """
        (record_size, record_improvements, record_regressions,
         record_total) = record
        (file_size, file_improvements, file_regressions,
         file_total) = file
        logging.info("ANALYSIS of DRILLED DATA:")

        logging.info("")
        logging.info("TABLE: RECsize Difference between runs Results are % DIFF")
        logging.info("")
        self._log_drilled_header()
        self._log_comparison(record_size, record_improvements,
                             record_regressions, record_total)
        logging.info("")

        logging.info("")
        logging.info("TABLE: FILEsize Difference between runs Results are % DIFF")
        logging.info("")
        self._log_drilled_header()
        self._log_comparison(file_size, file_improvements,
                             file_regressions, file_total)
        logging.info("")


    def analyze(self):
        """
        Analyzes and eventually compares sets of IOzone data.

        Every file is parsed and reported. When exactly 2 files were given,
        the per record size and per file size results of the second one are
        compared against the first one.
        """
        compare = len(self.list_files) == 2
        overall = []
        record_size = []
        file_size = []
        for path in self.list_files:
            results_file = open(path, 'r')
            try:
                logging.info('FILE: %s', path)
                results = self.parse_file(results_file)
            finally:
                results_file.close()
            if not results:
                logging.warning("No IOzone results found in %s", path)

            overall_results = self.process_results(results)
            record_size_results = self.process_results(results, 'record_size')
            file_size_results = self.process_results(results, 'file_size')
            self.report(overall_results, record_size_results, file_size_results)

            if compare:
                overall.append(overall_results)
                record_size.append(record_size_results)
                file_size.append(file_size_results)

        if compare:
            record_comparison = compare_matrices(*record_size)
            file_comparison = compare_matrices(*file_size)
            self.report_comparison(record_comparison, file_comparison)
|
|
|
|
|
|
class IOzonePlotter(object):
    """
    Plots graphs based on the results of an IOzone run.

    Uses gnuplot to generate the graphs. If gnuplot can not be located or the
    results file is not valid, the plotter is flagged as inactive and
    plot_all() does nothing.
    """
    def __init__(self, results_file, output_dir):
        """
        @param results_file: Path to the file with the IOzone results.
        @param output_dir: Directory where data sources, gnuplot command
                files and graphs are written. It is created if it does not
                exist.
        """
        self.active = True
        try:
            self.gnuplot = os_dep.command("gnuplot")
        except Exception:
            # Exception instead of a bare except, so KeyboardInterrupt and
            # SystemExit are not swallowed here.
            logging.error("Command gnuplot not found, disabling graph "
                          "generation")
            self.gnuplot = None
            self.active = False

        if not os.path.isdir(output_dir):
            os.makedirs(output_dir)
        self.output_dir = output_dir

        if not os.path.isfile(results_file):
            logging.error("Invalid file %s provided, disabling graph "
                          "generation", results_file)
            self.active = False
            self.results_file = None
        else:
            self.results_file = results_file
            self.generate_data_source()


    def generate_data_source(self):
        """
        Creates data file without headers for gnuplot consumption.

        Copies to the file '3d-datasource', inside the output dir, every line
        of the results file that is made of exactly 15 whitespace separated
        integers. The path of the new file is stored in self.datasource.
        """
        results_file = open(self.results_file, 'r')
        try:
            self.datasource = os.path.join(self.output_dir, '3d-datasource')
            datasource = open(self.datasource, 'w')
            try:
                for line in results_file:
                    fields = line.split()
                    if len(fields) != 15:
                        continue
                    try:
                        for field in fields:
                            int(field)
                    except ValueError:
                        continue
                    datasource.write(line)
            finally:
                datasource.close()
        finally:
            results_file.close()


    def _run_gnuplot(self, commands_path, commands):
        """
        Write gnuplot commands to a file and run gnuplot on that file.

        A failure of the gnuplot command is logged, not propagated.

        @param commands_path: Path of the command file to be written.
        @param commands: String with the gnuplot commands.
        """
        commands_file = open(commands_path, 'w')
        try:
            commands_file.write(commands)
        finally:
            commands_file.close()
        try:
            utils.system("%s %s" % (self.gnuplot, commands_path))
        except error.CmdError:
            logging.error("Problem plotting from commands file %s",
                          commands_path)


    def plot_2d_graphs(self):
        """
        For each one of the throughput parameters, generate a set of gnuplot
        commands that will create a line graph of that parameter, using the
        file '2d-datasource-file' of the output dir as data source, and run
        gnuplot on them.
        """
        datasource_2d = os.path.join(self.output_dir, '2d-datasource-file')
        # Column 1 of the 2D data source holds the size, the 13 throughput
        # values follow in columns 2 to 14.
        for index, label in zip(range(2, 15), _LABELS[2:]):
            commands_path = os.path.join(self.output_dir, '2d-%s.do' % label)
            output_path = os.path.join(self.output_dir, '2d-%s.png' % label)
            commands = "".join([
                "set title 'Iozone performance: %s'\n" % label,
                "set logscale x\n",
                "set xlabel 'File size (KB)'\n",
                "set ylabel 'Througput (MB/s)'\n",
                "set terminal png small size 450 350\n",
                "set output '%s'\n" % output_path,
                "plot '%s' using 1:%s title '%s' with lines \n" %
                (datasource_2d, index, label),
            ])
            self._run_gnuplot(commands_path, commands)


    def plot_3d_graphs(self):
        """
        For each one of the throughput parameters, generate a set of gnuplot
        commands that will create a parametric surface with file size vs.
        record size vs. throughput, and run gnuplot on them.
        """
        # The 3D data source keeps the 15 original columns: file size (1),
        # record size (2) and the 13 throughput values in columns 3 to 15.
        # gnuplot columns are numbered from 1, so the z column of the first
        # throughput label is 3.
        for index, label in zip(range(3, 16), _LABELS[2:]):
            commands_path = os.path.join(self.output_dir, '%s.do' % label)
            output_path = os.path.join(self.output_dir, '%s.png' % label)
            commands = "".join([
                "set title 'Iozone performance: %s'\n" % label,
                "set grid lt 2 lw 1\n",
                "set surface\n",
                "set parametric\n",
                "set xtics\n",
                "set ytics\n",
                "set logscale x 2\n",
                "set logscale y 2\n",
                "set logscale z\n",
                "set xrange [2.**5:2.**24]\n",
                "set xlabel 'File size (KB)'\n",
                "set ylabel 'Record size (KB)'\n",
                "set zlabel 'Througput (KB/s)'\n",
                # 'set data style' is the deprecated spelling of this
                # command and is rejected by current gnuplot releases.
                "set style data lines\n",
                "set dgrid3d 80,80, 3\n",
                "set terminal png small size 900 700\n",
                "set output '%s'\n" % output_path,
                "splot '%s' using 1:2:%s title '%s'\n" %
                (self.datasource, index, label),
            ])
            self._run_gnuplot(commands_path, commands)


    def plot_all(self):
        """
        Plot all graphs that are to be plotted, provided that we have gnuplot
        and a valid results file.
        """
        if self.active:
            self.plot_2d_graphs()
            self.plot_3d_graphs()
|
|
|
|
|
|
class AnalyzerLoggingConfig(logging_config.LoggingConfig):
    """
    Logging configuration used when this module is run as a script.
    """
    def configure_logging(self, results_dir=None, verbose=False):
        """
        Configure logging through the parent class, with console output on.

        @param results_dir: Accepted but not used by this configuration.
        @param verbose: Passed through to the parent configure_logging.
        """
        parent = super(AnalyzerLoggingConfig, self)
        parent.configure_logging(use_console=True, verbose=verbose)
|
|
|
|
|
|
if __name__ == "__main__":
    parser = optparse.OptionParser("usage: %prog [options] [filenames]")
    options, args = parser.parse_args()

    logging_manager.configure_logging(AnalyzerLoggingConfig())

    # One or two result files are accepted; anything else gets the usage
    # message and a non zero exit status.
    if not args or len(args) > 2:
        parser.print_help()
        sys.exit(1)
    filenames = args

    # Results go to a time stamped directory under the current directory.
    timestamp = time.strftime('%Y-%m-%d-%H.%M.%S')
    o = os.path.join(os.getcwd(), "iozone-graphs-%s" % timestamp)
    if not os.path.isdir(o):
        os.makedirs(o)

    a = IOzoneAnalyzer(list_files=filenames, output_dir=o)
    a.analyze()
    # Graphs are generated from the first results file only.
    p = IOzonePlotter(results_file=filenames[0], output_dir=o)
    p.plot_all()
|