# Copyright 2017 The PDFium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Classes that draw conclusions out of a comparison and represent them."""

from collections import Counter

FORMAT_RED = '\033[01;31m{0}\033[00m'
FORMAT_GREEN = '\033[01;32m{0}\033[00m'
FORMAT_MAGENTA = '\033[01;35m{0}\033[00m'
FORMAT_CYAN = '\033[01;36m{0}\033[00m'
FORMAT_NORMAL = '{0}'

RATING_FAILURE = 'failure'
RATING_REGRESSION = 'regression'
RATING_IMPROVEMENT = 'improvement'
RATING_NO_CHANGE = 'no_change'
RATING_SMALL_CHANGE = 'small_change'

RATINGS = [
    RATING_FAILURE, RATING_REGRESSION, RATING_IMPROVEMENT, RATING_NO_CHANGE,
    RATING_SMALL_CHANGE
]

RATING_TO_COLOR = {
    RATING_FAILURE: FORMAT_MAGENTA,
    RATING_REGRESSION: FORMAT_RED,
    RATING_IMPROVEMENT: FORMAT_CYAN,
    RATING_NO_CHANGE: FORMAT_GREEN,
    RATING_SMALL_CHANGE: FORMAT_NORMAL,
}


class ComparisonConclusions(object):
  """All conclusions drawn from a comparison.

  This is initialized empty and then processes pairs of results for each test
  case, determining the rating for that case, which can be:
  "failure" if either or both runs for the case failed.
  "regression" if there is a significant increase in time for the test case.
  "improvement" if there is a significant decrease in time for the test case.
  "no_change" if the time for the test case did not change at all.
  "small_change" if the time for the test case changed but stayed within the
  threshold.
  """

  def __init__(self, threshold_significant):
    """Initializes an empty ComparisonConclusions.

    Args:
      threshold_significant: Float with the tolerance beyond which changes in
          measurements are considered significant.

          The change is considered as a multiplication rather than an addition
          of a fraction of the previous measurement, that is, a
          threshold_significant of 1.0 will flag test cases that became over
          100% slower (> 200% of the previous time measured) or over 100%
          faster (< 50% of the previous time measured).

          threshold_significant 0.02 -> 98.04% to 102% is not significant
          threshold_significant 0.1 -> 90.9% to 110% is not significant
          threshold_significant 0.25 -> 80% to 125% is not significant
          threshold_significant 1 -> 50% to 200% is not significant
          threshold_significant 4 -> 20% to 500% is not significant
    """
    self.threshold_significant = threshold_significant
    self.threshold_significant_negative = (
        1.0 / (1 + threshold_significant)) - 1
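    # Note: the negative threshold mirrors the positive one multiplicatively.
    # For threshold_significant = 0.25 it is 1 / 1.25 - 1 = -0.2, which gives
    # the 80% to 125% "not significant" band listed in the docstring above.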

    self.params = {'threshold': threshold_significant}
    self.summary = ComparisonSummary()
    self.case_results = {}

  def ProcessCase(self, case_name, before, after):
    """Feeds a test case's results to the ComparisonConclusions.

    Args:
      case_name: String identifying the case.
      before: Measurement for the "before" version of the code.
      after: Measurement for the "after" version of the code.
    """


    # Switch 0 to None to simplify the json dict output. All zeros are
    # considered failed runs, so they will be represented by "null".
    if not before:
      before = None
    if not after:
      after = None

    if not before or not after:
      ratio = None
      rating = RATING_FAILURE
    else:
      ratio = (float(after) / before) - 1.0
      if ratio > self.threshold_significant:
        rating = RATING_REGRESSION
      elif ratio < self.threshold_significant_negative:
        rating = RATING_IMPROVEMENT
      elif ratio == 0:
        rating = RATING_NO_CHANGE
      else:
        rating = RATING_SMALL_CHANGE

    case_result = CaseResult(case_name, before, after, ratio, rating)

    self.summary.ProcessCaseResult(case_result)
    self.case_results[case_name] = case_result

  def GetSummary(self):
    """Gets the ComparisonSummary with consolidated totals."""
    return self.summary

  def GetCaseResults(self):
    """Gets a dict mapping each test case identifier to its CaseResult."""
    return self.case_results

  def GetOutputDict(self):
    """Returns a conclusions dict with all the conclusions drawn.

    Returns:
      A serializable dict with the format illustrated below:
      {
        "version": 1,
        "params": {
          "threshold": 0.02
        },
        "summary": {
          "total": 123,
          "failure": 1,
          "regression": 2,
          "improvement": 1,
          "no_change": 100,
          "small_change": 19
        },
        "comparison_by_case": {
          "testing/resources/new_test.pdf": {
            "before": None,
            "after": 1000,
            "ratio": None,
            "rating": "failure"
          },
          "testing/resources/test1.pdf": {
            "before": 100,
            "after": 120,
            "ratio": 0.2,
            "rating": "regression"
          },
          "testing/resources/test2.pdf": {
            "before": 100,
            "after": 2000,
            "ratio": 19.0,
            "rating": "regression"
          },
          "testing/resources/test3.pdf": {
            "before": 1000,
            "after": 1005,
            "ratio": 0.005,
            "rating": "small_change"
          },
          "testing/resources/test4.pdf": {
            "before": 1000,
            "after": 1000,
            "ratio": 0.0,
            "rating": "no_change"
          },
          "testing/resources/test5.pdf": {
            "before": 1000,
            "after": 600,
            "ratio": -0.4,
            "rating": "improvement"
          }
        }
      }
    """
    output_dict = {}
    output_dict['version'] = 1
    output_dict['params'] = {'threshold': self.threshold_significant}
    output_dict['summary'] = self.summary.GetOutputDict()
    output_dict['comparison_by_case'] = {
        cr.case_name.decode('utf-8'): cr.GetOutputDict()
        for cr in self.GetCaseResults().values()
    }
    return output_dict


class ComparisonSummary(object):
  """Totals computed for a comparison."""

  def __init__(self):
    self.rating_counter = Counter()

  def ProcessCaseResult(self, case_result):
    self.rating_counter[case_result.rating] += 1

  def GetTotal(self):
    """Gets the number of test cases processed."""
    return sum(self.rating_counter.values())

  def GetCount(self, rating):
    """Gets the number of test cases processed with a given rating."""
    return self.rating_counter[rating]

  def GetOutputDict(self):
    """Returns a dict that can be serialized with all the totals."""
    result = {'total': self.GetTotal()}
    for rating in RATINGS:
      result[rating] = self.GetCount(rating)
    return result


class CaseResult(object):
  """The conclusion for the comparison of a single test case."""

  def __init__(self, case_name, before, after, ratio, rating):
    """Initializes a CaseResult.

    Args:
      case_name: String identifying the case.
      before: Measurement for the "before" version of the code.
      after: Measurement for the "after" version of the code.
      ratio: Difference between |after| and |before| as a fraction of |before|.
      rating: Rating for this test case.
    """
    self.case_name = case_name
    self.before = before
    self.after = after
    self.ratio = ratio
    self.rating = rating

  def GetOutputDict(self):
    """Returns a dict with the test case's conclusions."""
    return {
        'before': self.before,
        'after': self.after,
        'ratio': self.ratio,
        'rating': self.rating
    }


def PrintConclusionsDictHumanReadable(conclusions_dict, colored, key=None):
  """Prints a conclusions dict in a human-readable way.

  Args:
    conclusions_dict: Dict to print.
    colored: Whether to color the output to highlight significant changes.
    key: String with the CaseResult dictionary key to sort the cases by.
  """
  # Print header.
  print '=' * 80
  print '{0:>11s} {1:>15s} {2}'.format('% Change', 'Time after', 'Test case')
  print '-' * 80

  color = FORMAT_NORMAL

  # Print cases.
  if key is not None:
    case_pairs = sorted(
        conclusions_dict['comparison_by_case'].iteritems(),
        key=lambda kv: kv[1][key])
  else:
    case_pairs = sorted(conclusions_dict['comparison_by_case'].iteritems())

  for case_name, case_dict in case_pairs:
    if colored:
      color = RATING_TO_COLOR[case_dict['rating']]

    if case_dict['rating'] == RATING_FAILURE:
      print u'{} to measure time for {}'.format(
          color.format('Failed'), case_name).encode('utf-8')
      continue

    print u'{0} {1:15,d} {2}'.format(
        color.format('{:+11.4%}'.format(case_dict['ratio'])),
        case_dict['after'], case_name).encode('utf-8')

  # Print totals.
  totals = conclusions_dict['summary']
  print '=' * 80
  print 'Test cases run: %d' % totals['total']

  if colored:
    color = FORMAT_MAGENTA if totals[RATING_FAILURE] else FORMAT_GREEN
  print 'Failed to measure: %s' % color.format(totals[RATING_FAILURE])

  if colored:
    color = FORMAT_RED if totals[RATING_REGRESSION] else FORMAT_GREEN
  print 'Regressions: %s' % color.format(totals[RATING_REGRESSION])

  if colored:
    color = FORMAT_CYAN if totals[RATING_IMPROVEMENT] else FORMAT_GREEN
  print 'Improvements: %s' % color.format(totals[RATING_IMPROVEMENT])
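

# A minimal usage sketch (the measurements are the illustrative values from
# the GetOutputDict docstring above):
#
#   conclusions = ComparisonConclusions(threshold_significant=0.02)
#   conclusions.ProcessCase('testing/resources/test1.pdf', 100, 120)
#   conclusions.ProcessCase('testing/resources/test5.pdf', 1000, 600)
#   PrintConclusionsDictHumanReadable(
#       conclusions.GetOutputDict(), colored=True, key='ratio')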