# Copyright 2017 The Chromium OS Authors. All rights reserved.
|
|
# Use of this source code is governed by a BSD-style license that can be
|
|
# found in the LICENSE file.
|
|
|
|
"""This throttler tries to remove the remove repeated files sharing the same
|
|
prefix, for example, screenshots or dumps in the same folder. The dedupe logic
|
|
does not compare the file content, instead, it sorts the files with the same
|
|
prefix and remove files in the middle part.
|
|
"""
|
|
|
|
import os
|
|
import re
|
|
|
|
try:
|
|
from autotest_lib.client.bin.result_tools import result_info_lib
|
|
|
|
from autotest_lib.client.bin.result_tools import throttler_lib
|
|
from autotest_lib.client.bin.result_tools import utils_lib
|
|
except ImportError:
|
|
import result_info_lib
|
|
import throttler_lib
|
|
import utils_lib
|
|
|
|
|
|
# Number of files to keep for the oldest files.
OLDEST_FILES_TO_KEEP_COUNT = 2
# Number of files to keep for the newest files.
NEWEST_FILES_TO_KEEP_COUNT = 1

# Files with path matching the following patterns should not be deduped.
NO_DEDUPE_FILE_PATTERNS = [
        'debug/.*',
        '.*perf.data$',       # Performance test data.
        '.*/debug/.*',
        # Raw string so that \d is a regex digit class, not an (invalid)
        # string escape.
        r'.*dir_summary_\d+.json',
        ]

# Regex pattern to get the prefix of a file: the leading run of letters,
# underscores and dashes (e.g. 'screenshot' from 'screenshot0001.png').
PREFIX_PATTERN = '([a-zA-Z_-]*).*'
|
|
|
|
def _group_by(file_infos, keys):
    """Group the file infos by the given keys.

    @param file_infos: A list of ResultInfo objects.
    @param keys: A list of names of the attribute to group the file infos by.
    @return: A dictionary of grouped_key: [ResultInfo].
    """
    grouped_infos = {}
    for info in file_infos:
        # The group key joins the values of the requested attributes with the
        # path separator, e.g. '<parent_dir>/<prefix>'.
        grouped_key = os.sep.join(getattr(info, key) for key in keys)
        grouped_infos.setdefault(grouped_key, []).append(info)
    return grouped_infos
|
|
|
|
|
|
def _dedupe_files(summary, file_infos, max_result_size_KB):
    """Delete the given files and update the summary.

    The oldest OLDEST_FILES_TO_KEEP_COUNT and newest
    NEWEST_FILES_TO_KEEP_COUNT files are kept; the files in the middle are
    deleted, stopping early once the result size drops under the limit.

    @param summary: A ResultInfo object containing result summary.
    @param file_infos: A list of ResultInfo objects to be de-duplicated.
    @param max_result_size_KB: Maximum test result size in KB.
    """
    # Sort file infos based on the modify date of the file.
    file_infos.sort(
            key=lambda f: result_info_lib.get_last_modification_time(f.path))
    # Middle slice: everything except the oldest and newest files to keep.
    file_infos_to_delete = file_infos[
            OLDEST_FILES_TO_KEEP_COUNT:-NEWEST_FILES_TO_KEEP_COUNT]

    for file_info in file_infos_to_delete:
        if throttler_lib.try_delete_file_on_disk(file_info.path):
            file_info.trimmed_size = 0

            # Stop deleting as soon as the result size is under the limit.
            if throttler_lib.check_throttle_limit(summary, max_result_size_KB):
                return
|
|
|
|
|
def throttle(summary, max_result_size_KB):
    """Throttle the files in summary by de-duplicating files.

    Stops throttling once all files are processed or the result size is
    already reduced to be under the given max_result_size_KB.

    @param summary: A ResultInfo object containing result summary.
    @param max_result_size_KB: Maximum test result size in KB.
    """
    _, grouped_files = throttler_lib.sort_result_files(summary)
    for pattern in throttler_lib.RESULT_THROTTLE_PRIORITY:
        throttable_files = list(throttler_lib.get_throttleable_files(
                grouped_files[pattern], NO_DEDUPE_FILE_PATTERNS))

        # Tag each file with the attributes used for grouping below.
        for info in throttable_files:
            info.parent_dir = os.path.dirname(info.path)
            info.prefix = re.match(PREFIX_PATTERN, info.name).group(1)

        # Group files for each parent directory and file-name prefix.
        grouped_infos = _group_by(throttable_files, ['parent_dir', 'prefix'])

        for infos in grouped_infos.values():
            if (len(infos) <=
                OLDEST_FILES_TO_KEEP_COUNT + NEWEST_FILES_TO_KEEP_COUNT):
                # No need to dedupe if the count of files is too few.
                continue

            # Remove the files that can be deduped.
            utils_lib.LOG('De-duplicating files in %s with the same prefix of '
                          '"%s"' % (infos[0].parent_dir, infos[0].prefix))
            _dedupe_files(summary, infos, max_result_size_KB)

            if throttler_lib.check_throttle_limit(summary, max_result_size_KB):
                return
|