177 lines
7.0 KiB
Python
177 lines
7.0 KiB
Python
# Copyright 2017 The Chromium OS Authors. All rights reserved.
|
|
# Use of this source code is governed by a BSD-style license that can be
|
|
# found in the LICENSE file.
|
|
|
|
import os
|
|
import re
|
|
|
|
try:
|
|
from autotest_lib.client.bin.result_tools import throttler_lib
|
|
from autotest_lib.client.bin.result_tools import utils_lib
|
|
except ImportError:
|
|
import throttler_lib
|
|
import utils_lib
|
|
|
|
|
|
# File extensions that can be safely shrunk.
# Extension matching is case-insensitive but the items in this set must be
# lowercase to match.
# Files without an extension and with no alphabetic characters in the extension
# (e.g. file.20201110) are always shrinkable.
SHRINKABLE_EXTENSIONS = frozenset([
        '.log',
        '.txt',
        '.debug',
        '.error',
        '.info',
        '.warning',
])

# Regex for paths that should not be shrunk.
# NOTE: these are raw strings — `\.` in a non-raw string is an invalid escape
# sequence and raises a SyntaxWarning on modern Python 3.
UNSHRINKABLE_PATH_PATTERNS = [
        # Files in a log_diff/ directory should already be relatively small,
        # and trimming them further would be detrimental to debugging. If
        # they're too large, let other throttlers (e.g., zip_file_ or
        # delete_file_) deal with them.
        # Only blocklist a few known-useful log_diff's.
        r'/log_diff/messages$',
        r'/log_diff/net\.log$',

        # Ramoops files are small but relatively important.
        # The name of this file has changed starting with linux-3.19.
        # Use a glob to match all existing records.
        r'/console-ramoops.*',
]

# Banner written at the top of a file once it has been trimmed.
TRIMMED_FILE_HEADER = '!!! This file is trimmed !!!\n'
# Template recording the pre-trim size, written right after the header.
ORIGINAL_SIZE_TEMPLATE = 'Original size: %d bytes\n\n'
# Regex pattern to retrieve the original size of the file.
ORIGINAL_SIZE_REGEX = r'Original size: (\d+) bytes'
# Banner injected where the middle of the file was removed; %d is the number
# of characters trimmed.
TRIMMED_FILE_INJECT_TEMPLATE = """

========================================================================
< %d > characters are trimmed here.
========================================================================

"""

# Percent of file content to keep at the beginning and end of the file, default
# to 20%.
HEAD_SIZE_PERCENT = 0.20

# Default size in byte to trim the file down to.
DEFAULT_FILE_SIZE_LIMIT_BYTE = 100 * 1024
|
|
|
|
def _trim_file(file_info, file_size_limit_byte):
    """Remove the file content in the middle to reduce the file size.

    The trimmed file keeps roughly HEAD_SIZE_PERCENT of the limit at the head
    of the original content plus the tail, with a banner injected in between
    stating how many characters were removed. The content is written to a
    sibling `<name>_trimmed` file first, then moved over the original, and the
    original's atime/mtime are restored on the result.

    @param file_info: A ResultInfo object containing summary for the file to be
            shrunk.
    @param file_size_limit_byte: Maximum file size in bytes after trimming.
    """
    utils_lib.LOG('Trimming file %s to reduce size from %d bytes to %d bytes' %
                  (file_info.path, file_info.original_size,
                   file_size_limit_byte))
    new_path = os.path.join(os.path.dirname(file_info.path),
                            file_info.name + '_trimmed')
    original_size_bytes = file_info.original_size
    with open(new_path, 'w') as new_file, open(file_info.path) as old_file:
        # Read the beginning part of the old file, if it's already started with
        # TRIMMED_FILE_HEADER, no need to add the header again.
        header = old_file.read(len(TRIMMED_FILE_HEADER))
        if header != TRIMMED_FILE_HEADER:
            new_file.write(TRIMMED_FILE_HEADER)
            new_file.write(ORIGINAL_SIZE_TEMPLATE % file_info.original_size)
        else:
            # The file was trimmed before: recover the pre-trim size recorded
            # on the line after the header so the injected banner stays
            # accurate across repeated trims.
            line = old_file.readline()
            match = re.match(ORIGINAL_SIZE_REGEX, line)
            if match:
                original_size_bytes = int(match.group(1))
        header_size_bytes = new_file.tell()
        # Move old file reader to the beginning of the file.
        old_file.seek(0, os.SEEK_SET)

        # Keep HEAD_SIZE_PERCENT of the remaining budget as the head chunk.
        new_file.write(old_file.read(
                int((file_size_limit_byte - header_size_bytes) *
                    HEAD_SIZE_PERCENT)))
        # Position to seek from the end of the file (negative offset), sized
        # so head + banner + tail lands near file_size_limit_byte.
        seek_pos = -(file_size_limit_byte - new_file.tell() -
                     len(TRIMMED_FILE_INJECT_TEMPLATE))
        bytes_to_skip = original_size_bytes + seek_pos - old_file.tell()
        # Adjust seek position based on string TRIMMED_FILE_INJECT_TEMPLATE:
        # the 2-character '%d' placeholder is replaced by the rendered digits
        # of bytes_to_skip, so account for the length difference and
        # recompute once.
        seek_pos += len(str(bytes_to_skip)) - 2
        bytes_to_skip = original_size_bytes + seek_pos - old_file.tell()
        new_file.write(TRIMMED_FILE_INJECT_TEMPLATE % bytes_to_skip)
        old_file.seek(seek_pos, os.SEEK_END)
        new_file.write(old_file.read())
    # Capture timestamps before deleting so they can be restored afterwards.
    stat = os.stat(file_info.path)
    if not throttler_lib.try_delete_file_on_disk(file_info.path):
        # Clean up the intermediate file.
        throttler_lib.try_delete_file_on_disk(new_path)
        utils_lib.LOG('Failed to shrink %s' % file_info.path)
        return

    os.rename(new_path, file_info.path)
    # Modify the new file's timestamp to the old one.
    os.utime(file_info.path, (stat.st_atime, stat.st_mtime))
    # Update the trimmed_size.
    file_info.trimmed_size = file_info.size
|
|
|
|
|
|
def _get_shrinkable_files(file_infos, file_size_limit_byte):
    """Filter the files that can be throttled.

    A file is shrinkable when its extension is allowlisted (or has no
    alphabetic characters), its path matches none of the blocklisted
    patterns, and it is still larger than the size limit.

    @param file_infos: A list of ResultInfo objects.
    @param file_size_limit_byte: Minimum file size in bytes to be throttled.
    @yield: ResultInfo objects that can be shrunk.
    """
    for info in file_infos:
        ext = os.path.splitext(info.name)[1].lower()
        # If ext contains alphabetic characters and is not in the allowlist,
        # skip the file.
        # islower() returns False if the string does not contain any
        # alphabetic characters, e.g. '.20201110'.islower() is False.
        if ext.islower() and ext not in SHRINKABLE_EXTENSIONS:
            continue

        # Skip paths explicitly blocklisted from shrinking.
        if any(re.search(pattern, info.path)
               for pattern in UNSHRINKABLE_PATH_PATTERNS):
            continue

        # Already at or below the limit; nothing to trim.
        if info.trimmed_size <= file_size_limit_byte:
            continue

        yield info
|
|
|
|
|
|
def throttle(summary, max_result_size_KB,
             file_size_limit_byte=DEFAULT_FILE_SIZE_LIMIT_BYTE,
             skip_autotest_log=False):
    """Throttle the files in summary by trimming file content.

    Stop throttling until all files are processed or the result file size is
    already reduced to be under the given max_result_size_KB.

    @param summary: A ResultInfo object containing result summary.
    @param max_result_size_KB: Maximum test result size in KB.
    @param file_size_limit_byte: Limit each file's size in the summary to be
            under the given threshold, until all files are processed or the
            result size is under the given max_result_size_KB.
    @param skip_autotest_log: True to skip shrink Autotest logs, default is
            False.
    """
    # Process files in sorted order so larger results are trimmed first.
    file_infos, _ = throttler_lib.sort_result_files(summary)
    extra_patterns = ([throttler_lib.AUTOTEST_LOG_PATTERN] if skip_autotest_log
                      else [])
    file_infos = throttler_lib.get_throttleable_files(
            file_infos, extra_patterns)
    file_infos = _get_shrinkable_files(file_infos, file_size_limit_byte)
    for info in file_infos:
        _trim_file(info, file_size_limit_byte)

        # Stop early once the total result size drops under the cap, per the
        # contract in the docstring above.
        if throttler_lib.check_throttle_limit(summary, max_result_size_KB):
            return