120 lines
3.9 KiB
Python
120 lines
3.9 KiB
Python
#!/usr/bin/python3
|
|
#
|
|
# Copyright (c) 2018-2019 Collabora, Ltd.
|
|
#
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
#
|
|
# Author(s): Ryan Pavlik <ryan.pavlik@collabora.com>
|
|
"Utilities for processing files."
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
class LinewiseFileProcessor:
    """A base class for code that processes an input file (or file handle) one line at a time.

    Subclasses implement process_line(), which is invoked once per input
    line. While process_line() runs, the properties of this class describe
    the line being processed (line, line_number, line_rstripped,
    trailing_whitespace), the line that follows it (next_line,
    next_line_rstripped), and the lines already seen in the same file
    (get_preceding_line(), get_preceding_lines()).

    An instance may be reused for several files: all per-file state is reset
    at the start of each process_file() call.
    """

    def __init__(self):
        # Every line handed to process_line() so far for the current file,
        # in order; the last element is the line currently being processed.
        self._lines = []
        # 1-indexed number of the current line; 0 before any line is seen.
        self._line_num = 0
        # The line following the current one, or None if there is none.
        self._next_line = None
        # The current line, including its line ending.
        self._line = ''
        self._filename = Path()

    @property
    def filename(self):
        """The Path object of the currently processed file."""
        return self._filename

    @property
    def relative_filename(self):
        """The current file's Path relative to the current working directory.

        A filename that is not absolute is already relative to the working
        directory and is returned unchanged.

        Raises ValueError if the filename is absolute but does not lie
        inside the current working directory.
        """
        current = self.filename
        if not current.is_absolute():
            return current
        return current.relative_to(Path('.').resolve())

    @property
    def line(self):
        """The current line, including any trailing whitespace and the line ending."""
        return self._line

    @property
    def line_number(self):
        """Get 1-indexed line number (0 if no line has been processed yet)."""
        return self._line_num

    @property
    def line_rstripped(self):
        """The current line without any trailing whitespace."""
        if self.line is None:
            return None
        return self.line.rstrip()

    @property
    def trailing_whitespace(self):
        """The trailing whitespace of the current line that gets removed when accessing line_rstripped.

        Returns None if there is no current line, mirroring line_rstripped.
        """
        current = self.line
        if current is None:
            return None
        return current[len(current.rstrip()):]

    @property
    def next_line(self):
        """Peek at the next line, if any."""
        return self._next_line

    @property
    def next_line_rstripped(self):
        """Peek at the next line, if any, without any trailing whitespace."""
        if self.next_line is None:
            return None
        return self.next_line.rstrip()

    def get_preceding_line(self, relative_index=-1):
        """Retrieve the line at the given (negative) offset from the current line.

        relative_index -- offset from the current line: -1 is the line just
        before the current one, -2 the one before that, and so on.

        Returns None if there is no line there.
        Raises RuntimeError if relative_index is not negative.
        """
        if relative_index >= 0:
            raise RuntimeError(
                'relativeIndex must be negative, to retrieve a preceding line.')
        target_line_number = self.line_number + relative_index
        if target_line_number <= 0:
            # There is no line at this index
            return None
        # Line numbers are 1-indexed, the list is 0-indexed.
        return self._lines[target_line_number - 1]

    def get_preceding_lines(self, num):
        """Get *up to* the preceding num lines.

        Fewer may be returned if the requested number aren't available.
        The current line itself is never included. A num of zero or less
        yields an empty list.
        """
        if num <= 0:
            return []
        # The last element is the current line, so stop one short of the end.
        return self._lines[-(num + 1):-1]

    def process_line(self, line_num, line):
        """Implement in your subclass to handle each new line.

        line_num -- 1-indexed number of the line.
        line -- the line's text, including its line ending.
        """
        raise NotImplementedError

    def _process_file_handle(self, file_handle):
        """Feed every line yielded by file_handle to process_line().

        Iteration runs one line ahead of processing so that next_line is
        already populated when process_line() sees a given line.
        """
        # Start each file from a clean slate so that reusing this object
        # never exposes lines or line numbers from a previous file.
        self._lines = []
        self._line_num = 0
        self._line = ''
        self._next_line = None

        def dispatch(line_num, line):
            self._line_num = line_num
            self._line = line
            self._lines.append(line)
            self.process_line(line_num, line)

        # The (line_num, line) pair that has been read but not yet processed.
        pending = None
        for line_num, line in enumerate(file_handle, 1):
            self._next_line = line
            if pending is not None:
                dispatch(*pending)
            pending = (line_num, line)

        # Finally process the left-over line, which has no successor.
        self._next_line = None
        if pending is not None:
            dispatch(*pending)

    def process_file(self, filename, file_handle=None):
        """Main entry point - call with a filename and optionally the file handle to read from.

        filename -- a str (resolved to an absolute Path) or a Path (used as is).
        file_handle -- optional iterable of lines to read instead of opening
        filename; when omitted the file is opened as UTF-8 text.
        """
        if isinstance(filename, str):
            filename = Path(filename).resolve()

        self._filename = filename

        # Compare against None rather than relying on truthiness: a supplied
        # but empty line source must not fall back to opening the file.
        if file_handle is not None:
            self._process_file_handle(file_handle)
        else:
            with self._filename.open('r', encoding='utf-8') as f:
                self._process_file_handle(f)
|