From eda81eee9ee730911b3779cc367c51a7123fded8 Mon Sep 17 00:00:00 2001 From: Simon Marchi Date: Fri, 22 Sep 2023 16:43:00 -0400 Subject: [PATCH] tests: add moultipart.py Add moultipart.py, a Python module to help read text files meant to be split in different parts. A moultipart-compliant file is a succession of parts, where each part is made of a header line followed by zero or more lines of content. A header line consists of `---` optionally followed by a space and some custom information. The moultipart module exposes the `parse` function, which takes a TextIO object and returns a list of Part objects. For instance, the following input: --- Victoria Parenteau --- Taillon --- This part is empty --- Josianne Gervais results in the following parts: [Part('Victoria', 'Parenteau\n', 2), Part('', 'Taillon\n', 4), Part('This part is empty', '', 6), Part('Josianne', 'Gervais\n', 7)] Change-Id: I50fb88ed6e064c09deaf1d56187415bc26002a14 Signed-off-by: Simon Marchi Signed-off-by: Philippe Proulx Reviewed-on: https://review.lttng.org/c/babeltrace/+/10912 Tested-by: jenkins --- setup.cfg | 3 +- tests/utils/python/moultipart.py | 147 +++++++++++++++++++++++++++++++ 2 files changed, 149 insertions(+), 1 deletion(-) create mode 100644 tests/utils/python/moultipart.py diff --git a/setup.cfg b/setup.cfg index fa412107..02702a12 100644 --- a/setup.cfg +++ b/setup.cfg @@ -2,7 +2,8 @@ # E501: line too long # W503: line break before binary operator (conflicts with black's way of # formatting) -ignore = E501,W503 +# E203: Whitespace before ':' (conflicts with black's way of formatting) +ignore = E501,W503,E203 # Disabled warnings for `bt2/__init__.py`: # diff --git a/tests/utils/python/moultipart.py b/tests/utils/python/moultipart.py new file mode 100644 index 00000000..d33748c7 --- /dev/null +++ b/tests/utils/python/moultipart.py @@ -0,0 +1,147 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Copyright (C) 2023 EfficiOS Inc. 
#
# pyright: strict, reportTypeCommentUsage=false

import re
from typing import List, Optional, TextIO


# One part of a moultipart document.
#
# For example, for this part of which the header is at line 37:
#
#     --- Another Oscar Wilde quote
#     I can resist everything except temptation.
#
# The corresponding `Part` object, as returned by parse(), is:
#
#     Part('Another Oscar Wilde quote',
#          'I can resist everything except temptation.\n',
#          38)
#
# All three properties are read-only views of the constructor arguments.
class Part:
    def __init__(self, header_info: str, content: str, first_content_line_no: int):
        self._header_info = header_info
        self._content = content
        self._first_content_line_no = first_content_line_no

    # Custom information found on the header line (may be empty).
    @property
    def header_info(self):
        return self._header_info

    # Text of this part (may be empty).
    @property
    def content(self):
        return self._content

    # Number of the first line, relative to the beginning of the
    # containing moultipart document, of the content of this part.
    @property
    def first_content_line_no(self):
        return self._first_content_line_no

    def __repr__(self):
        return "Part({}, {}, {})".format(
            repr(self.header_info), repr(self.content), self.first_content_line_no
        )


# Pattern of a header line: `---` followed by either nothing but
# whitespace, or by one space and custom information. `$` also matches
# just before a trailing newline, so lines may keep their line ending.
_HEADER_LINE_RE = re.compile(r"---(\s*| .+)$")


# Returns the custom information of the header line `line` (stripped of
# surrounding whitespace, possibly empty), or `None` if `line` isn't a
# header line.
def _try_parse_header(line: str) -> Optional[str]:
    m = _HEADER_LINE_RE.match(line)

    if m is None:
        return None

    return m.group(1).strip()


# Parses the moultipart document file `in_file` and returns its parts
# (list of `Part` objects).
#
# `in_file` is consumed line by line; each line is expected to keep its
# trailing newline, as when iterating a text file.
#
# A moultipart document is a sequence of parts.
#
# A moultipart part is:
#
# 1. A header line, that is, in this order:
#
#    a) Exactly `---` at the very beginning of the line.
#    b) Optional: a space followed by custom information until the end
#       of the line.
#
#    A line where `---` is directly followed by anything other than
#    whitespace (for example `---foo` or `----`) is NOT a header line.
#
# 2. Zero or more lines of text which aren't header lines.
#
# For example, consider the following moultipart document:
#
#     --- Victoria
#     Parenteau
#     ---
#     Taillon
#     --- This part is empty
#     --- Josianne
#     Gervais
#
# Then this function would return the following part objects:
#
#     [
#         Part('Victoria', 'Parenteau\n', 2),
#         Part('', 'Taillon\n', 4),
#         Part('This part is empty', '', 6),
#         Part('Josianne', 'Gervais\n', 7),
#     ]
#
# Raises `RuntimeError` on any parsing error, that is, when the document
# is empty or when its first line isn't a header line.
def parse(in_file: TextIO) -> List[Part]:
    # Read the first header.
    #
    # Use a default value so that an empty document is reported as a
    # regular parsing error instead of leaking `StopIteration`.
    first_line = next(in_file, None)

    if first_line is None:
        raise RuntimeError(
            "Expecting header line starting with `---`, got an empty document"
        )

    cur_part_header_info = _try_parse_header(first_line)

    if cur_part_header_info is None:
        raise RuntimeError(
            "Expecting header line starting with `---`, got `{}`".format(
                first_line.strip("\n")
            )
        )

    parts = []  # type: List[Part]

    # Content lines of the current part; joined once per part to avoid
    # repeated string concatenation.
    cur_part_content_lines = []  # type: List[str]

    # The first header is at line 1, so its content starts at line 2.
    cur_first_content_line_no = 2

    for line_no, line in enumerate(in_file, start=2):
        maybe_part_header_info = _try_parse_header(line)

        if maybe_part_header_info is None:
            # Accumulate content lines
            cur_part_content_lines.append(line)
            continue

        # New header: finish the current part and start a new one of
        # which the content begins on the line following this header.
        parts.append(
            Part(
                cur_part_header_info,
                "".join(cur_part_content_lines),
                cur_first_content_line_no,
            )
        )
        cur_part_content_lines = []
        cur_part_header_info = maybe_part_header_info
        cur_first_content_line_no = line_no + 1

    # Last part (always exists)
    parts.append(
        Part(
            cur_part_header_info,
            "".join(cur_part_content_lines),
            cur_first_content_line_no,
        )
    )

    return parts


if __name__ == "__main__":
    import sys
    import pprint

    with open(sys.argv[1]) as f:
        pprint.pprint(parse(f))