cpp-common/bt2c/fmt.hpp: use `wise_enum::string_type` in `EnableIfIsWiseEnum` definition
[babeltrace.git] / tests / utils / python / moultipart.py
1 # SPDX-License-Identifier: GPL-2.0-only
2 #
3 # Copyright (C) 2023 EfficiOS Inc.
4 #
5 # pyright: strict, reportTypeCommentUsage=false
6
7 import re
8 from typing import TextIO
9
10
11 # One part of a moultipart document.
12 #
13 # For example, for this part of which the header is at line 37:
14 #
15 # --- Another Oscar Wilde quote
16 # I can resist everything except temptation.
17 #
18 # The corresponding `Part` object is:
19 #
20 # Part('Another Oscar Wilde quote',
21 # 'I can resist everything except temptation.\n',
22 # 38)
class Part:
    # Read-only record describing a single part of a moultipart
    # document: the information found on its header line, its textual
    # content, and the line number at which that content begins.
    def __init__(self, header_info: str, content: str, first_content_line_no: int):
        self._header_info = header_info
        self._content = content
        self._first_content_line_no = first_content_line_no

    # Information given on the header line of this part.
    @property
    def header_info(self) -> str:
        return self._header_info

    # Text of this part, as passed at construction time.
    @property
    def content(self) -> str:
        return self._content

    # Line number of the first content line of this part, counted from
    # the beginning of the containing moultipart document.
    @property
    def first_content_line_no(self) -> int:
        return self._first_content_line_no

    def __repr__(self) -> str:
        # `!r` applies `repr()` to the two string fields; the line
        # number is formatted as is.
        return "Part({!r}, {!r}, {})".format(
            self.header_info, self.content, self.first_content_line_no
        )
47
48
def _try_parse_header(line: str):
    # Returns the header information of `line`, with surrounding
    # whitespace removed, if `line` is a header line, or `None`
    # otherwise.
    #
    # A header line starts with `---` followed by either nothing but
    # whitespace, or one space and at least one more character.
    header_match = re.match(r"---(\s*| .+)$", line)

    return None if header_match is None else header_match.group(1).strip()
56
57
58 # Parses the moultipart document file `in_file` and returns its parts
59 # (list of `Part` objects).
60 #
61 # A moultipart document is a sequence of parts.
62 #
63 # A moultipart part is:
64 #
65 # 1. A header line, that is, in this order:
66 #
67 # a) Exactly `---`.
68 # b) Zero or more spaces.
69 # c) Optional: custom information until the end of the line.
70 #
71 # 2. Zero or more lines of text which aren't header lines.
72 #
73 # For example, consider the following moultipart document:
74 #
75 # --- Victoria
76 # Parenteau
77 # ---
78 # Taillon
79 # --- This part is empty
80 # --- Josianne
81 # Gervais
82 #
83 # Then this function would return the following part objects:
84 #
85 # [
86 # Part('Victoria', 'Parenteau\n', 2),
87 # Part('', 'Taillon\n', 4),
88 # Part('This part is empty', '', 6),
89 # Part('Josianne', 'Gervais\n', 7),
90 # ]
91 #
92 # Raises `RuntimeError` on any parsing error.
def parse(in_file: TextIO):
    # Parses the moultipart document read from `in_file` and returns
    # its parts (list of `Part` objects) in document order.
    #
    # Raises `RuntimeError` if the document is empty or if its first
    # line isn't a header line.

    # Read the first header.
    #
    # Passing a default to next() keeps an empty document from leaking
    # `StopIteration` to the caller: it's reported as a parsing error
    # instead.
    line = next(in_file, None)

    if line is None:
        raise RuntimeError(
            "Expecting header line starting with `---`, got end of file"
        )

    cur_part_header_info = _try_parse_header(line)

    if cur_part_header_info is None:
        raise RuntimeError(
            "Expecting header line starting with `---`, got `{}`".format(
                line.strip("\n")
            )
        )

    parts = []  # type: list[Part]

    # Content lines of the current part: joined once per part rather
    # than concatenated line by line.
    cur_content_lines = []  # type: list[str]

    # The first header is at line 1, so its content starts at line 2.
    cur_first_content_line_no = 2

    for line_no, line in enumerate(in_file, start=2):
        maybe_part_header_info = _try_parse_header(line)

        if maybe_part_header_info is None:
            # Accumulate content lines
            cur_content_lines.append(line)
            continue

        # New header: finish the current part and start the next one,
        # of which the content begins on the line after this header.
        parts.append(
            Part(
                cur_part_header_info,
                "".join(cur_content_lines),
                cur_first_content_line_no,
            )
        )
        cur_content_lines = []
        cur_part_header_info = maybe_part_header_info
        cur_first_content_line_no = line_no + 1

    # Last part (always exists)
    parts.append(
        Part(
            cur_part_header_info,
            "".join(cur_content_lines),
            cur_first_content_line_no,
        )
    )

    return parts
140
141
if __name__ == "__main__":
    # Debugging aid: parse the moultipart document of which the path is
    # the first command-line argument and pretty-print its parts.
    import pprint
    import sys

    moultipart_path = sys.argv[1]

    with open(moultipart_path) as moultipart_file:
        parsed_parts = parse(moultipart_file)

    pprint.pprint(parsed_parts)
This page took 0.030693 seconds and 4 git commands to generate.