Add `tests/utils/python/normand.py` (Normand 0.23)
[babeltrace.git] / tests / utils / python / normand.py
1 # The MIT License (MIT)
2 #
3 # Copyright (c) 2023 Philippe Proulx <eeppeliteloop@gmail.com>
4 #
5 # Permission is hereby granted, free of charge, to any person obtaining
6 # a copy of this software and associated documentation files (the
7 # "Software"), to deal in the Software without restriction, including
8 # without limitation the rights to use, copy, modify, merge, publish,
9 # distribute, sublicense, and/or sell copies of the Software, and to
10 # permit persons to whom the Software is furnished to do so, subject to
11 # the following conditions:
12 #
13 # The above copyright notice and this permission notice shall be
14 # included in all copies or substantial portions of the Software.
15 #
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
24 # This module is the portable Normand processor. It offers both the
25 # parse() function and the command-line tool (run the module itself)
26 # without external dependencies except a `typing` module for Python 3.4.
27 #
28 # Feel free to copy this module file to your own project to use Normand.
29 #
30 # Upstream repository: <https://github.com/efficios/normand>.
31
# Module metadata.
__author__ = "Philippe Proulx"
__version__ = "0.23.0"

# Public names of this module (what a star import exposes).
__all__ = [
    "__author__",
    "__version__",
    "ByteOrder",
    "LabelsT",
    "parse",
    "ParseError",
    "ParseErrorMessage",
    "ParseResult",
    "TextLocation",
    "VariablesT",
]
46
47 import re
48 import abc
49 import ast
50 import bz2
51 import sys
52 import copy
53 import enum
54 import gzip
55 import math
56 import base64
57 import quopri
58 import struct
59 import typing
60 import functools
61 from typing import Any, Set, Dict, List, Union, Pattern, Callable, NoReturn, Optional
62
63
# Text location (line and column numbers).
#
# Instances are built with the private _create() class method; calling
# the constructor directly raises `NotImplementedError`.
class TextLocation:
    # Builds and returns a text location having the line number
    # `line_no` and the column number `col_no`, bypassing __init__().
    @classmethod
    def _create(cls, line_no: int, col_no: int):
        self = cls.__new__(cls)
        self._init(line_no, col_no)
        return self

    # Always raises `NotImplementedError`.
    #
    # Takes an explicit `self`, like the constructors of
    # `ParseErrorMessage` and `ParseError`.
    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Actual initializer, called by _create().
    def _init(self, line_no: int, col_no: int):
        self._line_no = line_no
        self._col_no = col_no

    # Line number.
    @property
    def line_no(self):
        return self._line_no

    # Column number.
    @property
    def col_no(self):
        return self._col_no

    def __repr__(self):
        return "TextLocation({}, {})".format(self._line_no, self._col_no)
91
92
# Base of every item: remembers the source text location of the item.
class _Item:
    def __init__(self, text_loc: TextLocation):
        # Only exposed (read-only) through the `text_loc` property
        self._text_loc = text_loc

    @property
    def text_loc(self):
        # Source text location
        return self._text_loc
102
103
# Scalar item: an item having a size.
class _ScalarItem(_Item):
    # Returns the size, in bytes, of this item.
    #
    # Marked abstract: the scalar item classes of this module override
    # this property.
    @property
    @abc.abstractmethod
    def size(self) -> int:
        ...
111
112
# A repeatable item.
#
# Marker class without any member: item classes inherit it to indicate
# that they're repeatable.
class _RepableItem:
    pass
116
117
# Item representing exactly one byte.
class _Byte(_ScalarItem, _RepableItem):
    def __init__(self, val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val

    # Always one byte.
    @property
    def size(self):
        return 1

    # Value of the byte.
    @property
    def val(self):
        return self._val

    def __repr__(self):
        # The value is shown in hexadecimal
        hex_val = hex(self._val)
        loc_repr = repr(self._text_loc)
        return "_Byte({}, {})".format(hex_val, loc_repr)
135
136
# Literal string item: holds the already encoded bytes.
class _LitStr(_ScalarItem, _RepableItem):
    def __init__(self, data: bytes, text_loc: TextLocation):
        super().__init__(text_loc)
        self._data = data

    # Number of encoded bytes.
    @property
    def size(self):
        return len(self._data)

    # Encoded bytes.
    @property
    def data(self):
        return self._data

    def __repr__(self):
        parts = map(repr, (self._data, self._text_loc))
        return "_LitStr({}, {})".format(*parts)
154
155
# Byte order.
#
# The member values are the byte order strings which
# _Parser._bo_from_str() maps to those members.
@enum.unique
class ByteOrder(enum.Enum):
    # Big endian.
    BE = "be"

    # Little endian.
    LE = "le"
164
165
# Item which sets the current byte order.
class _SetBo(_Item):
    def __init__(self, bo: ByteOrder, text_loc: TextLocation):
        super().__init__(text_loc)
        self._bo = bo

    # Byte order to set.
    @property
    def bo(self):
        return self._bo

    def __repr__(self):
        parts = map(repr, (self._bo, self._text_loc))
        return "_SetBo({}, {})".format(*parts)
178
179
# Label item: a name attached to a source location.
class _Label(_Item):
    def __init__(self, name: str, text_loc: TextLocation):
        super().__init__(text_loc)
        self._name = name

    # Name of the label.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        parts = map(repr, (self._name, self._text_loc))
        return "_Label({}, {})".format(*parts)
193
194
# Item which sets the current offset.
class _SetOffset(_Item):
    def __init__(self, val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val

    # New offset value (bytes).
    @property
    def val(self):
        return self._val

    def __repr__(self):
        parts = map(repr, (self._val, self._text_loc))
        return "_SetOffset({}, {})".format(*parts)
208
209
# Item which aligns the current offset, padding with a given byte value.
class _AlignOffset(_Item):
    def __init__(self, val: int, pad_val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val
        self._pad_val = pad_val

    # Value of the padding bytes.
    @property
    def pad_val(self):
        return self._pad_val

    # Alignment value (bits).
    @property
    def val(self):
        return self._val

    def __repr__(self):
        parts = map(repr, (self._val, self._pad_val, self._text_loc))
        return "_AlignOffset({}, {}, {})".format(*parts)
231
232
233 # Mixin of containing an AST expression and its string.
234 class _ExprMixin:
235 def __init__(self, expr_str: str, expr: ast.Expression):
236 self._expr_str = expr_str
237 self._expr = expr
238
239 # Expression string.
240 @property
241 def expr_str(self):
242 return self._expr_str
243
244 # Expression node to evaluate.
245 @property
246 def expr(self):
247 return self._expr
248
249
# Item which fills with a padding byte value until some offset (given
# by an expression).
class _FillUntil(_Item, _ExprMixin):
    def __init__(
        self, expr_str: str, expr: ast.Expression, pad_val: int, text_loc: TextLocation
    ):
        # Both bases need their own initialization
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._pad_val = pad_val

    # Value of the padding bytes.
    @property
    def pad_val(self):
        return self._pad_val

    def __repr__(self):
        fields = (self._expr_str, self._expr, self._pad_val, self._text_loc)
        return "_FillUntil({}, {}, {}, {})".format(*map(repr, fields))
271
272
# Item which assigns the result of an expression to a named variable.
class _VarAssign(_Item, _ExprMixin):
    def __init__(
        self, name: str, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        # Both bases need their own initialization
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._name = name

    # Name of the variable.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        fields = (self._name, self._expr_str, self._expr, self._text_loc)
        return "_VarAssign({}, {}, {}, {})".format(*map(repr, fields))
294
295
# Number having a fixed length (in bits), which may span many bytes.
class _FlNum(_ScalarItem, _RepableItem, _ExprMixin):
    def __init__(
        self,
        expr_str: str,
        expr: ast.Expression,
        len: int,
        bo: Optional[ByteOrder],
        text_loc: TextLocation,
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._bo = bo
        self._len = len

    # Byte order override (`None` if there's none).
    @property
    def bo(self):
        return self._bo

    # Length (bits).
    @property
    def len(self):
        return self._len

    # Size (bytes): eight bits per byte.
    @property
    def size(self):
        bit_len = self._len
        return bit_len // 8

    def __repr__(self):
        fields = (self._expr_str, self._expr, self._len, self._bo, self._text_loc)
        return "_FlNum({}, {}, {}, {}, {})".format(*map(repr, fields))
333
334
# Base of the LEB128 integer items.
class _Leb128Int(_Item, _RepableItem, _ExprMixin):
    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)

    def __repr__(self):
        # Uses the name of the concrete class so that subclasses don't
        # need their own __repr__()
        fields = (self._expr_str, self._expr, self._text_loc)
        return "{}({}, {}, {})".format(self.__class__.__name__, *map(repr, fields))
348
349
# Unsigned LEB128 integer.
#
# Adds nothing to `_Leb128Int`: only the class itself distinguishes it
# from `_SLeb128Int`.
class _ULeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    pass
353
354
# Signed LEB128 integer.
#
# Adds nothing to `_Leb128Int`: only the class itself distinguishes it
# from `_ULeb128Int`.
class _SLeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    pass
358
359
# String item: an expression along with the name of the codec with
# which to encode its result.
class _Str(_Item, _RepableItem, _ExprMixin):
    def __init__(
        self, expr_str: str, expr: ast.Expression, codec: str, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._codec = codec

    # Name of the codec.
    @property
    def codec(self):
        return self._codec

    def __repr__(self):
        fields = (self._expr_str, self._expr, self._codec, self._text_loc)
        return "_Str({}, {}, {}, {})".format(*map(repr, fields))
381
382
# Item which contains other items.
class _Group(_Item, _RepableItem):
    def __init__(self, items: List[_Item], text_loc: TextLocation):
        super().__init__(text_loc)
        self._items = items

    # Items contained in this group.
    @property
    def items(self):
        return self._items

    def __repr__(self):
        parts = map(repr, (self._items, self._text_loc))
        return "_Group({}, {})".format(*parts)
396
397
# Repetition item: a group of items along with an expression.
class _Rep(_Group, _ExprMixin):
    def __init__(
        self,
        items: List[_Item],
        expr_str: str,
        expr: ast.Expression,
        text_loc: TextLocation,
    ):
        # Group part first, then expression part
        super().__init__(items, text_loc)
        _ExprMixin.__init__(self, expr_str, expr)

    def __repr__(self):
        fields = (self._items, self._expr_str, self._expr, self._text_loc)
        return "_Rep({}, {}, {}, {})".format(*map(repr, fields))
417
418
# Conditional item: an expression and one group for each outcome.
class _Cond(_Item, _ExprMixin):
    def __init__(
        self,
        true_item: _Group,
        false_item: _Group,
        expr_str: str,
        expr: ast.Expression,
        text_loc: TextLocation,
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._false_item = false_item
        self._true_item = true_item

    # Item when the condition is false.
    @property
    def false_item(self):
        return self._false_item

    # Item when the condition is true.
    @property
    def true_item(self):
        return self._true_item

    def __repr__(self):
        fields = (
            self._true_item,
            self._false_item,
            self._expr_str,
            self._expr,
            self._text_loc,
        )
        return "_Cond({}, {}, {}, {}, {})".format(*map(repr, fields))
452
453
# Transformation item: a group of items, a transformation name, and
# the function which transforms data.
class _Trans(_Group, _RepableItem):
    def __init__(
        self,
        items: List[_Item],
        name: str,
        func: Callable[[Union[bytes, bytearray]], bytes],
        text_loc: TextLocation,
    ):
        super().__init__(items, text_loc)
        self._func = func
        self._name = name

    # Name of the transformation.
    @property
    def name(self):
        return self._name

    # Returns the result of applying the transformation function to
    # `data`.
    def trans(self, data: Union[bytes, bytearray]):
        transform = self._func
        return transform(data)

    def __repr__(self):
        fields = (self._items, self._name, self._func, self._text_loc)
        return "_Trans({}, {}, {}, {})".format(*map(repr, fields))
482
483
# Macro definition item: a named group of items with parameter names.
class _MacroDef(_Group):
    def __init__(
        self,
        name: str,
        param_names: List[str],
        items: List[_Item],
        text_loc: TextLocation,
    ):
        super().__init__(items, text_loc)
        self._param_names = param_names
        self._name = name

    # Name of the macro.
    @property
    def name(self):
        return self._name

    # Names of the parameters.
    @property
    def param_names(self):
        return self._param_names

    def __repr__(self):
        fields = (self._name, self._param_names, self._items, self._text_loc)
        return "_MacroDef({}, {}, {}, {})".format(*map(repr, fields))
514
515
# Parameter of a macro expansion: an expression, its string, and where
# it is in the source.
class _MacroExpParam:
    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation):
        self._text_loc = text_loc
        self._expr = expr
        self._expr_str = expr_str

    # Source text location.
    @property
    def text_loc(self):
        return self._text_loc

    # Expression.
    @property
    def expr(self):
        return self._expr

    # String of the expression.
    @property
    def expr_str(self):
        return self._expr_str

    def __repr__(self):
        fields = (self._expr_str, self._expr, self._text_loc)
        return "_MacroExpParam({}, {}, {})".format(*map(repr, fields))
542
543
# Macro expansion item: the name of a macro and expansion parameters.
class _MacroExp(_Item, _RepableItem):
    def __init__(
        self,
        name: str,
        params: List[_MacroExpParam],
        text_loc: TextLocation,
    ):
        super().__init__(text_loc)
        self._params = params
        self._name = name

    # Name of the macro.
    @property
    def name(self):
        return self._name

    # Expansion parameters.
    @property
    def params(self):
        return self._params

    def __repr__(self):
        fields = (self._name, self._params, self._text_loc)
        return "_MacroExp({}, {}, {})".format(*map(repr, fields))
572
573
# A single message of a parsing error: some text and the source text
# location to which it applies.
#
# Not constructible directly: built with the private _create() class
# method.
class ParseErrorMessage:
    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Builds and returns a message having the text `text` and the text
    # location `text_loc`, bypassing __init__().
    @classmethod
    def _create(cls, text: str, text_loc: TextLocation):
        msg = cls.__new__(cls)
        msg._init(text, text_loc)
        return msg

    # Actual initializer, called by _create().
    def _init(self, text: str, text_loc: TextLocation):
        self._text_loc = text_loc
        self._text = text

    # Source text location.
    @property
    def text_location(self):
        return self._text_loc

    # Message text.
    @property
    def text(self):
        return self._text
598
599
# A parsing error which holds at least one message
# (`ParseErrorMessage`).
#
# Not constructible directly: built with the private _create() class
# method.
class ParseError(RuntimeError):
    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Builds and returns a parsing error of which the initial message
    # has the text `msg` and the text location `text_loc`, bypassing
    # __init__().
    @classmethod
    def _create(cls, msg: str, text_loc: TextLocation):
        exc = cls.__new__(cls)
        exc._init(msg, text_loc)
        return exc

    # Actual initializer, called by _create().
    def _init(self, msg: str, text_loc: TextLocation):
        super().__init__(msg)
        self._msgs = []  # type: List[ParseErrorMessage]
        self._add_msg(msg, text_loc)

    # Appends a new message (text `msg`, text location `text_loc`) to
    # this error.
    def _add_msg(self, msg: str, text_loc: TextLocation):
        new_msg = ParseErrorMessage._create(  # pyright: ignore[reportPrivateUsage]
            msg, text_loc
        )
        self._msgs.append(new_msg)

    # Messages of this parsing error.
    #
    # The most specific message comes first.
    @property
    def messages(self):
        return self._msgs
629
630
# Creates a parsing error from the message `msg` and the text location
# `text_loc`, and raises it.
def _raise_error(msg: str, text_loc: TextLocation) -> NoReturn:
    exc = ParseError._create(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise exc
634
635
# Adds a message (text `msg`, text location `text_loc`) to the parsing
# error `exc`.
def _add_error_msg(exc: ParseError, msg: str, text_loc: TextLocation):
    add_msg = exc._add_msg  # pyright: ignore[reportPrivateUsage]
    add_msg(msg, text_loc)
639
640
# Adds a message (text `msg`, text location `text_loc`) to the parsing
# error `exc`, and then raises `exc` again.
def _augment_error(exc: ParseError, msg: str, text_loc: TextLocation) -> NoReturn:
    exc._add_msg(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise exc
645
646
647 # Returns a normalized version (so as to be parseable by int()) of
648 # the constant integer string `s`, possibly negative, dealing with
649 # any radix suffix.
650 def _norm_const_int(s: str):
651 neg = ""
652 pos = s
653
654 if s.startswith("-"):
655 neg = "-"
656 pos = s[1:]
657
658 for r in "xXoObB":
659 if pos.startswith("0" + r):
660 # Already correct
661 return s
662
663 # Try suffix
664 asm_suf_base = {
665 "h": "x",
666 "H": "x",
667 "q": "o",
668 "Q": "o",
669 "o": "o",
670 "O": "o",
671 "b": "b",
672 "B": "B",
673 }
674
675 for suf in asm_suf_base:
676 if pos[-1] == suf:
677 s = "{}0{}{}".format(neg, asm_suf_base[suf], pos.rstrip(suf))
678
679 return s
680
681
# Returns the string `s` encoded with the codec `codec`.
#
# Raises `ParseError` (with the text location `text_loc`) if `s` cannot
# be encoded with `codec`.
def _encode_str(s: str, codec: str, text_loc: TextLocation):
    try:
        encoded = s.encode(codec)
    except UnicodeEncodeError:
        error_msg = "Cannot encode `{}` with the `{}` encoding".format(s, codec)
        _raise_error(error_msg, text_loc)
    else:
        return encoded
691
692
# Variables dictionary type (for type hints): variable name to integer,
# floating point number, or string value.
VariablesT = Dict[str, Union[int, float, str]]


# Labels dictionary type (for type hints): label name to integer value.
LabelsT = Dict[str, int]


# Common patterns.

# Python name: a letter or `_` followed with letters, digits, and `_`.
_py_name_pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")

# Positive constant integer: hexadecimal, octal, or binary with a prefix
# (`0x`, `0o`, `0b`), hexadecimal, octal, or binary with a suffix (`h`,
# `q` or `o`, `b`), or decimal, ending at a word boundary.
_pos_const_int_pat = re.compile(
    r"(?:0[Xx][A-Fa-f0-9]+|0[Oo][0-7]+|0[Bb][01]+|[A-Fa-f0-9]+[hH]|[0-7]+[qQoO]|[01]+[bB]|\d+)\b"
)

# Same as above with an optional leading `-` (group `neg`).
_const_int_pat = re.compile(r"(?P<neg>-)?(?:{})".format(_pos_const_int_pat.pattern))

# Constant floating point number: optional sign, digits with a decimal
# point, and optional exponent.
_const_float_pat = re.compile(
    r"[-+]?(?:(?:\d*\.\d+)|(?:\d+\.))(?:[Ee][+-]?\d+)?(?=\W|)"
)


# Macro definition dictionary: macro name to macro definition item.
_MacroDefsT = Dict[str, _MacroDef]
714
715
716 # Normand parser.
717 #
718 # The constructor accepts a Normand input. After building, use the `res`
719 # property to get the resulting main group.
720 class _Parser:
# Builds a parser for the Normand input `normand` and parses it right
# away.
#
# The keys of `variables` and `labels` are the initially known variable
# and label names.
def __init__(self, normand: str, variables: VariablesT, labels: LabelsT):
    # Input and current position within it
    self._normand = normand
    self._at = 0
    self._line_no = 1
    self._col_no = 1

    # Known names
    self._label_names = set(labels)
    self._var_names = set(variables)
    self._macro_defs = {}  # type: _MacroDefsT

    # Base item parsing methods
    self._base_item_parse_funcs = [
        self._try_parse_byte,
        self._try_parse_str,
        self._try_parse_val,
        self._try_parse_var_assign,
        self._try_parse_set_bo,
        self._try_parse_label_or_set_offset,
        self._try_parse_align_offset,
        self._try_parse_fill_until,
        self._try_parse_group,
        self._try_parse_rep_block,
        self._try_parse_cond_block,
        self._try_parse_macro_exp,
        self._try_parse_trans_block,
    ]

    # Parse now
    self._parse()
747
# Result (main group).
#
# NOTE(review): `self._res` isn't assigned in the visible part of this
# class; presumably _parse() sets it (confirm).
@property
def res(self):
    return self._res
752
# Macro definitions (dictionary which the constructor creates empty).
@property
def macro_defs(self):
    return self._macro_defs
757
# Current text location, built from the current line and column
# numbers.
@property
def _text_loc(self):
    line_no, col_no = self._line_no, self._col_no
    return TextLocation._create(line_no, col_no)  # pyright: ignore[reportPrivateUsage]
764
765 # Returns `True` if this parser is done parsing.
766 def _is_done(self):
767 return self._at == len(self._normand)
768
769 # Returns `True` if this parser isn't done parsing.
770 def _isnt_done(self):
771 return not self._is_done()
772
# Raises a parsing error having the message `msg` and the current text
# location.
def _raise_error(self, msg: str) -> NoReturn:
    cur_text_loc = self._text_loc
    _raise_error(msg, cur_text_loc)
777
778 # Tries to make the pattern `pat` match the current substring,
779 # returning the match object and updating `self._at`,
780 # `self._line_no`, and `self._col_no` on success.
781 def _try_parse_pat(self, pat: Pattern[str]):
782 m = pat.match(self._normand, self._at)
783
784 if m is None:
785 return
786
787 # Skip matched string
788 self._at += len(m.group(0))
789
790 # Update line number
791 self._line_no += m.group(0).count("\n")
792
793 # Update column number
794 for i in reversed(range(self._at)):
795 if self._normand[i] == "\n" or i == 0:
796 if i == 0:
797 self._col_no = self._at + 1
798 else:
799 self._col_no = self._at - i
800
801 break
802
803 # Return match object
804 return m
805
806 # Expects the pattern `pat` to match the current substring,
807 # returning the match object and updating `self._at`,
808 # `self._line_no`, and `self._col_no` on success, or raising a parse
809 # error with the message `error_msg` on error.
810 def _expect_pat(self, pat: Pattern[str], error_msg: str):
811 # Match
812 m = self._try_parse_pat(pat)
813
814 if m is None:
815 # No match: error
816 self._raise_error(error_msg)
817
818 # Return match object
819 return m
820
821 # Patterns for _skip_*()
822 _comment_pat = re.compile(r"#[^#]*?(?:$|#)", re.M)
823 _ws_or_comments_pat = re.compile(r"(?:\s|{})*".format(_comment_pat.pattern), re.M)
824 _ws_or_syms_or_comments_pat = re.compile(
825 r"(?:[\s/\\?&:;.,_=|-]|{})*".format(_comment_pat.pattern), re.M
826 )
827
828 # Skips as many whitespaces and comments as possible, but not
829 # insignificant symbol characters.
830 def _skip_ws_and_comments(self):
831 self._try_parse_pat(self._ws_or_comments_pat)
832
833 # Skips as many whitespaces, insignificant symbol characters, and
834 # comments as possible.
835 def _skip_ws_and_comments_and_syms(self):
836 self._try_parse_pat(self._ws_or_syms_or_comments_pat)
837
# Pattern for _try_parse_hex_byte()
_nibble_pat = re.compile(r"[A-Fa-f0-9]")

# Tries to parse a hexadecimal byte (two nibbles, possibly separated by
# whitespaces, comments, and insignificant symbols), returning a byte
# item on success.
def _try_parse_hex_byte(self):
    start_text_loc = self._text_loc

    # High nibble
    high_m = self._try_parse_pat(self._nibble_pat)

    if high_m is None:
        # Not a hexadecimal byte
        return

    # A low nibble is now required
    self._skip_ws_and_comments_and_syms()
    low_m = self._expect_pat(self._nibble_pat, "Expecting another hexadecimal nibble")

    # Build item from both nibbles
    hex_digits = high_m.group(0) + low_m.group(0)
    return _Byte(int(hex_digits, 16), start_text_loc)
861
# Patterns for _try_parse_bin_byte()
_bin_byte_bit_pat = re.compile(r"[01]")
_bin_byte_prefix_pat = re.compile(r"%+")

# Tries to parse binary bytes (one per `%` of the prefix, eight bits
# each), returning a byte item if there's a single byte or a group of
# byte items otherwise.
def _try_parse_bin_byte(self):
    start_text_loc = self._text_loc

    # Prefix
    prefix_m = self._try_parse_pat(self._bin_byte_prefix_pat)

    if prefix_m is None:
        # Not a binary byte
        return

    # One byte per `%`
    byte_items = []  # type: List[_Item]
    byte_count = len(prefix_m.group(0))

    for _byte_index in range(byte_count):
        self._skip_ws_and_comments_and_syms()
        byte_text_loc = self._text_loc
        bit_str = ""

        # Eight bits per byte
        for _bit_index in range(8):
            self._skip_ws_and_comments_and_syms()
            bit_m = self._expect_pat(
                self._bin_byte_bit_pat, "Expecting a bit (`0` or `1`)"
            )
            bit_str += bit_m.group(0)

        byte_items.append(_Byte(int(bit_str, 2), byte_text_loc))

    if len(byte_items) != 1:
        # Many bytes: group
        return _Group(byte_items, start_text_loc)

    # Single byte
    return byte_items[0]
901
# Patterns for _try_parse_dec_byte()
_dec_byte_prefix_pat = re.compile(r"\$")
_dec_byte_val_pat = re.compile(r"(?P<neg>-?)(?P<val>\d+)")

# Tries to parse a decimal byte (`$` followed with a decimal constant
# from -128 to 255), returning a byte item on success.
def _try_parse_dec_byte(self):
    start_text_loc = self._text_loc

    # Prefix
    if self._try_parse_pat(self._dec_byte_prefix_pat) is None:
        # Not a decimal byte
        return

    # The value is now required
    self._skip_ws_and_comments()
    val_m = self._expect_pat(self._dec_byte_val_pat, "Expecting a decimal constant")
    val = int(val_m.group("val"))

    if val_m.group("neg") == "-":
        val = -val

    # Validate range
    if not -128 <= val <= 255:
        _raise_error("Invalid decimal byte value {}".format(val), start_text_loc)

    # Build item (two's complement for a negative value)
    return _Byte(val % 256, start_text_loc)
931
# Tries to parse a byte (hexadecimal, then binary, then decimal form),
# returning the item of the first form which matches.
def _try_parse_byte(self):
    parse_funcs = (
        self._try_parse_hex_byte,
        self._try_parse_bin_byte,
        self._try_parse_dec_byte,
    )

    for parse_func in parse_funcs:
        item = parse_func()

        if item is not None:
            return item
951
952 # Strings corresponding to escape sequence characters
953 _lit_str_escape_seq_strs = {
954 "0": "\0",
955 "a": "\a",
956 "b": "\b",
957 "e": "\x1b",
958 "f": "\f",
959 "n": "\n",
960 "r": "\r",
961 "t": "\t",
962 "v": "\v",
963 "\\": "\\",
964 '"': '"',
965 }
966
967 # Patterns for _try_parse_lit_str()
968 _lit_str_prefix_suffix_pat = re.compile(r'"')
969 _lit_str_contents_pat = re.compile(r'(?:(?:\\.)|[^"])*')
970
971 # Parses a literal string between double quotes (without an encoding
972 # prefix) and returns the resulting string.
973 def _try_parse_lit_str(self, with_prefix: bool):
974 # Match prefix if needed
975 if with_prefix:
976 if self._try_parse_pat(self._lit_str_prefix_suffix_pat) is None:
977 # No match
978 return
979
980 # Expect literal string
981 m = self._expect_pat(self._lit_str_contents_pat, "Expecting a literal string")
982
983 # Expect end of string
984 self._expect_pat(
985 self._lit_str_prefix_suffix_pat, 'Expecting `"` (end of literal string)'
986 )
987
988 # Replace escape sequences
989 val = m.group(0)
990
991 for ec in '0abefnrtv"\\':
992 val = val.replace(r"\{}".format(ec), self._lit_str_escape_seq_strs[ec])
993
994 # Return string
995 return val
996
997 # Patterns for _try_parse_utf_str_encoding()
998 _str_encoding_utf_prefix_pat = re.compile(r"u")
999 _str_encoding_utf_pat = re.compile(r"(?:8|(?:(?:16|32)(?:[bl]e)))\b")
1000
1001 # Tries to parse a UTF encoding specification, returning the Python
1002 # codec name on success.
1003 def _try_parse_utf_str_encoding(self):
1004 # Match prefix
1005 if self._try_parse_pat(self._str_encoding_utf_prefix_pat) is None:
1006 # No match
1007 return
1008
1009 # Expect UTF specification
1010 m = self._expect_pat(
1011 self._str_encoding_utf_pat,
1012 "Expecting `8`, `16be`, `16le`, `32be` or `32le`",
1013 )
1014
1015 # Convert to codec name
1016 return {
1017 "8": "utf_8",
1018 "16be": "utf_16_be",
1019 "16le": "utf_16_le",
1020 "32be": "utf_32_be",
1021 "32le": "utf_32_le",
1022 }[m.group(0)]
1023
1024 # Patterns for _try_parse_str_encoding()
1025 _str_encoding_gen_prefix_pat = re.compile(r"s")
1026 _str_encoding_colon_pat = re.compile(r":")
1027 _str_encoding_non_utf_pat = re.compile(r"latin(?:[1-9]|10)\b")
1028
1029 # Tries to parse a string encoding specification, returning the
1030 # Python codec name on success.
1031 #
1032 # Requires the general prefix (`s:`) if `req_gen_prefix` is `True`.
1033 def _try_parse_str_encoding(self, req_gen_prefix: bool = False):
1034 # General prefix?
1035 if self._try_parse_pat(self._str_encoding_gen_prefix_pat) is not None:
1036 # Expect `:`
1037 self._skip_ws_and_comments()
1038 self._expect_pat(self._str_encoding_colon_pat, "Expecting `:`")
1039
1040 # Expect encoding specification
1041 self._skip_ws_and_comments()
1042
1043 # UTF?
1044 codec = self._try_parse_utf_str_encoding()
1045
1046 if codec is not None:
1047 return codec
1048
1049 # Expect Latin
1050 m = self._expect_pat(
1051 self._str_encoding_non_utf_pat,
1052 "Expecting `u8`, `u16be`, `u16le`, `u32be`, `u32le`, or `latin1` to `latin10`",
1053 )
1054 return m.group(0)
1055
1056 # UTF?
1057 if not req_gen_prefix:
1058 return self._try_parse_utf_str_encoding()
1059
# Patterns for _try_parse_str()
_lit_str_prefix_pat = re.compile(r'"')
_str_prefix_pat = re.compile(r'"|\{')
_str_expr_pat = re.compile(r"[^}]+")
_str_expr_suffix_pat = re.compile(r"\}")

# Tries to parse a string, returning a literal string item (`"..."`
# form) or a string item (`{...}` expression form, which requires an
# encoding specification) on success.
def _try_parse_str(self):
    start_text_loc = self._text_loc

    # Optional encoding specification
    codec = self._try_parse_str_encoding()
    self._skip_ws_and_comments()

    if codec is not None:
        # Encoding present: a string prefix is now required
        prefix_m = self._expect_pat(self._str_prefix_pat, 'Expecting `"` or `{`')
    else:
        # No encoding: only a literal string (UTF-8) is legal
        prefix_m = self._try_parse_pat(self._lit_str_prefix_pat)

        if prefix_m is None:
            return

    if prefix_m.group(0) != '"':
        # Expression form
        self._skip_ws_and_comments()
        expr_text_loc = self._text_loc
        expr_m = self._expect_pat(self._str_expr_pat, "Expecting an expression")

        # `}` is required
        self._expect_pat(self._str_expr_suffix_pat, "Expecting `}`")

        # Expression string to expression node
        expr_str, expr = self._ast_expr_from_str(expr_m.group(0), expr_text_loc)

        # This form is only reachable with an encoding specification
        assert codec is not None
        return _Str(expr_str, expr, codec, start_text_loc)

    # Literal form
    lit_text_loc = self._text_loc
    text = self._try_parse_lit_str(False)

    if text is None:
        self._raise_error("Expecting a literal string")

    # Encode now (UTF-8 without an encoding specification)
    data = _encode_str(text, "utf_8" if codec is None else codec, lit_text_loc)
    return _LitStr(data, start_text_loc)
1118
# Common right parenthesis pattern
_right_paren_pat = re.compile(r"\)")

# Patterns for _try_parse_group()
_group_prefix_pat = re.compile(r"\(|!g(?:roup)?\b")

# Tries to parse a group (items between `(` and `)`, or between
# `!group`/`!g` and the block end), returning a group item on success.
def _try_parse_group(self):
    start_text_loc = self._text_loc

    # Opening
    open_m = self._try_parse_pat(self._group_prefix_pat)

    if open_m is None:
        # Not a group
        return

    # Contained items
    items = self._parse_items()

    # The closing must correspond to the opening
    self._skip_ws_and_comments_and_syms()

    if open_m.group(0) != "(":
        end_pat, end_str = self._block_end_pat, "!end"
    else:
        end_pat, end_str = self._right_paren_pat, ")"

    self._expect_pat(
        end_pat, "Expecting an item or `{}` (end of group)".format(end_str)
    )

    return _Group(items, start_text_loc)
1153
1154 # Returns a stripped expression string and an AST expression node
1155 # from the expression string `expr_str` at text location `text_loc`.
1156 def _ast_expr_from_str(self, expr_str: str, text_loc: TextLocation):
1157 # Create an expression node from the expression string
1158 expr_str = expr_str.strip().replace("\n", " ")
1159
1160 try:
1161 expr = ast.parse(expr_str, mode="eval")
1162 except SyntaxError:
1163 _raise_error(
1164 "Invalid expression `{}`: invalid syntax".format(expr_str),
1165 text_loc,
1166 )
1167
1168 return expr_str, expr
1169
# Returns the `ByteOrder` member corresponding to the byte order string
# `bo_str`, which must be valid (`be` or `le`).
@staticmethod
def _bo_from_str(bo_str: str):
    byte_orders = {
        "be": ByteOrder.BE,
        "le": ByteOrder.LE,
    }
    return byte_orders[bo_str]
1178
# Patterns for _try_parse_val()
_val_prefix_pat = re.compile(r"\[")
_val_expr_pat = re.compile(r"([^\]:]+):")
_fl_num_len_fmt_pat = re.compile(r"(?P<len>8|16|24|32|40|48|56|64)(?P<bo>[bl]e)?")
_leb128_int_fmt_pat = re.compile(r"(u|s)leb128")
_val_suffix_pat = re.compile(r"]")

# Tries to parse a value (`[`, an expression, `:`, a format, and `]`),
# returning an item on success.
#
# The format is one of: a fixed length in bits with an optional byte
# order override, `uleb128`, `sleb128`, or `s:` followed with an
# encoding name.
def _try_parse_val(self):
    # Prefix
    if self._try_parse_pat(self._val_prefix_pat) is None:
        # Not a value
        return

    # Expression and `:` are now required
    self._skip_ws_and_comments()
    expr_text_loc = self._text_loc
    expr_m = self._expect_pat(self._val_expr_pat, "Expecting an expression")

    # Expression string to expression node
    expr_str, expr = self._ast_expr_from_str(expr_m.group(1), expr_text_loc)

    # Try each format in turn until one of them creates the item
    self._skip_ws_and_comments()
    item = None
    fl_num_m = self._try_parse_pat(self._fl_num_len_fmt_pat)

    if fl_num_m is not None:
        # Fixed-length number with optional byte order override
        bo_str = fl_num_m.group("bo")
        bo = None if bo_str is None else self._bo_from_str(bo_str)
        item = _FlNum(expr_str, expr, int(fl_num_m.group("len")), bo, expr_text_loc)

    if item is None:
        leb128_m = self._try_parse_pat(self._leb128_int_fmt_pat)

        if leb128_m is not None:
            # LEB128 integer
            cls = _ULeb128Int if leb128_m.group(1) == "u" else _SLeb128Int
            item = cls(expr_str, expr, expr_text_loc)

    if item is None:
        codec = self._try_parse_str_encoding(True)

        if codec is None:
            # At this point it's invalid
            self._raise_error(
                "Expecting a fixed length (multiple of eight bits and optional `be` or `le`), `uleb128`, `sleb128`, or `s:` followed with a valid encoding (`u8`, `u16be`, `u16le`, `u32be`, `u32le`, or `latin1` to `latin10`)"
            )

        # String
        item = _Str(expr_str, expr, codec, expr_text_loc)

    # `]` is required
    self._skip_ws_and_comments()
    self._expect_pat(self._val_suffix_pat, "Expecting `]`")

    return item
1250
# Patterns for _try_parse_var_assign()
_var_assign_prefix_pat = re.compile(r"\{")
_var_assign_equal_pat = re.compile(r"=")
_var_assign_expr_pat = re.compile(r"[^}]+")
_var_assign_suffix_pat = re.compile(r"\}")

# Tries to parse a variable assignment: `{`, a name, `=`, an
# expression, and `}`.
#
# The name must not be the reserved `ICITTE` name nor the name of an
# already known label. On success, the name is added to the known
# variable names.
#
# Returns the variable assignment item, or `None` if the current text
# doesn't start with `{`.
def _try_parse_var_assign(self):
    if self._try_parse_pat(self._var_assign_prefix_pat) is None:
        # Not a variable assignment
        return

    # Variable name
    self._skip_ws_and_comments()
    name_text_loc = self._text_loc
    name = self._expect_pat(_py_name_pat, "Expecting a valid Python name").group(0)

    # `=`
    self._skip_ws_and_comments()
    self._expect_pat(self._var_assign_equal_pat, "Expecting `=`")

    # Raw expression text (validated below, once the syntax is complete)
    self._skip_ws_and_comments()
    expr_text_loc = self._text_loc
    raw_expr_m = self._expect_pat(self._var_assign_expr_pat, "Expecting an expression")

    # `}`
    self._skip_ws_and_comments()
    self._expect_pat(self._var_assign_suffix_pat, "Expecting `}`")

    # The name must be neither reserved nor an existing label name
    if name == _icitte_name:
        _raise_error(
            "`{}` is a reserved variable name".format(_icitte_name), name_text_loc
        )

    if name in self._label_names:
        _raise_error("Existing label named `{}`".format(name), name_text_loc)

    # Parse the expression itself
    expr_str, expr = self._ast_expr_from_str(raw_expr_m.group(0), expr_text_loc)

    # Remember this variable name
    self._var_names.add(name)
    return _VarAssign(name, expr_str, expr, name_text_loc)
1306
# Pattern for _try_parse_set_bo()
_set_bo_pat = re.compile(r"!([bl]e)\b")

# Tries to parse a byte order setting (`!be` or `!le`).
#
# Returns the byte order setting item, located where the match
# started, or `None` if there's no match.
def _try_parse_set_bo(self):
    begin_text_loc = self._text_loc
    set_bo_m = self._try_parse_pat(self._set_bo_pat)

    if set_bo_m is None:
        # Not a byte order setting
        return

    # The pattern only accepts `be` and `le`, which are exactly the
    # strings which _bo_from_str() knows
    return _SetBo(self._bo_from_str(set_bo_m.group(1)), begin_text_loc)
1330
# Tries to parse the value of an offset setting (what follows the
# initial `<`): a positive constant integer.
#
# Returns the offset setting item, or `None` if there's no match.
def _try_parse_set_offset_val(self):
    begin_text_loc = self._text_loc
    offset_m = self._try_parse_pat(_pos_const_int_pat)

    if offset_m is None:
        # Not an offset setting value
        return

    # Base 0 lets int() honour any base prefix of the normalized
    # constant integer
    offset_val = int(_norm_const_int(offset_m.group(0)), 0)
    return _SetOffset(offset_val, begin_text_loc)
1345
# Tries to parse a label name (what follows the initial `<`).
#
# The name must not be, in this order of precedence: the reserved
# `ICITTE` name, the name of an already known label, or the name of an
# already known variable. On success, the name is added to the known
# label names.
#
# Returns the label item, or `None` if the current text isn't a name.
def _try_parse_label_name(self):
    begin_text_loc = self._text_loc
    name_m = self._try_parse_pat(_py_name_pat)

    if name_m is None:
        # Not a label name
        return

    name = name_m.group(0)

    # Find the first violated rule, if any
    if name == _icitte_name:
        err_msg = "`{}` is a reserved label name".format(_icitte_name)
    elif name in self._label_names:
        err_msg = "Duplicate label name `{}`".format(name)
    elif name in self._var_names:
        err_msg = "Existing variable named `{}`".format(name)
    else:
        err_msg = None

    if err_msg is not None:
        _raise_error(err_msg, begin_text_loc)

    # Remember this label name
    self._label_names.add(name)
    return _Label(name, begin_text_loc)
1377
1378 # Patterns for _try_parse_label_or_set_offset()
1379 _label_set_offset_prefix_pat = re.compile(r"<")
1380 _label_set_offset_suffix_pat = re.compile(r">")
1381
1382 # Tries to parse a label or an offset setting, returning an item on
1383 # success.
1384 def _try_parse_label_or_set_offset(self):
1385 # Match prefix
1386 if self._try_parse_pat(self._label_set_offset_prefix_pat) is None:
1387 # No match
1388 return
1389
1390 # Offset setting item?
1391 self._skip_ws_and_comments()
1392 item = self._try_parse_set_offset_val()
1393
1394 if item is None:
1395 # Label item?
1396 item = self._try_parse_label_name()
1397
1398 if item is None:
1399 # At this point it's invalid
1400 self._raise_error("Expecting a label name or an offset setting value")
1401
1402 # Expect suffix
1403 self._skip_ws_and_comments()
1404 self._expect_pat(self._label_set_offset_suffix_pat, "Expecting `>`")
1405 return item
1406
# Pattern for _parse_pad_val()
_pad_val_prefix_pat = re.compile(r"~")

# Parses an optional padding value: `~` followed with a positive
# constant integer which must not exceed 255.
#
# Returns the padding byte value, or 0 if the current text (after
# skipping whitespaces and comments) doesn't start with `~`.
def _parse_pad_val(self):
    self._skip_ws_and_comments()

    if self._try_parse_pat(self._pad_val_prefix_pat) is None:
        # No explicit padding value: default
        return 0

    # Byte value
    self._skip_ws_and_comments()
    pad_val_text_loc = self._text_loc
    byte_m = self._expect_pat(
        _pos_const_int_pat,
        "Expecting a positive constant integer (byte value)",
    )
    pad_val = int(_norm_const_int(byte_m.group(0)), 0)

    # Must fit in a single byte
    if pad_val > 255:
        _raise_error(
            "Invalid padding byte value {}".format(pad_val),
            pad_val_text_loc,
        )

    return pad_val
1435
# Patterns for _try_parse_align_offset()
_align_offset_prefix_pat = re.compile(r"@")
_align_offset_val_pat = re.compile(r"\d+")

# Tries to parse an offset alignment: `@`, an alignment in bits (a
# positive multiple of eight), and an optional padding value (see
# _parse_pad_val()).
#
# Returns the offset alignment item, or `None` if the current text
# doesn't start with `@`.
def _try_parse_align_offset(self):
    begin_text_loc = self._text_loc

    if self._try_parse_pat(self._align_offset_prefix_pat) is None:
        # Not an offset alignment
        return

    # Alignment value
    self._skip_ws_and_comments()
    align_text_loc = self._text_loc
    align_m = self._expect_pat(
        self._align_offset_val_pat,
        "Expecting an alignment (positive multiple of eight bits)",
    )
    val = int(align_m.group(0))

    if not (val > 0 and (val % 8) == 0):
        _raise_error(
            "Invalid alignment value {} (not a positive multiple of eight)".format(
                val
            ),
            align_text_loc,
        )

    # Optional padding value, then the item
    return _AlignOffset(val, self._parse_pad_val(), begin_text_loc)
1474
# Patterns for _expect_expr()
_inner_expr_prefix_pat = re.compile(r"\{")
_inner_expr_pat = re.compile(r"[^}]+")
_inner_expr_suffix_pat = re.compile(r"\}")

# Parses an expression outside a `{`/`}` context.
#
# The accepted forms are tried in this order:
#
# 1. If `accept_const_float` is `True`: a constant floating point
#    number.
#
# 2. If `accept_const_int` is `True`: a constant integer, which may
#    only be negative if `allow_neg_int` is `True` (a negative one is
#    an error otherwise).
#
# 3. A Python name.
#
# 4. If `accept_lit_str` is `True`: a literal string.
#
# 5. A Python expression within `{` and `}`.
#
# Returns the stripped expression string and the AST expression node
# (see _ast_expr_from_str()), or raises a parse error of which the
# message lists what's accepted.
def _expect_expr(
    self,
    accept_const_int: bool = False,
    allow_neg_int: bool = False,
    accept_const_float: bool = False,
    accept_lit_str: bool = False,
):
    begin_text_loc = self._text_loc

    # Constant floating point number
    if accept_const_float:
        float_m = self._try_parse_pat(_const_float_pat)

        if float_m is not None:
            return self._ast_expr_from_str(float_m.group(0), begin_text_loc)

    # Constant integer
    if accept_const_int:
        int_m = self._try_parse_pat(_const_int_pat)

        if int_m is not None:
            if not allow_neg_int and int_m.group("neg") == "-":
                _raise_error("Expecting a positive constant integer", begin_text_loc)

            return self._ast_expr_from_str(
                _norm_const_int(int_m.group(0)), begin_text_loc
            )

    # Name
    name_m = self._try_parse_pat(_py_name_pat)

    if name_m is not None:
        return self._ast_expr_from_str(name_m.group(0), begin_text_loc)

    # Literal string: its Python representation is the expression
    if accept_lit_str:
        lit_str = self._try_parse_lit_str(True)

        if lit_str is not None:
            return self._ast_expr_from_str(repr(lit_str), begin_text_loc)

    # Only `{` remains: prepare the message listing everything accepted
    optional_parts = []  # type: List[str]

    if accept_const_int:
        optional_parts.append(
            "a {}constant integer".format("" if allow_neg_int else "positive ")
        )

    if accept_const_float:
        optional_parts.append("a constant floating point number")

    if accept_lit_str:
        optional_parts.append("a literal string")

    # With only the two mandatory parts, the result is "a name or `{`"
    sep = ", " if optional_parts else " "
    msg_accepted = sep.join(optional_parts + ["a name", "or `{`"])

    # `{`
    self._expect_pat(
        self._inner_expr_prefix_pat,
        "Expecting {}".format(msg_accepted),
    )

    # Inner expression text
    self._skip_ws_and_comments()
    expr_text_loc = self._text_loc
    inner_m = self._expect_pat(self._inner_expr_pat, "Expecting an expression")

    # `}`
    self._skip_ws_and_comments()
    self._expect_pat(self._inner_expr_suffix_pat, "Expecting `}`")
    return self._ast_expr_from_str(inner_m.group(0), expr_text_loc)
1571
# Patterns for _try_parse_fill_until()
_fill_until_prefix_pat = re.compile(r"\+")

# Not referenced by _try_parse_fill_until() itself, which relies on
# _parse_pad_val() for the padding value
_fill_until_pad_val_prefix_pat = re.compile(r"~")

# Tries to parse a filling: `+`, an expression (which may be a
# positive constant integer), and an optional padding value (see
# _parse_pad_val()).
#
# Returns the filling item, or `None` if the current text doesn't
# start with `+`.
def _try_parse_fill_until(self):
    begin_text_loc = self._text_loc

    if self._try_parse_pat(self._fill_until_prefix_pat) is None:
        # Not a filling
        return

    # Expression
    self._skip_ws_and_comments()
    fill_expr_str, fill_expr = self._expect_expr(accept_const_int=True)

    # Optional padding value
    pad_val = self._parse_pad_val()
    return _FillUntil(fill_expr_str, fill_expr, pad_val, begin_text_loc)
1594
1595 # Parses the multiplier expression of a repetition (block or
1596 # post-item) and returns the expression string and AST node.
1597 def _expect_rep_mul_expr(self):
1598 return self._expect_expr(accept_const_int=True)
1599
# Common block end pattern
_block_end_pat = re.compile(r"!end\b")

# Pattern for _try_parse_rep_block()
_rep_block_prefix_pat = re.compile(r"!r(?:epeat)?\b")

# Tries to parse a repetition block: `!repeat` (or `!r`), a multiplier
# expression, items, and `!end`.
#
# Returns the repetition item, or `None` if the current text doesn't
# start with the block prefix.
def _try_parse_rep_block(self):
    begin_text_loc = self._text_loc

    if self._try_parse_pat(self._rep_block_prefix_pat) is None:
        # Not a repetition block
        return

    # Multiplier expression
    self._skip_ws_and_comments()
    mul_expr_str, mul_expr = self._expect_rep_mul_expr()

    # Items to repeat
    self._skip_ws_and_comments_and_syms()
    rep_items = self._parse_items()

    # `!end`
    self._skip_ws_and_comments_and_syms()
    self._expect_pat(
        self._block_end_pat, "Expecting an item or `!end` (end of repetition block)"
    )
    return _Rep(rep_items, mul_expr_str, mul_expr, begin_text_loc)
1632
# Pattern for _try_parse_cond_block()
_cond_block_prefix_pat = re.compile(r"!if\b")
_cond_block_else_pat = re.compile(r"!else\b")

# Tries to parse a conditional block: `!if`, an expression, the "true"
# items, optionally `!else` and the "false" items, and `!end`.
#
# Without `!else`, the "false" group is empty and located at the
# beginning of the block.
#
# Returns the conditional item, or `None` if the current text doesn't
# start with `!if`.
def _try_parse_cond_block(self):
    begin_text_loc = self._text_loc

    if self._try_parse_pat(self._cond_block_prefix_pat) is None:
        # Not a conditional block
        return

    # Condition expression
    self._skip_ws_and_comments()
    cond_expr_str, cond_expr = self._expect_expr()

    # "True" items
    self._skip_ws_and_comments_and_syms()
    true_group_text_loc = self._text_loc
    true_items = self._parse_items()

    # "False" items: nothing unless there's an `!else` part
    false_items = []  # type: List[_Item]
    false_group_text_loc = begin_text_loc
    self._skip_ws_and_comments_and_syms()
    has_else = self._try_parse_pat(self._cond_block_else_pat) is not None

    if has_else:
        self._skip_ws_and_comments_and_syms()
        false_group_text_loc = self._text_loc
        false_items = self._parse_items()

    # `!end`
    self._expect_pat(
        self._block_end_pat,
        "Expecting an item, `!else`, or `!end` (end of conditional block)",
    )

    true_group = _Group(true_items, true_group_text_loc)
    false_group = _Group(false_items, false_group_text_loc)
    return _Cond(true_group, false_group, cond_expr_str, cond_expr, begin_text_loc)
1681
# Pattern for _try_parse_trans_block()
_trans_block_prefix_pat = re.compile(r"!t(?:ransform)?\b")
_trans_block_type_pat = re.compile(
    r"(?:(?:base|b)64(?:u)?|(?:base|b)(?:16|32)|(?:ascii|a|base|b)85(?:p)?|(?:quopri|qp)(?:t)?|gzip|gz|bzip2|bz2)\b"
)

# Tries to parse a transformation block: `!transform` (or `!t`), a
# known transformation type, items, and `!end`.
#
# Returns the transformation item, which holds the items, a
# human-readable transformation name, and the function which encodes
# or compresses bytes, or `None` if the current text doesn't start
# with the block prefix.
def _try_parse_trans_block(self):
    begin_text_loc = self._text_loc

    if self._try_parse_pat(self._trans_block_prefix_pat) is None:
        # Not a transformation block
        return

    # Transformation type
    self._skip_ws_and_comments()
    type_m = self._expect_pat(
        self._trans_block_type_pat, "Expecting a known transformation type"
    )

    # Items to transform
    self._skip_ws_and_comments_and_syms()
    trans_items = self._parse_items()

    # `!end`
    self._expect_pat(
        self._block_end_pat,
        "Expecting an item or `!end` (end of transformation block)",
    )

    # Each entry: accepted type names, human-readable name, function
    trans_specs = (
        (("base64", "b64"), "standard Base64", base64.standard_b64encode),
        (("base64u", "b64u"), "URL-safe Base64", base64.urlsafe_b64encode),
        (("base32", "b32"), "Base32", base64.b32encode),
        (("base16", "b16"), "Base16", base64.b16encode),
        (("ascii85", "a85"), "Ascii85", base64.a85encode),
        (
            ("ascii85p", "a85p"),
            "padded Ascii85",
            functools.partial(base64.a85encode, pad=True),
        ),
        (("base85", "b85"), "Base85", base64.b85encode),
        (
            ("base85p", "b85p"),
            "padded Base85",
            functools.partial(base64.b85encode, pad=True),
        ),
        (("quopri", "qp"), "MIME quoted-printable", quopri.encodestring),
        (
            ("quoprit", "qpt"),
            "MIME quoted-printable (with quoted tabs)",
            functools.partial(quopri.encodestring, quotetabs=True),
        ),
        (("gzip", "gz"), "gzip", gzip.compress),
        (("bzip2", "bz2"), "bzip2", bz2.compress),
    )

    # The type pattern only matches the type names listed above
    trans_by_type = {
        type_name: (trans_name, trans_func)
        for type_names, trans_name, trans_func in trans_specs
        for type_name in type_names
    }
    name, func = trans_by_type[type_m.group(0)]
    return _Trans(trans_items, name, func, begin_text_loc)
1762
# Common left parenthesis pattern
_left_paren_pat = re.compile(r"\(")

# Patterns for _try_parse_macro_def() and _try_parse_macro_exp()
_macro_params_comma_pat = re.compile(",")

# Patterns for _try_parse_macro_def()
_macro_def_prefix_pat = re.compile(r"!m(?:acro)?\b")

# Tries to parse a macro definition: `!macro` (or `!m`), a macro name,
# comma-separated parameter names between `(` and `)`, items, and
# `!end`.
#
# The items of the macro are parsed with initially empty sets of known
# variable and label names; the previous sets are put back afterwards.
#
# On success, registers the definition in `self._macro_defs` and
# returns `True`. Returns `False` if the current text doesn't start
# with the block prefix.
def _try_parse_macro_def(self):
    begin_text_loc = self._text_loc

    if self._try_parse_pat(self._macro_def_prefix_pat) is None:
        # Not a macro definition
        return False

    # Macro name, which must be new
    self._skip_ws_and_comments()
    name_text_loc = self._text_loc
    name = self._expect_pat(_py_name_pat, "Expecting a valid macro name").group(0)

    if name in self._macro_defs:
        _raise_error("Duplicate macro named `{}`".format(name), name_text_loc)

    # `(`
    self._skip_ws_and_comments()
    self._expect_pat(self._left_paren_pat, "Expecting `(`")

    # Comma-separated parameter names up to `)`
    param_names = []  # type: List[str]
    self._skip_ws_and_comments()

    while self._try_parse_pat(self._right_paren_pat) is None:
        # A comma must separate a parameter name from the previous one
        if len(param_names) > 0:
            self._expect_pat(self._macro_params_comma_pat, "Expecting `,`")

        # Parameter name, which must be unique
        self._skip_ws_and_comments()
        param_text_loc = self._text_loc
        param_name = self._expect_pat(
            _py_name_pat, "Expecting valid parameter name"
        ).group(0)

        if param_name in param_names:
            _raise_error(
                "Duplicate macro parameter named `{}`".format(param_name),
                param_text_loc,
            )

        param_names.append(param_name)
        self._skip_ws_and_comments()

    # Items, parsed with their own known variable and label names
    self._skip_ws_and_comments_and_syms()
    saved_var_names = self._var_names.copy()
    saved_label_names = self._label_names.copy()
    self._var_names = set()  # type: Set[str]
    self._label_names = set()  # type: Set[str]
    macro_items = self._parse_items()
    self._var_names = saved_var_names
    self._label_names = saved_label_names

    # `!end`
    self._expect_pat(
        self._block_end_pat, "Expecting an item or `!end` (end of macro block)"
    )

    # Register the definition
    self._macro_defs[name] = _MacroDef(name, param_names, macro_items, begin_text_loc)
    return True
1846
# Patterns for _try_parse_macro_exp()
_macro_exp_prefix_pat = re.compile(r"m\b")
_macro_exp_colon_pat = re.compile(r":")

# Tries to parse a macro expansion: `m`, `:`, the name of a known
# macro, and comma-separated parameter values between `(` and `)`.
#
# Each parameter value is whatever _expect_expr() accepts, including
# constant integers (negative ones too), constant floating point
# numbers, and literal strings. The number of values must match the
# number of parameters of the macro definition.
#
# Returns the macro expansion item, or `None` if the current text
# doesn't start with the prefix.
def _try_parse_macro_exp(self):
    begin_text_loc = self._text_loc

    if self._try_parse_pat(self._macro_exp_prefix_pat) is None:
        # Not a macro expansion
        return

    # `:`
    self._skip_ws_and_comments()
    self._expect_pat(self._macro_exp_colon_pat, "Expecting `:`")

    # Name of an existing macro
    self._skip_ws_and_comments()
    name_text_loc = self._text_loc
    name = self._expect_pat(_py_name_pat, "Expecting a valid macro name").group(0)
    macro_def = self._macro_defs.get(name)

    if macro_def is None:
        _raise_error("Unknown macro name `{}`".format(name), name_text_loc)

    # `(`
    self._skip_ws_and_comments()
    self._expect_pat(self._left_paren_pat, "Expecting `(`")

    # Comma-separated parameter values up to `)`
    params_text_loc = self._text_loc
    params = []  # type: List[_MacroExpParam]
    self._skip_ws_and_comments()

    while self._try_parse_pat(self._right_paren_pat) is None:
        # A comma must separate a value from the previous one
        if len(params) > 0:
            self._expect_pat(self._macro_params_comma_pat, "Expecting `,`")

        # Parameter value
        self._skip_ws_and_comments()
        param_text_loc = self._text_loc
        param_expr_ret = self._expect_expr(
            accept_const_int=True,
            allow_neg_int=True,
            accept_const_float=True,
            accept_lit_str=True,
        )
        params.append(_MacroExpParam(*param_expr_ret, text_loc=param_text_loc))
        self._skip_ws_and_comments()

    # As many values as the definition has parameters
    expected_count = len(macro_def.param_names)

    if len(params) != expected_count:
        sing_plur = "" if len(params) == 1 else "s"
        _raise_error(
            "Macro expansion passes {} parameter{} while the definition expects {}".format(
                len(params), sing_plur, expected_count
            ),
            params_text_loc,
        )

    return _MacroExp(name, params, begin_text_loc)
1925
1926 # Tries to parse a base item (anything except a post-item
1927 # repetition), returning it on success.
1928 def _try_parse_base_item(self):
1929 for func in self._base_item_parse_funcs:
1930 item = func()
1931
1932 if item is not None:
1933 return item
1934
1935 # Pattern for _try_parse_rep_post()
1936 _rep_post_prefix_pat = re.compile(r"\*")
1937
1938 # Tries to parse a post-item repetition, returning the expression
1939 # string and AST expression node on success.
1940 def _try_parse_rep_post(self):
1941 # Match prefix
1942 if self._try_parse_pat(self._rep_post_prefix_pat) is None:
1943 # No match
1944 return
1945
1946 # Return expression string and AST expression
1947 self._skip_ws_and_comments()
1948 return self._expect_rep_mul_expr()
1949
# Tries to parse a base item, possibly followed with a post-item
# repetition if the base item is repeatable, and appends the result to
# `items`.
#
# With a post-item repetition, what's appended is a repetition item
# which contains the base item and which is located where the
# repetition starts.
#
# Returns `True` if an item was appended, or `None` otherwise.
def _try_append_item(self, items: List[_Item]):
    self._skip_ws_and_comments_and_syms()
    new_item = self._try_parse_base_item()

    if new_item is None:
        # Nothing to append
        return

    if isinstance(new_item, _RepableItem):
        # Optional post-item repetition
        self._skip_ws_and_comments()
        rep_text_loc = self._text_loc
        rep_expr_ret = self._try_parse_rep_post()

        if rep_expr_ret is not None:
            new_item = _Rep([new_item], *rep_expr_ret, text_loc=rep_text_loc)

    items.append(new_item)
    return True
1974
1975 # Parses and returns items, skipping whitespaces, insignificant
1976 # symbols, and comments when allowed, and stopping at the first
1977 # unknown character.
1978 #
1979 # Accepts and registers macro definitions if `accept_macro_defs`
1980 # is `True`.
1981 def _parse_items(self, accept_macro_defs: bool = False) -> List[_Item]:
1982 items = [] # type: List[_Item]
1983
1984 while self._isnt_done():
1985 # Try to append item
1986 if not self._try_append_item(items):
1987 if accept_macro_defs and self._try_parse_macro_def():
1988 continue
1989
1990 # Unknown at this point
1991 break
1992
1993 return items
1994
# Parses the whole Normand input and sets `self._res` to the resulting
# main group item.
#
# Macro definitions are accepted at this first level. Anything left
# after the first level items (whitespaces, comments, and
# insignificant symbols excepted) is an error.
def _parse(self):
    if not self._normand.strip():
        # Special case to make sure there's something to consume
        self._res = _Group([], self._text_loc)
        return

    # First level items
    items = self._parse_items(accept_macro_defs=True)

    # Nothing significant must remain
    self._skip_ws_and_comments_and_syms()

    if self._isnt_done():
        self._raise_error(
            "Unexpected character `{}`".format(self._normand[self._at])
        )

    # Main group item
    self._res = _Group(items, self._text_loc)
2016
2017
# The return type of parse().
#
# Calling the class itself raises `NotImplementedError`: instances
# come from the private _create() class method.
class ParseResult:
    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Creates and returns an instance without calling __init__().
    @classmethod
    def _create(
        cls,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        res = cls.__new__(cls)
        res._init(data, variables, labels, offset, bo)
        return res

    # Sets the attributes behind the public properties (the given
    # objects are kept as is, not copied).
    def _init(
        self,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self._data = data
        self._vars = variables
        self._labels = labels
        self._offset = offset
        self._bo = bo

    # Generated data.
    @property
    def data(self):
        return self._data

    # Dictionary of updated variable names to their last computed
    # value.
    @property
    def variables(self):
        return self._vars

    # Dictionary of updated main group label names to their computed
    # value.
    @property
    def labels(self):
        return self._labels

    # Updated offset.
    @property
    def offset(self):
        return self._offset

    # Updated byte order.
    @property
    def byte_order(self):
        return self._bo
2075
2076
# Raises a parse error with the message `msg`, located where the item
# `item` is (its `text_loc` attribute).
def _raise_error_for_item(msg: str, item: _Item) -> NoReturn:
    item_text_loc = item.text_loc
    _raise_error(msg, item_text_loc)
2081
2082
2083 # The `ICITTE` reserved name.
2084 _icitte_name = "ICITTE"
2085
2086
2087 # Base node visitor.
2088 #
2089 # Calls the _visit_name() method for each name node which isn't the name
2090 # of a call.
2091 class _NodeVisitor(ast.NodeVisitor):
2092 def __init__(self):
2093 self._parent_is_call = False
2094
2095 def generic_visit(self, node: ast.AST):
2096 if type(node) is ast.Call:
2097 self._parent_is_call = True
2098 elif type(node) is ast.Name and not self._parent_is_call:
2099 self._visit_name(node.id)
2100
2101 super().generic_visit(node)
2102 self._parent_is_call = False
2103
2104 @abc.abstractmethod
2105 def _visit_name(self, name: str):
2106 ...
2107
2108
# Expression validator: raises a parse error at the first visited name
# which is neither `ICITTE` nor one of the allowed names.
#
# The error message shows the expression and lists the legal names.
class _ExprValidator(_NodeVisitor):
    def __init__(self, expr_str: str, text_loc: TextLocation, allowed_names: Set[str]):
        super().__init__()
        self._expr_str = expr_str
        self._text_loc = text_loc
        self._allowed_names = allowed_names

    def _visit_name(self, name: str):
        # `ICITTE` is always legal, on top of the allowed names
        if name == _icitte_name or name in self._allowed_names:
            return

        msg = "Illegal (unknown or unreachable) variable/label name `{}` in expression `{}`".format(
            name, self._expr_str
        )

        # This set contains at least `ICITTE`, so there's always
        # something to list
        legal_names = self._allowed_names | {_icitte_name}
        legal_names_str = ", ".join(
            sorted("`{}`".format(legal_name) for legal_name in legal_names)
        )
        msg += "; the legal names are {{{}}}".format(legal_names_str)
        _raise_error(msg, self._text_loc)
2139
2140
2141 # Generator state.
2142 class _GenState:
2143 def __init__(
2144 self,
2145 variables: VariablesT,
2146 labels: LabelsT,
2147 offset: int,
2148 bo: Optional[ByteOrder],
2149 ):
2150 self.variables = variables.copy()
2151 self.labels = labels.copy()
2152 self.offset = offset
2153 self.bo = bo
2154
2155 def __repr__(self):
2156 return "_GenState({}, {}, {}, {})".format(
2157 repr(self.variables), repr(self.labels), repr(self.offset), repr(self.bo)
2158 )
2159
2160
2161 # Fixed-length number item instance.
2162 class _FlNumItemInst:
2163 def __init__(
2164 self,
2165 item: _FlNum,
2166 offset_in_data: int,
2167 state: _GenState,
2168 parse_error_msgs: List[ParseErrorMessage],
2169 ):
2170 self._item = item
2171 self._offset_in_data = offset_in_data
2172 self._state = state
2173 self._parse_error_msgs = parse_error_msgs
2174
2175 @property
2176 def item(self):
2177 return self._item
2178
2179 @property
2180 def offset_in_data(self):
2181 return self._offset_in_data
2182
2183 @property
2184 def state(self):
2185 return self._state
2186
2187 @property
2188 def parse_error_msgs(self):
2189 return self._parse_error_msgs
2190
2191
2192 # Generator of data and final state from a group item.
2193 #
2194 # Generation happens in memory at construction time. After building, use
2195 # the `data`, `variables`, `labels`, `offset`, and `bo` properties to
2196 # get the resulting context.
2197 #
2198 # The steps of generation are:
2199 #
2200 # 1. Handle each item in prefix order.
2201 #
2202 # The handlers append bytes to `self._data` and update some current
2203 # state object (`_GenState` instance).
2204 #
2205 # When handling a fixed-length number item, try to evaluate its
2206 # expression using the current state. If this fails, then it might be
2207 # because the expression refers to a "future" label: save the current
2208 # offset in `self._data` (generated data) and a snapshot of the
2209 # current state within `self._fl_num_item_insts` (`_FlNumItemInst`
2210 # object). _gen_fl_num_item_insts() will deal with this later. A
2211 # `_FlNumItemInst` instance also contains a snapshot of the current
2212 # parsing error messages (`self._parse_error_msgs`) which need to be
2213 # taken into account when handling the instance later.
2214 #
2215 # When handling the items of a group, keep a map of immediate label
2216 # names to their offset. Then, after having processed all the items,
2217 # update the relevant saved state snapshots in
2218 # `self._fl_num_item_insts` with those immediate label values.
2219 # _gen_fl_num_item_insts() will deal with this later.
2220 #
2221 # 2. Handle all the fixed-length number item instances of which the
2222 # expression evaluation failed before.
2223 #
2224 # At this point, `self._fl_num_item_insts` contains everything that's
2225 # needed to evaluate the expressions, including the values of
2226 # "future" labels from the point of view of some fixed-length number
2227 # item instance.
2228 #
2229 # If an evaluation fails at this point, then it's a user error. Add
2230 # to the parsing error all the saved parsing error messages of the
2231 # instance. Those additional messages add precious context to the
2232 # error.
2233 class _Gen:
# Builds the generator and immediately generates from the group item
# `group`, starting from a fresh generation state made of `variables`,
# `labels`, `offset`, and `bo`.
def __init__(
    self,
    group: _Group,
    macro_defs: _MacroDefsT,
    variables: VariablesT,
    labels: LabelsT,
    offset: int,
    bo: Optional[ByteOrder],
):
    self._macro_defs = macro_defs

    # Nothing recorded yet
    self._fl_num_item_insts = []  # type: List[_FlNumItemInst]
    self._parse_error_msgs = []  # type: List[ParseErrorMessage]
    self._in_trans = False

    # Generate
    initial_state = _GenState(variables, labels, offset, bo)
    self._gen(group, initial_state)
2248
# Generated bytes (`self._data`).
@property
def data(self):
    return self._data
2253
# Updated variables (those of the final state).
@property
def variables(self):
    return self._final_state.variables
2258
# Updated main group labels (those of the final state).
@property
def labels(self):
    return self._final_state.labels
2263
# Updated offset (the one of the final state).
@property
def offset(self):
    return self._final_state.offset
2268
# Updated byte order (the one of the final state).
@property
def bo(self):
    return self._final_state.bo
2273
# Evaluates the expression `expr`, of which the original string is
# `expr_str` and the location is `text_loc`, considering the
# generation state `state`.
#
# The symbols available to the expression are the labels of `state`,
# then `ICITTE` (current offset), then the variables of `state`, each
# one overriding any previous symbol having the same name.
#
# A `bool` result becomes an `int`. Then the result must be an `int`,
# a `float` if `accept_float` is `True`, or a `str` if `accept_str` is
# `True`.
#
# Returns the result, or raises a parse error at `text_loc` if the
# expression uses an illegal name, fails to evaluate, or has a result
# of an unexpected type.
@staticmethod
def _eval_expr(
    expr_str: str,
    expr: ast.Expression,
    text_loc: TextLocation,
    state: _GenState,
    accept_float: bool = False,
    accept_str: bool = False,
):
    # Symbols: labels, then the current offset, then variables
    syms = dict(state.labels)  # type: VariablesT
    syms[_icitte_name] = state.offset
    syms.update(state.variables)

    # Validate the node and its children
    _ExprValidator(expr_str, text_loc, set(syms)).visit(expr)

    # Compile and evaluate the expression node.
    #
    # NOTE(review): `None` is passed as the globals of eval(), which
    # presumably makes the expression see the globals of this module;
    # confirm that all the inputs are trusted.
    try:
        val = eval(compile(expr, "", "eval"), None, syms)
    except Exception as exc:
        _raise_error(
            "Failed to evaluate expression `{}`: {}".format(expr_str, exc),
            text_loc,
        )

    # Normalize a `bool` result to `int`
    if type(val) is bool:
        val = int(val)

    # Accepted result types
    expected_types = [int]  # type: List[type]

    if accept_float:
        expected_types.append(float)

    if accept_str:
        expected_types.append(str)

    if type(val) in expected_types:
        return val

    # Unexpected result type: list the expected ones in the message
    type_names = sorted("`{}`".format(t.__name__) for t in expected_types)

    if len(type_names) <= 2:
        msg_expected = " or ".join(type_names)
    else:
        msg_expected = ", ".join(
            type_names[:-1] + ["or {}".format(type_names[-1])]
        )

    _raise_error(
        "Invalid expression `{}`: expecting result type {}, not `{}`".format(
            expr_str, msg_expected, type(val).__name__
        ),
        text_loc,
    )
2347
# Evaluates the expression carried by `item` (its `expr_str`, `expr`,
# and `text_loc` attributes) with _eval_expr(), forwarding `state`,
# `accept_float`, and `accept_str` as is, and returns the result.
@staticmethod
def _eval_item_expr(
    item: Union[_Cond, _FillUntil, _FlNum, _Leb128Int, _Rep, _Str, _VarAssign],
    state: _GenState,
    accept_float: bool = False,
    accept_str: bool = False,
):
    return _Gen._eval_expr(
        item.expr_str,
        item.expr,
        item.text_loc,
        state,
        accept_float=accept_float,
        accept_str=accept_str,
    )
2360
# Handles the byte item `item`: appends its value to the generated data
# and advances the offset of `state` by the size of the item.
def _handle_byte_item(self, item: _Byte, state: _GenState):
    byte_val = item.val
    self._data.append(byte_val)
    state.offset += item.size
2365
# Handles the literal string item `item`: appends its already encoded
# data to the generated data and advances the offset of `state` by the
# size of the item.
def _handle_lit_str_item(self, item: _LitStr, state: _GenState):
    self._data.extend(item.data)
    state.offset += item.size
2370
# Handles the byte order setting item `item`: the byte order of `item`
# becomes the current byte order of `state`.
def _handle_set_bo_item(self, item: _SetBo, state: _GenState):
    state.bo = item.bo
2375
# Handles the variable assignment item `item`: evaluates its expression
# (an `int`, `float`, or `str` result is accepted) and stores the
# result in the variables of `state` under the name of the item.
def _handle_var_assign_item(self, item: _VarAssign, state: _GenState):
    new_val = self._eval_item_expr(item, state, accept_float=True, accept_str=True)
    state.variables[item.name] = new_val
2382
# Returns the byte order with which to encode the fixed-length number
# `item`: its own byte order if it has one, otherwise the current byte
# order of `state` (which may be `None`).
@staticmethod
def _fl_num_item_effective_bo(item: _FlNum, state: _GenState):
    if item.bo is not None:
        return item.bo

    return state.bo
2388
# Handles the fixed-length number item `item`.
#
# A number to encode on more than 8 bits requires an effective byte
# order; not having one is an error.
#
# The bytes are generated immediately if the expression of `item` can
# be evaluated now. Otherwise, unless we're within a transformation
# block (where this is an error), this method records a
# `_FlNumItemInst` (the item, its position within the data, and deep
# copies of `state` and of the current parse error messages) and
# reserves zeroed bytes which _gen_fl_num_item_insts() overwrites
# later.
def _handle_fl_num_item(self, item: _FlNum, state: _GenState):
    effective_bo = self._fl_num_item_effective_bo(item, state)

    # No byte order needed for a single byte
    if item.len > 8 and effective_bo is None:
        _raise_error_for_item(
            "Current byte order isn't defined at first fixed-length number (`{}`) to encode on more than 8 bits".format(
                item.expr_str
            ),
            item,
        )

    try:
        # Immediate evaluation attempt
        encoded = self._gen_fl_num_item_inst_data(item, state)
    except Exception:
        # Deliberately broad: any failure means "try again later"
        if self._in_trans:
            _raise_error_for_item(
                "Invalid expression `{}`: failed to evaluate within a transformation block".format(
                    item.expr_str
                ),
                item,
            )

        # Snapshot everything needed to generate the bytes later
        pending = _FlNumItemInst(
            item,
            len(self._data),
            copy.deepcopy(state),
            copy.deepcopy(self._parse_error_msgs),
        )
        self._fl_num_item_insts.append(pending)

        # Placeholder bytes to reserve the space within `self._data`
        encoded = b"\x00" * (item.len // 8)

    # Append the (real or placeholder) bytes and move on
    self._data += encoded
    state.offset += len(encoded)
2433
# Returns the number of bytes needed to encode the value `val` with
# LEB128 (the signed flavor if `is_signed` is `True`).
@staticmethod
def _leb128_size_for_val(val: int, is_signed: bool):
    # A negative value needs as many magnitude bits as the upper bound
    # of its range: for example, -128 belongs to [-128, 127], so it
    # needs what 127 needs.
    magnitude = -val - 1 if val < 0 else val

    # Magnitude bits, plus one sign bit if signed, and always at least
    # one bit so that zero still takes one byte.
    bit_count = max(magnitude.bit_length() + int(is_signed), 1)

    # Seven payload bits per byte (integer ceiling division)
    return -(-bit_count // 7)
2453
# Handles the LEB128 integer item `item`: evaluates its expression,
# encodes the result (signed flavor if `item` is exactly a
# `_SLeb128Int`), appends the bytes to the generated data, and advances
# the offset of `state` accordingly.
def _handle_leb128_int_item(self, item: _Leb128Int, state: _GenState):
    remaining = self._eval_item_expr(item, state)
    size = self._leb128_size_for_val(remaining, type(item) is _SLeb128Int)
    encoded = bytearray()

    for _ in range(size):
        # Seven least significant bits with the "continue" flag set
        encoded.append(0x80 | (remaining & 0x7F))
        remaining >>= 7

    # The last byte has its "continue" flag cleared (stop)
    encoded[-1] &= 0x7F

    self._data += encoded
    state.offset += size
2473
# Handles the string item `item`: evaluates its expression (an `int`,
# `float`, or `str` result is accepted), converts the result to a
# string, encodes it with the codec of `item`, appends the bytes to the
# generated data, and advances the offset of `state` accordingly.
def _handle_str_item(self, item: _Str, state: _GenState):
    result = self._eval_item_expr(item, state, accept_float=True, accept_str=True)
    encoded = _encode_str(str(result), item.codec, item.text_loc)
    self._data += encoded
    state.offset += len(encoded)
2487
# Handles the group item `item` by handling each of its items in order.
#
# The labels found directly within `item` (its immediate labels) are
# removed from `state` once done if `remove_immediate_labels` is
# `True`. In all cases they're added to the state snapshot of each
# fixed-length number item instance recorded while handling this group,
# so that a later evaluation can see them.
def _handle_group_item(
    self, item: _Group, state: _GenState, remove_immediate_labels: bool = True
):
    # Instances recorded from this index on belong to this group
    first_inst_index = len(self._fl_num_item_insts)

    # Immediate label names to offsets
    own_labels = {}  # type: LabelsT

    for child in item.items:
        if type(child) is _Label:
            # Offset of the label: the current one, before handling
            own_labels[child.name] = state.offset

        self._handle_item(child, state)

    if remove_immediate_labels:
        for label_name in own_labels:
            del state.labels[label_name]

    # Make the immediate labels visible to the deferred evaluations
    for pending in self._fl_num_item_insts[first_inst_index:]:
        pending.state.labels.update(own_labels)
2513
# Handles the repetition item `item`: evaluates its expression to get a
# repetition count, which must not be negative, and then handles `item`
# as a group that many times.
def _handle_rep_item(self, item: _Rep, state: _GenState):
    count = _Gen._eval_item_expr(item, state)

    if count < 0:
        _raise_error_for_item(
            "Invalid expression `{}`: unexpected negative result {:,}".format(
                item.expr_str, count
            ),
            item,
        )

    iteration = 0

    while iteration < count:
        self._handle_group_item(item, state)
        iteration += 1
2531
# Handles the conditional item `item`: evaluates its expression and
# handles its `true_item` group if the result is truthy, or its
# `false_item` group otherwise.
def _handle_cond_item(self, item: _Cond, state: _GenState):
    selected = (
        item.true_item if _Gen._eval_item_expr(item, state) else item.false_item
    )
    self._handle_group_item(selected, state)
2542
# Handles the transformation item `item`: generates the data of `item`
# as a group, takes that data back out of `self._data`, and appends
# the result of calling `item.trans` on it instead.
#
# `self._in_trans` is `True` while the group is handled and gets its
# initial value back at the end. The final offset of `state` is its
# initial offset plus the length of the transformed data.
def _handle_trans_item(self, item: _Trans, state: _GenState):
    was_in_trans = self._in_trans
    start_offset = state.offset
    start_len = len(self._data)
    self._in_trans = True

    # Generate the untransformed data at the end of `self._data`
    self._handle_group_item(item, state)

    # Take it back
    raw = self._data[start_len:]
    del self._data[start_len:]

    # Transform
    try:
        transformed = item.trans(raw)
    except Exception as exc:
        _raise_error_for_item(
            "Cannot apply the {} transformation to this data: {}".format(
                item.name, exc
            ),
            item,
        )

    # Append the transformed data; fix the offset; restore the flag
    self._data += transformed
    state.offset = start_offset + len(transformed)
    self._in_trans = was_in_trans
2573
# Creates and returns the state with which to handle the items of the
# macro which the macro expansion item `item` expands.
#
# The new state has no labels, the offset and byte order of
# `init_state`, and one variable per macro parameter: each parameter
# expression of `item` is evaluated against `init_state` (an `int`,
# `float`, or `str` result is accepted) and bound to the corresponding
# parameter name of the macro definition.
def _eval_macro_exp_params(self, item: _MacroExp, init_state: _GenState):
    exp_state = _GenState({}, {}, init_state.offset, init_state.bo)
    param_names = self._macro_defs[item.name].param_names

    for name, param in zip(param_names, item.params):
        exp_state.variables[name] = _Gen._eval_expr(
            param.expr_str,
            param.expr,
            param.text_loc,
            init_state,
            accept_float=True,
            accept_str=True,
        )

    return exp_state
2595
# Handles the macro expansion item `item`: evaluates its parameters
# into a new state, handles the group of the expanded macro definition
# with that state, and advances the offset of `state` by the number of
# bytes which the expansion added to the generated data.
#
# While the macro group is handled, a "While expanding the macro"
# message sits on top of `self._parse_error_msgs`. Any `ParseError`
# raised in the process is passed to _augment_error() with the same
# text.
def _handle_macro_exp_item(self, item: _MacroExp, state: _GenState):
    context_text = "While expanding the macro `{}`:".format(item.name)

    try:
        # State dedicated to this expansion
        exp_state = self._eval_macro_exp_params(item, state)

        # Data length before the expansion
        init_data_size = len(self._data)

        # Handle the macro group with the context message pushed
        self._parse_error_msgs.append(
            ParseErrorMessage._create(  # pyright: ignore[reportPrivateUsage]
                context_text, item.text_loc
            )
        )
        self._handle_group_item(self._macro_defs[item.name], exp_state)
        self._parse_error_msgs.pop()
    except ParseError as exc:
        _augment_error(exc, context_text, item.text_loc)

    # The expansion moved the offset by what it generated
    state.offset += len(self._data) - init_data_size
2619
# Handles the offset setting item `item`: the value of `item` becomes
# the current offset of `state` (no data is generated).
def _handle_set_offset_item(self, item: _SetOffset, state: _GenState):
    state.offset = item.val
2623
# Handles the offset alignment item `item`: moves the offset of `state`
# up to the next multiple of the alignment of `item` (`item.val`, in
# bits) and appends as many padding bytes (`item.pad_val`) to the
# generated data.
def _handle_align_offset_item(self, item: _AlignOffset, state: _GenState):
    start = state.offset
    step = item.val // 8

    # Distance to the next multiple of `step` (zero if already aligned)
    state.offset = start + (-start % step)
    self._data += bytes([item.pad_val] * (state.offset - start))
2630
# Handles the filling item `item`: evaluates its expression to get the
# offset to reach, which must not be less than the current offset of
# `state`, appends the padding bytes (`item.pad_val`) needed to get
# there, and makes it the current offset.
def _handle_fill_until_item(self, item: _FillUntil, state: _GenState):
    target_offset = _Gen._eval_item_expr(item, state)

    # Going backwards isn't possible
    if target_offset < state.offset:
        _raise_error_for_item(
            "Invalid expression `{}`: new offset {:,} is less than current offset {:,}".format(
                item.expr_str, target_offset, state.offset
            ),
            item,
        )

    pad_len = target_offset - state.offset
    self._data += bytes([item.pad_val] * pad_len)
    state.offset = target_offset
2650
# Handles the label item `item`: binds its name to the current offset
# within the labels of `state`.
def _handle_label_item(self, item: _Label, state: _GenState):
    state.labels[item.name] = state.offset
2654
# Handles the item `item` by calling the handler which
# `self._item_handlers` registers for its exact type with `item` and
# `state`, returning what the handler returns.
def _handle_item(self, item: _Item, state: _GenState):
    handler = self._item_handlers[type(item)]
    return handler(item, state)
2659
# Encodes the integer `val` for the fixed-length number item `item`
# with the effective byte order `bo` (`None` is handled like big
# endian) and returns the resulting `item.len // 8` bytes.
#
# `val` must fit either the signed or the unsigned range of `item.len`
# bits; anything else is an error.
def _gen_fl_int_item_inst_data(
    self, val: int, bo: Optional[ByteOrder], item: _FlNum
):
    # Lowest signed value to highest unsigned value
    lowest = -(2 ** (item.len - 1))
    highest = 2**item.len - 1

    if not lowest <= val <= highest:
        _raise_error_for_item(
            "Value {:,} is outside the {}-bit range when evaluating expression `{}`".format(
                val, item.len, item.expr_str
            ),
            item,
        )

    is_big_endian = bo in (None, ByteOrder.BE)

    # Always encode on 64 bits first so that the sign bit is extended
    # whatever the requested length.
    order_char = ">" if is_big_endian else "<"
    type_char = "Q" if val >= 0 else "q"
    full = struct.pack(order_char + type_char, val)

    # Then only keep the least significant `size` bytes
    size = item.len // 8

    if is_big_endian:
        # Least significant bytes are the last ones
        return full[-size:]

    # Least significant bytes are the first ones
    assert bo == ByteOrder.LE
    return full[:size]
2697
# Encodes the floating point number `val` for the fixed-length number
# item `item` with the effective byte order `bo` (`None` is handled
# like big endian) and returns the resulting bytes.
#
# Only the lengths 32 bits (`struct` format `f`) and 64 bits (`struct`
# format `d`) are valid; anything else is an error.
def _gen_fl_float_item_inst_data(
    self, val: float, bo: Optional[ByteOrder], item: _FlNum
):
    if item.len not in (32, 64):
        _raise_error_for_item(
            "Invalid {}-bit length for a fixed-length floating point number (value {:,})".format(
                item.len, val
            ),
            item,
        )

    order_char = ">" if bo in (None, ByteOrder.BE) else "<"
    type_char = "f" if item.len == 32 else "d"
    return struct.pack(order_char + type_char, val)
2721
# Evaluates the expression of the fixed-length number item `item`
# against `state` (a `float` result is accepted) and returns its
# encoded bytes, delegating to the integer or floating point number
# encoder depending on the exact type of the result.
def _gen_fl_num_item_inst_data(self, item: _FlNum, state: _GenState):
    effective_bo = self._fl_num_item_effective_bo(item, state)
    num = self._eval_item_expr(item, state, accept_float=True)

    if type(num) is not int:
        # The only other accepted type
        assert type(num) is float
        return self._gen_fl_float_item_inst_data(num, effective_bo, item)

    return self._gen_fl_int_item_inst_data(num, effective_bo, item)
2737
# Generates the bytes of each recorded fixed-length number item
# instance (from its own item and state snapshot) and writes them over
# the reserved bytes within `self._data`.
#
# If generating the bytes of an instance raises a `ParseError`, then
# this method adds the parse error messages saved with this instance to
# the error, in reverse order, before raising it again.
def _gen_fl_num_item_insts(self):
    for pending in self._fl_num_item_insts:
        try:
            encoded = self._gen_fl_num_item_inst_data(pending.item, pending.state)
        except ParseError as exc:
            for saved_msg in pending.parse_error_msgs[::-1]:
                _add_error_msg(exc, saved_msg.text, saved_msg.text_location)

            raise

        # Overwrite the reserved bytes
        begin = pending.offset_in_data
        end = begin + len(encoded)
        self._data[begin:end] = encoded
2755
# Generates the data (`self._data`) from the group `group`, starting
# with the state `state`, and keeps `state`, as modified by the
# generation, as the final state (`self._final_state`).
def _gen(self, group: _Group, state: _GenState):
    # Handler to call for each exact item type (see _handle_item())
    self._item_handlers = {
        # Immediate data
        _Byte: self._handle_byte_item,
        _LitStr: self._handle_lit_str_item,
        _Str: self._handle_str_item,
        # Numbers
        _FlNum: self._handle_fl_num_item,
        _SLeb128Int: self._handle_leb128_int_item,
        _ULeb128Int: self._handle_leb128_int_item,
        # State
        _Label: self._handle_label_item,
        _SetBo: self._handle_set_bo_item,
        _SetOffset: self._handle_set_offset_item,
        _VarAssign: self._handle_var_assign_item,
        # Padding
        _AlignOffset: self._handle_align_offset_item,
        _FillUntil: self._handle_fill_until_item,
        # Blocks
        _Cond: self._handle_cond_item,
        _Group: self._handle_group_item,
        _MacroExp: self._handle_macro_exp_item,
        _Rep: self._handle_rep_item,
        _Trans: self._handle_trans_item,
    }  # type: Dict[type, Callable[[Any, _GenState], None]]

    # Nothing generated yet
    self._data = bytearray()

    # Handle the main group, keeping its immediate labels within
    # `state` because the `labels` property offers them.
    self._handle_group_item(group, state, remove_immediate_labels=False)

    # `state` is now the final state
    self._final_state = state

    # Every state snapshot is complete at this point: generate the
    # bytes of the fixed-length numbers which had to wait.
    self._gen_fl_num_item_insts()
2793
2794
# Parses the Normand input string `normand` and returns a `ParseResult`
# instance containing the generated bytes as well as the final
# variables, labels, offset, and byte order.
#
# `init_variables` is a dictionary of initial variable names (valid
# Python names) to values (the command-line tool passes `int`, `float`,
# and `str` ones); `None` means no initial variables. A variable name
# must not be the reserved name `ICITTE`.
#
# `init_labels` is a dictionary of initial label names (valid Python
# names) to integral values; `None` means no initial labels. A label
# name must not be the reserved name `ICITTE`.
#
# `init_offset` is the initial offset.
#
# `init_byte_order` is the initial byte order (`None` means not
# defined).
#
# Raises `ParseError` on any parsing error.
def parse(
    normand: str,
    init_variables: Optional[VariablesT] = None,
    init_labels: Optional[LabelsT] = None,
    init_offset: int = 0,
    init_byte_order: Optional[ByteOrder] = None,
):
    variables = {} if init_variables is None else init_variables
    labels = {} if init_labels is None else init_labels

    # Parse the text, and then generate the data from the resulting
    # main group and macro definitions.
    parser = _Parser(normand, variables, labels)
    gen = _Gen(
        parser.res,
        parser.macro_defs,
        variables,
        labels,
        init_offset,
        init_byte_order,
    )

    return ParseResult._create(  # pyright: ignore[reportPrivateUsage]
        gen.data, gen.variables, gen.labels, gen.offset, gen.bo
    )
2836
2837
# Raises a `RuntimeError` of which the message is `msg` with a
# "Command-line error" prefix.
def _raise_cli_error(msg: str) -> NoReturn:
    full_msg = "Command-line error: {}".format(msg)
    raise RuntimeError(full_msg)
2841
2842
# Converts the value part `s` of a command-line `NAME=VAL` assignment,
# ignoring surrounding whitespace, and returns the resulting number.
#
# A label value (`is_label` is `True`) must be a constant integer,
# whereas a variable value may also be a constant floating point
# number. Anything else is a command-line error.
def _val_from_assign_val_str(s: str, is_label: bool):
    text = s.strip()

    # Only a variable may be a floating point number
    if not is_label:
        float_m = _const_float_pat.fullmatch(text)

        if float_m is not None:
            return float(float_m.group(0))

    int_m = _const_int_pat.fullmatch(text)

    if int_m is not None:
        # Base 0: let int() infer the base from the prefix
        return int(_norm_const_int(int_m.group(0)), 0)

    _raise_cli_error(
        "Invalid assignment value `{}`: expecting {}".format(
            text, "an integer" if is_label else "a number"
        )
    )
2862
2863
# Returns a dictionary of names to values built from the `NAME=VAL`
# entries of the list of strings `args` (`None` means no entries).
#
# If `is_str_only` is `True`, then each value is the raw `VAL` string.
# Otherwise each value is the number which _val_from_assign_val_str()
# returns for `VAL` and `is_label`.
#
# An entry which isn't a valid `NAME=VAL` assignment is a command-line
# error.
def _dict_from_arg(args: Optional[List[str]], is_label: bool, is_str_only: bool):
    result = {}  # type: VariablesT

    if args is None:
        return result

    # Python name, `=` (possibly surrounded with whitespace), and then
    # anything up to the end.
    assign_pat = re.compile(r"({})\s*=\s*(.*)$".format(_py_name_pat.pattern))

    for arg in args:
        m = assign_pat.match(arg)

        if m is None:
            _raise_cli_error("Invalid assignment `{}`".format(arg))

        name, raw_val = m.group(1), m.group(2)

        if is_str_only:
            result[name] = raw_val
        else:
            result[name] = _val_from_assign_val_str(raw_val, is_label)

    return result
2886
2887
# Parses the command-line arguments, reads the Normand input, and
# returns, in this order:
#
# 1. The input file path, or `None` if none (standard input).
# 2. The Normand input text.
# 3. The initial offset.
# 4. The initial byte order (`None` if not specified).
# 5. The initial variables (numeric and string ones merged; a string
#    variable overrides a numeric one having the same name).
# 6. The initial labels.
#
# Raises a command-line error (see _raise_cli_error()) on an invalid
# assignment or on a negative initial offset.
def _parse_cli_args():
    import argparse

    # Build parser
    ap = argparse.ArgumentParser()
    ap.add_argument(
        "--offset",
        metavar="OFFSET",
        action="store",
        type=int,
        default=0,
        help="initial offset (positive)",
    )
    ap.add_argument(
        "-b",
        "--byte-order",
        metavar="BO",
        choices=["be", "le"],
        type=str,
        help="initial byte order (`be` or `le`)",
    )
    ap.add_argument(
        "-v",
        "--var",
        metavar="NAME=VAL",
        action="append",
        help="add an initial numeric variable (may be repeated)",
    )
    ap.add_argument(
        "-s",
        "--var-str",
        metavar="NAME=VAL",
        action="append",
        help="add an initial string variable (may be repeated)",
    )
    ap.add_argument(
        "-l",
        "--label",
        metavar="NAME=VAL",
        action="append",
        help="add an initial label (may be repeated)",
    )
    ap.add_argument(
        "--version", action="version", version="Normand {}".format(__version__)
    )
    ap.add_argument(
        "path",
        metavar="PATH",
        action="store",
        nargs="?",
        help="input path (none means standard input)",
    )

    # Parse
    args = ap.parse_args()

    # Read input
    if args.path is None:
        normand = sys.stdin.read()
    else:
        with open(args.path) as f:
            normand = f.read()

    # Variables and labels
    variables = _dict_from_arg(args.var, False, False)
    variables.update(_dict_from_arg(args.var_str, False, True))
    labels = _dict_from_arg(args.label, True, False)

    # Validate offset (the message must show the offending value)
    if args.offset < 0:
        _raise_cli_error("Invalid negative offset {}".format(args.offset))

    # Validate and set byte order
    bo = None  # type: Optional[ByteOrder]

    if args.byte_order is not None:
        if args.byte_order == "be":
            bo = ByteOrder.BE
        else:
            # `choices` above only allows `be` and `le`
            assert args.byte_order == "le"
            bo = ByteOrder.LE

    # Return input and initial state
    return args.path, normand, args.offset, bo, variables, typing.cast(LabelsT, labels)
2980
2981
# Parses the Normand text `normand` with the initial offset `offset`,
# byte order `bo`, variables `variables`, and labels `labels`, and
# writes the generated bytes to the standard output as binary data.
#
# This function doesn't handle any exception.
def _run_cli_with_args(
    normand: str,
    offset: int,
    bo: Optional[ByteOrder],
    variables: VariablesT,
    labels: LabelsT,
):
    out = sys.stdout.buffer
    res = parse(normand, variables, labels, offset, bo)
    out.write(res.data)
2991
2992
# Prints the message `msg` to the standard error, without surrounding
# whitespace and with a final period (added if missing), and exits with
# status 1.
def _fail(msg: str) -> NoReturn:
    # Strip first so that the period check considers the last visible
    # character, not some trailing whitespace or newline.
    msg = msg.strip()

    if not msg.endswith("."):
        msg += "."

    print(msg, file=sys.stderr)
    sys.exit(1)
3000
3001
# Command-line tool entry point: parses the command-line arguments,
# runs the tool, and reports any error through _fail().
#
# A `ParseError` is reported with one line per message, from the last
# message to the first one, each line having the absolute input path
# (if any), the line and column numbers, and the message text.
def _run_cli():
    try:
        path, normand, offset, bo, variables, labels = _parse_cli_args()
    except Exception as exc:
        _fail(str(exc))

    try:
        _run_cli_with_args(normand, offset, bo, variables, labels)
    except ParseError as exc:
        import os.path

        prefix = "" if path is None else "{}:".format(os.path.abspath(path))
        lines = []

        for msg in reversed(exc.messages):
            line = "{}{}:{} - {}".format(
                prefix,
                msg.text_location.line_no,
                msg.text_location.col_no,
                msg.text,
            )

            # Close the line unless it already ends with punctuation
            if line[-1] not in ".:;":
                line += "."

            lines.append(line + "\n")

        _fail("".join(lines).strip())
    except Exception as exc:
        _fail(str(exc))
3033
3034
# Run the command-line tool when this module is executed as a script
if __name__ == "__main__":
    _run_cli()
This page took 0.137477 seconds and 4 git commands to generate.