cpp-common/bt2c/fmt.hpp: use `wise_enum::string_type` in `EnableIfIsWiseEnum` definition
[babeltrace.git] / tests / utils / python / normand.py
1 # SPDX-FileCopyrightText: 2023 Philippe Proulx <eeppeliteloop@gmail.com>
2 # SPDX-License-Identifier: MIT
3 #
4 # The MIT License (MIT)
5 #
6 # Copyright (c) 2023 Philippe Proulx <eeppeliteloop@gmail.com>
7 #
8 # Permission is hereby granted, free of charge, to any person obtaining
9 # a copy of this software and associated documentation files (the
10 # "Software"), to deal in the Software without restriction, including
11 # without limitation the rights to use, copy, modify, merge, publish,
12 # distribute, sublicense, and/or sell copies of the Software, and to
13 # permit persons to whom the Software is furnished to do so, subject to
14 # the following conditions:
15 #
16 # The above copyright notice and this permission notice shall be
17 # included in all copies or substantial portions of the Software.
18 #
19 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
23 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26
27 # This module is the portable Normand processor. It offers both the
28 # parse() function and the command-line tool (run the module itself)
29 # without external dependencies except a `typing` module for Python 3.4.
30 #
31 # Feel free to copy this module file to your own project to use Normand.
32 #
33 # Upstream repository: <https://github.com/efficios/normand>.
34
# Module metadata.
__author__ = "Philippe Proulx"
__version__ = "0.23.0"

# Names which make up the public API of this module.
__all__ = [
    "__author__",
    "__version__",
    "ByteOrder",
    "LabelsT",
    "parse",
    "ParseError",
    "ParseErrorMessage",
    "ParseResult",
    "TextLocation",
    "VariablesT",
]
49
50 import re
51 import abc
52 import ast
53 import bz2
54 import sys
55 import copy
56 import enum
57 import gzip
58 import math
59 import base64
60 import quopri
61 import struct
62 import typing
63 import functools
64 from typing import Any, Set, Dict, List, Union, Pattern, Callable, NoReturn, Optional
65
66
# Text location (line and column numbers).
#
# This module builds instances with _create(); calling the constructor
# directly always raises `NotImplementedError`.
class TextLocation:
    # Creates and returns a text location having the line number
    # `line_no` and the column number `col_no`, bypassing the disabled
    # constructor.
    @classmethod
    def _create(cls, line_no: int, col_no: int):
        self = cls.__new__(cls)
        self._init(line_no, col_no)
        return self

    # Always raises `NotImplementedError`.
    #
    # Takes an explicit `self` like the constructors of
    # `ParseErrorMessage` and `ParseError`.
    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Effective initialization (called by _create()).
    def _init(self, line_no: int, col_no: int):
        self._line_no = line_no
        self._col_no = col_no

    # Line number.
    @property
    def line_no(self):
        return self._line_no

    # Column number.
    @property
    def col_no(self):
        return self._col_no

    def __repr__(self):
        return "TextLocation({}, {})".format(self._line_no, self._col_no)
94
95
96 # Any item.
97 class _Item:
98 def __init__(self, text_loc: TextLocation):
99 self._text_loc = text_loc
100
101 # Source text location.
102 @property
103 def text_loc(self):
104 return self._text_loc
105
106
# Scalar item: an item which reports its own size.
class _ScalarItem(_Item):
    # Size of this item (bytes); to be provided by each subclass.
    @property
    @abc.abstractmethod
    def size(self) -> int:
        ...
114
115
116 # A repeatable item.
117 class _RepableItem:
118 pass
119
120
# Single byte.
class _Byte(_ScalarItem, _RepableItem):
    def __init__(self, val: int, text_loc: TextLocation):
        self._val = val
        super().__init__(text_loc)

    # Value of this byte.
    @property
    def val(self):
        return self._val

    # A byte item is always one byte long.
    @property
    def size(self):
        return 1

    def __repr__(self):
        return "_Byte({}, {!r})".format(hex(self._val), self._text_loc)
138
139
# Literal string (already encoded).
class _LitStr(_ScalarItem, _RepableItem):
    def __init__(self, data: bytes, text_loc: TextLocation):
        self._data = data
        super().__init__(text_loc)

    # Encoded bytes of this string.
    @property
    def data(self):
        return self._data

    # Number of encoded bytes.
    @property
    def size(self):
        return len(self._data)

    def __repr__(self):
        return "_LitStr({!r}, {!r})".format(self._data, self._text_loc)
157
158
# Byte order; the value of each member is its lowercase name.
@enum.unique
class ByteOrder(enum.Enum):
    # Big endian.
    BE = "be"

    # Little endian.
    LE = "le"
167
168
# Byte order setting.
class _SetBo(_Item):
    def __init__(self, bo: ByteOrder, text_loc: TextLocation):
        self._bo = bo
        super().__init__(text_loc)

    # Byte order to set.
    @property
    def bo(self):
        return self._bo

    def __repr__(self):
        return "_SetBo({!r}, {!r})".format(self._bo, self._text_loc)
181
182
# Label.
class _Label(_Item):
    def __init__(self, name: str, text_loc: TextLocation):
        self._name = name
        super().__init__(text_loc)

    # Name of this label.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        return "_Label({!r}, {!r})".format(self._name, self._text_loc)
196
197
# Offset setting.
class _SetOffset(_Item):
    def __init__(self, val: int, text_loc: TextLocation):
        self._val = val
        super().__init__(text_loc)

    # Offset to set (bytes).
    @property
    def val(self):
        return self._val

    def __repr__(self):
        return "_SetOffset({!r}, {!r})".format(self._val, self._text_loc)
211
212
# Offset alignment.
class _AlignOffset(_Item):
    def __init__(self, val: int, pad_val: int, text_loc: TextLocation):
        self._val = val
        self._pad_val = pad_val
        super().__init__(text_loc)

    # Alignment value (bits).
    @property
    def val(self):
        return self._val

    # Value of each padding byte.
    @property
    def pad_val(self):
        return self._pad_val

    def __repr__(self):
        return "_AlignOffset({!r}, {!r}, {!r})".format(
            self._val, self._pad_val, self._text_loc
        )
234
235
236 # Mixin of containing an AST expression and its string.
237 class _ExprMixin:
238 def __init__(self, expr_str: str, expr: ast.Expression):
239 self._expr_str = expr_str
240 self._expr = expr
241
242 # Expression string.
243 @property
244 def expr_str(self):
245 return self._expr_str
246
247 # Expression node to evaluate.
248 @property
249 def expr(self):
250 return self._expr
251
252
# Fill until some offset (given by an expression).
class _FillUntil(_Item, _ExprMixin):
    def __init__(
        self, expr_str: str, expr: ast.Expression, pad_val: int, text_loc: TextLocation
    ):
        # `_Item` and `_ExprMixin` don't chain their constructors: call
        # both explicitly.
        _Item.__init__(self, text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._pad_val = pad_val

    # Value of each padding byte.
    @property
    def pad_val(self):
        return self._pad_val

    def __repr__(self):
        return "_FillUntil({!r}, {!r}, {!r}, {!r})".format(
            self._expr_str, self._expr, self._pad_val, self._text_loc
        )
274
275
# Variable assignment: a variable name and the expression to assign.
class _VarAssign(_Item, _ExprMixin):
    def __init__(
        self, name: str, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        # `_Item` and `_ExprMixin` don't chain their constructors: call
        # both explicitly.
        _Item.__init__(self, text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._name = name

    # Name of the variable.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        return "_VarAssign({!r}, {!r}, {!r}, {!r})".format(
            self._name, self._expr_str, self._expr, self._text_loc
        )
297
298
# Fixed-length number, possibly needing more than one byte.
class _FlNum(_ScalarItem, _RepableItem, _ExprMixin):
    def __init__(
        self,
        expr_str: str,
        expr: ast.Expression,
        len: int,
        bo: Optional[ByteOrder],
        text_loc: TextLocation,
    ):
        # super() reaches `_Item.__init__()`, which doesn't chain to
        # `_ExprMixin`: initialize the mixin explicitly.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._bo = bo
        self._len = len

    # Length (bits).
    @property
    def len(self):
        return self._len

    # Byte order override (`None` if there's none).
    @property
    def bo(self):
        return self._bo

    # Size (bytes): the length in bits divided by eight.
    @property
    def size(self):
        return self._len // 8

    def __repr__(self):
        return "_FlNum({!r}, {!r}, {!r}, {!r}, {!r})".format(
            self._expr_str, self._expr, self._len, self._bo, self._text_loc
        )
336
337
# LEB128 integer (common base of the unsigned and signed variants).
class _Leb128Int(_Item, _RepableItem, _ExprMixin):
    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation):
        _Item.__init__(self, text_loc)
        _ExprMixin.__init__(self, expr_str, expr)

    # Uses the name of the concrete class so that subclasses don't need
    # their own __repr__().
    def __repr__(self):
        return "{}({!r}, {!r}, {!r})".format(
            type(self).__name__, self._expr_str, self._expr, self._text_loc
        )
351
352
# Unsigned LEB128 integer (same data as `_Leb128Int`; only the type
# differs).
class _ULeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    pass
356
357
# Signed LEB128 integer (same data as `_Leb128Int`; only the type
# differs).
class _SLeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    pass
361
362
# String: an expression and the codec with which to encode its result.
class _Str(_Item, _RepableItem, _ExprMixin):
    def __init__(
        self, expr_str: str, expr: ast.Expression, codec: str, text_loc: TextLocation
    ):
        _Item.__init__(self, text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._codec = codec

    # Name of the codec.
    @property
    def codec(self):
        return self._codec

    def __repr__(self):
        return "_Str({!r}, {!r}, {!r}, {!r})".format(
            self._expr_str, self._expr, self._codec, self._text_loc
        )
384
385
# Group of items.
class _Group(_Item, _RepableItem):
    def __init__(self, items: List[_Item], text_loc: TextLocation):
        self._items = items
        super().__init__(text_loc)

    # Items which this group contains.
    @property
    def items(self):
        return self._items

    def __repr__(self):
        return "_Group({!r}, {!r})".format(self._items, self._text_loc)
399
400
# Repetition item: a group of items with an attached expression.
class _Rep(_Group, _ExprMixin):
    def __init__(
        self,
        items: List[_Item],
        expr_str: str,
        expr: ast.Expression,
        text_loc: TextLocation,
    ):
        # `_Group` doesn't chain to `_ExprMixin`: call both constructors
        _Group.__init__(self, items, text_loc)
        _ExprMixin.__init__(self, expr_str, expr)

    def __repr__(self):
        return "_Rep({!r}, {!r}, {!r}, {!r})".format(
            self._items, self._expr_str, self._expr, self._text_loc
        )
420
421
# Conditional item: a condition expression and one group for each of
# its outcomes.
class _Cond(_Item, _ExprMixin):
    def __init__(
        self,
        true_item: _Group,
        false_item: _Group,
        expr_str: str,
        expr: ast.Expression,
        text_loc: TextLocation,
    ):
        _Item.__init__(self, text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._false_item = false_item
        self._true_item = true_item

    # Item when condition is true.
    @property
    def true_item(self):
        return self._true_item

    # Item when condition is false.
    @property
    def false_item(self):
        return self._false_item

    def __repr__(self):
        return "_Cond({!r}, {!r}, {!r}, {!r}, {!r})".format(
            self._true_item,
            self._false_item,
            self._expr_str,
            self._expr,
            self._text_loc,
        )
455
456
# Transformation: a group of items with a named function to apply to
# bytes.
class _Trans(_Group, _RepableItem):
    def __init__(
        self,
        items: List[_Item],
        name: str,
        func: Callable[[Union[bytes, bytearray]], bytes],
        text_loc: TextLocation,
    ):
        self._func = func
        self._name = name
        super().__init__(items, text_loc)

    # Name of the transformation.
    @property
    def name(self):
        return self._name

    # Returns the result of applying the transformation function to the
    # data `data`.
    def trans(self, data: Union[bytes, bytearray]):
        return self._func(data)

    def __repr__(self):
        return "_Trans({!r}, {!r}, {!r}, {!r})".format(
            self._items, self._name, self._func, self._text_loc
        )
485
486
# Macro definition item: a named group of items having parameter names.
class _MacroDef(_Group):
    def __init__(
        self,
        name: str,
        param_names: List[str],
        items: List[_Item],
        text_loc: TextLocation,
    ):
        self._name = name
        self._param_names = param_names
        super().__init__(items, text_loc)

    # Name of the macro.
    @property
    def name(self):
        return self._name

    # Names of the parameters.
    @property
    def param_names(self):
        return self._param_names

    def __repr__(self):
        return "_MacroDef({!r}, {!r}, {!r}, {!r})".format(
            self._name, self._param_names, self._items, self._text_loc
        )
517
518
519 # Macro expansion parameter.
520 class _MacroExpParam:
521 def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation):
522 self._expr_str = expr_str
523 self._expr = expr
524 self._text_loc = text_loc
525
526 # Expression string.
527 @property
528 def expr_str(self):
529 return self._expr_str
530
531 # Expression.
532 @property
533 def expr(self):
534 return self._expr
535
536 # Source text location.
537 @property
538 def text_loc(self):
539 return self._text_loc
540
541 def __repr__(self):
542 return "_MacroExpParam({}, {}, {})".format(
543 repr(self._expr_str), repr(self._expr), repr(self._text_loc)
544 )
545
546
# Macro expansion item: the name of a macro and the parameters of the
# expansion.
class _MacroExp(_Item, _RepableItem):
    def __init__(
        self,
        name: str,
        params: List[_MacroExpParam],
        text_loc: TextLocation,
    ):
        self._name = name
        self._params = params
        super().__init__(text_loc)

    # Name of the macro.
    @property
    def name(self):
        return self._name

    # Parameters of the expansion.
    @property
    def params(self):
        return self._params

    def __repr__(self):
        return "_MacroExp({!r}, {!r}, {!r})".format(
            self._name, self._params, self._text_loc
        )
575
576
# A parsing error message: a string and a text location.
#
# This module builds instances with _create(); calling the constructor
# directly always raises `NotImplementedError`.
class ParseErrorMessage:
    # Creates and returns a message having the text `text` and the text
    # location `text_loc`, bypassing the disabled constructor.
    @classmethod
    def _create(cls, text: str, text_loc: TextLocation):
        msg = cls.__new__(cls)
        msg._init(text, text_loc)
        return msg

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Effective initialization (called by _create()).
    def _init(self, text: str, text_loc: TextLocation):
        self._text_loc = text_loc
        self._text = text

    # Text of this message.
    @property
    def text(self):
        return self._text

    # Text location within the source.
    @property
    def text_location(self):
        return self._text_loc
601
602
# A parsing error containing one or more messages (`ParseErrorMessage`).
#
# This module builds instances with _create(); calling the constructor
# directly always raises `NotImplementedError`.
class ParseError(RuntimeError):
    # Creates and returns a parsing error having the initial message
    # `msg` located at `text_loc`, bypassing the disabled constructor.
    @classmethod
    def _create(cls, msg: str, text_loc: TextLocation):
        exc = cls.__new__(cls)
        exc._init(msg, text_loc)
        return exc

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Effective initialization (called by _create()): `msg` also becomes
    # the argument of the base exception.
    def _init(self, msg: str, text_loc: TextLocation):
        super().__init__(msg)
        self._msgs = []  # type: List[ParseErrorMessage]
        self._add_msg(msg, text_loc)

    # Appends a message having the text `msg` and the text location
    # `text_loc` to the messages of this error.
    def _add_msg(self, msg: str, text_loc: TextLocation):
        new_msg = ParseErrorMessage._create(  # pyright: ignore[reportPrivateUsage]
            msg, text_loc
        )
        self._msgs.append(new_msg)

    # Parsing error messages.
    #
    # The first message is the most specific one.
    @property
    def messages(self):
        return self._msgs
632
633
# Creates a parsing error from the message `msg` and the text location
# `text_loc`, and raises it.
def _raise_error(msg: str, text_loc: TextLocation) -> NoReturn:
    exc = ParseError._create(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise exc
637
638
639 # Adds a message to the parsing error `exc`.
640 def _add_error_msg(exc: ParseError, msg: str, text_loc: TextLocation):
641 exc._add_msg(msg, text_loc) # pyright: ignore[reportPrivateUsage]
642
643
# Appends a message (text `msg`, text location `text_loc`) to the
# parsing error `exc`, and then reraises `exc`.
def _augment_error(exc: ParseError, msg: str, text_loc: TextLocation) -> NoReturn:
    _add_error_msg(exc, msg, text_loc)
    raise exc
648
649
650 # Returns a normalized version (so as to be parseable by int()) of
651 # the constant integer string `s`, possibly negative, dealing with
652 # any radix suffix.
653 def _norm_const_int(s: str):
654 neg = ""
655 pos = s
656
657 if s.startswith("-"):
658 neg = "-"
659 pos = s[1:]
660
661 for r in "xXoObB":
662 if pos.startswith("0" + r):
663 # Already correct
664 return s
665
666 # Try suffix
667 asm_suf_base = {
668 "h": "x",
669 "H": "x",
670 "q": "o",
671 "Q": "o",
672 "o": "o",
673 "O": "o",
674 "b": "b",
675 "B": "B",
676 }
677
678 for suf in asm_suf_base:
679 if pos[-1] == suf:
680 s = "{}0{}{}".format(neg, asm_suf_base[suf], pos.rstrip(suf))
681
682 return s
683
684
685 # Encodes the string `s` using the codec `codec`, raising `ParseError`
686 # with `text_loc` on encoding error.
687 def _encode_str(s: str, codec: str, text_loc: TextLocation):
688 try:
689 return s.encode(codec)
690 except UnicodeEncodeError:
691 _raise_error(
692 "Cannot encode `{}` with the `{}` encoding".format(s, codec), text_loc
693 )
694
695
696 # Variables dictionary type (for type hints).
697 VariablesT = Dict[str, Union[int, float, str]]
698
699
700 # Labels dictionary type (for type hints).
701 LabelsT = Dict[str, int]
702
703
704 # Common patterns.
705 _py_name_pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")
706 _pos_const_int_pat = re.compile(
707 r"(?:0[Xx][A-Fa-f0-9]+|0[Oo][0-7]+|0[Bb][01]+|[A-Fa-f0-9]+[hH]|[0-7]+[qQoO]|[01]+[bB]|\d+)\b"
708 )
709 _const_int_pat = re.compile(r"(?P<neg>-)?(?:{})".format(_pos_const_int_pat.pattern))
710 _const_float_pat = re.compile(
711 r"[-+]?(?:(?:\d*\.\d+)|(?:\d+\.))(?:[Ee][+-]?\d+)?(?=\W|)"
712 )
713
714
# Macro definition dictionary type: macro name to macro definition.
_MacroDefsT = Dict[str, _MacroDef]
717
718
719 # Normand parser.
720 #
721 # The constructor accepts a Normand input. After building, use the `res`
722 # property to get the resulting main group.
723 class _Parser:
# Builds a parser to parse the Normand input `normand`, parsing
# immediately.
#
# Only the names (keys) of `variables` and `labels` are kept.
def __init__(self, normand: str, variables: VariablesT, labels: LabelsT):
    # Input and current position (offset, line, and column)
    self._normand = normand
    self._at = 0
    self._line_no = 1
    self._col_no = 1

    # Initially known names
    self._label_names = set(labels)
    self._var_names = set(variables)
    self._macro_defs = {}  # type: _MacroDefsT

    # Base item parsing methods (in this order)
    self._base_item_parse_funcs = [
        self._try_parse_byte,
        self._try_parse_str,
        self._try_parse_val,
        self._try_parse_var_assign,
        self._try_parse_set_bo,
        self._try_parse_label_or_set_offset,
        self._try_parse_align_offset,
        self._try_parse_fill_until,
        self._try_parse_group,
        self._try_parse_rep_block,
        self._try_parse_cond_block,
        self._try_parse_macro_exp,
        self._try_parse_trans_block,
    ]

    # Parse now
    self._parse()
750
# Parsing result (main group).
@property
def res(self):
    return self._res
755
# Macro definitions of this parser (dictionary).
@property
def macro_defs(self):
    return self._macro_defs
760
# Current text location (a new object on each access, built from the
# current line and column numbers).
@property
def _text_loc(self):
    line_no, col_no = self._line_no, self._col_no
    return TextLocation._create(  # pyright: ignore[reportPrivateUsage]
        line_no, col_no
    )
767
768 # Returns `True` if this parser is done parsing.
769 def _is_done(self):
770 return self._at == len(self._normand)
771
772 # Returns `True` if this parser isn't done parsing.
773 def _isnt_done(self):
774 return not self._is_done()
775
# Raises a parsing error having the message `msg` and located at the
# current text location.
def _raise_error(self, msg: str) -> NoReturn:
    cur_text_loc = self._text_loc
    _raise_error(msg, cur_text_loc)
780
781 # Tries to make the pattern `pat` match the current substring,
782 # returning the match object and updating `self._at`,
783 # `self._line_no`, and `self._col_no` on success.
784 def _try_parse_pat(self, pat: Pattern[str]):
785 m = pat.match(self._normand, self._at)
786
787 if m is None:
788 return
789
790 # Skip matched string
791 self._at += len(m.group(0))
792
793 # Update line number
794 self._line_no += m.group(0).count("\n")
795
796 # Update column number
797 for i in reversed(range(self._at)):
798 if self._normand[i] == "\n" or i == 0:
799 if i == 0:
800 self._col_no = self._at + 1
801 else:
802 self._col_no = self._at - i
803
804 break
805
806 # Return match object
807 return m
808
809 # Expects the pattern `pat` to match the current substring,
810 # returning the match object and updating `self._at`,
811 # `self._line_no`, and `self._col_no` on success, or raising a parse
812 # error with the message `error_msg` on error.
813 def _expect_pat(self, pat: Pattern[str], error_msg: str):
814 # Match
815 m = self._try_parse_pat(pat)
816
817 if m is None:
818 # No match: error
819 self._raise_error(error_msg)
820
821 # Return match object
822 return m
823
824 # Patterns for _skip_*()
825 _comment_pat = re.compile(r"#[^#]*?(?:$|#)", re.M)
826 _ws_or_comments_pat = re.compile(r"(?:\s|{})*".format(_comment_pat.pattern), re.M)
827 _ws_or_syms_or_comments_pat = re.compile(
828 r"(?:[\s/\\?&:;.,_=|-]|{})*".format(_comment_pat.pattern), re.M
829 )
830
831 # Skips as many whitespaces and comments as possible, but not
832 # insignificant symbol characters.
833 def _skip_ws_and_comments(self):
834 self._try_parse_pat(self._ws_or_comments_pat)
835
836 # Skips as many whitespaces, insignificant symbol characters, and
837 # comments as possible.
838 def _skip_ws_and_comments_and_syms(self):
839 self._try_parse_pat(self._ws_or_syms_or_comments_pat)
840
# Pattern for _try_parse_hex_byte()
_nibble_pat = re.compile(r"[A-Fa-f0-9]")

# Tries to parse a hexadecimal byte (two nibbles, possibly separated by
# whitespaces, comments, and insignificant symbols), returning a byte
# item on success.
#
# Returns `None` without a first nibble; raises a parsing error if the
# second one is missing.
def _try_parse_hex_byte(self):
    begin_text_loc = self._text_loc
    m_high = self._try_parse_pat(self._nibble_pat)

    if m_high is None:
        # No match
        return

    # The low nibble is mandatory at this point
    self._skip_ws_and_comments_and_syms()
    m_low = self._expect_pat(
        self._nibble_pat, "Expecting another hexadecimal nibble"
    )

    # Combine both nibbles
    val = (int(m_high.group(0), 16) << 4) | int(m_low.group(0), 16)
    return _Byte(val, begin_text_loc)
864
# Patterns for _try_parse_bin_byte()
_bin_byte_bit_pat = re.compile(r"[01]")
_bin_byte_prefix_pat = re.compile(r"%+")

# Tries to parse one or more binary bytes (as many as there are `%`
# prefix characters, eight bits each), returning a single byte item
# for one byte, or a group of byte items otherwise.
#
# Returns `None` without a prefix; raises a parsing error if any bit is
# missing.
def _try_parse_bin_byte(self):
    begin_text_loc = self._text_loc
    m_prefix = self._try_parse_pat(self._bin_byte_prefix_pat)

    if m_prefix is None:
        # No match
        return

    # One byte per `%`
    byte_count = len(m_prefix.group(0))
    byte_items = []  # type: List[_Item]

    while len(byte_items) < byte_count:
        self._skip_ws_and_comments_and_syms()
        byte_text_loc = self._text_loc
        val = 0

        # Expect eight bits, most significant first
        for _ in range(8):
            self._skip_ws_and_comments_and_syms()
            m_bit = self._expect_pat(
                self._bin_byte_bit_pat, "Expecting a bit (`0` or `1`)"
            )
            val = (val << 1) | int(m_bit.group(0))

        byte_items.append(_Byte(val, byte_text_loc))

    if byte_count == 1:
        # Single byte item
        return byte_items[0]

    # As group
    return _Group(byte_items, begin_text_loc)
904
# Patterns for _try_parse_dec_byte()
_dec_byte_prefix_pat = re.compile(r"\$")
_dec_byte_val_pat = re.compile(r"(?P<neg>-?)(?P<val>\d+)")

# Tries to parse a decimal byte (`$` followed with a decimal constant
# from -128 to 255), returning a byte item on success.
#
# Returns `None` without a prefix; raises a parsing error if the
# constant is missing or out of range.
def _try_parse_dec_byte(self):
    begin_text_loc = self._text_loc

    if self._try_parse_pat(self._dec_byte_prefix_pat) is None:
        # No match
        return

    # The value is mandatory at this point
    self._skip_ws_and_comments()
    m = self._expect_pat(self._dec_byte_val_pat, "Expecting a decimal constant")
    val = int(m.group("val"))

    if m.group("neg") == "-":
        val = -val

    # Validate
    if not -128 <= val <= 255:
        _raise_error("Invalid decimal byte value {}".format(val), begin_text_loc)

    # Two's complement for a negative value
    return _Byte(val % 256, begin_text_loc)
934
935 # Tries to parse a byte, returning a byte item on success.
936 def _try_parse_byte(self):
937 # Hexadecimal
938 item = self._try_parse_hex_byte()
939
940 if item is not None:
941 return item
942
943 # Binary
944 item = self._try_parse_bin_byte()
945
946 if item is not None:
947 return item
948
949 # Decimal
950 item = self._try_parse_dec_byte()
951
952 if item is not None:
953 return item
954
955 # Strings corresponding to escape sequence characters
956 _lit_str_escape_seq_strs = {
957 "0": "\0",
958 "a": "\a",
959 "b": "\b",
960 "e": "\x1b",
961 "f": "\f",
962 "n": "\n",
963 "r": "\r",
964 "t": "\t",
965 "v": "\v",
966 "\\": "\\",
967 '"': '"',
968 }
969
970 # Patterns for _try_parse_lit_str()
971 _lit_str_prefix_suffix_pat = re.compile(r'"')
972 _lit_str_contents_pat = re.compile(r'(?:(?:\\.)|[^"])*')
973
974 # Parses a literal string between double quotes (without an encoding
975 # prefix) and returns the resulting string.
976 def _try_parse_lit_str(self, with_prefix: bool):
977 # Match prefix if needed
978 if with_prefix:
979 if self._try_parse_pat(self._lit_str_prefix_suffix_pat) is None:
980 # No match
981 return
982
983 # Expect literal string
984 m = self._expect_pat(self._lit_str_contents_pat, "Expecting a literal string")
985
986 # Expect end of string
987 self._expect_pat(
988 self._lit_str_prefix_suffix_pat, 'Expecting `"` (end of literal string)'
989 )
990
991 # Replace escape sequences
992 val = m.group(0)
993
994 for ec in '0abefnrtv"\\':
995 val = val.replace(r"\{}".format(ec), self._lit_str_escape_seq_strs[ec])
996
997 # Return string
998 return val
999
1000 # Patterns for _try_parse_utf_str_encoding()
1001 _str_encoding_utf_prefix_pat = re.compile(r"u")
1002 _str_encoding_utf_pat = re.compile(r"(?:8|(?:(?:16|32)(?:[bl]e)))\b")
1003
1004 # Tries to parse a UTF encoding specification, returning the Python
1005 # codec name on success.
1006 def _try_parse_utf_str_encoding(self):
1007 # Match prefix
1008 if self._try_parse_pat(self._str_encoding_utf_prefix_pat) is None:
1009 # No match
1010 return
1011
1012 # Expect UTF specification
1013 m = self._expect_pat(
1014 self._str_encoding_utf_pat,
1015 "Expecting `8`, `16be`, `16le`, `32be` or `32le`",
1016 )
1017
1018 # Convert to codec name
1019 return {
1020 "8": "utf_8",
1021 "16be": "utf_16_be",
1022 "16le": "utf_16_le",
1023 "32be": "utf_32_be",
1024 "32le": "utf_32_le",
1025 }[m.group(0)]
1026
1027 # Patterns for _try_parse_str_encoding()
1028 _str_encoding_gen_prefix_pat = re.compile(r"s")
1029 _str_encoding_colon_pat = re.compile(r":")
1030 _str_encoding_non_utf_pat = re.compile(r"latin(?:[1-9]|10)\b")
1031
1032 # Tries to parse a string encoding specification, returning the
1033 # Python codec name on success.
1034 #
1035 # Requires the general prefix (`s:`) if `req_gen_prefix` is `True`.
1036 def _try_parse_str_encoding(self, req_gen_prefix: bool = False):
1037 # General prefix?
1038 if self._try_parse_pat(self._str_encoding_gen_prefix_pat) is not None:
1039 # Expect `:`
1040 self._skip_ws_and_comments()
1041 self._expect_pat(self._str_encoding_colon_pat, "Expecting `:`")
1042
1043 # Expect encoding specification
1044 self._skip_ws_and_comments()
1045
1046 # UTF?
1047 codec = self._try_parse_utf_str_encoding()
1048
1049 if codec is not None:
1050 return codec
1051
1052 # Expect Latin
1053 m = self._expect_pat(
1054 self._str_encoding_non_utf_pat,
1055 "Expecting `u8`, `u16be`, `u16le`, `u32be`, `u32le`, or `latin1` to `latin10`",
1056 )
1057 return m.group(0)
1058
1059 # UTF?
1060 if not req_gen_prefix:
1061 return self._try_parse_utf_str_encoding()
1062
# Patterns for _try_parse_str()
_lit_str_prefix_pat = re.compile(r'"')
_str_prefix_pat = re.compile(r'"|\{')
_str_expr_pat = re.compile(r"[^}]+")
_str_expr_suffix_pat = re.compile(r"\}")

# Tries to parse a string, returning a literal string item (`"`
# form) or a string item (`{` expression form) on success.
#
# Without an encoding specification, only the literal form is legal
# (encoded as UTF-8), and this method returns `None` if there's no
# opening `"`. With an encoding specification, a string is mandatory.
def _try_parse_str(self):
    begin_text_loc = self._text_loc

    # Optional encoding, then what's before the string prefix
    codec = self._try_parse_str_encoding()
    self._skip_ws_and_comments()

    if codec is not None:
        # Encoding present: expect a string prefix
        m_prefix = self._expect_pat(self._str_prefix_pat, 'Expecting `"` or `{`')
    else:
        # No encoding: only a literal string (UTF-8) is legal
        m_prefix = self._try_parse_pat(self._lit_str_prefix_pat)

        if m_prefix is None:
            return

    if m_prefix.group(0) != '"':
        # Expression form: expect the expression, and then `}`
        self._skip_ws_and_comments()
        expr_text_loc = self._text_loc
        m_expr = self._expect_pat(self._str_expr_pat, "Expecting an expression")
        self._expect_pat(self._str_expr_suffix_pat, "Expecting `}`")

        # Create an expression node from the expression string
        expr_str, expr = self._ast_expr_from_str(m_expr.group(0), expr_text_loc)

        # This form is only reachable with an encoding specification
        assert codec is not None
        return _Str(expr_str, expr, codec, begin_text_loc)

    # Literal form: the opening `"` is already parsed
    str_text_loc = self._text_loc
    val = self._try_parse_lit_str(False)

    if val is None:
        self._raise_error("Expecting a literal string")

    # Encode string (UTF-8 by default)
    effective_codec = codec if codec is not None else "utf_8"
    data = _encode_str(val, effective_codec, str_text_loc)
    return _LitStr(data, begin_text_loc)
1121
# Common right parenthesis pattern
_right_paren_pat = re.compile(r"\)")

# Patterns for _try_parse_group()
_group_prefix_pat = re.compile(r"\(|!g(?:roup)?\b")

# Tries to parse a group, returning a group item on success.
#
# A group which starts with `(` must end with `)`, whereas one which
# starts with `!g` or `!group` must end with what
# `self._block_end_pat` matches.
#
# Returns `None` without a group prefix; raises a parsing error if the
# end of the group is missing.
def _try_parse_group(self):
    begin_text_loc = self._text_loc
    m_open = self._try_parse_pat(self._group_prefix_pat)

    if m_open is None:
        # No match
        return

    # Contained items, and then what's before the end of the group
    items = self._parse_items()
    self._skip_ws_and_comments_and_syms()

    # The expected end depends on the form of the prefix
    is_paren_form = m_open.group(0) == "("
    end_pat = self._right_paren_pat if is_paren_form else self._block_end_pat
    end_str = ")" if is_paren_form else "!end"
    self._expect_pat(
        end_pat, "Expecting an item or `{}` (end of group)".format(end_str)
    )

    return _Group(items, begin_text_loc)
1156
1157 # Returns a stripped expression string and an AST expression node
1158 # from the expression string `expr_str` at text location `text_loc`.
1159 def _ast_expr_from_str(self, expr_str: str, text_loc: TextLocation):
1160 # Create an expression node from the expression string
1161 expr_str = expr_str.strip().replace("\n", " ")
1162
1163 try:
1164 expr = ast.parse(expr_str, mode="eval")
1165 except SyntaxError:
1166 _raise_error(
1167 "Invalid expression `{}`: invalid syntax".format(expr_str),
1168 text_loc,
1169 )
1170
1171 return expr_str, expr
1172
# Returns the `ByteOrder` value corresponding to the _valid_ byte
# order string `bo_str` (`be` or `le`).
@staticmethod
def _bo_from_str(bo_str: str):
    bo_from_str = {
        "be": ByteOrder.BE,
        "le": ByteOrder.LE,
    }
    return bo_from_str[bo_str]
1181
# Patterns for _try_parse_val()
_val_prefix_pat = re.compile(r"\[")
_val_expr_pat = re.compile(r"([^\]:]+):")
_fl_num_len_fmt_pat = re.compile(r"(?P<len>8|16|24|32|40|48|56|64)(?P<bo>[bl]e)?")
_leb128_int_fmt_pat = re.compile(r"(u|s)leb128")
_val_suffix_pat = re.compile(r"]")

# Tries to parse a value (number or string) and format (fixed length
# in bits and optional byte order override, `uleb128`, `sleb128`, or
# `s:` followed with an encoding name), returning an item on
# success.
#
# Returns `None` without the `[` prefix; raises a parsing error if
# anything which follows the prefix is invalid.
def _try_parse_val(self):
    if self._try_parse_pat(self._val_prefix_pat) is None:
        # No match
        return

    # Expect expression and `:`
    self._skip_ws_and_comments()
    expr_text_loc = self._text_loc
    m_expr = self._expect_pat(self._val_expr_pat, "Expecting an expression")

    # Create an expression node from the expression string
    expr_str, expr = self._ast_expr_from_str(m_expr.group(1), expr_text_loc)

    # Try each format in order until one provides the item
    self._skip_ws_and_comments()
    item = None

    # Fixed length?
    m_fmt = self._try_parse_pat(self._fl_num_len_fmt_pat)

    if m_fmt is not None:
        # Byte order override
        bo_str = m_fmt.group("bo")
        bo = None if bo_str is None else self._bo_from_str(bo_str)
        item = _FlNum(expr_str, expr, int(m_fmt.group("len")), bo, expr_text_loc)

    if item is None:
        # LEB128?
        m_fmt = self._try_parse_pat(self._leb128_int_fmt_pat)

        if m_fmt is not None:
            if m_fmt.group(1) == "u":
                item = _ULeb128Int(expr_str, expr, expr_text_loc)
            else:
                item = _SLeb128Int(expr_str, expr, expr_text_loc)

    if item is None:
        # String encoding (general prefix required)?
        codec = self._try_parse_str_encoding(True)

        if codec is not None:
            item = _Str(expr_str, expr, codec, expr_text_loc)

    if item is None:
        # At this point it's invalid
        self._raise_error(
            "Expecting a fixed length (multiple of eight bits and optional `be` or `le`), `uleb128`, `sleb128`, or `s:` followed with a valid encoding (`u8`, `u16be`, `u16le`, `u32be`, `u32le`, or `latin1` to `latin10`)"
        )

    # Expect `]`
    self._skip_ws_and_comments()
    self._expect_pat(self._val_suffix_pat, "Expecting `]`")

    return item
1253
# Patterns for _try_parse_var_assign()
_var_assign_prefix_pat = re.compile(r"\{")
_var_assign_equal_pat = re.compile(r"=")
_var_assign_expr_pat = re.compile(r"[^}]+")
_var_assign_suffix_pat = re.compile(r"\}")

# Tries to parse a variable assignment (`{`, a name, `=`, an
# expression, and `}`).
#
# The whole syntax is consumed first; only then is the name checked
# against the reserved `ICITTE` name and the known label names, and the
# expression parsed.
#
# Returns a variable assignment item, or `None` if the current text
# doesn't start with `{`. On success, the name is added to the known
# variable names.
def _try_parse_var_assign(self):
    if self._try_parse_pat(self._var_assign_prefix_pat) is None:
        # Not a variable assignment
        return

    # Name
    self._skip_ws_and_comments()
    name_text_loc = self._text_loc
    name_match = self._expect_pat(_py_name_pat, "Expecting a valid Python name")
    name = name_match.group(0)

    # `=`
    self._skip_ws_and_comments()
    self._expect_pat(self._var_assign_equal_pat, "Expecting `=`")

    # Raw expression text
    self._skip_ws_and_comments()
    expr_text_loc = self._text_loc
    expr_match = self._expect_pat(self._var_assign_expr_pat, "Expecting an expression")

    # `}`
    self._skip_ws_and_comments()
    self._expect_pat(self._var_assign_suffix_pat, "Expecting `}`")

    # The name must be neither reserved nor an existing label name
    if name == _icitte_name:
        msg = "`{}` is a reserved variable name".format(_icitte_name)
        _raise_error(msg, name_text_loc)

    if name in self._label_names:
        msg = "Existing label named `{}`".format(name)
        _raise_error(msg, name_text_loc)

    # Parse the expression itself
    expr_str, expr = self._ast_expr_from_str(expr_match.group(0), expr_text_loc)

    # Remember this variable name
    self._var_names.add(name)
    return _VarAssign(name, expr_str, expr, name_text_loc)
1309
# Pattern for _try_parse_set_bo()
_set_bo_pat = re.compile(r"!([bl]e)\b")

# Tries to parse a byte order setting (`!be` or `!le`).
#
# Returns a byte order setting item located where the setting begins,
# or `None` if the current text isn't a byte order setting.
def _try_parse_set_bo(self):
    begin_text_loc = self._text_loc
    m = self._try_parse_pat(self._set_bo_pat)

    if m is None:
        # No match
        return

    # The pattern only captures `be` or `le`: reuse the shared helper
    # which _try_parse_val() also relies on instead of duplicating the
    # mapping here.
    return _SetBo(self._bo_from_str(m.group(1)), begin_text_loc)
1333
# Tries to parse the value of an offset setting (what follows the
# initial `<`): a positive constant integer.
#
# Returns an offset setting item, or `None` if the current text isn't
# such an integer.
def _try_parse_set_offset_val(self):
    start_loc = self._text_loc
    match = self._try_parse_pat(_pos_const_int_pat)

    if match is not None:
        # Base 0: int() infers the radix from the normalized literal
        offset = int(_norm_const_int(match.group(0)), 0)
        return _SetOffset(offset, start_loc)
1348
# Tries to parse a label name (what follows the initial `<`).
#
# The name must not be the reserved `ICITTE` name, an already known
# label name, or an already known variable name (checked in this
# order).
#
# Returns a label item, or `None` if the current text isn't a name. On
# success, the name is added to the known label names.
def _try_parse_label_name(self):
    start_loc = self._text_loc
    match = self._try_parse_pat(_py_name_pat)

    if match is None:
        # Not a name
        return

    label_name = match.group(0)

    if label_name == _icitte_name:
        msg = "`{}` is a reserved label name".format(_icitte_name)
        _raise_error(msg, start_loc)

    if label_name in self._label_names:
        msg = "Duplicate label name `{}`".format(label_name)
        _raise_error(msg, start_loc)

    if label_name in self._var_names:
        msg = "Existing variable named `{}`".format(label_name)
        _raise_error(msg, start_loc)

    # Remember this label name
    self._label_names.add(label_name)
    return _Label(label_name, start_loc)
1380
# Patterns for _try_parse_label_or_set_offset()
_label_set_offset_prefix_pat = re.compile(r"<")
_label_set_offset_suffix_pat = re.compile(r">")

# Tries to parse what's between `<` and `>`: an offset setting value
# first, a label name otherwise.
#
# Returns the offset setting or label item, or `None` if the current
# text doesn't start with `<`.
def _try_parse_label_or_set_offset(self):
    if self._try_parse_pat(self._label_set_offset_prefix_pat) is None:
        # Neither a label nor an offset setting
        return

    self._skip_ws_and_comments()

    # An offset setting value has precedence over a label name
    item = self._try_parse_set_offset_val()

    if item is None:
        item = self._try_parse_label_name()

    if item is None:
        # Neither one: invalid
        self._raise_error("Expecting a label name or an offset setting value")

    # Closing `>`
    self._skip_ws_and_comments()
    self._expect_pat(self._label_set_offset_suffix_pat, "Expecting `>`")
    return item
1409
# Pattern for _parse_pad_val()
_pad_val_prefix_pat = re.compile(r"~")

# Parses an optional padding value (`~` followed with a positive
# constant integer which must not exceed 255).
#
# Returns the padding byte value, or 0 if there's no `~` at the current
# position (after skipping whitespaces and comments).
def _parse_pad_val(self):
    self._skip_ws_and_comments()

    if self._try_parse_pat(self._pad_val_prefix_pat) is None:
        # No explicit padding value: default
        return 0

    self._skip_ws_and_comments()
    value_loc = self._text_loc
    match = self._expect_pat(
        _pos_const_int_pat,
        "Expecting a positive constant integer (byte value)",
    )
    value = int(_norm_const_int(match.group(0)), 0)

    # Must fit in a single byte
    if value > 255:
        _raise_error("Invalid padding byte value {}".format(value), value_loc)

    return value
1438
# Patterns for _try_parse_align_offset()
_align_offset_prefix_pat = re.compile(r"@")
_align_offset_val_pat = re.compile(r"\d+")

# Tries to parse an offset alignment: `@`, an alignment in bits (a
# positive multiple of eight), and an optional padding value.
#
# Returns an offset alignment item, or `None` if the current text
# doesn't start with `@`.
def _try_parse_align_offset(self):
    start_loc = self._text_loc

    if self._try_parse_pat(self._align_offset_prefix_pat) is None:
        # Not an offset alignment
        return

    # Alignment value
    self._skip_ws_and_comments()
    align_loc = self._text_loc
    match = self._expect_pat(
        self._align_offset_val_pat,
        "Expecting an alignment (positive multiple of eight bits)",
    )
    align = int(match.group(0))
    is_valid = align > 0 and (align % 8) == 0

    if not is_valid:
        msg = "Invalid alignment value {} (not a positive multiple of eight)".format(
            align
        )
        _raise_error(msg, align_loc)

    # Optional padding value (0 if missing)
    pad_val = self._parse_pad_val()
    return _AlignOffset(align, pad_val, start_loc)
1477
# Patterns for _expect_expr()
_inner_expr_prefix_pat = re.compile(r"\{")
_inner_expr_pat = re.compile(r"[^}]+")
_inner_expr_suffix_pat = re.compile(r"\}")

# Parses an expression outside a `{`/`}` context.
#
# This function accepts, trying them in this order:
#
# • If `accept_const_float` is `True`: a constant floating point
#   number.
#
# • If `accept_const_int` is `True`: a constant integer, which may
#   be negative if `allow_neg_int` is `True`.
#
# • A Python name.
#
# • If `accept_lit_str` is `True`: a literal string (the expression
#   becomes the `repr()` of its value).
#
# • A Python expression within `{` and `}`.
#
# Returns the stripped expression string and AST expression.
#
# Reports an error if none of the accepted forms is found, or if a
# negative constant integer is found while `allow_neg_int` is `False`.
def _expect_expr(
    self,
    accept_const_int: bool = False,
    allow_neg_int: bool = False,
    accept_const_float: bool = False,
    accept_lit_str: bool = False,
):
    begin_text_loc = self._text_loc

    # Constant floating point number?
    if accept_const_float:
        m = self._try_parse_pat(_const_float_pat)

        if m is not None:
            return self._ast_expr_from_str(m.group(0), begin_text_loc)

    # Constant integer?
    if accept_const_int:
        m = self._try_parse_pat(_const_int_pat)

        if m is not None:
            # Negative and allowed?
            if m.group("neg") == "-" and not allow_neg_int:
                _raise_error(
                    "Expecting a positive constant integer", begin_text_loc
                )

            expr_str = _norm_const_int(m.group(0))
            return self._ast_expr_from_str(expr_str, begin_text_loc)

    # Name?
    m = self._try_parse_pat(_py_name_pat)

    if m is not None:
        return self._ast_expr_from_str(m.group(0), begin_text_loc)

    # Literal string
    if accept_lit_str:
        val = self._try_parse_lit_str(True)

        if val is not None:
            # Use the Python representation of the string as the expression
            return self._ast_expr_from_str(repr(val), begin_text_loc)

    # Expect `{`
    #
    # Build the error message from what this call accepts: each
    # optional alternative is inserted before the two base parts.
    msg_accepted_parts = ["a name", "or `{`"]

    if accept_lit_str:
        msg_accepted_parts.insert(0, "a literal string")

    if accept_const_float:
        msg_accepted_parts.insert(0, "a constant floating point number")

    if accept_const_int:
        msg_pos = "" if allow_neg_int else "positive "
        msg_accepted_parts.insert(0, "a {}constant integer".format(msg_pos))

    # Only the two base parts: "a name or `{`"; otherwise a
    # comma-separated list
    if len(msg_accepted_parts) == 2:
        msg_accepted = " ".join(msg_accepted_parts)
    else:
        msg_accepted = ", ".join(msg_accepted_parts)

    self._expect_pat(
        self._inner_expr_prefix_pat,
        "Expecting {}".format(msg_accepted),
    )

    # Expect an expression
    self._skip_ws_and_comments()
    expr_text_loc = self._text_loc
    m = self._expect_pat(self._inner_expr_pat, "Expecting an expression")
    expr_str = m.group(0)

    # Expect `}`
    self._skip_ws_and_comments()
    self._expect_pat(self._inner_expr_suffix_pat, "Expecting `}`")

    return self._ast_expr_from_str(expr_str, expr_text_loc)
1574
# Patterns for _try_parse_fill_until()
#
# NOTE(review): `_fill_until_pad_val_prefix_pat` isn't referenced by
# _try_parse_fill_until() below, which relies on _parse_pad_val();
# confirm that nothing else uses it before removing it.
_fill_until_prefix_pat = re.compile(r"\+")
_fill_until_pad_val_prefix_pat = re.compile(r"~")

# Tries to parse a filling: `+`, an expression (a constant integer is
# accepted), and an optional padding value.
#
# Returns a filling item, or `None` if the current text doesn't start
# with `+`.
def _try_parse_fill_until(self):
    start_loc = self._text_loc

    if self._try_parse_pat(self._fill_until_prefix_pat) is None:
        # Not a filling
        return

    # Expression
    self._skip_ws_and_comments()
    until_expr_str, until_expr = self._expect_expr(accept_const_int=True)

    # Optional padding value (0 if missing)
    pad_val = self._parse_pad_val()
    return _FillUntil(until_expr_str, until_expr, pad_val, start_loc)
1597
# Parses the multiplier expression of a repetition (block or
# post-item), accepting a constant integer in addition to what
# _expect_expr() always accepts.
#
# Returns what _expect_expr() returns: the expression string and AST
# node.
def _expect_rep_mul_expr(self):
    expr_str_and_node = self._expect_expr(accept_const_int=True)
    return expr_str_and_node
1602
# Common block end pattern
_block_end_pat = re.compile(r"!end\b")

# Pattern for _try_parse_rep_block()
_rep_block_prefix_pat = re.compile(r"!r(?:epeat)?\b")

# Tries to parse a repetition block: `!repeat` (or `!r`), a multiplier
# expression, items, and `!end`.
#
# Returns a repetition item, or `None` if the current text doesn't
# start a repetition block.
def _try_parse_rep_block(self):
    start_loc = self._text_loc

    if self._try_parse_pat(self._rep_block_prefix_pat) is None:
        # Not a repetition block
        return

    # Multiplier expression
    self._skip_ws_and_comments()
    mul_expr_str, mul_expr = self._expect_rep_mul_expr()

    # Items to repeat
    self._skip_ws_and_comments_and_syms()
    rep_items = self._parse_items()

    # `!end`
    self._skip_ws_and_comments_and_syms()
    end_msg = "Expecting an item or `!end` (end of repetition block)"
    self._expect_pat(self._block_end_pat, end_msg)
    return _Rep(rep_items, mul_expr_str, mul_expr, start_loc)
1635
# Pattern for _try_parse_cond_block()
_cond_block_prefix_pat = re.compile(r"!if\b")
_cond_block_else_pat = re.compile(r"!else\b")

# Tries to parse a conditional block: `!if`, an expression, the "true"
# items, an optional `!else` followed with the "false" items, and
# `!end`.
#
# Without `!else`, the "false" group is empty and located at the
# beginning of the block.
#
# Returns a conditional item, or `None` if the current text doesn't
# start with `!if`.
def _try_parse_cond_block(self):
    start_loc = self._text_loc

    if self._try_parse_pat(self._cond_block_prefix_pat) is None:
        # Not a conditional block
        return

    # Condition expression
    self._skip_ws_and_comments()
    cond_expr_str, cond_expr = self._expect_expr()

    # "True" branch
    self._skip_ws_and_comments_and_syms()
    true_loc = self._text_loc
    true_items = self._parse_items()

    # "False" branch: empty unless there's an `!else`
    false_loc = start_loc
    false_items = []  # type: List[_Item]
    self._skip_ws_and_comments_and_syms()
    has_else = self._try_parse_pat(self._cond_block_else_pat) is not None

    if has_else:
        self._skip_ws_and_comments_and_syms()
        false_loc = self._text_loc
        false_items = self._parse_items()

    # `!end`
    self._expect_pat(
        self._block_end_pat,
        "Expecting an item, `!else`, or `!end` (end of conditional block)",
    )

    true_group = _Group(true_items, true_loc)
    false_group = _Group(false_items, false_loc)
    return _Cond(true_group, false_group, cond_expr_str, cond_expr, start_loc)
1684
# Pattern for _try_parse_trans_block()
_trans_block_prefix_pat = re.compile(r"!t(?:ransform)?\b")
_trans_block_type_pat = re.compile(
    r"(?:(?:base|b)64(?:u)?|(?:base|b)(?:16|32)|(?:ascii|a|base|b)85(?:p)?|(?:quopri|qp)(?:t)?|gzip|gz|bzip2|bz2)\b"
)

# Tries to parse a transformation block: `!transform` (or `!t`), a
# transformation type, items, and `!end`.
#
# The transformation type selects the function which will encode or
# compress the data of the items, as well as a descriptive name.
#
# Returns a transformation block item, or `None` if the current text
# doesn't start a transformation block.
def _try_parse_trans_block(self):
    start_loc = self._text_loc

    if self._try_parse_pat(self._trans_block_prefix_pat) is None:
        # Not a transformation block
        return

    # Transformation type
    self._skip_ws_and_comments()
    type_match = self._expect_pat(
        self._trans_block_type_pat, "Expecting a known transformation type"
    )

    # Items to transform
    self._skip_ws_and_comments_and_syms()
    trans_items = self._parse_items()

    # `!end`
    self._expect_pat(
        self._block_end_pat,
        "Expecting an item or `!end` (end of transformation block)",
    )

    # Accepted spellings, function, and descriptive name of each
    # transformation (bzip2 is the fallback below)
    known = (
        (("base64", "b64"), base64.standard_b64encode, "standard Base64"),
        (("base64u", "b64u"), base64.urlsafe_b64encode, "URL-safe Base64"),
        (("base32", "b32"), base64.b32encode, "Base32"),
        (("base16", "b16"), base64.b16encode, "Base16"),
        (("ascii85", "a85"), base64.a85encode, "Ascii85"),
        (
            ("ascii85p", "a85p"),
            functools.partial(base64.a85encode, pad=True),
            "padded Ascii85",
        ),
        (("base85", "b85"), base64.b85encode, "Base85"),
        (
            ("base85p", "b85p"),
            functools.partial(base64.b85encode, pad=True),
            "padded Base85",
        ),
        (("quopri", "qp"), quopri.encodestring, "MIME quoted-printable"),
        (
            ("quoprit", "qpt"),
            functools.partial(quopri.encodestring, quotetabs=True),
            "MIME quoted-printable (with quoted tabs)",
        ),
        (("gzip", "gz"), gzip.compress, "gzip"),
    )
    enc = type_match.group(0)

    for spellings, func, name in known:
        if enc in spellings:
            break
    else:
        # The pattern leaves no other possibility
        assert enc in ("bzip2", "bz2")
        func = bz2.compress
        name = "bzip2"

    return _Trans(trans_items, name, func, start_loc)
1765
# Common left parenthesis pattern
_left_paren_pat = re.compile(r"\(")

# Patterns for _try_parse_macro_def() and _try_parse_macro_exp()
_macro_params_comma_pat = re.compile(",")

# Patterns for _try_parse_macro_def()
_macro_def_prefix_pat = re.compile(r"!m(?:acro)?\b")

# Tries to parse a macro definition: `!macro` (or `!m`), a unique macro
# name, comma-separated unique parameter names within `(` and `)`,
# items, and `!end`.
#
# The items are parsed with empty sets of known variable and label
# names; the previous sets are put back afterwards.
#
# Returns `True` after registering the definition in
# `self._macro_defs`, or `False` if the current text doesn't start a
# macro definition.
def _try_parse_macro_def(self):
    start_loc = self._text_loc

    if self._try_parse_pat(self._macro_def_prefix_pat) is None:
        # Not a macro definition
        return False

    # Macro name
    self._skip_ws_and_comments()
    name_loc = self._text_loc
    macro_name = self._expect_pat(_py_name_pat, "Expecting a valid macro name").group(0)

    if macro_name in self._macro_defs:
        _raise_error("Duplicate macro named `{}`".format(macro_name), name_loc)

    # `(`
    self._skip_ws_and_comments()
    self._expect_pat(self._left_paren_pat, "Expecting `(`")

    # Comma-separated parameter names up to `)`
    param_names = []  # type: List[str]

    while True:
        self._skip_ws_and_comments()

        if self._try_parse_pat(self._right_paren_pat) is not None:
            # End of parameter names
            break

        # A comma must separate a parameter name from the previous one
        if param_names:
            self._expect_pat(self._macro_params_comma_pat, "Expecting `,`")

        self._skip_ws_and_comments()
        param_loc = self._text_loc
        param_name = self._expect_pat(
            _py_name_pat, "Expecting valid parameter name"
        ).group(0)

        if param_name in param_names:
            _raise_error(
                "Duplicate macro parameter named `{}`".format(param_name),
                param_loc,
            )

        param_names.append(param_name)

    # Items, parsed with their own variable and label names
    self._skip_ws_and_comments_and_syms()
    saved_names = (self._var_names.copy(), self._label_names.copy())
    self._var_names = set()  # type: Set[str]
    self._label_names = set()  # type: Set[str]
    macro_items = self._parse_items()
    self._var_names, self._label_names = saved_names

    # `!end`
    self._expect_pat(
        self._block_end_pat, "Expecting an item or `!end` (end of macro block)"
    )

    # Register the definition
    self._macro_defs[macro_name] = _MacroDef(
        macro_name, param_names, macro_items, start_loc
    )
    return True
1849
# Patterns for _try_parse_macro_exp()
_macro_exp_prefix_pat = re.compile(r"m\b")
_macro_exp_colon_pat = re.compile(r":")

# Tries to parse a macro expansion: `m`, `:`, the name of a known
# macro, and comma-separated parameter values within `(` and `)`.
#
# A parameter value is what _expect_expr() accepts, including a
# constant integer (possibly negative), a constant floating point
# number, and a literal string.
#
# The number of parameter values must match the number of parameter
# names of the macro definition.
#
# Returns a macro expansion item, or `None` if the current text doesn't
# start a macro expansion.
def _try_parse_macro_exp(self):
    start_loc = self._text_loc

    if self._try_parse_pat(self._macro_exp_prefix_pat) is None:
        # Not a macro expansion
        return

    # `:`
    self._skip_ws_and_comments()
    self._expect_pat(self._macro_exp_colon_pat, "Expecting `:`")

    # Name of a known macro
    self._skip_ws_and_comments()
    name_loc = self._text_loc
    macro_name = self._expect_pat(_py_name_pat, "Expecting a valid macro name").group(0)
    macro_def = self._macro_defs.get(macro_name)

    if macro_def is None:
        _raise_error("Unknown macro name `{}`".format(macro_name), name_loc)

    # `(`
    self._skip_ws_and_comments()
    self._expect_pat(self._left_paren_pat, "Expecting `(`")

    # Comma-separated parameter values up to `)`
    params_loc = self._text_loc
    params = []  # type: List[_MacroExpParam]

    while True:
        self._skip_ws_and_comments()

        if self._try_parse_pat(self._right_paren_pat) is not None:
            # End of parameter values
            break

        # A comma must separate a value from the previous one
        if params:
            self._expect_pat(self._macro_params_comma_pat, "Expecting `,`")

        self._skip_ws_and_comments()
        param_loc = self._text_loc
        param_expr_str, param_expr = self._expect_expr(
            accept_const_int=True,
            allow_neg_int=True,
            accept_const_float=True,
            accept_lit_str=True,
        )
        params.append(_MacroExpParam(param_expr_str, param_expr, text_loc=param_loc))

    # As many values as the definition has parameter names
    passed_count = len(params)
    expected_count = len(macro_def.param_names)

    if passed_count != expected_count:
        plural_suffix = "" if passed_count == 1 else "s"
        _raise_error(
            "Macro expansion passes {} parameter{} while the definition expects {}".format(
                passed_count, plural_suffix, expected_count
            ),
            params_loc,
        )

    return _MacroExp(macro_name, params, start_loc)
1928
# Tries to parse a base item (anything except a post-item repetition)
# by calling the functions of `self._base_item_parse_funcs` in order.
#
# Returns the first result which isn't `None` (the remaining functions
# aren't called), or `None` if there's no such result.
def _try_parse_base_item(self):
    # Lazy: each function is only called if all the previous ones
    # returned `None`
    results = (parse_func() for parse_func in self._base_item_parse_funcs)
    return next((res for res in results if res is not None), None)
1937
# Pattern for _try_parse_rep_post()
_rep_post_prefix_pat = re.compile(r"\*")

# Tries to parse a post-item repetition: `*` followed with a multiplier
# expression.
#
# Returns what _expect_rep_mul_expr() returns (expression string and
# AST expression node), or `None` if the current text doesn't start
# with `*`.
def _try_parse_rep_post(self):
    found_prefix = self._try_parse_pat(self._rep_post_prefix_pat) is not None

    if found_prefix:
        # Multiplier expression
        self._skip_ws_and_comments()
        return self._expect_rep_mul_expr()
1952
# Tries to parse an item, possibly followed by a post-item repetition
# when the item is repeatable, appending the result to `items`.
#
# With a post-item repetition, what's appended is a repetition item
# which contains the parsed item.
#
# Returns `True` on success, or `None` if no base item could be parsed
# (in which case `items` is left as is).
def _try_append_item(self, items: List[_Item]):
    self._skip_ws_and_comments_and_syms()
    parsed = self._try_parse_base_item()

    if parsed is None:
        # Nothing to append
        return

    if isinstance(parsed, _RepableItem):
        # Optional post-item repetition
        self._skip_ws_and_comments()
        rep_loc = self._text_loc
        rep_ret = self._try_parse_rep_post()

        if rep_ret is not None:
            mul_expr_str, mul_expr = rep_ret
            parsed = _Rep([parsed], mul_expr_str, mul_expr, text_loc=rep_loc)

    items.append(parsed)
    return True
1977
# Parses items until the end of the input or until what's next is
# neither an item nor (if `accept_macro_defs` is `True`) a macro
# definition, and returns them.
#
# Macro definitions are registered by _try_parse_macro_def(); they
# don't add anything to the returned list.
def _parse_items(self, accept_macro_defs: bool = False) -> List[_Item]:
    parsed_items = []  # type: List[_Item]

    while self._isnt_done():
        if self._try_append_item(parsed_items):
            # Got one more item
            continue

        # Not an item: only a macro definition (if accepted) lets the
        # loop continue
        if not (accept_macro_defs and self._try_parse_macro_def()):
            break

    return parsed_items
1997
# Parses the whole Normand input, setting `self._res` to the main group
# item on success.
#
# Macro definitions are accepted at this first level. Anything left
# after the first level items (and after skipping whitespaces,
# insignificant symbols, and comments) is reported as an unexpected
# character.
def _parse(self):
    if not self._normand.strip():
        # Nothing to consume: the main group is empty
        self._res = _Group([], self._text_loc)
        return

    # First level items
    main_items = self._parse_items(True)

    # The whole input must be consumed at this point
    self._skip_ws_and_comments_and_syms()

    if self._isnt_done():
        unexpected = self._normand[self._at]
        self._raise_error("Unexpected character `{}`".format(unexpected))

    self._res = _Group(main_items, self._text_loc)
2019
2020
# The return type of parse().
#
# Instances are only meant to be built with the private _create()
# class method: calling the class directly raises
# `NotImplementedError`.
class ParseResult:
    # Builds and returns an instance without going through __init__().
    @classmethod
    def _create(
        cls,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        res = cls.__new__(cls)
        res._init(data, variables, labels, offset, bo)
        return res

    # Direct instantiation isn't supported.
    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Actual initialization (called by _create()).
    def _init(
        self,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self._data, self._vars, self._labels = data, variables, labels
        self._offset, self._bo = offset, bo

    # Generated data.
    @property
    def data(self):
        return self._data

    # Dictionary of updated variable names to their last computed
    # value.
    @property
    def variables(self):
        return self._vars

    # Dictionary of updated main group label names to their computed
    # value.
    @property
    def labels(self):
        return self._labels

    # Updated offset.
    @property
    def offset(self):
        return self._offset

    # Updated byte order.
    @property
    def byte_order(self):
        return self._bo
2078
2079
# Raises a parse error with the message `msg` at the text location of
# the item `item`.
def _raise_error_for_item(msg: str, item: _Item) -> NoReturn:
    item_text_loc = item.text_loc
    _raise_error(msg, item_text_loc)
2084
2085
# The `ICITTE` reserved name.
#
# The parser rejects it as a variable or label name, and expression
# evaluation binds it to the current offset.
_icitte_name = "ICITTE"
2088
2089
# Base node visitor.
#
# Calls the _visit_name() method for each visited name node, except
# when the "parent is a call" flag is set.
#
# Visiting a call node sets this flag; finishing the visit of _any_
# node clears it. In practice, this skips the name which a call node
# reaches first (typically the name of the called function), while the
# names within its arguments are reported.
class _NodeVisitor(ast.NodeVisitor):
    def __init__(self):
        self._parent_is_call = False

    def generic_visit(self, node: ast.AST):
        node_type = type(node)

        if node_type is ast.Call:
            # What comes next is what's being called
            self._parent_is_call = True
        elif node_type is ast.Name:
            if not self._parent_is_call:
                self._visit_name(node.id)

        # Visit the children, and then leave the "call" context
        super().generic_visit(node)
        self._parent_is_call = False

    # Called with the identifier of each reported name node.
    @abc.abstractmethod
    def _visit_name(self, name: str):
        pass
2110
2111
# Expression validator: validates that all the names within the
# expression (as reported by the base visitor) are allowed.
#
# A name is allowed if it's the reserved `ICITTE` name or if it's part
# of `allowed_names`.
#
# `expr_str` and `text_loc` (original expression string and its text
# location) are only used to report an illegal name.
class _ExprValidator(_NodeVisitor):
    def __init__(self, expr_str: str, text_loc: TextLocation, allowed_names: Set[str]):
        super().__init__()
        self._expr_str = expr_str
        self._text_loc = text_loc
        self._allowed_names = allowed_names

    # Reports an error (through _raise_error()) if `name` isn't a known
    # and reachable variable/label name.
    def _visit_name(self, name: str):
        if name == _icitte_name or name in self._allowed_names:
            # Legal name
            return

        msg = "Illegal (unknown or unreachable) variable/label name `{}` in expression `{}`".format(
            name, self._expr_str
        )

        # `ICITTE` is always legal, therefore there's always at least
        # one legal name to list.
        legal_names = self._allowed_names.copy()
        legal_names.add(_icitte_name)

        # Sort the quoted names (not the raw ones) to keep the exact
        # same message as before.
        legal_names_str = ", ".join(
            sorted(["`{}`".format(legal_name) for legal_name in legal_names])
        )
        msg += "; the legal names are {{{}}}".format(legal_names_str)
        _raise_error(msg, self._text_loc)
2142
2143
# Generator state: variables, labels, current offset, and current byte
# order (`None` if none).
#
# The state holds its own copies of `variables` and `labels` (made with
# their copy() method).
class _GenState:
    def __init__(
        self,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self.offset, self.bo = offset, bo
        self.variables = variables.copy()
        self.labels = labels.copy()

    def __repr__(self):
        fmt = "_GenState({!r}, {!r}, {!r}, {!r})"
        return fmt.format(self.variables, self.labels, self.offset, self.bo)
2162
2163
# Fixed-length number item instance: a fixed-length number item with
# everything needed to handle it later, that is, its offset within the
# generated data, a generation state, and parsing error messages.
class _FlNumItemInst:
    def __init__(
        self,
        item: _FlNum,
        offset_in_data: int,
        state: _GenState,
        parse_error_msgs: List[ParseErrorMessage],
    ):
        self._item, self._offset_in_data = item, offset_in_data
        self._state, self._parse_error_msgs = state, parse_error_msgs

    # Fixed-length number item.
    @property
    def item(self):
        return self._item

    # Offset of the item within the generated data.
    @property
    def offset_in_data(self):
        return self._offset_in_data

    # Generation state to use to evaluate the expression of the item.
    @property
    def state(self):
        return self._state

    # Parsing error messages to consider when handling this instance.
    @property
    def parse_error_msgs(self):
        return self._parse_error_msgs
2193
2194
2195 # Generator of data and final state from a group item.
2196 #
2197 # Generation happens in memory at construction time. After building, use
2198 # the `data`, `variables`, `labels`, `offset`, and `bo` properties to
2199 # get the resulting context.
2200 #
2201 # The steps of generation are:
2202 #
2203 # 1. Handle each item in prefix order.
2204 #
2205 # The handlers append bytes to `self._data` and update some current
2206 # state object (`_GenState` instance).
2207 #
2208 # When handling a fixed-length number item, try to evaluate its
2209 # expression using the current state. If this fails, then it might be
2210 # because the expression refers to a "future" label: save the current
2211 # offset in `self._data` (generated data) and a snapshot of the
2212 # current state within `self._fl_num_item_insts` (`_FlNumItemInst`
2213 # object). _gen_fl_num_item_insts() will deal with this later. A
2214 # `_FlNumItemInst` instance also contains a snapshot of the current
2215 # parsing error messages (`self._parse_error_msgs`) which need to be
2216 # taken into account when handling the instance later.
2217 #
2218 # When handling the items of a group, keep a map of immediate label
2219 # names to their offset. Then, after having processed all the items,
2220 # update the relevant saved state snapshots in
2221 # `self._fl_num_item_insts` with those immediate label values.
2222 # _gen_fl_num_item_insts() will deal with this later.
2223 #
2224 # 2. Handle all the fixed-length number item instances of which the
2225 # expression evaluation failed before.
2226 #
2227 # At this point, `self._fl_num_item_insts` contains everything that's
2228 # needed to evaluate the expressions, including the values of
2229 # "future" labels from the point of view of some fixed-length number
2230 # item instance.
2231 #
2232 # If an evaluation fails at this point, then it's a user error. Add
2233 # to the parsing error all the saved parsing error messages of the
2234 # instance. Those additional messages add precious context to the
2235 # error.
2236 class _Gen:
# Builds the generator and immediately runs the generation for the
# group item `group`, starting from an initial state made of
# `variables`, `labels`, `offset`, and `bo`.
#
# `macro_defs` are the macro definitions available during generation.
def __init__(
    self,
    group: _Group,
    macro_defs: _MacroDefsT,
    variables: VariablesT,
    labels: LabelsT,
    offset: int,
    bo: Optional[ByteOrder],
):
    # Everything _gen() needs must exist before calling it
    self._in_trans = False
    self._parse_error_msgs = []  # type: List[ParseErrorMessage]
    self._fl_num_item_insts = []  # type: List[_FlNumItemInst]
    self._macro_defs = macro_defs

    # Generate from an initial state
    initial_state = _GenState(variables, labels, offset, bo)
    self._gen(group, initial_state)
2251
# Generated bytes (`self._data`).
@property
def data(self):
    return self._data

# Updated variables (taken from the final generation state).
@property
def variables(self):
    return self._final_state.variables

# Updated main group labels (taken from the final generation state).
@property
def labels(self):
    return self._final_state.labels

# Updated offset (taken from the final generation state).
@property
def offset(self):
    return self._final_state.offset

# Updated byte order (taken from the final generation state).
@property
def bo(self):
    return self._final_state.bo
2276
# Evaluates the expression `expr` of which the original string is
# `expr_str` at the location `text_loc` considering the current
# generation state `state`.
#
# The names available to the expression are the labels of `state`,
# then `ICITTE` (current offset), then the variables of `state`, each
# one overriding the previous ones on conflict.
#
# A `bool` result is converted to `int`. The result must then be an
# `int`, or:
#
# • A `float` if `accept_float` is `True`.
#
# • A `str` if `accept_str` is `True`.
#
# Any name validation, evaluation, or result type failure is reported
# through _raise_error() at `text_loc`.
#
# NOTE(review): this relies on eval(); the validator doesn't check the
# names of called functions, so expressions must come from a trusted
# source.
@staticmethod
def _eval_expr(
    expr_str: str,
    expr: ast.Expression,
    text_loc: TextLocation,
    state: _GenState,
    accept_float: bool = False,
    accept_str: bool = False,
):
    # Symbols: labels, then `ICITTE`, then variables
    syms = dict(state.labels)  # type: VariablesT
    syms[_icitte_name] = state.offset
    syms.update(state.variables)

    # Make sure the expression only uses known names
    validator = _ExprValidator(expr_str, text_loc, set(syms))
    validator.visit(expr)

    # Compile and evaluate
    try:
        code = compile(expr, "", "eval")
        val = eval(code, None, syms)
    except Exception as exc:
        _raise_error(
            "Failed to evaluate expression `{}`: {}".format(expr_str, exc),
            text_loc,
        )

    # Normalize a boolean result
    if type(val) is bool:
        val = int(val)

    # Accepted result types
    accepted_types = {int}  # type: Set[type]

    if accept_float:
        accepted_types.add(float)

    if accept_str:
        accepted_types.add(str)

    if type(val) in accepted_types:
        return val

    # Invalid result type: build "`a`", "`a` or `b`", or
    # "`a`, `b`, or `c`"
    type_names = sorted("`{}`".format(t.__name__) for t in accepted_types)

    if len(type_names) > 2:
        type_names[-1] = "or {}".format(type_names[-1])
        msg_expected = ", ".join(type_names)
    else:
        msg_expected = " or ".join(type_names)

    _raise_error(
        "Invalid expression `{}`: expecting result type {}, not `{}`".format(
            expr_str, msg_expected, type(val).__name__
        ),
        text_loc,
    )

    return val
2350
2351 # Forwards to _eval_expr() with the expression and text location of
2352 # `item`.
2353 @staticmethod
2354 def _eval_item_expr(
2355 item: Union[_Cond, _FillUntil, _FlNum, _Leb128Int, _Rep, _Str, _VarAssign],
2356 state: _GenState,
2357 accept_float: bool = False,
2358 accept_str: bool = False,
2359 ):
2360 return _Gen._eval_expr(
2361 item.expr_str, item.expr, item.text_loc, state, accept_float, accept_str
2362 )
2363
2364 # Handles the byte item `item`.
2365 def _handle_byte_item(self, item: _Byte, state: _GenState):
2366 self._data.append(item.val)
2367 state.offset += item.size
2368
2369 # Handles the literal string item `item`.
2370 def _handle_lit_str_item(self, item: _LitStr, state: _GenState):
2371 self._data += item.data
2372 state.offset += item.size
2373
2374 # Handles the byte order setting item `item`.
2375 def _handle_set_bo_item(self, item: _SetBo, state: _GenState):
2376 # Update current byte order
2377 state.bo = item.bo
2378
2379 # Handles the variable assignment item `item`.
2380 def _handle_var_assign_item(self, item: _VarAssign, state: _GenState):
2381 # Update variable
2382 state.variables[item.name] = self._eval_item_expr(
2383 item, state, accept_float=True, accept_str=True
2384 )
2385
2386 # Returns the effective byte order to use to encode the fixed-length
2387 # number `item` considering the current state `state`.
2388 @staticmethod
2389 def _fl_num_item_effective_bo(item: _FlNum, state: _GenState):
2390 return state.bo if item.bo is None else item.bo
2391
2392 # Handles the fixed-length number item `item`.
2393 def _handle_fl_num_item(self, item: _FlNum, state: _GenState):
2394 # Effective byte order
2395 bo = self._fl_num_item_effective_bo(item, state)
2396
2397 # Validate current byte order
2398 if bo is None and item.len > 8:
2399 _raise_error_for_item(
2400 "Current byte order isn't defined at first fixed-length number (`{}`) to encode on more than 8 bits".format(
2401 item.expr_str
2402 ),
2403 item,
2404 )
2405
2406 # Try an immediate evaluation. If it fails, then keep everything
2407 # needed to (try to) generate the bytes of this item later.
2408 try:
2409 data = self._gen_fl_num_item_inst_data(item, state)
2410 except Exception:
2411 if self._in_trans:
2412 _raise_error_for_item(
2413 "Invalid expression `{}`: failed to evaluate within a transformation block".format(
2414 item.expr_str
2415 ),
2416 item,
2417 )
2418
2419 self._fl_num_item_insts.append(
2420 _FlNumItemInst(
2421 item,
2422 len(self._data),
2423 copy.deepcopy(state),
2424 copy.deepcopy(self._parse_error_msgs),
2425 )
2426 )
2427
2428 # Reserve space in `self._data` for this instance
2429 data = bytes([0] * (item.len // 8))
2430
2431 # Append bytes
2432 self._data += data
2433
2434 # Update offset
2435 state.offset += len(data)
2436
2437 # Returns the size, in bytes, required to encode the value `val`
2438 # with LEB128 (signed version if `is_signed` is `True`).
2439 @staticmethod
2440 def _leb128_size_for_val(val: int, is_signed: bool):
2441 if val < 0:
2442 # Equivalent upper bound.
2443 #
2444 # For example, if `val` is -128, then the full integer for
2445 # this number of bits would be [-128, 127].
2446 val = -val - 1
2447
2448 # Number of bits (add one for the sign if needed)
2449 bits = val.bit_length() + int(is_signed)
2450
2451 if bits == 0:
2452 bits = 1
2453
2454 # Seven bits per byte
2455 return math.ceil(bits / 7)
2456
2457 # Handles the LEB128 integer item `item`.
2458 def _handle_leb128_int_item(self, item: _Leb128Int, state: _GenState):
2459 # Compute value
2460 val = self._eval_item_expr(item, state)
2461
2462 # Size in bytes
2463 size = self._leb128_size_for_val(val, type(item) is _SLeb128Int)
2464
2465 # For each byte
2466 for _ in range(size):
2467 # Seven LSBs, MSB of the byte set (continue)
2468 self._data.append((val & 0x7F) | 0x80)
2469 val >>= 7
2470
2471 # Clear MSB of last byte (stop)
2472 self._data[-1] &= ~0x80
2473
2474 # Update offset
2475 state.offset += size
2476
2477 # Handles the string item `item`.
2478 def _handle_str_item(self, item: _Str, state: _GenState):
2479 # Compute value
2480 val = str(self._eval_item_expr(item, state, accept_float=True, accept_str=True))
2481
2482 # Encode
2483 data = _encode_str(val, item.codec, item.text_loc)
2484
2485 # Add to data
2486 self._data += data
2487
2488 # Update offset
2489 state.offset += len(data)
2490
2491 # Handles the group item `item`, removing the immediate labels from
2492 # `state` at the end if `remove_immediate_labels` is `True`.
2493 def _handle_group_item(
2494 self, item: _Group, state: _GenState, remove_immediate_labels: bool = True
2495 ):
2496 first_fl_num_item_inst_index = len(self._fl_num_item_insts)
2497 immediate_labels = {} # type: LabelsT
2498
2499 # Handle each item
2500 for subitem in item.items:
2501 if type(subitem) is _Label:
2502 # Add to local immediate labels
2503 immediate_labels[subitem.name] = state.offset
2504
2505 self._handle_item(subitem, state)
2506
2507 # Remove immediate labels from current state if needed
2508 if remove_immediate_labels:
2509 for name in immediate_labels:
2510 del state.labels[name]
2511
2512 # Add all immediate labels to all state snapshots since
2513 # `first_fl_num_item_inst_index`.
2514 for inst in self._fl_num_item_insts[first_fl_num_item_inst_index:]:
2515 inst.state.labels.update(immediate_labels)
2516
2517 # Handles the repetition item `item`.
2518 def _handle_rep_item(self, item: _Rep, state: _GenState):
2519 # Compute the repetition count
2520 mul = _Gen._eval_item_expr(item, state)
2521
2522 # Validate result
2523 if mul < 0:
2524 _raise_error_for_item(
2525 "Invalid expression `{}`: unexpected negative result {:,}".format(
2526 item.expr_str, mul
2527 ),
2528 item,
2529 )
2530
2531 # Generate group data `mul` times
2532 for _ in range(mul):
2533 self._handle_group_item(item, state)
2534
2535 # Handles the conditional item `item`.
2536 def _handle_cond_item(self, item: _Cond, state: _GenState):
2537 # Compute the conditional value
2538 val = _Gen._eval_item_expr(item, state)
2539
2540 # Generate selected group data
2541 if val:
2542 self._handle_group_item(item.true_item, state)
2543 else:
2544 self._handle_group_item(item.false_item, state)
2545
2546 # Handles the transformation item `item`.
2547 def _handle_trans_item(self, item: _Trans, state: _GenState):
2548 init_in_trans = self._in_trans
2549 self._in_trans = True
2550 init_data_len = len(self._data)
2551 init_offset = state.offset
2552
2553 # Generate group data
2554 self._handle_group_item(item, state)
2555
2556 # Remove and keep group data
2557 to_trans = self._data[init_data_len:]
2558 del self._data[init_data_len:]
2559
2560 # Encode group data and append to current data
2561 try:
2562 transformed = item.trans(to_trans)
2563 except Exception as exc:
2564 _raise_error_for_item(
2565 "Cannot apply the {} transformation to this data: {}".format(
2566 item.name, exc
2567 ),
2568 item,
2569 )
2570
2571 self._data += transformed
2572
2573 # Update offset and restore
2574 state.offset = init_offset + len(transformed)
2575 self._in_trans = init_in_trans
2576
2577 # Evaluates the parameters of the macro expansion item `item`
2578 # considering the initial state `init_state` and returns a new state
2579 # to handle the items of the macro.
2580 def _eval_macro_exp_params(self, item: _MacroExp, init_state: _GenState):
2581 # New state
2582 exp_state = _GenState({}, {}, init_state.offset, init_state.bo)
2583
2584 # Evaluate the parameter expressions
2585 macro_def = self._macro_defs[item.name]
2586
2587 for param_name, param in zip(macro_def.param_names, item.params):
2588 exp_state.variables[param_name] = _Gen._eval_expr(
2589 param.expr_str,
2590 param.expr,
2591 param.text_loc,
2592 init_state,
2593 accept_float=True,
2594 accept_str=True,
2595 )
2596
2597 return exp_state
2598
2599 # Handles the macro expansion item `item`.
2600 def _handle_macro_exp_item(self, item: _MacroExp, state: _GenState):
2601 parse_error_msg_text = "While expanding the macro `{}`:".format(item.name)
2602
2603 try:
2604 # New state
2605 exp_state = self._eval_macro_exp_params(item, state)
2606
2607 # Process the contained group
2608 init_data_size = len(self._data)
2609 parse_error_msg = (
2610 ParseErrorMessage._create( # pyright: ignore[reportPrivateUsage]
2611 parse_error_msg_text, item.text_loc
2612 )
2613 )
2614 self._parse_error_msgs.append(parse_error_msg)
2615 self._handle_group_item(self._macro_defs[item.name], exp_state)
2616 self._parse_error_msgs.pop()
2617 except ParseError as exc:
2618 _augment_error(exc, parse_error_msg_text, item.text_loc)
2619
2620 # Update state offset and return
2621 state.offset += len(self._data) - init_data_size
2622
2623 # Handles the offset setting item `item`.
2624 def _handle_set_offset_item(self, item: _SetOffset, state: _GenState):
2625 state.offset = item.val
2626
2627 # Handles the offset alignment item `item` (adds padding).
2628 def _handle_align_offset_item(self, item: _AlignOffset, state: _GenState):
2629 init_offset = state.offset
2630 align_bytes = item.val // 8
2631 state.offset = (state.offset + align_bytes - 1) // align_bytes * align_bytes
2632 self._data += bytes([item.pad_val] * (state.offset - init_offset))
2633
2634 # Handles the filling item `item` (adds padding).
2635 def _handle_fill_until_item(self, item: _FillUntil, state: _GenState):
2636 # Compute the new offset
2637 new_offset = _Gen._eval_item_expr(item, state)
2638
2639 # Validate the new offset
2640 if new_offset < state.offset:
2641 _raise_error_for_item(
2642 "Invalid expression `{}`: new offset {:,} is less than current offset {:,}".format(
2643 item.expr_str, new_offset, state.offset
2644 ),
2645 item,
2646 )
2647
2648 # Fill
2649 self._data += bytes([item.pad_val] * (new_offset - state.offset))
2650
2651 # Update offset
2652 state.offset = new_offset
2653
2654 # Handles the label item `item`.
2655 def _handle_label_item(self, item: _Label, state: _GenState):
2656 state.labels[item.name] = state.offset
2657
2658 # Handles the item `item`, returning the updated next repetition
2659 # instance.
2660 def _handle_item(self, item: _Item, state: _GenState):
2661 return self._item_handlers[type(item)](item, state)
2662
2663 # Generates the data for a fixed-length integer item instance having
2664 # the value `val` and the effective byte order `bo` and returns it.
2665 def _gen_fl_int_item_inst_data(
2666 self, val: int, bo: Optional[ByteOrder], item: _FlNum
2667 ):
2668 # Validate range
2669 if val < -(2 ** (item.len - 1)) or val > 2**item.len - 1:
2670 _raise_error_for_item(
2671 "Value {:,} is outside the {}-bit range when evaluating expression `{}`".format(
2672 val, item.len, item.expr_str
2673 ),
2674 item,
2675 )
2676
2677 # Encode result on 64 bits (to extend the sign bit whatever the
2678 # value of `item.len`).
2679 data = struct.pack(
2680 "{}{}".format(
2681 ">" if bo in (None, ByteOrder.BE) else "<",
2682 "Q" if val >= 0 else "q",
2683 ),
2684 val,
2685 )
2686
2687 # Keep only the requested length
2688 len_bytes = item.len // 8
2689
2690 if bo in (None, ByteOrder.BE):
2691 # Big endian: keep last bytes
2692 data = data[-len_bytes:]
2693 else:
2694 # Little endian: keep first bytes
2695 assert bo == ByteOrder.LE
2696 data = data[:len_bytes]
2697
2698 # Return data
2699 return data
2700
2701 # Generates the data for a fixed-length floating point number item
2702 # instance having the value `val` and the effective byte order `bo`
2703 # and returns it.
2704 def _gen_fl_float_item_inst_data(
2705 self, val: float, bo: Optional[ByteOrder], item: _FlNum
2706 ):
2707 # Validate length
2708 if item.len not in (32, 64):
2709 _raise_error_for_item(
2710 "Invalid {}-bit length for a fixed-length floating point number (value {:,})".format(
2711 item.len, val
2712 ),
2713 item,
2714 )
2715
2716 # Encode and return result
2717 return struct.pack(
2718 "{}{}".format(
2719 ">" if bo in (None, ByteOrder.BE) else "<",
2720 "f" if item.len == 32 else "d",
2721 ),
2722 val,
2723 )
2724
2725 # Generates the data for a fixed-length number item instance and
2726 # returns it.
2727 def _gen_fl_num_item_inst_data(self, item: _FlNum, state: _GenState):
2728 # Effective byte order
2729 bo = self._fl_num_item_effective_bo(item, state)
2730
2731 # Compute value
2732 val = self._eval_item_expr(item, state, True)
2733
2734 # Handle depending on type
2735 if type(val) is int:
2736 return self._gen_fl_int_item_inst_data(val, bo, item)
2737 else:
2738 assert type(val) is float
2739 return self._gen_fl_float_item_inst_data(val, bo, item)
2740
2741 # Generates the data for all the fixed-length number item instances
2742 # and writes it at the correct offset within `self._data`.
2743 def _gen_fl_num_item_insts(self):
2744 for inst in self._fl_num_item_insts:
2745 # Generate bytes
2746 try:
2747 data = self._gen_fl_num_item_inst_data(inst.item, inst.state)
2748 except ParseError as exc:
2749 # Add all the saved parse error messages for this
2750 # instance.
2751 for msg in reversed(inst.parse_error_msgs):
2752 _add_error_msg(exc, msg.text, msg.text_location)
2753
2754 raise
2755
2756 # Insert bytes into `self._data`
2757 self._data[inst.offset_in_data : inst.offset_in_data + len(data)] = data
2758
2759 # Generates the data (`self._data`) and final state
2760 # (`self._final_state`) from `group` and the initial state `state`.
2761 def _gen(self, group: _Group, state: _GenState):
2762 # Initial state
2763 self._data = bytearray()
2764
2765 # Item handlers
2766 self._item_handlers = {
2767 _AlignOffset: self._handle_align_offset_item,
2768 _Byte: self._handle_byte_item,
2769 _Cond: self._handle_cond_item,
2770 _FillUntil: self._handle_fill_until_item,
2771 _FlNum: self._handle_fl_num_item,
2772 _Group: self._handle_group_item,
2773 _Label: self._handle_label_item,
2774 _LitStr: self._handle_lit_str_item,
2775 _MacroExp: self._handle_macro_exp_item,
2776 _Rep: self._handle_rep_item,
2777 _SetBo: self._handle_set_bo_item,
2778 _SetOffset: self._handle_set_offset_item,
2779 _SLeb128Int: self._handle_leb128_int_item,
2780 _Str: self._handle_str_item,
2781 _Trans: self._handle_trans_item,
2782 _ULeb128Int: self._handle_leb128_int_item,
2783 _VarAssign: self._handle_var_assign_item,
2784 } # type: Dict[type, Callable[[Any, _GenState], None]]
2785
2786 # Handle the group item, _not_ removing the immediate labels
2787 # because the `labels` property offers them.
2788 self._handle_group_item(group, state, False)
2789
2790 # This is actually the final state
2791 self._final_state = state
2792
2793 # Generate all the fixed-length number bytes now that we know
2794 # their full state
2795 self._gen_fl_num_item_insts()
2796
2797
# Parses the Normand input string `normand` and returns a `ParseResult`
# instance containing the bytes it encodes as well as the final
# variables, labels, offset, and byte order.
#
# `init_variables` is a dictionary of initial variable names (valid
# Python names) to values (the command-line tool of this module passes
# `int`, `float`, and `str` values). A variable name must not be the
# reserved name `ICITTE`. `None` means no initial variables.
#
# `init_labels` is a dictionary of initial label names (valid Python
# names) to integral values. A label name must not be the reserved name
# `ICITTE`. `None` means no initial labels.
#
# `init_offset` is the initial offset.
#
# `init_byte_order` is the initial byte order (`None` means none).
#
# Raises `ParseError` on any parsing error.
def parse(
    normand: str,
    init_variables: Optional[VariablesT] = None,
    init_labels: Optional[LabelsT] = None,
    init_offset: int = 0,
    init_byte_order: Optional[ByteOrder] = None,
):
    # Fall back to new empty dictionaries
    variables = {} if init_variables is None else init_variables
    labels = {} if init_labels is None else init_labels

    # Parse the text first, and then generate the data from the
    # resulting main group and macro definitions.
    parser = _Parser(normand, variables, labels)
    gen = _Gen(
        parser.res,
        parser.macro_defs,
        variables,
        labels,
        init_offset,
        init_byte_order,
    )

    return ParseResult._create(  # pyright: ignore[reportPrivateUsage]
        gen.data, gen.variables, gen.labels, gen.offset, gen.bo
    )
2839
2840
2841 # Raises a command-line error with the message `msg`.
2842 def _raise_cli_error(msg: str) -> NoReturn:
2843 raise RuntimeError("Command-line error: {}".format(msg))
2844
2845
# Returns the `int` or `float` value out of the CLI assignment value
# `s` (surrounding whitespace is ignored).
#
# If `is_label` is `True`, then only an integer is accepted.
#
# Raises a command-line error if `s` is neither.
def _val_from_assign_val_str(s: str, is_label: bool):
    s = s.strip()

    # Floating point number (never for a label)?
    float_m = None if is_label else _const_float_pat.fullmatch(s)

    if float_m is not None:
        return float(float_m.group(0))

    # Integer?
    int_m = _const_int_pat.fullmatch(s)

    if int_m is None:
        _raise_cli_error(
            "Invalid assignment value `{}`: expecting {}".format(
                s, "an integer" if is_label else "a number"
            )
        )

    return int(_norm_const_int(int_m.group(0)), 0)
2865
2866
2867 # Returns a dictionary of string to numbers from the list of strings
2868 # `args` containing `NAME=VAL` entries.
2869 def _dict_from_arg(args: Optional[List[str]], is_label: bool, is_str_only: bool):
2870 d = {} # type: VariablesT
2871
2872 if args is None:
2873 return d
2874
2875 for arg in args:
2876 m = re.match(r"({})\s*=\s*(.*)$".format(_py_name_pat.pattern), arg)
2877
2878 if m is None:
2879 _raise_cli_error("Invalid assignment `{}`".format(arg))
2880
2881 if is_str_only:
2882 val = m.group(2)
2883 else:
2884 val = _val_from_assign_val_str(m.group(2), is_label)
2885
2886 d[m.group(1)] = val
2887
2888 return d
2889
2890
# Parses the command-line arguments and returns, in this order:
#
# 1. The input file path, or `None` if none.
# 2. The Normand input text.
# 3. The initial offset.
# 4. The initial byte order.
# 5. The initial variables.
# 6. The initial labels.
#
# This function reads the whole input (file or standard input).
#
# Raises a command-line error (see _raise_cli_error()) on an invalid
# assignment or a negative offset.
def _parse_cli_args():
    import argparse

    # Build parser
    ap = argparse.ArgumentParser()
    ap.add_argument(
        "--offset",
        metavar="OFFSET",
        action="store",
        type=int,
        default=0,
        help="initial offset (positive)",
    )
    ap.add_argument(
        "-b",
        "--byte-order",
        metavar="BO",
        choices=["be", "le"],
        type=str,
        help="initial byte order (`be` or `le`)",
    )
    ap.add_argument(
        "-v",
        "--var",
        metavar="NAME=VAL",
        action="append",
        help="add an initial numeric variable (may be repeated)",
    )
    ap.add_argument(
        "-s",
        "--var-str",
        metavar="NAME=VAL",
        action="append",
        help="add an initial string variable (may be repeated)",
    )
    ap.add_argument(
        "-l",
        "--label",
        metavar="NAME=VAL",
        action="append",
        help="add an initial label (may be repeated)",
    )
    ap.add_argument(
        "--version", action="version", version="Normand {}".format(__version__)
    )
    ap.add_argument(
        "path",
        metavar="PATH",
        action="store",
        nargs="?",
        help="input path (none means standard input)",
    )

    # Parse
    args = ap.parse_args()

    # Read input
    if args.path is None:
        normand = sys.stdin.read()
    else:
        with open(args.path) as f:
            normand = f.read()

    # Variables and labels: numeric variables first, then string
    # variables (a string variable replaces a numeric one having the
    # same name).
    variables = _dict_from_arg(args.var, False, False)
    variables.update(_dict_from_arg(args.var_str, False, True))
    labels = _dict_from_arg(args.label, True, False)

    # Validate offset
    if args.offset < 0:
        _raise_cli_error("Invalid negative offset {}".format(args.offset))

    # Validate and set byte order
    bo = None  # type: Optional[ByteOrder]

    if args.byte_order is not None:
        if args.byte_order == "be":
            bo = ByteOrder.BE
        else:
            # `choices` above only allows `be` and `le`
            assert args.byte_order == "le"
            bo = ByteOrder.LE

    # Return input and initial state
    return args.path, normand, args.offset, bo, variables, typing.cast(LabelsT, labels)
2983
2984
# CLI entry point without exception handling: parses `normand` with
# the initial state (`offset`, `bo`, `variables`, and `labels`) and
# writes the generated bytes to the standard output.
def _run_cli_with_args(
    normand: str,
    offset: int,
    bo: Optional[ByteOrder],
    variables: VariablesT,
    labels: LabelsT,
):
    res = parse(normand, variables, labels, offset, bo)

    # Binary output: use the underlying binary stream
    sys.stdout.buffer.write(res.data)
2994
2995
2996 # Prints the exception message `msg` and exits with status 1.
2997 def _fail(msg: str) -> NoReturn:
2998 if not msg.endswith("."):
2999 msg += "."
3000
3001 print(msg.strip(), file=sys.stderr)
3002 sys.exit(1)
3003
3004
# CLI entry point: parses the command-line arguments, runs the tool,
# and reports any error with _fail().
#
# For a parsing error, this function prints one line per message,
# outermost context first, each one starting with the absolute input
# path (if any), the line number, and the column number.
def _run_cli():
    try:
        cli_args = _parse_cli_args()
    except Exception as exc:
        _fail(str(exc))

    try:
        # The first element is the input path: only needed to report
        # errors.
        _run_cli_with_args(*cli_args[1:])
    except ParseError as exc:
        import os.path

        if cli_args[0] is None:
            prefix = ""
        else:
            prefix = "{}:".format(os.path.abspath(cli_args[0]))

        lines = []  # type: List[str]

        for msg in reversed(exc.messages):
            line = "{}{}:{} - {}".format(
                prefix,
                msg.text_location.line_no,
                msg.text_location.col_no,
                msg.text,
            )

            # Terminate the line if it's not already
            if line[-1] not in ".:;":
                line += "."

            lines.append(line)

        _fail("\n".join(lines).strip())
    except Exception as exc:
        _fail(str(exc))
3036
3037
# Run the command-line tool when executing this module itself (as
# opposed to importing it to use parse()).
if __name__ == "__main__":
    _run_cli()
This page took 0.157929 seconds and 4 git commands to generate.