1 # The MIT License (MIT)
3 # Copyright (c) 2023 Philippe Proulx <eeppeliteloop@gmail.com>
5 # Permission is hereby granted, free of charge, to any person obtaining
6 # a copy of this software and associated documentation files (the
7 # "Software"), to deal in the Software without restriction, including
8 # without limitation the rights to use, copy, modify, merge, publish,
9 # distribute, sublicense, and/or sell copies of the Software, and to
10 # permit persons to whom the Software is furnished to do so, subject to
11 # the following conditions:
13 # The above copyright notice and this permission notice shall be
14 # included in all copies or substantial portions of the Software.
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 # This module is the portable Normand processor. It offers both the
25 # parse() function and the command-line tool (run the module itself)
26 # without external dependencies except a `typing` module for Python 3.4.
28 # Feel free to copy this module file to your own project to use Normand.
30 # Upstream repository: <https://github.com/efficios/normand>.
32 __author__
= "Philippe Proulx"
54 from typing
import Any
, Set
, Dict
, List
, Union
, Pattern
, Callable
, NoReturn
, Optional
57 # Text location (line and column numbers).
60 def _create(cls
, line_no
: int, col_no
: int):
61 self
= cls
.__new
__(cls
)
62 self
._init
(line_no
, col_no
)
# Direct construction is unsupported: this always raises
# `NotImplementedError`. Instances are built by _create(), which uses
# `__new__()` and _init() instead.
def __init__(*args, **kwargs):  # type: ignore
    raise NotImplementedError()
68 def _init(self
, line_no
: int, col_no
: int):
69 self
._line
_no
= line_no
83 return "TextLocation({}, {})".format(self
._line
_no
, self
._col
_no
)
88 def __init__(self
, text_loc
: TextLocation
):
89 self
._text
_loc
= text_loc
91 # Source text location.
98 class _ScalarItem(_Item
):
99 # Returns the size, in bytes, of this item.
102 def size(self
) -> int:
112 class _Byte(_ScalarItem
, _RepableItem
):
113 def __init__(self
, val
: int, text_loc
: TextLocation
):
114 super().__init
__(text_loc
)
127 return "_Byte({}, {})".format(hex(self
._val
), repr(self
._text
_loc
))
131 class _Str(_ScalarItem
, _RepableItem
):
132 def __init__(self
, data
: bytes
, text_loc
: TextLocation
):
133 super().__init
__(text_loc
)
143 return len(self
._data
)
146 return "_Str({}, {})".format(repr(self
._data
), repr(self
._text
_loc
))
151 class ByteOrder(enum
.Enum
):
159 # Byte order setting.
161 def __init__(self
, bo
: ByteOrder
, text_loc
: TextLocation
):
162 super().__init
__(text_loc
)
170 return "_SetBo({}, {})".format(repr(self
._bo
), repr(self
._text
_loc
))
175 def __init__(self
, name
: str, text_loc
: TextLocation
):
176 super().__init
__(text_loc
)
185 return "_Label({}, {})".format(repr(self
._name
), repr(self
._text
_loc
))
189 class _SetOffset(_Item
):
190 def __init__(self
, val
: int, text_loc
: TextLocation
):
191 super().__init
__(text_loc
)
194 # Offset value (bytes).
200 return "_SetOffset({}, {})".format(repr(self
._val
), repr(self
._text
_loc
))
204 class _AlignOffset(_Item
):
205 def __init__(self
, val
: int, pad_val
: int, text_loc
: TextLocation
):
206 super().__init
__(text_loc
)
208 self
._pad
_val
= pad_val
210 # Alignment value (bits).
215 # Padding byte value.
221 return "_AlignOffset({}, {}, {})".format(
222 repr(self
._val
), repr(self
._pad
_val
), repr(self
._text
_loc
)
226 # Mixin containing an AST expression and its string.
228 def __init__(self
, expr_str
: str, expr
: ast
.Expression
):
229 self
._expr
_str
= expr_str
235 return self
._expr
_str
237 # Expression node to evaluate.
243 # Variable assignment.
244 class _VarAssign(_Item
, _ExprMixin
):
246 self
, name
: str, expr_str
: str, expr
: ast
.Expression
, text_loc
: TextLocation
248 super().__init
__(text_loc
)
249 _ExprMixin
.__init
__(self
, expr_str
, expr
)
258 return "_VarAssign({}, {}, {}, {})".format(
260 repr(self
._expr
_str
),
262 repr(self
._text
_loc
),
266 # Fixed-length number, possibly needing more than one byte.
267 class _FlNum(_ScalarItem
, _RepableItem
, _ExprMixin
):
269 self
, expr_str
: str, expr
: ast
.Expression
, len: int, text_loc
: TextLocation
271 super().__init
__(text_loc
)
272 _ExprMixin
.__init
__(self
, expr_str
, expr
)
282 return self
._len
// 8
285 return "_FlNum({}, {}, {}, {})".format(
286 repr(self
._expr
_str
),
289 repr(self
._text
_loc
),
294 class _Leb128Int(_Item
, _RepableItem
, _ExprMixin
):
295 def __init__(self
, expr_str
: str, expr
: ast
.Expression
, text_loc
: TextLocation
):
296 super().__init
__(text_loc
)
297 _ExprMixin
.__init
__(self
, expr_str
, expr
)
300 return "{}({}, {}, {})".format(
301 self
.__class
__.__name
__,
302 repr(self
._expr
_str
),
304 repr(self
._text
_loc
),
308 # Unsigned LEB128 integer.
309 class _ULeb128Int(_Leb128Int
, _RepableItem
, _ExprMixin
):
313 # Signed LEB128 integer.
314 class _SLeb128Int(_Leb128Int
, _RepableItem
, _ExprMixin
):
319 class _Group(_Item
, _RepableItem
):
320 def __init__(self
, items
: List
[_Item
], text_loc
: TextLocation
):
321 super().__init
__(text_loc
)
330 return "_Group({}, {})".format(repr(self
._items
), repr(self
._text
_loc
))
334 class _Rep(_Item
, _ExprMixin
):
336 self
, item
: _Item
, expr_str
: str, expr
: ast
.Expression
, text_loc
: TextLocation
338 super().__init
__(text_loc
)
339 _ExprMixin
.__init
__(self
, expr_str
, expr
)
348 return "_Rep({}, {}, {}, {})".format(
350 repr(self
._expr
_str
),
352 repr(self
._text
_loc
),
357 class _Cond(_Item
, _ExprMixin
):
359 self
, item
: _Item
, expr_str
: str, expr
: ast
.Expression
, text_loc
: TextLocation
361 super().__init
__(text_loc
)
362 _ExprMixin
.__init
__(self
, expr_str
, expr
)
371 return "_Cond({}, {}, {}, {})".format(
373 repr(self
._expr
_str
),
375 repr(self
._text
_loc
),
379 # Expression item type.
380 _ExprItemT
= Union
[_FlNum
, _Leb128Int
, _VarAssign
, _Rep
, _Cond
]
383 # A parsing error containing a message and a text location.
384 class ParseError(RuntimeError):
386 def _create(cls
, msg
: str, text_loc
: TextLocation
):
387 self
= cls
.__new
__(cls
)
388 self
._init
(msg
, text_loc
)
# Direct construction is unsupported: this always raises
# `NotImplementedError`. Errors are built by _create(), which uses
# `__new__()` and _init() instead.
def __init__(self, *args, **kwargs):  # type: ignore
    raise NotImplementedError()
# Initializes this error with the message `msg` (forwarded to the
# parent exception class) and the source text location `text_loc`.
def _init(self, msg: str, text_loc: TextLocation):
    self._text_loc = text_loc
    super().__init__(msg)
398 # Source text location.
401 return self
._text
_loc
# Raises a parsing error created from the message `msg` and the text
# location `text_loc`.
def _raise_error(msg: str, text_loc: TextLocation) -> NoReturn:
    error = ParseError._create(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise error
409 # Variables dictionary type (for type hints).
410 VariablesT
= Dict
[str, Union
[int, float]]
413 # Labels dictionary type (for type hints).
414 LabelsT
= Dict
[str, int]
417 # Python name pattern.
418 _py_name_pat
= re
.compile(r
"[a-zA-Z_][a-zA-Z0-9_]*")
423 # The constructor accepts a Normand input. After building, use the `res`
424 # property to get the resulting main group.
426 # Builds a parser to parse the Normand input `normand`, parsing
428 def __init__(self
, normand
: str, variables
: VariablesT
, labels
: LabelsT
):
429 self
._normand
= normand
433 self
._label
_names
= set(labels
.keys())
434 self
._var
_names
= set(variables
.keys())
437 # Result (main group).
442 # Current text location.
445 return TextLocation
._create
( # pyright: ignore[reportPrivateUsage]
446 self
._line
_no
, self
._col
_no
449 # Returns `True` if this parser is done parsing.
451 return self
._at
== len(self
._normand
)
# Returns `True` if this parser isn't done parsing, that is, the
# negation of what _is_done() returns.
def _isnt_done(self):
    done = self._is_done()
    return not done
# Raises a parse error built from the message `msg` and the current
# text location of this parser.
def _raise_error(self, msg: str) -> NoReturn:
    current_text_loc = self._text_loc
    _raise_error(msg, current_text_loc)
462 # Tries to make the pattern `pat` match the current substring,
463 # returning the match object and updating `self._at`,
464 # `self._line_no`, and `self._col_no` on success.
465 def _try_parse_pat(self
, pat
: Pattern
[str]):
466 m
= pat
.match(self
._normand
, self
._at
)
471 # Skip matched string
472 self
._at
+= len(m
.group(0))
475 self
._line
_no
+= m
.group(0).count("\n")
477 # Update column number
478 for i
in reversed(range(self
._at
)):
479 if self
._normand
[i
] == "\n" or i
== 0:
481 self
._col
_no
= self
._at
+ 1
483 self
._col
_no
= self
._at
- i
487 # Return match object
490 # Expects the pattern `pat` to match the current substring,
491 # returning the match object and updating `self._at`,
492 # `self._line_no`, and `self._col_no` on success, or raising a parse
493 # error with the message `error_msg` on error.
494 def _expect_pat(self
, pat
: Pattern
[str], error_msg
: str):
496 m
= self
._try
_parse
_pat
(pat
)
500 self
._raise
_error
(error_msg
)
502 # Return match object
505 # Pattern for _skip_ws_and_comments()
506 _ws_or_syms_or_comments_pat
= re
.compile(
507 r
"(?:[\s/\\?&:;.,+[\]_=|-]|#[^#]*?(?:\n|#))*"
# Skips as many whitespaces, insignificant symbol characters, and
# comments as possible by trying to match the dedicated pattern at the
# current position.
def _skip_ws_and_comments(self):
    skip_pat = self._ws_or_syms_or_comments_pat
    self._try_parse_pat(skip_pat)
515 # Pattern for _try_parse_hex_byte()
516 _nibble_pat
= re
.compile(r
"[A-Fa-f0-9]")
518 # Tries to parse a hexadecimal byte, returning a byte item on
520 def _try_parse_hex_byte(self
):
521 begin_text_loc
= self
._text
_loc
523 # Match initial nibble
524 m_high
= self
._try
_parse
_pat
(self
._nibble
_pat
)
530 # Expect another nibble
531 self
._skip
_ws
_and
_comments
()
532 m_low
= self
._expect
_pat
(
533 self
._nibble
_pat
, "Expecting another hexadecimal nibble"
537 return _Byte(int(m_high
.group(0) + m_low
.group(0), 16), begin_text_loc
)
539 # Patterns for _try_parse_bin_byte()
540 _bin_byte_bit_pat
= re
.compile(r
"[01]")
541 _bin_byte_prefix_pat
= re
.compile(r
"%")
543 # Tries to parse a binary byte, returning a byte item on success.
544 def _try_parse_bin_byte(self
):
545 begin_text_loc
= self
._text
_loc
548 if self
._try
_parse
_pat
(self
._bin
_byte
_prefix
_pat
) is None:
553 bits
= [] # type: List[str]
556 self
._skip
_ws
_and
_comments
()
557 m
= self
._expect
_pat
(self
._bin
_byte
_bit
_pat
, "Expecting a bit (`0` or `1`)")
558 bits
.append(m
.group(0))
561 return _Byte(int("".join(bits
), 2), begin_text_loc
)
563 # Patterns for _try_parse_dec_byte()
564 _dec_byte_prefix_pat
= re
.compile(r
"\$\s*")
565 _dec_byte_val_pat
= re
.compile(r
"(?P<neg>-?)(?P<val>\d+)")
567 # Tries to parse a decimal byte, returning a byte item on success.
568 def _try_parse_dec_byte(self
):
569 begin_text_loc
= self
._text
_loc
572 if self
._try
_parse
_pat
(self
._dec
_byte
_prefix
_pat
) is None:
577 m
= self
._expect
_pat
(self
._dec
_byte
_val
_pat
, "Expecting a decimal constant")
580 val
= int(m
.group("val")) * (-1 if m
.group("neg") == "-" else 1)
583 if val
< -128 or val
> 255:
584 _raise_error("Invalid decimal byte value {}".format(val
), begin_text_loc
)
590 return _Byte(val
, begin_text_loc
)
592 # Tries to parse a byte, returning a byte item on success.
593 def _try_parse_byte(self
):
595 item
= self
._try
_parse
_hex
_byte
()
601 item
= self
._try
_parse
_bin
_byte
()
607 item
= self
._try
_parse
_dec
_byte
()
612 # Patterns for _try_parse_str()
613 _str_prefix_pat
= re
.compile(r
'(?:u(?P<len>16|32)(?P<bo>be|le))?\s*"')
614 _str_suffix_pat
= re
.compile(r
'"')
615 _str_str_pat
= re
.compile(r
'(?:(?:\\.)|[^"])*')
617 # Strings corresponding to escape sequence characters
618 _str_escape_seq_strs
= {
632 # Tries to parse a string, returning a string item on success.
633 def _try_parse_str(self
):
634 begin_text_loc
= self
._text
_loc
637 m
= self
._try
_parse
_pat
(self
._str
_prefix
_pat
)
646 if m
.group("len") is not None:
647 encoding
= "utf_{}_{}".format(m
.group("len"), m
.group("bo"))
650 m
= self
._expect
_pat
(self
._str
_str
_pat
, "Expecting a literal string")
652 # Expect end of string
653 self
._expect
_pat
(self
._str
_suffix
_pat
, 'Expecting `"` (end of literal string)')
655 # Replace escape sequences
658 for ec
in '0abefnrtv"\\':
659 val
= val
.replace(r
"\{}".format(ec
), self
._str
_escape
_seq
_strs
[ec
])
662 data
= val
.encode(encoding
)
665 return _Str(data
, begin_text_loc
)
667 # Patterns for _try_parse_group()
668 _group_prefix_pat
= re
.compile(r
"\(")
669 _group_suffix_pat
= re
.compile(r
"\)")
671 # Tries to parse a group, returning a group item on success.
672 def _try_parse_group(self
):
673 begin_text_loc
= self
._text
_loc
676 if self
._try
_parse
_pat
(self
._group
_prefix
_pat
) is None:
681 items
= self
._parse
_items
()
683 # Expect end of group
684 self
._skip
_ws
_and
_comments
()
686 self
._group
_suffix
_pat
, "Expecting an item or `)` (end of group)"
690 return _Group(items
, begin_text_loc
)
692 # Returns a stripped expression string and an AST expression node
693 # from the expression string `expr_str` at text location `text_loc`.
694 def _ast_expr_from_str(self
, expr_str
: str, text_loc
: TextLocation
):
695 # Create an expression node from the expression string
696 expr_str
= expr_str
.strip().replace("\n", " ")
699 expr
= ast
.parse(expr_str
, mode
="eval")
702 "Invalid expression `{}`: invalid syntax".format(expr_str
),
706 return expr_str
, expr
708 # Patterns for _try_parse_num_and_attr()
709 _val_expr_pat
= re
.compile(r
"([^}:]+):\s*")
710 _fl_num_len_attr_pat
= re
.compile(r
"8|16|24|32|40|48|56|64")
711 _leb128_int_attr_pat
= re
.compile(r
"(u|s)leb128")
713 # Tries to parse a value and attribute (fixed length in bits or
714 # `leb128`), returning a value item on success.
715 def _try_parse_num_and_attr(self
):
716 begin_text_loc
= self
._text
_loc
719 m_expr
= self
._try
_parse
_pat
(self
._val
_expr
_pat
)
725 # Create an expression node from the expression string
726 expr_str
, expr
= self
._ast
_expr
_from
_str
(m_expr
.group(1), begin_text_loc
)
729 m_attr
= self
._try
_parse
_pat
(self
._fl
_num
_len
_attr
_pat
)
733 m_attr
= self
._try
_parse
_pat
(self
._leb
128_int
_attr
_pat
)
736 # At this point it's invalid
738 "Expecting a length (multiple of eight bits), `uleb128`, or `sleb128`"
741 # Return LEB128 integer item
742 cls
= _ULeb128Int
if m_attr
.group(1) == "u" else _SLeb128Int
743 return cls(expr_str
, expr
, begin_text_loc
)
745 # Return fixed-length number item
749 int(m_attr
.group(0)),
753 # Pattern for _try_parse_var_assign()
754 _var_assign_pat
= re
.compile(
755 r
"(?P<name>{})\s*=\s*(?P<expr>[^}}]+)".format(_py_name_pat
.pattern
)
758 # Tries to parse a variable assignment, returning a variable
759 # assignment item on success.
760 def _try_parse_var_assign(self
):
761 begin_text_loc
= self
._text
_loc
764 m
= self
._try
_parse
_pat
(self
._var
_assign
_pat
)
771 name
= m
.group("name")
773 if name
== _icitte_name
:
775 "`{}` is a reserved variable name".format(_icitte_name
), begin_text_loc
778 if name
in self
._label
_names
:
779 _raise_error("Existing label named `{}`".format(name
), begin_text_loc
)
781 # Add to known variable names
782 self
._var
_names
.add(name
)
784 # Create an expression node from the expression string
785 expr_str
, expr
= self
._ast
_expr
_from
_str
(m
.group("expr"), begin_text_loc
)
795 # Pattern for _try_parse_set_bo()
796 _bo_pat
= re
.compile(r
"[bl]e")
798 # Tries to parse a byte order name, returning a byte order setting
800 def _try_parse_set_bo(self
):
801 begin_text_loc
= self
._text
_loc
804 m
= self
._try
_parse
_pat
(self
._bo
_pat
)
810 # Return corresponding item
811 if m
.group(0) == "be":
812 return _SetBo(ByteOrder
.BE
, begin_text_loc
)
814 assert m
.group(0) == "le"
815 return _SetBo(ByteOrder
.LE
, begin_text_loc
)
817 # Patterns for _try_parse_val_or_var_assign_or_set_bo()
818 _val_var_assign_set_bo_prefix_pat
= re
.compile(r
"\{\s*")
819 _val_var_assign_set_bo_suffix_pat
= re
.compile(r
"\s*}")
821 # Tries to parse a value, a variable assignment, or a byte order
822 # setting, returning an item on success.
823 def _try_parse_val_or_var_assign_or_set_bo(self
):
825 if self
._try
_parse
_pat
(self
._val
_var
_assign
_set
_bo
_prefix
_pat
) is None:
829 # Variable assignment item?
830 item
= self
._try
_parse
_var
_assign
()
834 item
= self
._try
_parse
_num
_and
_attr
()
837 # Byte order setting item?
838 item
= self
._try
_parse
_set
_bo
()
841 # At this point it's invalid
843 "Expecting a fixed-length number, a variable assignment, or a byte order setting"
847 self
._expect
_pat
(self
._val
_var
_assign
_set
_bo
_suffix
_pat
, "Expecting `}`")
850 # Common positive constant integer pattern
851 _pos_const_int_pat
= re
.compile(r
"0[Xx][A-Fa-f0-9]+|\d+")
853 # Tries to parse an offset setting value (after the initial `<`),
854 # returning an offset item on success.
855 def _try_parse_set_offset_val(self
):
856 begin_text_loc
= self
._text
_loc
859 m
= self
._try
_parse
_pat
(self
._pos
_const
_int
_pat
)
866 return _SetOffset(int(m
.group(0), 0), begin_text_loc
)
868 # Tries to parse a label name (after the initial `<`), returning a
869 # label item on success.
870 def _try_parse_label_name(self
):
871 begin_text_loc
= self
._text
_loc
874 m
= self
._try
_parse
_pat
(_py_name_pat
)
883 if name
== _icitte_name
:
885 "`{}` is a reserved label name".format(_icitte_name
), begin_text_loc
888 if name
in self
._label
_names
:
889 _raise_error("Duplicate label name `{}`".format(name
), begin_text_loc
)
891 if name
in self
._var
_names
:
892 _raise_error("Existing variable named `{}`".format(name
), begin_text_loc
)
894 # Add to known label names
895 self
._label
_names
.add(name
)
898 return _Label(name
, begin_text_loc
)
900 # Patterns for _try_parse_label_or_set_offset()
901 _label_set_offset_prefix_pat
= re
.compile(r
"<\s*")
902 _label_set_offset_suffix_pat
= re
.compile(r
"\s*>")
904 # Tries to parse a label or an offset setting, returning an item on
906 def _try_parse_label_or_set_offset(self
):
908 if self
._try
_parse
_pat
(self
._label
_set
_offset
_prefix
_pat
) is None:
912 # Offset setting item?
913 item
= self
._try
_parse
_set
_offset
_val
()
917 item
= self
._try
_parse
_label
_name
()
920 # At this point it's invalid
921 self
._raise
_error
("Expecting a label name or an offset setting value")
924 self
._expect
_pat
(self
._label
_set
_offset
_suffix
_pat
, "Expecting `>`")
927 # Patterns for _try_parse_align_offset()
928 _align_offset_prefix_pat
= re
.compile(r
"@\s*")
929 _align_offset_val_pat
= re
.compile(r
"(\d+)\s*")
930 _align_offset_pad_val_prefix_pat
= re
.compile(r
"~\s*")
932 # Tries to parse an offset alignment, returning an offset alignment
934 def _try_parse_align_offset(self
):
935 begin_text_loc
= self
._text
_loc
938 if self
._try
_parse
_pat
(self
._align
_offset
_prefix
_pat
) is None:
942 align_text_loc
= self
._text
_loc
943 m
= self
._expect
_pat
(
944 self
._align
_offset
_val
_pat
,
945 "Expecting an alignment (positive multiple of eight bits)",
949 val
= int(m
.group(1))
951 if val
<= 0 or (val
% 8) != 0:
953 "Invalid alignment value {} (not a positive multiple of eight)".format(
962 if self
._try
_parse
_pat
(self
._align
_offset
_pad
_val
_prefix
_pat
) is not None:
963 pad_val_text_loc
= self
._text
_loc
964 m
= self
._expect
_pat
(self
._pos
_const
_int
_pat
, "Expecting a byte value")
967 pad_val
= int(m
.group(0), 0)
971 "Invalid padding byte value {}".format(pad_val
),
976 return _AlignOffset(val
, pad_val
, begin_text_loc
)
978 # Patterns for _expect_rep_cond_expr()
979 _rep_cond_expr_prefix_pat
= re
.compile(r
"\{")
980 _rep_cond_expr_pat
= re
.compile(r
"[^}]+")
981 _rep_cond_expr_suffix_pat
= re
.compile(r
"\}")
983 # Parses the expression of a conditional block or of a repetition
984 # (block or post-item) and returns the expression string and AST
986 def _expect_rep_cond_expr(self
, accept_int
: bool):
987 expr_text_loc
= self
._text
_loc
993 m
= self
._try
_parse
_pat
(self
._pos
_const
_int
_pat
)
997 m
= self
._try
_parse
_pat
(_py_name_pat
)
1001 if self
._try
_parse
_pat
(self
._rep
_cond
_expr
_prefix
_pat
) is None:
1003 mid_msg
= "a positive constant integer, a name, or `{`"
1005 mid_msg
= "a name or `{`"
1007 # At this point it's invalid
1008 self
._raise
_error
("Expecting {}".format(mid_msg
))
1010 # Expect an expression
1011 expr_text_loc
= self
._text
_loc
1012 m
= self
._expect
_pat
(self
._rep
_cond
_expr
_pat
, "Expecting an expression")
1013 expr_str
= m
.group(0)
1016 self
._expect
_pat
(self
._rep
_cond
_expr
_suffix
_pat
, "Expecting `}`")
1018 expr_str
= m
.group(0)
1020 expr_str
= m
.group(0)
1022 return self
._ast
_expr
_from
_str
(expr_str
, expr_text_loc
)
# Parses the multiplier expression of a repetition (block or
# post-item), returning the expression string and AST node.
#
# This delegates to _expect_rep_cond_expr() with `accept_int` set to
# `True`.
def _expect_rep_mul_expr(self):
    return self._expect_rep_cond_expr(accept_int=True)
1029 # Common block end pattern
1030 _block_end_pat
= re
.compile(r
"!end\b\s*")
1032 # Pattern for _try_parse_rep_block()
1033 _rep_block_prefix_pat
= re
.compile(r
"!r(?:epeat)?\b\s*")
1035 # Tries to parse a repetition block, returning a repetition item on
1037 def _try_parse_rep_block(self
):
1038 begin_text_loc
= self
._text
_loc
1041 if self
._try
_parse
_pat
(self
._rep
_block
_prefix
_pat
) is None:
1046 self
._skip
_ws
_and
_comments
()
1047 expr_str
, expr
= self
._expect
_rep
_mul
_expr
()
1050 self
._skip
_ws
_and
_comments
()
1051 items_text_loc
= self
._text
_loc
1052 items
= self
._parse
_items
()
1054 # Expect end of block
1055 self
._skip
_ws
_and
_comments
()
1057 self
._block
_end
_pat
, "Expecting an item or `!end` (end of repetition block)"
1061 return _Rep(_Group(items
, items_text_loc
), expr_str
, expr
, begin_text_loc
)
1063 # Pattern for _try_parse_cond_block()
1064 _cond_block_prefix_pat
= re
.compile(r
"!if\b\s*")
1066 # Tries to parse a conditional block, returning a conditional item
1068 def _try_parse_cond_block(self
):
1069 begin_text_loc
= self
._text
_loc
1072 if self
._try
_parse
_pat
(self
._cond
_block
_prefix
_pat
) is None:
1077 self
._skip
_ws
_and
_comments
()
1078 expr_str
, expr
= self
._expect
_rep
_cond
_expr
(False)
1081 self
._skip
_ws
_and
_comments
()
1082 items_text_loc
= self
._text
_loc
1083 items
= self
._parse
_items
()
1085 # Expect end of block
1086 self
._skip
_ws
_and
_comments
()
1088 self
._block
_end
_pat
,
1089 "Expecting an item or `!end` (end of conditional block)",
1093 return _Cond(_Group(items
, items_text_loc
), expr_str
, expr
, begin_text_loc
)
1095 # Tries to parse a base item (anything except a repetition),
1096 # returning it on success.
1097 def _try_parse_base_item(self
):
1099 item
= self
._try
_parse
_byte
()
1101 if item
is not None:
1105 item
= self
._try
_parse
_str
()
1107 if item
is not None:
1110 # Value, variable assignment, or byte order setting item?
1111 item
= self
._try
_parse
_val
_or
_var
_assign
_or
_set
_bo
()
1113 if item
is not None:
1116 # Label or offset setting item?
1117 item
= self
._try
_parse
_label
_or
_set
_offset
()
1119 if item
is not None:
1122 # Offset alignment item?
1123 item
= self
._try
_parse
_align
_offset
()
1125 if item
is not None:
1129 item
= self
._try
_parse
_group
()
1131 if item
is not None:
1134 # Repetition (block) item?
1135 item
= self
._try
_parse
_rep
_block
()
1137 if item
is not None:
1140 # Conditional block item?
1141 item
= self
._try
_parse
_cond
_block
()
1143 if item
is not None:
1146 # Pattern for _try_parse_rep_post()
1147 _rep_post_prefix_pat
= re
.compile(r
"\*")
1149 # Tries to parse a post-item repetition, returning the expression
1150 # string and AST expression node on success.
1151 def _try_parse_rep_post(self
):
1153 if self
._try
_parse
_pat
(self
._rep
_post
_prefix
_pat
) is None:
1157 # Return expression string and AST expression
1158 self
._skip
_ws
_and
_comments
()
1159 return self
._expect
_rep
_mul
_expr
()
1161 # Tries to parse an item, possibly followed by a repetition,
1162 # returning `True` on success.
1164 # Appends any parsed item to `items`.
1165 def _try_append_item(self
, items
: List
[_Item
]):
1166 self
._skip
_ws
_and
_comments
()
1169 item
= self
._try
_parse
_base
_item
()
1175 # Parse repetition if the base item is repeatable
1176 if isinstance(item
, _RepableItem
):
1177 self
._skip
_ws
_and
_comments
()
1178 rep_text_loc
= self
._text
_loc
1179 rep_ret
= self
._try
_parse
_rep
_post
()
1181 if rep_ret
is not None:
1182 item
= _Rep(item
, rep_ret
[0], rep_ret
[1], rep_text_loc
)
1187 # Parses and returns items, skipping whitespaces, insignificant
1188 # symbols, and comments when allowed, and stopping at the first
1189 # unknown character.
1190 def _parse_items(self
) -> List
[_Item
]:
1191 items
= [] # type: List[_Item]
1193 while self
._isnt
_done
():
1194 # Try to append item
1195 if not self
._try
_append
_item
(items
):
1196 # Unknown at this point
1201 # Parses the whole Normand input, setting `self._res` to the main
1202 # group item on success.
1204 if len(self
._normand
.strip()) == 0:
1205 # Special case to make sure there's something to consume
1206 self
._res
= _Group([], self
._text
_loc
)
1209 # Parse first level items
1210 items
= self
._parse
_items
()
1212 # Make sure there's nothing left
1213 self
._skip
_ws
_and
_comments
()
1215 if self
._isnt
_done
():
1217 "Unexpected character `{}`".format(self
._normand
[self
._at
])
1220 # Set main group item
1221 self
._res
= _Group(items
, self
._text
_loc
)
1224 # The return type of parse().
1230 variables
: VariablesT
,
1233 bo
: Optional
[ByteOrder
],
1235 self
= cls
.__new
__(cls
)
1236 self
._init
(data
, variables
, labels
, offset
, bo
)
# Direct construction is unsupported: this always raises
# `NotImplementedError`. Instances are built with `__new__()` and
# _init() instead.
def __init__(self, *args, **kwargs):  # type: ignore
    raise NotImplementedError()
1245 variables
: VariablesT
,
1248 bo
: Optional
[ByteOrder
],
1251 self
._vars
= variables
1252 self
._labels
= labels
1253 self
._offset
= offset
1261 # Dictionary of updated variable names to their last computed value.
1263 def variables(self
):
1266 # Dictionary of updated main group label names to their computed
1277 # Updated byte order.
1279 def byte_order(self
):
1283 # Raises a parse error for the item `item`, creating it using the
def _raise_error_for_item(msg: str, item: _Item) -> NoReturn:
    # The location of the error is the source text location of `item`
    item_text_loc = item.text_loc
    _raise_error(msg, item_text_loc)
1289 # The `ICITTE` reserved name.
1290 _icitte_name
= "ICITTE"
1293 # Base node visitor.
1295 # Calls the _visit_name() method for each name node which isn't the name
1297 class _NodeVisitor(ast
.NodeVisitor
):
1299 self
._parent
_is
_call
= False
# Visits `node`:
#
# * If `node` is exactly an `ast.Call` node, sets the "parent is a
#   call" flag before visiting its children.
#
# * If `node` is exactly an `ast.Name` node and the flag isn't set,
#   passes its identifier to _visit_name().
#
# In all cases, visits the children of `node` through the parent
# class and then clears the flag.
def generic_visit(self, node: ast.AST):
    node_type = type(node)

    if node_type is ast.Call:
        self._parent_is_call = True
    elif node_type is ast.Name and not self._parent_is_call:
        self._visit_name(node.id)

    # Visit the children, then reset the flag for the next siblings
    super().generic_visit(node)
    self._parent_is_call = False
1311 def _visit_name(self
, name
: str):
1315 # Expression validator: validates that all the names within the
1316 # expression are allowed.
1317 class _ExprValidator(_NodeVisitor
):
1318 def __init__(self
, item
: _ExprItemT
, allowed_names
: Set
[str]):
1321 self
._allowed
_names
= allowed_names
1323 def _visit_name(self
, name
: str):
1324 # Make sure the name refers to a known and reachable
1325 # variable/label name.
1326 if name
!= _icitte_name
and name
not in self
._allowed
_names
:
1327 msg
= "Illegal (unknown or unreachable) variable/label name `{}` in expression `{}`".format(
1328 name
, self
._item
.expr_str
1331 allowed_names
= self
._allowed
_names
.copy()
1332 allowed_names
.add(_icitte_name
)
1334 if len(allowed_names
) > 0:
1335 allowed_names_str
= ", ".join(
1336 sorted(["`{}`".format(name
) for name
in allowed_names
])
1338 msg
+= "; the legal names are {{{}}}".format(allowed_names_str
)
1342 self
._item
.text_loc
,
1346 # Expression visitor getting all the contained names.
1347 class _ExprNamesVisitor(_NodeVisitor
):
1349 self
._parent
_is
_call
= False
1350 self
._names
= set() # type: Set[str]
# Records `name` in the set of names collected by this visitor.
def _visit_name(self, name: str):
    found_names = self._names
    found_names.add(name)
1364 variables
: VariablesT
,
1367 bo
: Optional
[ByteOrder
],
1369 self
.variables
= variables
.copy()
1370 self
.labels
= labels
.copy()
1371 self
.offset
= offset
1375 # Generator of data and final state from a group item.
1377 # Generation happens in memory at construction time. After building, use
1378 # the `data`, `variables`, `labels`, `offset`, and `bo` properties to
1379 # get the resulting context.
1381 # The steps of generation are:
1383 # 1. Validate that each repetition, conditional, and LEB128 integer
1384 # expression uses only reachable names.
1386 # 2. Compute and keep the effective repetition count, conditional value,
1387 # and LEB128 integer value for each repetition and LEB128 integer
1390 # 3. Generate bytes, updating the initial state as it goes which becomes
1391 # the final state after the operation.
1393 # During the generation, when handling a `_Rep`, `_Cond`, or
1394 # `_Leb128Int` item, we already have the effective repetition count,
1395 # conditional value, or value of the instance.
1397 # When handling a `_Group` item, first update the current labels with
1398 # all the immediate (not nested) labels, and then handle each
1399 # contained item. This gives contained items access to "future" outer
1400 # labels. Then remove the immediate labels from the state so that
1401 # outer items don't have access to inner labels.
1406 variables
: VariablesT
,
1409 bo
: Optional
[ByteOrder
],
1411 self
._validate
_vl
_exprs
(group
, set(variables
.keys()), set(labels
.keys()))
1412 self
._vl
_instance
_vals
= self
._compute
_vl
_instance
_vals
(
1413 group
, _GenState(variables
, labels
, offset
, bo
)
1415 self
._gen
(group
, _GenState(variables
, labels
, offset
, bo
))
1422 # Updated variables.
def variables(self):
    # The variables are the ones of the final generation state
    final_state = self._final_state
    return final_state.variables
1427 # Updated main group labels.
1430 return self
._final
_state
.labels
1435 return self
._final
_state
.offset
1437 # Updated byte order.
1440 return self
._final
_state
.bo
1442 # Returns the set of used, non-called names within the AST
1443 # expression `expr`.
1445 def _names_of_expr(expr
: ast
.Expression
):
1446 visitor
= _ExprNamesVisitor()
1448 return visitor
.names
1450 # Validates that all the repetition, conditional, and LEB128 integer
1451 # expressions within `group` don't refer, directly or indirectly, to
1452 # subsequent labels.
1454 # The strategy here is to keep a set of allowed label names, per
1455 # group, initialized to `allowed_label_names`, and a set of allowed
1456 # variable names initialized to `allowed_variable_names`.
1458 # Then, depending on the type of `item`:
1461 # Add its name to the local allowed label names: a label
1462 # occurring before a repetition, and not within a nested group,
1463 # is always reachable.
1466 # If all the names within its expression are allowed, then add
1467 # its name to the allowed variable names.
1469 # Otherwise, remove its name from the allowed variable names (if
1470 # it's in there): a variable which refers to an unreachable name
1471 # is unreachable itself.
1473 # `_Rep`, `_Cond`, and `_Leb128Int`:
1474 # Make sure all the names within its expression are allowed.
1477 # Call this function for each contained item with a _copy_ of
1478 # the current allowed label names and the same current allowed
1481 def _validate_vl_exprs(
1482 item
: _Item
, allowed_variable_names
: Set
[str], allowed_label_names
: Set
[str]
1484 if type(item
) is _Label
:
1485 allowed_label_names
.add(item
.name
)
1486 elif type(item
) is _VarAssign
:
1487 # Check if this variable name is allowed
1490 for name
in _Gen
._names
_of
_expr
(item
.expr
):
1492 allowed_label_names | allowed_variable_names | {_icitte_name}
1499 allowed_variable_names
.add(item
.name
)
1500 elif item
.name
in allowed_variable_names
:
1501 allowed_variable_names
.remove(item
.name
)
1502 elif isinstance(item
, _Leb128Int
):
1503 # Validate the expression
1504 _ExprValidator(item
, allowed_label_names | allowed_variable_names
).visit(
1507 elif type(item
) is _Rep
or type(item
) is _Cond
:
1508 # Validate the expression first
1509 _ExprValidator(item
, allowed_label_names | allowed_variable_names
).visit(
1513 # Validate inner item
1514 _Gen
._validate
_vl
_exprs
(
1515 item
.item
, allowed_variable_names
, allowed_label_names
1517 elif type(item
) is _Group
:
1518 # Copy `allowed_label_names` so that this frame cannot
1519 # access the nested label names.
1520 group_allowed_label_names
= allowed_label_names
.copy()
1522 for subitem
in item
.items
:
1523 _Gen
._validate
_vl
_exprs
(
1524 subitem
, allowed_variable_names
, group_allowed_label_names
1527 # Evaluates the expression of `item` considering the current
1528 # generation state `state`.
1530 # If `allow_float` is `True`, then the type of the result may be
1533 def _eval_item_expr(
1536 allow_float
: bool = False,
1538 syms
= {} # type: VariablesT
1539 syms
.update(state
.labels
)
1541 # Set the `ICITTE` name to the current offset
1542 syms
[_icitte_name
] = state
.offset
1544 # Add the current variables
1545 syms
.update(state
.variables
)
1547 # Validate the node and its children
1548 _ExprValidator(item
, set(syms
.keys())).visit(item
.expr
)
1550 # Compile and evaluate expression node
1552 val
= eval(compile(item
.expr
, "", "eval"), None, syms
)
1553 except Exception as exc
:
1554 _raise_error_for_item(
1555 "Failed to evaluate expression `{}`: {}".format(item
.expr_str
, exc
),
1559 # Convert `bool` result type to `int` to normalize
1560 if type(val
) is bool:
1563 # Validate result type
1564 expected_types
= {int}
# type: Set[type]
1568 expected_types
.add(float)
1569 type_msg
+= " or `float`"
1571 if type(val
) not in expected_types
:
1572 _raise_error_for_item(
1573 "Invalid expression `{}`: expecting result type {}, not `{}`".format(
1574 item
.expr_str
, type_msg
, type(val
).__name
__
1581 # Returns the size, in bytes, required to encode the value `val`
1582 # with LEB128 (signed version if `is_signed` is `True`).
1584 def _leb128_size_for_val(val
: int, is_signed
: bool):
1586 # Equivalent upper bound.
1588 # For example, if `val` is -128, then the full integer for
1589 # this number of bits would be [-128, 127].
1592 # Number of bits (add one for the sign if needed)
1593 bits
= val
.bit_length() + int(is_signed
)
1598 # Seven bits per byte
1599 return math
.ceil(bits
/ 7)
1601 # Returns the offset `offset` aligned according to `item`.
def _align_offset(offset: int, item: _AlignOffset):
    # The alignment value of `item` is in bits: convert it to bytes
    step = item.val // 8

    # Round `offset` up to a multiple of `step` (an offset which is
    # already a multiple of `step` is returned as is)
    bumped = offset + step - 1
    return bumped - bumped % step
1607 # Computes the effective value for each repetition, conditional, and
1608 # LEB128 integer instance, filling `instance_vals` (if not `None`)
1609 # and returning `instance_vals`.
1611 # At this point it must be known that, for a given variable-length
1612 # item, its expression only contains reachable names.
1614 # When handling a `_Rep` or `_Cond` item, this function appends its
1615 # effective multiplier/value to `instance_vals` _before_ handling
1616 # its repeated/conditional item.
1618 # When handling a `_VarAssign` item, this function only evaluates it
1619 # if all its names are reachable.
def _compute_vl_instance_vals(
    item: _Item, state: _GenState, instance_vals: Optional[List[int]] = None
):
    # Walks `item`, updating `state` along the way, and appends to
    # `instance_vals` the effective value of each repetition,
    # conditional, and LEB128 integer instance met, in order.
    #
    # Creates the list if `instance_vals` is `None`; returns the list
    # in both cases.
    if instance_vals is None:
        instance_vals = []

    item_type = type(item)

    if isinstance(item, _ScalarItem):
        # Fixed size: only the offset moves
        state.offset += item.size
    elif item_type is _Label:
        state.labels[item.name] = state.offset
    elif item_type is _VarAssign:
        # Only evaluate when every name of the expression is known at
        # this point (`ICITTE`, a variable, or a label).
        all_known = all(
            name == _icitte_name
            or name in state.variables
            or name in state.labels
            for name in _Gen._names_of_expr(item.expr)
        )

        if all_known:
            state.variables[item.name] = _Gen._eval_item_expr(item, state, True)
    elif item_type is _SetOffset:
        state.offset = item.val
    elif item_type is _AlignOffset:
        state.offset = _Gen._align_offset(state.offset, item)
    elif isinstance(item, _Leb128Int):
        leb_val = _Gen._eval_item_expr(item, state)

        # An unsigned encoding cannot hold a negative value
        if leb_val < 0 and item_type is _ULeb128Int:
            _raise_error_for_item(
                "Invalid expression `{}`: unexpected negative result {:,} for a ULEB128 encoding".format(
                    item.expr_str, leb_val
                ),
                item,
            )

        # Record the value, and then account for its encoded size
        instance_vals.append(leb_val)
        state.offset += _Gen._leb128_size_for_val(leb_val, item_type is _SLeb128Int)
    elif item_type is _Rep:
        count = _Gen._eval_item_expr(item, state)

        if count < 0:
            _raise_error_for_item(
                "Invalid expression `{}`: unexpected negative result {:,}".format(
                    item.expr_str, count
                ),
                item,
            )

        # The multiplier goes in _before_ what the repeated item adds
        instance_vals.append(count)

        for _ in range(count):
            _Gen._compute_vl_instance_vals(item.item, state, instance_vals)
    elif item_type is _Cond:
        cond_val = _Gen._eval_item_expr(item, state)

        # The condition value goes in _before_ what the conditional
        # item adds.
        instance_vals.append(cond_val)

        if cond_val:
            _Gen._compute_vl_instance_vals(item.item, state, instance_vals)
    elif item_type is _Group:
        # Labels set within the group are dropped once it's processed
        saved_labels = state.labels.copy()

        for child in item.items:
            _Gen._compute_vl_instance_vals(child, state, instance_vals)

        state.labels = saved_labels

    return instance_vals
1711 def _update_offset_noop(self
, item
: _Item
, state
: _GenState
, next_vl_instance
: int):
1712 return next_vl_instance
1714 def _dry_handle_scalar_item(
1715 self
, item
: _ScalarItem
, state
: _GenState
, next_vl_instance
: int
1717 state
.offset
+= item
.size
1718 return next_vl_instance
def _dry_handle_leb128_int_item(
    self, item: _Leb128Int, state: _GenState, next_vl_instance: int
):
    # Dry handler for a LEB128 integer item: advances the offset by the
    # encoded size of the precomputed value and consumes its instance.
    #
    # Read the value at `next_vl_instance` first, and only then move to
    # the next instance, to honor the order of
    # _compute_vl_instance_vals().
    precomputed = self._vl_instance_vals[next_vl_instance]
    signed = type(item) is _SLeb128Int
    state.offset += self._leb128_size_for_val(precomputed, signed)
    return next_vl_instance + 1
1732 def _dry_handle_group_item(
1733 self
, item
: _Group
, state
: _GenState
, next_vl_instance
: int
1735 for subitem
in item
.items
:
1736 next_vl_instance
= self
._dry
_handle
_item
(subitem
, state
, next_vl_instance
)
1738 return next_vl_instance
1740 def _dry_handle_rep_item(self
, item
: _Rep
, state
: _GenState
, next_vl_instance
: int):
1741 # Get the value from `self._vl_instance_vals` _before_
1742 # incrementing `next_vl_instance` to honor the order of
1743 # _compute_vl_instance_vals().
1744 mul
= self
._vl
_instance
_vals
[next_vl_instance
]
1745 next_vl_instance
+= 1
1747 for _
in range(mul
):
1748 next_vl_instance
= self
._dry
_handle
_item
(item
.item
, state
, next_vl_instance
)
1750 return next_vl_instance
1752 def _dry_handle_cond_item(
1753 self
, item
: _Cond
, state
: _GenState
, next_vl_instance
: int
1755 # Get the value from `self._vl_instance_vals` _before_
1756 # incrementing `next_vl_instance` to honor the order of
1757 # _compute_vl_instance_vals().
1758 val
= self
._vl
_instance
_vals
[next_vl_instance
]
1759 next_vl_instance
+= 1
1762 next_vl_instance
= self
._dry
_handle
_item
(item
.item
, state
, next_vl_instance
)
1764 return next_vl_instance
1766 def _dry_handle_align_offset_item(
1767 self
, item
: _AlignOffset
, state
: _GenState
, next_vl_instance
: int
1769 state
.offset
= self
._align
_offset
(state
.offset
, item
)
1770 return next_vl_instance
1772 def _dry_handle_set_offset_item(
1773 self
, item
: _SetOffset
, state
: _GenState
, next_vl_instance
: int
1775 state
.offset
= item
.val
1776 return next_vl_instance
1778 # Updates `state.offset` considering the generated data of `item`,
1779 # without generating any, and returns the updated next
1780 # variable-length item instance.
1781 def _dry_handle_item(self
, item
: _Item
, state
: _GenState
, next_vl_instance
: int):
1782 return self
._dry
_handle
_item
_funcs
[type(item
)](item
, state
, next_vl_instance
)
1784 # Handles the byte item `item`.
1785 def _handle_byte_item(self
, item
: _Byte
, state
: _GenState
, next_vl_instance
: int):
1786 self
._data
.append(item
.val
)
1787 state
.offset
+= item
.size
1788 return next_vl_instance
1790 # Handles the string item `item`.
1791 def _handle_str_item(self
, item
: _Str
, state
: _GenState
, next_vl_instance
: int):
1792 self
._data
+= item
.data
1793 state
.offset
+= item
.size
1794 return next_vl_instance
1796 # Handles the byte order setting item `item`.
1797 def _handle_set_bo_item(
1798 self
, item
: _SetBo
, state
: _GenState
, next_vl_instance
: int
1800 # Update current byte order
1802 return next_vl_instance
1804 # Handles the variable assignment item `item`.
1805 def _handle_var_assign_item(
1806 self
, item
: _VarAssign
, state
: _GenState
, next_vl_instance
: int
1809 state
.variables
[item
.name
] = self
._eval
_item
_expr
(item
, state
, True)
1810 return next_vl_instance
1812 # Handles the fixed-length integer item `item`.
def _handle_fl_int_item(self, val: int, item: _FlNum, state: _GenState):
    # Encodes the integer `val` on `item.len` bits using the current
    # byte order and appends the result to the generated data.
    #
    # This method doesn't update the offset.

    # `val` must fit `item.len` bits, as a signed or unsigned integer
    lowest = -(2 ** (item.len - 1))
    highest = 2**item.len - 1

    if not lowest <= val <= highest:
        _raise_error_for_item(
            "Value {:,} is outside the {}-bit range when evaluating expression `{}` at byte offset {:,}".format(
                val, item.len, item.expr_str, state.offset
            ),
            item,
        )

    # An undefined byte order is handled as big endian
    is_be = state.bo in (None, ByteOrder.BE)

    # Always encode on 64 bits first so that the sign bit is extended
    # whatever the value of `item.len`.
    fmt = (">" if is_be else "<") + ("Q" if val >= 0 else "q")
    encoded = struct.pack(fmt, val)

    # Then keep only the requested length: the last bytes for big
    # endian, the first ones for little endian.
    byte_count = item.len // 8

    if is_be:
        encoded = encoded[-byte_count:]
    else:
        assert state.bo == ByteOrder.LE
        encoded = encoded[:byte_count]

    self._data += encoded
# Handles the fixed-length floating point number item `item`.
def _handle_fl_float_item(self, val: float, item: _FlNum, state: _GenState):
    # Encodes `val` with the current byte order, as a single-precision
    # number if `item.len` is 32 or a double-precision one if it's 64,
    # and appends the result to the generated data.
    #
    # This method doesn't update the offset.
    bit_len = item.len

    # Only those two lengths have a floating point number encoding
    if bit_len not in (32, 64):
        _raise_error_for_item(
            "Invalid {}-bit length for a fixed-length floating point number (value {:,})".format(
                bit_len, val
            ),
            item,
        )

    # An undefined byte order is handled as big endian
    bo_char = ">" if state.bo in (None, ByteOrder.BE) else "<"
    type_char = "f" if bit_len == 32 else "d"
    self._data += struct.pack(bo_char + type_char, val)
1867 # Handles the fixed-length number item `item`.
1868 def _handle_fl_num_item(
1869 self
, item
: _FlNum
, state
: _GenState
, next_vl_instance
: int
1872 val
= self
._eval
_item
_expr
(item
, state
, True)
1874 # Validate current byte order
1875 if state
.bo
is None and item
.len > 8:
1876 _raise_error_for_item(
1877 "Current byte order isn't defined at first fixed-length number (`{}`) to encode on more than 8 bits".format(
1883 # Handle depending on type
1884 if type(val
) is int:
1885 self
._handle
_fl
_int
_item
(val
, item
, state
)
1887 assert type(val
) is float
1888 self
._handle
_fl
_float
_item
(val
, item
, state
)
1891 state
.offset
+= item
.size
1893 return next_vl_instance
1895 # Handles the LEB128 integer item `item`.
def _handle_leb128_int_item(
    self, item: _Leb128Int, state: _GenState, next_vl_instance: int
):
    # Appends the LEB128 encoding (signed flavor if `item` is an
    # `_SLeb128Int`) of the precomputed value of `item` to the
    # generated data, advances the offset by the encoded size, and
    # returns the next variable-length item instance.

    # Get the precomputed value
    val = self._vl_instance_vals[next_vl_instance]

    # Size in bytes
    size = self._leb128_size_for_val(val, type(item) is _SLeb128Int)

    # For each byte
    for _ in range(size):
        # Seven LSBs, MSB of the byte set (continue)
        self._data.append((val & 0x7F) | 0x80)
        val >>= 7

    # Clear MSB of last byte (stop)
    self._data[-1] &= 0x7F

    # Update the offset like every other data-producing handler (and
    # like the dry handler of this item type) does, so that `ICITTE`
    # and any following label keep matching the generated data.
    state.offset += size

    # Consumed this instance
    return next_vl_instance + 1
1917 # Handles the group item `item`, only removing the immediate labels
1918 # from `state.labels` if `remove_immediate_labels` is `True`.
def _handle_group_item(
    self,
    item: _Group,
    state: _GenState,
    next_vl_instance: int,
    remove_immediate_labels: bool = True,
):
    # Handles the group item `item` in two passes and returns the next
    # variable-length item instance.
    #
    # First pass (dry): only offsets are tracked, within a scratch
    # state, to learn the value of each immediate (not nested) label
    # so that any expression within the group may reach it.
    scratch = _GenState({}, {}, state.offset, None)
    own_labels = set()  # type: Set[str]
    dry_vl_index = next_vl_instance

    for child in item.items:
        if type(child) is _Label:
            state.labels[child.name] = scratch.offset
            own_labels.add(child.name)

        dry_vl_index = self._dry_handle_item(child, scratch, dry_vl_index)

    # Second pass: really handle each item with the actual state
    vl_index = next_vl_instance

    for child in item.items:
        vl_index = self._handle_item(child, state, vl_index)

    # Drop the immediate labels, if required, so that outer items
    # won't reach inner labels.
    if remove_immediate_labels:
        for label_name in own_labels:
            del state.labels[label_name]

    return vl_index
1954 # Handles the repetition item `item`.
1955 def _handle_rep_item(self
, item
: _Rep
, state
: _GenState
, next_vl_instance
: int):
1956 # Get the precomputed repetition count
1957 mul
= self
._vl
_instance
_vals
[next_vl_instance
]
1959 # Consumed this instance
1960 next_vl_instance
+= 1
1962 for _
in range(mul
):
1963 next_vl_instance
= self
._handle
_item
(item
.item
, state
, next_vl_instance
)
1965 return next_vl_instance
1967 # Handles the conditional item `item`.
1968 def _handle_cond_item(self
, item
: _Rep
, state
: _GenState
, next_vl_instance
: int):
1969 # Get the precomputed conditional value
1970 val
= self
._vl
_instance
_vals
[next_vl_instance
]
1972 # Consumed this instance
1973 next_vl_instance
+= 1
1976 next_vl_instance
= self
._handle
_item
(item
.item
, state
, next_vl_instance
)
1978 return next_vl_instance
1980 # Handles the offset setting item `item`.
1981 def _handle_set_offset_item(
1982 self
, item
: _SetOffset
, state
: _GenState
, next_vl_instance
: int
1984 state
.offset
= item
.val
1985 return next_vl_instance
1987 # Handles offset alignment item `item` (adds padding).
1988 def _handle_align_offset_item(
1989 self
, item
: _AlignOffset
, state
: _GenState
, next_vl_instance
: int
1991 init_offset
= state
.offset
1992 state
.offset
= self
._align
_offset
(state
.offset
, item
)
1993 self
._data
+= bytes([item
.pad_val
] * (state
.offset
- init_offset
))
1994 return next_vl_instance
1996 # Handles the label item `item`.
1997 def _handle_label_item(self
, item
: _Label
, state
: _GenState
, next_vl_instance
: int):
1998 return next_vl_instance
2000 # Handles the item `item`, returning the updated next repetition
2002 def _handle_item(self
, item
: _Item
, state
: _GenState
, next_vl_instance
: int):
2003 return self
._item
_handlers
[type(item
)](item
, state
, next_vl_instance
)
2005 # Generates the data (`self._data`) and final state
2006 # (`self._final_state`) from `group` and the initial state `state`.
def _gen(self, group: _Group, state: _GenState):
    # Generates the data (`self._data`) and the final state
    # (`self._final_state`) from `group`, starting with the state
    # `state`.

    # Nothing generated yet
    self._data = bytearray()

    # One row per item type: the type, its actual handler, and its dry
    # handler (which only updates the state offset).
    rows = [
        (
            _AlignOffset,
            self._handle_align_offset_item,
            self._dry_handle_align_offset_item,
        ),
        (_Byte, self._handle_byte_item, self._dry_handle_scalar_item),
        (_Cond, self._handle_cond_item, self._dry_handle_cond_item),
        (_FlNum, self._handle_fl_num_item, self._dry_handle_scalar_item),
        (_Group, self._handle_group_item, self._dry_handle_group_item),
        (_Label, self._handle_label_item, self._update_offset_noop),
        (_Rep, self._handle_rep_item, self._dry_handle_rep_item),
        (_SetBo, self._handle_set_bo_item, self._update_offset_noop),
        (
            _SetOffset,
            self._handle_set_offset_item,
            self._dry_handle_set_offset_item,
        ),
        (
            _SLeb128Int,
            self._handle_leb128_int_item,
            self._dry_handle_leb128_int_item,
        ),
        (_Str, self._handle_str_item, self._dry_handle_scalar_item),
        (
            _ULeb128Int,
            self._handle_leb128_int_item,
            self._dry_handle_leb128_int_item,
        ),
        (_VarAssign, self._handle_var_assign_item, self._update_offset_noop),
    ]  # type: List[Any]

    self._item_handlers = {
        row[0]: row[1] for row in rows
    }  # type: Dict[type, Callable[[Any, _GenState, int], int]]
    self._dry_handle_item_funcs = {
        row[0]: row[2] for row in rows
    }  # type: Dict[type, Callable[[Any, _GenState, int], int]]

    # Handle the root group, starting at the first variable-length
    # item instance, and _without_ removing its immediate labels
    # because the `labels` property offers them.
    self._handle_group_item(group, state, 0, False)

    # `state` was updated in place: it's now the final state
    self._final_state = state
2053 # Returns a `ParseResult` instance containing the bytes encoded by the
2054 # input string `normand`.
2056 # `init_variables` is a dictionary of initial variable names (valid
2057 # Python names) to integral values. A variable name must not be the
2058 # reserved name `ICITTE`.
2060 # `init_labels` is a dictionary of initial label names (valid Python
2061 # names) to integral values. A label name must not be the reserved name
2064 # `init_offset` is the initial offset.
2066 # `init_byte_order` is the initial byte order.
2068 # Raises `ParseError` on any parsing error.
def parse(
    normand: str,
    init_variables: Optional[VariablesT] = None,
    init_labels: Optional[LabelsT] = None,
    init_offset: int = 0,
    init_byte_order: Optional[ByteOrder] = None,
):
    # Use a fresh dictionary for whatever the caller didn't provide
    variables = {} if init_variables is None else init_variables
    labels = {} if init_labels is None else init_labels

    # Parse the input first, and then generate the data from the
    # parsing result and the initial state.
    parsed = _Parser(normand, variables, labels).res
    gen = _Gen(parsed, variables, labels, init_offset, init_byte_order)

    return ParseResult._create(  # pyright: ignore[reportPrivateUsage]
        gen.data, gen.variables, gen.labels, gen.offset, gen.bo
    )
2094 # Parses the command-line arguments.
2095 def _parse_cli_args():
2099 ap
= argparse
.ArgumentParser()
2106 help="initial offset (positive)",
2112 choices
=["be", "le"],
2114 help="initial byte order (`be` or `le`)",
2120 help="add an initial variable (may be repeated)",
2127 help="add an initial label (may be repeated)",
2130 "--version", action
="version", version
="Normand {}".format(__version__
)
2137 help="input path (none means standard input)",
2141 return ap
.parse_args()
2144 # Raises a command-line error with the message `msg`.
2145 def _raise_cli_error(msg
: str) -> NoReturn
:
2146 raise RuntimeError("Command-line error: {}".format(msg
))
2149 # Returns a dictionary of string to integers from the list of strings
2150 # `args` containing `NAME=VAL` entries.
2151 def _dict_from_arg(args
: Optional
[List
[str]]):
2152 d
= {} # type: LabelsT
2158 m
= re
.match(r
"({})=(\d+)$".format(_py_name_pat
.pattern
), arg
)
2161 _raise_cli_error("Invalid assignment {}".format(arg
))
2163 d
[m
.group(1)] = int(m
.group(2))
2168 # CLI entry point without exception handling.
def _run_cli():
    # Runs the command-line tool: reads the Normand input (file or
    # standard input), parses it with the initial variables, labels,
    # offset, and byte order given as arguments, and writes the
    # resulting bytes to the standard output.
    #
    # A `ParseError` is reported through _fail() (which exits); a
    # command-line error raises `RuntimeError`.
    import os.path

    # Parse arguments
    args = _parse_cli_args()

    # Read input
    if args.path is None:
        normand = sys.stdin.read()
    else:
        with open(args.path) as f:
            normand = f.read()

    # Variables and labels
    variables = typing.cast(VariablesT, _dict_from_arg(args.var))
    labels = _dict_from_arg(args.label)

    # Validate offset (the message must show the offending value)
    if args.offset < 0:
        _raise_cli_error("Invalid negative offset {}".format(args.offset))

    # Validate and set byte order
    bo = None  # type: Optional[ByteOrder]

    if args.byte_order is not None:
        if args.byte_order == "be":
            bo = ByteOrder.BE
        else:
            assert args.byte_order == "le"
            bo = ByteOrder.LE

    # Parse
    try:
        res = parse(normand, variables, labels, args.offset, bo)
    except ParseError as exc:
        # Prefix the location with the absolute input path, if any
        prefix = ""

        if args.path is not None:
            prefix = "{}:".format(os.path.abspath(args.path))

        _fail(
            "{}{}:{} - {}".format(
                prefix, exc.text_loc.line_no, exc.text_loc.col_no, str(exc)
            )
        )

    # Print the raw bytes
    sys.stdout.buffer.write(res.data)
2219 # Prints the exception message `msg` and exits with status 1.
2220 def _fail(msg
: str) -> NoReturn
:
2221 if not msg
.endswith("."):
2224 print(msg
, file=sys
.stderr
)
2232 except Exception as exc
:
2236 if __name__
== "__main__":