bb9a382d5d2bd0949b973ff39daad23e9f1ccd29
[normand.git] / normand / normand.py
1 # The MIT License (MIT)
2 #
3 # Copyright (c) 2023 Philippe Proulx <eeppeliteloop@gmail.com>
4 #
5 # Permission is hereby granted, free of charge, to any person obtaining
6 # a copy of this software and associated documentation files (the
7 # "Software"), to deal in the Software without restriction, including
8 # without limitation the rights to use, copy, modify, merge, publish,
9 # distribute, sublicense, and/or sell copies of the Software, and to
10 # permit persons to whom the Software is furnished to do so, subject to
11 # the following conditions:
12 #
13 # The above copyright notice and this permission notice shall be
14 # included in all copies or substantial portions of the Software.
15 #
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
24 # This module is the portable Normand processor. It offers both the
25 # parse() function and the command-line tool (run the module itself)
26 # without external dependencies except a `typing` module for Python 3.4.
27 #
28 # Feel free to copy this module file to your own project to use Normand.
29 #
30 # Upstream repository: <https://github.com/efficios/normand>.
31
32 __author__ = "Philippe Proulx"
33 __version__ = "0.18.0"
34 __all__ = [
35 "__author__",
36 "__version__",
37 "ByteOrder",
38 "LabelsT",
39 "parse",
40 "ParseError",
41 "ParseErrorMessage",
42 "ParseResult",
43 "TextLocation",
44 "VariablesT",
45 ]
46
47 import re
48 import abc
49 import ast
50 import sys
51 import copy
52 import enum
53 import math
54 import struct
55 import typing
56 from typing import Any, Set, Dict, List, Union, Pattern, Callable, NoReturn, Optional
57
58
# Position within the source text: a line number and a column number.
#
# Instances only come from the private `_create()` class method:
# calling the class directly raises `NotImplementedError`.
class TextLocation:
    def __init__(*args, **kwargs):  # type: ignore
        raise NotImplementedError

    @classmethod
    def _create(cls, line_no: int, col_no: int):
        # Bypass the disabled constructor
        loc = cls.__new__(cls)
        loc._init(line_no, col_no)
        return loc

    def _init(self, line_no: int, col_no: int):
        self._line_no, self._col_no = line_no, col_no

    # Column number.
    @property
    def col_no(self):
        return self._col_no

    # Line number.
    @property
    def line_no(self):
        return self._line_no

    def __repr__(self):
        fields = (self._line_no, self._col_no)
        return "TextLocation({}, {})".format(*fields)
86
87
# Base class of every item: keeps the text location given at
# construction time.
class _Item:
    def __init__(self, text_loc: TextLocation):
        self._text_loc = text_loc

    # Text location of this item within the source.
    @property
    def text_loc(self):
        loc = self._text_loc
        return loc
97
98
# Scalar item: an item which reports its size through the `size`
# property.
#
# NOTE: `_Item` is a plain class (no `abc.ABCMeta` metaclass), so the
# `abc.abstractmethod` decorator below documents the intent without
# preventing instantiation.
class _ScalarItem(_Item):
    # Size of this item (bytes).
    @property
    @abc.abstractmethod
    def size(self) -> int:
        ...
106
107
108 # A repeatable item.
109 class _RepableItem:
110 pass
111
112
# Single byte item.
class _Byte(_ScalarItem, _RepableItem):
    def __init__(self, val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val

    # A byte item is always one byte long.
    @property
    def size(self):
        return 1

    # Value of the byte.
    @property
    def val(self):
        return self._val

    def __repr__(self):
        # Show the value in hexadecimal
        hex_val = hex(self._val)
        return "_Byte({}, {})".format(hex_val, repr(self._text_loc))
130
131
# String item: holds the already encoded bytes of a literal string.
class _Str(_ScalarItem, _RepableItem):
    def __init__(self, data: bytes, text_loc: TextLocation):
        super().__init__(text_loc)
        self._data = data

    # Number of encoded bytes.
    @property
    def size(self):
        return len(self._data)

    # Encoded bytes.
    @property
    def data(self):
        return self._data

    def __repr__(self):
        parts = map(repr, (self._data, self._text_loc))
        return "_Str({}, {})".format(*parts)
149
150
# Byte order (endianness); `enum.unique` guarantees distinct values.
@enum.unique
class ByteOrder(enum.Enum):
    # Big endian.
    BE = "be"

    # Little endian.
    LE = "le"
159
160
# Byte order setting item.
class _SetBo(_Item):
    def __init__(self, bo: ByteOrder, text_loc: TextLocation):
        super().__init__(text_loc)
        self._bo = bo

    # Byte order to set.
    @property
    def bo(self):
        return self._bo

    def __repr__(self):
        parts = map(repr, (self._bo, self._text_loc))
        return "_SetBo({}, {})".format(*parts)
173
174
# Label item.
class _Label(_Item):
    def __init__(self, name: str, text_loc: TextLocation):
        super().__init__(text_loc)
        self._name = name

    # Name of the label.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        name_repr = repr(self._name)
        loc_repr = repr(self._text_loc)
        return "_Label({}, {})".format(name_repr, loc_repr)
188
189
# Offset setting item.
class _SetOffset(_Item):
    def __init__(self, val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val

    # Offset to set (bytes).
    @property
    def val(self):
        return self._val

    def __repr__(self):
        parts = [repr(part) for part in (self._val, self._text_loc)]
        return "_SetOffset({}, {})".format(*parts)
203
204
# Offset alignment item: an alignment and the value of the padding
# bytes.
class _AlignOffset(_Item):
    def __init__(self, val: int, pad_val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val, self._pad_val = val, pad_val

    # Padding byte value.
    @property
    def pad_val(self):
        return self._pad_val

    # Alignment value (bits).
    @property
    def val(self):
        return self._val

    def __repr__(self):
        parts = map(repr, (self._val, self._pad_val, self._text_loc))
        return "_AlignOffset({}, {}, {})".format(*parts)
226
227
228 # Mixin of containing an AST expression and its string.
229 class _ExprMixin:
230 def __init__(self, expr_str: str, expr: ast.Expression):
231 self._expr_str = expr_str
232 self._expr = expr
233
234 # Expression string.
235 @property
236 def expr_str(self):
237 return self._expr_str
238
239 # Expression node to evaluate.
240 @property
241 def expr(self):
242 return self._expr
243
244
# "Fill until some offset" item: the expression and the value of the
# padding bytes.
class _FillUntil(_Item, _ExprMixin):
    def __init__(
        self, expr_str: str, expr: ast.Expression, pad_val: int, text_loc: TextLocation
    ):
        # `_Item.__init__()` doesn't chain to the mixin: initialize
        # both parts explicitly
        _ExprMixin.__init__(self, expr_str, expr)
        super().__init__(text_loc)
        self._pad_val = pad_val

    # Padding byte value.
    @property
    def pad_val(self):
        return self._pad_val

    def __repr__(self):
        parts = map(
            repr, (self._expr_str, self._expr, self._pad_val, self._text_loc)
        )
        return "_FillUntil({}, {}, {}, {})".format(*parts)
266
267
# Variable assignment item: the name of the variable and the
# expression to assign.
class _VarAssign(_Item, _ExprMixin):
    def __init__(
        self, name: str, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        # `_Item.__init__()` doesn't chain to the mixin: initialize
        # both parts explicitly
        _ExprMixin.__init__(self, expr_str, expr)
        super().__init__(text_loc)
        self._name = name

    # Name of the variable.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        parts = map(repr, (self._name, self._expr_str, self._expr, self._text_loc))
        return "_VarAssign({}, {}, {}, {})".format(*parts)
289
290
# Fixed-length number item: the expression and the length in bits,
# which may span more than one byte.
class _FlNum(_ScalarItem, _RepableItem, _ExprMixin):
    def __init__(
        self, expr_str: str, expr: ast.Expression, len: int, text_loc: TextLocation
    ):
        # `_Item.__init__()` doesn't chain to the mixin: initialize
        # both parts explicitly
        _ExprMixin.__init__(self, expr_str, expr)
        super().__init__(text_loc)
        self._len = len

    # Size in bytes: the length in bits divided by eight.
    @property
    def size(self):
        return self._len // 8

    # Length (bits).
    @property
    def len(self):
        return self._len

    def __repr__(self):
        parts = map(repr, (self._expr_str, self._expr, self._len, self._text_loc))
        return "_FlNum({}, {}, {}, {})".format(*parts)
316
317
# LEB128 integer item (common part of the unsigned and signed ones).
class _Leb128Int(_Item, _RepableItem, _ExprMixin):
    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation):
        # `_Item.__init__()` doesn't chain to the mixin: initialize
        # both parts explicitly
        _ExprMixin.__init__(self, expr_str, expr)
        super().__init__(text_loc)

    def __repr__(self):
        # Use the name of the concrete class so that subclasses don't
        # need their own __repr__()
        cls_name = self.__class__.__name__
        parts = map(repr, (self._expr_str, self._expr, self._text_loc))
        return "{}({}, {}, {})".format(cls_name, *parts)
331
332
# Unsigned LEB128 integer item.
#
# Adds nothing to `_Leb128Int`: the class itself is the tag.
class _ULeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    pass
336
337
# Signed LEB128 integer item.
#
# Adds nothing to `_Leb128Int`: the class itself is the tag.
class _SLeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    pass
341
342
# Group item: an ordered list of contained items.
class _Group(_Item, _RepableItem):
    def __init__(self, items: List[_Item], text_loc: TextLocation):
        super().__init__(text_loc)
        self._items = items

    # Contained items.
    @property
    def items(self):
        return self._items

    def __repr__(self):
        parts = map(repr, (self._items, self._text_loc))
        return "_Group({}, {})".format(*parts)
356
357
# Repetition item: the item to repeat and an expression.
class _Rep(_Item, _ExprMixin):
    def __init__(
        self, item: _Item, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        # `_Item.__init__()` doesn't chain to the mixin: initialize
        # both parts explicitly
        _ExprMixin.__init__(self, expr_str, expr)
        super().__init__(text_loc)
        self._item = item

    # Item to repeat.
    @property
    def item(self):
        return self._item

    def __repr__(self):
        parts = map(repr, (self._item, self._expr_str, self._expr, self._text_loc))
        return "_Rep({}, {}, {}, {})".format(*parts)
379
380
# Conditional item: the condition expression and one item for each
# outcome.
class _Cond(_Item, _ExprMixin):
    def __init__(
        self,
        true_item: _Item,
        false_item: _Item,
        expr_str: str,
        expr: ast.Expression,
        text_loc: TextLocation,
    ):
        # `_Item.__init__()` doesn't chain to the mixin: initialize
        # both parts explicitly
        _ExprMixin.__init__(self, expr_str, expr)
        super().__init__(text_loc)
        self._true_item, self._false_item = true_item, false_item

    # Item when condition is false.
    @property
    def false_item(self):
        return self._false_item

    # Item when condition is true.
    @property
    def true_item(self):
        return self._true_item

    def __repr__(self):
        fields = (
            self._true_item,
            self._false_item,
            self._expr_str,
            self._expr,
            self._text_loc,
        )
        return "_Cond({}, {}, {}, {}, {})".format(*map(repr, fields))
414
415
# Macro definition item: a name, parameter names, and the group of
# contained items.
class _MacroDef(_Item):
    def __init__(
        self, name: str, param_names: List[str], group: _Group, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        self._name, self._param_names, self._group = name, param_names, group

    # Contained items.
    @property
    def group(self):
        return self._group

    # Name.
    @property
    def name(self):
        return self._name

    # Parameter names.
    @property
    def param_names(self):
        return self._param_names

    def __repr__(self):
        fields = (self._name, self._param_names, self._group, self._text_loc)
        return "_MacroDef({}, {}, {}, {})".format(*map(repr, fields))
448
449
# Macro expansion parameter: an expression (string and AST node) and
# where it is within the source text.
class _MacroExpParam:
    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation):
        self._expr_str, self._expr = expr_str, expr
        self._text_loc = text_loc

    # Source text location.
    @property
    def text_loc(self):
        return self._text_loc

    # Expression.
    @property
    def expr(self):
        return self._expr

    # Expression string.
    @property
    def expr_str(self):
        return self._expr_str

    def __repr__(self):
        fields = (self._expr_str, self._expr, self._text_loc)
        return "_MacroExpParam({}, {}, {})".format(*map(repr, fields))
476
477
# Macro expansion item: the name of the macro and the parameters of
# this expansion.
class _MacroExp(_Item, _RepableItem):
    def __init__(
        self,
        name: str,
        params: List[_MacroExpParam],
        text_loc: TextLocation,
    ):
        super().__init__(text_loc)
        self._name, self._params = name, params

    # Parameters.
    @property
    def params(self):
        return self._params

    # Name.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        fields = (self._name, self._params, self._text_loc)
        return "_MacroExp({}, {}, {})".format(*map(repr, fields))
506
507
# Single message of a parsing error: a text and a text location.
#
# Instances only come from the private `_create()` class method:
# calling the class directly raises `NotImplementedError`.
class ParseErrorMessage:
    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    @classmethod
    def _create(cls, text: str, text_loc: TextLocation):
        # Bypass the disabled constructor
        msg = cls.__new__(cls)
        msg._init(text, text_loc)
        return msg

    def _init(self, text: str, text_loc: TextLocation):
        self._text, self._text_loc = text, text_loc

    # Source text location.
    @property
    def text_location(self):
        return self._text_loc

    # Message text.
    @property
    def text(self):
        return self._text
532
533
# Parsing error holding one or more messages (`ParseErrorMessage`).
#
# Instances only come from the private `_create()` class method:
# calling the class directly raises `NotImplementedError`.
class ParseError(RuntimeError):
    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    @classmethod
    def _create(cls, msg: str, text_loc: TextLocation):
        # Bypass the disabled constructor
        exc = cls.__new__(cls)
        exc._init(msg, text_loc)
        return exc

    def _init(self, msg: str, text_loc: TextLocation):
        # The `RuntimeError` part gets the text of the first message
        super().__init__(msg)
        self._msgs = []  # type: List[ParseErrorMessage]
        self._add_msg(msg, text_loc)

    # Appends a new message made of `msg` and `text_loc`.
    def _add_msg(self, msg: str, text_loc: TextLocation):
        new_msg = ParseErrorMessage._create(  # pyright: ignore[reportPrivateUsage]
            msg, text_loc
        )
        self._msgs.append(new_msg)

    # Parsing error messages.
    #
    # The first message is the most specific one.
    @property
    def messages(self):
        return self._msgs
563
564
# Creates a parsing error from the message `msg` and the text location
# `text_loc`, and raises it.
def _raise_error(msg: str, text_loc: TextLocation) -> NoReturn:
    exc = ParseError._create(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise exc
568
569
# Adds a message (text `msg` at `text_loc`) to the existing parsing
# error `exc`, without raising anything.
def _add_error_msg(exc: ParseError, msg: str, text_loc: TextLocation):
    add_msg = exc._add_msg  # pyright: ignore[reportPrivateUsage]
    add_msg(msg, text_loc)
573
574
# Appends a message (text `msg` at `text_loc`) to the parsing error
# `exc`, and then reraises `exc`.
def _augment_error(exc: ParseError, msg: str, text_loc: TextLocation) -> NoReturn:
    # Same effect as _add_error_msg()
    exc._add_msg(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise exc
579
580
581 # Returns a normalized version (so as to be parseable by int()) of
582 # the constant integer string `s`, possibly negative, dealing with
583 # any radix suffix.
584 def _norm_const_int(s: str):
585 neg = ""
586 pos = s
587
588 if s.startswith("-"):
589 neg = "-"
590 pos = s[1:]
591
592 for r in "xXoObB":
593 if pos.startswith("0" + r):
594 # Already correct
595 return s
596
597 # Try suffix
598 asm_suf_base = {
599 "h": "x",
600 "H": "x",
601 "q": "o",
602 "Q": "o",
603 "o": "o",
604 "O": "o",
605 "b": "b",
606 "B": "B",
607 }
608
609 for suf in asm_suf_base:
610 if pos[-1] == suf:
611 s = "{}0{}{}".format(neg, asm_suf_base[suf], pos.rstrip(suf))
612
613 return s
614
615
616 # Variables dictionary type (for type hints).
617 VariablesT = Dict[str, Union[int, float]]
618
619
620 # Labels dictionary type (for type hints).
621 LabelsT = Dict[str, int]
622
623
624 # Common patterns.
625 _py_name_pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")
626 _pos_const_int_pat = re.compile(
627 r"(?:0[Xx][A-Fa-f0-9]+|0[Oo][0-7]+|0[Bb][01]+|[A-Fa-f0-9]+[hH]|[0-7]+[qQoO]|[01]+[bB]|\d+)\b"
628 )
629 _const_int_pat = re.compile(r"(?P<neg>-)?(?:{})".format(_pos_const_int_pat.pattern))
630 _const_float_pat = re.compile(
631 r"[-+]?(?:(?:\d*\.\d+)|(?:\d+\.))(?:[Ee][+-]?\d+)?(?=\W|)"
632 )
633
634
# Type of a dictionary which maps strings to macro definition items.
_MacroDefsT = typing.Dict[str, _MacroDef]
637
638
639 # Normand parser.
640 #
641 # The constructor accepts a Normand input. After building, use the `res`
642 # property to get the resulting main group.
643 class _Parser:
# Builds a parser for the Normand input `normand` and parses it right
# away.
#
# The keys of `variables` and `labels` become the initial sets of
# known variable and label names.
def __init__(self, normand: str, variables: VariablesT, labels: LabelsT):
    # Input and current position within it
    self._normand = normand
    self._at = 0
    self._line_no, self._col_no = 1, 1

    # Known names
    self._label_names = set(labels)
    self._var_names = set(variables)
    self._macro_defs = {}  # type: _MacroDefsT

    # Parse now
    self._parse()
655
# Result of the parsing (main group).
@property
def res(self):
    main_group = self._res
    return main_group
660
# Dictionary of the macro definitions of this parser.
@property
def macro_defs(self):
    defs = self._macro_defs
    return defs
665
# Current text location: a new `TextLocation` object built from the
# current line and column numbers.
@property
def _text_loc(self):
    line_no, col_no = self._line_no, self._col_no
    return TextLocation._create(  # pyright: ignore[reportPrivateUsage]
        line_no, col_no
    )
672
673 # Returns `True` if this parser is done parsing.
674 def _is_done(self):
675 return self._at == len(self._normand)
676
677 # Returns `True` if this parser isn't done parsing.
678 def _isnt_done(self):
679 return not self._is_done()
680
# Raises a parsing error made of the message `msg` and the current
# text location.
def _raise_error(self, msg: str) -> NoReturn:
    cur_text_loc = self._text_loc
    _raise_error(msg, cur_text_loc)
685
686 # Tries to make the pattern `pat` match the current substring,
687 # returning the match object and updating `self._at`,
688 # `self._line_no`, and `self._col_no` on success.
689 def _try_parse_pat(self, pat: Pattern[str]):
690 m = pat.match(self._normand, self._at)
691
692 if m is None:
693 return
694
695 # Skip matched string
696 self._at += len(m.group(0))
697
698 # Update line number
699 self._line_no += m.group(0).count("\n")
700
701 # Update column number
702 for i in reversed(range(self._at)):
703 if self._normand[i] == "\n" or i == 0:
704 if i == 0:
705 self._col_no = self._at + 1
706 else:
707 self._col_no = self._at - i
708
709 break
710
711 # Return match object
712 return m
713
714 # Expects the pattern `pat` to match the current substring,
715 # returning the match object and updating `self._at`,
716 # `self._line_no`, and `self._col_no` on success, or raising a parse
717 # error with the message `error_msg` on error.
718 def _expect_pat(self, pat: Pattern[str], error_msg: str):
719 # Match
720 m = self._try_parse_pat(pat)
721
722 if m is None:
723 # No match: error
724 self._raise_error(error_msg)
725
726 # Return match object
727 return m
728
729 # Pattern for _skip_ws_and_comments()
730 _ws_or_syms_or_comments_pat = re.compile(
731 r"(?:[\s/\\?&:;.,[\]_=|-]|#[^#]*?(?:\n|#))*"
732 )
733
734 # Skips as many whitespaces, insignificant symbol characters, and
735 # comments as possible.
736 def _skip_ws_and_comments(self):
737 self._try_parse_pat(self._ws_or_syms_or_comments_pat)
738
739 # Pattern for _skip_ws()
740 _ws_pat = re.compile(r"\s*")
741
742 # Skips as many whitespaces as possible.
743 def _skip_ws(self):
744 self._try_parse_pat(self._ws_pat)
745
746 # Pattern for _try_parse_hex_byte()
747 _nibble_pat = re.compile(r"[A-Fa-f0-9]")
748
749 # Tries to parse a hexadecimal byte, returning a byte item on
750 # success.
751 def _try_parse_hex_byte(self):
752 begin_text_loc = self._text_loc
753
754 # Match initial nibble
755 m_high = self._try_parse_pat(self._nibble_pat)
756
757 if m_high is None:
758 # No match
759 return
760
761 # Expect another nibble
762 self._skip_ws_and_comments()
763 m_low = self._expect_pat(
764 self._nibble_pat, "Expecting another hexadecimal nibble"
765 )
766
767 # Return item
768 return _Byte(int(m_high.group(0) + m_low.group(0), 16), begin_text_loc)
769
770 # Patterns for _try_parse_bin_byte()
771 _bin_byte_bit_pat = re.compile(r"[01]")
772 _bin_byte_prefix_pat = re.compile(r"%+")
773
774 # Tries to parse a binary byte, returning a byte item on success.
775 def _try_parse_bin_byte(self):
776 begin_text_loc = self._text_loc
777
778 # Match prefix
779 m = self._try_parse_pat(self._bin_byte_prefix_pat)
780
781 if m is None:
782 # No match
783 return
784
785 # Expect as many bytes as there are `%` prefixes
786 items = [] # type: List[_Item]
787
788 for _ in range(len(m.group(0))):
789 self._skip_ws_and_comments()
790 byte_text_loc = self._text_loc
791 bits = [] # type: List[str]
792
793 # Expect eight bits
794 for _ in range(8):
795 self._skip_ws_and_comments()
796 m = self._expect_pat(
797 self._bin_byte_bit_pat, "Expecting a bit (`0` or `1`)"
798 )
799 bits.append(m.group(0))
800
801 items.append(_Byte(int("".join(bits), 2), byte_text_loc))
802
803 # Return item
804 if len(items) == 1:
805 return items[0]
806
807 # As group
808 return _Group(items, begin_text_loc)
809
810 # Patterns for _try_parse_dec_byte()
811 _dec_byte_prefix_pat = re.compile(r"\$")
812 _dec_byte_val_pat = re.compile(r"(?P<neg>-?)(?P<val>\d+)")
813
814 # Tries to parse a decimal byte, returning a byte item on success.
815 def _try_parse_dec_byte(self):
816 begin_text_loc = self._text_loc
817
818 # Match prefix
819 if self._try_parse_pat(self._dec_byte_prefix_pat) is None:
820 # No match
821 return
822
823 # Expect the value
824 self._skip_ws()
825 m = self._expect_pat(self._dec_byte_val_pat, "Expecting a decimal constant")
826
827 # Compute value
828 val = int(m.group("val")) * (-1 if m.group("neg") == "-" else 1)
829
830 # Validate
831 if val < -128 or val > 255:
832 _raise_error("Invalid decimal byte value {}".format(val), begin_text_loc)
833
834 # Two's complement
835 val %= 256
836
837 # Return item
838 return _Byte(val, begin_text_loc)
839
840 # Tries to parse a byte, returning a byte item on success.
841 def _try_parse_byte(self):
842 # Hexadecimal
843 item = self._try_parse_hex_byte()
844
845 if item is not None:
846 return item
847
848 # Binary
849 item = self._try_parse_bin_byte()
850
851 if item is not None:
852 return item
853
854 # Decimal
855 item = self._try_parse_dec_byte()
856
857 if item is not None:
858 return item
859
# Patterns for _try_parse_str()
_str_prefix_pat = re.compile(r'(?:u(?P<len>16|32)(?P<bo>be|le))?\s*"')
_str_suffix_pat = re.compile(r'"')
_str_str_pat = re.compile(r'(?:(?:\\.)|[^"])*')

# Escape sequence candidate: a backslash and the next character
# (except a newline, like in `_str_str_pat`)
_str_escape_seq_pat = re.compile(r"\\(.)")

# Strings corresponding to escape sequence characters
_str_escape_seq_strs = {
    "0": "\0",
    "a": "\a",
    "b": "\b",
    "e": "\x1b",
    "f": "\f",
    "n": "\n",
    "r": "\r",
    "t": "\t",
    "v": "\v",
    "\\": "\\",
    '"': '"',
}

# Tries to parse a string, returning a string item on success.
#
# A string is an optional encoding prefix (`u16be`, `u16le`, `u32be`,
# or `u32le`; UTF-8 without any) followed with a double-quoted literal
# string which may contain the escape sequences of
# `_str_escape_seq_strs`. A backslash followed with any other character
# is kept as is.
#
# Raises a parse error if the closing `"` is missing.
def _try_parse_str(self):
    begin_text_loc = self._text_loc

    # Match prefix
    m = self._try_parse_pat(self._str_prefix_pat)

    if m is None:
        # No match
        return

    # Get encoding
    encoding = "utf8"

    if m.group("len") is not None:
        encoding = "utf_{}_{}".format(m.group("len"), m.group("bo"))

    # Actual string
    m = self._expect_pat(self._str_str_pat, "Expecting a literal string")

    # Expect end of string
    self._expect_pat(self._str_suffix_pat, 'Expecting `"` (end of literal string)')

    # Replace escape sequences in a single left-to-right pass.
    #
    # Replacing one escape sequence kind after the other over the
    # whole string is wrong: within `\\n` (escaped backslash followed
    # with `n`), the second backslash and `n` would be taken for a
    # newline escape sequence.
    escape_seq_strs = self._str_escape_seq_strs

    def replace_escape_seq(m_esc):  # type: ignore
        # Unknown escape sequence: keep as is
        return escape_seq_strs.get(m_esc.group(1), m_esc.group(0))

    val = self._str_escape_seq_pat.sub(replace_escape_seq, m.group(0))

    # Encode
    data = val.encode(encoding)

    # Return item
    return _Str(data, begin_text_loc)
914
915 # Common right parenthesis pattern
916 _right_paren_pat = re.compile(r"\)")
917
918 # Patterns for _try_parse_group()
919 _group_prefix_pat = re.compile(r"\(|!g(?:roup)?\b")
920
921 # Tries to parse a group, returning a group item on success.
922 def _try_parse_group(self):
923 begin_text_loc = self._text_loc
924
925 # Match prefix
926 m_open = self._try_parse_pat(self._group_prefix_pat)
927
928 if m_open is None:
929 # No match
930 return
931
932 # Parse items
933 items = self._parse_items()
934
935 # Expect end of group
936 self._skip_ws_and_comments()
937
938 if m_open.group(0) == "(":
939 pat = self._right_paren_pat
940 exp = ")"
941 else:
942 pat = self._block_end_pat
943 exp = "!end"
944
945 self._expect_pat(pat, "Expecting an item or `{}` (end of group)".format(exp))
946
947 # Return item
948 return _Group(items, begin_text_loc)
949
# Returns the normalized version of the expression string `expr_str`
# (stripped, newlines replaced with spaces) and the AST expression
# node parsed from it.
#
# Raises a parse error at the text location `text_loc` if the
# expression has an invalid syntax.
def _ast_expr_from_str(self, expr_str: str, text_loc: TextLocation):
    # Normalize
    norm_expr_str = " ".join(expr_str.strip().split("\n"))

    # Parse as a single Python expression
    try:
        node = ast.parse(norm_expr_str, mode="eval")
    except SyntaxError:
        msg = "Invalid expression `{}`: invalid syntax".format(norm_expr_str)
        _raise_error(msg, text_loc)

    return norm_expr_str, node
965
966 # Patterns for _try_parse_num_and_attr()
967 _val_expr_pat = re.compile(r"([^}:]+):\s*")
968 _fl_num_len_attr_pat = re.compile(r"8|16|24|32|40|48|56|64")
969 _leb128_int_attr_pat = re.compile(r"(u|s)leb128")
970
971 # Tries to parse a value and attribute (fixed length in bits or
972 # `leb128`), returning a value item on success.
973 def _try_parse_num_and_attr(self):
974 begin_text_loc = self._text_loc
975
976 # Match
977 m_expr = self._try_parse_pat(self._val_expr_pat)
978
979 if m_expr is None:
980 # No match
981 return
982
983 # Create an expression node from the expression string
984 expr_str, expr = self._ast_expr_from_str(m_expr.group(1), begin_text_loc)
985
986 # Length?
987 m_attr = self._try_parse_pat(self._fl_num_len_attr_pat)
988
989 if m_attr is None:
990 # LEB128?
991 m_attr = self._try_parse_pat(self._leb128_int_attr_pat)
992
993 if m_attr is None:
994 # At this point it's invalid
995 self._raise_error(
996 "Expecting a length (multiple of eight bits), `uleb128`, or `sleb128`"
997 )
998
999 # Return LEB128 integer item
1000 cls = _ULeb128Int if m_attr.group(1) == "u" else _SLeb128Int
1001 return cls(expr_str, expr, begin_text_loc)
1002 else:
1003 # Return fixed-length number item
1004 return _FlNum(
1005 expr_str,
1006 expr,
1007 int(m_attr.group(0)),
1008 begin_text_loc,
1009 )
1010
# Patterns for _try_parse_var_assign()
_var_assign_name_equal_pat = re.compile("(" + _py_name_pat.pattern + r")\s*=")
_var_assign_expr_pat = re.compile(r"[^}]+")

# Tries to parse a variable assignment (name, `=`, and expression),
# returning a variable assignment item on success.
#
# Also adds the name to the known variable names.
#
# Raises a parse error if the name is reserved, if a label already
# has this name, or if the expression is missing or invalid.
def _try_parse_var_assign(self):
    start_loc = self._text_loc

    # Name and `=`
    m_name = self._try_parse_pat(self._var_assign_name_equal_pat)

    if m_name is None:
        # Not a variable assignment
        return None

    # Validate the name
    name = m_name.group(1)

    if name == _icitte_name:
        msg = "`{}` is a reserved variable name".format(_icitte_name)
        _raise_error(msg, start_loc)

    if name in self._label_names:
        msg = "Existing label named `{}`".format(name)
        _raise_error(msg, start_loc)

    # Expression (required from now on)
    self._skip_ws()
    m_expr = self._expect_pat(self._var_assign_expr_pat, "Expecting an expression")
    expr_str, expr = self._ast_expr_from_str(m_expr.group(0), start_loc)

    # The variable is known from now on
    self._var_names.add(name)

    return _VarAssign(name, expr_str, expr, start_loc)
1055
1056 # Pattern for _try_parse_set_bo()
1057 _bo_pat = re.compile(r"[bl]e")
1058
1059 # Tries to parse a byte order name, returning a byte order setting
1060 # item on success.
1061 def _try_parse_set_bo(self):
1062 begin_text_loc = self._text_loc
1063
1064 # Match
1065 m = self._try_parse_pat(self._bo_pat)
1066
1067 if m is None:
1068 # No match
1069 return
1070
1071 # Return corresponding item
1072 if m.group(0) == "be":
1073 return _SetBo(ByteOrder.BE, begin_text_loc)
1074 else:
1075 assert m.group(0) == "le"
1076 return _SetBo(ByteOrder.LE, begin_text_loc)
1077
1078 # Patterns for _try_parse_val_or_bo()
1079 _val_var_assign_set_bo_prefix_pat = re.compile(r"\{")
1080 _val_var_assign_set_bo_suffix_pat = re.compile(r"\}")
1081
1082 # Tries to parse a value, a variable assignment, or a byte order
1083 # setting, returning an item on success.
1084 def _try_parse_val_or_var_assign_or_set_bo(self):
1085 # Match prefix
1086 if self._try_parse_pat(self._val_var_assign_set_bo_prefix_pat) is None:
1087 # No match
1088 return
1089
1090 self._skip_ws()
1091
1092 # Variable assignment item?
1093 item = self._try_parse_var_assign()
1094
1095 if item is None:
1096 # Number item?
1097 item = self._try_parse_num_and_attr()
1098
1099 if item is None:
1100 # Byte order setting item?
1101 item = self._try_parse_set_bo()
1102
1103 if item is None:
1104 # At this point it's invalid
1105 self._raise_error(
1106 "Expecting a fixed-length number, a variable assignment, or a byte order setting"
1107 )
1108
1109 # Expect suffix
1110 self._skip_ws()
1111 self._expect_pat(self._val_var_assign_set_bo_suffix_pat, "Expecting `}`")
1112 return item
1113
# Tries to parse an offset setting value (a positive constant integer
# after the initial `<`), returning an offset setting item on
# success.
def _try_parse_set_offset_val(self):
    start_loc = self._text_loc
    m = self._try_parse_pat(_pos_const_int_pat)

    if m is None:
        # Not an offset setting value
        return None

    # Base 0 makes int() honour the radix prefix
    offset = int(_norm_const_int(m.group(0)), 0)
    return _SetOffset(offset, start_loc)
1128
# Tries to parse a label name (after the initial `<`), returning a
# label item on success.
#
# Also adds the name to the known label names.
#
# Raises a parse error if the name is reserved, or if a label or a
# variable already has this name.
def _try_parse_label_name(self):
    start_loc = self._text_loc
    m = self._try_parse_pat(_py_name_pat)

    if m is None:
        # Not a label name
        return None

    # Validate
    name = m.group(0)

    if name == _icitte_name:
        msg = "`{}` is a reserved label name".format(_icitte_name)
        _raise_error(msg, start_loc)

    if name in self._label_names:
        msg = "Duplicate label name `{}`".format(name)
        _raise_error(msg, start_loc)

    if name in self._var_names:
        msg = "Existing variable named `{}`".format(name)
        _raise_error(msg, start_loc)

    # The label is known from now on
    self._label_names.add(name)

    return _Label(name, start_loc)
1160
1161 # Patterns for _try_parse_label_or_set_offset()
1162 _label_set_offset_prefix_pat = re.compile(r"<")
1163 _label_set_offset_suffix_pat = re.compile(r">")
1164
1165 # Tries to parse a label or an offset setting, returning an item on
1166 # success.
1167 def _try_parse_label_or_set_offset(self):
1168 # Match prefix
1169 if self._try_parse_pat(self._label_set_offset_prefix_pat) is None:
1170 # No match
1171 return
1172
1173 # Offset setting item?
1174 self._skip_ws()
1175 item = self._try_parse_set_offset_val()
1176
1177 if item is None:
1178 # Label item?
1179 item = self._try_parse_label_name()
1180
1181 if item is None:
1182 # At this point it's invalid
1183 self._raise_error("Expecting a label name or an offset setting value")
1184
1185 # Expect suffix
1186 self._skip_ws()
1187 self._expect_pat(self._label_set_offset_suffix_pat, "Expecting `>`")
1188 return item
1189
1190 # Pattern for _parse_pad_val()
1191 _pad_val_prefix_pat = re.compile(r"~")
1192
1193 # Tries to parse a padding value, returning the padding value, or 0
1194 # if none.
1195 def _parse_pad_val(self):
1196 # Padding value?
1197 self._skip_ws()
1198 pad_val = 0
1199
1200 if self._try_parse_pat(self._pad_val_prefix_pat) is not None:
1201 self._skip_ws()
1202 pad_val_text_loc = self._text_loc
1203 m = self._expect_pat(
1204 _pos_const_int_pat,
1205 "Expecting a positive constant integer (byte value)",
1206 )
1207
1208 # Validate
1209 pad_val = int(_norm_const_int(m.group(0)), 0)
1210
1211 if pad_val > 255:
1212 _raise_error(
1213 "Invalid padding byte value {}".format(pad_val),
1214 pad_val_text_loc,
1215 )
1216
1217 return pad_val
1218
# Patterns for _try_parse_align_offset(): the `@` prefix and the decimal
# alignment value (in bits).
_align_offset_prefix_pat = re.compile(r"@")
_align_offset_val_pat = re.compile(r"\d+")

# Tries to parse an offset alignment (`@`, an alignment in bits, and an
# optional padding value).
#
# Returns an `_AlignOffset` item located at the `@`, or `None` if the
# current position doesn't start with `@`.
#
# The alignment must be a positive multiple of eight; otherwise
# _raise_error() is called with the location of the alignment value.
def _try_parse_align_offset(self):
    item_text_loc = self._text_loc

    if self._try_parse_pat(self._align_offset_prefix_pat) is None:
        # Not an offset alignment item
        return

    # Alignment value
    self._skip_ws()
    val_text_loc = self._text_loc
    m = self._expect_pat(
        self._align_offset_val_pat,
        "Expecting an alignment (positive multiple of eight bits)",
    )
    align = int(m.group(0))

    if not (align > 0 and align % 8 == 0):
        _raise_error(
            "Invalid alignment value {} (not a positive multiple of eight)".format(
                align
            ),
            val_text_loc,
        )

    # Optional padding value (0 if absent)
    pad_val = self._parse_pad_val()
    return _AlignOffset(align, pad_val, item_text_loc)
1257
# Patterns for _expect_expr(): the `{` and `}` delimiters and the text
# in between (anything up to the first `}`).
_inner_expr_prefix_pat = re.compile(r"\{")
_inner_expr_pat = re.compile(r"[^}]+")
_inner_expr_suffix_pat = re.compile(r"\}")

# Parses an expression outside a `{`/`}` context.
#
# The alternatives are tried in this order:
#
# 1. If `accept_const_float` is `True`: a constant floating point
#    number.
#
# 2. If `accept_const_int` is `True`: a constant integer. A negative
#    one is an error unless `allow_neg_int` is `True`.
#
# 3. A Python name.
#
# 4. A Python expression within `{` and `}`.
#
# Returns what self._ast_expr_from_str() returns for the matched text
# (the callers unpack it as an expression string and an AST
# expression).
def _expect_expr(
    self,
    accept_const_int: bool = False,
    allow_neg_int: bool = False,
    accept_const_float: bool = False,
):
    start_text_loc = self._text_loc

    # Constant floating point number?
    if accept_const_float:
        m = self._try_parse_pat(_const_float_pat)

        if m is not None:
            return self._ast_expr_from_str(m.group(0), start_text_loc)

    # Constant integer?
    if accept_const_int:
        m = self._try_parse_pat(_const_int_pat)

        if m is not None:
            if m.group("neg") == "-" and not allow_neg_int:
                _raise_error("Expecting a positive constant integer", start_text_loc)

            return self._ast_expr_from_str(
                _norm_const_int(m.group(0)), start_text_loc
            )

    # Name?
    m = self._try_parse_pat(_py_name_pat)

    if m is not None:
        return self._ast_expr_from_str(m.group(0), start_text_loc)

    # Build the list of what would have been accepted here for the
    # error message, should `{` be missing too.
    alternatives = []  # type: List[str]

    if accept_const_int:
        alternatives.append(
            "a {}constant integer".format("" if allow_neg_int else "positive ")
        )

    if accept_const_float:
        alternatives.append("a constant floating point number")

    # Only two parts ("a name or `{`") read better without a comma
    sep = ", " if alternatives else " "
    alternatives.extend(("a name", "or `{`"))

    # Expect `{`
    self._expect_pat(
        self._inner_expr_prefix_pat,
        "Expecting {}".format(sep.join(alternatives)),
    )

    # Expect the inner expression text
    self._skip_ws()
    inner_text_loc = self._text_loc
    inner_m = self._expect_pat(self._inner_expr_pat, "Expecting an expression")

    # Expect `}`
    self._skip_ws()
    self._expect_pat(self._inner_expr_suffix_pat, "Expecting `}`")

    return self._ast_expr_from_str(inner_m.group(0), inner_text_loc)
1343
# Patterns for _try_parse_fill_until().
#
# NOTE(review): `_fill_until_pad_val_prefix_pat` isn't used by the
# method below (the padding value is parsed by _parse_pad_val()); kept
# in case something else refers to it.
_fill_until_prefix_pat = re.compile(r"\+")
_fill_until_pad_val_prefix_pat = re.compile(r"~")

# Tries to parse a filling (`+`, an expression or positive constant
# integer, and an optional padding value).
#
# Returns a `_FillUntil` item located at the `+`, or `None` if the
# current position doesn't start with `+`.
def _try_parse_fill_until(self):
    item_text_loc = self._text_loc

    if self._try_parse_pat(self._fill_until_prefix_pat) is None:
        # Not a filling item
        return

    # Target offset expression
    self._skip_ws()
    expr_str, expr = self._expect_expr(accept_const_int=True)

    # Optional padding value (0 if absent)
    pad_val = self._parse_pad_val()
    return _FillUntil(expr_str, expr, pad_val, item_text_loc)
1366
1367 # Parses the multiplier expression of a repetition (block or
1368 # post-item) and returns the expression string and AST node.
1369 def _expect_rep_mul_expr(self):
1370 return self._expect_expr(accept_const_int=True)
1371
# Common block end pattern (`!end`)
_block_end_pat = re.compile(r"!end\b")

# Pattern for _try_parse_rep_block(): `!r` or `!repeat`
_rep_block_prefix_pat = re.compile(r"!r(?:epeat)?\b")

# Tries to parse a repetition block: the `!r`/`!repeat` prefix, a
# multiplier expression, the repeated items, and `!end`.
#
# Returns a `_Rep` item (located at the prefix) which wraps a `_Group`
# of the parsed items, or `None` if the prefix doesn't match.
def _try_parse_rep_block(self):
    rep_text_loc = self._text_loc

    if self._try_parse_pat(self._rep_block_prefix_pat) is None:
        # Not a repetition block
        return

    # Multiplier expression
    self._skip_ws_and_comments()
    mul_expr_str, mul_expr = self._expect_rep_mul_expr()

    # Repeated items
    self._skip_ws_and_comments()
    group_text_loc = self._text_loc
    rep_items = self._parse_items()

    # `!end`
    self._skip_ws_and_comments()
    self._expect_pat(
        self._block_end_pat, "Expecting an item or `!end` (end of repetition block)"
    )

    group = _Group(rep_items, group_text_loc)
    return _Rep(group, mul_expr_str, mul_expr, rep_text_loc)
1405
# Patterns for _try_parse_cond_block(): `!if` and `!else`
_cond_block_prefix_pat = re.compile(r"!if\b")
_cond_block_else_pat = re.compile(r"!else\b")

# Tries to parse a conditional block: `!if`, an expression, the "true"
# items, an optional `!else` followed by the "false" items, and `!end`.
#
# Returns a `_Cond` item located at the `!if`, or `None` if the prefix
# doesn't match.
#
# Without `!else`, the "false" group is empty and gets the location of
# the `!if` itself.
def _try_parse_cond_block(self):
    cond_text_loc = self._text_loc

    if self._try_parse_pat(self._cond_block_prefix_pat) is None:
        # Not a conditional block
        return

    # Condition expression (no constant accepted here)
    self._skip_ws_and_comments()
    expr_str, expr = self._expect_expr()

    # "True" items
    self._skip_ws_and_comments()
    true_items_text_loc = self._text_loc
    true_items = self._parse_items()

    # Optional `!else` part
    self._skip_ws_and_comments()

    if self._try_parse_pat(self._cond_block_else_pat) is None:
        false_items = []  # type: List[_Item]
        false_items_text_loc = cond_text_loc
    else:
        self._skip_ws_and_comments()
        false_items_text_loc = self._text_loc
        false_items = self._parse_items()

    # `!end`
    self._expect_pat(
        self._block_end_pat,
        "Expecting an item, `!else`, or `!end` (end of conditional block)",
    )

    true_group = _Group(true_items, true_items_text_loc)
    false_group = _Group(false_items, false_items_text_loc)
    return _Cond(true_group, false_group, expr_str, expr, cond_text_loc)
1454
# Common left parenthesis pattern
_left_paren_pat = re.compile(r"\(")

# Pattern for _try_parse_macro_def() and _try_parse_macro_exp(): the
# comma between parameters.
_macro_params_comma_pat = re.compile(",")

# Pattern for _try_parse_macro_def(): `!m` or `!macro`
_macro_def_prefix_pat = re.compile(r"!m(?:acro)?\b")

# Tries to parse a macro definition: the `!m`/`!macro` prefix, a name,
# a parenthesized list of comma-separated parameter names, the items of
# the macro, and `!end`.
#
# On success, registers a `_MacroDef` in `self._macro_defs` under the
# macro name and returns `True`. Returns `False` if the prefix doesn't
# match.
#
# A macro name which already exists in `self._macro_defs` and a
# repeated parameter name are both reported with _raise_error().
def _try_parse_macro_def(self):
    def_text_loc = self._text_loc

    if self._try_parse_pat(self._macro_def_prefix_pat) is None:
        # Not a macro definition
        return False

    # Macro name
    self._skip_ws()
    name_text_loc = self._text_loc
    name = self._expect_pat(_py_name_pat, "Expecting a valid macro name").group(0)

    if name in self._macro_defs:
        _raise_error("Duplicate macro named `{}`".format(name), name_text_loc)

    # `(`
    self._skip_ws()
    self._expect_pat(self._left_paren_pat, "Expecting `(`")

    # Comma-separated parameter names up to `)`
    param_names = []  # type: List[str]

    while True:
        self._skip_ws()

        if self._try_parse_pat(self._right_paren_pat) is not None:
            # End of parameter list
            break

        # A comma is required before every parameter but the first
        if param_names:
            self._expect_pat(self._macro_params_comma_pat, "Expecting `,`")

        self._skip_ws()
        param_text_loc = self._text_loc
        param_name = self._expect_pat(
            _py_name_pat, "Expecting valid parameter name"
        ).group(0)

        if param_name in param_names:
            _raise_error(
                "Duplicate macro parameter named `{}`".format(param_name),
                param_text_loc,
            )

        param_names.append(param_name)

    # Parse the items of the macro with empty sets of known variable and
    # label names, putting the current ones back afterwards.
    self._skip_ws_and_comments()
    items_text_loc = self._text_loc
    saved_var_names = self._var_names.copy()
    saved_label_names = self._label_names.copy()
    self._var_names = set()  # type: Set[str]
    self._label_names = set()  # type: Set[str]
    items = self._parse_items()
    self._var_names = saved_var_names
    self._label_names = saved_label_names

    # `!end`
    self._expect_pat(
        self._block_end_pat, "Expecting an item or `!end` (end of macro block)"
    )

    # Register the macro
    group = _Group(items, items_text_loc)
    self._macro_defs[name] = _MacroDef(name, param_names, group, def_text_loc)
    return True
1541
# Patterns for _try_parse_macro_exp(): the `m` prefix and the `:` which
# follows it.
_macro_exp_prefix_pat = re.compile(r"m\b")
_macro_exp_colon_pat = re.compile(r":")

# Tries to parse a macro expansion: `m`, `:`, the name of a known
# macro, and a parenthesized list of comma-separated parameter values.
#
# Returns a `_MacroExp` item located at the `m`, or `None` if the
# prefix doesn't match.
#
# An unknown macro name, and a number of parameter values which differs
# from the number of parameter names of the definition, are both
# reported with _raise_error().
def _try_parse_macro_exp(self):
    exp_text_loc = self._text_loc

    if self._try_parse_pat(self._macro_exp_prefix_pat) is None:
        # Not a macro expansion
        return

    # `:`
    self._skip_ws()
    self._expect_pat(self._macro_exp_colon_pat, "Expecting `:`")

    # Macro name: must be defined already
    self._skip_ws()
    name_text_loc = self._text_loc
    name = self._expect_pat(_py_name_pat, "Expecting a valid macro name").group(0)
    macro_def = self._macro_defs.get(name)

    if macro_def is None:
        _raise_error("Unknown macro name `{}`".format(name), name_text_loc)

    # `(`
    self._skip_ws()
    self._expect_pat(self._left_paren_pat, "Expecting `(`")

    # Comma-separated parameter values up to `)`
    params_text_loc = self._text_loc
    params = []  # type: List[_MacroExpParam]

    while True:
        self._skip_ws()

        if self._try_parse_pat(self._right_paren_pat) is not None:
            # End of parameter list
            break

        # A comma is required before every value but the first
        if params:
            self._expect_pat(self._macro_params_comma_pat, "Expecting `,`")

        # A value may be any expression or constant number
        self._skip_ws()
        param_text_loc = self._text_loc
        param_expr = self._expect_expr(
            accept_const_int=True,
            allow_neg_int=True,
            accept_const_float=True,
        )
        params.append(_MacroExpParam(*param_expr, text_loc=param_text_loc))

    # The expansion must pass exactly what the definition expects
    expected_count = len(macro_def.param_names)

    if len(params) != expected_count:
        _raise_error(
            "Macro expansion passes {} parameter{} while the definition expects {}".format(
                len(params), "" if len(params) == 1 else "s", expected_count
            ),
            params_text_loc,
        )

    return _MacroExp(name, params, exp_text_loc)
1619
1620 # Tries to parse a base item (anything except a repetition),
1621 # returning it on success.
1622 def _try_parse_base_item(self):
1623 # Byte item?
1624 item = self._try_parse_byte()
1625
1626 if item is not None:
1627 return item
1628
1629 # String item?
1630 item = self._try_parse_str()
1631
1632 if item is not None:
1633 return item
1634
1635 # Value, variable assignment, or byte order setting item?
1636 item = self._try_parse_val_or_var_assign_or_set_bo()
1637
1638 if item is not None:
1639 return item
1640
1641 # Label or offset setting item?
1642 item = self._try_parse_label_or_set_offset()
1643
1644 if item is not None:
1645 return item
1646
1647 # Offset alignment item?
1648 item = self._try_parse_align_offset()
1649
1650 if item is not None:
1651 return item
1652
1653 # Filling item?
1654 item = self._try_parse_fill_until()
1655
1656 if item is not None:
1657 return item
1658
1659 # Group item?
1660 item = self._try_parse_group()
1661
1662 if item is not None:
1663 return item
1664
1665 # Repetition block item?
1666 item = self._try_parse_rep_block()
1667
1668 if item is not None:
1669 return item
1670
1671 # Conditional block item?
1672 item = self._try_parse_cond_block()
1673
1674 if item is not None:
1675 return item
1676
1677 # Macro expansion?
1678 item = self._try_parse_macro_exp()
1679
1680 if item is not None:
1681 return item
1682
1683 # Pattern for _try_parse_rep_post()
1684 _rep_post_prefix_pat = re.compile(r"\*")
1685
1686 # Tries to parse a post-item repetition, returning the expression
1687 # string and AST expression node on success.
1688 def _try_parse_rep_post(self):
1689 # Match prefix
1690 if self._try_parse_pat(self._rep_post_prefix_pat) is None:
1691 # No match
1692 return
1693
1694 # Return expression string and AST expression
1695 self._skip_ws_and_comments()
1696 return self._expect_rep_mul_expr()
1697
# Tries to parse an item, possibly followed by a post-item repetition,
# and appends it to `items`.
#
# Returns `True` if an item was appended, or `None` if no base item
# could be parsed (whitespaces and comments are skipped in both cases).
#
# A post-item repetition is only looked for after a `_RepableItem`; the
# base item is then wrapped into a `_Rep` item located at the `*`.
def _try_append_item(self, items: List[_Item]):
    self._skip_ws_and_comments()
    parsed_item = self._try_parse_base_item()

    if parsed_item is None:
        # Nothing recognized here
        return

    if isinstance(parsed_item, _RepableItem):
        self._skip_ws_and_comments()
        rep_text_loc = self._text_loc
        mul_expr = self._try_parse_rep_post()

        if mul_expr is not None:
            parsed_item = _Rep(parsed_item, *mul_expr, text_loc=rep_text_loc)

    items.append(parsed_item)
    return True
1722
# Parses and returns items until the input is exhausted or until
# something which isn't an item is found (the position stays there).
#
# If `accept_macro_defs` is `True`, then macro definitions are accepted
# between items too; they're registered by _try_parse_macro_def() and
# aren't part of the returned list.
def _parse_items(self, accept_macro_defs: bool = False) -> List[_Item]:
    items = []  # type: List[_Item]

    while self._isnt_done():
        if self._try_append_item(items):
            # Got an item
            continue

        if not (accept_macro_defs and self._try_parse_macro_def()):
            # Neither an item nor an accepted macro definition
            break

    return items
1742
# Parses the whole Normand input, setting `self._res` to the main group
# item on success.
#
# Macro definitions are accepted at this first level. Anything left
# after the last item (other than whitespaces and comments) is reported
# with self._raise_error() as an unexpected character.
def _parse(self):
    if not self._normand.strip():
        # Special case to make sure there's something to consume
        self._res = _Group([], self._text_loc)
        return

    # First level items (and macro definitions)
    main_items = self._parse_items(True)

    # Nothing significant may remain
    self._skip_ws_and_comments()

    if self._isnt_done():
        unexpected_ch = self._normand[self._at]
        self._raise_error("Unexpected character `{}`".format(unexpected_ch))

    # Main group item
    self._res = _Group(main_items, self._text_loc)
1764
1765
# The return type of parse().
#
# Instances are only meant to be built through the private _create()
# class method: calling the class directly raises `NotImplementedError`.
class ParseResult:
    # Builds and returns an instance without going through __init__().
    @classmethod
    def _create(
        cls,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        res = cls.__new__(cls)
        res._init(data, variables, labels, offset, bo)
        return res

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Actual initializer (stores the parameters as is).
    def _init(
        self,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self._data, self._vars, self._labels = data, variables, labels
        self._offset, self._bo = offset, bo

    # Generated data.
    @property
    def data(self):
        return self._data

    # Dictionary of updated variable names to their last computed value.
    @property
    def variables(self):
        return self._vars

    # Dictionary of updated main group label names to their computed
    # value.
    @property
    def labels(self):
        return self._labels

    # Updated offset.
    @property
    def offset(self):
        return self._offset

    # Updated byte order.
    @property
    def byte_order(self):
        return self._bo
1823
1824
# Raises a parse error having the message `msg` and the text location
# of the item `item`.
def _raise_error_for_item(msg: str, item: _Item) -> NoReturn:
    item_text_loc = item.text_loc
    _raise_error(msg, item_text_loc)
1829
1830
1831 # The `ICITTE` reserved name.
1832 _icitte_name = "ICITTE"
1833
1834
1835 # Base node visitor.
1836 #
1837 # Calls the _visit_name() method for each name node which isn't the name
1838 # of a call.
1839 class _NodeVisitor(ast.NodeVisitor):
1840 def __init__(self):
1841 self._parent_is_call = False
1842
1843 def generic_visit(self, node: ast.AST):
1844 if type(node) is ast.Call:
1845 self._parent_is_call = True
1846 elif type(node) is ast.Name and not self._parent_is_call:
1847 self._visit_name(node.id)
1848
1849 super().generic_visit(node)
1850 self._parent_is_call = False
1851
1852 @abc.abstractmethod
1853 def _visit_name(self, name: str):
1854 ...
1855
1856
# Expression validator: reports, with _raise_error() at `text_loc`, the
# first visited name which is neither `ICITTE` nor part of
# `allowed_names`.
#
# The error message lists the legal names (the allowed ones plus
# `ICITTE`), sorted.
class _ExprValidator(_NodeVisitor):
    def __init__(self, expr_str: str, text_loc: TextLocation, allowed_names: Set[str]):
        super().__init__()
        self._expr_str = expr_str
        self._text_loc = text_loc
        self._allowed_names = allowed_names

    def _visit_name(self, name: str):
        if name == _icitte_name or name in self._allowed_names:
            # Known and reachable variable/label name
            return

        # Work on a copy: `self._allowed_names` belongs to the caller
        legal_names = self._allowed_names.copy()
        legal_names.add(_icitte_name)

        # Quote first, then sort (as the quoted strings compare)
        legal_names_str = ", ".join(
            sorted(["`{}`".format(legal_name) for legal_name in legal_names])
        )
        msg = "Illegal (unknown or unreachable) variable/label name `{}` in expression `{}`".format(
            name, self._expr_str
        )
        msg += "; the legal names are {{{}}}".format(legal_names_str)
        _raise_error(msg, self._text_loc)
1887
1888
# Generator state: current variables, labels, offset, and byte order.
#
# The `variables` and `labels` dictionaries are (shallow) copied at
# construction time so that updating the state doesn't modify the
# dictionaries of the caller.
class _GenState:
    def __init__(
        self,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self.variables = variables.copy()
        self.labels = labels.copy()
        self.offset = offset
        self.bo = bo

    def __repr__(self):
        return "_GenState({!r}, {!r}, {!r}, {!r})".format(
            self.variables, self.labels, self.offset, self.bo
        )
1907
1908
# Fixed-length number item instance: everything which is needed to
# generate the bytes of a fixed-length number item later.
#
# This is a read-only record of what it's built with.
class _FlNumItemInst:
    def __init__(
        self,
        item: _FlNum,
        offset_in_data: int,
        state: _GenState,
        parse_error_msgs: List[ParseErrorMessage],
    ):
        self._item, self._offset_in_data = item, offset_in_data
        self._state, self._parse_error_msgs = state, parse_error_msgs

    # Fixed-length number item.
    @property
    def item(self):
        return self._item

    # Offset of the instance within the generated data.
    @property
    def offset_in_data(self):
        return self._offset_in_data

    # Generation state to use when evaluating the item expression.
    @property
    def state(self):
        return self._state

    # Parsing error messages associated with this instance.
    @property
    def parse_error_msgs(self):
        return self._parse_error_msgs
1938
1939
1940 # Generator of data and final state from a group item.
1941 #
1942 # Generation happens in memory at construction time. After building, use
1943 # the `data`, `variables`, `labels`, `offset`, and `bo` properties to
1944 # get the resulting context.
1945 #
1946 # The steps of generation are:
1947 #
1948 # 1. Handle each item in prefix order.
1949 #
1950 # The handlers append bytes to `self._data` and update some current
1951 # state object (`_GenState` instance).
1952 #
1953 # When handling a fixed-length number item, try to evaluate its
1954 # expression using the current state. If this fails, then it might be
1955 # because the expression refers to a "future" label: save the current
1956 # offset in `self._data` (generated data) and a snapshot of the
1957 # current state within `self._fl_num_item_insts` (`_FlNumItemInst`
1958 # object). _gen_fl_num_item_insts() will deal with this later. A
1959 # `_FlNumItemInst` instance also contains a snapshot of the current
1960 # parsing error messages (`self._parse_error_msgs`) which need to be
1961 # taken into account when handling the instance later.
1962 #
1963 # When handling the items of a group, keep a map of immediate label
1964 # names to their offset. Then, after having processed all the items,
1965 # update the relevant saved state snapshots in
1966 # `self._fl_num_item_insts` with those immediate label values.
1967 # _gen_fl_num_item_insts() will deal with this later.
1968 #
1969 # 2. Handle all the fixed-length number item instances of which the
1970 # expression evaluation failed before.
1971 #
1972 # At this point, `self._fl_num_item_insts` contains everything that's
1973 # needed to evaluate the expressions, including the values of
1974 # "future" labels from the point of view of some fixed-length number
1975 # item instance.
1976 #
1977 # If an evaluation fails at this point, then it's a user error. Add
1978 # to the parsing error all the saved parsing error messages of the
1979 # instance. Those additional messages add precious context to the
1980 # error.
1981 class _Gen:
# Builds the generator and immediately generates from `group`, starting
# with a state made of (copies of) `variables` and `labels`, `offset`,
# and `bo`.
def __init__(
    self,
    group: _Group,
    macro_defs: _MacroDefsT,
    variables: VariablesT,
    labels: LabelsT,
    offset: int,
    bo: Optional[ByteOrder],
):
    self._macro_defs = macro_defs

    # Fixed-length number item instances to generate later
    self._fl_num_item_insts = []  # type: List[_FlNumItemInst]

    # Current stack of contextual parsing error messages
    self._parse_error_msgs = []  # type: List[ParseErrorMessage]

    init_state = _GenState(variables, labels, offset, bo)
    self._gen(group, init_state)
1995
# Generated bytes (`self._data`).
@property
def data(self):
    return self._data

# Variables of the final state.
@property
def variables(self):
    final_state = self._final_state
    return final_state.variables

# Main group labels of the final state.
@property
def labels(self):
    final_state = self._final_state
    return final_state.labels

# Offset of the final state.
@property
def offset(self):
    final_state = self._final_state
    return final_state.offset

# Byte order of the final state.
@property
def bo(self):
    final_state = self._final_state
    return final_state.bo
2020
# Evaluates the expression `expr`, of which the original string is
# `expr_str`, at the location `text_loc`, considering the generation
# state `state`.
#
# The available symbols are the labels, `ICITTE` (the current offset),
# and the variables of `state`; `ICITTE` shadows a label having the
# same name, and a variable shadows both.
#
# The result type must be `int` (a `bool` result is converted), or
# `float` too if `allow_float` is `True`. Any evaluation failure or
# unexpected result type is reported with _raise_error().
#
# NOTE(review): eval() receives `None` as its globals, so the globals
# of this module are reachable from user expressions; confirm that this
# is intended before tightening it.
@staticmethod
def _eval_expr(
    expr_str: str,
    expr: ast.Expression,
    text_loc: TextLocation,
    state: _GenState,
    allow_float: bool = False,
):
    # Labels first, then `ICITTE`, then the variables
    syms = dict(state.labels)  # type: VariablesT
    syms[_icitte_name] = state.offset
    syms.update(state.variables)

    # Validate the node and its children
    _ExprValidator(expr_str, text_loc, set(syms.keys())).visit(expr)

    # Compile and evaluate expression node
    try:
        val = eval(compile(expr, "", "eval"), None, syms)
    except Exception as exc:
        _raise_error(
            "Failed to evaluate expression `{}`: {}".format(expr_str, exc),
            text_loc,
        )

    # Normalize a `bool` result to `int`
    if type(val) is bool:
        val = int(val)

    # Validate result type
    if allow_float:
        expected_types = {int, float}  # type: Set[type]
        type_msg = "`int` or `float`"
    else:
        expected_types = {int}
        type_msg = "`int`"

    if type(val) not in expected_types:
        _raise_error(
            "Invalid expression `{}`: expecting result type {}, not `{}`".format(
                expr_str, type_msg, type(val).__name__
            ),
            text_loc,
        )

    return val
2077
# Evaluates the expression of `item` (its `expr_str`, `expr`, and
# `text_loc` attributes) considering the generation state `state`.
#
# If `allow_float` is `True`, then the type of the result may be
# `float` too.
@staticmethod
def _eval_item_expr(
    item: Union[_FlNum, _Leb128Int, _FillUntil, _VarAssign, _Rep, _Cond],
    state: _GenState,
    allow_float: bool = False,
):
    expr_str, expr, text_loc = item.expr_str, item.expr, item.text_loc
    return _Gen._eval_expr(expr_str, expr, text_loc, state, allow_float)
2092
# Handles the byte item `item`: appends its value to the generated data
# and advances the offset of `state` by the item size.
def _handle_byte_item(self, item: _Byte, state: _GenState):
    byte_val = item.val
    self._data.append(byte_val)
    state.offset += item.size
2097
# Handles the string item `item`: appends its data to the generated
# data and advances the offset of `state` by the item size.
def _handle_str_item(self, item: _Str, state: _GenState):
    str_data = item.data
    self._data += str_data
    state.offset += item.size
2102
# Handles the byte order setting item `item`: the byte order of `item`
# becomes the current one of `state` (no data is generated).
def _handle_set_bo_item(self, item: _SetBo, state: _GenState):
    new_bo = item.bo
    state.bo = new_bo
2107
# Handles the variable assignment item `item`: evaluates its expression
# (an `int` or `float` result is accepted) and binds the result to the
# variable name in `state` (no data is generated).
def _handle_var_assign_item(self, item: _VarAssign, state: _GenState):
    new_val = self._eval_item_expr(item, state, True)
    state.variables[item.name] = new_val
2112
# Handles the fixed-length number item `item`.
#
# Encoding more than 8 bits requires a current byte order: an error is
# raised for `item` if `state` has none.
#
# The bytes are generated immediately if possible. If this fails for
# any reason (deliberately broad: the expression could refer to a label
# which isn't known yet), then an `_FlNumItemInst` is recorded with
# deep copies of `state` and of the current parsing error messages, and
# zeros are appended as a placeholder of the right size.
def _handle_fl_num_item(self, item: _FlNum, state: _GenState):
    # Validate current byte order
    if item.len > 8 and state.bo is None:
        _raise_error_for_item(
            "Current byte order isn't defined at first fixed-length number (`{}`) to encode on more than 8 bits".format(
                item.expr_str
            ),
            item,
        )

    try:
        # Immediate evaluation
        data = self._gen_fl_num_item_inst_data(item, state)
    except Exception:
        # Keep everything needed to (try to) generate the bytes later
        inst = _FlNumItemInst(
            item,
            len(self._data),
            copy.deepcopy(state),
            copy.deepcopy(self._parse_error_msgs),
        )
        self._fl_num_item_insts.append(inst)

        # Reserve space in `self._data` for this instance
        data = bytes([0] * (item.len // 8))

    # Append the bytes and update the offset accordingly
    self._data += data
    state.offset += len(data)
2146
2147 # Returns the size, in bytes, required to encode the value `val`
2148 # with LEB128 (signed version if `is_signed` is `True`).
2149 @staticmethod
2150 def _leb128_size_for_val(val: int, is_signed: bool):
2151 if val < 0:
2152 # Equivalent upper bound.
2153 #
2154 # For example, if `val` is -128, then the full integer for
2155 # this number of bits would be [-128, 127].
2156 val = -val - 1
2157
2158 # Number of bits (add one for the sign if needed)
2159 bits = val.bit_length() + int(is_signed)
2160
2161 if bits == 0:
2162 bits = 1
2163
2164 # Seven bits per byte
2165 return math.ceil(bits / 7)
2166
# Handles the LEB128 integer item `item`: evaluates its expression
# (integer result only), appends the LEB128 encoding of the result to
# the generated data, and advances the offset of `state` by the number
# of bytes appended.
#
# The item is encoded as signed LEB128 if it's an `_SLeb128Int`, and as
# unsigned LEB128 otherwise. Unsigned LEB128 cannot represent a
# negative number (masking would silently produce the bytes of an
# unrelated positive value), so a negative result for an unsigned item
# raises a parse error for `item`.
def _handle_leb128_int_item(self, item: _Leb128Int, state: _GenState):
    # Compute value
    val = self._eval_item_expr(item, state, False)
    is_signed = type(item) is _SLeb128Int

    # Validate value
    if val < 0 and not is_signed:
        _raise_error_for_item(
            "Invalid expression `{}`: unexpected negative result {:,} for a ULEB128 encoding".format(
                item.expr_str, val
            ),
            item,
        )

    # Size in bytes
    size = self._leb128_size_for_val(val, is_signed)

    # For each byte
    for _ in range(size):
        # Seven LSBs, MSB of the byte set (continue)
        self._data.append((val & 0x7F) | 0x80)
        val >>= 7

    # Clear MSB of last byte (stop)
    self._data[-1] &= 0x7F

    # Update offset
    state.offset += size
2186
# Handles the group item `item` by handling each of its items in order.
#
# The offsets of the labels found directly within the group (its
# "immediate" labels) are collected along the way. Once all the items
# are handled:
#
# * If `remove_immediate_labels` is `True`, those labels are removed
#   from `state`.
#
# * Those labels are added to the state snapshot of every fixed-length
#   number item instance recorded while handling this group.
def _handle_group_item(
    self, item: _Group, state: _GenState, remove_immediate_labels: bool = True
):
    first_inst_index = len(self._fl_num_item_insts)
    immediate_labels = {}  # type: LabelsT

    for subitem in item.items:
        if type(subitem) is _Label:
            # Offset of the label: the one before handling it
            immediate_labels[subitem.name] = state.offset

        self._handle_item(subitem, state)

    if remove_immediate_labels:
        for label_name in immediate_labels:
            del state.labels[label_name]

    # Instances recorded since the beginning of this group
    new_insts = self._fl_num_item_insts[first_inst_index:]

    for inst in new_insts:
        inst.state.labels.update(immediate_labels)
2212
# Handles the repetition item `item`: evaluates its multiplier
# expression (integer result only) and handles the repeated item that
# many times.
#
# A negative multiplier raises a parse error for `item`.
def _handle_rep_item(self, item: _Rep, state: _GenState):
    count = _Gen._eval_item_expr(item, state)

    if count < 0:
        _raise_error_for_item(
            "Invalid expression `{}`: unexpected negative result {:,}".format(
                item.expr_str, count
            ),
            item,
        )

    repeated_item = item.item

    for _ in range(count):
        self._handle_item(repeated_item, state)
2230
# Handles the conditional item `item`: evaluates its expression
# (integer result only) and handles the "true" item if the result is
# nonzero, or the "false" item otherwise.
def _handle_cond_item(self, item: _Cond, state: _GenState):
    cond_val = _Gen._eval_item_expr(item, state)
    selected_item = item.true_item if cond_val else item.false_item
    self._handle_item(selected_item, state)
2241
# Creates and returns the state with which to handle the items of the
# macro which the macro expansion item `item` refers to.
#
# The new state has the offset and byte order of `init_state`, no
# labels, and a single variable per macro parameter: the result of
# evaluating the corresponding parameter expression of `item`
# considering `init_state` (`int` or `float`).
def _eval_macro_exp_params(self, item: _MacroExp, init_state: _GenState):
    exp_state = _GenState({}, {}, init_state.offset, init_state.bo)
    param_names = self._macro_defs[item.name].param_names

    # Bind each parameter name to the value of its expression
    for param_name, param in zip(param_names, item.params):
        param_val = _Gen._eval_expr(
            param.expr_str, param.expr, param.text_loc, init_state, True
        )
        exp_state.variables[param_name] = param_val

    return exp_state
2258
# Handles the macro expansion item `item`.
#
# The group of the macro is handled with its own state (see
# _eval_macro_exp_params()); only the offset of `state` is updated
# afterwards, by the number of bytes which the expansion generated.
#
# While the group is handled, a contextual message is kept on top of
# `self._parse_error_msgs`. Any `ParseError` is passed to
# _augment_error() with the same message text and location.
#
# NOTE(review): this assumes that _augment_error() always raises;
# otherwise `init_data_size` could be unbound below. TODO confirm.
def _handle_macro_exp_item(self, item: _MacroExp, state: _GenState):
    ctx_msg_text = "While expanding the macro `{}`:".format(item.name)

    try:
        # State for the items of the macro
        exp_state = self._eval_macro_exp_params(item, state)

        # Handle the group of the macro with the contextual message
        # pushed.
        init_data_size = len(self._data)
        self._parse_error_msgs.append(
            ParseErrorMessage._create(  # pyright: ignore[reportPrivateUsage]
                ctx_msg_text, item.text_loc
            )
        )
        self._handle_item(self._macro_defs[item.name].group, exp_state)
        self._parse_error_msgs.pop()
    except ParseError as exc:
        _augment_error(exc, ctx_msg_text, item.text_loc)

    # Account for the generated bytes
    state.offset += len(self._data) - init_data_size
2282
# Handles the offset setting item `item`: its value becomes the current
# offset of `state` (no data is generated).
def _handle_set_offset_item(self, item: _SetOffset, state: _GenState):
    new_offset = item.val
    state.offset = new_offset
2286
# Handles the offset alignment item `item`: moves the offset of `state`
# up to the next multiple of the alignment (given in bits, hence the
# division by eight) and appends as many padding bytes as needed.
def _handle_align_offset_item(self, item: _AlignOffset, state: _GenState):
    align_bytes = item.val // 8

    # Distance to the next multiple of `align_bytes` (zero if the
    # offset is already aligned).
    pad_len = -state.offset % align_bytes
    state.offset += pad_len
    self._data += bytes([item.pad_val] * pad_len)
2293
# Handles the filling item `item`: evaluates its expression (integer
# result only) to get the target offset, appends padding bytes up to
# it, and makes it the current offset of `state`.
#
# A target offset which is less than the current one raises a parse
# error for `item`.
def _handle_fill_until_item(self, item: _FillUntil, state: _GenState):
    target_offset = _Gen._eval_item_expr(item, state)
    fill_len = target_offset - state.offset

    if fill_len < 0:
        _raise_error_for_item(
            "Invalid expression `{}`: new offset {:,} is less than current offset {:,}".format(
                item.expr_str, target_offset, state.offset
            ),
            item,
        )

    # Fill and update the offset
    self._data += bytes([item.pad_val] * fill_len)
    state.offset = target_offset
2313
# Handles the label item `item`: binds its name to the current offset
# within the labels of `state` (no data is generated).
def _handle_label_item(self, item: _Label, state: _GenState):
    cur_offset = state.offset
    state.labels[item.name] = cur_offset
2317
# Handles the item `item` by calling the handler registered for its
# exact type, returning whatever this handler returns.
def _handle_item(self, item: _Item, state: _GenState):
    # Look up the handler by the exact type of `item` (no subclass
    # matching), then delegate to it.
    handler = self._item_handlers[type(item)]
    return handler(item, state)
2322
2323 # Generates the data for a fixed-length integer item instance having
2324 # the value `val` and returns it.
def _gen_fl_int_item_inst_data(self, val: int, item: _FlNum, state: _GenState):
    # Accepted values span both the signed and the unsigned ranges of
    # an integer of `item.len` bits.
    lowest = -(2 ** (item.len - 1))
    highest = 2**item.len - 1

    if not (lowest <= val <= highest):
        err_text = "Value {:,} is outside the {}-bit range when evaluating expression `{}`".format(
            val, item.len, item.expr_str
        )
        _raise_error_for_item(err_text, item)

    # No byte order set means big endian
    is_big_endian = state.bo in (None, ByteOrder.BE)

    # Always encode on 64 bits first so that the sign bit is extended
    # whatever the value of `item.len`; `Q` (unsigned) is needed for
    # the positive values which don't fit the signed `q`.
    order_char = ">" if is_big_endian else "<"
    type_char = "Q" if val >= 0 else "q"
    wide_data = struct.pack(order_char + type_char, val)

    # Only the least significant `item.len // 8` bytes are wanted
    byte_count = item.len // 8

    if is_big_endian:
        # Least significant bytes are the last ones
        return wide_data[-byte_count:]

    # Little endian: least significant bytes are the first ones
    assert state.bo == ByteOrder.LE
    return wide_data[:byte_count]
2358
2359 # Generates the data for a fixed-length floating point number item
2360 # instance having the value `val` and returns it.
def _gen_fl_float_item_inst_data(self, val: float, item: _FlNum, state: _GenState):
    # Only the 32-bit and 64-bit lengths are supported
    if item.len != 32 and item.len != 64:
        err_text = "Invalid {}-bit length for a fixed-length floating point number (value {:,})".format(
            item.len, val
        )
        _raise_error_for_item(err_text, item)

    # No byte order set means big endian
    if state.bo in (None, ByteOrder.BE):
        order_char = ">"
    else:
        order_char = "<"

    # `f` is the 32-bit format and `d` is the 64-bit one
    if item.len == 32:
        type_char = "f"
    else:
        type_char = "d"

    return struct.pack("{}{}".format(order_char, type_char), val)
2379
2380 # Generates the data for a fixed-length number item instance and
2381 # returns it.
def _gen_fl_num_item_inst_data(self, item: _FlNum, state: _GenState):
    # Evaluate the expression of the item (third argument is `True`;
    # the float branch below suggests that it allows a `float`
    # result).
    num = self._eval_item_expr(item, state, True)

    # Exact type checks on purpose: an `int` subclass doesn't take
    # the integer path.
    if type(num) is not int:
        assert type(num) is float
        return self._gen_fl_float_item_inst_data(num, item, state)

    return self._gen_fl_int_item_inst_data(num, item, state)
2392
2393 # Generates the data for all the fixed-length number item instances
2394 # and writes it at the correct offset within `self._data`.
2395 def _gen_fl_num_item_insts(self):
2396 for inst in self._fl_num_item_insts:
2397 # Generate bytes
2398 try:
2399 data = self._gen_fl_num_item_inst_data(inst.item, inst.state)
2400 except ParseError as exc:
2401 # Add all the saved parse error messages for this
2402 # instance.
2403 for msg in reversed(inst.parse_error_msgs):
2404 _add_error_msg(exc, msg.text, msg.text_location)
2405
2406 raise
2407
2408 # Insert bytes into `self._data`
2409 self._data[inst.offset_in_data : inst.offset_in_data + len(data)] = data
2410
2411 # Generates the data (`self._data`) and final state
2412 # (`self._final_state`) from `group` and the initial state `state`.
def _gen(self, group: _Group, state: _GenState):
    # Start from an empty output buffer
    self._data = bytearray()

    # Dispatch table: exact item type to the bound method which
    # handles it. Both LEB128 item types share a single handler.
    handlers = {
        _AlignOffset: self._handle_align_offset_item,
        _Byte: self._handle_byte_item,
        _Cond: self._handle_cond_item,
        _FillUntil: self._handle_fill_until_item,
        _FlNum: self._handle_fl_num_item,
        _Group: self._handle_group_item,
        _Label: self._handle_label_item,
        _MacroExp: self._handle_macro_exp_item,
        _Rep: self._handle_rep_item,
        _SetBo: self._handle_set_bo_item,
        _SetOffset: self._handle_set_offset_item,
        _SLeb128Int: self._handle_leb128_int_item,
        _Str: self._handle_str_item,
        _ULeb128Int: self._handle_leb128_int_item,
        _VarAssign: self._handle_var_assign_item,
    }  # type: Dict[type, Callable[[Any, _GenState], None]]
    self._item_handlers = handlers

    # Process the root group. The last argument is `False` so that
    # its immediate labels are _not_ removed: the `labels` property
    # offers them.
    self._handle_group_item(group, state, False)

    # Once the root group is processed, `state` is the final state
    self._final_state = state

    # The fixed-length number bytes are generated last, now that
    # their full state is known.
    self._gen_fl_num_item_insts()
2446
2447
2448 # Returns a `ParseResult` instance containing the bytes encoded by the
2449 # input string `normand`.
2450 #
# `init_variables` is a dictionary of initial variable names (valid
# Python names) to integral or floating point values. A variable name
# must not be the reserved name `ICITTE`.
2454 #
2455 # `init_labels` is a dictionary of initial label names (valid Python
2456 # names) to integral values. A label name must not be the reserved name
2457 # `ICITTE`.
2458 #
2459 # `init_offset` is the initial offset.
2460 #
2461 # `init_byte_order` is the initial byte order.
2462 #
2463 # Raises `ParseError` on any parsing error.
def parse(
    normand: str,
    init_variables: Optional[VariablesT] = None,
    init_labels: Optional[LabelsT] = None,
    init_offset: int = 0,
    init_byte_order: Optional[ByteOrder] = None,
):
    # A missing mapping means "no initial entries": use a fresh
    # dictionary for each call.
    variables = {} if init_variables is None else init_variables
    labels = {} if init_labels is None else init_labels

    # First parse the text into items, then generate the data from
    # those items and the initial state.
    parser = _Parser(normand, variables, labels)
    gen = _Gen(
        parser.res,
        parser.macro_defs,
        variables,
        labels,
        init_offset,
        init_byte_order,
    )

    # Wrap the generated data and the final state
    return ParseResult._create(  # pyright: ignore[reportPrivateUsage]
        gen.data, gen.variables, gen.labels, gen.offset, gen.bo
    )
2489
2490
2491 # Raises a command-line error with the message `msg`.
2492 def _raise_cli_error(msg: str) -> NoReturn:
2493 raise RuntimeError("Command-line error: {}".format(msg))
2494
2495
2496 # Returns the `int` or `float` value out of a CLI assignment value.
def _val_from_assign_val_str(s: str, is_label: bool):
    # Surrounding whitespace isn't part of the value
    text = s.strip()

    # Only a variable (not a label) may be a floating point number;
    # try this form first.
    if not is_label:
        float_m = _const_float_pat.fullmatch(text)

        if float_m is not None:
            return float(float_m.group(0))

    # Integer: base 0 makes int() honour the prefix of the
    # normalized constant.
    int_m = _const_int_pat.fullmatch(text)

    if int_m is not None:
        return int(_norm_const_int(int_m.group(0)), 0)

    # Neither form matched: report what was expected
    if is_label:
        expected = "an integer"
    else:
        expected = "a number"

    _raise_cli_error(
        "Invalid assignment value `{}`: expecting {}".format(text, expected)
    )
2515
2516
2517 # Returns a dictionary of string to numbers from the list of strings
2518 # `args` containing `NAME=VAL` entries.
2519 def _dict_from_arg(args: Optional[List[str]], is_label: bool):
2520 d = {} # type: VariablesT
2521
2522 if args is None:
2523 return d
2524
2525 for arg in args:
2526 m = re.match(r"({})\s*=\s*(.+)$".format(_py_name_pat.pattern), arg)
2527
2528 if m is None:
2529 _raise_cli_error("Invalid assignment `{}`".format(arg))
2530
2531 d[m.group(1)] = _val_from_assign_val_str(m.group(2), is_label)
2532
2533 return d
2534
2535
2536 # Parses the command-line arguments and returns, in this order:
2537 #
2538 # 1. The input file path, or `None` if none.
2539 # 2. The Normand input text.
2540 # 3. The initial offset.
2541 # 4. The initial byte order.
2542 # 5. The initial variables.
2543 # 6. The initial labels.
def _parse_cli_args():
    # Parses `sys.argv` and returns the tuple
    # `(path, normand, offset, bo, variables, labels)`.
    #
    # Raises `RuntimeError` (through _raise_cli_error()) on an invalid
    # offset or assignment; argparse itself exits the process on a
    # malformed command line.
    import argparse

    # Build parser
    ap = argparse.ArgumentParser()
    ap.add_argument(
        "--offset",
        metavar="OFFSET",
        action="store",
        type=int,
        default=0,
        help="initial offset (positive)",
    )
    ap.add_argument(
        "-b",
        "--byte-order",
        metavar="BO",
        choices=["be", "le"],
        type=str,
        help="initial byte order (`be` or `le`)",
    )
    ap.add_argument(
        "-v",
        "--var",
        metavar="NAME=VAL",
        action="append",
        help="add an initial variable (may be repeated)",
    )
    ap.add_argument(
        "-l",
        "--label",
        metavar="NAME=VAL",
        action="append",
        help="add an initial label (may be repeated)",
    )
    ap.add_argument(
        "--version", action="version", version="Normand {}".format(__version__)
    )
    ap.add_argument(
        "path",
        metavar="PATH",
        action="store",
        nargs="?",
        help="input path (none means standard input)",
    )

    # Parse
    args = ap.parse_args()

    # Validate the options _before_ reading the input so that an
    # invalid command line is reported immediately instead of after
    # waiting for the end of the standard input.
    if args.offset < 0:
        # Include the offending value in the message
        _raise_cli_error("Invalid negative offset {}".format(args.offset))

    # Variables (integers or floating point numbers) and labels
    # (integers only)
    variables = _dict_from_arg(args.var, False)
    labels = _dict_from_arg(args.label, True)

    # Set byte order; argparse already restricts the value to `be` or
    # `le` when the option is present.
    bo = None  # type: Optional[ByteOrder]

    if args.byte_order is not None:
        if args.byte_order == "be":
            bo = ByteOrder.BE
        else:
            assert args.byte_order == "le"
            bo = ByteOrder.LE

    # Read input
    if args.path is None:
        normand = sys.stdin.read()
    else:
        with open(args.path) as f:
            normand = f.read()

    # Return input and initial state
    return args.path, normand, args.offset, bo, variables, typing.cast(LabelsT, labels)
2620
2621
2622 # CLI entry point without exception handling.
def _run_cli_with_args(
    normand: str,
    offset: int,
    bo: Optional[ByteOrder],
    variables: VariablesT,
    labels: LabelsT,
):
    # Generate the data, then write it as is to the binary layer of
    # the standard output. Note that parse() expects the variables and
    # labels _before_ the offset and byte order.
    res = parse(normand, variables, labels, offset, bo)
    sys.stdout.buffer.write(res.data)
2631
2632
2633 # Prints the exception message `msg` and exits with status 1.
2634 def _fail(msg: str) -> NoReturn:
2635 if not msg.endswith("."):
2636 msg += "."
2637
2638 print(msg.strip(), file=sys.stderr)
2639 sys.exit(1)
2640
2641
2642 # CLI entry point.
def _run_cli():
    # Any failure while parsing the command line is reported as a
    # plain message; _fail() never returns.
    try:
        cli_args = _parse_cli_args()
    except Exception as exc:
        _fail(str(exc))

    try:
        # The first element is the input path, only needed to report
        # parse errors; everything after it is forwarded.
        _run_cli_with_args(*cli_args[1:])
    except ParseError as exc:
        import os.path

        # Prefix each location with the absolute input path, if any
        in_path = cli_args[0]
        loc_prefix = "" if in_path is None else "{}:".format(os.path.abspath(in_path))

        # One line per message, in reverse order, each one ending
        # with some punctuation.
        lines = []  # type: List[str]

        for err_msg in reversed(exc.messages):
            line = "{}{}:{} - {}".format(
                loc_prefix,
                err_msg.text_location.line_no,
                err_msg.text_location.col_no,
                err_msg.text,
            )

            if not line.endswith((".", ":", ";")):
                line += "."

            lines.append(line)

        _fail("\n".join(lines).strip())
    except Exception as exc:
        _fail(str(exc))
2673
2674
# Run the command-line tool only when this module is executed as a
# script, not when it's imported.
if __name__ == "__main__":
    _run_cli()
This page took 0.079463 seconds and 3 git commands to generate.