Add conditional block support
[normand.git] / normand / normand.py
CommitLineData
71aaa3f7
PP
1# The MIT License (MIT)
2#
3# Copyright (c) 2023 Philippe Proulx <eeppeliteloop@gmail.com>
4#
5# Permission is hereby granted, free of charge, to any person obtaining
6# a copy of this software and associated documentation files (the
7# "Software"), to deal in the Software without restriction, including
8# without limitation the rights to use, copy, modify, merge, publish,
9# distribute, sublicense, and/or sell copies of the Software, and to
10# permit persons to whom the Software is furnished to do so, subject to
11# the following conditions:
12#
13# The above copyright notice and this permission notice shall be
14# included in all copies or substantial portions of the Software.
15#
16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
5a993698
PP
24# This module is the portable Normand processor. It offers both the
25# parse() function and the command-line tool (run the module itself)
26# without external dependencies except a `typing` module for Python 3.4.
27#
28# Feel free to copy this module file to your own project to use Normand.
29#
30# Upstream repository: <https://github.com/efficios/normand>.
31
71aaa3f7 32__author__ = "Philippe Proulx"
27d52a19 33__version__ = "0.9.0"
71aaa3f7
PP
34__all__ = [
35 "ByteOrder",
36 "parse",
37 "ParseError",
38 "ParseResult",
e57a18e1
PP
39 "TextLocation",
40 "LabelsT",
41 "VariablesT",
71aaa3f7
PP
42 "__author__",
43 "__version__",
44]
45
46import re
47import abc
48import ast
49import sys
50import enum
05f81895 51import math
71aaa3f7 52import struct
e57a18e1
PP
53import typing
54from typing import Any, Set, Dict, List, Union, Pattern, Callable, NoReturn, Optional
71aaa3f7
PP
55
56
57# Text location (line and column numbers).
class TextLocation:
    """Location within a source text: a line number and a column number.

    The constructor always raises `NotImplementedError`; instances are
    built with the `_create()` class method instead.
    """

    def __init__(*args, **kwargs):  # type: ignore
        raise NotImplementedError

    @classmethod
    def _create(cls, line_no: int, col_no: int):
        """Return a new location for line `line_no` and column `col_no`."""
        # Allocate without going through __init__(), which always raises.
        loc = cls.__new__(cls)
        loc._init(line_no, col_no)
        return loc

    def _init(self, line_no: int, col_no: int):
        """Store the line and column numbers."""
        self._line_no, self._col_no = line_no, col_no

    @property
    def line_no(self):
        """Line number."""
        return self._line_no

    @property
    def col_no(self):
        """Column number."""
        return self._col_no

    def __repr__(self):
        numbers = (self._line_no, self._col_no)
        return "TextLocation({}, {})".format(*numbers)
2adf4336 84
71aaa3f7
PP
85
86# Any item.
class _Item:
    """Base class of the parsed items: keeps the source text location
    received at construction time."""

    def __init__(self, text_loc: TextLocation):
        self._text_loc = text_loc

    # Source text location.
    @property
    def text_loc(self):
        return self._text_loc
95
2adf4336
PP
96
97# Scalar item.
class _ScalarItem(_Item):
    """Scalar item: an item which reports a size in bytes."""

    # Returns the size, in bytes, of this item.
    #
    # Declared abstract: the concrete scalar item classes provide it.
    @property
    @abc.abstractmethod
    def size(self) -> int:
        ...
104
105
106# A repeatable item.
class _RepableItem:
    # Marker base class for repeatable items: it declares no attribute
    # and no method of its own.
    pass
109
110
111# Single byte.
class _Byte(_ScalarItem, _RepableItem):
    """Single byte item."""

    def __init__(self, val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val

    def __repr__(self):
        # The value is shown in hexadecimal.
        shown_val = hex(self._val)
        return "_Byte({}, {})".format(shown_val, repr(self._text_loc))

    @property
    def val(self):
        """Byte value."""
        return self._val

    @property
    def size(self):
        """Size in bytes (always 1)."""
        return 1
71aaa3f7
PP
128
129
130# String.
class _Str(_ScalarItem, _RepableItem):
    """String item holding already encoded bytes."""

    def __init__(self, data: bytes, text_loc: TextLocation):
        super().__init__(text_loc)
        self._data = data

    def __repr__(self):
        return "_Str({}, {})".format(*map(repr, (self._data, self._text_loc)))

    @property
    def data(self):
        """Encoded bytes."""
        return self._data

    @property
    def size(self):
        """Size in bytes: the length of the encoded data."""
        return len(self._data)
71aaa3f7
PP
147
148
149# Byte order.
# `enum.unique` rejects two members sharing the same value.
@enum.unique
class ByteOrder(enum.Enum):
    # Big endian.
    BE = "be"

    # Little endian.
    LE = "le"
157
158
2adf4336
PP
159# Byte order setting.
class _SetBo(_Item):
    """Byte order setting item."""

    def __init__(self, bo: ByteOrder, text_loc: TextLocation):
        super().__init__(text_loc)
        self._bo = bo

    def __repr__(self):
        return "_SetBo({}, {})".format(*map(repr, (self._bo, self._text_loc)))

    @property
    def bo(self):
        """Byte order to set."""
        return self._bo
71aaa3f7
PP
171
172
173# Label.
class _Label(_Item):
    """Label item."""

    def __init__(self, name: str, text_loc: TextLocation):
        super().__init__(text_loc)
        self._name = name

    def __repr__(self):
        return "_Label({}, {})".format(*map(repr, (self._name, self._text_loc)))

    @property
    def name(self):
        """Label name."""
        return self._name
71aaa3f7
PP
186
187
2adf4336
PP
188# Offset setting.
class _SetOffset(_Item):
    """Offset setting item."""

    def __init__(self, val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val

    def __repr__(self):
        return "_SetOffset({}, {})".format(*map(repr, (self._val, self._text_loc)))

    @property
    def val(self):
        """Offset value (bytes)."""
        return self._val
201
202
203# Offset alignment.
class _AlignOffset(_Item):
    """Offset alignment item."""

    def __init__(self, val: int, pad_val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val, self._pad_val = val, pad_val

    def __repr__(self):
        shown = map(repr, (self._val, self._pad_val, self._text_loc))
        return "_AlignOffset({}, {}, {})".format(*shown)

    @property
    def val(self):
        """Alignment value (bits)."""
        return self._val

    @property
    def pad_val(self):
        """Padding byte value."""
        return self._pad_val
71aaa3f7
PP
224
225
226# Mixin of containing an AST expression and its string.
227class _ExprMixin:
228 def __init__(self, expr_str: str, expr: ast.Expression):
229 self._expr_str = expr_str
230 self._expr = expr
231
232 # Expression string.
233 @property
234 def expr_str(self):
235 return self._expr_str
236
237 # Expression node to evaluate.
238 @property
239 def expr(self):
240 return self._expr
241
242
2adf4336
PP
243# Variable assignment.
class _VarAssign(_Item, _ExprMixin):
    """Variable assignment item: a variable name and the expression
    assigned to it."""

    def __init__(
        self, name: str, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        # The mixin has its own initializer: call it explicitly.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._name = name

    def __repr__(self):
        fields = (self._name, self._expr_str, self._expr, self._text_loc)
        return "_VarAssign({}, {}, {}, {})".format(*map(repr, fields))

    @property
    def name(self):
        """Name."""
        return self._name
264
265
269f6eb3
PP
266# Fixed-length number, possibly needing more than one byte.
class _FlNum(_ScalarItem, _RepableItem, _ExprMixin):
    """Fixed-length number item, possibly needing more than one byte."""

    def __init__(
        self, expr_str: str, expr: ast.Expression, len: int, text_loc: TextLocation
    ):
        # The mixin has its own initializer: call it explicitly.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._len = len

    def __repr__(self):
        fields = (self._expr_str, self._expr, self._len, self._text_loc)
        return "_FlNum({}, {}, {}, {})".format(*map(repr, fields))

    @property
    def len(self):
        """Length (bits)."""
        return self._len

    @property
    def size(self):
        """Size in bytes: the length in bits divided by eight."""
        return self._len // 8
291
292
05f81895
PP
293# LEB128 integer.
class _Leb128Int(_Item, _RepableItem, _ExprMixin):
    """LEB128 integer item (base of the unsigned and signed variants)."""

    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation):
        # The mixin has its own initializer: call it explicitly.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)

    def __repr__(self):
        # Use the concrete class name so that subclasses show their own.
        fields = map(repr, (self._expr_str, self._expr, self._text_loc))
        return "{}({}, {}, {})".format(type(self).__name__, *fields)
306
307
308# Unsigned LEB128 integer.
class _ULeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    # Adds nothing to `_Leb128Int`: the class itself identifies the
    # unsigned variant.
    pass
311
312
313# Signed LEB128 integer.
class _SLeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    # Adds nothing to `_Leb128Int`: the class itself identifies the
    # signed variant.
    pass
316
317
71aaa3f7 318# Group of items.
class _Group(_Item, _RepableItem):
    """Group of items."""

    def __init__(self, items: List[_Item], text_loc: TextLocation):
        super().__init__(text_loc)
        self._items = items

    def __repr__(self):
        return "_Group({}, {})".format(*map(repr, (self._items, self._text_loc)))

    @property
    def items(self):
        """Contained items."""
        return self._items
71aaa3f7
PP
331
332
333# Repetition item.
2adf4336
PP
class _Rep(_Item, _ExprMixin):
    """Repetition item: an item to repeat and the expression attached
    to the repetition."""

    def __init__(
        self, item: _Item, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        # The mixin has its own initializer: call it explicitly.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._item = item

    def __repr__(self):
        fields = (self._item, self._expr_str, self._expr, self._text_loc)
        return "_Rep({}, {}, {}, {})".format(*map(repr, fields))

    @property
    def item(self):
        """Item to repeat."""
        return self._item
354
355
27d52a19
PP
356# Conditional item.
class _Cond(_Item, _ExprMixin):
    """Conditional item: an item and the expression attached to the
    condition."""

    def __init__(
        self, item: _Item, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        # The mixin has its own initializer: call it explicitly.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._item = item

    def __repr__(self):
        fields = (self._item, self._expr_str, self._expr, self._text_loc)
        return "_Cond({}, {}, {}, {})".format(*map(repr, fields))

    @property
    def item(self):
        """Conditional item."""
        return self._item
377
378
2adf4336 379# Expression item type.
# Union of the item classes which use `_ExprMixin`.
_ExprItemT = Union[_FlNum, _Leb128Int, _VarAssign, _Rep, _Cond]
2adf4336
PP
381
382
71aaa3f7
PP
383# A parsing error containing a message and a text location.
class ParseError(RuntimeError):
    """Parsing error containing a message and a text location.

    The constructor always raises `NotImplementedError`; instances are
    built with the `_create()` class method instead.
    """

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    @classmethod
    def _create(cls, msg: str, text_loc: TextLocation):
        """Return a new error with message `msg` at `text_loc`."""
        # Allocate without going through __init__(), which always raises.
        error = cls.__new__(cls)
        error._init(msg, text_loc)
        return error

    def _init(self, msg: str, text_loc: TextLocation):
        """Initialize the base exception with `msg` and keep `text_loc`."""
        super().__init__(msg)
        self._text_loc = text_loc

    @property
    def text_loc(self):
        """Source text location."""
        return self._text_loc
402
403
404# Raises a parsing error, forwarding the parameters to the constructor.
def _raise_error(msg: str, text_loc: TextLocation) -> NoReturn:
    """Raise a `ParseError` built from `msg` and `text_loc`.

    Never returns.
    """
    raise ParseError._create(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
407
408
e57a18e1
PP
# Variables dictionary type (for type hints): variable name to integer
# or floating point value.
VariablesT = Dict[str, Union[int, float]]


# Labels dictionary type (for type hints): label name to integer.
LabelsT = Dict[str, int]


# Python name pattern: a letter or `_` followed by any number of
# letters, digits, or `_` (ASCII only).
_py_name_pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")
419
420
421# Normand parser.
422#
423# The constructor accepts a Normand input. After building, use the `res`
424# property to get the resulting main group.
425class _Parser:
426 # Builds a parser to parse the Normand input `normand`, parsing
427 # immediately.
def __init__(self, normand: str, variables: VariablesT, labels: LabelsT):
    """Build a parser for the Normand input `normand` and parse it
    immediately.

    The keys of `variables` and `labels` seed the sets of already known
    variable and label names.
    """
    self._normand = normand

    # Parsing state: offset within `normand`, and the line and column
    # numbers of that offset.
    self._at = 0
    self._line_no = self._col_no = 1

    # Known names (iterating a dictionary yields its keys)
    self._label_names = set(labels)
    self._var_names = set(variables)

    # Parse now
    self._parse()
436
437 # Result (main group).
@property
def res(self):
    # Result (main group).
    #
    # NOTE(review): `self._res` isn't assigned in the visible part of
    # this class; presumably `_parse()` sets it -- confirm.
    return self._res
441
442 # Current text location.
@property
def _text_loc(self):
    # Current text location: a new `TextLocation` built from the current
    # line and column numbers each time this property is read.
    return TextLocation._create(  # pyright: ignore[reportPrivateUsage]
        self._line_no, self._col_no
    )
448
449 # Returns `True` if this parser is done parsing.
def _is_done(self):
    # Done when the current offset reached the end of the input.
    return self._at == len(self._normand)
452
453 # Returns `True` if this parser isn't done parsing.
def _isnt_done(self):
    # Exact negation of _is_done().
    return not self._is_done()
456
457 # Raises a parse error, creating it using the message `msg` and the
458 # current text location.
def _raise_error(self, msg: str) -> NoReturn:
    # Delegates to the module-level _raise_error() with the current text
    # location; never returns.
    _raise_error(msg, self._text_loc)
461
462 # Tries to make the pattern `pat` match the current substring,
463 # returning the match object and updating `self._at`,
464 # `self._line_no`, and `self._col_no` on success.
465 def _try_parse_pat(self, pat: Pattern[str]):
466 m = pat.match(self._normand, self._at)
467
468 if m is None:
469 return
470
471 # Skip matched string
472 self._at += len(m.group(0))
473
474 # Update line number
475 self._line_no += m.group(0).count("\n")
476
477 # Update column number
478 for i in reversed(range(self._at)):
479 if self._normand[i] == "\n" or i == 0:
480 if i == 0:
481 self._col_no = self._at + 1
482 else:
483 self._col_no = self._at - i
484
485 break
486
487 # Return match object
488 return m
489
490 # Expects the pattern `pat` to match the current substring,
491 # returning the match object and updating `self._at`,
492 # `self._line_no`, and `self._col_no` on success, or raising a parse
493 # error with the message `error_msg` on error.
494 def _expect_pat(self, pat: Pattern[str], error_msg: str):
495 # Match
496 m = self._try_parse_pat(pat)
497
498 if m is None:
499 # No match: error
500 self._raise_error(error_msg)
501
502 # Return match object
503 return m
504
505 # Pattern for _skip_ws_and_comments()
506 _ws_or_syms_or_comments_pat = re.compile(
e57a18e1 507 r"(?:[\s/\\?&:;.,+[\]_=|-]|#[^#]*?(?:\n|#))*"
71aaa3f7
PP
508 )
509
510 # Skips as many whitespaces, insignificant symbol characters, and
511 # comments as possible.
def _skip_ws_and_comments(self):
    # A single attempt is enough: the pattern itself repeats (`*`) over
    # whitespace, symbol characters, and comments. The returned match
    # object is ignored.
    self._try_parse_pat(self._ws_or_syms_or_comments_pat)
514
515 # Pattern for _try_parse_hex_byte()
516 _nibble_pat = re.compile(r"[A-Fa-f0-9]")
517
518 # Tries to parse a hexadecimal byte, returning a byte item on
519 # success.
def _try_parse_hex_byte(self):
    """Try to parse a hexadecimal byte (two nibbles).

    Returns a `_Byte` item located at the first nibble, or `None` when
    there's no initial nibble. Whitespace, symbols, and comments may
    separate both nibbles; a missing second nibble is a parse error.
    """
    start_loc = self._text_loc

    # High nibble (optional: without it this isn't a hexadecimal byte)
    high = self._try_parse_pat(self._nibble_pat)

    if high is None:
        return

    # Low nibble (mandatory from now on)
    self._skip_ws_and_comments()
    low = self._expect_pat(self._nibble_pat, "Expecting another hexadecimal nibble")

    # Build the item from both hexadecimal digits
    digits = high.group(0) + low.group(0)
    return _Byte(int(digits, 16), start_loc)
71aaa3f7
PP
538
539 # Patterns for _try_parse_bin_byte()
540 _bin_byte_bit_pat = re.compile(r"[01]")
541 _bin_byte_prefix_pat = re.compile(r"%")
542
543 # Tries to parse a binary byte, returning a byte item on success.
def _try_parse_bin_byte(self):
    """Try to parse a binary byte: `%` followed by eight bits.

    Returns a `_Byte` item located at the prefix, or `None` without the
    prefix. Whitespace, symbols, and comments may come before each bit;
    a missing bit is a parse error.
    """
    start_loc = self._text_loc

    # Prefix (optional: without it this isn't a binary byte)
    if self._try_parse_pat(self._bin_byte_prefix_pat) is None:
        return

    # Exactly eight bits, most significant first
    bit_str = ""

    for _ in range(8):
        self._skip_ws_and_comments()
        bit_m = self._expect_pat(self._bin_byte_bit_pat, "Expecting a bit (`0` or `1`)")
        bit_str += bit_m.group(0)

    return _Byte(int(bit_str, 2), start_loc)
71aaa3f7
PP
562
563 # Patterns for _try_parse_dec_byte()
564 _dec_byte_prefix_pat = re.compile(r"\$\s*")
565 _dec_byte_val_pat = re.compile(r"(?P<neg>-?)(?P<val>\d+)")
566
567 # Tries to parse a decimal byte, returning a byte item on success.
def _try_parse_dec_byte(self):
    """Try to parse a decimal byte: `$` followed by a decimal constant.

    Returns a `_Byte` item located at the prefix, or `None` without the
    prefix. The accepted values are -128 to 255; a negative value is
    stored as its two's complement. Anything else is a parse error.
    """
    start_loc = self._text_loc

    # Prefix (optional: without it this isn't a decimal byte)
    if self._try_parse_pat(self._dec_byte_prefix_pat) is None:
        return

    # Value (mandatory from now on)
    val_m = self._expect_pat(self._dec_byte_val_pat, "Expecting a decimal constant")
    val = int(val_m.group("val"))

    if val_m.group("neg") == "-":
        val = -val

    # Validate
    if not (-128 <= val <= 255):
        _raise_error("Invalid decimal byte value {}".format(val), start_loc)

    # Two's complement for the negative values
    return _Byte(val % 256, start_loc)
71aaa3f7
PP
591
592 # Tries to parse a byte, returning a byte item on success.
593 def _try_parse_byte(self):
594 # Hexadecimal
595 item = self._try_parse_hex_byte()
596
597 if item is not None:
598 return item
599
600 # Binary
601 item = self._try_parse_bin_byte()
602
603 if item is not None:
604 return item
605
606 # Decimal
607 item = self._try_parse_dec_byte()
608
609 if item is not None:
610 return item
611
612 # Patterns for _try_parse_str()
613 _str_prefix_pat = re.compile(r'(?:u(?P<len>16|32)(?P<bo>be|le))?\s*"')
614 _str_suffix_pat = re.compile(r'"')
615 _str_str_pat = re.compile(r'(?:(?:\\.)|[^"])*')
616
617 # Strings corresponding to escape sequence characters
618 _str_escape_seq_strs = {
619 "0": "\0",
620 "a": "\a",
621 "b": "\b",
622 "e": "\x1b",
623 "f": "\f",
624 "n": "\n",
625 "r": "\r",
626 "t": "\t",
627 "v": "\v",
628 "\\": "\\",
629 '"': '"',
630 }
631
632 # Tries to parse a string, returning a string item on success.
def _try_parse_str(self):
    """Try to parse a literal string, returning a `_Str` item on success.

    Returns `None` when the string prefix (optional `u16be`, `u16le`,
    `u32be`, or `u32le`, then `"`) doesn't match. The encoding is UTF-8
    without such a prefix, or the corresponding UTF-16/UTF-32 one.

    A missing closing `"` is a parse error.
    """
    begin_text_loc = self._text_loc

    # Match prefix
    m = self._try_parse_pat(self._str_prefix_pat)

    if m is None:
        # No match
        return

    # Get encoding
    encoding = "utf8"

    if m.group("len") is not None:
        encoding = "utf_{}_{}".format(m.group("len"), m.group("bo"))

    # Actual string
    m = self._expect_pat(self._str_str_pat, "Expecting a literal string")

    # Expect end of string
    self._expect_pat(self._str_suffix_pat, 'Expecting `"` (end of literal string)')

    # Replace escape sequences in a single left-to-right pass, the same
    # way the string pattern consumed them (backslash + one character).
    #
    # Replacing one kind of escape sequence after the other over the
    # whole string is wrong for an escaped backslash followed by an
    # escape letter: `\\n` would become a backslash and a newline
    # instead of a backslash and `n`.
    #
    # An unknown escape sequence is kept as is.
    escape_seq_strs = self._str_escape_seq_strs
    val = re.sub(
        r"\\(.)",
        lambda esc_m: escape_seq_strs.get(esc_m.group(1), esc_m.group(0)),
        m.group(0),
    )

    # Encode
    data = val.encode(encoding)

    # Return item
    return _Str(data, begin_text_loc)
71aaa3f7
PP
666
667 # Patterns for _try_parse_group()
668 _group_prefix_pat = re.compile(r"\(")
669 _group_suffix_pat = re.compile(r"\)")
670
671 # Tries to parse a group, returning a group item on success.
def _try_parse_group(self):
    """Try to parse a group: `(`, items, then `)`.

    Returns a `_Group` item located at the opening parenthesis, or
    `None` without it. A missing `)` is a parse error.
    """
    start_loc = self._text_loc

    # Opening parenthesis (optional: without it this isn't a group)
    if self._try_parse_pat(self._group_prefix_pat) is None:
        return

    # Contained items
    contained = self._parse_items()

    # Closing parenthesis (mandatory)
    self._skip_ws_and_comments()
    self._expect_pat(self._group_suffix_pat, "Expecting an item or `)` (end of group)")
    return _Group(contained, start_loc)
71aaa3f7
PP
691
692 # Returns a stripped expression string and an AST expression node
693 # from the expression string `expr_str` at text location `text_loc`.
def _ast_expr_from_str(self, expr_str: str, text_loc: TextLocation):
    """Return the normalized expression string and its AST expression
    node.

    `expr_str` is stripped and its newlines become spaces before being
    parsed in `eval` mode. A syntax error becomes a parse error located
    at `text_loc`.
    """
    normalized = expr_str.strip().replace("\n", " ")

    try:
        return normalized, ast.parse(normalized, mode="eval")
    except SyntaxError:
        _raise_error(
            "Invalid expression `{}`: invalid syntax".format(normalized),
            text_loc,
        )
707
269f6eb3 708 # Patterns for _try_parse_num_and_attr()
05f81895 709 _val_expr_pat = re.compile(r"([^}:]+):\s*")
269f6eb3 710 _fl_num_len_attr_pat = re.compile(r"8|16|24|32|40|48|56|64")
05f81895 711 _leb128_int_attr_pat = re.compile(r"(u|s)leb128")
71aaa3f7 712
05f81895
PP
713 # Tries to parse a value and attribute (fixed length in bits or
714 # `leb128`), returning a value item on success.
def _try_parse_num_and_attr(self):
    """Try to parse an expression followed by `:` and an attribute.

    The attribute is either a fixed length in bits (returns a `_FlNum`
    item), `uleb128` (returns a `_ULeb128Int` item), or `sleb128`
    (returns a `_SLeb128Int` item).

    Returns `None` when the `expression:` part doesn't match; any other
    attribute is a parse error.
    """
    start_loc = self._text_loc

    # Expression and `:`
    expr_m = self._try_parse_pat(self._val_expr_pat)

    if expr_m is None:
        return

    expr_str, expr = self._ast_expr_from_str(expr_m.group(1), start_loc)

    # Fixed length?
    len_m = self._try_parse_pat(self._fl_num_len_attr_pat)

    if len_m is not None:
        return _FlNum(expr_str, expr, int(len_m.group(0)), start_loc)

    # LEB128?
    leb128_m = self._try_parse_pat(self._leb128_int_attr_pat)

    if leb128_m is None:
        # At this point it's invalid
        self._raise_error(
            "Expecting a length (multiple of eight bits), `uleb128`, or `sleb128`"
        )

    if leb128_m.group(1) == "u":
        return _ULeb128Int(expr_str, expr, start_loc)

    return _SLeb128Int(expr_str, expr, start_loc)
71aaa3f7 752
# Pattern for _try_parse_var_assign()
2adf4336 754 _var_assign_pat = re.compile(
71aaa3f7
PP
755 r"(?P<name>{})\s*=\s*(?P<expr>[^}}]+)".format(_py_name_pat.pattern)
756 )
757
2adf4336
PP
758 # Tries to parse a variable assignment, returning a variable
759 # assignment item on success.
def _try_parse_var_assign(self):
    """Try to parse a variable assignment (`name = expression`).

    Returns a `_VarAssign` item, or `None` when the pattern doesn't
    match. Using the reserved name or the name of an existing label is
    a parse error. The name joins the known variable names before the
    expression is parsed.
    """
    start_loc = self._text_loc
    assign_m = self._try_parse_pat(self._var_assign_pat)

    if assign_m is None:
        return

    # Validate name (the reserved name check comes first)
    name = assign_m.group("name")
    error_msg = None

    if name == _icitte_name:
        error_msg = "`{}` is a reserved variable name".format(_icitte_name)
    elif name in self._label_names:
        error_msg = "Existing label named `{}`".format(name)

    if error_msg is not None:
        _raise_error(error_msg, start_loc)

    # Add to known variable names
    self._var_names.add(name)

    # Create the item from the expression string
    expr_str, expr = self._ast_expr_from_str(assign_m.group("expr"), start_loc)
    return _VarAssign(name, expr_str, expr, start_loc)
794
2adf4336 795 # Pattern for _try_parse_set_bo()
71aaa3f7
PP
796 _bo_pat = re.compile(r"[bl]e")
797
2adf4336
PP
798 # Tries to parse a byte order name, returning a byte order setting
799 # item on success.
def _try_parse_set_bo(self):
    """Try to parse a byte order name (`be` or `le`).

    Returns the corresponding `_SetBo` item, or `None` when the pattern
    doesn't match.
    """
    start_loc = self._text_loc
    bo_m = self._try_parse_pat(self._bo_pat)

    if bo_m is None:
        return

    bo_name = bo_m.group(0)

    if bo_name == "be":
        bo = ByteOrder.BE
    else:
        # The pattern only accepts `be` and `le`
        assert bo_name == "le"
        bo = ByteOrder.LE

    return _SetBo(bo, start_loc)
71aaa3f7
PP
816
# Patterns for _try_parse_val_or_var_assign_or_set_bo()
2adf4336
PP
818 _val_var_assign_set_bo_prefix_pat = re.compile(r"\{\s*")
819 _val_var_assign_set_bo_suffix_pat = re.compile(r"\s*}")
71aaa3f7 820
2adf4336
PP
821 # Tries to parse a value, a variable assignment, or a byte order
822 # setting, returning an item on success.
823 def _try_parse_val_or_var_assign_or_set_bo(self):
71aaa3f7 824 # Match prefix
2adf4336 825 if self._try_parse_pat(self._val_var_assign_set_bo_prefix_pat) is None:
71aaa3f7
PP
826 # No match
827 return
828
2adf4336
PP
829 # Variable assignment item?
830 item = self._try_parse_var_assign()
71aaa3f7
PP
831
832 if item is None:
269f6eb3
PP
833 # Number item?
834 item = self._try_parse_num_and_attr()
71aaa3f7
PP
835
836 if item is None:
2adf4336
PP
837 # Byte order setting item?
838 item = self._try_parse_set_bo()
71aaa3f7
PP
839
840 if item is None:
841 # At this point it's invalid
2adf4336 842 self._raise_error(
269f6eb3 843 "Expecting a fixed-length number, a variable assignment, or a byte order setting"
2adf4336 844 )
71aaa3f7
PP
845
846 # Expect suffix
2adf4336 847 self._expect_pat(self._val_var_assign_set_bo_suffix_pat, "Expecting `}`")
71aaa3f7
PP
848 return item
849
e57a18e1 850 # Common positive constant integer pattern
71aaa3f7
PP
851 _pos_const_int_pat = re.compile(r"0[Xx][A-Fa-f0-9]+|\d+")
852
2adf4336
PP
853 # Tries to parse an offset setting value (after the initial `<`),
854 # returning an offset item on success.
def _try_parse_set_offset_val(self):
    """Try to parse an offset setting value (after the initial `<`).

    The value is a positive constant integer: hexadecimal with a `0x`
    or `0X` prefix, decimal otherwise.

    Returns a `_SetOffset` item, or `None` when there's no such
    constant.
    """
    begin_text_loc = self._text_loc

    # Match
    m = self._try_parse_pat(self._pos_const_int_pat)

    if m is None:
        # No match
        return

    # Convert with an explicit base.
    #
    # int(text, 0) follows the Python literal rules and raises
    # `ValueError` for a zero-padded decimal constant such as `010`,
    # which the pattern accepts: such a constant is read as decimal.
    val_str = m.group(0)

    if val_str[:2] in ("0x", "0X"):
        val = int(val_str, 16)
    else:
        val = int(val_str)

    # Return item
    return _SetOffset(val, begin_text_loc)
71aaa3f7
PP
867
868 # Tries to parse a label name (after the initial `<`), returning a
869 # label item on success.
def _try_parse_label_name(self):
    """Try to parse a label name (after the initial `<`).

    Returns a `_Label` item, or `None` when there's no name. Using the
    reserved name, the name of an existing label, or the name of an
    existing variable is a parse error. The name joins the known label
    names on success.
    """
    start_loc = self._text_loc
    name_m = self._try_parse_pat(_py_name_pat)

    if name_m is None:
        return

    # Validate (first failing check wins)
    name = name_m.group(0)
    error_msg = None

    if name == _icitte_name:
        error_msg = "`{}` is a reserved label name".format(_icitte_name)
    elif name in self._label_names:
        error_msg = "Duplicate label name `{}`".format(name)
    elif name in self._var_names:
        error_msg = "Existing variable named `{}`".format(name)

    if error_msg is not None:
        _raise_error(error_msg, start_loc)

    # Add to known label names
    self._label_names.add(name)
    return _Label(name, start_loc)
71aaa3f7 899
2adf4336
PP
900 # Patterns for _try_parse_label_or_set_offset()
901 _label_set_offset_prefix_pat = re.compile(r"<\s*")
902 _label_set_offset_suffix_pat = re.compile(r"\s*>")
71aaa3f7 903
2adf4336
PP
904 # Tries to parse a label or an offset setting, returning an item on
905 # success.
906 def _try_parse_label_or_set_offset(self):
71aaa3f7 907 # Match prefix
2adf4336 908 if self._try_parse_pat(self._label_set_offset_prefix_pat) is None:
71aaa3f7
PP
909 # No match
910 return
911
2adf4336
PP
912 # Offset setting item?
913 item = self._try_parse_set_offset_val()
71aaa3f7
PP
914
915 if item is None:
916 # Label item?
917 item = self._try_parse_label_name()
918
919 if item is None:
920 # At this point it's invalid
2adf4336 921 self._raise_error("Expecting a label name or an offset setting value")
71aaa3f7
PP
922
923 # Expect suffix
2adf4336 924 self._expect_pat(self._label_set_offset_suffix_pat, "Expecting `>`")
71aaa3f7
PP
925 return item
926
676f6189
PP
927 # Patterns for _try_parse_align_offset()
928 _align_offset_prefix_pat = re.compile(r"@\s*")
929 _align_offset_val_pat = re.compile(r"(\d+)\s*")
930 _align_offset_pad_val_prefix_pat = re.compile(r"~\s*")
931
932 # Tries to parse an offset alignment, returning an offset alignment
933 # item on success.
def _try_parse_align_offset(self):
    """Try to parse an offset alignment: `@`, an alignment in bits, and
    optionally `~` followed by a padding byte value.

    Returns an `_AlignOffset` item (padding value 0 by default), or
    `None` without the `@` prefix.

    The alignment must be a positive multiple of eight and the padding
    value (hexadecimal with a `0x`/`0X` prefix, decimal otherwise) must
    not exceed 255: anything else is a parse error.
    """
    begin_text_loc = self._text_loc

    # Match prefix
    if self._try_parse_pat(self._align_offset_prefix_pat) is None:
        # No match
        return

    align_text_loc = self._text_loc
    m = self._expect_pat(
        self._align_offset_val_pat,
        "Expecting an alignment (positive multiple of eight bits)",
    )

    # Validate alignment
    val = int(m.group(1))

    if val <= 0 or (val % 8) != 0:
        _raise_error(
            "Invalid alignment value {} (not a positive multiple of eight)".format(
                val
            ),
            align_text_loc,
        )

    # Padding value?
    pad_val = 0

    if self._try_parse_pat(self._align_offset_pad_val_prefix_pat) is not None:
        pad_val_text_loc = self._text_loc
        m = self._expect_pat(self._pos_const_int_pat, "Expecting a byte value")

        # Convert with an explicit base.
        #
        # int(text, 0) follows the Python literal rules and raises
        # `ValueError` for a zero-padded decimal constant such as
        # `007`, which the pattern accepts: such a constant is read as
        # decimal.
        pad_val_str = m.group(0)

        if pad_val_str[:2] in ("0x", "0X"):
            pad_val = int(pad_val_str, 16)
        else:
            pad_val = int(pad_val_str)

        # Validate
        if pad_val > 255:
            _raise_error(
                "Invalid padding byte value {}".format(pad_val),
                pad_val_text_loc,
            )

    # Return item
    return _AlignOffset(val, pad_val, begin_text_loc)
977
# Patterns for _expect_rep_cond_expr()
27d52a19
PP
979 _rep_cond_expr_prefix_pat = re.compile(r"\{")
980 _rep_cond_expr_pat = re.compile(r"[^}]+")
981 _rep_cond_expr_suffix_pat = re.compile(r"\}")
982
983 # Parses the expression of a conditional block or of a repetition
984 # (block or post-item) and returns the expression string and AST
985 # node.
def _expect_rep_cond_expr(self, accept_int: bool):
    """Parse the expression of a conditional block or of a repetition
    (block or post-item).

    The expression is a positive constant integer (only when
    `accept_int` is true), a name, or an expression between `{` and
    `}`. Anything else is a parse error.

    Returns the expression string and AST expression node, as returned
    by `_ast_expr_from_str()`.
    """
    expr_text_loc = self._text_loc

    # Constant integer (when accepted), then name
    simple_m = None

    if accept_int:
        simple_m = self._try_parse_pat(self._pos_const_int_pat)

    if simple_m is None:
        simple_m = self._try_parse_pat(_py_name_pat)

    if simple_m is not None:
        return self._ast_expr_from_str(simple_m.group(0), expr_text_loc)

    # Expression between `{` and `}`
    if self._try_parse_pat(self._rep_cond_expr_prefix_pat) is None:
        # At this point it's invalid
        if accept_int:
            mid_msg = "a positive constant integer, a name, or `{`"
        else:
            mid_msg = "a name or `{`"

        self._raise_error("Expecting {}".format(mid_msg))

    # The expression is located after the `{`
    expr_text_loc = self._text_loc
    expr_m = self._expect_pat(self._rep_cond_expr_pat, "Expecting an expression")
    self._expect_pat(self._rep_cond_expr_suffix_pat, "Expecting `}`")
    return self._ast_expr_from_str(expr_m.group(0), expr_text_loc)
1023
27d52a19
PP
1024 # Parses the multiplier expression of a repetition (block or
1025 # post-item) and returns the expression string and AST node.
def _expect_rep_mul_expr(self):
    # Same as _expect_rep_cond_expr(), also accepting a positive
    # constant integer.
    return self._expect_rep_cond_expr(True)
1028
1029 # Common block end pattern
1030 _block_end_pat = re.compile(r"!end\b\s*")
1031
e57a18e1
PP
1032 # Pattern for _try_parse_rep_block()
1033 _rep_block_prefix_pat = re.compile(r"!r(?:epeat)?\b\s*")
e57a18e1
PP
1034
1035 # Tries to parse a repetition block, returning a repetition item on
1036 # success.
def _try_parse_rep_block(self):
    """Try to parse a repetition block: `!repeat` (or `!r`), a
    multiplier expression, items, then `!end`.

    Returns a `_Rep` item whose repeated item is a `_Group` of the
    block items, or `None` without the prefix. A missing `!end` is a
    parse error.
    """
    start_loc = self._text_loc

    # Prefix (optional: without it this isn't a repetition block)
    if self._try_parse_pat(self._rep_block_prefix_pat) is None:
        return

    # Multiplier expression
    self._skip_ws_and_comments()
    expr_str, expr = self._expect_rep_mul_expr()

    # Block items, grouped at the location of the first one
    self._skip_ws_and_comments()
    body_loc = self._text_loc
    body = _Group(self._parse_items(), body_loc)

    # End of block (mandatory)
    self._skip_ws_and_comments()
    self._expect_pat(
        self._block_end_pat, "Expecting an item or `!end` (end of repetition block)"
    )
    return _Rep(body, expr_str, expr, start_loc)
1062
27d52a19
PP
1063 # Pattern for _try_parse_cond_block()
1064 _cond_block_prefix_pat = re.compile(r"!if\b\s*")
1065
1066 # Tries to parse a conditional block, returning a conditional item
1067 # on success.
def _try_parse_cond_block(self):
    """Try to parse a conditional block: `!if`, a condition expression
    (a name or an expression between `{` and `}`), items, then `!end`.

    Returns a `_Cond` item whose conditional item is a `_Group` of the
    block items, or `None` without the prefix. A missing `!end` is a
    parse error.
    """
    start_loc = self._text_loc

    # Prefix (optional: without it this isn't a conditional block)
    if self._try_parse_pat(self._cond_block_prefix_pat) is None:
        return

    # Condition expression (a constant integer isn't accepted)
    self._skip_ws_and_comments()
    expr_str, expr = self._expect_rep_cond_expr(False)

    # Block items, grouped at the location of the first one
    self._skip_ws_and_comments()
    body_loc = self._text_loc
    body = _Group(self._parse_items(), body_loc)

    # End of block (mandatory)
    self._skip_ws_and_comments()
    self._expect_pat(
        self._block_end_pat,
        "Expecting an item or `!end` (end of conditional block)",
    )
    return _Cond(body, expr_str, expr, start_loc)
1094
71aaa3f7
PP
1095 # Tries to parse a base item (anything except a repetition),
1096 # returning it on success.
1097 def _try_parse_base_item(self):
1098 # Byte item?
1099 item = self._try_parse_byte()
1100
1101 if item is not None:
1102 return item
1103
1104 # String item?
1105 item = self._try_parse_str()
1106
1107 if item is not None:
1108 return item
1109
2adf4336
PP
1110 # Value, variable assignment, or byte order setting item?
1111 item = self._try_parse_val_or_var_assign_or_set_bo()
71aaa3f7
PP
1112
1113 if item is not None:
1114 return item
1115
2adf4336
PP
1116 # Label or offset setting item?
1117 item = self._try_parse_label_or_set_offset()
71aaa3f7 1118
676f6189
PP
1119 if item is not None:
1120 return item
1121
1122 # Offset alignment item?
1123 item = self._try_parse_align_offset()
1124
71aaa3f7
PP
1125 if item is not None:
1126 return item
1127
1128 # Group item?
1129 item = self._try_parse_group()
1130
1131 if item is not None:
1132 return item
1133
e57a18e1
PP
1134 # Repetition (block) item?
1135 item = self._try_parse_rep_block()
71aaa3f7 1136
e57a18e1
PP
1137 if item is not None:
1138 return item
1139
27d52a19
PP
1140 # Conditional block item?
1141 item = self._try_parse_cond_block()
1142
1143 if item is not None:
1144 return item
1145
e57a18e1
PP
1146 # Pattern for _try_parse_rep_post()
1147 _rep_post_prefix_pat = re.compile(r"\*")
1148
1149 # Tries to parse a post-item repetition, returning the expression
1150 # string and AST expression node on success.
1151 def _try_parse_rep_post(self):
71aaa3f7 1152 # Match prefix
e57a18e1 1153 if self._try_parse_pat(self._rep_post_prefix_pat) is None:
71aaa3f7 1154 # No match
2adf4336 1155 return
71aaa3f7 1156
e57a18e1 1157 # Return expression string and AST expression
71aaa3f7 1158 self._skip_ws_and_comments()
e57a18e1 1159 return self._expect_rep_mul_expr()
71aaa3f7 1160
1ca7b5e1
PP
1161 # Tries to parse an item, possibly followed by a repetition,
1162 # returning `True` on success.
1163 #
1164 # Appends any parsed item to `items`.
1165 def _try_append_item(self, items: List[_Item]):
71aaa3f7
PP
1166 self._skip_ws_and_comments()
1167
1168 # Parse a base item
1169 item = self._try_parse_base_item()
1170
1171 if item is None:
1172 # No item
1ca7b5e1 1173 return False
71aaa3f7
PP
1174
1175 # Parse repetition if the base item is repeatable
1176 if isinstance(item, _RepableItem):
0e8e3169
PP
1177 self._skip_ws_and_comments()
1178 rep_text_loc = self._text_loc
e57a18e1 1179 rep_ret = self._try_parse_rep_post()
71aaa3f7 1180
2adf4336
PP
1181 if rep_ret is not None:
1182 item = _Rep(item, rep_ret[0], rep_ret[1], rep_text_loc)
71aaa3f7 1183
1ca7b5e1
PP
1184 items.append(item)
1185 return True
71aaa3f7
PP
1186
1187 # Parses and returns items, skipping whitespaces, insignificant
1188 # symbols, and comments when allowed, and stopping at the first
1189 # unknown character.
1190 def _parse_items(self) -> List[_Item]:
1191 items = [] # type: List[_Item]
1192
1193 while self._isnt_done():
1ca7b5e1
PP
1194 # Try to append item
1195 if not self._try_append_item(items):
1196 # Unknown at this point
1197 break
71aaa3f7
PP
1198
1199 return items
1200
# Parses the whole Normand input (`self._normand`), setting `self._res`
# to the main group item on success.
#
# Reports an error through self._raise_error() if anything remains
# after the last parsed item (whitespace and comments excluded).
def _parse(self):
    if not self._normand.strip():
        # Special case to make sure there's something to consume:
        # empty or blank input makes an empty main group.
        self._res = _Group([], self._text_loc)
        return

    # First level items
    top_level_items = self._parse_items()

    # Anything left at this point is something which isn't an item
    self._skip_ws_and_comments()

    if self._isnt_done():
        self._raise_error(
            "Unexpected character `{}`".format(self._normand[self._at])
        )

    # Main group item
    self._res = _Group(top_level_items, self._text_loc)
1222
1223
# The return type of parse().
#
# This class isn't meant to be instantiated by the user: __init__()
# always raises `NotImplementedError`, and _create() builds the
# instances instead.
class ParseResult:
    # Creates and returns an instance from the final generation context.
    @classmethod
    def _create(
        cls,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        # Bypass __init__(), which is disabled
        res = cls.__new__(cls)
        res._init(data, variables, labels, offset, bo)
        return res

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Actual initializer: keeps the parameters as is (no copy).
    def _init(
        self,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self._data, self._vars, self._labels = data, variables, labels
        self._offset, self._bo = offset, bo

    # Generated data.
    @property
    def data(self):
        return self._data

    # Dictionary of updated variable names to their last computed value.
    @property
    def variables(self):
        return self._vars

    # Dictionary of updated main group label names to their computed
    # value.
    @property
    def labels(self):
        return self._labels

    # Updated offset.
    @property
    def offset(self):
        return self._offset

    # Updated byte order.
    @property
    def byte_order(self):
        return self._bo
1281
1282
# Raises a parse error with the message `msg`, located where the item
# `item` is (`item.text_loc`).
def _raise_error_for_item(msg: str, item: _Item) -> NoReturn:
    item_text_loc = item.text_loc
    _raise_error(msg, item_text_loc)
1287
1288
1289# The `ICITTE` reserved name.
1290_icitte_name = "ICITTE"
1291
1292
2adf4336
PP
1293# Base node visitor.
1294#
1295# Calls the _visit_name() method for each name node which isn't the name
1296# of a call.
1297class _NodeVisitor(ast.NodeVisitor):
1298 def __init__(self):
71aaa3f7
PP
1299 self._parent_is_call = False
1300
1301 def generic_visit(self, node: ast.AST):
1302 if type(node) is ast.Call:
1303 self._parent_is_call = True
1304 elif type(node) is ast.Name and not self._parent_is_call:
2adf4336 1305 self._visit_name(node.id)
71aaa3f7
PP
1306
1307 super().generic_visit(node)
1308 self._parent_is_call = False
1309
2adf4336
PP
1310 @abc.abstractmethod
1311 def _visit_name(self, name: str):
1312 ...
1313
71aaa3f7 1314
2adf4336
PP
# Expression validator: visits an expression, reporting an error at the
# location of `item` for the first visited name which is neither
# `ICITTE` nor one of `allowed_names`.
class _ExprValidator(_NodeVisitor):
    def __init__(self, item: _ExprItemT, allowed_names: Set[str]):
        super().__init__()
        self._item = item
        self._allowed_names = allowed_names

    def _visit_name(self, name: str):
        if name == _icitte_name or name in self._allowed_names:
            # Known and reachable variable/label name
            return

        # Show the legal names, sorted, to help the user: `ICITTE` is
        # always one of them.
        legal_names = self._allowed_names.copy()
        legal_names.add(_icitte_name)
        legal_names_str = ", ".join(
            sorted("`{}`".format(legal_name) for legal_name in legal_names)
        )
        msg = "Illegal (unknown or unreachable) variable/label name `{}` in expression `{}`".format(
            name, self._item.expr_str
        )
        msg += "; the legal names are {{{}}}".format(legal_names_str)
        _raise_error(msg, self._item.text_loc)
1344
1345
# Expression visitor collecting, into a set, every name which
# `_NodeVisitor` reports through _visit_name().
class _ExprNamesVisitor(_NodeVisitor):
    def __init__(self):
        # The base initializer resets the "parent is a call" flag
        super().__init__()
        self._names = set()  # type: Set[str]

    # Names collected so far.
    @property
    def names(self):
        return self._names

    def _visit_name(self, name: str):
        self._names.add(name)
71aaa3f7 1358
71aaa3f7 1359
2adf4336
PP
1360# Generator state.
1361class _GenState:
1362 def __init__(
1b8aa84a 1363 self,
e57a18e1
PP
1364 variables: VariablesT,
1365 labels: LabelsT,
1b8aa84a
PP
1366 offset: int,
1367 bo: Optional[ByteOrder],
2adf4336
PP
1368 ):
1369 self.variables = variables.copy()
1370 self.labels = labels.copy()
1371 self.offset = offset
1372 self.bo = bo
71aaa3f7
PP
1373
1374
2adf4336 1375# Generator of data and final state from a group item.
71aaa3f7
PP
1376#
1377# Generation happens in memory at construction time. After building, use
1378# the `data`, `variables`, `labels`, `offset`, and `bo` properties to
1379# get the resulting context.
2adf4336
PP
1380#
1381# The steps of generation are:
1382#
27d52a19
PP
1383# 1. Validate that each repetition, conditional, and LEB128 integer
1384# expression uses only reachable names.
2adf4336 1385#
27d52a19
PP
1386# 2. Compute and keep the effective repetition count, conditional value,
1387# and LEB128 integer value for each repetition, conditional, and
1388# LEB128 integer instance.
2adf4336
PP
1389#
1390# 3. Generate bytes, updating the initial state as it goes which becomes
1391# the final state after the operation.
1392#
27d52a19
PP
1393# During the generation, when handling a `_Rep`, `_Cond`, or
1394# `_Leb128Int` item, we already have the effective repetition count,
1395# conditional value, or value of the instance.
2adf4336
PP
1396#
1397# When handling a `_Group` item, first update the current labels with
1398# all the immediate (not nested) labels, and then handle each
1399# contained item. This gives contained item access to "future" outer
1400# labels. Then remove the immediate labels from the state so that
1401# outer items don't have access to inner labels.
71aaa3f7
PP
1402class _Gen:
# Generates the data of `group` right away, from the initial
# variables, labels, offset, and byte order.
#
# Each pass below gets its own fresh `_GenState` (which copies
# `variables` and `labels`) so that one pass doesn't see the updates of
# another.
def __init__(
    self,
    group: _Group,
    variables: VariablesT,
    labels: LabelsT,
    offset: int,
    bo: Optional[ByteOrder],
):
    # 1. Validate the names of the variable-length item expressions
    initial_variable_names = set(variables.keys())
    initial_label_names = set(labels.keys())
    self._validate_vl_exprs(group, initial_variable_names, initial_label_names)

    # 2. Compute the value of each variable-length item instance
    compute_state = _GenState(variables, labels, offset, bo)
    self._vl_instance_vals = self._compute_vl_instance_vals(group, compute_state)

    # 3. Generate the bytes
    gen_state = _GenState(variables, labels, offset, bo)
    self._gen(group, gen_state)
71aaa3f7
PP
1416
# Generated bytes (the `bytearray` which _gen() fills).
@property
def data(self):
    return self._data

# Updated variables (from the final state which _gen() sets).
@property
def variables(self):
    return self._final_state.variables

# Updated main group labels (from the final state which _gen() sets).
@property
def labels(self):
    return self._final_state.labels

# Updated offset (from the final state which _gen() sets).
@property
def offset(self):
    return self._final_state.offset

# Updated byte order (from the final state which _gen() sets).
@property
def bo(self):
    return self._final_state.bo
1441
# Returns the set of names which an `_ExprNamesVisitor` collects while
# visiting the AST expression `expr` (used, non-called names).
@staticmethod
def _names_of_expr(expr: ast.Expression):
    names_visitor = _ExprNamesVisitor()
    names_visitor.visit(expr)
    return names_visitor.names
1449
27d52a19
PP
# Validates that all the repetition, conditional, and LEB128 integer
# expressions within `item` don't refer, directly or indirectly, to
# subsequent labels.
#
# `allowed_variable_names` and `allowed_label_names` are the names
# which are reachable when reaching `item`; this function updates both
# sets in place as it goes.
#
# Depending on the type of `item`:
#
# `_Label`:
#     Add its name to the allowed label names: a label occurring
#     before a repetition, and not within a nested group, is always
#     reachable.
#
# `_VarAssign`:
#     If all the names within its expression are allowed (`ICITTE`
#     always is), then add its name to the allowed variable names.
#
#     Otherwise, remove its name from the allowed variable names (if
#     it's in there): a variable which refers to an unreachable name is
#     unreachable itself.
#
# `_Leb128Int` (any subclass):
#     Make sure all the names within its expression are allowed.
#
# `_Rep` and `_Cond`:
#     Make sure all the names within its expression are allowed, and
#     then validate the repeated/conditional item with the _same_
#     sets.
#
# `_Group`:
#     Validate each contained item with a _copy_ of the current allowed
#     label names and the same current allowed variable names.
#
# Any other item has nothing to validate here.
@staticmethod
def _validate_vl_exprs(
    item: _Item, allowed_variable_names: Set[str], allowed_label_names: Set[str]
):
    item_type = type(item)

    if item_type is _Label:
        allowed_label_names.add(item.name)
        return

    if item_type is _VarAssign:
        reachable_names = allowed_label_names | allowed_variable_names | {_icitte_name}

        if _Gen._names_of_expr(item.expr) <= reachable_names:
            # Only reachable names: the variable is reachable too
            allowed_variable_names.add(item.name)
        else:
            # No-op if the name isn't there
            allowed_variable_names.discard(item.name)

        return

    if isinstance(item, _Leb128Int):
        _ExprValidator(item, allowed_label_names | allowed_variable_names).visit(
            item.expr
        )
        return

    if item_type is _Rep or item_type is _Cond:
        # Validate the expression first, and then the inner item
        _ExprValidator(item, allowed_label_names | allowed_variable_names).visit(
            item.expr
        )
        _Gen._validate_vl_exprs(item.item, allowed_variable_names, allowed_label_names)
        return

    if item_type is _Group:
        # Work on a copy so that the caller's frame cannot access the
        # nested label names.
        nested_label_names = allowed_label_names.copy()

        for subitem in item.items:
            _Gen._validate_vl_exprs(subitem, allowed_variable_names, nested_label_names)
71aaa3f7 1526
2adf4336
PP
# Evaluates the expression of `item` considering the current
# generation state `state`, returning the result.
#
# The names available to the expression are the current labels,
# `ICITTE` (current offset), and the current variables; in case of a
# name clash, `ICITTE` wins over a label, and a variable wins over
# both.
#
# A `bool` result becomes an `int`. Then the result must be an `int`,
# or possibly a `float` too if `allow_float` is `True`: this function
# raises an error for `item` otherwise, as well as when the evaluation
# itself fails.
@staticmethod
def _eval_item_expr(
    item: _ExprItemT,
    state: _GenState,
    allow_float: bool = False,
):
    # Build the symbols: the insertion order below decides which value
    # wins for a given name.
    names = dict(state.labels)  # type: VariablesT
    names[_icitte_name] = state.offset
    names.update(state.variables)

    # Validate the node and its children
    _ExprValidator(item, set(names.keys())).visit(item.expr)

    # Compile and evaluate the expression node.
    #
    # NOTE(review): this is eval() on an expression which comes from
    # the Normand input; confirm that this input is always trusted.
    try:
        result = eval(compile(item.expr, "", "eval"), None, names)
    except Exception as exc:
        _raise_error_for_item(
            "Failed to evaluate expression `{}`: {}".format(item.expr_str, exc),
            item,
        )

    # Normalize a `bool` result to `int`
    if type(result) is bool:
        result = int(result)

    # Validate the type of the result (exact type, not a subclass)
    if allow_float:
        accepted_types = (int, float)  # type: Tuple[type, ...]
        accepted_types_msg = "`int` or `float`"
    else:
        accepted_types = (int,)
        accepted_types_msg = "`int`"

    if type(result) not in accepted_types:
        _raise_error_for_item(
            "Invalid expression `{}`: expecting result type {}, not `{}`".format(
                item.expr_str, accepted_types_msg, type(result).__name__
            ),
            item,
        )

    return result
1580
05f81895
PP
1581 # Returns the size, in bytes, required to encode the value `val`
1582 # with LEB128 (signed version if `is_signed` is `True`).
1583 @staticmethod
1584 def _leb128_size_for_val(val: int, is_signed: bool):
1585 if val < 0:
1586 # Equivalent upper bound.
1587 #
1588 # For example, if `val` is -128, then the full integer for
1589 # this number of bits would be [-128, 127].
1590 val = -val - 1
1591
1592 # Number of bits (add one for the sign if needed)
1593 bits = val.bit_length() + int(is_signed)
1594
1595 if bits == 0:
1596 bits = 1
1597
1598 # Seven bits per byte
1599 return math.ceil(bits / 7)
1600
676f6189
PP
1601 # Returns the offset `offset` aligned according to `item`.
1602 @staticmethod
1603 def _align_offset(offset: int, item: _AlignOffset):
1604 align_bytes = item.val // 8
1605 return (offset + align_bytes - 1) // align_bytes * align_bytes
1606
27d52a19
PP
# Computes the effective value for each repetition, conditional, and
# LEB128 integer instance within `item`, appending them to
# `instance_vals` (a new list if `None`) and returning that list.
#
# This function updates `state` as it goes (offset, labels, and
# variables) so that each expression is evaluated within the right
# context.
#
# At this point it must be known that, for a given variable-length
# item, its expression only contains reachable names.
#
# When handling a `_Rep` or `_Cond` item, this function appends its
# effective multiplier/value to `instance_vals` _before_ handling its
# repeated/conditional item.
#
# When handling a `_VarAssign` item, this function only evaluates it if
# all its names are reachable.
#
# When handling a `_Group` item, this function restores the labels of
# `state` to what they were before handling the contained items.
@staticmethod
def _compute_vl_instance_vals(
    item: _Item, state: _GenState, instance_vals: Optional[List[int]] = None
):
    if instance_vals is None:
        instance_vals = []

    if isinstance(item, _ScalarItem):
        state.offset += item.size
    elif type(item) is _Label:
        state.labels[item.name] = state.offset
    elif type(item) is _VarAssign:
        # Evaluate only if every name is known at this point
        names_are_known = all(
            name == _icitte_name or name in state.variables or name in state.labels
            for name in _Gen._names_of_expr(item.expr)
        )

        if names_are_known:
            # Evaluate the expression and keep the result
            state.variables[item.name] = _Gen._eval_item_expr(item, state, True)
    elif type(item) is _SetOffset:
        state.offset = item.val
    elif type(item) is _AlignOffset:
        state.offset = _Gen._align_offset(state.offset, item)
    elif isinstance(item, _Leb128Int):
        leb128_val = _Gen._eval_item_expr(item, state)

        # ULEB128 cannot encode a negative value
        if type(item) is _ULeb128Int and leb128_val < 0:
            _raise_error_for_item(
                "Invalid expression `{}`: unexpected negative result {:,} for a ULEB128 encoding".format(
                    item.expr_str, leb128_val
                ),
                item,
            )

        # Add the evaluation result to the variable-length item
        # instance values.
        instance_vals.append(leb128_val)

        # The encoding size depends on the value
        state.offset += _Gen._leb128_size_for_val(
            leb128_val, type(item) is _SLeb128Int
        )
    elif type(item) is _Rep:
        mul = _Gen._eval_item_expr(item, state)

        # Cannot repeat a negative number of times
        if mul < 0:
            _raise_error_for_item(
                "Invalid expression `{}`: unexpected negative result {:,}".format(
                    item.expr_str, mul
                ),
                item,
            )

        # Add to variable-length item instance values
        instance_vals.append(mul)

        # Process the repeated item `mul` times
        for _ in range(mul):
            _Gen._compute_vl_instance_vals(item.item, state, instance_vals)
    elif type(item) is _Cond:
        cond_val = _Gen._eval_item_expr(item, state)

        # Add to variable-length item instance values
        instance_vals.append(cond_val)

        # Process the conditional item if needed
        if cond_val:
            _Gen._compute_vl_instance_vals(item.item, state, instance_vals)
    elif type(item) is _Group:
        # The labels of this group must not outlive it
        outer_labels = state.labels.copy()

        for subitem in item.items:
            _Gen._compute_vl_instance_vals(subitem, state, instance_vals)

        state.labels = outer_labels

    return instance_vals
1710
676f6189
PP
# Dry handler for the items which don't change the offset: returns
# `next_vl_instance` as is, leaving `state` untouched.
def _update_offset_noop(self, item: _Item, state: _GenState, next_vl_instance: int):
    return next_vl_instance
05f81895 1713
676f6189
PP
# Dry handler for a scalar item: advances `state.offset` by the size of
# `item` (`item.size`) and returns `next_vl_instance` as is.
def _dry_handle_scalar_item(
    self, item: _ScalarItem, state: _GenState, next_vl_instance: int
):
    state.offset += item.size
    return next_vl_instance
2adf4336 1719
676f6189
PP
# Dry handler for a LEB128 integer item: advances `state.offset` by the
# encoding size of the precomputed value of this instance, and returns
# the index of the next variable-length item instance.
def _dry_handle_leb128_int_item(
    self, item: _Leb128Int, state: _GenState, next_vl_instance: int
):
    # Get the value from `self._vl_instance_vals` _before_
    # incrementing `next_vl_instance` to honor the order of
    # _compute_vl_instance_vals().
    precomputed_val = self._vl_instance_vals[next_vl_instance]
    is_signed = type(item) is _SLeb128Int
    state.offset += self._leb128_size_for_val(precomputed_val, is_signed)

    # Consumed this instance
    return next_vl_instance + 1
2adf4336 1731
676f6189
PP
1732 def _dry_handle_group_item(
1733 self, item: _Group, state: _GenState, next_vl_instance: int
1734 ):
2adf4336 1735 for subitem in item.items:
676f6189 1736 next_vl_instance = self._dry_handle_item(subitem, state, next_vl_instance)
2adf4336 1737
676f6189 1738 return next_vl_instance
2adf4336 1739
676f6189 1740 def _dry_handle_rep_item(self, item: _Rep, state: _GenState, next_vl_instance: int):
05f81895
PP
1741 # Get the value from `self._vl_instance_vals` _before_
1742 # incrementing `next_vl_instance` to honor the order of
1743 # _compute_vl_instance_vals().
1744 mul = self._vl_instance_vals[next_vl_instance]
1745 next_vl_instance += 1
2adf4336
PP
1746
1747 for _ in range(mul):
676f6189 1748 next_vl_instance = self._dry_handle_item(item.item, state, next_vl_instance)
2adf4336 1749
676f6189 1750 return next_vl_instance
2adf4336 1751
27d52a19
PP
1752 def _dry_handle_cond_item(
1753 self, item: _Cond, state: _GenState, next_vl_instance: int
1754 ):
1755 # Get the value from `self._vl_instance_vals` _before_
1756 # incrementing `next_vl_instance` to honor the order of
1757 # _compute_vl_instance_vals().
1758 val = self._vl_instance_vals[next_vl_instance]
1759 next_vl_instance += 1
1760
1761 if val:
1762 next_vl_instance = self._dry_handle_item(item.item, state, next_vl_instance)
1763
1764 return next_vl_instance
1765
676f6189
PP
# Dry handler for an offset alignment item: sets `state.offset` to what
# self._align_offset() returns for the current offset and `item`, and
# returns `next_vl_instance` as is.
def _dry_handle_align_offset_item(
    self, item: _AlignOffset, state: _GenState, next_vl_instance: int
):
    state.offset = self._align_offset(state.offset, item)
    return next_vl_instance
1771
# Dry handler for an offset setting item: sets `state.offset` to the
# value of `item` (`item.val`) and returns `next_vl_instance` as is.
def _dry_handle_set_offset_item(
    self, item: _SetOffset, state: _GenState, next_vl_instance: int
):
    state.offset = item.val
    return next_vl_instance
1777
# Updates `state.offset` considering the generated data of `item`,
# without generating any, and returns the updated next
# variable-length item instance.
#
# This dispatches to the function which `self._dry_handle_item_funcs`
# holds for the exact type of `item` (a missing type means a
# `KeyError`).
def _dry_handle_item(self, item: _Item, state: _GenState, next_vl_instance: int):
    return self._dry_handle_item_funcs[type(item)](item, state, next_vl_instance)
2adf4336
PP
1783
# Handles the byte item `item`: appends its value (`item.val`) to the
# generated data and advances `state.offset` by its size.
#
# Returns `next_vl_instance` as is.
def _handle_byte_item(self, item: _Byte, state: _GenState, next_vl_instance: int):
    self._data.append(item.val)
    state.offset += item.size
    return next_vl_instance
2adf4336
PP
1789
# Handles the string item `item`: appends its data (`item.data`) to
# the generated data and advances `state.offset` by its size.
#
# Returns `next_vl_instance` as is.
def _handle_str_item(self, item: _Str, state: _GenState, next_vl_instance: int):
    self._data += item.data
    state.offset += item.size
    return next_vl_instance
2adf4336
PP
1795
# Handles the byte order setting item `item`: sets the current byte
# order (`state.bo`) to `item.bo`.
#
# Returns `next_vl_instance` as is.
def _handle_set_bo_item(
    self, item: _SetBo, state: _GenState, next_vl_instance: int
):
    # Update current byte order
    state.bo = item.bo
    return next_vl_instance
2adf4336
PP
1803
# Handles the variable assignment item `item`: evaluates its
# expression within `state` (a `float` result is allowed) and stores
# the result as the variable named `item.name`.
#
# Returns `next_vl_instance` as is.
def _handle_var_assign_item(
    self, item: _VarAssign, state: _GenState, next_vl_instance: int
):
    # Update variable
    state.variables[item.name] = self._eval_item_expr(item, state, True)
    return next_vl_instance
71aaa3f7 1811
# Handles the fixed-length integer item `item` having the value `val`:
# validates that `val` fits `item.len` bits (as a signed or as an
# unsigned integer) and appends its encoding to the generated data.
#
# This method doesn't update `state.offset`: _handle_fl_num_item()
# does it.
def _handle_fl_int_item(self, val: int, item: _FlNum, state: _GenState):
    # Valid range: from the lowest signed value to the highest
    # unsigned value
    min_val = -(2 ** (item.len - 1))
    max_val = 2**item.len - 1

    if not (min_val <= val <= max_val):
        _raise_error_for_item(
            "Value {:,} is outside the {}-bit range when evaluating expression `{}` at byte offset {:,}".format(
                val, item.len, item.expr_str, state.offset
            ),
            item,
        )

    # No byte order means big endian here
    is_be = state.bo in (None, ByteOrder.BE)

    # Encode on 64 bits (to extend the sign bit whatever the value of
    # `item.len`).
    bo_char = ">" if is_be else "<"
    type_char = "Q" if val >= 0 else "q"
    encoded = struct.pack("{}{}".format(bo_char, type_char), val)

    # Keep only the requested length
    byte_count = item.len // 8

    if is_be:
        # Big endian: the least significant bytes are the last ones
        encoded = encoded[-byte_count:]
    else:
        # Little endian: the least significant bytes are the first ones
        assert state.bo == ByteOrder.LE
        encoded = encoded[:byte_count]

    # Append to current bytes
    self._data += encoded
269f6eb3
PP
1846
# Handles the fixed-length floating point number item `item` having
# the value `val`: validates that `item.len` is 32 or 64, and appends
# the encoding of `val` to the generated data.
#
# This method doesn't update `state.offset`.
def _handle_fl_float_item(self, val: float, item: _FlNum, state: _GenState):
    # Only single and double precision are supported
    if item.len not in (32, 64):
        _raise_error_for_item(
            "Invalid {}-bit length for a fixed-length floating point number (value {:,})".format(
                item.len, val
            ),
            item,
        )

    # No byte order means big endian here
    bo_char = ">" if state.bo in (None, ByteOrder.BE) else "<"
    type_char = "f" if item.len == 32 else "d"

    # Encode and append the result
    self._data += struct.pack("{}{}".format(bo_char, type_char), val)
1866
1867 # Handles the fixed-length number item `item`.
1868 def _handle_fl_num_item(
1869 self, item: _FlNum, state: _GenState, next_vl_instance: int
1870 ):
1871 # Compute value
e57a18e1 1872 val = self._eval_item_expr(item, state, True)
269f6eb3
PP
1873
1874 # Validate current byte order
1875 if state.bo is None and item.len > 8:
1876 _raise_error_for_item(
1877 "Current byte order isn't defined at first fixed-length number (`{}`) to encode on more than 8 bits".format(
1878 item.expr_str
1879 ),
1880 item,
1881 )
1882
1883 # Handle depending on type
1884 if type(val) is int:
1885 self._handle_fl_int_item(val, item, state)
1886 else:
1887 assert type(val) is float
1888 self._handle_fl_float_item(val, item, state)
1889
1890 # Update offset
1891 state.offset += item.size
1892
05f81895
PP
1893 return next_vl_instance
1894
1895 # Handles the LEB128 integer item `item`.
1896 def _handle_leb128_int_item(
1897 self, item: _Leb128Int, state: _GenState, next_vl_instance: int
1898 ):
1899 # Get the precomputed value
1900 val = self._vl_instance_vals[next_vl_instance]
1901
1902 # Size in bytes
1903 size = self._leb128_size_for_val(val, type(item) is _SLeb128Int)
1904
1905 # For each byte
1906 for _ in range(size):
1907 # Seven LSBs, MSB of the byte set (continue)
1908 self._data.append((val & 0x7F) | 0x80)
1909 val >>= 7
1910
1911 # Clear MSB of last byte (stop)
1912 self._data[-1] &= ~0x80
1913
1914 # Consumed this instance
1915 return next_vl_instance + 1
71aaa3f7 1916
2adf4336
PP
# Handles the group item `item`, only removing the immediate labels
# from `state.labels` if `remove_immediate_labels` is `True`.
#
# This works in two passes over the contained items:
#
# 1. A dry pass, with a temporary state starting at the current
#    offset, to find the value of each immediate (not nested) label
#    and add it to `state.labels`.
#
# 2. The actual pass, with `state`, during which any contained item
#    may therefore refer to any immediate label, even a "future" one.
#
# Returns the updated next variable-length item instance index.
def _handle_group_item(
    self,
    item: _Group,
    state: _GenState,
    next_vl_instance: int,
    remove_immediate_labels: bool = True,
):
    # Dry pass: only the offset of this temporary state matters
    dry_state = _GenState({}, {}, state.offset, None)
    dry_next_vl_instance = next_vl_instance
    immediate_label_names = set()  # type: Set[str]

    for subitem in item.items:
        if type(subitem) is _Label:
            # New immediate label at the current dry offset
            immediate_label_names.add(subitem.name)
            state.labels[subitem.name] = dry_state.offset

        dry_next_vl_instance = self._dry_handle_item(
            subitem, dry_state, dry_next_vl_instance
        )

    # Actual pass
    cur_vl_instance = next_vl_instance

    for subitem in item.items:
        cur_vl_instance = self._handle_item(subitem, state, cur_vl_instance)

    # Remove immediate labels if required so that outer items won't
    # reach inner labels.
    if remove_immediate_labels:
        for label_name in immediate_label_names:
            del state.labels[label_name]

    return cur_vl_instance
71aaa3f7 1953
2adf4336 1954 # Handles the repetition item `item`.
05f81895
PP
1955 def _handle_rep_item(self, item: _Rep, state: _GenState, next_vl_instance: int):
1956 # Get the precomputed repetition count
1957 mul = self._vl_instance_vals[next_vl_instance]
1958
1959 # Consumed this instance
1960 next_vl_instance += 1
71aaa3f7 1961
2adf4336 1962 for _ in range(mul):
05f81895 1963 next_vl_instance = self._handle_item(item.item, state, next_vl_instance)
71aaa3f7 1964
05f81895 1965 return next_vl_instance
71aaa3f7 1966
27d52a19
PP
1967 # Handles the conditional item `item`.
1968 def _handle_cond_item(self, item: _Rep, state: _GenState, next_vl_instance: int):
1969 # Get the precomputed conditional value
1970 val = self._vl_instance_vals[next_vl_instance]
1971
1972 # Consumed this instance
1973 next_vl_instance += 1
1974
1975 if val:
1976 next_vl_instance = self._handle_item(item.item, state, next_vl_instance)
1977
1978 return next_vl_instance
1979
2adf4336
PP
# Handles the offset setting item `item`: sets `state.offset` to the
# value of `item` (`item.val`) without generating any data.
#
# Returns `next_vl_instance` as is.
def _handle_set_offset_item(
    self, item: _SetOffset, state: _GenState, next_vl_instance: int
):
    state.offset = item.val
    return next_vl_instance
2adf4336 1986
676f6189
PP
1987 # Handles offset alignment item `item` (adds padding).
1988 def _handle_align_offset_item(
1989 self, item: _AlignOffset, state: _GenState, next_vl_instance: int
1990 ):
1991 init_offset = state.offset
1992 state.offset = self._align_offset(state.offset, item)
1993 self._data += bytes([item.pad_val] * (state.offset - init_offset))
1994 return next_vl_instance
1995
# Handles the label item `item`.
#
# There's nothing to do here: _handle_group_item() already added the
# immediate labels of a group to the state before handling its items.
#
# Returns `next_vl_instance` as is.
def _handle_label_item(self, item: _Label, state: _GenState, next_vl_instance: int):
    return next_vl_instance
2adf4336
PP
1999
# Handles the item `item`, returning the updated next variable-length
# item instance.
#
# This dispatches to the handler which `self._item_handlers` holds for
# the exact type of `item` (a missing type means a `KeyError`).
def _handle_item(self, item: _Item, state: _GenState, next_vl_instance: int):
    return self._item_handlers[type(item)](item, state, next_vl_instance)
2adf4336
PP
2004
# Generates the data (`self._data`) and final state
# (`self._final_state`) from `group` and the initial state `state`.
#
# This method also installs the two dispatch tables (item type to
# handler) which _handle_item() and _dry_handle_item() rely on.
def _gen(self, group: _Group, state: _GenState):
    # Nothing generated yet
    self._data = bytearray()

    # Handlers shared by more than one item type
    leb128_int_handler = self._handle_leb128_int_item
    dry_leb128_int_handler = self._dry_handle_leb128_int_item
    dry_scalar_handler = self._dry_handle_scalar_item
    dry_noop_handler = self._update_offset_noop

    # Item handlers
    self._item_handlers = {
        _AlignOffset: self._handle_align_offset_item,
        _Byte: self._handle_byte_item,
        _Cond: self._handle_cond_item,
        _FlNum: self._handle_fl_num_item,
        _Group: self._handle_group_item,
        _Label: self._handle_label_item,
        _Rep: self._handle_rep_item,
        _SetBo: self._handle_set_bo_item,
        _SetOffset: self._handle_set_offset_item,
        _SLeb128Int: leb128_int_handler,
        _Str: self._handle_str_item,
        _ULeb128Int: leb128_int_handler,
        _VarAssign: self._handle_var_assign_item,
    }  # type: Dict[type, Callable[[Any, _GenState, int], int]]

    # Dry item handlers (only updates the state offset)
    self._dry_handle_item_funcs = {
        _AlignOffset: self._dry_handle_align_offset_item,
        _Byte: dry_scalar_handler,
        _Cond: self._dry_handle_cond_item,
        _FlNum: dry_scalar_handler,
        _Group: self._dry_handle_group_item,
        _Label: dry_noop_handler,
        _Rep: self._dry_handle_rep_item,
        _SetBo: dry_noop_handler,
        _SetOffset: self._dry_handle_set_offset_item,
        _SLeb128Int: dry_leb128_int_handler,
        _Str: dry_scalar_handler,
        _ULeb128Int: dry_leb128_int_handler,
        _VarAssign: dry_noop_handler,
    }  # type: Dict[type, Callable[[Any, _GenState, int], int]]

    # Handle the main group item starting with the first
    # variable-length item instance, _not_ removing the immediate
    # labels because the `labels` property offers them.
    self._handle_group_item(group, state, 0, False)

    # The handlers updated `state` in place: it's now the final state
    self._final_state = state
71aaa3f7
PP
2051
2052
# Parses the input string `normand` and returns a `ParseResult`
# instance containing the encoded bytes.
#
# `init_variables` is a dictionary of initial variable names (valid
# Python names) to integral values. A variable name must not be the
# reserved name `ICITTE`. `None` means no initial variables.
#
# `init_labels` is a dictionary of initial label names (valid Python
# names) to integral values. A label name must not be the reserved name
# `ICITTE`. `None` means no initial labels.
#
# `init_offset` is the initial offset.
#
# `init_byte_order` is the initial byte order.
#
# Raises `ParseError` on any parsing error.
def parse(
    normand: str,
    init_variables: Optional[VariablesT] = None,
    init_labels: Optional[LabelsT] = None,
    init_offset: int = 0,
    init_byte_order: Optional[ByteOrder] = None,
):
    # Only replace a missing (`None`) dictionary: a provided one, even
    # if empty, is passed as is.
    variables = {} if init_variables is None else init_variables
    labels = {} if init_labels is None else init_labels

    # Parse first, and then generate from the parsing result
    parsed = _Parser(normand, variables, labels).res
    gen = _Gen(parsed, variables, labels, init_offset, init_byte_order)

    return ParseResult._create(  # pyright: ignore[reportPrivateUsage]
        gen.data, gen.variables, gen.labels, gen.offset, gen.bo
    )
2092
2093
# Parses the command-line arguments of the current process and returns
# the resulting `argparse` namespace.
#
# The accepted arguments are `--offset`, `-b`/`--byte-order`, `--var`,
# `-l`/`--label`, `--version`, and an optional positional input path.
def _parse_cli_args():
    from argparse import ArgumentParser

    # Argument specifications, in declaration order: names (option
    # strings or positional name) and add_argument() keyword arguments.
    arg_specs = [
        (
            ("--offset",),
            dict(
                metavar="OFFSET",
                action="store",
                type=int,
                default=0,
                help="initial offset (positive)",
            ),
        ),
        (
            ("-b", "--byte-order"),
            dict(
                metavar="BO",
                choices=["be", "le"],
                type=str,
                help="initial byte order (`be` or `le`)",
            ),
        ),
        (
            ("--var",),
            dict(
                metavar="NAME=VAL",
                action="append",
                help="add an initial variable (may be repeated)",
            ),
        ),
        (
            ("-l", "--label"),
            dict(
                metavar="NAME=VAL",
                action="append",
                help="add an initial label (may be repeated)",
            ),
        ),
        (
            ("--version",),
            dict(action="version", version="Normand {}".format(__version__)),
        ),
        (
            ("path",),
            dict(
                metavar="PATH",
                action="store",
                nargs="?",
                help="input path (none means standard input)",
            ),
        ),
    ]

    # Build parser
    parser = ArgumentParser()

    for names, kwargs in arg_specs:
        parser.add_argument(*names, **kwargs)

    # Parse
    return parser.parse_args()
2142
2143
# Raises a command-line error: a `RuntimeError` of which the message is
# `msg` prefixed with `Command-line error: `.
def _raise_cli_error(msg: str) -> NoReturn:
    full_msg = "Command-line error: {}".format(msg)

    raise RuntimeError(full_msg)
2147
2148
# Returns a dictionary of names to integers built from the list of
# strings `args`, each entry having the `NAME=VAL` form where `NAME`
# matches `_py_name_pat` and `VAL` is one or more digits.
#
# Returns an empty dictionary if `args` is `None`.
#
# When a name is repeated, its last value wins.
#
# Raises a command-line error (see _raise_cli_error()) for any entry
# not having the expected form.
def _dict_from_arg(args: Optional[List[str]]):
    name_vals = {}  # type: LabelsT

    if args is not None:
        # The same pattern applies to all the entries
        assign_pat = r"({})=(\d+)$".format(_py_name_pat.pattern)

        for assign in args:
            match = re.match(assign_pat, assign)

            if match is None:
                _raise_cli_error("Invalid assignment {}".format(assign))

            # Group 1 is the name, group 2 is the value
            name, val = match.group(1, 2)
            name_vals[name] = int(val)

    return name_vals
2166
2167
# CLI entry point without exception handling.
#
# This function:
#
# 1. Parses the command-line arguments.
#
# 2. Reads the Normand input from the file at the positional path, or
#    from the standard input without a path.
#
# 3. Parses the input with the initial variables, labels, offset, and
#    byte order given on the command line.
#
# 4. Writes the resulting bytes to the standard output (binary).
#
# On parsing error (`ParseError`), this function calls _fail() with the
# text location and message of the error (exits with status 1). Any
# other exception, including the command-line errors which
# _raise_cli_error() raises, propagates to the caller.
def _try_run_cli():
    import os.path

    # Parse arguments
    args = _parse_cli_args()

    # Read input
    if args.path is None:
        normand = sys.stdin.read()
    else:
        with open(args.path) as f:
            normand = f.read()

    # Variables and labels
    variables = typing.cast(VariablesT, _dict_from_arg(args.var))
    labels = _dict_from_arg(args.label)

    # Validate offset (the message includes the offending value)
    if args.offset < 0:
        _raise_cli_error("Invalid negative offset {}".format(args.offset))

    # Validate and set byte order
    bo = None  # type: Optional[ByteOrder]

    if args.byte_order is not None:
        if args.byte_order == "be":
            bo = ByteOrder.BE
        else:
            # The argument parser only accepts `be` and `le`
            assert args.byte_order == "le"
            bo = ByteOrder.LE

    # Parse
    try:
        res = parse(normand, variables, labels, args.offset, bo)
    except ParseError as exc:
        # Prefix the location with the absolute input path, if any
        prefix = ""

        if args.path is not None:
            prefix = "{}:".format(os.path.abspath(args.path))

        _fail(
            "{}{}:{} - {}".format(
                prefix, exc.text_loc.line_no, exc.text_loc.col_no, str(exc)
            )
        )

    # Print
    sys.stdout.buffer.write(res.data)
2217
2218
# Prints the exception message `msg` to the standard error, appending
# a period if `msg` doesn't already end with one, and exits with
# status 1.
def _fail(msg: str) -> NoReturn:
    final_msg = msg if msg.endswith(".") else msg + "."

    print(final_msg, file=sys.stderr)
    sys.exit(1)
2226
2227
# CLI entry point.
#
# Runs _try_run_cli(), reporting any exception (`Exception` subclass)
# through _fail() with the exception message.
def _run_cli():
    try:
        _try_run_cli()
    except Exception as err:
        _fail(str(err))
2234
2235
# Run the command-line tool when executing this module itself
if __name__ == "__main__":
    _run_cli()
This page took 0.139566 seconds and 4 git commands to generate.