Add many string features
[normand.git] / normand / normand.py
CommitLineData
71aaa3f7
PP
1# The MIT License (MIT)
2#
3# Copyright (c) 2023 Philippe Proulx <eeppeliteloop@gmail.com>
4#
5# Permission is hereby granted, free of charge, to any person obtaining
6# a copy of this software and associated documentation files (the
7# "Software"), to deal in the Software without restriction, including
8# without limitation the rights to use, copy, modify, merge, publish,
9# distribute, sublicense, and/or sell copies of the Software, and to
10# permit persons to whom the Software is furnished to do so, subject to
11# the following conditions:
12#
13# The above copyright notice and this permission notice shall be
14# included in all copies or substantial portions of the Software.
15#
16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
5a993698
PP
24# This module is the portable Normand processor. It offers both the
25# parse() function and the command-line tool (run the module itself)
26# without external dependencies except a `typing` module for Python 3.4.
27#
28# Feel free to copy this module file to your own project to use Normand.
29#
30# Upstream repository: <https://github.com/efficios/normand>.
31
71aaa3f7 32__author__ = "Philippe Proulx"
7a7b31e8 33__version__ = "0.19.0"
71aaa3f7 34__all__ = [
320644e2
PP
35 "__author__",
36 "__version__",
71aaa3f7 37 "ByteOrder",
320644e2 38 "LabelsT",
71aaa3f7
PP
39 "parse",
40 "ParseError",
f5dcb24c 41 "ParseErrorMessage",
71aaa3f7 42 "ParseResult",
e57a18e1 43 "TextLocation",
e57a18e1 44 "VariablesT",
71aaa3f7
PP
45]
46
47import re
48import abc
49import ast
50import sys
320644e2 51import copy
71aaa3f7 52import enum
05f81895 53import math
71aaa3f7 54import struct
e57a18e1
PP
55import typing
56from typing import Any, Set, Dict, List, Union, Pattern, Callable, NoReturn, Optional
71aaa3f7
PP
57
58
59# Text location (line and column numbers).
class TextLocation:
    # A position within the source text, as a line number and a column
    # number.
    #
    # Instances are built through _create(); calling the constructor
    # directly raises `NotImplementedError`.

    def __init__(*args, **kwargs):  # type: ignore
        raise NotImplementedError

    def _init(self, line_no: int, col_no: int):
        self._line_no, self._col_no = line_no, col_no

    @classmethod
    def _create(cls, line_no: int, col_no: int):
        # Bypass __init__(), which always raises
        loc = cls.__new__(cls)
        loc._init(line_no, col_no)
        return loc

    # Line number.
    @property
    def line_no(self):
        return self._line_no

    # Column number.
    @property
    def col_no(self):
        return self._col_no

    def __repr__(self):
        fields = (self._line_no, self._col_no)
        return "TextLocation({}, {})".format(*fields)
2adf4336 86
71aaa3f7
PP
87
88# Any item.
class _Item:
    # Base of all items: only remembers where the item was found in the
    # source text.
    def __init__(self, text_loc: TextLocation):
        self._text_loc = text_loc

    # Source text location.
    @property
    def text_loc(self):
        loc = self._text_loc
        return loc
97
2adf4336
PP
98
99# Scalar item.
class _ScalarItem(_Item):
    # An item which exposes a `size` property; concrete subclasses
    # provide it.

    # Returns the size, in bytes, of this item.
    @property
    @abc.abstractmethod
    def size(self) -> int:
        ...
106
107
108# A repeatable item.
2adf4336 109class _RepableItem:
71aaa3f7
PP
110 pass
111
112
113# Single byte.
class _Byte(_ScalarItem, _RepableItem):
    # A single byte value found at some source text location.
    def __init__(self, val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val

    # Byte value.
    @property
    def val(self):
        return self._val

    # Always a single byte.
    @property
    def size(self):
        return 1

    def __repr__(self):
        fields = (hex(self._val), repr(self._text_loc))
        return "_Byte({}, {})".format(*fields)
71aaa3f7
PP
130
131
7a7b31e8
PP
132# Literal string.
class _LitStr(_ScalarItem, _RepableItem):
    # A literal string which is already encoded as bytes.
    def __init__(self, data: bytes, text_loc: TextLocation):
        super().__init__(text_loc)
        self._data = data

    # Encoded bytes.
    @property
    def data(self):
        return self._data

    # Number of encoded bytes.
    @property
    def size(self):
        encoded = self._data
        return len(encoded)

    def __repr__(self):
        fields = (repr(self._data), repr(self._text_loc))
        return "_LitStr({}, {})".format(*fields)
71aaa3f7
PP
149
150
151# Byte order.
@enum.unique
class ByteOrder(enum.Enum):
    # Byte order; the value of each member is its lowercase short name.

    BE = "be"  # Big endian
    LE = "le"  # Little endian
159
160
2adf4336
PP
161# Byte order setting.
class _SetBo(_Item):
    # Item which sets the current byte order.
    def __init__(self, bo: ByteOrder, text_loc: TextLocation):
        super().__init__(text_loc)
        self._bo = bo

    # Byte order to set.
    @property
    def bo(self):
        return self._bo

    def __repr__(self):
        fields = (repr(self._bo), repr(self._text_loc))
        return "_SetBo({}, {})".format(*fields)
71aaa3f7
PP
173
174
175# Label.
class _Label(_Item):
    # A named label found at some source text location.
    def __init__(self, name: str, text_loc: TextLocation):
        super().__init__(text_loc)
        self._name = name

    # Label name.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        fields = (repr(self._name), repr(self._text_loc))
        return "_Label({}, {})".format(*fields)
71aaa3f7
PP
188
189
2adf4336
PP
190# Offset setting.
class _SetOffset(_Item):
    # Item which sets the current offset.
    def __init__(self, val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val

    # Offset value (bytes).
    @property
    def val(self):
        return self._val

    def __repr__(self):
        fields = (repr(self._val), repr(self._text_loc))
        return "_SetOffset({}, {})".format(*fields)
203
204
205# Offset alignment.
class _AlignOffset(_Item):
    # Item which aligns the current offset, with a padding byte value.
    def __init__(self, val: int, pad_val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val, self._pad_val = val, pad_val

    # Alignment value (bits).
    @property
    def val(self):
        return self._val

    # Padding byte value.
    @property
    def pad_val(self):
        return self._pad_val

    def __repr__(self):
        fields = (repr(self._val), repr(self._pad_val), repr(self._text_loc))
        return "_AlignOffset({}, {}, {})".format(*fields)
71aaa3f7
PP
226
227
228# Mixin of containing an AST expression and its string.
229class _ExprMixin:
230 def __init__(self, expr_str: str, expr: ast.Expression):
231 self._expr_str = expr_str
232 self._expr = expr
233
234 # Expression string.
235 @property
236 def expr_str(self):
237 return self._expr_str
238
239 # Expression node to evaluate.
240 @property
241 def expr(self):
242 return self._expr
243
244
25ca454b
PP
245# Fill until some offset.
class _FillUntil(_Item, _ExprMixin):
    # Item which fills, with a padding byte value, until the offset
    # given by an expression.
    def __init__(
        self, expr_str: str, expr: ast.Expression, pad_val: int, text_loc: TextLocation
    ):
        # Both bases need explicit initialization: _Item.__init__()
        # doesn't chain to the mixin.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._pad_val = pad_val

    # Padding byte value.
    @property
    def pad_val(self):
        return self._pad_val

    def __repr__(self):
        fields = [
            repr(part)
            for part in (self._expr_str, self._expr, self._pad_val, self._text_loc)
        ]
        return "_FillUntil({}, {}, {}, {})".format(*fields)
266
267
2adf4336
PP
268# Variable assignment.
class _VarAssign(_Item, _ExprMixin):
    # Assignment of the value of an expression to a named variable.
    def __init__(
        self, name: str, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        # Both bases need explicit initialization: _Item.__init__()
        # doesn't chain to the mixin.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._name = name

    # Name.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        fields = [
            repr(part)
            for part in (self._name, self._expr_str, self._expr, self._text_loc)
        ]
        return "_VarAssign({}, {}, {}, {})".format(*fields)
289
290
269f6eb3
PP
291# Fixed-length number, possibly needing more than one byte.
class _FlNum(_ScalarItem, _RepableItem, _ExprMixin):
    # Fixed-length number: an expression and a length in bits.
    def __init__(
        self, expr_str: str, expr: ast.Expression, len: int, text_loc: TextLocation
    ):
        # Both bases need explicit initialization: _Item.__init__()
        # doesn't chain to the mixin.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._len = len

    # Length (bits).
    @property
    def len(self):
        return self._len

    # Size in bytes: the length in bits, floor-divided by eight.
    @property
    def size(self):
        bit_len = self._len
        return bit_len // 8

    def __repr__(self):
        fields = [
            repr(part)
            for part in (self._expr_str, self._expr, self._len, self._text_loc)
        ]
        return "_FlNum({}, {}, {}, {})".format(*fields)
316
317
05f81895
PP
318# LEB128 integer.
class _Leb128Int(_Item, _RepableItem, _ExprMixin):
    # Common base of the LEB128 integer items: only holds an
    # expression.
    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)

    def __repr__(self):
        # Use the name of the concrete class so that subclasses don't
        # need their own __repr__()
        cls_name = self.__class__.__name__
        fields = [repr(part) for part in (self._expr_str, self._expr, self._text_loc)]
        return "{}({}, {}, {})".format(cls_name, *fields)
331
332
333# Unsigned LEB128 integer.
class _ULeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    # Unsigned flavour; everything is inherited from `_Leb128Int`.
    pass
336
337
338# Signed LEB128 integer.
class _SLeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    # Signed flavour; everything is inherited from `_Leb128Int`.
    pass
341
342
7a7b31e8
PP
343# String.
class _Str(_Item, _RepableItem, _ExprMixin):
    # String item: an expression and the name of the codec to use to
    # encode its value.
    def __init__(
        self, expr_str: str, expr: ast.Expression, codec: str, text_loc: TextLocation
    ):
        # Both bases need explicit initialization: _Item.__init__()
        # doesn't chain to the mixin.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._codec = codec

    # Codec name.
    @property
    def codec(self):
        return self._codec

    def __repr__(self):
        # Exactly one argument per placeholder: an extra leading class
        # name would shift all the fields and drop the text location,
        # because str.format() ignores surplus positional arguments.
        return "_Str({}, {}, {}, {})".format(
            repr(self._expr_str),
            repr(self._expr),
            repr(self._codec),
            repr(self._text_loc),
        )
365
366
71aaa3f7 367# Group of items.
class _Group(_Item, _RepableItem):
    # A list of items.
    def __init__(self, items: List[_Item], text_loc: TextLocation):
        super().__init__(text_loc)
        self._items = items

    # Contained items.
    @property
    def items(self):
        return self._items

    def __repr__(self):
        fields = (repr(self._items), repr(self._text_loc))
        return "_Group({}, {})".format(*fields)
71aaa3f7
PP
380
381
382# Repetition item.
2adf4336
PP
class _Rep(_Item, _ExprMixin):
    # Repetition: an item to repeat and an expression.
    def __init__(
        self, item: _Item, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        # Both bases need explicit initialization: _Item.__init__()
        # doesn't chain to the mixin.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._item = item

    # Item to repeat.
    @property
    def item(self):
        return self._item

    def __repr__(self):
        fields = [
            repr(part)
            for part in (self._item, self._expr_str, self._expr, self._text_loc)
        ]
        return "_Rep({}, {}, {}, {})".format(*fields)
403
404
27d52a19
PP
405# Conditional item.
class _Cond(_Item, _ExprMixin):
    # Conditional: a condition expression and one item for each
    # outcome.
    def __init__(
        self,
        true_item: _Item,
        false_item: _Item,
        expr_str: str,
        expr: ast.Expression,
        text_loc: TextLocation,
    ):
        # Both bases need explicit initialization: _Item.__init__()
        # doesn't chain to the mixin.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._true_item, self._false_item = true_item, false_item

    # Item when condition is true.
    @property
    def true_item(self):
        return self._true_item

    # Item when condition is false.
    @property
    def false_item(self):
        return self._false_item

    def __repr__(self):
        fields = [
            repr(part)
            for part in (
                self._true_item,
                self._false_item,
                self._expr_str,
                self._expr,
                self._text_loc,
            )
        ]
        return "_Cond({}, {}, {}, {}, {})".format(*fields)
438
439
320644e2
PP
440# Macro definition item.
class _MacroDef(_Item):
    # Macro definition: a name, parameter names, and a group of items.
    def __init__(
        self, name: str, param_names: List[str], group: _Group, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        self._name, self._param_names, self._group = name, param_names, group

    # Name.
    @property
    def name(self):
        return self._name

    # Parameters.
    @property
    def param_names(self):
        return self._param_names

    # Contained items.
    @property
    def group(self):
        return self._group

    def __repr__(self):
        fields = [
            repr(part)
            for part in (self._name, self._param_names, self._group, self._text_loc)
        ]
        return "_MacroDef({}, {}, {}, {})".format(*fields)
472
473
474# Macro expansion parameter.
class _MacroExpParam:
    # Macro expansion parameter: an expression (string and AST node)
    # and its source text location.
    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation):
        self._expr_str, self._expr, self._text_loc = expr_str, expr, text_loc

    # Expression string.
    @property
    def expr_str(self):
        return self._expr_str

    # Expression.
    @property
    def expr(self):
        return self._expr

    # Source text location.
    @property
    def text_loc(self):
        return self._text_loc

    def __repr__(self):
        fields = [repr(part) for part in (self._expr_str, self._expr, self._text_loc)]
        return "_MacroExpParam({}, {}, {})".format(*fields)
500
501
502# Macro expansion item.
class _MacroExp(_Item, _RepableItem):
    # Macro expansion: the macro name and the expansion parameters.
    def __init__(
        self,
        name: str,
        params: List[_MacroExpParam],
        text_loc: TextLocation,
    ):
        super().__init__(text_loc)
        self._name, self._params = name, params

    # Name.
    @property
    def name(self):
        return self._name

    # Parameters.
    @property
    def params(self):
        return self._params

    def __repr__(self):
        fields = [repr(part) for part in (self._name, self._params, self._text_loc)]
        return "_MacroExp({}, {}, {})".format(*fields)
2adf4336
PP
530
531
f5dcb24c
PP
532# A parsing error message: a string and a text location.
class ParseErrorMessage:
    # A single message of a parsing error: a text and a source text
    # location.
    #
    # Instances are built through _create(); calling the constructor
    # directly raises `NotImplementedError`.

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    def _init(self, text: str, text_loc: TextLocation):
        self._text, self._text_loc = text, text_loc

    @classmethod
    def _create(cls, text: str, text_loc: TextLocation):
        # Bypass __init__(), which always raises
        msg = cls.__new__(cls)
        msg._init(text, text_loc)
        return msg

    # Message text.
    @property
    def text(self):
        return self._text

    # Source text location.
    @property
    def text_location(self):
        return self._text_loc
556
557
558# A parsing error containing one or more messages (`ParseErrorMessage`).
71aaa3f7
PP
class ParseError(RuntimeError):
    # A parsing error holding a list of `ParseErrorMessage` objects.
    #
    # Instances are built through _create(); calling the constructor
    # directly raises `NotImplementedError`.

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    def _init(self, msg: str, text_loc: TextLocation):
        # Initialize the `RuntimeError` part with the first message
        super().__init__(msg)
        self._msgs = []  # type: List[ParseErrorMessage]
        self._add_msg(msg, text_loc)

    @classmethod
    def _create(cls, msg: str, text_loc: TextLocation):
        # Bypass __init__(), which always raises
        exc = cls.__new__(cls)
        exc._init(msg, text_loc)
        return exc

    # Appends a new message (`msg` at `text_loc`) to this error.
    def _add_msg(self, msg: str, text_loc: TextLocation):
        new_msg = ParseErrorMessage._create(  # pyright: ignore[reportPrivateUsage]
            msg, text_loc
        )
        self._msgs.append(new_msg)

    # Parsing error messages.
    #
    # The first message is the most specific one.
    @property
    def messages(self):
        return self._msgs
71aaa3f7
PP
587
588
589# Raises a parsing error, forwarding the parameters to the constructor.
def _raise_error(msg: str, text_loc: TextLocation) -> NoReturn:
    # Builds a parsing error from `msg` and `text_loc`, and raises it.
    exc = ParseError._create(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise exc
592
593
f5dcb24c
PP
594# Adds a message to the parsing error `exc`.
def _add_error_msg(exc: ParseError, msg: str, text_loc: TextLocation):
    # Appends the message `msg`, located at `text_loc`, to the messages
    # of the parsing error `exc`.
    append_msg = exc._add_msg  # pyright: ignore[reportPrivateUsage]
    append_msg(msg, text_loc)
597
598
599# Appends a message to the parsing error `exc` and reraises it.
def _augment_error(exc: ParseError, msg: str, text_loc: TextLocation) -> NoReturn:
    # Appends the message `msg`, located at `text_loc`, to the parsing
    # error `exc`, and then raises `exc` again.
    exc._add_msg(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise exc
603
604
b2410769
PP
605# Returns a normalized version (so as to be parseable by int()) of
606# the constant integer string `s`, possibly negative, dealing with
607# any radix suffix.
608def _norm_const_int(s: str):
609 neg = ""
610 pos = s
611
612 if s.startswith("-"):
613 neg = "-"
614 pos = s[1:]
615
616 for r in "xXoObB":
617 if pos.startswith("0" + r):
618 # Already correct
619 return s
620
621 # Try suffix
622 asm_suf_base = {
623 "h": "x",
624 "H": "x",
625 "q": "o",
626 "Q": "o",
627 "o": "o",
628 "O": "o",
629 "b": "b",
630 "B": "B",
631 }
632
633 for suf in asm_suf_base:
634 if pos[-1] == suf:
635 s = "{}0{}{}".format(neg, asm_suf_base[suf], pos.rstrip(suf))
636
637 return s
638
639
7a7b31e8
PP
640# Encodes the string `s` using the codec `codec`, raising `ParseError`
641# with `text_loc` on encoding error.
def _encode_str(s: str, codec: str, text_loc: TextLocation):
    # Returns the bytes of `s` encoded with the codec `codec`.
    #
    # Raises `ParseError` at `text_loc` if `s` cannot be encoded.
    try:
        encoded = s.encode(codec)
    except UnicodeEncodeError:
        msg = "Cannot encode `{}` with the `{}` encoding".format(s, codec)
        _raise_error(msg, text_loc)

    return encoded
649
650
e57a18e1 651# Variables dictionary type (for type hints).
7a7b31e8 652VariablesT = Dict[str, Union[int, float, str]]
e57a18e1
PP
653
654
655# Labels dictionary type (for type hints).
656LabelsT = Dict[str, int]
71aaa3f7
PP
657
658
b2410769 659# Common patterns.
71aaa3f7 660_py_name_pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")
b2410769
PP
661_pos_const_int_pat = re.compile(
662 r"(?:0[Xx][A-Fa-f0-9]+|0[Oo][0-7]+|0[Bb][01]+|[A-Fa-f0-9]+[hH]|[0-7]+[qQoO]|[01]+[bB]|\d+)\b"
663)
664_const_int_pat = re.compile(r"(?P<neg>-)?(?:{})".format(_pos_const_int_pat.pattern))
665_const_float_pat = re.compile(
666 r"[-+]?(?:(?:\d*\.\d+)|(?:\d+\.))(?:[Ee][+-]?\d+)?(?=\W|)"
667)
71aaa3f7
PP
668
669
320644e2
PP
670# Macro definition dictionary.
671_MacroDefsT = Dict[str, _MacroDef]
672
673
71aaa3f7
PP
674# Normand parser.
675#
676# The constructor accepts a Normand input. After building, use the `res`
677# property to get the resulting main group.
678class _Parser:
679 # Builds a parser to parse the Normand input `normand`, parsing
680 # immediately.
e57a18e1 681 def __init__(self, normand: str, variables: VariablesT, labels: LabelsT):
71aaa3f7
PP
682 self._normand = normand
683 self._at = 0
684 self._line_no = 1
685 self._col_no = 1
686 self._label_names = set(labels.keys())
687 self._var_names = set(variables.keys())
320644e2 688 self._macro_defs = {} # type: _MacroDefsT
71aaa3f7
PP
689 self._parse()
690
691 # Result (main group).
692 @property
693 def res(self):
694 return self._res
695
320644e2
PP
696 # Macro definitions.
697 @property
698 def macro_defs(self):
699 return self._macro_defs
700
71aaa3f7
PP
701 # Current text location.
702 @property
703 def _text_loc(self):
e57a18e1 704 return TextLocation._create( # pyright: ignore[reportPrivateUsage]
71aaa3f7
PP
705 self._line_no, self._col_no
706 )
707
708 # Returns `True` if this parser is done parsing.
709 def _is_done(self):
710 return self._at == len(self._normand)
711
712 # Returns `True` if this parser isn't done parsing.
713 def _isnt_done(self):
714 return not self._is_done()
715
716 # Raises a parse error, creating it using the message `msg` and the
717 # current text location.
718 def _raise_error(self, msg: str) -> NoReturn:
719 _raise_error(msg, self._text_loc)
720
721 # Tries to make the pattern `pat` match the current substring,
722 # returning the match object and updating `self._at`,
723 # `self._line_no`, and `self._col_no` on success.
724 def _try_parse_pat(self, pat: Pattern[str]):
725 m = pat.match(self._normand, self._at)
726
727 if m is None:
728 return
729
730 # Skip matched string
731 self._at += len(m.group(0))
732
733 # Update line number
734 self._line_no += m.group(0).count("\n")
735
736 # Update column number
737 for i in reversed(range(self._at)):
738 if self._normand[i] == "\n" or i == 0:
739 if i == 0:
740 self._col_no = self._at + 1
741 else:
742 self._col_no = self._at - i
743
744 break
745
746 # Return match object
747 return m
748
749 # Expects the pattern `pat` to match the current substring,
750 # returning the match object and updating `self._at`,
751 # `self._line_no`, and `self._col_no` on success, or raising a parse
752 # error with the message `error_msg` on error.
753 def _expect_pat(self, pat: Pattern[str], error_msg: str):
754 # Match
755 m = self._try_parse_pat(pat)
756
757 if m is None:
758 # No match: error
759 self._raise_error(error_msg)
760
761 # Return match object
762 return m
763
764 # Pattern for _skip_ws_and_comments()
765 _ws_or_syms_or_comments_pat = re.compile(
25ca454b 766 r"(?:[\s/\\?&:;.,[\]_=|-]|#[^#]*?(?:\n|#))*"
71aaa3f7
PP
767 )
768
769 # Skips as many whitespaces, insignificant symbol characters, and
770 # comments as possible.
771 def _skip_ws_and_comments(self):
772 self._try_parse_pat(self._ws_or_syms_or_comments_pat)
773
320644e2
PP
774 # Pattern for _skip_ws()
775 _ws_pat = re.compile(r"\s*")
776
777 # Skips as many whitespaces as possible.
778 def _skip_ws(self):
779 self._try_parse_pat(self._ws_pat)
780
71aaa3f7
PP
781 # Pattern for _try_parse_hex_byte()
782 _nibble_pat = re.compile(r"[A-Fa-f0-9]")
783
784 # Tries to parse a hexadecimal byte, returning a byte item on
785 # success.
786 def _try_parse_hex_byte(self):
0e8e3169
PP
787 begin_text_loc = self._text_loc
788
71aaa3f7
PP
789 # Match initial nibble
790 m_high = self._try_parse_pat(self._nibble_pat)
791
792 if m_high is None:
793 # No match
794 return
795
796 # Expect another nibble
797 self._skip_ws_and_comments()
798 m_low = self._expect_pat(
799 self._nibble_pat, "Expecting another hexadecimal nibble"
800 )
801
802 # Return item
0e8e3169 803 return _Byte(int(m_high.group(0) + m_low.group(0), 16), begin_text_loc)
71aaa3f7
PP
804
805 # Patterns for _try_parse_bin_byte()
806 _bin_byte_bit_pat = re.compile(r"[01]")
6dd69a2a 807 _bin_byte_prefix_pat = re.compile(r"%+")
71aaa3f7
PP
808
809 # Tries to parse a binary byte, returning a byte item on success.
810 def _try_parse_bin_byte(self):
0e8e3169
PP
811 begin_text_loc = self._text_loc
812
71aaa3f7 813 # Match prefix
6dd69a2a
PP
814 m = self._try_parse_pat(self._bin_byte_prefix_pat)
815
816 if m is None:
71aaa3f7
PP
817 # No match
818 return
819
6dd69a2a
PP
820 # Expect as many bytes as there are `%` prefixes
821 items = [] # type: List[_Item]
71aaa3f7 822
6dd69a2a 823 for _ in range(len(m.group(0))):
71aaa3f7 824 self._skip_ws_and_comments()
6dd69a2a
PP
825 byte_text_loc = self._text_loc
826 bits = [] # type: List[str]
827
828 # Expect eight bits
829 for _ in range(8):
830 self._skip_ws_and_comments()
831 m = self._expect_pat(
832 self._bin_byte_bit_pat, "Expecting a bit (`0` or `1`)"
833 )
834 bits.append(m.group(0))
835
836 items.append(_Byte(int("".join(bits), 2), byte_text_loc))
71aaa3f7
PP
837
838 # Return item
6dd69a2a
PP
839 if len(items) == 1:
840 return items[0]
841
842 # As group
843 return _Group(items, begin_text_loc)
71aaa3f7
PP
844
845 # Patterns for _try_parse_dec_byte()
320644e2 846 _dec_byte_prefix_pat = re.compile(r"\$")
71aaa3f7
PP
847 _dec_byte_val_pat = re.compile(r"(?P<neg>-?)(?P<val>\d+)")
848
849 # Tries to parse a decimal byte, returning a byte item on success.
850 def _try_parse_dec_byte(self):
0e8e3169
PP
851 begin_text_loc = self._text_loc
852
71aaa3f7
PP
853 # Match prefix
854 if self._try_parse_pat(self._dec_byte_prefix_pat) is None:
855 # No match
856 return
857
858 # Expect the value
320644e2 859 self._skip_ws()
71aaa3f7
PP
860 m = self._expect_pat(self._dec_byte_val_pat, "Expecting a decimal constant")
861
862 # Compute value
863 val = int(m.group("val")) * (-1 if m.group("neg") == "-" else 1)
864
865 # Validate
866 if val < -128 or val > 255:
0e8e3169 867 _raise_error("Invalid decimal byte value {}".format(val), begin_text_loc)
71aaa3f7
PP
868
869 # Two's complement
05f81895 870 val %= 256
71aaa3f7
PP
871
872 # Return item
0e8e3169 873 return _Byte(val, begin_text_loc)
71aaa3f7
PP
874
875 # Tries to parse a byte, returning a byte item on success.
876 def _try_parse_byte(self):
877 # Hexadecimal
878 item = self._try_parse_hex_byte()
879
880 if item is not None:
881 return item
882
883 # Binary
884 item = self._try_parse_bin_byte()
885
886 if item is not None:
887 return item
888
889 # Decimal
890 item = self._try_parse_dec_byte()
891
892 if item is not None:
893 return item
894
71aaa3f7 895 # Strings corresponding to escape sequence characters
7a7b31e8 896 _lit_str_escape_seq_strs = {
71aaa3f7
PP
897 "0": "\0",
898 "a": "\a",
899 "b": "\b",
900 "e": "\x1b",
901 "f": "\f",
902 "n": "\n",
903 "r": "\r",
904 "t": "\t",
905 "v": "\v",
906 "\\": "\\",
907 '"': '"',
908 }
909
7a7b31e8
PP
910 # Patterns for _try_parse_lit_str()
911 _lit_str_prefix_suffix_pat = re.compile(r'"')
912 _lit_str_contents_pat = re.compile(r'(?:(?:\\.)|[^"])*')
0e8e3169 913
7a7b31e8
PP
914 # Parses a literal string between double quotes (without an encoding
915 # prefix) and returns the resulting string.
916 def _try_parse_lit_str(self, with_prefix: bool):
917 # Match prefix if needed
918 if with_prefix:
919 if self._try_parse_pat(self._lit_str_prefix_suffix_pat) is None:
920 # No match
921 return
71aaa3f7 922
7a7b31e8
PP
923 # Expect literal string
924 m = self._expect_pat(self._lit_str_contents_pat, "Expecting a literal string")
925
926 # Expect end of string
927 self._expect_pat(
928 self._lit_str_prefix_suffix_pat, 'Expecting `"` (end of literal string)'
929 )
930
931 # Replace escape sequences
932 val = m.group(0)
933
934 for ec in '0abefnrtv"\\':
935 val = val.replace(r"\{}".format(ec), self._lit_str_escape_seq_strs[ec])
936
937 # Return string
938 return val
939
940 # Patterns for _try_parse_utf_str_encoding()
941 _str_encoding_utf_prefix_pat = re.compile(r"u")
942 _str_encoding_utf_pat = re.compile(r"(?:8|(?:(?:16|32)(?:[bl]e)))\b")
943
944 # Tries to parse a UTF encoding specification, returning the Python
945 # codec name on success.
946 def _try_parse_utf_str_encoding(self):
947 # Match prefix
948 if self._try_parse_pat(self._str_encoding_utf_prefix_pat) is None:
71aaa3f7
PP
949 # No match
950 return
951
7a7b31e8
PP
952 # Expect UTF specification
953 m = self._expect_pat(
954 self._str_encoding_utf_pat,
955 "Expecting `8`, `16be`, `16le`, `32be` or `32le`",
956 )
71aaa3f7 957
7a7b31e8
PP
958 # Convert to codec name
959 return {
960 "8": "utf_8",
961 "16be": "utf_16_be",
962 "16le": "utf_16_le",
963 "32be": "utf_32_be",
964 "32le": "utf_32_le",
965 }[m.group(0)]
966
967 # Patterns for _try_parse_str_encoding()
968 _str_encoding_gen_prefix_pat = re.compile(r"s")
969 _str_encoding_colon_pat = re.compile(r":")
970 _str_encoding_non_utf_pat = re.compile(r"latin(?:[1-9]|10)\b")
971
972 # Tries to parse a string encoding specification, returning the
973 # Python codec name on success.
974 #
975 # Requires the general prefix (`s:`) if `req_gen_prefix` is `True`.
976 def _try_parse_str_encoding(self, req_gen_prefix: bool = False):
977 # General prefix?
978 if self._try_parse_pat(self._str_encoding_gen_prefix_pat) is not None:
979 # Expect `:`
980 self._skip_ws()
981 self._expect_pat(self._str_encoding_colon_pat, "Expecting `:`")
71aaa3f7 982
7a7b31e8
PP
983 # Expect encoding specification
984 self._skip_ws()
71aaa3f7 985
7a7b31e8
PP
986 # UTF?
987 codec = self._try_parse_utf_str_encoding()
71aaa3f7 988
7a7b31e8
PP
989 if codec is not None:
990 return codec
71aaa3f7 991
7a7b31e8
PP
992 # Expect Latin
993 m = self._expect_pat(
994 self._str_encoding_non_utf_pat,
995 "Expecting `u8`, `u16be`, `u16le`, `u32be`, `u32le`, or `latin1` to `latin10`",
996 )
997 return m.group(0)
71aaa3f7 998
7a7b31e8
PP
999 # UTF?
1000 if not req_gen_prefix:
1001 return self._try_parse_utf_str_encoding()
71aaa3f7 1002
7a7b31e8
PP
1003 # Patterns for _try_parse_str()
1004 _lit_str_prefix_pat = re.compile(r'"')
1005 _str_prefix_pat = re.compile(r'"|\{')
1006 _str_expr_pat = re.compile(r"[^}]+")
1007 _str_expr_suffix_pat = re.compile(r"\}")
1008
1009 # Tries to parse a string, returning a literal string or string item
1010 # on success.
1011 def _try_parse_str(self):
1012 begin_text_loc = self._text_loc
1013
1014 # Encoding
1015 codec = self._try_parse_str_encoding()
1016
1017 # Match prefix (expect if there's an encoding specification)
1018 self._skip_ws()
1019
1020 if codec is None:
1021 # No encoding: only a literal string (UTF-8) is legal
1022 m_prefix = self._try_parse_pat(self._lit_str_prefix_pat)
1023
1024 if m_prefix is None:
1025 return
1026 else:
1027 # Encoding present: expect a string prefix
1028 m_prefix = self._expect_pat(self._str_prefix_pat, 'Expecting `"` or `{`')
1029
1030 # Literal string or expression?
1031 prefix = m_prefix.group(0)
1032
1033 if prefix == '"':
1034 # Expect literal string
1035 str_text_loc = self._text_loc
1036 val = self._try_parse_lit_str(False)
1037
1038 if val is None:
1039 self._raise_error("Expecting a literal string")
1040
1041 # Encode string
1042 data = _encode_str(val, "utf_8" if codec is None else codec, str_text_loc)
1043
1044 # Return item
1045 return _LitStr(data, begin_text_loc)
1046 else:
1047 # Expect expression
1048 self._skip_ws()
1049 expr_text_loc = self._text_loc
1050 m = self._expect_pat(self._str_expr_pat, "Expecting an expression")
1051
1052 # Expect `}`
1053 self._expect_pat(self._str_expr_suffix_pat, "Expecting `}`")
1054
1055 # Create an expression node from the expression string
1056 expr_str, expr = self._ast_expr_from_str(m.group(0), expr_text_loc)
1057
1058 # Return item
1059 assert codec is not None
1060 return _Str(expr_str, expr, codec, begin_text_loc)
71aaa3f7 1061
320644e2
PP
1062 # Common right parenthesis pattern
1063 _right_paren_pat = re.compile(r"\)")
1064
71aaa3f7 1065 # Patterns for _try_parse_group()
320644e2 1066 _group_prefix_pat = re.compile(r"\(|!g(?:roup)?\b")
71aaa3f7
PP
1067
1068 # Tries to parse a group, returning a group item on success.
1069 def _try_parse_group(self):
0e8e3169
PP
1070 begin_text_loc = self._text_loc
1071
71aaa3f7 1072 # Match prefix
261c5ecf
PP
1073 m_open = self._try_parse_pat(self._group_prefix_pat)
1074
1075 if m_open is None:
71aaa3f7
PP
1076 # No match
1077 return
1078
1079 # Parse items
1080 items = self._parse_items()
1081
1082 # Expect end of group
1083 self._skip_ws_and_comments()
261c5ecf
PP
1084
1085 if m_open.group(0) == "(":
320644e2 1086 pat = self._right_paren_pat
261c5ecf
PP
1087 exp = ")"
1088 else:
1089 pat = self._block_end_pat
1090 exp = "!end"
1091
1092 self._expect_pat(pat, "Expecting an item or `{}` (end of group)".format(exp))
71aaa3f7
PP
1093
1094 # Return item
0e8e3169 1095 return _Group(items, begin_text_loc)
71aaa3f7
PP
1096
1097 # Returns a stripped expression string and an AST expression node
1098 # from the expression string `expr_str` at text location `text_loc`.
e57a18e1 1099 def _ast_expr_from_str(self, expr_str: str, text_loc: TextLocation):
71aaa3f7
PP
1100 # Create an expression node from the expression string
1101 expr_str = expr_str.strip().replace("\n", " ")
1102
1103 try:
1104 expr = ast.parse(expr_str, mode="eval")
1105 except SyntaxError:
1106 _raise_error(
1107 "Invalid expression `{}`: invalid syntax".format(expr_str),
1108 text_loc,
1109 )
1110
1111 return expr_str, expr
1112
# Patterns for _try_parse_val()
_val_expr_pat = re.compile(r"([^}:]+):\s*")
_fl_num_len_fmt_pat = re.compile(r"8|16|24|32|40|48|56|64")
_leb128_int_fmt_pat = re.compile(r"(u|s)leb128")

# Attempts to parse a value: an expression, a `:`, and then a format.
#
# The format is tried in this order:
#
# 1. A fixed length in bits: returns a `_FlNum` item.
# 2. `uleb128` or `sleb128`: returns a `_ULeb128Int` or `_SLeb128Int`
#    item.
# 3. A string encoding: returns a `_Str` item.
#
# Returns `None` if there's no `expression:` part at the current
# position; reports an error if the format is none of the above.
def _try_parse_val(self):
    val_text_loc = self._text_loc
    expr_match = self._try_parse_pat(self._val_expr_pat)

    if expr_match is None:
        # Not a value
        return

    expr_str, expr = self._ast_expr_from_str(expr_match.group(1), val_text_loc)

    # Fixed length?
    len_match = self._try_parse_pat(self._fl_num_len_fmt_pat)

    if len_match is not None:
        return _FlNum(expr_str, expr, int(len_match.group(0)), val_text_loc)

    # LEB128?
    leb128_match = self._try_parse_pat(self._leb128_int_fmt_pat)

    if leb128_match is not None:
        if leb128_match.group(1) == "u":
            return _ULeb128Int(expr_str, expr, val_text_loc)

        return _SLeb128Int(expr_str, expr, val_text_loc)

    # String encoding?
    codec = self._try_parse_str_encoding(True)

    if codec is not None:
        return _Str(expr_str, expr, codec, val_text_loc)

    # Nothing else is acceptable after the `:`
    self._raise_error(
        "Expecting a fixed length (multiple of eight bits), `uleb128`, `sleb128`, or `s:` followed with a valid encoding (`u8`, `u16be`, `u16le`, `u32be`, `u32le`, or `latin1` to `latin10`)"
    )
71aaa3f7 1165
# Patterns for _try_parse_var_assign()
_var_assign_name_equal_pat = re.compile(
    r"({})\s*=(?!=)".format(_py_name_pat.pattern)
)
_var_assign_expr_pat = re.compile(r"[^}]+")

# Attempts to parse a variable assignment (`name = expression`).
#
# Returns a `_VarAssign` item and registers the name in
# `self._var_names`, or returns `None` if there's no `name =` at the
# current position.
#
# Reports an error if the name is the reserved `ICITTE` name or is
# already a known label name.
def _try_parse_var_assign(self):
    assign_text_loc = self._text_loc
    name_match = self._try_parse_pat(self._var_assign_name_equal_pat)

    if name_match is None:
        # Not a variable assignment
        return

    var_name = name_match.group(1)

    if var_name == _icitte_name:
        _raise_error(
            "`{}` is a reserved variable name".format(_icitte_name), assign_text_loc
        )

    if var_name in self._label_names:
        _raise_error("Existing label named `{}`".format(var_name), assign_text_loc)

    # The right-hand side is everything up to the closing `}`
    self._skip_ws()
    expr_match = self._expect_pat(
        self._var_assign_expr_pat, "Expecting an expression"
    )
    expr_str, expr = self._ast_expr_from_str(expr_match.group(0), assign_text_loc)

    # The name is now a known variable name
    self._var_names.add(var_name)
    return _VarAssign(var_name, expr_str, expr, assign_text_loc)
1212
# Pattern for _try_parse_set_bo()
_bo_pat = re.compile(r"[bl]e")

# Attempts to parse a byte order name (`be` or `le`).
#
# Returns the corresponding `_SetBo` item, or `None` if there's no
# byte order name at the current position.
def _try_parse_set_bo(self):
    bo_text_loc = self._text_loc
    bo_match = self._try_parse_pat(self._bo_pat)

    if bo_match is None:
        # Not a byte order setting
        return

    bo_name = bo_match.group(0)

    if bo_name != "be":
        # The pattern only leaves `le` as the alternative
        assert bo_name == "le"
        return _SetBo(ByteOrder.LE, bo_text_loc)

    return _SetBo(ByteOrder.BE, bo_text_loc)
71aaa3f7
PP
1234
# Patterns for _try_parse_val_or_var_assign_or_set_bo()
_val_var_assign_set_bo_prefix_pat = re.compile(r"\{")
_val_var_assign_set_bo_suffix_pat = re.compile(r"\}")

# Attempts to parse a `{`...`}` item.
#
# Between the braces, tries, in this order, a variable assignment, a
# value, and a byte order setting, returning the first item which
# parses. Returns `None` if there's no `{` at the current position;
# reports an error if none of the three parses or if the closing `}`
# is missing.
def _try_parse_val_or_var_assign_or_set_bo(self):
    if self._try_parse_pat(self._val_var_assign_set_bo_prefix_pat) is None:
        # Not a `{`...`}` item
        return

    self._skip_ws()

    # Order matters: the first parser to return an item wins
    inner_parsers = (
        self._try_parse_var_assign,
        self._try_parse_val,
        self._try_parse_set_bo,
    )

    for try_parse in inner_parsers:
        item = try_parse()

        if item is not None:
            break
    else:
        # At this point it's invalid
        self._raise_error(
            "Expecting a fixed-length number, a string, a variable assignment, or a byte order setting"
        )

    # Closing brace
    self._skip_ws()
    self._expect_pat(self._val_var_assign_set_bo_suffix_pat, "Expecting `}`")
    return item
1270
2adf4336
PP
# Attempts to parse the value of an offset setting (what follows the
# initial `<`).
#
# Returns a `_SetOffset` item built from the matched constant integer,
# or `None` if there's no such integer at the current position.
def _try_parse_set_offset_val(self):
    offset_text_loc = self._text_loc
    int_match = self._try_parse_pat(_pos_const_int_pat)

    if int_match is None:
        # Not an offset setting value
        return

    # Base 0 makes int() honour the prefix of the normalized literal
    offset_val = int(_norm_const_int(int_match.group(0)), 0)
    return _SetOffset(offset_val, offset_text_loc)
71aaa3f7
PP
1285
# Attempts to parse a label name (what follows the initial `<`).
#
# Returns a `_Label` item and registers the name in
# `self._label_names`, or returns `None` if there's no name at the
# current position.
#
# Reports an error if the name is the reserved `ICITTE` name, an
# already known label name, or an already known variable name.
def _try_parse_label_name(self):
    label_text_loc = self._text_loc
    name_match = self._try_parse_pat(_py_name_pat)

    if name_match is None:
        # Not a label name
        return

    label_name = name_match.group(0)

    if label_name == _icitte_name:
        _raise_error(
            "`{}` is a reserved label name".format(_icitte_name), label_text_loc
        )

    if label_name in self._label_names:
        _raise_error("Duplicate label name `{}`".format(label_name), label_text_loc)

    if label_name in self._var_names:
        _raise_error(
            "Existing variable named `{}`".format(label_name), label_text_loc
        )

    # The name is now a known label name
    self._label_names.add(label_name)
    return _Label(label_name, label_text_loc)
71aaa3f7 1317
# Patterns for _try_parse_label_or_set_offset()
_label_set_offset_prefix_pat = re.compile(r"<")
_label_set_offset_suffix_pat = re.compile(r">")

# Attempts to parse a `<`...`>` item.
#
# Between the angle brackets, tries an offset setting value first and
# a label name second. Returns the resulting item, or `None` if
# there's no `<` at the current position; reports an error if neither
# parses or if the closing `>` is missing.
def _try_parse_label_or_set_offset(self):
    if self._try_parse_pat(self._label_set_offset_prefix_pat) is None:
        # Not a `<`...`>` item
        return

    self._skip_ws()
    item = self._try_parse_set_offset_val()

    if item is None:
        # Not an offset setting: must be a label
        item = self._try_parse_label_name()

    if item is None:
        # At this point it's invalid
        self._raise_error("Expecting a label name or an offset setting value")

    # Closing angle bracket
    self._skip_ws()
    self._expect_pat(self._label_set_offset_suffix_pat, "Expecting `>`")
    return item
1346
25ca454b
PP
# Pattern for _parse_pad_val()
_pad_val_prefix_pat = re.compile(r"~")

# Parses an optional padding value (`~` followed with a constant
# integer) after skipping whitespaces.
#
# Returns the padding byte value, or 0 if there's no `~`. Reports an
# error if the integer is missing or greater than 255.
def _parse_pad_val(self):
    self._skip_ws()

    if self._try_parse_pat(self._pad_val_prefix_pat) is None:
        # No explicit padding value
        return 0

    self._skip_ws()
    val_text_loc = self._text_loc
    val_match = self._expect_pat(
        _pos_const_int_pat,
        "Expecting a positive constant integer (byte value)",
    )
    byte_val = int(_norm_const_int(val_match.group(0)), 0)

    # Must fit in a single byte
    if byte_val > 255:
        _raise_error(
            "Invalid padding byte value {}".format(byte_val),
            val_text_loc,
        )

    return byte_val
1375
# Patterns for _try_parse_align_offset()
_align_offset_prefix_pat = re.compile(r"@")
_align_offset_val_pat = re.compile(r"\d+")

# Attempts to parse an offset alignment: `@`, an alignment in bits,
# and an optional padding value.
#
# Returns an `_AlignOffset` item, or `None` if there's no `@` at the
# current position. Reports an error if the alignment is missing or
# isn't a positive multiple of eight.
def _try_parse_align_offset(self):
    item_text_loc = self._text_loc

    if self._try_parse_pat(self._align_offset_prefix_pat) is None:
        # Not an offset alignment
        return

    self._skip_ws()
    align_text_loc = self._text_loc
    align_match = self._expect_pat(
        self._align_offset_val_pat,
        "Expecting an alignment (positive multiple of eight bits)",
    )
    align = int(align_match.group(0))
    is_valid = align > 0 and (align % 8) == 0

    if not is_valid:
        _raise_error(
            "Invalid alignment value {} (not a positive multiple of eight)".format(
                align
            ),
            align_text_loc,
        )

    return _AlignOffset(align, self._parse_pad_val(), item_text_loc)
676f6189 1414
dbd84e74
PP
# Patterns for _expect_expr()
_inner_expr_prefix_pat = re.compile(r"\{")
_inner_expr_pat = re.compile(r"[^}]+")
_inner_expr_suffix_pat = re.compile(r"\}")

# Parses an expression outside a `{`/`}` context.
#
# The alternatives are tried in this order:
#
# 1. If `accept_const_float` is `True`: a constant floating point
#    number.
#
# 2. If `accept_const_int` is `True`: a constant integer; a negative
#    one is an error unless `allow_neg_int` is `True`.
#
# 3. A Python name.
#
# 4. If `accept_lit_str` is `True`: a literal string, which becomes
#    the expression `repr()` of its value.
#
# 5. A Python expression within `{` and `}`.
#
# Returns the stripped expression string and AST expression; reports
# an error listing the accepted forms if nothing matches.
def _expect_expr(
    self,
    accept_const_int: bool = False,
    allow_neg_int: bool = False,
    accept_const_float: bool = False,
    accept_lit_str: bool = False,
):
    start_text_loc = self._text_loc

    if accept_const_float:
        float_match = self._try_parse_pat(_const_float_pat)

        if float_match is not None:
            return self._ast_expr_from_str(float_match.group(0), start_text_loc)

    if accept_const_int:
        int_match = self._try_parse_pat(_const_int_pat)

        if int_match is not None:
            if int_match.group("neg") == "-" and not allow_neg_int:
                _raise_error("Expecting a positive constant integer", start_text_loc)

            return self._ast_expr_from_str(
                _norm_const_int(int_match.group(0)), start_text_loc
            )

    name_match = self._try_parse_pat(_py_name_pat)

    if name_match is not None:
        return self._ast_expr_from_str(name_match.group(0), start_text_loc)

    if accept_lit_str:
        lit_str = self._try_parse_lit_str(True)

        if lit_str is not None:
            return self._ast_expr_from_str(repr(lit_str), start_text_loc)

    # Only `{` remains: build the list of accepted forms for the error
    # message, optional forms first.
    accepted = []  # type: List[str]

    if accept_const_int:
        sign_word = "" if allow_neg_int else "positive "
        accepted.append("a {}constant integer".format(sign_word))

    if accept_const_float:
        accepted.append("a constant floating point number")

    if accept_lit_str:
        accepted.append("a literal string")

    accepted.extend(["a name", "or `{`"])

    # No commas when only the two mandatory forms are listed
    separator = " " if len(accepted) == 2 else ", "
    self._expect_pat(
        self._inner_expr_prefix_pat,
        "Expecting {}".format(separator.join(accepted)),
    )

    # Inner expression
    self._skip_ws()
    inner_text_loc = self._text_loc
    inner_match = self._expect_pat(self._inner_expr_pat, "Expecting an expression")
    inner_str = inner_match.group(0)

    # Closing brace
    self._skip_ws()
    self._expect_pat(self._inner_expr_suffix_pat, "Expecting `}`")
    return self._ast_expr_from_str(inner_str, inner_text_loc)
1511
25ca454b
PP
# Patterns for _try_parse_fill_until()
_fill_until_prefix_pat = re.compile(r"\+")
_fill_until_pad_val_prefix_pat = re.compile(r"~")

# Attempts to parse a filling: `+`, an expression (which may be a
# constant integer), and an optional padding value.
#
# Returns a `_FillUntil` item, or `None` if there's no `+` at the
# current position.
def _try_parse_fill_until(self):
    fill_text_loc = self._text_loc

    if self._try_parse_pat(self._fill_until_prefix_pat) is None:
        # Not a filling
        return

    self._skip_ws()
    until_expr_str, until_expr = self._expect_expr(accept_const_int=True)
    pad_byte = self._parse_pad_val()
    return _FillUntil(until_expr_str, until_expr, pad_byte, fill_text_loc)
676f6189 1534
27d52a19
PP
# Parses the multiplier expression shared by both repetition forms
# (block and post-item), accepting a constant integer.
#
# Returns whatever _expect_expr() returns: the expression string and
# its AST node.
def _expect_rep_mul_expr(self):
    mul_str_and_node = self._expect_expr(accept_const_int=True)
    return mul_str_and_node
27d52a19
PP
1539
# Common block end pattern
_block_end_pat = re.compile(r"!end\b")

# Pattern for _try_parse_rep_block()
_rep_block_prefix_pat = re.compile(r"!r(?:epeat)?\b")

# Attempts to parse a repetition block: the block prefix, a multiplier
# expression, items, and `!end`.
#
# Returns a `_Rep` item wrapping a `_Group` of the block's items, or
# `None` if the prefix doesn't match.
def _try_parse_rep_block(self):
    block_text_loc = self._text_loc

    if self._try_parse_pat(self._rep_block_prefix_pat) is None:
        # Not a repetition block
        return

    # Multiplier
    self._skip_ws_and_comments()
    mul_expr_str, mul_expr = self._expect_rep_mul_expr()

    # Repeated items
    self._skip_ws_and_comments()
    body_text_loc = self._text_loc
    body_items = self._parse_items()

    # Block end
    self._skip_ws_and_comments()
    self._expect_pat(
        self._block_end_pat, "Expecting an item or `!end` (end of repetition block)"
    )

    body = _Group(body_items, body_text_loc)
    return _Rep(body, mul_expr_str, mul_expr, block_text_loc)
1573
# Patterns for _try_parse_cond_block()
_cond_block_prefix_pat = re.compile(r"!if\b")
_cond_block_else_pat = re.compile(r"!else\b")

# Attempts to parse a conditional block: `!if`, an expression, the
# "true" items, an optional `!else` followed with the "false" items,
# and `!end`.
#
# Returns a `_Cond` item holding one `_Group` per branch, or `None` if
# the prefix doesn't match. Without `!else`, the "false" group is
# empty and located at the beginning of the block.
def _try_parse_cond_block(self):
    block_text_loc = self._text_loc

    if self._try_parse_pat(self._cond_block_prefix_pat) is None:
        # Not a conditional block
        return

    # Condition
    self._skip_ws_and_comments()
    cond_expr_str, cond_expr = self._expect_expr()

    # "True" branch
    self._skip_ws_and_comments()
    true_text_loc = self._text_loc
    true_items = self._parse_items()

    # "False" branch: empty unless there's an `!else`
    else_items = []  # type: List[_Item]
    else_text_loc = block_text_loc
    self._skip_ws_and_comments()

    if self._try_parse_pat(self._cond_block_else_pat) is not None:
        self._skip_ws_and_comments()
        else_text_loc = self._text_loc
        else_items = self._parse_items()

    # Block end
    self._expect_pat(
        self._block_end_pat,
        "Expecting an item, `!else`, or `!end` (end of conditional block)",
    )

    true_group = _Group(true_items, true_text_loc)
    false_group = _Group(else_items, else_text_loc)
    return _Cond(true_group, false_group, cond_expr_str, cond_expr, block_text_loc)
27d52a19 1622
320644e2
PP
# Common left parenthesis pattern
_left_paren_pat = re.compile(r"\(")

# Patterns for _try_parse_macro_def() and _try_parse_macro_exp()
_macro_params_comma_pat = re.compile(",")

# Patterns for _try_parse_macro_def()
_macro_def_prefix_pat = re.compile(r"!m(?:acro)?\b")

# Attempts to parse a macro definition: the prefix, a name, a
# parenthesized list of comma-separated parameter names, items, and
# `!end`.
#
# Registers the resulting `_MacroDef` in `self._macro_defs` and returns
# `True`, or returns `False` if the prefix doesn't match.
#
# The items of the macro are parsed with empty sets of known variable
# and label names; the previous sets are put back afterwards.
#
# Reports an error for a duplicate macro name or a duplicate parameter
# name.
def _try_parse_macro_def(self):
    def_text_loc = self._text_loc

    if self._try_parse_pat(self._macro_def_prefix_pat) is None:
        # Not a macro definition
        return False

    # Macro name
    self._skip_ws()
    name_text_loc = self._text_loc
    macro_name = self._expect_pat(
        _py_name_pat, "Expecting a valid macro name"
    ).group(0)

    if macro_name in self._macro_defs:
        _raise_error("Duplicate macro named `{}`".format(macro_name), name_text_loc)

    # Parameter list
    self._skip_ws()
    self._expect_pat(self._left_paren_pat, "Expecting `(`")
    param_names = []  # type: List[str]

    while True:
        self._skip_ws()

        if self._try_parse_pat(self._right_paren_pat) is not None:
            # End of parameter list
            break

        # Every parameter name but the first follows a comma
        if param_names:
            self._expect_pat(self._macro_params_comma_pat, "Expecting `,`")

        self._skip_ws()
        param_text_loc = self._text_loc
        param_name = self._expect_pat(
            _py_name_pat, "Expecting valid parameter name"
        ).group(0)

        if param_name in param_names:
            _raise_error(
                "Duplicate macro parameter named `{}`".format(param_name),
                param_text_loc,
            )

        param_names.append(param_name)

    # Macro items, parsed with their own known names
    self._skip_ws_and_comments()
    body_text_loc = self._text_loc
    saved_var_names = self._var_names.copy()
    saved_label_names = self._label_names.copy()
    self._var_names = set()  # type: Set[str]
    self._label_names = set()  # type: Set[str]
    body_items = self._parse_items()
    self._var_names = saved_var_names
    self._label_names = saved_label_names

    # Block end
    self._expect_pat(
        self._block_end_pat, "Expecting an item or `!end` (end of macro block)"
    )

    # Make the macro known
    body = _Group(body_items, body_text_loc)
    self._macro_defs[macro_name] = _MacroDef(
        macro_name, param_names, body, def_text_loc
    )
    return True
1709
# Patterns for _try_parse_macro_exp()
_macro_exp_prefix_pat = re.compile(r"m\b")
_macro_exp_colon_pat = re.compile(r":")

# Attempts to parse a macro expansion: `m`, `:`, the name of a known
# macro, and a parenthesized list of comma-separated parameter values.
#
# Each parameter value is whatever _expect_expr() accepts with
# constant integers (negative ones included), constant floating point
# numbers, and literal strings enabled.
#
# Returns a `_MacroExp` item, or `None` if the prefix doesn't match.
# Reports an error for an unknown macro name or if the number of
# parameter values differs from what the definition expects.
def _try_parse_macro_exp(self):
    exp_text_loc = self._text_loc

    if self._try_parse_pat(self._macro_exp_prefix_pat) is None:
        # Not a macro expansion
        return

    # Colon
    self._skip_ws()
    self._expect_pat(self._macro_exp_colon_pat, "Expecting `:`")

    # Macro name
    self._skip_ws()
    name_text_loc = self._text_loc
    macro_name = self._expect_pat(
        _py_name_pat, "Expecting a valid macro name"
    ).group(0)
    macro_def = self._macro_defs.get(macro_name)

    if macro_def is None:
        _raise_error("Unknown macro name `{}`".format(macro_name), name_text_loc)

    # Parameter values
    self._skip_ws()
    self._expect_pat(self._left_paren_pat, "Expecting `(`")
    params_text_loc = self._text_loc
    params = []  # type: List[_MacroExpParam]

    while True:
        self._skip_ws()

        if self._try_parse_pat(self._right_paren_pat) is not None:
            # End of parameter values
            break

        # Every parameter value but the first follows a comma
        if params:
            self._expect_pat(self._macro_params_comma_pat, "Expecting `,`")

        self._skip_ws()
        param_text_loc = self._text_loc
        param_expr_parts = self._expect_expr(
            accept_const_int=True,
            allow_neg_int=True,
            accept_const_float=True,
            accept_lit_str=True,
        )
        params.append(_MacroExpParam(*param_expr_parts, text_loc=param_text_loc))

    # The expansion must pass exactly what the definition expects
    passed_count = len(params)
    expected_count = len(macro_def.param_names)

    if passed_count != expected_count:
        plural_suffix = "" if passed_count == 1 else "s"
        _raise_error(
            "Macro expansion passes {} parameter{} while the definition expects {}".format(
                passed_count, plural_suffix, expected_count
            ),
            params_text_loc,
        )

    return _MacroExp(macro_name, params, exp_text_loc)
1788
71aaa3f7
PP
# Attempts to parse a base item (anything except a post-item
# repetition), returning it on success and `None` otherwise.
#
# The specific parsers are tried one after the other, in the order
# below; the first one to return an item wins.
def _try_parse_base_item(self):
    item_parsers = (
        # Byte item
        self._try_parse_byte,
        # String item
        self._try_parse_str,
        # Value, variable assignment, or byte order setting item
        self._try_parse_val_or_var_assign_or_set_bo,
        # Label or offset setting item
        self._try_parse_label_or_set_offset,
        # Offset alignment item
        self._try_parse_align_offset,
        # Filling item
        self._try_parse_fill_until,
        # Group item
        self._try_parse_group,
        # Repetition block item
        self._try_parse_rep_block,
        # Conditional block item
        self._try_parse_cond_block,
        # Macro expansion
        self._try_parse_macro_exp,
    )

    for try_parse in item_parsers:
        item = try_parse()

        if item is not None:
            return item
1851
e57a18e1
PP
# Pattern for _try_parse_rep_post()
_rep_post_prefix_pat = re.compile(r"\*")

# Attempts to parse a post-item repetition: `*` followed with a
# multiplier expression.
#
# Returns the expression string and AST expression node of the
# multiplier, or `None` if there's no `*` at the current position.
def _try_parse_rep_post(self):
    star_match = self._try_parse_pat(self._rep_post_prefix_pat)

    if star_match is None:
        # Not a post-item repetition
        return

    self._skip_ws_and_comments()
    return self._expect_rep_mul_expr()
71aaa3f7 1866
1ca7b5e1
PP
# Attempts to parse one item, possibly followed with a post-item
# repetition, appending the result to `items`.
#
# Returns `True` if an item was appended and `None` if there's no item
# at the current position (after skipping whitespaces and comments).
def _try_append_item(self, items: List[_Item]):
    self._skip_ws_and_comments()
    base_item = self._try_parse_base_item()

    if base_item is None:
        # No item here
        return

    new_item = base_item

    # Only a repeatable item may be followed with a repetition
    if isinstance(base_item, _RepableItem):
        self._skip_ws_and_comments()
        rep_text_loc = self._text_loc
        mul_expr_parts = self._try_parse_rep_post()

        if mul_expr_parts is not None:
            new_item = _Rep(base_item, *mul_expr_parts, text_loc=rep_text_loc)

    items.append(new_item)
    return True
71aaa3f7
PP
1891
# Parses as many consecutive items as possible and returns them,
# stopping at the end of the input or at the first position where
# nothing known starts.
#
# If `accept_macro_defs` is `True`, then macro definitions found
# between items are parsed and registered too (they don't produce
# items).
def _parse_items(self, accept_macro_defs: bool = False) -> List[_Item]:
    parsed_items = []  # type: List[_Item]

    while self._isnt_done():
        if self._try_append_item(parsed_items):
            # Got an item: look for the next one
            continue

        if not (accept_macro_defs and self._try_parse_macro_def()):
            # Neither an item nor an accepted macro definition
            break

    return parsed_items
1911
# Parses the whole Normand input, setting `self._res` to the main group
# item on success.
#
# Macro definitions are accepted at this first level. Anything left
# after the first level items (whitespaces and comments aside) is
# reported as an unexpected character.
def _parse(self):
    if not self._normand.strip():
        # Nothing to consume: the main group is empty
        self._res = _Group([], self._text_loc)
        return

    top_level_items = self._parse_items(True)

    # The whole input must have been consumed
    self._skip_ws_and_comments()

    if self._isnt_done():
        unexpected_char = self._normand[self._at]
        self._raise_error("Unexpected character `{}`".format(unexpected_char))

    self._res = _Group(top_level_items, self._text_loc)
1933
1934
# The return type of parse().
#
# Instances are only meant to be built with the _create() class method:
# the regular constructor always raises `NotImplementedError`.
class ParseResult:
    # Builds and returns an instance without going through __init__().
    @classmethod
    def _create(
        cls,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        res = cls.__new__(cls)
        res._init(data, variables, labels, offset, bo)
        return res

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Stores the given values as is (no copy) for the properties below.
    def _init(
        self,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self._data, self._vars, self._labels = data, variables, labels
        self._offset, self._bo = offset, bo

    # Generated data.
    @property
    def data(self):
        return self._data

    # Dictionary of updated variable names to their last computed value.
    @property
    def variables(self):
        return self._vars

    # Dictionary of updated main group label names to their computed
    # value.
    @property
    def labels(self):
        return self._labels

    # Updated offset.
    @property
    def offset(self):
        return self._offset

    # Updated byte order.
    @property
    def byte_order(self):
        return self._bo
1992
1993
# Raises a parse error having the message `msg` and located where the
# item `item` is (its `text_loc` property).
def _raise_error_for_item(msg: str, item: _Item) -> NoReturn:
    item_text_loc = item.text_loc
    _raise_error(msg, item_text_loc)
1998
1999
2000# The `ICITTE` reserved name.
2001_icitte_name = "ICITTE"
2002
2003
2adf4336
PP
2004# Base node visitor.
2005#
2006# Calls the _visit_name() method for each name node which isn't the name
2007# of a call.
2008class _NodeVisitor(ast.NodeVisitor):
2009 def __init__(self):
71aaa3f7
PP
2010 self._parent_is_call = False
2011
2012 def generic_visit(self, node: ast.AST):
2013 if type(node) is ast.Call:
2014 self._parent_is_call = True
2015 elif type(node) is ast.Name and not self._parent_is_call:
2adf4336 2016 self._visit_name(node.id)
71aaa3f7
PP
2017
2018 super().generic_visit(node)
2019 self._parent_is_call = False
2020
2adf4336
PP
2021 @abc.abstractmethod
2022 def _visit_name(self, name: str):
2023 ...
2024
71aaa3f7 2025
2adf4336
PP
# Expression validator: makes sure that each name visited within an
# expression is allowed.
#
# A name is allowed if it's the reserved `ICITTE` name or if it's part
# of the `allowed_names` set given at construction time. The first name
# which isn't allowed makes the validator raise an error, located at
# `text_loc`, which mentions the name, the expression string
# `expr_str`, and all the legal names.
class _ExprValidator(_NodeVisitor):
    def __init__(self, expr_str: str, text_loc: TextLocation, allowed_names: Set[str]):
        super().__init__()
        self._expr_str = expr_str
        self._text_loc = text_loc
        self._allowed_names = allowed_names

    def _visit_name(self, name: str):
        # Known and reachable variable/label name?
        if name == _icitte_name or name in self._allowed_names:
            return

        msg = "Illegal (unknown or unreachable) variable/label name `{}` in expression `{}`".format(
            name, self._expr_str
        )

        # Work on a copy so that the given set remains untouched
        legal_names = self._allowed_names.copy()
        legal_names.add(_icitte_name)

        if legal_names:
            quoted_names = sorted(
                "`{}`".format(legal_name) for legal_name in legal_names
            )
            msg += "; the legal names are {{{}}}".format(", ".join(quoted_names))

        _raise_error(msg, self._text_loc)
2056
2057
2adf4336
PP
# Generator state: variables, labels, current offset, and current byte
# order.
#
# The constructor copies `variables` and `labels`, so that updating the
# state doesn't modify the objects it was built from.
class _GenState:
    def __init__(
        self,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self.variables = variables.copy()
        self.labels = labels.copy()
        self.offset = offset
        self.bo = bo

    def __repr__(self):
        field_reprs = (
            repr(self.variables),
            repr(self.labels),
            repr(self.offset),
            repr(self.bo),
        )
        return "_GenState({}, {}, {}, {})".format(*field_reprs)
2076
2077
# Fixed-length number item instance: a read-only record of a
# fixed-length number item along with what was passed with it at
# construction time.
class _FlNumItemInst:
    def __init__(
        self,
        item: _FlNum,
        offset_in_data: int,
        state: _GenState,
        parse_error_msgs: List[ParseErrorMessage],
    ):
        self._item, self._offset_in_data = item, offset_in_data
        self._state, self._parse_error_msgs = state, parse_error_msgs

    # Fixed-length number item.
    @property
    def item(self):
        return self._item

    # Offset of the instance within the generated data.
    @property
    def offset_in_data(self):
        return self._offset_in_data

    # Generation state given at construction time.
    @property
    def state(self):
        return self._state

    # Parsing error messages given at construction time.
    @property
    def parse_error_msgs(self):
        return self._parse_error_msgs
2107
71aaa3f7 2108
2adf4336 2109# Generator of data and final state from a group item.
71aaa3f7
PP
2110#
2111# Generation happens in memory at construction time. After building, use
2112# the `data`, `variables`, `labels`, `offset`, and `bo` properties to
2113# get the resulting context.
2adf4336
PP
2114#
2115# The steps of generation are:
2116#
320644e2
PP
2117# 1. Handle each item in prefix order.
2118#
2119# The handlers append bytes to `self._data` and update some current
2120# state object (`_GenState` instance).
2121#
2122# When handling a fixed-length number item, try to evaluate its
2123# expression using the current state. If this fails, then it might be
2124# because the expression refers to a "future" label: save the current
2125# offset in `self._data` (generated data) and a snapshot of the
2126# current state within `self._fl_num_item_insts` (`_FlNumItemInst`
f5dcb24c
PP
2127# object). _gen_fl_num_item_insts() will deal with this later. A
2128# `_FlNumItemInst` instance also contains a snapshot of the current
2129# parsing error messages (`self._parse_error_msgs`) which need to be
2130# taken into account when handling the instance later.
2adf4336 2131#
320644e2
PP
2132# When handling the items of a group, keep a map of immediate label
2133# names to their offset. Then, after having processed all the items,
2134# update the relevant saved state snapshots in
2135# `self._fl_num_item_insts` with those immediate label values.
2136# _gen_fl_num_item_insts() will deal with this later.
2adf4336 2137#
320644e2
PP
2138# 2. Handle all the fixed-length number item instances of which the
2139# expression evaluation failed before.
2adf4336 2140#
320644e2
PP
2141# At this point, `self._fl_num_item_insts` contains everything that's
2142# needed to evaluate the expressions, including the values of
2143# "future" labels from the point of view of some fixed-length number
2144# item instance.
2adf4336 2145#
f5dcb24c
PP
2146# If an evaluation fails at this point, then it's a user error. Add
2147# to the parsing error all the saved parsing error messages of the
2148# instance. Those additional messages add precious context to the
2149# error.
71aaa3f7
PP
2150class _Gen:
2151 def __init__(
2152 self,
2153 group: _Group,
320644e2 2154 macro_defs: _MacroDefsT,
e57a18e1
PP
2155 variables: VariablesT,
2156 labels: LabelsT,
71aaa3f7
PP
2157 offset: int,
2158 bo: Optional[ByteOrder],
2159 ):
320644e2
PP
2160 self._macro_defs = macro_defs
2161 self._fl_num_item_insts = [] # type: List[_FlNumItemInst]
f5dcb24c 2162 self._parse_error_msgs = [] # type: List[ParseErrorMessage]
2adf4336 2163 self._gen(group, _GenState(variables, labels, offset, bo))
71aaa3f7
PP
2164
2165 # Generated bytes.
2166 @property
2167 def data(self):
2168 return self._data
2169
2170 # Updated variables.
2171 @property
2172 def variables(self):
2adf4336 2173 return self._final_state.variables
71aaa3f7
PP
2174
2175 # Updated main group labels.
2176 @property
2177 def labels(self):
2adf4336 2178 return self._final_state.labels
71aaa3f7
PP
2179
2180 # Updated offset.
2181 @property
2182 def offset(self):
2adf4336 2183 return self._final_state.offset
71aaa3f7
PP
2184
2185 # Updated byte order.
2186 @property
2187 def bo(self):
2adf4336
PP
2188 return self._final_state.bo
2189
320644e2
PP
2190 # Evaluates the expression `expr` of which the original string is
2191 # `expr_str` at the location `text_loc` considering the current
2adf4336
PP
2192 # generation state `state`.
2193 #
7a7b31e8 2194 # If `accept_float` is `True`, then the type of the result may be
269f6eb3 2195 # `float` too.
7a7b31e8
PP
2196 #
2197 # If `accept_str` is `True`, then the type of the result may be
2198 # `str` too.
2adf4336 2199 @staticmethod
320644e2
PP
2200 def _eval_expr(
2201 expr_str: str,
2202 expr: ast.Expression,
2203 text_loc: TextLocation,
269f6eb3 2204 state: _GenState,
7a7b31e8
PP
2205 accept_float: bool = False,
2206 accept_str: bool = False,
269f6eb3 2207 ):
e57a18e1
PP
2208 syms = {} # type: VariablesT
2209 syms.update(state.labels)
71aaa3f7 2210
e57a18e1
PP
2211 # Set the `ICITTE` name to the current offset
2212 syms[_icitte_name] = state.offset
71aaa3f7
PP
2213
2214 # Add the current variables
2adf4336 2215 syms.update(state.variables)
71aaa3f7
PP
2216
2217 # Validate the node and its children
320644e2 2218 _ExprValidator(expr_str, text_loc, set(syms.keys())).visit(expr)
71aaa3f7
PP
2219
2220 # Compile and evaluate expression node
2221 try:
320644e2 2222 val = eval(compile(expr, "", "eval"), None, syms)
71aaa3f7 2223 except Exception as exc:
320644e2
PP
2224 _raise_error(
2225 "Failed to evaluate expression `{}`: {}".format(expr_str, exc),
2226 text_loc,
71aaa3f7
PP
2227 )
2228
27d52a19
PP
2229 # Convert `bool` result type to `int` to normalize
2230 if type(val) is bool:
2231 val = int(val)
2232
269f6eb3
PP
2233 # Validate result type
2234 expected_types = {int} # type: Set[type]
269f6eb3 2235
7a7b31e8 2236 if accept_float:
269f6eb3 2237 expected_types.add(float)
7a7b31e8
PP
2238
2239 if accept_str:
2240 expected_types.add(str)
269f6eb3
PP
2241
2242 if type(val) not in expected_types:
7a7b31e8
PP
2243 expected_types_str = sorted(
2244 ["`{}`".format(t.__name__) for t in expected_types]
2245 )
2246
2247 if len(expected_types_str) == 1:
2248 msg_expected = expected_types_str[0]
2249 elif len(expected_types_str) == 2:
2250 msg_expected = " or ".join(expected_types_str)
2251 else:
2252 expected_types_str[-1] = "or {}".format(expected_types_str[-1])
2253 msg_expected = ", ".join(expected_types_str)
2254
320644e2 2255 _raise_error(
269f6eb3 2256 "Invalid expression `{}`: expecting result type {}, not `{}`".format(
7a7b31e8 2257 expr_str, msg_expected, type(val).__name__
71aaa3f7 2258 ),
320644e2 2259 text_loc,
71aaa3f7
PP
2260 )
2261
2262 return val
2263
7a7b31e8
PP
2264 # Forwards to _eval_expr() with the expression and text location of
2265 # `item`.
320644e2
PP
2266 @staticmethod
2267 def _eval_item_expr(
7a7b31e8 2268 item: Union[_Cond, _FillUntil, _FlNum, _Leb128Int, _Rep, _Str, _VarAssign],
320644e2 2269 state: _GenState,
7a7b31e8
PP
2270 accept_float: bool = False,
2271 accept_str: bool = False,
320644e2
PP
2272 ):
2273 return _Gen._eval_expr(
7a7b31e8 2274 item.expr_str, item.expr, item.text_loc, state, accept_float, accept_str
320644e2
PP
2275 )
2276
2277 # Handles the byte item `item`.
2278 def _handle_byte_item(self, item: _Byte, state: _GenState):
2279 self._data.append(item.val)
2280 state.offset += item.size
2281
7a7b31e8
PP
2282 # Handles the literal string item `item`.
2283 def _handle_lit_str_item(self, item: _LitStr, state: _GenState):
320644e2
PP
2284 self._data += item.data
2285 state.offset += item.size
2286
2287 # Handles the byte order setting item `item`.
2288 def _handle_set_bo_item(self, item: _SetBo, state: _GenState):
2289 # Update current byte order
2290 state.bo = item.bo
2291
2292 # Handles the variable assignment item `item`.
2293 def _handle_var_assign_item(self, item: _VarAssign, state: _GenState):
2294 # Update variable
7a7b31e8
PP
2295 state.variables[item.name] = self._eval_item_expr(
2296 item, state, accept_float=True, accept_str=True
2297 )
320644e2
PP
2298
2299 # Handles the fixed-length number item `item`.
2300 def _handle_fl_num_item(self, item: _FlNum, state: _GenState):
2301 # Validate current byte order
2302 if state.bo is None and item.len > 8:
2303 _raise_error_for_item(
2304 "Current byte order isn't defined at first fixed-length number (`{}`) to encode on more than 8 bits".format(
2305 item.expr_str
2306 ),
2307 item,
2308 )
2309
2310 # Try an immediate evaluation. If it fails, then keep everything
2311 # needed to (try to) generate the bytes of this item later.
2312 try:
2313 data = self._gen_fl_num_item_inst_data(item, state)
2314 except Exception:
2315 self._fl_num_item_insts.append(
f5dcb24c
PP
2316 _FlNumItemInst(
2317 item,
2318 len(self._data),
2319 copy.deepcopy(state),
2320 copy.deepcopy(self._parse_error_msgs),
2321 )
320644e2
PP
2322 )
2323
2324 # Reserve space in `self._data` for this instance
2325 data = bytes([0] * (item.len // 8))
2326
2327 # Append bytes
2328 self._data += data
2329
2330 # Update offset
2331 state.offset += len(data)
2332
05f81895
PP
2333 # Returns the size, in bytes, required to encode the value `val`
2334 # with LEB128 (signed version if `is_signed` is `True`).
2335 @staticmethod
2336 def _leb128_size_for_val(val: int, is_signed: bool):
2337 if val < 0:
2338 # Equivalent upper bound.
2339 #
2340 # For example, if `val` is -128, then the full integer for
2341 # this number of bits would be [-128, 127].
2342 val = -val - 1
2343
2344 # Number of bits (add one for the sign if needed)
2345 bits = val.bit_length() + int(is_signed)
2346
2347 if bits == 0:
2348 bits = 1
2349
2350 # Seven bits per byte
2351 return math.ceil(bits / 7)
2352
320644e2
PP
2353 # Handles the LEB128 integer item `item`.
2354 def _handle_leb128_int_item(self, item: _Leb128Int, state: _GenState):
2355 # Compute value
7a7b31e8 2356 val = self._eval_item_expr(item, state)
676f6189 2357
320644e2
PP
2358 # Size in bytes
2359 size = self._leb128_size_for_val(val, type(item) is _SLeb128Int)
05f81895 2360
320644e2
PP
2361 # For each byte
2362 for _ in range(size):
2363 # Seven LSBs, MSB of the byte set (continue)
2364 self._data.append((val & 0x7F) | 0x80)
2365 val >>= 7
2adf4336 2366
320644e2
PP
2367 # Clear MSB of last byte (stop)
2368 self._data[-1] &= ~0x80
2adf4336 2369
320644e2
PP
2370 # Update offset
2371 state.offset += size
27d52a19 2372
7a7b31e8
PP
2373 # Handles the string item `item`.
2374 def _handle_str_item(self, item: _Str, state: _GenState):
2375 # Compute value
2376 val = str(self._eval_item_expr(item, state, accept_float=True, accept_str=True))
2377
2378 # Encode
2379 data = _encode_str(val, item.codec, item.text_loc)
2380
2381 # Add to data
2382 self._data += data
2383
2384 # Update offset
2385 state.offset += len(data)
2386
320644e2
PP
2387 # Handles the group item `item`, removing the immediate labels from
2388 # `state` at the end if `remove_immediate_labels` is `True`.
2389 def _handle_group_item(
2390 self, item: _Group, state: _GenState, remove_immediate_labels: bool = True
2391 ):
2392 first_fl_num_item_inst_index = len(self._fl_num_item_insts)
2393 immediate_labels = {} # type: LabelsT
27d52a19 2394
320644e2
PP
2395 # Handle each item
2396 for subitem in item.items:
2397 if type(subitem) is _Label:
2398 # Add to local immediate labels
2399 immediate_labels[subitem.name] = state.offset
2adf4336 2400
320644e2 2401 self._handle_item(subitem, state)
2adf4336 2402
320644e2
PP
2403 # Remove immediate labels from current state if needed
2404 if remove_immediate_labels:
2405 for name in immediate_labels:
2406 del state.labels[name]
2adf4336 2407
320644e2
PP
2408 # Add all immediate labels to all state snapshots since
2409 # `first_fl_num_item_inst_index`.
2410 for inst in self._fl_num_item_insts[first_fl_num_item_inst_index:]:
2411 inst.state.labels.update(immediate_labels)
2adf4336 2412
320644e2
PP
2413 # Handles the repetition item `item`.
2414 def _handle_rep_item(self, item: _Rep, state: _GenState):
2415 # Compute the repetition count
2416 mul = _Gen._eval_item_expr(item, state)
05f81895 2417
320644e2
PP
2418 # Validate result
2419 if mul < 0:
2420 _raise_error_for_item(
2421 "Invalid expression `{}`: unexpected negative result {:,}".format(
2422 item.expr_str, mul
2423 ),
2424 item,
2425 )
2adf4336 2426
320644e2
PP
2427 # Generate item data `mul` times
2428 for _ in range(mul):
2429 self._handle_item(item.item, state)
2adf4336 2430
320644e2 2431 # Handles the conditional item `item`.
12b5dbc0 2432 def _handle_cond_item(self, item: _Cond, state: _GenState):
320644e2
PP
2433 # Compute the conditional value
2434 val = _Gen._eval_item_expr(item, state)
2adf4336 2435
320644e2
PP
2436 # Generate item data if needed
2437 if val:
12b5dbc0
PP
2438 self._handle_item(item.true_item, state)
2439 else:
2440 self._handle_item(item.false_item, state)
2adf4336 2441
320644e2
PP
2442 # Evaluates the parameters of the macro expansion item `item`
2443 # considering the initial state `init_state` and returns a new state
2444 # to handle the items of the macro.
2445 def _eval_macro_exp_params(self, item: _MacroExp, init_state: _GenState):
2446 # New state
2447 exp_state = _GenState({}, {}, init_state.offset, init_state.bo)
2adf4336 2448
320644e2
PP
2449 # Evaluate the parameter expressions
2450 macro_def = self._macro_defs[item.name]
2adf4336 2451
320644e2
PP
2452 for param_name, param in zip(macro_def.param_names, item.params):
2453 exp_state.variables[param_name] = _Gen._eval_expr(
7a7b31e8
PP
2454 param.expr_str,
2455 param.expr,
2456 param.text_loc,
2457 init_state,
2458 accept_float=True,
2459 accept_str=True,
320644e2 2460 )
2adf4336 2461
320644e2 2462 return exp_state
2adf4336 2463
320644e2
PP
2464 # Handles the macro expansion item `item`.
2465 def _handle_macro_exp_item(self, item: _MacroExp, state: _GenState):
f5dcb24c 2466 parse_error_msg_text = "While expanding the macro `{}`:".format(item.name)
27d52a19 2467
f5dcb24c
PP
2468 try:
2469 # New state
2470 exp_state = self._eval_macro_exp_params(item, state)
2471
2472 # Process the contained group
2473 init_data_size = len(self._data)
2474 parse_error_msg = (
2475 ParseErrorMessage._create( # pyright: ignore[reportPrivateUsage]
2476 parse_error_msg_text, item.text_loc
2477 )
2478 )
2479 self._parse_error_msgs.append(parse_error_msg)
2480 self._handle_item(self._macro_defs[item.name].group, exp_state)
2481 self._parse_error_msgs.pop()
2482 except ParseError as exc:
2483 _augment_error(exc, parse_error_msg_text, item.text_loc)
27d52a19 2484
320644e2
PP
2485 # Update state offset and return
2486 state.offset += len(self._data) - init_data_size
676f6189 2487
320644e2
PP
2488 # Handles the offset setting item `item`.
2489 def _handle_set_offset_item(self, item: _SetOffset, state: _GenState):
676f6189 2490 state.offset = item.val
2adf4336 2491
25ca454b 2492 # Handles the offset alignment item `item` (adds padding).
320644e2
PP
2493 def _handle_align_offset_item(self, item: _AlignOffset, state: _GenState):
2494 init_offset = state.offset
2495 align_bytes = item.val // 8
2496 state.offset = (state.offset + align_bytes - 1) // align_bytes * align_bytes
2497 self._data += bytes([item.pad_val] * (state.offset - init_offset))
2adf4336 2498
25ca454b
PP
2499 # Handles the filling item `item` (adds padding).
2500 def _handle_fill_until_item(self, item: _FillUntil, state: _GenState):
2501 # Compute the new offset
2502 new_offset = _Gen._eval_item_expr(item, state)
2503
2504 # Validate the new offset
2505 if new_offset < state.offset:
2506 _raise_error_for_item(
2507 "Invalid expression `{}`: new offset {:,} is less than current offset {:,}".format(
2508 item.expr_str, new_offset, state.offset
2509 ),
2510 item,
2511 )
2512
2513 # Fill
2514 self._data += bytes([item.pad_val] * (new_offset - state.offset))
2515
2516 # Update offset
2517 state.offset = new_offset
2518
320644e2
PP
2519 # Handles the label item `item`.
2520 def _handle_label_item(self, item: _Label, state: _GenState):
2521 state.labels[item.name] = state.offset
2adf4336 2522
320644e2
PP
2523 # Handles the item `item`, returning the updated next repetition
2524 # instance.
2525 def _handle_item(self, item: _Item, state: _GenState):
2526 return self._item_handlers[type(item)](item, state)
71aaa3f7 2527
320644e2
PP
2528 # Generates the data for a fixed-length integer item instance having
2529 # the value `val` and returns it.
2530 def _gen_fl_int_item_inst_data(self, val: int, item: _FlNum, state: _GenState):
71aaa3f7
PP
2531 # Validate range
2532 if val < -(2 ** (item.len - 1)) or val > 2**item.len - 1:
2533 _raise_error_for_item(
320644e2
PP
2534 "Value {:,} is outside the {}-bit range when evaluating expression `{}`".format(
2535 val, item.len, item.expr_str
71aaa3f7
PP
2536 ),
2537 item,
2538 )
2539
2540 # Encode result on 64 bits (to extend the sign bit whatever the
2541 # value of `item.len`).
71aaa3f7
PP
2542 data = struct.pack(
2543 "{}{}".format(
2adf4336 2544 ">" if state.bo in (None, ByteOrder.BE) else "<",
71aaa3f7
PP
2545 "Q" if val >= 0 else "q",
2546 ),
2547 val,
2548 )
2549
2550 # Keep only the requested length
2551 len_bytes = item.len // 8
2552
2adf4336 2553 if state.bo in (None, ByteOrder.BE):
71aaa3f7
PP
2554 # Big endian: keep last bytes
2555 data = data[-len_bytes:]
2556 else:
2557 # Little endian: keep first bytes
2adf4336 2558 assert state.bo == ByteOrder.LE
71aaa3f7
PP
2559 data = data[:len_bytes]
2560
320644e2
PP
2561 # Return data
2562 return data
269f6eb3 2563
320644e2
PP
2564 # Generates the data for a fixed-length floating point number item
2565 # instance having the value `val` and returns it.
2566 def _gen_fl_float_item_inst_data(self, val: float, item: _FlNum, state: _GenState):
269f6eb3
PP
2567 # Validate length
2568 if item.len not in (32, 64):
2569 _raise_error_for_item(
2570 "Invalid {}-bit length for a fixed-length floating point number (value {:,})".format(
2571 item.len, val
2572 ),
2573 item,
2574 )
2575
320644e2
PP
2576 # Encode and return result
2577 return struct.pack(
269f6eb3
PP
2578 "{}{}".format(
2579 ">" if state.bo in (None, ByteOrder.BE) else "<",
2580 "f" if item.len == 32 else "d",
2581 ),
2582 val,
2583 )
2584
320644e2
PP
2585 # Generates the data for a fixed-length number item instance and
2586 # returns it.
2587 def _gen_fl_num_item_inst_data(self, item: _FlNum, state: _GenState):
269f6eb3 2588 # Compute value
e57a18e1 2589 val = self._eval_item_expr(item, state, True)
269f6eb3 2590
269f6eb3
PP
2591 # Handle depending on type
2592 if type(val) is int:
320644e2 2593 return self._gen_fl_int_item_inst_data(val, item, state)
269f6eb3
PP
2594 else:
2595 assert type(val) is float
320644e2 2596 return self._gen_fl_float_item_inst_data(val, item, state)
05f81895 2597
320644e2
PP
2598 # Generates the data for all the fixed-length number item instances
2599 # and writes it at the correct offset within `self._data`.
2600 def _gen_fl_num_item_insts(self):
2601 for inst in self._fl_num_item_insts:
2602 # Generate bytes
f5dcb24c
PP
2603 try:
2604 data = self._gen_fl_num_item_inst_data(inst.item, inst.state)
2605 except ParseError as exc:
2606 # Add all the saved parse error messages for this
2607 # instance.
2608 for msg in reversed(inst.parse_error_msgs):
2609 _add_error_msg(exc, msg.text, msg.text_location)
2610
2611 raise
05f81895 2612
320644e2
PP
2613 # Insert bytes into `self._data`
2614 self._data[inst.offset_in_data : inst.offset_in_data + len(data)] = data
2adf4336
PP
2615
2616 # Generates the data (`self._data`) and final state
2617 # (`self._final_state`) from `group` and the initial state `state`.
2618 def _gen(self, group: _Group, state: _GenState):
71aaa3f7
PP
2619 # Initial state
2620 self._data = bytearray()
71aaa3f7
PP
2621
2622 # Item handlers
2623 self._item_handlers = {
676f6189 2624 _AlignOffset: self._handle_align_offset_item,
71aaa3f7 2625 _Byte: self._handle_byte_item,
27d52a19 2626 _Cond: self._handle_cond_item,
25ca454b 2627 _FillUntil: self._handle_fill_until_item,
269f6eb3 2628 _FlNum: self._handle_fl_num_item,
71aaa3f7 2629 _Group: self._handle_group_item,
2adf4336 2630 _Label: self._handle_label_item,
7a7b31e8 2631 _LitStr: self._handle_lit_str_item,
320644e2 2632 _MacroExp: self._handle_macro_exp_item,
71aaa3f7 2633 _Rep: self._handle_rep_item,
2adf4336
PP
2634 _SetBo: self._handle_set_bo_item,
2635 _SetOffset: self._handle_set_offset_item,
05f81895 2636 _SLeb128Int: self._handle_leb128_int_item,
2adf4336 2637 _Str: self._handle_str_item,
05f81895 2638 _ULeb128Int: self._handle_leb128_int_item,
2adf4336 2639 _VarAssign: self._handle_var_assign_item,
320644e2 2640 } # type: Dict[type, Callable[[Any, _GenState], None]]
2adf4336
PP
2641
2642 # Handle the group item, _not_ removing the immediate labels
2643 # because the `labels` property offers them.
320644e2 2644 self._handle_group_item(group, state, False)
2adf4336
PP
2645
2646 # This is actually the final state
2647 self._final_state = state
71aaa3f7 2648
320644e2
PP
2649 # Generate all the fixed-length number bytes now that we know
2650 # their full state
2651 self._gen_fl_num_item_insts()
2652
71aaa3f7
PP
2653
# Parses the Normand input string `normand`, generates the bytes it
# encodes, and returns the corresponding `ParseResult` instance.
#
# `init_variables` is a dictionary of initial variable names (valid
# Python names) to their values. A variable name must not be the
# reserved name `ICITTE`.
#
# `init_labels` is a dictionary of initial label names (valid Python
# names) to integral values. A label name must not be the reserved name
# `ICITTE`.
#
# `init_offset` is the initial offset.
#
# `init_byte_order` is the initial byte order.
#
# Raises `ParseError` on any parsing error.
def parse(
    normand: str,
    init_variables: Optional[VariablesT] = None,
    init_labels: Optional[LabelsT] = None,
    init_offset: int = 0,
    init_byte_order: Optional[ByteOrder] = None,
):
    # Default to no initial variables and no initial labels
    variables = {} if init_variables is None else init_variables
    labels = {} if init_labels is None else init_labels

    # Parse the text, and then generate the data from the resulting
    # main group and macro definitions
    parser = _Parser(normand, variables, labels)
    gen = _Gen(
        parser.res,
        parser.macro_defs,
        variables,
        labels,
        init_offset,
        init_byte_order,
    )

    # Wrap what the generator offers
    create_res = ParseResult._create  # pyright: ignore[reportPrivateUsage]
    return create_res(gen.data, gen.variables, gen.labels, gen.offset, gen.bo)
2695
2696
f5dcb24c
PP
2697# Raises a command-line error with the message `msg`.
2698def _raise_cli_error(msg: str) -> NoReturn:
2699 raise RuntimeError("Command-line error: {}".format(msg))
2700
2701
b2410769
PP
# Converts the CLI assignment value string `s` (surrounding whitespace
# ignored) to a number and returns it.
#
# The result is an `int` if `is_label` is `True`; otherwise it's a
# `float` or an `int`, depending on what `s` looks like.
#
# Raises a command-line error if `s` isn't a valid constant.
def _val_from_assign_val_str(s: str, is_label: bool):
    s = s.strip()

    # Only a variable may hold a floating point number
    if not is_label:
        float_m = _const_float_pat.fullmatch(s)

        if float_m is not None:
            return float(float_m.group(0))

    # Integer otherwise
    int_m = _const_int_pat.fullmatch(s)

    if int_m is not None:
        return int(_norm_const_int(int_m.group(0)), 0)

    # Nothing matched: report what was expected
    if is_label:
        exp = "an integer"
    else:
        exp = "a number"

    _raise_cli_error("Invalid assignment value `{}`: expecting {}".format(s, exp))
2721
2722
# Returns a dictionary of names to values from the list of strings
# `args` containing `NAME=VAL` entries (empty dictionary if `args` is
# `None`).
#
# If `is_str_only` is `True`, then each value is kept as is (string);
# otherwise it's converted to a number with
# _val_from_assign_val_str(), `is_label` indicating whether or not it
# must be an integer.
#
# Raises a command-line error on any invalid entry.
def _dict_from_arg(args: Optional[List[str]], is_label: bool, is_str_only: bool):
    d = {}  # type: VariablesT

    # `NAME=VAL` with optional whitespace around `=`
    assign_pat = re.compile(r"({})\s*=\s*(.*)$".format(_py_name_pat.pattern))

    for arg in args or []:
        m = assign_pat.match(arg)

        if m is None:
            _raise_cli_error("Invalid assignment `{}`".format(arg))

        name, val_str = m.group(1), m.group(2)
        d[name] = val_str if is_str_only else _val_from_assign_val_str(val_str, is_label)

    return d
2745
2746
# Parses the command-line arguments and returns, in this order:
#
# 1. The input file path, or `None` if none.
# 2. The Normand input text.
# 3. The initial offset.
# 4. The initial byte order.
# 5. The initial variables.
# 6. The initial labels.
#
# The Normand input text is read from the input file if a path is
# given, or from the standard input otherwise.
#
# Raises a command-line error (`RuntimeError`) if any assignment or the
# initial offset is invalid.
def _parse_cli_args():
    import argparse

    # Build parser
    ap = argparse.ArgumentParser()
    ap.add_argument(
        "--offset",
        metavar="OFFSET",
        action="store",
        type=int,
        default=0,
        help="initial offset (positive)",
    )
    ap.add_argument(
        "-b",
        "--byte-order",
        metavar="BO",
        choices=["be", "le"],
        type=str,
        help="initial byte order (`be` or `le`)",
    )
    ap.add_argument(
        "-v",
        "--var",
        metavar="NAME=VAL",
        action="append",
        help="add an initial numeric variable (may be repeated)",
    )
    ap.add_argument(
        "-s",
        "--var-str",
        metavar="NAME=VAL",
        action="append",
        help="add an initial string variable (may be repeated)",
    )
    ap.add_argument(
        "-l",
        "--label",
        metavar="NAME=VAL",
        action="append",
        help="add an initial label (may be repeated)",
    )
    ap.add_argument(
        "--version", action="version", version="Normand {}".format(__version__)
    )
    ap.add_argument(
        "path",
        metavar="PATH",
        action="store",
        nargs="?",
        help="input path (none means standard input)",
    )

    # Parse
    args = ap.parse_args()

    # Read input
    if args.path is None:
        normand = sys.stdin.read()
    else:
        with open(args.path) as f:
            normand = f.read()

    # Variables and labels (a string variable overrides a numeric
    # variable having the same name)
    variables = _dict_from_arg(args.var, False, False)
    variables.update(_dict_from_arg(args.var_str, False, True))
    labels = _dict_from_arg(args.label, True, False)

    # Validate offset
    if args.offset < 0:
        _raise_cli_error("Invalid negative offset {}".format(args.offset))

    # Validate and set byte order
    bo = None  # type: Optional[ByteOrder]

    if args.byte_order is not None:
        if args.byte_order == "be":
            bo = ByteOrder.BE
        else:
            assert args.byte_order == "le"
            bo = ByteOrder.LE

    # Return input and initial state
    return args.path, normand, args.offset, bo, variables, typing.cast(LabelsT, labels)
71aaa3f7 2839
71aaa3f7 2840
f5dcb24c
PP
# CLI entry point without exception handling: parses the Normand text
# `normand` with the given initial state and writes the generated bytes
# to the standard output.
def _run_cli_with_args(
    normand: str,
    offset: int,
    bo: Optional[ByteOrder],
    variables: VariablesT,
    labels: LabelsT,
):
    res = parse(normand, variables, labels, offset, bo)

    # Raw bytes: use the underlying binary stream
    sys.stdout.buffer.write(res.data)
71aaa3f7
PP
2850
2851
2852# Prints the exception message `msg` and exits with status 1.
2853def _fail(msg: str) -> NoReturn:
2854 if not msg.endswith("."):
2855 msg += "."
2856
f5dcb24c 2857 print(msg.strip(), file=sys.stderr)
71aaa3f7
PP
2858 sys.exit(1)
2859
2860
# CLI entry point.
#
# Parses the command-line arguments, runs the tool, and, on any error,
# prints a message to the standard error and exits with status 1.
#
# For a parsing error, prints one line per message (last message
# first), each one prefixed with the absolute input path (if any), the
# line number, and the column number.
def _run_cli():
    try:
        args = _parse_cli_args()
    except Exception as exc:
        _fail(str(exc))

    try:
        _run_cli_with_args(*args[1:])
    except ParseError as exc:
        import os.path

        # First returned value is the input path (or `None`)
        path = args[0]

        if path is None:
            prefix = ""
        else:
            prefix = "{}:".format(os.path.abspath(path))

        lines = []  # type: List[str]

        for msg in reversed(exc.messages):
            line = "{}{}:{} - {}".format(
                prefix,
                msg.text_location.line_no,
                msg.text_location.col_no,
                msg.text,
            )

            # Terminate the line unless it already ends with some
            # punctuation
            if line[-1] not in ".:;":
                line += "."

            lines.append(line)

        _fail("\n".join(lines).strip())
    except Exception as exc:
        _fail(str(exc))
2892
2893
# Run the command-line tool when this module is executed as a script
# (not when it's imported).
if __name__ == "__main__":
    _run_cli()
This page took 0.145021 seconds and 4 git commands to generate.