From: Philippe Proulx <eeppeliteloop@gmail.com>
Date: Fri, 6 Oct 2023 15:24:53 +0000 (-0400)
Subject: Make `normand.ParseError` contain a list of messages
X-Git-Tag: v0.15.0
X-Git-Url: https://git.efficios.com/?a=commitdiff_plain;h=f5dcb24cf44ba80ff91c48bb607f6761a37838c3;p=normand.git

Make `normand.ParseError` contain a list of messages

A `normand.ParseError` instance now contains a list of
`normand.ParseErrorMessage` instances.

A `normand.ParseErrorMessage` instance contains a message text and a
source text location.

This adds precious context to a parsing error.

For example, with

    !macro meow(yeah)
      {yeah:8}
    !end

    !macro mix(yeah)
      aa bb m:meow({yeah * 2})
    !end

    m:mix(12)
    "hello" m:mix(899)
    m:mix(16)
    m:mix(19)

we now get

    10:9 - While expanding the macro `mix`:
    6:9 - While expanding the macro `meow`:
    2:4 - Value 1,798 is outside the 8-bit range when evaluating
          expression `yeah`

Without this patch, the only available message would be the last one,
and you wouldn't know which macro expansion(s) triggered the parsing
error.

The CLI and `tests/conftest.py` are modified to take multiple parsing
error messages into account.

There was a little challenge with fixed-length number item instances
handled after the rest (in _Gen._gen_fl_num_item_insts()): at this
point, there's no current try/except context for macro expansions
because they're already handled. My current strategy is to keep a
current stack of parsing error messages (`self._parse_error_msgs`)
during the generation: when the generator initially fails to evaluate
the expression of a fixed-length number item, it copies a snapshot of
those messages to the `_FlNumItemInst` object so that we can restore
them if there's a parsing error later during
_Gen._gen_fl_num_item_insts().

Adding two nested macro expansion tests to make sure we get all the
expected parsing error messages. Other tests are unchanged (single
parsing error message).

Change-Id: Iba8499608f86165e02d6d040795222cafcbca4a9
Signed-off-by: Philippe Proulx <eeppeliteloop@gmail.com>
---

diff --git a/README.adoc b/README.adoc
index 0e5c506..89731e6 100644
--- a/README.adoc
+++ b/README.adoc
@@ -29,7 +29,7 @@ _**Normand**_ is a text-to-binary processor with its own language.
 This package offers both a portable {py3} module and a command-line
 tool.
 
-WARNING: This version of Normand is 0.14, meaning both the Normand
+WARNING: This version of Normand is 0.15, meaning both the Normand
 language and the module/CLI interface aren't stable.
 
 ifdef::env-github[]
@@ -344,6 +344,8 @@ Precise error reporting::
 ----
 +
 ----
+/tmp/meow.normand:32:19 - While expanding the macro `meow`:
+/tmp/meow.normand:35:5 - While expanding the macro `zzz`:
 /tmp/meow.normand:18:9 - Value 315 is outside the 8-bit range when evaluating expression `end - ICITTE`.
 ----
 
@@ -1887,11 +1889,26 @@ class TextLocation:
         ...
 
 
+# Parsing error message.
+class ParseErrorMessage:
+    # Message text.
+    @property
+    def text(self):
+        ...
+
+    # Source text location.
+    @property
+    def text_location(self):
+        ...
+
+
 # Parsing error.
 class ParseError(RuntimeError):
-    # Source text location.
+    # Parsing error messages.
+    #
+    # The first message is the most _specific_ one.
     @property
-    def text_loc(self) -> TextLocation:
+    def messages(self):
         ...
 
 
diff --git a/normand/normand.py b/normand/normand.py
index b712150..4699a28 100644
--- a/normand/normand.py
+++ b/normand/normand.py
@@ -30,7 +30,7 @@
 # Upstream repository: <https://github.com/efficios/normand>.
 
 __author__ = "Philippe Proulx"
-__version__ = "0.14.0"
+__version__ = "0.15.0"
 __all__ = [
     "__author__",
     "__version__",
@@ -38,6 +38,7 @@ __all__ = [
     "LabelsT",
     "parse",
     "ParseError",
+    "ParseErrorMessage",
     "ParseResult",
     "TextLocation",
     "VariablesT",
@@ -504,7 +505,33 @@ class _MacroExp(_Item, _RepableItem):
         )
 
 
-# A parsing error containing a message and a text location.
+# A parsing error message: a string and a text location.
+class ParseErrorMessage:
+    @classmethod
+    def _create(cls, text: str, text_loc: TextLocation):
+        self = cls.__new__(cls)
+        self._init(text, text_loc)
+        return self
+
+    def __init__(self, *args, **kwargs):  # type: ignore
+        raise NotImplementedError
+
+    def _init(self, text: str, text_loc: TextLocation):
+        self._text = text
+        self._text_loc = text_loc
+
+    # Message text.
+    @property
+    def text(self):
+        return self._text
+
+    # Source text location.
+    @property
+    def text_location(self):
+        return self._text_loc
+
+
+# A parsing error containing one or more messages (`ParseErrorMessage`).
 class ParseError(RuntimeError):
     @classmethod
     def _create(cls, msg: str, text_loc: TextLocation):
@@ -517,12 +544,22 @@ class ParseError(RuntimeError):
 
     def _init(self, msg: str, text_loc: TextLocation):
         super().__init__(msg)
-        self._text_loc = text_loc
+        self._msgs = []  # type: List[ParseErrorMessage]
+        self._add_msg(msg, text_loc)
 
-    # Source text location.
+    def _add_msg(self, msg: str, text_loc: TextLocation):
+        self._msgs.append(
+            ParseErrorMessage._create(  # pyright: ignore[reportPrivateUsage]
+                msg, text_loc
+            )
+        )
+
+    # Parsing error messages.
+    #
+    # The first message is the most specific one.
     @property
-    def text_loc(self):
-        return self._text_loc
+    def messages(self):
+        return self._msgs
 
 
 # Raises a parsing error, forwarding the parameters to the constructor.
@@ -530,6 +567,17 @@ def _raise_error(msg: str, text_loc: TextLocation) -> NoReturn:
     raise ParseError._create(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
 
 
+# Adds a message to the parsing error `exc`.
+def _add_error_msg(exc: ParseError, msg: str, text_loc: TextLocation):
+    exc._add_msg(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
+
+
+# Appends a message to the parsing error `exc` and reraises it.
+def _augment_error(exc: ParseError, msg: str, text_loc: TextLocation) -> NoReturn:
+    _add_error_msg(exc, msg, text_loc)
+    raise exc
+
+
 # Variables dictionary type (for type hints).
 VariablesT = Dict[str, Union[int, float]]
 
@@ -1809,10 +1857,17 @@ class _GenState:
 
 # Fixed-length number item instance.
 class _FlNumItemInst:
-    def __init__(self, item: _FlNum, offset_in_data: int, state: _GenState):
+    def __init__(
+        self,
+        item: _FlNum,
+        offset_in_data: int,
+        state: _GenState,
+        parse_error_msgs: List[ParseErrorMessage],
+    ):
         self._item = item
         self._offset_in_data = offset_in_data
         self._state = state
+        self._parse_error_msgs = parse_error_msgs
 
     @property
     def item(self):
@@ -1826,6 +1881,10 @@ class _FlNumItemInst:
     def state(self):
         return self._state
 
+    @property
+    def parse_error_msgs(self):
+        return self._parse_error_msgs
+
 
 # Generator of data and final state from a group item.
 #
@@ -1845,7 +1904,10 @@ class _FlNumItemInst:
 #    because the expression refers to a "future" label: save the current
 #    offset in `self._data` (generated data) and a snapshot of the
 #    current state within `self._fl_num_item_insts` (`_FlNumItemInst`
-#    object). _gen_fl_num_item_insts() will deal with this later.
+#    object). _gen_fl_num_item_insts() will deal with this later. A
+#    `_FlNumItemInst` instance also contains a snapshot of the current
+#    parsing error messages (`self._parse_error_msgs`) which need to be
+#    taken into account when handling the instance later.
 #
 #    When handling the items of a group, keep a map of immediate label
 #    names to their offset. Then, after having processed all the items,
@@ -1861,7 +1923,10 @@ class _FlNumItemInst:
 #    "future" labels from the point of view of some fixed-length number
 #    item instance.
 #
-#    If an evaluation fails at this point, then it's a user error.
+#    If an evaluation fails at this point, then it's a user error. Add
+#    to the parsing error all the saved parsing error messages of the
+#    instance. Those additional messages add precious context to the
+#    error.
 class _Gen:
     def __init__(
         self,
@@ -1874,6 +1939,7 @@ class _Gen:
     ):
         self._macro_defs = macro_defs
         self._fl_num_item_insts = []  # type: List[_FlNumItemInst]
+        self._parse_error_msgs = []  # type: List[ParseErrorMessage]
         self._gen(group, _GenState(variables, labels, offset, bo))
 
     # Generated bytes.
@@ -2010,7 +2076,12 @@ class _Gen:
             data = self._gen_fl_num_item_inst_data(item, state)
         except Exception:
             self._fl_num_item_insts.append(
-                _FlNumItemInst(item, len(self._data), copy.deepcopy(state))
+                _FlNumItemInst(
+                    item,
+                    len(self._data),
+                    copy.deepcopy(state),
+                    copy.deepcopy(self._parse_error_msgs),
+                )
             )
 
             # Reserve space in `self._data` for this instance
@@ -2136,12 +2207,24 @@ class _Gen:
 
     # Handles the macro expansion item `item`.
     def _handle_macro_exp_item(self, item: _MacroExp, state: _GenState):
-        # New state
-        exp_state = self._eval_macro_exp_params(item, state)
+        parse_error_msg_text = "While expanding the macro `{}`:".format(item.name)
 
-        # Process the contained group
-        init_data_size = len(self._data)
-        self._handle_item(self._macro_defs[item.name].group, exp_state)
+        try:
+            # New state
+            exp_state = self._eval_macro_exp_params(item, state)
+
+            # Process the contained group
+            init_data_size = len(self._data)
+            parse_error_msg = (
+                ParseErrorMessage._create(  # pyright: ignore[reportPrivateUsage]
+                    parse_error_msg_text, item.text_loc
+                )
+            )
+            self._parse_error_msgs.append(parse_error_msg)
+            self._handle_item(self._macro_defs[item.name].group, exp_state)
+            self._parse_error_msgs.pop()
+        except ParseError as exc:
+            _augment_error(exc, parse_error_msg_text, item.text_loc)
 
         # Update state offset and return
         state.offset += len(self._data) - init_data_size
@@ -2261,7 +2344,15 @@ class _Gen:
     def _gen_fl_num_item_insts(self):
         for inst in self._fl_num_item_insts:
             # Generate bytes
-            data = self._gen_fl_num_item_inst_data(inst.item, inst.state)
+            try:
+                data = self._gen_fl_num_item_inst_data(inst.item, inst.state)
+            except ParseError as exc:
+                # Add all the saved parse error messages for this
+                # instance.
+                for msg in reversed(inst.parse_error_msgs):
+                    _add_error_msg(exc, msg.text, msg.text_location)
+
+                raise
 
             # Insert bytes into `self._data`
             self._data[inst.offset_in_data : inst.offset_in_data + len(data)] = data
@@ -2346,7 +2437,38 @@ def parse(
     )
 
 
-# Parses the command-line arguments.
+# Raises a command-line error with the message `msg`.
+def _raise_cli_error(msg: str) -> NoReturn:
+    raise RuntimeError("Command-line error: {}".format(msg))
+
+
+# Returns a dictionary of string to integers from the list of strings
+# `args` containing `NAME=VAL` entries.
+def _dict_from_arg(args: Optional[List[str]]):
+    d = {}  # type: LabelsT
+
+    if args is None:
+        return d
+
+    for arg in args:
+        m = re.match(r"({})=(\d+)$".format(_py_name_pat.pattern), arg)
+
+        if m is None:
+            _raise_cli_error("Invalid assignment {}".format(arg))
+
+        d[m.group(1)] = int(m.group(2))
+
+    return d
+
+
+# Parses the command-line arguments and returns, in this order:
+#
+# 1. The input file path, or `None` if none.
+# 2. The Normand input text.
+# 3. The initial offset.
+# 4. The initial byte order.
+# 5. The initial variables.
+# 6. The initial labels.
 def _parse_cli_args():
     import argparse
 
@@ -2393,39 +2515,7 @@ def _parse_cli_args():
     )
 
     # Parse
-    return ap.parse_args()
-
-
-# Raises a command-line error with the message `msg`.
-def _raise_cli_error(msg: str) -> NoReturn:
-    raise RuntimeError("Command-line error: {}".format(msg))
-
-
-# Returns a dictionary of string to integers from the list of strings
-# `args` containing `NAME=VAL` entries.
-def _dict_from_arg(args: Optional[List[str]]):
-    d = {}  # type: LabelsT
-
-    if args is None:
-        return d
-
-    for arg in args:
-        m = re.match(r"({})=(\d+)$".format(_py_name_pat.pattern), arg)
-
-        if m is None:
-            _raise_cli_error("Invalid assignment {}".format(arg))
-
-        d[m.group(1)] = int(m.group(2))
-
-    return d
-
-
-# CLI entry point without exception handling.
-def _try_run_cli():
-    import os.path
-
-    # Parse arguments
-    args = _parse_cli_args()
+    args = ap.parse_args()
 
     # Read input
     if args.path is None:
@@ -2452,23 +2542,19 @@ def _try_run_cli():
             assert args.byte_order == "le"
             bo = ByteOrder.LE
 
-    # Parse
-    try:
-        res = parse(normand, variables, labels, args.offset, bo)
-    except ParseError as exc:
-        prefix = ""
+    # Return input and initial state
+    return args.path, normand, args.offset, bo, variables, labels
 
-        if args.path is not None:
-            prefix = "{}:".format(os.path.abspath(args.path))
-
-        _fail(
-            "{}{}:{} - {}".format(
-                prefix, exc.text_loc.line_no, exc.text_loc.col_no, str(exc)
-            )
-        )
 
-    # Print
-    sys.stdout.buffer.write(res.data)
+# CLI entry point without exception handling.
+def _run_cli_with_args(
+    normand: str,
+    offset: int,
+    bo: Optional[ByteOrder],
+    variables: VariablesT,
+    labels: LabelsT,
+):
+    sys.stdout.buffer.write(parse(normand, variables, labels, offset, bo).data)
 
 
 # Prints the exception message `msg` and exits with status 1.
@@ -2476,14 +2562,39 @@ def _fail(msg: str) -> NoReturn:
     if not msg.endswith("."):
         msg += "."
 
-    print(msg, file=sys.stderr)
+    print(msg.strip(), file=sys.stderr)
     sys.exit(1)
 
 
 # CLI entry point.
 def _run_cli():
     try:
-        _try_run_cli()
+        args = _parse_cli_args()
+    except Exception as exc:
+        _fail(str(exc))
+
+    try:
+        _run_cli_with_args(*args[1:])
+    except ParseError as exc:
+        import os.path
+
+        prefix = "" if args[0] is None else "{}:".format(os.path.abspath(args[0]))
+        fail_msg = ""
+
+        for msg in reversed(exc.messages):
+            fail_msg += "{}{}:{} - {}".format(
+                prefix,
+                msg.text_location.line_no,
+                msg.text_location.col_no,
+                msg.text,
+            )
+
+            if fail_msg[-1] not in ".:;":
+                fail_msg += "."
+
+            fail_msg += "\n"
+
+        _fail(fail_msg.strip())
     except Exception as exc:
         _fail(str(exc))
 
diff --git a/pyproject.toml b/pyproject.toml
index 02ffe6d..03dd81e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -23,7 +23,7 @@
 
 [tool.poetry]
 name = 'normand'
-version = '0.14.0'
+version = '0.15.0'
 description = 'Text-to-binary processor with its own language'
 license = 'MIT'
 authors = ['Philippe Proulx <eeppeliteloop@gmail.com>']
diff --git a/tests/conftest.py b/tests/conftest.py
index c5d4179..0c90e2a 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -77,10 +77,14 @@ class _NormandTestItemFail(_NormandTestItem):
             normand.parse(normand_text)
 
         exc = exc_info.value
-        expected_msg = "{}:{} - {}".format(
-            exc.text_loc.line_no, exc.text_loc.col_no, str(exc)
-        )
-        assert output.strip() == expected_msg
+        expected_msg = ''
+
+        for msg in reversed(exc.messages):
+            expected_msg += "{}:{} - {}\n".format(
+                msg.text_location.line_no, msg.text_location.col_no, msg.text
+            )
+
+        assert output.strip() == expected_msg.strip()
 
 
 class _NormandTestItemPass(_NormandTestItem):
diff --git a/tests/fail-macro-exp-nested-1.nt b/tests/fail-macro-exp-nested-1.nt
new file mode 100644
index 0000000..f6e5f49
--- /dev/null
+++ b/tests/fail-macro-exp-nested-1.nt
@@ -0,0 +1,16 @@
+!macro meow(yeah)
+  {yeah:8}
+!end
+
+!macro mix(yeah)
+  aa bb m:meow({yeah * 2})
+!end
+
+m:mix(12)
+"hello" m:mix(899)
+m:mix(16)
+m:mix(19)
+---
+10:9 - While expanding the macro `mix`:
+6:9 - While expanding the macro `meow`:
+2:4 - Value 1,798 is outside the 8-bit range when evaluating expression `yeah`
diff --git a/tests/fail-macro-exp-nested-2.nt b/tests/fail-macro-exp-nested-2.nt
new file mode 100644
index 0000000..1a3aa9e
--- /dev/null
+++ b/tests/fail-macro-exp-nested-2.nt
@@ -0,0 +1,13 @@
+!macro meow()
+  {1993:8}
+!end
+
+!macro mix()
+  aa bb m:meow()
+!end
+
+"hello" m:mix()
+---
+9:9 - While expanding the macro `mix`:
+6:9 - While expanding the macro `meow`:
+2:4 - Value 1,993 is outside the 8-bit range when evaluating expression `1993`