/*
 * Copyright 2016-2019 Philippe Proulx <pproulx@efficios.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "param-parse.h"

#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>

#include <babeltrace2/babeltrace.h>

#include "common/assert.h"
#include "common/common.h"
/* INI-style parsing FSM states */
enum ini_parsing_fsm_state {
	/* Expect a map key (identifier) */
	INI_EXPECT_MAP_KEY,

	/* Expect an equal character (`=`) */
	INI_EXPECT_EQUAL,

	/* Expect a value */
	INI_EXPECT_VALUE,

	/* Expect a comma character (`,`) */
	INI_EXPECT_COMMA,
};
52 /* INI-style parsing state variables */
53 struct ini_parsing_state
{
54 /* Lexical scanner (owned by this) */
57 /* Output map value object being filled (owned by this) */
60 /* Next expected FSM state */
61 enum ini_parsing_fsm_state expecting
;
63 /* Last decoded map key (owned by this) */
64 GString
*last_map_key
;
66 /* Complete INI-style string to parse */
69 /* Error buffer (weak) */
74 * Appends an "expecting token" error to the INI-style parsing state's
78 void ini_append_error_expecting(struct ini_parsing_state
*state
,
79 GScanner
*scanner
, const char *expecting
)
84 g_string_append_printf(state
->ini_error
, "Expecting %s:\n", expecting
);
86 /* Only append error if there's one line */
87 if (strchr(state
->arg
, '\n') || strlen(state
->arg
) == 0) {
91 g_string_append_printf(state
->ini_error
, "\n %s\n", state
->arg
);
92 pos
= g_scanner_cur_position(scanner
) + 4;
94 if (!g_scanner_eof(scanner
)) {
98 for (i
= 0; i
< pos
; ++i
) {
99 g_string_append_printf(state
->ini_error
, " ");
102 g_string_append_printf(state
->ini_error
, "^\n\n");
106 void ini_append_oom_error(GString
*error
)
109 g_string_append(error
, "Out of memory\n");
113 * Parses the next token as an unsigned integer.
116 bt_value
*ini_parse_uint(struct ini_parsing_state
*state
)
118 bt_value
*value
= NULL
;
119 GTokenType token_type
= g_scanner_get_next_token(state
->scanner
);
121 if (token_type
!= G_TOKEN_INT
) {
122 ini_append_error_expecting(state
, state
->scanner
,
127 value
= bt_value_integer_unsigned_create_init(
128 state
->scanner
->value
.v_int64
);
135 * Parses the next token as a number and returns its negation.
138 bt_value
*ini_parse_neg_number(struct ini_parsing_state
*state
)
140 bt_value
*value
= NULL
;
141 GTokenType token_type
= g_scanner_get_next_token(state
->scanner
);
143 switch (token_type
) {
146 /* Negative integer */
147 uint64_t int_val
= state
->scanner
->value
.v_int64
;
149 if (int_val
> (((uint64_t) INT64_MAX
) + 1)) {
150 g_string_append_printf(state
->ini_error
,
151 "Integer value -%" PRIu64
" is outside the range of a 64-bit signed integer\n",
154 value
= bt_value_integer_signed_create_init(
155 -((int64_t) int_val
));
161 /* Negative floating point number */
162 value
= bt_value_real_create_init(
163 -state
->scanner
->value
.v_float
);
166 ini_append_error_expecting(state
, state
->scanner
, "value");
173 static bt_value
*ini_parse_value(struct ini_parsing_state
*state
);
176 * Parses the current and following tokens as an array. Arrays are
177 * formatted as an opening `[`, a list of comma-separated values, and a
178 * closing `]`. For convenience, this function supports an optional
179 * trailing comma after the last value.
181 * The current token of the parser must be the opening square bracket
182 * (`[`) of the array.
185 bt_value
*ini_parse_array(struct ini_parsing_state
*state
)
187 bt_value
*array_value
;
188 GTokenType token_type
;
190 /* The `[` character must have already been ingested */
191 BT_ASSERT(g_scanner_cur_token(state
->scanner
) == G_TOKEN_CHAR
);
192 BT_ASSERT(g_scanner_cur_value(state
->scanner
).v_char
== '[');
194 array_value
= bt_value_array_create ();
196 ini_append_oom_error(state
->ini_error
);
200 token_type
= g_scanner_get_next_token(state
->scanner
);
202 /* While the current token is not a `]` */
203 while (!(token_type
== G_TOKEN_CHAR
&&
204 g_scanner_cur_value(state
->scanner
).v_char
== ']')) {
205 bt_value
*item_value
;
206 bt_value_array_append_element_status append_status
;
208 /* Parse the item... */
209 item_value
= ini_parse_value(state
);
214 /* ... and add it to the result array */
215 append_status
= bt_value_array_append_element(array_value
,
217 BT_VALUE_PUT_REF_AND_RESET(item_value
);
218 if (append_status
< 0) {
223 * Ingest the token following the value. It should be
224 * either a comma or closing square bracket.
226 token_type
= g_scanner_get_next_token(state
->scanner
);
227 if (token_type
== G_TOKEN_CHAR
&&
228 g_scanner_cur_value(state
->scanner
).v_char
== ',') {
230 * Ingest the token following the comma. If it
231 * happens to be a closing square bracket, exit
232 * the loop and we are done (we allow trailing
233 * commas). Otherwise, we are ready for the next
234 * ini_parse_value() call.
236 token_type
= g_scanner_get_next_token(state
->scanner
);
237 } else if (token_type
!= G_TOKEN_CHAR
||
238 g_scanner_cur_value(state
->scanner
).v_char
!= ']') {
239 ini_append_error_expecting(state
, state
->scanner
,
248 BT_VALUE_PUT_REF_AND_RESET(array_value
);
255 * Parses the current and following tokens as a map. Maps are
256 * formatted as an opening `{`, a list of comma-separated entries, and a
257 * closing `}`. And entry is a key (an unquoted string), an equal sign and
258 * a value. For convenience, this function supports an optional trailing comma
259 * after the last value.
261 * The current token of the parser must be the opening curly bracket
262 * (`{`) of the array.
265 bt_value
*ini_parse_map(struct ini_parsing_state
*state
)
268 GTokenType token_type
;
271 /* The `{` character must have already been ingested */
272 BT_ASSERT(g_scanner_cur_token(state
->scanner
) == G_TOKEN_CHAR
);
273 BT_ASSERT(g_scanner_cur_value(state
->scanner
).v_char
== '{');
275 map_value
= bt_value_map_create ();
277 ini_append_oom_error(state
->ini_error
);
281 token_type
= g_scanner_get_next_token(state
->scanner
);
283 /* While the current token is not a `}` */
284 while (!(token_type
== G_TOKEN_CHAR
&&
285 g_scanner_cur_value(state
->scanner
).v_char
== '}')) {
286 bt_value
*entry_value
;
287 bt_value_map_insert_entry_status insert_entry_status
;
289 /* Expect map key. */
290 if (token_type
!= G_TOKEN_IDENTIFIER
) {
291 ini_append_error_expecting(state
, state
->scanner
,
297 key
= g_strdup(g_scanner_cur_value(state
->scanner
).v_identifier
);
299 token_type
= g_scanner_get_next_token(state
->scanner
);
301 /* Expect equal sign. */
302 if (token_type
!= G_TOKEN_CHAR
||
303 g_scanner_cur_value(state
->scanner
).v_char
!= '=') {
304 ini_append_error_expecting(state
,
305 state
->scanner
, "'='");
309 token_type
= g_scanner_get_next_token(state
->scanner
);
311 /* Parse the entry value... */
312 entry_value
= ini_parse_value(state
);
317 /* ... and add it to the result map */
318 insert_entry_status
=
319 bt_value_map_insert_entry(map_value
, key
, entry_value
);
320 BT_VALUE_PUT_REF_AND_RESET(entry_value
);
321 if (insert_entry_status
!= BT_VALUE_MAP_INSERT_ENTRY_STATUS_OK
) {
326 * Ingest the token following the value. It should be
327 * either a comma or closing curly bracket.
329 token_type
= g_scanner_get_next_token(state
->scanner
);
330 if (token_type
== G_TOKEN_CHAR
&&
331 g_scanner_cur_value(state
->scanner
).v_char
== ',') {
333 * Ingest the token following the comma. If it
334 * happens to be a closing curly bracket, exit
335 * the loop and we are done (we allow trailing
336 * commas). Otherwise, we are ready for the next
337 * ini_parse_value() call.
339 token_type
= g_scanner_get_next_token(state
->scanner
);
340 } else if (token_type
!= G_TOKEN_CHAR
||
341 g_scanner_cur_value(state
->scanner
).v_char
!= '}') {
342 ini_append_error_expecting(state
, state
->scanner
,
350 BT_VALUE_PUT_REF_AND_RESET(map_value
);
359 * Parses the current token (and the following ones if needed) as a
360 * value, returning it as a `bt_value *`.
363 bt_value
*ini_parse_value(struct ini_parsing_state
*state
)
365 bt_value
*value
= NULL
;
366 GTokenType token_type
= state
->scanner
->token
;
368 switch (token_type
) {
370 if (state
->scanner
->value
.v_char
== '-') {
371 /* Negative number */
372 value
= ini_parse_neg_number(state
);
373 } else if (state
->scanner
->value
.v_char
== '+') {
374 /* Unsigned integer */
375 value
= ini_parse_uint(state
);
376 } else if (state
->scanner
->value
.v_char
== '[') {
378 value
= ini_parse_array(state
);
379 } else if (state
->scanner
->value
.v_char
== '{') {
381 value
= ini_parse_map(state
);
383 ini_append_error_expecting(state
, state
->scanner
, "value");
390 /* Positive, signed integer */
391 uint64_t int_val
= state
->scanner
->value
.v_int64
;
393 if (int_val
> INT64_MAX
) {
394 g_string_append_printf(state
->ini_error
,
395 "Integer value %" PRIu64
" is outside the range of a 64-bit signed integer\n",
399 value
= bt_value_integer_signed_create_init(
406 /* Positive floating point number */
407 value
= bt_value_real_create_init(state
->scanner
->value
.v_float
);
411 value
= bt_value_string_create_init(state
->scanner
->value
.v_string
);
413 case G_TOKEN_IDENTIFIER
:
416 * Using symbols would be appropriate here, but said
417 * symbols are allowed as map key, so it's easier to
418 * consider everything an identifier.
420 * If one of the known symbols is not recognized here,
421 * then fall back to creating a string value.
423 const char *id
= state
->scanner
->value
.v_identifier
;
425 if (strcmp(id
, "null") == 0 || strcmp(id
, "NULL") == 0 ||
426 strcmp(id
, "nul") == 0) {
427 value
= bt_value_null
;
428 bt_value_get_ref(value
);
429 } else if (strcmp(id
, "true") == 0 || strcmp(id
, "TRUE") == 0 ||
430 strcmp(id
, "yes") == 0 ||
431 strcmp(id
, "YES") == 0) {
432 value
= bt_value_bool_create_init(true);
433 } else if (strcmp(id
, "false") == 0 ||
434 strcmp(id
, "FALSE") == 0 ||
435 strcmp(id
, "no") == 0 ||
436 strcmp(id
, "NO") == 0) {
437 value
= bt_value_bool_create_init(false);
439 value
= bt_value_string_create_init(id
);
444 /* Unset return value variable will trigger the error */
445 ini_append_error_expecting(state
, state
->scanner
, "value");
454 * Handles the current state of the INI parser.
456 * Returns 0 to continue, 1 to end, or a negative value on error.
459 int ini_handle_state(struct ini_parsing_state
*state
)
462 GTokenType token_type
;
463 bt_value
*value
= NULL
;
465 token_type
= g_scanner_get_next_token(state
->scanner
);
466 if (token_type
== G_TOKEN_EOF
) {
467 if (state
->expecting
!= INI_EXPECT_COMMA
) {
468 switch (state
->expecting
) {
469 case INI_EXPECT_EQUAL
:
470 ini_append_error_expecting(state
,
471 state
->scanner
, "`=`");
473 case INI_EXPECT_VALUE
:
474 ini_append_error_expecting(state
,
475 state
->scanner
, "value");
477 case INI_EXPECT_MAP_KEY
:
478 ini_append_error_expecting(state
,
479 state
->scanner
, "unquoted map key");
492 switch (state
->expecting
) {
493 case INI_EXPECT_MAP_KEY
:
494 if (token_type
!= G_TOKEN_IDENTIFIER
) {
495 ini_append_error_expecting(state
, state
->scanner
,
500 g_string_assign(state
->last_map_key
,
501 state
->scanner
->value
.v_identifier
);
503 if (bt_value_map_has_entry(state
->params
,
504 state
->last_map_key
->str
)) {
505 g_string_append_printf(state
->ini_error
,
506 "Duplicate parameter key: `%s`\n",
507 state
->last_map_key
->str
);
511 state
->expecting
= INI_EXPECT_EQUAL
;
513 case INI_EXPECT_EQUAL
:
514 if (token_type
!= G_TOKEN_CHAR
) {
515 ini_append_error_expecting(state
,
516 state
->scanner
, "'='");
520 if (state
->scanner
->value
.v_char
!= '=') {
521 ini_append_error_expecting(state
,
522 state
->scanner
, "'='");
526 state
->expecting
= INI_EXPECT_VALUE
;
528 case INI_EXPECT_VALUE
:
530 value
= ini_parse_value(state
);
535 state
->expecting
= INI_EXPECT_COMMA
;
538 case INI_EXPECT_COMMA
:
539 if (token_type
!= G_TOKEN_CHAR
) {
540 ini_append_error_expecting(state
,
541 state
->scanner
, "','");
545 if (state
->scanner
->value
.v_char
!= ',') {
546 ini_append_error_expecting(state
,
547 state
->scanner
, "','");
551 state
->expecting
= INI_EXPECT_MAP_KEY
;
563 if (bt_value_map_insert_entry(state
->params
,
564 state
->last_map_key
->str
, value
)) {
565 /* Only override return value on error */
571 BT_VALUE_PUT_REF_AND_RESET(value
);
576 * Converts an INI-style argument to an equivalent map value object.
578 * Return value is owned by the caller.
581 bt_value
*bt_param_parse(const char *arg
, GString
*ini_error
)
583 /* Lexical scanner configuration */
584 GScannerConfig scanner_config
= {
585 /* Skip whitespaces */
586 .cset_skip_characters
= (gchar
*) " \t\n",
588 /* Identifier syntax is: [a-zA-Z_][a-zA-Z0-9_.:-]* */
589 .cset_identifier_first
= (gchar
*)
593 .cset_identifier_nth
= (gchar
*)
598 /* "hello" and "Hello" two different keys */
599 .case_sensitive
= TRUE
,
602 .cpair_comment_single
= NULL
,
603 .skip_comment_multi
= TRUE
,
604 .skip_comment_single
= TRUE
,
605 .scan_comment_multi
= FALSE
,
608 * Do scan identifiers, including 1-char identifiers,
609 * but NULL is a normal identifier.
611 .scan_identifier
= TRUE
,
612 .scan_identifier_1char
= TRUE
,
613 .scan_identifier_NULL
= FALSE
,
616 * No specific symbols: null and boolean "symbols" are
617 * scanned as plain identifiers.
619 .scan_symbols
= FALSE
,
620 .symbol_2_token
= FALSE
,
621 .scope_0_fallback
= FALSE
,
624 * Scan "0b"-, "0"-, and "0x"-prefixed integers, but not
625 * integers prefixed with "$".
631 .scan_hex_dollar
= FALSE
,
633 /* Convert scanned numbers to integer tokens */
634 .numbers_2_int
= TRUE
,
636 /* Support both integers and floating point numbers */
637 .int_2_float
= FALSE
,
639 /* Scan integers as 64-bit signed integers */
642 /* Only scan double-quoted strings */
643 .scan_string_sq
= FALSE
,
644 .scan_string_dq
= TRUE
,
646 /* Do not converter identifiers to string tokens */
647 .identifier_2_string
= FALSE
,
649 /* Scan characters as `G_TOKEN_CHAR` token */
650 .char_2_token
= FALSE
,
652 struct ini_parsing_state state
= {
655 .expecting
= INI_EXPECT_MAP_KEY
,
657 .ini_error
= ini_error
,
660 BT_ASSERT(ini_error
);
661 g_string_assign(ini_error
, "");
662 state
.params
= bt_value_map_create();
664 ini_append_oom_error(ini_error
);
668 state
.scanner
= g_scanner_new(&scanner_config
);
669 if (!state
.scanner
) {
670 ini_append_oom_error(ini_error
);
674 state
.last_map_key
= g_string_new(NULL
);
675 if (!state
.last_map_key
) {
676 ini_append_oom_error(ini_error
);
680 /* Let the scan begin */
681 g_scanner_input_text(state
.scanner
, arg
, strlen(arg
));
684 int ret
= ini_handle_state(&state
);
689 } else if (ret
> 0) {
698 BT_VALUE_PUT_REF_AND_RESET(state
.params
);
702 g_scanner_destroy(state
.scanner
);
705 if (state
.last_map_key
) {
706 g_string_free(state
.last_map_key
, TRUE
);