Move CLI params parsing to its own lib
[babeltrace.git] / src / param-parse / param-parse.c
1 /*
2 * Copyright 2016-2019 Philippe Proulx <pproulx@efficios.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 #include <errno.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include "common/assert.h"
27 #include <stdio.h>
28 #include <stdbool.h>
29 #include <inttypes.h>
30 #include <babeltrace2/babeltrace.h>
31 #include "common/common.h"
32 #include <glib.h>
33 #include <sys/types.h>
34
/*
 * States of the INI-style parsing finite-state machine.
 *
 * Each state names the kind of token which the parser expects to read
 * next.
 */
enum ini_parsing_fsm_state {
	INI_EXPECT_MAP_KEY,	/* A map key (identifier) */
	INI_EXPECT_EQUAL,	/* An equal character (`=`) */
	INI_EXPECT_VALUE,	/* A value */
	INI_EXPECT_COMMA,	/* A comma character (`,`) */
};
49
50 /* INI-style parsing state variables */
51 struct ini_parsing_state {
52 /* Lexical scanner (owned by this) */
53 GScanner *scanner;
54
55 /* Output map value object being filled (owned by this) */
56 bt_value *params;
57
58 /* Next expected FSM state */
59 enum ini_parsing_fsm_state expecting;
60
61 /* Last decoded map key (owned by this) */
62 GString *last_map_key;
63
64 /* Complete INI-style string to parse */
65 const char *arg;
66
67 /* Error buffer (weak) */
68 GString *ini_error;
69 };
70
71 /*
72 * Appends an "expecting token" error to the INI-style parsing state's
73 * error buffer.
74 */
75 static
76 void ini_append_error_expecting(struct ini_parsing_state *state,
77 GScanner *scanner, const char *expecting)
78 {
79 size_t i;
80 size_t pos;
81
82 g_string_append_printf(state->ini_error, "Expecting %s:\n", expecting);
83
84 /* Only append error if there's one line */
85 if (strchr(state->arg, '\n') || strlen(state->arg) == 0) {
86 return;
87 }
88
89 g_string_append_printf(state->ini_error, "\n %s\n", state->arg);
90 pos = g_scanner_cur_position(scanner) + 4;
91
92 if (!g_scanner_eof(scanner)) {
93 pos--;
94 }
95
96 for (i = 0; i < pos; ++i) {
97 g_string_append_printf(state->ini_error, " ");
98 }
99
100 g_string_append_printf(state->ini_error, "^\n\n");
101 }
102
103 static
104 void ini_append_oom_error(GString *error)
105 {
106 BT_ASSERT(error);
107 g_string_append(error, "Out of memory\n");
108 }
109
110 /*
111 * Parses the next token as an unsigned integer.
112 */
113 static
114 bt_value *ini_parse_uint(struct ini_parsing_state *state)
115 {
116 bt_value *value = NULL;
117 GTokenType token_type = g_scanner_get_next_token(state->scanner);
118
119 if (token_type != G_TOKEN_INT) {
120 ini_append_error_expecting(state, state->scanner,
121 "integer value");
122 goto end;
123 }
124
125 value = bt_value_integer_unsigned_create_init(
126 state->scanner->value.v_int64);
127
128 end:
129 return value;
130 }
131
132 /*
133 * Parses the next token as a number and returns its negation.
134 */
135 static
136 bt_value *ini_parse_neg_number(struct ini_parsing_state *state)
137 {
138 bt_value *value = NULL;
139 GTokenType token_type = g_scanner_get_next_token(state->scanner);
140
141 switch (token_type) {
142 case G_TOKEN_INT:
143 {
144 /* Negative integer */
145 uint64_t int_val = state->scanner->value.v_int64;
146
147 if (int_val > (((uint64_t) INT64_MAX) + 1)) {
148 g_string_append_printf(state->ini_error,
149 "Integer value -%" PRIu64 " is outside the range of a 64-bit signed integer\n",
150 int_val);
151 } else {
152 value = bt_value_integer_signed_create_init(
153 -((int64_t) int_val));
154 }
155
156 break;
157 }
158 case G_TOKEN_FLOAT:
159 /* Negative floating point number */
160 value = bt_value_real_create_init(
161 -state->scanner->value.v_float);
162 break;
163 default:
164 ini_append_error_expecting(state, state->scanner, "value");
165 break;
166 }
167
168 return value;
169 }
170
171 static bt_value *ini_parse_value(struct ini_parsing_state *state);
172
173 /*
174 * Parses the current and following tokens as an array. Arrays are
175 * formatted as an opening `[`, a list of comma-separated values, and a
176 * closing `]`. For convenience, this function supports an optional
177 * trailing comma after the last value.
178 *
179 * The current token of the parser must be the opening square bracket
180 * (`[`) of the array.
181 */
182 static
183 bt_value *ini_parse_array(struct ini_parsing_state *state)
184 {
185 bt_value *array_value;
186 GTokenType token_type;
187
188 /* The `[` character must have already been ingested */
189 BT_ASSERT(g_scanner_cur_token(state->scanner) == G_TOKEN_CHAR);
190 BT_ASSERT(g_scanner_cur_value(state->scanner).v_char == '[');
191
192 array_value = bt_value_array_create ();
193 if (!array_value) {
194 ini_append_oom_error(state->ini_error);
195 goto error;
196 }
197
198 token_type = g_scanner_get_next_token(state->scanner);
199
200 /* While the current token is not a `]` */
201 while (!(token_type == G_TOKEN_CHAR &&
202 g_scanner_cur_value(state->scanner).v_char == ']')) {
203 bt_value *item_value;
204 bt_value_array_append_element_status append_status;
205
206 /* Parse the item... */
207 item_value = ini_parse_value(state);
208 if (!item_value) {
209 goto error;
210 }
211
212 /* ... and add it to the result array */
213 append_status = bt_value_array_append_element(array_value,
214 item_value);
215 BT_VALUE_PUT_REF_AND_RESET(item_value);
216 if (append_status < 0) {
217 goto error;
218 }
219
220 /*
221 * Ingest the token following the value. It should be
222 * either a comma or closing square bracket.
223 */
224 token_type = g_scanner_get_next_token(state->scanner);
225 if (token_type == G_TOKEN_CHAR &&
226 g_scanner_cur_value(state->scanner).v_char == ',') {
227 /*
228 * Ingest the token following the comma. If it
229 * happens to be a closing square bracket, exit
230 * the loop and we are done (we allow trailing
231 * commas). Otherwise, we are ready for the next
232 * ini_parse_value() call.
233 */
234 token_type = g_scanner_get_next_token(state->scanner);
235 } else if (token_type != G_TOKEN_CHAR ||
236 g_scanner_cur_value(state->scanner).v_char != ']') {
237 ini_append_error_expecting(state, state->scanner,
238 "`,` or `]`");
239 goto error;
240 }
241 }
242
243 goto end;
244
245 error:
246 BT_VALUE_PUT_REF_AND_RESET(array_value);
247
248 end:
249 return array_value;
250 }
251
252 /*
253 * Parses the current and following tokens as a map. Maps are
254 * formatted as an opening `{`, a list of comma-separated entries, and a
255 * closing `}`. And entry is a key (an unquoted string), an equal sign and
256 * a value. For convenience, this function supports an optional trailing comma
257 * after the last value.
258 *
259 * The current token of the parser must be the opening curly bracket
260 * (`{`) of the array.
261 */
262 static
263 bt_value *ini_parse_map(struct ini_parsing_state *state)
264 {
265 bt_value *map_value;
266 GTokenType token_type;
267 gchar *key = NULL;
268
269 /* The `{` character must have already been ingested */
270 BT_ASSERT(g_scanner_cur_token(state->scanner) == G_TOKEN_CHAR);
271 BT_ASSERT(g_scanner_cur_value(state->scanner).v_char == '{');
272
273 map_value = bt_value_map_create ();
274 if (!map_value) {
275 ini_append_oom_error(state->ini_error);
276 goto error;
277 }
278
279 token_type = g_scanner_get_next_token(state->scanner);
280
281 /* While the current token is not a `}` */
282 while (!(token_type == G_TOKEN_CHAR &&
283 g_scanner_cur_value(state->scanner).v_char == '}')) {
284 bt_value *entry_value;
285 bt_value_map_insert_entry_status insert_entry_status;
286
287 /* Expect map key. */
288 if (token_type != G_TOKEN_IDENTIFIER) {
289 ini_append_error_expecting(state, state->scanner,
290 "unquoted map key");
291 goto error;
292 }
293
294 g_free(key);
295 key = g_strdup(g_scanner_cur_value(state->scanner).v_identifier);
296
297 token_type = g_scanner_get_next_token(state->scanner);
298
299 /* Expect equal sign. */
300 if (token_type != G_TOKEN_CHAR ||
301 g_scanner_cur_value(state->scanner).v_char != '=') {
302 ini_append_error_expecting(state,
303 state->scanner, "'='");
304 goto error;
305 }
306
307 token_type = g_scanner_get_next_token(state->scanner);
308
309 /* Parse the entry value... */
310 entry_value = ini_parse_value(state);
311 if (!entry_value) {
312 goto error;
313 }
314
315 /* ... and add it to the result map */
316 insert_entry_status =
317 bt_value_map_insert_entry(map_value, key, entry_value);
318 BT_VALUE_PUT_REF_AND_RESET(entry_value);
319 if (insert_entry_status != BT_VALUE_MAP_INSERT_ENTRY_STATUS_OK) {
320 goto error;
321 }
322
323 /*
324 * Ingest the token following the value. It should be
325 * either a comma or closing curly bracket.
326 */
327 token_type = g_scanner_get_next_token(state->scanner);
328 if (token_type == G_TOKEN_CHAR &&
329 g_scanner_cur_value(state->scanner).v_char == ',') {
330 /*
331 * Ingest the token following the comma. If it
332 * happens to be a closing curly bracket, exit
333 * the loop and we are done (we allow trailing
334 * commas). Otherwise, we are ready for the next
335 * ini_parse_value() call.
336 */
337 token_type = g_scanner_get_next_token(state->scanner);
338 } else if (token_type != G_TOKEN_CHAR ||
339 g_scanner_cur_value(state->scanner).v_char != '}') {
340 ini_append_error_expecting(state, state->scanner,
341 "`,` or `}`");
342 goto error;
343 }
344 }
345
346 goto end;
347 error:
348 BT_VALUE_PUT_REF_AND_RESET(map_value);
349
350 end:
351 g_free(key);
352
353 return map_value;
354 }
355
356 /*
357 * Parses the current token (and the following ones if needed) as a
358 * value, returning it as a `bt_value *`.
359 */
360 static
361 bt_value *ini_parse_value(struct ini_parsing_state *state)
362 {
363 bt_value *value = NULL;
364 GTokenType token_type = state->scanner->token;
365
366 switch (token_type) {
367 case G_TOKEN_CHAR:
368 if (state->scanner->value.v_char == '-') {
369 /* Negative number */
370 value = ini_parse_neg_number(state);
371 } else if (state->scanner->value.v_char == '+') {
372 /* Unsigned integer */
373 value = ini_parse_uint(state);
374 } else if (state->scanner->value.v_char == '[') {
375 /* Array */
376 value = ini_parse_array(state);
377 } else if (state->scanner->value.v_char == '{') {
378 /* Map */
379 value = ini_parse_map(state);
380 } else {
381 ini_append_error_expecting(state, state->scanner, "value");
382 goto end;
383 }
384
385 break;
386 case G_TOKEN_INT:
387 {
388 /* Positive, signed integer */
389 uint64_t int_val = state->scanner->value.v_int64;
390
391 if (int_val > INT64_MAX) {
392 g_string_append_printf(state->ini_error,
393 "Integer value %" PRIu64 " is outside the range of a 64-bit signed integer\n",
394 int_val);
395 goto end;
396 } else {
397 value = bt_value_integer_signed_create_init(
398 (int64_t) int_val);
399 }
400
401 break;
402 }
403 case G_TOKEN_FLOAT:
404 /* Positive floating point number */
405 value = bt_value_real_create_init(state->scanner->value.v_float);
406 break;
407 case G_TOKEN_STRING:
408 /* Quoted string */
409 value = bt_value_string_create_init(state->scanner->value.v_string);
410 break;
411 case G_TOKEN_IDENTIFIER:
412 {
413 /*
414 * Using symbols would be appropriate here, but said
415 * symbols are allowed as map key, so it's easier to
416 * consider everything an identifier.
417 *
418 * If one of the known symbols is not recognized here,
419 * then fall back to creating a string value.
420 */
421 const char *id = state->scanner->value.v_identifier;
422
423 if (strcmp(id, "null") == 0 || strcmp(id, "NULL") == 0 ||
424 strcmp(id, "nul") == 0) {
425 value = bt_value_null;
426 bt_value_get_ref(value);
427 } else if (strcmp(id, "true") == 0 || strcmp(id, "TRUE") == 0 ||
428 strcmp(id, "yes") == 0 ||
429 strcmp(id, "YES") == 0) {
430 value = bt_value_bool_create_init(true);
431 } else if (strcmp(id, "false") == 0 ||
432 strcmp(id, "FALSE") == 0 ||
433 strcmp(id, "no") == 0 ||
434 strcmp(id, "NO") == 0) {
435 value = bt_value_bool_create_init(false);
436 } else {
437 value = bt_value_string_create_init(id);
438 }
439 break;
440 }
441 default:
442 /* Unset return value variable will trigger the error */
443 ini_append_error_expecting(state, state->scanner, "value");
444 break;
445 }
446
447 end:
448 return value;
449 }
450
451 /*
452 * Handles the current state of the INI parser.
453 *
454 * Returns 0 to continue, 1 to end, or a negative value on error.
455 */
456 static
457 int ini_handle_state(struct ini_parsing_state *state)
458 {
459 int ret = 0;
460 GTokenType token_type;
461 bt_value *value = NULL;
462
463 token_type = g_scanner_get_next_token(state->scanner);
464 if (token_type == G_TOKEN_EOF) {
465 if (state->expecting != INI_EXPECT_COMMA) {
466 switch (state->expecting) {
467 case INI_EXPECT_EQUAL:
468 ini_append_error_expecting(state,
469 state->scanner, "`=`");
470 break;
471 case INI_EXPECT_VALUE:
472 ini_append_error_expecting(state,
473 state->scanner, "value");
474 break;
475 case INI_EXPECT_MAP_KEY:
476 ini_append_error_expecting(state,
477 state->scanner, "unquoted map key");
478 break;
479 default:
480 break;
481 }
482 goto error;
483 }
484
485 /* We're done! */
486 ret = 1;
487 goto success;
488 }
489
490 switch (state->expecting) {
491 case INI_EXPECT_MAP_KEY:
492 if (token_type != G_TOKEN_IDENTIFIER) {
493 ini_append_error_expecting(state, state->scanner,
494 "unquoted map key");
495 goto error;
496 }
497
498 g_string_assign(state->last_map_key,
499 state->scanner->value.v_identifier);
500
501 if (bt_value_map_has_entry(state->params,
502 state->last_map_key->str)) {
503 g_string_append_printf(state->ini_error,
504 "Duplicate parameter key: `%s`\n",
505 state->last_map_key->str);
506 goto error;
507 }
508
509 state->expecting = INI_EXPECT_EQUAL;
510 goto success;
511 case INI_EXPECT_EQUAL:
512 if (token_type != G_TOKEN_CHAR) {
513 ini_append_error_expecting(state,
514 state->scanner, "'='");
515 goto error;
516 }
517
518 if (state->scanner->value.v_char != '=') {
519 ini_append_error_expecting(state,
520 state->scanner, "'='");
521 goto error;
522 }
523
524 state->expecting = INI_EXPECT_VALUE;
525 goto success;
526 case INI_EXPECT_VALUE:
527 {
528 value = ini_parse_value(state);
529 if (!value) {
530 goto error;
531 }
532
533 state->expecting = INI_EXPECT_COMMA;
534 goto success;
535 }
536 case INI_EXPECT_COMMA:
537 if (token_type != G_TOKEN_CHAR) {
538 ini_append_error_expecting(state,
539 state->scanner, "','");
540 goto error;
541 }
542
543 if (state->scanner->value.v_char != ',') {
544 ini_append_error_expecting(state,
545 state->scanner, "','");
546 goto error;
547 }
548
549 state->expecting = INI_EXPECT_MAP_KEY;
550 goto success;
551 default:
552 abort();
553 }
554
555 error:
556 ret = -1;
557 goto end;
558
559 success:
560 if (value) {
561 if (bt_value_map_insert_entry(state->params,
562 state->last_map_key->str, value)) {
563 /* Only override return value on error */
564 ret = -1;
565 }
566 }
567
568 end:
569 BT_VALUE_PUT_REF_AND_RESET(value);
570 return ret;
571 }
572
573 /*
574 * Converts an INI-style argument to an equivalent map value object.
575 *
576 * Return value is owned by the caller.
577 */
578 BT_HIDDEN
579 bt_value *bt_param_parse(const char *arg, GString *ini_error)
580 {
581 /* Lexical scanner configuration */
582 GScannerConfig scanner_config = {
583 /* Skip whitespaces */
584 .cset_skip_characters = " \t\n",
585
586 /* Identifier syntax is: [a-zA-Z_][a-zA-Z0-9_.:-]* */
587 .cset_identifier_first =
588 G_CSET_a_2_z
589 "_"
590 G_CSET_A_2_Z,
591 .cset_identifier_nth =
592 G_CSET_a_2_z
593 "_0123456789-.:"
594 G_CSET_A_2_Z,
595
596 /* "hello" and "Hello" two different keys */
597 .case_sensitive = TRUE,
598
599 /* No comments */
600 .cpair_comment_single = NULL,
601 .skip_comment_multi = TRUE,
602 .skip_comment_single = TRUE,
603 .scan_comment_multi = FALSE,
604
605 /*
606 * Do scan identifiers, including 1-char identifiers,
607 * but NULL is a normal identifier.
608 */
609 .scan_identifier = TRUE,
610 .scan_identifier_1char = TRUE,
611 .scan_identifier_NULL = FALSE,
612
613 /*
614 * No specific symbols: null and boolean "symbols" are
615 * scanned as plain identifiers.
616 */
617 .scan_symbols = FALSE,
618 .symbol_2_token = FALSE,
619 .scope_0_fallback = FALSE,
620
621 /*
622 * Scan "0b"-, "0"-, and "0x"-prefixed integers, but not
623 * integers prefixed with "$".
624 */
625 .scan_binary = TRUE,
626 .scan_octal = TRUE,
627 .scan_float = TRUE,
628 .scan_hex = TRUE,
629 .scan_hex_dollar = FALSE,
630
631 /* Convert scanned numbers to integer tokens */
632 .numbers_2_int = TRUE,
633
634 /* Support both integers and floating point numbers */
635 .int_2_float = FALSE,
636
637 /* Scan integers as 64-bit signed integers */
638 .store_int64 = TRUE,
639
640 /* Only scan double-quoted strings */
641 .scan_string_sq = FALSE,
642 .scan_string_dq = TRUE,
643
644 /* Do not converter identifiers to string tokens */
645 .identifier_2_string = FALSE,
646
647 /* Scan characters as `G_TOKEN_CHAR` token */
648 .char_2_token = FALSE,
649 };
650 struct ini_parsing_state state = {
651 .scanner = NULL,
652 .params = NULL,
653 .expecting = INI_EXPECT_MAP_KEY,
654 .arg = arg,
655 .ini_error = ini_error,
656 };
657
658 BT_ASSERT(ini_error);
659 g_string_assign(ini_error, "");
660 state.params = bt_value_map_create();
661 if (!state.params) {
662 ini_append_oom_error(ini_error);
663 goto error;
664 }
665
666 state.scanner = g_scanner_new(&scanner_config);
667 if (!state.scanner) {
668 ini_append_oom_error(ini_error);
669 goto error;
670 }
671
672 state.last_map_key = g_string_new(NULL);
673 if (!state.last_map_key) {
674 ini_append_oom_error(ini_error);
675 goto error;
676 }
677
678 /* Let the scan begin */
679 g_scanner_input_text(state.scanner, arg, strlen(arg));
680
681 while (true) {
682 int ret = ini_handle_state(&state);
683
684 if (ret < 0) {
685 /* Error */
686 goto error;
687 } else if (ret > 0) {
688 /* Done */
689 break;
690 }
691 }
692
693 goto end;
694
695 error:
696 BT_VALUE_PUT_REF_AND_RESET(state.params);
697
698 end:
699 if (state.scanner) {
700 g_scanner_destroy(state.scanner);
701 }
702
703 if (state.last_map_key) {
704 g_string_free(state.last_map_key, TRUE);
705 }
706
707 return state.params;
708 }
This page took 0.044001 seconds and 5 git commands to generate.