5 * LTTng filter expression parser
7 * Copyright 2012 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
9 * SPDX-License-Identifier: LGPL-2.1-only
11 * Grammar inspired from http://www.quut.com/c/ANSI-C-grammar-y.html
21 #include "filter-ast.h"
22 #include "filter-parser.h"
23 #include "filter-bytecode.h"
24 #include "memstream.h"
26 #include <common/macros.h>
/*
 * Maximum field widths pasted into the sscanf() conversion specifications
 * that parse numeric constants in the grammar actions (decimal, octal,
 * hexadecimal and floating point, in that order). Each one bounds how many
 * input characters the corresponding conversion may consume.
 * NOTE(review): a 64-bit value needs at most 16 hexadecimal digits, so the
 * "17" used for the hexadecimal width looks one larger than necessary -
 * confirm whether that is intentional.
 */
28 #define WIDTH_u64_SCANF_IS_A_BROKEN_API "20"
29 #define WIDTH_o64_SCANF_IS_A_BROKEN_API "22"
30 #define WIDTH_x64_SCANF_IS_A_BROKEN_API "17"
31 #define WIDTH_lg_SCANF_IS_A_BROKEN_API "4096" /* Hugely optimistic approximation */
/*
 * Two alternative definitions of print_xml and dbg_printf() follow. The
 * preprocessor conditional that selects between them is not visible in this
 * excerpt (presumably a debug build switch - confirm).
 */
/* First variant: print_xml is 1 and dbg_printf() prints with a fixed prefix. */
34 static const int print_xml = 1;
35 #define dbg_printf(fmt, args...) \
36 printf("[debug filter_parser] " fmt, ## args)
/*
 * Second variant: print_xml is 0. As the inline comment states, the printf()
 * call is kept only so that the format string and arguments are still
 * checked; the construct that keeps it from running is not visible here.
 */
38 static const int print_xml = 0;
39 #define dbg_printf(fmt, args...) \
41 /* do nothing but check printf format */ \
43 printf("[debug filter_parser] " fmt, ## args); \
/* Debug flag; filter_parser_ctx_alloc() copies it into yydebug. */
50 int filter_parser_debug = 0;
/*
 * Prototypes of the parser and scanner entry points called from this file.
 * NOTE(review): presumably provided by the Bison/Flex generated code -
 * confirm against the build.
 */
53 int yyparse(struct filter_parser_ctx *parser_ctx, yyscan_t scanner);
55 int yylex(union YYSTYPE *yyval, yyscan_t scanner);
57 int yylex_init_extra(struct filter_parser_ctx *parser_ctx, yyscan_t * ptr_yy_globals);
59 int yylex_destroy(yyscan_t yyparser_ctx);
61 void yyrestart(FILE * in_str, yyscan_t parser_ctx);
/*
 * List linkage member. The gc_string helpers below link and unlink strings
 * through a member of this name; the enclosing structure declaration is not
 * visible in this excerpt.
 */
64 struct cds_list_head gc;
/*
 * Printable name for each node type, indexed by the node type enumerator
 * through designated initializers. Used by node_type().
 */
69 static const char *node_type_to_str[] = {
70 [ NODE_UNKNOWN ] = "NODE_UNKNOWN",
71 [ NODE_ROOT ] = "NODE_ROOT",
72 [ NODE_EXPRESSION ] = "NODE_EXPRESSION",
73 [ NODE_OP ] = "NODE_OP",
74 [ NODE_UNARY_OP ] = "NODE_UNARY_OP",
/*
 * Returns the printable name of a node's type.
 *
 * When node->type is below NR_NODE_TYPES the matching entry of
 * node_type_to_str is returned. The result for out-of-range types is not
 * visible in this excerpt.
 */
78 const char *node_type(struct filter_node *node)
80 if (node->type < NR_NODE_TYPES)
81 return node_type_to_str[node->type];
/*
 * Allocates a gc_string and registers it on parser_ctx->allocated_strings
 * so that it can be released together with the parser context.
 *
 * The allocation size starts at 8 and is grown until it covers
 * sizeof(long) + sizeof(*gstr) + len; the chosen size is stored in
 * gstr->alloclen. The loop's increment expression, the remaining parameter
 * (used here as "len") and the return statement are not visible in this
 * excerpt.
 */
86 static struct gc_string *gc_string_alloc(struct filter_parser_ctx *parser_ctx,
89 struct gc_string *gstr;
92 /* TODO: could be faster with find first bit or glib Gstring */
93 /* sizeof long to account for malloc header (int or long ?) */
94 for (alloclen = 8; alloclen < sizeof(long) + sizeof(*gstr) + len;
97 gstr = zmalloc(alloclen);
/* Track the new string for later collection and remember its capacity. */
101 cds_list_add(&gstr->gc, &parser_ctx->allocated_strings);
102 gstr->alloclen = alloclen;
108 * note: never use gc_string_append on a string that has external references.
109 * gsrc will be garbage collected immediately, and gstr might be.
110 * Should only be used to append characters to a string literal or constant.
/*
 * Concatenates gsrc->s onto gstr->s.
 *
 * The size needed for the combined string (both lengths plus the
 * terminator) is rounded with the same scheme as gc_string_alloc(). If that
 * exceeds gstr->alloclen, a new gc_string is allocated, both strings are
 * copied into it and the old gstr is unlinked from its list; otherwise
 * gsrc->s is appended to gstr->s in place. gsrc is unlinked from its list
 * afterwards. The statements releasing the unlinked strings and returning
 * the result are not visible in this excerpt.
 */
113 struct gc_string *gc_string_append(struct filter_parser_ctx *parser_ctx,
114 struct gc_string *gstr,
115 struct gc_string *gsrc)
117 size_t newlen = strlen(gsrc->s) + strlen(gstr->s) + 1;
120 /* TODO: could be faster with find first bit or glib Gstring */
121 /* sizeof long to account for malloc header (int or long ?) */
122 for (alloclen = 8; alloclen < sizeof(long) + sizeof(*gstr) + newlen;
/* Existing buffer too small: build the result in a fresh gc_string. */
125 if (alloclen > gstr->alloclen) {
126 struct gc_string *newgstr;
/*
 * NOTE(review): the result of gc_string_alloc() is written to on the very
 * next line without a NULL check; if the allocation can fail this
 * dereferences NULL - confirm and guard.
 */
128 newgstr = gc_string_alloc(parser_ctx, newlen);
129 strcpy(newgstr->s, gstr->s);
130 strcat(newgstr->s, gsrc->s);
131 cds_list_del(&gstr->gc);
/* Existing buffer is large enough: append in place. */
135 strcat(gstr->s, gsrc->s);
/* The source string is consumed by the append; drop it from its list. */
137 cds_list_del(&gsrc->gc);
/*
 * Stores a garbage-collected copy of src in lvalp->gs: a gc_string sized for
 * strlen(src) + 1 bytes is allocated through gc_string_alloc() and src is
 * copied into it.
 * NOTE(review): the allocation result is dereferenced on the next line
 * without a NULL check - confirm whether gc_string_alloc() can return NULL.
 */
143 void setstring(struct filter_parser_ctx *parser_ctx, YYSTYPE *lvalp, const char *src);
145 void setstring(struct filter_parser_ctx *parser_ctx, YYSTYPE *lvalp, const char *src)
147 lvalp->gs = gc_string_alloc(parser_ctx, strlen(src) + 1);
148 strcpy(lvalp->gs->s, src);
/*
 * Allocates a zeroed filter_node and links it on the AST's allocated_nodes
 * list (the AST is obtained from the parser context via
 * filter_parser_get_ast()).
 *
 * The visible part of the type dispatch reports an error on stderr when
 * asked to create a root node or a node of unknown type. The second
 * parameter, the assignment of the node type and the return statement are
 * not visible in this excerpt.
 */
151 static struct filter_node *make_node(struct filter_parser_ctx *scanner,
154 struct filter_ast *ast = filter_parser_get_ast(scanner);
155 struct filter_node *node;
157 node = zmalloc(sizeof(*node));
/*
 * NOTE(review): presumably redundant if zmalloc() already returns zeroed
 * memory - confirm and drop.
 */
160 memset(node, 0, sizeof(*node));
/* Register the node so filter_ast_free() can walk it later. */
162 cds_list_add(&node->gc, &ast->allocated_nodes);
166 fprintf(stderr, "[error] %s: trying to create root node\n", __func__);
169 case NODE_EXPRESSION:
178 fprintf(stderr, "[error] %s: unknown node type %d\n", __func__,
/*
 * Allocates a NODE_OP node for a binary operator, links it on the AST's
 * allocated_nodes list and fills in the operator type and both operands.
 *
 * lchild/rchild are stored as given, without copying. The declaration of
 * the "type" parameter and the return statement are not visible in this
 * excerpt.
 */
186 static struct filter_node *make_op_node(struct filter_parser_ctx *scanner,
188 struct filter_node *lchild,
189 struct filter_node *rchild)
191 struct filter_ast *ast = filter_parser_get_ast(scanner);
192 struct filter_node *node;
194 node = zmalloc(sizeof(*node));
/*
 * NOTE(review): presumably redundant if zmalloc() already returns zeroed
 * memory - confirm and drop.
 */
197 memset(node, 0, sizeof(*node));
198 node->type = NODE_OP;
199 cds_list_add(&node->gc, &ast->allocated_nodes);
200 node->u.op.type = type;
201 node->u.op.lchild = lchild;
202 node->u.op.rchild = rchild;
/*
 * Parser error callback: prints str to stderr, prefixed with "error " and
 * followed by a newline. parser_ctx and scanner are not used by the visible
 * statement.
 */
207 void yyerror(struct filter_parser_ctx *parser_ctx, yyscan_t scanner, const char *str);
209 void yyerror(struct filter_parser_ctx *parser_ctx, yyscan_t scanner, const char *str)
211 fprintf(stderr, "error %s\n", str);
/*
 * Reports a parse error through yyerror(), prefixing the string literal str
 * with "parse error: ". str must be a string literal because it is joined
 * by literal concatenation. The rest of the macro body is not visible in
 * this excerpt.
 * NOTE(review): the message built here ends with "\n" and yyerror() appends
 * another "\n", so each report ends with a blank line - confirm whether
 * that is intended.
 */
222 #define parse_error(parser_ctx, str) \
224 yyerror(parser_ctx, parser_ctx->scanner, YY_("parse error: " str "\n")); \
/*
 * Walks every gc_string linked on list through its gc member, using the
 * removal-safe iterator. The loop body is not visible in this excerpt
 * (presumably it frees each entry - confirm).
 */
228 static void free_strings(struct cds_list_head *list)
230 struct gc_string *gstr, *tmp;
232 cds_list_for_each_entry_safe(gstr, tmp, list, gc)
/*
 * Allocates a zeroed filter_ast, initializes its allocated_nodes list as
 * empty and marks the embedded root node as NODE_ROOT. The failure check
 * and the return statement are not visible in this excerpt.
 */
236 static struct filter_ast *filter_ast_alloc(void)
238 struct filter_ast *ast;
240 ast = zmalloc(sizeof(*ast));
/*
 * NOTE(review): presumably redundant if zmalloc() already returns zeroed
 * memory - confirm and drop.
 */
243 memset(ast, 0, sizeof(*ast));
244 CDS_INIT_LIST_HEAD(&ast->allocated_nodes);
245 ast->root.type = NODE_ROOT;
/*
 * Walks every node linked on ast->allocated_nodes through its gc member,
 * using the removal-safe iterator. The loop body and any release of the ast
 * itself are not visible in this excerpt.
 */
249 static void filter_ast_free(struct filter_ast *ast)
251 struct filter_node *node, *tmp;
253 cds_list_for_each_entry_safe(node, tmp, &ast->allocated_nodes, gc)
/*
 * Runs the parser on the context's scanner and returns yyparse()'s result
 * unchanged.
 */
259 int filter_parser_ctx_append_ast(struct filter_parser_ctx *parser_ctx)
261 return yyparse(parser_ctx, parser_ctx->scanner);
/*
 * Creates a parser context reading from input.
 *
 * Steps visible here: copy filter_parser_debug into yydebug, allocate the
 * zeroed context, initialize a scanner with the context as its extra data,
 * point the scanner at input, allocate the AST and initialize the
 * allocated_strings list. Scanner initialization failure is reported on
 * stderr and jumps to the cleanup_parser_ctx label. The labels, the return
 * statements and the remaining error checks are not visible in this
 * excerpt.
 */
265 struct filter_parser_ctx *filter_parser_ctx_alloc(FILE *input)
267 struct filter_parser_ctx *parser_ctx;
270 yydebug = filter_parser_debug;
272 parser_ctx = zmalloc(sizeof(*parser_ctx));
/*
 * NOTE(review): presumably redundant if zmalloc() already returns zeroed
 * memory - confirm and drop.
 */
275 memset(parser_ctx, 0, sizeof(*parser_ctx));
277 ret = yylex_init_extra(parser_ctx, &parser_ctx->scanner);
279 fprintf(stderr, "yylex_init error\n");
280 goto cleanup_parser_ctx;
282 /* Start processing new stream */
283 yyrestart(input, parser_ctx->scanner);
285 parser_ctx->ast = filter_ast_alloc();
286 if (!parser_ctx->ast)
288 CDS_INIT_LIST_HEAD(&parser_ctx->allocated_strings);
/*
 * Reports whether input is a terminal; the condition guarding this message
 * is not visible in this excerpt.
 */
291 fprintf(stdout, "parser_ctx input is a%s.\n",
292 isatty(fileno(input)) ? "n interactive tty" :
293 " noninteractive file");
/* Error path: tear the scanner down again and report a teardown failure. */
298 ret = yylex_destroy(parser_ctx->scanner);
300 fprintf(stderr, "yylex_destroy error\n");
/*
 * Releases what a parser context owns: the garbage-collected strings, the
 * AST and the scanner. A scanner teardown failure is only reported on
 * stderr. Release of the context structure itself is not visible in this
 * excerpt.
 */
307 void filter_parser_ctx_free(struct filter_parser_ctx *parser_ctx)
311 free_strings(&parser_ctx->allocated_strings);
312 filter_ast_free(parser_ctx->ast);
313 ret = yylex_destroy(parser_ctx->scanner);
315 fprintf(stderr, "yylex_destroy error\n");
/*
 * Builds a parser context for filter_expression and runs the whole
 * pipeline on it: parse, optional XML print, IR generation, IR validation
 * and normalization passes, then bytecode generation.
 *
 * filter_expression must be non-NULL (asserted). Failures to open the
 * memory stream or to allocate the parser yield -LTTNG_ERR_FILTER_NOMEM;
 * failures of the parse and visitor stages yield -LTTNG_ERR_FILTER_INVAL.
 * How the context is handed back through ctxp, the labels and the return
 * statements are not visible in this excerpt.
 */
320 int filter_parser_ctx_create_from_filter_expression(
321 const char *filter_expression, struct filter_parser_ctx **ctxp)
324 struct filter_parser_ctx *ctx = NULL;
/* Expose the expression as a read-only memory stream for the scanner. */
327 assert(filter_expression);
331 * Casting const to non-const, as the underlying function will use it in
334 fmem = lttng_fmemopen((void *) filter_expression,
335 strlen(filter_expression), "r");
337 fprintf(stderr, "Error opening memory as stream\n");
338 ret = -LTTNG_ERR_FILTER_NOMEM;
341 ctx = filter_parser_ctx_alloc(fmem);
343 fprintf(stderr, "Error allocating parser\n");
344 ret = -LTTNG_ERR_FILTER_NOMEM;
345 goto filter_alloc_error;
/* Parse the expression into the context's AST. */
347 ret = filter_parser_ctx_append_ast(ctx);
349 fprintf(stderr, "Parse error\n");
350 ret = -LTTNG_ERR_FILTER_INVAL;
/* XML dump of the AST to stdout; the guarding condition is not visible here. */
354 ret = filter_visitor_print_xml(ctx, stdout, 0);
357 fprintf(stderr, "XML print error\n");
358 ret = -LTTNG_ERR_FILTER_INVAL;
363 dbg_printf("Generating IR... ");
365 ret = filter_visitor_ir_generate(ctx);
367 fprintf(stderr, "Generate IR error\n");
368 ret = -LTTNG_ERR_FILTER_INVAL;
371 dbg_printf("done\n");
/*
 * IR validation and normalization passes. Any failure is mapped to
 * -LTTNG_ERR_FILTER_INVAL; no message is printed at this level in the
 * visible lines.
 */
373 dbg_printf("Validating IR... ");
375 ret = filter_visitor_ir_check_binary_op_nesting(ctx);
377 ret = -LTTNG_ERR_FILTER_INVAL;
381 /* Normalize globbing patterns in the expression. */
382 ret = filter_visitor_ir_normalize_glob_patterns(ctx);
384 ret = -LTTNG_ERR_FILTER_INVAL;
388 /* Validate strings used as literals in the expression. */
389 ret = filter_visitor_ir_validate_string(ctx);
391 ret = -LTTNG_ERR_FILTER_INVAL;
395 /* Validate globbing patterns in the expression. */
396 ret = filter_visitor_ir_validate_globbing(ctx);
398 ret = -LTTNG_ERR_FILTER_INVAL;
402 dbg_printf("done\n");
404 dbg_printf("Generating bytecode... ");
406 ret = filter_visitor_bytecode_generate(ctx);
408 fprintf(stderr, "Generate bytecode error\n");
409 ret = -LTTNG_ERR_FILTER_INVAL;
412 dbg_printf("done\n");
413 dbg_printf("Size of bytecode generated: %u bytes.\n",
414 bytecode_get_len(&ctx->bytecode->b));
416 /* No need to keep the memory stream. */
417 if (fclose(fmem) != 0) {
418 fprintf(stderr, "fclose (%d) \n", errno);
419 ret = -LTTNG_ERR_FILTER_INVAL;
/*
 * Error path: release the parser context and close the memory stream. A
 * close failure is only reported (errno is printed as a number).
 */
427 filter_parser_ctx_free(ctx);
429 if (fclose(fmem) != 0) {
430 fprintf(stderr, "fclose (%d) \n", errno);
/*
 * Extra arguments of yyparse(): the parser context and the scanner. The
 * scanner is also forwarded to yylex() through %lex-param.
 */
440 %parse-param {struct filter_parser_ctx *parser_ctx}
441 %parse-param {yyscan_t scanner}
442 %lex-param {yyscan_t scanner}
/* Parsing starts at translation_unit. */
443 %start translation_unit
/* Tokens declared without a semantic value type. */
444 %token CHARACTER_CONSTANT_START SQUOTE STRING_LITERAL_START DQUOTE
445 %token ESCSEQ CHAR_STRING_TOKEN
446 %token DECIMAL_CONSTANT OCTAL_CONSTANT HEXADECIMAL_CONSTANT FLOAT_CONSTANT
447 %token LSBRAC RSBRAC LPAREN RPAREN LBRAC RBRAC RARROW
448 %token STAR PLUS MINUS
449 %token MOD_OP DIV_OP RIGHT_OP LEFT_OP
450 %token EQ_OP NE_OP LE_OP GE_OP LT_OP GT_OP AND_OP OR_OP NOT_OP
451 %token ASSIGN COLON SEMICOLON DOTDOTDOT DOT EQUAL COMMA
452 %token XOR_BIN AND_BIN OR_BIN NOT_BIN
/* Identifier tokens carry their text as a garbage-collected string (gs). */
454 %token <gs> IDENTIFIER GLOBAL_IDENTIFIER
/*
 * Semantic value members: gs for strings, n for AST nodes. The enclosing
 * %union declaration line is not visible in this excerpt.
 */
460 struct gc_string *gs;
461 struct filter_node *n;
/* Character and string pieces yield strings. */
464 %type <gs> s_char s_char_sequence c_char c_char_sequence
/* Every expression non-terminal yields an AST node. */
466 %type <n> primary_expression
467 %type <n> prefix_expression
468 %type <n> prefix_expression_rec
469 %type <n> postfix_expression
470 %type <n> unary_expression
471 %type <n> unary_operator
472 %type <n> multiplicative_expression
473 %type <n> additive_expression
474 %type <n> shift_expression
475 %type <n> relational_expression
476 %type <n> equality_expression
477 %type <n> and_expression
478 %type <n> exclusive_or_expression
479 %type <n> inclusive_or_expression
480 %type <n> logical_and_expression
481 %type <n> logical_or_expression
483 %type <n> identifiers
/*
 * Character-constant contents: each further c_char is appended to the
 * sequence built so far. gc_string_append() consumes both operands.
 */
493 | c_char_sequence c_char
494 { $$ = gc_string_append(parser_ctx, $1, $2); }
/* Escape sequences inside character constants are rejected. */
502 parse_error(parser_ctx, "escape sequences not supported yet");
506 /* 1.6 String literals */
/* String-literal contents are accumulated the same way. */
511 | s_char_sequence s_char
512 { $$ = gc_string_append(parser_ctx, $1, $2); }
/* Escape sequences inside string literals are rejected. */
520 parse_error(parser_ctx, "escape sequences not supported yet");
/*
 * Primary expressions. Each action creates a NODE_EXPRESSION node, tags it
 * with the expression kind and stores the parsed value.
 * NOTE(review): the result of make_node() is dereferenced immediately in
 * every action - confirm that it cannot be NULL.
 * NOTE(review): the constant and identifier actions read the token text
 * from yylval.gs rather than from $1; this presumably relies on no
 * lookahead token having been read before the reduction - confirm.
 */
/* Decimal constant: parsed as an unsigned 64-bit decimal value. */
527 $$ = make_node(parser_ctx, NODE_EXPRESSION);
528 $$->u.expression.type = AST_EXP_CONSTANT;
529 if (sscanf(yylval.gs->s, "%" WIDTH_u64_SCANF_IS_A_BROKEN_API SCNu64,
530 &$$->u.expression.u.constant) != 1) {
531 parse_error(parser_ctx, "cannot scanf decimal constant");
/*
 * Octal constant: a lone "0" is handled directly; otherwise the digits after
 * the leading '0' are parsed as octal.
 */
536 $$ = make_node(parser_ctx, NODE_EXPRESSION);
537 $$->u.expression.type = AST_EXP_CONSTANT;
538 if (!strcmp(yylval.gs->s, "0")) {
539 $$->u.expression.u.constant = 0;
540 } else if (sscanf(yylval.gs->s, "0%" WIDTH_o64_SCANF_IS_A_BROKEN_API SCNo64,
541 &$$->u.expression.u.constant) != 1) {
542 parse_error(parser_ctx, "cannot scanf octal constant");
/* Hexadecimal constant: the digits after the literal "0x" prefix are parsed. */
545 | HEXADECIMAL_CONSTANT
547 $$ = make_node(parser_ctx, NODE_EXPRESSION);
548 $$->u.expression.type = AST_EXP_CONSTANT;
549 if (sscanf(yylval.gs->s, "0x%" WIDTH_x64_SCANF_IS_A_BROKEN_API SCNx64,
550 &$$->u.expression.u.constant) != 1) {
551 parse_error(parser_ctx, "cannot scanf hexadecimal constant");
/* Floating point constant: parsed with the "lg" conversion. */
556 $$ = make_node(parser_ctx, NODE_EXPRESSION);
557 $$->u.expression.type = AST_EXP_FLOAT_CONSTANT;
558 if (sscanf(yylval.gs->s, "%" WIDTH_lg_SCANF_IS_A_BROKEN_API "lg",
559 &$$->u.expression.u.float_constant) != 1) {
560 parse_error(parser_ctx, "cannot scanf float constant");
/* Empty string literal: stored as a static empty string. */
563 | STRING_LITERAL_START DQUOTE
565 $$ = make_node(parser_ctx, NODE_EXPRESSION);
566 $$->u.expression.type = AST_EXP_STRING;
567 $$->u.expression.u.string = "";
/* Non-empty string literal: points at the accumulated gc_string's text. */
569 | STRING_LITERAL_START s_char_sequence DQUOTE
571 $$ = make_node(parser_ctx, NODE_EXPRESSION);
572 $$->u.expression.type = AST_EXP_STRING;
573 $$->u.expression.u.string = $2->s;
/* Character constant: also represented as an AST_EXP_STRING. */
575 | CHARACTER_CONSTANT_START c_char_sequence SQUOTE
577 $$ = make_node(parser_ctx, NODE_EXPRESSION);
578 $$->u.expression.type = AST_EXP_STRING;
579 $$->u.expression.u.string = $2->s;
/* Parenthesized expression: wraps the inner expression as its child. */
581 | LPAREN expression RPAREN
583 $$ = make_node(parser_ctx, NODE_EXPRESSION);
584 $$->u.expression.type = AST_EXP_NESTED;
585 $$->u.expression.u.child = $2;
/* Identifier: stores the identifier text. */
592 $$ = make_node(parser_ctx, NODE_EXPRESSION);
593 $$->u.expression.type = AST_EXP_IDENTIFIER;
594 $$->u.expression.u.identifier = yylval.gs->s;
/* Global identifier: same storage, different expression kind. */
598 $$ = make_node(parser_ctx, NODE_EXPRESSION);
599 $$->u.expression.type = AST_EXP_GLOBAL_IDENTIFIER;
600 $$->u.expression.u.identifier = yylval.gs->s;
/*
 * Bracketed index chain: one or more "[ unary_expression ]" groups. In the
 * recursive alternative the node is marked AST_LINK_BRACKET and linked to
 * the remainder of the chain ($4) through its prev member. The assignments
 * of $$ are not visible in this excerpt.
 */
604 prefix_expression_rec
605 : LSBRAC unary_expression RSBRAC
609 | LSBRAC unary_expression RSBRAC prefix_expression_rec
612 $$->u.expression.pre_op = AST_LINK_BRACKET;
613 $$->u.expression.prev = $4;
/* Identifier followed by an index chain: the chain hangs off next_bracket. */
622 | identifiers prefix_expression_rec
625 $$->u.expression.pre_op = AST_LINK_BRACKET;
626 $$->u.expression.next_bracket = $2;
/*
 * Member access: "a.b" and "a->b" mark the node with the link kind and
 * record the left-hand expression ($1) as prev.
 */
635 | postfix_expression DOT prefix_expression
638 $$->u.expression.post_op = AST_LINK_DOT;
639 $$->u.expression.prev = $1;
641 | postfix_expression RARROW prefix_expression
644 $$->u.expression.post_op = AST_LINK_RARROW;
645 $$->u.expression.prev = $1;
/* Unary operator applied to an operand: the operand becomes the child. */
654 | unary_operator unary_expression
657 $$->u.unary_op.child = $2;
/*
 * Unary operators: each action creates a NODE_UNARY_OP node tagged with the
 * operator kind (plus, minus, logical not, bitwise not). The operand is
 * attached later by the unary_expression rule. The token heading each
 * alternative is not visible in this excerpt.
 * NOTE(review): the result of make_node() is dereferenced immediately -
 * confirm that it cannot be NULL.
 */
664 $$ = make_node(parser_ctx, NODE_UNARY_OP);
665 $$->u.unary_op.type = AST_UNARY_PLUS;
669 $$ = make_node(parser_ctx, NODE_UNARY_OP);
670 $$->u.unary_op.type = AST_UNARY_MINUS;
674 $$ = make_node(parser_ctx, NODE_UNARY_OP);
675 $$->u.unary_op.type = AST_UNARY_NOT;
679 $$ = make_node(parser_ctx, NODE_UNARY_OP);
680 $$->u.unary_op.type = AST_UNARY_BIT_NOT;
/*
 * Binary operator rules. Every alternative is left-recursive (so operators
 * of one level associate to the left) and builds a NODE_OP node through
 * make_op_node() with $1 as left operand and $3 as right operand.
 *
 * Precedence follows the nesting of the rules, from tightest to loosest:
 * multiplicative, additive, shift, bitwise AND, bitwise XOR, bitwise OR,
 * relational, equality, logical AND, logical OR. Unlike C, the bitwise
 * operators therefore bind tighter than the comparison operators.
 */
684 multiplicative_expression
687 | multiplicative_expression STAR unary_expression
689 $$ = make_op_node(parser_ctx, AST_OP_MUL, $1, $3);
691 | multiplicative_expression DIV_OP unary_expression
693 $$ = make_op_node(parser_ctx, AST_OP_DIV, $1, $3);
695 | multiplicative_expression MOD_OP unary_expression
697 $$ = make_op_node(parser_ctx, AST_OP_MOD, $1, $3);
/* Addition and subtraction. */
702 : multiplicative_expression
704 | additive_expression PLUS multiplicative_expression
706 $$ = make_op_node(parser_ctx, AST_OP_PLUS, $1, $3);
708 | additive_expression MINUS multiplicative_expression
710 $$ = make_op_node(parser_ctx, AST_OP_MINUS, $1, $3);
/* Bit shifts. */
715 : additive_expression
717 | shift_expression LEFT_OP additive_expression
719 $$ = make_op_node(parser_ctx, AST_OP_BIT_LSHIFT, $1, $3);
721 | shift_expression RIGHT_OP additive_expression
723 $$ = make_op_node(parser_ctx, AST_OP_BIT_RSHIFT, $1, $3);
/* Bitwise AND operates directly on shift expressions. */
730 | and_expression AND_BIN shift_expression
732 $$ = make_op_node(parser_ctx, AST_OP_BIT_AND, $1, $3);
/* Bitwise XOR. */
736 exclusive_or_expression
739 | exclusive_or_expression XOR_BIN and_expression
741 $$ = make_op_node(parser_ctx, AST_OP_BIT_XOR, $1, $3);
/* Bitwise OR. */
745 inclusive_or_expression
746 : exclusive_or_expression
748 | inclusive_or_expression OR_BIN exclusive_or_expression
750 $$ = make_op_node(parser_ctx, AST_OP_BIT_OR, $1, $3);
/* Relational comparisons take bitwise-OR expressions as operands. */
754 relational_expression
755 : inclusive_or_expression
757 | relational_expression LT_OP inclusive_or_expression
759 $$ = make_op_node(parser_ctx, AST_OP_LT, $1, $3);
761 | relational_expression GT_OP inclusive_or_expression
763 $$ = make_op_node(parser_ctx, AST_OP_GT, $1, $3);
765 | relational_expression LE_OP inclusive_or_expression
767 $$ = make_op_node(parser_ctx, AST_OP_LE, $1, $3);
769 | relational_expression GE_OP inclusive_or_expression
771 $$ = make_op_node(parser_ctx, AST_OP_GE, $1, $3);
/* Equality and inequality. */
776 : relational_expression
778 | equality_expression EQ_OP relational_expression
780 $$ = make_op_node(parser_ctx, AST_OP_EQ, $1, $3);
782 | equality_expression NE_OP relational_expression
784 $$ = make_op_node(parser_ctx, AST_OP_NE, $1, $3);
/* Logical AND. */
788 logical_and_expression
789 : equality_expression
791 | logical_and_expression AND_OP equality_expression
793 $$ = make_op_node(parser_ctx, AST_OP_AND, $1, $3);
/* Logical OR. */
797 logical_or_expression
798 : logical_and_expression
800 | logical_or_expression OR_OP logical_and_expression
802 $$ = make_op_node(parser_ctx, AST_OP_OR, $1, $3);
/* A full expression is a logical-OR expression. */
807 : logical_or_expression
/* The parsed expression becomes the child of the AST root. */
814 parser_ctx->ast->root.u.root.child = $1;