X-Git-Url: http://git.efficios.com/?a=blobdiff_plain;f=formats%2Fctf%2Fmetadata%2Fctf-parser.y;h=61ec47dd28fe80fcfc0674674233a819a2d272d3;hb=697b10505ff455ec93d57868d1b15bcda4d9dd37;hp=58ea245b8d30f2b43b17cb26b7a8a72594bd0dc2;hpb=f9c67088c31c4e211e4ea9655c231d0b908a6290;p=babeltrace.git diff --git a/formats/ctf/metadata/ctf-parser.y b/formats/ctf/metadata/ctf-parser.y index 58ea245b..61ec47dd 100644 --- a/formats/ctf/metadata/ctf-parser.y +++ b/formats/ctf/metadata/ctf-parser.y @@ -26,6 +26,7 @@ */ #include +#include #include #include #include @@ -77,32 +78,18 @@ struct gc_string { }; static const char *node_type_to_str[] = { - [ NODE_UNKNOWN ] = "NODE_UNKNOWN", - [ NODE_ROOT ] = "NODE_ROOT", - [ NODE_EVENT ] = "NODE_EVENT", - [ NODE_ENV ] = "NODE_ENV", - [ NODE_STREAM ] = "NODE_STREAM", - [ NODE_TRACE ] = "NODE_TRACE", - [ NODE_CLOCK ] = "NODE_CLOCK", - [ NODE_CALLSITE ] = "NODE_CALLSITE", - [ NODE_CTF_EXPRESSION ] = "NODE_CTF_EXPRESSION", - [ NODE_UNARY_EXPRESSION ] = "NODE_UNARY_EXPRESSION", - [ NODE_TYPEDEF ] = "NODE_TYPEDEF", - [ NODE_TYPEALIAS_TARGET ] = "NODE_TYPEALIAS_TARGET", - [ NODE_TYPEALIAS_ALIAS ] = "NODE_TYPEALIAS_ALIAS", - [ NODE_TYPEALIAS ] = "NODE_TYPEALIAS", - [ NODE_TYPE_SPECIFIER ] = "NODE_TYPE_SPECIFIER", - [ NODE_TYPE_SPECIFIER_LIST ] = "NODE_TYPE_SPECIFIER_LIST", - [ NODE_POINTER ] = "NODE_POINTER", - [ NODE_TYPE_DECLARATOR ] = "NODE_TYPE_DECLARATOR", - [ NODE_FLOATING_POINT ] = "NODE_FLOATING_POINT", - [ NODE_INTEGER ] = "NODE_INTEGER", - [ NODE_STRING ] = "NODE_STRING", - [ NODE_ENUMERATOR ] = "NODE_ENUMERATOR", - [ NODE_ENUM ] = "NODE_ENUM", - [ NODE_STRUCT_OR_VARIANT_DECLARATION ] = "NODE_STRUCT_OR_VARIANT_DECLARATION", - [ NODE_VARIANT ] = "NODE_VARIANT", - [ NODE_STRUCT ] = "NODE_STRUCT", +#define ENTRY(S) [S] = #S, + FOREACH_CTF_NODES(ENTRY) +#undef ENTRY +}; + +/* + * Static node for out of memory errors. Only "type" is used. lineno is + * always left at 0. The rest of the node content can be overwritten, + * but is never used. + */ +static struct ctf_node error_node = { + .type = NODE_ERROR, }; BT_HIDDEN @@ -131,45 +118,200 @@ static struct gc_string *gc_string_alloc(struct ctf_scanner *scanner, return gstr; } -/* - * note: never use gc_string_append on a string that has external references. - * gsrc will be garbage collected immediately, and gstr might be. - * Should only be used to append characters to a string literal or constant. - */ -BT_HIDDEN -struct gc_string *gc_string_append(struct ctf_scanner *scanner, - struct gc_string *gstr, - struct gc_string *gsrc) +void setstring(struct ctf_scanner *scanner, YYSTYPE *lvalp, const char *src) { - size_t newlen = strlen(gsrc->s) + strlen(gstr->s) + 1; - size_t alloclen; + lvalp->gs = gc_string_alloc(scanner, strlen(src) + 1); + strcpy(lvalp->gs->s, src); +} - /* TODO: could be faster with find first bit or glib Gstring */ - /* sizeof long to account for malloc header (int or long ?) */ - for (alloclen = 8; alloclen < sizeof(long) + sizeof(*gstr) + newlen; - alloclen *= 2); +static +int str_check(size_t str_len, size_t offset, size_t len) +{ + /* check overflow */ + if (offset + len < offset) + return -1; + if (offset + len > str_len) + return -1; + return 0; +} - if (alloclen > gstr->alloclen) { - struct gc_string *newgstr; +static +int bt_isodigit(int c) +{ + switch (c) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + return 1; + default: + return 0; + } +} - newgstr = gc_string_alloc(scanner, newlen); - strcpy(newgstr->s, gstr->s); - strcat(newgstr->s, gsrc->s); - bt_list_del(&gstr->gc); - free(gstr); - gstr = newgstr; - } else { - strcat(gstr->s, gsrc->s); +static +int parse_base_sequence(const char *src, size_t len, size_t pos, + char *buffer, size_t *buf_len, int base) +{ + const size_t max_char = 3; + int nr_char = 0; + + while (!str_check(len, pos, 1) && nr_char < max_char) { + char c = src[pos++]; + + if (base == 8) { + if (bt_isodigit(c)) + buffer[nr_char++] = c; + else + break; + } else if (base == 16) { + if (isxdigit(c)) + buffer[nr_char++] = c; + else + break; + + } else { + /* Unsupported base */ + return -1; + } } - bt_list_del(&gsrc->gc); - free(gsrc); - return gstr; + assert(nr_char > 0); + buffer[nr_char] = '\0'; + *buf_len = nr_char; + return 0; } -void setstring(struct ctf_scanner *scanner, YYSTYPE *lvalp, const char *src) +static +int import_basic_string(struct ctf_scanner *scanner, YYSTYPE *lvalp, + size_t len, const char *src, char delim) { - lvalp->gs = gc_string_alloc(scanner, strlen(src) + 1); - strcpy(lvalp->gs->s, src); + size_t pos = 0, dpos = 0; + + if (str_check(len, pos, 1)) + return -1; + if (src[pos++] != delim) + return -1; + + while (src[pos] != delim) { + char c; + + if (str_check(len, pos, 1)) + return -1; + c = src[pos++]; + if (c == '\\') { + if (str_check(len, pos, 1)) + return -1; + c = src[pos++]; + + switch (c) { + case 'a': + c = '\a'; + break; + case 'b': + c = '\b'; + break; + case 'f': + c = '\f'; + break; + case 'n': + c = '\n'; + break; + case 'r': + c = '\r'; + break; + case 't': + c = '\t'; + break; + case 'v': + c = '\v'; + break; + case '\\': + c = '\\'; + break; + case '\'': + c = '\''; + break; + case '\"': + c = '\"'; + break; + case '?': + c = '?'; + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + { + char oct_buffer[4]; + size_t oct_len; + + if (parse_base_sequence(src, len, pos - 1, + oct_buffer, &oct_len, 8)) + return -1; + c = strtoul(&oct_buffer[0], NULL, 8); + pos += oct_len - 1; + break; + } + case 'x': + { + char hex_buffer[4]; + size_t hex_len; + + if (parse_base_sequence(src, len, pos, + hex_buffer, &hex_len, 16)) + return -1; + c = strtoul(&hex_buffer[0], NULL, 16); + pos += hex_len; + break; + } + default: + return -1; + } + } + if (str_check(len, dpos, 1)) + return -1; + lvalp->gs->s[dpos++] = c; + } + + if (str_check(len, dpos, 1)) + return -1; + lvalp->gs->s[dpos++] = '\0'; + + if (str_check(len, pos, 1)) + return -1; + if (src[pos++] != delim) + return -1; + + if (str_check(len, pos, 1)) + return -1; + if (src[pos] != '\0') + return -1; + return 0; +} + +int import_string(struct ctf_scanner *scanner, YYSTYPE *lvalp, + const char *src, char delim) +{ + size_t len; + + len = strlen(src) + 1; + lvalp->gs = gc_string_alloc(scanner, len); + if (src[0] == 'L') { + // TODO: import wide string + printfl_error(yyget_lineno(scanner), + "Wide string not supported yet."); + return -1; + } else { + return import_basic_string(scanner, lvalp, len, src, delim); + } } static void init_scope(struct ctf_scanner_scope *scope, @@ -246,8 +388,10 @@ static struct ctf_node *make_node(struct ctf_scanner *scanner, struct ctf_node *node; node = malloc(sizeof(*node)); - if (!node) - return NULL; + if (!node) { + printfl_fatal(yyget_lineno(scanner->scanner), "out of memory"); + return &error_node; + } memset(node, 0, sizeof(*node)); node->type = type; node->lineno = yyget_lineno(scanner->scanner); @@ -257,6 +401,7 @@ static struct ctf_node *make_node(struct ctf_scanner *scanner, switch (type) { case NODE_ROOT: + node->type = NODE_ERROR; printfn_fatal(node, "trying to create root node"); break; @@ -337,6 +482,7 @@ static struct ctf_node *make_node(struct ctf_scanner *scanner, case NODE_UNKNOWN: default: + node->type = NODE_ERROR; printfn_fatal(node, "unknown node type '%d'", (int) type); break; } @@ -948,20 +1094,23 @@ void ctf_scanner_free(struct ctf_scanner *scanner) */ %expect 2 %start file -%token CHARACTER_CONSTANT_START SQUOTE STRING_LITERAL_START DQUOTE ESCSEQ CHAR_STRING_TOKEN LSBRAC RSBRAC LPAREN RPAREN LBRAC RBRAC RARROW STAR PLUS MINUS LT GT TYPEASSIGN COLON SEMICOLON DOTDOTDOT DOT EQUAL COMMA CONST CHAR DOUBLE ENUM ENV EVENT FLOATING_POINT FLOAT INTEGER INT LONG SHORT SIGNED STREAM STRING STRUCT TRACE CALLSITE CLOCK TYPEALIAS TYPEDEF UNSIGNED VARIANT VOID _BOOL _COMPLEX _IMAGINARY DECIMAL_CONSTANT OCTAL_CONSTANT HEXADECIMAL_CONSTANT TOK_ALIGN +%token INTEGER_LITERAL STRING_LITERAL CHARACTER_LITERAL LSBRAC RSBRAC LPAREN RPAREN LBRAC RBRAC RARROW STAR PLUS MINUS LT GT TYPEASSIGN COLON SEMICOLON DOTDOTDOT DOT EQUAL COMMA CONST CHAR DOUBLE ENUM ENV EVENT FLOATING_POINT FLOAT INTEGER INT LONG SHORT SIGNED STREAM STRING STRUCT TRACE CALLSITE CLOCK TYPEALIAS TYPEDEF UNSIGNED VARIANT VOID _BOOL _COMPLEX _IMAGINARY TOK_ALIGN %token IDENTIFIER ID_TYPE %token ERROR %union { long long ll; + unsigned long long ull; char c; struct gc_string *gs; struct ctf_node *n; } +%type STRING_LITERAL CHARACTER_LITERAL + %type keywords -%type s_char s_char_sequence c_char c_char_sequence +%type INTEGER_LITERAL %type postfix_expression unary_expression unary_expression_or_range %type declaration @@ -1073,41 +1222,6 @@ keywords: { $$ = yylval.gs; } ; -/* 1.5 Constants */ - -c_char_sequence: - c_char - { $$ = $1; } - | c_char_sequence c_char - { $$ = gc_string_append(scanner, $1, $2); } - ; - -c_char: - CHAR_STRING_TOKEN - { $$ = yylval.gs; } - | ESCSEQ - { - reparent_error(scanner, "escape sequences not supported yet"); - } - ; - -/* 1.6 String literals */ - -s_char_sequence: - s_char - { $$ = $1; } - | s_char_sequence s_char - { $$ = gc_string_append(scanner, $1, $2); } - ; - -s_char: - CHAR_STRING_TOKEN - { $$ = yylval.gs; } - | ESCSEQ - { - reparent_error(scanner, "escape sequences not supported yet"); - } - ; /* 2: Phrase structure grammar */ @@ -1130,50 +1244,27 @@ postfix_expression: $$->u.unary_expression.type = UNARY_STRING; $$->u.unary_expression.u.string = yylval.gs->s; } - | DECIMAL_CONSTANT + | INTEGER_LITERAL { $$ = make_node(scanner, NODE_UNARY_EXPRESSION); $$->u.unary_expression.type = UNARY_UNSIGNED_CONSTANT; - sscanf(yylval.gs->s, "%" PRIu64, - &$$->u.unary_expression.u.unsigned_constant); + $$->u.unary_expression.u.unsigned_constant = $1; } - | OCTAL_CONSTANT - { - $$ = make_node(scanner, NODE_UNARY_EXPRESSION); - $$->u.unary_expression.type = UNARY_UNSIGNED_CONSTANT; - sscanf(yylval.gs->s, "0%" PRIo64, - &$$->u.unary_expression.u.unsigned_constant); - } - | HEXADECIMAL_CONSTANT - { - $$ = make_node(scanner, NODE_UNARY_EXPRESSION); - $$->u.unary_expression.type = UNARY_UNSIGNED_CONSTANT; - sscanf(yylval.gs->s, "0x%" PRIx64, - &$$->u.unary_expression.u.unsigned_constant); - } - | STRING_LITERAL_START DQUOTE + | STRING_LITERAL { $$ = make_node(scanner, NODE_UNARY_EXPRESSION); $$->u.unary_expression.type = UNARY_STRING; - $$->u.unary_expression.u.string = ""; + $$->u.unary_expression.u.string = $1->s; } - | STRING_LITERAL_START s_char_sequence DQUOTE + | CHARACTER_LITERAL { $$ = make_node(scanner, NODE_UNARY_EXPRESSION); $$->u.unary_expression.type = UNARY_STRING; - $$->u.unary_expression.u.string = $2->s; - } - | CHARACTER_CONSTANT_START c_char_sequence SQUOTE - { - $$ = make_node(scanner, NODE_UNARY_EXPRESSION); - $$->u.unary_expression.type = UNARY_STRING; - $$->u.unary_expression.u.string = $2->s; + $$->u.unary_expression.u.string = $1->s; } | LPAREN unary_expression RPAREN { - $$ = make_node(scanner, NODE_UNARY_EXPRESSION); - $$->u.unary_expression.type = UNARY_NESTED; - $$->u.unary_expression.u.nested_exp = $2; + $$ = $2; } | postfix_expression LSBRAC unary_expression RSBRAC { @@ -2210,15 +2301,10 @@ enumerator: $$ = make_node(scanner, NODE_ENUMERATOR); $$->u.enumerator.id = $1->s; } - | STRING_LITERAL_START DQUOTE + | STRING_LITERAL { $$ = make_node(scanner, NODE_ENUMERATOR); - $$->u.enumerator.id = ""; - } - | STRING_LITERAL_START s_char_sequence DQUOTE - { - $$ = make_node(scanner, NODE_ENUMERATOR); - $$->u.enumerator.id = $2->s; + $$->u.enumerator.id = $1->s; } | IDENTIFIER EQUAL unary_expression_or_range { @@ -2238,17 +2324,11 @@ enumerator: $$->u.enumerator.id = $1->s; bt_list_splice(&($3)->tmp_head, &($$)->u.enumerator.values); } - | STRING_LITERAL_START DQUOTE EQUAL unary_expression_or_range + | STRING_LITERAL EQUAL unary_expression_or_range { $$ = make_node(scanner, NODE_ENUMERATOR); - $$->u.enumerator.id = ""; - bt_list_splice(&($4)->tmp_head, &($$)->u.enumerator.values); - } - | STRING_LITERAL_START s_char_sequence DQUOTE EQUAL unary_expression_or_range - { - $$ = make_node(scanner, NODE_ENUMERATOR); - $$->u.enumerator.id = $2->s; - bt_list_splice(&($5)->tmp_head, &($$)->u.enumerator.values); + $$->u.enumerator.id = $1->s; + bt_list_splice(&($3)->tmp_head, &($$)->u.enumerator.values); } ;