Support escape characters in metadata strings
author: Etienne Bergeron <etienneb@google.com>
Thu, 4 Apr 2013 13:44:24 +0000 (09:44 -0400)
committer: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Thu, 4 Apr 2013 13:44:24 +0000 (09:44 -0400)
Signed-off-by: Etienne Bergeron <etienneb@google.com>
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
formats/ctf/metadata/ctf-lexer.l
formats/ctf/metadata/ctf-parser.y
tests/ctf-traces/fail/fail2/dummystream [new file with mode: 0644]
tests/ctf-traces/fail/fail2/metadata [new file with mode: 0644]
tests/ctf-traces/succeed/succeed3/dummystream [new file with mode: 0644]
tests/ctf-traces/succeed/succeed3/metadata [new file with mode: 0644]

index c79b6c83b5491cc0df9ecd023cd824c715c7c10e..a282bf7a473d6a8bd04ab3feba5906e801a7a213 100644 (file)
@@ -38,6 +38,9 @@ static void yyunput (int c, register char * yy_bp , yyscan_t yyscanner)
        __attribute__((unused));
 static int input (yyscan_t yyscanner) __attribute__((unused));
 
+BT_HIDDEN
+int import_string(struct ctf_scanner *scanner, YYSTYPE *lvalp, const char *src, char delim);
+
 %}
 
 %x comment_ml comment_sl string_lit char_const
@@ -53,7 +56,6 @@ UCHARLOWERCASE                        \\u{HEXDIGIT}{4}
 UCHARUPPERCASE                 \\U{HEXDIGIT}{8}
 ID_NONDIGIT                    {NONDIGIT}|{UCHARLOWERCASE}|{UCHARUPPERCASE}
 IDENTIFIER                     {ID_NONDIGIT}({ID_NONDIGIT}|{DIGIT})*
-ESCSEQ                         \\(\'|\"|\?|\\|a|b|f|n|r|t|v|{OCTALDIGIT}{1,3}|u{HEXDIGIT}{4}|U{HEXDIGIT}{8}|x{HEXDIGIT}+)
 %%
 
                                /*
@@ -70,17 +72,8 @@ ESCSEQ                               \\(\'|\"|\?|\\|a|b|f|n|r|t|v|{OCTALDIGIT}{1,3}|u{HEXDIGIT}{4}|U{HEXDIG
 "//"                           BEGIN(comment_sl);
 <comment_sl>[^\n]*\n           BEGIN(INITIAL);
 
-L\'                            BEGIN(char_const); return CHARACTER_CONSTANT_START;
-\'                             BEGIN(char_const); return CHARACTER_CONSTANT_START;
-<char_const>\'                 BEGIN(INITIAL); return SQUOTE;
-
-L\"                            BEGIN(string_lit); return STRING_LITERAL_START;
-\"                             BEGIN(string_lit); return STRING_LITERAL_START;
-<string_lit>\"                 BEGIN(INITIAL); return DQUOTE;
-
-<char_const,string_lit>ESCSEQ  return ESCSEQ;
-<char_const,string_lit>\n      ; /* ignore */
-<char_const,string_lit>.       setstring(yyextra, yylval, yytext); return CHAR_STRING_TOKEN;
+L?\"(\\.|[^\\"])*\"            { if (import_string(yyextra, yylval, yytext, '\"') < 0) return ERROR; else return STRING_LITERAL; }
+L?\'(\\.|[^\\'])*\'            { if (import_string(yyextra, yylval, yytext, '\'') < 0) return ERROR; else return CHARACTER_LITERAL; }
 
 "["                            return LSBRAC;
 "]"                            return RSBRAC;
index 7e3fc40bc9e68f24e311ec238178d620737138ef..ce514c682c64d68c7205d315583f5871ba424084 100644 (file)
@@ -26,6 +26,7 @@
  */
 
 #include <stdio.h>
+#include <ctype.h>
 #include <unistd.h>
 #include <string.h>
 #include <stdlib.h>
@@ -149,6 +150,144 @@ void setstring(struct ctf_scanner *scanner, YYSTYPE *lvalp, const char *src)
        strcpy(lvalp->gs->s, src);
 }
 
+/*
+ * Check that the byte range [offset, offset + len) fits inside a buffer
+ * of str_len bytes.
+ *
+ * Returns 0 when the range fits, -1 when it does not or when
+ * offset + len wraps around.
+ */
+static
+int str_check(size_t str_len, size_t offset, size_t len)
+{
+       size_t end = offset + len;
+
+       /* An unsigned sum smaller than its operand means it wrapped. */
+       if (end < offset || end > str_len)
+               return -1;
+       return 0;
+}
+
+/*
+ * Decode a narrow string or character literal into lvalp->gs->s.
+ *
+ * src holds the raw token text: an opening delim, the payload, a closing
+ * delim and a terminating NUL. len is the size of src including that
+ * NUL; it is also used as the bound for writes into lvalp->gs->s, which
+ * the caller must have allocated beforehand.
+ *
+ * Recognised escapes: \0 \a \b \f \n \r \t \v \\ \' \" \? plus \oNNN
+ * (exactly three octal digits) and \xNN (exactly two hex digits). Any
+ * other escape is rejected.
+ *
+ * Returns 0 on success, -1 on malformed input.
+ */
+static
+int import_basic_string(struct ctf_scanner *scanner, YYSTYPE *lvalp,
+               size_t len, const char *src, char delim)
+{
+       size_t pos = 0, dpos = 0;
+
+       if (str_check(len, pos, 1))
+               return -1;
+       if (src[pos++] != delim)
+               return -1;
+
+       for (;;) {
+               char c;
+
+               /*
+                * Bounds-check before looking at src[pos]: an unterminated
+                * literal must fail here instead of reading past the NUL.
+                */
+               if (str_check(len, pos, 1))
+                       return -1;
+               if (src[pos] == delim)
+                       break;
+               c = src[pos++];
+               if (c == '\\') {
+                       if (str_check(len, pos, 1))
+                               return -1;
+                       c = src[pos++];
+
+                       switch (c) {
+                       case '0':
+                               c = '\0';
+                               break;
+                       case 'a':
+                               c = '\a';
+                               break;
+                       case 'b':
+                               c = '\b';
+                               break;
+                       case 'f':
+                               c = '\f';
+                               break;
+                       case 'n':
+                               c = '\n';
+                               break;
+                       case 'r':
+                               c = '\r';
+                               break;
+                       case 't':
+                               c = '\t';
+                               break;
+                       case 'v':
+                               c = '\v';
+                               break;
+                       case '\\':
+                               c = '\\';
+                               break;
+                       case '\'':
+                               c = '\'';
+                               break;
+                       case '\"':
+                               c = '\"';
+                               break;
+                       case '?':
+                               c = '?';
+                               break;
+                       case 'o':
+                       {
+                               size_t oct_len = 3;
+                               size_t i;
+
+                               if (str_check(len, pos, oct_len))
+                                       return -1;
+                               /* Only 0-7 are octal digits; isdigit() would let 8 and 9 through. */
+                               for (i = 0; i < oct_len; i++) {
+                                       if (src[pos + i] < '0' || src[pos + i] > '7')
+                                               return -1;
+                               }
+                               char oct_buffer[4] = { src[pos], src[pos+1], src[pos+2], '\0' };
+                               c = (char) strtoul(&oct_buffer[0], NULL, 8);
+                               pos += oct_len;
+                               break;
+                       }
+                       case 'x':
+                       {
+                               size_t hex_len = 2;
+
+                               if (str_check(len, pos, hex_len))
+                                       return -1;
+                               /* <ctype.h> needs an unsigned char value; plain char may be negative. */
+                               if (!isxdigit((unsigned char) src[pos]) || !isxdigit((unsigned char) src[pos+1]))
+                                       return -1;
+                               char hex_buffer[3] = { src[pos], src[pos+1], '\0' };
+                               c = (char) strtoul(&hex_buffer[0], NULL, 16);
+                               pos += hex_len;
+                               break;
+                       }
+                       default:
+                               return -1;
+                       }
+               }
+               if (str_check(len, dpos, 1))
+                       return -1;
+               lvalp->gs->s[dpos++] = c;
+       }
+
+       if (str_check(len, dpos, 1))
+               return -1;
+       lvalp->gs->s[dpos++] = '\0';
+
+       /* Consume the closing delimiter the loop stopped on. */
+       if (str_check(len, pos, 1))
+               return -1;
+       if (src[pos++] != delim)
+               return -1;
+
+       /* Nothing but the terminating NUL may follow the closing delimiter. */
+       if (str_check(len, pos, 1))
+               return -1;
+       if (src[pos] != '\0')
+               return -1;
+       return 0;
+}
+
+/*
+ * Import a quoted literal token into a newly allocated string stored in
+ * lvalp->gs.
+ *
+ * src is the NUL-terminated token text, including its delimiters and an
+ * optional leading 'L'; delim is the quote character that encloses it.
+ * The destination is requested with strlen(src) + 1 bytes: decoding
+ * never produces more characters than the raw token holds.
+ *
+ * Wide literals (leading 'L') are not supported yet and are reported as
+ * an error.
+ *
+ * Returns 0 on success, -1 on error.
+ */
+int import_string(struct ctf_scanner *scanner, YYSTYPE *lvalp,
+               const char *src, char delim)
+{
+       size_t len;
+
+       len = strlen(src) + 1;
+       lvalp->gs = gc_string_alloc(scanner, len);
+       /* Defensive: do not hand a NULL destination to the decoder. */
+       if (!lvalp->gs)
+               return -1;
+       if (src[0] == 'L') {
+               /* TODO: import wide string */
+               printfl_error(yyget_lineno(scanner),
+                       "Wide string not supported yet.");
+               return -1;
+       }
+       return import_basic_string(scanner, lvalp, len, src, delim);
+}
+
 static void init_scope(struct ctf_scanner_scope *scope,
                       struct ctf_scanner_scope *parent)
 {
@@ -925,7 +1064,7 @@ void ctf_scanner_free(struct ctf_scanner *scanner)
  */
 %expect 2
 %start file
-%token CHARACTER_CONSTANT_START SQUOTE STRING_LITERAL_START DQUOTE ESCSEQ CHAR_STRING_TOKEN LSBRAC RSBRAC LPAREN RPAREN LBRAC RBRAC RARROW STAR PLUS MINUS LT GT TYPEASSIGN COLON SEMICOLON DOTDOTDOT DOT EQUAL COMMA CONST CHAR DOUBLE ENUM ENV EVENT FLOATING_POINT FLOAT INTEGER INT LONG SHORT SIGNED STREAM STRING STRUCT TRACE CALLSITE CLOCK TYPEALIAS TYPEDEF UNSIGNED VARIANT VOID _BOOL _COMPLEX _IMAGINARY DECIMAL_CONSTANT OCTAL_CONSTANT HEXADECIMAL_CONSTANT TOK_ALIGN
+%token STRING_LITERAL CHARACTER_LITERAL LSBRAC RSBRAC LPAREN RPAREN LBRAC RBRAC RARROW STAR PLUS MINUS LT GT TYPEASSIGN COLON SEMICOLON DOTDOTDOT DOT EQUAL COMMA CONST CHAR DOUBLE ENUM ENV EVENT FLOATING_POINT FLOAT INTEGER INT LONG SHORT SIGNED STREAM STRING STRUCT TRACE CALLSITE CLOCK TYPEALIAS TYPEDEF UNSIGNED VARIANT VOID _BOOL _COMPLEX _IMAGINARY DECIMAL_CONSTANT OCTAL_CONSTANT HEXADECIMAL_CONSTANT TOK_ALIGN
 %token <gs> IDENTIFIER ID_TYPE
 %token ERROR
 %union
@@ -936,8 +1075,9 @@ void ctf_scanner_free(struct ctf_scanner *scanner)
        struct ctf_node *n;
 }
 
+%type <gs> STRING_LITERAL CHARACTER_LITERAL
+
 %type <gs> keywords
-%type <gs> s_char s_char_sequence c_char c_char_sequence
 
 %type <n> postfix_expression unary_expression unary_expression_or_range
 
@@ -1050,41 +1190,6 @@ keywords:
                {       $$ = yylval.gs;         }
        ;
 
-/* 1.5 Constants */
-
-c_char_sequence:
-               c_char
-               {       $$ = $1;                                        }
-       |       c_char_sequence c_char
-               {       $$ = gc_string_append(scanner, $1, $2);         }
-       ;
-
-c_char:
-               CHAR_STRING_TOKEN
-               {       $$ = yylval.gs;                                 }
-       |       ESCSEQ
-               {
-                       reparent_error(scanner, "escape sequences not supported yet");
-               }
-       ;
-
-/* 1.6 String literals */
-
-s_char_sequence:
-               s_char
-               {       $$ = $1;                                        }
-       |       s_char_sequence s_char
-               {       $$ = gc_string_append(scanner, $1, $2);         }
-       ;
-
-s_char:
-               CHAR_STRING_TOKEN
-               {       $$ = yylval.gs;                                 }
-       |       ESCSEQ
-               {
-                       reparent_error(scanner, "escape sequences not supported yet");
-               }
-       ;
 
 /* 2: Phrase structure grammar */
 
@@ -1128,23 +1233,17 @@ postfix_expression:
                        sscanf(yylval.gs->s, "0x%" PRIx64,
                               &$$->u.unary_expression.u.unsigned_constant);
                }
-       |       STRING_LITERAL_START DQUOTE
-               {
-                       $$ = make_node(scanner, NODE_UNARY_EXPRESSION);
-                       $$->u.unary_expression.type = UNARY_STRING;
-                       $$->u.unary_expression.u.string = "";
-               }
-       |       STRING_LITERAL_START s_char_sequence DQUOTE
+       |       STRING_LITERAL
                {
                        $$ = make_node(scanner, NODE_UNARY_EXPRESSION);
                        $$->u.unary_expression.type = UNARY_STRING;
-                       $$->u.unary_expression.u.string = $2->s;
+                       $$->u.unary_expression.u.string = $1->s;
                }
-       |       CHARACTER_CONSTANT_START c_char_sequence SQUOTE
+       |       CHARACTER_LITERAL
                {
                        $$ = make_node(scanner, NODE_UNARY_EXPRESSION);
                        $$->u.unary_expression.type = UNARY_STRING;
-                       $$->u.unary_expression.u.string = $2->s;
+                       $$->u.unary_expression.u.string = $1->s;
                }
        |       LPAREN unary_expression RPAREN
                {
@@ -2187,15 +2286,10 @@ enumerator:
                        $$ = make_node(scanner, NODE_ENUMERATOR);
                        $$->u.enumerator.id = $1->s;
                }
-       |       STRING_LITERAL_START DQUOTE
-               {
-                       $$ = make_node(scanner, NODE_ENUMERATOR);
-                       $$->u.enumerator.id = "";
-               }
-       |       STRING_LITERAL_START s_char_sequence DQUOTE
+       |       STRING_LITERAL
                {
                        $$ = make_node(scanner, NODE_ENUMERATOR);
-                       $$->u.enumerator.id = $2->s;
+                       $$->u.enumerator.id = $1->s;
                }
        |       IDENTIFIER EQUAL unary_expression_or_range
                {
@@ -2215,17 +2309,11 @@ enumerator:
                        $$->u.enumerator.id = $1->s;
                        bt_list_splice(&($3)->tmp_head, &($$)->u.enumerator.values);
                }
-       |       STRING_LITERAL_START DQUOTE EQUAL unary_expression_or_range
-               {
-                       $$ = make_node(scanner, NODE_ENUMERATOR);
-                       $$->u.enumerator.id = "";
-                       bt_list_splice(&($4)->tmp_head, &($$)->u.enumerator.values);
-               }
-       |       STRING_LITERAL_START s_char_sequence DQUOTE EQUAL unary_expression_or_range
+       |       STRING_LITERAL EQUAL unary_expression_or_range
                {
                        $$ = make_node(scanner, NODE_ENUMERATOR);
-                       $$->u.enumerator.id = $2->s;
-                       bt_list_splice(&($5)->tmp_head, &($$)->u.enumerator.values);
+                       $$->u.enumerator.id = $1->s;
+                       bt_list_splice(&($3)->tmp_head, &($$)->u.enumerator.values);
                }
        ;
 
diff --git a/tests/ctf-traces/fail/fail2/dummystream b/tests/ctf-traces/fail/fail2/dummystream
new file mode 100644 (file)
index 0000000..53d735e
Binary files /dev/null and b/tests/ctf-traces/fail/fail2/dummystream differ
diff --git a/tests/ctf-traces/fail/fail2/metadata b/tests/ctf-traces/fail/fail2/metadata
new file mode 100644 (file)
index 0000000..456661e
Binary files /dev/null and b/tests/ctf-traces/fail/fail2/metadata differ
diff --git a/tests/ctf-traces/succeed/succeed3/dummystream b/tests/ctf-traces/succeed/succeed3/dummystream
new file mode 100644 (file)
index 0000000..53d735e
Binary files /dev/null and b/tests/ctf-traces/succeed/succeed3/dummystream differ
diff --git a/tests/ctf-traces/succeed/succeed3/metadata b/tests/ctf-traces/succeed/succeed3/metadata
new file mode 100644 (file)
index 0000000..c180766
--- /dev/null
@@ -0,0 +1,29 @@
+/* CTF 1.8 */
+typealias integer { size = 8; align = 8; signed = false; base = 10; } := uint8_t;
+typealias integer { size = 32; align = 32; signed = false; base = hex; } := uint32_t;
+
+trace {
+       major = 0;
+       minor = 1;
+       test = "\"";
+       test2 = "?\x20\o040?";
+       test3 = '\n';
+       uuid = "2a6422d0-6cee-11e0-8c08-cb07d7b3a564";
+       byte_order = le;
+       packet.header := struct {
+               uint32_t magic;
+               uint8_t  uuid[16];
+       };
+};
+
+stream {
+       packet.context := struct {
+               uint32_t content_size;
+               uint32_t packet_size;
+       };
+};
+
+event {
+       name = string;
+       fields := struct { string str; };
+};
This page took 0.030576 seconds and 4 git commands to generate.