Commit | Line | Data |
---|---|---|
866e5b51 FC |
1 | lexer grammar CTFLexer; |
2 | ||
3 | options { | |
4 | language = Java; | |
5 | } | |
6 | ||
7 | @lexer::header { | |
8 | package org.eclipse.linuxtools.ctf.parser; | |
9 | } | |
10 | ||
11 | /* | |
12 | * Lexer grammars | |
13 | */ | |
14 | ||
15 | /* | |
16 | * Keywords: each rule matches one fixed spelling. IDENTIFIER (declared at the end of this grammar) has a pattern that also matches every one of these spellings. NOTE(review): presumably ANTLR resolves that overlap in favour of the keyword rule -- confirm. | |
17 | */ | |
18 | ALIGNTOK : 'align' ; | |
19 | CONSTTOK : 'const' ; | |
20 | CHARTOK : 'char' ; | |
21 | DOUBLETOK : 'double' ; | |
22 | ENUMTOK : 'enum' ; | |
23 | EVENTTOK : 'event' ; | |
24 | FLOATINGPOINTTOK : 'floating_point' ; | |
25 | FLOATTOK : 'float' ; | |
26 | INTEGERTOK : 'integer' ; | |
27 | INTTOK : 'int' ; | |
28 | LONGTOK : 'long' ; | |
29 | SHORTTOK : 'short' ; | |
30 | SIGNEDTOK : 'signed' ; | |
31 | STREAMTOK : 'stream' ; | |
32 | STRINGTOK : 'string' ; | |
33 | STRUCTTOK : 'struct' ; | |
34 | TRACETOK : 'trace' ; | |
35 | TYPEALIASTOK : 'typealias' ; | |
36 | TYPEDEFTOK : 'typedef' ; | |
37 | UNSIGNEDTOK : 'unsigned' ; | |
38 | VARIANTTOK : 'variant' ; | |
39 | VOIDTOK : 'void' ; | |
40 | BOOLTOK : '_Bool' ; | |
41 | COMPLEXTOK : '_Complex' ; | |
42 | IMAGINARYTOK : '_Imaginary' ; | |
43 | ENVTOK : 'env' ; | |
44 | CLOCKTOK : 'clock' ; | |
4c9d2941 MK |
45 | /* |
46 | * Callsite tokens (v1.9): CALLSITETOK matches the fixed word callsite. NOTE(review): "v1.9" presumably names the release that introduced this token -- confirm. | |
47 | */ | |
48 | CALLSITETOK : 'callsite' ; | |
49 | ||
866e5b51 FC |
50 | |
51 | /* | |
52 | * Tokens for the fixed spellings NaN, +inf and -inf. Spec still to come. | |
53 | */ | |
54 | NANNUMBERTOK : 'NaN' ; | |
55 | INFINITYTOK : '+inf' ; | |
56 | NINFINITYTOK : '-inf' ; | |
57 | ||
58 | /* | |
59 | * Symbols: punctuation and operator tokens. SIGN matches either a plus or a minus. BACKSLASH is declared as a fragment: it is a building block for the escape-sequence and literal-content rules further down, not a token of its own. | |
60 | */ | |
61 | SEPARATOR : ',' ; | |
62 | COLON : ':' ; | |
63 | ELIPSES : '...' ; | |
64 | ASSIGNMENT : '=' ; | |
65 | TYPE_ASSIGNMENT : ':=' ; | |
66 | LT : '<' ; | |
67 | GT : '>' ; | |
68 | OPENBRAC : '[' ; | |
69 | CLOSEBRAC : ']' ; | |
70 | LPAREN : '(' ; | |
71 | RPAREN : ')' ; | |
72 | LCURL : '{' ; | |
73 | RCURL : '}' ; | |
74 | TERM : ';' ; | |
75 | POINTER : '*' ; | |
76 | SIGN : '+' | '-' ; | |
77 | ARROW : '->' ; | |
78 | DOT : '.' ; | |
79 | fragment BACKSLASH : '\\' ; | |
80 | ||
81 | /* | |
82 | * Boolean literals | |
83 | * - We better leave them as identifiers and numbers... | |
84 | */ | |
85 | /*TRUE : 'true' | 'TRUE' ; | |
86 | FALSE : 'false' | 'FALSE' ; | |
87 | ZERO : '0' ; | |
88 | ONE : '1' ;*/ | |
89 | ||
90 | ||
91 | /* | |
92 | * Integer literals: octal (a leading 0 followed by one or more digits 0..7), decimal (one or more digits) and hexadecimal (0x or 0X followed by one or more hex digits). Each form accepts an optional INTEGER_TYPES_SUFFIX. | |
93 | */ | |
94 | OCTAL_LITERAL : '0' ('0'..'7')+ INTEGER_TYPES_SUFFIX? ; | |
95 | ||
96 | DECIMAL_LITERAL : DIGIT+ INTEGER_TYPES_SUFFIX? ; | |
97 | ||
98 | HEX_LITERAL : HEX_PREFIX HEX_DIGIT+ INTEGER_TYPES_SUFFIX? ; | |
99 | fragment HEX_DIGIT : DIGIT | ('a'..'f') | ('A'..'F') ; | |
100 | fragment HEX_PREFIX : '0' ('x' | 'X') ; | |
101 | ||
102 | /* Helpers for integer literals. NOTE(review): NONZERO_DIGIT is not referenced by any rule visible in this grammar -- confirm whether it is still needed. */ | |
103 | fragment DIGIT : '0'..'9' ; | |
104 | fragment NONZERO_DIGIT : '1'..'9' ; | |
105 | ||
106 | ||
107 | /** | |
108 | * Integer suffix for long, long long and unsigned. | |
109 | * | |
110 | * Matches the combinations of L, LL and U, with the unsigned marker before or after the long marker. The long marker must be all lower case or all upper case: the mixed spellings lL and Ll are not accepted. | |
111 | */ | |
112 | fragment INTEGER_TYPES_SUFFIX : | |
113 | ('l' ('l')? | 'L' ('L')?) // long only: l, ll, L, LL | |
114 | | ('u' | 'U') // unsigned only: u, U | |
115 | | ('u' | 'U') ('l' ('l')? | 'L' ('L')?) // unsigned first: ul, ull (either case for each part) | |
116 | | ('l' ('l')? | 'L' ('L')?) ('u'| 'U') // long first: lu, llu (either case for each part) | |
117 | ; | |
118 | ||
119 | /** | |
120 | * Escape sequences: a backslash followed by a single quote, a double quote, a question mark, another backslash, or one of the letters a b f n r t v; or an octal, Unicode or hexadecimal escape (defined by the rules that follow). | |
121 | */ | |
122 | fragment ESCAPE_SEQUENCE : | |
123 | BACKSLASH ('\'' | '"' | '?' | BACKSLASH | 'a' | 'b' | 'f' | 'n' | 'r' | 't' | 'v' ) | |
124 | | OCTAL_ESCAPE | |
125 | | UNICODE_ESCAPE | |
126 | | HEXADECIMAL_ESCAPE | |
127 | ; | |
128 | ||
129 | /** | |
130 | * Octal escape sequence: a backslash followed by one, two or three octal digits. The three-digit form only accepts a first digit in the range 0..3. | |
131 | */ | |
132 | fragment OCTAL_ESCAPE : | |
133 | BACKSLASH ('0'..'3') ('0'..'7') ('0'..'7') | |
134 | | BACKSLASH ('0'..'7') ('0'..'7') | |
135 | | BACKSLASH ('0'..'7') | |
136 | ; | |
137 | ||
138 | /** | |
139 | * Hexadecimal escape sequence: a backslash, a lower-case x, then one or more hexadecimal digits. | |
140 | */ | |
141 | fragment HEXADECIMAL_ESCAPE : BACKSLASH 'x' HEX_DIGIT+ ; | |
142 | ||
143 | /** | |
144 | * Unicode escape sequence: a backslash and a lower-case u followed by exactly four hexadecimal digits, or a backslash and an upper-case U followed by exactly eight. | |
145 | */ | |
146 | fragment UNICODE_ESCAPE : | |
147 | BACKSLASH 'u' HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT | |
148 | | BACKSLASH 'U' HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT | |
149 | ; | |
150 | ||
151 | ||
152 | /* Upper-case L prefix; CHARACTER_LITERAL and STRING_LITERAL both accept it optionally before the opening quote */ | |
153 | fragment STRINGPREFIX : 'L'; | |
154 | ||
155 | /* | |
156 | * Character literal: an optional STRINGPREFIX, then one or more CHAR_CONTENT items between single quotes. A CHAR_CONTENT item is either an escape sequence or any one character other than a backslash or a single quote. | |
157 | */ | |
158 | CHARACTER_LITERAL : STRINGPREFIX? SINGLEQUOTE CHAR_CONTENT+ SINGLEQUOTE ; | |
159 | fragment CHAR_CONTENT : (ESCAPE_SEQUENCE | ~(BACKSLASH | SINGLEQUOTE)) ; | |
160 | fragment SINGLEQUOTE : '\''; | |
161 | ||
162 | /* | |
163 | * String literal: an optional STRINGPREFIX, then zero or more STRING_CONTENT items between double quotes (so the empty string is accepted). A STRING_CONTENT item is either an escape sequence or any one character other than a backslash or a double quote. | |
164 | */ | |
165 | STRING_LITERAL : STRINGPREFIX? DOUBLEQUOTE STRING_CONTENT* DOUBLEQUOTE ; | |
166 | fragment STRING_CONTENT : (ESCAPE_SEQUENCE | ~(BACKSLASH | DOUBLEQUOTE)) ; | |
167 | fragment DOUBLEQUOTE : '"' ; | |
168 | ||
169 | /** | |
170 | * Whitespace: each token is a single space, carriage return, tab, form feed (u000C) or line feed, and is assigned to the HIDDEN channel. | |
171 | */ | |
172 | WS : (' ' | '\r' | '\t' | '\u000C' | '\n') { $channel=HIDDEN; } ; | |
173 | ||
174 | /** | |
175 | * Multiline comment: COMMENT_OPEN, any characters, then COMMENT_CLOSE; the token is assigned to the HIDDEN channel. NOTE(review): this relies on the wildcard loop stopping at the first COMMENT_CLOSE, which the commented-out variant below spelled out with greedy=false -- confirm. | |
176 | */ | |
177 | // About the greedy option: see page 100-101 of The Definitive ANTLR reference | |
178 | // COMMENT : '/*' ( options {greedy=false;} : . )* '*/' {$channel=HIDDEN;} ; | |
179 | COMMENT : COMMENT_OPEN .* COMMENT_CLOSE { $channel = HIDDEN; } ; | |
180 | fragment COMMENT_OPEN : '/*'; | |
181 | fragment COMMENT_CLOSE : '*/'; | |
182 | ||
183 | /** | |
184 | * Single line comment: two slashes followed by every character up to, but not including, the next carriage return or line feed. The token is assigned to the HIDDEN channel. A trailing line feed (optionally preceded by a carriage return) is consumed when present, but it is optional, so a comment on the last line of the input with no newline after it still lexes. A lone carriage return is left for WS. | |
185 | */ | |
186 | LINE_COMMENT : '//' ~('\n'|'\r')* ('\r'? '\n')? {$channel=HIDDEN;} ; | |
187 | ||
188 | /** | |
189 | * Identifiers: one NONDIGIT (an underscore or an ASCII letter A..Z / a..z) followed by any number of NONDIGIT or DIGIT characters. | |
190 | */ | |
191 | IDENTIFIER : NONDIGIT (NONDIGIT | DIGIT)* ; | |
192 | fragment NONDIGIT : ('_') | ('A'..'Z') | ('a'..'z') ; |