Add LTTngUSTLogger logger plugin test
[deliverable/titan.core.git] / common / pattern_uni.y
1 /******************************************************************************
2 * Copyright (c) 2000-2016 Ericsson Telecom AB
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Eclipse Public License v1.0
5 * which accompanies this distribution, and is available at
6 * http://www.eclipse.org/legal/epl-v10.html
7 *
8 * Contributors:
9 * Balasko, Jeno
10 * Raduly, Csaba
11 * Szalai, Gabor
12 * Zalanyi, Balazs Andor
13 *
14 ******************************************************************************/
15
16 /**
17 * Based on pattern_p.y
18 */
19
20 %{
21
22 /*********************************************************************
23 * C(++) declarations
24 *********************************************************************/
25
26 #include <stdio.h>
27 #include <string.h>
28 #include <ctype.h>
29
30 #if defined(__CYGWIN__) && defined(__clang__)
31 /* Cygwin's clang 3.0 has its own limits.h, which does not bring in
32 the system's limits.h unless we define this macro: */
33 #define __STDC_HOSTED__ 1
34 #define _GCC_NEXT_LIMITS_H
35 #endif
36 #include <limits.h>
37
38 #include <regex.h>
39 #if !defined(RE_DUP_MAX)
40 /* RE_DUP_MAX is defined in limits.h or regex.h, except on Cygwin 1.5 */
41 # include <sys/syslimits.h>
42 #endif
43
44 #include "memory.h"
45 #include "pattern.hh"
46
47 #include "Quadruple.hh"
48
49 union YYSTYPE;
50 /* defined in lexer c-file: */
51
52 extern int pattern_yylex();
53 inline int pattern_unilex() { return pattern_yylex(); }
54 extern void init_pattern_yylex(YYSTYPE*);
55 struct yy_buffer_state;
56 extern yy_buffer_state* pattern_yy_scan_string(const char*);
57 extern void pattern_yy_delete_buffer(yy_buffer_state*);
58 extern unsigned int get_nof_parentheses();
59
60 /* defined in this file: */
61
62 /** The converted regexp. */
63 static char *ret_val;
64 /** The parser error reporting function. */
65 static void pattern_unierror(const char *error_str);
66
67 static int user_groups;
68
69 #define YYERROR_VERBOSE
70
71 static void yyprint(FILE *file, int type, const YYSTYPE& value);
72 #define YYPRINT(f,t,v) yyprint(f,t,v)
73
74 %}
75
76 /*********************************************************************
77 * Bison declarations
78 *********************************************************************/
79
80 %name-prefix="pattern_uni"
81 %output="pattern_uni.cc"
82 %defines
83 %verbose
84 %expect 0
85 %start Pattern
86 %debug
87
88 /*********************************************************************
89 * The union-type
90 * Must be kept in sync with the one in pattern_p.y !
91 *********************************************************************/
92
%union {
  int b; /* boolean flag (the grammar stores 0 or 1) */
  char c; /* single character, carried by TOK_Char */
  char *s; /* character string; the nonterminals of type <s> carry regexp fragments here */
  unsigned long int u; /* unsigned integer, carried by TOK_Number and TOK_Digit */
  struct character_set *set; // used by nonterminals in pattern_p.y

  /* One universal character, readable either as a single number (value)
   * or as its four quadruple components (comp). */
  union {
    unsigned int value;
    /* The member order of comp is reversed between the two branches --
     * presumably so that `value' reads as group/plane/row/cell from the most
     * to the least significant byte on both big-endian SPARC and little-endian
     * hosts; NOTE(review): this assumes a 4-byte unsigned int and that every
     * non-SPARC target is little-endian -- confirm. */
#if defined(__sparc__) || defined(__sparc)
    struct {
      unsigned char group;
      unsigned char plane;
      unsigned char row;
      unsigned char cell;
    } comp;
#else
    struct {
      unsigned char cell;
      unsigned char row;
      unsigned char plane;
      unsigned char group;
    } comp;
#endif
  } q; // single universal char, used by nonterminals in pattern_uni.y
  class QuadSet* qset; // used by nonterminals in pattern_uni.y
}
120
121 /*********************************************************************
122 * Tokens
123 *********************************************************************/
124
125 %token <c> TOK_Char "<ordinary character>"
126 %token <u> TOK_Number "<number>"
127 %token <u> TOK_Digit "<digit>"
128
129 /*********************************************************************
130 * Keywords
131 *********************************************************************/
132
133 %token KW_BS_q "\\q"
134 %token KW_BS_d "\\d"
135 %token KW_BS_w "\\w"
136 %token KW_BS_t "\\t"
137 %token KW_BS_n "\\n"
138 %token KW_BS_r "\\r"
139 %token KW_BS_s "\\s"
140 %token KW_BS_b "\\b"
141
142 %token KW_Group_Begin "("
143 %token KW_Group_End ")"
144 %token KW_Set_Begin "["
145 %token KW_Set_Begin_Neg "[^"
146 %token KW_Set_Begin_Rsbrkt "[]"
147 %token KW_Set_Begin_Neg_Rsbrkt "[^]"
148 %token KW_Set_End "]"
149 %token KW_Set_Dash_End "-]"
150
151 /*********************************************************************
152 * semantic types of nonterminals
153 *********************************************************************/
154
155 %type <b> RE_Set_Begin RE_Set_Begin_Rsbrkt RE_Set_End
156 %type <q> RE_Set_Range_Char RE_Quadruple
157 %type <s> RE_Body RE_Elems RE_Alter_Elem RE_Concat_Elem
158 RE_Multiply_Elem RE_Multiply_Statement RE_Group
159 RE_OneCharPos
160 %type <qset> RE_Set RE_Set_Body RE_Set_Elem RE_Set_NoRange_Char
161
162 /*********************************************************************
163 * Destructors
164 *********************************************************************/
165
166 %destructor { Free($$); }
167 RE_Alter_Elem
168 RE_Body
169 RE_Concat_Elem
170 RE_Elems
171 RE_Group
172 RE_Multiply_Elem
173 RE_Multiply_Statement
174 RE_OneCharPos
175
176 %destructor { delete $$; }
177 RE_Set
178 RE_Set_Body
179 RE_Set_Elem
180 RE_Set_NoRange_Char
181
182 %%
183
184 /*********************************************************************
185 * Grammar
186 *********************************************************************/
187
/* Start symbol: stores the finished regexp in the file-level ret_val. */
Pattern:
  RE_Body
  {
    ret_val = $1;
  }
;
191
/* Whole pattern: the body is wrapped between the anchors "^" and "$";
 * an empty or vanished body yields just "^$". */
RE_Body:
  /* empty */
  {
    $$ = mcopystr("^$");
  }
| RE_Elems
  {
    if ($1 == NULL) {
      $$ = mcopystr("^$");
    } else {
      $$ = mprintf("^%s$", $1);
      Free($1);
    }
  }
;
205
/* Alternation: branches separated by '|'. */
RE_Elems:
  RE_Alter_Elem
  {
    $$ = $1;
  }
| RE_Elems '|' RE_Alter_Elem
  {
    /* Parenthesis count reported by the lexer, plus one when the lookahead
     * token (yychar) is the closing ")" -- presumably because the lexer has
     * already accounted for that parenthesis; TODO confirm in the lexer. */
    const unsigned int open_groups =
      get_nof_parentheses() + (yychar == KW_Group_End ? 1u : 0u);
    /* Outside of any group every branch gets its own "$"/"^" anchors. */
    const bool nested = open_groups != 0;
    if ($3 == NULL) {
      if ($1 == NULL) $$ = NULL;
      else $$ = mputstr($1, nested ? "|()" : "$|^()");
    } else {
      if ($1 == NULL) $$ = mprintf(nested ? "()|%s" : "()$|^%s", $3);
      else $$ = mputprintf($1, nested ? "|%s" : "$|^%s", $3);
      Free($3);
    }
  }
;
221
/* One branch of an alternation: a sequence of concatenated elements. */
RE_Alter_Elem:
  RE_Concat_Elem
  {
    $$ = $1;
  }
| RE_Alter_Elem RE_Concat_Elem
  {
    /* The text of the new element is added to the sequence built so far;
     * the element's own string is released afterwards. */
    $$ = mputstr($1, $2);
    Free($2);
  }
;
230
/* One element, optionally followed by a repetition specifier. */
RE_Concat_Elem:
  RE_Multiply_Elem
  {
    $$ = $1;
  }
| RE_Multiply_Elem RE_Multiply_Statement
  {
    if ($1 == NULL || $2 == NULL) {
      /* Either side is missing (e.g. zero repetitions): the whole
       * element disappears from the output. */
      Free($1);
      Free($2);
      $$ = NULL;
    } else {
      $$ = mputstr($1, $2);
      Free($2);
    }
  }
| '*'
  {
    $$ = mcopystr("(........)*");
  }
;
246
/* Anything a repetition specifier may be attached to. */
RE_Multiply_Elem:
  RE_Group
  {
    $$ = $1;
  }
| RE_OneCharPos
  {
    $$ = $1;
  }
;
251
/* User-written group.  It is emitted with '<' instead of '(' as its opening
 * character, which keeps it distinguishable from the parentheses generated
 * by the other rules; every reduction also bumps user_groups. */
RE_Group:
  KW_Group_Begin KW_Group_End
  {
    ++user_groups;
    $$ = mcopystr("<)");
  }
| KW_Group_Begin RE_Elems KW_Group_End
  {
    ++user_groups;
    if ($2 == NULL) {
      $$ = mcopystr("<)");
    } else {
      $$ = mprintf("<%s)", $2);
      Free($2);
    }
  }
;
269
/* Repetition specifiers.  The result is the regexp suffix to append to the
 * repeated element: an empty string for "exactly once", NULL for "zero
 * times" (the element is then dropped by RE_Concat_Elem). */
RE_Multiply_Statement:
  '+'
  {
    $$ = mcopystr("+");
  }
| '#' '(' ',' ')'
  {
    $$ = mcopystr("*");
  }
| '#' TOK_Digit
  {
    switch ($2) {
    case 0:
      TTCN_pattern_warning("The number of repetitions is zero: `#0'.");
      $$ = NULL;
      break;
    case 1:
      $$ = memptystr();
      break;
    default:
      if ($2 > 9) TTCN_pattern_warning("Internal error: Invalid number of "
        "repetitions: `#%lu'.", $2);
      $$ = mprintf("{%lu}", $2);
      break;
    }
  }
| '#' '(' TOK_Number ')'
  {
    switch ($3) {
    case 0:
      TTCN_pattern_warning("The number of repetitions is zero: `#(0)'.");
      $$ = NULL;
      break;
    case 1:
      $$ = memptystr();
      break;
    default:
#ifdef RE_DUP_MAX
      if ($3 > RE_DUP_MAX) TTCN_pattern_warning("The number of repetitions in "
        "`#(%lu)' exceeds the limit allowed by this system (%d).", $3,
        RE_DUP_MAX);
#endif
      $$ = mprintf("{%lu}", $3);
      break;
    }
  }
| '#' '(' TOK_Number ',' TOK_Number ')'
  {
#ifdef RE_DUP_MAX
    if ($3 > RE_DUP_MAX) TTCN_pattern_warning("The minimum number of "
      "repetitions in `#(%lu,%lu)' exceeds the limit allowed by this system "
      "(%d).", $3, $5, RE_DUP_MAX);
    if ($5 > RE_DUP_MAX) TTCN_pattern_warning("The maximum number of "
      "repetitions in `#(%lu,%lu)' exceeds the limit allowed by this system "
      "(%d).", $3, $5, RE_DUP_MAX);
#endif
    if ($3 > $5) TTCN_pattern_error("The lower bound is higher than the upper "
      "bound in the number of repetitions: `#(%lu,%lu)'.", $3, $5);
    if ($3 != $5) {
      /* A real range; "0..1" has a shorthand. */
      if ($3 == 0 && $5 == 1) $$ = mcopystr("?");
      else $$ = mprintf("{%lu,%lu}", $3, $5);
    } else if ($3 == 0) {
      TTCN_pattern_warning("The number of repetitions is zero: `#(0,0)'.");
      $$ = NULL;
    } else if ($3 == 1) {
      $$ = memptystr();
    } else {
      $$ = mprintf("{%lu}", $3);
    }
  }
| '#' '(' ',' TOK_Number ')'
  {
    if ($4 != 0) {
#ifdef RE_DUP_MAX
      if ($4 > RE_DUP_MAX) TTCN_pattern_warning("The maximum number of "
        "repetitions in `#(,%lu)' exceeds the limit allowed by this system "
        "(%d).", $4, RE_DUP_MAX);
#endif
      if ($4 == 1) $$ = mcopystr("?");
      else $$ = mprintf("{0,%lu}", $4);
    } else {
      TTCN_pattern_warning("The number of repetitions is zero: `#(,0)'.");
      $$ = NULL;
    }
  }
| '#' '(' TOK_Number ',' ')'
  {
    if ($3 != 0) {
#ifdef RE_DUP_MAX
      if ($3 > RE_DUP_MAX) TTCN_pattern_warning("The minimum number of "
        "repetitions in `#(%lu,)' exceeds the limit allowed by this system "
        "(%d).", $3, RE_DUP_MAX);
#endif
      if ($3 == 1) $$ = mcopystr("+");
      else $$ = mprintf("{%lu,}", $3);
    } else {
      $$ = mcopystr("*");
    }
  }
;
366
/* Anything that matches exactly one character position.  Every universal
 * character occupies eight letters in the generated regexp (hence the
 * eight dots for "any character"); the fixed strings below appear to be the
 * letter-encoded codes of the corresponding ASCII characters. */
RE_OneCharPos:
  '?' {$$=mcopystr("(........)");}
| KW_BS_d {$$=mcopystr("(AAAAAAD[A-J])");}
| KW_BS_w {$$=mcopystr("(AAAAAAD[A-J]|AAAAAA(E[B-P]|F[A-K])|AAAAAA(G[B-P]|H[A-K]))");}
| KW_BS_t {$$=mcopystr("AAAAAAAJ");}
| KW_BS_n {$$=mcopystr("(AAAAAAA[K-N])");}
| KW_BS_r {$$=mcopystr("AAAAAAAN");}
| KW_BS_s {$$=mcopystr("(AAAAAAA[J-N]|AAAAAACA)");}
| KW_BS_b
  {
    TTCN_pattern_warning("Metacharacter `\\b' is not supported yet.");
    $$ = NULL;
  }
| TOK_Char
  {
    unsigned char c = $1;
    /* Report the code through the unsigned copy: a negative `char' passed
     * straight to %u / %02x would be printed sign-extended. */
    if ($1 <= 0) TTCN_pattern_error("Character with code %u "
      "(0x%02x) cannot be used in a pattern for type charstring.",
      (unsigned int)c, (unsigned int)c);
    $$ = Quad::get_hexrepr(c);
  }
| RE_Quadruple
  {
    $$ = Quad::get_hexrepr($1.value);
  }
| RE_Set
  {
    if ($1->is_empty()) {
      TTCN_pattern_error("Empty character set.");
      $$ = NULL;
    } else
      $$ = $1->generate_posix();
    /* The set object is no longer needed once its text has been produced. */
    delete $1;
  }
;
401
RE_Set:
  /* Semantic values of the bracket tokens used below:
   * RE_Set_Begin is 1 for "[^", 0 for "["
   * RE_Set_Begin_Rsbrkt is 1 for "[^]", 0 for "[]"
   * RE_Set_End is 1 for "-]", 0 for "]"
   * An empty body arrives as NULL and is replaced by a fresh QuadSet.
   */
  RE_Set_Begin RE_Set_Body RE_Set_End
  {
    $$ = ($2 != NULL) ? $2 : new QuadSet();
    if ($3) {
      /* closing "-]": the dash is a member of the set */
      if (!$$->add(new Quad('-')))
        TTCN_pattern_warning("Duplicate character `-' in the character set.");
    }
    if ($1) $$->set_negate(true);
  }
| RE_Set_Begin '-' RE_Set_Body RE_Set_End
  {
    /* leading dash: always a member of the set */
    $$ = ($3 != NULL) ? $3 : new QuadSet();
    if (!$$->add(new Quad('-')))
      TTCN_pattern_warning("Duplicate character `-' in the character set.");
    if ($1) $$->set_negate(true);
  }
| RE_Set_Begin_Rsbrkt RE_Set_Body RE_Set_End
  {
    /* "[]" / "[^]": the right bracket itself is a member of the set */
    $$ = ($2 != NULL) ? $2 : new QuadSet();
    if (!$$->add(new Quad(']')))
      TTCN_pattern_warning("Duplicate character `]' in the character set.");
    if ($3) {
      if (!$$->add(new Quad('-')))
        TTCN_pattern_warning("Duplicate character `-' in the character set.");
    }
    if ($1) $$->set_negate(true);
  }
| RE_Set_Begin_Rsbrkt '-' RE_Set_Range_Char RE_Set_Body RE_Set_End
  {
    /* "[]-x": a range whose lower bound is the right bracket */
    $$ = ($4 != NULL) ? $4 : new QuadSet();
    if ((unsigned int)']' > $3.value) {
      TTCN_pattern_error("Invalid range in the character set: the "
        "character code of the lower bound (%u) is higher than that of the "
        "upper bound (%u).", ']', (unsigned int)$3.value);
    }
    $$->add(new QuadInterval(Quad(']'), Quad($3.value)));
    if ($5 && !$$->add(new Quad('-')))
      TTCN_pattern_warning("Duplicate character `-' in the character set.");
    if ($1) $$->set_negate(true);
  }
;
462
/* Opening bracket of a set; the value is 1 when the set is negated. */
RE_Set_Begin:
  KW_Set_Begin
  {
    $$ = 0;
  }
| KW_Set_Begin_Neg
  {
    $$ = 1;
  }
;
467
/* Opening bracket immediately followed by "]"; the value is 1 when the
 * set is negated. */
RE_Set_Begin_Rsbrkt:
  KW_Set_Begin_Rsbrkt
  {
    $$ = 0;
  }
| KW_Set_Begin_Neg_Rsbrkt
  {
    $$ = 1;
  }
;
472
/* Closing bracket of a set; the value is 1 when it was written as "-]". */
RE_Set_End:
  KW_Set_End
  {
    $$ = 0;
  }
| KW_Set_Dash_End
  {
    $$ = 1;
  }
;
477
/* Zero or more set elements; NULL stands for "no elements yet". */
RE_Set_Body:
  /* empty */
  {
    $$ = NULL;
  }
| RE_Set_Body RE_Set_Elem
  {
    if ($1 == NULL) {
      /* first element: its set becomes the body */
      $$ = $2;
    } else {
      /* later elements are joined into the existing body, then deleted */
      $1->join($2);
      delete $2;
      $$ = $1;
    }
  }
;
490
/* One element of a set: a single character, a metacharacter class, or a
 * range; each is returned as a freshly built QuadSet. */
RE_Set_Elem:
  RE_Set_Range_Char
  {
    QuadSet *single = new QuadSet();
    single->add(new Quad($1.value));
    $$ = single;
  }
| RE_Set_NoRange_Char
  {
    $$ = $1;
  }
| RE_Set_Range_Char '-' RE_Set_Range_Char
  {
    const unsigned int lower = $1.value;
    const unsigned int upper = $3.value;
    /* The error is reported, but the interval is still added below. */
    if (lower > upper) {
      TTCN_pattern_error("Invalid range in the character set: the "
        "character code of the lower bound (%u) is higher than that of the "
        "upper bound (%u).", lower, upper);
    }
    QuadSet *range = new QuadSet();
    range->add(new QuadInterval(Quad(lower), Quad(upper)));
    $$ = range;
  }
;
509
/* A character that may serve as a bound of a range inside a set; the
 * result is its numeric code in $$.value. */
RE_Set_Range_Char:
  KW_BS_t { $$.value = '\t'; }
| KW_BS_r { $$.value = '\r'; }
| TOK_Char
  {
    /* Go through unsigned char, as RE_OneCharPos does: a negative `char'
     * would otherwise be sign-extended both in the diagnostic and in the
     * stored character code. */
    const unsigned char code = $1;
    if ($1 <= 0) TTCN_pattern_error("Character with code %u "
      "(0x%02x) cannot be used in a pattern for type charstring.",
      (unsigned int)code, (unsigned int)code);
    $$.value = code;
  }
| RE_Quadruple { $$.value = $1.value; }
;
521
/* Metacharacters inside a set that stand for a whole class of characters
 * and therefore cannot be the bound of a range. */
RE_Set_NoRange_Char:
  KW_BS_d
  {
    QuadSet *digits = new QuadSet();
    digits->add(new QuadInterval(Quad('0'), Quad('9')));
    $$ = digits;
  }
| KW_BS_w
  {
    QuadSet *word = new QuadSet();
    word->add(new QuadInterval(Quad('0'), Quad('9')));
    word->add(new QuadInterval(Quad('A'), Quad('Z')));
    word->add(new QuadInterval(Quad('a'), Quad('z')));
    $$ = word;
  }
| KW_BS_n
  {
    QuadSet *newlines = new QuadSet();
    newlines->add(new QuadInterval(Quad('\n'), Quad('\r')));
    $$ = newlines;
  }
| KW_BS_s
  {
    QuadSet *blanks = new QuadSet();
    blanks->add(new QuadInterval(Quad('\t'), Quad('\r')));
    blanks->add(new Quad(' '));
    $$ = blanks;
  }
| KW_BS_b
  {
    /* An empty set is still returned so that the enclosing rules always
     * receive a valid object. */
    $$ = new QuadSet();
    TTCN_pattern_error("Metacharacter `\\b' does not make any sense in a "
      "character set.");
  }
;
553
/* Universal character written as \q{group,plane,row,cell}.  Out-of-range
 * components and the all-zero quadruple are reported as errors; the
 * components are stored regardless (narrowed to unsigned char). */
RE_Quadruple:
  KW_BS_q '{' TOK_Number ',' TOK_Number ',' TOK_Number ',' TOK_Number '}'
  {
    const unsigned long group = $3;
    const unsigned long plane = $5;
    const unsigned long row = $7;
    const unsigned long cell = $9;
    if (group > 127) TTCN_pattern_error("The first number (group) of quadruple "
      "`\\q{%lu,%lu,%lu,%lu}' is too large. It should be in the range 0..127 "
      "instead of %lu.", group, plane, row, cell, group);
    if (plane > 255) TTCN_pattern_error("The second number (plane) of quadruple "
      "`\\q{%lu,%lu,%lu,%lu}' is too large. It should be in the range 0..255 "
      "instead of %lu.", group, plane, row, cell, plane);
    if (row > 255) TTCN_pattern_error("The third number (row) of quadruple "
      "`\\q{%lu,%lu,%lu,%lu}' is too large. It should be in the range 0..255 "
      "instead of %lu.", group, plane, row, cell, row);
    if (cell > 255) TTCN_pattern_error("The fourth number (cell) of quadruple "
      "`\\q{%lu,%lu,%lu,%lu}' is too large. It should be in the range 0..255 "
      "instead of %lu.", group, plane, row, cell, cell);
    if (group == 0 && plane == 0 && row == 0 && cell == 0)
      TTCN_pattern_error("Zero "
        "character (i.e. quadruple `\\q{0,0,0,0}') is not supported in a "
        "pattern for type universal charstring.");
    $$.comp.group = group;
    $$.comp.plane = plane;
    $$.comp.row = row;
    $$.comp.cell = cell;
  }
;
578
579 %%
580
581 /*********************************************************************
582 * Interface
583 *********************************************************************/
584
585 char* TTCN_pattern_to_regexp_uni(const char* p_pattern, int** groups)
586 {
587 /* if you want to debug */
588 //pattern_unidebug=1;
589
590 ret_val=NULL;
591 user_groups = 0;
592
593 yy_buffer_state *flex_buffer = pattern_yy_scan_string(p_pattern);
594 if(flex_buffer == NULL) {
595 TTCN_pattern_error("Flex buffer creation failed.");
596 return NULL;
597 }
598 init_pattern_yylex(&yylval);
599 if(pattern_uniparse()) {
600 Free(ret_val);
601 ret_val=NULL;
602 }
603 pattern_yy_delete_buffer(flex_buffer);
604
605 // needed by regexp to find user specified groups
606 if (user_groups && groups) {
607 *groups = (int*)Malloc(sizeof(int) * (user_groups + 1));
608 (*groups)[0] = user_groups;
609
610 int par = -1, index = 1;
611 for (size_t i = 0; i < strlen(ret_val); i++) {
612 if (ret_val[i] == '(') {
613 par++;
614 }
615 if (ret_val[i] == '<') {
616 ret_val[i] = '(';
617 par++;
618 (*groups)[index++] = par;
619 }
620 }
621 } else if (groups)
622 *groups = (int*)0;
623
624 return ret_val;
625 }
626
627 // Backwards compatibility shim
628 char* TTCN_pattern_to_regexp_uni(const char* p_pattern, int ere, int** /*groups*/)
629 {
630 TTCN_pattern_warning("TTCN_pattern_to_regexp_uni"
631 "(const char* p_pattern, int ere, int** groups) is deprecated");
632 if (ere != 1) TTCN_pattern_error(
633 "BRE is not supported for TTCN_pattern_to_regexp_uni");
634 return TTCN_pattern_to_regexp(p_pattern);
635 }
636
637
638 /*********************************************************************
639 * Static functions
640 *********************************************************************/
641
642 void pattern_unierror(const char *error_str)
643 {
644 TTCN_pattern_error("%s", error_str);
645 }
646
647 void yyprint(FILE *file, int type, const YYSTYPE& value)
648 {
649 switch (type) {
650 case TOK_Char:
651 fprintf(file, "'%c'", value.c);
652 break;
653 case TOK_Digit: case TOK_Number:
654 fprintf(file, "'%lu'", value.u);
655 break;
656 default:
657 break;
658 }
659 }
660
This page took 0.044194 seconds and 5 git commands to generate.