indentation problem in the generated code
[deliverable/titan.core.git] / common / pattern_p.y
1 /******************************************************************************
2 * Copyright (c) 2000-2016 Ericsson Telecom AB
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Eclipse Public License v1.0
5 * which accompanies this distribution, and is available at
6 * http://www.eclipse.org/legal/epl-v10.html
7 *
8 * Contributors:
9 * Balasko, Jeno
10 * Baranyi, Botond
11 * Delic, Adam
12 * Forstner, Matyas
13 * Raduly, Csaba
14 * Szabados, Kristof
15 * Szabo, Janos Zoltan – initial implementation
16 * Szalai, Gabor
17 *
18 ******************************************************************************/
19
20 /**
21 * Parser for TTCN-3 character patterns.
22 *
23 * \author Matyas Forstner (Matyas.Forstner@eth.ericsson.se)
24 *
25 * 20031121
26 */
27
28 %{
29
30 /*********************************************************************
31 * C(++) declarations
32 *********************************************************************/
33
34 #include <stdio.h>
35 #include <string.h>
36 #include <ctype.h>
37 #if defined(__CYGWIN__) && defined(__clang__)
38 /* Cygwin's clang 3.0 has its own limits.h, which does not bring in
39 the system's limits.h unless we define this macro: */
40 #define __STDC_HOSTED__ 1
41 #define _GCC_NEXT_LIMITS_H
42 #endif
43 #include <limits.h>
44
45 #include <regex.h>
46 #if !defined(RE_DUP_MAX)
47 /* RE_DUP_MAX is defined in limits.h or regex.h, except on Cygwin 1.5 */
48 # include <sys/syslimits.h>
49 #endif
50
51 #include "memory.h"
52 #include "pattern.hh"
53
54 /* defined in lexer c-file: */
55
56 union YYSTYPE;
57 extern int pattern_yylex();
58 extern void init_pattern_yylex(YYSTYPE *p);
59 struct yy_buffer_state;
60 extern yy_buffer_state* pattern_yy_scan_string(const char*);
61 extern int pattern_yylex_destroy();
62 extern unsigned int get_nof_parentheses();
63
64 /* defined in this file: */
65
66 /** The converted regexp. */
67 static char *ret_val;
68 /** Turns error messages for extended ASCII characters on or off */
69 static bool allow_ext_ascii = false;
70 /** The parser error reporting function. */
71 static void pattern_yyerror(const char *error_str);
72 /** Creates the POSIX equivalent of literal character \a c using the
73 * appropriate escape sequence when needed. */
74 static char *translate_character(char c);
75 /** Returns the printable equivalent of character \a c */
76 static char *print_character(char c);
77 /** Returns the printable equivalent of range \a lower .. \a upper */
78 static char *print_range(char lower, char upper);
79 /** structure for manipulating character sets */
80 struct character_set;
81 /** allocates, initializes and returns a new empty set */
82 static character_set *set_init();
83 /** allocates and returns a copy of \a set */
84 static character_set *set_copy(const character_set *set);
85 /** deallocates set \a set */
86 static void set_free(character_set *set);
87 /** returns whether set \a set is empty */
88 static int set_is_empty(const character_set *set);
89 /** returns whether set \a set contains all characters in range 1..127 */
90 static int set_is_full(const character_set *set);
91 /** returns whether set \a set contains the character \a c */
92 static int set_has_char(const character_set *set, char c);
93 /** adds character \a c to set \a set */
94 static void set_add_char(character_set *set, char c);
95 /** removes character \a c from set \a set */
96 static void set_remove_char(character_set *set, char c);
97 /** returns whether set \a set contains at least one character in the range
98 * \a lower .. \a upper */
99 static int set_has_range(const character_set *set, char lower, char upper);
100 /** adds range \a lower .. \a upper to set \a set */
101 static void set_add_range(character_set *set, char lower, char upper);
102 /** returns whether sets \a set1 and \a set2 have a non-empty intersection */
103 static int set_has_intersect(const character_set *set1,
104 const character_set *set2);
105 /** joins sets \a dst and \a src into \a dst */
106 static void set_join(character_set *dst, const character_set *src);
107 /** negates the set \a set */
108 static void set_negate(character_set *set);
109 /** reports the duplicate occurrences of characters and ranges in \a set1
110 * and \a set2 */
111 static void set_report_duplicates(const character_set *set1,
112 const character_set *set2);
113 /** generates the POSIX equivalent of \a set */
114 static char *set_generate_posix(const character_set *set);
115
116 #define YYERROR_VERBOSE
117
118 static void yyprint(FILE *file, int type, const YYSTYPE& value);
119 #define YYPRINT(f,t,v) yyprint(f,t,v)
120
121 %}
122
123 /*********************************************************************
124 * Bison declarations
125 *********************************************************************/
126
127 %name-prefix="pattern_yy"
128 %output="pattern_p.cc"
129 %defines
130 %verbose
131 %expect 0
132 %start Pattern
133 %debug
134
135 /*********************************************************************
136 * The union-type
137 * Must be kept in sync with the one in pattern_uni.y !
138 *********************************************************************/
139
/* Semantic value type of tokens and nonterminals.  The members b, c, s, u
 * and set are the ones referenced by the %token/%type declarations of this
 * grammar; q and qset exist only because the union is shared with
 * pattern_uni.y. */
140 %union {
141 int b; /* boolean */
142 char c; /* single character */
143 char *s; /* character string */
144 unsigned long int u; /* unsigned integer */
145 struct character_set *set; // used by nonterminals in pattern_p.y
146
147 union {
148 unsigned int value;
/* The order of the four byte-sized components is reversed on SPARC.
 * NOTE(review): presumably this keeps `group' in the most significant byte
 * of `value' on both big- and little-endian hosts -- confirm. */
149 #if defined(__sparc__) || defined(__sparc)
150 struct {
151 unsigned char group;
152 unsigned char plane;
153 unsigned char row;
154 unsigned char cell;
155 } comp;
156 #else
157 struct {
158 unsigned char cell;
159 unsigned char row;
160 unsigned char plane;
161 unsigned char group;
162 } comp;
163 #endif
164 } q; // single universal char, used by nonterminals in pattern_uni.y
165 class QuadSet* qset; // used by nonterminals in pattern_uni.y
166 }
167
168 /*********************************************************************
169 * Tokens
170 *********************************************************************/
171
172 %token <c> TOK_Char "<ordinary character>"
173 %token <u> TOK_Number "<number>"
174 %token <u> TOK_Digit "<digit>"
175
176 /*********************************************************************
177 * Keywords
178 *********************************************************************/
179
180 %token KW_BS_q "\\q"
181 %token KW_BS_d "\\d"
182 %token KW_BS_w "\\w"
183 %token KW_BS_t "\\t"
184 %token KW_BS_n "\\n"
185 %token KW_BS_r "\\r"
186 %token KW_BS_s "\\s"
187 %token KW_BS_b "\\b"
188
189 %token KW_Group_Begin "("
190 %token KW_Group_End ")"
191 %token KW_Set_Begin "["
192 %token KW_Set_Begin_Neg "[^"
193 %token KW_Set_Begin_Rsbrkt "[]"
194 %token KW_Set_Begin_Neg_Rsbrkt "[^]"
195 %token KW_Set_End "]"
196 %token KW_Set_Dash_End "-]"
197
198 /*********************************************************************
199 * semantic types of nonterminals
200 *********************************************************************/
201
202 %type <b> RE_Set_Begin RE_Set_Begin_Rsbrkt RE_Set_End
203 %type <c> RE_Set_Range_Char RE_Quadruple
204 %type <s> RE_Body RE_Elems RE_Alter_Elem RE_Concat_Elem
205 RE_Multiply_Elem RE_Multiply_Statement RE_Group
206 RE_OneCharPos
207 %type <set> RE_Set RE_Set_Body RE_Set_Elem RE_Set_NoRange_Char
208
209 /*********************************************************************
210 * Destructors
211 *********************************************************************/
212
213 %destructor { Free($$); }
214 RE_Alter_Elem
215 RE_Body
216 RE_Concat_Elem
217 RE_Elems
218 RE_Group
219 RE_Multiply_Elem
220 RE_Multiply_Statement
221 RE_OneCharPos
222
223 %destructor { set_free($$); }
224 RE_Set
225 RE_Set_Body
226 RE_Set_Elem
227 RE_Set_NoRange_Char
228
229 %%
230
231 /*********************************************************************
232 * Grammar
233 *********************************************************************/
234
/* Start symbol: stores the translated expression in the file-scope variable
 * ret_val, from where TTCN_pattern_to_regexp() returns it. */
Pattern:
  RE_Body
  {
    ret_val = $1;
  }
;
238
/* The complete pattern: the translated elements are enclosed between the
 * anchors `^' and `$'.  An empty pattern, or elements that translated to
 * NULL, yield "^$". */
RE_Body:
  /* empty */
  {
    $$ = mcopystr("^$");
  }
| RE_Elems
  {
    if ($1 == NULL) {
      $$ = mcopystr("^$");
    } else {
      $$ = mprintf("^%s$", $1);
      Free($1);
    }
  }
;
252
/* Alternatives separated by `|'.  Inside a group the alternatives are simply
 * joined with `|'; outside of any group each alternative gets its own
 * anchors ("$|^"), because RE_Body only anchors the outermost ends.  A NULL
 * alternative is rendered as the empty group "()". */
RE_Elems:
  RE_Alter_Elem
  {
    $$ = $1;
  }
| RE_Elems '|' RE_Alter_Elem
  {
    /* NOTE(review): the lookahead `)' is counted in as well, presumably
     * because the lexer has already closed that parenthesis when it is the
     * lookahead token -- confirm against the lexer. */
    const unsigned int open_groups =
      get_nof_parentheses() + (yychar == KW_Group_End ? 1 : 0);
    const bool in_group = open_groups != 0;
    if ($3 == NULL) {
      if ($1 == NULL) $$ = NULL;
      else $$ = mputstr($1, in_group ? "|()" : "$|^()");
    } else {
      if ($1 == NULL) $$ = mprintf(in_group ? "()|%s" : "()$|^%s", $3);
      else $$ = mputprintf($1, in_group ? "|%s" : "$|^%s", $3);
      Free($3);
    }
  }
;
268
/* One alternative: a sequence of concatenated elements.  The translation of
 * each further element is appended to the string built so far. */
RE_Alter_Elem:
  RE_Concat_Elem
  {
    $$ = $1;
  }
| RE_Alter_Elem RE_Concat_Elem
  {
    char *joined = mputstr($1, $2);
    Free($2);
    $$ = joined;
  }
;
277
/* A single element of a concatenation: an atom, an atom followed by a
 * repetition specifier, or the wildcard `*' (translated to ".*").  If either
 * the atom or the repetition translated to NULL the whole element is NULL. */
RE_Concat_Elem:
  RE_Multiply_Elem
  {
    $$ = $1;
  }
| RE_Multiply_Elem RE_Multiply_Statement
  {
    if ($1 == NULL || $2 == NULL) {
      Free($1);
      Free($2);
      $$ = NULL;
    } else {
      $$ = mputstr($1, $2);
      Free($2);
    }
  }
| '*'
  {
    $$ = mcopystr(".*");
  }
;
293
/* An atom that may carry a repetition specifier: a group or a
 * single-character position.  The translation is passed through. */
RE_Multiply_Elem:
  RE_Group
  {
    $$ = $1;
  }
| RE_OneCharPos
  {
    $$ = $1;
  }
;
298
/* A parenthesized group.  An empty group, or a group whose content
 * translated to NULL, becomes "()". */
RE_Group:
  KW_Group_Begin KW_Group_End
  {
    $$ = mcopystr("()");
  }
| KW_Group_Begin RE_Elems KW_Group_End
  {
    if ($2 == NULL) {
      $$ = mcopystr("()");
    } else {
      $$ = mprintf("(%s)", $2);
      Free($2);
    }
  }
;
314
/* Repetition specifiers.  The result is the POSIX repetition operator as a
 * string: NULL when the repetition count is zero (a warning is issued), the
 * empty string for exactly one repetition, and the shortest of `*', `+',
 * `?' or a `{..}' interval otherwise.  Counts above RE_DUP_MAX only produce
 * a warning (where that macro is available). */
RE_Multiply_Statement:
  '+'
  {
    $$ = mcopystr("+");
  }
| '#' '(' ',' ')'
  {
    $$ = mcopystr("*");
  }
| '#' TOK_Digit
  {
    switch ($2) {
    case 0:
      TTCN_pattern_warning("The number of repetitions is zero: `#0'.");
      $$ = NULL;
      break;
    case 1:
      $$ = memptystr();
      break;
    default:
      /* a single digit can never exceed 9 */
      if ($2 > 9) TTCN_pattern_warning("Internal error: Invalid number of "
        "repetitions: `#%lu'.", $2);
      $$ = mprintf("{%lu}", $2);
      break;
    }
  }
| '#' '(' TOK_Number ')'
  {
    switch ($3) {
    case 0:
      TTCN_pattern_warning("The number of repetitions is zero: `#(0)'.");
      $$ = NULL;
      break;
    case 1:
      $$ = memptystr();
      break;
    default:
#ifdef RE_DUP_MAX
      if ($3 > RE_DUP_MAX) TTCN_pattern_warning("The number of repetitions in "
        "`#(%lu)' exceeds the limit allowed by this system (%d).", $3,
        RE_DUP_MAX);
#endif
      $$ = mprintf("{%lu}", $3);
      break;
    }
  }
| '#' '(' TOK_Number ',' TOK_Number ')'
  {
#ifdef RE_DUP_MAX
    if ($3 > RE_DUP_MAX) TTCN_pattern_warning("The minimum number of "
      "repetitions in `#(%lu,%lu)' exceeds the limit allowed by this system "
      "(%d).", $3, $5, RE_DUP_MAX);
    if ($5 > RE_DUP_MAX) TTCN_pattern_warning("The maximum number of "
      "repetitions in `#(%lu,%lu)' exceeds the limit allowed by this system "
      "(%d).", $3, $5, RE_DUP_MAX);
#endif
    if ($3 > $5) TTCN_pattern_error("The lower bound is higher than the upper "
      "bound in the number of repetitions: `#(%lu,%lu)'.", $3, $5);
    if ($3 != $5) {
      /* a real interval */
      if ($3 == 0 && $5 == 1) $$ = mcopystr("?");
      else $$ = mprintf("{%lu,%lu}", $3, $5);
    } else if ($3 == 0) {
      TTCN_pattern_warning("The number of repetitions is zero: `#(0,0)'.");
      $$ = NULL;
    } else if ($3 == 1) {
      $$ = memptystr();
    } else {
      $$ = mprintf("{%lu}", $3);
    }
  }
| '#' '(' ',' TOK_Number ')'
  {
    if ($4 != 0) {
#ifdef RE_DUP_MAX
      if ($4 > RE_DUP_MAX) TTCN_pattern_warning("The maximum number of "
        "repetitions in `#(,%lu)' exceeds the limit allowed by this system "
        "(%d).", $4, RE_DUP_MAX);
#endif
      if ($4 == 1) $$ = mcopystr("?");
      else $$ = mprintf("{0,%lu}", $4);
    } else {
      TTCN_pattern_warning("The number of repetitions is zero: `#(,0)'.");
      $$ = NULL;
    }
  }
| '#' '(' TOK_Number ',' ')'
  {
    if ($3 != 0) {
#ifdef RE_DUP_MAX
      if ($3 > RE_DUP_MAX) TTCN_pattern_warning("The minimum number of "
        "repetitions in `#(%lu,)' exceeds the limit allowed by this system "
        "(%d).", $3, RE_DUP_MAX);
#endif
      if ($3 == 1) $$ = mcopystr("+");
      else $$ = mprintf("{%lu,}", $3);
    } else {
      $$ = mcopystr("*");
    }
  }
;
405
/* Anything that matches exactly one character position: the wildcard `?',
 * the backslash metacharacters, a literal character, a quadruple or a
 * character set.  `\b' and an empty character set translate to NULL. */
RE_OneCharPos:
  '?'
  {
    $$ = mcopystr(".");
  }
| KW_BS_d
  {
    $$ = mcopystr("[0-9]");
  }
| KW_BS_w
  {
    $$ = mcopystr("[0-9A-Za-z]");
  }
| KW_BS_t
  {
    $$ = mcopystr("\t");
  }
| KW_BS_n
  {
    $$ = mcopystr("[\n-\r]");
  }
| KW_BS_r
  {
    $$ = mcopystr("\r");
  }
| KW_BS_s
  {
    $$ = mcopystr("[\t-\r ]");
  }
| KW_BS_b
  {
    TTCN_pattern_warning("Metacharacter `\\b' is not supported yet.");
    $$ = NULL;
  }
| TOK_Char
  {
    /* NUL is never allowed; codes above 127 only with allow_ext_ascii */
    const unsigned char code = $1;
    const bool ext_rejected = code > 127 && !allow_ext_ascii;
    if (code == 0 || ext_rejected) TTCN_pattern_error("Character "
      "with code %u (0x%02x) cannot be used in a pattern for type charstring.", code, code);
    $$ = translate_character($1);
  }
| RE_Quadruple
  {
    $$ = translate_character($1);
  }
| RE_Set
  {
    if (!set_is_empty($1)) {
      $$ = set_generate_posix($1);
    } else {
      TTCN_pattern_error("Empty character set.");
      $$ = NULL;
    }
    set_free($1);
  }
;
439
/* A complete character set.
 *
 * RE_Set_Begin is 1 for "[^", 0 for "["
 * RE_Set_Begin_Rsbrkt is 1 for "[^]", 0 for "[]"
 * RE_Set_End is 1 for "-]", 0 for "]"
 *
 * The set collected by RE_Set_Body (or a fresh empty one) is extended with
 * the literal `-' and/or `]' implied by the opening and closing tokens, a
 * warning is issued if such a character is already a member, and the set is
 * finally negated when it was opened with `^'.
 */
RE_Set:
  RE_Set_Begin RE_Set_Body RE_Set_End
  {
    if ($2 != NULL) $$ = $2;
    else $$ = set_init();
    if ($3) {
      if (set_has_char($$, '-'))
        TTCN_pattern_warning("Duplicate character `-' in the character set.");
      else set_add_char($$, '-');
    }
    if ($1) set_negate($$);
  }
| RE_Set_Begin '-' RE_Set_Body RE_Set_End
  {
    if ($3 != NULL) $$ = $3;
    else $$ = set_init();
    /* the leading dash */
    if (set_has_char($$, '-'))
      TTCN_pattern_warning("Duplicate character `-' in the character set.");
    else set_add_char($$, '-');
    /* the trailing dash: by now `-' is always a member, so this warns */
    if ($4) {
      if (set_has_char($$, '-'))
        TTCN_pattern_warning("Duplicate character `-' in the character set.");
      else set_add_char($$, '-');
    }
    if ($1) set_negate($$);
  }
| RE_Set_Begin_Rsbrkt RE_Set_Body RE_Set_End
  {
    if ($2 != NULL) $$ = $2;
    else $$ = set_init();
    if (set_has_char($$, ']'))
      TTCN_pattern_warning("Duplicate character `]' in the character set.");
    else set_add_char($$, ']');
    if ($3) {
      if (set_has_char($$, '-'))
        TTCN_pattern_warning("Duplicate character `-' in the character set.");
      else set_add_char($$, '-');
    }
    if ($1) set_negate($$);
  }
| RE_Set_Begin_Rsbrkt '-' RE_Set_Range_Char RE_Set_Body RE_Set_End
  {
    if ($4 != NULL) $$ = $4;
    else $$ = set_init();
    /* The bounds are compared as character codes.  Comparing against the
     * plain char would treat codes above 127 as negative where char is
     * signed and report a range error that contradicts the codes printed
     * in the message. */
    const unsigned char upper_code = (unsigned char)$3;
    if (']' > upper_code) {
      /* the printable form of the range is needed for the message only */
      char *range_str = print_range(']', $3);
      TTCN_pattern_error("Invalid range `%s' in the character set: the "
        "character code of the lower bound (%u) is higher than that of the "
        "upper bound (%u).", range_str, ']', upper_code);
      Free(range_str);
    } else {
      if (set_has_range($$, ']', $3)) {
        character_set *tmpset = set_init();
        set_add_range(tmpset, ']', $3);
        set_report_duplicates($$, tmpset);
        set_free(tmpset);
      }
      /* an inverted range is empty, so only valid ranges are added */
      set_add_range($$, ']', $3);
    }
    if ($5) {
      if (set_has_char($$, '-'))
        TTCN_pattern_warning("Duplicate character `-' in the character set.");
      else set_add_char($$, '-');
    }
    if ($1) set_negate($$);
  }
;
511
/* Opening of a set: the value tells whether the set is negated. */
RE_Set_Begin:
  KW_Set_Begin
  {
    $$ = 0;
  }
| KW_Set_Begin_Neg
  {
    $$ = 1;
  }
;
516
/* Opening of a set whose first member is `]': the value tells whether the
 * set is negated. */
RE_Set_Begin_Rsbrkt:
  KW_Set_Begin_Rsbrkt
  {
    $$ = 0;
  }
| KW_Set_Begin_Neg_Rsbrkt
  {
    $$ = 1;
  }
;
521
/* Closing of a set: the value tells whether a literal `-' precedes the
 * closing bracket. */
RE_Set_End:
  KW_Set_End
  {
    $$ = 0;
  }
| KW_Set_Dash_End
  {
    $$ = 1;
  }
;
526
/* The members of a set.  NULL stands for "no element seen yet"; afterwards
 * every further element is merged into the accumulated set, and members
 * occurring on both sides are reported as duplicates. */
RE_Set_Body:
  /* empty */
  {
    $$ = NULL;
  }
| RE_Set_Body RE_Set_Elem
  {
    if ($1 == NULL) {
      /* first element: take it over as it is */
      $$ = $2;
    } else {
      if (set_has_intersect($1, $2)) set_report_duplicates($1, $2);
      set_join($1, $2);
      set_free($2);
      $$ = $1;
    }
  }
;
539
/* One element of a set: a single character, a metacharacter standing for
 * several characters, or a range.  The result is always a newly allocated
 * set containing the characters of the element. */
RE_Set_Elem:
  RE_Set_Range_Char
  {
    $$ = set_init();
    set_add_char($$, $1);
  }
| RE_Set_NoRange_Char { $$ = $1; }
| RE_Set_Range_Char '-' RE_Set_Range_Char
  {
    /* The bounds are compared as character codes.  A comparison of plain
     * chars would see codes above 127 as negative where char is signed: an
     * inverted range with such a lower bound would be accepted silently and
     * a valid range with such an upper bound would be reported as invalid. */
    const unsigned char lower_code = (unsigned char)$1;
    const unsigned char upper_code = (unsigned char)$3;
    $$ = set_init();
    if (lower_code > upper_code) {
      char *range_str = print_range($1, $3);
      TTCN_pattern_error("Invalid range `%s' in the character set: the "
        "character code of the lower bound (%u) is higher than that of the "
        "upper bound (%u).", range_str, lower_code, upper_code);
      Free(range_str);
      /* the inverted range contributes no characters */
    } else {
      set_add_range($$, $1, $3);
    }
  }
;
560
/* A character that may stand alone in a set or serve as a bound of a range:
 * `\t', `\r', a literal character or a quadruple. */
RE_Set_Range_Char:
  KW_BS_t
  {
    $$ = '\t';
  }
| KW_BS_r
  {
    $$ = '\r';
  }
| TOK_Char
  {
    /* NUL is never allowed; codes above 127 only with allow_ext_ascii */
    const unsigned char code = $1;
    const bool ext_rejected = code > 127 && !allow_ext_ascii;
    if (code == 0 || ext_rejected) TTCN_pattern_error("Character "
      "with code %u (0x%02x) cannot be used in a pattern for type charstring.", code, code);
    $$ = $1;
  }
| RE_Quadruple
  {
    $$ = $1;
  }
;
573
/* Metacharacters that denote several characters and therefore cannot be a
 * bound of a range.  Each one yields a new set holding the characters it
 * stands for; `\b' is an error and yields an empty set. */
RE_Set_NoRange_Char:
  KW_BS_d
  {
    character_set *digits = set_init();
    set_add_range(digits, '0', '9');
    $$ = digits;
  }
| KW_BS_w
  {
    character_set *word_chars = set_init();
    set_add_range(word_chars, '0', '9');
    set_add_range(word_chars, 'A', 'Z');
    set_add_range(word_chars, 'a', 'z');
    $$ = word_chars;
  }
| KW_BS_n
  {
    character_set *newlines = set_init();
    set_add_range(newlines, '\n', '\r');
    $$ = newlines;
  }
| KW_BS_s
  {
    character_set *whitespace = set_init();
    set_add_range(whitespace, '\t', '\r');
    set_add_char(whitespace, ' ');
    $$ = whitespace;
  }
| KW_BS_b
  {
    TTCN_pattern_error("Metacharacter `\\b' does not make any sense in a "
      "character set.");
    $$ = set_init();
  }
;
605
/* A character given as quadruple `\q{group,plane,row,cell}'.  Every
 * component is range checked; for type charstring only \q{0,0,0,1} ..
 * \q{0,0,0,127} are accepted.  The value is the fourth component (cell)
 * converted to char, also after an error has been reported. */
RE_Quadruple:
  KW_BS_q '{' TOK_Number ',' TOK_Number ',' TOK_Number ',' TOK_Number '}'
  {
    const unsigned long group = $3;
    const unsigned long plane = $5;
    const unsigned long row = $7;
    const unsigned long cell = $9;
    /* range of the individual components */
    if (group > 127) TTCN_pattern_error("The first number (group) of quadruple "
      "`\\q{%lu,%lu,%lu,%lu}' is too large. It should be in the range 0..127 "
      "instead of %lu.", group, plane, row, cell, group);
    if (plane > 255) TTCN_pattern_error("The second number (plane) of quadruple "
      "`\\q{%lu,%lu,%lu,%lu}' is too large. It should be in the range 0..255 "
      "instead of %lu.", group, plane, row, cell, plane);
    if (row > 255) TTCN_pattern_error("The third number (row) of quadruple "
      "`\\q{%lu,%lu,%lu,%lu}' is too large. It should be in the range 0..255 "
      "instead of %lu.", group, plane, row, cell, row);
    if (cell > 255) TTCN_pattern_error("The fourth number (cell) of quadruple "
      "`\\q{%lu,%lu,%lu,%lu}' is too large. It should be in the range 0..255 "
      "instead of %lu.", group, plane, row, cell, cell);
    /* restrictions of type charstring */
    if (group > 0 || plane > 0 || row > 0 || cell > 127) TTCN_pattern_error("Quadruple "
      "`\\q{%lu,%lu,%lu,%lu}' is not valid in a pattern for type charstring.",
      group, plane, row, cell);
    if (group == 0 && plane == 0 && row == 0 && cell == 0) TTCN_pattern_error("Zero "
      "character (i.e. quadruple `\\q{0,0,0,0}') is not supported in a "
      "pattern for type charstring.");
    $$ = cell;
  }
;
630
631 %%
632
633 /*********************************************************************
634 * Interface
635 *********************************************************************/
636
637 char* TTCN_pattern_to_regexp(const char* p_pattern, bool utf8)
638 {
639 /* if you want to debug */
640 //pattern_yydebug=1;
641
642 ret_val=NULL;
643
644 /* allow extended ASCII characters if the pattern is in UTF-8 format */
645 allow_ext_ascii = utf8;
646
647 yy_buffer_state *flex_buffer = pattern_yy_scan_string(p_pattern);
648 if(flex_buffer == NULL) {
649 TTCN_pattern_error("Flex buffer creation failed.");
650 return NULL;
651 }
652 init_pattern_yylex(&yylval);
653 if(pattern_yyparse()) {
654 Free(ret_val);
655 ret_val=NULL;
656 }
657 pattern_yylex_destroy();
658 return ret_val;
659 }
660
661 // Backwards compatibility shim
662 char* TTCN_pattern_to_regexp(const char* p_pattern, int ere)
663 {
664 TTCN_pattern_warning(
665 "TTCN_pattern_to_regexp(const char* p_pattern, int ere) is deprecated");
666 if (ere != 1) TTCN_pattern_error(
667 "BRE is not supported for TTCN_pattern_to_regexp");
668 return TTCN_pattern_to_regexp(p_pattern);
669 }
670
671 /*********************************************************************
672 * Static functions
673 *********************************************************************/
674
675 /// Error reporting function
676 void pattern_yyerror(const char *error_str)
677 {
678 TTCN_pattern_error("%s", error_str);
679 }
680
681 /** Escape plain characters which would be metacharacters in a regex.
682 *
683 * @param c plain character
684 * @return a newly allocated string which must be Free() 'd
685 */
686 char *translate_character(char c)
687 {
688 int escape_needed = 0;
689 switch (c) {
690 case '|':
691 case '+':
692 case '?':
693 case '{':
694 case '}':
695 case '(':
696 case ')':
697 case '.':
698 case '^':
699 case '$':
700 case '[':
701 case '*':
702 case '\\':
703 escape_needed = 1;
704 break;
705 }
706 if (escape_needed) return mprintf("\\%c", c);
707 else return mputc(NULL, c);
708 }
709
710 char *print_character(char c)
711 {
712 switch (c) {
713 case '\t':
714 return mcopystr("\\t");
715 case '\r':
716 return mcopystr("\\r");
717 default:
718 if (isprint((unsigned char)c)) return mprintf("%c", c);
719 else return mprintf("\\q{0,0,0,%u}", (unsigned char)c);
720 }
721 }
722
723 char *print_range(char lower, char upper)
724 {
725 char *range_str = print_character(lower);
726 range_str = mputc(range_str, '-');
727 char *upper_str = print_character(upper);
728 range_str = mputstr(range_str, upper_str);
729 Free(upper_str);
730 return range_str;
731 }
732
/* Number of bits in one element of character_set::set_members (the factor 8
 * assumes 8-bit bytes). */
733 #define CS_BITS_PER_ELEM (8 * sizeof(unsigned long))
/* Number of elements needed for 128 membership bits, rounded up. */
734 #define CS_NOF_ELEMS ((128 + CS_BITS_PER_ELEM - 1) / CS_BITS_PER_ELEM)
735
/* A set of characters stored as a bit field: character code c is a member if
 * bit (c % CS_BITS_PER_ELEM) of element (c / CS_BITS_PER_ELEM) is set, as
 * done by set_add_char() and set_has_char(). */
736 struct character_set {
737 unsigned long set_members[CS_NOF_ELEMS];
738 };
739
740 character_set *set_init()
741 {
742 character_set *set = (character_set*)Malloc(sizeof(*set));
743 memset(set->set_members, 0, sizeof(set->set_members));
744 return set;
745 }
746
747 character_set *set_copy(const character_set *set)
748 {
749 character_set *set2 = (character_set*)Malloc(sizeof(*set2));
750 memcpy(set2, set, sizeof(*set2));
751 return set2;
752 }
753
754 void set_free(character_set *set)
755 {
756 Free(set);
757 }
758
759 int set_is_empty(const character_set *set)
760 {
761 if ((set->set_members[0] & ~1UL) != 0) return 0;
762 for (size_t i = 1; i < CS_NOF_ELEMS; i++)
763 if (set->set_members[i] != 0) return 0;
764 return 1;
765 }
766
767 int set_is_full(const character_set *set)
768 {
769 if (~(set->set_members[0] | 1UL) != 0) return 0;
770 for (size_t i = 1; i < CS_NOF_ELEMS; i++)
771 if (~set->set_members[i] != 0) return 0;
772 return 1;
773 }
774
775 int set_has_char(const character_set *set, char c)
776 {
777 if (set->set_members[c / CS_BITS_PER_ELEM] & 1UL << c % CS_BITS_PER_ELEM)
778 return 1;
779 else return 0;
780 }
781
782 void set_add_char(character_set *set, char c)
783 {
784 set->set_members[c / CS_BITS_PER_ELEM] |= 1UL << c % CS_BITS_PER_ELEM;
785 }
786
787 void set_remove_char(character_set *set, char c)
788 {
789 set->set_members[c / CS_BITS_PER_ELEM] &= ~(1UL << c % CS_BITS_PER_ELEM);
790 }
791
792 int set_has_range(const character_set *set, char lower, char upper)
793 {
794 for (size_t i = lower; i <= (unsigned char)upper; i++)
795 if (set->set_members[i / CS_BITS_PER_ELEM] & 1UL << i % CS_BITS_PER_ELEM)
796 return 1;
797 return 0;
798 }
799
800 void set_add_range(character_set *set, char lower, char upper)
801 {
802 for (size_t i = lower; i <= (unsigned char)upper; i++)
803 set->set_members[i / CS_BITS_PER_ELEM] |= 1UL << i % CS_BITS_PER_ELEM;
804 }
805
806 int set_has_intersect(const character_set *set1, const character_set *set2)
807 {
808 for (size_t i = 0; i < CS_NOF_ELEMS; i++)
809 if (set1->set_members[i] & set2->set_members[i]) return 1;
810 return 0;
811 }
812
813 void set_join(character_set *dst, const character_set *src)
814 {
815 for (size_t i = 0; i < CS_NOF_ELEMS; i++)
816 dst->set_members[i] |= src->set_members[i];
817 }
818
819 void set_negate(character_set *set)
820 {
821 for (size_t i = 0; i < CS_NOF_ELEMS; i++)
822 set->set_members[i] = ~set->set_members[i];
823 }
824
825 void set_report_duplicates(const character_set *set1,
826 const character_set *set2)
827 {
828 for (unsigned char i = 0; i <= 127; ) {
829 for (i++; i <= 127; i++)
830 if (set_has_char(set2, i) && set_has_char(set1, i)) break;
831 if (i > 127) break;
832 char lower = i;
833 for (i++; i <= 127; i++)
834 if (!set_has_char(set2, i) || !set_has_char(set1, i)) break;
835 char upper = i - 1;
836 if (lower < upper) {
837 char *range_str = print_range(lower, upper);
838 TTCN_pattern_warning("Duplicate range `%s' in the character set.",
839 range_str);
840 Free(range_str);
841 } else {
842 char *char_str = print_character(lower);
843 if(lower == '\r' ){
844 TTCN_pattern_warning("Duplicate character `%s' in the character "
845 "set. Please note the \\n includes the \\r implicitly. "
846 "Use \\q{0,0,0,10} if you would like to match the LF only.", char_str);
847 } else {
848 TTCN_pattern_warning("Duplicate character `%s' in the character "
849 "set.", char_str);
850 }
851 Free(char_str);
852 }
853 }
854 }
855
856 static char *append_posix_body(char *set_body, const character_set *set)
857 {
858 for (unsigned char i = 0; i <= 127; ) {
859 for (i++; i <= 127; i++) if (set_has_char(set, i)) break;
860 if (i > 127) break;
861 char lower = i;
862 set_body = mputc(set_body, lower);
863 for (i++; i <= 127; i++) if (!set_has_char(set, i)) break;
864 char upper = i - 1;
865 if (lower < upper) {
866 if (lower + 1 < upper) set_body = mputc(set_body, '-');
867 set_body = mputc(set_body, upper);
868 }
869 }
870 return set_body;
871 }
872
873 static char *generate_posix_body(character_set *set)
874 {
875 int has_caret;
876 if (set_has_char(set, '^') && !(set_has_char(set, '^' - 1) &&
877 set_has_char(set, '^' + 1))) {
878 set_remove_char(set, '^');
879 has_caret = 1;
880 } else has_caret = 0;
881 int has_dash;
882 if (set_has_char(set, '-') && !(set_has_char(set, '-' - 1) &&
883 set_has_char(set, '-' + 1))) {
884 set_remove_char(set, '-');
885 has_dash = 1;
886 } else has_dash = 0;
887 int has_rsbrkt;
888 if (set_has_char(set, ']') && !(set_has_char(set, ']' - 1) &&
889 set_has_char(set, ']' + 1))) {
890 set_remove_char(set, ']');
891 has_rsbrkt = 1;
892 } else has_rsbrkt = 0;
893 char *set_body = memptystr();
894 if (set_is_empty(set) && !has_rsbrkt) {
895 /* the `-' must precede the `^' */
896 if (has_dash) set_body = mputc(set_body, '-');
897 if (has_caret) set_body = mputc(set_body, '^');
898 } else {
899 /* order: ']', others, '^', '-' */
900 if (has_rsbrkt) set_body = mputc(set_body, ']');
901 set_body = append_posix_body(set_body, set);
902 if (has_caret) set_body = mputc(set_body, '^');
903 if (has_dash) set_body = mputc(set_body, '-');
904 }
905 return set_body;
906 }
907
908 static char *generate_posix_body_compl(character_set *set)
909 {
910 set_negate(set);
911 int has_dash;
912 if (set_has_char(set, '-') && !(set_has_char(set, '-' - 1) &&
913 set_has_char(set, '-' + 1))) {
914 set_remove_char(set, '-');
915 has_dash = 1;
916 } else has_dash = 0;
917 int has_rsbrkt;
918 if (set_has_char(set, ']') && !(set_has_char(set, ']' - 1) &&
919 set_has_char(set, ']' + 1))) {
920 set_remove_char(set, ']');
921 has_rsbrkt = 1;
922 } else has_rsbrkt = 0;
923 char *set_body = mcopystr("^");
924 /* order: ']', others, '-' */
925 if (has_rsbrkt) set_body = mputc(set_body, ']');
926 set_body = append_posix_body(set_body, set);
927 if (has_dash) set_body = mputc(set_body, '-');
928 return set_body;
929 }
930
931 char *set_generate_posix(const character_set *set)
932 {
933 /* a full set can only be represented in this way: */
934 if (set_is_full(set)) return mcopystr(".");
935 character_set *tempset = set_copy(set);
936 char *set_body = generate_posix_body(tempset);
937 set_free(tempset);
938 char *posix_str;
939 if (set_body[0] == '\0') {
940 Free(set_body);
941 TTCN_pattern_error("Internal error: empty POSIX set.");
942 return NULL;
943 }
944 /* do not use the set notation in POSIX if the set contains only one
945 * character */
946 if (set_body[1] == '\0') posix_str = translate_character(set_body[0]);
947 else {
948 /* create the complemented version of the same set */
949 tempset = set_copy(set);
950 char *compl_body = generate_posix_body_compl(tempset);
951 set_free(tempset);
952 if (compl_body[0] == '\0') {
953 Free(set_body);
954 Free(compl_body);
955 TTCN_pattern_error("Internal error: empty complemented POSIX set.");
956 return NULL;
957 }
958 /* use the complemented form in the POSIX equivalent if it is the shorter
959 * one */
960 if (mstrlen(compl_body) < mstrlen(set_body))
961 posix_str = mprintf("[%s]", compl_body);
962 else posix_str = mprintf("[%s]", set_body);
963 Free(compl_body);
964 }
965 Free(set_body);
966 return posix_str;
967 }
968
969 void yyprint(FILE *file, int type, const YYSTYPE& value)
970 {
971 switch (type) {
972 case TOK_Char:
973 fprintf(file, "'%c'", value.c);
974 break;
975 case TOK_Digit: case TOK_Number:
976 fprintf(file, "'%lu'", value.u);
977 break;
978 default:
979 break;
980 }
981 }
982
This page took 0.065686 seconds and 5 git commands to generate.