Titan Core Initial Contribution
[deliverable/titan.core.git] / common / pattern_p.y
1 /******************************************************************************
2 * Copyright (c) 2000-2014 Ericsson Telecom AB
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Eclipse Public License v1.0
5 * which accompanies this distribution, and is available at
6 * http://www.eclipse.org/legal/epl-v10.html
7 ******************************************************************************/
8
9 /**
10 * Parser for TTCN-3 character patterns.
11 *
12 * \author Matyas Forstner (Matyas.Forstner@eth.ericsson.se)
13 *
14 * 20031121
15 */
16
17 %{
18
19 /*********************************************************************
20 * C(++) declarations
21 *********************************************************************/
22
23 #include <stdio.h>
24 #include <string.h>
25 #include <ctype.h>
26 #if defined(__CYGWIN__) && defined(__clang__)
27 /* Cygwin's clang 3.0 has its own limits.h, which does not bring in
28 the system's limits.h unless we define this macro: */
29 #define __STDC_HOSTED__ 1
30 #define _GCC_NEXT_LIMITS_H
31 #endif
32 #include <limits.h>
33
34 #include <regex.h>
35 #if !defined(RE_DUP_MAX)
36 /* RE_DUP_MAX is defined in limits.h or regex.h, except on Cygwin 1.5 */
37 # include <sys/syslimits.h>
38 #endif
39
40 #include "memory.h"
41 #include "pattern.hh"
42
43 /* defined in lexer c-file: */
44
45 union YYSTYPE;
46 extern int pattern_yylex();
47 extern void init_pattern_yylex(YYSTYPE *p);
48 struct yy_buffer_state;
49 extern yy_buffer_state* pattern_yy_scan_string(const char*);
50 extern int pattern_yylex_destroy();
51 extern unsigned int get_nof_parentheses();
52
53 /* defined in this file: */
54
55 /** The converted regexp. */
56 static char *ret_val;
57 /** The parser error reporting function. */
58 static void pattern_yyerror(const char *error_str);
59 /** Creates the POSIX equivalent of literal character \a c using the
60 * appropriate escape sequence when needed. */
61 static char *translate_character(char c);
62 /** Returns the printable equivalent of character \a c */
63 static char *print_character(char c);
64 /** Returns the printable equivalent of range \a lower .. \a upper */
65 static char *print_range(char lower, char upper);
66 /** structure for manipulating character sets */
67 struct character_set;
68 /** allocates, initializes and returns a new empty set */
69 static character_set *set_init();
70 /** allocates and returns a copy of \a set */
71 static character_set *set_copy(const character_set *set);
72 /** deallocates set \a set */
73 static void set_free(character_set *set);
74 /** returns whether set \a set is empty */
75 static int set_is_empty(const character_set *set);
76 /** returns whether set \a set contains all characters in range 1..127 */
77 static int set_is_full(const character_set *set);
78 /** returns whether set \a set contains the character \a c */
79 static int set_has_char(const character_set *set, char c);
80 /** adds character \a c to set \a set */
81 static void set_add_char(character_set *set, char c);
82 /** removes character \a c from set \a set */
83 static void set_remove_char(character_set *set, char c);
84 /** returns whether set \a set contains at least one character in the range
85 * \a lower .. \a upper */
86 static int set_has_range(const character_set *set, char lower, char upper);
87 /** adds range \a lower .. \a upper to set \a set */
88 static void set_add_range(character_set *set, char lower, char upper);
89 /** returns whether sets \a set1 and \a set2 have a non-empty intersection */
90 static int set_has_intersect(const character_set *set1,
91 const character_set *set2);
92 /** joins sets \a dst and \a src into \a dst */
93 static void set_join(character_set *dst, const character_set *src);
94 /** negates the set \a set */
95 static void set_negate(character_set *set);
96 /** reports the duplicate occurrences of characters and ranges in \a set1
97 * and \a set2 */
98 static void set_report_duplicates(const character_set *set1,
99 const character_set *set2);
100 /** generates the POSIX equivalent of \a set */
101 static char *set_generate_posix(const character_set *set);
102
103 #define YYERROR_VERBOSE
104
105 static void yyprint(FILE *file, int type, const YYSTYPE& value);
106 #define YYPRINT(f,t,v) yyprint(f,t,v)
107
108 %}
109
110 /*********************************************************************
111 * Bison declarations
112 *********************************************************************/
113
114 %name-prefix="pattern_yy"
115 %output="pattern_p.cc"
116 %defines
117 %verbose
118 %expect 0
119 %start Pattern
120 %debug
121
122 /*********************************************************************
123 * The union-type
124 * Must be kept in sync with the one in pattern_uni.y !
125 *********************************************************************/
126
/* Semantic value type of the tokens and nonterminals. The members b, c, s,
 * u and set are the ones referenced by the %token and %type declarations of
 * this grammar; q and qset belong to pattern_uni.y. */
127 %union {
128 int b; /* boolean (0 or 1) */
129 char c; /* single character */
130 char *s; /* character string */
131 unsigned long int u; /* unsigned integer */
132 struct character_set *set; // character set; used by nonterminals in pattern_p.y
133
134 union {
135 unsigned int value;
/* NOTE(review): the member order of comp is reversed on SPARC, presumably so
 * that group/plane/row/cell overlay the same bits of value on a big-endian
 * host -- confirm. */
136 #if defined(__sparc__) || defined(__sparc)
137 struct {
138 unsigned char group;
139 unsigned char plane;
140 unsigned char row;
141 unsigned char cell;
142 } comp;
143 #else
144 struct {
145 unsigned char cell;
146 unsigned char row;
147 unsigned char plane;
148 unsigned char group;
149 } comp;
150 #endif
151 } q; // single universal char, used by nonterminals in pattern_uni.y
152 class QuadSet* qset; // used by nonterminals in pattern_uni.y
153 }
154
155 /*********************************************************************
156 * Tokens
157 *********************************************************************/
158
159 %token <c> TOK_Char "<ordinary character>"
160 %token <u> TOK_Number "<number>"
161 %token <u> TOK_Digit "<digit>"
162
163 /*********************************************************************
164 * Keywords
165 *********************************************************************/
166
167 %token KW_BS_q "\\q"
168 %token KW_BS_d "\\d"
169 %token KW_BS_w "\\w"
170 %token KW_BS_t "\\t"
171 %token KW_BS_n "\\n"
172 %token KW_BS_r "\\r"
173 %token KW_BS_s "\\s"
174 %token KW_BS_b "\\b"
175
176 %token KW_Group_Begin "("
177 %token KW_Group_End ")"
178 %token KW_Set_Begin "["
179 %token KW_Set_Begin_Neg "[^"
180 %token KW_Set_Begin_Rsbrkt "[]"
181 %token KW_Set_Begin_Neg_Rsbrkt "[^]"
182 %token KW_Set_End "]"
183 %token KW_Set_Dash_End "-]"
184
185 /*********************************************************************
186 * semantic types of nonterminals
187 *********************************************************************/
188
189 %type <b> RE_Set_Begin RE_Set_Begin_Rsbrkt RE_Set_End
190 %type <c> RE_Set_Range_Char RE_Quadruple
191 %type <s> RE_Body RE_Elems RE_Alter_Elem RE_Concat_Elem
192 RE_Multiply_Elem RE_Multiply_Statement RE_Group
193 RE_OneCharPos
194 %type <set> RE_Set RE_Set_Body RE_Set_Elem RE_Set_NoRange_Char
195
196 /*********************************************************************
197 * Destructors
198 *********************************************************************/
199
200 %destructor { Free($$); }
201 RE_Alter_Elem
202 RE_Body
203 RE_Concat_Elem
204 RE_Elems
205 RE_Group
206 RE_Multiply_Elem
207 RE_Multiply_Statement
208 RE_OneCharPos
209
210 %destructor { set_free($$); }
211 RE_Set
212 RE_Set_Body
213 RE_Set_Elem
214 RE_Set_NoRange_Char
215
216 %%
217
218 /*********************************************************************
219 * Grammar
220 *********************************************************************/
221
/* Start symbol: stores the finished regular expression in ret_val. */
Pattern:
  RE_Body
  {
    ret_val = $1;
  }
;
225
/* Whole pattern: the translated body is wrapped between the anchors '^' and
 * '$'. An empty pattern, or a body that reduced to NULL, gives "^$". */
RE_Body:
  /* empty */
  {
    $$ = mcopystr("^$");
  }
| RE_Elems
  {
    if ($1 == NULL) {
      $$ = mcopystr("^$");
    } else {
      $$ = mprintf("^%s$", $1);
      Free($1);
    }
  }
;
239
/* Alternatives separated by '|'. A NULL alternative is written as "()".
 * When no parenthesis is counted as open, each alternative gets its own
 * anchors by joining them with "$|^" instead of a plain "|". */
RE_Elems:
  RE_Alter_Elem
  {
    $$ = $1;
  }
| RE_Elems '|' RE_Alter_Elem
  {
    /* NOTE(review): one is added when the lookahead token is ")",
     * presumably because the lexer has already counted that parenthesis
     * as closed -- confirm against the lexer. */
    unsigned int open_groups = get_nof_parentheses();
    if (yychar == KW_Group_End) open_groups++;
    const int in_group = (open_groups != 0);
    if ($1 != NULL && $3 != NULL) {
      $$ = mputprintf($1, in_group ? "|%s" : "$|^%s", $3);
      Free($3);
    } else if ($3 != NULL) {
      $$ = mprintf(in_group ? "()|%s" : "()$|^%s", $3);
      Free($3);
    } else if ($1 != NULL) {
      $$ = mputstr($1, in_group ? "|()" : "$|^()");
    } else {
      $$ = NULL;
    }
  }
;
255
/* One alternative: the concatenation of its elements. */
RE_Alter_Elem:
  RE_Concat_Elem
  {
    $$ = $1;
  }
| RE_Alter_Elem RE_Concat_Elem
  {
    /* append the new element to the text collected so far */
    $$ = mputstr($1, $2);
    Free($2);
  }
;
264
/* A single element, optionally followed by a repetition suffix, or the
 * wildcard '*' (translated to ".*"). If either the element or its
 * repetition is NULL the whole construct becomes NULL. */
RE_Concat_Elem:
  RE_Multiply_Elem
  {
    $$ = $1;
  }
| RE_Multiply_Elem RE_Multiply_Statement
  {
    if ($1 == NULL || $2 == NULL) {
      Free($1);
      Free($2);
      $$ = NULL;
    } else {
      $$ = mputstr($1, $2);
      Free($2);
    }
  }
| '*'
  {
    $$ = mcopystr(".*");
  }
;
280
/* Something a repetition suffix can be applied to: a group or a construct
 * matching one character position. */
RE_Multiply_Elem:
  RE_Group
  {
    $$ = $1;
  }
| RE_OneCharPos
  {
    $$ = $1;
  }
;
285
/* Parenthesized group. An empty group, or one whose content reduced to
 * NULL, is written as "()". */
RE_Group:
  KW_Group_Begin KW_Group_End
  {
    $$ = mcopystr("()");
  }
| KW_Group_Begin RE_Elems KW_Group_End
  {
    if ($2 == NULL) {
      $$ = mcopystr("()");
    } else {
      $$ = mprintf("(%s)", $2);
      Free($2);
    }
  }
;
301
/* Repetition suffix, translated to its POSIX form. The value is NULL when
 * the number of repetitions is zero and an empty string when it is exactly
 * one. Counts above RE_DUP_MAX (when that macro is available) only produce
 * a warning. */
RE_Multiply_Statement:
  '+'
  {
    $$ = mcopystr("+");
  }
| '#' '(' ',' ')'
  {
    /* neither bound is given */
    $$ = mcopystr("*");
  }
| '#' TOK_Digit
  {
    switch ($2) {
    case 0:
      TTCN_pattern_warning("The number of repetitions is zero: `#0'.");
      $$ = NULL;
      break;
    case 1:
      $$ = memptystr();
      break;
    default:
      if ($2 > 9) TTCN_pattern_warning("Internal error: Invalid number of "
        "repetitions: `#%lu'.", $2);
      $$ = mprintf("{%lu}", $2);
      break;
    }
  }
| '#' '(' TOK_Number ')'
  {
    switch ($3) {
    case 0:
      TTCN_pattern_warning("The number of repetitions is zero: `#(0)'.");
      $$ = NULL;
      break;
    case 1:
      $$ = memptystr();
      break;
    default:
#ifdef RE_DUP_MAX
      if ($3 > RE_DUP_MAX) TTCN_pattern_warning("The number of repetitions in "
        "`#(%lu)' exceeds the limit allowed by this system (%d).", $3,
        RE_DUP_MAX);
#endif
      $$ = mprintf("{%lu}", $3);
      break;
    }
  }
| '#' '(' TOK_Number ',' TOK_Number ')'
  {
#ifdef RE_DUP_MAX
    if ($3 > RE_DUP_MAX) TTCN_pattern_warning("The minimum number of "
      "repetitions in `#(%lu,%lu)' exceeds the limit allowed by this system "
      "(%d).", $3, $5, RE_DUP_MAX);
    if ($5 > RE_DUP_MAX) TTCN_pattern_warning("The maximum number of "
      "repetitions in `#(%lu,%lu)' exceeds the limit allowed by this system "
      "(%d).", $3, $5, RE_DUP_MAX);
#endif
    if ($3 > $5) TTCN_pattern_error("The lower bound is higher than the upper "
      "bound in the number of repetitions: `#(%lu,%lu)'.", $3, $5);
    if ($3 != $5) {
      /* a real interval; only 0..1 has a shorthand */
      if ($3 == 0 && $5 == 1) $$ = mcopystr("?");
      else $$ = mprintf("{%lu,%lu}", $3, $5);
    } else if ($3 == 0) {
      TTCN_pattern_warning("The number of repetitions is zero: `#(0,0)'.");
      $$ = NULL;
    } else if ($3 == 1) {
      $$ = memptystr();
    } else {
      $$ = mprintf("{%lu}", $3);
    }
  }
| '#' '(' ',' TOK_Number ')'
  {
    if ($4 != 0) {
#ifdef RE_DUP_MAX
      if ($4 > RE_DUP_MAX) TTCN_pattern_warning("The maximum number of "
        "repetitions in `#(,%lu)' exceeds the limit allowed by this system "
        "(%d).", $4, RE_DUP_MAX);
#endif
      if ($4 == 1) $$ = mcopystr("?");
      else $$ = mprintf("{0,%lu}", $4);
    } else {
      TTCN_pattern_warning("The number of repetitions is zero: `#(,0)'.");
      $$ = NULL;
    }
  }
| '#' '(' TOK_Number ',' ')'
  {
    if ($3 != 0) {
#ifdef RE_DUP_MAX
      if ($3 > RE_DUP_MAX) TTCN_pattern_warning("The minimum number of "
        "repetitions in `#(%lu,)' exceeds the limit allowed by this system "
        "(%d).", $3, RE_DUP_MAX);
#endif
      if ($3 == 1) $$ = mcopystr("+");
      else $$ = mprintf("{%lu,}", $3);
    } else {
      $$ = mcopystr("*");
    }
  }
;
392
/* Constructs that match exactly one character position. The value is the
 * POSIX text of the construct, or NULL for the unsupported "\b" and for an
 * empty character set. */
RE_OneCharPos:
  '?'
  {
    $$ = mcopystr(".");
  }
| KW_BS_d
  {
    $$ = mcopystr("[0-9]");
  }
| KW_BS_w
  {
    $$ = mcopystr("[0-9A-Za-z]");
  }
| KW_BS_t
  {
    $$ = mcopystr("\t");
  }
| KW_BS_n
  {
    $$ = mcopystr("[\n-\r]");
  }
| KW_BS_r
  {
    $$ = mcopystr("\r");
  }
| KW_BS_s
  {
    $$ = mcopystr("[\t-\r ]");
  }
| KW_BS_b
  {
    TTCN_pattern_warning("Metacharacter `\\b' is not supported yet.");
    $$ = NULL;
  }
| TOK_Char
  {
    /* only the codes 1..127 are accepted without an error */
    const unsigned char code = $1;
    if (code == 0 || code > 127) TTCN_pattern_error("Character with code %u "
      "(0x%02x) cannot be used in a pattern for type charstring.", code, code);
    $$ = translate_character($1);
  }
| RE_Quadruple
  {
    $$ = translate_character($1);
  }
| RE_Set
  {
    if (!set_is_empty($1)) {
      $$ = set_generate_posix($1);
    } else {
      TTCN_pattern_error("Empty character set.");
      $$ = NULL;
    }
    set_free($1);
  }
;
426
/* Bracketed character set, collected into a character_set.
 * RE_Set_Begin is 1 for "[^", 0 for "["
 * RE_Set_Begin_Rsbrkt is 1 for "[^]", 0 for "[]"
 * RE_Set_End is 1 for "-]", 0 for "]"
 * The set is negated as the very last step when the opening was a negated
 * one. A character that is already present is reported as a duplicate
 * instead of being added again.
 */
RE_Set:
  RE_Set_Begin RE_Set_Body RE_Set_End
  {
    $$ = ($2 != NULL) ? $2 : set_init();
    if ($3) {
      if (!set_has_char($$, '-')) set_add_char($$, '-');
      else TTCN_pattern_warning("Duplicate character `-' in the character set.");
    }
    if ($1) set_negate($$);
  }
| RE_Set_Begin '-' RE_Set_Body RE_Set_End
  {
    $$ = ($3 != NULL) ? $3 : set_init();
    /* the leading dash is a literal member of the set */
    if (!set_has_char($$, '-')) set_add_char($$, '-');
    else TTCN_pattern_warning("Duplicate character `-' in the character set.");
    if ($4) {
      if (!set_has_char($$, '-')) set_add_char($$, '-');
      else TTCN_pattern_warning("Duplicate character `-' in the character set.");
    }
    if ($1) set_negate($$);
  }
| RE_Set_Begin_Rsbrkt RE_Set_Body RE_Set_End
  {
    $$ = ($2 != NULL) ? $2 : set_init();
    /* the bracket right after the opening is a literal member of the set */
    if (!set_has_char($$, ']')) set_add_char($$, ']');
    else TTCN_pattern_warning("Duplicate character `]' in the character set.");
    if ($3) {
      if (!set_has_char($$, '-')) set_add_char($$, '-');
      else TTCN_pattern_warning("Duplicate character `-' in the character set.");
    }
    if ($1) set_negate($$);
  }
| RE_Set_Begin_Rsbrkt '-' RE_Set_Range_Char RE_Set_Body RE_Set_End
  {
    /* the set starts with a range whose lower bound is the bracket */
    $$ = ($4 != NULL) ? $4 : set_init();
    char *range_str = print_range(']', $3);
    if (']' > $3) {
      TTCN_pattern_error("Invalid range `%s' in the character set: the "
        "character code of the lower bound (%u) is higher than that of the "
        "upper bound (%u).", range_str, ']', (unsigned char)$3);
    } else if (set_has_range($$, ']', $3)) {
      character_set *tmpset = set_init();
      set_add_range(tmpset, ']', $3);
      set_report_duplicates($$, tmpset);
      set_free(tmpset);
    }
    set_add_range($$, ']', $3);
    Free(range_str);
    if ($5) {
      if (!set_has_char($$, '-')) set_add_char($$, '-');
      else TTCN_pattern_warning("Duplicate character `-' in the character set.");
    }
    if ($1) set_negate($$);
  }
;
498
/* Opening of a character set: 0 for "[", 1 for the negated "[^". */
RE_Set_Begin:
  KW_Set_Begin
  {
    $$ = 0;
  }
| KW_Set_Begin_Neg
  {
    $$ = 1;
  }
;
503
/* Opening of a character set directly followed by a right bracket:
 * 0 for "[]", 1 for the negated "[^]". */
RE_Set_Begin_Rsbrkt:
  KW_Set_Begin_Rsbrkt
  {
    $$ = 0;
  }
| KW_Set_Begin_Neg_Rsbrkt
  {
    $$ = 1;
  }
;
508
/* Closing of a character set: 0 for "]", 1 for "-]". */
RE_Set_End:
  KW_Set_End
  {
    $$ = 0;
  }
| KW_Set_Dash_End
  {
    $$ = 1;
  }
;
513
/* Elements between the brackets, merged into one set. The value is NULL
 * while no element has been seen. Members that occur more than once are
 * reported before the sets are joined. */
RE_Set_Body:
  /* empty */
  {
    $$ = NULL;
  }
| RE_Set_Body RE_Set_Elem
  {
    if ($1 == NULL) {
      /* first element: take over its set as it is */
      $$ = $2;
    } else {
      $$ = $1;
      if (set_has_intersect($$, $2)) set_report_duplicates($$, $2);
      set_join($$, $2);
      set_free($2);
    }
  }
;
526
/* One element of a character set: a single character, a predefined class
 * or a range. Every alternative yields a newly built set. */
RE_Set_Elem:
  RE_Set_Range_Char
  {
    $$ = set_init();
    set_add_char($$, $1);
  }
| RE_Set_NoRange_Char
  {
    $$ = $1;
  }
| RE_Set_Range_Char '-' RE_Set_Range_Char
  {
    $$ = set_init();
    if ($1 > $3) {
      char *range_str = print_range($1, $3);
      TTCN_pattern_error("Invalid range `%s' in the character set: the "
        "character code of the lower bound (%u) is higher than that of the "
        "upper bound (%u).", range_str, (unsigned char)$1, (unsigned char)$3);
      Free(range_str);
    }
    set_add_range($$, $1, $3);
  }
;
547
/* A character that may stand alone in a set or serve as the bound of a
 * range. */
RE_Set_Range_Char:
  KW_BS_t
  {
    $$ = '\t';
  }
| KW_BS_r
  {
    $$ = '\r';
  }
| TOK_Char
  {
    /* an error is reported for codes outside 1..127; the character is
     * passed on regardless */
    const unsigned char code = $1;
    if (code == 0 || code > 127) TTCN_pattern_error("Character with code %u "
      "(0x%02x) cannot be used in a pattern for type charstring.", code, code);
    $$ = $1;
  }
| RE_Quadruple
  {
    $$ = $1;
  }
;
560
/* Predefined character classes inside a set. These cannot be the bound of
 * a range; each one yields a new set holding the members of the class. */
RE_Set_NoRange_Char:
  KW_BS_d
  {
    $$ = set_init();
    set_add_range($$, '0', '9');
  }
| KW_BS_w
  {
    $$ = set_init();
    set_add_range($$, '0', '9');
    set_add_range($$, 'A', 'Z');
    set_add_range($$, 'a', 'z');
  }
| KW_BS_n
  {
    $$ = set_init();
    set_add_range($$, '\n', '\r');
  }
| KW_BS_s
  {
    $$ = set_init();
    set_add_range($$, '\t', '\r');
    set_add_char($$, ' ');
  }
| KW_BS_b
  {
    /* reported as an error; an empty set is returned */
    TTCN_pattern_error("Metacharacter `\\b' does not make any sense in a "
      "character set.");
    $$ = set_init();
  }
;
592
/* Quadruple notation of a character: \q{group,plane,row,cell}. Every
 * component is range checked, and anything other than {0,0,0,1..127} is
 * reported as an error. The value is the fourth number (cell) converted to
 * a char, whether or not an error was reported. */
RE_Quadruple:
  KW_BS_q '{' TOK_Number ',' TOK_Number ',' TOK_Number ',' TOK_Number '}'
  {
    if ($3 > 127) {
      TTCN_pattern_error("The first number (group) of quadruple "
        "`\\q{%lu,%lu,%lu,%lu}' is too large. It should be in the range 0..127 "
        "instead of %lu.", $3, $5, $7, $9, $3);
    }
    if ($5 > 255) {
      TTCN_pattern_error("The second number (plane) of quadruple "
        "`\\q{%lu,%lu,%lu,%lu}' is too large. It should be in the range 0..255 "
        "instead of %lu.", $3, $5, $7, $9, $5);
    }
    if ($7 > 255) {
      TTCN_pattern_error("The third number (row) of quadruple "
        "`\\q{%lu,%lu,%lu,%lu}' is too large. It should be in the range 0..255 "
        "instead of %lu.", $3, $5, $7, $9, $7);
    }
    if ($9 > 255) {
      TTCN_pattern_error("The fourth number (cell) of quadruple "
        "`\\q{%lu,%lu,%lu,%lu}' is too large. It should be in the range 0..255 "
        "instead of %lu.", $3, $5, $7, $9, $9);
    }
    /* a charstring pattern allows only group, plane and row equal to zero
     * and a cell of at most 127 */
    if ($3 > 0 || $5 > 0 || $7 > 0 || $9 > 127) {
      TTCN_pattern_error("Quadruple "
        "`\\q{%lu,%lu,%lu,%lu}' is not valid in a pattern for type charstring.",
        $3, $5, $7, $9);
    }
    if ($3 == 0 && $5 == 0 && $7 == 0 && $9 == 0) {
      TTCN_pattern_error("Zero "
        "character (i.e. quadruple `\\q{0,0,0,0}') is not supported in a "
        "pattern for type charstring.");
    }
    $$ = $9;
  }
;
617
618 %%
619
620 /*********************************************************************
621 * Interface
622 *********************************************************************/
623
624 char* TTCN_pattern_to_regexp(const char* p_pattern)
625 {
626 /* if you want to debug */
627 //pattern_yydebug=1;
628
629 ret_val=NULL;
630
631 yy_buffer_state *flex_buffer = pattern_yy_scan_string(p_pattern);
632 if(flex_buffer == NULL) {
633 TTCN_pattern_error("Flex buffer creation failed.");
634 return NULL;
635 }
636 init_pattern_yylex(&yylval);
637 if(pattern_yyparse()) {
638 Free(ret_val);
639 ret_val=NULL;
640 }
641 pattern_yylex_destroy();
642 return ret_val;
643 }
644
645 // Backwards compatibility shim
646 char* TTCN_pattern_to_regexp(const char* p_pattern, int ere)
647 {
648 TTCN_pattern_warning(
649 "TTCN_pattern_to_regexp(const char* p_pattern, int ere) is deprecated");
650 if (ere != 1) TTCN_pattern_error(
651 "BRE is not supported for TTCN_pattern_to_regexp");
652 return TTCN_pattern_to_regexp(p_pattern);
653 }
654
655 /*********************************************************************
656 * Static functions
657 *********************************************************************/
658
659 /// Error reporting function
660 void pattern_yyerror(const char *error_str)
661 {
662 TTCN_pattern_error("%s", error_str);
663 }
664
665 /** Escape plain characters which would be metacharacters in a regex.
666 *
667 * @param c plain character
668 * @return a newly allocated string which must be Free() 'd
669 */
670 char *translate_character(char c)
671 {
672 int escape_needed = 0;
673 switch (c) {
674 case '|':
675 case '+':
676 case '?':
677 case '{':
678 case '}':
679 case '(':
680 case ')':
681 case '.':
682 case '^':
683 case '$':
684 case '[':
685 case '*':
686 case '\\':
687 escape_needed = 1;
688 break;
689 }
690 if (escape_needed) return mprintf("\\%c", c);
691 else return mputc(NULL, c);
692 }
693
694 char *print_character(char c)
695 {
696 switch (c) {
697 case '\t':
698 return mcopystr("\\t");
699 case '\r':
700 return mcopystr("\\r");
701 default:
702 if (isprint((unsigned char)c)) return mprintf("%c", c);
703 else return mprintf("\\q{0,0,0,%u}", (unsigned char)c);
704 }
705 }
706
707 char *print_range(char lower, char upper)
708 {
709 char *range_str = print_character(lower);
710 range_str = mputc(range_str, '-');
711 char *upper_str = print_character(upper);
712 range_str = mputstr(range_str, upper_str);
713 Free(upper_str);
714 return range_str;
715 }
716
/** Number of bits stored in one element of character_set::set_members. */
#define CS_BITS_PER_ELEM (sizeof(unsigned long) * 8)
/** Number of elements needed for 128 bits, rounded up. */
#define CS_NOF_ELEMS ((128 + CS_BITS_PER_ELEM - 1) / CS_BITS_PER_ELEM)

/** Set of characters stored as a bit array: the bit with index c tells
 * whether the character with code c is a member. */
struct character_set {
  unsigned long set_members[CS_NOF_ELEMS];
};
723
724 character_set *set_init()
725 {
726 character_set *set = (character_set*)Malloc(sizeof(*set));
727 memset(set->set_members, 0, sizeof(set->set_members));
728 return set;
729 }
730
731 character_set *set_copy(const character_set *set)
732 {
733 character_set *set2 = (character_set*)Malloc(sizeof(*set2));
734 memcpy(set2, set, sizeof(*set2));
735 return set2;
736 }
737
738 void set_free(character_set *set)
739 {
740 Free(set);
741 }
742
743 int set_is_empty(const character_set *set)
744 {
745 if ((set->set_members[0] & ~1UL) != 0) return 0;
746 for (size_t i = 1; i < CS_NOF_ELEMS; i++)
747 if (set->set_members[i] != 0) return 0;
748 return 1;
749 }
750
751 int set_is_full(const character_set *set)
752 {
753 if (~(set->set_members[0] | 1UL) != 0) return 0;
754 for (size_t i = 1; i < CS_NOF_ELEMS; i++)
755 if (~set->set_members[i] != 0) return 0;
756 return 1;
757 }
758
759 int set_has_char(const character_set *set, char c)
760 {
761 if (set->set_members[c / CS_BITS_PER_ELEM] & 1UL << c % CS_BITS_PER_ELEM)
762 return 1;
763 else return 0;
764 }
765
766 void set_add_char(character_set *set, char c)
767 {
768 set->set_members[c / CS_BITS_PER_ELEM] |= 1UL << c % CS_BITS_PER_ELEM;
769 }
770
771 void set_remove_char(character_set *set, char c)
772 {
773 set->set_members[c / CS_BITS_PER_ELEM] &= ~(1UL << c % CS_BITS_PER_ELEM);
774 }
775
776 int set_has_range(const character_set *set, char lower, char upper)
777 {
778 for (size_t i = lower; i <= (unsigned char)upper; i++)
779 if (set->set_members[i / CS_BITS_PER_ELEM] & 1UL << i % CS_BITS_PER_ELEM)
780 return 1;
781 return 0;
782 }
783
784 void set_add_range(character_set *set, char lower, char upper)
785 {
786 for (size_t i = lower; i <= (unsigned char)upper; i++)
787 set->set_members[i / CS_BITS_PER_ELEM] |= 1UL << i % CS_BITS_PER_ELEM;
788 }
789
790 int set_has_intersect(const character_set *set1, const character_set *set2)
791 {
792 for (size_t i = 0; i < CS_NOF_ELEMS; i++)
793 if (set1->set_members[i] & set2->set_members[i]) return 1;
794 return 0;
795 }
796
797 void set_join(character_set *dst, const character_set *src)
798 {
799 for (size_t i = 0; i < CS_NOF_ELEMS; i++)
800 dst->set_members[i] |= src->set_members[i];
801 }
802
803 void set_negate(character_set *set)
804 {
805 for (size_t i = 0; i < CS_NOF_ELEMS; i++)
806 set->set_members[i] = ~set->set_members[i];
807 }
808
809 void set_report_duplicates(const character_set *set1,
810 const character_set *set2)
811 {
812 for (unsigned char i = 0; i <= 127; ) {
813 for (i++; i <= 127; i++)
814 if (set_has_char(set2, i) && set_has_char(set1, i)) break;
815 if (i > 127) break;
816 char lower = i;
817 for (i++; i <= 127; i++)
818 if (!set_has_char(set2, i) || !set_has_char(set1, i)) break;
819 char upper = i - 1;
820 if (lower < upper) {
821 char *range_str = print_range(lower, upper);
822 TTCN_pattern_warning("Duplicate range `%s' in the character set.",
823 range_str);
824 Free(range_str);
825 } else {
826 char *char_str = print_character(lower);
827 if(lower == '\r' ){
828 TTCN_pattern_warning("Duplicate character `%s' in the character "
829 "set. Please note the \\n includes the \\r implicitly. "
830 "Use \\q{0,0,0,10} if you would like to match the LF only.", char_str);
831 } else {
832 TTCN_pattern_warning("Duplicate character `%s' in the character "
833 "set.", char_str);
834 }
835 Free(char_str);
836 }
837 }
838 }
839
840 static char *append_posix_body(char *set_body, const character_set *set)
841 {
842 for (unsigned char i = 0; i <= 127; ) {
843 for (i++; i <= 127; i++) if (set_has_char(set, i)) break;
844 if (i > 127) break;
845 char lower = i;
846 set_body = mputc(set_body, lower);
847 for (i++; i <= 127; i++) if (!set_has_char(set, i)) break;
848 char upper = i - 1;
849 if (lower < upper) {
850 if (lower + 1 < upper) set_body = mputc(set_body, '-');
851 set_body = mputc(set_body, upper);
852 }
853 }
854 return set_body;
855 }
856
857 static char *generate_posix_body(character_set *set)
858 {
859 int has_caret;
860 if (set_has_char(set, '^') && !(set_has_char(set, '^' - 1) &&
861 set_has_char(set, '^' + 1))) {
862 set_remove_char(set, '^');
863 has_caret = 1;
864 } else has_caret = 0;
865 int has_dash;
866 if (set_has_char(set, '-') && !(set_has_char(set, '-' - 1) &&
867 set_has_char(set, '-' + 1))) {
868 set_remove_char(set, '-');
869 has_dash = 1;
870 } else has_dash = 0;
871 int has_rsbrkt;
872 if (set_has_char(set, ']') && !(set_has_char(set, ']' - 1) &&
873 set_has_char(set, ']' + 1))) {
874 set_remove_char(set, ']');
875 has_rsbrkt = 1;
876 } else has_rsbrkt = 0;
877 char *set_body = memptystr();
878 if (set_is_empty(set) && !has_rsbrkt) {
879 /* the `-' must precede the `^' */
880 if (has_dash) set_body = mputc(set_body, '-');
881 if (has_caret) set_body = mputc(set_body, '^');
882 } else {
883 /* order: ']', others, '^', '-' */
884 if (has_rsbrkt) set_body = mputc(set_body, ']');
885 set_body = append_posix_body(set_body, set);
886 if (has_caret) set_body = mputc(set_body, '^');
887 if (has_dash) set_body = mputc(set_body, '-');
888 }
889 return set_body;
890 }
891
892 static char *generate_posix_body_compl(character_set *set)
893 {
894 set_negate(set);
895 int has_dash;
896 if (set_has_char(set, '-') && !(set_has_char(set, '-' - 1) &&
897 set_has_char(set, '-' + 1))) {
898 set_remove_char(set, '-');
899 has_dash = 1;
900 } else has_dash = 0;
901 int has_rsbrkt;
902 if (set_has_char(set, ']') && !(set_has_char(set, ']' - 1) &&
903 set_has_char(set, ']' + 1))) {
904 set_remove_char(set, ']');
905 has_rsbrkt = 1;
906 } else has_rsbrkt = 0;
907 char *set_body = mcopystr("^");
908 /* order: ']', others, '-' */
909 if (has_rsbrkt) set_body = mputc(set_body, ']');
910 set_body = append_posix_body(set_body, set);
911 if (has_dash) set_body = mputc(set_body, '-');
912 return set_body;
913 }
914
915 char *set_generate_posix(const character_set *set)
916 {
917 /* a full set can only be represented in this way: */
918 if (set_is_full(set)) return mcopystr(".");
919 character_set *tempset = set_copy(set);
920 char *set_body = generate_posix_body(tempset);
921 set_free(tempset);
922 char *posix_str;
923 if (set_body[0] == '\0') {
924 Free(set_body);
925 TTCN_pattern_error("Internal error: empty POSIX set.");
926 return NULL;
927 }
928 /* do not use the set notation in POSIX if the set contains only one
929 * character */
930 if (set_body[1] == '\0') posix_str = translate_character(set_body[0]);
931 else {
932 /* create the complemented version of the same set */
933 tempset = set_copy(set);
934 char *compl_body = generate_posix_body_compl(tempset);
935 set_free(tempset);
936 if (compl_body[0] == '\0') {
937 Free(set_body);
938 Free(compl_body);
939 TTCN_pattern_error("Internal error: empty complemented POSIX set.");
940 return NULL;
941 }
942 /* use the complemented form in the POSIX equivalent if it is the shorter
943 * one */
944 if (mstrlen(compl_body) < mstrlen(set_body))
945 posix_str = mprintf("[%s]", compl_body);
946 else posix_str = mprintf("[%s]", set_body);
947 Free(compl_body);
948 }
949 Free(set_body);
950 return posix_str;
951 }
952
953 void yyprint(FILE *file, int type, const YYSTYPE& value)
954 {
955 switch (type) {
956 case TOK_Char:
957 fprintf(file, "'%c'", value.c);
958 break;
959 case TOK_Digit: case TOK_Number:
960 fprintf(file, "'%lu'", value.u);
961 break;
962 default:
963 break;
964 }
965 }
966
This page took 0.055996 seconds and 6 git commands to generate.