Commit | Line | Data |
---|---|---|
970ed795 | 1 | /****************************************************************************** |
d44e3c4f | 2 | * Copyright (c) 2000-2016 Ericsson Telecom AB |
970ed795 EL |
3 | * All rights reserved. This program and the accompanying materials |
4 | * are made available under the terms of the Eclipse Public License v1.0 | |
5 | * which accompanies this distribution, and is available at | |
6 | * http://www.eclipse.org/legal/epl-v10.html | |
d44e3c4f | 7 | * |
8 | * Contributors: | |
9 | * Balasko, Jeno | |
10 | * Raduly, Csaba | |
11 | * Szalai, Gabor | |
12 | * Zalanyi, Balazs Andor | |
13 | * | |
970ed795 EL |
14 | ******************************************************************************/ |
15 | ||
16 | /** | |
17 | * Based on pattern_p.y | |
18 | */ | |
19 | ||
20 | %{ | |
21 | ||
22 | /********************************************************************* | |
23 | * C(++) declarations | |
24 | *********************************************************************/ | |
25 | ||
26 | #include <stdio.h> | |
27 | #include <string.h> | |
28 | #include <ctype.h> | |
29 | ||
30 | #if defined(__CYGWIN__) && defined(__clang__) | |
31 | /* Cygwin's clang 3.0 has its own limits.h, which does not bring in | |
32 | the system's limits.h unless we define this macro: */ | |
33 | #define __STDC_HOSTED__ 1 | |
34 | #define _GCC_NEXT_LIMITS_H | |
35 | #endif | |
36 | #include <limits.h> | |
37 | ||
38 | #include <regex.h> | |
39 | #if !defined(RE_DUP_MAX) | |
40 | /* RE_DUP_MAX is defined in limits.h or regex.h, except on Cygwin 1.5 */ | |
41 | # include <sys/syslimits.h> | |
42 | #endif | |
43 | ||
44 | #include "memory.h" | |
45 | #include "pattern.hh" | |
46 | ||
47 | #include "Quadruple.hh" | |
48 | ||
49 | union YYSTYPE; | |
50 | /* defined in lexer c-file: */ | |
51 | ||
52 | extern int pattern_yylex(); | |
53 | inline int pattern_unilex() { return pattern_yylex(); } | |
54 | extern void init_pattern_yylex(YYSTYPE*); | |
55 | struct yy_buffer_state; | |
56 | extern yy_buffer_state* pattern_yy_scan_string(const char*); | |
57 | extern void pattern_yy_delete_buffer(yy_buffer_state*); | |
58 | extern unsigned int get_nof_parentheses(); | |
59 | ||
60 | /* defined in this file: */ | |
61 | ||
62 | /** The converted regexp. */ | |
63 | static char *ret_val; | |
64 | /** The parser error reporting function. */ | |
65 | static void pattern_unierror(const char *error_str); | |
66 | ||
67 | static int user_groups; | |
68 | ||
69 | #define YYERROR_VERBOSE | |
70 | ||
71 | static void yyprint(FILE *file, int type, const YYSTYPE& value); | |
72 | #define YYPRINT(f,t,v) yyprint(f,t,v) | |
73 | ||
74 | %} | |
75 | ||
76 | /********************************************************************* | |
77 | * Bison declarations | |
78 | *********************************************************************/ | |
79 | ||
80 | %name-prefix="pattern_uni" | |
81 | %output="pattern_uni.cc" | |
82 | %defines | |
83 | %verbose | |
84 | %expect 0 | |
85 | %start Pattern | |
86 | %debug | |
87 | ||
88 | /********************************************************************* | |
89 | * The union-type | |
90 | * Must be kept in sync with the one in pattern_p.y ! | |
91 | *********************************************************************/ | |
92 | ||
%union {
  int b;                      /* boolean flag */
  char c;                     /* one plain character */
  char *s;                    /* character string */
  unsigned long u;            /* unsigned number */
  struct character_set *set;  /* only used by nonterminals in pattern_p.y */

  /* One universal character (used by nonterminals in pattern_uni.y).
   * It can be read either as a single unsigned value or through its four
   * one-byte components; the declaration order of the components differs
   * between SPARC and all other targets (presumably to follow the byte
   * order of the platform - confirm before changing). */
  union {
    unsigned int value;
#if defined(__sparc__) || defined(__sparc)
    struct {
      unsigned char group;
      unsigned char plane;
      unsigned char row;
      unsigned char cell;
    } comp;
#else
    struct {
      unsigned char cell;
      unsigned char row;
      unsigned char plane;
      unsigned char group;
    } comp;
#endif
  } q;
  class QuadSet *qset;        /* only used by nonterminals in pattern_uni.y */
}
120 | ||
121 | /********************************************************************* | |
122 | * Tokens | |
123 | *********************************************************************/ | |
124 | ||
125 | %token <c> TOK_Char "<ordinary character>" | |
126 | %token <u> TOK_Number "<number>" | |
127 | %token <u> TOK_Digit "<digit>" | |
128 | ||
129 | /********************************************************************* | |
130 | * Keywords | |
131 | *********************************************************************/ | |
132 | ||
133 | %token KW_BS_q "\\q" | |
134 | %token KW_BS_d "\\d" | |
135 | %token KW_BS_w "\\w" | |
136 | %token KW_BS_t "\\t" | |
137 | %token KW_BS_n "\\n" | |
138 | %token KW_BS_r "\\r" | |
139 | %token KW_BS_s "\\s" | |
140 | %token KW_BS_b "\\b" | |
141 | ||
142 | %token KW_Group_Begin "(" | |
143 | %token KW_Group_End ")" | |
144 | %token KW_Set_Begin "[" | |
145 | %token KW_Set_Begin_Neg "[^" | |
146 | %token KW_Set_Begin_Rsbrkt "[]" | |
147 | %token KW_Set_Begin_Neg_Rsbrkt "[^]" | |
148 | %token KW_Set_End "]" | |
149 | %token KW_Set_Dash_End "-]" | |
150 | ||
151 | /********************************************************************* | |
152 | * semantic types of nonterminals | |
153 | *********************************************************************/ | |
154 | ||
155 | %type <b> RE_Set_Begin RE_Set_Begin_Rsbrkt RE_Set_End | |
156 | %type <q> RE_Set_Range_Char RE_Quadruple | |
157 | %type <s> RE_Body RE_Elems RE_Alter_Elem RE_Concat_Elem | |
158 | RE_Multiply_Elem RE_Multiply_Statement RE_Group | |
159 | RE_OneCharPos | |
160 | %type <qset> RE_Set RE_Set_Body RE_Set_Elem RE_Set_NoRange_Char | |
161 | ||
162 | /********************************************************************* | |
163 | * Destructors | |
164 | *********************************************************************/ | |
165 | ||
166 | %destructor { Free($$); } | |
167 | RE_Alter_Elem | |
168 | RE_Body | |
169 | RE_Concat_Elem | |
170 | RE_Elems | |
171 | RE_Group | |
172 | RE_Multiply_Elem | |
173 | RE_Multiply_Statement | |
174 | RE_OneCharPos | |
175 | ||
176 | %destructor { delete $$; } | |
177 | RE_Set | |
178 | RE_Set_Body | |
179 | RE_Set_Elem | |
180 | RE_Set_NoRange_Char | |
181 | ||
182 | %% | |
183 | ||
184 | /********************************************************************* | |
185 | * Grammar | |
186 | *********************************************************************/ | |
187 | ||
/* Start symbol: publishes the finished regular expression through the
 * file-level ret_val pointer. */
Pattern:
  RE_Body
  {
    ret_val = $1;
  }
;
191 | ||
/* The whole pattern: the element list wrapped between '^' and '$'.
 * An empty pattern, or an element list that collapsed to NULL, yields "^$". */
RE_Body:
  /* empty */
  {
    $$ = mcopystr("^$");
  }
| RE_Elems
  {
    if ($1 == NULL) {
      $$ = mcopystr("^$");
    } else {
      $$ = mprintf("^%s$", $1);
      Free($1);
    }
  }
;
205 | ||
/* Alternatives separated by '|'.
 * Outside of any group every alternative gets its own "$" / "^" anchors;
 * inside a group a plain '|' is emitted. A NULL operand is written as the
 * empty group "()"; two NULL operands give NULL. */
RE_Elems:
  RE_Alter_Elem
  {
    $$ = $1;
  }
| RE_Elems '|' RE_Alter_Elem
  {
    /* Number of groups reported open by the lexer, plus one when the
     * lookahead token is a closing parenthesis (presumably because the lexer
     * has already accounted for that ')' - confirm against the lexer). */
    unsigned int open_groups = get_nof_parentheses();
    if (yychar == KW_Group_End) open_groups++;
    const bool nested = (open_groups != 0);

    if ($1 != NULL && $3 != NULL) {
      $$ = mputprintf($1, nested ? "|%s" : "$|^%s", $3);
      Free($3);
    } else if ($3 != NULL) {
      $$ = mprintf(nested ? "()|%s" : "()$|^%s", $3);
      Free($3);
    } else if ($1 != NULL) {
      $$ = mputstr($1, nested ? "|()" : "$|^()");
    } else {
      $$ = NULL;
    }
  }
;
221 | ||
/* One alternative: a sequence of concatenated elements. Each further
 * element is appended to the text collected so far and then released. */
RE_Alter_Elem:
  RE_Concat_Elem
  {
    $$ = $1;
  }
| RE_Alter_Elem RE_Concat_Elem
  {
    char *joined = mputstr($1, $2);
    Free($2);
    $$ = joined;
  }
;
230 | ||
/* A single element, optionally followed by a repetition statement.
 * If either the element or its repetition is NULL the whole construct is
 * dropped (NULL). A bare '*' matches any number of arbitrary characters. */
RE_Concat_Elem:
  RE_Multiply_Elem
  {
    $$ = $1;
  }
| RE_Multiply_Elem RE_Multiply_Statement
  {
    if ($1 == NULL || $2 == NULL) {
      Free($1);
      Free($2);
      $$ = NULL;
    } else {
      $$ = mputstr($1, $2);
      Free($2);
    }
  }
| '*'
  {
    $$ = mcopystr("(........)*");
  }
;
246 | ||
/* Anything a repetition statement may be applied to: a group or a
 * single character position. The text is passed through unchanged. */
RE_Multiply_Elem:
  RE_Group
  {
    $$ = $1;
  }
| RE_OneCharPos
  {
    $$ = $1;
  }
;
251 | ||
/* A user-written group. Its opening parenthesis is emitted as '<' so that
 * TTCN_pattern_to_regexp_uni() can later tell it apart from the
 * parentheses generated by this grammar; every group also bumps the
 * user_groups counter. */
RE_Group:
  KW_Group_Begin KW_Group_End
  {
    ++user_groups;
    $$ = mcopystr("<)");
  }
| KW_Group_Begin RE_Elems KW_Group_End
  {
    ++user_groups;
    if ($2 == NULL) {
      $$ = mcopystr("<)");
    } else {
      $$ = mprintf("<%s)", $2);
      Free($2);
    }
  }
;
269 | ||
/* Repetition statements: '+', '#n', '#(n)', '#(n,m)', '#(,m)', '#(n,)'
 * and '#(,)'. The result is the matching POSIX quantifier text.
 * A repetition count of exactly zero produces a warning and NULL (which
 * makes RE_Concat_Elem drop the element); a count of exactly one produces
 * the string returned by memptystr(), i.e. no quantifier text. */
RE_Multiply_Statement:
  '+'
  {
    $$ = mcopystr("+");
  }
| '#' '(' ',' ')'
  {
    $$ = mcopystr("*");
  }
| '#' TOK_Digit
  {
    const unsigned long count = $2;
    switch (count) {
    case 0:
      TTCN_pattern_warning("The number of repetitions is zero: `#0'.");
      $$ = NULL;
      break;
    case 1:
      $$ = memptystr();
      break;
    default:
      /* a TOK_Digit above 9 is reported as an internal error */
      if (count > 9) TTCN_pattern_warning("Internal error: Invalid number of "
        "repetitions: `#%lu'.", count);
      $$ = mprintf("{%lu}", count);
      break;
    }
  }
| '#' '(' TOK_Number ')'
  {
    const unsigned long count = $3;
    if (count == 0) {
      TTCN_pattern_warning("The number of repetitions is zero: `#(0)'.");
      $$ = NULL;
    } else if (count == 1) {
      $$ = memptystr();
    } else {
#ifdef RE_DUP_MAX
      if (count > RE_DUP_MAX) TTCN_pattern_warning("The number of repetitions in "
        "`#(%lu)' exceeds the limit allowed by this system (%d).", count,
        RE_DUP_MAX);
#endif
      $$ = mprintf("{%lu}", count);
    }
  }
| '#' '(' TOK_Number ',' TOK_Number ')'
  {
    const unsigned long lower = $3;
    const unsigned long upper = $5;
#ifdef RE_DUP_MAX
    if (lower > RE_DUP_MAX) TTCN_pattern_warning("The minimum number of "
      "repetitions in `#(%lu,%lu)' exceeds the limit allowed by this system "
      "(%d).", lower, upper, RE_DUP_MAX);
    if (upper > RE_DUP_MAX) TTCN_pattern_warning("The maximum number of "
      "repetitions in `#(%lu,%lu)' exceeds the limit allowed by this system "
      "(%d).", lower, upper, RE_DUP_MAX);
#endif
    if (lower > upper) TTCN_pattern_error("The lower bound is higher than the upper "
      "bound in the number of repetitions: `#(%lu,%lu)'.", lower, upper);

    if (lower != upper) {
      /* a real range; `#(0,1)' has the dedicated quantifier '?' */
      if (lower == 0 && upper == 1) $$ = mcopystr("?");
      else $$ = mprintf("{%lu,%lu}", lower, upper);
    } else if (lower == 0) {
      TTCN_pattern_warning("The number of repetitions is zero: `#(0,0)'.");
      $$ = NULL;
    } else if (lower == 1) {
      $$ = memptystr();
    } else {
      $$ = mprintf("{%lu}", lower);
    }
  }
| '#' '(' ',' TOK_Number ')'
  {
    const unsigned long upper = $4;
    if (upper == 0) {
      TTCN_pattern_warning("The number of repetitions is zero: `#(,0)'.");
      $$ = NULL;
    } else {
#ifdef RE_DUP_MAX
      if (upper > RE_DUP_MAX) TTCN_pattern_warning("The maximum number of "
        "repetitions in `#(,%lu)' exceeds the limit allowed by this system "
        "(%d).", upper, RE_DUP_MAX);
#endif
      if (upper == 1) $$ = mcopystr("?");
      else $$ = mprintf("{0,%lu}", upper);
    }
  }
| '#' '(' TOK_Number ',' ')'
  {
    const unsigned long lower = $3;
    if (lower == 0) {
      $$ = mcopystr("*");
    } else {
#ifdef RE_DUP_MAX
      if (lower > RE_DUP_MAX) TTCN_pattern_warning("The minimum number of "
        "repetitions in `#(%lu,)' exceeds the limit allowed by this system "
        "(%d).", lower, RE_DUP_MAX);
#endif
      if (lower == 1) $$ = mcopystr("+");
      else $$ = mprintf("{%lu,}", lower);
    }
  }
;
366 | ||
/* Exactly one character position: '?', a backslash class, an ordinary
 * character, a \q{...} quadruple or a [...] set.
 * The literals below are written in the letter-based form that
 * Quad::get_hexrepr() is used to produce for single characters (apparently
 * eight letters per character - see Quadruple.hh for the exact encoding).
 * `\b' is not supported: a warning is issued and NULL is returned. */
RE_OneCharPos:
  '?' { $$ = mcopystr("(........)"); }
| KW_BS_d { $$ = mcopystr("(AAAAAAD[A-J])"); }
| KW_BS_w { $$ = mcopystr("(AAAAAAD[A-J]|AAAAAA(E[B-P]|F[A-K])|AAAAAA(G[B-P]|H[A-K]))"); }
| KW_BS_t { $$ = mcopystr("AAAAAAAJ"); }
| KW_BS_n { $$ = mcopystr("(AAAAAAA[K-N])"); }
| KW_BS_r { $$ = mcopystr("AAAAAAAN"); }
| KW_BS_s { $$ = mcopystr("(AAAAAAA[J-N]|AAAAAACA)"); }
| KW_BS_b
  {
    TTCN_pattern_warning("Metacharacter `\\b' is not supported yet.");
    $$ = NULL;
  }
| TOK_Char
  {
    /* Work on the unsigned value of the byte: the signedness of plain char
     * is implementation-defined, so testing `$1 <= 0' rejects bytes above
     * 127 only where char is signed, and printing $1 through %u would show
     * a sign-extended number instead of the character code. */
    const unsigned char c = (unsigned char)$1;
    if (c == 0 || c > 127) TTCN_pattern_error("Character with code %u "
      "(0x%02x) cannot be used in a pattern for type charstring.",
      (unsigned int)c, (unsigned int)c);
    $$ = Quad::get_hexrepr(c);
  }
| RE_Quadruple
  {
    $$ = Quad::get_hexrepr($1.value);
  }
| RE_Set
  {
    if ($1->is_empty()) {
      TTCN_pattern_error("Empty character set.");
      $$ = NULL;
    } else {
      $$ = $1->generate_posix();
    }
    /* the set object is no longer needed once its text has been generated */
    delete $1;
  }
;
401 | ||
/* A complete character set.
 * RE_Set_Begin is 1 for "[^", 0 for "["
 * RE_Set_Begin_Rsbrkt is 1 for "[^]", 0 for "[]"
 * RE_Set_End is 1 for "-]", 0 for "]"
 * The body may be NULL (no elements); an empty QuadSet is created then.
 * A literal '-' or ']' implied by the opening/closing token is added to
 * the set, with a warning if QuadSet::add() reports it as a duplicate. */
RE_Set:
  RE_Set_Begin RE_Set_Body RE_Set_End
  {
    QuadSet *set = ($2 != NULL) ? $2 : new QuadSet();
    if ($3 && !set->add(new Quad('-')))
      TTCN_pattern_warning("Duplicate character `-' in the character set.");
    if ($1)
      set->set_negate(true);
    $$ = set;
  }
| RE_Set_Begin '-' RE_Set_Body RE_Set_End
  {
    /* leading '-' is always a literal dash */
    QuadSet *set = ($3 != NULL) ? $3 : new QuadSet();
    if (!set->add(new Quad('-')))
      TTCN_pattern_warning("Duplicate character `-' in the character set.");
    if ($1)
      set->set_negate(true);
    $$ = set;
  }
| RE_Set_Begin_Rsbrkt RE_Set_Body RE_Set_End
  {
    /* "[]" / "[^]": the ']' right after the opening is a literal */
    QuadSet *set = ($2 != NULL) ? $2 : new QuadSet();
    if (!set->add(new Quad(']')))
      TTCN_pattern_warning("Duplicate character `]' in the character set.");
    if ($3 && !set->add(new Quad('-')))
      TTCN_pattern_warning("Duplicate character `-' in the character set.");
    if ($1)
      set->set_negate(true);
    $$ = set;
  }
| RE_Set_Begin_Rsbrkt '-' RE_Set_Range_Char RE_Set_Body RE_Set_End
  {
    /* "[]-x...": a range whose lower bound is the literal ']' */
    QuadSet *set = ($4 != NULL) ? $4 : new QuadSet();
    if ($3.value < (unsigned int)']') {
      TTCN_pattern_error("Invalid range in the character set: the "
        "character code of the lower bound (%u) is higher than that of the "
        "upper bound (%u).", ']', (unsigned int)$3.value);
    }
    set->add(new QuadInterval(Quad(']'), Quad($3.value)));
    if ($5 && !set->add(new Quad('-')))
      TTCN_pattern_warning("Duplicate character `-' in the character set.");
    if ($1)
      set->set_negate(true);
    $$ = set;
  }
;
462 | ||
/* Opening of a set: 0 for "[", 1 for the negated form "[^". */
RE_Set_Begin:
  KW_Set_Begin
  {
    $$ = 0;
  }
| KW_Set_Begin_Neg
  {
    $$ = 1;
  }
;

/* Opening of a set directly followed by ']': 0 for "[]", 1 for "[^]". */
RE_Set_Begin_Rsbrkt:
  KW_Set_Begin_Rsbrkt
  {
    $$ = 0;
  }
| KW_Set_Begin_Neg_Rsbrkt
  {
    $$ = 1;
  }
;

/* Closing of a set: 0 for "]", 1 for "-]". */
RE_Set_End:
  KW_Set_End
  {
    $$ = 0;
  }
| KW_Set_Dash_End
  {
    $$ = 1;
  }
;
477 | ||
/* Zero or more set elements. No element at all is represented by NULL;
 * otherwise each further element is joined into the set built so far and
 * then deleted. */
RE_Set_Body:
  /* empty */
  {
    $$ = NULL;
  }
| RE_Set_Body RE_Set_Elem
  {
    if ($1 == NULL) {
      /* first element: it becomes the accumulated set */
      $$ = $2;
    } else {
      $1->join($2);
      delete $2;
      $$ = $1;
    }
  }
;
490 | ||
/* One element of a set: a single character, a backslash class or a
 * range "a-b". Every alternative yields a freshly built QuadSet. */
RE_Set_Elem:
  RE_Set_Range_Char
  {
    QuadSet *single = new QuadSet();
    single->add(new Quad($1.value));
    $$ = single;
  }
| RE_Set_NoRange_Char
  {
    $$ = $1;
  }
| RE_Set_Range_Char '-' RE_Set_Range_Char
  {
    /* an inverted range is reported, the interval is still added */
    if ($3.value < $1.value) {
      TTCN_pattern_error("Invalid range in the character set: the "
        "character code of the lower bound (%u) is higher than that of the "
        "upper bound (%u).", (unsigned int)$1.value, (unsigned int)$3.value);
    }
    QuadSet *range = new QuadSet();
    range->add(new QuadInterval(Quad($1.value), Quad($3.value)));
    $$ = range;
  }
;
509 | ||
/* A character that may stand alone in a set or bound a range: `\t', `\r',
 * an ordinary character or a \q{...} quadruple. The result is the
 * character code stored in the `value' member. */
RE_Set_Range_Char:
  KW_BS_t { $$.value = '\t'; }
| KW_BS_r { $$.value = '\r'; }
| TOK_Char
  {
    /* Use the unsigned value of the byte: the signedness of plain char is
     * implementation-defined, so `$1 <= 0' only catches bytes above 127
     * where char is signed, and a negative char would be sign-extended
     * both in the error message and in the stored character code. */
    const unsigned char c = (unsigned char)$1;
    if (c == 0 || c > 127) TTCN_pattern_error("Character with code %u "
      "(0x%02x) cannot be used in a pattern for type charstring.",
      (unsigned int)c, (unsigned int)c);
    $$.value = c;
  }
| RE_Quadruple { $$.value = $1.value; }
;
521 | ||
/* Backslash classes inside a set. They expand to several characters and
 * therefore cannot be used as the bound of a range. `\b' is rejected with
 * an error and contributes an empty set. */
RE_Set_NoRange_Char:
  KW_BS_d
  {
    QuadSet *digits = new QuadSet();
    digits->add(new QuadInterval(Quad('0'), Quad('9')));
    $$ = digits;
  }
| KW_BS_w
  {
    QuadSet *word = new QuadSet();
    word->add(new QuadInterval(Quad('0'), Quad('9')));
    word->add(new QuadInterval(Quad('A'), Quad('Z')));
    word->add(new QuadInterval(Quad('a'), Quad('z')));
    $$ = word;
  }
| KW_BS_n
  {
    QuadSet *newlines = new QuadSet();
    newlines->add(new QuadInterval(Quad('\n'), Quad('\r')));
    $$ = newlines;
  }
| KW_BS_s
  {
    QuadSet *blanks = new QuadSet();
    blanks->add(new QuadInterval(Quad('\t'), Quad('\r')));
    blanks->add(new Quad(' '));
    $$ = blanks;
  }
| KW_BS_b
  {
    QuadSet *nothing = new QuadSet();
    TTCN_pattern_error("Metacharacter `\\b' does not make any sense in a "
      "character set.");
    $$ = nothing;
  }
;
553 | ||
/* A universal character written as \q{group,plane,row,cell}.
 * Each component is range-checked (group 0..127, the others 0..255) and
 * the all-zero quadruple is rejected; errors are only reported, the four
 * components are stored in every case. */
RE_Quadruple:
  KW_BS_q '{' TOK_Number ',' TOK_Number ',' TOK_Number ',' TOK_Number '}'
  {
    const unsigned long group = $3;
    const unsigned long plane = $5;
    const unsigned long row = $7;
    const unsigned long cell = $9;

    if (group > 127) TTCN_pattern_error("The first number (group) of quadruple "
      "`\\q{%lu,%lu,%lu,%lu}' is too large. It should be in the range 0..127 "
      "instead of %lu.", group, plane, row, cell, group);
    if (plane > 255) TTCN_pattern_error("The second number (plane) of quadruple "
      "`\\q{%lu,%lu,%lu,%lu}' is too large. It should be in the range 0..255 "
      "instead of %lu.", group, plane, row, cell, plane);
    if (row > 255) TTCN_pattern_error("The third number (row) of quadruple "
      "`\\q{%lu,%lu,%lu,%lu}' is too large. It should be in the range 0..255 "
      "instead of %lu.", group, plane, row, cell, row);
    if (cell > 255) TTCN_pattern_error("The fourth number (cell) of quadruple "
      "`\\q{%lu,%lu,%lu,%lu}' is too large. It should be in the range 0..255 "
      "instead of %lu.", group, plane, row, cell, cell);
    if (group == 0 && plane == 0 && row == 0 && cell == 0) TTCN_pattern_error("Zero "
      "character (i.e. quadruple `\\q{0,0,0,0}') is not supported in a "
      "pattern for type universal charstring.");

    $$.comp.group = (unsigned char)group;
    $$.comp.plane = (unsigned char)plane;
    $$.comp.row = (unsigned char)row;
    $$.comp.cell = (unsigned char)cell;
  }
;
578 | ||
579 | %% | |
580 | ||
581 | /********************************************************************* | |
582 | * Interface | |
583 | *********************************************************************/ | |
584 | ||
585 | char* TTCN_pattern_to_regexp_uni(const char* p_pattern, int** groups) | |
586 | { | |
587 | /* if you want to debug */ | |
588 | //pattern_unidebug=1; | |
589 | ||
590 | ret_val=NULL; | |
591 | user_groups = 0; | |
592 | ||
593 | yy_buffer_state *flex_buffer = pattern_yy_scan_string(p_pattern); | |
594 | if(flex_buffer == NULL) { | |
595 | TTCN_pattern_error("Flex buffer creation failed."); | |
596 | return NULL; | |
597 | } | |
598 | init_pattern_yylex(&yylval); | |
599 | if(pattern_uniparse()) { | |
600 | Free(ret_val); | |
601 | ret_val=NULL; | |
602 | } | |
603 | pattern_yy_delete_buffer(flex_buffer); | |
604 | ||
605 | // needed by regexp to find user specified groups | |
606 | if (user_groups && groups) { | |
607 | *groups = (int*)Malloc(sizeof(int) * (user_groups + 1)); | |
608 | (*groups)[0] = user_groups; | |
609 | ||
610 | int par = -1, index = 1; | |
611 | for (size_t i = 0; i < strlen(ret_val); i++) { | |
612 | if (ret_val[i] == '(') { | |
613 | par++; | |
614 | } | |
615 | if (ret_val[i] == '<') { | |
616 | ret_val[i] = '('; | |
617 | par++; | |
618 | (*groups)[index++] = par; | |
619 | } | |
620 | } | |
621 | } else if (groups) | |
622 | *groups = (int*)0; | |
623 | ||
624 | return ret_val; | |
625 | } | |
626 | ||
627 | // Backwards compatibility shim | |
628 | char* TTCN_pattern_to_regexp_uni(const char* p_pattern, int ere, int** /*groups*/) | |
629 | { | |
630 | TTCN_pattern_warning("TTCN_pattern_to_regexp_uni" | |
631 | "(const char* p_pattern, int ere, int** groups) is deprecated"); | |
632 | if (ere != 1) TTCN_pattern_error( | |
633 | "BRE is not supported for TTCN_pattern_to_regexp_uni"); | |
634 | return TTCN_pattern_to_regexp(p_pattern); | |
635 | } | |
636 | ||
637 | ||
638 | /********************************************************************* | |
639 | * Static functions | |
640 | *********************************************************************/ | |
641 | ||
642 | void pattern_unierror(const char *error_str) | |
643 | { | |
644 | TTCN_pattern_error("%s", error_str); | |
645 | } | |
646 | ||
647 | void yyprint(FILE *file, int type, const YYSTYPE& value) | |
648 | { | |
649 | switch (type) { | |
650 | case TOK_Char: | |
651 | fprintf(file, "'%c'", value.c); | |
652 | break; | |
653 | case TOK_Digit: case TOK_Number: | |
654 | fprintf(file, "'%lu'", value.u); | |
655 | break; | |
656 | default: | |
657 | break; | |
658 | } | |
659 | } | |
660 |