Commit | Line | Data |
---|---|---|
970ed795 | 1 | /****************************************************************************** |
3abe9331 | 2 | * Copyright (c) 2000-2015 Ericsson Telecom AB |
970ed795 EL |
3 | * All rights reserved. This program and the accompanying materials |
4 | * are made available under the terms of the Eclipse Public License v1.0 | |
5 | * which accompanies this distribution, and is available at | |
6 | * http://www.eclipse.org/legal/epl-v10.html | |
7 | ******************************************************************************/ | |
8 | ||
9 | /** | |
10 | * Based on pattern_p.y | |
11 | */ | |
12 | ||
13 | %{ | |
14 | ||
15 | /********************************************************************* | |
16 | * C(++) declarations | |
17 | *********************************************************************/ | |
18 | ||
19 | #include <stdio.h> | |
20 | #include <string.h> | |
21 | #include <ctype.h> | |
22 | ||
23 | #if defined(__CYGWIN__) && defined(__clang__) | |
24 | /* Cygwin's clang 3.0 has its own limits.h, which does not bring in | |
25 | the system's limits.h unless we define this macro: */ | |
26 | #define __STDC_HOSTED__ 1 | |
27 | #define _GCC_NEXT_LIMITS_H | |
28 | #endif | |
29 | #include <limits.h> | |
30 | ||
31 | #include <regex.h> | |
32 | #if !defined(RE_DUP_MAX) | |
33 | /* RE_DUP_MAX is defined in limits.h or regex.h, except on Cygwin 1.5 */ | |
34 | # include <sys/syslimits.h> | |
35 | #endif | |
36 | ||
37 | #include "memory.h" | |
38 | #include "pattern.hh" | |
39 | ||
40 | #include "Quadruple.hh" | |
41 | ||
42 | union YYSTYPE; | |
43 | /* defined in lexer c-file: */ | |
44 | ||
45 | extern int pattern_yylex(); | |
46 | inline int pattern_unilex() { return pattern_yylex(); } | |
47 | extern void init_pattern_yylex(YYSTYPE*); | |
48 | struct yy_buffer_state; | |
49 | extern yy_buffer_state* pattern_yy_scan_string(const char*); | |
50 | extern void pattern_yy_delete_buffer(yy_buffer_state*); | |
51 | extern unsigned int get_nof_parentheses(); | |
52 | ||
53 | /* defined in this file: */ | |
54 | ||
55 | /** The converted regexp. */ | |
56 | static char *ret_val; | |
57 | /** The parser error reporting function. */ | |
58 | static void pattern_unierror(const char *error_str); | |
59 | ||
60 | static int user_groups; | |
61 | ||
62 | #define YYERROR_VERBOSE | |
63 | ||
64 | static void yyprint(FILE *file, int type, const YYSTYPE& value); | |
65 | #define YYPRINT(f,t,v) yyprint(f,t,v) | |
66 | ||
67 | %} | |
68 | ||
69 | /********************************************************************* | |
70 | * Bison declarations | |
71 | *********************************************************************/ | |
72 | ||
73 | %name-prefix="pattern_uni" | |
74 | %output="pattern_uni.cc" | |
75 | %defines | |
76 | %verbose | |
77 | %expect 0 | |
78 | %start Pattern | |
79 | %debug | |
80 | ||
81 | /********************************************************************* | |
82 | * The union-type | |
83 | * Must be kept in sync with the one in pattern_p.y ! | |
84 | *********************************************************************/ | |
85 | ||
86 | %union { | |
87 | int b; /* boolean */ | |
88 | char c; /* single character */ | |
89 | char *s; /* character string */ | |
90 | unsigned long int u; /* unsigned integer */ | |
91 | struct character_set *set; // used by nonterminals in pattern_p.y | |
92 | ||
93 | union { | |
94 | unsigned int value; | |
95 | #if defined(__sparc__) || defined(__sparc) | |
96 | struct { | |
97 | unsigned char group; | |
98 | unsigned char plane; | |
99 | unsigned char row; | |
100 | unsigned char cell; | |
101 | } comp; | |
102 | #else | |
103 | struct { | |
104 | unsigned char cell; | |
105 | unsigned char row; | |
106 | unsigned char plane; | |
107 | unsigned char group; | |
108 | } comp; | |
109 | #endif | |
110 | } q; // single universal char, used by nonterminals in pattern_uni.y | |
111 | class QuadSet* qset; // used by nonterminals in pattern_uni.y | |
112 | } | |
113 | ||
114 | /********************************************************************* | |
115 | * Tokens | |
116 | *********************************************************************/ | |
117 | ||
118 | %token <c> TOK_Char "<ordinary character>" | |
119 | %token <u> TOK_Number "<number>" | |
120 | %token <u> TOK_Digit "<digit>" | |
121 | ||
122 | /********************************************************************* | |
123 | * Keywords | |
124 | *********************************************************************/ | |
125 | ||
126 | %token KW_BS_q "\\q" | |
127 | %token KW_BS_d "\\d" | |
128 | %token KW_BS_w "\\w" | |
129 | %token KW_BS_t "\\t" | |
130 | %token KW_BS_n "\\n" | |
131 | %token KW_BS_r "\\r" | |
132 | %token KW_BS_s "\\s" | |
133 | %token KW_BS_b "\\b" | |
134 | ||
135 | %token KW_Group_Begin "(" | |
136 | %token KW_Group_End ")" | |
137 | %token KW_Set_Begin "[" | |
138 | %token KW_Set_Begin_Neg "[^" | |
139 | %token KW_Set_Begin_Rsbrkt "[]" | |
140 | %token KW_Set_Begin_Neg_Rsbrkt "[^]" | |
141 | %token KW_Set_End "]" | |
142 | %token KW_Set_Dash_End "-]" | |
143 | ||
144 | /********************************************************************* | |
145 | * semantic types of nonterminals | |
146 | *********************************************************************/ | |
147 | ||
148 | %type <b> RE_Set_Begin RE_Set_Begin_Rsbrkt RE_Set_End | |
149 | %type <q> RE_Set_Range_Char RE_Quadruple | |
150 | %type <s> RE_Body RE_Elems RE_Alter_Elem RE_Concat_Elem | |
151 | RE_Multiply_Elem RE_Multiply_Statement RE_Group | |
152 | RE_OneCharPos | |
153 | %type <qset> RE_Set RE_Set_Body RE_Set_Elem RE_Set_NoRange_Char | |
154 | ||
155 | /********************************************************************* | |
156 | * Destructors | |
157 | *********************************************************************/ | |
158 | ||
159 | %destructor { Free($$); } | |
160 | RE_Alter_Elem | |
161 | RE_Body | |
162 | RE_Concat_Elem | |
163 | RE_Elems | |
164 | RE_Group | |
165 | RE_Multiply_Elem | |
166 | RE_Multiply_Statement | |
167 | RE_OneCharPos | |
168 | ||
169 | %destructor { delete $$; } | |
170 | RE_Set | |
171 | RE_Set_Body | |
172 | RE_Set_Elem | |
173 | RE_Set_NoRange_Char | |
174 | ||
175 | %% | |
176 | ||
177 | /********************************************************************* | |
178 | * Grammar | |
179 | *********************************************************************/ | |
180 | ||
/* Start symbol: stores the finished regular expression in ret_val. */
Pattern:
  RE_Body
  {
    ret_val = $1;
  }
;

/* The whole pattern: the translated body is wrapped between '^' and '$'.
 * An empty pattern, or a body that was reduced to NULL, yields "^$". */
RE_Body:
  /* empty */
  {
    $$ = mcopystr("^$");
  }
  | RE_Elems
  {
    if ($1 == NULL) {
      $$ = mcopystr("^$");
    } else {
      $$ = mprintf("^%s$", $1);
      Free($1);
    }
  }
;
198 | ||
/* Alternation. Alternatives are joined with '|'; when the parenthesis count
 * computed below is zero they are joined with "$|^" instead. A NULL operand
 * is represented by "()" unless both operands are NULL. */
RE_Elems:
  RE_Alter_Elem
  {
    $$ = $1;
  }
  | RE_Elems '|' RE_Alter_Elem
  {
    /* Parenthesis count reported by the lexer, plus one when the lookahead
     * token is the closing parenthesis of a group. */
    unsigned int nof_pars = get_nof_parentheses();
    if (yychar == KW_Group_End) nof_pars++;
    const bool no_pars = (nof_pars == 0);

    if ($3 == NULL) {
      if ($1 == NULL) {
        $$ = NULL;
      } else {
        $$ = mputstr($1, no_pars ? "$|^()" : "|()");
      }
    } else {
      if ($1 == NULL) {
        $$ = mprintf(no_pars ? "()$|^%s" : "()|%s", $3);
      } else {
        $$ = mputprintf($1, no_pars ? "$|^%s" : "|%s", $3);
      }
      Free($3);
    }
  }
;
214 | ||
/* Concatenation: each further element is appended to the text built so far. */
RE_Alter_Elem:
  RE_Concat_Elem
  {
    $$ = $1;
  }
  | RE_Alter_Elem RE_Concat_Elem
  {
    char *next_elem = $2;
    $$ = mputstr($1, next_elem);
    Free(next_elem);
  }
;
223 | ||
/* One element, optionally followed by a repetition specifier. If either the
 * element or the specifier is NULL, both are released and the result is
 * NULL. A bare '*' matches any number of "(........)" units. */
RE_Concat_Elem:
  RE_Multiply_Elem
  {
    $$ = $1;
  }
  | RE_Multiply_Elem RE_Multiply_Statement
  {
    if ($1 == NULL || $2 == NULL) {
      Free($1);
      Free($2);
      $$ = NULL;
    } else {
      $$ = mputstr($1, $2);
      Free($2);
    }
  }
  | '*'
  {
    $$ = mcopystr("(........)*");
  }
;

/* Something a repetition specifier can be applied to. */
RE_Multiply_Elem:
  RE_Group
  {
    $$ = $1;
  }
  | RE_OneCharPos
  {
    $$ = $1;
  }
;
244 | ||
/* User-written group. The opening parenthesis is emitted as '<' so that
 * TTCN_pattern_to_regexp_uni() can tell user groups apart from generated
 * ones; it turns every '<' back into '('. Each reduction is counted in
 * user_groups. */
RE_Group:
  KW_Group_Begin KW_Group_End
  {
    user_groups++;
    $$ = mcopystr("<)");
  }
  | KW_Group_Begin RE_Elems KW_Group_End
  {
    user_groups++;
    if ($2 == NULL) {
      $$ = mcopystr("<)");
    } else {
      $$ = mprintf("<%s)", $2);
      Free($2);
    }
  }
;
262 | ||
/* Repetition specifiers. The result is the POSIX repetition operator as a
 * string; an empty string stands for "exactly once" and NULL for "zero
 * times" (the caller then drops the repeated element). */
RE_Multiply_Statement:
  '+'
  {
    $$ = mcopystr("+");
  }
  | '#' '(' ',' ')'
  {
    $$ = mcopystr("*");
  }
  | '#' TOK_Digit
  {
    switch ($2) {
    case 0:
      TTCN_pattern_warning("The number of repetitions is zero: `#0'.");
      $$ = NULL;
      break;
    case 1:
      $$ = memptystr();
      break;
    default:
      if ($2 > 9) {
        TTCN_pattern_warning("Internal error: Invalid number of "
          "repetitions: `#%lu'.", $2);
      }
      $$ = mprintf("{%lu}", $2);
      break;
    }
  }
  | '#' '(' TOK_Number ')'
  {
    switch ($3) {
    case 0:
      TTCN_pattern_warning("The number of repetitions is zero: `#(0)'.");
      $$ = NULL;
      break;
    case 1:
      $$ = memptystr();
      break;
    default:
#ifdef RE_DUP_MAX
      if ($3 > RE_DUP_MAX) {
        TTCN_pattern_warning("The number of repetitions in "
          "`#(%lu)' exceeds the limit allowed by this system (%d).", $3,
          RE_DUP_MAX);
      }
#endif
      $$ = mprintf("{%lu}", $3);
      break;
    }
  }
  | '#' '(' TOK_Number ',' TOK_Number ')'
  {
    const unsigned long min_rep = $3;
    const unsigned long max_rep = $5;
#ifdef RE_DUP_MAX
    if (min_rep > RE_DUP_MAX) {
      TTCN_pattern_warning("The minimum number of "
        "repetitions in `#(%lu,%lu)' exceeds the limit allowed by this system "
        "(%d).", min_rep, max_rep, RE_DUP_MAX);
    }
    if (max_rep > RE_DUP_MAX) {
      TTCN_pattern_warning("The maximum number of "
        "repetitions in `#(%lu,%lu)' exceeds the limit allowed by this system "
        "(%d).", min_rep, max_rep, RE_DUP_MAX);
    }
#endif
    if (min_rep > max_rep) {
      TTCN_pattern_error("The lower bound is higher than the upper "
        "bound in the number of repetitions: `#(%lu,%lu)'.", min_rep, max_rep);
    }
    if (min_rep != max_rep) {
      if (min_rep == 0 && max_rep == 1) {
        $$ = mcopystr("?");
      } else {
        $$ = mprintf("{%lu,%lu}", min_rep, max_rep);
      }
    } else if (min_rep == 0) {
      TTCN_pattern_warning("The number of repetitions is zero: `#(0,0)'.");
      $$ = NULL;
    } else if (min_rep == 1) {
      $$ = memptystr();
    } else {
      $$ = mprintf("{%lu}", min_rep);
    }
  }
  | '#' '(' ',' TOK_Number ')'
  {
    const unsigned long max_rep = $4;
    if (max_rep == 0) {
      TTCN_pattern_warning("The number of repetitions is zero: `#(,0)'.");
      $$ = NULL;
    } else {
#ifdef RE_DUP_MAX
      if (max_rep > RE_DUP_MAX) {
        TTCN_pattern_warning("The maximum number of "
          "repetitions in `#(,%lu)' exceeds the limit allowed by this system "
          "(%d).", max_rep, RE_DUP_MAX);
      }
#endif
      if (max_rep == 1) {
        $$ = mcopystr("?");
      } else {
        $$ = mprintf("{0,%lu}", max_rep);
      }
    }
  }
  | '#' '(' TOK_Number ',' ')'
  {
    const unsigned long min_rep = $3;
    if (min_rep == 0) {
      $$ = mcopystr("*");
    } else {
#ifdef RE_DUP_MAX
      if (min_rep > RE_DUP_MAX) {
        TTCN_pattern_warning("The minimum number of "
          "repetitions in `#(%lu,)' exceeds the limit allowed by this system "
          "(%d).", min_rep, RE_DUP_MAX);
      }
#endif
      if (min_rep == 1) {
        $$ = mcopystr("+");
      } else {
        $$ = mprintf("{%lu,}", min_rep);
      }
    }
  }
;
359 | ||
/* Anything that matches exactly one character position. The literal
 * strings below spell each character as eight letters (e.g. "AAAAAAAJ" for
 * \t); "(........)" therefore matches any single encoded character.
 * NULL is returned for constructs that cannot be translated. */
RE_OneCharPos:
  '?' {$$=mcopystr("(........)");}
  | KW_BS_d {$$=mcopystr("(AAAAAAD[A-J])");}
  | KW_BS_w {$$=mcopystr("(AAAAAAD[A-J]|AAAAAA(E[B-P]|F[A-K])|AAAAAA(G[B-P]|H[A-K]))");}
  | KW_BS_t {$$=mcopystr("AAAAAAAJ");}
  | KW_BS_n {$$=mcopystr("(AAAAAAA[K-N])");}
  | KW_BS_r {$$=mcopystr("AAAAAAAN");}
  | KW_BS_s {$$=mcopystr("(AAAAAAA[J-N]|AAAAAACA)");}
  | KW_BS_b
  {
    TTCN_pattern_warning("Metacharacter `\\b' is not supported yet.");
    $$ = NULL;
  }
  | TOK_Char
  {
    unsigned char c = $1;
    /* $1 is a plain `char'. Passing it straight to a variadic function
     * promotes it to int, so a negative value would not match %u/%02x and
     * would be printed sign-extended. Report the unsigned character code. */
    if ($1 <= 0) TTCN_pattern_error("Character with code %u "
      "(0x%02x) cannot be used in a pattern for type charstring.",
      (unsigned int)c, (unsigned int)c);
    $$ = Quad::get_hexrepr(c);
  }
  | RE_Quadruple
  {
    $$ = Quad::get_hexrepr($1.value);
  }
  | RE_Set
  {
    if ($1->is_empty()) {
      TTCN_pattern_error("Empty character set.");
      $$ = NULL;
    } else
      $$ = $1->generate_posix();
    /* the set object is no longer needed in either case */
    delete $1;
  }
;
394 | ||
/* Character set in brackets. The result is a QuadSet owned by the parser
 * stack (see the %destructor for RE_Set). */
RE_Set:
  /* RE_Set_Begin is 1 for "[^", 0 for "["
   * RE_Set_Begin_Rsbrkt is 1 for "[^]", 0 for "[]"
   * RE_Set_End is 1 for "-]", 0 for "]"
   */
  RE_Set_Begin RE_Set_Body RE_Set_End
  {
    if ($2 != NULL)
      $$ = $2;
    else
      $$ = new QuadSet();
    if ($3 && !$$->add(new Quad('-')))
      TTCN_pattern_warning("Duplicate character `-' in the character set.");
    if ($1)
      $$->set_negate(true);
  }
  | RE_Set_Begin '-' RE_Set_Body RE_Set_End
  {
    /* leading '-' is an ordinary member of the set */
    if ($3 != NULL)
      $$ = $3;
    else
      $$ = new QuadSet();
    if (!$$->add(new Quad('-')))
      TTCN_pattern_warning("Duplicate character `-' in the character set.");
    if ($1)
      $$->set_negate(true);
  }
  | RE_Set_Begin_Rsbrkt RE_Set_Body RE_Set_End
  {
    /* "[]" / "[^]": the ']' right after the opening is a member of the set */
    if ($2 != NULL)
      $$ = $2;
    else
      $$ = new QuadSet();
    if (!$$->add(new Quad(']')))
      TTCN_pattern_warning("Duplicate character `]' in the character set.");
    if ($3 && !$$->add(new Quad('-')))
      TTCN_pattern_warning("Duplicate character `-' in the character set.");
    if ($1)
      $$->set_negate(true);
  }
  | RE_Set_Begin_Rsbrkt '-' RE_Set_Range_Char RE_Set_Body RE_Set_End
  {
    /* "[]-x": a range whose lower bound is ']' */
    if ($4 != NULL)
      $$ = $4;
    else
      $$ = new QuadSet();
    if ((unsigned int)']' > $3.value) {
      /* Both arguments are cast so that they match the %u conversions; the
       * bare character literal would be passed as a (promoted) int. */
      TTCN_pattern_error("Invalid range in the character set: the "
        "character code of the lower bound (%u) is higher than that of the "
        "upper bound (%u).", (unsigned int)']', (unsigned int)$3.value);
    }
    $$->add(new QuadInterval(Quad(']'), Quad($3.value)));
    if ($5) {
      if (!$$->add(new Quad('-')))
        TTCN_pattern_warning("Duplicate character `-' in the character set.");
    }
    if ($1)
      $$->set_negate(true);
  }
;
455 | ||
/* Opening bracket of a set: 1 for the negated form "[^", 0 for "[". */
RE_Set_Begin:
  KW_Set_Begin
  {
    $$ = 0;
  }
  | KW_Set_Begin_Neg
  {
    $$ = 1;
  }
;

/* Opening bracket immediately followed by ']': 1 for "[^]", 0 for "[]". */
RE_Set_Begin_Rsbrkt:
  KW_Set_Begin_Rsbrkt
  {
    $$ = 0;
  }
  | KW_Set_Begin_Neg_Rsbrkt
  {
    $$ = 1;
  }
;

/* Closing bracket of a set: 1 for "-]", 0 for "]". */
RE_Set_End:
  KW_Set_End
  {
    $$ = 0;
  }
  | KW_Set_Dash_End
  {
    $$ = 1;
  }
;
470 | ||
/* Members of a set. NULL for an empty body; otherwise the first element's
 * set, into which every further element is joined and then deleted. */
RE_Set_Body:
  /* empty */
  {
    $$ = NULL;
  }
  | RE_Set_Body RE_Set_Elem
  {
    if ($1 == NULL) {
      $$ = $2;
    } else {
      $$ = $1;
      $$->join($2);
      delete $2;
    }
  }
;
483 | ||
/* One member of a set: a single character, a predefined class, or a range.
 * Each alternative yields a freshly allocated (or forwarded) QuadSet. */
RE_Set_Elem:
  RE_Set_Range_Char
  {
    QuadSet *single = new QuadSet();
    single->add(new Quad($1.value));
    $$ = single;
  }
  | RE_Set_NoRange_Char
  {
    $$ = $1;
  }
  | RE_Set_Range_Char '-' RE_Set_Range_Char
  {
    const unsigned int lower = $1.value;
    const unsigned int upper = $3.value;
    if (lower > upper) {
      TTCN_pattern_error("Invalid range in the character set: the "
        "character code of the lower bound (%u) is higher than that of the "
        "upper bound (%u).", lower, upper);
    }
    /* the interval is added even after the error has been reported */
    QuadSet *range = new QuadSet();
    range->add(new QuadInterval(Quad(lower), Quad(upper)));
    $$ = range;
  }
;
502 | ||
/* A character that may serve as a bound of a range inside a set. The
 * result is the 32-bit character code in $$.value. */
RE_Set_Range_Char:
  KW_BS_t { $$.value = '\t'; }
  | KW_BS_r { $$.value = '\r'; }
  | TOK_Char
  {
    /* $1 is a plain `char'. Go through unsigned char so that a negative
     * value is neither printed sign-extended (it would not match %u/%02x)
     * nor stored sign-extended in the unsigned code; this mirrors the
     * TOK_Char handling in RE_OneCharPos. */
    unsigned char c = $1;
    if ($1 <= 0) TTCN_pattern_error("Character with code %u "
      "(0x%02x) cannot be used in a pattern for type charstring.",
      (unsigned int)c, (unsigned int)c);
    $$.value = c;
  }
  | RE_Quadruple { $$.value = $1.value; }
;
514 | ||
/* Predefined character classes inside a set; these cannot be used as the
 * bound of a range. Each alternative builds a new QuadSet. */
RE_Set_NoRange_Char:
  KW_BS_d
  {
    QuadSet *digits = new QuadSet();
    digits->add(new QuadInterval(Quad('0'), Quad('9')));
    $$ = digits;
  }
  | KW_BS_w
  {
    QuadSet *word_chars = new QuadSet();
    word_chars->add(new QuadInterval(Quad('0'), Quad('9')));
    word_chars->add(new QuadInterval(Quad('A'), Quad('Z')));
    word_chars->add(new QuadInterval(Quad('a'), Quad('z')));
    $$ = word_chars;
  }
  | KW_BS_n
  {
    QuadSet *newlines = new QuadSet();
    newlines->add(new QuadInterval(Quad('\n'), Quad('\r')));
    $$ = newlines;
  }
  | KW_BS_s
  {
    QuadSet *blanks = new QuadSet();
    blanks->add(new QuadInterval(Quad('\t'), Quad('\r')));
    blanks->add(new Quad(' '));
    $$ = blanks;
  }
  | KW_BS_b
  {
    /* an empty set is still produced so that later actions get an object */
    $$ = new QuadSet();
    TTCN_pattern_error("Metacharacter `\\b' does not make any sense in a "
      "character set.");
  }
;
546 | ||
/* Universal character given as \q{group,plane,row,cell}. Every component
 * is range-checked (group 0..127, the others 0..255) and the all-zero
 * quadruple is rejected; the components are stored even after an error. */
RE_Quadruple:
  KW_BS_q '{' TOK_Number ',' TOK_Number ',' TOK_Number ',' TOK_Number '}'
  {
    const unsigned long group = $3;
    const unsigned long plane = $5;
    const unsigned long row = $7;
    const unsigned long cell = $9;

    if (group > 127) {
      TTCN_pattern_error("The first number (group) of quadruple "
        "`\\q{%lu,%lu,%lu,%lu}' is too large. It should be in the range 0..127 "
        "instead of %lu.", group, plane, row, cell, group);
    }
    if (plane > 255) {
      TTCN_pattern_error("The second number (plane) of quadruple "
        "`\\q{%lu,%lu,%lu,%lu}' is too large. It should be in the range 0..255 "
        "instead of %lu.", group, plane, row, cell, plane);
    }
    if (row > 255) {
      TTCN_pattern_error("The third number (row) of quadruple "
        "`\\q{%lu,%lu,%lu,%lu}' is too large. It should be in the range 0..255 "
        "instead of %lu.", group, plane, row, cell, row);
    }
    if (cell > 255) {
      TTCN_pattern_error("The fourth number (cell) of quadruple "
        "`\\q{%lu,%lu,%lu,%lu}' is too large. It should be in the range 0..255 "
        "instead of %lu.", group, plane, row, cell, cell);
    }
    if (group == 0 && plane == 0 && row == 0 && cell == 0) {
      TTCN_pattern_error("Zero "
        "character (i.e. quadruple `\\q{0,0,0,0}') is not supported in a "
        "pattern for type universal charstring.");
    }

    $$.comp.group = group;
    $$.comp.plane = plane;
    $$.comp.row = row;
    $$.comp.cell = cell;
  }
;
571 | ||
572 | %% | |
573 | ||
574 | /********************************************************************* | |
575 | * Interface | |
576 | *********************************************************************/ | |
577 | ||
578 | char* TTCN_pattern_to_regexp_uni(const char* p_pattern, int** groups) | |
579 | { | |
580 | /* if you want to debug */ | |
581 | //pattern_unidebug=1; | |
582 | ||
583 | ret_val=NULL; | |
584 | user_groups = 0; | |
585 | ||
586 | yy_buffer_state *flex_buffer = pattern_yy_scan_string(p_pattern); | |
587 | if(flex_buffer == NULL) { | |
588 | TTCN_pattern_error("Flex buffer creation failed."); | |
589 | return NULL; | |
590 | } | |
591 | init_pattern_yylex(&yylval); | |
592 | if(pattern_uniparse()) { | |
593 | Free(ret_val); | |
594 | ret_val=NULL; | |
595 | } | |
596 | pattern_yy_delete_buffer(flex_buffer); | |
597 | ||
598 | // needed by regexp to find user specified groups | |
599 | if (user_groups && groups) { | |
600 | *groups = (int*)Malloc(sizeof(int) * (user_groups + 1)); | |
601 | (*groups)[0] = user_groups; | |
602 | ||
603 | int par = -1, index = 1; | |
604 | for (size_t i = 0; i < strlen(ret_val); i++) { | |
605 | if (ret_val[i] == '(') { | |
606 | par++; | |
607 | } | |
608 | if (ret_val[i] == '<') { | |
609 | ret_val[i] = '('; | |
610 | par++; | |
611 | (*groups)[index++] = par; | |
612 | } | |
613 | } | |
614 | } else if (groups) | |
615 | *groups = (int*)0; | |
616 | ||
617 | return ret_val; | |
618 | } | |
619 | ||
620 | // Backwards compatibility shim | |
621 | char* TTCN_pattern_to_regexp_uni(const char* p_pattern, int ere, int** /*groups*/) | |
622 | { | |
623 | TTCN_pattern_warning("TTCN_pattern_to_regexp_uni" | |
624 | "(const char* p_pattern, int ere, int** groups) is deprecated"); | |
625 | if (ere != 1) TTCN_pattern_error( | |
626 | "BRE is not supported for TTCN_pattern_to_regexp_uni"); | |
627 | return TTCN_pattern_to_regexp(p_pattern); | |
628 | } | |
629 | ||
630 | ||
631 | /********************************************************************* | |
632 | * Static functions | |
633 | *********************************************************************/ | |
634 | ||
635 | void pattern_unierror(const char *error_str) | |
636 | { | |
637 | TTCN_pattern_error("%s", error_str); | |
638 | } | |
639 | ||
640 | void yyprint(FILE *file, int type, const YYSTYPE& value) | |
641 | { | |
642 | switch (type) { | |
643 | case TOK_Char: | |
644 | fprintf(file, "'%c'", value.c); | |
645 | break; | |
646 | case TOK_Digit: case TOK_Number: | |
647 | fprintf(file, "'%lu'", value.u); | |
648 | break; | |
649 | default: | |
650 | break; | |
651 | } | |
652 | } | |
653 |