* core-aout.c (fetch_core_registers): Cast core_reg_size to int
[deliverable/binutils-gdb.git] / gdb / gnu-regex.c
CommitLineData
dd3b648e
RP
1/* Extended regular expression matching and search library.
2 Copyright (C) 1985, 1989 Free Software Foundation, Inc.
3
99a7de40
JG
4This program is free software; you can redistribute it and/or modify
5it under the terms of the GNU General Public License as published by
6the Free Software Foundation; either version 2 of the License, or
7(at your option) any later version.
8
9This program is distributed in the hope that it will be useful,
10but WITHOUT ANY WARRANTY; without even the implied warranty of
11MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12GNU General Public License for more details.
13
14You should have received a copy of the GNU General Public License
15along with this program; if not, write to the Free Software
6c9638b4 16Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
dd3b648e
RP
17
18/* To test, compile with -Dtest.
19 This Dtestable feature turns this into a self-contained program
20 which reads a pattern, describes how it compiles,
21 then reads a string and searches for it. */
22
23#ifdef emacs
24
25/* The `emacs' switch turns on certain special matching commands
26 that make sense only in emacs. */
27
28#include "config.h"
29#include "lisp.h"
30#include "buffer.h"
31#include "syntax.h"
32
33#else /* not emacs */
34
ba47c66a 35#include "defs.h"
2b576293 36#include "gdb_string.h"
6405302d
FF
37#undef malloc
38#define malloc xmalloc
dd3b648e
RP
39
40/*
41 * Define the syntax stuff, so we can do the \<...\> things.
42 */
43
44#ifndef Sword /* must be non-zero in some of the tests below... */
45#define Sword 1
46#endif
47
48#define SYNTAX(c) re_syntax_table[c]
49
50#ifdef SYNTAX_TABLE
51
52char *re_syntax_table;
53
54#else
55
56static char re_syntax_table[256];
57
58static void
59init_syntax_once ()
60{
61 register int c;
62 static int done = 0;
63
64 if (done)
65 return;
66
4ed97c9a 67 memset (re_syntax_table, '\0', sizeof re_syntax_table);
dd3b648e
RP
68
69 for (c = 'a'; c <= 'z'; c++)
70 re_syntax_table[c] = Sword;
71
72 for (c = 'A'; c <= 'Z'; c++)
73 re_syntax_table[c] = Sword;
74
75 for (c = '0'; c <= '9'; c++)
76 re_syntax_table[c] = Sword;
77
78 done = 1;
79}
80
81#endif /* SYNTAX_TABLE */
82#endif /* not emacs */
83
811f1bdc 84#include "gnu-regex.h"
dd3b648e
RP
85
86/* Number of failure points to allocate space for initially,
87 when matching. If this number is exceeded, more space is allocated,
88 so it is not a hard limit. */
89
90#ifndef NFAILURES
91#define NFAILURES 80
92#endif /* NFAILURES */
93
94/* width of a byte in bits */
95
96#define BYTEWIDTH 8
97
0d98155c
PS
98/* We remove any previous definition of `SIGN_EXTEND_CHAR',
99 since ours (we hope) works properly with all combinations of
100 machines, compilers, `char' and `unsigned char' argument types.
101 (Per Bothner suggested the basic approach.) */
102#undef SIGN_EXTEND_CHAR
103#if __STDC__
104#define SIGN_EXTEND_CHAR(c) ((signed char) (c))
105#else /* not __STDC__ */
106/* As in Harbison and Steele. */
107#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
dd3b648e
RP
108#endif
109\f
110static int obscure_syntax = 0;
111
112/* Specify the precise syntax of regexp for compilation.
113 This provides for compatibility for various utilities
114 which historically have different, incompatible syntaxes.
115
116 The argument SYNTAX is a bit-mask containing the two bits
117 RE_NO_BK_PARENS and RE_NO_BK_VBAR. */
118
119int
120re_set_syntax (syntax)
51b57ded 121 int syntax;
dd3b648e
RP
122{
123 int ret;
124
125 ret = obscure_syntax;
126 obscure_syntax = syntax;
127 return ret;
128}
129\f
130/* re_compile_pattern takes a regular-expression string
131 and converts it into a buffer full of byte commands for matching.
132
133 PATTERN is the address of the pattern string
134 SIZE is the length of it.
135 BUFP is a struct re_pattern_buffer * which points to the info
136 on where to store the byte commands.
137 This structure contains a char * which points to the
138 actual space, which should have been obtained with malloc.
139 re_compile_pattern may use realloc to grow the buffer space.
140
141 The number of bytes of commands can be found out by looking in
142 the struct re_pattern_buffer that bufp pointed to,
143 after re_compile_pattern returns.
144*/
145
146#define PATPUSH(ch) (*b++ = (char) (ch))
147
148#define PATFETCH(c) \
149 {if (p == pend) goto end_of_pattern; \
150 c = * (unsigned char *) p++; \
151 if (translate) c = translate[c]; }
152
153#define PATFETCH_RAW(c) \
154 {if (p == pend) goto end_of_pattern; \
155 c = * (unsigned char *) p++; }
156
157#define PATUNFETCH p--
158
ee6d646a
JK
159/* This is not an arbitrary limit: the arguments which represent offsets
160 into the pattern are two bytes long. So if 2^16 bytes turns out to
161 be too small, many things would have to change. */
162#define MAX_BUF_SIZE (1 << 16)
163
164
165/* Extend the buffer by twice its current size via realloc and
166 reset the pointers that pointed into the old block to point to the
167 correct places in the new one. If extending the buffer results in it
168 being larger than MAX_BUF_SIZE, then flag memory exhausted. */
169#define EXTEND_BUFFER \
170 do { \
171 char *old_buffer = bufp->buffer; \
172 if (bufp->allocated == MAX_BUF_SIZE) \
173 goto too_big; \
174 bufp->allocated <<= 1; \
175 if (bufp->allocated > MAX_BUF_SIZE) \
176 bufp->allocated = MAX_BUF_SIZE; \
177 bufp->buffer = (char *) realloc (bufp->buffer, bufp->allocated);\
178 if (bufp->buffer == NULL) \
179 goto memory_exhausted; \
180 /* If the buffer moved, move all the pointers into it. */ \
181 if (old_buffer != bufp->buffer) \
182 { \
183 b = (b - old_buffer) + bufp->buffer; \
184 begalt = (begalt - old_buffer) + bufp->buffer; \
185 if (fixup_jump) \
186 fixup_jump = (fixup_jump - old_buffer) + bufp->buffer;\
187 if (laststart) \
188 laststart = (laststart - old_buffer) + bufp->buffer; \
189 if (pending_exact) \
190 pending_exact = (pending_exact - old_buffer) + bufp->buffer; \
191 } \
192 } while (0)
dd3b648e 193
50e0dc41 194static void store_jump (), insert_jump ();
dd3b648e
RP
195
196char *
197re_compile_pattern (pattern, size, bufp)
198 char *pattern;
199 int size;
200 struct re_pattern_buffer *bufp;
201{
202 register char *b = bufp->buffer;
203 register char *p = pattern;
204 char *pend = pattern + size;
205 register unsigned c, c1;
206 char *p1;
207 unsigned char *translate = (unsigned char *) bufp->translate;
208
209 /* address of the count-byte of the most recently inserted "exactn" command.
210 This makes it possible to tell whether a new exact-match character
211 can be added to that command or requires a new "exactn" command. */
212
213 char *pending_exact = 0;
214
215 /* address of the place where a forward-jump should go
216 to the end of the containing expression.
217 Each alternative of an "or", except the last, ends with a forward-jump
218 of this sort. */
219
220 char *fixup_jump = 0;
221
222 /* address of start of the most recently finished expression.
223 This tells postfix * where to find the start of its operand. */
224
225 char *laststart = 0;
226
227 /* In processing a repeat, 1 means zero matches is allowed */
228
229 char zero_times_ok;
230
231 /* In processing a repeat, 1 means many matches is allowed */
232
233 char many_times_ok;
234
235 /* address of beginning of regexp, or inside of last \( */
236
237 char *begalt = b;
238
239 /* Stack of information saved by \( and restored by \).
240 Four stack elements are pushed by each \(:
241 First, the value of b.
242 Second, the value of fixup_jump.
243 Third, the value of regnum.
244 Fourth, the value of begalt. */
245
246 int stackb[40];
247 int *stackp = stackb;
248 int *stacke = stackb + 40;
249 int *stackt;
250
251 /* Counts \('s as they are encountered. Remembered for the matching \),
252 where it becomes the "register number" to put in the stop_memory command */
253
254 int regnum = 1;
255
256 bufp->fastmap_accurate = 0;
257
258#ifndef emacs
259#ifndef SYNTAX_TABLE
260 /*
261 * Initialize the syntax table.
262 */
263 init_syntax_once();
264#endif
265#endif
266
267 if (bufp->allocated == 0)
268 {
269 bufp->allocated = 28;
270 if (bufp->buffer)
271 /* EXTEND_BUFFER loses when bufp->allocated is 0 */
272 bufp->buffer = (char *) realloc (bufp->buffer, 28);
273 else
274 /* Caller did not allocate a buffer. Do it for him */
275 bufp->buffer = (char *) malloc (28);
276 if (!bufp->buffer) goto memory_exhausted;
277 begalt = b = bufp->buffer;
278 }
279
280 while (p != pend)
281 {
282 if (b - bufp->buffer > bufp->allocated - 10)
283 /* Note that EXTEND_BUFFER clobbers c */
284 EXTEND_BUFFER;
285
286 PATFETCH (c);
287
288 switch (c)
289 {
290 case '$':
291 if (obscure_syntax & RE_TIGHT_VBAR)
292 {
293 if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS) && p != pend)
294 goto normal_char;
295 /* Make operand of last vbar end before this `$'. */
296 if (fixup_jump)
297 store_jump (fixup_jump, jump, b);
298 fixup_jump = 0;
299 PATPUSH (endline);
300 break;
301 }
302
303 /* $ means succeed if at end of line, but only in special contexts.
304 If randomly in the middle of a pattern, it is a normal character. */
305 if (p == pend || *p == '\n'
306 || (obscure_syntax & RE_CONTEXT_INDEP_OPS)
307 || (obscure_syntax & RE_NO_BK_PARENS
308 ? *p == ')'
309 : *p == '\\' && p[1] == ')')
310 || (obscure_syntax & RE_NO_BK_VBAR
311 ? *p == '|'
312 : *p == '\\' && p[1] == '|'))
313 {
314 PATPUSH (endline);
315 break;
316 }
317 goto normal_char;
318
319 case '^':
320 /* ^ means succeed if at beg of line, but only if no preceding pattern. */
321
322 if (laststart && p[-2] != '\n'
323 && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
324 goto normal_char;
325 if (obscure_syntax & RE_TIGHT_VBAR)
326 {
327 if (p != pattern + 1
328 && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
329 goto normal_char;
330 PATPUSH (begline);
331 begalt = b;
332 }
333 else
334 PATPUSH (begline);
335 break;
336
337 case '+':
338 case '?':
339 if (obscure_syntax & RE_BK_PLUS_QM)
340 goto normal_char;
341 handle_plus:
342 case '*':
343 /* If there is no previous pattern, char not special. */
344 if (!laststart && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
345 goto normal_char;
346 /* If there is a sequence of repetition chars,
347 collapse it down to equivalent to just one. */
348 zero_times_ok = 0;
349 many_times_ok = 0;
350 while (1)
351 {
352 zero_times_ok |= c != '+';
353 many_times_ok |= c != '?';
354 if (p == pend)
355 break;
356 PATFETCH (c);
357 if (c == '*')
358 ;
359 else if (!(obscure_syntax & RE_BK_PLUS_QM)
360 && (c == '+' || c == '?'))
361 ;
362 else if ((obscure_syntax & RE_BK_PLUS_QM)
363 && c == '\\')
364 {
365 int c1;
366 PATFETCH (c1);
367 if (!(c1 == '+' || c1 == '?'))
368 {
369 PATUNFETCH;
370 PATUNFETCH;
371 break;
372 }
373 c = c1;
374 }
375 else
376 {
377 PATUNFETCH;
378 break;
379 }
380 }
381
382 /* Star, etc. applied to an empty pattern is equivalent
383 to an empty pattern. */
384 if (!laststart)
385 break;
386
387 /* Now we know whether 0 matches is allowed,
388 and whether 2 or more matches is allowed. */
389 if (many_times_ok)
390 {
391 /* If more than one repetition is allowed,
392 put in a backward jump at the end. */
393 store_jump (b, maybe_finalize_jump, laststart - 3);
394 b += 3;
395 }
396 insert_jump (on_failure_jump, laststart, b + 3, b);
397 pending_exact = 0;
398 b += 3;
399 if (!zero_times_ok)
400 {
401 /* At least one repetition required: insert before the loop
402 a skip over the initial on-failure-jump instruction */
403 insert_jump (dummy_failure_jump, laststart, laststart + 6, b);
404 b += 3;
405 }
406 break;
407
408 case '.':
409 laststart = b;
410 PATPUSH (anychar);
411 break;
412
413 case '[':
414 while (b - bufp->buffer
415 > bufp->allocated - 3 - (1 << BYTEWIDTH) / BYTEWIDTH)
416 /* Note that EXTEND_BUFFER clobbers c */
417 EXTEND_BUFFER;
418
419 laststart = b;
420 if (*p == '^')
421 PATPUSH (charset_not), p++;
422 else
423 PATPUSH (charset);
424 p1 = p;
425
426 PATPUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
427 /* Clear the whole map */
4ed97c9a 428 memset (b, '\0', (1 << BYTEWIDTH) / BYTEWIDTH);
dd3b648e
RP
429 /* Read in characters and ranges, setting map bits */
430 while (1)
431 {
432 PATFETCH (c);
433 if (c == ']' && p != p1 + 1) break;
434 if (*p == '-' && p[1] != ']')
435 {
436 PATFETCH (c1);
437 PATFETCH (c1);
438 while (c <= c1)
439 b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH), c++;
440 }
441 else
442 {
443 b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH);
444 }
445 }
446 /* Discard any bitmap bytes that are all 0 at the end of the map.
447 Decrement the map-length byte too. */
448 while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
449 b[-1]--;
450 b += b[-1];
451 break;
452
453 case '(':
454 if (! (obscure_syntax & RE_NO_BK_PARENS))
455 goto normal_char;
456 else
457 goto handle_open;
458
459 case ')':
460 if (! (obscure_syntax & RE_NO_BK_PARENS))
461 goto normal_char;
462 else
463 goto handle_close;
464
465 case '\n':
466 if (! (obscure_syntax & RE_NEWLINE_OR))
467 goto normal_char;
468 else
469 goto handle_bar;
470
471 case '|':
472 if (! (obscure_syntax & RE_NO_BK_VBAR))
473 goto normal_char;
474 else
475 goto handle_bar;
476
477 case '\\':
478 if (p == pend) goto invalid_pattern;
479 PATFETCH_RAW (c);
480 switch (c)
481 {
482 case '(':
483 if (obscure_syntax & RE_NO_BK_PARENS)
484 goto normal_backsl;
485 handle_open:
486 if (stackp == stacke) goto nesting_too_deep;
487 if (regnum < RE_NREGS)
488 {
489 PATPUSH (start_memory);
490 PATPUSH (regnum);
491 }
492 *stackp++ = b - bufp->buffer;
493 *stackp++ = fixup_jump ? fixup_jump - bufp->buffer + 1 : 0;
494 *stackp++ = regnum++;
495 *stackp++ = begalt - bufp->buffer;
496 fixup_jump = 0;
497 laststart = 0;
498 begalt = b;
499 break;
500
501 case ')':
502 if (obscure_syntax & RE_NO_BK_PARENS)
503 goto normal_backsl;
504 handle_close:
505 if (stackp == stackb) goto unmatched_close;
506 begalt = *--stackp + bufp->buffer;
507 if (fixup_jump)
508 store_jump (fixup_jump, jump, b);
509 if (stackp[-1] < RE_NREGS)
510 {
511 PATPUSH (stop_memory);
512 PATPUSH (stackp[-1]);
513 }
514 stackp -= 2;
515 fixup_jump = 0;
516 if (*stackp)
517 fixup_jump = *stackp + bufp->buffer - 1;
518 laststart = *--stackp + bufp->buffer;
519 break;
520
521 case '|':
522 if (obscure_syntax & RE_NO_BK_VBAR)
523 goto normal_backsl;
524 handle_bar:
525 insert_jump (on_failure_jump, begalt, b + 6, b);
526 pending_exact = 0;
527 b += 3;
528 if (fixup_jump)
529 store_jump (fixup_jump, jump, b);
530 fixup_jump = b;
531 b += 3;
532 laststart = 0;
533 begalt = b;
534 break;
535
536#ifdef emacs
537 case '=':
538 PATPUSH (at_dot);
539 break;
540
541 case 's':
542 laststart = b;
543 PATPUSH (syntaxspec);
544 PATFETCH (c);
545 PATPUSH (syntax_spec_code[c]);
546 break;
547
548 case 'S':
549 laststart = b;
550 PATPUSH (notsyntaxspec);
551 PATFETCH (c);
552 PATPUSH (syntax_spec_code[c]);
553 break;
554#endif /* emacs */
555
556 case 'w':
557 laststart = b;
558 PATPUSH (wordchar);
559 break;
560
561 case 'W':
562 laststart = b;
563 PATPUSH (notwordchar);
564 break;
565
566 case '<':
567 PATPUSH (wordbeg);
568 break;
569
570 case '>':
571 PATPUSH (wordend);
572 break;
573
574 case 'b':
575 PATPUSH (wordbound);
576 break;
577
578 case 'B':
579 PATPUSH (notwordbound);
580 break;
581
582 case '`':
583 PATPUSH (begbuf);
584 break;
585
586 case '\'':
587 PATPUSH (endbuf);
588 break;
589
590 case '1':
591 case '2':
592 case '3':
593 case '4':
594 case '5':
595 case '6':
596 case '7':
597 case '8':
598 case '9':
599 c1 = c - '0';
600 if (c1 >= regnum)
601 goto normal_char;
602 for (stackt = stackp - 2; stackt > stackb; stackt -= 4)
603 if (*stackt == c1)
604 goto normal_char;
605 laststart = b;
606 PATPUSH (duplicate);
607 PATPUSH (c1);
608 break;
609
610 case '+':
611 case '?':
612 if (obscure_syntax & RE_BK_PLUS_QM)
613 goto handle_plus;
614
615 default:
616 normal_backsl:
617 /* You might think it would be useful for \ to mean
618 not to translate; but if we don't translate it
619 it will never match anything. */
620 if (translate) c = translate[c];
621 goto normal_char;
622 }
623 break;
624
625 default:
626 normal_char:
627 if (!pending_exact || pending_exact + *pending_exact + 1 != b
628 || *pending_exact == 0177 || *p == '*' || *p == '^'
629 || ((obscure_syntax & RE_BK_PLUS_QM)
630 ? *p == '\\' && (p[1] == '+' || p[1] == '?')
631 : (*p == '+' || *p == '?')))
632 {
633 laststart = b;
634 PATPUSH (exactn);
635 pending_exact = b;
636 PATPUSH (0);
637 }
638 PATPUSH (c);
639 (*pending_exact)++;
640 }
641 }
642
643 if (fixup_jump)
644 store_jump (fixup_jump, jump, b);
645
646 if (stackp != stackb) goto unmatched_open;
647
648 bufp->used = b - bufp->buffer;
649 return 0;
650
651 invalid_pattern:
652 return "Invalid regular expression";
653
654 unmatched_open:
655 return "Unmatched \\(";
656
657 unmatched_close:
658 return "Unmatched \\)";
659
660 end_of_pattern:
661 return "Premature end of regular expression";
662
663 nesting_too_deep:
664 return "Nesting too deep";
665
666 too_big:
667 return "Regular expression too big";
668
669 memory_exhausted:
670 return "Memory exhausted";
671}
672
673/* Store where `from' points a jump operation to jump to where `to' points.
674 `opcode' is the opcode to store. */
675
50e0dc41 676static void
dd3b648e
RP
677store_jump (from, opcode, to)
678 char *from, *to;
679 char opcode;
680{
681 from[0] = opcode;
682 from[1] = (to - (from + 3)) & 0377;
683 from[2] = (to - (from + 3)) >> 8;
684}
685
686/* Open up space at char FROM, and insert there a jump to TO.
687 CURRENT_END gives te end of the storage no in use,
688 so we know how much data to copy up.
689 OP is the opcode of the jump to insert.
690
691 If you call this function, you must zero out pending_exact. */
692
50e0dc41 693static void
dd3b648e
RP
694insert_jump (op, from, to, current_end)
695 char op;
696 char *from, *to, *current_end;
697{
698 register char *pto = current_end + 3;
699 register char *pfrom = current_end;
700 while (pfrom != from)
701 *--pto = *--pfrom;
702 store_jump (from, op, to);
703}
704\f
705/* Given a pattern, compute a fastmap from it.
706 The fastmap records which of the (1 << BYTEWIDTH) possible characters
707 can start a string that matches the pattern.
708 This fastmap is used by re_search to skip quickly over totally implausible text.
709
710 The caller must supply the address of a (1 << BYTEWIDTH)-byte data area
711 as bufp->fastmap.
712 The other components of bufp describe the pattern to be used. */
713
714void
715re_compile_fastmap (bufp)
716 struct re_pattern_buffer *bufp;
717{
718 unsigned char *pattern = (unsigned char *) bufp->buffer;
719 int size = bufp->used;
720 register char *fastmap = bufp->fastmap;
721 register unsigned char *p = pattern;
722 register unsigned char *pend = pattern + size;
51b57ded 723 register int j;
dd3b648e
RP
724 unsigned char *translate = (unsigned char *) bufp->translate;
725
726 unsigned char *stackb[NFAILURES];
727 unsigned char **stackp = stackb;
728
4ed97c9a 729 memset (fastmap, '\0', (1 << BYTEWIDTH));
dd3b648e
RP
730 bufp->fastmap_accurate = 1;
731 bufp->can_be_null = 0;
732
733 while (p)
734 {
735 if (p == pend)
736 {
737 bufp->can_be_null = 1;
738 break;
739 }
740#ifdef SWITCH_ENUM_BUG
741 switch ((int) ((enum regexpcode) *p++))
742#else
743 switch ((enum regexpcode) *p++)
744#endif
745 {
746 case exactn:
747 if (translate)
748 fastmap[translate[p[1]]] = 1;
749 else
750 fastmap[p[1]] = 1;
751 break;
752
753 case begline:
754 case before_dot:
755 case at_dot:
756 case after_dot:
757 case begbuf:
758 case endbuf:
759 case wordbound:
760 case notwordbound:
761 case wordbeg:
762 case wordend:
763 continue;
764
765 case endline:
766 if (translate)
767 fastmap[translate['\n']] = 1;
768 else
769 fastmap['\n'] = 1;
770 if (bufp->can_be_null != 1)
771 bufp->can_be_null = 2;
772 break;
773
774 case finalize_jump:
775 case maybe_finalize_jump:
776 case jump:
777 case dummy_failure_jump:
778 bufp->can_be_null = 1;
779 j = *p++ & 0377;
780 j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
781 p += j + 1; /* The 1 compensates for missing ++ above */
782 if (j > 0)
783 continue;
784 /* Jump backward reached implies we just went through
785 the body of a loop and matched nothing.
786 Opcode jumped to should be an on_failure_jump.
787 Just treat it like an ordinary jump.
788 For a * loop, it has pushed its failure point already;
789 if so, discard that as redundant. */
790 if ((enum regexpcode) *p != on_failure_jump)
791 continue;
792 p++;
793 j = *p++ & 0377;
794 j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
795 p += j + 1; /* The 1 compensates for missing ++ above */
796 if (stackp != stackb && *stackp == p)
797 stackp--;
798 continue;
799
800 case on_failure_jump:
801 j = *p++ & 0377;
802 j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
803 p++;
804 *++stackp = p + j;
805 continue;
806
807 case start_memory:
808 case stop_memory:
809 p++;
810 continue;
811
812 case duplicate:
813 bufp->can_be_null = 1;
814 fastmap['\n'] = 1;
815 case anychar:
816 for (j = 0; j < (1 << BYTEWIDTH); j++)
817 if (j != '\n')
818 fastmap[j] = 1;
819 if (bufp->can_be_null)
820 return;
821 /* Don't return; check the alternative paths
822 so we can set can_be_null if appropriate. */
823 break;
824
825 case wordchar:
826 for (j = 0; j < (1 << BYTEWIDTH); j++)
827 if (SYNTAX (j) == Sword)
828 fastmap[j] = 1;
829 break;
830
831 case notwordchar:
832 for (j = 0; j < (1 << BYTEWIDTH); j++)
833 if (SYNTAX (j) != Sword)
834 fastmap[j] = 1;
835 break;
836
837#ifdef emacs
838 case syntaxspec:
839 k = *p++;
840 for (j = 0; j < (1 << BYTEWIDTH); j++)
841 if (SYNTAX (j) == (enum syntaxcode) k)
842 fastmap[j] = 1;
843 break;
844
845 case notsyntaxspec:
846 k = *p++;
847 for (j = 0; j < (1 << BYTEWIDTH); j++)
848 if (SYNTAX (j) != (enum syntaxcode) k)
849 fastmap[j] = 1;
850 break;
851#endif /* emacs */
852
853 case charset:
854 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
855 if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
856 {
857 if (translate)
858 fastmap[translate[j]] = 1;
859 else
860 fastmap[j] = 1;
861 }
862 break;
863
864 case charset_not:
865 /* Chars beyond end of map must be allowed */
866 for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
867 if (translate)
868 fastmap[translate[j]] = 1;
869 else
870 fastmap[j] = 1;
871
872 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
873 if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
874 {
875 if (translate)
876 fastmap[translate[j]] = 1;
877 else
878 fastmap[j] = 1;
879 }
880 break;
6b14af2b
FF
881 case unused:
882 case syntaxspec:
883 case notsyntaxspec:
884 default:
885 break;
dd3b648e
RP
886 }
887
888 /* Get here means we have successfully found the possible starting characters
889 of one path of the pattern. We need not follow this path any farther.
890 Instead, look at the next alternative remembered in the stack. */
891 if (stackp != stackb)
892 p = *stackp--;
893 else
894 break;
895 }
896}
897\f
898/* Like re_search_2, below, but only one string is specified. */
899
900int
901re_search (pbufp, string, size, startpos, range, regs)
902 struct re_pattern_buffer *pbufp;
903 char *string;
904 int size, startpos, range;
905 struct re_registers *regs;
906{
907 return re_search_2 (pbufp, 0, 0, string, size, startpos, range, regs, size);
908}
909
910/* Like re_match_2 but tries first a match starting at index STARTPOS,
911 then at STARTPOS + 1, and so on.
912 RANGE is the number of places to try before giving up.
913 If RANGE is negative, the starting positions tried are
914 STARTPOS, STARTPOS - 1, etc.
915 It is up to the caller to make sure that range is not so large
916 as to take the starting position outside of the input strings.
917
918The value returned is the position at which the match was found,
919 or -1 if no match was found,
920 or -2 if error (such as failure stack overflow). */
921
922int
923re_search_2 (pbufp, string1, size1, string2, size2, startpos, range, regs, mstop)
924 struct re_pattern_buffer *pbufp;
925 char *string1, *string2;
926 int size1, size2;
927 int startpos;
928 register int range;
929 struct re_registers *regs;
930 int mstop;
931{
932 register char *fastmap = pbufp->fastmap;
933 register unsigned char *translate = (unsigned char *) pbufp->translate;
934 int total = size1 + size2;
935 int val;
936
937 /* Update the fastmap now if not correct already */
938 if (fastmap && !pbufp->fastmap_accurate)
939 re_compile_fastmap (pbufp);
940
941 /* Don't waste time in a long search for a pattern
942 that says it is anchored. */
943 if (pbufp->used > 0 && (enum regexpcode) pbufp->buffer[0] == begbuf
944 && range > 0)
945 {
946 if (startpos > 0)
947 return -1;
948 else
949 range = 1;
950 }
951
952 while (1)
953 {
954 /* If a fastmap is supplied, skip quickly over characters
955 that cannot possibly be the start of a match.
956 Note, however, that if the pattern can possibly match
957 the null string, we must test it at each starting point
958 so that we take the first null string we get. */
959
960 if (fastmap && startpos < total && pbufp->can_be_null != 1)
961 {
962 if (range > 0)
963 {
964 register int lim = 0;
965 register unsigned char *p;
966 int irange = range;
967 if (startpos < size1 && startpos + range >= size1)
968 lim = range - (size1 - startpos);
969
970 p = ((unsigned char *)
971 &(startpos >= size1 ? string2 - size1 : string1)[startpos]);
972
973 if (translate)
974 {
975 while (range > lim && !fastmap[translate[*p++]])
976 range--;
977 }
978 else
979 {
980 while (range > lim && !fastmap[*p++])
981 range--;
982 }
983 startpos += irange - range;
984 }
985 else
986 {
987 register unsigned char c;
988 if (startpos >= size1)
989 c = string2[startpos - size1];
990 else
991 c = string1[startpos];
992 c &= 0xff;
993 if (translate ? !fastmap[translate[c]] : !fastmap[c])
994 goto advance;
995 }
996 }
997
998 if (range >= 0 && startpos == total
999 && fastmap && pbufp->can_be_null == 0)
1000 return -1;
1001
1002 val = re_match_2 (pbufp, string1, size1, string2, size2, startpos, regs, mstop);
1003 if (0 <= val)
1004 {
1005 if (val == -2)
1006 return -2;
1007 return startpos;
1008 }
1009
1010#ifdef C_ALLOCA
1011 alloca (0);
1012#endif /* C_ALLOCA */
1013
1014 advance:
1015 if (!range) break;
1016 if (range > 0) range--, startpos++; else range++, startpos--;
1017 }
1018 return -1;
1019}
1020\f
1021#ifndef emacs /* emacs never uses this */
1022int
1023re_match (pbufp, string, size, pos, regs)
1024 struct re_pattern_buffer *pbufp;
1025 char *string;
1026 int size, pos;
1027 struct re_registers *regs;
1028{
1029 return re_match_2 (pbufp, 0, 0, string, size, pos, regs, size);
1030}
1031#endif /* emacs */
1032
1033/* Maximum size of failure stack. Beyond this, overflow is an error. */
1034
1035int re_max_failures = 2000;
1036
9823e3f4 1037static int memcmp_translate();
dd3b648e
RP
1038/* Match the pattern described by PBUFP
1039 against data which is the virtual concatenation of STRING1 and STRING2.
1040 SIZE1 and SIZE2 are the sizes of the two data strings.
1041 Start the match at position POS.
1042 Do not consider matching past the position MSTOP.
1043
1044 If pbufp->fastmap is nonzero, then it had better be up to date.
1045
1046 The reason that the data to match are specified as two components
1047 which are to be regarded as concatenated
1048 is so this function can be used directly on the contents of an Emacs buffer.
1049
1050 -1 is returned if there is no match. -2 is returned if there is
1051 an error (such as match stack overflow). Otherwise the value is the length
1052 of the substring which was matched. */
1053
1054int
1055re_match_2 (pbufp, string1, size1, string2, size2, pos, regs, mstop)
1056 struct re_pattern_buffer *pbufp;
1057 unsigned char *string1, *string2;
1058 int size1, size2;
1059 int pos;
1060 struct re_registers *regs;
1061 int mstop;
1062{
1063 register unsigned char *p = (unsigned char *) pbufp->buffer;
1064 register unsigned char *pend = p + pbufp->used;
1065 /* End of first string */
1066 unsigned char *end1;
1067 /* End of second string */
1068 unsigned char *end2;
1069 /* Pointer just past last char to consider matching */
1070 unsigned char *end_match_1, *end_match_2;
1071 register unsigned char *d, *dend;
1072 register int mcnt;
1073 unsigned char *translate = (unsigned char *) pbufp->translate;
1074
1075 /* Failure point stack. Each place that can handle a failure further down the line
1076 pushes a failure point on this stack. It consists of two char *'s.
1077 The first one pushed is where to resume scanning the pattern;
1078 the second pushed is where to resume scanning the strings.
1079 If the latter is zero, the failure point is a "dummy".
1080 If a failure happens and the innermost failure point is dormant,
1081 it discards that failure point and tries the next one. */
1082
1083 unsigned char *initial_stack[2 * NFAILURES];
1084 unsigned char **stackb = initial_stack;
1085 unsigned char **stackp = stackb, **stacke = &stackb[2 * NFAILURES];
1086
1087 /* Information on the "contents" of registers.
1088 These are pointers into the input strings; they record
1089 just what was matched (on this attempt) by some part of the pattern.
1090 The start_memory command stores the start of a register's contents
1091 and the stop_memory command stores the end.
1092
1093 At that point, regstart[regnum] points to the first character in the register,
1094 regend[regnum] points to the first character beyond the end of the register,
1095 regstart_seg1[regnum] is true iff regstart[regnum] points into string1,
1096 and regend_seg1[regnum] is true iff regend[regnum] points into string1. */
1097
1098 unsigned char *regstart[RE_NREGS];
1099 unsigned char *regend[RE_NREGS];
1100 unsigned char regstart_seg1[RE_NREGS], regend_seg1[RE_NREGS];
1101
1102 /* Set up pointers to ends of strings.
1103 Don't allow the second string to be empty unless both are empty. */
1104 if (!size2)
1105 {
1106 string2 = string1;
1107 size2 = size1;
1108 string1 = 0;
1109 size1 = 0;
1110 }
1111 end1 = string1 + size1;
1112 end2 = string2 + size2;
1113
1114 /* Compute where to stop matching, within the two strings */
1115 if (mstop <= size1)
1116 {
1117 end_match_1 = string1 + mstop;
1118 end_match_2 = string2;
1119 }
1120 else
1121 {
1122 end_match_1 = end1;
1123 end_match_2 = string2 + mstop - size1;
1124 }
1125
1126 /* Initialize \) text positions to -1
1127 to mark ones that no \( or \) has been seen for. */
1128
b52cac6b 1129 for (mcnt = 0; mcnt < (int) (sizeof (regend) / sizeof (*regend)); mcnt++)
dd3b648e
RP
1130 regend[mcnt] = (unsigned char *) -1;
1131
1132 /* `p' scans through the pattern as `d' scans through the data.
1133 `dend' is the end of the input string that `d' points within.
1134 `d' is advanced into the following input string whenever necessary,
1135 but this happens before fetching;
1136 therefore, at the beginning of the loop,
1137 `d' can be pointing at the end of a string,
1138 but it cannot equal string2. */
1139
1140 if (pos <= size1)
1141 d = string1 + pos, dend = end_match_1;
1142 else
1143 d = string2 + pos - size1, dend = end_match_2;
1144
1145/* Write PREFETCH; just before fetching a character with *d. */
1146#define PREFETCH \
1147 while (d == dend) \
1148 { if (dend == end_match_2) goto fail; /* end of string2 => failure */ \
1149 d = string2; /* end of string1 => advance to string2. */ \
1150 dend = end_match_2; }
1151
1152 /* This loop loops over pattern commands.
1153 It exits by returning from the function if match is complete,
1154 or it drops through if match fails at this starting point in the input data. */
1155
1156 while (1)
1157 {
1158 if (p == pend)
1159 /* End of pattern means we have succeeded! */
1160 {
1161 /* If caller wants register contents data back, convert it to indices */
1162 if (regs)
1163 {
1164 regs->start[0] = pos;
1165 if (dend == end_match_1)
1166 regs->end[0] = d - string1;
1167 else
1168 regs->end[0] = d - string2 + size1;
1169 for (mcnt = 1; mcnt < RE_NREGS; mcnt++)
1170 {
1171 if (regend[mcnt] == (unsigned char *) -1)
1172 {
1173 regs->start[mcnt] = -1;
1174 regs->end[mcnt] = -1;
1175 continue;
1176 }
1177 if (regstart_seg1[mcnt])
1178 regs->start[mcnt] = regstart[mcnt] - string1;
1179 else
1180 regs->start[mcnt] = regstart[mcnt] - string2 + size1;
1181 if (regend_seg1[mcnt])
1182 regs->end[mcnt] = regend[mcnt] - string1;
1183 else
1184 regs->end[mcnt] = regend[mcnt] - string2 + size1;
1185 }
1186 }
1187 if (dend == end_match_1)
1188 return (d - string1 - pos);
1189 else
1190 return d - string2 + size1 - pos;
1191 }
1192
1193 /* Otherwise match next pattern command */
1194#ifdef SWITCH_ENUM_BUG
1195 switch ((int) ((enum regexpcode) *p++))
1196#else
1197 switch ((enum regexpcode) *p++)
1198#endif
1199 {
1200
1201 /* \( is represented by a start_memory, \) by a stop_memory.
1202 Both of those commands contain a "register number" argument.
1203 The text matched within the \( and \) is recorded under that number.
1204 Then, \<digit> turns into a `duplicate' command which
1205 is followed by the numeric value of <digit> as the register number. */
1206
1207 case start_memory:
1208 regstart[*p] = d;
1209 regstart_seg1[*p++] = (dend == end_match_1);
1210 break;
1211
1212 case stop_memory:
1213 regend[*p] = d;
1214 regend_seg1[*p++] = (dend == end_match_1);
1215 break;
1216
1217 case duplicate:
1218 {
1219 int regno = *p++; /* Get which register to match against */
1220 register unsigned char *d2, *dend2;
1221
1222 d2 = regstart[regno];
1223 dend2 = ((regstart_seg1[regno] == regend_seg1[regno])
1224 ? regend[regno] : end_match_1);
1225 while (1)
1226 {
1227 /* Advance to next segment in register contents, if necessary */
1228 while (d2 == dend2)
1229 {
1230 if (dend2 == end_match_2) break;
1231 if (dend2 == regend[regno]) break;
1232 d2 = string2, dend2 = regend[regno]; /* end of string1 => advance to string2. */
1233 }
1234 /* At end of register contents => success */
1235 if (d2 == dend2) break;
1236
1237 /* Advance to next segment in data being matched, if necessary */
1238 PREFETCH;
1239
1240 /* mcnt gets # consecutive chars to compare */
1241 mcnt = dend - d;
1242 if (mcnt > dend2 - d2)
1243 mcnt = dend2 - d2;
1244 /* Compare that many; failure if mismatch, else skip them. */
9823e3f4 1245 if (translate ? memcmp_translate (d, d2, mcnt, translate) : memcmp (d, d2, mcnt))
dd3b648e
RP
1246 goto fail;
1247 d += mcnt, d2 += mcnt;
1248 }
1249 }
1250 break;
1251
1252 case anychar:
1253 /* fetch a data character */
1254 PREFETCH;
1255 /* Match anything but a newline. */
1256 if ((translate ? translate[*d++] : *d++) == '\n')
1257 goto fail;
1258 break;
1259
1260 case charset:
1261 case charset_not:
1262 {
1263 /* Nonzero for charset_not */
1264 int not = 0;
1265 register int c;
1266 if (*(p - 1) == (unsigned char) charset_not)
1267 not = 1;
1268
1269 /* fetch a data character */
1270 PREFETCH;
1271
1272 if (translate)
1273 c = translate [*d];
1274 else
1275 c = *d;
1276
1277 if (c < *p * BYTEWIDTH
1278 && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
1279 not = !not;
1280
1281 p += 1 + *p;
1282
1283 if (!not) goto fail;
1284 d++;
1285 break;
1286 }
1287
1288 case begline:
1289 if (d == string1 || d[-1] == '\n')
1290 break;
1291 goto fail;
1292
1293 case endline:
1294 if (d == end2
1295 || (d == end1 ? (size2 == 0 || *string2 == '\n') : *d == '\n'))
1296 break;
1297 goto fail;
1298
1299 /* "or" constructs ("|") are handled by starting each alternative
1300 with an on_failure_jump that points to the start of the next alternative.
1301 Each alternative except the last ends with a jump to the joining point.
1302 (Actually, each jump except for the last one really jumps
1303 to the following jump, because tensioning the jumps is a hassle.) */
1304
1305 /* The start of a stupid repeat has an on_failure_jump that points
1306 past the end of the repeat text.
1307 This makes a failure point so that, on failure to match a repetition,
1308 matching restarts past as many repetitions have been found
1309 with no way to fail and look for another one. */
1310
1311 /* A smart repeat is similar but loops back to the on_failure_jump
1312 so that each repetition makes another failure point. */
1313
1314 case on_failure_jump:
1315 if (stackp == stacke)
1316 {
1317 unsigned char **stackx;
1318 if (stacke - stackb > re_max_failures * 2)
1319 return -2;
1320 stackx = (unsigned char **) alloca (2 * (stacke - stackb)
1321 * sizeof (char *));
ade40d31 1322 memcpy (stackx, stackb, (stacke - stackb) * sizeof (char *));
dd3b648e
RP
1323 stackp = stackx + (stackp - stackb);
1324 stacke = stackx + 2 * (stacke - stackb);
1325 stackb = stackx;
1326 }
1327 mcnt = *p++ & 0377;
1328 mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
1329 p++;
1330 *stackp++ = mcnt + p;
1331 *stackp++ = d;
1332 break;
1333
1334 /* The end of a smart repeat has an maybe_finalize_jump back.
1335 Change it either to a finalize_jump or an ordinary jump. */
1336
1337 case maybe_finalize_jump:
1338 mcnt = *p++ & 0377;
1339 mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
1340 p++;
1341 {
1342 register unsigned char *p2 = p;
1343 /* Compare what follows with the begining of the repeat.
1344 If we can establish that there is nothing that they would
1345 both match, we can change to finalize_jump */
1346 while (p2 != pend
1347 && (*p2 == (unsigned char) stop_memory
1348 || *p2 == (unsigned char) start_memory))
1349 p2++;
1350 if (p2 == pend)
1351 p[-3] = (unsigned char) finalize_jump;
1352 else if (*p2 == (unsigned char) exactn
1353 || *p2 == (unsigned char) endline)
1354 {
1355 register int c = *p2 == (unsigned char) endline ? '\n' : p2[2];
1356 register unsigned char *p1 = p + mcnt;
1357 /* p1[0] ... p1[2] are an on_failure_jump.
1358 Examine what follows that */
1359 if (p1[3] == (unsigned char) exactn && p1[5] != c)
1360 p[-3] = (unsigned char) finalize_jump;
1361 else if (p1[3] == (unsigned char) charset
1362 || p1[3] == (unsigned char) charset_not)
1363 {
1364 int not = p1[3] == (unsigned char) charset_not;
1365 if (c < p1[4] * BYTEWIDTH
1366 && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
1367 not = !not;
1368 /* not is 1 if c would match */
1369 /* That means it is not safe to finalize */
1370 if (!not)
1371 p[-3] = (unsigned char) finalize_jump;
1372 }
1373 }
1374 }
1375 p -= 2;
1376 if (p[-1] != (unsigned char) finalize_jump)
1377 {
1378 p[-1] = (unsigned char) jump;
1379 goto nofinalize;
1380 }
1381
1382 /* The end of a stupid repeat has a finalize-jump
1383 back to the start, where another failure point will be made
1384 which will point after all the repetitions found so far. */
1385
1386 case finalize_jump:
1387 stackp -= 2;
1388
1389 case jump:
1390 nofinalize:
1391 mcnt = *p++ & 0377;
1392 mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
1393 p += mcnt + 1; /* The 1 compensates for missing ++ above */
1394 break;
1395
1396 case dummy_failure_jump:
1397 if (stackp == stacke)
1398 {
1399 unsigned char **stackx
1400 = (unsigned char **) alloca (2 * (stacke - stackb)
1401 * sizeof (char *));
ade40d31 1402 memcpy (stackx, stackb, (stacke - stackb) * sizeof (char *));
dd3b648e
RP
1403 stackp = stackx + (stackp - stackb);
1404 stacke = stackx + 2 * (stacke - stackb);
1405 stackb = stackx;
1406 }
1407 *stackp++ = 0;
1408 *stackp++ = 0;
1409 goto nofinalize;
1410
1411 case wordbound:
1412 if (d == string1 /* Points to first char */
1413 || d == end2 /* Points to end */
1414 || (d == end1 && size2 == 0)) /* Points to end */
1415 break;
1416 if ((SYNTAX (d[-1]) == Sword)
1417 != (SYNTAX (d == end1 ? *string2 : *d) == Sword))
1418 break;
1419 goto fail;
1420
1421 case notwordbound:
1422 if (d == string1 /* Points to first char */
1423 || d == end2 /* Points to end */
1424 || (d == end1 && size2 == 0)) /* Points to end */
1425 goto fail;
1426 if ((SYNTAX (d[-1]) == Sword)
1427 != (SYNTAX (d == end1 ? *string2 : *d) == Sword))
1428 goto fail;
1429 break;
1430
1431 case wordbeg:
1432 if (d == end2 /* Points to end */
1433 || (d == end1 && size2 == 0) /* Points to end */
1434 || SYNTAX (* (d == end1 ? string2 : d)) != Sword) /* Next char not a letter */
1435 goto fail;
1436 if (d == string1 /* Points to first char */
1437 || SYNTAX (d[-1]) != Sword) /* prev char not letter */
1438 break;
1439 goto fail;
1440
1441 case wordend:
1442 if (d == string1 /* Points to first char */
1443 || SYNTAX (d[-1]) != Sword) /* prev char not letter */
1444 goto fail;
1445 if (d == end2 /* Points to end */
1446 || (d == end1 && size2 == 0) /* Points to end */
1447 || SYNTAX (d == end1 ? *string2 : *d) != Sword) /* Next char not a letter */
1448 break;
1449 goto fail;
1450
1451#ifdef emacs
1452 case before_dot:
1453 if (((d - string2 <= (unsigned) size2)
1454 ? d - bf_p2 : d - bf_p1)
1455 <= point)
1456 goto fail;
1457 break;
1458
1459 case at_dot:
1460 if (((d - string2 <= (unsigned) size2)
1461 ? d - bf_p2 : d - bf_p1)
1462 == point)
1463 goto fail;
1464 break;
1465
1466 case after_dot:
1467 if (((d - string2 <= (unsigned) size2)
1468 ? d - bf_p2 : d - bf_p1)
1469 >= point)
1470 goto fail;
1471 break;
1472
1473 case wordchar:
1474 mcnt = (int) Sword;
1475 goto matchsyntax;
1476
1477 case syntaxspec:
1478 mcnt = *p++;
1479 matchsyntax:
1480 PREFETCH;
1481 if (SYNTAX (*d++) != (enum syntaxcode) mcnt) goto fail;
1482 break;
1483
1484 case notwordchar:
1485 mcnt = (int) Sword;
1486 goto matchnotsyntax;
1487
1488 case notsyntaxspec:
1489 mcnt = *p++;
1490 matchnotsyntax:
1491 PREFETCH;
1492 if (SYNTAX (*d++) == (enum syntaxcode) mcnt) goto fail;
1493 break;
1494#else
1495 case wordchar:
1496 PREFETCH;
1497 if (SYNTAX (*d++) == 0) goto fail;
1498 break;
1499
1500 case notwordchar:
1501 PREFETCH;
1502 if (SYNTAX (*d++) != 0) goto fail;
1503 break;
1504#endif /* not emacs */
1505
1506 case begbuf:
1507 if (d == string1) /* Note, d cannot equal string2 */
1508 break; /* unless string1 == string2. */
1509 goto fail;
1510
1511 case endbuf:
1512 if (d == end2 || (d == end1 && size2 == 0))
1513 break;
1514 goto fail;
1515
1516 case exactn:
1517 /* Match the next few pattern characters exactly.
1518 mcnt is how many characters to match. */
1519 mcnt = *p++;
1520 if (translate)
1521 {
1522 do
1523 {
1524 PREFETCH;
1525 if (translate[*d++] != *p++) goto fail;
1526 }
1527 while (--mcnt);
1528 }
1529 else
1530 {
1531 do
1532 {
1533 PREFETCH;
1534 if (*d++ != *p++) goto fail;
1535 }
1536 while (--mcnt);
1537 }
1538 break;
6b14af2b
FF
1539 case unused:
1540 case before_dot:
1541 case at_dot:
1542 case after_dot:
1543 case syntaxspec:
1544 case notsyntaxspec:
1545 default:
1546 break;
dd3b648e
RP
1547 }
1548 continue; /* Successfully matched one pattern command; keep matching */
1549
1550 /* Jump here if any matching operation fails. */
1551 fail:
1552 if (stackp != stackb)
1553 /* A restart point is known. Restart there and pop it. */
1554 {
1555 if (!stackp[-2])
1556 { /* If innermost failure point is dormant, flush it and keep looking */
1557 stackp -= 2;
1558 goto fail;
1559 }
1560 d = *--stackp;
1561 p = *--stackp;
1562 if (d >= string1 && d <= end1)
1563 dend = end_match_1;
1564 }
1565 else break; /* Matching at this starting point really fails! */
1566 }
1567 return -1; /* Failure to match */
1568}
1569
1570static int
9823e3f4 1571memcmp_translate (s1, s2, len, translate)
dd3b648e
RP
1572 unsigned char *s1, *s2;
1573 register int len;
1574 unsigned char *translate;
1575{
1576 register unsigned char *p1 = s1, *p2 = s2;
1577 while (len)
1578 {
1579 if (translate [*p1++] != translate [*p2++]) return 1;
1580 len--;
1581 }
1582 return 0;
1583}
1584\f
1585/* Entry points compatible with bsd4.2 regex library */
1586
1587#ifndef emacs
1588
1589static struct re_pattern_buffer re_comp_buf;
1590
1591char *
1592re_comp (s)
ba47c66a 1593 const char *s;
dd3b648e
RP
1594{
1595 if (!s)
1596 {
1597 if (!re_comp_buf.buffer)
1598 return "No previous regular expression";
1599 return 0;
1600 }
1601
1602 if (!re_comp_buf.buffer)
1603 {
1604 if (!(re_comp_buf.buffer = (char *) malloc (200)))
1605 return "Memory exhausted";
1606 re_comp_buf.allocated = 200;
1607 if (!(re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH)))
1608 return "Memory exhausted";
1609 }
1610 return re_compile_pattern (s, strlen (s), &re_comp_buf);
1611}
1612
1613int
1614re_exec (s)
1615 char *s;
1616{
1617 int len = strlen (s);
1618 return 0 <= re_search (&re_comp_buf, s, len, 0, len, 0);
1619}
1620
1621#endif /* emacs */
1622\f
1623#ifdef test
1624
1625#include <stdio.h>
1626
1627/* Indexed by a character, gives the upper case equivalent of the character */
1628
1629static char upcase[0400] =
1630 { 000, 001, 002, 003, 004, 005, 006, 007,
1631 010, 011, 012, 013, 014, 015, 016, 017,
1632 020, 021, 022, 023, 024, 025, 026, 027,
1633 030, 031, 032, 033, 034, 035, 036, 037,
1634 040, 041, 042, 043, 044, 045, 046, 047,
1635 050, 051, 052, 053, 054, 055, 056, 057,
1636 060, 061, 062, 063, 064, 065, 066, 067,
1637 070, 071, 072, 073, 074, 075, 076, 077,
1638 0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
1639 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
1640 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
1641 0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137,
1642 0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
1643 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
1644 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
1645 0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177,
1646 0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
1647 0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
1648 0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
1649 0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
1650 0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
1651 0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
1652 0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
1653 0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
1654 0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
1655 0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
1656 0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327,
1657 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
1658 0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
1659 0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
1660 0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
1661 0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377
1662 };
1663
1664main (argc, argv)
1665 int argc;
1666 char **argv;
1667{
1668 char pat[80];
1669 struct re_pattern_buffer buf;
1670 int i;
1671 char c;
1672 char fastmap[(1 << BYTEWIDTH)];
1673
1674 /* Allow a command argument to specify the style of syntax. */
1675 if (argc > 1)
1676 obscure_syntax = atoi (argv[1]);
1677
1678 buf.allocated = 40;
1679 buf.buffer = (char *) malloc (buf.allocated);
1680 buf.fastmap = fastmap;
1681 buf.translate = upcase;
1682
1683 while (1)
1684 {
1685 gets (pat);
1686
1687 if (*pat)
1688 {
1689 re_compile_pattern (pat, strlen(pat), &buf);
1690
1691 for (i = 0; i < buf.used; i++)
1692 printchar (buf.buffer[i]);
1693
199b2450 1694 putchar_unfiltered ('\n');
dd3b648e 1695
199b2450 1696 printf_unfiltered ("%d allocated, %d used.\n", buf.allocated, buf.used);
dd3b648e
RP
1697
1698 re_compile_fastmap (&buf);
199b2450 1699 printf_unfiltered ("Allowed by fastmap: ");
dd3b648e
RP
1700 for (i = 0; i < (1 << BYTEWIDTH); i++)
1701 if (fastmap[i]) printchar (i);
199b2450 1702 putchar_unfiltered ('\n');
dd3b648e
RP
1703 }
1704
1705 gets (pat); /* Now read the string to match against */
1706
1707 i = re_match (&buf, pat, strlen (pat), 0, 0);
199b2450 1708 printf_unfiltered ("Match value %d.\n", i);
dd3b648e
RP
1709 }
1710}
1711
1712#ifdef NOTDEF
1713print_buf (bufp)
1714 struct re_pattern_buffer *bufp;
1715{
1716 int i;
1717
199b2450 1718 printf_unfiltered ("buf is :\n----------------\n");
dd3b648e
RP
1719 for (i = 0; i < bufp->used; i++)
1720 printchar (bufp->buffer[i]);
1721
199b2450 1722 printf_unfiltered ("\n%d allocated, %d used.\n", bufp->allocated, bufp->used);
dd3b648e 1723
199b2450 1724 printf_unfiltered ("Allowed by fastmap: ");
dd3b648e
RP
1725 for (i = 0; i < (1 << BYTEWIDTH); i++)
1726 if (bufp->fastmap[i])
1727 printchar (i);
199b2450 1728 printf_unfiltered ("\nAllowed by translate: ");
dd3b648e
RP
1729 if (bufp->translate)
1730 for (i = 0; i < (1 << BYTEWIDTH); i++)
1731 if (bufp->translate[i])
1732 printchar (i);
199b2450
TL
1733 printf_unfiltered ("\nfastmap is%s accurate\n", bufp->fastmap_accurate ? "" : "n't");
1734 printf_unfiltered ("can %s be null\n----------", bufp->can_be_null ? "" : "not");
dd3b648e
RP
1735}
1736#endif
1737
1738printchar (c)
1739 char c;
1740{
1741 if (c < 041 || c >= 0177)
1742 {
199b2450
TL
1743 putchar_unfiltered ('\\');
1744 putchar_unfiltered (((c >> 6) & 3) + '0');
1745 putchar_unfiltered (((c >> 3) & 7) + '0');
1746 putchar_unfiltered ((c & 7) + '0');
dd3b648e
RP
1747 }
1748 else
199b2450 1749 putchar_unfiltered (c);
dd3b648e
RP
1750}
1751
1752error (string)
1753 char *string;
1754{
199b2450 1755 puts_unfiltered (string);
dd3b648e
RP
1756 exit (1);
1757}
1758
1759#endif /* test */
This page took 0.424907 seconds and 4 git commands to generate.