ppcbug-rom checkin
[deliverable/binutils-gdb.git] / gdb / gnu-regex.c
1 /* Extended regular expression matching and search library.
2 Copyright (C) 1985, 1989 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
17
18 /* To test, compile with -Dtest.
19 This Dtestable feature turns this into a self-contained program
20 which reads a pattern, describes how it compiles,
21 then reads a string and searches for it. */
22
23 #ifdef emacs
24
25 /* The `emacs' switch turns on certain special matching commands
26 that make sense only in emacs. */
27
28 #include "config.h"
29 #include "lisp.h"
30 #include "buffer.h"
31 #include "syntax.h"
32
33 #else /* not emacs */
34
35 #include "defs.h"
36 #include "gdb_string.h"
37
38 /*
39 * Define the syntax stuff, so we can do the \<...\> things.
40 */
41
42 #ifndef Sword /* must be non-zero in some of the tests below... */
43 #define Sword 1
44 #endif
45
46 #define SYNTAX(c) re_syntax_table[c]
47
48 #ifdef SYNTAX_TABLE
49
50 char *re_syntax_table;
51
52 #else
53
54 static char re_syntax_table[256];
55
56 static void
57 init_syntax_once ()
58 {
59 register int c;
60 static int done = 0;
61
62 if (done)
63 return;
64
65 memset (re_syntax_table, '\0', sizeof re_syntax_table);
66
67 for (c = 'a'; c <= 'z'; c++)
68 re_syntax_table[c] = Sword;
69
70 for (c = 'A'; c <= 'Z'; c++)
71 re_syntax_table[c] = Sword;
72
73 for (c = '0'; c <= '9'; c++)
74 re_syntax_table[c] = Sword;
75
76 done = 1;
77 }
78
79 #endif /* SYNTAX_TABLE */
80 #endif /* not emacs */
81
82 #include "gnu-regex.h"
83
84 /* Number of failure points to allocate space for initially,
85 when matching. If this number is exceeded, more space is allocated,
86 so it is not a hard limit. */
87
88 #ifndef NFAILURES
89 #define NFAILURES 80
90 #endif /* NFAILURES */
91
92 /* width of a byte in bits */
93
94 #define BYTEWIDTH 8
95
96 /* We remove any previous definition of `SIGN_EXTEND_CHAR',
97 since ours (we hope) works properly with all combinations of
98 machines, compilers, `char' and `unsigned char' argument types.
99 (Per Bothner suggested the basic approach.) */
100 #undef SIGN_EXTEND_CHAR
101 #if __STDC__
102 #define SIGN_EXTEND_CHAR(c) ((signed char) (c))
103 #else /* not __STDC__ */
104 /* As in Harbison and Steele. */
105 #define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
106 #endif
107 \f
108 static int obscure_syntax = 0;
109
110 /* Specify the precise syntax of regexp for compilation.
111 This provides for compatibility for various utilities
112 which historically have different, incompatible syntaxes.
113
114 The argument SYNTAX is a bit-mask containing the two bits
115 RE_NO_BK_PARENS and RE_NO_BK_VBAR. */
116
117 int
118 re_set_syntax (syntax)
119 int syntax;
120 {
121 int ret;
122
123 ret = obscure_syntax;
124 obscure_syntax = syntax;
125 return ret;
126 }
127 \f
128 /* re_compile_pattern takes a regular-expression string
129 and converts it into a buffer full of byte commands for matching.
130
131 PATTERN is the address of the pattern string
132 SIZE is the length of it.
133 BUFP is a struct re_pattern_buffer * which points to the info
134 on where to store the byte commands.
135 This structure contains a char * which points to the
136 actual space, which should have been obtained with malloc.
137 re_compile_pattern may use realloc to grow the buffer space.
138
139 The number of bytes of commands can be found out by looking in
140 the struct re_pattern_buffer that bufp pointed to,
141 after re_compile_pattern returns.
142 */
143
144 #define PATPUSH(ch) (*b++ = (char) (ch))
145
146 #define PATFETCH(c) \
147 {if (p == pend) goto end_of_pattern; \
148 c = * (unsigned char *) p++; \
149 if (translate) c = translate[c]; }
150
151 #define PATFETCH_RAW(c) \
152 {if (p == pend) goto end_of_pattern; \
153 c = * (unsigned char *) p++; }
154
155 #define PATUNFETCH p--
156
157 /* This is not an arbitrary limit: the arguments which represent offsets
158 into the pattern are two bytes long. So if 2^16 bytes turns out to
159 be too small, many things would have to change. */
160 #define MAX_BUF_SIZE (1 << 16)
161
162
163 /* Extend the buffer by twice its current size via realloc and
164 reset the pointers that pointed into the old block to point to the
165 correct places in the new one. If extending the buffer results in it
166 being larger than MAX_BUF_SIZE, then flag memory exhausted. */
167 #define EXTEND_BUFFER \
168 do { \
169 char *old_buffer = bufp->buffer; \
170 if (bufp->allocated == MAX_BUF_SIZE) \
171 goto too_big; \
172 bufp->allocated <<= 1; \
173 if (bufp->allocated > MAX_BUF_SIZE) \
174 bufp->allocated = MAX_BUF_SIZE; \
175 bufp->buffer = (char *) realloc (bufp->buffer, bufp->allocated);\
176 if (bufp->buffer == NULL) \
177 goto memory_exhausted; \
178 /* If the buffer moved, move all the pointers into it. */ \
179 if (old_buffer != bufp->buffer) \
180 { \
181 b = (b - old_buffer) + bufp->buffer; \
182 begalt = (begalt - old_buffer) + bufp->buffer; \
183 if (fixup_jump) \
184 fixup_jump = (fixup_jump - old_buffer) + bufp->buffer;\
185 if (laststart) \
186 laststart = (laststart - old_buffer) + bufp->buffer; \
187 if (pending_exact) \
188 pending_exact = (pending_exact - old_buffer) + bufp->buffer; \
189 } \
190 } while (0)
191
192 static void store_jump (), insert_jump ();
193
194 char *
195 re_compile_pattern (pattern, size, bufp)
196 char *pattern;
197 int size;
198 struct re_pattern_buffer *bufp;
199 {
200 register char *b = bufp->buffer;
201 register char *p = pattern;
202 char *pend = pattern + size;
203 register unsigned c, c1;
204 char *p1;
205 unsigned char *translate = (unsigned char *) bufp->translate;
206
207 /* address of the count-byte of the most recently inserted "exactn" command.
208 This makes it possible to tell whether a new exact-match character
209 can be added to that command or requires a new "exactn" command. */
210
211 char *pending_exact = 0;
212
213 /* address of the place where a forward-jump should go
214 to the end of the containing expression.
215 Each alternative of an "or", except the last, ends with a forward-jump
216 of this sort. */
217
218 char *fixup_jump = 0;
219
220 /* address of start of the most recently finished expression.
221 This tells postfix * where to find the start of its operand. */
222
223 char *laststart = 0;
224
225 /* In processing a repeat, 1 means zero matches is allowed */
226
227 char zero_times_ok;
228
229 /* In processing a repeat, 1 means many matches is allowed */
230
231 char many_times_ok;
232
233 /* address of beginning of regexp, or inside of last \( */
234
235 char *begalt = b;
236
237 /* Stack of information saved by \( and restored by \).
238 Four stack elements are pushed by each \(:
239 First, the value of b.
240 Second, the value of fixup_jump.
241 Third, the value of regnum.
242 Fourth, the value of begalt. */
243
244 int stackb[40];
245 int *stackp = stackb;
246 int *stacke = stackb + 40;
247 int *stackt;
248
249 /* Counts \('s as they are encountered. Remembered for the matching \),
250 where it becomes the "register number" to put in the stop_memory command */
251
252 int regnum = 1;
253
254 bufp->fastmap_accurate = 0;
255
256 #ifndef emacs
257 #ifndef SYNTAX_TABLE
258 /*
259 * Initialize the syntax table.
260 */
261 init_syntax_once();
262 #endif
263 #endif
264
265 if (bufp->allocated == 0)
266 {
267 bufp->allocated = 28;
268 if (bufp->buffer)
269 /* EXTEND_BUFFER loses when bufp->allocated is 0 */
270 bufp->buffer = (char *) realloc (bufp->buffer, 28);
271 else
272 /* Caller did not allocate a buffer. Do it for him */
273 bufp->buffer = (char *) malloc (28);
274 if (!bufp->buffer) goto memory_exhausted;
275 begalt = b = bufp->buffer;
276 }
277
278 while (p != pend)
279 {
280 if (b - bufp->buffer > bufp->allocated - 10)
281 /* Note that EXTEND_BUFFER clobbers c */
282 EXTEND_BUFFER;
283
284 PATFETCH (c);
285
286 switch (c)
287 {
288 case '$':
289 if (obscure_syntax & RE_TIGHT_VBAR)
290 {
291 if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS) && p != pend)
292 goto normal_char;
293 /* Make operand of last vbar end before this `$'. */
294 if (fixup_jump)
295 store_jump (fixup_jump, jump, b);
296 fixup_jump = 0;
297 PATPUSH (endline);
298 break;
299 }
300
301 /* $ means succeed if at end of line, but only in special contexts.
302 If randomly in the middle of a pattern, it is a normal character. */
303 if (p == pend || *p == '\n'
304 || (obscure_syntax & RE_CONTEXT_INDEP_OPS)
305 || (obscure_syntax & RE_NO_BK_PARENS
306 ? *p == ')'
307 : *p == '\\' && p[1] == ')')
308 || (obscure_syntax & RE_NO_BK_VBAR
309 ? *p == '|'
310 : *p == '\\' && p[1] == '|'))
311 {
312 PATPUSH (endline);
313 break;
314 }
315 goto normal_char;
316
317 case '^':
318 /* ^ means succeed if at beg of line, but only if no preceding pattern. */
319
320 if (laststart && p[-2] != '\n'
321 && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
322 goto normal_char;
323 if (obscure_syntax & RE_TIGHT_VBAR)
324 {
325 if (p != pattern + 1
326 && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
327 goto normal_char;
328 PATPUSH (begline);
329 begalt = b;
330 }
331 else
332 PATPUSH (begline);
333 break;
334
335 case '+':
336 case '?':
337 if (obscure_syntax & RE_BK_PLUS_QM)
338 goto normal_char;
339 handle_plus:
340 case '*':
341 /* If there is no previous pattern, char not special. */
342 if (!laststart && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
343 goto normal_char;
344 /* If there is a sequence of repetition chars,
345 collapse it down to equivalent to just one. */
346 zero_times_ok = 0;
347 many_times_ok = 0;
348 while (1)
349 {
350 zero_times_ok |= c != '+';
351 many_times_ok |= c != '?';
352 if (p == pend)
353 break;
354 PATFETCH (c);
355 if (c == '*')
356 ;
357 else if (!(obscure_syntax & RE_BK_PLUS_QM)
358 && (c == '+' || c == '?'))
359 ;
360 else if ((obscure_syntax & RE_BK_PLUS_QM)
361 && c == '\\')
362 {
363 int c1;
364 PATFETCH (c1);
365 if (!(c1 == '+' || c1 == '?'))
366 {
367 PATUNFETCH;
368 PATUNFETCH;
369 break;
370 }
371 c = c1;
372 }
373 else
374 {
375 PATUNFETCH;
376 break;
377 }
378 }
379
380 /* Star, etc. applied to an empty pattern is equivalent
381 to an empty pattern. */
382 if (!laststart)
383 break;
384
385 /* Now we know whether 0 matches is allowed,
386 and whether 2 or more matches is allowed. */
387 if (many_times_ok)
388 {
389 /* If more than one repetition is allowed,
390 put in a backward jump at the end. */
391 store_jump (b, maybe_finalize_jump, laststart - 3);
392 b += 3;
393 }
394 insert_jump (on_failure_jump, laststart, b + 3, b);
395 pending_exact = 0;
396 b += 3;
397 if (!zero_times_ok)
398 {
399 /* At least one repetition required: insert before the loop
400 a skip over the initial on-failure-jump instruction */
401 insert_jump (dummy_failure_jump, laststart, laststart + 6, b);
402 b += 3;
403 }
404 break;
405
406 case '.':
407 laststart = b;
408 PATPUSH (anychar);
409 break;
410
411 case '[':
412 while (b - bufp->buffer
413 > bufp->allocated - 3 - (1 << BYTEWIDTH) / BYTEWIDTH)
414 /* Note that EXTEND_BUFFER clobbers c */
415 EXTEND_BUFFER;
416
417 laststart = b;
418 if (*p == '^')
419 PATPUSH (charset_not), p++;
420 else
421 PATPUSH (charset);
422 p1 = p;
423
424 PATPUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
425 /* Clear the whole map */
426 memset (b, '\0', (1 << BYTEWIDTH) / BYTEWIDTH);
427 /* Read in characters and ranges, setting map bits */
428 while (1)
429 {
430 PATFETCH (c);
431 if (c == ']' && p != p1 + 1) break;
432 if (*p == '-' && p[1] != ']')
433 {
434 PATFETCH (c1);
435 PATFETCH (c1);
436 while (c <= c1)
437 b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH), c++;
438 }
439 else
440 {
441 b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH);
442 }
443 }
444 /* Discard any bitmap bytes that are all 0 at the end of the map.
445 Decrement the map-length byte too. */
446 while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
447 b[-1]--;
448 b += b[-1];
449 break;
450
451 case '(':
452 if (! (obscure_syntax & RE_NO_BK_PARENS))
453 goto normal_char;
454 else
455 goto handle_open;
456
457 case ')':
458 if (! (obscure_syntax & RE_NO_BK_PARENS))
459 goto normal_char;
460 else
461 goto handle_close;
462
463 case '\n':
464 if (! (obscure_syntax & RE_NEWLINE_OR))
465 goto normal_char;
466 else
467 goto handle_bar;
468
469 case '|':
470 if (! (obscure_syntax & RE_NO_BK_VBAR))
471 goto normal_char;
472 else
473 goto handle_bar;
474
475 case '\\':
476 if (p == pend) goto invalid_pattern;
477 PATFETCH_RAW (c);
478 switch (c)
479 {
480 case '(':
481 if (obscure_syntax & RE_NO_BK_PARENS)
482 goto normal_backsl;
483 handle_open:
484 if (stackp == stacke) goto nesting_too_deep;
485 if (regnum < RE_NREGS)
486 {
487 PATPUSH (start_memory);
488 PATPUSH (regnum);
489 }
490 *stackp++ = b - bufp->buffer;
491 *stackp++ = fixup_jump ? fixup_jump - bufp->buffer + 1 : 0;
492 *stackp++ = regnum++;
493 *stackp++ = begalt - bufp->buffer;
494 fixup_jump = 0;
495 laststart = 0;
496 begalt = b;
497 break;
498
499 case ')':
500 if (obscure_syntax & RE_NO_BK_PARENS)
501 goto normal_backsl;
502 handle_close:
503 if (stackp == stackb) goto unmatched_close;
504 begalt = *--stackp + bufp->buffer;
505 if (fixup_jump)
506 store_jump (fixup_jump, jump, b);
507 if (stackp[-1] < RE_NREGS)
508 {
509 PATPUSH (stop_memory);
510 PATPUSH (stackp[-1]);
511 }
512 stackp -= 2;
513 fixup_jump = 0;
514 if (*stackp)
515 fixup_jump = *stackp + bufp->buffer - 1;
516 laststart = *--stackp + bufp->buffer;
517 break;
518
519 case '|':
520 if (obscure_syntax & RE_NO_BK_VBAR)
521 goto normal_backsl;
522 handle_bar:
523 insert_jump (on_failure_jump, begalt, b + 6, b);
524 pending_exact = 0;
525 b += 3;
526 if (fixup_jump)
527 store_jump (fixup_jump, jump, b);
528 fixup_jump = b;
529 b += 3;
530 laststart = 0;
531 begalt = b;
532 break;
533
534 #ifdef emacs
535 case '=':
536 PATPUSH (at_dot);
537 break;
538
539 case 's':
540 laststart = b;
541 PATPUSH (syntaxspec);
542 PATFETCH (c);
543 PATPUSH (syntax_spec_code[c]);
544 break;
545
546 case 'S':
547 laststart = b;
548 PATPUSH (notsyntaxspec);
549 PATFETCH (c);
550 PATPUSH (syntax_spec_code[c]);
551 break;
552 #endif /* emacs */
553
554 case 'w':
555 laststart = b;
556 PATPUSH (wordchar);
557 break;
558
559 case 'W':
560 laststart = b;
561 PATPUSH (notwordchar);
562 break;
563
564 case '<':
565 PATPUSH (wordbeg);
566 break;
567
568 case '>':
569 PATPUSH (wordend);
570 break;
571
572 case 'b':
573 PATPUSH (wordbound);
574 break;
575
576 case 'B':
577 PATPUSH (notwordbound);
578 break;
579
580 case '`':
581 PATPUSH (begbuf);
582 break;
583
584 case '\'':
585 PATPUSH (endbuf);
586 break;
587
588 case '1':
589 case '2':
590 case '3':
591 case '4':
592 case '5':
593 case '6':
594 case '7':
595 case '8':
596 case '9':
597 c1 = c - '0';
598 if (c1 >= regnum)
599 goto normal_char;
600 for (stackt = stackp - 2; stackt > stackb; stackt -= 4)
601 if (*stackt == c1)
602 goto normal_char;
603 laststart = b;
604 PATPUSH (duplicate);
605 PATPUSH (c1);
606 break;
607
608 case '+':
609 case '?':
610 if (obscure_syntax & RE_BK_PLUS_QM)
611 goto handle_plus;
612
613 default:
614 normal_backsl:
615 /* You might think it would be useful for \ to mean
616 not to translate; but if we don't translate it
617 it will never match anything. */
618 if (translate) c = translate[c];
619 goto normal_char;
620 }
621 break;
622
623 default:
624 normal_char:
625 if (!pending_exact || pending_exact + *pending_exact + 1 != b
626 || *pending_exact == 0177 || *p == '*' || *p == '^'
627 || ((obscure_syntax & RE_BK_PLUS_QM)
628 ? *p == '\\' && (p[1] == '+' || p[1] == '?')
629 : (*p == '+' || *p == '?')))
630 {
631 laststart = b;
632 PATPUSH (exactn);
633 pending_exact = b;
634 PATPUSH (0);
635 }
636 PATPUSH (c);
637 (*pending_exact)++;
638 }
639 }
640
641 if (fixup_jump)
642 store_jump (fixup_jump, jump, b);
643
644 if (stackp != stackb) goto unmatched_open;
645
646 bufp->used = b - bufp->buffer;
647 return 0;
648
649 invalid_pattern:
650 return "Invalid regular expression";
651
652 unmatched_open:
653 return "Unmatched \\(";
654
655 unmatched_close:
656 return "Unmatched \\)";
657
658 end_of_pattern:
659 return "Premature end of regular expression";
660
661 nesting_too_deep:
662 return "Nesting too deep";
663
664 too_big:
665 return "Regular expression too big";
666
667 memory_exhausted:
668 return "Memory exhausted";
669 }
670
671 /* Store where `from' points a jump operation to jump to where `to' points.
672 `opcode' is the opcode to store. */
673
674 static void
675 store_jump (from, opcode, to)
676 char *from, *to;
677 char opcode;
678 {
679 from[0] = opcode;
680 from[1] = (to - (from + 3)) & 0377;
681 from[2] = (to - (from + 3)) >> 8;
682 }
683
684 /* Open up space at char FROM, and insert there a jump to TO.
685 CURRENT_END gives te end of the storage no in use,
686 so we know how much data to copy up.
687 OP is the opcode of the jump to insert.
688
689 If you call this function, you must zero out pending_exact. */
690
691 static void
692 insert_jump (op, from, to, current_end)
693 char op;
694 char *from, *to, *current_end;
695 {
696 register char *pto = current_end + 3;
697 register char *pfrom = current_end;
698 while (pfrom != from)
699 *--pto = *--pfrom;
700 store_jump (from, op, to);
701 }
702 \f
703 /* Given a pattern, compute a fastmap from it.
704 The fastmap records which of the (1 << BYTEWIDTH) possible characters
705 can start a string that matches the pattern.
706 This fastmap is used by re_search to skip quickly over totally implausible text.
707
708 The caller must supply the address of a (1 << BYTEWIDTH)-byte data area
709 as bufp->fastmap.
710 The other components of bufp describe the pattern to be used. */
711
712 void
713 re_compile_fastmap (bufp)
714 struct re_pattern_buffer *bufp;
715 {
716 unsigned char *pattern = (unsigned char *) bufp->buffer;
717 int size = bufp->used;
718 register char *fastmap = bufp->fastmap;
719 register unsigned char *p = pattern;
720 register unsigned char *pend = pattern + size;
721 register int j;
722 unsigned char *translate = (unsigned char *) bufp->translate;
723
724 unsigned char *stackb[NFAILURES];
725 unsigned char **stackp = stackb;
726
727 memset (fastmap, '\0', (1 << BYTEWIDTH));
728 bufp->fastmap_accurate = 1;
729 bufp->can_be_null = 0;
730
731 while (p)
732 {
733 if (p == pend)
734 {
735 bufp->can_be_null = 1;
736 break;
737 }
738 #ifdef SWITCH_ENUM_BUG
739 switch ((int) ((enum regexpcode) *p++))
740 #else
741 switch ((enum regexpcode) *p++)
742 #endif
743 {
744 case exactn:
745 if (translate)
746 fastmap[translate[p[1]]] = 1;
747 else
748 fastmap[p[1]] = 1;
749 break;
750
751 case begline:
752 case before_dot:
753 case at_dot:
754 case after_dot:
755 case begbuf:
756 case endbuf:
757 case wordbound:
758 case notwordbound:
759 case wordbeg:
760 case wordend:
761 continue;
762
763 case endline:
764 if (translate)
765 fastmap[translate['\n']] = 1;
766 else
767 fastmap['\n'] = 1;
768 if (bufp->can_be_null != 1)
769 bufp->can_be_null = 2;
770 break;
771
772 case finalize_jump:
773 case maybe_finalize_jump:
774 case jump:
775 case dummy_failure_jump:
776 bufp->can_be_null = 1;
777 j = *p++ & 0377;
778 j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
779 p += j + 1; /* The 1 compensates for missing ++ above */
780 if (j > 0)
781 continue;
782 /* Jump backward reached implies we just went through
783 the body of a loop and matched nothing.
784 Opcode jumped to should be an on_failure_jump.
785 Just treat it like an ordinary jump.
786 For a * loop, it has pushed its failure point already;
787 if so, discard that as redundant. */
788 if ((enum regexpcode) *p != on_failure_jump)
789 continue;
790 p++;
791 j = *p++ & 0377;
792 j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
793 p += j + 1; /* The 1 compensates for missing ++ above */
794 if (stackp != stackb && *stackp == p)
795 stackp--;
796 continue;
797
798 case on_failure_jump:
799 j = *p++ & 0377;
800 j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
801 p++;
802 *++stackp = p + j;
803 continue;
804
805 case start_memory:
806 case stop_memory:
807 p++;
808 continue;
809
810 case duplicate:
811 bufp->can_be_null = 1;
812 fastmap['\n'] = 1;
813 case anychar:
814 for (j = 0; j < (1 << BYTEWIDTH); j++)
815 if (j != '\n')
816 fastmap[j] = 1;
817 if (bufp->can_be_null)
818 return;
819 /* Don't return; check the alternative paths
820 so we can set can_be_null if appropriate. */
821 break;
822
823 case wordchar:
824 for (j = 0; j < (1 << BYTEWIDTH); j++)
825 if (SYNTAX (j) == Sword)
826 fastmap[j] = 1;
827 break;
828
829 case notwordchar:
830 for (j = 0; j < (1 << BYTEWIDTH); j++)
831 if (SYNTAX (j) != Sword)
832 fastmap[j] = 1;
833 break;
834
835 #ifdef emacs
836 case syntaxspec:
837 k = *p++;
838 for (j = 0; j < (1 << BYTEWIDTH); j++)
839 if (SYNTAX (j) == (enum syntaxcode) k)
840 fastmap[j] = 1;
841 break;
842
843 case notsyntaxspec:
844 k = *p++;
845 for (j = 0; j < (1 << BYTEWIDTH); j++)
846 if (SYNTAX (j) != (enum syntaxcode) k)
847 fastmap[j] = 1;
848 break;
849 #endif /* emacs */
850
851 case charset:
852 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
853 if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
854 {
855 if (translate)
856 fastmap[translate[j]] = 1;
857 else
858 fastmap[j] = 1;
859 }
860 break;
861
862 case charset_not:
863 /* Chars beyond end of map must be allowed */
864 for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
865 if (translate)
866 fastmap[translate[j]] = 1;
867 else
868 fastmap[j] = 1;
869
870 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
871 if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
872 {
873 if (translate)
874 fastmap[translate[j]] = 1;
875 else
876 fastmap[j] = 1;
877 }
878 break;
879 case unused:
880 case syntaxspec:
881 case notsyntaxspec:
882 default:
883 break;
884 }
885
886 /* Get here means we have successfully found the possible starting characters
887 of one path of the pattern. We need not follow this path any farther.
888 Instead, look at the next alternative remembered in the stack. */
889 if (stackp != stackb)
890 p = *stackp--;
891 else
892 break;
893 }
894 }
895 \f
896 /* Like re_search_2, below, but only one string is specified. */
897
898 int
899 re_search (pbufp, string, size, startpos, range, regs)
900 struct re_pattern_buffer *pbufp;
901 char *string;
902 int size, startpos, range;
903 struct re_registers *regs;
904 {
905 return re_search_2 (pbufp, 0, 0, string, size, startpos, range, regs, size);
906 }
907
908 /* Like re_match_2 but tries first a match starting at index STARTPOS,
909 then at STARTPOS + 1, and so on.
910 RANGE is the number of places to try before giving up.
911 If RANGE is negative, the starting positions tried are
912 STARTPOS, STARTPOS - 1, etc.
913 It is up to the caller to make sure that range is not so large
914 as to take the starting position outside of the input strings.
915
916 The value returned is the position at which the match was found,
917 or -1 if no match was found,
918 or -2 if error (such as failure stack overflow). */
919
920 int
921 re_search_2 (pbufp, string1, size1, string2, size2, startpos, range, regs, mstop)
922 struct re_pattern_buffer *pbufp;
923 char *string1, *string2;
924 int size1, size2;
925 int startpos;
926 register int range;
927 struct re_registers *regs;
928 int mstop;
929 {
930 register char *fastmap = pbufp->fastmap;
931 register unsigned char *translate = (unsigned char *) pbufp->translate;
932 int total = size1 + size2;
933 int val;
934
935 /* Update the fastmap now if not correct already */
936 if (fastmap && !pbufp->fastmap_accurate)
937 re_compile_fastmap (pbufp);
938
939 /* Don't waste time in a long search for a pattern
940 that says it is anchored. */
941 if (pbufp->used > 0 && (enum regexpcode) pbufp->buffer[0] == begbuf
942 && range > 0)
943 {
944 if (startpos > 0)
945 return -1;
946 else
947 range = 1;
948 }
949
950 while (1)
951 {
952 /* If a fastmap is supplied, skip quickly over characters
953 that cannot possibly be the start of a match.
954 Note, however, that if the pattern can possibly match
955 the null string, we must test it at each starting point
956 so that we take the first null string we get. */
957
958 if (fastmap && startpos < total && pbufp->can_be_null != 1)
959 {
960 if (range > 0)
961 {
962 register int lim = 0;
963 register unsigned char *p;
964 int irange = range;
965 if (startpos < size1 && startpos + range >= size1)
966 lim = range - (size1 - startpos);
967
968 p = ((unsigned char *)
969 &(startpos >= size1 ? string2 - size1 : string1)[startpos]);
970
971 if (translate)
972 {
973 while (range > lim && !fastmap[translate[*p++]])
974 range--;
975 }
976 else
977 {
978 while (range > lim && !fastmap[*p++])
979 range--;
980 }
981 startpos += irange - range;
982 }
983 else
984 {
985 register unsigned char c;
986 if (startpos >= size1)
987 c = string2[startpos - size1];
988 else
989 c = string1[startpos];
990 c &= 0xff;
991 if (translate ? !fastmap[translate[c]] : !fastmap[c])
992 goto advance;
993 }
994 }
995
996 if (range >= 0 && startpos == total
997 && fastmap && pbufp->can_be_null == 0)
998 return -1;
999
1000 val = re_match_2 (pbufp, string1, size1, string2, size2, startpos, regs, mstop);
1001 if (0 <= val)
1002 {
1003 if (val == -2)
1004 return -2;
1005 return startpos;
1006 }
1007
1008 #ifdef C_ALLOCA
1009 alloca (0);
1010 #endif /* C_ALLOCA */
1011
1012 advance:
1013 if (!range) break;
1014 if (range > 0) range--, startpos++; else range++, startpos--;
1015 }
1016 return -1;
1017 }
1018 \f
1019 #ifndef emacs /* emacs never uses this */
1020 int
1021 re_match (pbufp, string, size, pos, regs)
1022 struct re_pattern_buffer *pbufp;
1023 char *string;
1024 int size, pos;
1025 struct re_registers *regs;
1026 {
1027 return re_match_2 (pbufp, 0, 0, string, size, pos, regs, size);
1028 }
1029 #endif /* emacs */
1030
1031 /* Maximum size of failure stack. Beyond this, overflow is an error. */
1032
1033 int re_max_failures = 2000;
1034
1035 static int memcmp_translate();
1036 /* Match the pattern described by PBUFP
1037 against data which is the virtual concatenation of STRING1 and STRING2.
1038 SIZE1 and SIZE2 are the sizes of the two data strings.
1039 Start the match at position POS.
1040 Do not consider matching past the position MSTOP.
1041
1042 If pbufp->fastmap is nonzero, then it had better be up to date.
1043
1044 The reason that the data to match are specified as two components
1045 which are to be regarded as concatenated
1046 is so this function can be used directly on the contents of an Emacs buffer.
1047
1048 -1 is returned if there is no match. -2 is returned if there is
1049 an error (such as match stack overflow). Otherwise the value is the length
1050 of the substring which was matched. */
1051
1052 int
1053 re_match_2 (pbufp, string1, size1, string2, size2, pos, regs, mstop)
1054 struct re_pattern_buffer *pbufp;
1055 unsigned char *string1, *string2;
1056 int size1, size2;
1057 int pos;
1058 struct re_registers *regs;
1059 int mstop;
1060 {
1061 register unsigned char *p = (unsigned char *) pbufp->buffer;
1062 register unsigned char *pend = p + pbufp->used;
1063 /* End of first string */
1064 unsigned char *end1;
1065 /* End of second string */
1066 unsigned char *end2;
1067 /* Pointer just past last char to consider matching */
1068 unsigned char *end_match_1, *end_match_2;
1069 register unsigned char *d, *dend;
1070 register int mcnt;
1071 unsigned char *translate = (unsigned char *) pbufp->translate;
1072
1073 /* Failure point stack. Each place that can handle a failure further down the line
1074 pushes a failure point on this stack. It consists of two char *'s.
1075 The first one pushed is where to resume scanning the pattern;
1076 the second pushed is where to resume scanning the strings.
1077 If the latter is zero, the failure point is a "dummy".
1078 If a failure happens and the innermost failure point is dormant,
1079 it discards that failure point and tries the next one. */
1080
1081 unsigned char *initial_stack[2 * NFAILURES];
1082 unsigned char **stackb = initial_stack;
1083 unsigned char **stackp = stackb, **stacke = &stackb[2 * NFAILURES];
1084
1085 /* Information on the "contents" of registers.
1086 These are pointers into the input strings; they record
1087 just what was matched (on this attempt) by some part of the pattern.
1088 The start_memory command stores the start of a register's contents
1089 and the stop_memory command stores the end.
1090
1091 At that point, regstart[regnum] points to the first character in the register,
1092 regend[regnum] points to the first character beyond the end of the register,
1093 regstart_seg1[regnum] is true iff regstart[regnum] points into string1,
1094 and regend_seg1[regnum] is true iff regend[regnum] points into string1. */
1095
1096 unsigned char *regstart[RE_NREGS];
1097 unsigned char *regend[RE_NREGS];
1098 unsigned char regstart_seg1[RE_NREGS], regend_seg1[RE_NREGS];
1099
1100 /* Set up pointers to ends of strings.
1101 Don't allow the second string to be empty unless both are empty. */
1102 if (!size2)
1103 {
1104 string2 = string1;
1105 size2 = size1;
1106 string1 = 0;
1107 size1 = 0;
1108 }
1109 end1 = string1 + size1;
1110 end2 = string2 + size2;
1111
1112 /* Compute where to stop matching, within the two strings */
1113 if (mstop <= size1)
1114 {
1115 end_match_1 = string1 + mstop;
1116 end_match_2 = string2;
1117 }
1118 else
1119 {
1120 end_match_1 = end1;
1121 end_match_2 = string2 + mstop - size1;
1122 }
1123
1124 /* Initialize \) text positions to -1
1125 to mark ones that no \( or \) has been seen for. */
1126
1127 for (mcnt = 0; mcnt < sizeof (regend) / sizeof (*regend); mcnt++)
1128 regend[mcnt] = (unsigned char *) -1;
1129
1130 /* `p' scans through the pattern as `d' scans through the data.
1131 `dend' is the end of the input string that `d' points within.
1132 `d' is advanced into the following input string whenever necessary,
1133 but this happens before fetching;
1134 therefore, at the beginning of the loop,
1135 `d' can be pointing at the end of a string,
1136 but it cannot equal string2. */
1137
1138 if (pos <= size1)
1139 d = string1 + pos, dend = end_match_1;
1140 else
1141 d = string2 + pos - size1, dend = end_match_2;
1142
1143 /* Write PREFETCH; just before fetching a character with *d. */
1144 #define PREFETCH \
1145 while (d == dend) \
1146 { if (dend == end_match_2) goto fail; /* end of string2 => failure */ \
1147 d = string2; /* end of string1 => advance to string2. */ \
1148 dend = end_match_2; }
1149
1150 /* This loop loops over pattern commands.
1151 It exits by returning from the function if match is complete,
1152 or it drops through if match fails at this starting point in the input data. */
1153
1154 while (1)
1155 {
1156 if (p == pend)
1157 /* End of pattern means we have succeeded! */
1158 {
1159 /* If caller wants register contents data back, convert it to indices */
1160 if (regs)
1161 {
1162 regs->start[0] = pos;
1163 if (dend == end_match_1)
1164 regs->end[0] = d - string1;
1165 else
1166 regs->end[0] = d - string2 + size1;
1167 for (mcnt = 1; mcnt < RE_NREGS; mcnt++)
1168 {
1169 if (regend[mcnt] == (unsigned char *) -1)
1170 {
1171 regs->start[mcnt] = -1;
1172 regs->end[mcnt] = -1;
1173 continue;
1174 }
1175 if (regstart_seg1[mcnt])
1176 regs->start[mcnt] = regstart[mcnt] - string1;
1177 else
1178 regs->start[mcnt] = regstart[mcnt] - string2 + size1;
1179 if (regend_seg1[mcnt])
1180 regs->end[mcnt] = regend[mcnt] - string1;
1181 else
1182 regs->end[mcnt] = regend[mcnt] - string2 + size1;
1183 }
1184 }
1185 if (dend == end_match_1)
1186 return (d - string1 - pos);
1187 else
1188 return d - string2 + size1 - pos;
1189 }
1190
1191 /* Otherwise match next pattern command */
1192 #ifdef SWITCH_ENUM_BUG
1193 switch ((int) ((enum regexpcode) *p++))
1194 #else
1195 switch ((enum regexpcode) *p++)
1196 #endif
1197 {
1198
1199 /* \( is represented by a start_memory, \) by a stop_memory.
1200 Both of those commands contain a "register number" argument.
1201 The text matched within the \( and \) is recorded under that number.
1202 Then, \<digit> turns into a `duplicate' command which
1203 is followed by the numeric value of <digit> as the register number. */
1204
1205 case start_memory:
1206 regstart[*p] = d;
1207 regstart_seg1[*p++] = (dend == end_match_1);
1208 break;
1209
1210 case stop_memory:
1211 regend[*p] = d;
1212 regend_seg1[*p++] = (dend == end_match_1);
1213 break;
1214
1215 case duplicate:
1216 {
1217 int regno = *p++; /* Get which register to match against */
1218 register unsigned char *d2, *dend2;
1219
1220 d2 = regstart[regno];
1221 dend2 = ((regstart_seg1[regno] == regend_seg1[regno])
1222 ? regend[regno] : end_match_1);
1223 while (1)
1224 {
1225 /* Advance to next segment in register contents, if necessary */
1226 while (d2 == dend2)
1227 {
1228 if (dend2 == end_match_2) break;
1229 if (dend2 == regend[regno]) break;
1230 d2 = string2, dend2 = regend[regno]; /* end of string1 => advance to string2. */
1231 }
1232 /* At end of register contents => success */
1233 if (d2 == dend2) break;
1234
1235 /* Advance to next segment in data being matched, if necessary */
1236 PREFETCH;
1237
1238 /* mcnt gets # consecutive chars to compare */
1239 mcnt = dend - d;
1240 if (mcnt > dend2 - d2)
1241 mcnt = dend2 - d2;
1242 /* Compare that many; failure if mismatch, else skip them. */
1243 if (translate ? memcmp_translate (d, d2, mcnt, translate) : memcmp (d, d2, mcnt))
1244 goto fail;
1245 d += mcnt, d2 += mcnt;
1246 }
1247 }
1248 break;
1249
1250 case anychar:
1251 /* fetch a data character */
1252 PREFETCH;
1253 /* Match anything but a newline. */
1254 if ((translate ? translate[*d++] : *d++) == '\n')
1255 goto fail;
1256 break;
1257
1258 case charset:
1259 case charset_not:
1260 {
1261 /* Nonzero for charset_not */
1262 int not = 0;
1263 register int c;
1264 if (*(p - 1) == (unsigned char) charset_not)
1265 not = 1;
1266
1267 /* fetch a data character */
1268 PREFETCH;
1269
1270 if (translate)
1271 c = translate [*d];
1272 else
1273 c = *d;
1274
1275 if (c < *p * BYTEWIDTH
1276 && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
1277 not = !not;
1278
1279 p += 1 + *p;
1280
1281 if (!not) goto fail;
1282 d++;
1283 break;
1284 }
1285
1286 case begline:
1287 if (d == string1 || d[-1] == '\n')
1288 break;
1289 goto fail;
1290
1291 case endline:
1292 if (d == end2
1293 || (d == end1 ? (size2 == 0 || *string2 == '\n') : *d == '\n'))
1294 break;
1295 goto fail;
1296
1297 /* "or" constructs ("|") are handled by starting each alternative
1298 with an on_failure_jump that points to the start of the next alternative.
1299 Each alternative except the last ends with a jump to the joining point.
1300 (Actually, each jump except for the last one really jumps
1301 to the following jump, because tensioning the jumps is a hassle.) */
1302
1303 /* The start of a stupid repeat has an on_failure_jump that points
1304 past the end of the repeat text.
1305 This makes a failure point so that, on failure to match a repetition,
1306 matching restarts past as many repetitions have been found
1307 with no way to fail and look for another one. */
1308
1309 /* A smart repeat is similar but loops back to the on_failure_jump
1310 so that each repetition makes another failure point. */
1311
1312 case on_failure_jump:
1313 if (stackp == stacke)
1314 {
1315 unsigned char **stackx;
1316 if (stacke - stackb > re_max_failures * 2)
1317 return -2;
1318 stackx = (unsigned char **) alloca (2 * (stacke - stackb)
1319 * sizeof (char *));
1320 memcpy (stackx, stackb, (stacke - stackb) * sizeof (char *));
1321 stackp = stackx + (stackp - stackb);
1322 stacke = stackx + 2 * (stacke - stackb);
1323 stackb = stackx;
1324 }
1325 mcnt = *p++ & 0377;
1326 mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
1327 p++;
1328 *stackp++ = mcnt + p;
1329 *stackp++ = d;
1330 break;
1331
1332 /* The end of a smart repeat has an maybe_finalize_jump back.
1333 Change it either to a finalize_jump or an ordinary jump. */
1334
1335 case maybe_finalize_jump:
1336 mcnt = *p++ & 0377;
1337 mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
1338 p++;
1339 {
1340 register unsigned char *p2 = p;
1341 /* Compare what follows with the begining of the repeat.
1342 If we can establish that there is nothing that they would
1343 both match, we can change to finalize_jump */
1344 while (p2 != pend
1345 && (*p2 == (unsigned char) stop_memory
1346 || *p2 == (unsigned char) start_memory))
1347 p2++;
1348 if (p2 == pend)
1349 p[-3] = (unsigned char) finalize_jump;
1350 else if (*p2 == (unsigned char) exactn
1351 || *p2 == (unsigned char) endline)
1352 {
1353 register int c = *p2 == (unsigned char) endline ? '\n' : p2[2];
1354 register unsigned char *p1 = p + mcnt;
1355 /* p1[0] ... p1[2] are an on_failure_jump.
1356 Examine what follows that */
1357 if (p1[3] == (unsigned char) exactn && p1[5] != c)
1358 p[-3] = (unsigned char) finalize_jump;
1359 else if (p1[3] == (unsigned char) charset
1360 || p1[3] == (unsigned char) charset_not)
1361 {
1362 int not = p1[3] == (unsigned char) charset_not;
1363 if (c < p1[4] * BYTEWIDTH
1364 && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
1365 not = !not;
1366 /* not is 1 if c would match */
1367 /* That means it is not safe to finalize */
1368 if (!not)
1369 p[-3] = (unsigned char) finalize_jump;
1370 }
1371 }
1372 }
1373 p -= 2;
1374 if (p[-1] != (unsigned char) finalize_jump)
1375 {
1376 p[-1] = (unsigned char) jump;
1377 goto nofinalize;
1378 }
1379
1380 /* The end of a stupid repeat has a finalize-jump
1381 back to the start, where another failure point will be made
1382 which will point after all the repetitions found so far. */
1383
1384 case finalize_jump:
1385 stackp -= 2;
1386
1387 case jump:
1388 nofinalize:
1389 mcnt = *p++ & 0377;
1390 mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
1391 p += mcnt + 1; /* The 1 compensates for missing ++ above */
1392 break;
1393
1394 case dummy_failure_jump:
1395 if (stackp == stacke)
1396 {
1397 unsigned char **stackx
1398 = (unsigned char **) alloca (2 * (stacke - stackb)
1399 * sizeof (char *));
1400 memcpy (stackx, stackb, (stacke - stackb) * sizeof (char *));
1401 stackp = stackx + (stackp - stackb);
1402 stacke = stackx + 2 * (stacke - stackb);
1403 stackb = stackx;
1404 }
1405 *stackp++ = 0;
1406 *stackp++ = 0;
1407 goto nofinalize;
1408
1409 case wordbound:
1410 if (d == string1 /* Points to first char */
1411 || d == end2 /* Points to end */
1412 || (d == end1 && size2 == 0)) /* Points to end */
1413 break;
1414 if ((SYNTAX (d[-1]) == Sword)
1415 != (SYNTAX (d == end1 ? *string2 : *d) == Sword))
1416 break;
1417 goto fail;
1418
1419 case notwordbound:
1420 if (d == string1 /* Points to first char */
1421 || d == end2 /* Points to end */
1422 || (d == end1 && size2 == 0)) /* Points to end */
1423 goto fail;
1424 if ((SYNTAX (d[-1]) == Sword)
1425 != (SYNTAX (d == end1 ? *string2 : *d) == Sword))
1426 goto fail;
1427 break;
1428
1429 case wordbeg:
1430 if (d == end2 /* Points to end */
1431 || (d == end1 && size2 == 0) /* Points to end */
1432 || SYNTAX (* (d == end1 ? string2 : d)) != Sword) /* Next char not a letter */
1433 goto fail;
1434 if (d == string1 /* Points to first char */
1435 || SYNTAX (d[-1]) != Sword) /* prev char not letter */
1436 break;
1437 goto fail;
1438
1439 case wordend:
1440 if (d == string1 /* Points to first char */
1441 || SYNTAX (d[-1]) != Sword) /* prev char not letter */
1442 goto fail;
1443 if (d == end2 /* Points to end */
1444 || (d == end1 && size2 == 0) /* Points to end */
1445 || SYNTAX (d == end1 ? *string2 : *d) != Sword) /* Next char not a letter */
1446 break;
1447 goto fail;
1448
1449 #ifdef emacs
1450 case before_dot:
1451 if (((d - string2 <= (unsigned) size2)
1452 ? d - bf_p2 : d - bf_p1)
1453 <= point)
1454 goto fail;
1455 break;
1456
1457 case at_dot:
1458 if (((d - string2 <= (unsigned) size2)
1459 ? d - bf_p2 : d - bf_p1)
1460 == point)
1461 goto fail;
1462 break;
1463
1464 case after_dot:
1465 if (((d - string2 <= (unsigned) size2)
1466 ? d - bf_p2 : d - bf_p1)
1467 >= point)
1468 goto fail;
1469 break;
1470
1471 case wordchar:
1472 mcnt = (int) Sword;
1473 goto matchsyntax;
1474
1475 case syntaxspec:
1476 mcnt = *p++;
1477 matchsyntax:
1478 PREFETCH;
1479 if (SYNTAX (*d++) != (enum syntaxcode) mcnt) goto fail;
1480 break;
1481
1482 case notwordchar:
1483 mcnt = (int) Sword;
1484 goto matchnotsyntax;
1485
1486 case notsyntaxspec:
1487 mcnt = *p++;
1488 matchnotsyntax:
1489 PREFETCH;
1490 if (SYNTAX (*d++) == (enum syntaxcode) mcnt) goto fail;
1491 break;
1492 #else
1493 case wordchar:
1494 PREFETCH;
1495 if (SYNTAX (*d++) == 0) goto fail;
1496 break;
1497
1498 case notwordchar:
1499 PREFETCH;
1500 if (SYNTAX (*d++) != 0) goto fail;
1501 break;
1502 #endif /* not emacs */
1503
1504 case begbuf:
1505 if (d == string1) /* Note, d cannot equal string2 */
1506 break; /* unless string1 == string2. */
1507 goto fail;
1508
1509 case endbuf:
1510 if (d == end2 || (d == end1 && size2 == 0))
1511 break;
1512 goto fail;
1513
1514 case exactn:
1515 /* Match the next few pattern characters exactly.
1516 mcnt is how many characters to match. */
1517 mcnt = *p++;
1518 if (translate)
1519 {
1520 do
1521 {
1522 PREFETCH;
1523 if (translate[*d++] != *p++) goto fail;
1524 }
1525 while (--mcnt);
1526 }
1527 else
1528 {
1529 do
1530 {
1531 PREFETCH;
1532 if (*d++ != *p++) goto fail;
1533 }
1534 while (--mcnt);
1535 }
1536 break;
1537 case unused:
1538 case before_dot:
1539 case at_dot:
1540 case after_dot:
1541 case syntaxspec:
1542 case notsyntaxspec:
1543 default:
1544 break;
1545 }
1546 continue; /* Successfully matched one pattern command; keep matching */
1547
1548 /* Jump here if any matching operation fails. */
1549 fail:
1550 if (stackp != stackb)
1551 /* A restart point is known. Restart there and pop it. */
1552 {
1553 if (!stackp[-2])
1554 { /* If innermost failure point is dormant, flush it and keep looking */
1555 stackp -= 2;
1556 goto fail;
1557 }
1558 d = *--stackp;
1559 p = *--stackp;
1560 if (d >= string1 && d <= end1)
1561 dend = end_match_1;
1562 }
1563 else break; /* Matching at this starting point really fails! */
1564 }
1565 return -1; /* Failure to match */
1566 }
1567
1568 static int
1569 memcmp_translate (s1, s2, len, translate)
1570 unsigned char *s1, *s2;
1571 register int len;
1572 unsigned char *translate;
1573 {
1574 register unsigned char *p1 = s1, *p2 = s2;
1575 while (len)
1576 {
1577 if (translate [*p1++] != translate [*p2++]) return 1;
1578 len--;
1579 }
1580 return 0;
1581 }
1582 \f
1583 /* Entry points compatible with bsd4.2 regex library */
1584
1585 #ifndef emacs
1586
1587 static struct re_pattern_buffer re_comp_buf;
1588
1589 char *
1590 re_comp (s)
1591 const char *s;
1592 {
1593 if (!s)
1594 {
1595 if (!re_comp_buf.buffer)
1596 return "No previous regular expression";
1597 return 0;
1598 }
1599
1600 if (!re_comp_buf.buffer)
1601 {
1602 if (!(re_comp_buf.buffer = (char *) malloc (200)))
1603 return "Memory exhausted";
1604 re_comp_buf.allocated = 200;
1605 if (!(re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH)))
1606 return "Memory exhausted";
1607 }
1608 return re_compile_pattern (s, strlen (s), &re_comp_buf);
1609 }
1610
1611 int
1612 re_exec (s)
1613 char *s;
1614 {
1615 int len = strlen (s);
1616 return 0 <= re_search (&re_comp_buf, s, len, 0, len, 0);
1617 }
1618
1619 #endif /* emacs */
1620 \f
1621 #ifdef test
1622
1623 #include <stdio.h>
1624
1625 /* Indexed by a character, gives the upper case equivalent of the character */
1626
1627 static char upcase[0400] =
1628 { 000, 001, 002, 003, 004, 005, 006, 007,
1629 010, 011, 012, 013, 014, 015, 016, 017,
1630 020, 021, 022, 023, 024, 025, 026, 027,
1631 030, 031, 032, 033, 034, 035, 036, 037,
1632 040, 041, 042, 043, 044, 045, 046, 047,
1633 050, 051, 052, 053, 054, 055, 056, 057,
1634 060, 061, 062, 063, 064, 065, 066, 067,
1635 070, 071, 072, 073, 074, 075, 076, 077,
1636 0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
1637 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
1638 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
1639 0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137,
1640 0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
1641 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
1642 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
1643 0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177,
1644 0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
1645 0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
1646 0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
1647 0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
1648 0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
1649 0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
1650 0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
1651 0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
1652 0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
1653 0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
1654 0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327,
1655 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
1656 0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
1657 0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
1658 0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
1659 0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377
1660 };
1661
1662 main (argc, argv)
1663 int argc;
1664 char **argv;
1665 {
1666 char pat[80];
1667 struct re_pattern_buffer buf;
1668 int i;
1669 char c;
1670 char fastmap[(1 << BYTEWIDTH)];
1671
1672 /* Allow a command argument to specify the style of syntax. */
1673 if (argc > 1)
1674 obscure_syntax = atoi (argv[1]);
1675
1676 buf.allocated = 40;
1677 buf.buffer = (char *) malloc (buf.allocated);
1678 buf.fastmap = fastmap;
1679 buf.translate = upcase;
1680
1681 while (1)
1682 {
1683 gets (pat);
1684
1685 if (*pat)
1686 {
1687 re_compile_pattern (pat, strlen(pat), &buf);
1688
1689 for (i = 0; i < buf.used; i++)
1690 printchar (buf.buffer[i]);
1691
1692 putchar_unfiltered ('\n');
1693
1694 printf_unfiltered ("%d allocated, %d used.\n", buf.allocated, buf.used);
1695
1696 re_compile_fastmap (&buf);
1697 printf_unfiltered ("Allowed by fastmap: ");
1698 for (i = 0; i < (1 << BYTEWIDTH); i++)
1699 if (fastmap[i]) printchar (i);
1700 putchar_unfiltered ('\n');
1701 }
1702
1703 gets (pat); /* Now read the string to match against */
1704
1705 i = re_match (&buf, pat, strlen (pat), 0, 0);
1706 printf_unfiltered ("Match value %d.\n", i);
1707 }
1708 }
1709
1710 #ifdef NOTDEF
1711 print_buf (bufp)
1712 struct re_pattern_buffer *bufp;
1713 {
1714 int i;
1715
1716 printf_unfiltered ("buf is :\n----------------\n");
1717 for (i = 0; i < bufp->used; i++)
1718 printchar (bufp->buffer[i]);
1719
1720 printf_unfiltered ("\n%d allocated, %d used.\n", bufp->allocated, bufp->used);
1721
1722 printf_unfiltered ("Allowed by fastmap: ");
1723 for (i = 0; i < (1 << BYTEWIDTH); i++)
1724 if (bufp->fastmap[i])
1725 printchar (i);
1726 printf_unfiltered ("\nAllowed by translate: ");
1727 if (bufp->translate)
1728 for (i = 0; i < (1 << BYTEWIDTH); i++)
1729 if (bufp->translate[i])
1730 printchar (i);
1731 printf_unfiltered ("\nfastmap is%s accurate\n", bufp->fastmap_accurate ? "" : "n't");
1732 printf_unfiltered ("can %s be null\n----------", bufp->can_be_null ? "" : "not");
1733 }
1734 #endif
1735
1736 printchar (c)
1737 char c;
1738 {
1739 if (c < 041 || c >= 0177)
1740 {
1741 putchar_unfiltered ('\\');
1742 putchar_unfiltered (((c >> 6) & 3) + '0');
1743 putchar_unfiltered (((c >> 3) & 7) + '0');
1744 putchar_unfiltered ((c & 7) + '0');
1745 }
1746 else
1747 putchar_unfiltered (c);
1748 }
1749
1750 error (string)
1751 char *string;
1752 {
1753 puts_unfiltered (string);
1754 exit (1);
1755 }
1756
1757 #endif /* test */
This page took 0.168285 seconds and 4 git commands to generate.