* stabs.texinfo: Document the format for C++ nested types.
[deliverable/binutils-gdb.git] / gdb / regex.c
CommitLineData
dd3b648e
RP
1/* Extended regular expression matching and search library.
2 Copyright (C) 1985, 1989 Free Software Foundation, Inc.
3
99a7de40
JG
4This program is free software; you can redistribute it and/or modify
5it under the terms of the GNU General Public License as published by
6the Free Software Foundation; either version 2 of the License, or
7(at your option) any later version.
8
9This program is distributed in the hope that it will be useful,
10but WITHOUT ANY WARRANTY; without even the implied warranty of
11MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12GNU General Public License for more details.
13
14You should have received a copy of the GNU General Public License
15along with this program; if not, write to the Free Software
16Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
dd3b648e
RP
17
18/* To test, compile with -Dtest.
19 This Dtestable feature turns this into a self-contained program
20 which reads a pattern, describes how it compiles,
21 then reads a string and searches for it. */
22
23#ifdef emacs
24
25/* The `emacs' switch turns on certain special matching commands
26 that make sense only in emacs. */
27
28#include "config.h"
29#include "lisp.h"
30#include "buffer.h"
31#include "syntax.h"
32
33#else /* not emacs */
34
35#ifdef USG
36#ifndef BSTRING
37#define bcopy(s,d,n) memcpy((d),(s),(n))
38#define bcmp(s1,s2,n) memcmp((s1),(s2),(n))
39#define bzero(s,n) memset((s),0,(n))
40#endif
41#endif
42
43/* Make alloca work the best possible way. */
44#ifdef __GNUC__
45#define alloca __builtin_alloca
46#else
47#ifdef sparc
48#include <alloca.h>
49#endif
50#endif
51
52/*
53 * Define the syntax stuff, so we can do the \<...\> things.
54 */
55
56#ifndef Sword /* must be non-zero in some of the tests below... */
57#define Sword 1
58#endif
59
60#define SYNTAX(c) re_syntax_table[c]
61
62#ifdef SYNTAX_TABLE
63
64char *re_syntax_table;
65
66#else
67
68static char re_syntax_table[256];
69
70static void
71init_syntax_once ()
72{
73 register int c;
74 static int done = 0;
75
76 if (done)
77 return;
78
79 bzero (re_syntax_table, sizeof re_syntax_table);
80
81 for (c = 'a'; c <= 'z'; c++)
82 re_syntax_table[c] = Sword;
83
84 for (c = 'A'; c <= 'Z'; c++)
85 re_syntax_table[c] = Sword;
86
87 for (c = '0'; c <= '9'; c++)
88 re_syntax_table[c] = Sword;
89
90 done = 1;
91}
92
93#endif /* SYNTAX_TABLE */
94#endif /* not emacs */
95
96#include "regex.h"
97
98/* Number of failure points to allocate space for initially,
99 when matching. If this number is exceeded, more space is allocated,
100 so it is not a hard limit. */
101
102#ifndef NFAILURES
103#define NFAILURES 80
104#endif /* NFAILURES */
105
106/* width of a byte in bits */
107
108#define BYTEWIDTH 8
109
110#ifndef SIGN_EXTEND_CHAR
111#define SIGN_EXTEND_CHAR(x) (x)
112#endif
113\f
114static int obscure_syntax = 0;
115
116/* Specify the precise syntax of regexp for compilation.
117 This provides for compatibility for various utilities
118 which historically have different, incompatible syntaxes.
119
120 The argument SYNTAX is a bit-mask containing the two bits
121 RE_NO_BK_PARENS and RE_NO_BK_VBAR. */
122
123int
124re_set_syntax (syntax)
51b57ded 125 int syntax;
dd3b648e
RP
126{
127 int ret;
128
129 ret = obscure_syntax;
130 obscure_syntax = syntax;
131 return ret;
132}
133\f
134/* re_compile_pattern takes a regular-expression string
135 and converts it into a buffer full of byte commands for matching.
136
137 PATTERN is the address of the pattern string
138 SIZE is the length of it.
139 BUFP is a struct re_pattern_buffer * which points to the info
140 on where to store the byte commands.
141 This structure contains a char * which points to the
142 actual space, which should have been obtained with malloc.
143 re_compile_pattern may use realloc to grow the buffer space.
144
145 The number of bytes of commands can be found out by looking in
146 the struct re_pattern_buffer that bufp pointed to,
147 after re_compile_pattern returns.
148*/
149
150#define PATPUSH(ch) (*b++ = (char) (ch))
151
152#define PATFETCH(c) \
153 {if (p == pend) goto end_of_pattern; \
154 c = * (unsigned char *) p++; \
155 if (translate) c = translate[c]; }
156
157#define PATFETCH_RAW(c) \
158 {if (p == pend) goto end_of_pattern; \
159 c = * (unsigned char *) p++; }
160
161#define PATUNFETCH p--
162
163#define EXTEND_BUFFER \
164 { char *old_buffer = bufp->buffer; \
165 if (bufp->allocated == (1<<16)) goto too_big; \
166 bufp->allocated *= 2; \
167 if (bufp->allocated > (1<<16)) bufp->allocated = (1<<16); \
168 if (!(bufp->buffer = (char *) realloc (bufp->buffer, bufp->allocated))) \
169 goto memory_exhausted; \
170 c = bufp->buffer - old_buffer; \
171 b += c; \
172 if (fixup_jump) \
173 fixup_jump += c; \
174 if (laststart) \
175 laststart += c; \
176 begalt += c; \
177 if (pending_exact) \
178 pending_exact += c; \
179 }
180
50e0dc41 181static void store_jump (), insert_jump ();
dd3b648e
RP
182
183char *
184re_compile_pattern (pattern, size, bufp)
185 char *pattern;
186 int size;
187 struct re_pattern_buffer *bufp;
188{
189 register char *b = bufp->buffer;
190 register char *p = pattern;
191 char *pend = pattern + size;
192 register unsigned c, c1;
193 char *p1;
194 unsigned char *translate = (unsigned char *) bufp->translate;
195
196 /* address of the count-byte of the most recently inserted "exactn" command.
197 This makes it possible to tell whether a new exact-match character
198 can be added to that command or requires a new "exactn" command. */
199
200 char *pending_exact = 0;
201
202 /* address of the place where a forward-jump should go
203 to the end of the containing expression.
204 Each alternative of an "or", except the last, ends with a forward-jump
205 of this sort. */
206
207 char *fixup_jump = 0;
208
209 /* address of start of the most recently finished expression.
210 This tells postfix * where to find the start of its operand. */
211
212 char *laststart = 0;
213
214 /* In processing a repeat, 1 means zero matches is allowed */
215
216 char zero_times_ok;
217
218 /* In processing a repeat, 1 means many matches is allowed */
219
220 char many_times_ok;
221
222 /* address of beginning of regexp, or inside of last \( */
223
224 char *begalt = b;
225
226 /* Stack of information saved by \( and restored by \).
227 Four stack elements are pushed by each \(:
228 First, the value of b.
229 Second, the value of fixup_jump.
230 Third, the value of regnum.
231 Fourth, the value of begalt. */
232
233 int stackb[40];
234 int *stackp = stackb;
235 int *stacke = stackb + 40;
236 int *stackt;
237
238 /* Counts \('s as they are encountered. Remembered for the matching \),
239 where it becomes the "register number" to put in the stop_memory command */
240
241 int regnum = 1;
242
243 bufp->fastmap_accurate = 0;
244
245#ifndef emacs
246#ifndef SYNTAX_TABLE
247 /*
248 * Initialize the syntax table.
249 */
250 init_syntax_once();
251#endif
252#endif
253
254 if (bufp->allocated == 0)
255 {
256 bufp->allocated = 28;
257 if (bufp->buffer)
258 /* EXTEND_BUFFER loses when bufp->allocated is 0 */
259 bufp->buffer = (char *) realloc (bufp->buffer, 28);
260 else
261 /* Caller did not allocate a buffer. Do it for him */
262 bufp->buffer = (char *) malloc (28);
263 if (!bufp->buffer) goto memory_exhausted;
264 begalt = b = bufp->buffer;
265 }
266
267 while (p != pend)
268 {
269 if (b - bufp->buffer > bufp->allocated - 10)
270 /* Note that EXTEND_BUFFER clobbers c */
271 EXTEND_BUFFER;
272
273 PATFETCH (c);
274
275 switch (c)
276 {
277 case '$':
278 if (obscure_syntax & RE_TIGHT_VBAR)
279 {
280 if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS) && p != pend)
281 goto normal_char;
282 /* Make operand of last vbar end before this `$'. */
283 if (fixup_jump)
284 store_jump (fixup_jump, jump, b);
285 fixup_jump = 0;
286 PATPUSH (endline);
287 break;
288 }
289
290 /* $ means succeed if at end of line, but only in special contexts.
291 If randomly in the middle of a pattern, it is a normal character. */
292 if (p == pend || *p == '\n'
293 || (obscure_syntax & RE_CONTEXT_INDEP_OPS)
294 || (obscure_syntax & RE_NO_BK_PARENS
295 ? *p == ')'
296 : *p == '\\' && p[1] == ')')
297 || (obscure_syntax & RE_NO_BK_VBAR
298 ? *p == '|'
299 : *p == '\\' && p[1] == '|'))
300 {
301 PATPUSH (endline);
302 break;
303 }
304 goto normal_char;
305
306 case '^':
307 /* ^ means succeed if at beg of line, but only if no preceding pattern. */
308
309 if (laststart && p[-2] != '\n'
310 && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
311 goto normal_char;
312 if (obscure_syntax & RE_TIGHT_VBAR)
313 {
314 if (p != pattern + 1
315 && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
316 goto normal_char;
317 PATPUSH (begline);
318 begalt = b;
319 }
320 else
321 PATPUSH (begline);
322 break;
323
324 case '+':
325 case '?':
326 if (obscure_syntax & RE_BK_PLUS_QM)
327 goto normal_char;
328 handle_plus:
329 case '*':
330 /* If there is no previous pattern, char not special. */
331 if (!laststart && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
332 goto normal_char;
333 /* If there is a sequence of repetition chars,
334 collapse it down to equivalent to just one. */
335 zero_times_ok = 0;
336 many_times_ok = 0;
337 while (1)
338 {
339 zero_times_ok |= c != '+';
340 many_times_ok |= c != '?';
341 if (p == pend)
342 break;
343 PATFETCH (c);
344 if (c == '*')
345 ;
346 else if (!(obscure_syntax & RE_BK_PLUS_QM)
347 && (c == '+' || c == '?'))
348 ;
349 else if ((obscure_syntax & RE_BK_PLUS_QM)
350 && c == '\\')
351 {
352 int c1;
353 PATFETCH (c1);
354 if (!(c1 == '+' || c1 == '?'))
355 {
356 PATUNFETCH;
357 PATUNFETCH;
358 break;
359 }
360 c = c1;
361 }
362 else
363 {
364 PATUNFETCH;
365 break;
366 }
367 }
368
369 /* Star, etc. applied to an empty pattern is equivalent
370 to an empty pattern. */
371 if (!laststart)
372 break;
373
374 /* Now we know whether 0 matches is allowed,
375 and whether 2 or more matches is allowed. */
376 if (many_times_ok)
377 {
378 /* If more than one repetition is allowed,
379 put in a backward jump at the end. */
380 store_jump (b, maybe_finalize_jump, laststart - 3);
381 b += 3;
382 }
383 insert_jump (on_failure_jump, laststart, b + 3, b);
384 pending_exact = 0;
385 b += 3;
386 if (!zero_times_ok)
387 {
388 /* At least one repetition required: insert before the loop
389 a skip over the initial on-failure-jump instruction */
390 insert_jump (dummy_failure_jump, laststart, laststart + 6, b);
391 b += 3;
392 }
393 break;
394
395 case '.':
396 laststart = b;
397 PATPUSH (anychar);
398 break;
399
400 case '[':
401 while (b - bufp->buffer
402 > bufp->allocated - 3 - (1 << BYTEWIDTH) / BYTEWIDTH)
403 /* Note that EXTEND_BUFFER clobbers c */
404 EXTEND_BUFFER;
405
406 laststart = b;
407 if (*p == '^')
408 PATPUSH (charset_not), p++;
409 else
410 PATPUSH (charset);
411 p1 = p;
412
413 PATPUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
414 /* Clear the whole map */
415 bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
416 /* Read in characters and ranges, setting map bits */
417 while (1)
418 {
419 PATFETCH (c);
420 if (c == ']' && p != p1 + 1) break;
421 if (*p == '-' && p[1] != ']')
422 {
423 PATFETCH (c1);
424 PATFETCH (c1);
425 while (c <= c1)
426 b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH), c++;
427 }
428 else
429 {
430 b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH);
431 }
432 }
433 /* Discard any bitmap bytes that are all 0 at the end of the map.
434 Decrement the map-length byte too. */
435 while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
436 b[-1]--;
437 b += b[-1];
438 break;
439
440 case '(':
441 if (! (obscure_syntax & RE_NO_BK_PARENS))
442 goto normal_char;
443 else
444 goto handle_open;
445
446 case ')':
447 if (! (obscure_syntax & RE_NO_BK_PARENS))
448 goto normal_char;
449 else
450 goto handle_close;
451
452 case '\n':
453 if (! (obscure_syntax & RE_NEWLINE_OR))
454 goto normal_char;
455 else
456 goto handle_bar;
457
458 case '|':
459 if (! (obscure_syntax & RE_NO_BK_VBAR))
460 goto normal_char;
461 else
462 goto handle_bar;
463
464 case '\\':
465 if (p == pend) goto invalid_pattern;
466 PATFETCH_RAW (c);
467 switch (c)
468 {
469 case '(':
470 if (obscure_syntax & RE_NO_BK_PARENS)
471 goto normal_backsl;
472 handle_open:
473 if (stackp == stacke) goto nesting_too_deep;
474 if (regnum < RE_NREGS)
475 {
476 PATPUSH (start_memory);
477 PATPUSH (regnum);
478 }
479 *stackp++ = b - bufp->buffer;
480 *stackp++ = fixup_jump ? fixup_jump - bufp->buffer + 1 : 0;
481 *stackp++ = regnum++;
482 *stackp++ = begalt - bufp->buffer;
483 fixup_jump = 0;
484 laststart = 0;
485 begalt = b;
486 break;
487
488 case ')':
489 if (obscure_syntax & RE_NO_BK_PARENS)
490 goto normal_backsl;
491 handle_close:
492 if (stackp == stackb) goto unmatched_close;
493 begalt = *--stackp + bufp->buffer;
494 if (fixup_jump)
495 store_jump (fixup_jump, jump, b);
496 if (stackp[-1] < RE_NREGS)
497 {
498 PATPUSH (stop_memory);
499 PATPUSH (stackp[-1]);
500 }
501 stackp -= 2;
502 fixup_jump = 0;
503 if (*stackp)
504 fixup_jump = *stackp + bufp->buffer - 1;
505 laststart = *--stackp + bufp->buffer;
506 break;
507
508 case '|':
509 if (obscure_syntax & RE_NO_BK_VBAR)
510 goto normal_backsl;
511 handle_bar:
512 insert_jump (on_failure_jump, begalt, b + 6, b);
513 pending_exact = 0;
514 b += 3;
515 if (fixup_jump)
516 store_jump (fixup_jump, jump, b);
517 fixup_jump = b;
518 b += 3;
519 laststart = 0;
520 begalt = b;
521 break;
522
523#ifdef emacs
524 case '=':
525 PATPUSH (at_dot);
526 break;
527
528 case 's':
529 laststart = b;
530 PATPUSH (syntaxspec);
531 PATFETCH (c);
532 PATPUSH (syntax_spec_code[c]);
533 break;
534
535 case 'S':
536 laststart = b;
537 PATPUSH (notsyntaxspec);
538 PATFETCH (c);
539 PATPUSH (syntax_spec_code[c]);
540 break;
541#endif /* emacs */
542
543 case 'w':
544 laststart = b;
545 PATPUSH (wordchar);
546 break;
547
548 case 'W':
549 laststart = b;
550 PATPUSH (notwordchar);
551 break;
552
553 case '<':
554 PATPUSH (wordbeg);
555 break;
556
557 case '>':
558 PATPUSH (wordend);
559 break;
560
561 case 'b':
562 PATPUSH (wordbound);
563 break;
564
565 case 'B':
566 PATPUSH (notwordbound);
567 break;
568
569 case '`':
570 PATPUSH (begbuf);
571 break;
572
573 case '\'':
574 PATPUSH (endbuf);
575 break;
576
577 case '1':
578 case '2':
579 case '3':
580 case '4':
581 case '5':
582 case '6':
583 case '7':
584 case '8':
585 case '9':
586 c1 = c - '0';
587 if (c1 >= regnum)
588 goto normal_char;
589 for (stackt = stackp - 2; stackt > stackb; stackt -= 4)
590 if (*stackt == c1)
591 goto normal_char;
592 laststart = b;
593 PATPUSH (duplicate);
594 PATPUSH (c1);
595 break;
596
597 case '+':
598 case '?':
599 if (obscure_syntax & RE_BK_PLUS_QM)
600 goto handle_plus;
601
602 default:
603 normal_backsl:
604 /* You might think it would be useful for \ to mean
605 not to translate; but if we don't translate it
606 it will never match anything. */
607 if (translate) c = translate[c];
608 goto normal_char;
609 }
610 break;
611
612 default:
613 normal_char:
614 if (!pending_exact || pending_exact + *pending_exact + 1 != b
615 || *pending_exact == 0177 || *p == '*' || *p == '^'
616 || ((obscure_syntax & RE_BK_PLUS_QM)
617 ? *p == '\\' && (p[1] == '+' || p[1] == '?')
618 : (*p == '+' || *p == '?')))
619 {
620 laststart = b;
621 PATPUSH (exactn);
622 pending_exact = b;
623 PATPUSH (0);
624 }
625 PATPUSH (c);
626 (*pending_exact)++;
627 }
628 }
629
630 if (fixup_jump)
631 store_jump (fixup_jump, jump, b);
632
633 if (stackp != stackb) goto unmatched_open;
634
635 bufp->used = b - bufp->buffer;
636 return 0;
637
638 invalid_pattern:
639 return "Invalid regular expression";
640
641 unmatched_open:
642 return "Unmatched \\(";
643
644 unmatched_close:
645 return "Unmatched \\)";
646
647 end_of_pattern:
648 return "Premature end of regular expression";
649
650 nesting_too_deep:
651 return "Nesting too deep";
652
653 too_big:
654 return "Regular expression too big";
655
656 memory_exhausted:
657 return "Memory exhausted";
658}
659
660/* Store where `from' points a jump operation to jump to where `to' points.
661 `opcode' is the opcode to store. */
662
50e0dc41 663static void
dd3b648e
RP
664store_jump (from, opcode, to)
665 char *from, *to;
666 char opcode;
667{
668 from[0] = opcode;
669 from[1] = (to - (from + 3)) & 0377;
670 from[2] = (to - (from + 3)) >> 8;
671}
672
673/* Open up space at char FROM, and insert there a jump to TO.
674 CURRENT_END gives te end of the storage no in use,
675 so we know how much data to copy up.
676 OP is the opcode of the jump to insert.
677
678 If you call this function, you must zero out pending_exact. */
679
50e0dc41 680static void
dd3b648e
RP
681insert_jump (op, from, to, current_end)
682 char op;
683 char *from, *to, *current_end;
684{
685 register char *pto = current_end + 3;
686 register char *pfrom = current_end;
687 while (pfrom != from)
688 *--pto = *--pfrom;
689 store_jump (from, op, to);
690}
691\f
692/* Given a pattern, compute a fastmap from it.
693 The fastmap records which of the (1 << BYTEWIDTH) possible characters
694 can start a string that matches the pattern.
695 This fastmap is used by re_search to skip quickly over totally implausible text.
696
697 The caller must supply the address of a (1 << BYTEWIDTH)-byte data area
698 as bufp->fastmap.
699 The other components of bufp describe the pattern to be used. */
700
701void
702re_compile_fastmap (bufp)
703 struct re_pattern_buffer *bufp;
704{
705 unsigned char *pattern = (unsigned char *) bufp->buffer;
706 int size = bufp->used;
707 register char *fastmap = bufp->fastmap;
708 register unsigned char *p = pattern;
709 register unsigned char *pend = pattern + size;
51b57ded 710 register int j;
dd3b648e
RP
711 unsigned char *translate = (unsigned char *) bufp->translate;
712
713 unsigned char *stackb[NFAILURES];
714 unsigned char **stackp = stackb;
715
716 bzero (fastmap, (1 << BYTEWIDTH));
717 bufp->fastmap_accurate = 1;
718 bufp->can_be_null = 0;
719
720 while (p)
721 {
722 if (p == pend)
723 {
724 bufp->can_be_null = 1;
725 break;
726 }
727#ifdef SWITCH_ENUM_BUG
728 switch ((int) ((enum regexpcode) *p++))
729#else
730 switch ((enum regexpcode) *p++)
731#endif
732 {
733 case exactn:
734 if (translate)
735 fastmap[translate[p[1]]] = 1;
736 else
737 fastmap[p[1]] = 1;
738 break;
739
740 case begline:
741 case before_dot:
742 case at_dot:
743 case after_dot:
744 case begbuf:
745 case endbuf:
746 case wordbound:
747 case notwordbound:
748 case wordbeg:
749 case wordend:
750 continue;
751
752 case endline:
753 if (translate)
754 fastmap[translate['\n']] = 1;
755 else
756 fastmap['\n'] = 1;
757 if (bufp->can_be_null != 1)
758 bufp->can_be_null = 2;
759 break;
760
761 case finalize_jump:
762 case maybe_finalize_jump:
763 case jump:
764 case dummy_failure_jump:
765 bufp->can_be_null = 1;
766 j = *p++ & 0377;
767 j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
768 p += j + 1; /* The 1 compensates for missing ++ above */
769 if (j > 0)
770 continue;
771 /* Jump backward reached implies we just went through
772 the body of a loop and matched nothing.
773 Opcode jumped to should be an on_failure_jump.
774 Just treat it like an ordinary jump.
775 For a * loop, it has pushed its failure point already;
776 if so, discard that as redundant. */
777 if ((enum regexpcode) *p != on_failure_jump)
778 continue;
779 p++;
780 j = *p++ & 0377;
781 j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
782 p += j + 1; /* The 1 compensates for missing ++ above */
783 if (stackp != stackb && *stackp == p)
784 stackp--;
785 continue;
786
787 case on_failure_jump:
788 j = *p++ & 0377;
789 j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
790 p++;
791 *++stackp = p + j;
792 continue;
793
794 case start_memory:
795 case stop_memory:
796 p++;
797 continue;
798
799 case duplicate:
800 bufp->can_be_null = 1;
801 fastmap['\n'] = 1;
802 case anychar:
803 for (j = 0; j < (1 << BYTEWIDTH); j++)
804 if (j != '\n')
805 fastmap[j] = 1;
806 if (bufp->can_be_null)
807 return;
808 /* Don't return; check the alternative paths
809 so we can set can_be_null if appropriate. */
810 break;
811
812 case wordchar:
813 for (j = 0; j < (1 << BYTEWIDTH); j++)
814 if (SYNTAX (j) == Sword)
815 fastmap[j] = 1;
816 break;
817
818 case notwordchar:
819 for (j = 0; j < (1 << BYTEWIDTH); j++)
820 if (SYNTAX (j) != Sword)
821 fastmap[j] = 1;
822 break;
823
824#ifdef emacs
825 case syntaxspec:
826 k = *p++;
827 for (j = 0; j < (1 << BYTEWIDTH); j++)
828 if (SYNTAX (j) == (enum syntaxcode) k)
829 fastmap[j] = 1;
830 break;
831
832 case notsyntaxspec:
833 k = *p++;
834 for (j = 0; j < (1 << BYTEWIDTH); j++)
835 if (SYNTAX (j) != (enum syntaxcode) k)
836 fastmap[j] = 1;
837 break;
838#endif /* emacs */
839
840 case charset:
841 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
842 if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
843 {
844 if (translate)
845 fastmap[translate[j]] = 1;
846 else
847 fastmap[j] = 1;
848 }
849 break;
850
851 case charset_not:
852 /* Chars beyond end of map must be allowed */
853 for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
854 if (translate)
855 fastmap[translate[j]] = 1;
856 else
857 fastmap[j] = 1;
858
859 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
860 if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
861 {
862 if (translate)
863 fastmap[translate[j]] = 1;
864 else
865 fastmap[j] = 1;
866 }
867 break;
868 }
869
870 /* Get here means we have successfully found the possible starting characters
871 of one path of the pattern. We need not follow this path any farther.
872 Instead, look at the next alternative remembered in the stack. */
873 if (stackp != stackb)
874 p = *stackp--;
875 else
876 break;
877 }
878}
879\f
880/* Like re_search_2, below, but only one string is specified. */
881
882int
883re_search (pbufp, string, size, startpos, range, regs)
884 struct re_pattern_buffer *pbufp;
885 char *string;
886 int size, startpos, range;
887 struct re_registers *regs;
888{
889 return re_search_2 (pbufp, 0, 0, string, size, startpos, range, regs, size);
890}
891
892/* Like re_match_2 but tries first a match starting at index STARTPOS,
893 then at STARTPOS + 1, and so on.
894 RANGE is the number of places to try before giving up.
895 If RANGE is negative, the starting positions tried are
896 STARTPOS, STARTPOS - 1, etc.
897 It is up to the caller to make sure that range is not so large
898 as to take the starting position outside of the input strings.
899
900The value returned is the position at which the match was found,
901 or -1 if no match was found,
902 or -2 if error (such as failure stack overflow). */
903
904int
905re_search_2 (pbufp, string1, size1, string2, size2, startpos, range, regs, mstop)
906 struct re_pattern_buffer *pbufp;
907 char *string1, *string2;
908 int size1, size2;
909 int startpos;
910 register int range;
911 struct re_registers *regs;
912 int mstop;
913{
914 register char *fastmap = pbufp->fastmap;
915 register unsigned char *translate = (unsigned char *) pbufp->translate;
916 int total = size1 + size2;
917 int val;
918
919 /* Update the fastmap now if not correct already */
920 if (fastmap && !pbufp->fastmap_accurate)
921 re_compile_fastmap (pbufp);
922
923 /* Don't waste time in a long search for a pattern
924 that says it is anchored. */
925 if (pbufp->used > 0 && (enum regexpcode) pbufp->buffer[0] == begbuf
926 && range > 0)
927 {
928 if (startpos > 0)
929 return -1;
930 else
931 range = 1;
932 }
933
934 while (1)
935 {
936 /* If a fastmap is supplied, skip quickly over characters
937 that cannot possibly be the start of a match.
938 Note, however, that if the pattern can possibly match
939 the null string, we must test it at each starting point
940 so that we take the first null string we get. */
941
942 if (fastmap && startpos < total && pbufp->can_be_null != 1)
943 {
944 if (range > 0)
945 {
946 register int lim = 0;
947 register unsigned char *p;
948 int irange = range;
949 if (startpos < size1 && startpos + range >= size1)
950 lim = range - (size1 - startpos);
951
952 p = ((unsigned char *)
953 &(startpos >= size1 ? string2 - size1 : string1)[startpos]);
954
955 if (translate)
956 {
957 while (range > lim && !fastmap[translate[*p++]])
958 range--;
959 }
960 else
961 {
962 while (range > lim && !fastmap[*p++])
963 range--;
964 }
965 startpos += irange - range;
966 }
967 else
968 {
969 register unsigned char c;
970 if (startpos >= size1)
971 c = string2[startpos - size1];
972 else
973 c = string1[startpos];
974 c &= 0xff;
975 if (translate ? !fastmap[translate[c]] : !fastmap[c])
976 goto advance;
977 }
978 }
979
980 if (range >= 0 && startpos == total
981 && fastmap && pbufp->can_be_null == 0)
982 return -1;
983
984 val = re_match_2 (pbufp, string1, size1, string2, size2, startpos, regs, mstop);
985 if (0 <= val)
986 {
987 if (val == -2)
988 return -2;
989 return startpos;
990 }
991
992#ifdef C_ALLOCA
993 alloca (0);
994#endif /* C_ALLOCA */
995
996 advance:
997 if (!range) break;
998 if (range > 0) range--, startpos++; else range++, startpos--;
999 }
1000 return -1;
1001}
1002\f
1003#ifndef emacs /* emacs never uses this */
1004int
1005re_match (pbufp, string, size, pos, regs)
1006 struct re_pattern_buffer *pbufp;
1007 char *string;
1008 int size, pos;
1009 struct re_registers *regs;
1010{
1011 return re_match_2 (pbufp, 0, 0, string, size, pos, regs, size);
1012}
1013#endif /* emacs */
1014
1015/* Maximum size of failure stack. Beyond this, overflow is an error. */
1016
1017int re_max_failures = 2000;
1018
1019static int bcmp_translate();
1020/* Match the pattern described by PBUFP
1021 against data which is the virtual concatenation of STRING1 and STRING2.
1022 SIZE1 and SIZE2 are the sizes of the two data strings.
1023 Start the match at position POS.
1024 Do not consider matching past the position MSTOP.
1025
1026 If pbufp->fastmap is nonzero, then it had better be up to date.
1027
1028 The reason that the data to match are specified as two components
1029 which are to be regarded as concatenated
1030 is so this function can be used directly on the contents of an Emacs buffer.
1031
1032 -1 is returned if there is no match. -2 is returned if there is
1033 an error (such as match stack overflow). Otherwise the value is the length
1034 of the substring which was matched. */
1035
1036int
1037re_match_2 (pbufp, string1, size1, string2, size2, pos, regs, mstop)
1038 struct re_pattern_buffer *pbufp;
1039 unsigned char *string1, *string2;
1040 int size1, size2;
1041 int pos;
1042 struct re_registers *regs;
1043 int mstop;
1044{
1045 register unsigned char *p = (unsigned char *) pbufp->buffer;
1046 register unsigned char *pend = p + pbufp->used;
1047 /* End of first string */
1048 unsigned char *end1;
1049 /* End of second string */
1050 unsigned char *end2;
1051 /* Pointer just past last char to consider matching */
1052 unsigned char *end_match_1, *end_match_2;
1053 register unsigned char *d, *dend;
1054 register int mcnt;
1055 unsigned char *translate = (unsigned char *) pbufp->translate;
1056
1057 /* Failure point stack. Each place that can handle a failure further down the line
1058 pushes a failure point on this stack. It consists of two char *'s.
1059 The first one pushed is where to resume scanning the pattern;
1060 the second pushed is where to resume scanning the strings.
1061 If the latter is zero, the failure point is a "dummy".
1062 If a failure happens and the innermost failure point is dormant,
1063 it discards that failure point and tries the next one. */
1064
1065 unsigned char *initial_stack[2 * NFAILURES];
1066 unsigned char **stackb = initial_stack;
1067 unsigned char **stackp = stackb, **stacke = &stackb[2 * NFAILURES];
1068
1069 /* Information on the "contents" of registers.
1070 These are pointers into the input strings; they record
1071 just what was matched (on this attempt) by some part of the pattern.
1072 The start_memory command stores the start of a register's contents
1073 and the stop_memory command stores the end.
1074
1075 At that point, regstart[regnum] points to the first character in the register,
1076 regend[regnum] points to the first character beyond the end of the register,
1077 regstart_seg1[regnum] is true iff regstart[regnum] points into string1,
1078 and regend_seg1[regnum] is true iff regend[regnum] points into string1. */
1079
1080 unsigned char *regstart[RE_NREGS];
1081 unsigned char *regend[RE_NREGS];
1082 unsigned char regstart_seg1[RE_NREGS], regend_seg1[RE_NREGS];
1083
1084 /* Set up pointers to ends of strings.
1085 Don't allow the second string to be empty unless both are empty. */
1086 if (!size2)
1087 {
1088 string2 = string1;
1089 size2 = size1;
1090 string1 = 0;
1091 size1 = 0;
1092 }
1093 end1 = string1 + size1;
1094 end2 = string2 + size2;
1095
1096 /* Compute where to stop matching, within the two strings */
1097 if (mstop <= size1)
1098 {
1099 end_match_1 = string1 + mstop;
1100 end_match_2 = string2;
1101 }
1102 else
1103 {
1104 end_match_1 = end1;
1105 end_match_2 = string2 + mstop - size1;
1106 }
1107
1108 /* Initialize \) text positions to -1
1109 to mark ones that no \( or \) has been seen for. */
1110
1111 for (mcnt = 0; mcnt < sizeof (regend) / sizeof (*regend); mcnt++)
1112 regend[mcnt] = (unsigned char *) -1;
1113
1114 /* `p' scans through the pattern as `d' scans through the data.
1115 `dend' is the end of the input string that `d' points within.
1116 `d' is advanced into the following input string whenever necessary,
1117 but this happens before fetching;
1118 therefore, at the beginning of the loop,
1119 `d' can be pointing at the end of a string,
1120 but it cannot equal string2. */
1121
1122 if (pos <= size1)
1123 d = string1 + pos, dend = end_match_1;
1124 else
1125 d = string2 + pos - size1, dend = end_match_2;
1126
1127/* Write PREFETCH; just before fetching a character with *d. */
1128#define PREFETCH \
1129 while (d == dend) \
1130 { if (dend == end_match_2) goto fail; /* end of string2 => failure */ \
1131 d = string2; /* end of string1 => advance to string2. */ \
1132 dend = end_match_2; }
1133
1134 /* This loop loops over pattern commands.
1135 It exits by returning from the function if match is complete,
1136 or it drops through if match fails at this starting point in the input data. */
1137
1138 while (1)
1139 {
1140 if (p == pend)
1141 /* End of pattern means we have succeeded! */
1142 {
1143 /* If caller wants register contents data back, convert it to indices */
1144 if (regs)
1145 {
1146 regs->start[0] = pos;
1147 if (dend == end_match_1)
1148 regs->end[0] = d - string1;
1149 else
1150 regs->end[0] = d - string2 + size1;
1151 for (mcnt = 1; mcnt < RE_NREGS; mcnt++)
1152 {
1153 if (regend[mcnt] == (unsigned char *) -1)
1154 {
1155 regs->start[mcnt] = -1;
1156 regs->end[mcnt] = -1;
1157 continue;
1158 }
1159 if (regstart_seg1[mcnt])
1160 regs->start[mcnt] = regstart[mcnt] - string1;
1161 else
1162 regs->start[mcnt] = regstart[mcnt] - string2 + size1;
1163 if (regend_seg1[mcnt])
1164 regs->end[mcnt] = regend[mcnt] - string1;
1165 else
1166 regs->end[mcnt] = regend[mcnt] - string2 + size1;
1167 }
1168 }
1169 if (dend == end_match_1)
1170 return (d - string1 - pos);
1171 else
1172 return d - string2 + size1 - pos;
1173 }
1174
1175 /* Otherwise match next pattern command */
1176#ifdef SWITCH_ENUM_BUG
1177 switch ((int) ((enum regexpcode) *p++))
1178#else
1179 switch ((enum regexpcode) *p++)
1180#endif
1181 {
1182
1183 /* \( is represented by a start_memory, \) by a stop_memory.
1184 Both of those commands contain a "register number" argument.
1185 The text matched within the \( and \) is recorded under that number.
1186 Then, \<digit> turns into a `duplicate' command which
1187 is followed by the numeric value of <digit> as the register number. */
1188
1189 case start_memory:
1190 regstart[*p] = d;
1191 regstart_seg1[*p++] = (dend == end_match_1);
1192 break;
1193
1194 case stop_memory:
1195 regend[*p] = d;
1196 regend_seg1[*p++] = (dend == end_match_1);
1197 break;
1198
1199 case duplicate:
1200 {
1201 int regno = *p++; /* Get which register to match against */
1202 register unsigned char *d2, *dend2;
1203
1204 d2 = regstart[regno];
1205 dend2 = ((regstart_seg1[regno] == regend_seg1[regno])
1206 ? regend[regno] : end_match_1);
1207 while (1)
1208 {
1209 /* Advance to next segment in register contents, if necessary */
1210 while (d2 == dend2)
1211 {
1212 if (dend2 == end_match_2) break;
1213 if (dend2 == regend[regno]) break;
1214 d2 = string2, dend2 = regend[regno]; /* end of string1 => advance to string2. */
1215 }
1216 /* At end of register contents => success */
1217 if (d2 == dend2) break;
1218
1219 /* Advance to next segment in data being matched, if necessary */
1220 PREFETCH;
1221
1222 /* mcnt gets # consecutive chars to compare */
1223 mcnt = dend - d;
1224 if (mcnt > dend2 - d2)
1225 mcnt = dend2 - d2;
1226 /* Compare that many; failure if mismatch, else skip them. */
1227 if (translate ? bcmp_translate (d, d2, mcnt, translate) : bcmp (d, d2, mcnt))
1228 goto fail;
1229 d += mcnt, d2 += mcnt;
1230 }
1231 }
1232 break;
1233
1234 case anychar:
1235 /* fetch a data character */
1236 PREFETCH;
1237 /* Match anything but a newline. */
1238 if ((translate ? translate[*d++] : *d++) == '\n')
1239 goto fail;
1240 break;
1241
1242 case charset:
1243 case charset_not:
1244 {
1245 /* Nonzero for charset_not */
1246 int not = 0;
1247 register int c;
1248 if (*(p - 1) == (unsigned char) charset_not)
1249 not = 1;
1250
1251 /* fetch a data character */
1252 PREFETCH;
1253
1254 if (translate)
1255 c = translate [*d];
1256 else
1257 c = *d;
1258
1259 if (c < *p * BYTEWIDTH
1260 && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
1261 not = !not;
1262
1263 p += 1 + *p;
1264
1265 if (!not) goto fail;
1266 d++;
1267 break;
1268 }
1269
1270 case begline:
1271 if (d == string1 || d[-1] == '\n')
1272 break;
1273 goto fail;
1274
1275 case endline:
1276 if (d == end2
1277 || (d == end1 ? (size2 == 0 || *string2 == '\n') : *d == '\n'))
1278 break;
1279 goto fail;
1280
1281 /* "or" constructs ("|") are handled by starting each alternative
1282 with an on_failure_jump that points to the start of the next alternative.
1283 Each alternative except the last ends with a jump to the joining point.
1284 (Actually, each jump except for the last one really jumps
1285 to the following jump, because tensioning the jumps is a hassle.) */
1286
1287 /* The start of a stupid repeat has an on_failure_jump that points
1288 past the end of the repeat text.
1289 This makes a failure point so that, on failure to match a repetition,
1290 matching restarts past as many repetitions have been found
1291 with no way to fail and look for another one. */
1292
1293 /* A smart repeat is similar but loops back to the on_failure_jump
1294 so that each repetition makes another failure point. */
1295
1296 case on_failure_jump:
1297 if (stackp == stacke)
1298 {
1299 unsigned char **stackx;
1300 if (stacke - stackb > re_max_failures * 2)
1301 return -2;
1302 stackx = (unsigned char **) alloca (2 * (stacke - stackb)
1303 * sizeof (char *));
1304 bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *));
1305 stackp = stackx + (stackp - stackb);
1306 stacke = stackx + 2 * (stacke - stackb);
1307 stackb = stackx;
1308 }
1309 mcnt = *p++ & 0377;
1310 mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
1311 p++;
1312 *stackp++ = mcnt + p;
1313 *stackp++ = d;
1314 break;
1315
1316 /* The end of a smart repeat has an maybe_finalize_jump back.
1317 Change it either to a finalize_jump or an ordinary jump. */
1318
1319 case maybe_finalize_jump:
1320 mcnt = *p++ & 0377;
1321 mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
1322 p++;
1323 {
1324 register unsigned char *p2 = p;
1325 /* Compare what follows with the begining of the repeat.
1326 If we can establish that there is nothing that they would
1327 both match, we can change to finalize_jump */
1328 while (p2 != pend
1329 && (*p2 == (unsigned char) stop_memory
1330 || *p2 == (unsigned char) start_memory))
1331 p2++;
1332 if (p2 == pend)
1333 p[-3] = (unsigned char) finalize_jump;
1334 else if (*p2 == (unsigned char) exactn
1335 || *p2 == (unsigned char) endline)
1336 {
1337 register int c = *p2 == (unsigned char) endline ? '\n' : p2[2];
1338 register unsigned char *p1 = p + mcnt;
1339 /* p1[0] ... p1[2] are an on_failure_jump.
1340 Examine what follows that */
1341 if (p1[3] == (unsigned char) exactn && p1[5] != c)
1342 p[-3] = (unsigned char) finalize_jump;
1343 else if (p1[3] == (unsigned char) charset
1344 || p1[3] == (unsigned char) charset_not)
1345 {
1346 int not = p1[3] == (unsigned char) charset_not;
1347 if (c < p1[4] * BYTEWIDTH
1348 && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
1349 not = !not;
1350 /* not is 1 if c would match */
1351 /* That means it is not safe to finalize */
1352 if (!not)
1353 p[-3] = (unsigned char) finalize_jump;
1354 }
1355 }
1356 }
1357 p -= 2;
1358 if (p[-1] != (unsigned char) finalize_jump)
1359 {
1360 p[-1] = (unsigned char) jump;
1361 goto nofinalize;
1362 }
1363
1364 /* The end of a stupid repeat has a finalize-jump
1365 back to the start, where another failure point will be made
1366 which will point after all the repetitions found so far. */
1367
1368 case finalize_jump:
1369 stackp -= 2;
1370
1371 case jump:
1372 nofinalize:
1373 mcnt = *p++ & 0377;
1374 mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
1375 p += mcnt + 1; /* The 1 compensates for missing ++ above */
1376 break;
1377
1378 case dummy_failure_jump:
1379 if (stackp == stacke)
1380 {
1381 unsigned char **stackx
1382 = (unsigned char **) alloca (2 * (stacke - stackb)
1383 * sizeof (char *));
1384 bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *));
1385 stackp = stackx + (stackp - stackb);
1386 stacke = stackx + 2 * (stacke - stackb);
1387 stackb = stackx;
1388 }
1389 *stackp++ = 0;
1390 *stackp++ = 0;
1391 goto nofinalize;
1392
1393 case wordbound:
1394 if (d == string1 /* Points to first char */
1395 || d == end2 /* Points to end */
1396 || (d == end1 && size2 == 0)) /* Points to end */
1397 break;
1398 if ((SYNTAX (d[-1]) == Sword)
1399 != (SYNTAX (d == end1 ? *string2 : *d) == Sword))
1400 break;
1401 goto fail;
1402
1403 case notwordbound:
1404 if (d == string1 /* Points to first char */
1405 || d == end2 /* Points to end */
1406 || (d == end1 && size2 == 0)) /* Points to end */
1407 goto fail;
1408 if ((SYNTAX (d[-1]) == Sword)
1409 != (SYNTAX (d == end1 ? *string2 : *d) == Sword))
1410 goto fail;
1411 break;
1412
1413 case wordbeg:
1414 if (d == end2 /* Points to end */
1415 || (d == end1 && size2 == 0) /* Points to end */
1416 || SYNTAX (* (d == end1 ? string2 : d)) != Sword) /* Next char not a letter */
1417 goto fail;
1418 if (d == string1 /* Points to first char */
1419 || SYNTAX (d[-1]) != Sword) /* prev char not letter */
1420 break;
1421 goto fail;
1422
1423 case wordend:
1424 if (d == string1 /* Points to first char */
1425 || SYNTAX (d[-1]) != Sword) /* prev char not letter */
1426 goto fail;
1427 if (d == end2 /* Points to end */
1428 || (d == end1 && size2 == 0) /* Points to end */
1429 || SYNTAX (d == end1 ? *string2 : *d) != Sword) /* Next char not a letter */
1430 break;
1431 goto fail;
1432
1433#ifdef emacs
1434 case before_dot:
1435 if (((d - string2 <= (unsigned) size2)
1436 ? d - bf_p2 : d - bf_p1)
1437 <= point)
1438 goto fail;
1439 break;
1440
1441 case at_dot:
1442 if (((d - string2 <= (unsigned) size2)
1443 ? d - bf_p2 : d - bf_p1)
1444 == point)
1445 goto fail;
1446 break;
1447
1448 case after_dot:
1449 if (((d - string2 <= (unsigned) size2)
1450 ? d - bf_p2 : d - bf_p1)
1451 >= point)
1452 goto fail;
1453 break;
1454
1455 case wordchar:
1456 mcnt = (int) Sword;
1457 goto matchsyntax;
1458
1459 case syntaxspec:
1460 mcnt = *p++;
1461 matchsyntax:
1462 PREFETCH;
1463 if (SYNTAX (*d++) != (enum syntaxcode) mcnt) goto fail;
1464 break;
1465
1466 case notwordchar:
1467 mcnt = (int) Sword;
1468 goto matchnotsyntax;
1469
1470 case notsyntaxspec:
1471 mcnt = *p++;
1472 matchnotsyntax:
1473 PREFETCH;
1474 if (SYNTAX (*d++) == (enum syntaxcode) mcnt) goto fail;
1475 break;
1476#else
1477 case wordchar:
1478 PREFETCH;
1479 if (SYNTAX (*d++) == 0) goto fail;
1480 break;
1481
1482 case notwordchar:
1483 PREFETCH;
1484 if (SYNTAX (*d++) != 0) goto fail;
1485 break;
1486#endif /* not emacs */
1487
1488 case begbuf:
1489 if (d == string1) /* Note, d cannot equal string2 */
1490 break; /* unless string1 == string2. */
1491 goto fail;
1492
1493 case endbuf:
1494 if (d == end2 || (d == end1 && size2 == 0))
1495 break;
1496 goto fail;
1497
1498 case exactn:
1499 /* Match the next few pattern characters exactly.
1500 mcnt is how many characters to match. */
1501 mcnt = *p++;
1502 if (translate)
1503 {
1504 do
1505 {
1506 PREFETCH;
1507 if (translate[*d++] != *p++) goto fail;
1508 }
1509 while (--mcnt);
1510 }
1511 else
1512 {
1513 do
1514 {
1515 PREFETCH;
1516 if (*d++ != *p++) goto fail;
1517 }
1518 while (--mcnt);
1519 }
1520 break;
1521 }
1522 continue; /* Successfully matched one pattern command; keep matching */
1523
1524 /* Jump here if any matching operation fails. */
1525 fail:
1526 if (stackp != stackb)
1527 /* A restart point is known. Restart there and pop it. */
1528 {
1529 if (!stackp[-2])
1530 { /* If innermost failure point is dormant, flush it and keep looking */
1531 stackp -= 2;
1532 goto fail;
1533 }
1534 d = *--stackp;
1535 p = *--stackp;
1536 if (d >= string1 && d <= end1)
1537 dend = end_match_1;
1538 }
1539 else break; /* Matching at this starting point really fails! */
1540 }
1541 return -1; /* Failure to match */
1542}
1543
1544static int
1545bcmp_translate (s1, s2, len, translate)
1546 unsigned char *s1, *s2;
1547 register int len;
1548 unsigned char *translate;
1549{
1550 register unsigned char *p1 = s1, *p2 = s2;
1551 while (len)
1552 {
1553 if (translate [*p1++] != translate [*p2++]) return 1;
1554 len--;
1555 }
1556 return 0;
1557}
1558\f
1559/* Entry points compatible with bsd4.2 regex library */
1560
1561#ifndef emacs
1562
1563static struct re_pattern_buffer re_comp_buf;
1564
1565char *
1566re_comp (s)
1567 char *s;
1568{
1569 if (!s)
1570 {
1571 if (!re_comp_buf.buffer)
1572 return "No previous regular expression";
1573 return 0;
1574 }
1575
1576 if (!re_comp_buf.buffer)
1577 {
1578 if (!(re_comp_buf.buffer = (char *) malloc (200)))
1579 return "Memory exhausted";
1580 re_comp_buf.allocated = 200;
1581 if (!(re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH)))
1582 return "Memory exhausted";
1583 }
1584 return re_compile_pattern (s, strlen (s), &re_comp_buf);
1585}
1586
1587int
1588re_exec (s)
1589 char *s;
1590{
1591 int len = strlen (s);
1592 return 0 <= re_search (&re_comp_buf, s, len, 0, len, 0);
1593}
1594
1595#endif /* emacs */
1596\f
1597#ifdef test
1598
1599#include <stdio.h>
1600
1601/* Indexed by a character, gives the upper case equivalent of the character */
1602
1603static char upcase[0400] =
1604 { 000, 001, 002, 003, 004, 005, 006, 007,
1605 010, 011, 012, 013, 014, 015, 016, 017,
1606 020, 021, 022, 023, 024, 025, 026, 027,
1607 030, 031, 032, 033, 034, 035, 036, 037,
1608 040, 041, 042, 043, 044, 045, 046, 047,
1609 050, 051, 052, 053, 054, 055, 056, 057,
1610 060, 061, 062, 063, 064, 065, 066, 067,
1611 070, 071, 072, 073, 074, 075, 076, 077,
1612 0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
1613 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
1614 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
1615 0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137,
1616 0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
1617 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
1618 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
1619 0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177,
1620 0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
1621 0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
1622 0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
1623 0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
1624 0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
1625 0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
1626 0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
1627 0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
1628 0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
1629 0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
1630 0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327,
1631 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
1632 0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
1633 0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
1634 0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
1635 0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377
1636 };
1637
1638main (argc, argv)
1639 int argc;
1640 char **argv;
1641{
1642 char pat[80];
1643 struct re_pattern_buffer buf;
1644 int i;
1645 char c;
1646 char fastmap[(1 << BYTEWIDTH)];
1647
1648 /* Allow a command argument to specify the style of syntax. */
1649 if (argc > 1)
1650 obscure_syntax = atoi (argv[1]);
1651
1652 buf.allocated = 40;
1653 buf.buffer = (char *) malloc (buf.allocated);
1654 buf.fastmap = fastmap;
1655 buf.translate = upcase;
1656
1657 while (1)
1658 {
1659 gets (pat);
1660
1661 if (*pat)
1662 {
1663 re_compile_pattern (pat, strlen(pat), &buf);
1664
1665 for (i = 0; i < buf.used; i++)
1666 printchar (buf.buffer[i]);
1667
1668 putchar ('\n');
1669
1670 printf ("%d allocated, %d used.\n", buf.allocated, buf.used);
1671
1672 re_compile_fastmap (&buf);
1673 printf ("Allowed by fastmap: ");
1674 for (i = 0; i < (1 << BYTEWIDTH); i++)
1675 if (fastmap[i]) printchar (i);
1676 putchar ('\n');
1677 }
1678
1679 gets (pat); /* Now read the string to match against */
1680
1681 i = re_match (&buf, pat, strlen (pat), 0, 0);
1682 printf ("Match value %d.\n", i);
1683 }
1684}
1685
1686#ifdef NOTDEF
1687print_buf (bufp)
1688 struct re_pattern_buffer *bufp;
1689{
1690 int i;
1691
1692 printf ("buf is :\n----------------\n");
1693 for (i = 0; i < bufp->used; i++)
1694 printchar (bufp->buffer[i]);
1695
1696 printf ("\n%d allocated, %d used.\n", bufp->allocated, bufp->used);
1697
1698 printf ("Allowed by fastmap: ");
1699 for (i = 0; i < (1 << BYTEWIDTH); i++)
1700 if (bufp->fastmap[i])
1701 printchar (i);
1702 printf ("\nAllowed by translate: ");
1703 if (bufp->translate)
1704 for (i = 0; i < (1 << BYTEWIDTH); i++)
1705 if (bufp->translate[i])
1706 printchar (i);
1707 printf ("\nfastmap is%s accurate\n", bufp->fastmap_accurate ? "" : "n't");
1708 printf ("can %s be null\n----------", bufp->can_be_null ? "" : "not");
1709}
1710#endif
1711
1712printchar (c)
1713 char c;
1714{
1715 if (c < 041 || c >= 0177)
1716 {
1717 putchar ('\\');
1718 putchar (((c >> 6) & 3) + '0');
1719 putchar (((c >> 3) & 7) + '0');
1720 putchar ((c & 7) + '0');
1721 }
1722 else
1723 putchar (c);
1724}
1725
1726error (string)
1727 char *string;
1728{
1729 puts (string);
1730 exit (1);
1731}
1732
1733#endif /* test */
This page took 0.136784 seconds and 4 git commands to generate.