gdb-3.3
[deliverable/binutils-gdb.git] / gdb / regex.c
CommitLineData
4187119d 1/* Extended regular expression matching and search library.
2 Copyright (C) 1985, 1989 Free Software Foundation, Inc.
e91b87a3 3
4187119d 4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 1, or (at your option)
7 any later version.
e91b87a3 8
4187119d 9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
e91b87a3 13
4187119d 14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
e91b87a3 17
e91b87a3 18
4187119d 19 In other words, you are welcome to use, share and improve this program.
20 You are forbidden to forbid anyone else to use, share and improve
21 what you give them. Help stamp out software-hoarding! */
e91b87a3 22
23
24/* To test, compile with -Dtest.
25 This Dtestable feature turns this into a self-contained program
26 which reads a pattern, describes how it compiles,
27 then reads a string and searches for it. */
28
29#ifdef emacs
30
31/* The `emacs' switch turns on certain special matching commands
32 that make sense only in emacs. */
33
34#include "config.h"
35#include "lisp.h"
36#include "buffer.h"
37#include "syntax.h"
38
39#else /* not emacs */
40
41#ifdef USG
4187119d 42#ifndef BSTRING
e91b87a3 43#define bcopy(s,d,n) memcpy((d),(s),(n))
44#define bcmp(s1,s2,n) memcmp((s1),(s2),(n))
45#define bzero(s,n) memset((s),0,(n))
46#endif
4187119d 47#endif
e91b87a3 48
49/* Make alloca work the best possible way. */
50#ifdef __GNUC__
51#define alloca __builtin_alloca
52#else
53#ifdef sparc
54#include <alloca.h>
55#endif
56#endif
57
58/*
59 * Define the syntax stuff, so we can do the \<...\> things.
60 */
61
62#ifndef Sword /* must be non-zero in some of the tests below... */
63#define Sword 1
64#endif
65
66#define SYNTAX(c) re_syntax_table[c]
67
68#ifdef SYNTAX_TABLE
69
70char *re_syntax_table;
71
72#else
73
74static char re_syntax_table[256];
75
76static void
77init_syntax_once ()
78{
79 register int c;
80 static int done = 0;
81
82 if (done)
83 return;
84
85 bzero (re_syntax_table, sizeof re_syntax_table);
86
87 for (c = 'a'; c <= 'z'; c++)
88 re_syntax_table[c] = Sword;
89
90 for (c = 'A'; c <= 'Z'; c++)
91 re_syntax_table[c] = Sword;
92
93 for (c = '0'; c <= '9'; c++)
94 re_syntax_table[c] = Sword;
95
96 done = 1;
97}
98
99#endif /* SYNTAX_TABLE */
100#endif /* not emacs */
101
102#include "regex.h"
103
104/* Number of failure points to allocate space for initially,
105 when matching. If this number is exceeded, more space is allocated,
106 so it is not a hard limit. */
107
108#ifndef NFAILURES
109#define NFAILURES 80
110#endif /* NFAILURES */
111
112/* width of a byte in bits */
113
114#define BYTEWIDTH 8
115
116#ifndef SIGN_EXTEND_CHAR
117#define SIGN_EXTEND_CHAR(x) (x)
118#endif
119\f
120static int obscure_syntax = 0;
121
122/* Specify the precise syntax of regexp for compilation.
123 This provides for compatibility for various utilities
124 which historically have different, incompatible syntaxes.
125
126 The argument SYNTAX is a bit-mask containing the two bits
127 RE_NO_BK_PARENS and RE_NO_BK_VBAR. */
128
129int
130re_set_syntax (syntax)
131{
132 int ret;
133
134 ret = obscure_syntax;
135 obscure_syntax = syntax;
136 return ret;
137}
138\f
139/* re_compile_pattern takes a regular-expression string
140 and converts it into a buffer full of byte commands for matching.
141
142 PATTERN is the address of the pattern string
143 SIZE is the length of it.
144 BUFP is a struct re_pattern_buffer * which points to the info
145 on where to store the byte commands.
146 This structure contains a char * which points to the
147 actual space, which should have been obtained with malloc.
148 re_compile_pattern may use realloc to grow the buffer space.
149
150 The number of bytes of commands can be found out by looking in
151 the struct re_pattern_buffer that bufp pointed to,
152 after re_compile_pattern returns.
153*/
154
155#define PATPUSH(ch) (*b++ = (char) (ch))
156
157#define PATFETCH(c) \
158 {if (p == pend) goto end_of_pattern; \
159 c = * (unsigned char *) p++; \
160 if (translate) c = translate[c]; }
161
162#define PATFETCH_RAW(c) \
163 {if (p == pend) goto end_of_pattern; \
164 c = * (unsigned char *) p++; }
165
166#define PATUNFETCH p--
167
168#define EXTEND_BUFFER \
169 { char *old_buffer = bufp->buffer; \
170 if (bufp->allocated == (1<<16)) goto too_big; \
171 bufp->allocated *= 2; \
172 if (bufp->allocated > (1<<16)) bufp->allocated = (1<<16); \
173 if (!(bufp->buffer = (char *) realloc (bufp->buffer, bufp->allocated))) \
174 goto memory_exhausted; \
175 c = bufp->buffer - old_buffer; \
176 b += c; \
177 if (fixup_jump) \
178 fixup_jump += c; \
179 if (laststart) \
180 laststart += c; \
181 begalt += c; \
182 if (pending_exact) \
183 pending_exact += c; \
184 }
185
186static int store_jump (), insert_jump ();
187
188char *
189re_compile_pattern (pattern, size, bufp)
190 char *pattern;
191 int size;
192 struct re_pattern_buffer *bufp;
193{
194 register char *b = bufp->buffer;
195 register char *p = pattern;
196 char *pend = pattern + size;
197 register unsigned c, c1;
198 char *p1;
199 unsigned char *translate = (unsigned char *) bufp->translate;
200
201 /* address of the count-byte of the most recently inserted "exactn" command.
202 This makes it possible to tell whether a new exact-match character
203 can be added to that command or requires a new "exactn" command. */
204
205 char *pending_exact = 0;
206
207 /* address of the place where a forward-jump should go
208 to the end of the containing expression.
209 Each alternative of an "or", except the last, ends with a forward-jump
210 of this sort. */
211
212 char *fixup_jump = 0;
213
214 /* address of start of the most recently finished expression.
215 This tells postfix * where to find the start of its operand. */
216
217 char *laststart = 0;
218
219 /* In processing a repeat, 1 means zero matches is allowed */
220
221 char zero_times_ok;
222
223 /* In processing a repeat, 1 means many matches is allowed */
224
225 char many_times_ok;
226
227 /* address of beginning of regexp, or inside of last \( */
228
229 char *begalt = b;
230
231 /* Stack of information saved by \( and restored by \).
232 Four stack elements are pushed by each \(:
233 First, the value of b.
234 Second, the value of fixup_jump.
235 Third, the value of regnum.
236 Fourth, the value of begalt. */
237
238 int stackb[40];
239 int *stackp = stackb;
240 int *stacke = stackb + 40;
241 int *stackt;
242
243 /* Counts \('s as they are encountered. Remembered for the matching \),
244 where it becomes the "register number" to put in the stop_memory command */
245
246 int regnum = 1;
247
248 bufp->fastmap_accurate = 0;
249
250#ifndef emacs
251#ifndef SYNTAX_TABLE
252 /*
253 * Initialize the syntax table.
254 */
255 init_syntax_once();
256#endif
257#endif
258
259 if (bufp->allocated == 0)
260 {
261 bufp->allocated = 28;
262 if (bufp->buffer)
263 /* EXTEND_BUFFER loses when bufp->allocated is 0 */
264 bufp->buffer = (char *) realloc (bufp->buffer, 28);
265 else
266 /* Caller did not allocate a buffer. Do it for him */
267 bufp->buffer = (char *) malloc (28);
268 if (!bufp->buffer) goto memory_exhausted;
269 begalt = b = bufp->buffer;
270 }
271
272 while (p != pend)
273 {
274 if (b - bufp->buffer > bufp->allocated - 10)
275 /* Note that EXTEND_BUFFER clobbers c */
276 EXTEND_BUFFER;
277
278 PATFETCH (c);
279
280 switch (c)
281 {
282 case '$':
283 if (obscure_syntax & RE_TIGHT_VBAR)
284 {
285 if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS) && p != pend)
286 goto normal_char;
287 /* Make operand of last vbar end before this `$'. */
288 if (fixup_jump)
289 store_jump (fixup_jump, jump, b);
290 fixup_jump = 0;
291 PATPUSH (endline);
292 break;
293 }
294
295 /* $ means succeed if at end of line, but only in special contexts.
296 If randomly in the middle of a pattern, it is a normal character. */
297 if (p == pend || *p == '\n'
298 || (obscure_syntax & RE_CONTEXT_INDEP_OPS)
299 || (obscure_syntax & RE_NO_BK_PARENS
300 ? *p == ')'
301 : *p == '\\' && p[1] == ')')
302 || (obscure_syntax & RE_NO_BK_VBAR
303 ? *p == '|'
304 : *p == '\\' && p[1] == '|'))
305 {
306 PATPUSH (endline);
307 break;
308 }
309 goto normal_char;
310
311 case '^':
312 /* ^ means succeed if at beg of line, but only if no preceding pattern. */
313
314 if (laststart && p[-2] != '\n'
315 && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
316 goto normal_char;
317 if (obscure_syntax & RE_TIGHT_VBAR)
318 {
319 if (p != pattern + 1
320 && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
321 goto normal_char;
322 PATPUSH (begline);
323 begalt = b;
324 }
325 else
326 PATPUSH (begline);
327 break;
328
329 case '+':
330 case '?':
331 if (obscure_syntax & RE_BK_PLUS_QM)
332 goto normal_char;
333 handle_plus:
334 case '*':
335 /* If there is no previous pattern, char not special. */
336 if (!laststart && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
337 goto normal_char;
338 /* If there is a sequence of repetition chars,
339 collapse it down to equivalent to just one. */
340 zero_times_ok = 0;
341 many_times_ok = 0;
342 while (1)
343 {
344 zero_times_ok |= c != '+';
345 many_times_ok |= c != '?';
346 if (p == pend)
347 break;
348 PATFETCH (c);
349 if (c == '*')
350 ;
351 else if (!(obscure_syntax & RE_BK_PLUS_QM)
352 && (c == '+' || c == '?'))
353 ;
354 else if ((obscure_syntax & RE_BK_PLUS_QM)
355 && c == '\\')
356 {
357 int c1;
358 PATFETCH (c1);
359 if (!(c1 == '+' || c1 == '?'))
360 {
361 PATUNFETCH;
362 PATUNFETCH;
363 break;
364 }
365 c = c1;
366 }
367 else
368 {
369 PATUNFETCH;
370 break;
371 }
372 }
373
374 /* Star, etc. applied to an empty pattern is equivalent
375 to an empty pattern. */
376 if (!laststart)
377 break;
378
379 /* Now we know whether 0 matches is allowed,
380 and whether 2 or more matches is allowed. */
381 if (many_times_ok)
382 {
383 /* If more than one repetition is allowed,
384 put in a backward jump at the end. */
385 store_jump (b, maybe_finalize_jump, laststart - 3);
386 b += 3;
387 }
388 insert_jump (on_failure_jump, laststart, b + 3, b);
389 pending_exact = 0;
390 b += 3;
391 if (!zero_times_ok)
392 {
393 /* At least one repetition required: insert before the loop
394 a skip over the initial on-failure-jump instruction */
395 insert_jump (dummy_failure_jump, laststart, laststart + 6, b);
396 b += 3;
397 }
398 break;
399
400 case '.':
401 laststart = b;
402 PATPUSH (anychar);
403 break;
404
405 case '[':
406 while (b - bufp->buffer
407 > bufp->allocated - 3 - (1 << BYTEWIDTH) / BYTEWIDTH)
408 /* Note that EXTEND_BUFFER clobbers c */
409 EXTEND_BUFFER;
410
411 laststart = b;
412 if (*p == '^')
413 PATPUSH (charset_not), p++;
414 else
415 PATPUSH (charset);
416 p1 = p;
417
418 PATPUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
419 /* Clear the whole map */
420 bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
421 /* Read in characters and ranges, setting map bits */
422 while (1)
423 {
424 PATFETCH (c);
425 if (c == ']' && p != p1 + 1) break;
426 if (*p == '-' && p[1] != ']')
427 {
428 PATFETCH (c1);
429 PATFETCH (c1);
430 while (c <= c1)
431 b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH), c++;
432 }
433 else
434 {
435 b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH);
436 }
437 }
438 /* Discard any bitmap bytes that are all 0 at the end of the map.
439 Decrement the map-length byte too. */
440 while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
441 b[-1]--;
442 b += b[-1];
443 break;
444
445 case '(':
446 if (! (obscure_syntax & RE_NO_BK_PARENS))
447 goto normal_char;
448 else
449 goto handle_open;
450
451 case ')':
452 if (! (obscure_syntax & RE_NO_BK_PARENS))
453 goto normal_char;
454 else
455 goto handle_close;
456
457 case '\n':
458 if (! (obscure_syntax & RE_NEWLINE_OR))
459 goto normal_char;
460 else
461 goto handle_bar;
462
463 case '|':
464 if (! (obscure_syntax & RE_NO_BK_VBAR))
465 goto normal_char;
466 else
467 goto handle_bar;
468
469 case '\\':
470 if (p == pend) goto invalid_pattern;
471 PATFETCH_RAW (c);
472 switch (c)
473 {
474 case '(':
475 if (obscure_syntax & RE_NO_BK_PARENS)
476 goto normal_backsl;
477 handle_open:
478 if (stackp == stacke) goto nesting_too_deep;
479 if (regnum < RE_NREGS)
480 {
481 PATPUSH (start_memory);
482 PATPUSH (regnum);
483 }
484 *stackp++ = b - bufp->buffer;
485 *stackp++ = fixup_jump ? fixup_jump - bufp->buffer + 1 : 0;
486 *stackp++ = regnum++;
487 *stackp++ = begalt - bufp->buffer;
488 fixup_jump = 0;
489 laststart = 0;
490 begalt = b;
491 break;
492
493 case ')':
494 if (obscure_syntax & RE_NO_BK_PARENS)
495 goto normal_backsl;
496 handle_close:
497 if (stackp == stackb) goto unmatched_close;
498 begalt = *--stackp + bufp->buffer;
499 if (fixup_jump)
500 store_jump (fixup_jump, jump, b);
501 if (stackp[-1] < RE_NREGS)
502 {
503 PATPUSH (stop_memory);
504 PATPUSH (stackp[-1]);
505 }
506 stackp -= 2;
507 fixup_jump = 0;
508 if (*stackp)
509 fixup_jump = *stackp + bufp->buffer - 1;
510 laststart = *--stackp + bufp->buffer;
511 break;
512
513 case '|':
514 if (obscure_syntax & RE_NO_BK_VBAR)
515 goto normal_backsl;
516 handle_bar:
517 insert_jump (on_failure_jump, begalt, b + 6, b);
518 pending_exact = 0;
519 b += 3;
520 if (fixup_jump)
521 store_jump (fixup_jump, jump, b);
522 fixup_jump = b;
523 b += 3;
524 laststart = 0;
525 begalt = b;
526 break;
527
528#ifdef emacs
529 case '=':
530 PATPUSH (at_dot);
531 break;
532
533 case 's':
534 laststart = b;
535 PATPUSH (syntaxspec);
536 PATFETCH (c);
537 PATPUSH (syntax_spec_code[c]);
538 break;
539
540 case 'S':
541 laststart = b;
542 PATPUSH (notsyntaxspec);
543 PATFETCH (c);
544 PATPUSH (syntax_spec_code[c]);
545 break;
546#endif /* emacs */
547
548 case 'w':
549 laststart = b;
550 PATPUSH (wordchar);
551 break;
552
553 case 'W':
554 laststart = b;
555 PATPUSH (notwordchar);
556 break;
557
558 case '<':
559 PATPUSH (wordbeg);
560 break;
561
562 case '>':
563 PATPUSH (wordend);
564 break;
565
566 case 'b':
567 PATPUSH (wordbound);
568 break;
569
570 case 'B':
571 PATPUSH (notwordbound);
572 break;
573
574 case '`':
575 PATPUSH (begbuf);
576 break;
577
578 case '\'':
579 PATPUSH (endbuf);
580 break;
581
582 case '1':
583 case '2':
584 case '3':
585 case '4':
586 case '5':
587 case '6':
588 case '7':
589 case '8':
590 case '9':
591 c1 = c - '0';
592 if (c1 >= regnum)
593 goto normal_char;
594 for (stackt = stackp - 2; stackt > stackb; stackt -= 4)
595 if (*stackt == c1)
596 goto normal_char;
597 laststart = b;
598 PATPUSH (duplicate);
599 PATPUSH (c1);
600 break;
601
602 case '+':
603 case '?':
604 if (obscure_syntax & RE_BK_PLUS_QM)
605 goto handle_plus;
606
607 default:
608 normal_backsl:
609 /* You might think it would be useful for \ to mean
610 not to translate; but if we don't translate it
611 it will never match anything. */
612 if (translate) c = translate[c];
613 goto normal_char;
614 }
615 break;
616
617 default:
618 normal_char:
619 if (!pending_exact || pending_exact + *pending_exact + 1 != b
620 || *pending_exact == 0177 || *p == '*' || *p == '^'
621 || ((obscure_syntax & RE_BK_PLUS_QM)
622 ? *p == '\\' && (p[1] == '+' || p[1] == '?')
623 : (*p == '+' || *p == '?')))
624 {
625 laststart = b;
626 PATPUSH (exactn);
627 pending_exact = b;
628 PATPUSH (0);
629 }
630 PATPUSH (c);
631 (*pending_exact)++;
632 }
633 }
634
635 if (fixup_jump)
636 store_jump (fixup_jump, jump, b);
637
638 if (stackp != stackb) goto unmatched_open;
639
640 bufp->used = b - bufp->buffer;
641 return 0;
642
643 invalid_pattern:
644 return "Invalid regular expression";
645
646 unmatched_open:
647 return "Unmatched \\(";
648
649 unmatched_close:
650 return "Unmatched \\)";
651
652 end_of_pattern:
653 return "Premature end of regular expression";
654
655 nesting_too_deep:
656 return "Nesting too deep";
657
658 too_big:
659 return "Regular expression too big";
660
661 memory_exhausted:
662 return "Memory exhausted";
663}
664
665/* Store where `from' points a jump operation to jump to where `to' points.
666 `opcode' is the opcode to store. */
667
668static int
669store_jump (from, opcode, to)
670 char *from, *to;
671 char opcode;
672{
673 from[0] = opcode;
674 from[1] = (to - (from + 3)) & 0377;
675 from[2] = (to - (from + 3)) >> 8;
676}
677
678/* Open up space at char FROM, and insert there a jump to TO.
679 CURRENT_END gives te end of the storage no in use,
680 so we know how much data to copy up.
681 OP is the opcode of the jump to insert.
682
683 If you call this function, you must zero out pending_exact. */
684
685static int
686insert_jump (op, from, to, current_end)
687 char op;
688 char *from, *to, *current_end;
689{
690 register char *pto = current_end + 3;
691 register char *pfrom = current_end;
692 while (pfrom != from)
693 *--pto = *--pfrom;
694 store_jump (from, op, to);
695}
696\f
697/* Given a pattern, compute a fastmap from it.
698 The fastmap records which of the (1 << BYTEWIDTH) possible characters
699 can start a string that matches the pattern.
700 This fastmap is used by re_search to skip quickly over totally implausible text.
701
702 The caller must supply the address of a (1 << BYTEWIDTH)-byte data area
703 as bufp->fastmap.
704 The other components of bufp describe the pattern to be used. */
705
706void
707re_compile_fastmap (bufp)
708 struct re_pattern_buffer *bufp;
709{
710 unsigned char *pattern = (unsigned char *) bufp->buffer;
711 int size = bufp->used;
712 register char *fastmap = bufp->fastmap;
713 register unsigned char *p = pattern;
714 register unsigned char *pend = pattern + size;
715 register int j, k;
716 unsigned char *translate = (unsigned char *) bufp->translate;
717
718 unsigned char *stackb[NFAILURES];
719 unsigned char **stackp = stackb;
720
721 bzero (fastmap, (1 << BYTEWIDTH));
722 bufp->fastmap_accurate = 1;
723 bufp->can_be_null = 0;
724
725 while (p)
726 {
727 if (p == pend)
728 {
729 bufp->can_be_null = 1;
730 break;
731 }
732#ifdef SWITCH_ENUM_BUG
733 switch ((int) ((enum regexpcode) *p++))
734#else
735 switch ((enum regexpcode) *p++)
736#endif
737 {
738 case exactn:
739 if (translate)
740 fastmap[translate[p[1]]] = 1;
741 else
742 fastmap[p[1]] = 1;
743 break;
744
745 case begline:
746 case before_dot:
747 case at_dot:
748 case after_dot:
749 case begbuf:
750 case endbuf:
751 case wordbound:
752 case notwordbound:
753 case wordbeg:
754 case wordend:
755 continue;
756
757 case endline:
758 if (translate)
759 fastmap[translate['\n']] = 1;
760 else
761 fastmap['\n'] = 1;
762 if (bufp->can_be_null != 1)
763 bufp->can_be_null = 2;
764 break;
765
766 case finalize_jump:
767 case maybe_finalize_jump:
768 case jump:
769 case dummy_failure_jump:
770 bufp->can_be_null = 1;
771 j = *p++ & 0377;
772 j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
773 p += j + 1; /* The 1 compensates for missing ++ above */
774 if (j > 0)
775 continue;
776 /* Jump backward reached implies we just went through
777 the body of a loop and matched nothing.
778 Opcode jumped to should be an on_failure_jump.
779 Just treat it like an ordinary jump.
780 For a * loop, it has pushed its failure point already;
781 if so, discard that as redundant. */
782 if ((enum regexpcode) *p != on_failure_jump)
783 continue;
784 p++;
785 j = *p++ & 0377;
786 j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
787 p += j + 1; /* The 1 compensates for missing ++ above */
788 if (stackp != stackb && *stackp == p)
789 stackp--;
790 continue;
791
792 case on_failure_jump:
793 j = *p++ & 0377;
794 j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
795 p++;
796 *++stackp = p + j;
797 continue;
798
799 case start_memory:
800 case stop_memory:
801 p++;
802 continue;
803
804 case duplicate:
805 bufp->can_be_null = 1;
806 fastmap['\n'] = 1;
807 case anychar:
808 for (j = 0; j < (1 << BYTEWIDTH); j++)
809 if (j != '\n')
810 fastmap[j] = 1;
811 if (bufp->can_be_null)
812 return;
813 /* Don't return; check the alternative paths
814 so we can set can_be_null if appropriate. */
815 break;
816
817 case wordchar:
818 for (j = 0; j < (1 << BYTEWIDTH); j++)
819 if (SYNTAX (j) == Sword)
820 fastmap[j] = 1;
821 break;
822
823 case notwordchar:
824 for (j = 0; j < (1 << BYTEWIDTH); j++)
825 if (SYNTAX (j) != Sword)
826 fastmap[j] = 1;
827 break;
828
829#ifdef emacs
830 case syntaxspec:
831 k = *p++;
832 for (j = 0; j < (1 << BYTEWIDTH); j++)
833 if (SYNTAX (j) == (enum syntaxcode) k)
834 fastmap[j] = 1;
835 break;
836
837 case notsyntaxspec:
838 k = *p++;
839 for (j = 0; j < (1 << BYTEWIDTH); j++)
840 if (SYNTAX (j) != (enum syntaxcode) k)
841 fastmap[j] = 1;
842 break;
843#endif /* emacs */
844
845 case charset:
846 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
847 if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
848 {
849 if (translate)
850 fastmap[translate[j]] = 1;
851 else
852 fastmap[j] = 1;
853 }
854 break;
855
856 case charset_not:
857 /* Chars beyond end of map must be allowed */
858 for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
859 if (translate)
860 fastmap[translate[j]] = 1;
861 else
862 fastmap[j] = 1;
863
864 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
865 if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
866 {
867 if (translate)
868 fastmap[translate[j]] = 1;
869 else
870 fastmap[j] = 1;
871 }
872 break;
873 }
874
875 /* Get here means we have successfully found the possible starting characters
876 of one path of the pattern. We need not follow this path any farther.
877 Instead, look at the next alternative remembered in the stack. */
878 if (stackp != stackb)
879 p = *stackp--;
880 else
881 break;
882 }
883}
884\f
885/* Like re_search_2, below, but only one string is specified. */
886
887int
888re_search (pbufp, string, size, startpos, range, regs)
889 struct re_pattern_buffer *pbufp;
890 char *string;
891 int size, startpos, range;
892 struct re_registers *regs;
893{
894 return re_search_2 (pbufp, 0, 0, string, size, startpos, range, regs, size);
895}
896
897/* Like re_match_2 but tries first a match starting at index STARTPOS,
898 then at STARTPOS + 1, and so on.
899 RANGE is the number of places to try before giving up.
900 If RANGE is negative, the starting positions tried are
901 STARTPOS, STARTPOS - 1, etc.
902 It is up to the caller to make sure that range is not so large
903 as to take the starting position outside of the input strings.
904
905The value returned is the position at which the match was found,
906 or -1 if no match was found,
907 or -2 if error (such as failure stack overflow). */
908
909int
910re_search_2 (pbufp, string1, size1, string2, size2, startpos, range, regs, mstop)
911 struct re_pattern_buffer *pbufp;
912 char *string1, *string2;
913 int size1, size2;
914 int startpos;
915 register int range;
916 struct re_registers *regs;
917 int mstop;
918{
919 register char *fastmap = pbufp->fastmap;
920 register unsigned char *translate = (unsigned char *) pbufp->translate;
921 int total = size1 + size2;
922 int val;
923
924 /* Update the fastmap now if not correct already */
925 if (fastmap && !pbufp->fastmap_accurate)
926 re_compile_fastmap (pbufp);
927
928 /* Don't waste time in a long search for a pattern
929 that says it is anchored. */
930 if (pbufp->used > 0 && (enum regexpcode) pbufp->buffer[0] == begbuf
931 && range > 0)
932 {
933 if (startpos > 0)
934 return -1;
935 else
936 range = 1;
937 }
938
939 while (1)
940 {
941 /* If a fastmap is supplied, skip quickly over characters
942 that cannot possibly be the start of a match.
943 Note, however, that if the pattern can possibly match
944 the null string, we must test it at each starting point
945 so that we take the first null string we get. */
946
947 if (fastmap && startpos < total && pbufp->can_be_null != 1)
948 {
949 if (range > 0)
950 {
951 register int lim = 0;
952 register unsigned char *p;
953 int irange = range;
954 if (startpos < size1 && startpos + range >= size1)
955 lim = range - (size1 - startpos);
956
957 p = ((unsigned char *)
958 &(startpos >= size1 ? string2 - size1 : string1)[startpos]);
959
960 if (translate)
961 {
962 while (range > lim && !fastmap[translate[*p++]])
963 range--;
964 }
965 else
966 {
967 while (range > lim && !fastmap[*p++])
968 range--;
969 }
970 startpos += irange - range;
971 }
972 else
973 {
974 register unsigned char c;
975 if (startpos >= size1)
976 c = string2[startpos - size1];
977 else
978 c = string1[startpos];
979 c &= 0xff;
980 if (translate ? !fastmap[translate[c]] : !fastmap[c])
981 goto advance;
982 }
983 }
984
985 if (range >= 0 && startpos == total
986 && fastmap && pbufp->can_be_null == 0)
987 return -1;
988
989 val = re_match_2 (pbufp, string1, size1, string2, size2, startpos, regs, mstop);
990 if (0 <= val)
991 {
992 if (val == -2)
993 return -2;
994 return startpos;
995 }
996
997#ifdef C_ALLOCA
998 alloca (0);
999#endif /* C_ALLOCA */
1000
1001 advance:
1002 if (!range) break;
1003 if (range > 0) range--, startpos++; else range++, startpos--;
1004 }
1005 return -1;
1006}
1007\f
1008#ifndef emacs /* emacs never uses this */
1009int
1010re_match (pbufp, string, size, pos, regs)
1011 struct re_pattern_buffer *pbufp;
1012 char *string;
1013 int size, pos;
1014 struct re_registers *regs;
1015{
1016 return re_match_2 (pbufp, 0, 0, string, size, pos, regs, size);
1017}
1018#endif /* emacs */
1019
1020/* Maximum size of failure stack. Beyond this, overflow is an error. */
1021
1022int re_max_failures = 2000;
1023
1024static int bcmp_translate();
1025/* Match the pattern described by PBUFP
1026 against data which is the virtual concatenation of STRING1 and STRING2.
1027 SIZE1 and SIZE2 are the sizes of the two data strings.
1028 Start the match at position POS.
1029 Do not consider matching past the position MSTOP.
1030
1031 If pbufp->fastmap is nonzero, then it had better be up to date.
1032
1033 The reason that the data to match are specified as two components
1034 which are to be regarded as concatenated
1035 is so this function can be used directly on the contents of an Emacs buffer.
1036
1037 -1 is returned if there is no match. -2 is returned if there is
1038 an error (such as match stack overflow). Otherwise the value is the length
1039 of the substring which was matched. */
1040
1041int
1042re_match_2 (pbufp, string1, size1, string2, size2, pos, regs, mstop)
1043 struct re_pattern_buffer *pbufp;
1044 unsigned char *string1, *string2;
1045 int size1, size2;
1046 int pos;
1047 struct re_registers *regs;
1048 int mstop;
1049{
1050 register unsigned char *p = (unsigned char *) pbufp->buffer;
1051 register unsigned char *pend = p + pbufp->used;
1052 /* End of first string */
1053 unsigned char *end1;
1054 /* End of second string */
1055 unsigned char *end2;
1056 /* Pointer just past last char to consider matching */
1057 unsigned char *end_match_1, *end_match_2;
1058 register unsigned char *d, *dend;
1059 register int mcnt;
1060 unsigned char *translate = (unsigned char *) pbufp->translate;
1061
1062 /* Failure point stack. Each place that can handle a failure further down the line
1063 pushes a failure point on this stack. It consists of two char *'s.
1064 The first one pushed is where to resume scanning the pattern;
1065 the second pushed is where to resume scanning the strings.
1066 If the latter is zero, the failure point is a "dummy".
1067 If a failure happens and the innermost failure point is dormant,
1068 it discards that failure point and tries the next one. */
1069
1070 unsigned char *initial_stack[2 * NFAILURES];
1071 unsigned char **stackb = initial_stack;
1072 unsigned char **stackp = stackb, **stacke = &stackb[2 * NFAILURES];
1073
1074 /* Information on the "contents" of registers.
1075 These are pointers into the input strings; they record
1076 just what was matched (on this attempt) by some part of the pattern.
1077 The start_memory command stores the start of a register's contents
1078 and the stop_memory command stores the end.
1079
1080 At that point, regstart[regnum] points to the first character in the register,
1081 regend[regnum] points to the first character beyond the end of the register,
1082 regstart_seg1[regnum] is true iff regstart[regnum] points into string1,
1083 and regend_seg1[regnum] is true iff regend[regnum] points into string1. */
1084
1085 unsigned char *regstart[RE_NREGS];
1086 unsigned char *regend[RE_NREGS];
1087 unsigned char regstart_seg1[RE_NREGS], regend_seg1[RE_NREGS];
1088
1089 /* Set up pointers to ends of strings.
1090 Don't allow the second string to be empty unless both are empty. */
1091 if (!size2)
1092 {
1093 string2 = string1;
1094 size2 = size1;
1095 string1 = 0;
1096 size1 = 0;
1097 }
1098 end1 = string1 + size1;
1099 end2 = string2 + size2;
1100
1101 /* Compute where to stop matching, within the two strings */
1102 if (mstop <= size1)
1103 {
1104 end_match_1 = string1 + mstop;
1105 end_match_2 = string2;
1106 }
1107 else
1108 {
1109 end_match_1 = end1;
1110 end_match_2 = string2 + mstop - size1;
1111 }
1112
1113 /* Initialize \) text positions to -1
1114 to mark ones that no \( or \) has been seen for. */
1115
1116 for (mcnt = 0; mcnt < sizeof (regend) / sizeof (*regend); mcnt++)
1117 regend[mcnt] = (unsigned char *) -1;
1118
1119 /* `p' scans through the pattern as `d' scans through the data.
1120 `dend' is the end of the input string that `d' points within.
1121 `d' is advanced into the following input string whenever necessary,
1122 but this happens before fetching;
1123 therefore, at the beginning of the loop,
1124 `d' can be pointing at the end of a string,
1125 but it cannot equal string2. */
1126
1127 if (pos <= size1)
1128 d = string1 + pos, dend = end_match_1;
1129 else
1130 d = string2 + pos - size1, dend = end_match_2;
1131
1132/* Write PREFETCH; just before fetching a character with *d. */
1133#define PREFETCH \
1134 while (d == dend) \
1135 { if (dend == end_match_2) goto fail; /* end of string2 => failure */ \
1136 d = string2; /* end of string1 => advance to string2. */ \
1137 dend = end_match_2; }
1138
1139 /* This loop loops over pattern commands.
1140 It exits by returning from the function if match is complete,
1141 or it drops through if match fails at this starting point in the input data. */
1142
1143 while (1)
1144 {
1145 if (p == pend)
1146 /* End of pattern means we have succeeded! */
1147 {
1148 /* If caller wants register contents data back, convert it to indices */
1149 if (regs)
1150 {
1151 regs->start[0] = pos;
1152 if (dend == end_match_1)
1153 regs->end[0] = d - string1;
1154 else
1155 regs->end[0] = d - string2 + size1;
1156 for (mcnt = 1; mcnt < RE_NREGS; mcnt++)
1157 {
1158 if (regend[mcnt] == (unsigned char *) -1)
1159 {
1160 regs->start[mcnt] = -1;
1161 regs->end[mcnt] = -1;
1162 continue;
1163 }
1164 if (regstart_seg1[mcnt])
1165 regs->start[mcnt] = regstart[mcnt] - string1;
1166 else
1167 regs->start[mcnt] = regstart[mcnt] - string2 + size1;
1168 if (regend_seg1[mcnt])
1169 regs->end[mcnt] = regend[mcnt] - string1;
1170 else
1171 regs->end[mcnt] = regend[mcnt] - string2 + size1;
1172 }
1173 }
1174 if (dend == end_match_1)
1175 return (d - string1 - pos);
1176 else
1177 return d - string2 + size1 - pos;
1178 }
1179
1180 /* Otherwise match next pattern command */
1181#ifdef SWITCH_ENUM_BUG
1182 switch ((int) ((enum regexpcode) *p++))
1183#else
1184 switch ((enum regexpcode) *p++)
1185#endif
1186 {
1187
1188 /* \( is represented by a start_memory, \) by a stop_memory.
1189 Both of those commands contain a "register number" argument.
1190 The text matched within the \( and \) is recorded under that number.
1191 Then, \<digit> turns into a `duplicate' command which
1192 is followed by the numeric value of <digit> as the register number. */
1193
1194 case start_memory:
1195 regstart[*p] = d;
1196 regstart_seg1[*p++] = (dend == end_match_1);
1197 break;
1198
1199 case stop_memory:
1200 regend[*p] = d;
1201 regend_seg1[*p++] = (dend == end_match_1);
1202 break;
1203
1204 case duplicate:
1205 {
1206 int regno = *p++; /* Get which register to match against */
1207 register unsigned char *d2, *dend2;
1208
1209 d2 = regstart[regno];
1210 dend2 = ((regstart_seg1[regno] == regend_seg1[regno])
1211 ? regend[regno] : end_match_1);
1212 while (1)
1213 {
1214 /* Advance to next segment in register contents, if necessary */
1215 while (d2 == dend2)
1216 {
1217 if (dend2 == end_match_2) break;
1218 if (dend2 == regend[regno]) break;
1219 d2 = string2, dend2 = regend[regno]; /* end of string1 => advance to string2. */
1220 }
1221 /* At end of register contents => success */
1222 if (d2 == dend2) break;
1223
1224 /* Advance to next segment in data being matched, if necessary */
1225 PREFETCH;
1226
1227 /* mcnt gets # consecutive chars to compare */
1228 mcnt = dend - d;
1229 if (mcnt > dend2 - d2)
1230 mcnt = dend2 - d2;
1231 /* Compare that many; failure if mismatch, else skip them. */
1232 if (translate ? bcmp_translate (d, d2, mcnt, translate) : bcmp (d, d2, mcnt))
1233 goto fail;
1234 d += mcnt, d2 += mcnt;
1235 }
1236 }
1237 break;
1238
1239 case anychar:
1240 /* fetch a data character */
1241 PREFETCH;
1242 /* Match anything but a newline. */
1243 if ((translate ? translate[*d++] : *d++) == '\n')
1244 goto fail;
1245 break;
1246
1247 case charset:
1248 case charset_not:
1249 {
1250 /* Nonzero for charset_not */
1251 int not = 0;
1252 register int c;
1253 if (*(p - 1) == (unsigned char) charset_not)
1254 not = 1;
1255
1256 /* fetch a data character */
1257 PREFETCH;
1258
1259 if (translate)
1260 c = translate [*d];
1261 else
1262 c = *d;
1263
1264 if (c < *p * BYTEWIDTH
1265 && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
1266 not = !not;
1267
1268 p += 1 + *p;
1269
1270 if (!not) goto fail;
1271 d++;
1272 break;
1273 }
1274
1275 case begline:
1276 if (d == string1 || d[-1] == '\n')
1277 break;
1278 goto fail;
1279
1280 case endline:
1281 if (d == end2
1282 || (d == end1 ? (size2 == 0 || *string2 == '\n') : *d == '\n'))
1283 break;
1284 goto fail;
1285
1286 /* "or" constructs ("|") are handled by starting each alternative
1287 with an on_failure_jump that points to the start of the next alternative.
1288 Each alternative except the last ends with a jump to the joining point.
1289 (Actually, each jump except for the last one really jumps
1290 to the following jump, because tensioning the jumps is a hassle.) */
1291
1292 /* The start of a stupid repeat has an on_failure_jump that points
1293 past the end of the repeat text.
1294 This makes a failure point so that, on failure to match a repetition,
1295 matching restarts past as many repetitions have been found
1296 with no way to fail and look for another one. */
1297
1298 /* A smart repeat is similar but loops back to the on_failure_jump
1299 so that each repetition makes another failure point. */
1300
1301 case on_failure_jump:
1302 if (stackp == stacke)
1303 {
1304 unsigned char **stackx;
1305 if (stacke - stackb > re_max_failures * 2)
1306 return -2;
1307 stackx = (unsigned char **) alloca (2 * (stacke - stackb)
1308 * sizeof (char *));
1309 bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *));
1310 stackp = stackx + (stackp - stackb);
1311 stacke = stackx + 2 * (stacke - stackb);
1312 stackb = stackx;
1313 }
1314 mcnt = *p++ & 0377;
1315 mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
1316 p++;
1317 *stackp++ = mcnt + p;
1318 *stackp++ = d;
1319 break;
1320
1321 /* The end of a smart repeat has an maybe_finalize_jump back.
1322 Change it either to a finalize_jump or an ordinary jump. */
1323
1324 case maybe_finalize_jump:
1325 mcnt = *p++ & 0377;
1326 mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
1327 p++;
1328 {
1329 register unsigned char *p2 = p;
1330 /* Compare what follows with the begining of the repeat.
1331 If we can establish that there is nothing that they would
1332 both match, we can change to finalize_jump */
1333 while (p2 != pend
1334 && (*p2 == (unsigned char) stop_memory
1335 || *p2 == (unsigned char) start_memory))
1336 p2++;
1337 if (p2 == pend)
1338 p[-3] = (unsigned char) finalize_jump;
1339 else if (*p2 == (unsigned char) exactn
1340 || *p2 == (unsigned char) endline)
1341 {
1342 register int c = *p2 == (unsigned char) endline ? '\n' : p2[2];
1343 register unsigned char *p1 = p + mcnt;
1344 /* p1[0] ... p1[2] are an on_failure_jump.
1345 Examine what follows that */
1346 if (p1[3] == (unsigned char) exactn && p1[5] != c)
1347 p[-3] = (unsigned char) finalize_jump;
1348 else if (p1[3] == (unsigned char) charset
1349 || p1[3] == (unsigned char) charset_not)
1350 {
1351 int not = p1[3] == (unsigned char) charset_not;
1352 if (c < p1[4] * BYTEWIDTH
1353 && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
1354 not = !not;
1355 /* not is 1 if c would match */
1356 /* That means it is not safe to finalize */
1357 if (!not)
1358 p[-3] = (unsigned char) finalize_jump;
1359 }
1360 }
1361 }
1362 p -= 2;
1363 if (p[-1] != (unsigned char) finalize_jump)
1364 {
1365 p[-1] = (unsigned char) jump;
1366 goto nofinalize;
1367 }
1368
1369 /* The end of a stupid repeat has a finalize-jump
1370 back to the start, where another failure point will be made
1371 which will point after all the repetitions found so far. */
1372
1373 case finalize_jump:
1374 stackp -= 2;
1375
1376 case jump:
1377 nofinalize:
1378 mcnt = *p++ & 0377;
1379 mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
1380 p += mcnt + 1; /* The 1 compensates for missing ++ above */
1381 break;
1382
1383 case dummy_failure_jump:
1384 if (stackp == stacke)
1385 {
1386 unsigned char **stackx
1387 = (unsigned char **) alloca (2 * (stacke - stackb)
1388 * sizeof (char *));
1389 bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *));
1390 stackp = stackx + (stackp - stackb);
1391 stacke = stackx + 2 * (stacke - stackb);
1392 stackb = stackx;
1393 }
1394 *stackp++ = 0;
1395 *stackp++ = 0;
1396 goto nofinalize;
1397
1398 case wordbound:
1399 if (d == string1 /* Points to first char */
1400 || d == end2 /* Points to end */
1401 || (d == end1 && size2 == 0)) /* Points to end */
1402 break;
1403 if ((SYNTAX (d[-1]) == Sword)
1404 != (SYNTAX (d == end1 ? *string2 : *d) == Sword))
1405 break;
1406 goto fail;
1407
1408 case notwordbound:
1409 if (d == string1 /* Points to first char */
1410 || d == end2 /* Points to end */
1411 || (d == end1 && size2 == 0)) /* Points to end */
1412 goto fail;
1413 if ((SYNTAX (d[-1]) == Sword)
1414 != (SYNTAX (d == end1 ? *string2 : *d) == Sword))
1415 goto fail;
1416 break;
1417
1418 case wordbeg:
1419 if (d == end2 /* Points to end */
1420 || (d == end1 && size2 == 0) /* Points to end */
1421 || SYNTAX (* (d == end1 ? string2 : d)) != Sword) /* Next char not a letter */
1422 goto fail;
1423 if (d == string1 /* Points to first char */
1424 || SYNTAX (d[-1]) != Sword) /* prev char not letter */
1425 break;
1426 goto fail;
1427
1428 case wordend:
1429 if (d == string1 /* Points to first char */
1430 || SYNTAX (d[-1]) != Sword) /* prev char not letter */
1431 goto fail;
1432 if (d == end2 /* Points to end */
1433 || (d == end1 && size2 == 0) /* Points to end */
1434 || SYNTAX (d == end1 ? *string2 : *d) != Sword) /* Next char not a letter */
1435 break;
1436 goto fail;
1437
1438#ifdef emacs
1439 case before_dot:
1440 if (((d - string2 <= (unsigned) size2)
1441 ? d - bf_p2 : d - bf_p1)
1442 <= point)
1443 goto fail;
1444 break;
1445
1446 case at_dot:
1447 if (((d - string2 <= (unsigned) size2)
1448 ? d - bf_p2 : d - bf_p1)
1449 == point)
1450 goto fail;
1451 break;
1452
1453 case after_dot:
1454 if (((d - string2 <= (unsigned) size2)
1455 ? d - bf_p2 : d - bf_p1)
1456 >= point)
1457 goto fail;
1458 break;
1459
1460 case wordchar:
1461 mcnt = (int) Sword;
1462 goto matchsyntax;
1463
1464 case syntaxspec:
1465 mcnt = *p++;
1466 matchsyntax:
1467 PREFETCH;
1468 if (SYNTAX (*d++) != (enum syntaxcode) mcnt) goto fail;
1469 break;
1470
1471 case notwordchar:
1472 mcnt = (int) Sword;
1473 goto matchnotsyntax;
1474
1475 case notsyntaxspec:
1476 mcnt = *p++;
1477 matchnotsyntax:
1478 PREFETCH;
1479 if (SYNTAX (*d++) == (enum syntaxcode) mcnt) goto fail;
1480 break;
1481#else
1482 case wordchar:
1483 PREFETCH;
1484 if (SYNTAX (*d++) == 0) goto fail;
1485 break;
1486
1487 case notwordchar:
1488 PREFETCH;
1489 if (SYNTAX (*d++) != 0) goto fail;
1490 break;
1491#endif /* not emacs */
1492
1493 case begbuf:
1494 if (d == string1) /* Note, d cannot equal string2 */
1495 break; /* unless string1 == string2. */
1496 goto fail;
1497
1498 case endbuf:
1499 if (d == end2 || (d == end1 && size2 == 0))
1500 break;
1501 goto fail;
1502
1503 case exactn:
1504 /* Match the next few pattern characters exactly.
1505 mcnt is how many characters to match. */
1506 mcnt = *p++;
1507 if (translate)
1508 {
1509 do
1510 {
1511 PREFETCH;
1512 if (translate[*d++] != *p++) goto fail;
1513 }
1514 while (--mcnt);
1515 }
1516 else
1517 {
1518 do
1519 {
1520 PREFETCH;
1521 if (*d++ != *p++) goto fail;
1522 }
1523 while (--mcnt);
1524 }
1525 break;
1526 }
1527 continue; /* Successfully matched one pattern command; keep matching */
1528
1529 /* Jump here if any matching operation fails. */
1530 fail:
1531 if (stackp != stackb)
1532 /* A restart point is known. Restart there and pop it. */
1533 {
1534 if (!stackp[-2])
1535 { /* If innermost failure point is dormant, flush it and keep looking */
1536 stackp -= 2;
1537 goto fail;
1538 }
1539 d = *--stackp;
1540 p = *--stackp;
1541 if (d >= string1 && d <= end1)
1542 dend = end_match_1;
1543 }
1544 else break; /* Matching at this starting point really fails! */
1545 }
1546 return -1; /* Failure to match */
1547}
1548
1549static int
1550bcmp_translate (s1, s2, len, translate)
1551 unsigned char *s1, *s2;
1552 register int len;
1553 unsigned char *translate;
1554{
1555 register unsigned char *p1 = s1, *p2 = s2;
1556 while (len)
1557 {
1558 if (translate [*p1++] != translate [*p2++]) return 1;
1559 len--;
1560 }
1561 return 0;
1562}
1563\f
1564/* Entry points compatible with bsd4.2 regex library */
1565
1566#ifndef emacs
1567
1568static struct re_pattern_buffer re_comp_buf;
1569
1570char *
1571re_comp (s)
1572 char *s;
1573{
1574 if (!s)
1575 {
1576 if (!re_comp_buf.buffer)
1577 return "No previous regular expression";
1578 return 0;
1579 }
1580
1581 if (!re_comp_buf.buffer)
1582 {
1583 if (!(re_comp_buf.buffer = (char *) malloc (200)))
1584 return "Memory exhausted";
1585 re_comp_buf.allocated = 200;
1586 if (!(re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH)))
1587 return "Memory exhausted";
1588 }
1589 return re_compile_pattern (s, strlen (s), &re_comp_buf);
1590}
1591
1592int
1593re_exec (s)
1594 char *s;
1595{
1596 int len = strlen (s);
1597 return 0 <= re_search (&re_comp_buf, s, len, 0, len, 0);
1598}
1599
1600#endif /* emacs */
1601\f
1602#ifdef test
1603
1604#include <stdio.h>
1605
1606/* Indexed by a character, gives the upper case equivalent of the character */
1607
1608static char upcase[0400] =
1609 { 000, 001, 002, 003, 004, 005, 006, 007,
1610 010, 011, 012, 013, 014, 015, 016, 017,
1611 020, 021, 022, 023, 024, 025, 026, 027,
1612 030, 031, 032, 033, 034, 035, 036, 037,
1613 040, 041, 042, 043, 044, 045, 046, 047,
1614 050, 051, 052, 053, 054, 055, 056, 057,
1615 060, 061, 062, 063, 064, 065, 066, 067,
1616 070, 071, 072, 073, 074, 075, 076, 077,
1617 0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
1618 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
1619 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
1620 0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137,
1621 0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
1622 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
1623 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
1624 0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177,
1625 0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
1626 0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
1627 0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
1628 0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
1629 0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
1630 0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
1631 0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
1632 0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
1633 0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
1634 0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
1635 0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327,
1636 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
1637 0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
1638 0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
1639 0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
1640 0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377
1641 };
1642
1643main (argc, argv)
1644 int argc;
1645 char **argv;
1646{
1647 char pat[80];
1648 struct re_pattern_buffer buf;
1649 int i;
1650 char c;
1651 char fastmap[(1 << BYTEWIDTH)];
1652
1653 /* Allow a command argument to specify the style of syntax. */
1654 if (argc > 1)
1655 obscure_syntax = atoi (argv[1]);
1656
1657 buf.allocated = 40;
1658 buf.buffer = (char *) malloc (buf.allocated);
1659 buf.fastmap = fastmap;
1660 buf.translate = upcase;
1661
1662 while (1)
1663 {
1664 gets (pat);
1665
1666 if (*pat)
1667 {
1668 re_compile_pattern (pat, strlen(pat), &buf);
1669
1670 for (i = 0; i < buf.used; i++)
1671 printchar (buf.buffer[i]);
1672
1673 putchar ('\n');
1674
1675 printf ("%d allocated, %d used.\n", buf.allocated, buf.used);
1676
1677 re_compile_fastmap (&buf);
1678 printf ("Allowed by fastmap: ");
1679 for (i = 0; i < (1 << BYTEWIDTH); i++)
1680 if (fastmap[i]) printchar (i);
1681 putchar ('\n');
1682 }
1683
1684 gets (pat); /* Now read the string to match against */
1685
1686 i = re_match (&buf, pat, strlen (pat), 0, 0);
1687 printf ("Match value %d.\n", i);
1688 }
1689}
1690
1691#ifdef NOTDEF
1692print_buf (bufp)
1693 struct re_pattern_buffer *bufp;
1694{
1695 int i;
1696
1697 printf ("buf is :\n----------------\n");
1698 for (i = 0; i < bufp->used; i++)
1699 printchar (bufp->buffer[i]);
1700
1701 printf ("\n%d allocated, %d used.\n", bufp->allocated, bufp->used);
1702
1703 printf ("Allowed by fastmap: ");
1704 for (i = 0; i < (1 << BYTEWIDTH); i++)
1705 if (bufp->fastmap[i])
1706 printchar (i);
1707 printf ("\nAllowed by translate: ");
1708 if (bufp->translate)
1709 for (i = 0; i < (1 << BYTEWIDTH); i++)
1710 if (bufp->translate[i])
1711 printchar (i);
1712 printf ("\nfastmap is%s accurate\n", bufp->fastmap_accurate ? "" : "n't");
1713 printf ("can %s be null\n----------", bufp->can_be_null ? "" : "not");
1714}
1715#endif
1716
1717printchar (c)
1718 char c;
1719{
1720 if (c < 041 || c >= 0177)
1721 {
1722 putchar ('\\');
1723 putchar (((c >> 6) & 3) + '0');
1724 putchar (((c >> 3) & 7) + '0');
1725 putchar ((c & 7) + '0');
1726 }
1727 else
1728 putchar (c);
1729}
1730
1731error (string)
1732 char *string;
1733{
1734 puts (string);
1735 exit (1);
1736}
1737
1738#endif /* test */
This page took 0.101583 seconds and 4 git commands to generate.