sun386 host/target/native separation
[deliverable/binutils-gdb.git] / gdb / regex.c
CommitLineData
dd3b648e
RP
1/* Extended regular expression matching and search library.
2 Copyright (C) 1985, 1989 Free Software Foundation, Inc.
3
99a7de40
JG
4This program is free software; you can redistribute it and/or modify
5it under the terms of the GNU General Public License as published by
6the Free Software Foundation; either version 2 of the License, or
7(at your option) any later version.
8
9This program is distributed in the hope that it will be useful,
10but WITHOUT ANY WARRANTY; without even the implied warranty of
11MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12GNU General Public License for more details.
13
14You should have received a copy of the GNU General Public License
15along with this program; if not, write to the Free Software
16Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
dd3b648e
RP
17
18/* To test, compile with -Dtest.
19 This Dtestable feature turns this into a self-contained program
20 which reads a pattern, describes how it compiles,
21 then reads a string and searches for it. */
22
23#ifdef emacs
24
25/* The `emacs' switch turns on certain special matching commands
26 that make sense only in emacs. */
27
28#include "config.h"
29#include "lisp.h"
30#include "buffer.h"
31#include "syntax.h"
32
33#else /* not emacs */
34
dd3b648e
RP
35#define bcopy(s,d,n) memcpy((d),(s),(n))
36#define bcmp(s1,s2,n) memcmp((s1),(s2),(n))
37#define bzero(s,n) memset((s),0,(n))
dd3b648e
RP
38
39/* Make alloca work the best possible way. */
40#ifdef __GNUC__
41#define alloca __builtin_alloca
42#else
43#ifdef sparc
44#include <alloca.h>
45#endif
46#endif
47
48/*
49 * Define the syntax stuff, so we can do the \<...\> things.
50 */
51
52#ifndef Sword /* must be non-zero in some of the tests below... */
53#define Sword 1
54#endif
55
56#define SYNTAX(c) re_syntax_table[c]
57
58#ifdef SYNTAX_TABLE
59
60char *re_syntax_table;
61
62#else
63
64static char re_syntax_table[256];
65
66static void
67init_syntax_once ()
68{
69 register int c;
70 static int done = 0;
71
72 if (done)
73 return;
74
75 bzero (re_syntax_table, sizeof re_syntax_table);
76
77 for (c = 'a'; c <= 'z'; c++)
78 re_syntax_table[c] = Sword;
79
80 for (c = 'A'; c <= 'Z'; c++)
81 re_syntax_table[c] = Sword;
82
83 for (c = '0'; c <= '9'; c++)
84 re_syntax_table[c] = Sword;
85
86 done = 1;
87}
88
89#endif /* SYNTAX_TABLE */
90#endif /* not emacs */
91
92#include "regex.h"
93
94/* Number of failure points to allocate space for initially,
95 when matching. If this number is exceeded, more space is allocated,
96 so it is not a hard limit. */
97
98#ifndef NFAILURES
99#define NFAILURES 80
100#endif /* NFAILURES */
101
102/* width of a byte in bits */
103
104#define BYTEWIDTH 8
105
106#ifndef SIGN_EXTEND_CHAR
107#define SIGN_EXTEND_CHAR(x) (x)
108#endif
109\f
110static int obscure_syntax = 0;
111
112/* Specify the precise syntax of regexp for compilation.
113 This provides for compatibility for various utilities
114 which historically have different, incompatible syntaxes.
115
116 The argument SYNTAX is a bit-mask containing the two bits
117 RE_NO_BK_PARENS and RE_NO_BK_VBAR. */
118
119int
120re_set_syntax (syntax)
51b57ded 121 int syntax;
dd3b648e
RP
122{
123 int ret;
124
125 ret = obscure_syntax;
126 obscure_syntax = syntax;
127 return ret;
128}
129\f
130/* re_compile_pattern takes a regular-expression string
131 and converts it into a buffer full of byte commands for matching.
132
133 PATTERN is the address of the pattern string
134 SIZE is the length of it.
135 BUFP is a struct re_pattern_buffer * which points to the info
136 on where to store the byte commands.
137 This structure contains a char * which points to the
138 actual space, which should have been obtained with malloc.
139 re_compile_pattern may use realloc to grow the buffer space.
140
141 The number of bytes of commands can be found out by looking in
142 the struct re_pattern_buffer that bufp pointed to,
143 after re_compile_pattern returns.
144*/
145
146#define PATPUSH(ch) (*b++ = (char) (ch))
147
148#define PATFETCH(c) \
149 {if (p == pend) goto end_of_pattern; \
150 c = * (unsigned char *) p++; \
151 if (translate) c = translate[c]; }
152
153#define PATFETCH_RAW(c) \
154 {if (p == pend) goto end_of_pattern; \
155 c = * (unsigned char *) p++; }
156
157#define PATUNFETCH p--
158
159#define EXTEND_BUFFER \
160 { char *old_buffer = bufp->buffer; \
161 if (bufp->allocated == (1<<16)) goto too_big; \
162 bufp->allocated *= 2; \
163 if (bufp->allocated > (1<<16)) bufp->allocated = (1<<16); \
164 if (!(bufp->buffer = (char *) realloc (bufp->buffer, bufp->allocated))) \
165 goto memory_exhausted; \
166 c = bufp->buffer - old_buffer; \
167 b += c; \
168 if (fixup_jump) \
169 fixup_jump += c; \
170 if (laststart) \
171 laststart += c; \
172 begalt += c; \
173 if (pending_exact) \
174 pending_exact += c; \
175 }
176
50e0dc41 177static void store_jump (), insert_jump ();
dd3b648e
RP
178
179char *
180re_compile_pattern (pattern, size, bufp)
181 char *pattern;
182 int size;
183 struct re_pattern_buffer *bufp;
184{
185 register char *b = bufp->buffer;
186 register char *p = pattern;
187 char *pend = pattern + size;
188 register unsigned c, c1;
189 char *p1;
190 unsigned char *translate = (unsigned char *) bufp->translate;
191
192 /* address of the count-byte of the most recently inserted "exactn" command.
193 This makes it possible to tell whether a new exact-match character
194 can be added to that command or requires a new "exactn" command. */
195
196 char *pending_exact = 0;
197
198 /* address of the place where a forward-jump should go
199 to the end of the containing expression.
200 Each alternative of an "or", except the last, ends with a forward-jump
201 of this sort. */
202
203 char *fixup_jump = 0;
204
205 /* address of start of the most recently finished expression.
206 This tells postfix * where to find the start of its operand. */
207
208 char *laststart = 0;
209
210 /* In processing a repeat, 1 means zero matches is allowed */
211
212 char zero_times_ok;
213
214 /* In processing a repeat, 1 means many matches is allowed */
215
216 char many_times_ok;
217
218 /* address of beginning of regexp, or inside of last \( */
219
220 char *begalt = b;
221
222 /* Stack of information saved by \( and restored by \).
223 Four stack elements are pushed by each \(:
224 First, the value of b.
225 Second, the value of fixup_jump.
226 Third, the value of regnum.
227 Fourth, the value of begalt. */
228
229 int stackb[40];
230 int *stackp = stackb;
231 int *stacke = stackb + 40;
232 int *stackt;
233
234 /* Counts \('s as they are encountered. Remembered for the matching \),
235 where it becomes the "register number" to put in the stop_memory command */
236
237 int regnum = 1;
238
239 bufp->fastmap_accurate = 0;
240
241#ifndef emacs
242#ifndef SYNTAX_TABLE
243 /*
244 * Initialize the syntax table.
245 */
246 init_syntax_once();
247#endif
248#endif
249
250 if (bufp->allocated == 0)
251 {
252 bufp->allocated = 28;
253 if (bufp->buffer)
254 /* EXTEND_BUFFER loses when bufp->allocated is 0 */
255 bufp->buffer = (char *) realloc (bufp->buffer, 28);
256 else
257 /* Caller did not allocate a buffer. Do it for him */
258 bufp->buffer = (char *) malloc (28);
259 if (!bufp->buffer) goto memory_exhausted;
260 begalt = b = bufp->buffer;
261 }
262
263 while (p != pend)
264 {
265 if (b - bufp->buffer > bufp->allocated - 10)
266 /* Note that EXTEND_BUFFER clobbers c */
267 EXTEND_BUFFER;
268
269 PATFETCH (c);
270
271 switch (c)
272 {
273 case '$':
274 if (obscure_syntax & RE_TIGHT_VBAR)
275 {
276 if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS) && p != pend)
277 goto normal_char;
278 /* Make operand of last vbar end before this `$'. */
279 if (fixup_jump)
280 store_jump (fixup_jump, jump, b);
281 fixup_jump = 0;
282 PATPUSH (endline);
283 break;
284 }
285
286 /* $ means succeed if at end of line, but only in special contexts.
287 If randomly in the middle of a pattern, it is a normal character. */
288 if (p == pend || *p == '\n'
289 || (obscure_syntax & RE_CONTEXT_INDEP_OPS)
290 || (obscure_syntax & RE_NO_BK_PARENS
291 ? *p == ')'
292 : *p == '\\' && p[1] == ')')
293 || (obscure_syntax & RE_NO_BK_VBAR
294 ? *p == '|'
295 : *p == '\\' && p[1] == '|'))
296 {
297 PATPUSH (endline);
298 break;
299 }
300 goto normal_char;
301
302 case '^':
303 /* ^ means succeed if at beg of line, but only if no preceding pattern. */
304
305 if (laststart && p[-2] != '\n'
306 && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
307 goto normal_char;
308 if (obscure_syntax & RE_TIGHT_VBAR)
309 {
310 if (p != pattern + 1
311 && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
312 goto normal_char;
313 PATPUSH (begline);
314 begalt = b;
315 }
316 else
317 PATPUSH (begline);
318 break;
319
320 case '+':
321 case '?':
322 if (obscure_syntax & RE_BK_PLUS_QM)
323 goto normal_char;
324 handle_plus:
325 case '*':
326 /* If there is no previous pattern, char not special. */
327 if (!laststart && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
328 goto normal_char;
329 /* If there is a sequence of repetition chars,
330 collapse it down to equivalent to just one. */
331 zero_times_ok = 0;
332 many_times_ok = 0;
333 while (1)
334 {
335 zero_times_ok |= c != '+';
336 many_times_ok |= c != '?';
337 if (p == pend)
338 break;
339 PATFETCH (c);
340 if (c == '*')
341 ;
342 else if (!(obscure_syntax & RE_BK_PLUS_QM)
343 && (c == '+' || c == '?'))
344 ;
345 else if ((obscure_syntax & RE_BK_PLUS_QM)
346 && c == '\\')
347 {
348 int c1;
349 PATFETCH (c1);
350 if (!(c1 == '+' || c1 == '?'))
351 {
352 PATUNFETCH;
353 PATUNFETCH;
354 break;
355 }
356 c = c1;
357 }
358 else
359 {
360 PATUNFETCH;
361 break;
362 }
363 }
364
365 /* Star, etc. applied to an empty pattern is equivalent
366 to an empty pattern. */
367 if (!laststart)
368 break;
369
370 /* Now we know whether 0 matches is allowed,
371 and whether 2 or more matches is allowed. */
372 if (many_times_ok)
373 {
374 /* If more than one repetition is allowed,
375 put in a backward jump at the end. */
376 store_jump (b, maybe_finalize_jump, laststart - 3);
377 b += 3;
378 }
379 insert_jump (on_failure_jump, laststart, b + 3, b);
380 pending_exact = 0;
381 b += 3;
382 if (!zero_times_ok)
383 {
384 /* At least one repetition required: insert before the loop
385 a skip over the initial on-failure-jump instruction */
386 insert_jump (dummy_failure_jump, laststart, laststart + 6, b);
387 b += 3;
388 }
389 break;
390
391 case '.':
392 laststart = b;
393 PATPUSH (anychar);
394 break;
395
396 case '[':
397 while (b - bufp->buffer
398 > bufp->allocated - 3 - (1 << BYTEWIDTH) / BYTEWIDTH)
399 /* Note that EXTEND_BUFFER clobbers c */
400 EXTEND_BUFFER;
401
402 laststart = b;
403 if (*p == '^')
404 PATPUSH (charset_not), p++;
405 else
406 PATPUSH (charset);
407 p1 = p;
408
409 PATPUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
410 /* Clear the whole map */
411 bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
412 /* Read in characters and ranges, setting map bits */
413 while (1)
414 {
415 PATFETCH (c);
416 if (c == ']' && p != p1 + 1) break;
417 if (*p == '-' && p[1] != ']')
418 {
419 PATFETCH (c1);
420 PATFETCH (c1);
421 while (c <= c1)
422 b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH), c++;
423 }
424 else
425 {
426 b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH);
427 }
428 }
429 /* Discard any bitmap bytes that are all 0 at the end of the map.
430 Decrement the map-length byte too. */
431 while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
432 b[-1]--;
433 b += b[-1];
434 break;
435
436 case '(':
437 if (! (obscure_syntax & RE_NO_BK_PARENS))
438 goto normal_char;
439 else
440 goto handle_open;
441
442 case ')':
443 if (! (obscure_syntax & RE_NO_BK_PARENS))
444 goto normal_char;
445 else
446 goto handle_close;
447
448 case '\n':
449 if (! (obscure_syntax & RE_NEWLINE_OR))
450 goto normal_char;
451 else
452 goto handle_bar;
453
454 case '|':
455 if (! (obscure_syntax & RE_NO_BK_VBAR))
456 goto normal_char;
457 else
458 goto handle_bar;
459
460 case '\\':
461 if (p == pend) goto invalid_pattern;
462 PATFETCH_RAW (c);
463 switch (c)
464 {
465 case '(':
466 if (obscure_syntax & RE_NO_BK_PARENS)
467 goto normal_backsl;
468 handle_open:
469 if (stackp == stacke) goto nesting_too_deep;
470 if (regnum < RE_NREGS)
471 {
472 PATPUSH (start_memory);
473 PATPUSH (regnum);
474 }
475 *stackp++ = b - bufp->buffer;
476 *stackp++ = fixup_jump ? fixup_jump - bufp->buffer + 1 : 0;
477 *stackp++ = regnum++;
478 *stackp++ = begalt - bufp->buffer;
479 fixup_jump = 0;
480 laststart = 0;
481 begalt = b;
482 break;
483
484 case ')':
485 if (obscure_syntax & RE_NO_BK_PARENS)
486 goto normal_backsl;
487 handle_close:
488 if (stackp == stackb) goto unmatched_close;
489 begalt = *--stackp + bufp->buffer;
490 if (fixup_jump)
491 store_jump (fixup_jump, jump, b);
492 if (stackp[-1] < RE_NREGS)
493 {
494 PATPUSH (stop_memory);
495 PATPUSH (stackp[-1]);
496 }
497 stackp -= 2;
498 fixup_jump = 0;
499 if (*stackp)
500 fixup_jump = *stackp + bufp->buffer - 1;
501 laststart = *--stackp + bufp->buffer;
502 break;
503
504 case '|':
505 if (obscure_syntax & RE_NO_BK_VBAR)
506 goto normal_backsl;
507 handle_bar:
508 insert_jump (on_failure_jump, begalt, b + 6, b);
509 pending_exact = 0;
510 b += 3;
511 if (fixup_jump)
512 store_jump (fixup_jump, jump, b);
513 fixup_jump = b;
514 b += 3;
515 laststart = 0;
516 begalt = b;
517 break;
518
519#ifdef emacs
520 case '=':
521 PATPUSH (at_dot);
522 break;
523
524 case 's':
525 laststart = b;
526 PATPUSH (syntaxspec);
527 PATFETCH (c);
528 PATPUSH (syntax_spec_code[c]);
529 break;
530
531 case 'S':
532 laststart = b;
533 PATPUSH (notsyntaxspec);
534 PATFETCH (c);
535 PATPUSH (syntax_spec_code[c]);
536 break;
537#endif /* emacs */
538
539 case 'w':
540 laststart = b;
541 PATPUSH (wordchar);
542 break;
543
544 case 'W':
545 laststart = b;
546 PATPUSH (notwordchar);
547 break;
548
549 case '<':
550 PATPUSH (wordbeg);
551 break;
552
553 case '>':
554 PATPUSH (wordend);
555 break;
556
557 case 'b':
558 PATPUSH (wordbound);
559 break;
560
561 case 'B':
562 PATPUSH (notwordbound);
563 break;
564
565 case '`':
566 PATPUSH (begbuf);
567 break;
568
569 case '\'':
570 PATPUSH (endbuf);
571 break;
572
573 case '1':
574 case '2':
575 case '3':
576 case '4':
577 case '5':
578 case '6':
579 case '7':
580 case '8':
581 case '9':
582 c1 = c - '0';
583 if (c1 >= regnum)
584 goto normal_char;
585 for (stackt = stackp - 2; stackt > stackb; stackt -= 4)
586 if (*stackt == c1)
587 goto normal_char;
588 laststart = b;
589 PATPUSH (duplicate);
590 PATPUSH (c1);
591 break;
592
593 case '+':
594 case '?':
595 if (obscure_syntax & RE_BK_PLUS_QM)
596 goto handle_plus;
597
598 default:
599 normal_backsl:
600 /* You might think it would be useful for \ to mean
601 not to translate; but if we don't translate it
602 it will never match anything. */
603 if (translate) c = translate[c];
604 goto normal_char;
605 }
606 break;
607
608 default:
609 normal_char:
610 if (!pending_exact || pending_exact + *pending_exact + 1 != b
611 || *pending_exact == 0177 || *p == '*' || *p == '^'
612 || ((obscure_syntax & RE_BK_PLUS_QM)
613 ? *p == '\\' && (p[1] == '+' || p[1] == '?')
614 : (*p == '+' || *p == '?')))
615 {
616 laststart = b;
617 PATPUSH (exactn);
618 pending_exact = b;
619 PATPUSH (0);
620 }
621 PATPUSH (c);
622 (*pending_exact)++;
623 }
624 }
625
626 if (fixup_jump)
627 store_jump (fixup_jump, jump, b);
628
629 if (stackp != stackb) goto unmatched_open;
630
631 bufp->used = b - bufp->buffer;
632 return 0;
633
634 invalid_pattern:
635 return "Invalid regular expression";
636
637 unmatched_open:
638 return "Unmatched \\(";
639
640 unmatched_close:
641 return "Unmatched \\)";
642
643 end_of_pattern:
644 return "Premature end of regular expression";
645
646 nesting_too_deep:
647 return "Nesting too deep";
648
649 too_big:
650 return "Regular expression too big";
651
652 memory_exhausted:
653 return "Memory exhausted";
654}
655
656/* Store where `from' points a jump operation to jump to where `to' points.
657 `opcode' is the opcode to store. */
658
50e0dc41 659static void
dd3b648e
RP
660store_jump (from, opcode, to)
661 char *from, *to;
662 char opcode;
663{
664 from[0] = opcode;
665 from[1] = (to - (from + 3)) & 0377;
666 from[2] = (to - (from + 3)) >> 8;
667}
668
669/* Open up space at char FROM, and insert there a jump to TO.
670 CURRENT_END gives te end of the storage no in use,
671 so we know how much data to copy up.
672 OP is the opcode of the jump to insert.
673
674 If you call this function, you must zero out pending_exact. */
675
50e0dc41 676static void
dd3b648e
RP
677insert_jump (op, from, to, current_end)
678 char op;
679 char *from, *to, *current_end;
680{
681 register char *pto = current_end + 3;
682 register char *pfrom = current_end;
683 while (pfrom != from)
684 *--pto = *--pfrom;
685 store_jump (from, op, to);
686}
687\f
688/* Given a pattern, compute a fastmap from it.
689 The fastmap records which of the (1 << BYTEWIDTH) possible characters
690 can start a string that matches the pattern.
691 This fastmap is used by re_search to skip quickly over totally implausible text.
692
693 The caller must supply the address of a (1 << BYTEWIDTH)-byte data area
694 as bufp->fastmap.
695 The other components of bufp describe the pattern to be used. */
696
697void
698re_compile_fastmap (bufp)
699 struct re_pattern_buffer *bufp;
700{
701 unsigned char *pattern = (unsigned char *) bufp->buffer;
702 int size = bufp->used;
703 register char *fastmap = bufp->fastmap;
704 register unsigned char *p = pattern;
705 register unsigned char *pend = pattern + size;
51b57ded 706 register int j;
dd3b648e
RP
707 unsigned char *translate = (unsigned char *) bufp->translate;
708
709 unsigned char *stackb[NFAILURES];
710 unsigned char **stackp = stackb;
711
712 bzero (fastmap, (1 << BYTEWIDTH));
713 bufp->fastmap_accurate = 1;
714 bufp->can_be_null = 0;
715
716 while (p)
717 {
718 if (p == pend)
719 {
720 bufp->can_be_null = 1;
721 break;
722 }
723#ifdef SWITCH_ENUM_BUG
724 switch ((int) ((enum regexpcode) *p++))
725#else
726 switch ((enum regexpcode) *p++)
727#endif
728 {
729 case exactn:
730 if (translate)
731 fastmap[translate[p[1]]] = 1;
732 else
733 fastmap[p[1]] = 1;
734 break;
735
736 case begline:
737 case before_dot:
738 case at_dot:
739 case after_dot:
740 case begbuf:
741 case endbuf:
742 case wordbound:
743 case notwordbound:
744 case wordbeg:
745 case wordend:
746 continue;
747
748 case endline:
749 if (translate)
750 fastmap[translate['\n']] = 1;
751 else
752 fastmap['\n'] = 1;
753 if (bufp->can_be_null != 1)
754 bufp->can_be_null = 2;
755 break;
756
757 case finalize_jump:
758 case maybe_finalize_jump:
759 case jump:
760 case dummy_failure_jump:
761 bufp->can_be_null = 1;
762 j = *p++ & 0377;
763 j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
764 p += j + 1; /* The 1 compensates for missing ++ above */
765 if (j > 0)
766 continue;
767 /* Jump backward reached implies we just went through
768 the body of a loop and matched nothing.
769 Opcode jumped to should be an on_failure_jump.
770 Just treat it like an ordinary jump.
771 For a * loop, it has pushed its failure point already;
772 if so, discard that as redundant. */
773 if ((enum regexpcode) *p != on_failure_jump)
774 continue;
775 p++;
776 j = *p++ & 0377;
777 j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
778 p += j + 1; /* The 1 compensates for missing ++ above */
779 if (stackp != stackb && *stackp == p)
780 stackp--;
781 continue;
782
783 case on_failure_jump:
784 j = *p++ & 0377;
785 j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
786 p++;
787 *++stackp = p + j;
788 continue;
789
790 case start_memory:
791 case stop_memory:
792 p++;
793 continue;
794
795 case duplicate:
796 bufp->can_be_null = 1;
797 fastmap['\n'] = 1;
798 case anychar:
799 for (j = 0; j < (1 << BYTEWIDTH); j++)
800 if (j != '\n')
801 fastmap[j] = 1;
802 if (bufp->can_be_null)
803 return;
804 /* Don't return; check the alternative paths
805 so we can set can_be_null if appropriate. */
806 break;
807
808 case wordchar:
809 for (j = 0; j < (1 << BYTEWIDTH); j++)
810 if (SYNTAX (j) == Sword)
811 fastmap[j] = 1;
812 break;
813
814 case notwordchar:
815 for (j = 0; j < (1 << BYTEWIDTH); j++)
816 if (SYNTAX (j) != Sword)
817 fastmap[j] = 1;
818 break;
819
820#ifdef emacs
821 case syntaxspec:
822 k = *p++;
823 for (j = 0; j < (1 << BYTEWIDTH); j++)
824 if (SYNTAX (j) == (enum syntaxcode) k)
825 fastmap[j] = 1;
826 break;
827
828 case notsyntaxspec:
829 k = *p++;
830 for (j = 0; j < (1 << BYTEWIDTH); j++)
831 if (SYNTAX (j) != (enum syntaxcode) k)
832 fastmap[j] = 1;
833 break;
834#endif /* emacs */
835
836 case charset:
837 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
838 if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
839 {
840 if (translate)
841 fastmap[translate[j]] = 1;
842 else
843 fastmap[j] = 1;
844 }
845 break;
846
847 case charset_not:
848 /* Chars beyond end of map must be allowed */
849 for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
850 if (translate)
851 fastmap[translate[j]] = 1;
852 else
853 fastmap[j] = 1;
854
855 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
856 if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
857 {
858 if (translate)
859 fastmap[translate[j]] = 1;
860 else
861 fastmap[j] = 1;
862 }
863 break;
864 }
865
866 /* Get here means we have successfully found the possible starting characters
867 of one path of the pattern. We need not follow this path any farther.
868 Instead, look at the next alternative remembered in the stack. */
869 if (stackp != stackb)
870 p = *stackp--;
871 else
872 break;
873 }
874}
875\f
876/* Like re_search_2, below, but only one string is specified. */
877
878int
879re_search (pbufp, string, size, startpos, range, regs)
880 struct re_pattern_buffer *pbufp;
881 char *string;
882 int size, startpos, range;
883 struct re_registers *regs;
884{
885 return re_search_2 (pbufp, 0, 0, string, size, startpos, range, regs, size);
886}
887
888/* Like re_match_2 but tries first a match starting at index STARTPOS,
889 then at STARTPOS + 1, and so on.
890 RANGE is the number of places to try before giving up.
891 If RANGE is negative, the starting positions tried are
892 STARTPOS, STARTPOS - 1, etc.
893 It is up to the caller to make sure that range is not so large
894 as to take the starting position outside of the input strings.
895
896The value returned is the position at which the match was found,
897 or -1 if no match was found,
898 or -2 if error (such as failure stack overflow). */
899
900int
901re_search_2 (pbufp, string1, size1, string2, size2, startpos, range, regs, mstop)
902 struct re_pattern_buffer *pbufp;
903 char *string1, *string2;
904 int size1, size2;
905 int startpos;
906 register int range;
907 struct re_registers *regs;
908 int mstop;
909{
910 register char *fastmap = pbufp->fastmap;
911 register unsigned char *translate = (unsigned char *) pbufp->translate;
912 int total = size1 + size2;
913 int val;
914
915 /* Update the fastmap now if not correct already */
916 if (fastmap && !pbufp->fastmap_accurate)
917 re_compile_fastmap (pbufp);
918
919 /* Don't waste time in a long search for a pattern
920 that says it is anchored. */
921 if (pbufp->used > 0 && (enum regexpcode) pbufp->buffer[0] == begbuf
922 && range > 0)
923 {
924 if (startpos > 0)
925 return -1;
926 else
927 range = 1;
928 }
929
930 while (1)
931 {
932 /* If a fastmap is supplied, skip quickly over characters
933 that cannot possibly be the start of a match.
934 Note, however, that if the pattern can possibly match
935 the null string, we must test it at each starting point
936 so that we take the first null string we get. */
937
938 if (fastmap && startpos < total && pbufp->can_be_null != 1)
939 {
940 if (range > 0)
941 {
942 register int lim = 0;
943 register unsigned char *p;
944 int irange = range;
945 if (startpos < size1 && startpos + range >= size1)
946 lim = range - (size1 - startpos);
947
948 p = ((unsigned char *)
949 &(startpos >= size1 ? string2 - size1 : string1)[startpos]);
950
951 if (translate)
952 {
953 while (range > lim && !fastmap[translate[*p++]])
954 range--;
955 }
956 else
957 {
958 while (range > lim && !fastmap[*p++])
959 range--;
960 }
961 startpos += irange - range;
962 }
963 else
964 {
965 register unsigned char c;
966 if (startpos >= size1)
967 c = string2[startpos - size1];
968 else
969 c = string1[startpos];
970 c &= 0xff;
971 if (translate ? !fastmap[translate[c]] : !fastmap[c])
972 goto advance;
973 }
974 }
975
976 if (range >= 0 && startpos == total
977 && fastmap && pbufp->can_be_null == 0)
978 return -1;
979
980 val = re_match_2 (pbufp, string1, size1, string2, size2, startpos, regs, mstop);
981 if (0 <= val)
982 {
983 if (val == -2)
984 return -2;
985 return startpos;
986 }
987
988#ifdef C_ALLOCA
989 alloca (0);
990#endif /* C_ALLOCA */
991
992 advance:
993 if (!range) break;
994 if (range > 0) range--, startpos++; else range++, startpos--;
995 }
996 return -1;
997}
998\f
999#ifndef emacs /* emacs never uses this */
1000int
1001re_match (pbufp, string, size, pos, regs)
1002 struct re_pattern_buffer *pbufp;
1003 char *string;
1004 int size, pos;
1005 struct re_registers *regs;
1006{
1007 return re_match_2 (pbufp, 0, 0, string, size, pos, regs, size);
1008}
1009#endif /* emacs */
1010
1011/* Maximum size of failure stack. Beyond this, overflow is an error. */
1012
1013int re_max_failures = 2000;
1014
1015static int bcmp_translate();
1016/* Match the pattern described by PBUFP
1017 against data which is the virtual concatenation of STRING1 and STRING2.
1018 SIZE1 and SIZE2 are the sizes of the two data strings.
1019 Start the match at position POS.
1020 Do not consider matching past the position MSTOP.
1021
1022 If pbufp->fastmap is nonzero, then it had better be up to date.
1023
1024 The reason that the data to match are specified as two components
1025 which are to be regarded as concatenated
1026 is so this function can be used directly on the contents of an Emacs buffer.
1027
1028 -1 is returned if there is no match. -2 is returned if there is
1029 an error (such as match stack overflow). Otherwise the value is the length
1030 of the substring which was matched. */
1031
1032int
1033re_match_2 (pbufp, string1, size1, string2, size2, pos, regs, mstop)
1034 struct re_pattern_buffer *pbufp;
1035 unsigned char *string1, *string2;
1036 int size1, size2;
1037 int pos;
1038 struct re_registers *regs;
1039 int mstop;
1040{
1041 register unsigned char *p = (unsigned char *) pbufp->buffer;
1042 register unsigned char *pend = p + pbufp->used;
1043 /* End of first string */
1044 unsigned char *end1;
1045 /* End of second string */
1046 unsigned char *end2;
1047 /* Pointer just past last char to consider matching */
1048 unsigned char *end_match_1, *end_match_2;
1049 register unsigned char *d, *dend;
1050 register int mcnt;
1051 unsigned char *translate = (unsigned char *) pbufp->translate;
1052
1053 /* Failure point stack. Each place that can handle a failure further down the line
1054 pushes a failure point on this stack. It consists of two char *'s.
1055 The first one pushed is where to resume scanning the pattern;
1056 the second pushed is where to resume scanning the strings.
1057 If the latter is zero, the failure point is a "dummy".
1058 If a failure happens and the innermost failure point is dormant,
1059 it discards that failure point and tries the next one. */
1060
1061 unsigned char *initial_stack[2 * NFAILURES];
1062 unsigned char **stackb = initial_stack;
1063 unsigned char **stackp = stackb, **stacke = &stackb[2 * NFAILURES];
1064
1065 /* Information on the "contents" of registers.
1066 These are pointers into the input strings; they record
1067 just what was matched (on this attempt) by some part of the pattern.
1068 The start_memory command stores the start of a register's contents
1069 and the stop_memory command stores the end.
1070
1071 At that point, regstart[regnum] points to the first character in the register,
1072 regend[regnum] points to the first character beyond the end of the register,
1073 regstart_seg1[regnum] is true iff regstart[regnum] points into string1,
1074 and regend_seg1[regnum] is true iff regend[regnum] points into string1. */
1075
1076 unsigned char *regstart[RE_NREGS];
1077 unsigned char *regend[RE_NREGS];
1078 unsigned char regstart_seg1[RE_NREGS], regend_seg1[RE_NREGS];
1079
1080 /* Set up pointers to ends of strings.
1081 Don't allow the second string to be empty unless both are empty. */
1082 if (!size2)
1083 {
1084 string2 = string1;
1085 size2 = size1;
1086 string1 = 0;
1087 size1 = 0;
1088 }
1089 end1 = string1 + size1;
1090 end2 = string2 + size2;
1091
1092 /* Compute where to stop matching, within the two strings */
1093 if (mstop <= size1)
1094 {
1095 end_match_1 = string1 + mstop;
1096 end_match_2 = string2;
1097 }
1098 else
1099 {
1100 end_match_1 = end1;
1101 end_match_2 = string2 + mstop - size1;
1102 }
1103
1104 /* Initialize \) text positions to -1
1105 to mark ones that no \( or \) has been seen for. */
1106
1107 for (mcnt = 0; mcnt < sizeof (regend) / sizeof (*regend); mcnt++)
1108 regend[mcnt] = (unsigned char *) -1;
1109
1110 /* `p' scans through the pattern as `d' scans through the data.
1111 `dend' is the end of the input string that `d' points within.
1112 `d' is advanced into the following input string whenever necessary,
1113 but this happens before fetching;
1114 therefore, at the beginning of the loop,
1115 `d' can be pointing at the end of a string,
1116 but it cannot equal string2. */
1117
1118 if (pos <= size1)
1119 d = string1 + pos, dend = end_match_1;
1120 else
1121 d = string2 + pos - size1, dend = end_match_2;
1122
1123/* Write PREFETCH; just before fetching a character with *d. */
1124#define PREFETCH \
1125 while (d == dend) \
1126 { if (dend == end_match_2) goto fail; /* end of string2 => failure */ \
1127 d = string2; /* end of string1 => advance to string2. */ \
1128 dend = end_match_2; }
1129
1130 /* This loop loops over pattern commands.
1131 It exits by returning from the function if match is complete,
1132 or it drops through if match fails at this starting point in the input data. */
1133
1134 while (1)
1135 {
1136 if (p == pend)
1137 /* End of pattern means we have succeeded! */
1138 {
1139 /* If caller wants register contents data back, convert it to indices */
1140 if (regs)
1141 {
1142 regs->start[0] = pos;
1143 if (dend == end_match_1)
1144 regs->end[0] = d - string1;
1145 else
1146 regs->end[0] = d - string2 + size1;
1147 for (mcnt = 1; mcnt < RE_NREGS; mcnt++)
1148 {
1149 if (regend[mcnt] == (unsigned char *) -1)
1150 {
1151 regs->start[mcnt] = -1;
1152 regs->end[mcnt] = -1;
1153 continue;
1154 }
1155 if (regstart_seg1[mcnt])
1156 regs->start[mcnt] = regstart[mcnt] - string1;
1157 else
1158 regs->start[mcnt] = regstart[mcnt] - string2 + size1;
1159 if (regend_seg1[mcnt])
1160 regs->end[mcnt] = regend[mcnt] - string1;
1161 else
1162 regs->end[mcnt] = regend[mcnt] - string2 + size1;
1163 }
1164 }
1165 if (dend == end_match_1)
1166 return (d - string1 - pos);
1167 else
1168 return d - string2 + size1 - pos;
1169 }
1170
1171 /* Otherwise match next pattern command */
1172#ifdef SWITCH_ENUM_BUG
1173 switch ((int) ((enum regexpcode) *p++))
1174#else
1175 switch ((enum regexpcode) *p++)
1176#endif
1177 {
1178
1179 /* \( is represented by a start_memory, \) by a stop_memory.
1180 Both of those commands contain a "register number" argument.
1181 The text matched within the \( and \) is recorded under that number.
1182 Then, \<digit> turns into a `duplicate' command which
1183 is followed by the numeric value of <digit> as the register number. */
1184
1185 case start_memory:
1186 regstart[*p] = d;
1187 regstart_seg1[*p++] = (dend == end_match_1);
1188 break;
1189
1190 case stop_memory:
1191 regend[*p] = d;
1192 regend_seg1[*p++] = (dend == end_match_1);
1193 break;
1194
1195 case duplicate:
1196 {
1197 int regno = *p++; /* Get which register to match against */
1198 register unsigned char *d2, *dend2;
1199
1200 d2 = regstart[regno];
1201 dend2 = ((regstart_seg1[regno] == regend_seg1[regno])
1202 ? regend[regno] : end_match_1);
1203 while (1)
1204 {
1205 /* Advance to next segment in register contents, if necessary */
1206 while (d2 == dend2)
1207 {
1208 if (dend2 == end_match_2) break;
1209 if (dend2 == regend[regno]) break;
1210 d2 = string2, dend2 = regend[regno]; /* end of string1 => advance to string2. */
1211 }
1212 /* At end of register contents => success */
1213 if (d2 == dend2) break;
1214
1215 /* Advance to next segment in data being matched, if necessary */
1216 PREFETCH;
1217
1218 /* mcnt gets # consecutive chars to compare */
1219 mcnt = dend - d;
1220 if (mcnt > dend2 - d2)
1221 mcnt = dend2 - d2;
1222 /* Compare that many; failure if mismatch, else skip them. */
1223 if (translate ? bcmp_translate (d, d2, mcnt, translate) : bcmp (d, d2, mcnt))
1224 goto fail;
1225 d += mcnt, d2 += mcnt;
1226 }
1227 }
1228 break;
1229
1230 case anychar:
1231 /* fetch a data character */
1232 PREFETCH;
1233 /* Match anything but a newline. */
1234 if ((translate ? translate[*d++] : *d++) == '\n')
1235 goto fail;
1236 break;
1237
1238 case charset:
1239 case charset_not:
1240 {
1241 /* Nonzero for charset_not */
1242 int not = 0;
1243 register int c;
1244 if (*(p - 1) == (unsigned char) charset_not)
1245 not = 1;
1246
1247 /* fetch a data character */
1248 PREFETCH;
1249
1250 if (translate)
1251 c = translate [*d];
1252 else
1253 c = *d;
1254
1255 if (c < *p * BYTEWIDTH
1256 && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
1257 not = !not;
1258
1259 p += 1 + *p;
1260
1261 if (!not) goto fail;
1262 d++;
1263 break;
1264 }
1265
1266 case begline:
1267 if (d == string1 || d[-1] == '\n')
1268 break;
1269 goto fail;
1270
1271 case endline:
1272 if (d == end2
1273 || (d == end1 ? (size2 == 0 || *string2 == '\n') : *d == '\n'))
1274 break;
1275 goto fail;
1276
1277 /* "or" constructs ("|") are handled by starting each alternative
1278 with an on_failure_jump that points to the start of the next alternative.
1279 Each alternative except the last ends with a jump to the joining point.
1280 (Actually, each jump except for the last one really jumps
1281 to the following jump, because tensioning the jumps is a hassle.) */
1282
1283 /* The start of a stupid repeat has an on_failure_jump that points
1284 past the end of the repeat text.
1285 This makes a failure point so that, on failure to match a repetition,
1286 matching restarts past as many repetitions have been found
1287 with no way to fail and look for another one. */
1288
1289 /* A smart repeat is similar but loops back to the on_failure_jump
1290 so that each repetition makes another failure point. */
1291
1292 case on_failure_jump:
1293 if (stackp == stacke)
1294 {
1295 unsigned char **stackx;
1296 if (stacke - stackb > re_max_failures * 2)
1297 return -2;
1298 stackx = (unsigned char **) alloca (2 * (stacke - stackb)
1299 * sizeof (char *));
1300 bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *));
1301 stackp = stackx + (stackp - stackb);
1302 stacke = stackx + 2 * (stacke - stackb);
1303 stackb = stackx;
1304 }
1305 mcnt = *p++ & 0377;
1306 mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
1307 p++;
1308 *stackp++ = mcnt + p;
1309 *stackp++ = d;
1310 break;
1311
1312 /* The end of a smart repeat has an maybe_finalize_jump back.
1313 Change it either to a finalize_jump or an ordinary jump. */
1314
1315 case maybe_finalize_jump:
1316 mcnt = *p++ & 0377;
1317 mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
1318 p++;
1319 {
1320 register unsigned char *p2 = p;
1321 /* Compare what follows with the begining of the repeat.
1322 If we can establish that there is nothing that they would
1323 both match, we can change to finalize_jump */
1324 while (p2 != pend
1325 && (*p2 == (unsigned char) stop_memory
1326 || *p2 == (unsigned char) start_memory))
1327 p2++;
1328 if (p2 == pend)
1329 p[-3] = (unsigned char) finalize_jump;
1330 else if (*p2 == (unsigned char) exactn
1331 || *p2 == (unsigned char) endline)
1332 {
1333 register int c = *p2 == (unsigned char) endline ? '\n' : p2[2];
1334 register unsigned char *p1 = p + mcnt;
1335 /* p1[0] ... p1[2] are an on_failure_jump.
1336 Examine what follows that */
1337 if (p1[3] == (unsigned char) exactn && p1[5] != c)
1338 p[-3] = (unsigned char) finalize_jump;
1339 else if (p1[3] == (unsigned char) charset
1340 || p1[3] == (unsigned char) charset_not)
1341 {
1342 int not = p1[3] == (unsigned char) charset_not;
1343 if (c < p1[4] * BYTEWIDTH
1344 && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
1345 not = !not;
1346 /* not is 1 if c would match */
1347 /* That means it is not safe to finalize */
1348 if (!not)
1349 p[-3] = (unsigned char) finalize_jump;
1350 }
1351 }
1352 }
1353 p -= 2;
1354 if (p[-1] != (unsigned char) finalize_jump)
1355 {
1356 p[-1] = (unsigned char) jump;
1357 goto nofinalize;
1358 }
1359
1360 /* The end of a stupid repeat has a finalize-jump
1361 back to the start, where another failure point will be made
1362 which will point after all the repetitions found so far. */
1363
1364 case finalize_jump:
1365 stackp -= 2;
1366
1367 case jump:
1368 nofinalize:
1369 mcnt = *p++ & 0377;
1370 mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
1371 p += mcnt + 1; /* The 1 compensates for missing ++ above */
1372 break;
1373
1374 case dummy_failure_jump:
1375 if (stackp == stacke)
1376 {
1377 unsigned char **stackx
1378 = (unsigned char **) alloca (2 * (stacke - stackb)
1379 * sizeof (char *));
1380 bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *));
1381 stackp = stackx + (stackp - stackb);
1382 stacke = stackx + 2 * (stacke - stackb);
1383 stackb = stackx;
1384 }
1385 *stackp++ = 0;
1386 *stackp++ = 0;
1387 goto nofinalize;
1388
1389 case wordbound:
1390 if (d == string1 /* Points to first char */
1391 || d == end2 /* Points to end */
1392 || (d == end1 && size2 == 0)) /* Points to end */
1393 break;
1394 if ((SYNTAX (d[-1]) == Sword)
1395 != (SYNTAX (d == end1 ? *string2 : *d) == Sword))
1396 break;
1397 goto fail;
1398
1399 case notwordbound:
1400 if (d == string1 /* Points to first char */
1401 || d == end2 /* Points to end */
1402 || (d == end1 && size2 == 0)) /* Points to end */
1403 goto fail;
1404 if ((SYNTAX (d[-1]) == Sword)
1405 != (SYNTAX (d == end1 ? *string2 : *d) == Sword))
1406 goto fail;
1407 break;
1408
1409 case wordbeg:
1410 if (d == end2 /* Points to end */
1411 || (d == end1 && size2 == 0) /* Points to end */
1412 || SYNTAX (* (d == end1 ? string2 : d)) != Sword) /* Next char not a letter */
1413 goto fail;
1414 if (d == string1 /* Points to first char */
1415 || SYNTAX (d[-1]) != Sword) /* prev char not letter */
1416 break;
1417 goto fail;
1418
1419 case wordend:
1420 if (d == string1 /* Points to first char */
1421 || SYNTAX (d[-1]) != Sword) /* prev char not letter */
1422 goto fail;
1423 if (d == end2 /* Points to end */
1424 || (d == end1 && size2 == 0) /* Points to end */
1425 || SYNTAX (d == end1 ? *string2 : *d) != Sword) /* Next char not a letter */
1426 break;
1427 goto fail;
1428
1429#ifdef emacs
1430 case before_dot:
1431 if (((d - string2 <= (unsigned) size2)
1432 ? d - bf_p2 : d - bf_p1)
1433 <= point)
1434 goto fail;
1435 break;
1436
1437 case at_dot:
1438 if (((d - string2 <= (unsigned) size2)
1439 ? d - bf_p2 : d - bf_p1)
1440 == point)
1441 goto fail;
1442 break;
1443
1444 case after_dot:
1445 if (((d - string2 <= (unsigned) size2)
1446 ? d - bf_p2 : d - bf_p1)
1447 >= point)
1448 goto fail;
1449 break;
1450
1451 case wordchar:
1452 mcnt = (int) Sword;
1453 goto matchsyntax;
1454
1455 case syntaxspec:
1456 mcnt = *p++;
1457 matchsyntax:
1458 PREFETCH;
1459 if (SYNTAX (*d++) != (enum syntaxcode) mcnt) goto fail;
1460 break;
1461
1462 case notwordchar:
1463 mcnt = (int) Sword;
1464 goto matchnotsyntax;
1465
1466 case notsyntaxspec:
1467 mcnt = *p++;
1468 matchnotsyntax:
1469 PREFETCH;
1470 if (SYNTAX (*d++) == (enum syntaxcode) mcnt) goto fail;
1471 break;
1472#else
1473 case wordchar:
1474 PREFETCH;
1475 if (SYNTAX (*d++) == 0) goto fail;
1476 break;
1477
1478 case notwordchar:
1479 PREFETCH;
1480 if (SYNTAX (*d++) != 0) goto fail;
1481 break;
1482#endif /* not emacs */
1483
1484 case begbuf:
1485 if (d == string1) /* Note, d cannot equal string2 */
1486 break; /* unless string1 == string2. */
1487 goto fail;
1488
1489 case endbuf:
1490 if (d == end2 || (d == end1 && size2 == 0))
1491 break;
1492 goto fail;
1493
1494 case exactn:
1495 /* Match the next few pattern characters exactly.
1496 mcnt is how many characters to match. */
1497 mcnt = *p++;
1498 if (translate)
1499 {
1500 do
1501 {
1502 PREFETCH;
1503 if (translate[*d++] != *p++) goto fail;
1504 }
1505 while (--mcnt);
1506 }
1507 else
1508 {
1509 do
1510 {
1511 PREFETCH;
1512 if (*d++ != *p++) goto fail;
1513 }
1514 while (--mcnt);
1515 }
1516 break;
1517 }
1518 continue; /* Successfully matched one pattern command; keep matching */
1519
1520 /* Jump here if any matching operation fails. */
1521 fail:
1522 if (stackp != stackb)
1523 /* A restart point is known. Restart there and pop it. */
1524 {
1525 if (!stackp[-2])
1526 { /* If innermost failure point is dormant, flush it and keep looking */
1527 stackp -= 2;
1528 goto fail;
1529 }
1530 d = *--stackp;
1531 p = *--stackp;
1532 if (d >= string1 && d <= end1)
1533 dend = end_match_1;
1534 }
1535 else break; /* Matching at this starting point really fails! */
1536 }
1537 return -1; /* Failure to match */
1538}
1539
1540static int
1541bcmp_translate (s1, s2, len, translate)
1542 unsigned char *s1, *s2;
1543 register int len;
1544 unsigned char *translate;
1545{
1546 register unsigned char *p1 = s1, *p2 = s2;
1547 while (len)
1548 {
1549 if (translate [*p1++] != translate [*p2++]) return 1;
1550 len--;
1551 }
1552 return 0;
1553}
1554\f
1555/* Entry points compatible with bsd4.2 regex library */
1556
1557#ifndef emacs
1558
1559static struct re_pattern_buffer re_comp_buf;
1560
1561char *
1562re_comp (s)
1563 char *s;
1564{
1565 if (!s)
1566 {
1567 if (!re_comp_buf.buffer)
1568 return "No previous regular expression";
1569 return 0;
1570 }
1571
1572 if (!re_comp_buf.buffer)
1573 {
1574 if (!(re_comp_buf.buffer = (char *) malloc (200)))
1575 return "Memory exhausted";
1576 re_comp_buf.allocated = 200;
1577 if (!(re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH)))
1578 return "Memory exhausted";
1579 }
1580 return re_compile_pattern (s, strlen (s), &re_comp_buf);
1581}
1582
1583int
1584re_exec (s)
1585 char *s;
1586{
1587 int len = strlen (s);
1588 return 0 <= re_search (&re_comp_buf, s, len, 0, len, 0);
1589}
1590
1591#endif /* emacs */
1592\f
1593#ifdef test
1594
1595#include <stdio.h>
1596
1597/* Indexed by a character, gives the upper case equivalent of the character */
1598
1599static char upcase[0400] =
1600 { 000, 001, 002, 003, 004, 005, 006, 007,
1601 010, 011, 012, 013, 014, 015, 016, 017,
1602 020, 021, 022, 023, 024, 025, 026, 027,
1603 030, 031, 032, 033, 034, 035, 036, 037,
1604 040, 041, 042, 043, 044, 045, 046, 047,
1605 050, 051, 052, 053, 054, 055, 056, 057,
1606 060, 061, 062, 063, 064, 065, 066, 067,
1607 070, 071, 072, 073, 074, 075, 076, 077,
1608 0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
1609 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
1610 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
1611 0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137,
1612 0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
1613 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
1614 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
1615 0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177,
1616 0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
1617 0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
1618 0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
1619 0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
1620 0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
1621 0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
1622 0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
1623 0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
1624 0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
1625 0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
1626 0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327,
1627 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
1628 0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
1629 0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
1630 0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
1631 0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377
1632 };
1633
1634main (argc, argv)
1635 int argc;
1636 char **argv;
1637{
1638 char pat[80];
1639 struct re_pattern_buffer buf;
1640 int i;
1641 char c;
1642 char fastmap[(1 << BYTEWIDTH)];
1643
1644 /* Allow a command argument to specify the style of syntax. */
1645 if (argc > 1)
1646 obscure_syntax = atoi (argv[1]);
1647
1648 buf.allocated = 40;
1649 buf.buffer = (char *) malloc (buf.allocated);
1650 buf.fastmap = fastmap;
1651 buf.translate = upcase;
1652
1653 while (1)
1654 {
1655 gets (pat);
1656
1657 if (*pat)
1658 {
1659 re_compile_pattern (pat, strlen(pat), &buf);
1660
1661 for (i = 0; i < buf.used; i++)
1662 printchar (buf.buffer[i]);
1663
1664 putchar ('\n');
1665
1666 printf ("%d allocated, %d used.\n", buf.allocated, buf.used);
1667
1668 re_compile_fastmap (&buf);
1669 printf ("Allowed by fastmap: ");
1670 for (i = 0; i < (1 << BYTEWIDTH); i++)
1671 if (fastmap[i]) printchar (i);
1672 putchar ('\n');
1673 }
1674
1675 gets (pat); /* Now read the string to match against */
1676
1677 i = re_match (&buf, pat, strlen (pat), 0, 0);
1678 printf ("Match value %d.\n", i);
1679 }
1680}
1681
1682#ifdef NOTDEF
1683print_buf (bufp)
1684 struct re_pattern_buffer *bufp;
1685{
1686 int i;
1687
1688 printf ("buf is :\n----------------\n");
1689 for (i = 0; i < bufp->used; i++)
1690 printchar (bufp->buffer[i]);
1691
1692 printf ("\n%d allocated, %d used.\n", bufp->allocated, bufp->used);
1693
1694 printf ("Allowed by fastmap: ");
1695 for (i = 0; i < (1 << BYTEWIDTH); i++)
1696 if (bufp->fastmap[i])
1697 printchar (i);
1698 printf ("\nAllowed by translate: ");
1699 if (bufp->translate)
1700 for (i = 0; i < (1 << BYTEWIDTH); i++)
1701 if (bufp->translate[i])
1702 printchar (i);
1703 printf ("\nfastmap is%s accurate\n", bufp->fastmap_accurate ? "" : "n't");
1704 printf ("can %s be null\n----------", bufp->can_be_null ? "" : "not");
1705}
1706#endif
1707
1708printchar (c)
1709 char c;
1710{
1711 if (c < 041 || c >= 0177)
1712 {
1713 putchar ('\\');
1714 putchar (((c >> 6) & 3) + '0');
1715 putchar (((c >> 3) & 7) + '0');
1716 putchar ((c & 7) + '0');
1717 }
1718 else
1719 putchar (c);
1720}
1721
1722error (string)
1723 char *string;
1724{
1725 puts (string);
1726 exit (1);
1727}
1728
1729#endif /* test */
This page took 0.130261 seconds and 4 git commands to generate.