* elf.c (_bfd_elf_make_section_from_shdr): Only set SEC_DATA if
[deliverable/binutils-gdb.git] / gdb / regex.c
CommitLineData
dd3b648e
RP
1/* Extended regular expression matching and search library.
2 Copyright (C) 1985, 1989 Free Software Foundation, Inc.
3
99a7de40
JG
4This program is free software; you can redistribute it and/or modify
5it under the terms of the GNU General Public License as published by
6the Free Software Foundation; either version 2 of the License, or
7(at your option) any later version.
8
9This program is distributed in the hope that it will be useful,
10but WITHOUT ANY WARRANTY; without even the implied warranty of
11MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12GNU General Public License for more details.
13
14You should have received a copy of the GNU General Public License
15along with this program; if not, write to the Free Software
16Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
dd3b648e
RP
17
18/* To test, compile with -Dtest.
19 This Dtestable feature turns this into a self-contained program
20 which reads a pattern, describes how it compiles,
21 then reads a string and searches for it. */
22
23#ifdef emacs
24
25/* The `emacs' switch turns on certain special matching commands
26 that make sense only in emacs. */
27
28#include "config.h"
29#include "lisp.h"
30#include "buffer.h"
31#include "syntax.h"
32
33#else /* not emacs */
34
ba47c66a
PS
35#include "defs.h"
36#include <string.h>
dd3b648e
RP
37
38/*
39 * Define the syntax stuff, so we can do the \<...\> things.
40 */
41
42#ifndef Sword /* must be non-zero in some of the tests below... */
43#define Sword 1
44#endif
45
46#define SYNTAX(c) re_syntax_table[c]
47
48#ifdef SYNTAX_TABLE
49
50char *re_syntax_table;
51
52#else
53
54static char re_syntax_table[256];
55
56static void
57init_syntax_once ()
58{
59 register int c;
60 static int done = 0;
61
62 if (done)
63 return;
64
4ed97c9a 65 memset (re_syntax_table, '\0', sizeof re_syntax_table);
dd3b648e
RP
66
67 for (c = 'a'; c <= 'z'; c++)
68 re_syntax_table[c] = Sword;
69
70 for (c = 'A'; c <= 'Z'; c++)
71 re_syntax_table[c] = Sword;
72
73 for (c = '0'; c <= '9'; c++)
74 re_syntax_table[c] = Sword;
75
76 done = 1;
77}
78
79#endif /* SYNTAX_TABLE */
80#endif /* not emacs */
81
82#include "regex.h"
83
84/* Number of failure points to allocate space for initially,
85 when matching. If this number is exceeded, more space is allocated,
86 so it is not a hard limit. */
87
88#ifndef NFAILURES
89#define NFAILURES 80
90#endif /* NFAILURES */
91
92/* width of a byte in bits */
93
94#define BYTEWIDTH 8
95
96#ifndef SIGN_EXTEND_CHAR
97#define SIGN_EXTEND_CHAR(x) (x)
98#endif
99\f
100static int obscure_syntax = 0;
101
102/* Specify the precise syntax of regexp for compilation.
103 This provides for compatibility for various utilities
104 which historically have different, incompatible syntaxes.
105
106 The argument SYNTAX is a bit-mask containing the two bits
107 RE_NO_BK_PARENS and RE_NO_BK_VBAR. */
108
109int
110re_set_syntax (syntax)
51b57ded 111 int syntax;
dd3b648e
RP
112{
113 int ret;
114
115 ret = obscure_syntax;
116 obscure_syntax = syntax;
117 return ret;
118}
119\f
120/* re_compile_pattern takes a regular-expression string
121 and converts it into a buffer full of byte commands for matching.
122
123 PATTERN is the address of the pattern string
124 SIZE is the length of it.
125 BUFP is a struct re_pattern_buffer * which points to the info
126 on where to store the byte commands.
127 This structure contains a char * which points to the
128 actual space, which should have been obtained with malloc.
129 re_compile_pattern may use realloc to grow the buffer space.
130
131 The number of bytes of commands can be found out by looking in
132 the struct re_pattern_buffer that bufp pointed to,
133 after re_compile_pattern returns.
134*/
135
136#define PATPUSH(ch) (*b++ = (char) (ch))
137
138#define PATFETCH(c) \
139 {if (p == pend) goto end_of_pattern; \
140 c = * (unsigned char *) p++; \
141 if (translate) c = translate[c]; }
142
143#define PATFETCH_RAW(c) \
144 {if (p == pend) goto end_of_pattern; \
145 c = * (unsigned char *) p++; }
146
147#define PATUNFETCH p--
148
ee6d646a
JK
149/* This is not an arbitrary limit: the arguments which represent offsets
150 into the pattern are two bytes long. So if 2^16 bytes turns out to
151 be too small, many things would have to change. */
152#define MAX_BUF_SIZE (1 << 16)
153
154
155/* Extend the buffer by twice its current size via realloc and
156 reset the pointers that pointed into the old block to point to the
157 correct places in the new one. If extending the buffer results in it
158 being larger than MAX_BUF_SIZE, then flag memory exhausted. */
159#define EXTEND_BUFFER \
160 do { \
161 char *old_buffer = bufp->buffer; \
162 if (bufp->allocated == MAX_BUF_SIZE) \
163 goto too_big; \
164 bufp->allocated <<= 1; \
165 if (bufp->allocated > MAX_BUF_SIZE) \
166 bufp->allocated = MAX_BUF_SIZE; \
167 bufp->buffer = (char *) realloc (bufp->buffer, bufp->allocated);\
168 if (bufp->buffer == NULL) \
169 goto memory_exhausted; \
170 /* If the buffer moved, move all the pointers into it. */ \
171 if (old_buffer != bufp->buffer) \
172 { \
173 b = (b - old_buffer) + bufp->buffer; \
174 begalt = (begalt - old_buffer) + bufp->buffer; \
175 if (fixup_jump) \
176 fixup_jump = (fixup_jump - old_buffer) + bufp->buffer;\
177 if (laststart) \
178 laststart = (laststart - old_buffer) + bufp->buffer; \
179 if (pending_exact) \
180 pending_exact = (pending_exact - old_buffer) + bufp->buffer; \
181 } \
182 } while (0)
dd3b648e 183
50e0dc41 184static void store_jump (), insert_jump ();
dd3b648e
RP
185
186char *
187re_compile_pattern (pattern, size, bufp)
188 char *pattern;
189 int size;
190 struct re_pattern_buffer *bufp;
191{
192 register char *b = bufp->buffer;
193 register char *p = pattern;
194 char *pend = pattern + size;
195 register unsigned c, c1;
196 char *p1;
197 unsigned char *translate = (unsigned char *) bufp->translate;
198
199 /* address of the count-byte of the most recently inserted "exactn" command.
200 This makes it possible to tell whether a new exact-match character
201 can be added to that command or requires a new "exactn" command. */
202
203 char *pending_exact = 0;
204
205 /* address of the place where a forward-jump should go
206 to the end of the containing expression.
207 Each alternative of an "or", except the last, ends with a forward-jump
208 of this sort. */
209
210 char *fixup_jump = 0;
211
212 /* address of start of the most recently finished expression.
213 This tells postfix * where to find the start of its operand. */
214
215 char *laststart = 0;
216
217 /* In processing a repeat, 1 means zero matches is allowed */
218
219 char zero_times_ok;
220
221 /* In processing a repeat, 1 means many matches is allowed */
222
223 char many_times_ok;
224
225 /* address of beginning of regexp, or inside of last \( */
226
227 char *begalt = b;
228
229 /* Stack of information saved by \( and restored by \).
230 Four stack elements are pushed by each \(:
231 First, the value of b.
232 Second, the value of fixup_jump.
233 Third, the value of regnum.
234 Fourth, the value of begalt. */
235
236 int stackb[40];
237 int *stackp = stackb;
238 int *stacke = stackb + 40;
239 int *stackt;
240
241 /* Counts \('s as they are encountered. Remembered for the matching \),
242 where it becomes the "register number" to put in the stop_memory command */
243
244 int regnum = 1;
245
246 bufp->fastmap_accurate = 0;
247
248#ifndef emacs
249#ifndef SYNTAX_TABLE
250 /*
251 * Initialize the syntax table.
252 */
253 init_syntax_once();
254#endif
255#endif
256
257 if (bufp->allocated == 0)
258 {
259 bufp->allocated = 28;
260 if (bufp->buffer)
261 /* EXTEND_BUFFER loses when bufp->allocated is 0 */
262 bufp->buffer = (char *) realloc (bufp->buffer, 28);
263 else
264 /* Caller did not allocate a buffer. Do it for him */
265 bufp->buffer = (char *) malloc (28);
266 if (!bufp->buffer) goto memory_exhausted;
267 begalt = b = bufp->buffer;
268 }
269
270 while (p != pend)
271 {
272 if (b - bufp->buffer > bufp->allocated - 10)
273 /* Note that EXTEND_BUFFER clobbers c */
274 EXTEND_BUFFER;
275
276 PATFETCH (c);
277
278 switch (c)
279 {
280 case '$':
281 if (obscure_syntax & RE_TIGHT_VBAR)
282 {
283 if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS) && p != pend)
284 goto normal_char;
285 /* Make operand of last vbar end before this `$'. */
286 if (fixup_jump)
287 store_jump (fixup_jump, jump, b);
288 fixup_jump = 0;
289 PATPUSH (endline);
290 break;
291 }
292
293 /* $ means succeed if at end of line, but only in special contexts.
294 If randomly in the middle of a pattern, it is a normal character. */
295 if (p == pend || *p == '\n'
296 || (obscure_syntax & RE_CONTEXT_INDEP_OPS)
297 || (obscure_syntax & RE_NO_BK_PARENS
298 ? *p == ')'
299 : *p == '\\' && p[1] == ')')
300 || (obscure_syntax & RE_NO_BK_VBAR
301 ? *p == '|'
302 : *p == '\\' && p[1] == '|'))
303 {
304 PATPUSH (endline);
305 break;
306 }
307 goto normal_char;
308
309 case '^':
310 /* ^ means succeed if at beg of line, but only if no preceding pattern. */
311
312 if (laststart && p[-2] != '\n'
313 && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
314 goto normal_char;
315 if (obscure_syntax & RE_TIGHT_VBAR)
316 {
317 if (p != pattern + 1
318 && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
319 goto normal_char;
320 PATPUSH (begline);
321 begalt = b;
322 }
323 else
324 PATPUSH (begline);
325 break;
326
327 case '+':
328 case '?':
329 if (obscure_syntax & RE_BK_PLUS_QM)
330 goto normal_char;
331 handle_plus:
332 case '*':
333 /* If there is no previous pattern, char not special. */
334 if (!laststart && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
335 goto normal_char;
336 /* If there is a sequence of repetition chars,
337 collapse it down to equivalent to just one. */
338 zero_times_ok = 0;
339 many_times_ok = 0;
340 while (1)
341 {
342 zero_times_ok |= c != '+';
343 many_times_ok |= c != '?';
344 if (p == pend)
345 break;
346 PATFETCH (c);
347 if (c == '*')
348 ;
349 else if (!(obscure_syntax & RE_BK_PLUS_QM)
350 && (c == '+' || c == '?'))
351 ;
352 else if ((obscure_syntax & RE_BK_PLUS_QM)
353 && c == '\\')
354 {
355 int c1;
356 PATFETCH (c1);
357 if (!(c1 == '+' || c1 == '?'))
358 {
359 PATUNFETCH;
360 PATUNFETCH;
361 break;
362 }
363 c = c1;
364 }
365 else
366 {
367 PATUNFETCH;
368 break;
369 }
370 }
371
372 /* Star, etc. applied to an empty pattern is equivalent
373 to an empty pattern. */
374 if (!laststart)
375 break;
376
377 /* Now we know whether 0 matches is allowed,
378 and whether 2 or more matches is allowed. */
379 if (many_times_ok)
380 {
381 /* If more than one repetition is allowed,
382 put in a backward jump at the end. */
383 store_jump (b, maybe_finalize_jump, laststart - 3);
384 b += 3;
385 }
386 insert_jump (on_failure_jump, laststart, b + 3, b);
387 pending_exact = 0;
388 b += 3;
389 if (!zero_times_ok)
390 {
391 /* At least one repetition required: insert before the loop
392 a skip over the initial on-failure-jump instruction */
393 insert_jump (dummy_failure_jump, laststart, laststart + 6, b);
394 b += 3;
395 }
396 break;
397
398 case '.':
399 laststart = b;
400 PATPUSH (anychar);
401 break;
402
403 case '[':
404 while (b - bufp->buffer
405 > bufp->allocated - 3 - (1 << BYTEWIDTH) / BYTEWIDTH)
406 /* Note that EXTEND_BUFFER clobbers c */
407 EXTEND_BUFFER;
408
409 laststart = b;
410 if (*p == '^')
411 PATPUSH (charset_not), p++;
412 else
413 PATPUSH (charset);
414 p1 = p;
415
416 PATPUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
417 /* Clear the whole map */
4ed97c9a 418 memset (b, '\0', (1 << BYTEWIDTH) / BYTEWIDTH);
dd3b648e
RP
419 /* Read in characters and ranges, setting map bits */
420 while (1)
421 {
422 PATFETCH (c);
423 if (c == ']' && p != p1 + 1) break;
424 if (*p == '-' && p[1] != ']')
425 {
426 PATFETCH (c1);
427 PATFETCH (c1);
428 while (c <= c1)
429 b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH), c++;
430 }
431 else
432 {
433 b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH);
434 }
435 }
436 /* Discard any bitmap bytes that are all 0 at the end of the map.
437 Decrement the map-length byte too. */
438 while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
439 b[-1]--;
440 b += b[-1];
441 break;
442
443 case '(':
444 if (! (obscure_syntax & RE_NO_BK_PARENS))
445 goto normal_char;
446 else
447 goto handle_open;
448
449 case ')':
450 if (! (obscure_syntax & RE_NO_BK_PARENS))
451 goto normal_char;
452 else
453 goto handle_close;
454
455 case '\n':
456 if (! (obscure_syntax & RE_NEWLINE_OR))
457 goto normal_char;
458 else
459 goto handle_bar;
460
461 case '|':
462 if (! (obscure_syntax & RE_NO_BK_VBAR))
463 goto normal_char;
464 else
465 goto handle_bar;
466
467 case '\\':
468 if (p == pend) goto invalid_pattern;
469 PATFETCH_RAW (c);
470 switch (c)
471 {
472 case '(':
473 if (obscure_syntax & RE_NO_BK_PARENS)
474 goto normal_backsl;
475 handle_open:
476 if (stackp == stacke) goto nesting_too_deep;
477 if (regnum < RE_NREGS)
478 {
479 PATPUSH (start_memory);
480 PATPUSH (regnum);
481 }
482 *stackp++ = b - bufp->buffer;
483 *stackp++ = fixup_jump ? fixup_jump - bufp->buffer + 1 : 0;
484 *stackp++ = regnum++;
485 *stackp++ = begalt - bufp->buffer;
486 fixup_jump = 0;
487 laststart = 0;
488 begalt = b;
489 break;
490
491 case ')':
492 if (obscure_syntax & RE_NO_BK_PARENS)
493 goto normal_backsl;
494 handle_close:
495 if (stackp == stackb) goto unmatched_close;
496 begalt = *--stackp + bufp->buffer;
497 if (fixup_jump)
498 store_jump (fixup_jump, jump, b);
499 if (stackp[-1] < RE_NREGS)
500 {
501 PATPUSH (stop_memory);
502 PATPUSH (stackp[-1]);
503 }
504 stackp -= 2;
505 fixup_jump = 0;
506 if (*stackp)
507 fixup_jump = *stackp + bufp->buffer - 1;
508 laststart = *--stackp + bufp->buffer;
509 break;
510
511 case '|':
512 if (obscure_syntax & RE_NO_BK_VBAR)
513 goto normal_backsl;
514 handle_bar:
515 insert_jump (on_failure_jump, begalt, b + 6, b);
516 pending_exact = 0;
517 b += 3;
518 if (fixup_jump)
519 store_jump (fixup_jump, jump, b);
520 fixup_jump = b;
521 b += 3;
522 laststart = 0;
523 begalt = b;
524 break;
525
526#ifdef emacs
527 case '=':
528 PATPUSH (at_dot);
529 break;
530
531 case 's':
532 laststart = b;
533 PATPUSH (syntaxspec);
534 PATFETCH (c);
535 PATPUSH (syntax_spec_code[c]);
536 break;
537
538 case 'S':
539 laststart = b;
540 PATPUSH (notsyntaxspec);
541 PATFETCH (c);
542 PATPUSH (syntax_spec_code[c]);
543 break;
544#endif /* emacs */
545
546 case 'w':
547 laststart = b;
548 PATPUSH (wordchar);
549 break;
550
551 case 'W':
552 laststart = b;
553 PATPUSH (notwordchar);
554 break;
555
556 case '<':
557 PATPUSH (wordbeg);
558 break;
559
560 case '>':
561 PATPUSH (wordend);
562 break;
563
564 case 'b':
565 PATPUSH (wordbound);
566 break;
567
568 case 'B':
569 PATPUSH (notwordbound);
570 break;
571
572 case '`':
573 PATPUSH (begbuf);
574 break;
575
576 case '\'':
577 PATPUSH (endbuf);
578 break;
579
580 case '1':
581 case '2':
582 case '3':
583 case '4':
584 case '5':
585 case '6':
586 case '7':
587 case '8':
588 case '9':
589 c1 = c - '0';
590 if (c1 >= regnum)
591 goto normal_char;
592 for (stackt = stackp - 2; stackt > stackb; stackt -= 4)
593 if (*stackt == c1)
594 goto normal_char;
595 laststart = b;
596 PATPUSH (duplicate);
597 PATPUSH (c1);
598 break;
599
600 case '+':
601 case '?':
602 if (obscure_syntax & RE_BK_PLUS_QM)
603 goto handle_plus;
604
605 default:
606 normal_backsl:
607 /* You might think it would be useful for \ to mean
608 not to translate; but if we don't translate it
609 it will never match anything. */
610 if (translate) c = translate[c];
611 goto normal_char;
612 }
613 break;
614
615 default:
616 normal_char:
617 if (!pending_exact || pending_exact + *pending_exact + 1 != b
618 || *pending_exact == 0177 || *p == '*' || *p == '^'
619 || ((obscure_syntax & RE_BK_PLUS_QM)
620 ? *p == '\\' && (p[1] == '+' || p[1] == '?')
621 : (*p == '+' || *p == '?')))
622 {
623 laststart = b;
624 PATPUSH (exactn);
625 pending_exact = b;
626 PATPUSH (0);
627 }
628 PATPUSH (c);
629 (*pending_exact)++;
630 }
631 }
632
633 if (fixup_jump)
634 store_jump (fixup_jump, jump, b);
635
636 if (stackp != stackb) goto unmatched_open;
637
638 bufp->used = b - bufp->buffer;
639 return 0;
640
641 invalid_pattern:
642 return "Invalid regular expression";
643
644 unmatched_open:
645 return "Unmatched \\(";
646
647 unmatched_close:
648 return "Unmatched \\)";
649
650 end_of_pattern:
651 return "Premature end of regular expression";
652
653 nesting_too_deep:
654 return "Nesting too deep";
655
656 too_big:
657 return "Regular expression too big";
658
659 memory_exhausted:
660 return "Memory exhausted";
661}
662
663/* Store where `from' points a jump operation to jump to where `to' points.
664 `opcode' is the opcode to store. */
665
50e0dc41 666static void
dd3b648e
RP
667store_jump (from, opcode, to)
668 char *from, *to;
669 char opcode;
670{
671 from[0] = opcode;
672 from[1] = (to - (from + 3)) & 0377;
673 from[2] = (to - (from + 3)) >> 8;
674}
675
676/* Open up space at char FROM, and insert there a jump to TO.
677 CURRENT_END gives te end of the storage no in use,
678 so we know how much data to copy up.
679 OP is the opcode of the jump to insert.
680
681 If you call this function, you must zero out pending_exact. */
682
50e0dc41 683static void
dd3b648e
RP
684insert_jump (op, from, to, current_end)
685 char op;
686 char *from, *to, *current_end;
687{
688 register char *pto = current_end + 3;
689 register char *pfrom = current_end;
690 while (pfrom != from)
691 *--pto = *--pfrom;
692 store_jump (from, op, to);
693}
694\f
695/* Given a pattern, compute a fastmap from it.
696 The fastmap records which of the (1 << BYTEWIDTH) possible characters
697 can start a string that matches the pattern.
698 This fastmap is used by re_search to skip quickly over totally implausible text.
699
700 The caller must supply the address of a (1 << BYTEWIDTH)-byte data area
701 as bufp->fastmap.
702 The other components of bufp describe the pattern to be used. */
703
704void
705re_compile_fastmap (bufp)
706 struct re_pattern_buffer *bufp;
707{
708 unsigned char *pattern = (unsigned char *) bufp->buffer;
709 int size = bufp->used;
710 register char *fastmap = bufp->fastmap;
711 register unsigned char *p = pattern;
712 register unsigned char *pend = pattern + size;
51b57ded 713 register int j;
dd3b648e
RP
714 unsigned char *translate = (unsigned char *) bufp->translate;
715
716 unsigned char *stackb[NFAILURES];
717 unsigned char **stackp = stackb;
718
4ed97c9a 719 memset (fastmap, '\0', (1 << BYTEWIDTH));
dd3b648e
RP
720 bufp->fastmap_accurate = 1;
721 bufp->can_be_null = 0;
722
723 while (p)
724 {
725 if (p == pend)
726 {
727 bufp->can_be_null = 1;
728 break;
729 }
730#ifdef SWITCH_ENUM_BUG
731 switch ((int) ((enum regexpcode) *p++))
732#else
733 switch ((enum regexpcode) *p++)
734#endif
735 {
736 case exactn:
737 if (translate)
738 fastmap[translate[p[1]]] = 1;
739 else
740 fastmap[p[1]] = 1;
741 break;
742
743 case begline:
744 case before_dot:
745 case at_dot:
746 case after_dot:
747 case begbuf:
748 case endbuf:
749 case wordbound:
750 case notwordbound:
751 case wordbeg:
752 case wordend:
753 continue;
754
755 case endline:
756 if (translate)
757 fastmap[translate['\n']] = 1;
758 else
759 fastmap['\n'] = 1;
760 if (bufp->can_be_null != 1)
761 bufp->can_be_null = 2;
762 break;
763
764 case finalize_jump:
765 case maybe_finalize_jump:
766 case jump:
767 case dummy_failure_jump:
768 bufp->can_be_null = 1;
769 j = *p++ & 0377;
770 j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
771 p += j + 1; /* The 1 compensates for missing ++ above */
772 if (j > 0)
773 continue;
774 /* Jump backward reached implies we just went through
775 the body of a loop and matched nothing.
776 Opcode jumped to should be an on_failure_jump.
777 Just treat it like an ordinary jump.
778 For a * loop, it has pushed its failure point already;
779 if so, discard that as redundant. */
780 if ((enum regexpcode) *p != on_failure_jump)
781 continue;
782 p++;
783 j = *p++ & 0377;
784 j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
785 p += j + 1; /* The 1 compensates for missing ++ above */
786 if (stackp != stackb && *stackp == p)
787 stackp--;
788 continue;
789
790 case on_failure_jump:
791 j = *p++ & 0377;
792 j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
793 p++;
794 *++stackp = p + j;
795 continue;
796
797 case start_memory:
798 case stop_memory:
799 p++;
800 continue;
801
802 case duplicate:
803 bufp->can_be_null = 1;
804 fastmap['\n'] = 1;
805 case anychar:
806 for (j = 0; j < (1 << BYTEWIDTH); j++)
807 if (j != '\n')
808 fastmap[j] = 1;
809 if (bufp->can_be_null)
810 return;
811 /* Don't return; check the alternative paths
812 so we can set can_be_null if appropriate. */
813 break;
814
815 case wordchar:
816 for (j = 0; j < (1 << BYTEWIDTH); j++)
817 if (SYNTAX (j) == Sword)
818 fastmap[j] = 1;
819 break;
820
821 case notwordchar:
822 for (j = 0; j < (1 << BYTEWIDTH); j++)
823 if (SYNTAX (j) != Sword)
824 fastmap[j] = 1;
825 break;
826
827#ifdef emacs
828 case syntaxspec:
829 k = *p++;
830 for (j = 0; j < (1 << BYTEWIDTH); j++)
831 if (SYNTAX (j) == (enum syntaxcode) k)
832 fastmap[j] = 1;
833 break;
834
835 case notsyntaxspec:
836 k = *p++;
837 for (j = 0; j < (1 << BYTEWIDTH); j++)
838 if (SYNTAX (j) != (enum syntaxcode) k)
839 fastmap[j] = 1;
840 break;
841#endif /* emacs */
842
843 case charset:
844 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
845 if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
846 {
847 if (translate)
848 fastmap[translate[j]] = 1;
849 else
850 fastmap[j] = 1;
851 }
852 break;
853
854 case charset_not:
855 /* Chars beyond end of map must be allowed */
856 for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
857 if (translate)
858 fastmap[translate[j]] = 1;
859 else
860 fastmap[j] = 1;
861
862 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
863 if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
864 {
865 if (translate)
866 fastmap[translate[j]] = 1;
867 else
868 fastmap[j] = 1;
869 }
870 break;
871 }
872
873 /* Get here means we have successfully found the possible starting characters
874 of one path of the pattern. We need not follow this path any farther.
875 Instead, look at the next alternative remembered in the stack. */
876 if (stackp != stackb)
877 p = *stackp--;
878 else
879 break;
880 }
881}
882\f
883/* Like re_search_2, below, but only one string is specified. */
884
885int
886re_search (pbufp, string, size, startpos, range, regs)
887 struct re_pattern_buffer *pbufp;
888 char *string;
889 int size, startpos, range;
890 struct re_registers *regs;
891{
892 return re_search_2 (pbufp, 0, 0, string, size, startpos, range, regs, size);
893}
894
895/* Like re_match_2 but tries first a match starting at index STARTPOS,
896 then at STARTPOS + 1, and so on.
897 RANGE is the number of places to try before giving up.
898 If RANGE is negative, the starting positions tried are
899 STARTPOS, STARTPOS - 1, etc.
900 It is up to the caller to make sure that range is not so large
901 as to take the starting position outside of the input strings.
902
903The value returned is the position at which the match was found,
904 or -1 if no match was found,
905 or -2 if error (such as failure stack overflow). */
906
907int
908re_search_2 (pbufp, string1, size1, string2, size2, startpos, range, regs, mstop)
909 struct re_pattern_buffer *pbufp;
910 char *string1, *string2;
911 int size1, size2;
912 int startpos;
913 register int range;
914 struct re_registers *regs;
915 int mstop;
916{
917 register char *fastmap = pbufp->fastmap;
918 register unsigned char *translate = (unsigned char *) pbufp->translate;
919 int total = size1 + size2;
920 int val;
921
922 /* Update the fastmap now if not correct already */
923 if (fastmap && !pbufp->fastmap_accurate)
924 re_compile_fastmap (pbufp);
925
926 /* Don't waste time in a long search for a pattern
927 that says it is anchored. */
928 if (pbufp->used > 0 && (enum regexpcode) pbufp->buffer[0] == begbuf
929 && range > 0)
930 {
931 if (startpos > 0)
932 return -1;
933 else
934 range = 1;
935 }
936
937 while (1)
938 {
939 /* If a fastmap is supplied, skip quickly over characters
940 that cannot possibly be the start of a match.
941 Note, however, that if the pattern can possibly match
942 the null string, we must test it at each starting point
943 so that we take the first null string we get. */
944
945 if (fastmap && startpos < total && pbufp->can_be_null != 1)
946 {
947 if (range > 0)
948 {
949 register int lim = 0;
950 register unsigned char *p;
951 int irange = range;
952 if (startpos < size1 && startpos + range >= size1)
953 lim = range - (size1 - startpos);
954
955 p = ((unsigned char *)
956 &(startpos >= size1 ? string2 - size1 : string1)[startpos]);
957
958 if (translate)
959 {
960 while (range > lim && !fastmap[translate[*p++]])
961 range--;
962 }
963 else
964 {
965 while (range > lim && !fastmap[*p++])
966 range--;
967 }
968 startpos += irange - range;
969 }
970 else
971 {
972 register unsigned char c;
973 if (startpos >= size1)
974 c = string2[startpos - size1];
975 else
976 c = string1[startpos];
977 c &= 0xff;
978 if (translate ? !fastmap[translate[c]] : !fastmap[c])
979 goto advance;
980 }
981 }
982
983 if (range >= 0 && startpos == total
984 && fastmap && pbufp->can_be_null == 0)
985 return -1;
986
987 val = re_match_2 (pbufp, string1, size1, string2, size2, startpos, regs, mstop);
988 if (0 <= val)
989 {
990 if (val == -2)
991 return -2;
992 return startpos;
993 }
994
995#ifdef C_ALLOCA
996 alloca (0);
997#endif /* C_ALLOCA */
998
999 advance:
1000 if (!range) break;
1001 if (range > 0) range--, startpos++; else range++, startpos--;
1002 }
1003 return -1;
1004}
1005\f
1006#ifndef emacs /* emacs never uses this */
1007int
1008re_match (pbufp, string, size, pos, regs)
1009 struct re_pattern_buffer *pbufp;
1010 char *string;
1011 int size, pos;
1012 struct re_registers *regs;
1013{
1014 return re_match_2 (pbufp, 0, 0, string, size, pos, regs, size);
1015}
1016#endif /* emacs */
1017
1018/* Maximum size of failure stack. Beyond this, overflow is an error. */
1019
1020int re_max_failures = 2000;
1021
9823e3f4 1022static int memcmp_translate();
dd3b648e
RP
1023/* Match the pattern described by PBUFP
1024 against data which is the virtual concatenation of STRING1 and STRING2.
1025 SIZE1 and SIZE2 are the sizes of the two data strings.
1026 Start the match at position POS.
1027 Do not consider matching past the position MSTOP.
1028
1029 If pbufp->fastmap is nonzero, then it had better be up to date.
1030
1031 The reason that the data to match are specified as two components
1032 which are to be regarded as concatenated
1033 is so this function can be used directly on the contents of an Emacs buffer.
1034
1035 -1 is returned if there is no match. -2 is returned if there is
1036 an error (such as match stack overflow). Otherwise the value is the length
1037 of the substring which was matched. */
1038
1039int
1040re_match_2 (pbufp, string1, size1, string2, size2, pos, regs, mstop)
1041 struct re_pattern_buffer *pbufp;
1042 unsigned char *string1, *string2;
1043 int size1, size2;
1044 int pos;
1045 struct re_registers *regs;
1046 int mstop;
1047{
1048 register unsigned char *p = (unsigned char *) pbufp->buffer;
1049 register unsigned char *pend = p + pbufp->used;
1050 /* End of first string */
1051 unsigned char *end1;
1052 /* End of second string */
1053 unsigned char *end2;
1054 /* Pointer just past last char to consider matching */
1055 unsigned char *end_match_1, *end_match_2;
1056 register unsigned char *d, *dend;
1057 register int mcnt;
1058 unsigned char *translate = (unsigned char *) pbufp->translate;
1059
1060 /* Failure point stack. Each place that can handle a failure further down the line
1061 pushes a failure point on this stack. It consists of two char *'s.
1062 The first one pushed is where to resume scanning the pattern;
1063 the second pushed is where to resume scanning the strings.
1064 If the latter is zero, the failure point is a "dummy".
1065 If a failure happens and the innermost failure point is dormant,
1066 it discards that failure point and tries the next one. */
1067
1068 unsigned char *initial_stack[2 * NFAILURES];
1069 unsigned char **stackb = initial_stack;
1070 unsigned char **stackp = stackb, **stacke = &stackb[2 * NFAILURES];
1071
1072 /* Information on the "contents" of registers.
1073 These are pointers into the input strings; they record
1074 just what was matched (on this attempt) by some part of the pattern.
1075 The start_memory command stores the start of a register's contents
1076 and the stop_memory command stores the end.
1077
1078 At that point, regstart[regnum] points to the first character in the register,
1079 regend[regnum] points to the first character beyond the end of the register,
1080 regstart_seg1[regnum] is true iff regstart[regnum] points into string1,
1081 and regend_seg1[regnum] is true iff regend[regnum] points into string1. */
1082
1083 unsigned char *regstart[RE_NREGS];
1084 unsigned char *regend[RE_NREGS];
1085 unsigned char regstart_seg1[RE_NREGS], regend_seg1[RE_NREGS];
1086
1087 /* Set up pointers to ends of strings.
1088 Don't allow the second string to be empty unless both are empty. */
1089 if (!size2)
1090 {
1091 string2 = string1;
1092 size2 = size1;
1093 string1 = 0;
1094 size1 = 0;
1095 }
1096 end1 = string1 + size1;
1097 end2 = string2 + size2;
1098
1099 /* Compute where to stop matching, within the two strings */
1100 if (mstop <= size1)
1101 {
1102 end_match_1 = string1 + mstop;
1103 end_match_2 = string2;
1104 }
1105 else
1106 {
1107 end_match_1 = end1;
1108 end_match_2 = string2 + mstop - size1;
1109 }
1110
1111 /* Initialize \) text positions to -1
1112 to mark ones that no \( or \) has been seen for. */
1113
1114 for (mcnt = 0; mcnt < sizeof (regend) / sizeof (*regend); mcnt++)
1115 regend[mcnt] = (unsigned char *) -1;
1116
1117 /* `p' scans through the pattern as `d' scans through the data.
1118 `dend' is the end of the input string that `d' points within.
1119 `d' is advanced into the following input string whenever necessary,
1120 but this happens before fetching;
1121 therefore, at the beginning of the loop,
1122 `d' can be pointing at the end of a string,
1123 but it cannot equal string2. */
1124
1125 if (pos <= size1)
1126 d = string1 + pos, dend = end_match_1;
1127 else
1128 d = string2 + pos - size1, dend = end_match_2;
1129
1130/* Write PREFETCH; just before fetching a character with *d. */
1131#define PREFETCH \
1132 while (d == dend) \
1133 { if (dend == end_match_2) goto fail; /* end of string2 => failure */ \
1134 d = string2; /* end of string1 => advance to string2. */ \
1135 dend = end_match_2; }
1136
1137 /* This loop loops over pattern commands.
1138 It exits by returning from the function if match is complete,
1139 or it drops through if match fails at this starting point in the input data. */
1140
1141 while (1)
1142 {
1143 if (p == pend)
1144 /* End of pattern means we have succeeded! */
1145 {
1146 /* If caller wants register contents data back, convert it to indices */
1147 if (regs)
1148 {
1149 regs->start[0] = pos;
1150 if (dend == end_match_1)
1151 regs->end[0] = d - string1;
1152 else
1153 regs->end[0] = d - string2 + size1;
1154 for (mcnt = 1; mcnt < RE_NREGS; mcnt++)
1155 {
1156 if (regend[mcnt] == (unsigned char *) -1)
1157 {
1158 regs->start[mcnt] = -1;
1159 regs->end[mcnt] = -1;
1160 continue;
1161 }
1162 if (regstart_seg1[mcnt])
1163 regs->start[mcnt] = regstart[mcnt] - string1;
1164 else
1165 regs->start[mcnt] = regstart[mcnt] - string2 + size1;
1166 if (regend_seg1[mcnt])
1167 regs->end[mcnt] = regend[mcnt] - string1;
1168 else
1169 regs->end[mcnt] = regend[mcnt] - string2 + size1;
1170 }
1171 }
1172 if (dend == end_match_1)
1173 return (d - string1 - pos);
1174 else
1175 return d - string2 + size1 - pos;
1176 }
1177
1178 /* Otherwise match next pattern command */
1179#ifdef SWITCH_ENUM_BUG
1180 switch ((int) ((enum regexpcode) *p++))
1181#else
1182 switch ((enum regexpcode) *p++)
1183#endif
1184 {
1185
1186 /* \( is represented by a start_memory, \) by a stop_memory.
1187 Both of those commands contain a "register number" argument.
1188 The text matched within the \( and \) is recorded under that number.
1189 Then, \<digit> turns into a `duplicate' command which
1190 is followed by the numeric value of <digit> as the register number. */
1191
1192 case start_memory:
1193 regstart[*p] = d;
1194 regstart_seg1[*p++] = (dend == end_match_1);
1195 break;
1196
1197 case stop_memory:
1198 regend[*p] = d;
1199 regend_seg1[*p++] = (dend == end_match_1);
1200 break;
1201
1202 case duplicate:
1203 {
1204 int regno = *p++; /* Get which register to match against */
1205 register unsigned char *d2, *dend2;
1206
1207 d2 = regstart[regno];
1208 dend2 = ((regstart_seg1[regno] == regend_seg1[regno])
1209 ? regend[regno] : end_match_1);
1210 while (1)
1211 {
1212 /* Advance to next segment in register contents, if necessary */
1213 while (d2 == dend2)
1214 {
1215 if (dend2 == end_match_2) break;
1216 if (dend2 == regend[regno]) break;
1217 d2 = string2, dend2 = regend[regno]; /* end of string1 => advance to string2. */
1218 }
1219 /* At end of register contents => success */
1220 if (d2 == dend2) break;
1221
1222 /* Advance to next segment in data being matched, if necessary */
1223 PREFETCH;
1224
1225 /* mcnt gets # consecutive chars to compare */
1226 mcnt = dend - d;
1227 if (mcnt > dend2 - d2)
1228 mcnt = dend2 - d2;
1229 /* Compare that many; failure if mismatch, else skip them. */
9823e3f4 1230 if (translate ? memcmp_translate (d, d2, mcnt, translate) : memcmp (d, d2, mcnt))
dd3b648e
RP
1231 goto fail;
1232 d += mcnt, d2 += mcnt;
1233 }
1234 }
1235 break;
1236
1237 case anychar:
1238 /* fetch a data character */
1239 PREFETCH;
1240 /* Match anything but a newline. */
1241 if ((translate ? translate[*d++] : *d++) == '\n')
1242 goto fail;
1243 break;
1244
1245 case charset:
1246 case charset_not:
1247 {
1248 /* Nonzero for charset_not */
1249 int not = 0;
1250 register int c;
1251 if (*(p - 1) == (unsigned char) charset_not)
1252 not = 1;
1253
1254 /* fetch a data character */
1255 PREFETCH;
1256
1257 if (translate)
1258 c = translate [*d];
1259 else
1260 c = *d;
1261
1262 if (c < *p * BYTEWIDTH
1263 && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
1264 not = !not;
1265
1266 p += 1 + *p;
1267
1268 if (!not) goto fail;
1269 d++;
1270 break;
1271 }
1272
1273 case begline:
1274 if (d == string1 || d[-1] == '\n')
1275 break;
1276 goto fail;
1277
1278 case endline:
1279 if (d == end2
1280 || (d == end1 ? (size2 == 0 || *string2 == '\n') : *d == '\n'))
1281 break;
1282 goto fail;
1283
1284 /* "or" constructs ("|") are handled by starting each alternative
1285 with an on_failure_jump that points to the start of the next alternative.
1286 Each alternative except the last ends with a jump to the joining point.
1287 (Actually, each jump except for the last one really jumps
1288 to the following jump, because tensioning the jumps is a hassle.) */
1289
1290 /* The start of a stupid repeat has an on_failure_jump that points
1291 past the end of the repeat text.
1292 This makes a failure point so that, on failure to match a repetition,
1293 matching restarts past as many repetitions have been found
1294 with no way to fail and look for another one. */
1295
1296 /* A smart repeat is similar but loops back to the on_failure_jump
1297 so that each repetition makes another failure point. */
1298
1299 case on_failure_jump:
1300 if (stackp == stacke)
1301 {
1302 unsigned char **stackx;
1303 if (stacke - stackb > re_max_failures * 2)
1304 return -2;
1305 stackx = (unsigned char **) alloca (2 * (stacke - stackb)
1306 * sizeof (char *));
ade40d31 1307 memcpy (stackx, stackb, (stacke - stackb) * sizeof (char *));
dd3b648e
RP
1308 stackp = stackx + (stackp - stackb);
1309 stacke = stackx + 2 * (stacke - stackb);
1310 stackb = stackx;
1311 }
1312 mcnt = *p++ & 0377;
1313 mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
1314 p++;
1315 *stackp++ = mcnt + p;
1316 *stackp++ = d;
1317 break;
1318
1319 /* The end of a smart repeat has an maybe_finalize_jump back.
1320 Change it either to a finalize_jump or an ordinary jump. */
1321
1322 case maybe_finalize_jump:
1323 mcnt = *p++ & 0377;
1324 mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
1325 p++;
1326 {
1327 register unsigned char *p2 = p;
1328 /* Compare what follows with the begining of the repeat.
1329 If we can establish that there is nothing that they would
1330 both match, we can change to finalize_jump */
1331 while (p2 != pend
1332 && (*p2 == (unsigned char) stop_memory
1333 || *p2 == (unsigned char) start_memory))
1334 p2++;
1335 if (p2 == pend)
1336 p[-3] = (unsigned char) finalize_jump;
1337 else if (*p2 == (unsigned char) exactn
1338 || *p2 == (unsigned char) endline)
1339 {
1340 register int c = *p2 == (unsigned char) endline ? '\n' : p2[2];
1341 register unsigned char *p1 = p + mcnt;
1342 /* p1[0] ... p1[2] are an on_failure_jump.
1343 Examine what follows that */
1344 if (p1[3] == (unsigned char) exactn && p1[5] != c)
1345 p[-3] = (unsigned char) finalize_jump;
1346 else if (p1[3] == (unsigned char) charset
1347 || p1[3] == (unsigned char) charset_not)
1348 {
1349 int not = p1[3] == (unsigned char) charset_not;
1350 if (c < p1[4] * BYTEWIDTH
1351 && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
1352 not = !not;
1353 /* not is 1 if c would match */
1354 /* That means it is not safe to finalize */
1355 if (!not)
1356 p[-3] = (unsigned char) finalize_jump;
1357 }
1358 }
1359 }
1360 p -= 2;
1361 if (p[-1] != (unsigned char) finalize_jump)
1362 {
1363 p[-1] = (unsigned char) jump;
1364 goto nofinalize;
1365 }
1366
1367 /* The end of a stupid repeat has a finalize-jump
1368 back to the start, where another failure point will be made
1369 which will point after all the repetitions found so far. */
1370
1371 case finalize_jump:
1372 stackp -= 2;
1373
1374 case jump:
1375 nofinalize:
1376 mcnt = *p++ & 0377;
1377 mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
1378 p += mcnt + 1; /* The 1 compensates for missing ++ above */
1379 break;
1380
1381 case dummy_failure_jump:
1382 if (stackp == stacke)
1383 {
1384 unsigned char **stackx
1385 = (unsigned char **) alloca (2 * (stacke - stackb)
1386 * sizeof (char *));
ade40d31 1387 memcpy (stackx, stackb, (stacke - stackb) * sizeof (char *));
dd3b648e
RP
1388 stackp = stackx + (stackp - stackb);
1389 stacke = stackx + 2 * (stacke - stackb);
1390 stackb = stackx;
1391 }
1392 *stackp++ = 0;
1393 *stackp++ = 0;
1394 goto nofinalize;
1395
1396 case wordbound:
1397 if (d == string1 /* Points to first char */
1398 || d == end2 /* Points to end */
1399 || (d == end1 && size2 == 0)) /* Points to end */
1400 break;
1401 if ((SYNTAX (d[-1]) == Sword)
1402 != (SYNTAX (d == end1 ? *string2 : *d) == Sword))
1403 break;
1404 goto fail;
1405
1406 case notwordbound:
1407 if (d == string1 /* Points to first char */
1408 || d == end2 /* Points to end */
1409 || (d == end1 && size2 == 0)) /* Points to end */
1410 goto fail;
1411 if ((SYNTAX (d[-1]) == Sword)
1412 != (SYNTAX (d == end1 ? *string2 : *d) == Sword))
1413 goto fail;
1414 break;
1415
1416 case wordbeg:
1417 if (d == end2 /* Points to end */
1418 || (d == end1 && size2 == 0) /* Points to end */
1419 || SYNTAX (* (d == end1 ? string2 : d)) != Sword) /* Next char not a letter */
1420 goto fail;
1421 if (d == string1 /* Points to first char */
1422 || SYNTAX (d[-1]) != Sword) /* prev char not letter */
1423 break;
1424 goto fail;
1425
1426 case wordend:
1427 if (d == string1 /* Points to first char */
1428 || SYNTAX (d[-1]) != Sword) /* prev char not letter */
1429 goto fail;
1430 if (d == end2 /* Points to end */
1431 || (d == end1 && size2 == 0) /* Points to end */
1432 || SYNTAX (d == end1 ? *string2 : *d) != Sword) /* Next char not a letter */
1433 break;
1434 goto fail;
1435
1436#ifdef emacs
1437 case before_dot:
1438 if (((d - string2 <= (unsigned) size2)
1439 ? d - bf_p2 : d - bf_p1)
1440 <= point)
1441 goto fail;
1442 break;
1443
1444 case at_dot:
1445 if (((d - string2 <= (unsigned) size2)
1446 ? d - bf_p2 : d - bf_p1)
1447 == point)
1448 goto fail;
1449 break;
1450
1451 case after_dot:
1452 if (((d - string2 <= (unsigned) size2)
1453 ? d - bf_p2 : d - bf_p1)
1454 >= point)
1455 goto fail;
1456 break;
1457
1458 case wordchar:
1459 mcnt = (int) Sword;
1460 goto matchsyntax;
1461
1462 case syntaxspec:
1463 mcnt = *p++;
1464 matchsyntax:
1465 PREFETCH;
1466 if (SYNTAX (*d++) != (enum syntaxcode) mcnt) goto fail;
1467 break;
1468
1469 case notwordchar:
1470 mcnt = (int) Sword;
1471 goto matchnotsyntax;
1472
1473 case notsyntaxspec:
1474 mcnt = *p++;
1475 matchnotsyntax:
1476 PREFETCH;
1477 if (SYNTAX (*d++) == (enum syntaxcode) mcnt) goto fail;
1478 break;
1479#else
1480 case wordchar:
1481 PREFETCH;
1482 if (SYNTAX (*d++) == 0) goto fail;
1483 break;
1484
1485 case notwordchar:
1486 PREFETCH;
1487 if (SYNTAX (*d++) != 0) goto fail;
1488 break;
1489#endif /* not emacs */
1490
1491 case begbuf:
1492 if (d == string1) /* Note, d cannot equal string2 */
1493 break; /* unless string1 == string2. */
1494 goto fail;
1495
1496 case endbuf:
1497 if (d == end2 || (d == end1 && size2 == 0))
1498 break;
1499 goto fail;
1500
1501 case exactn:
1502 /* Match the next few pattern characters exactly.
1503 mcnt is how many characters to match. */
1504 mcnt = *p++;
1505 if (translate)
1506 {
1507 do
1508 {
1509 PREFETCH;
1510 if (translate[*d++] != *p++) goto fail;
1511 }
1512 while (--mcnt);
1513 }
1514 else
1515 {
1516 do
1517 {
1518 PREFETCH;
1519 if (*d++ != *p++) goto fail;
1520 }
1521 while (--mcnt);
1522 }
1523 break;
1524 }
1525 continue; /* Successfully matched one pattern command; keep matching */
1526
1527 /* Jump here if any matching operation fails. */
1528 fail:
1529 if (stackp != stackb)
1530 /* A restart point is known. Restart there and pop it. */
1531 {
1532 if (!stackp[-2])
1533 { /* If innermost failure point is dormant, flush it and keep looking */
1534 stackp -= 2;
1535 goto fail;
1536 }
1537 d = *--stackp;
1538 p = *--stackp;
1539 if (d >= string1 && d <= end1)
1540 dend = end_match_1;
1541 }
1542 else break; /* Matching at this starting point really fails! */
1543 }
1544 return -1; /* Failure to match */
1545}
1546
1547static int
9823e3f4 1548memcmp_translate (s1, s2, len, translate)
dd3b648e
RP
1549 unsigned char *s1, *s2;
1550 register int len;
1551 unsigned char *translate;
1552{
1553 register unsigned char *p1 = s1, *p2 = s2;
1554 while (len)
1555 {
1556 if (translate [*p1++] != translate [*p2++]) return 1;
1557 len--;
1558 }
1559 return 0;
1560}
1561\f
1562/* Entry points compatible with bsd4.2 regex library */
1563
1564#ifndef emacs
1565
1566static struct re_pattern_buffer re_comp_buf;
1567
1568char *
1569re_comp (s)
ba47c66a 1570 const char *s;
dd3b648e
RP
1571{
1572 if (!s)
1573 {
1574 if (!re_comp_buf.buffer)
1575 return "No previous regular expression";
1576 return 0;
1577 }
1578
1579 if (!re_comp_buf.buffer)
1580 {
1581 if (!(re_comp_buf.buffer = (char *) malloc (200)))
1582 return "Memory exhausted";
1583 re_comp_buf.allocated = 200;
1584 if (!(re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH)))
1585 return "Memory exhausted";
1586 }
1587 return re_compile_pattern (s, strlen (s), &re_comp_buf);
1588}
1589
1590int
1591re_exec (s)
1592 char *s;
1593{
1594 int len = strlen (s);
1595 return 0 <= re_search (&re_comp_buf, s, len, 0, len, 0);
1596}
1597
1598#endif /* emacs */
1599\f
1600#ifdef test
1601
1602#include <stdio.h>
1603
1604/* Indexed by a character, gives the upper case equivalent of the character */
1605
1606static char upcase[0400] =
1607 { 000, 001, 002, 003, 004, 005, 006, 007,
1608 010, 011, 012, 013, 014, 015, 016, 017,
1609 020, 021, 022, 023, 024, 025, 026, 027,
1610 030, 031, 032, 033, 034, 035, 036, 037,
1611 040, 041, 042, 043, 044, 045, 046, 047,
1612 050, 051, 052, 053, 054, 055, 056, 057,
1613 060, 061, 062, 063, 064, 065, 066, 067,
1614 070, 071, 072, 073, 074, 075, 076, 077,
1615 0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
1616 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
1617 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
1618 0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137,
1619 0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
1620 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
1621 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
1622 0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177,
1623 0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
1624 0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
1625 0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
1626 0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
1627 0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
1628 0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
1629 0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
1630 0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
1631 0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
1632 0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
1633 0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327,
1634 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
1635 0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
1636 0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
1637 0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
1638 0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377
1639 };
1640
1641main (argc, argv)
1642 int argc;
1643 char **argv;
1644{
1645 char pat[80];
1646 struct re_pattern_buffer buf;
1647 int i;
1648 char c;
1649 char fastmap[(1 << BYTEWIDTH)];
1650
1651 /* Allow a command argument to specify the style of syntax. */
1652 if (argc > 1)
1653 obscure_syntax = atoi (argv[1]);
1654
1655 buf.allocated = 40;
1656 buf.buffer = (char *) malloc (buf.allocated);
1657 buf.fastmap = fastmap;
1658 buf.translate = upcase;
1659
1660 while (1)
1661 {
1662 gets (pat);
1663
1664 if (*pat)
1665 {
1666 re_compile_pattern (pat, strlen(pat), &buf);
1667
1668 for (i = 0; i < buf.used; i++)
1669 printchar (buf.buffer[i]);
1670
199b2450 1671 putchar_unfiltered ('\n');
dd3b648e 1672
199b2450 1673 printf_unfiltered ("%d allocated, %d used.\n", buf.allocated, buf.used);
dd3b648e
RP
1674
1675 re_compile_fastmap (&buf);
199b2450 1676 printf_unfiltered ("Allowed by fastmap: ");
dd3b648e
RP
1677 for (i = 0; i < (1 << BYTEWIDTH); i++)
1678 if (fastmap[i]) printchar (i);
199b2450 1679 putchar_unfiltered ('\n');
dd3b648e
RP
1680 }
1681
1682 gets (pat); /* Now read the string to match against */
1683
1684 i = re_match (&buf, pat, strlen (pat), 0, 0);
199b2450 1685 printf_unfiltered ("Match value %d.\n", i);
dd3b648e
RP
1686 }
1687}
1688
1689#ifdef NOTDEF
1690print_buf (bufp)
1691 struct re_pattern_buffer *bufp;
1692{
1693 int i;
1694
199b2450 1695 printf_unfiltered ("buf is :\n----------------\n");
dd3b648e
RP
1696 for (i = 0; i < bufp->used; i++)
1697 printchar (bufp->buffer[i]);
1698
199b2450 1699 printf_unfiltered ("\n%d allocated, %d used.\n", bufp->allocated, bufp->used);
dd3b648e 1700
199b2450 1701 printf_unfiltered ("Allowed by fastmap: ");
dd3b648e
RP
1702 for (i = 0; i < (1 << BYTEWIDTH); i++)
1703 if (bufp->fastmap[i])
1704 printchar (i);
199b2450 1705 printf_unfiltered ("\nAllowed by translate: ");
dd3b648e
RP
1706 if (bufp->translate)
1707 for (i = 0; i < (1 << BYTEWIDTH); i++)
1708 if (bufp->translate[i])
1709 printchar (i);
199b2450
TL
1710 printf_unfiltered ("\nfastmap is%s accurate\n", bufp->fastmap_accurate ? "" : "n't");
1711 printf_unfiltered ("can %s be null\n----------", bufp->can_be_null ? "" : "not");
dd3b648e
RP
1712}
1713#endif
1714
1715printchar (c)
1716 char c;
1717{
1718 if (c < 041 || c >= 0177)
1719 {
199b2450
TL
1720 putchar_unfiltered ('\\');
1721 putchar_unfiltered (((c >> 6) & 3) + '0');
1722 putchar_unfiltered (((c >> 3) & 7) + '0');
1723 putchar_unfiltered ((c & 7) + '0');
dd3b648e
RP
1724 }
1725 else
199b2450 1726 putchar_unfiltered (c);
dd3b648e
RP
1727}
1728
1729error (string)
1730 char *string;
1731{
199b2450 1732 puts_unfiltered (string);
dd3b648e
RP
1733 exit (1);
1734}
1735
1736#endif /* test */
This page took 0.315956 seconds and 4 git commands to generate.