Main Page | Class Hierarchy | Class List | File List | Class Members | File Members

regex.c

Go to the documentation of this file.
00001 /* Extended regular expression matching and search library,
00002    version 0.12.
00003    (Implements POSIX draft P10003.2/D11.2, except for
00004    internationalization features.)
00005 
00006    Copyright (C) 1993, 1994, 1995 Free Software Foundation, Inc.
00007 
00008    This program is free software; you can redistribute it and/or modify
00009    it under the terms of the GNU General Public License as published by
00010    the Free Software Foundation; either version 2, or (at your option)
00011    any later version.
00012 
00013    This program is distributed in the hope that it will be useful,
00014    but WITHOUT ANY WARRANTY; without even the implied warranty of
00015    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016    GNU General Public License for more details.
00017 
00018    You should have received a copy of the GNU General Public License
00019    along with this program; if not, write to the Free Software
00020    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
00021 
00022 /* AIX requires this to be the first thing in the file. */
00023 #if defined (_AIX) && !defined (REGEX_MALLOC)
00024   #pragma alloca
00025 #endif
00026 
00027 #define _GNU_SOURCE
00028 
00029 #ifdef HAVE_CONFIG_H
00030 #include <config.h>
00031 #endif
00032 
00033 /* We need this for `regex.h', and perhaps for the Emacs include files.  */
00034 #include <sys/types.h>
00035 
00036 /* This is for other GNU distributions with internationalized messages.  */
00037 #if HAVE_LIBINTL_H || defined (_LIBC)
00038 # include <libintl.h>
00039 #else
00040 # define gettext(msgid) (msgid)
00041 #endif
00042 
00043 /* The `emacs' switch turns on certain matching commands
00044    that make sense only in Emacs. */
00045 #ifdef emacs
00046 
00047 #include "lisp.h"
00048 #include "buffer.h"
00049 #include "syntax.h"
00050 
00051 #else  /* not emacs */
00052 
00053 /* If we are not linking with Emacs proper,
00054    we can't use the relocating allocator
00055    even if config.h says that we can.  */
00056 #undef REL_ALLOC
00057 
00058 #if defined (STDC_HEADERS) || defined (_LIBC)
00059 #include <stdlib.h>
00060 #else
00061 char *malloc ();
00062 char *realloc ();
00063 #endif
00064 
00065 /* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
00066    If nothing else has been done, use the method below.  */
00067 #ifdef INHIBIT_STRING_HEADER
00068 #if !(defined (HAVE_BZERO) && defined (HAVE_BCOPY))
00069 #if !defined (bzero) && !defined (bcopy)
00070 #undef INHIBIT_STRING_HEADER
00071 #endif
00072 #endif
00073 #endif
00074 
00075 /* This is the normal way of making sure we have a bcopy and a bzero.
00076    This is used in most programs--a few other programs avoid this
00077    by defining INHIBIT_STRING_HEADER.  */
00078 #ifndef INHIBIT_STRING_HEADER
00079 /*#if defined (HAVE_STRING_H) || defined (STDC_HEADERS) || defined (_LIBC)*/
00080 #include <string.h>
00081 #ifndef bcmp
00082 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
00083 #endif
00084 #ifndef bcopy
00085 #define bcopy(s, d, n)  memcpy ((d), (s), (n))
00086 #endif
00087 #ifndef bzero
00088 #define bzero(s, n)     memset ((s), 0, (n))
00089 #endif
00090 /*#else
00091 #include <strings.h>
00092 #endif*/
00093 #endif
00094 
00095 /* Define the syntax stuff for <, >, etc.  */
00096 
00097 /* This must be nonzero for the wordchar and notwordchar pattern
00098    commands in re_match_2.  */
00099 #ifndef Sword 
00100 #define Sword 1
00101 #endif
00102 
00103 #ifdef SWITCH_ENUM_BUG
00104 #define SWITCH_ENUM_CAST(x) ((int)(x))
00105 #else
00106 #define SWITCH_ENUM_CAST(x) (x)
00107 #endif
00108 
00109 #ifdef SYNTAX_TABLE
00110 
00111 extern char *re_syntax_table;
00112 
00113 #else /* not SYNTAX_TABLE */
00114 
00115 /* How many characters in the character set.  */
00116 #define CHAR_SET_SIZE 256
00117 
00118 static char re_syntax_table[CHAR_SET_SIZE];
00119 
00120 static void
00121 init_syntax_once ()
00122 {
00123    register int c;
00124    static int done = 0;
00125 
00126    if (done)
00127         return;
00128 
00129    bzero (re_syntax_table, sizeof re_syntax_table);
00130 
00131    for (c = 'a'; c <= 'z'; c++)
00132         re_syntax_table[c] = Sword;
00133 
00134    for (c = 'A'; c <= 'Z'; c++)
00135         re_syntax_table[c] = Sword;
00136 
00137    for (c = '0'; c <= '9'; c++)
00138         re_syntax_table[c] = Sword;
00139 
00140    re_syntax_table['_'] = Sword;
00141 
00142    done = 1;
00143 }
00144 
00145 #endif /* not SYNTAX_TABLE */
00146 
00147 #define SYNTAX(c) re_syntax_table[c]
00148 
00149 #endif /* not emacs */
00150 
00151 /* Get the interface, including the syntax bits.  */
00152 #include "regex.h"
00153 
00154 /* isalpha etc. are used for the character classes.  */
00155 #include <ctype.h>
00156 
00157 /* Jim Meyering writes:
00158 
00159    "... Some ctype macros are valid only for character codes that
00160    isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
00161    using /bin/cc or gcc but without giving an ansi option).  So, all
00162    ctype uses should be through macros like ISPRINT...  If
00163    STDC_HEADERS is defined, then autoconf has verified that the ctype
00164    macros don't need to be guarded with references to isascii. ...
00165    Defining isascii to 1 should let any compiler worth its salt
00166    eliminate the && through constant folding."  */
00167 
00168 #if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
00169 #define ISASCII(c) 1
00170 #else
00171 #define ISASCII(c) isascii(c)
00172 #endif
00173 
00174 #ifdef isblank
00175 #define ISBLANK(c) (ISASCII (c) && isblank (c))
00176 #else
00177 #define ISBLANK(c) ((c) == ' ' || (c) == '\t')
00178 #endif
00179 #ifdef isgraph
00180 #define ISGRAPH(c) (ISASCII (c) && isgraph (c))
00181 #else
00182 #define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
00183 #endif
00184 
00185 #define ISPRINT(c) (ISASCII (c) && isprint (c))
00186 #define ISDIGIT(c) (ISASCII (c) && isdigit (c))
00187 #define ISALNUM(c) (ISASCII (c) && isalnum (c))
00188 #define ISALPHA(c) (ISASCII (c) && isalpha (c))
00189 #define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
00190 #define ISLOWER(c) (ISASCII (c) && islower (c))
00191 #define ISPUNCT(c) (ISASCII (c) && ispunct (c))
00192 #define ISSPACE(c) (ISASCII (c) && isspace (c))
00193 #define ISUPPER(c) (ISASCII (c) && isupper (c))
00194 #define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
00195 
00196 #ifndef NULL
00197 #define NULL (void *)0
00198 #endif
00199 
00200 /* We remove any previous definition of `SIGN_EXTEND_CHAR',
00201    since ours (we hope) works properly with all combinations of
00202    machines, compilers, `char' and `unsigned char' argument types.
00203    (Per Bothner suggested the basic approach.)  */
00204 #undef SIGN_EXTEND_CHAR
00205 #if __STDC__
00206 #define SIGN_EXTEND_CHAR(c) ((signed char) (c))
00207 #else  /* not __STDC__ */
00208 /* As in Harbison and Steele.  */
00209 #define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
00210 #endif
00211 
00212 /* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
00213    use `alloca' instead of `malloc'.  This is because using malloc in
00214    re_search* or re_match* could cause memory leaks when C-g is used in
00215    Emacs; also, malloc is slower and causes storage fragmentation.  On
00216    the other hand, malloc is more portable, and easier to debug.  
00217    
00218    Because we sometimes use alloca, some routines have to be macros,
00219    not functions -- `alloca'-allocated space disappears at the end of the
00220    function it is called in.  */
00221 
00222 #ifdef REGEX_MALLOC
00223 
00224 #define REGEX_ALLOCATE malloc
00225 #define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
00226 #define REGEX_FREE free
00227 
00228 #else /* not REGEX_MALLOC  */
00229 
00230 /* Emacs already defines alloca, sometimes.  */
00231 #ifndef alloca
00232 
00233 /* Make alloca work the best possible way.  */
00234 #ifdef __GNUC__
00235 #define alloca __builtin_alloca
00236 #else /* not __GNUC__ */
00237 #if HAVE_ALLOCA_H
00238 #include <alloca.h>
00239 #else /* not __GNUC__ or HAVE_ALLOCA_H */
00240 #ifndef _AIX /* Already did AIX, up at the top.  */
00241 char *alloca ();
00242 #endif /* not _AIX */
00243 #endif /* not HAVE_ALLOCA_H */ 
00244 #endif /* not __GNUC__ */
00245 
00246 #endif /* not alloca */
00247 
00248 #define REGEX_ALLOCATE alloca
00249 
00250 /* Assumes a `char *destination' variable.  */
00251 #define REGEX_REALLOCATE(source, osize, nsize)                          \
00252   (destination = (char *) alloca (nsize),                               \
00253    bcopy (source, destination, osize),                                  \
00254    destination)
00255 
00256 /* No need to do anything to free, after alloca.  */
00257 #define REGEX_FREE(arg) ((void)0) /* Do nothing!  But inhibit gcc warning.  */
00258 
00259 #endif /* not REGEX_MALLOC */
00260 
00261 /* Define how to allocate the failure stack.  */
00262 
00263 #ifdef REL_ALLOC
00264 #define REGEX_ALLOCATE_STACK(size)                              \
00265   r_alloc (&failure_stack_ptr, (size))
00266 #define REGEX_REALLOCATE_STACK(source, osize, nsize)            \
00267   r_re_alloc (&failure_stack_ptr, (nsize))
00268 #define REGEX_FREE_STACK(ptr)                                   \
00269   r_alloc_free (&failure_stack_ptr)
00270 
00271 #else /* not REL_ALLOC */
00272 
00273 #ifdef REGEX_MALLOC
00274 
00275 #define REGEX_ALLOCATE_STACK malloc
00276 #define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
00277 #define REGEX_FREE_STACK free
00278 
00279 #else /* not REGEX_MALLOC */
00280 
00281 #define REGEX_ALLOCATE_STACK alloca
00282 
00283 #define REGEX_REALLOCATE_STACK(source, osize, nsize)                    \
00284    REGEX_REALLOCATE (source, osize, nsize)
00285 /* No need to explicitly free anything.  */
00286 #define REGEX_FREE_STACK(arg)
00287 
00288 #endif /* not REGEX_MALLOC */
00289 #endif /* not REL_ALLOC */
00290 
00291 
00292 /* True if `size1' is non-NULL and PTR is pointing anywhere inside
00293    `string1' or just past its end.  This works if PTR is NULL, which is
00294    a good thing.  */
00295 #define FIRST_STRING_P(ptr)                                     \
00296   (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
00297 
00298 /* (Re)Allocate N items of type T using malloc, or fail.  */
00299 #define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
00300 #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
00301 #define RETALLOC_IF(addr, n, t) \
00302   if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
00303 #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
00304 
00305 #define BYTEWIDTH 8 /* In bits.  */
00306 
00307 #define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
00308 
00309 #undef MAX
00310 #undef MIN
00311 #define MAX(a, b) ((a) > (b) ? (a) : (b))
00312 #define MIN(a, b) ((a) < (b) ? (a) : (b))
00313 
00314 typedef char boolean;
00315 #define false 0
00316 #define true 1
00317 
00318 static int re_match_2_internal ();
00319 
00320 /* These are the command codes that appear in compiled regular
00321    expressions.  Some opcodes are followed by argument bytes.  A
00322    command code can specify any interpretation whatsoever for its
00323    arguments.  Zero bytes may appear in the compiled regular expression.  */
00324 
00325 typedef enum
00326 {
00327   no_op = 0,
00328 
00329   /* Succeed right away--no more backtracking.  */
00330   succeed,
00331 
00332            /* Followed by one byte giving n, then by n literal bytes.  */
00333   exactn,
00334 
00335            /* Matches any (more or less) character.  */
00336   anychar,
00337 
00338            /* Matches any one char belonging to specified set.  First
00339                  following byte is number of bitmap bytes.  Then come bytes
00340                  for a bitmap saying which chars are in.  Bits in each byte
00341                  are ordered low-bit-first.  A character is in the set if its
00342                  bit is 1.  A character too large to have a bit in the map is
00343                  automatically not in the set.  */
00344   charset,
00345 
00346            /* Same parameters as charset, but match any character that is
00347                  not one of those specified.  */
00348   charset_not,
00349 
00350            /* Start remembering the text that is matched, for storing in a
00351                  register.  Followed by one byte with the register number, in
00352                  the range 0 to one less than the pattern buffer's re_nsub
00353                  field.  Then followed by one byte with the number of groups
00354                  inner to this one.  (This last has to be part of the
00355                  start_memory only because we need it in the on_failure_jump
00356                  of re_match_2.)  */
00357   start_memory,
00358 
00359            /* Stop remembering the text that is matched and store it in a
00360                  memory register.  Followed by one byte with the register
00361                  number, in the range 0 to one less than `re_nsub' in the
00362                  pattern buffer, and one byte with the number of inner groups,
00363                  just like `start_memory'.  (We need the number of inner
00364                  groups here because we don't have any easy way of finding the
00365                  corresponding start_memory when we're at a stop_memory.)  */
00366   stop_memory,
00367 
00368            /* Match a duplicate of something remembered. Followed by one
00369                  byte containing the register number.  */
00370   duplicate,
00371 
00372            /* Fail unless at beginning of line.  */
00373   begline,
00374 
00375            /* Fail unless at end of line.  */
00376   endline,
00377 
00378            /* Succeeds if at beginning of buffer (if emacs) or at beginning
00379                  of string to be matched (if not).  */
00380   begbuf,
00381 
00382            /* Analogously, for end of buffer/string.  */
00383   endbuf,
00384  
00385            /* Followed by two byte relative address to which to jump.  */
00386   jump, 
00387 
00388         /* Same as jump, but marks the end of an alternative.  */
00389   jump_past_alt,
00390 
00391            /* Followed by two-byte relative address of place to resume at
00392                  in case of failure.  */
00393   on_failure_jump,
00394         
00395            /* Like on_failure_jump, but pushes a placeholder instead of the
00396                  current string position when executed.  */
00397   on_failure_keep_string_jump,
00398   
00399            /* Throw away latest failure point and then jump to following
00400                  two-byte relative address.  */
00401   pop_failure_jump,
00402 
00403            /* Change to pop_failure_jump if know won't have to backtrack to
00404                  match; otherwise change to jump.  This is used to jump
00405                  back to the beginning of a repeat.  If what follows this jump
00406                  clearly won't match what the repeat does, such that we can be
00407                  sure that there is no use backtracking out of repetitions
00408                  already matched, then we change it to a pop_failure_jump.
00409                  Followed by two-byte address.  */
00410   maybe_pop_jump,
00411 
00412            /* Jump to following two-byte address, and push a dummy failure
00413                  point. This failure point will be thrown away if an attempt
00414                  is made to use it for a failure.  A `+' construct makes this
00415                  before the first repeat.  Also used as an intermediary kind
00416                  of jump when compiling an alternative.  */
00417   dummy_failure_jump,
00418 
00419         /* Push a dummy failure point and continue.  Used at the end of
00420            alternatives.  */
00421   push_dummy_failure,
00422 
00423            /* Followed by two-byte relative address and two-byte number n.
00424                  After matching N times, jump to the address upon failure.  */
00425   succeed_n,
00426 
00427            /* Followed by two-byte relative address, and two-byte number n.
00428                  Jump to the address N times, then fail.  */
00429   jump_n,
00430 
00431            /* Set the following two-byte relative address to the
00432                  subsequent two-byte number.  The address *includes* the two
00433                  bytes of number.  */
00434   set_number_at,
00435 
00436   wordchar,     /* Matches any word-constituent character.  */
00437   notwordchar,  /* Matches any char that is not a word-constituent.  */
00438 
00439   wordbeg,      /* Succeeds if at word beginning.  */
00440   wordend,      /* Succeeds if at word end.  */
00441 
00442   wordbound,    /* Succeeds if at a word boundary.  */
00443   notwordbound  /* Succeeds if not at a word boundary.  */
00444 
00445 #ifdef emacs
00446   ,before_dot,  /* Succeeds if before point.  */
00447   at_dot,       /* Succeeds if at point.  */
00448   after_dot,    /* Succeeds if after point.  */
00449 
00450         /* Matches any character whose syntax is specified.  Followed by
00451                  a byte which contains a syntax code, e.g., Sword.  */
00452   syntaxspec,
00453 
00454         /* Matches any character whose syntax is not that specified.  */
00455   notsyntaxspec
00456 #endif /* emacs */
00457 } re_opcode_t;
00458 
00459 /* Common operations on the compiled pattern.  */
00460 
00461 /* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
00462 
00463 #define STORE_NUMBER(destination, number)                               \
00464   do {                                                                  \
00465     (destination)[0] = (number) & 0377;                                 \
00466     (destination)[1] = (number) >> 8;                                   \
00467   } while (0)
00468 
00469 /* Same as STORE_NUMBER, except increment DESTINATION to
00470    the byte after where the number is stored.  Therefore, DESTINATION
00471    must be an lvalue.  */
00472 
00473 #define STORE_NUMBER_AND_INCR(destination, number)                      \
00474   do {                                                                  \
00475     STORE_NUMBER (destination, number);                                 \
00476     (destination) += 2;                                                 \
00477   } while (0)
00478 
00479 /* Put into DESTINATION a number stored in two contiguous bytes starting
00480    at SOURCE.  */
00481 
00482 #define EXTRACT_NUMBER(destination, source)                             \
00483   do {                                                                  \
00484     (destination) = *(source) & 0377;                                   \
00485     (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;           \
00486   } while (0)
00487 
00488 #ifdef DEBUG
00489 static void
00490 extract_number (dest, source)
00491     int *dest;
00492     unsigned char *source;
00493 {
00494   int temp = SIGN_EXTEND_CHAR (*(source + 1)); 
00495   *dest = *source & 0377;
00496   *dest += temp << 8;
00497 }
00498 
00499 #ifndef EXTRACT_MACROS /* To debug the macros.  */
00500 #undef EXTRACT_NUMBER
00501 #define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
00502 #endif /* not EXTRACT_MACROS */
00503 
00504 #endif /* DEBUG */
00505 
00506 /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
00507    SOURCE must be an lvalue.  */
00508 
00509 #define EXTRACT_NUMBER_AND_INCR(destination, source)                    \
00510   do {                                                                  \
00511     EXTRACT_NUMBER (destination, source);                               \
00512     (source) += 2;                                                      \
00513   } while (0)
00514 
00515 #ifdef DEBUG
00516 static void
00517 extract_number_and_incr (destination, source)
00518     int *destination;
00519     unsigned char **source;
00520 { 
00521   extract_number (destination, *source);
00522   *source += 2;
00523 }
00524 
00525 #ifndef EXTRACT_MACROS
00526 #undef EXTRACT_NUMBER_AND_INCR
00527 #define EXTRACT_NUMBER_AND_INCR(dest, src) \
00528   extract_number_and_incr (&dest, &src)
00529 #endif /* not EXTRACT_MACROS */
00530 
00531 #endif /* DEBUG */
00532 
00533 /* If DEBUG is defined, Regex prints many voluminous messages about what
00534    it is doing (if the variable `debug' is nonzero).  If linked with the
00535    main program in `iregex.c', you can enter patterns and strings
00536    interactively.  And if linked with the main program in `main.c' and
00537    the other test files, you can run the already-written tests.  */
00538 
00539 #ifdef DEBUG
00540 
00541 /* We use standard I/O for debugging.  */
00542 #include <stdio.h>
00543 
00544 /* It is useful to test things that ``must'' be true when debugging.  */
00545 #include <assert.h>
00546 
00547 static int debug = 0;
00548 
00549 #define DEBUG_STATEMENT(e) e
00550 #define DEBUG_PRINT1(x) if (debug) printf (x)
00551 #define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
00552 #define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
00553 #define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
00554 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)                           \
00555   if (debug) print_partial_compiled_pattern (s, e)
00556 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)                  \
00557   if (debug) print_double_string (w, s1, sz1, s2, sz2)
00558 
00559 
00560 /* Print the fastmap in human-readable form.  */
00561 
00562 void
00563 print_fastmap (fastmap)
00564     char *fastmap;
00565 {
00566   unsigned was_a_range = 0;
00567   unsigned i = 0;  
00568   
00569   while (i < (1 << BYTEWIDTH))
00570     {
00571          if (fastmap[i++])
00572         {
00573           was_a_range = 0;
00574                 putchar (i - 1);
00575                 while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
00576                   {
00577                     was_a_range = 1;
00578                     i++;
00579                   }
00580           if (was_a_range)
00581                   {
00582                     printf ("-");
00583                     putchar (i - 1);
00584                   }
00585            }
00586     }
00587   putchar ('\n'); 
00588 }
00589 
00590 
00591 /* Print a compiled pattern string in human-readable form, starting at
00592    the START pointer into it and ending just before the pointer END.  */
00593 
00594 void
00595 print_partial_compiled_pattern (start, end)
00596     unsigned char *start;
00597     unsigned char *end;
00598 {
00599   int mcnt, mcnt2;
00600   unsigned char *p = start;
00601   unsigned char *pend = end;
00602 
00603   if (start == NULL)
00604     {
00605          printf ("(null)\n");
00606          return;
00607     }
00608     
00609   /* Loop over pattern commands.  */
00610   while (p < pend)
00611     {
00612          printf ("%d:\t", p - start);
00613 
00614          switch ((re_opcode_t) *p++)
00615         {
00616            case no_op:
00617                 printf ("/no_op");
00618                 break;
00619 
00620         case exactn:
00621           mcnt = *p++;
00622                 printf ("/exactn/%d", mcnt);
00623                 do
00624             {
00625                     putchar ('/');
00626                  putchar (*p++);
00627                   }
00628                 while (--mcnt);
00629                 break;
00630 
00631         case start_memory:
00632                 mcnt = *p++;
00633                 printf ("/start_memory/%d/%d", mcnt, *p++);
00634                 break;
00635 
00636         case stop_memory:
00637                 mcnt = *p++;
00638           printf ("/stop_memory/%d/%d", mcnt, *p++);
00639                 break;
00640 
00641         case duplicate:
00642           printf ("/duplicate/%d", *p++);
00643           break;
00644 
00645         case anychar:
00646           printf ("/anychar");
00647           break;
00648 
00649         case charset:
00650            case charset_not:
00651                 {
00652                   register int c, last = -100;
00653             register int in_range = 0;
00654 
00655             printf ("/charset [%s",
00656                           (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
00657                   
00658                   assert (p + *p < pend);
00659 
00660                   for (c = 0; c < 256; c++)
00661                  if (c / 8 < *p
00662                   && (p[1 + (c/8)] & (1 << (c % 8))))
00663                 {
00664                   /* Are we starting a range?  */
00665                   if (last + 1 == c && ! in_range)
00666                     {
00667                          putchar ('-');
00668                          in_range = 1;
00669                     }
00670                   /* Have we broken a range?  */
00671                   else if (last + 1 != c && in_range)
00672                     {
00673                          putchar (last);
00674                          in_range = 0;
00675                     }
00676                          
00677                   if (! in_range)
00678                     putchar (c);
00679 
00680                   last = c;
00681                     }
00682 
00683             if (in_range)
00684                  putchar (last);
00685 
00686             putchar (']');
00687 
00688             p += 1 + *p;
00689           }
00690           break;
00691 
00692         case begline:
00693           printf ("/begline");
00694                 break;
00695 
00696         case endline:
00697                 printf ("/endline");
00698                 break;
00699 
00700         case on_failure_jump:
00701                 extract_number_and_incr (&mcnt, &p);
00702           printf ("/on_failure_jump to %d", p + mcnt - start);
00703                 break;
00704 
00705         case on_failure_keep_string_jump:
00706                 extract_number_and_incr (&mcnt, &p);
00707           printf ("/on_failure_keep_string_jump to %d", p + mcnt - start);
00708                 break;
00709 
00710         case dummy_failure_jump:
00711                 extract_number_and_incr (&mcnt, &p);
00712           printf ("/dummy_failure_jump to %d", p + mcnt - start);
00713                 break;
00714 
00715         case push_dummy_failure:
00716                 printf ("/push_dummy_failure");
00717                 break;
00718                 
00719            case maybe_pop_jump:
00720                 extract_number_and_incr (&mcnt, &p);
00721           printf ("/maybe_pop_jump to %d", p + mcnt - start);
00722           break;
00723 
00724            case pop_failure_jump:
00725           extract_number_and_incr (&mcnt, &p);
00726           printf ("/pop_failure_jump to %d", p + mcnt - start);
00727           break;          
00728                 
00729            case jump_past_alt:
00730           extract_number_and_incr (&mcnt, &p);
00731           printf ("/jump_past_alt to %d", p + mcnt - start);
00732           break;          
00733                 
00734            case jump:
00735           extract_number_and_incr (&mcnt, &p);
00736           printf ("/jump to %d", p + mcnt - start);
00737           break;
00738 
00739            case succeed_n: 
00740                 extract_number_and_incr (&mcnt, &p);
00741                 extract_number_and_incr (&mcnt2, &p);
00742           printf ("/succeed_n to %d, %d times", p + mcnt - start, mcnt2);
00743                 break;
00744            
00745            case jump_n: 
00746                 extract_number_and_incr (&mcnt, &p);
00747                 extract_number_and_incr (&mcnt2, &p);
00748           printf ("/jump_n to %d, %d times", p + mcnt - start, mcnt2);
00749                 break;
00750            
00751            case set_number_at: 
00752                 extract_number_and_incr (&mcnt, &p);
00753                 extract_number_and_incr (&mcnt2, &p);
00754           printf ("/set_number_at location %d to %d", p + mcnt - start, mcnt2);
00755                 break;
00756            
00757            case wordbound:
00758           printf ("/wordbound");
00759           break;
00760 
00761         case notwordbound:
00762           printf ("/notwordbound");
00763                 break;
00764 
00765         case wordbeg:
00766           printf ("/wordbeg");
00767           break;
00768                 
00769         case wordend:
00770           printf ("/wordend");
00771                 
00772 #ifdef emacs
00773         case before_dot:
00774           printf ("/before_dot");
00775                 break;
00776 
00777         case at_dot:
00778           printf ("/at_dot");
00779                 break;
00780 
00781         case after_dot:
00782           printf ("/after_dot");
00783                 break;
00784 
00785         case syntaxspec:
00786                 printf ("/syntaxspec");
00787           mcnt = *p++;
00788           printf ("/%d", mcnt);
00789                 break;
00790           
00791         case notsyntaxspec:
00792                 printf ("/notsyntaxspec");
00793           mcnt = *p++;
00794           printf ("/%d", mcnt);
00795           break;
00796 #endif /* emacs */
00797 
00798         case wordchar:
00799           printf ("/wordchar");
00800                 break;
00801           
00802         case notwordchar:
00803           printf ("/notwordchar");
00804                 break;
00805 
00806         case begbuf:
00807           printf ("/begbuf");
00808                 break;
00809 
00810         case endbuf:
00811           printf ("/endbuf");
00812                 break;
00813 
00814            default:
00815                 printf ("?%d", *(p-1));
00816         }
00817 
00818          putchar ('\n');
00819     }
00820 
00821   printf ("%d:\tend of pattern.\n", p - start);
00822 }
00823 
00824 
00825 void
00826 print_compiled_pattern (bufp)
00827     struct re_pattern_buffer *bufp;
00828 {
00829   unsigned char *buffer = bufp->buffer;
00830 
00831   print_partial_compiled_pattern (buffer, buffer + bufp->used);
00832   printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated);
00833 
00834   if (bufp->fastmap_accurate && bufp->fastmap)
00835     {
00836          printf ("fastmap: ");
00837          print_fastmap (bufp->fastmap);
00838     }
00839 
00840   printf ("re_nsub: %d\t", bufp->re_nsub);
00841   printf ("regs_alloc: %d\t", bufp->regs_allocated);
00842   printf ("can_be_null: %d\t", bufp->can_be_null);
00843   printf ("newline_anchor: %d\n", bufp->newline_anchor);
00844   printf ("no_sub: %d\t", bufp->no_sub);
00845   printf ("not_bol: %d\t", bufp->not_bol);
00846   printf ("not_eol: %d\t", bufp->not_eol);
00847   printf ("syntax: %d\n", bufp->syntax);
00848   /* Perhaps we should print the translate table?  */
00849 }
00850 
00851 
00852 void
00853 print_double_string (where, string1, size1, string2, size2)
00854     const char *where;
00855     const char *string1;
00856     const char *string2;
00857     int size1;
00858     int size2;
00859 {
00860   unsigned this_char;
00861   
00862   if (where == NULL)
00863     printf ("(null)");
00864   else
00865     {
00866          if (FIRST_STRING_P (where))
00867            {
00868                 for (this_char = where - string1; this_char < size1; this_char++)
00869                   putchar (string1[this_char]);
00870 
00871                 where = string2;    
00872            }
00873 
00874          for (this_char = where - string2; this_char < size2; this_char++)
00875            putchar (string2[this_char]);
00876     }
00877 }
00878 
00879 #else /* not DEBUG */
00880 
00881 #undef assert
00882 #define assert(e)
00883 
00884 #define DEBUG_STATEMENT(e)
00885 #define DEBUG_PRINT1(x)
00886 #define DEBUG_PRINT2(x1, x2)
00887 #define DEBUG_PRINT3(x1, x2, x3)
00888 #define DEBUG_PRINT4(x1, x2, x3, x4)
00889 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
00890 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
00891 
00892 #endif /* not DEBUG */
00893 
00894 /* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
00895    also be assigned to arbitrarily: each pattern buffer stores its own
00896    syntax, so it can be changed between regex compilations.  */
00897 /* This has no initializer because initialized variables in Emacs
00898    become read-only after dumping.  */
00899 reg_syntax_t re_syntax_options;
00900 
00901 
00902 /* Specify the precise syntax of regexps for compilation.  This provides
00903    for compatibility for various utilities which historically have
00904    different, incompatible syntaxes.
00905 
00906    The argument SYNTAX is a bit mask comprised of the various bits
00907    defined in regex.h.  We return the old syntax.  */
00908 
00909 reg_syntax_t
00910 re_set_syntax (syntax)
00911     reg_syntax_t syntax;
00912 {
00913   reg_syntax_t ret = re_syntax_options;
00914   
00915   re_syntax_options = syntax;
00916   return ret;
00917 }
00918 
00919 /* This table gives an error message for each of the error codes listed
00920    in regex.h.  Obviously the order here has to be same as there.
00921    POSIX doesn't require that we do anything for REG_NOERROR,
00922    but why not be nice?  */
00923 
00924 static const char *re_error_msgid[] =
00925   { "Success",                                  /* REG_NOERROR */
00926     "No match",                                 /* REG_NOMATCH */
00927     "Invalid regular expression",               /* REG_BADPAT */
00928     "Invalid collation character",              /* REG_ECOLLATE */
00929     "Invalid character class name",             /* REG_ECTYPE */
00930     "Trailing backslash",                       /* REG_EESCAPE */
00931     "Invalid back reference",                   /* REG_ESUBREG */
00932     "Unmatched [ or [^",                        /* REG_EBRACK */
00933     "Unmatched ( or \\(",                       /* REG_EPAREN */
00934     "Unmatched \\{",                            /* REG_EBRACE */
00935     "Invalid content of \\{\\}",                /* REG_BADBR */
00936     "Invalid range end",                        /* REG_ERANGE */
00937     "Memory exhausted",                         /* REG_ESPACE */
00938     "Invalid preceding regular expression",     /* REG_BADRPT */
00939     "Premature end of regular expression",      /* REG_EEND */
00940     "Regular expression too big",               /* REG_ESIZE */
00941     "Unmatched ) or \\)",                       /* REG_ERPAREN */
00942   };
00943 
00944 /* Avoiding alloca during matching, to placate r_alloc.  */
00945 
00946 /* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
00947    searching and matching functions should not call alloca.  On some
00948    systems, alloca is implemented in terms of malloc, and if we're
00949    using the relocating allocator routines, then malloc could cause a
00950    relocation, which might (if the strings being searched are in the
00951    ralloc heap) shift the data out from underneath the regexp
00952    routines.
00953 
00954    Here's another reason to avoid allocation: Emacs 
00955    processes input from X in a signal handler; processing X input may
00956    call malloc; if input arrives while a matching routine is calling
00957    malloc, then we're scrod.  But Emacs can't just block input while
00958    calling matching routines; then we don't notice interrupts when
00959    they come in.  So, Emacs blocks input around all regexp calls
00960    except the matching calls, which it leaves unprotected, in the
00961    faith that they will not malloc.  */
00962 
00963 /* Normally, this is fine.  */
00964 #define MATCH_MAY_ALLOCATE
00965 
00966 /* When using GNU C, we are not REALLY using the C alloca, no matter
00967    what config.h may say.  So don't take precautions for it.  */
00968 #ifdef __GNUC__
00969 #undef C_ALLOCA
00970 #endif
00971 
00972 /* The match routines may not allocate if (1) they would do it with malloc
00973    and (2) it's not safe for them to use malloc.
00974    Note that if REL_ALLOC is defined, matching would not use malloc for the
00975    failure stack, but we would still use it for the register vectors;
00976    so REL_ALLOC should not affect this.  */
00977 #if (defined (C_ALLOCA) || defined (REGEX_MALLOC)) && defined (emacs)
00978 #undef MATCH_MAY_ALLOCATE
00979 #endif
00980 
00981 
00982 /* Failure stack declarations and macros; both re_compile_fastmap and
00983    re_match_2 use a failure stack.  These have to be macros because of
00984    REGEX_ALLOCATE_STACK.  */
00985    
00986 
00987 /* Number of failure points for which to initially allocate space
00988    when matching.  If this number is exceeded, we allocate more
00989    space, so it is not a hard limit.  */
00990 #ifndef INIT_FAILURE_ALLOC
00991 #define INIT_FAILURE_ALLOC 5
00992 #endif
00993 
00994 /* Roughly the maximum number of failure points on the stack.  Would be
00995    exactly that if always used MAX_FAILURE_SPACE each time we failed.
00996    This is a variable only so users of regex can assign to it; we never
00997    change it ourselves.  */
00998 #if defined (MATCH_MAY_ALLOCATE)
00999 int re_max_failures = 200000;
01000 #else
01001 int re_max_failures = 2000;
01002 #endif
01003 
01004 union fail_stack_elt
01005 {
01006   unsigned char *pointer;
01007   int integer;
01008 };
01009 
01010 typedef union fail_stack_elt fail_stack_elt_t;
01011 
01012 typedef struct
01013 {
01014   fail_stack_elt_t *stack;
01015   unsigned size;
01016   unsigned avail;                       /* Offset of next open position.  */
01017 } fail_stack_type;
01018 
01019 #define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
01020 #define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
01021 #define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
01022 
01023 
01024 /* Define macros to initialize and free the failure stack.
01025    Do `return -2' if the alloc fails.  */
01026 
01027 #ifdef MATCH_MAY_ALLOCATE
01028 #define INIT_FAIL_STACK()                                               \
01029   do {                                                                  \
01030     fail_stack.stack = (fail_stack_elt_t *)                             \
01031          REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
01032                                                                         \
01033     if (fail_stack.stack == NULL)                                       \
01034          return -2;                                                     \
01035                                                                         \
01036     fail_stack.size = INIT_FAILURE_ALLOC;                               \
01037     fail_stack.avail = 0;                                               \
01038   } while (0)
01039 
01040 #define RESET_FAIL_STACK()  REGEX_FREE_STACK (fail_stack.stack)
01041 #else
01042 #define INIT_FAIL_STACK()                                               \
01043   do {                                                                  \
01044     fail_stack.avail = 0;                                               \
01045   } while (0)
01046 
01047 #define RESET_FAIL_STACK()
01048 #endif
01049 
01050 
01051 /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
01052 
01053    Return 1 if succeeds, and 0 if either ran out of memory
01054    allocating space for it or it was already too large.  
01055    
01056    REGEX_REALLOCATE_STACK requires `destination' be declared.   */
01057 
01058 #define DOUBLE_FAIL_STACK(fail_stack)                                   \
01059   ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS              \
01060    ? 0                                                                  \
01061    : ((fail_stack).stack = (fail_stack_elt_t *)                         \
01062            REGEX_REALLOCATE_STACK ((fail_stack).stack,                  \
01063                 (fail_stack).size * sizeof (fail_stack_elt_t),          \
01064                 ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),  \
01065                                                                         \
01066          (fail_stack).stack == NULL                                     \
01067          ? 0                                                            \
01068          : ((fail_stack).size <<= 1,                                    \
01069             1)))
01070 
01071 
01072 /* Push pointer POINTER on FAIL_STACK. 
01073    Return 1 if was able to do so and 0 if ran out of memory allocating
01074    space to do so.  */
01075 #define PUSH_PATTERN_OP(POINTER, FAIL_STACK)                            \
01076   ((FAIL_STACK_FULL ()                                                  \
01077     && !DOUBLE_FAIL_STACK (FAIL_STACK))                                 \
01078    ? 0                                                                  \
01079    : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER,       \
01080          1))
01081 
01082 /* Push a pointer value onto the failure stack.
01083    Assumes the variable `fail_stack'.  Probably should only
01084    be called from within `PUSH_FAILURE_POINT'.  */
01085 #define PUSH_FAILURE_POINTER(item)                                      \
01086   fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)
01087 
01088 /* This pushes an integer-valued item onto the failure stack.
01089    Assumes the variable `fail_stack'.  Probably should only
01090    be called from within `PUSH_FAILURE_POINT'.  */
01091 #define PUSH_FAILURE_INT(item)                                  \
01092   fail_stack.stack[fail_stack.avail++].integer = (item)
01093 
01094 /* Push a fail_stack_elt_t value onto the failure stack.
01095    Assumes the variable `fail_stack'.  Probably should only
01096    be called from within `PUSH_FAILURE_POINT'.  */
01097 #define PUSH_FAILURE_ELT(item)                                  \
01098   fail_stack.stack[fail_stack.avail++] =  (item)
01099 
01100 /* These three POP... operations complement the three PUSH... operations.
01101    All assume that `fail_stack' is nonempty.  */
01102 #define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
01103 #define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
01104 #define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
01105 
01106 /* Used to omit pushing failure point id's when we're not debugging.  */
01107 #ifdef DEBUG
01108 #define DEBUG_PUSH PUSH_FAILURE_INT
01109 #define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
01110 #else
01111 #define DEBUG_PUSH(item)
01112 #define DEBUG_POP(item_addr)
01113 #endif
01114 
01115 
01116 /* Push the information about the state we will need
01117    if we ever fail back to it.  
01118    
01119    Requires variables fail_stack, regstart, regend, reg_info, and
01120    num_regs be declared.  DOUBLE_FAIL_STACK requires `destination' be
01121    declared.
01122    
01123    Does `return FAILURE_CODE' if runs out of memory.  */
01124 
01125 #define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)   \
01126   do {                                                                  \
01127     char *destination;                                                  \
01128     /* Must be int, so when we don't save any registers, the arithmetic \
01129           of 0 + -1 isn't done as unsigned.  */                         \
01130     int this_reg;                                                       \
01131                                                                         \
01132     destination = 0;    /* inhibit possible compiler warning */         \
01133     DEBUG_STATEMENT (failure_id++);                                     \
01134     DEBUG_STATEMENT (nfailure_points_pushed++);                         \
01135     DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);           \
01136     DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
01137     DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
01138                                                                         \
01139     DEBUG_PRINT2 ("  slots needed: %d\n", NUM_FAILURE_ITEMS);           \
01140     DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);       \
01141                                                                         \
01142     /* Ensure we have enough space allocated for what we will push.  */ \
01143     while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)                   \
01144          {                                                                      \
01145            if (!DOUBLE_FAIL_STACK (fail_stack))                         \
01146                 return failure_code;                                            \
01147                                                                         \
01148            DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",           \
01149                           (fail_stack).size);                           \
01150            DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
01151          }                                                                      \
01152                                                                         \
01153     /* Push the info, starting with the registers.  */                  \
01154     DEBUG_PRINT1 ("\n");                                                \
01155                                                                         \
01156     for (this_reg = lowest_active_reg; this_reg <= highest_active_reg;  \
01157             this_reg++)                                                 \
01158          {                                                                      \
01159         DEBUG_PRINT2 ("  Pushing reg: %d\n", this_reg);                 \
01160            DEBUG_STATEMENT (num_regs_pushed++);                         \
01161                                                                         \
01162         DEBUG_PRINT2 ("    start: 0x%x\n", regstart[this_reg]);         \
01163            PUSH_FAILURE_POINTER (regstart[this_reg]);                   \
01164                                                                                                                   \
01165         DEBUG_PRINT2 ("    end: 0x%x\n", regend[this_reg]);             \
01166            PUSH_FAILURE_POINTER (regend[this_reg]);                     \
01167                                                                         \
01168         DEBUG_PRINT2 ("    info: 0x%x\n      ", reg_info[this_reg]);    \
01169            DEBUG_PRINT2 (" match_null=%d",                                      \
01170                                   REG_MATCH_NULL_STRING_P (reg_info[this_reg]));        \
01171            DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \
01172            DEBUG_PRINT2 (" matched_something=%d",                               \
01173                                   MATCHED_SOMETHING (reg_info[this_reg]));              \
01174            DEBUG_PRINT2 (" ever_matched=%d",                            \
01175                                   EVER_MATCHED_SOMETHING (reg_info[this_reg])); \
01176         DEBUG_PRINT1 ("\n");                                            \
01177            PUSH_FAILURE_ELT (reg_info[this_reg].word);                  \
01178          }                                                                      \
01179                                                                         \
01180     DEBUG_PRINT2 ("  Pushing  low active reg: %d\n", lowest_active_reg);\
01181     PUSH_FAILURE_INT (lowest_active_reg);                               \
01182                                                                         \
01183     DEBUG_PRINT2 ("  Pushing high active reg: %d\n", highest_active_reg);\
01184     PUSH_FAILURE_INT (highest_active_reg);                              \
01185                                                                         \
01186     DEBUG_PRINT2 ("  Pushing pattern 0x%x: ", pattern_place);           \
01187     DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);           \
01188     PUSH_FAILURE_POINTER (pattern_place);                               \
01189                                                                         \
01190     DEBUG_PRINT2 ("  Pushing string 0x%x: `", string_place);            \
01191     DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
01192                                  size2);                                \
01193     DEBUG_PRINT1 ("'\n");                                               \
01194     PUSH_FAILURE_POINTER (string_place);                                \
01195                                                                         \
01196     DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);            \
01197     DEBUG_PUSH (failure_id);                                            \
01198   } while (0)
01199 
01200 /* This is the number of items that are pushed and popped on the stack
01201    for each register.  */
01202 #define NUM_REG_ITEMS  3
01203 
01204 /* Individual items aside from the registers.  */
01205 #ifdef DEBUG
01206 #define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
01207 #else
01208 #define NUM_NONREG_ITEMS 4
01209 #endif
01210 
01211 /* We push at most this many items on the stack.  */
01212 #define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
01213 
01214 /* We actually push this many items.  */
01215 #define NUM_FAILURE_ITEMS                                               \
01216   ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS         \
01217     + NUM_NONREG_ITEMS)
01218 
01219 /* How many items can still be added to the stack without overflowing it.  */
01220 #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
01221 
01222 
01223 /* Pops what PUSH_FAIL_STACK pushes.
01224 
01225    We restore into the parameters, all of which should be lvalues:
01226         STR -- the saved data position.
01227         PAT -- the saved pattern position.
01228         LOW_REG, HIGH_REG -- the highest and lowest active registers.
01229         REGSTART, REGEND -- arrays of string positions.
01230         REG_INFO -- array of information about each subexpression.
01231    
01232    Also assumes the variables `fail_stack' and (if debugging), `bufp',
01233    `pend', `string1', `size1', `string2', and `size2'.  */
01234 
01235 #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
01236 {                                                                       \
01237   DEBUG_STATEMENT (fail_stack_elt_t failure_id;)                        \
01238   int this_reg;                                                         \
01239   const unsigned char *string_temp;                                     \
01240                                                                         \
01241   assert (!FAIL_STACK_EMPTY ());                                        \
01242                                                                         \
01243   /* Remove failure points and point to how many regs pushed.  */       \
01244   DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");                                \
01245   DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);    \
01246   DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);     \
01247                                                                         \
01248   assert (fail_stack.avail >= NUM_NONREG_ITEMS);                        \
01249                                                                         \
01250   DEBUG_POP (&failure_id);                                              \
01251   DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);              \
01252                                                                         \
01253   /* If the saved string location is NULL, it came from an              \
01254         on_failure_keep_string_jump opcode, and we want to throw away the       \
01255         saved NULL, thus retaining our current position in the string.  */      \
01256   string_temp = POP_FAILURE_POINTER ();                                 \
01257   if (string_temp != NULL)                                              \
01258     str = (const char *) string_temp;                                   \
01259                                                                         \
01260   DEBUG_PRINT2 ("  Popping string 0x%x: `", str);                       \
01261   DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);      \
01262   DEBUG_PRINT1 ("'\n");                                                 \
01263                                                                         \
01264   pat = (unsigned char *) POP_FAILURE_POINTER ();                       \
01265   DEBUG_PRINT2 ("  Popping pattern 0x%x: ", pat);                       \
01266   DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);                       \
01267                                                                         \
01268   /* Restore register info.  */                                         \
01269   high_reg = (unsigned) POP_FAILURE_INT ();                             \
01270   DEBUG_PRINT2 ("  Popping high active reg: %d\n", high_reg);           \
01271                                                                         \
01272   low_reg = (unsigned) POP_FAILURE_INT ();                              \
01273   DEBUG_PRINT2 ("  Popping  low active reg: %d\n", low_reg);            \
01274                                                                         \
01275   for (this_reg = high_reg; this_reg >= low_reg; this_reg--)            \
01276     {                                                                   \
01277          DEBUG_PRINT2 ("    Popping reg: %d\n", this_reg);                      \
01278                                                                         \
01279          reg_info[this_reg].word = POP_FAILURE_ELT ();                  \
01280          DEBUG_PRINT2 ("      info: 0x%x\n", reg_info[this_reg]);               \
01281                                                                         \
01282          regend[this_reg] = (const char *) POP_FAILURE_POINTER ();              \
01283          DEBUG_PRINT2 ("      end: 0x%x\n", regend[this_reg]);          \
01284                                                                         \
01285          regstart[this_reg] = (const char *) POP_FAILURE_POINTER ();    \
01286          DEBUG_PRINT2 ("      start: 0x%x\n", regstart[this_reg]);              \
01287     }                                                                   \
01288                                                                         \
01289   set_regs_matched_done = 0;                                            \
01290   DEBUG_STATEMENT (nfailure_points_popped++);                           \
01291 } /* POP_FAILURE_POINT */
01292 
01293 
01294 
01295 /* Structure for per-register (a.k.a. per-group) information.
01296    Other register information, such as the
01297    starting and ending positions (which are addresses), and the list of
01298    inner groups (which is a bits list) are maintained in separate
01299    variables.  
01300    
01301    We are making a (strictly speaking) nonportable assumption here: that
01302    the compiler will pack our bit fields into something that fits into
01303    the type of `word', i.e., is something that fits into one item on the
01304    failure stack.  */
01305 
01306 typedef union
01307 {
01308   fail_stack_elt_t word;
01309   struct
01310   {
01311          /* This field is one if this group can match the empty string,
01312             zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
01313 #define MATCH_NULL_UNSET_VALUE 3
01314     unsigned match_null_string_p : 2;
01315     unsigned is_active : 1;
01316     unsigned matched_something : 1;
01317     unsigned ever_matched_something : 1;
01318   } bits;
01319 } register_info_type;
01320 
01321 #define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
01322 #define IS_ACTIVE(R)  ((R).bits.is_active)
01323 #define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
01324 #define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
01325 
01326 
01327 /* Call this when have matched a real character; it sets `matched' flags
01328    for the subexpressions which we are currently inside.  Also records
01329    that those subexprs have matched.  */
01330 #define SET_REGS_MATCHED()                                              \
01331   do                                                                    \
01332     {                                                                   \
01333          if (!set_regs_matched_done)                                    \
01334         {                                                               \
01335           unsigned r;                                                   \
01336           set_regs_matched_done = 1;                                    \
01337           for (r = lowest_active_reg; r <= highest_active_reg; r++)     \
01338             {                                                           \
01339                  MATCHED_SOMETHING (reg_info[r])                                \
01340                 = EVER_MATCHED_SOMETHING (reg_info[r])                  \
01341                 = 1;                                                    \
01342             }                                                           \
01343         }                                                               \
01344     }                                                                   \
01345   while (0)
01346 
01347 /* Registers are set to a sentinel when they haven't yet matched.  */
01348 static char reg_unset_dummy;
01349 #define REG_UNSET_VALUE (&reg_unset_dummy)
01350 #define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
01351 
01352 /* Subroutine declarations and macros for regex_compile.  */
01353 
01354 static void store_op1 (), store_op2 ();
01355 static void insert_op1 (), insert_op2 ();
01356 static boolean at_begline_loc_p (), at_endline_loc_p ();
01357 static boolean group_in_compile_stack ();
01358 static reg_errcode_t compile_range ();
01359 
01360 /* Fetch the next character in the uncompiled pattern---translating it 
01361    if necessary.  Also cast from a signed character in the constant
01362    string passed to us by the user to an unsigned char that we can use
01363    as an array index (in, e.g., `translate').  */
01364 #define PATFETCH(c)                                                     \
01365   do {if (p == pend) return REG_EEND;                                   \
01366     c = (unsigned char) *p++;                                           \
01367     if (translate) c = translate[c];                                    \
01368   } while (0)
01369 
01370 /* Fetch the next character in the uncompiled pattern, with no
01371    translation.  */
01372 #define PATFETCH_RAW(c)                                                 \
01373   do {if (p == pend) return REG_EEND;                                   \
01374     c = (unsigned char) *p++;                                           \
01375   } while (0)
01376 
01377 /* Go backwards one character in the pattern.  */
01378 #define PATUNFETCH p--
01379 
01380 
01381 /* If `translate' is non-null, return translate[D], else just D.  We
01382    cast the subscript to translate because some data is declared as
01383    `char *', to avoid warnings when a string constant is passed.  But
01384    when we use a character as a subscript we must make it unsigned.  */
01385 #define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d))
01386 
01387 
01388 /* Macros for outputting the compiled pattern into `buffer'.  */
01389 
01390 /* If the buffer isn't allocated when it comes in, use this.  */
01391 #define INIT_BUF_SIZE  32
01392 
01393 /* Make sure we have at least N more bytes of space in buffer.  */
01394 #define GET_BUFFER_SPACE(n)                                             \
01395     while (b - bufp->buffer + (n) > bufp->allocated)                    \
01396          EXTEND_BUFFER ()
01397 
01398 /* Make sure we have one more byte of buffer space and then add C to it.  */
01399 #define BUF_PUSH(c)                                                     \
01400   do {                                                                  \
01401     GET_BUFFER_SPACE (1);                                               \
01402     *b++ = (unsigned char) (c);                                         \
01403   } while (0)
01404 
01405 
01406 /* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
01407 #define BUF_PUSH_2(c1, c2)                                              \
01408   do {                                                                  \
01409     GET_BUFFER_SPACE (2);                                               \
01410     *b++ = (unsigned char) (c1);                                        \
01411     *b++ = (unsigned char) (c2);                                        \
01412   } while (0)
01413 
01414 
01415 /* As with BUF_PUSH_2, except for three bytes.  */
01416 #define BUF_PUSH_3(c1, c2, c3)                                          \
01417   do {                                                                  \
01418     GET_BUFFER_SPACE (3);                                               \
01419     *b++ = (unsigned char) (c1);                                        \
01420     *b++ = (unsigned char) (c2);                                        \
01421     *b++ = (unsigned char) (c3);                                        \
01422   } while (0)
01423 
01424 
01425 /* Store a jump with opcode OP at LOC to location TO.  We store a
01426    relative address offset by the three bytes the jump itself occupies.  */
01427 #define STORE_JUMP(op, loc, to) \
01428   store_op1 (op, loc, (to) - (loc) - 3)
01429 
01430 /* Likewise, for a two-argument jump.  */
01431 #define STORE_JUMP2(op, loc, to, arg) \
01432   store_op2 (op, loc, (to) - (loc) - 3, arg)
01433 
01434 /* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
01435 #define INSERT_JUMP(op, loc, to) \
01436   insert_op1 (op, loc, (to) - (loc) - 3, b)
01437 
01438 /* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
01439 #define INSERT_JUMP2(op, loc, to, arg) \
01440   insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
01441 
01442 
01443 /* This is not an arbitrary limit: the arguments which represent offsets
01444    into the pattern are two bytes long.  So if 2^16 bytes turns out to
01445    be too small, many things would have to change.  */
01446 #define MAX_BUF_SIZE (1L << 16)
01447 
01448 
01449 /* Extend the buffer by twice its current size via realloc and
01450    reset the pointers that pointed into the old block to point to the
01451    correct places in the new one.  If extending the buffer results in it
01452    being larger than MAX_BUF_SIZE, then flag memory exhausted.  */
01453 #define EXTEND_BUFFER()                                                 \
01454   do {                                                                  \
01455     unsigned char *old_buffer = bufp->buffer;                           \
01456     if (bufp->allocated == MAX_BUF_SIZE)                                \
01457          return REG_ESIZE;                                                      \
01458     bufp->allocated <<= 1;                                              \
01459     if (bufp->allocated > MAX_BUF_SIZE)                                 \
01460          bufp->allocated = MAX_BUF_SIZE;                                        \
01461     bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
01462     if (bufp->buffer == NULL)                                           \
01463          return REG_ESPACE;                                             \
01464     /* If the buffer moved, move all the pointers into it.  */          \
01465     if (old_buffer != bufp->buffer)                                     \
01466          {                                                                      \
01467            b = (b - old_buffer) + bufp->buffer;                         \
01468            begalt = (begalt - old_buffer) + bufp->buffer;                       \
01469            if (fixup_alt_jump)                                          \
01470                 fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
01471            if (laststart)                                                       \
01472                 laststart = (laststart - old_buffer) + bufp->buffer;            \
01473            if (pending_exact)                                           \
01474                 pending_exact = (pending_exact - old_buffer) + bufp->buffer;    \
01475          }                                                                      \
01476   } while (0)
01477 
01478 
01479 /* Since we have one byte reserved for the register number argument to
01480    {start,stop}_memory, the maximum number of groups we can report
01481    things about is what fits in that byte.  */
01482 #define MAX_REGNUM 255
01483 
01484 /* But patterns can have more than `MAX_REGNUM' registers.  We just
01485    ignore the excess.  */
01486 typedef unsigned regnum_t;
01487 
01488 
01489 /* Macros for the compile stack.  */
01490 
01491 /* Since offsets can go either forwards or backwards, this type needs to
01492    be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
01493 typedef int pattern_offset_t;
01494 
01495 typedef struct
01496 {
01497   pattern_offset_t begalt_offset;
01498   pattern_offset_t fixup_alt_jump;
01499   pattern_offset_t inner_group_offset;
01500   pattern_offset_t laststart_offset;  
01501   regnum_t regnum;
01502 } compile_stack_elt_t;
01503 
01504 
01505 typedef struct
01506 {
01507   compile_stack_elt_t *stack;
01508   unsigned size;
01509   unsigned avail;                       /* Offset of next open position.  */
01510 } compile_stack_type;
01511 
01512 
01513 #define INIT_COMPILE_STACK_SIZE 32
01514 
01515 #define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
01516 #define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
01517 
01518 /* The next available element.  */
01519 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
01520 
01521 
01522 /* Set the bit for character C in a list.  */
01523 #define SET_LIST_BIT(c)                               \
01524   (b[((unsigned char) (c)) / BYTEWIDTH]               \
01525    |= 1 << (((unsigned char) c) % BYTEWIDTH))
01526 
01527 
01528 /* Get the next unsigned number in the uncompiled pattern.  */
01529 #define GET_UNSIGNED_NUMBER(num)                                        \
01530   { if (p != pend)                                                      \
01531         {                                                                       \
01532           PATFETCH (c);                                                         \
01533           while (ISDIGIT (c))                                           \
01534             {                                                           \
01535                  if (num < 0)                                                   \
01536                     num = 0;                                                    \
01537                  num = num * 10 + c - '0';                                      \
01538                  if (p == pend)                                                 \
01539                     break;                                                      \
01540                  PATFETCH (c);                                          \
01541             }                                                           \
01542           }                                                             \
01543     }           
01544 
01545 #define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
01546 
01547 #define IS_CHAR_CLASS(string)                                           \
01548    (STREQ (string, "alpha") || STREQ (string, "upper")                  \
01549     || STREQ (string, "lower") || STREQ (string, "digit")               \
01550     || STREQ (string, "alnum") || STREQ (string, "xdigit")              \
01551     || STREQ (string, "space") || STREQ (string, "print")               \
01552     || STREQ (string, "punct") || STREQ (string, "graph")               \
01553     || STREQ (string, "cntrl") || STREQ (string, "blank"))
01554 
01555 #ifndef MATCH_MAY_ALLOCATE
01556 
01557 /* If we cannot allocate large objects within re_match_2_internal,
01558    we make the fail stack and register vectors global.
01559    The fail stack, we grow to the maximum size when a regexp
01560    is compiled.
01561    The register vectors, we adjust in size each time we
01562    compile a regexp, according to the number of registers it needs.  */
01563 
01564 static fail_stack_type fail_stack;
01565 
01566 /* Size with which the following vectors are currently allocated.
01567    That is so we can make them bigger as needed,
01568    but never make them smaller.  */
01569 static int regs_allocated_size;
01570 
01571 static const char **     regstart, **     regend;
01572 static const char ** old_regstart, ** old_regend;
01573 static const char **best_regstart, **best_regend;
01574 static register_info_type *reg_info; 
01575 static const char **reg_dummy;
01576 static register_info_type *reg_info_dummy;
01577 
01578 /* Make the register vectors big enough for NUM_REGS registers,
01579    but don't make them smaller.  */
01580 
01581 static
01582 regex_grow_registers (num_regs)
01583         int num_regs;
01584 {
01585   if (num_regs > regs_allocated_size)
01586     {
01587          RETALLOC_IF (regstart,  num_regs, const char *);
01588          RETALLOC_IF (regend,    num_regs, const char *);
01589          RETALLOC_IF (old_regstart, num_regs, const char *);
01590          RETALLOC_IF (old_regend,        num_regs, const char *);
01591          RETALLOC_IF (best_regstart, num_regs, const char *);
01592          RETALLOC_IF (best_regend,       num_regs, const char *);
01593          RETALLOC_IF (reg_info,  num_regs, register_info_type);
01594          RETALLOC_IF (reg_dummy,         num_regs, const char *);
01595          RETALLOC_IF (reg_info_dummy, num_regs, register_info_type);
01596 
01597          regs_allocated_size = num_regs;
01598     }
01599 }
01600 
01601 #endif /* not MATCH_MAY_ALLOCATE */
01602 
01603 /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
01604    Returns one of error codes defined in `regex.h', or zero for success.
01605 
01606    Assumes the `allocated' (and perhaps `buffer') and `translate'
01607    fields are set in BUFP on entry.
01608 
01609    If it succeeds, results are put in BUFP (if it returns an error, the
01610    contents of BUFP are undefined):
01611         `buffer' is the compiled pattern;
01612         `syntax' is set to SYNTAX;
01613         `used' is set to the length of the compiled pattern;
01614         `fastmap_accurate' is zero;
01615         `re_nsub' is the number of subexpressions in PATTERN;
01616         `not_bol' and `not_eol' are zero;
01617    
01618    The `fastmap' and `newline_anchor' fields are neither
01619    examined nor set.  */
01620 
01621 /* Return, freeing storage we allocated.  */
01622 #define FREE_STACK_RETURN(value)                \
01623   return (free (compile_stack.stack), value)
01624 
01625 static reg_errcode_t
01626 regex_compile (pattern, size, syntax, bufp)
01627         const char *pattern;
01628         int size;
01629         reg_syntax_t syntax;
01630         struct re_pattern_buffer *bufp;
01631 {
01632   /* We fetch characters from PATTERN here.  Even though PATTERN is
01633         `char *' (i.e., signed), we declare these variables as unsigned, so
01634         they can be reliably used as array indices.  */
01635   register unsigned char c, c1;
01636   
01637   /* A random temporary spot in PATTERN.  */
01638   const char *p1;
01639 
01640   /* Points to the end of the buffer, where we should append.  */
01641   register unsigned char *b;
01642   
01643   /* Keeps track of unclosed groups.  */
01644   compile_stack_type compile_stack;
01645 
01646   /* Points to the current (ending) position in the pattern.  */
01647   const char *p = pattern;
01648   const char *pend = pattern + size;
01649   
01650   /* How to translate the characters in the pattern.  */
01651   char *translate = bufp->translate;
01652 
01653   /* Address of the count-byte of the most recently inserted `exactn'
01654         command.  This makes it possible to tell if a new exact-match
01655         character can be added to that command or if the character requires
01656         a new `exactn' command.  */
01657   unsigned char *pending_exact = 0;
01658 
01659   /* Address of start of the most recently finished expression.
01660         This tells, e.g., postfix * where to find the start of its
01661         operand.  Reset at the beginning of groups and alternatives.  */
01662   unsigned char *laststart = 0;
01663 
01664   /* Address of beginning of regexp, or inside of last group.  */
01665   unsigned char *begalt;
01666 
01667   /* Place in the uncompiled pattern (i.e., the {) to
01668         which to go back if the interval is invalid.  */
01669   const char *beg_interval;
01670                          
01671   /* Address of the place where a forward jump should go to the end of
01672         the containing expression.  Each alternative of an `or' -- except the
01673         last -- ends with a forward jump of this sort.  */
01674   unsigned char *fixup_alt_jump = 0;
01675 
01676   /* Counts open-groups as they are encountered.  Remembered for the
01677         matching close-group on the compile stack, so the same register
01678         number is put in the stop_memory as the start_memory.  */
01679   regnum_t regnum = 0;
01680 
01681 #ifdef DEBUG
01682   DEBUG_PRINT1 ("\nCompiling pattern: ");
01683   if (debug)
01684     {
01685          unsigned debug_count;
01686          
01687          for (debug_count = 0; debug_count < size; debug_count++)
01688            putchar (pattern[debug_count]);
01689          putchar ('\n');
01690     }
01691 #endif /* DEBUG */
01692 
01693   /* Initialize the compile stack.  */
01694   compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
01695   if (compile_stack.stack == NULL)
01696     return REG_ESPACE;
01697 
01698   compile_stack.size = INIT_COMPILE_STACK_SIZE;
01699   compile_stack.avail = 0;
01700 
01701   /* Initialize the pattern buffer.  */
01702   bufp->syntax = syntax;
01703   bufp->fastmap_accurate = 0;
01704   bufp->not_bol = bufp->not_eol = 0;
01705 
01706   /* Set `used' to zero, so that if we return an error, the pattern
01707         printer (for debugging) will think there's no pattern.  We reset it
01708         at the end.  */
01709   bufp->used = 0;
01710   
01711   /* Always count groups, whether or not bufp->no_sub is set.  */
01712   bufp->re_nsub = 0;                            
01713 
01714 #if !defined (emacs) && !defined (SYNTAX_TABLE)
01715   /* Initialize the syntax table.  */
01716    init_syntax_once ();
01717 #endif
01718 
01719   if (bufp->allocated == 0)
01720     {
01721          if (bufp->buffer)
01722         { /* If zero allocated, but buffer is non-null, try to realloc
01723                    enough space.  This loses if buffer's address is bogus, but
01724                    that is the user's responsibility.  */
01725                 RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
01726            }
01727          else
01728            { /* Caller did not allocate a buffer.  Do it for them.  */
01729                 bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
01730            }
01731          if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
01732 
01733          bufp->allocated = INIT_BUF_SIZE;
01734     }
01735 
01736   begalt = b = bufp->buffer;
01737 
01738   /* Loop through the uncompiled pattern until we're at the end.  */
01739   while (p != pend)
01740     {
01741          PATFETCH (c);
01742 
01743          switch (c)
01744            {
01745            case '^':
01746                 {
01747                   if (   /* If at start of pattern, it's an operator.  */
01748                             p == pattern + 1
01749                             /* If context independent, it's an operator.  */
01750                          || syntax & RE_CONTEXT_INDEP_ANCHORS
01751                             /* Otherwise, depends on what's come before.  */
01752                          || at_begline_loc_p (pattern, p, syntax))
01753                     BUF_PUSH (begline);
01754                   else
01755                     goto normal_char;
01756                 }
01757                 break;
01758 
01759 
01760            case '$':
01761                 {
01762                   if (   /* If at end of pattern, it's an operator.  */
01763                             p == pend 
01764                             /* If context independent, it's an operator.  */
01765                          || syntax & RE_CONTEXT_INDEP_ANCHORS
01766                             /* Otherwise, depends on what's next.  */
01767                          || at_endline_loc_p (p, pend, syntax))
01768                         BUF_PUSH (endline);
01769                    else
01770                         goto normal_char;
01771                  }
01772                  break;
01773 
01774 
01775         case '+':
01776            case '?':
01777                 if ((syntax & RE_BK_PLUS_QM)
01778                     || (syntax & RE_LIMITED_OPS))
01779                   goto normal_char;
01780            handle_plus:
01781            case '*':
01782                 /* If there is no previous pattern... */
01783                 if (!laststart)
01784                   {
01785                     if (syntax & RE_CONTEXT_INVALID_OPS)
01786                          FREE_STACK_RETURN (REG_BADRPT);
01787                     else if (!(syntax & RE_CONTEXT_INDEP_OPS))
01788                          goto normal_char;
01789                   }
01790 
01791                 {
01792                   /* Are we optimizing this jump?  */
01793                   boolean keep_string_p = false;
01794                   
01795                   /* 1 means zero (many) matches is allowed.  */
01796                   char zero_times_ok = 0, many_times_ok = 0;
01797 
01798                   /* If there is a sequence of repetition chars, collapse it
01799                         down to just one (the right one).  We can't combine
01800                         interval operators with these because of, e.g., `a{2}*',
01801                         which should only match an even number of `a's.  */
01802 
01803                   for (;;)
01804                     {
01805                          zero_times_ok |= c != '+';
01806                          many_times_ok |= c != '?';
01807 
01808                          if (p == pend)
01809                            break;
01810 
01811                          PATFETCH (c);
01812 
01813                          if (c == '*'
01814                                 || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
01815                            ;
01816 
01817                          else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
01818                            {
01819                                 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
01820 
01821                                 PATFETCH (c1);
01822                                 if (!(c1 == '+' || c1 == '?'))
01823                                   {
01824                                     PATUNFETCH;
01825                                     PATUNFETCH;
01826                                     break;
01827                                   }
01828 
01829                                 c = c1;
01830                            }
01831                          else
01832                            {
01833                                 PATUNFETCH;
01834                                 break;
01835                            }
01836 
01837                          /* If we get here, we found another repeat character.  */
01838                         }
01839 
01840                   /* Star, etc. applied to an empty pattern is equivalent
01841                         to an empty pattern.  */
01842                   if (!laststart)  
01843                     break;
01844 
01845                   /* Now we know whether or not zero matches is allowed
01846                         and also whether or not two or more matches is allowed.  */
01847                   if (many_times_ok)
01848                     { /* More than one repetition is allowed, so put in at the
01849                             end a backward relative jump from `b' to before the next
01850                             jump we're going to put in below (which jumps from
01851                             laststart to after this jump).  
01852 
01853                             But if we are at the `*' in the exact sequence `.*\n',
01854                             insert an unconditional jump backwards to the .,
01855                             instead of the beginning of the loop.  This way we only
01856                             push a failure point once, instead of every time
01857                             through the loop.  */
01858                          assert (p - 1 > pattern);
01859 
01860                          /* Allocate the space for the jump.  */
01861                          GET_BUFFER_SPACE (3);
01862 
01863                          /* We know we are not at the first character of the pattern,
01864                             because laststart was nonzero.  And we've already
01865                             incremented `p', by the way, to be the character after
01866                             the `*'.  Do we have to do something analogous here
01867                             for null bytes, because of RE_DOT_NOT_NULL?  */
01868                          if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
01869                     && zero_times_ok
01870                                 && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
01871                                 && !(syntax & RE_DOT_NEWLINE))
01872                            { /* We have .*\n.  */
01873                                 STORE_JUMP (jump, b, laststart);
01874                                 keep_string_p = true;
01875                            }
01876                          else
01877                            /* Anything else.  */
01878                            STORE_JUMP (maybe_pop_jump, b, laststart - 3);
01879 
01880                          /* We've added more stuff to the buffer.  */
01881                          b += 3;
01882                     }
01883 
01884                   /* On failure, jump from laststart to b + 3, which will be the
01885                         end of the buffer after this jump is inserted.  */
01886                   GET_BUFFER_SPACE (3);
01887                   INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
01888                                                             : on_failure_jump,
01889                                         laststart, b + 3);
01890                   pending_exact = 0;
01891                   b += 3;
01892 
01893                   if (!zero_times_ok)
01894                     {
01895                          /* At least one repetition is required, so insert a
01896                             `dummy_failure_jump' before the initial
01897                             `on_failure_jump' instruction of the loop. This
01898                             effects a skip over that instruction the first time
01899                             we hit that loop.  */
01900                          GET_BUFFER_SPACE (3);
01901                          INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
01902                          b += 3;
01903                     }
01904                   }
01905           break;
01906 
01907 
01908         case '.':
01909                 laststart = b;
01910                 BUF_PUSH (anychar);
01911                 break;
01912 
01913 
01914            case '[':
01915                 {
01916                   boolean had_char_class = false;
01917 
01918                   if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
01919 
01920                   /* Ensure that we have enough space to push a charset: the
01921                         opcode, the length count, and the bitset; 34 bytes in all.  */
01922             GET_BUFFER_SPACE (34);
01923 
01924                   laststart = b;
01925 
01926                   /* We test `*p == '^' twice, instead of using an if
01927                         statement, so we only need one BUF_PUSH.  */
01928                   BUF_PUSH (*p == '^' ? charset_not : charset); 
01929                   if (*p == '^')
01930                     p++;
01931 
01932                   /* Remember the first position in the bracket expression.  */
01933                   p1 = p;
01934 
01935                   /* Push the number of bytes in the bitmap.  */
01936                   BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
01937 
01938                   /* Clear the whole map.  */
01939                   bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
01940 
01941                   /* charset_not matches newline according to a syntax bit.  */
01942                   if ((re_opcode_t) b[-2] == charset_not
01943                          && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
01944                     SET_LIST_BIT ('\n');
01945 
01946                   /* Read in characters and ranges, setting map bits.  */
01947                   for (;;)
01948                     {
01949                          if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
01950 
01951                          PATFETCH (c);
01952 
01953                          /* \ might escape characters inside [...] and [^...].  */
01954                          if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
01955                            {
01956                                 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
01957 
01958                                 PATFETCH (c1);
01959                                 SET_LIST_BIT (c1);
01960                                 continue;
01961                            }
01962 
01963                          /* Could be the end of the bracket expression.  If it's
01964                             not (i.e., when the bracket expression is `[]' so
01965                             far), the ']' character bit gets set way below.  */
01966                          if (c == ']' && p != p1 + 1)
01967                            break;
01968 
01969                          /* Look ahead to see if it's a range when the last thing
01970                             was a character class.  */
01971                          if (had_char_class && c == '-' && *p != ']')
01972                            FREE_STACK_RETURN (REG_ERANGE);
01973 
01974                          /* Look ahead to see if it's a range when the last thing
01975                             was a character: if this is a hyphen not at the
01976                             beginning or the end of a list, then it's the range
01977                             operator.  */
01978                          if (c == '-' 
01979                                 && !(p - 2 >= pattern && p[-2] == '[') 
01980                                 && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
01981                                 && *p != ']')
01982                            {
01983                                 reg_errcode_t ret
01984                                   = compile_range (&p, pend, translate, syntax, b);
01985                                 if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
01986                            }
01987 
01988                          else if (p[0] == '-' && p[1] != ']')
01989                            { /* This handles ranges made up of characters only.  */
01990                                 reg_errcode_t ret;
01991 
01992                     /* Move past the `-'.  */
01993                                 PATFETCH (c1);
01994                                 
01995                                 ret = compile_range (&p, pend, translate, syntax, b);
01996                                 if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
01997                            }
01998 
01999                          /* See if we're at the beginning of a possible character
02000                             class.  */
02001 
02002                          else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
02003                            { /* Leave room for the null.  */
02004                                 char str[CHAR_CLASS_MAX_LENGTH + 1];
02005 
02006                                 PATFETCH (c);
02007                                 c1 = 0;
02008 
02009                                 /* If pattern is `[[:'.  */
02010                                 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
02011 
02012                                 for (;;)
02013                                   {
02014                                     PATFETCH (c);
02015                                     if (c == ':' || c == ']' || p == pend
02016                                            || c1 == CHAR_CLASS_MAX_LENGTH)
02017                                          break;
02018                                     str[c1++] = c;
02019                                   }
02020                                 str[c1] = '\0';
02021 
02022                                 /* If isn't a word bracketed by `[:' and:`]':
02023                                    undo the ending character, the letters, and leave 
02024                                    the leading `:' and `[' (but set bits for them).  */
02025                                 if (c == ':' && *p == ']')
02026                                   {
02027                                     int ch;
02028                                     boolean is_alnum = STREQ (str, "alnum");
02029                                     boolean is_alpha = STREQ (str, "alpha");
02030                                     boolean is_blank = STREQ (str, "blank");
02031                                     boolean is_cntrl = STREQ (str, "cntrl");
02032                                     boolean is_digit = STREQ (str, "digit");
02033                                     boolean is_graph = STREQ (str, "graph");
02034                                     boolean is_lower = STREQ (str, "lower");
02035                                     boolean is_print = STREQ (str, "print");
02036                                     boolean is_punct = STREQ (str, "punct");
02037                                     boolean is_space = STREQ (str, "space");
02038                                     boolean is_upper = STREQ (str, "upper");
02039                                     boolean is_xdigit = STREQ (str, "xdigit");
02040                                     
02041                                     if (!IS_CHAR_CLASS (str))
02042                           FREE_STACK_RETURN (REG_ECTYPE);
02043 
02044                                     /* Throw away the ] at the end of the character
02045                                           class.  */
02046                                     PATFETCH (c);                                       
02047 
02048                                     if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
02049 
02050                                     for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
02051                                          {
02052                             /* This was split into 3 if's to
02053                                   avoid an arbitrary limit in some compiler.  */
02054                                            if (   (is_alnum  && ISALNUM (ch))
02055                                                   || (is_alpha  && ISALPHA (ch))
02056                                                   || (is_blank  && ISBLANK (ch))
02057                                                   || (is_cntrl  && ISCNTRL (ch)))
02058                                  SET_LIST_BIT (ch);
02059                             if (   (is_digit  && ISDIGIT (ch))
02060                                                   || (is_graph  && ISGRAPH (ch))
02061                                                   || (is_lower  && ISLOWER (ch))
02062                                                   || (is_print  && ISPRINT (ch)))
02063                                  SET_LIST_BIT (ch);
02064                             if (   (is_punct  && ISPUNCT (ch))
02065                                                   || (is_space  && ISSPACE (ch))
02066                                                   || (is_upper  && ISUPPER (ch))
02067                                                   || (is_xdigit && ISXDIGIT (ch)))
02068                                  SET_LIST_BIT (ch);
02069                                          }
02070                                     had_char_class = true;
02071                                   }
02072                                 else
02073                                   {
02074                                     c1++;
02075                                     while (c1--)    
02076                                          PATUNFETCH;
02077                                     SET_LIST_BIT ('[');
02078                                     SET_LIST_BIT (':');
02079                                     had_char_class = false;
02080                                   }
02081                            }
02082                          else
02083                            {
02084                                 had_char_class = false;
02085                                 SET_LIST_BIT (c);
02086                            }
02087                     }
02088 
02089                   /* Discard any (non)matching list bytes that are all 0 at the
02090                         end of the map.  Decrease the map-length byte too.  */
02091                   while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) 
02092                     b[-1]--; 
02093                   b += b[-1];
02094                 }
02095                 break;
02096 
02097 
02098         case '(':
02099                 if (syntax & RE_NO_BK_PARENS)
02100                   goto handle_open;
02101                 else
02102                   goto normal_char;
02103 
02104 
02105            case ')':
02106                 if (syntax & RE_NO_BK_PARENS)
02107                   goto handle_close;
02108                 else
02109                   goto normal_char;
02110 
02111 
02112            case '\n':
02113                 if (syntax & RE_NEWLINE_ALT)
02114                   goto handle_alt;
02115                 else
02116                   goto normal_char;
02117 
02118 
02119         case '|':
02120                 if (syntax & RE_NO_BK_VBAR)
02121                   goto handle_alt;
02122                 else
02123                   goto normal_char;
02124 
02125 
02126            case '{':
02127                  if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
02128                    goto handle_interval;
02129                  else
02130                    goto normal_char;
02131 
02132 
02133            case '\\':
02134                 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
02135 
02136                 /* Do not translate the character after the \, so that we can
02137                    distinguish, e.g., \B from \b, even if we normally would
02138                    translate, e.g., B to b.  */
02139                 PATFETCH_RAW (c);
02140 
02141                 switch (c)
02142                   {
02143                   case '(':
02144                     if (syntax & RE_NO_BK_PARENS)
02145                          goto normal_backslash;
02146 
02147                   handle_open:
02148                     bufp->re_nsub++;
02149                     regnum++;
02150 
02151                     if (COMPILE_STACK_FULL)
02152                          { 
02153                            RETALLOC (compile_stack.stack, compile_stack.size << 1,
02154                                            compile_stack_elt_t);
02155                            if (compile_stack.stack == NULL) return REG_ESPACE;
02156 
02157                            compile_stack.size <<= 1;
02158                          }
02159 
02160                     /* These are the values to restore when we hit end of this
02161                           group.  They are all relative offsets, so that if the
02162                           whole pattern moves because of realloc, they will still
02163                           be valid.  */
02164                     COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
02165                     COMPILE_STACK_TOP.fixup_alt_jump 
02166                          = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
02167                     COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
02168                     COMPILE_STACK_TOP.regnum = regnum;
02169 
02170                     /* We will eventually replace the 0 with the number of
02171                           groups inner to this one.  But do not push a
02172                           start_memory for groups beyond the last one we can
02173                           represent in the compiled pattern.  */
02174                     if (regnum <= MAX_REGNUM)
02175                          {
02176                            COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
02177                            BUF_PUSH_3 (start_memory, regnum, 0);
02178                          }
02179                          
02180                     compile_stack.avail++;
02181 
02182                     fixup_alt_jump = 0;
02183                     laststart = 0;
02184                     begalt = b;
02185                  /* If we've reached MAX_REGNUM groups, then this open
02186                  won't actually generate any code, so we'll have to
02187                  clear pending_exact explicitly.  */
02188                  pending_exact = 0;
02189                     break;
02190 
02191 
02192                   case ')':
02193                     if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
02194 
02195                     if (COMPILE_STACK_EMPTY)
02196                          if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
02197                            goto normal_backslash;
02198                          else
02199                            FREE_STACK_RETURN (REG_ERPAREN);
02200 
02201                   handle_close:
02202                     if (fixup_alt_jump)
02203                          { /* Push a dummy failure point at the end of the
02204                                  alternative for a possible future
02205                                  `pop_failure_jump' to pop.  See comments at
02206                                  `push_dummy_failure' in `re_match_2'.  */
02207                            BUF_PUSH (push_dummy_failure);
02208                            
02209                            /* We allocated space for this jump when we assigned
02210                                  to `fixup_alt_jump', in the `handle_alt' case below.  */
02211                            STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
02212                          }
02213 
02214                     /* See similar code for backslashed left paren above.  */
02215                     if (COMPILE_STACK_EMPTY)
02216                          if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
02217                            goto normal_char;
02218                          else
02219                            FREE_STACK_RETURN (REG_ERPAREN);
02220 
02221                     /* Since we just checked for an empty stack above, this
02222                           ``can't happen''.  */
02223                     assert (compile_stack.avail != 0);
02224                     {
02225                          /* We don't just want to restore into `regnum', because
02226                             later groups should continue to be numbered higher,
02227                             as in `(ab)c(de)' -- the second group is #2.  */
02228                          regnum_t this_group_regnum;
02229 
02230                          compile_stack.avail--;         
02231                          begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
02232                          fixup_alt_jump
02233                            = COMPILE_STACK_TOP.fixup_alt_jump
02234                                 ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 
02235                                 : 0;
02236                          laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
02237                          this_group_regnum = COMPILE_STACK_TOP.regnum;
02238                 /* If we've reached MAX_REGNUM groups, then this open
02239                    won't actually generate any code, so we'll have to
02240                    clear pending_exact explicitly.  */
02241                 pending_exact = 0;
02242 
02243                          /* We're at the end of the group, so now we know how many
02244                             groups were inside this one.  */
02245                          if (this_group_regnum <= MAX_REGNUM)
02246                            {
02247                                 unsigned char *inner_group_loc
02248                                   = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
02249                                 
02250                                 *inner_group_loc = regnum - this_group_regnum;
02251                                 BUF_PUSH_3 (stop_memory, this_group_regnum,
02252                                                   regnum - this_group_regnum);
02253                            }
02254                     }
02255                     break;
02256 
02257 
02258                   case '|':                                     /* `\|'.  */
02259                     if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
02260                          goto normal_backslash;
02261                   handle_alt:
02262                     if (syntax & RE_LIMITED_OPS)
02263                          goto normal_char;
02264 
02265                     /* Insert before the previous alternative a jump which
02266                           jumps to this alternative if the former fails.  */
02267                     GET_BUFFER_SPACE (3);
02268                     INSERT_JUMP (on_failure_jump, begalt, b + 6);
02269                     pending_exact = 0;
02270                     b += 3;
02271 
02272                     /* The alternative before this one has a jump after it
02273                           which gets executed if it gets matched.  Adjust that
02274                           jump so it will jump to this alternative's analogous
02275                           jump (put in below, which in turn will jump to the next
02276                           (if any) alternative's such jump, etc.).  The last such
02277                           jump jumps to the correct final destination.  A picture:
02278                                          _____ _____ 
02279                                          |   | |   |   
02280                                          |   v |   v 
02281                                         a | b   | c   
02282 
02283                           If we are at `b', then fixup_alt_jump right now points to a
02284                           three-byte space after `a'.  We'll put in the jump, set
02285                           fixup_alt_jump to right after `b', and leave behind three
02286                           bytes which we'll fill in when we get to after `c'.  */
02287 
02288                     if (fixup_alt_jump)
02289                          STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
02290 
02291                     /* Mark and leave space for a jump after this alternative,
02292                           to be filled in later either by next alternative or
02293                           when know we're at the end of a series of alternatives.  */
02294                     fixup_alt_jump = b;
02295                     GET_BUFFER_SPACE (3);
02296                     b += 3;
02297 
02298                     laststart = 0;
02299                     begalt = b;
02300                     break;
02301 
02302 
02303                   case '{': 
02304                     /* If \{ is a literal.  */
02305                     if (!(syntax & RE_INTERVALS)
02306                                  /* If we're at `\{' and it's not the open-interval 
02307                                     operator.  */
02308                            || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
02309                            || (p - 2 == pattern  &&  p == pend))
02310                          goto normal_backslash;
02311 
02312                   handle_interval:
02313                     {
02314                          /* If got here, then the syntax allows intervals.  */
02315 
02316                          /* At least (most) this many matches must be made.  */
02317                          int lower_bound = -1, upper_bound = -1;
02318 
02319                          beg_interval = p - 1;
02320 
02321                          if (p == pend)
02322                            {
02323                                 if (syntax & RE_NO_BK_BRACES)
02324                                   goto unfetch_interval;
02325                                 else
02326                                   FREE_STACK_RETURN (REG_EBRACE);
02327                            }
02328 
02329                          GET_UNSIGNED_NUMBER (lower_bound);
02330 
02331                          if (c == ',')
02332                            {
02333                                 GET_UNSIGNED_NUMBER (upper_bound);
02334                                 if (upper_bound < 0) upper_bound = RE_DUP_MAX;
02335                            }
02336                          else
02337                            /* Interval such as `{1}' => match exactly once. */
02338                            upper_bound = lower_bound;
02339 
02340                          if (lower_bound < 0 || upper_bound > RE_DUP_MAX
02341                                 || lower_bound > upper_bound)
02342                            {
02343                                 if (syntax & RE_NO_BK_BRACES)
02344                                   goto unfetch_interval;
02345                                 else 
02346                                   FREE_STACK_RETURN (REG_BADBR);
02347                            }
02348 
02349                          if (!(syntax & RE_NO_BK_BRACES)) 
02350                            {
02351                                 if (c != '\\') FREE_STACK_RETURN (REG_EBRACE);
02352 
02353                                 PATFETCH (c);
02354                            }
02355 
02356                          if (c != '}')
02357                            {
02358                                 if (syntax & RE_NO_BK_BRACES)
02359                                   goto unfetch_interval;
02360                                 else 
02361                                   FREE_STACK_RETURN (REG_BADBR);
02362                            }
02363 
02364                          /* We just parsed a valid interval.  */
02365 
02366                          /* If it's invalid to have no preceding re.  */
02367                          if (!laststart)
02368                            {
02369                                 if (syntax & RE_CONTEXT_INVALID_OPS)
02370                                   FREE_STACK_RETURN (REG_BADRPT);
02371                                 else if (syntax & RE_CONTEXT_INDEP_OPS)
02372                                   laststart = b;
02373                                 else
02374                                   goto unfetch_interval;
02375                            }
02376 
02377                          /* If the upper bound is zero, don't want to succeed at
02378                             all; jump from `laststart' to `b + 3', which will be
02379                             the end of the buffer after we insert the jump.  */
02380                           if (upper_bound == 0)
02381                             {
02382                                  GET_BUFFER_SPACE (3);
02383                                  INSERT_JUMP (jump, laststart, b + 3);
02384                                  b += 3;
02385                             }
02386 
02387                           /* Otherwise, we have a nontrivial interval.  When
02388                                 we're all done, the pattern will look like:
02389                                   set_number_at <jump count> <upper bound>
02390                                   set_number_at <succeed_n count> <lower bound>
02391                                   succeed_n <after jump addr> <succeed_n count>
02392                                   <body of loop>
02393                                   jump_n <succeed_n addr> <jump count>
02394                                 (The upper bound and `jump_n' are omitted if
02395                                 `upper_bound' is 1, though.)  */
02396                           else 
02397                             { /* If the upper bound is > 1, we need to insert
02398                                     more at the end of the loop.  */
02399                                  unsigned nbytes = 10 + (upper_bound > 1) * 10;
02400 
02401                                  GET_BUFFER_SPACE (nbytes);
02402 
02403                                  /* Initialize lower bound of the `succeed_n', even
02404                                     though it will be set during matching by its
02405                                     attendant `set_number_at' (inserted next),
02406                                     because `re_compile_fastmap' needs to know.
02407                                     Jump to the `jump_n' we might insert below.  */
02408                                  INSERT_JUMP2 (succeed_n, laststart,
02409                                                         b + 5 + (upper_bound > 1) * 5,
02410                                                         lower_bound);
02411                                  b += 5;
02412 
02413                                  /* Code to initialize the lower bound.  Insert 
02414                                     before the `succeed_n'.  The `5' is the last two
02415                                     bytes of this `set_number_at', plus 3 bytes of
02416                                     the following `succeed_n'.  */
02417                                  insert_op2 (set_number_at, laststart, 5, lower_bound, b);
02418                                  b += 5;
02419 
02420                                  if (upper_bound > 1)
02421                                    { /* More than one repetition is allowed, so
02422                                            append a backward jump to the `succeed_n'
02423                                            that starts this interval.
02424                                            
02425                                            When we've reached this during matching,
02426                                            we'll have matched the interval once, so
02427                                            jump back only `upper_bound - 1' times.  */
02428                                         STORE_JUMP2 (jump_n, b, laststart + 5,
02429                                                            upper_bound - 1);
02430                                         b += 5;
02431 
02432                                         /* The location we want to set is the second
02433                                            parameter of the `jump_n'; that is `b-2' as
02434                                            an absolute address.  `laststart' will be
02435                                            the `set_number_at' we're about to insert;
02436                                            `laststart+3' the number to set, the source
02437                                            for the relative address.  But we are
02438                                            inserting into the middle of the pattern --
02439                                            so everything is getting moved up by 5.
02440                                            Conclusion: (b - 2) - (laststart + 3) + 5,
02441                                            i.e., b - laststart.
02442                                            
02443                                            We insert this at the beginning of the loop
02444                                            so that if we fail during matching, we'll
02445                                            reinitialize the bounds.  */
02446                                         insert_op2 (set_number_at, laststart, b - laststart,
02447                                                           upper_bound - 1, b);
02448                                         b += 5;
02449                                    }
02450                             }
02451                          pending_exact = 0;
02452                          beg_interval = NULL;
02453                     }
02454                     break;
02455 
02456                   unfetch_interval:
02457                     /* If an invalid interval, match the characters as literals.  */
02458                         assert (beg_interval);
02459                         p = beg_interval;
02460                         beg_interval = NULL;
02461 
02462                         /* normal_char and normal_backslash need `c'.  */
02463                         PATFETCH (c);   
02464 
02465                         if (!(syntax & RE_NO_BK_BRACES))
02466                           {
02467                             if (p > pattern  &&  p[-1] == '\\')
02468                                  goto normal_backslash;
02469                           }
02470                         goto normal_char;
02471 
02472 #ifdef emacs
02473                   /* There is no way to specify the before_dot and after_dot
02474                         operators.  rms says this is ok.  --karl  */
02475                   case '=':
02476                     BUF_PUSH (at_dot);
02477                     break;
02478 
02479                   case 's':     
02480                     laststart = b;
02481                     PATFETCH (c);
02482                     BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
02483                     break;
02484 
02485                   case 'S':
02486                     laststart = b;
02487                     PATFETCH (c);
02488                     BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
02489                     break;
02490 #endif /* emacs */
02491 
02492 
02493                   case 'w':
02494                     laststart = b;
02495                     BUF_PUSH (wordchar);
02496                     break;
02497 
02498 
02499                   case 'W':
02500                     laststart = b;
02501                     BUF_PUSH (notwordchar);
02502                     break;
02503 
02504 
02505                   case '<':
02506                     BUF_PUSH (wordbeg);
02507                     break;
02508 
02509                   case '>':
02510                     BUF_PUSH (wordend);
02511                     break;
02512 
02513                   case 'b':
02514                     BUF_PUSH (wordbound);
02515                     break;
02516 
02517                   case 'B':
02518                     BUF_PUSH (notwordbound);
02519                     break;
02520 
02521                   case '`':
02522                     BUF_PUSH (begbuf);
02523                     break;
02524 
02525                   case '\'':
02526                     BUF_PUSH (endbuf);
02527                     break;
02528 
02529                   case '1': case '2': case '3': case '4': case '5':
02530                   case '6': case '7': case '8': case '9':
02531                     if (syntax & RE_NO_BK_REFS)
02532                          goto normal_char;
02533 
02534                     c1 = c - '0';
02535 
02536                     if (c1 > regnum)
02537                          FREE_STACK_RETURN (REG_ESUBREG);
02538 
02539                     /* Can't back reference to a subexpression if inside of it.  */
02540                     if (group_in_compile_stack (compile_stack, c1))
02541                          goto normal_char;
02542 
02543                     laststart = b;
02544                     BUF_PUSH_2 (duplicate, c1);
02545                     break;
02546 
02547 
02548                   case '+':
02549                   case '?':
02550                     if (syntax & RE_BK_PLUS_QM)
02551                          goto handle_plus;
02552                     else
02553                          goto normal_backslash;
02554 
02555                   default:
02556                   normal_backslash:
02557                     /* You might think it would be useful for \ to mean
02558                           not to translate; but if we don't translate it
02559                           it will never match anything.  */
02560                     c = TRANSLATE (c);
02561                     goto normal_char;
02562                   }
02563                 break;
02564 
02565 
02566         default:
02567            /* Expects the character in `c'.  */
02568         normal_char:
02569                  /* If no exactn currently being built.  */
02570                 if (!pending_exact 
02571 
02572                     /* If last exactn not at current position.  */
02573                     || pending_exact + *pending_exact + 1 != b
02574                     
02575                     /* We have only one byte following the exactn for the count.  */
02576                  || *pending_exact == (1 << BYTEWIDTH) - 1
02577 
02578                     /* If followed by a repetition operator.  */
02579                     || *p == '*' || *p == '^'
02580                  || ((syntax & RE_BK_PLUS_QM)
02581                   ? *p == '\\' && (p[1] == '+' || p[1] == '?')
02582                   : (*p == '+' || *p == '?'))
02583                  || ((syntax & RE_INTERVALS)
02584                            && ((syntax & RE_NO_BK_BRACES)
02585                          ? *p == '{'
02586                                   : (p[0] == '\\' && p[1] == '{'))))
02587             {
02588                  /* Start building a new exactn.  */
02589                     
02590                     laststart = b;
02591 
02592                  BUF_PUSH_2 (exactn, 0);
02593                  pending_exact = b - 1;
02594                   }
02595                   
02596           BUF_PUSH (c);
02597                 (*pending_exact)++;
02598           break;
02599            } /* switch (c) */
02600     } /* while p != pend */
02601 
02602   
02603   /* Through the pattern now.  */
02604   
02605   if (fixup_alt_jump)
02606     STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
02607 
02608   if (!COMPILE_STACK_EMPTY) 
02609     FREE_STACK_RETURN (REG_EPAREN);
02610 
02611   /* If we don't want backtracking, force success
02612         the first time we reach the end of the compiled pattern.  */
02613   if (syntax & RE_NO_POSIX_BACKTRACKING)
02614     BUF_PUSH (succeed);
02615 
02616   free (compile_stack.stack);
02617 
02618   /* We have succeeded; set the length of the buffer.  */
02619   bufp->used = b - bufp->buffer;
02620 
02621 #ifdef DEBUG
02622   if (debug)
02623     {
02624          DEBUG_PRINT1 ("\nCompiled pattern: \n");
02625          print_compiled_pattern (bufp);
02626     }
02627 #endif /* DEBUG */
02628 
02629 #ifndef MATCH_MAY_ALLOCATE
02630   /* Initialize the failure stack to the largest possible stack.  This
02631         isn't necessary unless we're trying to avoid calling alloca in
02632         the search and match routines.  */
02633   {
02634     int num_regs = bufp->re_nsub + 1;
02635 
02636     /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
02637           is strictly greater than re_max_failures, the largest possible stack
02638           is 2 * re_max_failures failure points.  */
02639     if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
02640          {
02641         fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
02642 
02643 #ifdef emacs
02644         if (! fail_stack.stack)
02645           fail_stack.stack
02646             = (fail_stack_elt_t *) xmalloc (fail_stack.size 
02647                                             * sizeof (fail_stack_elt_t));
02648         else
02649           fail_stack.stack
02650             = (fail_stack_elt_t *) xrealloc (fail_stack.stack,
02651                                                 (fail_stack.size
02652                                                  * sizeof (fail_stack_elt_t)));
02653 #else /* not emacs */
02654         if (! fail_stack.stack)
02655           fail_stack.stack
02656             = (fail_stack_elt_t *) malloc (fail_stack.size 
02657                                            * sizeof (fail_stack_elt_t));
02658         else
02659           fail_stack.stack
02660             = (fail_stack_elt_t *) realloc (fail_stack.stack,
02661                                             (fail_stack.size
02662                                                 * sizeof (fail_stack_elt_t)));
02663 #endif /* not emacs */
02664          }
02665 
02666     regex_grow_registers (num_regs);
02667   }
02668 #endif /* not MATCH_MAY_ALLOCATE */
02669 
02670   return REG_NOERROR;
02671 } /* regex_compile */
02672 
02673 /* Subroutines for `regex_compile'.  */
02674 
02675 /* Store OP at LOC followed by two-byte integer parameter ARG.  */
02676 
02677 static void
02678 store_op1 (op, loc, arg)
02679     re_opcode_t op;
02680     unsigned char *loc;
02681     int arg;
02682 {
02683   *loc = (unsigned char) op;
02684   STORE_NUMBER (loc + 1, arg);
02685 }
02686 
02687 
02688 /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
02689 
02690 static void
02691 store_op2 (op, loc, arg1, arg2)
02692     re_opcode_t op;
02693     unsigned char *loc;
02694     int arg1, arg2;
02695 {
02696   *loc = (unsigned char) op;
02697   STORE_NUMBER (loc + 1, arg1);
02698   STORE_NUMBER (loc + 3, arg2);
02699 }
02700 
02701 
02702 /* Copy the bytes from LOC to END to open up three bytes of space at LOC
02703    for OP followed by two-byte integer parameter ARG.  */
02704 
02705 static void
02706 insert_op1 (op, loc, arg, end)
02707     re_opcode_t op;
02708     unsigned char *loc;
02709     int arg;
02710     unsigned char *end;    
02711 {
02712   register unsigned char *pfrom = end;
02713   register unsigned char *pto = end + 3;
02714 
02715   while (pfrom != loc)
02716     *--pto = *--pfrom;
02717     
02718   store_op1 (op, loc, arg);
02719 }
02720 
02721 
02722 /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
02723 
02724 static void
02725 insert_op2 (op, loc, arg1, arg2, end)
02726     re_opcode_t op;
02727     unsigned char *loc;
02728     int arg1, arg2;
02729     unsigned char *end;    
02730 {
02731   register unsigned char *pfrom = end;
02732   register unsigned char *pto = end + 5;
02733 
02734   while (pfrom != loc)
02735     *--pto = *--pfrom;
02736     
02737   store_op2 (op, loc, arg1, arg2);
02738 }
02739 
02740 
02741 /* P points to just after a ^ in PATTERN.  Return true if that ^ comes
02742    after an alternative or a begin-subexpression.  We assume there is at
02743    least one character before the ^.  */
02744 
02745 static boolean
02746 at_begline_loc_p (pattern, p, syntax)
02747     const char *pattern, *p;
02748     reg_syntax_t syntax;
02749 {
02750   const char *prev = p - 2;
02751   boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
02752   
02753   return
02754           /* After a subexpression?  */
02755           (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
02756           /* After an alternative?  */
02757     || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
02758 }
02759 
02760 
02761 /* The dual of at_begline_loc_p.  This one is for $.  We assume there is
02762    at least one character after the $, i.e., `P < PEND'.  */
02763 
02764 static boolean
02765 at_endline_loc_p (p, pend, syntax)
02766     const char *p, *pend;
02767     int syntax;
02768 {
02769   const char *next = p;
02770   boolean next_backslash = *next == '\\';
02771   const char *next_next = p + 1 < pend ? p + 1 : 0;
02772   
02773   return
02774           /* Before a subexpression?  */
02775           (syntax & RE_NO_BK_PARENS ? *next == ')'
02776            : next_backslash && next_next && *next_next == ')')
02777           /* Before an alternative?  */
02778     || (syntax & RE_NO_BK_VBAR ? *next == '|'
02779            : next_backslash && next_next && *next_next == '|');
02780 }
02781 
02782 
02783 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and 
02784    false if it's not.  */
02785 
02786 static boolean
02787 group_in_compile_stack (compile_stack, regnum)
02788     compile_stack_type compile_stack;
02789     regnum_t regnum;
02790 {
02791   int this_element;
02792 
02793   for (this_element = compile_stack.avail - 1;  
02794           this_element >= 0; 
02795           this_element--)
02796     if (compile_stack.stack[this_element].regnum == regnum)
02797          return true;
02798 
02799   return false;
02800 }
02801 
02802 
02803 /* Read the ending character of a range (in a bracket expression) from the
02804    uncompiled pattern *P_PTR (which ends at PEND).  We assume the
02805    starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
02806    Then we set the translation of all bits between the starting and
02807    ending characters (inclusive) in the compiled pattern B.
02808    
02809    Return an error code.
02810    
02811    We use these short variable names so we can use the same macros as
02812    `regex_compile' itself.  */
02813 
02814 static reg_errcode_t
02815 compile_range (p_ptr, pend, translate, syntax, b)
02816     const char **p_ptr, *pend;
02817     char *translate;
02818     reg_syntax_t syntax;
02819     unsigned char *b;
02820 {
02821   unsigned this_char;
02822 
02823   const char *p = *p_ptr;
02824   int range_start, range_end;
02825   
02826   if (p == pend)
02827     return REG_ERANGE;
02828 
02829   /* Even though the pattern is a signed `char *', we need to fetch
02830         with unsigned char *'s; if the high bit of the pattern character
02831         is set, the range endpoints will be negative if we fetch using a
02832         signed char *.
02833 
02834         We also want to fetch the endpoints without translating them; the 
02835         appropriate translation is done in the bit-setting loop below.  */
02836   /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *.  */
02837   range_start = ((const unsigned char *) p)[-2];
02838   range_end   = ((const unsigned char *) p)[0];
02839 
02840   /* Have to increment the pointer into the pattern string, so the
02841         caller isn't still at the ending character.  */
02842   (*p_ptr)++;
02843 
02844   /* If the start is after the end, the range is empty.  */
02845   if (range_start > range_end)
02846     return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
02847 
02848   /* Here we see why `this_char' has to be larger than an `unsigned
02849         char' -- the range is inclusive, so if `range_end' == 0xff
02850         (assuming 8-bit characters), we would otherwise go into an infinite
02851         loop, since all characters <= 0xff.  */
02852   for (this_char = range_start; this_char <= range_end; this_char++)
02853     {
02854          SET_LIST_BIT (TRANSLATE (this_char));
02855     }
02856   
02857   return REG_NOERROR;
02858 }
02859 
02860 /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
02861    BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
02862    characters can start a string that matches the pattern.  This fastmap
02863    is used by re_search to skip quickly over impossible starting points.
02864 
02865    The caller must supply the address of a (1 << BYTEWIDTH)-byte data
02866    area as BUFP->fastmap.
02867    
02868    We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
02869    the pattern buffer.
02870 
02871    Returns 0 if we succeed, -2 if an internal error.   */
02872 
02873 int
02874 re_compile_fastmap (bufp)
02875         struct re_pattern_buffer *bufp;
02876 {
02877   int j, k;
02878 #ifdef MATCH_MAY_ALLOCATE
02879   fail_stack_type fail_stack;
02880 #endif
02881 #ifndef REGEX_MALLOC
02882   char *destination;
02883 #endif
02884   /* We don't push any register information onto the failure stack.  */
02885   unsigned num_regs = 0;
02886   
02887   register char *fastmap = bufp->fastmap;
02888   unsigned char *pattern = bufp->buffer;
02889   unsigned long size = bufp->used;
02890   unsigned char *p = pattern;
02891   register unsigned char *pend = pattern + size;
02892 
02893 #ifdef REL_ALLOC
02894   /* This holds the pointer to the failure stack, when
02895         it is allocated relocatably.  */
02896   fail_stack_elt_t *failure_stack_ptr;
02897 #endif
02898 
02899   /* Assume that each path through the pattern can be null until
02900         proven otherwise.  We set this false at the bottom of switch
02901         statement, to which we get only if a particular path doesn't
02902         match the empty string.  */
02903   boolean path_can_be_null = true;
02904 
02905   /* We aren't doing a `succeed_n' to begin with.  */
02906   boolean succeed_n_p = false;
02907 
02908   assert (fastmap != NULL && p != NULL);
02909   
02910   INIT_FAIL_STACK ();
02911   bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
02912   bufp->fastmap_accurate = 1;       /* It will be when we're done.  */
02913   bufp->can_be_null = 0;
02914          
02915   while (1)
02916     {
02917          if (p == pend || *p == succeed)
02918         {
02919           /* We have reached the (effective) end of pattern.  */
02920           if (!FAIL_STACK_EMPTY ())
02921             {
02922                  bufp->can_be_null |= path_can_be_null;
02923 
02924                  /* Reset for next path.  */
02925                  path_can_be_null = true;
02926 
02927                  p = fail_stack.stack[--fail_stack.avail].pointer;
02928 
02929                  continue;
02930             }
02931           else
02932             break;
02933         }
02934 
02935          /* We should never be about to go beyond the end of the pattern.  */
02936          assert (p < pend);
02937          
02938          switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
02939         {
02940 
02941            /* I guess the idea here is to simply not bother with a fastmap
02942                  if a backreference is used, since it's too hard to figure out
02943                  the fastmap for the corresponding group.  Setting
02944                  `can_be_null' stops `re_search_2' from using the fastmap, so
02945                  that is all we do.  */
02946         case duplicate:
02947           bufp->can_be_null = 1;
02948                 goto done;
02949 
02950 
02951          /* Following are the cases which match a character.  These end
02952             with `break'.  */
02953 
02954         case exactn:
02955                 fastmap[p[1]] = 1;
02956           break;
02957 
02958 
02959            case charset:
02960                 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
02961             if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
02962                     fastmap[j] = 1;
02963           break;
02964 
02965 
02966         case charset_not:
02967           /* Chars beyond end of map must be allowed.  */
02968           for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
02969                   fastmap[j] = 1;
02970 
02971           for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
02972             if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
02973                     fastmap[j] = 1;
02974                 break;
02975 
02976 
02977         case wordchar:
02978           for (j = 0; j < (1 << BYTEWIDTH); j++)
02979             if (SYNTAX (j) == Sword)
02980                  fastmap[j] = 1;
02981           break;
02982 
02983 
02984         case notwordchar:
02985           for (j = 0; j < (1 << BYTEWIDTH); j++)
02986             if (SYNTAX (j) != Sword)
02987                  fastmap[j] = 1;
02988           break;
02989 
02990 
02991            case anychar:
02992           {
02993             int fastmap_newline = fastmap['\n'];
02994 
02995             /* `.' matches anything ...  */
02996             for (j = 0; j < (1 << BYTEWIDTH); j++)
02997                  fastmap[j] = 1;
02998 
02999             /* ... except perhaps newline.  */
03000             if (!(bufp->syntax & RE_DOT_NEWLINE))
03001                  fastmap['\n'] = fastmap_newline;
03002 
03003             /* Return if we have already set `can_be_null'; if we have,
03004                   then the fastmap is irrelevant.  Something's wrong here.  */
03005             else if (bufp->can_be_null)
03006                  goto done;
03007 
03008             /* Otherwise, have to check alternative paths.  */
03009             break;
03010           }
03011 
03012 #ifdef emacs
03013            case syntaxspec:
03014           k = *p++;
03015           for (j = 0; j < (1 << BYTEWIDTH); j++)
03016             if (SYNTAX (j) == (enum syntaxcode) k)
03017                  fastmap[j] = 1;
03018           break;
03019 
03020 
03021         case notsyntaxspec:
03022           k = *p++;
03023           for (j = 0; j < (1 << BYTEWIDTH); j++)
03024             if (SYNTAX (j) != (enum syntaxcode) k)
03025                  fastmap[j] = 1;
03026           break;
03027 
03028 
03029          /* All cases after this match the empty string.  These end with
03030             `continue'.  */
03031 
03032 
03033         case before_dot:
03034         case at_dot:
03035         case after_dot:
03036                 continue;
03037 #endif /* not emacs */
03038 
03039 
03040            case no_op:
03041            case begline:
03042            case endline:
03043         case begbuf:
03044         case endbuf:
03045         case wordbound:
03046         case notwordbound:
03047         case wordbeg:
03048         case wordend:
03049            case push_dummy_failure:
03050                 continue;
03051 
03052 
03053         case jump_n:
03054            case pop_failure_jump:
03055         case maybe_pop_jump:
03056         case jump:
03057            case jump_past_alt:
03058         case dummy_failure_jump:
03059                 EXTRACT_NUMBER_AND_INCR (j, p);
03060           p += j;       
03061           if (j > 0)
03062             continue;
03063                   
03064                 /* Jump backward implies we just went through the body of a
03065                    loop and matched nothing.  Opcode jumped to should be
03066                    `on_failure_jump' or `succeed_n'.  Just treat it like an
03067                    ordinary jump.  For a * loop, it has pushed its failure
03068                    point already; if so, discard that as redundant.  */
03069                 if ((re_opcode_t) *p != on_failure_jump
03070                  && (re_opcode_t) *p != succeed_n)
03071             continue;
03072 
03073                 p++;
03074                 EXTRACT_NUMBER_AND_INCR (j, p);
03075                 p += j;         
03076           
03077                 /* If what's on the stack is where we are now, pop it.  */
03078                 if (!FAIL_STACK_EMPTY () 
03079                  && fail_stack.stack[fail_stack.avail - 1].pointer == p)
03080                   fail_stack.avail--;
03081 
03082                 continue;
03083 
03084 
03085            case on_failure_jump:
03086            case on_failure_keep_string_jump:
03087         handle_on_failure_jump:
03088                 EXTRACT_NUMBER_AND_INCR (j, p);
03089 
03090                 /* For some patterns, e.g., `(a?)?', `p+j' here points to the
03091                    end of the pattern.  We don't want to push such a point,
03092                    since when we restore it above, entering the switch will
03093                    increment `p' past the end of the pattern.  We don't need
03094                    to push such a point since we obviously won't find any more
03095                    fastmap entries beyond `pend'.  Such a pattern can match
03096                    the null string, though.  */
03097                 if (p + j < pend)
03098                   {
03099                     if (!PUSH_PATTERN_OP (p + j, fail_stack))
03100                 {
03101                   RESET_FAIL_STACK ();
03102                   return -2;
03103                 }
03104                   }
03105                 else
03106                   bufp->can_be_null = 1;
03107 
03108                 if (succeed_n_p)
03109                   {
03110                     EXTRACT_NUMBER_AND_INCR (k, p);     /* Skip the n.  */
03111                     succeed_n_p = false;
03112             }
03113 
03114                 continue;
03115 
03116 
03117         case succeed_n:
03118                 /* Get to the number of times to succeed.  */
03119                 p += 2;         
03120 
03121                 /* Increment p past the n for when k != 0.  */
03122                 EXTRACT_NUMBER_AND_INCR (k, p);
03123                 if (k == 0)
03124             {
03125                     p -= 4;
03126                  succeed_n_p = true;  /* Spaghetti code alert.  */
03127                     goto handle_on_failure_jump;
03128                   }
03129                 continue;
03130 
03131 
03132         case set_number_at:
03133                 p += 4;
03134                 continue;
03135 
03136 
03137         case start_memory:
03138            case stop_memory:
03139           p += 2;
03140           continue;
03141 
03142 
03143         default:
03144                 abort (); /* We have listed all the cases.  */
03145            } /* switch *p++ */
03146 
03147          /* Getting here means we have found the possible starting
03148             characters for one path of the pattern -- and that the empty
03149             string does not match.  We need not follow this path further.
03150             Instead, look at the next alternative (remembered on the
03151             stack), or quit if no more.  The test at the top of the loop
03152             does these things.  */
03153          path_can_be_null = false;
03154          p = pend;
03155     } /* while p */
03156 
03157   /* Set `can_be_null' for the last path (also the first path, if the
03158         pattern is empty).  */
03159   bufp->can_be_null |= path_can_be_null;
03160 
03161  done:
03162   RESET_FAIL_STACK ();
03163   return 0;
03164 } /* re_compile_fastmap */
03165 
03166 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
03167    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
03168    this memory for recording register information.  STARTS and ENDS
03169    must be allocated using the malloc library routine, and must each
03170    be at least NUM_REGS * sizeof (regoff_t) bytes long.
03171 
03172    If NUM_REGS == 0, then subsequent matches should allocate their own
03173    register data.
03174 
03175    Unless this function is called, the first search or match using
03176    PATTERN_BUFFER will allocate its own register data, without
03177    freeing the old data.  */
03178 
03179 void
03180 re_set_registers (bufp, regs, num_regs, starts, ends)
03181     struct re_pattern_buffer *bufp;
03182     struct re_registers *regs;
03183     unsigned num_regs;
03184     regoff_t *starts, *ends;
03185 {
03186   if (num_regs)
03187     {
03188          bufp->regs_allocated = REGS_REALLOCATE;
03189          regs->num_regs = num_regs;
03190          regs->start = starts;
03191          regs->end = ends;
03192     }
03193   else
03194     {
03195          bufp->regs_allocated = REGS_UNALLOCATED;
03196          regs->num_regs = 0;
03197          regs->start = regs->end = (regoff_t *) 0;
03198     }
03199 }
03200 
03201 /* Searching routines.  */
03202 
03203 /* Like re_search_2, below, but only one string is specified, and
03204    doesn't let you say where to stop matching. */
03205 
03206 int
03207 re_search (bufp, string, size, startpos, range, regs)
03208         struct re_pattern_buffer *bufp;
03209         const char *string;
03210         int size, startpos, range;
03211         struct re_registers *regs;
03212 {
03213   return re_search_2 (bufp, NULL, 0, string, size, startpos, range, 
03214                          regs, size);
03215 }
03216 
03217 
03218 /* Using the compiled pattern in BUFP->buffer, first tries to match the
03219    virtual concatenation of STRING1 and STRING2, starting first at index
03220    STARTPOS, then at STARTPOS + 1, and so on.
03221    
03222    STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
03223    
03224    RANGE is how far to scan while trying to match.  RANGE = 0 means try
03225    only at STARTPOS; in general, the last start tried is STARTPOS +
03226    RANGE.
03227    
03228    In REGS, return the indices of the virtual concatenation of STRING1
03229    and STRING2 that matched the entire BUFP->buffer and its contained
03230    subexpressions.
03231    
03232    Do not consider matching one past the index STOP in the virtual
03233    concatenation of STRING1 and STRING2.
03234 
03235    We return either the position in the strings at which the match was
03236    found, -1 if no match, or -2 if error (such as failure
03237    stack overflow).  */
03238 
03239 int
03240 re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
03241         struct re_pattern_buffer *bufp;
03242         const char *string1, *string2;
03243         int size1, size2;
03244         int startpos;
03245         int range;
03246         struct re_registers *regs;
03247         int stop;
03248 {
03249   int val;
03250   register char *fastmap = bufp->fastmap;
03251   register char *translate = bufp->translate;
03252   int total_size = size1 + size2;
03253   int endpos = startpos + range;
03254 
03255   /* Check for out-of-range STARTPOS.  */
03256   if (startpos < 0 || startpos > total_size)
03257     return -1;
03258     
03259   /* Fix up RANGE if it might eventually take us outside
03260         the virtual concatenation of STRING1 and STRING2.  */
03261   if (endpos < -1)
03262     range = -1 - startpos;
03263   else if (endpos > total_size)
03264     range = total_size - startpos;
03265 
03266   /* If the search isn't to be a backwards one, don't waste time in a
03267         search for a pattern that must be anchored.  */
03268   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
03269     {
03270          if (startpos > 0)
03271         return -1;
03272          else
03273         range = 1;
03274     }
03275 
03276   /* Update the fastmap now if not correct already.  */
03277   if (fastmap && !bufp->fastmap_accurate)
03278     if (re_compile_fastmap (bufp) == -2)
03279          return -2;
03280   
03281   /* Loop through the string, looking for a place to start matching.  */
03282   for (;;)
03283     { 
03284          /* If a fastmap is supplied, skip quickly over characters that
03285             cannot be the start of a match.  If the pattern can match the
03286             null string, however, we don't need to skip characters; we want
03287             the first null string.  */
03288          if (fastmap && startpos < total_size && !bufp->can_be_null)
03289         {
03290           if (range > 0)        /* Searching forwards.  */
03291             {
03292                  register const char *d;
03293                  register int lim = 0;
03294                  int irange = range;
03295 
03296                     if (startpos < size1 && startpos + range >= size1)
03297                          lim = range - (size1 - startpos);
03298 
03299                  d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
03300    
03301                     /* Written out as an if-else to avoid testing `translate'
03302                           inside the loop.  */
03303                  if (translate)
03304                          while (range > lim
03305                                    && !fastmap[(unsigned char)
03306                                    translate[(unsigned char) *d++]])
03307                            range--;
03308                  else
03309                          while (range > lim && !fastmap[(unsigned char) *d++])
03310                            range--;
03311 
03312                  startpos += irange - range;
03313             }
03314           else                          /* Searching backwards.  */
03315             {
03316                  register char c = (size1 == 0 || startpos >= size1
03317                                                    ? string2[startpos - size1] 
03318                                                    : string1[startpos]);
03319 
03320                  if (!fastmap[(unsigned char) TRANSLATE (c)])
03321                 goto advance;
03322             }
03323         }
03324 
03325          /* If can't match the null string, and that's all we have left, fail.  */
03326          if (range >= 0 && startpos == total_size && fastmap
03327                 && !bufp->can_be_null)
03328         return -1;
03329 
03330          val = re_match_2_internal (bufp, string1, size1, string2, size2,
03331                                  startpos, regs, stop);
03332 #ifndef REGEX_MALLOC
03333 #ifdef C_ALLOCA
03334          alloca (0);
03335 #endif
03336 #endif
03337 
03338          if (val >= 0)
03339         return startpos;
03340            
03341          if (val == -2)
03342         return -2;
03343 
03344     advance:
03345          if (!range) 
03346            break;
03347          else if (range > 0) 
03348            {
03349                 range--; 
03350                 startpos++;
03351            }
03352          else
03353            {
03354                 range++; 
03355                 startpos--;
03356            }
03357     }
03358   return -1;
03359 } /* re_search_2 */
03360 
03361 /* Declarations and macros for re_match_2.  */
03362 
03363 static int bcmp_translate ();
03364 static boolean alt_match_null_string_p (),
03365                         common_op_match_null_string_p (),
03366                         group_match_null_string_p ();
03367 
03368 /* This converts PTR, a pointer into one of the search strings `string1'
03369    and `string2' into an offset from the beginning of that string.  */
03370 #define POINTER_TO_OFFSET(ptr)                  \
03371   (FIRST_STRING_P (ptr)                         \
03372    ? ((regoff_t) ((ptr) - string1))             \
03373    : ((regoff_t) ((ptr) - string2 + size1)))
03374 
03375 /* Macros for dealing with the split strings in re_match_2.  */
03376 
03377 #define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
03378 
03379 /* Call before fetching a character with *d.  This switches over to
03380    string2 if necessary.  */
03381 #define PREFETCH()                                                      \
03382   while (d == dend)                                                     \
03383     {                                                                   \
03384          /* End of string2 => fail.  */                                 \
03385          if (dend == end_match_2)                                               \
03386            goto fail;                                                   \
03387          /* End of string1 => advance to string2.  */                   \
03388          d = string2;                                                   \
03389          dend = end_match_2;                                            \
03390     }
03391 
03392 
03393 /* Test if at very beginning or at very end of the virtual concatenation
03394    of `string1' and `string2'.  If only one string, it's `string2'.  */
03395 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
03396 #define AT_STRINGS_END(d) ((d) == end2) 
03397 
03398 
03399 /* Test if D points to a character which is word-constituent.  We have
03400    two special cases to check for: if past the end of string1, look at
03401    the first character in string2; and if before the beginning of
03402    string2, look at the last character in string1.  */
03403 #define WORDCHAR_P(d)                                                   \
03404   (SYNTAX ((d) == end1 ? *string2                                       \
03405                  : (d) == string2 - 1 ? *(end1 - 1) : *(d))                     \
03406    == Sword)
03407 
03408 /* Test if the character before D and the one at D differ with respect
03409    to being word-constituent.  */
03410 #define AT_WORD_BOUNDARY(d)                                             \
03411   (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)                             \
03412    || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
03413 
03414 
03415 /* Free everything we malloc.  */
03416 #ifdef MATCH_MAY_ALLOCATE
03417 #define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL
03418 #define FREE_VARIABLES()                                                \
03419   do {                                                                  \
03420     REGEX_FREE_STACK (fail_stack.stack);                                \
03421     FREE_VAR (regstart);                                                \
03422     FREE_VAR (regend);                                                  \
03423     FREE_VAR (old_regstart);                                            \
03424     FREE_VAR (old_regend);                                              \
03425     FREE_VAR (best_regstart);                                           \
03426     FREE_VAR (best_regend);                                             \
03427     FREE_VAR (reg_info);                                                \
03428     FREE_VAR (reg_dummy);                                               \
03429     FREE_VAR (reg_info_dummy);                                          \
03430   } while (0)
03431 #else
03432 #define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning.  */
03433 #endif /* not MATCH_MAY_ALLOCATE */
03434 
03435 /* These values must meet several constraints.  They must not be valid
03436    register values; since we have a limit of 255 registers (because
03437    we use only one byte in the pattern for the register number), we can
03438    use numbers larger than 255.  They must differ by 1, because of
03439    NUM_FAILURE_ITEMS above.  And the value for the lowest register must
03440    be larger than the value for the highest register, so we do not try
03441    to actually save any registers when none are active.  */
03442 #define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
03443 #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
03444 
03445 /* Matching routines.  */
03446 
03447 #ifndef emacs   /* Emacs never uses this.  */
03448 /* re_match is like re_match_2 except it takes only a single string.  */
03449 
03450 int
03451 re_match (bufp, string, size, pos, regs)
03452         struct re_pattern_buffer *bufp;
03453         const char *string;
03454         int size, pos;
03455         struct re_registers *regs;
03456 {
03457   int result = re_match_2_internal (bufp, NULL, 0, string, size,
03458                                     pos, regs, size);
03459 #ifndef REGEX_MALLOC
03460   alloca (0);
03461 #endif
03462   return result;
03463 }
03464 #endif /* not emacs */
03465 
03466 
03467 /* re_match_2 matches the compiled pattern in BUFP against the
03468    the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
03469    and SIZE2, respectively).  We start matching at POS, and stop
03470    matching at STOP.
03471    
03472    If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
03473    store offsets for the substring each group matched in REGS.  See the
03474    documentation for exactly how many groups we fill.
03475 
03476    We return -1 if no match, -2 if an internal error (such as the
03477    failure stack overflowing).  Otherwise, we return the length of the
03478    matched substring.  */
03479 
03480 int
03481 re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
03482         struct re_pattern_buffer *bufp;
03483         const char *string1, *string2;
03484         int size1, size2;
03485         int pos;
03486         struct re_registers *regs;
03487         int stop;
03488 {
03489   int result = re_match_2_internal (bufp, string1, size1, string2, size2,
03490                                     pos, regs, stop);
03491 #ifndef REGEX_MALLOC
03492   alloca (0);
03493 #endif
03494   return result;
03495 }
03496 
03497 /* This is a separate function so that we can force an alloca cleanup
03498    afterwards.  */
03499 static int
03500 re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
03501         struct re_pattern_buffer *bufp;
03502         const char *string1, *string2;
03503         int size1, size2;
03504         int pos;
03505         struct re_registers *regs;
03506         int stop;
03507 {
03508   /* General temporaries.  */
03509   int mcnt;
03510   unsigned char *p1;
03511 
03512   /* Just past the end of the corresponding string.  */
03513   const char *end1, *end2;
03514 
03515   /* Pointers into string1 and string2, just past the last characters in
03516         each to consider matching.  */
03517   const char *end_match_1, *end_match_2;
03518 
03519   /* Where we are in the data, and the end of the current string.  */
03520   const char *d, *dend;
03521   
03522   /* Where we are in the pattern, and the end of the pattern.  */
03523   unsigned char *p = bufp->buffer;
03524   register unsigned char *pend = p + bufp->used;
03525 
03526   /* Mark the opcode just after a start_memory, so we can test for an
03527         empty subpattern when we get to the stop_memory.  */
03528   unsigned char *just_past_start_mem = 0;
03529 
03530   /* We use this to map every character in the string.  */
03531   char *translate = bufp->translate;
03532 
03533   /* Failure point stack.  Each place that can handle a failure further
03534         down the line pushes a failure point on this stack.  It consists of
03535         restart, regend, and reg_info for all registers corresponding to
03536         the subexpressions we're currently inside, plus the number of such
03537         registers, and, finally, two char *'s.  The first char * is where
03538         to resume scanning the pattern; the second one is where to resume
03539         scanning the strings.  If the latter is zero, the failure point is
03540         a ``dummy''; if a failure happens and the failure point is a dummy,
03541         it gets discarded and the next next one is tried.  */
03542 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
03543   fail_stack_type fail_stack;
03544 #endif
03545 #ifdef DEBUG
03546   static unsigned failure_id = 0;
03547   unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
03548 #endif
03549 
03550 #ifdef REL_ALLOC
03551   /* This holds the pointer to the failure stack, when
03552         it is allocated relocatably.  */
03553   fail_stack_elt_t *failure_stack_ptr;
03554 #endif
03555 
03556   /* We fill all the registers internally, independent of what we
03557         return, for use in backreferences.  The number here includes
03558         an element for register zero.  */
03559   unsigned num_regs = bufp->re_nsub + 1;
03560   
03561   /* The currently active registers.  */
03562   unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG;
03563   unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG;
03564 
03565   /* Information on the contents of registers. These are pointers into
03566         the input strings; they record just what was matched (on this
03567         attempt) by a subexpression part of the pattern, that is, the
03568         regnum-th regstart pointer points to where in the pattern we began
03569         matching and the regnum-th regend points to right after where we
03570         stopped matching the regnum-th subexpression.  (The zeroth register
03571         keeps track of what the whole pattern matches.)  */
03572 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
03573   const char **regstart, **regend;
03574 #endif
03575 
03576   /* If a group that's operated upon by a repetition operator fails to
03577         match anything, then the register for its start will need to be
03578         restored because it will have been set to wherever in the string we
03579         are when we last see its open-group operator.  Similarly for a
03580         register's end.  */
03581 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
03582   const char **old_regstart, **old_regend;
03583 #endif
03584 
03585   /* The is_active field of reg_info helps us keep track of which (possibly
03586         nested) subexpressions we are currently in. The matched_something
03587         field of reg_info[reg_num] helps us tell whether or not we have
03588         matched any of the pattern so far this time through the reg_num-th
03589         subexpression.  These two fields get reset each time through any
03590         loop their register is in.  */
03591 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
03592   register_info_type *reg_info; 
03593 #endif
03594 
03595   /* The following record the register info as found in the above
03596         variables when we find a match better than any we've seen before. 
03597         This happens as we backtrack through the failure points, which in
03598         turn happens only if we have not yet matched the entire string. */
03599   unsigned best_regs_set = false;
03600 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
03601   const char **best_regstart, **best_regend;
03602 #endif
03603   
03604   /* Logically, this is `best_regend[0]'.  But we don't want to have to
03605         allocate space for that if we're not allocating space for anything
03606         else (see below).  Also, we never need info about register 0 for
03607         any of the other register vectors, and it seems rather a kludge to
03608         treat `best_regend' differently than the rest.  So we keep track of
03609         the end of the best match so far in a separate variable.  We
03610         initialize this to NULL so that when we backtrack the first time
03611         and need to test it, it's not garbage.  */
03612   const char *match_end = NULL;
03613 
03614   /* This helps SET_REGS_MATCHED avoid doing redundant work.  */
03615   int set_regs_matched_done = 0;
03616 
03617   /* Used when we pop values we don't care about.  */
03618 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
03619   const char **reg_dummy;
03620   register_info_type *reg_info_dummy;
03621 #endif
03622 
03623 #ifdef DEBUG
03624   /* Counts the total number of registers pushed.  */
03625   unsigned num_regs_pushed = 0;         
03626 #endif
03627 
03628   DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
03629   
03630   INIT_FAIL_STACK ();
03631   
03632 #ifdef MATCH_MAY_ALLOCATE
03633   /* Do not bother to initialize all the register variables if there are
03634         no groups in the pattern, as it takes a fair amount of time.  If
03635         there are groups, we include space for register 0 (the whole
03636         pattern), even though we never use it, since it simplifies the
03637         array indexing.  We should fix this.  */
03638   if (bufp->re_nsub)
03639     {
03640          regstart = REGEX_TALLOC (num_regs, const char *);
03641          regend = REGEX_TALLOC (num_regs, const char *);
03642          old_regstart = REGEX_TALLOC (num_regs, const char *);
03643          old_regend = REGEX_TALLOC (num_regs, const char *);
03644          best_regstart = REGEX_TALLOC (num_regs, const char *);
03645          best_regend = REGEX_TALLOC (num_regs, const char *);
03646          reg_info = REGEX_TALLOC (num_regs, register_info_type);
03647          reg_dummy = REGEX_TALLOC (num_regs, const char *);
03648          reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
03649 
03650          if (!(regstart && regend && old_regstart && old_regend && reg_info 
03651                   && best_regstart && best_regend && reg_dummy && reg_info_dummy)) 
03652            {
03653                 FREE_VARIABLES ();
03654                 return -2;
03655            }
03656     }
03657   else
03658     {
03659          /* We must initialize all our variables to NULL, so that
03660             `FREE_VARIABLES' doesn't try to free them.  */
03661          regstart = regend = old_regstart = old_regend = best_regstart
03662            = best_regend = reg_dummy = NULL;
03663          reg_info = reg_info_dummy = (register_info_type *) NULL;
03664     }
03665 #endif /* MATCH_MAY_ALLOCATE */
03666 
03667   /* The starting position is bogus.  */
03668   if (pos < 0 || pos > size1 + size2)
03669     {
03670          FREE_VARIABLES ();
03671          return -1;
03672     }
03673     
03674   /* Initialize subexpression text positions to -1 to mark ones that no
03675         start_memory/stop_memory has been seen for. Also initialize the
03676         register information struct.  */
03677   for (mcnt = 1; mcnt < num_regs; mcnt++)
03678     {
03679          regstart[mcnt] = regend[mcnt] 
03680            = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
03681            
03682          REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
03683          IS_ACTIVE (reg_info[mcnt]) = 0;
03684          MATCHED_SOMETHING (reg_info[mcnt]) = 0;
03685          EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
03686     }
03687   
03688   /* We move `string1' into `string2' if the latter's empty -- but not if
03689         `string1' is null.  */
03690   if (size2 == 0 && string1 != NULL)
03691     {
03692          string2 = string1;
03693          size2 = size1;
03694          string1 = 0;
03695          size1 = 0;
03696     }
03697   end1 = string1 + size1;
03698   end2 = string2 + size2;
03699 
03700   /* Compute where to stop matching, within the two strings.  */
03701   if (stop <= size1)
03702     {
03703          end_match_1 = string1 + stop;
03704          end_match_2 = string2;
03705     }
03706   else
03707     {
03708          end_match_1 = end1;
03709          end_match_2 = string2 + stop - size1;
03710     }
03711 
03712   /* `p' scans through the pattern as `d' scans through the data. 
03713         `dend' is the end of the input string that `d' points within.  `d'
03714         is advanced into the following input string whenever necessary, but
03715         this happens before fetching; therefore, at the beginning of the
03716         loop, `d' can be pointing at the end of a string, but it cannot
03717         equal `string2'.  */
03718   if (size1 > 0 && pos <= size1)
03719     {
03720          d = string1 + pos;
03721          dend = end_match_1;
03722     }
03723   else
03724     {
03725          d = string2 + pos - size1;
03726          dend = end_match_2;
03727     }
03728 
03729   DEBUG_PRINT1 ("The compiled pattern is: ");
03730   DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
03731   DEBUG_PRINT1 ("The string to match is: `");
03732   DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
03733   DEBUG_PRINT1 ("'\n");
03734   
03735   /* This loops over pattern commands.  It exits by returning from the
03736         function if the match is complete, or it drops through if the match
03737         fails at this starting point in the input data.  */
03738   for (;;)
03739     {
03740          DEBUG_PRINT2 ("\n0x%x: ", p);
03741 
03742          if (p == pend)
03743         { /* End of pattern means we might have succeeded.  */
03744                 DEBUG_PRINT1 ("end of pattern ... ");
03745                 
03746           /* If we haven't matched the entire string, and we want the
03747                    longest match, try backtracking.  */
03748                 if (d != end_match_2)
03749             {
03750                  /* 1 if this match ends in the same string (string1 or string2)
03751                  as the best previous match.  */
03752                  boolean same_str_p = (FIRST_STRING_P (match_end) 
03753                                     == MATCHING_IN_FIRST_STRING);
03754                  /* 1 if this match is the best seen so far.  */
03755                  boolean best_match_p;
03756 
03757                  /* AIX compiler got confused when this was combined
03758                  with the previous declaration.  */
03759                  if (same_str_p)
03760                 best_match_p = d > match_end;
03761                  else
03762                 best_match_p = !MATCHING_IN_FIRST_STRING;
03763 
03764                     DEBUG_PRINT1 ("backtracking.\n");
03765                     
03766                     if (!FAIL_STACK_EMPTY ())
03767                          { /* More failure points to try.  */
03768 
03769                            /* If exceeds best match so far, save it.  */
03770                            if (!best_regs_set || best_match_p)
03771                                 {
03772                                   best_regs_set = true;
03773                                   match_end = d;
03774                                   
03775                                   DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
03776                                   
03777                                   for (mcnt = 1; mcnt < num_regs; mcnt++)
03778                                     {
03779                                          best_regstart[mcnt] = regstart[mcnt];
03780                                          best_regend[mcnt] = regend[mcnt];
03781                                     }
03782                                 }
03783                            goto fail;          
03784                          }
03785 
03786                     /* If no failure points, don't restore garbage.  And if
03787                           last match is real best match, don't restore second
03788                           best one. */
03789                     else if (best_regs_set && !best_match_p)
03790                          {
03791                    restore_best_regs:
03792                            /* Restore best match.  It may happen that `dend ==
03793                                  end_match_1' while the restored d is in string2.
03794                                  For example, the pattern `x.*y.*z' against the
03795                                  strings `x-' and `y-z-', if the two strings are
03796                                  not consecutive in memory.  */
03797                            DEBUG_PRINT1 ("Restoring best registers.\n");
03798                            
03799                            d = match_end;
03800                            dend = ((d >= string1 && d <= end1)
03801                                  ? end_match_1 : end_match_2);
03802 
03803                   for (mcnt = 1; mcnt < num_regs; mcnt++)
03804                     {
03805                          regstart[mcnt] = best_regstart[mcnt];
03806                          regend[mcnt] = best_regend[mcnt];
03807                     }
03808                          }
03809                   } /* d != end_match_2 */
03810 
03811         succeed_label:
03812                 DEBUG_PRINT1 ("Accepting match.\n");
03813 
03814                 /* If caller wants register contents data back, do it.  */
03815                 if (regs && !bufp->no_sub)
03816             {
03817                     /* Have the register data arrays been allocated?  */
03818                     if (bufp->regs_allocated == REGS_UNALLOCATED)
03819                          { /* No.  So allocate them with malloc.  We need one
03820                                  extra element beyond `num_regs' for the `-1' marker
03821                                  GNU code uses.  */
03822                            regs->num_regs = MAX (RE_NREGS, num_regs + 1);
03823                            regs->start = TALLOC (regs->num_regs, regoff_t);
03824                            regs->end = TALLOC (regs->num_regs, regoff_t);
03825                            if (regs->start == NULL || regs->end == NULL)
03826                     {
03827                          FREE_VARIABLES ();
03828                          return -2;
03829                     }
03830                            bufp->regs_allocated = REGS_REALLOCATE;
03831                          }
03832                     else if (bufp->regs_allocated == REGS_REALLOCATE)
03833                          { /* Yes.  If we need more elements than were already
03834                                  allocated, reallocate them.  If we need fewer, just
03835                                  leave it alone.  */
03836                            if (regs->num_regs < num_regs + 1)
03837                                 {
03838                                   regs->num_regs = num_regs + 1;
03839                                   RETALLOC (regs->start, regs->num_regs, regoff_t);
03840                                   RETALLOC (regs->end, regs->num_regs, regoff_t);
03841                                   if (regs->start == NULL || regs->end == NULL)
03842                         {
03843                           FREE_VARIABLES ();
03844                           return -2;
03845                         }
03846                                 }
03847                          }
03848                     else
03849                 {
03850                   /* These braces fend off a "empty body in an else-statement"
03851                         warning under GCC when assert expands to nothing.  */
03852                   assert (bufp->regs_allocated == REGS_FIXED);
03853                 }
03854 
03855                     /* Convert the pointer data in `regstart' and `regend' to
03856                           indices.  Register zero has to be set differently,
03857                           since we haven't kept track of any info for it.  */
03858                     if (regs->num_regs > 0)
03859                          {
03860                            regs->start[0] = pos;
03861                            regs->end[0] = (MATCHING_IN_FIRST_STRING
03862                                   ? ((regoff_t) (d - string1))
03863                                         : ((regoff_t) (d - string2 + size1)));
03864                          }
03865                     
03866                     /* Go through the first `min (num_regs, regs->num_regs)'
03867                           registers, since that is all we initialized.  */
03868                  for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++)
03869                 {
03870                            if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
03871                                 regs->start[mcnt] = regs->end[mcnt] = -1;
03872                            else
03873                                 {
03874                          regs->start[mcnt]
03875                         = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
03876                                   regs->end[mcnt]
03877                         = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
03878                                 }
03879                 }
03880                     
03881                     /* If the regs structure we return has more elements than
03882                           were in the pattern, set the extra elements to -1.  If
03883                           we (re)allocated the registers, this is the case,
03884                           because we always allocate enough to have at least one
03885                           -1 at the end.  */
03886                     for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
03887                          regs->start[mcnt] = regs->end[mcnt] = -1;
03888             } /* regs && !bufp->no_sub */
03889 
03890                 DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
03891                                     nfailure_points_pushed, nfailure_points_popped,
03892                                     nfailure_points_pushed - nfailure_points_popped);
03893                 DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
03894 
03895                 mcnt = d - pos - (MATCHING_IN_FIRST_STRING 
03896                             ? string1 
03897                             : string2 - size1);
03898 
03899                 DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
03900 
03901                 FREE_VARIABLES ();
03902                 return mcnt;
03903            }
03904 
03905          /* Otherwise match next pattern command.  */
03906          switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
03907         {
03908            /* Ignore these.  Used to ignore the n of succeed_n's which
03909                  currently have n == 0.  */
03910            case no_op:
03911                 DEBUG_PRINT1 ("EXECUTING no_op.\n");
03912                 break;
03913 
03914         case succeed:
03915                 DEBUG_PRINT1 ("EXECUTING succeed.\n");
03916           goto succeed_label;
03917 
03918            /* Match the next n pattern characters exactly.  The following
03919                  byte in the pattern defines n, and the n bytes after that
03920                  are the characters to match.  */
03921         case exactn:
03922           mcnt = *p++;
03923                 DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
03924 
03925                 /* This is written out as an if-else so we don't waste time
03926                    testing `translate' inside the loop.  */
03927                 if (translate)
03928             {
03929                  do
03930                 {
03931                   PREFETCH ();
03932                   if (translate[(unsigned char) *d++] != (char) *p++)
03933                                 goto fail;
03934                 }
03935                  while (--mcnt);
03936             }
03937           else
03938             {
03939                  do
03940                 {
03941                   PREFETCH ();
03942                   if (*d++ != (char) *p++) goto fail;
03943                 }
03944                  while (--mcnt);
03945             }
03946           SET_REGS_MATCHED ();
03947                 break;
03948 
03949 
03950            /* Match any character except possibly a newline or a null.  */
03951         case anychar:
03952                 DEBUG_PRINT1 ("EXECUTING anychar.\n");
03953 
03954                 PREFETCH ();
03955 
03956                 if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
03957                     || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
03958             goto fail;
03959 
03960                 SET_REGS_MATCHED ();
03961                 DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
03962                 d++;
03963           break;
03964 
03965 
03966         case charset:
03967         case charset_not:
03968           {
03969             register unsigned char c;
03970             boolean not = (re_opcode_t) *(p - 1) == charset_not;
03971 
03972                   DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
03973 
03974             PREFETCH ();
03975             c = TRANSLATE (*d); /* The character to match.  */
03976 
03977                   /* Cast to `unsigned' instead of `unsigned char' in case the
03978                         bit list is a full 32 bytes long.  */
03979             if (c < (unsigned) (*p * BYTEWIDTH)
03980                 && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
03981                  not = !not;
03982 
03983             p += 1 + *p;
03984 
03985             if (!not) goto fail;
03986                   
03987             SET_REGS_MATCHED ();
03988                   d++;
03989             break;
03990           }
03991 
03992 
03993            /* The beginning of a group is represented by start_memory.
03994                  The arguments are the register number in the next byte, and the
03995                  number of groups inner to this one in the next.  The text
03996                  matched within the group is recorded (in the internal
03997                  registers data structure) under the register number.  */
03998            case start_memory:
03999           DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
04000 
04001                 /* Find out if this group can match the empty string.  */
04002           p1 = p;               /* To send to group_match_null_string_p.  */
04003                 
04004                 if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
04005                   REG_MATCH_NULL_STRING_P (reg_info[*p]) 
04006                     = group_match_null_string_p (&p1, pend, reg_info);
04007 
04008                 /* Save the position in the string where we were the last time
04009                    we were at this open-group operator in case the group is
04010                    operated upon by a repetition operator, e.g., with `(a*)*b'
04011                    against `ab'; then we want to ignore where we are now in
04012                    the string in case this attempt to match fails.  */
04013                 old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
04014                                             ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
04015                                             : regstart[*p];
04016           DEBUG_PRINT2 ("  old_regstart: %d\n", 
04017                          POINTER_TO_OFFSET (old_regstart[*p]));
04018 
04019                 regstart[*p] = d;
04020           DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
04021 
04022                 IS_ACTIVE (reg_info[*p]) = 1;
04023                 MATCHED_SOMETHING (reg_info[*p]) = 0;
04024 
04025           /* Clear this whenever we change the register activity status.  */
04026           set_regs_matched_done = 0;
04027                 
04028                 /* This is the new highest active register.  */
04029                 highest_active_reg = *p;
04030                 
04031                 /* If nothing was active before, this is the new lowest active
04032                    register.  */
04033                 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
04034                   lowest_active_reg = *p;
04035 
04036                 /* Move past the register number and inner group count.  */
04037                 p += 2;
04038           just_past_start_mem = p;
04039 
04040                 break;
04041 
04042 
04043            /* The stop_memory opcode represents the end of a group.  Its
04044                  arguments are the same as start_memory's: the register
04045                  number, and the number of inner groups.  */
04046         case stop_memory:
04047           DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
04048                    
04049                 /* We need to save the string position the last time we were at
04050                    this close-group operator in case the group is operated
04051                    upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
04052                    against `aba'; then we want to ignore where we are now in
04053                    the string in case this attempt to match fails.  */
04054                 old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
04055                                           ? REG_UNSET (regend[*p]) ? d : regend[*p]
04056                            : regend[*p];
04057           DEBUG_PRINT2 ("      old_regend: %d\n", 
04058                          POINTER_TO_OFFSET (old_regend[*p]));
04059 
04060                 regend[*p] = d;
04061           DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
04062 
04063                 /* This register isn't active anymore.  */
04064                 IS_ACTIVE (reg_info[*p]) = 0;
04065 
04066           /* Clear this whenever we change the register activity status.  */
04067           set_regs_matched_done = 0;
04068 
04069                 /* If this was the only register active, nothing is active
04070                    anymore.  */
04071                 if (lowest_active_reg == highest_active_reg)
04072                   {
04073                     lowest_active_reg = NO_LOWEST_ACTIVE_REG;
04074                     highest_active_reg = NO_HIGHEST_ACTIVE_REG;
04075                   }
04076                 else
04077                   { /* We must scan for the new highest active register, since
04078                           it isn't necessarily one less than now: consider
04079                           (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
04080                           new highest active register is 1.  */
04081                     unsigned char r = *p - 1;
04082                     while (r > 0 && !IS_ACTIVE (reg_info[r]))
04083                          r--;
04084                     
04085                     /* If we end up at register zero, that means that we saved
04086                           the registers as the result of an `on_failure_jump', not
04087                           a `start_memory', and we jumped to past the innermost
04088                           `stop_memory'.  For example, in ((.)*) we save
04089                           registers 1 and 2 as a result of the *, but when we pop
04090                           back to the second ), we are at the stop_memory 1.
04091                           Thus, nothing is active.  */
04092                  if (r == 0)
04093                          {
04094                            lowest_active_reg = NO_LOWEST_ACTIVE_REG;
04095                            highest_active_reg = NO_HIGHEST_ACTIVE_REG;
04096                          }
04097                     else
04098                          highest_active_reg = r;
04099                   }
04100                 
04101                 /* If just failed to match something this time around with a
04102                    group that's operated on by a repetition operator, try to
04103                    force exit from the ``loop'', and restore the register
04104                    information for this group that we had before trying this
04105                    last match.  */
04106                 if ((!MATCHED_SOMETHING (reg_info[*p])
04107                         || just_past_start_mem == p - 1)
04108                  && (p + 2) < pend)              
04109                   {
04110                     boolean is_a_jump_n = false;
04111                     
04112                     p1 = p + 2;
04113                     mcnt = 0;
04114                     switch ((re_opcode_t) *p1++)
04115                          {
04116                            case jump_n:
04117                     is_a_jump_n = true;
04118                            case pop_failure_jump:
04119                   case maybe_pop_jump:
04120                   case jump:
04121                   case dummy_failure_jump:
04122                                 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
04123                     if (is_a_jump_n)
04124                          p1 += 2;
04125                                 break;
04126                            
04127                            default:
04128                                 /* do nothing */ ;
04129                          }
04130                  p1 += mcnt;
04131            
04132                     /* If the next operation is a jump backwards in the pattern
04133                     to an on_failure_jump right before the start_memory
04134                           corresponding to this stop_memory, exit from the loop
04135                           by forcing a failure after pushing on the stack the
04136                           on_failure_jump's jump in the pattern, and d.  */
04137                     if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
04138                            && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
04139                 {
04140                            /* If this group ever matched anything, then restore
04141                                  what its registers were before trying this last
04142                                  failed match, e.g., with `(a*)*b' against `ab' for
04143                                  regstart[1], and, e.g., with `((a*)*(b*)*)*'
04144                                  against `aba' for regend[3].
04145                                  
04146                                  Also restore the registers for inner groups for,
04147                                  e.g., `((a*)(b*))*' against `aba' (register 3 would
04148                                  otherwise get trashed).  */
04149                                  
04150                            if (EVER_MATCHED_SOMETHING (reg_info[*p]))
04151                     {
04152                          unsigned r; 
04153            
04154                                   EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
04155                                   
04156                          /* Restore this and inner groups' (if any) registers.  */
04157                                   for (r = *p; r < *p + *(p + 1); r++)
04158                                     {
04159                                          regstart[r] = old_regstart[r];
04160 
04161                                          /* xx why this test?  */
04162                                          if (old_regend[r] >= regstart[r])
04163                                            regend[r] = old_regend[r];
04164                                     }     
04165                                 }
04166                   p1++;
04167                            EXTRACT_NUMBER_AND_INCR (mcnt, p1);
04168                            PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
04169 
04170                            goto fail;
04171                          }
04172                   }
04173                 
04174                 /* Move past the register number and the inner group count.  */
04175                 p += 2;
04176                 break;
04177 
04178 
04179         /* <digit> has been turned into a `duplicate' command which is
04180                  followed by the numeric value of <digit> as the register number.  */
04181            case duplicate:
04182           {
04183             register const char *d2, *dend2;
04184             int regno = *p++;   /* Get which register to match against.  */
04185             DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
04186 
04187             /* Can't back reference a group which we've never matched.  */
04188                   if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
04189                     goto fail;
04190                     
04191                   /* Where in input to try to start matching.  */
04192                   d2 = regstart[regno];
04193                   
04194                   /* Where to stop matching; if both the place to start and
04195                         the place to stop matching are in the same string, then
04196                         set to the place to stop, otherwise, for now have to use
04197                         the end of the first string.  */
04198 
04199                   dend2 = ((FIRST_STRING_P (regstart[regno]) 
04200                          == FIRST_STRING_P (regend[regno]))
04201                         ? regend[regno] : end_match_1);
04202             for (;;)
04203                  {
04204                 /* If necessary, advance to next segment in register
04205                             contents.  */
04206                 while (d2 == dend2)
04207                   {
04208                     if (dend2 == end_match_2) break;
04209                     if (dend2 == regend[regno]) break;
04210 
04211                                 /* End of string1 => advance to string2. */
04212                                 d2 = string2;
04213                                 dend2 = regend[regno];
04214                   }
04215                 /* At end of register contents => success */
04216                 if (d2 == dend2) break;
04217 
04218                 /* If necessary, advance to next segment in data.  */
04219                 PREFETCH ();
04220 
04221                 /* How many characters left in this segment to match.  */
04222                 mcnt = dend - d;
04223                          
04224                 /* Want how many consecutive characters we can match in
04225                             one shot, so, if necessary, adjust the count.  */
04226                          if (mcnt > dend2 - d2)
04227                   mcnt = dend2 - d2;
04228                            
04229                 /* Compare that many; failure if mismatch, else move
04230                             past them.  */
04231                 if (translate 
04232                                 ? bcmp_translate (d, d2, mcnt, translate) 
04233                                 : bcmp (d, d2, mcnt))
04234                   goto fail;
04235                 d += mcnt, d2 += mcnt;
04236 
04237                 /* Do this because we've match some characters.  */
04238                 SET_REGS_MATCHED ();
04239                  }
04240           }
04241           break;
04242 
04243 
04244            /* begline matches the empty string at the beginning of the string
04245                  (unless `not_bol' is set in `bufp'), and, if
04246                  `newline_anchor' is set, after newlines.  */
04247         case begline:
04248                 DEBUG_PRINT1 ("EXECUTING begline.\n");
04249                 
04250                 if (AT_STRINGS_BEG (d))
04251                   {
04252                     if (!bufp->not_bol) break;
04253                   }
04254                 else if (d[-1] == '\n' && bufp->newline_anchor)
04255                   {
04256                     break;
04257                   }
04258                 /* In all other cases, we fail.  */
04259                 goto fail;
04260 
04261 
04262            /* endline is the dual of begline.  */
04263         case endline:
04264                 DEBUG_PRINT1 ("EXECUTING endline.\n");
04265 
04266                 if (AT_STRINGS_END (d))
04267                   {
04268                     if (!bufp->not_eol) break;
04269                   }
04270                 
04271                 /* We have to ``prefetch'' the next character.  */
04272                 else if ((d == end1 ? *string2 : *d) == '\n'
04273                             && bufp->newline_anchor)
04274                   {
04275                     break;
04276                   }
04277                 goto fail;
04278 
04279 
04280         /* Match at the very beginning of the data.  */
04281            case begbuf:
04282                 DEBUG_PRINT1 ("EXECUTING begbuf.\n");
04283                 if (AT_STRINGS_BEG (d))
04284                   break;
04285                 goto fail;
04286 
04287 
04288         /* Match at the very end of the data.  */
04289            case endbuf:
04290                 DEBUG_PRINT1 ("EXECUTING endbuf.\n");
04291           if (AT_STRINGS_END (d))
04292             break;
04293                 goto fail;
04294 
04295 
04296            /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
04297                  pushes NULL as the value for the string on the stack.  Then
04298                  `pop_failure_point' will keep the current value for the
04299                  string, instead of restoring it.  To see why, consider
04300                  matching `foo\nbar' against `.*\n'.  The .* matches the foo;
04301                  then the . fails against the \n.  But the next thing we want
04302                  to do is match the \n against the \n; if we restored the
04303                  string value, we would be back at the foo.
04304                  
04305                  Because this is used only in specific cases, we don't need to
04306                  check all the things that `on_failure_jump' does, to make
04307                  sure the right things get saved on the stack.  Hence we don't
04308                  share its code.  The only reason to push anything on the
04309                  stack at all is that otherwise we would have to change
04310                  `anychar's code to do something besides goto fail in this
04311                  case; that seems worse than this.  */
04312            case on_failure_keep_string_jump:
04313                 DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
04314                 
04315                 EXTRACT_NUMBER_AND_INCR (mcnt, p);
04316                 DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
04317 
04318                 PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
04319                 break;
04320 
04321 
04322         /* Uses of on_failure_jump:
04323            
04324                  Each alternative starts with an on_failure_jump that points
04325                  to the beginning of the next alternative.  Each alternative
04326                  except the last ends with a jump that in effect jumps past
04327                  the rest of the alternatives.  (They really jump to the
04328                  ending jump of the following alternative, because tensioning
04329                  these jumps is a hassle.)
04330 
04331                  Repeats start with an on_failure_jump that points past both
04332                  the repetition text and either the following jump or
04333                  pop_failure_jump back to this on_failure_jump.  */
04334         case on_failure_jump:
04335            on_failure:
04336                 DEBUG_PRINT1 ("EXECUTING on_failure_jump");
04337 
04338                 EXTRACT_NUMBER_AND_INCR (mcnt, p);
04339                 DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
04340 
04341                 /* If this on_failure_jump comes right before a group (i.e.,
04342                    the original * applied to a group), save the information
04343                    for that group and all inner ones, so that if we fail back
04344                    to this point, the group's information will be correct.
04345                    For example, in \(a*\)*\1, we need the preceding group,
04346                    and in \(\(a*\)b*\)\2, we need the inner group.  */
04347 
04348                 /* We can't use `p' to check ahead because we push
04349                    a failure point to `p + mcnt' after we do this.  */
04350                 p1 = p;
04351 
04352                 /* We need to skip no_op's before we look for the
04353                    start_memory in case this on_failure_jump is happening as
04354                    the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
04355                    against aba.  */
04356                 while (p1 < pend && (re_opcode_t) *p1 == no_op)
04357                   p1++;
04358 
04359                 if (p1 < pend && (re_opcode_t) *p1 == start_memory)
04360                   {
04361                     /* We have a new highest active register now.  This will
04362                           get reset at the start_memory we are about to get to,
04363                           but we will have saved all the registers relevant to
04364                           this repetition op, as described above.  */
04365                     highest_active_reg = *(p1 + 1) + *(p1 + 2);
04366                     if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
04367                          lowest_active_reg = *(p1 + 1);
04368                   }
04369 
04370                 DEBUG_PRINT1 (":\n");
04371                 PUSH_FAILURE_POINT (p + mcnt, d, -2);
04372                 break;
04373 
04374 
04375            /* A smart repeat ends with `maybe_pop_jump'.
04376            We change it to either `pop_failure_jump' or `jump'.  */
04377            case maybe_pop_jump:
04378                 EXTRACT_NUMBER_AND_INCR (mcnt, p);
04379                 DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
04380                 {
04381             register unsigned char *p2 = p;
04382 
04383                   /* Compare the beginning of the repeat with what in the
04384                         pattern follows its end. If we can establish that there
04385                         is nothing that they would both match, i.e., that we
04386                         would have to backtrack because of (as in, e.g., `a*a')
04387                         then we can change to pop_failure_jump, because we'll
04388                         never have to backtrack.
04389                         
04390                         This is not true in the case of alternatives: in
04391                         `(a|ab)*' we do need to backtrack to the `ab' alternative
04392                         (e.g., if the string was `ab').  But instead of trying to
04393                         detect that here, the alternative has put on a dummy
04394                         failure point which is what we will end up popping.  */
04395 
04396             /* Skip over open/close-group commands.
04397                   If what follows this loop is a ...+ construct,
04398                   look at what begins its body, since we will have to
04399                   match at least one of that.  */
04400             while (1)
04401                  {
04402                 if (p2 + 2 < pend
04403                     && ((re_opcode_t) *p2 == stop_memory
04404                         || (re_opcode_t) *p2 == start_memory))
04405                   p2 += 3;
04406                 else if (p2 + 6 < pend
04407                          && (re_opcode_t) *p2 == dummy_failure_jump)
04408                   p2 += 6;
04409                 else
04410                   break;
04411                  }
04412 
04413             p1 = p + mcnt;
04414             /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
04415                   to the `maybe_finalize_jump' of this case.  Examine what 
04416                   follows.  */
04417 
04418                   /* If we're at the end of the pattern, we can change.  */
04419                   if (p2 == pend)
04420                  {
04421                 /* Consider what happens when matching ":\(.*\)"
04422                    against ":/".  I don't really understand this code
04423                    yet.  */
04424                    p[-3] = (unsigned char) pop_failure_jump;
04425                          DEBUG_PRINT1
04426                            ("  End of pattern: change to `pop_failure_jump'.\n");
04427                     }
04428 
04429                   else if ((re_opcode_t) *p2 == exactn
04430                         || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
04431                  {
04432                 register unsigned char c
04433                            = *p2 == (unsigned char) endline ? '\n' : p2[2];
04434 
04435                          if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
04436                            {
04437                     p[-3] = (unsigned char) pop_failure_jump;
04438                                 DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
04439                                                     c, p1[5]);
04440                            }
04441                            
04442                 else if ((re_opcode_t) p1[3] == charset
04443                          || (re_opcode_t) p1[3] == charset_not)
04444                   {
04445                     int not = (re_opcode_t) p1[3] == charset_not;
04446                                 
04447                     if (c < (unsigned char) (p1[4] * BYTEWIDTH)
04448                         && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
04449                          not = !not;
04450 
04451                                 /* `not' is equal to 1 if c would match, which means
04452                                     that we can't change to pop_failure_jump.  */
04453                     if (!not)
04454                                   {
04455                            p[-3] = (unsigned char) pop_failure_jump;
04456                                     DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
04457                                   }
04458                   }
04459                  }
04460                   else if ((re_opcode_t) *p2 == charset)
04461                  {
04462 #ifdef DEBUG
04463                 register unsigned char c
04464                            = *p2 == (unsigned char) endline ? '\n' : p2[2];
04465 #endif
04466 
04467                          if ((re_opcode_t) p1[3] == exactn
04468                     && ! ((int) p2[1] * BYTEWIDTH > (int) p1[4]
04469                           && (p2[1 + p1[4] / BYTEWIDTH]
04470                                  & (1 << (p1[4] % BYTEWIDTH)))))
04471                            {
04472                     p[-3] = (unsigned char) pop_failure_jump;
04473                                 DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
04474                                                     c, p1[5]);
04475                            }
04476                            
04477                 else if ((re_opcode_t) p1[3] == charset_not)
04478                   {
04479                     int idx;
04480                     /* We win if the charset_not inside the loop
04481                           lists every character listed in the charset after.  */
04482                     for (idx = 0; idx < (int) p2[1]; idx++)
04483                          if (! (p2[2 + idx] == 0
04484                                 || (idx < (int) p1[4]
04485                                  && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
04486                         break;
04487 
04488                     if (idx == p2[1])
04489                                   {
04490                            p[-3] = (unsigned char) pop_failure_jump;
04491                                     DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
04492                                   }
04493                   }
04494                 else if ((re_opcode_t) p1[3] == charset)
04495                   {
04496                     int idx;
04497                     /* We win if the charset inside the loop
04498                           has no overlap with the one after the loop.  */
04499                     for (idx = 0;
04500                          idx < (int) p2[1] && idx < (int) p1[4];
04501                          idx++)
04502                          if ((p2[2 + idx] & p1[5 + idx]) != 0)
04503                         break;
04504 
04505                     if (idx == p2[1] || idx == p1[4])
04506                                   {
04507                            p[-3] = (unsigned char) pop_failure_jump;
04508                                     DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
04509                                   }
04510                   }
04511                  }
04512           }
04513           p -= 2;               /* Point at relative address again.  */
04514           if ((re_opcode_t) p[-1] != pop_failure_jump)
04515             {
04516                  p[-1] = (unsigned char) jump;
04517                     DEBUG_PRINT1 ("  Match => jump.\n");
04518                  goto unconditional_jump;
04519             }
04520            /* Note fall through.  */
04521 
04522 
04523         /* The end of a simple repeat has a pop_failure_jump back to
04524                  its matching on_failure_jump, where the latter will push a
04525                  failure point.  The pop_failure_jump takes off failure
04526                  points put on by this pop_failure_jump's matching
04527                  on_failure_jump; we got through the pattern to here from the
04528                  matching on_failure_jump, so didn't fail.  */
04529            case pop_failure_jump:
04530                 {
04531                   /* We need to pass separate storage for the lowest and
04532                         highest registers, even though we don't care about the
04533                         actual values.  Otherwise, we will restore only one
04534                         register from the stack, since lowest will == highest in
04535                         `pop_failure_point'.  */
04536                   unsigned dummy_low_reg, dummy_high_reg;
04537                   unsigned char *pdummy;
04538                   const char *sdummy;
04539 
04540                   DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
04541                   POP_FAILURE_POINT (sdummy, pdummy,
04542                                                  dummy_low_reg, dummy_high_reg,
04543                                                  reg_dummy, reg_dummy, reg_info_dummy);
04544                 }
04545                 /* Note fall through.  */
04546 
04547                 
04548            /* Unconditionally jump (without popping any failure points).  */
04549            case jump:
04550         unconditional_jump:
04551           EXTRACT_NUMBER_AND_INCR (mcnt, p);    /* Get the amount to jump.  */
04552                 DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
04553           p += mcnt;                            /* Do the jump.  */
04554                 DEBUG_PRINT2 ("(to 0x%x).\n", p);
04555           break;
04556 
04557         
04558            /* We need this opcode so we can detect where alternatives end
04559                  in `group_match_null_string_p' et al.  */
04560            case jump_past_alt:
04561                 DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
04562                 goto unconditional_jump;
04563 
04564 
04565            /* Normally, the on_failure_jump pushes a failure point, which
04566                  then gets popped at pop_failure_jump.  We will end up at
04567                  pop_failure_jump, also, and with a pattern of, say, `a+', we
04568                  are skipping over the on_failure_jump, so we have to push
04569                  something meaningless for pop_failure_jump to pop.  */
04570            case dummy_failure_jump:
04571                 DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
04572                 /* It doesn't matter what we push for the string here.  What
04573                    the code at `fail' tests is the value for the pattern.  */
04574                 PUSH_FAILURE_POINT (0, 0, -2);
04575                 goto unconditional_jump;
04576 
04577 
04578            /* At the end of an alternative, we need to push a dummy failure
04579                  point in case we are followed by a `pop_failure_jump', because
04580                  we don't want the failure point for the alternative to be
04581                  popped.  For example, matching `(a|ab)*' against `aab'
04582                  requires that we match the `ab' alternative.  */
04583            case push_dummy_failure:
04584                 DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
04585                 /* See comments just above at `dummy_failure_jump' about the
04586                    two zeroes.  */
04587                 PUSH_FAILURE_POINT (0, 0, -2);
04588                 break;
04589 
04590            /* Have to succeed matching what follows at least n times.
04591                  After that, handle like `on_failure_jump'.  */
04592            case succeed_n: 
04593                 EXTRACT_NUMBER (mcnt, p + 2);
04594                 DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
04595 
04596                 assert (mcnt >= 0);
04597                 /* Originally, this is how many times we HAVE to succeed.  */
04598                 if (mcnt > 0)
04599                   {
04600                         mcnt--;
04601                   p += 2;
04602                         STORE_NUMBER_AND_INCR (p, mcnt);
04603                         DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p, mcnt);
04604                   }
04605           else if (mcnt == 0)
04606                   {
04607                     DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n", p+2);
04608                  p[2] = (unsigned char) no_op;
04609                     p[3] = (unsigned char) no_op;
04610                     goto on_failure;
04611                   }
04612                 break;
04613            
04614            case jump_n: 
04615                 EXTRACT_NUMBER (mcnt, p + 2);
04616                 DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
04617 
04618                 /* Originally, this is how many times we CAN jump.  */
04619                 if (mcnt)
04620                   {
04621                         mcnt--;
04622                         STORE_NUMBER (p + 2, mcnt);
04623                   goto unconditional_jump;           
04624                   }
04625                 /* If don't have to jump any more, skip over the rest of command.  */
04626           else      
04627             p += 4;                  
04628                 break;
04629            
04630         case set_number_at:
04631           {
04632                   DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
04633 
04634                   EXTRACT_NUMBER_AND_INCR (mcnt, p);
04635                   p1 = p + mcnt;
04636                   EXTRACT_NUMBER_AND_INCR (mcnt, p);
04637                   DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
04638             STORE_NUMBER (p1, mcnt);
04639                   break;
04640                 }
04641 
04642            case wordbound:
04643                 DEBUG_PRINT1 ("EXECUTING wordbound.\n");
04644                 if (AT_WORD_BOUNDARY (d))
04645             break;
04646                 goto fail;
04647 
04648         case notwordbound:
04649                 DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
04650           if (AT_WORD_BOUNDARY (d))
04651             goto fail;
04652                 break;
04653 
04654         case wordbeg:
04655                 DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
04656           if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
04657             break;
04658                 goto fail;
04659 
04660         case wordend:
04661                 DEBUG_PRINT1 ("EXECUTING wordend.\n");
04662           if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
04663                     && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
04664             break;
04665                 goto fail;
04666 
04667 #ifdef emacs
04668         case before_dot:
04669                 DEBUG_PRINT1 ("EXECUTING before_dot.\n");
04670           if (PTR_CHAR_POS ((unsigned char *) d) >= point)
04671             goto fail;
04672           break;
04673   
04674         case at_dot:
04675                 DEBUG_PRINT1 ("EXECUTING at_dot.\n");
04676           if (PTR_CHAR_POS ((unsigned char *) d) != point)
04677             goto fail;
04678           break;
04679   
04680         case after_dot:
04681                 DEBUG_PRINT1 ("EXECUTING after_dot.\n");
04682                 if (PTR_CHAR_POS ((unsigned char *) d) <= point)
04683             goto fail;
04684           break;
04685 #if 0 /* not emacs19 */
04686         case at_dot:
04687                 DEBUG_PRINT1 ("EXECUTING at_dot.\n");
04688           if (PTR_CHAR_POS ((unsigned char *) d) + 1 != point)
04689             goto fail;
04690           break;
04691 #endif /* not emacs19 */
04692 
04693         case syntaxspec:
04694                 DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
04695           mcnt = *p++;
04696           goto matchsyntax;
04697 
04698            case wordchar:
04699                 DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
04700           mcnt = (int) Sword;
04701            matchsyntax:
04702           PREFETCH ();
04703           /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
04704           d++;
04705           if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
04706             goto fail;
04707                 SET_REGS_MATCHED ();
04708           break;
04709 
04710         case notsyntaxspec:
04711                 DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
04712           mcnt = *p++;
04713           goto matchnotsyntax;
04714 
04715            case notwordchar:
04716                 DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
04717           mcnt = (int) Sword;
04718            matchnotsyntax:
04719           PREFETCH ();
04720           /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
04721           d++;
04722           if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
04723             goto fail;
04724           SET_REGS_MATCHED ();
04725                 break;
04726 
04727 #else /* not emacs */
04728         case wordchar:
04729                 DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
04730           PREFETCH ();
04731                 if (!WORDCHAR_P (d))
04732                   goto fail;
04733           SET_REGS_MATCHED ();
04734                 d++;
04735           break;
04736           
04737         case notwordchar:
04738                 DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
04739           PREFETCH ();
04740           if (WORDCHAR_P (d))
04741                   goto fail;
04742                 SET_REGS_MATCHED ();
04743                 d++;
04744           break;
04745 #endif /* not emacs */
04746                 
04747            default:
04748                 abort ();
04749         }
04750          continue;  /* Successfully executed one pattern command; keep going.  */
04751 
04752 
04753     /* We goto here if a matching operation fails. */
04754     fail:
04755          if (!FAIL_STACK_EMPTY ())
04756         { /* A restart point is known.  Restore to that state.  */
04757                 DEBUG_PRINT1 ("\nFAIL:\n");
04758                 POP_FAILURE_POINT (d, p,
04759                                             lowest_active_reg, highest_active_reg,
04760                                             regstart, regend, reg_info);
04761 
04762                 /* If this failure point is a dummy, try the next one.  */
04763                 if (!p)
04764             goto fail;
04765 
04766                 /* If we failed to the end of the pattern, don't examine *p.  */
04767           assert (p <= pend);
04768                 if (p < pend)
04769                   {
04770                     boolean is_a_jump_n = false;
04771                     
04772                     /* If failed to a backwards jump that's part of a repetition
04773                           loop, need to pop this failure point and use the next one.  */
04774                     switch ((re_opcode_t) *p)
04775                          {
04776                          case jump_n:
04777                            is_a_jump_n = true;
04778                          case maybe_pop_jump:
04779                          case pop_failure_jump:
04780                          case jump:
04781                            p1 = p + 1;
04782                            EXTRACT_NUMBER_AND_INCR (mcnt, p1);
04783                            p1 += mcnt;  
04784 
04785                            if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
04786                                   || (!is_a_jump_n
04787                                          && (re_opcode_t) *p1 == on_failure_jump))
04788                                 goto fail;
04789                            break;
04790                          default:
04791                            /* do nothing */ ;
04792                          }
04793                   }
04794 
04795                 if (d >= string1 && d <= end1)
04796             dend = end_match_1;
04797            }
04798          else
04799            break;   /* Matching at this starting point really fails.  */
04800     } /* for (;;) */
04801 
04802   if (best_regs_set)
04803     goto restore_best_regs;
04804 
04805   FREE_VARIABLES ();
04806 
04807   return -1;                            /* Failure to match.  */
04808 } /* re_match_2 */
04809 
04810 /* Subroutine definitions for re_match_2.  */
04811 
04812 
04813 /* We are passed P pointing to a register number after a start_memory.
04814    
04815    Return true if the pattern up to the corresponding stop_memory can
04816    match the empty string, and false otherwise.
04817    
04818    If we find the matching stop_memory, sets P to point to one past its number.
04819    Otherwise, sets P to an undefined byte less than or equal to END.
04820 
04821    We don't handle duplicates properly (yet).  */
04822 
04823 static boolean
04824 group_match_null_string_p (p, end, reg_info)
04825     unsigned char **p, *end;
04826     register_info_type *reg_info;
04827 {
04828   int mcnt;
04829   /* Point to after the args to the start_memory.  */
04830   unsigned char *p1 = *p + 2;
04831   
04832   while (p1 < end)
04833     {
04834          /* Skip over opcodes that can match nothing, and return true or
04835          false, as appropriate, when we get to one that can't, or to the
04836             matching stop_memory.  */
04837          
04838          switch ((re_opcode_t) *p1)
04839            {
04840            /* Could be either a loop or a series of alternatives.  */
04841            case on_failure_jump:
04842                 p1++;
04843                 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
04844                 
04845                 /* If the next operation is not a jump backwards in the
04846                 pattern.  */
04847 
04848           if (mcnt >= 0)
04849             {
04850                     /* Go through the on_failure_jumps of the alternatives,
04851                           seeing if any of the alternatives cannot match nothing.
04852                           The last alternative starts with only a jump,
04853                           whereas the rest start with on_failure_jump and end
04854                           with a jump, e.g., here is the pattern for `a|b|c':
04855 
04856                           /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
04857                           /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
04858                           /exactn/1/c                                           
04859 
04860                           So, we have to first go through the first (n-1)
04861                           alternatives and then deal with the last one separately.  */
04862 
04863 
04864                     /* Deal with the first (n-1) alternatives, which start
04865                           with an on_failure_jump (see above) that jumps to right
04866                           past a jump_past_alt.  */
04867 
04868                     while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
04869                          {
04870                            /* `mcnt' holds how many bytes long the alternative
04871                                  is, including the ending `jump_past_alt' and
04872                                  its number.  */
04873 
04874                            if (!alt_match_null_string_p (p1, p1 + mcnt - 3, 
04875                                                                   reg_info))
04876                                 return false;
04877 
04878                            /* Move to right after this alternative, including the
04879                         jump_past_alt.  */
04880                            p1 += mcnt;  
04881 
04882                            /* Break if it's the beginning of an n-th alternative
04883                                  that doesn't begin with an on_failure_jump.  */
04884                            if ((re_opcode_t) *p1 != on_failure_jump)
04885                                 break;
04886                 
04887                   /* Still have to check that it's not an n-th
04888                         alternative that starts with an on_failure_jump.  */
04889                   p1++;
04890                            EXTRACT_NUMBER_AND_INCR (mcnt, p1);
04891                            if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
04892                                 {
04893                          /* Get to the beginning of the n-th alternative.  */
04894                                   p1 -= 3;
04895                                   break;
04896                                 }
04897                          }
04898 
04899                     /* Deal with the last alternative: go back and get number
04900                           of the `jump_past_alt' just before it.  `mcnt' contains
04901                           the length of the alternative.  */
04902                     EXTRACT_NUMBER (mcnt, p1 - 2);
04903 
04904                     if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
04905                          return false;
04906 
04907                     p1 += mcnt; /* Get past the n-th alternative.  */
04908                   } /* if mcnt > 0 */
04909                 break;
04910 
04911                 
04912            case stop_memory:
04913           assert (p1[1] == **p);
04914                 *p = p1 + 2;
04915                 return true;
04916 
04917            
04918            default: 
04919                 if (!common_op_match_null_string_p (&p1, end, reg_info))
04920                   return false;
04921            }
04922     } /* while p1 < end */
04923 
04924   return false;
04925 } /* group_match_null_string_p */
04926 
04927 
04928 /* Similar to group_match_null_string_p, but doesn't deal with alternatives:
04929    It expects P to be the first byte of a single alternative and END one
04930    byte past the last. The alternative can contain groups.  */
04931    
04932 static boolean
04933 alt_match_null_string_p (p, end, reg_info)
04934     unsigned char *p, *end;
04935     register_info_type *reg_info;
04936 {
04937   int mcnt;
04938   unsigned char *p1 = p;
04939   
04940   while (p1 < end)
04941     {
04942          /* Skip over opcodes that can match nothing, and break when we get 
04943             to one that can't.  */
04944          
04945          switch ((re_opcode_t) *p1)
04946            {
04947         /* It's a loop.  */
04948            case on_failure_jump:
04949                 p1++;
04950                 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
04951                 p1 += mcnt;
04952                 break;
04953                 
04954         default: 
04955                 if (!common_op_match_null_string_p (&p1, end, reg_info))
04956                   return false;
04957            }
04958     }  /* while p1 < end */
04959 
04960   return true;
04961 } /* alt_match_null_string_p */
04962 
04963 
04964 /* Deals with the ops common to group_match_null_string_p and
04965    alt_match_null_string_p.  
04966    
04967    Sets P to one after the op and its arguments, if any.  */
04968 
04969 static boolean
04970 common_op_match_null_string_p (p, end, reg_info)
04971     unsigned char **p, *end;
04972     register_info_type *reg_info;
04973 {
04974   int mcnt;
04975   boolean ret;
04976   int reg_no;
04977   unsigned char *p1 = *p;
04978 
04979   switch ((re_opcode_t) *p1++)
04980     {
04981     case no_op:
04982     case begline:
04983     case endline:
04984     case begbuf:
04985     case endbuf:
04986     case wordbeg:
04987     case wordend:
04988     case wordbound:
04989     case notwordbound:
04990 #ifdef emacs
04991     case before_dot:
04992     case at_dot:
04993     case after_dot:
04994 #endif
04995          break;
04996 
04997     case start_memory:
04998          reg_no = *p1;
04999          assert (reg_no > 0 && reg_no <= MAX_REGNUM);
05000          ret = group_match_null_string_p (&p1, end, reg_info);
05001          
05002          /* Have to set this here in case we're checking a group which
05003             contains a group and a back reference to it.  */
05004 
05005          if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
05006            REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
05007 
05008          if (!ret)
05009            return false;
05010          break;
05011                 
05012     /* If this is an optimized succeed_n for zero times, make the jump.  */
05013     case jump:
05014          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
05015          if (mcnt >= 0)
05016            p1 += mcnt;
05017          else
05018            return false;
05019          break;
05020 
05021     case succeed_n:
05022          /* Get to the number of times to succeed.  */
05023          p1 += 2;               
05024          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
05025 
05026          if (mcnt == 0)
05027            {
05028                 p1 -= 4;
05029                 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
05030                 p1 += mcnt;
05031            }
05032          else
05033            return false;
05034          break;
05035 
05036     case duplicate: 
05037          if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
05038            return false;
05039          break;
05040 
05041     case set_number_at:
05042          p1 += 4;
05043 
05044     default:
05045          /* All other opcodes mean we cannot match the empty string.  */
05046          return false;
05047   }
05048 
05049   *p = p1;
05050   return true;
05051 } /* common_op_match_null_string_p */
05052 
05053 
05054 /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
05055    bytes; nonzero otherwise.  */
05056    
05057 static int
05058 bcmp_translate (s1, s2, len, translate)
05059         unsigned char *s1, *s2;
05060         register int len;
05061         char *translate;
05062 {
05063   register unsigned char *p1 = s1, *p2 = s2;
05064   while (len)
05065     {
05066          if (translate[*p1++] != translate[*p2++]) return 1;
05067          len--;
05068     }
05069   return 0;
05070 }
05071 
05072 /* Entry points for GNU code.  */
05073 
05074 /* re_compile_pattern is the GNU regular expression compiler: it
05075    compiles PATTERN (of length SIZE) and puts the result in BUFP.
05076    Returns 0 if the pattern was valid, otherwise an error string.
05077    
05078    Assumes the `allocated' (and perhaps `buffer') and `translate' fields
05079    are set in BUFP on entry.
05080    
05081    We call regex_compile to do the actual compilation.  */
05082 
05083 const char *
05084 re_compile_pattern (pattern, length, bufp)
05085         const char *pattern;
05086         int length;
05087         struct re_pattern_buffer *bufp;
05088 {
05089   reg_errcode_t ret;
05090   
05091   /* GNU code is written to assume at least RE_NREGS registers will be set
05092         (and at least one extra will be -1).  */
05093   bufp->regs_allocated = REGS_UNALLOCATED;
05094   
05095   /* And GNU code determines whether or not to get register information
05096         by passing null for the REGS argument to re_match, etc., not by
05097         setting no_sub.  */
05098   bufp->no_sub = 0;
05099   
05100   /* Match anchors at newline.  */
05101   bufp->newline_anchor = 1;
05102   
05103   ret = regex_compile (pattern, length, re_syntax_options, bufp);
05104 
05105   if (!ret)
05106     return NULL;
05107   return gettext (re_error_msgid[(int) ret]);
05108 }     
05109 
05110 /* Entry points compatible with 4.2 BSD regex library.  We don't define
05111    them unless specifically requested.  */
05112 
05113 #ifdef _REGEX_RE_COMP
05114 
05115 /* BSD has one and only one pattern buffer.  */
05116 static struct re_pattern_buffer re_comp_buf;
05117 
05118 char *
05119 re_comp (s)
05120     const char *s;
05121 {
05122   reg_errcode_t ret;
05123   
05124   if (!s)
05125     {
05126          if (!re_comp_buf.buffer)
05127         return gettext ("No previous regular expression");
05128          return 0;
05129     }
05130 
05131   if (!re_comp_buf.buffer)
05132     {
05133          re_comp_buf.buffer = (unsigned char *) malloc (200);
05134          if (re_comp_buf.buffer == NULL)
05135            return gettext (re_error_msgid[(int) REG_ESPACE]);
05136          re_comp_buf.allocated = 200;
05137 
05138          re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
05139          if (re_comp_buf.fastmap == NULL)
05140         return gettext (re_error_msgid[(int) REG_ESPACE]);
05141     }
05142 
05143   /* Since `re_exec' always passes NULL for the `regs' argument, we
05144         don't need to initialize the pattern buffer fields which affect it.  */
05145 
05146   /* Match anchors at newlines.  */
05147   re_comp_buf.newline_anchor = 1;
05148 
05149   ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
05150   
05151   if (!ret)
05152     return NULL;
05153 
05154   /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
05155   return (char *) gettext (re_error_msgid[(int) ret]);
05156 }
05157 
05158 
05159 int
05160 re_exec (s)
05161     const char *s;
05162 {
05163   const int len = strlen (s);
05164   return
05165     0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
05166 }
05167 #endif /* _REGEX_RE_COMP */
05168 
05169 /* POSIX.2 functions.  Don't define these for Emacs.  */
05170 
05171 #ifndef emacs
05172 
05173 /* regcomp takes a regular expression as a string and compiles it.
05174 
05175    PREG is a regex_t *.  We do not expect any fields to be initialized,
05176    since POSIX says we shouldn't.  Thus, we set
05177 
05178         `buffer' to the compiled pattern;
05179         `used' to the length of the compiled pattern;
05180         `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
05181           REG_EXTENDED bit in CFLAGS is set; otherwise, to
05182           RE_SYNTAX_POSIX_BASIC;
05183         `newline_anchor' to REG_NEWLINE being set in CFLAGS;
05184         `fastmap' and `fastmap_accurate' to zero;
05185         `re_nsub' to the number of subexpressions in PATTERN.
05186 
05187    PATTERN is the address of the pattern string.
05188 
05189    CFLAGS is a series of bits which affect compilation.
05190 
05191         If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
05192         use POSIX basic syntax.
05193 
05194         If REG_NEWLINE is set, then . and [^...] don't match newline.
05195         Also, regexec will try a match beginning after every newline.
05196 
05197         If REG_ICASE is set, then we considers upper- and lowercase
05198         versions of letters to be equivalent when matching.
05199 
05200         If REG_NOSUB is set, then when PREG is passed to regexec, that
05201         routine will report only success or failure, and nothing about the
05202         registers.
05203 
05204    It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
05205    the return codes and their meanings.)  */
05206 
05207 int
05208 regcomp (preg, pattern, cflags)
05209     regex_t *preg;
05210     const char *pattern; 
05211     int cflags;
05212 {
05213   reg_errcode_t ret;
05214   unsigned syntax
05215     = (cflags & REG_EXTENDED) ?
05216          RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
05217 
05218   /* regex_compile will allocate the space for the compiled pattern.  */
05219   preg->buffer = 0;
05220   preg->allocated = 0;
05221   preg->used = 0;
05222   
05223   /* Don't bother to use a fastmap when searching.  This simplifies the
05224         REG_NEWLINE case: if we used a fastmap, we'd have to put all the
05225         characters after newlines into the fastmap.  This way, we just try
05226         every character.  */
05227   preg->fastmap = 0;
05228   
05229   if (cflags & REG_ICASE)
05230     {
05231          unsigned i;
05232          
05233          preg->translate = (char *) malloc (CHAR_SET_SIZE);
05234          if (preg->translate == NULL)
05235            return (int) REG_ESPACE;
05236 
05237          /* Map uppercase characters to corresponding lowercase ones.  */
05238          for (i = 0; i < CHAR_SET_SIZE; i++)
05239            preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
05240     }
05241   else
05242     preg->translate = NULL;
05243 
05244   /* If REG_NEWLINE is set, newlines are treated differently.  */
05245   if (cflags & REG_NEWLINE)
05246     { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
05247          syntax &= ~RE_DOT_NEWLINE;
05248          syntax |= RE_HAT_LISTS_NOT_NEWLINE;
05249          /* It also changes the matching behavior.  */
05250          preg->newline_anchor = 1;
05251     }
05252   else
05253     preg->newline_anchor = 0;
05254 
05255   preg->no_sub = !!(cflags & REG_NOSUB);
05256 
05257   /* POSIX says a null character in the pattern terminates it, so we 
05258         can use strlen here in compiling the pattern.  */
05259   ret = regex_compile (pattern, strlen (pattern), syntax, preg);
05260   
05261   /* POSIX doesn't distinguish between an unmatched open-group and an
05262         unmatched close-group: both are REG_EPAREN.  */
05263   if (ret == REG_ERPAREN) ret = REG_EPAREN;
05264   
05265   return (int) ret;
05266 }
05267 
05268 
05269 /* regexec searches for a given pattern, specified by PREG, in the
05270    string STRING.
05271    
05272    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
05273    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
05274    least NMATCH elements, and we set them to the offsets of the
05275    corresponding matched substrings.
05276    
05277    EFLAGS specifies `execution flags' which affect matching: if
05278    REG_NOTBOL is set, then ^ does not match at the beginning of the
05279    string; if REG_NOTEOL is set, then $ does not match at the end.
05280    
05281    We return 0 if we find a match and REG_NOMATCH if not.  */
05282 
05283 int
05284 regexec (preg, string, nmatch, pmatch, eflags)
05285     const regex_t *preg;
05286     const char *string; 
05287     size_t nmatch; 
05288     regmatch_t pmatch[]; 
05289     int eflags;
05290 {
05291   int ret;
05292   struct re_registers regs;
05293   regex_t private_preg;
05294   int len = strlen (string);
05295   boolean want_reg_info = !preg->no_sub && nmatch > 0;
05296 
05297   private_preg = *preg;
05298   
05299   private_preg.not_bol = !!(eflags & REG_NOTBOL);
05300   private_preg.not_eol = !!(eflags & REG_NOTEOL);
05301   
05302   /* The user has told us exactly how many registers to return
05303         information about, via `nmatch'.  We have to pass that on to the
05304         matching routines.  */
05305   private_preg.regs_allocated = REGS_FIXED;
05306   
05307   if (want_reg_info)
05308     {
05309          regs.num_regs = nmatch;
05310          regs.start = TALLOC (nmatch, regoff_t);
05311          regs.end = TALLOC (nmatch, regoff_t);
05312          if (regs.start == NULL || regs.end == NULL)
05313            return (int) REG_NOMATCH;
05314     }
05315 
05316   /* Perform the searching operation.  */
05317   ret = re_search (&private_preg, string, len,
05318                             /* start: */ 0, /* range: */ len,
05319                             want_reg_info ? &regs : (struct re_registers *) 0);
05320   
05321   /* Copy the register information to the POSIX structure.  */
05322   if (want_reg_info)
05323     {
05324          if (ret >= 0)
05325            {
05326                 unsigned r;
05327 
05328                 for (r = 0; r < nmatch; r++)
05329                   {
05330                     pmatch[r].rm_so = regs.start[r];
05331                     pmatch[r].rm_eo = regs.end[r];
05332                   }
05333            }
05334 
05335          /* If we needed the temporary register info, free the space now.  */
05336          free (regs.start);
05337          free (regs.end);
05338     }
05339 
05340   /* We want zero return to mean success, unlike `re_search'.  */
05341   return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
05342 }
05343 
05344 
05345 /* Returns a message corresponding to an error code, ERRCODE, returned
05346    from either regcomp or regexec.   We don't use PREG here.  */
05347 
05348 size_t
05349 regerror (errcode, preg, errbuf, errbuf_size)
05350     int errcode;
05351     const regex_t *preg;
05352     char *errbuf;
05353     size_t errbuf_size;
05354 {
05355   const char *msg;
05356   size_t msg_size;
05357 
05358   if (errcode < 0
05359          || errcode >= (sizeof (re_error_msgid) / sizeof (re_error_msgid[0])))
05360     /* Only error codes returned by the rest of the code should be passed 
05361           to this routine.  If we are given anything else, or if other regex
05362           code generates an invalid error code, then the program has a bug.
05363           Dump core so we can fix it.  */
05364     abort ();
05365 
05366   msg = gettext (re_error_msgid[errcode]);
05367 
05368   msg_size = strlen (msg) + 1; /* Includes the null.  */
05369   
05370   if (errbuf_size != 0)
05371     {
05372          if (msg_size > errbuf_size)
05373            {
05374                 strncpy (errbuf, msg, errbuf_size - 1);
05375                 errbuf[errbuf_size - 1] = 0;
05376            }
05377          else
05378            strcpy (errbuf, msg);
05379     }
05380 
05381   return msg_size;
05382 }
05383 
05384 
05385 /* Free dynamically allocated space used by PREG.  */
05386 
05387 void
05388 regfree (preg)
05389     regex_t *preg;
05390 {
05391   if (preg->buffer != NULL)
05392     free (preg->buffer);
05393   preg->buffer = NULL;
05394   
05395   preg->allocated = 0;
05396   preg->used = 0;
05397 
05398   if (preg->fastmap != NULL)
05399     free (preg->fastmap);
05400   preg->fastmap = NULL;
05401   preg->fastmap_accurate = 0;
05402 
05403   if (preg->translate != NULL)
05404     free (preg->translate);
05405   preg->translate = NULL;
05406 }
05407 
05408 #endif /* not emacs  */
05409 
05410 /*
05411 Local variables:
05412 make-backup-files: t
05413 version-control: t
05414 trim-versions-without-asking: nil
05415 End:
05416 */

Generated on Fri May 7 21:15:43 2004 by doxygen 1.3.5