00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #if defined (_AIX) && !defined (REGEX_MALLOC)
00024 #pragma alloca
00025 #endif
00026
00027 #define _GNU_SOURCE
00028
00029 #ifdef HAVE_CONFIG_H
00030 #include <config.h>
00031 #endif
00032
00033
00034 #include <sys/types.h>
00035
00036
00037 #if HAVE_LIBINTL_H || defined (_LIBC)
00038 # include <libintl.h>
00039 #else
00040 # define gettext(msgid) (msgid)
00041 #endif
00042
00043
00044
00045 #ifdef emacs
00046
00047 #include "lisp.h"
00048 #include "buffer.h"
00049 #include "syntax.h"
00050
00051 #else
00052
00053
00054
00055
00056 #undef REL_ALLOC
00057
00058 #if defined (STDC_HEADERS) || defined (_LIBC)
00059 #include <stdlib.h>
00060 #else
00061 char *malloc ();
00062 char *realloc ();
00063 #endif
00064
00065
00066
00067 #ifdef INHIBIT_STRING_HEADER
00068 #if !(defined (HAVE_BZERO) && defined (HAVE_BCOPY))
00069 #if !defined (bzero) && !defined (bcopy)
00070 #undef INHIBIT_STRING_HEADER
00071 #endif
00072 #endif
00073 #endif
00074
00075
00076
00077
00078 #ifndef INHIBIT_STRING_HEADER
00079
00080 #include <string.h>
00081 #ifndef bcmp
00082 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
00083 #endif
00084 #ifndef bcopy
00085 #define bcopy(s, d, n) memcpy ((d), (s), (n))
00086 #endif
00087 #ifndef bzero
00088 #define bzero(s, n) memset ((s), 0, (n))
00089 #endif
00090
00091
00092
00093 #endif
00094
00095
00096
00097
00098
00099 #ifndef Sword
00100 #define Sword 1
00101 #endif
00102
00103 #ifdef SWITCH_ENUM_BUG
00104 #define SWITCH_ENUM_CAST(x) ((int)(x))
00105 #else
00106 #define SWITCH_ENUM_CAST(x) (x)
00107 #endif
00108
00109 #ifdef SYNTAX_TABLE
00110
00111 extern char *re_syntax_table;
00112
00113 #else
00114
00115
00116 #define CHAR_SET_SIZE 256
00117
00118 static char re_syntax_table[CHAR_SET_SIZE];
00119
00120 static void
00121 init_syntax_once ()
00122 {
00123 register int c;
00124 static int done = 0;
00125
00126 if (done)
00127 return;
00128
00129 bzero (re_syntax_table, sizeof re_syntax_table);
00130
00131 for (c = 'a'; c <= 'z'; c++)
00132 re_syntax_table[c] = Sword;
00133
00134 for (c = 'A'; c <= 'Z'; c++)
00135 re_syntax_table[c] = Sword;
00136
00137 for (c = '0'; c <= '9'; c++)
00138 re_syntax_table[c] = Sword;
00139
00140 re_syntax_table['_'] = Sword;
00141
00142 done = 1;
00143 }
00144
00145 #endif
00146
00147 #define SYNTAX(c) re_syntax_table[c]
00148
00149 #endif
00150
00151
00152 #include "regex.h"
00153
00154
00155 #include <ctype.h>
00156
00157
00158
00159
00160
00161
00162
00163
00164
00165
00166
00167
00168 #if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
00169 #define ISASCII(c) 1
00170 #else
00171 #define ISASCII(c) isascii(c)
00172 #endif
00173
00174 #ifdef isblank
00175 #define ISBLANK(c) (ISASCII (c) && isblank (c))
00176 #else
00177 #define ISBLANK(c) ((c) == ' ' || (c) == '\t')
00178 #endif
00179 #ifdef isgraph
00180 #define ISGRAPH(c) (ISASCII (c) && isgraph (c))
00181 #else
00182 #define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
00183 #endif
00184
00185 #define ISPRINT(c) (ISASCII (c) && isprint (c))
00186 #define ISDIGIT(c) (ISASCII (c) && isdigit (c))
00187 #define ISALNUM(c) (ISASCII (c) && isalnum (c))
00188 #define ISALPHA(c) (ISASCII (c) && isalpha (c))
00189 #define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
00190 #define ISLOWER(c) (ISASCII (c) && islower (c))
00191 #define ISPUNCT(c) (ISASCII (c) && ispunct (c))
00192 #define ISSPACE(c) (ISASCII (c) && isspace (c))
00193 #define ISUPPER(c) (ISASCII (c) && isupper (c))
00194 #define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
00195
00196 #ifndef NULL
00197 #define NULL (void *)0
00198 #endif
00199
00200
00201
00202
00203
00204 #undef SIGN_EXTEND_CHAR
00205 #if __STDC__
00206 #define SIGN_EXTEND_CHAR(c) ((signed char) (c))
00207 #else
00208
00209 #define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
00210 #endif
00211
00212
00213
00214
00215
00216
00217
00218
00219
00220
00221
00222 #ifdef REGEX_MALLOC
00223
00224 #define REGEX_ALLOCATE malloc
00225 #define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
00226 #define REGEX_FREE free
00227
00228 #else
00229
00230
00231 #ifndef alloca
00232
00233
00234 #ifdef __GNUC__
00235 #define alloca __builtin_alloca
00236 #else
00237 #if HAVE_ALLOCA_H
00238 #include <alloca.h>
00239 #else
00240 #ifndef _AIX
00241 char *alloca ();
00242 #endif
00243 #endif
00244 #endif
00245
00246 #endif
00247
00248 #define REGEX_ALLOCATE alloca
00249
00250
00251 #define REGEX_REALLOCATE(source, osize, nsize) \
00252 (destination = (char *) alloca (nsize), \
00253 bcopy (source, destination, osize), \
00254 destination)
00255
00256
00257 #define REGEX_FREE(arg) ((void)0)
00258
00259 #endif
00260
00261
00262
00263 #ifdef REL_ALLOC
00264 #define REGEX_ALLOCATE_STACK(size) \
00265 r_alloc (&failure_stack_ptr, (size))
00266 #define REGEX_REALLOCATE_STACK(source, osize, nsize) \
00267 r_re_alloc (&failure_stack_ptr, (nsize))
00268 #define REGEX_FREE_STACK(ptr) \
00269 r_alloc_free (&failure_stack_ptr)
00270
00271 #else
00272
00273 #ifdef REGEX_MALLOC
00274
00275 #define REGEX_ALLOCATE_STACK malloc
00276 #define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
00277 #define REGEX_FREE_STACK free
00278
00279 #else
00280
00281 #define REGEX_ALLOCATE_STACK alloca
00282
00283 #define REGEX_REALLOCATE_STACK(source, osize, nsize) \
00284 REGEX_REALLOCATE (source, osize, nsize)
00285
00286 #define REGEX_FREE_STACK(arg)
00287
00288 #endif
00289 #endif
00290
00291
00292
00293
00294
00295 #define FIRST_STRING_P(ptr) \
00296 (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
00297
00298
00299 #define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
00300 #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
00301 #define RETALLOC_IF(addr, n, t) \
00302 if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
00303 #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
00304
00305 #define BYTEWIDTH 8
00306
00307 #define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
00308
00309 #undef MAX
00310 #undef MIN
00311 #define MAX(a, b) ((a) > (b) ? (a) : (b))
00312 #define MIN(a, b) ((a) < (b) ? (a) : (b))
00313
00314 typedef char boolean;
00315 #define false 0
00316 #define true 1
00317
00318 static int re_match_2_internal ();
00319
00320
00321
00322
00323
00324
00325 typedef enum
00326 {
00327 no_op = 0,
00328
00329
00330 succeed,
00331
00332
00333 exactn,
00334
00335
00336 anychar,
00337
00338
00339
00340
00341
00342
00343
00344 charset,
00345
00346
00347
00348 charset_not,
00349
00350
00351
00352
00353
00354
00355
00356
00357 start_memory,
00358
00359
00360
00361
00362
00363
00364
00365
00366 stop_memory,
00367
00368
00369
00370 duplicate,
00371
00372
00373 begline,
00374
00375
00376 endline,
00377
00378
00379
00380 begbuf,
00381
00382
00383 endbuf,
00384
00385
00386 jump,
00387
00388
00389 jump_past_alt,
00390
00391
00392
00393 on_failure_jump,
00394
00395
00396
00397 on_failure_keep_string_jump,
00398
00399
00400
00401 pop_failure_jump,
00402
00403
00404
00405
00406
00407
00408
00409
00410 maybe_pop_jump,
00411
00412
00413
00414
00415
00416
00417 dummy_failure_jump,
00418
00419
00420
00421 push_dummy_failure,
00422
00423
00424
00425 succeed_n,
00426
00427
00428
00429 jump_n,
00430
00431
00432
00433
00434 set_number_at,
00435
00436 wordchar,
00437 notwordchar,
00438
00439 wordbeg,
00440 wordend,
00441
00442 wordbound,
00443 notwordbound
00444
00445 #ifdef emacs
00446 ,before_dot,
00447 at_dot,
00448 after_dot,
00449
00450
00451
00452 syntaxspec,
00453
00454
00455 notsyntaxspec
00456 #endif
00457 } re_opcode_t;
00458
00459
00460
00461
00462
00463 #define STORE_NUMBER(destination, number) \
00464 do { \
00465 (destination)[0] = (number) & 0377; \
00466 (destination)[1] = (number) >> 8; \
00467 } while (0)
00468
00469
00470
00471
00472
00473 #define STORE_NUMBER_AND_INCR(destination, number) \
00474 do { \
00475 STORE_NUMBER (destination, number); \
00476 (destination) += 2; \
00477 } while (0)
00478
00479
00480
00481
00482 #define EXTRACT_NUMBER(destination, source) \
00483 do { \
00484 (destination) = *(source) & 0377; \
00485 (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \
00486 } while (0)
00487
00488 #ifdef DEBUG
00489 static void
00490 extract_number (dest, source)
00491 int *dest;
00492 unsigned char *source;
00493 {
00494 int temp = SIGN_EXTEND_CHAR (*(source + 1));
00495 *dest = *source & 0377;
00496 *dest += temp << 8;
00497 }
00498
00499 #ifndef EXTRACT_MACROS
00500 #undef EXTRACT_NUMBER
00501 #define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
00502 #endif
00503
00504 #endif
00505
00506
00507
00508
00509 #define EXTRACT_NUMBER_AND_INCR(destination, source) \
00510 do { \
00511 EXTRACT_NUMBER (destination, source); \
00512 (source) += 2; \
00513 } while (0)
00514
00515 #ifdef DEBUG
00516 static void
00517 extract_number_and_incr (destination, source)
00518 int *destination;
00519 unsigned char **source;
00520 {
00521 extract_number (destination, *source);
00522 *source += 2;
00523 }
00524
00525 #ifndef EXTRACT_MACROS
00526 #undef EXTRACT_NUMBER_AND_INCR
00527 #define EXTRACT_NUMBER_AND_INCR(dest, src) \
00528 extract_number_and_incr (&dest, &src)
00529 #endif
00530
00531 #endif
00532
00533
00534
00535
00536
00537
00538
00539 #ifdef DEBUG
00540
00541
00542 #include <stdio.h>
00543
00544
00545 #include <assert.h>
00546
00547 static int debug = 0;
00548
00549 #define DEBUG_STATEMENT(e) e
00550 #define DEBUG_PRINT1(x) if (debug) printf (x)
00551 #define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
00552 #define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
00553 #define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
00554 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
00555 if (debug) print_partial_compiled_pattern (s, e)
00556 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
00557 if (debug) print_double_string (w, s1, sz1, s2, sz2)
00558
00559
00560
00561
00562 void
00563 print_fastmap (fastmap)
00564 char *fastmap;
00565 {
00566 unsigned was_a_range = 0;
00567 unsigned i = 0;
00568
00569 while (i < (1 << BYTEWIDTH))
00570 {
00571 if (fastmap[i++])
00572 {
00573 was_a_range = 0;
00574 putchar (i - 1);
00575 while (i < (1 << BYTEWIDTH) && fastmap[i])
00576 {
00577 was_a_range = 1;
00578 i++;
00579 }
00580 if (was_a_range)
00581 {
00582 printf ("-");
00583 putchar (i - 1);
00584 }
00585 }
00586 }
00587 putchar ('\n');
00588 }
00589
00590
00591
00592
00593
00594 void
00595 print_partial_compiled_pattern (start, end)
00596 unsigned char *start;
00597 unsigned char *end;
00598 {
00599 int mcnt, mcnt2;
00600 unsigned char *p = start;
00601 unsigned char *pend = end;
00602
00603 if (start == NULL)
00604 {
00605 printf ("(null)\n");
00606 return;
00607 }
00608
00609
00610 while (p < pend)
00611 {
00612 printf ("%d:\t", p - start);
00613
00614 switch ((re_opcode_t) *p++)
00615 {
00616 case no_op:
00617 printf ("/no_op");
00618 break;
00619
00620 case exactn:
00621 mcnt = *p++;
00622 printf ("/exactn/%d", mcnt);
00623 do
00624 {
00625 putchar ('/');
00626 putchar (*p++);
00627 }
00628 while (--mcnt);
00629 break;
00630
00631 case start_memory:
00632 mcnt = *p++;
00633 printf ("/start_memory/%d/%d", mcnt, *p++);
00634 break;
00635
00636 case stop_memory:
00637 mcnt = *p++;
00638 printf ("/stop_memory/%d/%d", mcnt, *p++);
00639 break;
00640
00641 case duplicate:
00642 printf ("/duplicate/%d", *p++);
00643 break;
00644
00645 case anychar:
00646 printf ("/anychar");
00647 break;
00648
00649 case charset:
00650 case charset_not:
00651 {
00652 register int c, last = -100;
00653 register int in_range = 0;
00654
00655 printf ("/charset [%s",
00656 (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
00657
00658 assert (p + *p < pend);
00659
00660 for (c = 0; c < 256; c++)
00661 if (c / 8 < *p
00662 && (p[1 + (c/8)] & (1 << (c % 8))))
00663 {
00664
00665 if (last + 1 == c && ! in_range)
00666 {
00667 putchar ('-');
00668 in_range = 1;
00669 }
00670
00671 else if (last + 1 != c && in_range)
00672 {
00673 putchar (last);
00674 in_range = 0;
00675 }
00676
00677 if (! in_range)
00678 putchar (c);
00679
00680 last = c;
00681 }
00682
00683 if (in_range)
00684 putchar (last);
00685
00686 putchar (']');
00687
00688 p += 1 + *p;
00689 }
00690 break;
00691
00692 case begline:
00693 printf ("/begline");
00694 break;
00695
00696 case endline:
00697 printf ("/endline");
00698 break;
00699
00700 case on_failure_jump:
00701 extract_number_and_incr (&mcnt, &p);
00702 printf ("/on_failure_jump to %d", p + mcnt - start);
00703 break;
00704
00705 case on_failure_keep_string_jump:
00706 extract_number_and_incr (&mcnt, &p);
00707 printf ("/on_failure_keep_string_jump to %d", p + mcnt - start);
00708 break;
00709
00710 case dummy_failure_jump:
00711 extract_number_and_incr (&mcnt, &p);
00712 printf ("/dummy_failure_jump to %d", p + mcnt - start);
00713 break;
00714
00715 case push_dummy_failure:
00716 printf ("/push_dummy_failure");
00717 break;
00718
00719 case maybe_pop_jump:
00720 extract_number_and_incr (&mcnt, &p);
00721 printf ("/maybe_pop_jump to %d", p + mcnt - start);
00722 break;
00723
00724 case pop_failure_jump:
00725 extract_number_and_incr (&mcnt, &p);
00726 printf ("/pop_failure_jump to %d", p + mcnt - start);
00727 break;
00728
00729 case jump_past_alt:
00730 extract_number_and_incr (&mcnt, &p);
00731 printf ("/jump_past_alt to %d", p + mcnt - start);
00732 break;
00733
00734 case jump:
00735 extract_number_and_incr (&mcnt, &p);
00736 printf ("/jump to %d", p + mcnt - start);
00737 break;
00738
00739 case succeed_n:
00740 extract_number_and_incr (&mcnt, &p);
00741 extract_number_and_incr (&mcnt2, &p);
00742 printf ("/succeed_n to %d, %d times", p + mcnt - start, mcnt2);
00743 break;
00744
00745 case jump_n:
00746 extract_number_and_incr (&mcnt, &p);
00747 extract_number_and_incr (&mcnt2, &p);
00748 printf ("/jump_n to %d, %d times", p + mcnt - start, mcnt2);
00749 break;
00750
00751 case set_number_at:
00752 extract_number_and_incr (&mcnt, &p);
00753 extract_number_and_incr (&mcnt2, &p);
00754 printf ("/set_number_at location %d to %d", p + mcnt - start, mcnt2);
00755 break;
00756
00757 case wordbound:
00758 printf ("/wordbound");
00759 break;
00760
00761 case notwordbound:
00762 printf ("/notwordbound");
00763 break;
00764
00765 case wordbeg:
00766 printf ("/wordbeg");
00767 break;
00768
00769 case wordend:
00770 printf ("/wordend");
00771
00772 #ifdef emacs
00773 case before_dot:
00774 printf ("/before_dot");
00775 break;
00776
00777 case at_dot:
00778 printf ("/at_dot");
00779 break;
00780
00781 case after_dot:
00782 printf ("/after_dot");
00783 break;
00784
00785 case syntaxspec:
00786 printf ("/syntaxspec");
00787 mcnt = *p++;
00788 printf ("/%d", mcnt);
00789 break;
00790
00791 case notsyntaxspec:
00792 printf ("/notsyntaxspec");
00793 mcnt = *p++;
00794 printf ("/%d", mcnt);
00795 break;
00796 #endif
00797
00798 case wordchar:
00799 printf ("/wordchar");
00800 break;
00801
00802 case notwordchar:
00803 printf ("/notwordchar");
00804 break;
00805
00806 case begbuf:
00807 printf ("/begbuf");
00808 break;
00809
00810 case endbuf:
00811 printf ("/endbuf");
00812 break;
00813
00814 default:
00815 printf ("?%d", *(p-1));
00816 }
00817
00818 putchar ('\n');
00819 }
00820
00821 printf ("%d:\tend of pattern.\n", p - start);
00822 }
00823
00824
00825 void
00826 print_compiled_pattern (bufp)
00827 struct re_pattern_buffer *bufp;
00828 {
00829 unsigned char *buffer = bufp->buffer;
00830
00831 print_partial_compiled_pattern (buffer, buffer + bufp->used);
00832 printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated);
00833
00834 if (bufp->fastmap_accurate && bufp->fastmap)
00835 {
00836 printf ("fastmap: ");
00837 print_fastmap (bufp->fastmap);
00838 }
00839
00840 printf ("re_nsub: %d\t", bufp->re_nsub);
00841 printf ("regs_alloc: %d\t", bufp->regs_allocated);
00842 printf ("can_be_null: %d\t", bufp->can_be_null);
00843 printf ("newline_anchor: %d\n", bufp->newline_anchor);
00844 printf ("no_sub: %d\t", bufp->no_sub);
00845 printf ("not_bol: %d\t", bufp->not_bol);
00846 printf ("not_eol: %d\t", bufp->not_eol);
00847 printf ("syntax: %d\n", bufp->syntax);
00848
00849 }
00850
00851
00852 void
00853 print_double_string (where, string1, size1, string2, size2)
00854 const char *where;
00855 const char *string1;
00856 const char *string2;
00857 int size1;
00858 int size2;
00859 {
00860 unsigned this_char;
00861
00862 if (where == NULL)
00863 printf ("(null)");
00864 else
00865 {
00866 if (FIRST_STRING_P (where))
00867 {
00868 for (this_char = where - string1; this_char < size1; this_char++)
00869 putchar (string1[this_char]);
00870
00871 where = string2;
00872 }
00873
00874 for (this_char = where - string2; this_char < size2; this_char++)
00875 putchar (string2[this_char]);
00876 }
00877 }
00878
00879 #else
00880
00881 #undef assert
00882 #define assert(e)
00883
00884 #define DEBUG_STATEMENT(e)
00885 #define DEBUG_PRINT1(x)
00886 #define DEBUG_PRINT2(x1, x2)
00887 #define DEBUG_PRINT3(x1, x2, x3)
00888 #define DEBUG_PRINT4(x1, x2, x3, x4)
00889 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
00890 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
00891
00892 #endif
00893
00894
00895
00896
00897
00898
00899 reg_syntax_t re_syntax_options;
00900
00901
00902
00903
00904
00905
00906
00907
00908
00909 reg_syntax_t
00910 re_set_syntax (syntax)
00911 reg_syntax_t syntax;
00912 {
00913 reg_syntax_t ret = re_syntax_options;
00914
00915 re_syntax_options = syntax;
00916 return ret;
00917 }
00918
00919
00920
00921
00922
00923
00924 static const char *re_error_msgid[] =
00925 { "Success",
00926 "No match",
00927 "Invalid regular expression",
00928 "Invalid collation character",
00929 "Invalid character class name",
00930 "Trailing backslash",
00931 "Invalid back reference",
00932 "Unmatched [ or [^",
00933 "Unmatched ( or \\(",
00934 "Unmatched \\{",
00935 "Invalid content of \\{\\}",
00936 "Invalid range end",
00937 "Memory exhausted",
00938 "Invalid preceding regular expression",
00939 "Premature end of regular expression",
00940 "Regular expression too big",
00941 "Unmatched ) or \\)",
00942 };
00943
00944
00945
00946
00947
00948
00949
00950
00951
00952
00953
00954
00955
00956
00957
00958
00959
00960
00961
00962
00963
00964 #define MATCH_MAY_ALLOCATE
00965
00966
00967
00968 #ifdef __GNUC__
00969 #undef C_ALLOCA
00970 #endif
00971
00972
00973
00974
00975
00976
00977 #if (defined (C_ALLOCA) || defined (REGEX_MALLOC)) && defined (emacs)
00978 #undef MATCH_MAY_ALLOCATE
00979 #endif
00980
00981
00982
00983
00984
00985
00986
00987
00988
00989
00990 #ifndef INIT_FAILURE_ALLOC
00991 #define INIT_FAILURE_ALLOC 5
00992 #endif
00993
00994
00995
00996
00997
00998 #if defined (MATCH_MAY_ALLOCATE)
00999 int re_max_failures = 200000;
01000 #else
01001 int re_max_failures = 2000;
01002 #endif
01003
01004 union fail_stack_elt
01005 {
01006 unsigned char *pointer;
01007 int integer;
01008 };
01009
01010 typedef union fail_stack_elt fail_stack_elt_t;
01011
01012 typedef struct
01013 {
01014 fail_stack_elt_t *stack;
01015 unsigned size;
01016 unsigned avail;
01017 } fail_stack_type;
01018
01019 #define FAIL_STACK_EMPTY() (fail_stack.avail == 0)
01020 #define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
01021 #define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)
01022
01023
01024
01025
01026
01027 #ifdef MATCH_MAY_ALLOCATE
01028 #define INIT_FAIL_STACK() \
01029 do { \
01030 fail_stack.stack = (fail_stack_elt_t *) \
01031 REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
01032 \
01033 if (fail_stack.stack == NULL) \
01034 return -2; \
01035 \
01036 fail_stack.size = INIT_FAILURE_ALLOC; \
01037 fail_stack.avail = 0; \
01038 } while (0)
01039
01040 #define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack)
01041 #else
01042 #define INIT_FAIL_STACK() \
01043 do { \
01044 fail_stack.avail = 0; \
01045 } while (0)
01046
01047 #define RESET_FAIL_STACK()
01048 #endif
01049
01050
01051
01052
01053
01054
01055
01056
01057
01058 #define DOUBLE_FAIL_STACK(fail_stack) \
01059 ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS \
01060 ? 0 \
01061 : ((fail_stack).stack = (fail_stack_elt_t *) \
01062 REGEX_REALLOCATE_STACK ((fail_stack).stack, \
01063 (fail_stack).size * sizeof (fail_stack_elt_t), \
01064 ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \
01065 \
01066 (fail_stack).stack == NULL \
01067 ? 0 \
01068 : ((fail_stack).size <<= 1, \
01069 1)))
01070
01071
01072
01073
01074
01075 #define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \
01076 ((FAIL_STACK_FULL () \
01077 && !DOUBLE_FAIL_STACK (FAIL_STACK)) \
01078 ? 0 \
01079 : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER, \
01080 1))
01081
01082
01083
01084
01085 #define PUSH_FAILURE_POINTER(item) \
01086 fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)
01087
01088
01089
01090
01091 #define PUSH_FAILURE_INT(item) \
01092 fail_stack.stack[fail_stack.avail++].integer = (item)
01093
01094
01095
01096
01097 #define PUSH_FAILURE_ELT(item) \
01098 fail_stack.stack[fail_stack.avail++] = (item)
01099
01100
01101
01102 #define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
01103 #define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
01104 #define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
01105
01106
01107 #ifdef DEBUG
01108 #define DEBUG_PUSH PUSH_FAILURE_INT
01109 #define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
01110 #else
01111 #define DEBUG_PUSH(item)
01112 #define DEBUG_POP(item_addr)
01113 #endif
01114
01115
01116
01117
01118
01119
01120
01121
01122
01123
01124
01125 #define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \
01126 do { \
01127 char *destination; \
01128
01129 \
01130 int this_reg; \
01131 \
01132 destination = 0; \
01133 DEBUG_STATEMENT (failure_id++); \
01134 DEBUG_STATEMENT (nfailure_points_pushed++); \
01135 DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
01136 DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\
01137 DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
01138 \
01139 DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \
01140 DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \
01141 \
01142 \
01143 while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \
01144 { \
01145 if (!DOUBLE_FAIL_STACK (fail_stack)) \
01146 return failure_code; \
01147 \
01148 DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \
01149 (fail_stack).size); \
01150 DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\
01151 } \
01152 \
01153 \
01154 DEBUG_PRINT1 ("\n"); \
01155 \
01156 for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
01157 this_reg++) \
01158 { \
01159 DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \
01160 DEBUG_STATEMENT (num_regs_pushed++); \
01161 \
01162 DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
01163 PUSH_FAILURE_POINTER (regstart[this_reg]); \
01164 \
01165 DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
01166 PUSH_FAILURE_POINTER (regend[this_reg]); \
01167 \
01168 DEBUG_PRINT2 (" info: 0x%x\n ", reg_info[this_reg]); \
01169 DEBUG_PRINT2 (" match_null=%d", \
01170 REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \
01171 DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \
01172 DEBUG_PRINT2 (" matched_something=%d", \
01173 MATCHED_SOMETHING (reg_info[this_reg])); \
01174 DEBUG_PRINT2 (" ever_matched=%d", \
01175 EVER_MATCHED_SOMETHING (reg_info[this_reg])); \
01176 DEBUG_PRINT1 ("\n"); \
01177 PUSH_FAILURE_ELT (reg_info[this_reg].word); \
01178 } \
01179 \
01180 DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg);\
01181 PUSH_FAILURE_INT (lowest_active_reg); \
01182 \
01183 DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg);\
01184 PUSH_FAILURE_INT (highest_active_reg); \
01185 \
01186 DEBUG_PRINT2 (" Pushing pattern 0x%x: ", pattern_place); \
01187 DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \
01188 PUSH_FAILURE_POINTER (pattern_place); \
01189 \
01190 DEBUG_PRINT2 (" Pushing string 0x%x: `", string_place); \
01191 DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \
01192 size2); \
01193 DEBUG_PRINT1 ("'\n"); \
01194 PUSH_FAILURE_POINTER (string_place); \
01195 \
01196 DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \
01197 DEBUG_PUSH (failure_id); \
01198 } while (0)
01199
01200
01201
01202 #define NUM_REG_ITEMS 3
01203
01204
01205 #ifdef DEBUG
01206 #define NUM_NONREG_ITEMS 5
01207 #else
01208 #define NUM_NONREG_ITEMS 4
01209 #endif
01210
01211
01212 #define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
01213
01214
01215 #define NUM_FAILURE_ITEMS \
01216 ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS \
01217 + NUM_NONREG_ITEMS)
01218
01219
01220 #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
01221
01222
01223
01224
01225
01226
01227
01228
01229
01230
01231
01232
01233
01234
01235 #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
01236 { \
01237 DEBUG_STATEMENT (fail_stack_elt_t failure_id;) \
01238 int this_reg; \
01239 const unsigned char *string_temp; \
01240 \
01241 assert (!FAIL_STACK_EMPTY ()); \
01242 \
01243 \
01244 DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \
01245 DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \
01246 DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \
01247 \
01248 assert (fail_stack.avail >= NUM_NONREG_ITEMS); \
01249 \
01250 DEBUG_POP (&failure_id); \
01251 DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \
01252 \
01253
01254
01255 \
01256 string_temp = POP_FAILURE_POINTER (); \
01257 if (string_temp != NULL) \
01258 str = (const char *) string_temp; \
01259 \
01260 DEBUG_PRINT2 (" Popping string 0x%x: `", str); \
01261 DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
01262 DEBUG_PRINT1 ("'\n"); \
01263 \
01264 pat = (unsigned char *) POP_FAILURE_POINTER (); \
01265 DEBUG_PRINT2 (" Popping pattern 0x%x: ", pat); \
01266 DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
01267 \
01268 \
01269 high_reg = (unsigned) POP_FAILURE_INT (); \
01270 DEBUG_PRINT2 (" Popping high active reg: %d\n", high_reg); \
01271 \
01272 low_reg = (unsigned) POP_FAILURE_INT (); \
01273 DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \
01274 \
01275 for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \
01276 { \
01277 DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \
01278 \
01279 reg_info[this_reg].word = POP_FAILURE_ELT (); \
01280 DEBUG_PRINT2 (" info: 0x%x\n", reg_info[this_reg]); \
01281 \
01282 regend[this_reg] = (const char *) POP_FAILURE_POINTER (); \
01283 DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
01284 \
01285 regstart[this_reg] = (const char *) POP_FAILURE_POINTER (); \
01286 DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
01287 } \
01288 \
01289 set_regs_matched_done = 0; \
01290 DEBUG_STATEMENT (nfailure_points_popped++); \
01291 }
01292
01293
01294
01295
01296
01297
01298
01299
01300
01301
01302
01303
01304
01305
01306 typedef union
01307 {
01308 fail_stack_elt_t word;
01309 struct
01310 {
01311
01312
01313 #define MATCH_NULL_UNSET_VALUE 3
01314 unsigned match_null_string_p : 2;
01315 unsigned is_active : 1;
01316 unsigned matched_something : 1;
01317 unsigned ever_matched_something : 1;
01318 } bits;
01319 } register_info_type;
01320
01321 #define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p)
01322 #define IS_ACTIVE(R) ((R).bits.is_active)
01323 #define MATCHED_SOMETHING(R) ((R).bits.matched_something)
01324 #define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something)
01325
01326
01327
01328
01329
01330 #define SET_REGS_MATCHED() \
01331 do \
01332 { \
01333 if (!set_regs_matched_done) \
01334 { \
01335 unsigned r; \
01336 set_regs_matched_done = 1; \
01337 for (r = lowest_active_reg; r <= highest_active_reg; r++) \
01338 { \
01339 MATCHED_SOMETHING (reg_info[r]) \
01340 = EVER_MATCHED_SOMETHING (reg_info[r]) \
01341 = 1; \
01342 } \
01343 } \
01344 } \
01345 while (0)
01346
01347
01348 static char reg_unset_dummy;
01349 #define REG_UNSET_VALUE (®_unset_dummy)
01350 #define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
01351
01352
01353
01354 static void store_op1 (), store_op2 ();
01355 static void insert_op1 (), insert_op2 ();
01356 static boolean at_begline_loc_p (), at_endline_loc_p ();
01357 static boolean group_in_compile_stack ();
01358 static reg_errcode_t compile_range ();
01359
01360
01361
01362
01363
01364 #define PATFETCH(c) \
01365 do {if (p == pend) return REG_EEND; \
01366 c = (unsigned char) *p++; \
01367 if (translate) c = translate[c]; \
01368 } while (0)
01369
01370
01371
01372 #define PATFETCH_RAW(c) \
01373 do {if (p == pend) return REG_EEND; \
01374 c = (unsigned char) *p++; \
01375 } while (0)
01376
01377
01378 #define PATUNFETCH p--
01379
01380
01381
01382
01383
01384
01385 #define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d))
01386
01387
01388
01389
01390
01391 #define INIT_BUF_SIZE 32
01392
01393
01394 #define GET_BUFFER_SPACE(n) \
01395 while (b - bufp->buffer + (n) > bufp->allocated) \
01396 EXTEND_BUFFER ()
01397
01398
01399 #define BUF_PUSH(c) \
01400 do { \
01401 GET_BUFFER_SPACE (1); \
01402 *b++ = (unsigned char) (c); \
01403 } while (0)
01404
01405
01406
01407 #define BUF_PUSH_2(c1, c2) \
01408 do { \
01409 GET_BUFFER_SPACE (2); \
01410 *b++ = (unsigned char) (c1); \
01411 *b++ = (unsigned char) (c2); \
01412 } while (0)
01413
01414
01415
01416 #define BUF_PUSH_3(c1, c2, c3) \
01417 do { \
01418 GET_BUFFER_SPACE (3); \
01419 *b++ = (unsigned char) (c1); \
01420 *b++ = (unsigned char) (c2); \
01421 *b++ = (unsigned char) (c3); \
01422 } while (0)
01423
01424
01425
01426
01427 #define STORE_JUMP(op, loc, to) \
01428 store_op1 (op, loc, (to) - (loc) - 3)
01429
01430
01431 #define STORE_JUMP2(op, loc, to, arg) \
01432 store_op2 (op, loc, (to) - (loc) - 3, arg)
01433
01434
01435 #define INSERT_JUMP(op, loc, to) \
01436 insert_op1 (op, loc, (to) - (loc) - 3, b)
01437
01438
01439 #define INSERT_JUMP2(op, loc, to, arg) \
01440 insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
01441
01442
01443
01444
01445
01446 #define MAX_BUF_SIZE (1L << 16)
01447
01448
01449
01450
01451
01452
01453 #define EXTEND_BUFFER() \
01454 do { \
01455 unsigned char *old_buffer = bufp->buffer; \
01456 if (bufp->allocated == MAX_BUF_SIZE) \
01457 return REG_ESIZE; \
01458 bufp->allocated <<= 1; \
01459 if (bufp->allocated > MAX_BUF_SIZE) \
01460 bufp->allocated = MAX_BUF_SIZE; \
01461 bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
01462 if (bufp->buffer == NULL) \
01463 return REG_ESPACE; \
01464 \
01465 if (old_buffer != bufp->buffer) \
01466 { \
01467 b = (b - old_buffer) + bufp->buffer; \
01468 begalt = (begalt - old_buffer) + bufp->buffer; \
01469 if (fixup_alt_jump) \
01470 fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
01471 if (laststart) \
01472 laststart = (laststart - old_buffer) + bufp->buffer; \
01473 if (pending_exact) \
01474 pending_exact = (pending_exact - old_buffer) + bufp->buffer; \
01475 } \
01476 } while (0)
01477
01478
01479
01480
01481
01482 #define MAX_REGNUM 255
01483
01484
01485
01486 typedef unsigned regnum_t;
01487
01488
01489
01490
01491
01492
01493 typedef int pattern_offset_t;
01494
01495 typedef struct
01496 {
01497 pattern_offset_t begalt_offset;
01498 pattern_offset_t fixup_alt_jump;
01499 pattern_offset_t inner_group_offset;
01500 pattern_offset_t laststart_offset;
01501 regnum_t regnum;
01502 } compile_stack_elt_t;
01503
01504
01505 typedef struct
01506 {
01507 compile_stack_elt_t *stack;
01508 unsigned size;
01509 unsigned avail;
01510 } compile_stack_type;
01511
01512
01513 #define INIT_COMPILE_STACK_SIZE 32
01514
01515 #define COMPILE_STACK_EMPTY (compile_stack.avail == 0)
01516 #define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size)
01517
01518
01519 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
01520
01521
01522
01523 #define SET_LIST_BIT(c) \
01524 (b[((unsigned char) (c)) / BYTEWIDTH] \
01525 |= 1 << (((unsigned char) c) % BYTEWIDTH))
01526
01527
01528
01529 #define GET_UNSIGNED_NUMBER(num) \
01530 { if (p != pend) \
01531 { \
01532 PATFETCH (c); \
01533 while (ISDIGIT (c)) \
01534 { \
01535 if (num < 0) \
01536 num = 0; \
01537 num = num * 10 + c - '0'; \
01538 if (p == pend) \
01539 break; \
01540 PATFETCH (c); \
01541 } \
01542 } \
01543 }
01544
01545 #define CHAR_CLASS_MAX_LENGTH 6
01546
01547 #define IS_CHAR_CLASS(string) \
01548 (STREQ (string, "alpha") || STREQ (string, "upper") \
01549 || STREQ (string, "lower") || STREQ (string, "digit") \
01550 || STREQ (string, "alnum") || STREQ (string, "xdigit") \
01551 || STREQ (string, "space") || STREQ (string, "print") \
01552 || STREQ (string, "punct") || STREQ (string, "graph") \
01553 || STREQ (string, "cntrl") || STREQ (string, "blank"))
01554
01555 #ifndef MATCH_MAY_ALLOCATE
01556
01557
01558
01559
01560
01561
01562
01563
01564 static fail_stack_type fail_stack;
01565
01566
01567
01568
01569 static int regs_allocated_size;
01570
01571 static const char ** regstart, ** regend;
01572 static const char ** old_regstart, ** old_regend;
01573 static const char **best_regstart, **best_regend;
01574 static register_info_type *reg_info;
01575 static const char **reg_dummy;
01576 static register_info_type *reg_info_dummy;
01577
01578
01579
01580
01581 static
01582 regex_grow_registers (num_regs)
01583 int num_regs;
01584 {
01585 if (num_regs > regs_allocated_size)
01586 {
01587 RETALLOC_IF (regstart, num_regs, const char *);
01588 RETALLOC_IF (regend, num_regs, const char *);
01589 RETALLOC_IF (old_regstart, num_regs, const char *);
01590 RETALLOC_IF (old_regend, num_regs, const char *);
01591 RETALLOC_IF (best_regstart, num_regs, const char *);
01592 RETALLOC_IF (best_regend, num_regs, const char *);
01593 RETALLOC_IF (reg_info, num_regs, register_info_type);
01594 RETALLOC_IF (reg_dummy, num_regs, const char *);
01595 RETALLOC_IF (reg_info_dummy, num_regs, register_info_type);
01596
01597 regs_allocated_size = num_regs;
01598 }
01599 }
01600
01601 #endif
01602
01603
01604
01605
01606
01607
01608
01609
01610
01611
01612
01613
01614
01615
01616
01617
01618
01619
01620
01621
01622 #define FREE_STACK_RETURN(value) \
01623 return (free (compile_stack.stack), value)
01624
01625 static reg_errcode_t
01626 regex_compile (pattern, size, syntax, bufp)
01627 const char *pattern;
01628 int size;
01629 reg_syntax_t syntax;
01630 struct re_pattern_buffer *bufp;
01631 {
01632
01633
01634
01635 register unsigned char c, c1;
01636
01637
01638 const char *p1;
01639
01640
01641 register unsigned char *b;
01642
01643
01644 compile_stack_type compile_stack;
01645
01646
01647 const char *p = pattern;
01648 const char *pend = pattern + size;
01649
01650
01651 char *translate = bufp->translate;
01652
01653
01654
01655
01656
01657 unsigned char *pending_exact = 0;
01658
01659
01660
01661
01662 unsigned char *laststart = 0;
01663
01664
01665 unsigned char *begalt;
01666
01667
01668
01669 const char *beg_interval;
01670
01671
01672
01673
01674 unsigned char *fixup_alt_jump = 0;
01675
01676
01677
01678
01679 regnum_t regnum = 0;
01680
01681 #ifdef DEBUG
01682 DEBUG_PRINT1 ("\nCompiling pattern: ");
01683 if (debug)
01684 {
01685 unsigned debug_count;
01686
01687 for (debug_count = 0; debug_count < size; debug_count++)
01688 putchar (pattern[debug_count]);
01689 putchar ('\n');
01690 }
01691 #endif
01692
01693
01694 compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
01695 if (compile_stack.stack == NULL)
01696 return REG_ESPACE;
01697
01698 compile_stack.size = INIT_COMPILE_STACK_SIZE;
01699 compile_stack.avail = 0;
01700
01701
01702 bufp->syntax = syntax;
01703 bufp->fastmap_accurate = 0;
01704 bufp->not_bol = bufp->not_eol = 0;
01705
01706
01707
01708
01709 bufp->used = 0;
01710
01711
01712 bufp->re_nsub = 0;
01713
01714 #if !defined (emacs) && !defined (SYNTAX_TABLE)
01715
01716 init_syntax_once ();
01717 #endif
01718
01719 if (bufp->allocated == 0)
01720 {
01721 if (bufp->buffer)
01722 {
01723
01724
01725 RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
01726 }
01727 else
01728 {
01729 bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
01730 }
01731 if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
01732
01733 bufp->allocated = INIT_BUF_SIZE;
01734 }
01735
01736 begalt = b = bufp->buffer;
01737
01738
01739 while (p != pend)
01740 {
01741 PATFETCH (c);
01742
01743 switch (c)
01744 {
01745 case '^':
01746 {
01747 if (
01748 p == pattern + 1
01749
01750 || syntax & RE_CONTEXT_INDEP_ANCHORS
01751
01752 || at_begline_loc_p (pattern, p, syntax))
01753 BUF_PUSH (begline);
01754 else
01755 goto normal_char;
01756 }
01757 break;
01758
01759
01760 case '$':
01761 {
01762 if (
01763 p == pend
01764
01765 || syntax & RE_CONTEXT_INDEP_ANCHORS
01766
01767 || at_endline_loc_p (p, pend, syntax))
01768 BUF_PUSH (endline);
01769 else
01770 goto normal_char;
01771 }
01772 break;
01773
01774
01775 case '+':
01776 case '?':
01777 if ((syntax & RE_BK_PLUS_QM)
01778 || (syntax & RE_LIMITED_OPS))
01779 goto normal_char;
01780 handle_plus:
01781 case '*':
01782
01783 if (!laststart)
01784 {
01785 if (syntax & RE_CONTEXT_INVALID_OPS)
01786 FREE_STACK_RETURN (REG_BADRPT);
01787 else if (!(syntax & RE_CONTEXT_INDEP_OPS))
01788 goto normal_char;
01789 }
01790
01791 {
01792
01793 boolean keep_string_p = false;
01794
01795
01796 char zero_times_ok = 0, many_times_ok = 0;
01797
01798
01799
01800
01801
01802
01803 for (;;)
01804 {
01805 zero_times_ok |= c != '+';
01806 many_times_ok |= c != '?';
01807
01808 if (p == pend)
01809 break;
01810
01811 PATFETCH (c);
01812
01813 if (c == '*'
01814 || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
01815 ;
01816
01817 else if (syntax & RE_BK_PLUS_QM && c == '\\')
01818 {
01819 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
01820
01821 PATFETCH (c1);
01822 if (!(c1 == '+' || c1 == '?'))
01823 {
01824 PATUNFETCH;
01825 PATUNFETCH;
01826 break;
01827 }
01828
01829 c = c1;
01830 }
01831 else
01832 {
01833 PATUNFETCH;
01834 break;
01835 }
01836
01837
01838 }
01839
01840
01841
01842 if (!laststart)
01843 break;
01844
01845
01846
01847 if (many_times_ok)
01848 {
01849
01850
01851
01852
01853
01854
01855
01856
01857
01858 assert (p - 1 > pattern);
01859
01860
01861 GET_BUFFER_SPACE (3);
01862
01863
01864
01865
01866
01867
01868 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
01869 && zero_times_ok
01870 && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
01871 && !(syntax & RE_DOT_NEWLINE))
01872 {
01873 STORE_JUMP (jump, b, laststart);
01874 keep_string_p = true;
01875 }
01876 else
01877
01878 STORE_JUMP (maybe_pop_jump, b, laststart - 3);
01879
01880
01881 b += 3;
01882 }
01883
01884
01885
01886 GET_BUFFER_SPACE (3);
01887 INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
01888 : on_failure_jump,
01889 laststart, b + 3);
01890 pending_exact = 0;
01891 b += 3;
01892
01893 if (!zero_times_ok)
01894 {
01895
01896
01897
01898
01899
01900 GET_BUFFER_SPACE (3);
01901 INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
01902 b += 3;
01903 }
01904 }
01905 break;
01906
01907
01908 case '.':
01909 laststart = b;
01910 BUF_PUSH (anychar);
01911 break;
01912
01913
01914 case '[':
01915 {
01916 boolean had_char_class = false;
01917
01918 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
01919
01920
01921
01922 GET_BUFFER_SPACE (34);
01923
01924 laststart = b;
01925
01926
01927
01928 BUF_PUSH (*p == '^' ? charset_not : charset);
01929 if (*p == '^')
01930 p++;
01931
01932
01933 p1 = p;
01934
01935
01936 BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
01937
01938
01939 bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
01940
01941
01942 if ((re_opcode_t) b[-2] == charset_not
01943 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
01944 SET_LIST_BIT ('\n');
01945
01946
01947 for (;;)
01948 {
01949 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
01950
01951 PATFETCH (c);
01952
01953
01954 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
01955 {
01956 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
01957
01958 PATFETCH (c1);
01959 SET_LIST_BIT (c1);
01960 continue;
01961 }
01962
01963
01964
01965
01966 if (c == ']' && p != p1 + 1)
01967 break;
01968
01969
01970
01971 if (had_char_class && c == '-' && *p != ']')
01972 FREE_STACK_RETURN (REG_ERANGE);
01973
01974
01975
01976
01977
01978 if (c == '-'
01979 && !(p - 2 >= pattern && p[-2] == '[')
01980 && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
01981 && *p != ']')
01982 {
01983 reg_errcode_t ret
01984 = compile_range (&p, pend, translate, syntax, b);
01985 if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
01986 }
01987
01988 else if (p[0] == '-' && p[1] != ']')
01989 {
01990 reg_errcode_t ret;
01991
01992
01993 PATFETCH (c1);
01994
01995 ret = compile_range (&p, pend, translate, syntax, b);
01996 if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
01997 }
01998
01999
02000
02001
02002 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
02003 {
02004 char str[CHAR_CLASS_MAX_LENGTH + 1];
02005
02006 PATFETCH (c);
02007 c1 = 0;
02008
02009
02010 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
02011
02012 for (;;)
02013 {
02014 PATFETCH (c);
02015 if (c == ':' || c == ']' || p == pend
02016 || c1 == CHAR_CLASS_MAX_LENGTH)
02017 break;
02018 str[c1++] = c;
02019 }
02020 str[c1] = '\0';
02021
02022
02023
02024
02025 if (c == ':' && *p == ']')
02026 {
02027 int ch;
02028 boolean is_alnum = STREQ (str, "alnum");
02029 boolean is_alpha = STREQ (str, "alpha");
02030 boolean is_blank = STREQ (str, "blank");
02031 boolean is_cntrl = STREQ (str, "cntrl");
02032 boolean is_digit = STREQ (str, "digit");
02033 boolean is_graph = STREQ (str, "graph");
02034 boolean is_lower = STREQ (str, "lower");
02035 boolean is_print = STREQ (str, "print");
02036 boolean is_punct = STREQ (str, "punct");
02037 boolean is_space = STREQ (str, "space");
02038 boolean is_upper = STREQ (str, "upper");
02039 boolean is_xdigit = STREQ (str, "xdigit");
02040
02041 if (!IS_CHAR_CLASS (str))
02042 FREE_STACK_RETURN (REG_ECTYPE);
02043
02044
02045
02046 PATFETCH (c);
02047
02048 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
02049
02050 for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
02051 {
02052
02053
02054 if ( (is_alnum && ISALNUM (ch))
02055 || (is_alpha && ISALPHA (ch))
02056 || (is_blank && ISBLANK (ch))
02057 || (is_cntrl && ISCNTRL (ch)))
02058 SET_LIST_BIT (ch);
02059 if ( (is_digit && ISDIGIT (ch))
02060 || (is_graph && ISGRAPH (ch))
02061 || (is_lower && ISLOWER (ch))
02062 || (is_print && ISPRINT (ch)))
02063 SET_LIST_BIT (ch);
02064 if ( (is_punct && ISPUNCT (ch))
02065 || (is_space && ISSPACE (ch))
02066 || (is_upper && ISUPPER (ch))
02067 || (is_xdigit && ISXDIGIT (ch)))
02068 SET_LIST_BIT (ch);
02069 }
02070 had_char_class = true;
02071 }
02072 else
02073 {
02074 c1++;
02075 while (c1--)
02076 PATUNFETCH;
02077 SET_LIST_BIT ('[');
02078 SET_LIST_BIT (':');
02079 had_char_class = false;
02080 }
02081 }
02082 else
02083 {
02084 had_char_class = false;
02085 SET_LIST_BIT (c);
02086 }
02087 }
02088
02089
02090
02091 while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
02092 b[-1]--;
02093 b += b[-1];
02094 }
02095 break;
02096
02097
02098 case '(':
02099 if (syntax & RE_NO_BK_PARENS)
02100 goto handle_open;
02101 else
02102 goto normal_char;
02103
02104
02105 case ')':
02106 if (syntax & RE_NO_BK_PARENS)
02107 goto handle_close;
02108 else
02109 goto normal_char;
02110
02111
02112 case '\n':
02113 if (syntax & RE_NEWLINE_ALT)
02114 goto handle_alt;
02115 else
02116 goto normal_char;
02117
02118
02119 case '|':
02120 if (syntax & RE_NO_BK_VBAR)
02121 goto handle_alt;
02122 else
02123 goto normal_char;
02124
02125
02126 case '{':
02127 if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
02128 goto handle_interval;
02129 else
02130 goto normal_char;
02131
02132
02133 case '\\':
02134 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
02135
02136
02137
02138
02139 PATFETCH_RAW (c);
02140
02141 switch (c)
02142 {
02143 case '(':
02144 if (syntax & RE_NO_BK_PARENS)
02145 goto normal_backslash;
02146
02147 handle_open:
02148 bufp->re_nsub++;
02149 regnum++;
02150
02151 if (COMPILE_STACK_FULL)
02152 {
02153 RETALLOC (compile_stack.stack, compile_stack.size << 1,
02154 compile_stack_elt_t);
02155 if (compile_stack.stack == NULL) return REG_ESPACE;
02156
02157 compile_stack.size <<= 1;
02158 }
02159
02160
02161
02162
02163
02164 COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
02165 COMPILE_STACK_TOP.fixup_alt_jump
02166 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
02167 COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
02168 COMPILE_STACK_TOP.regnum = regnum;
02169
02170
02171
02172
02173
02174 if (regnum <= MAX_REGNUM)
02175 {
02176 COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
02177 BUF_PUSH_3 (start_memory, regnum, 0);
02178 }
02179
02180 compile_stack.avail++;
02181
02182 fixup_alt_jump = 0;
02183 laststart = 0;
02184 begalt = b;
02185
02186
02187
02188 pending_exact = 0;
02189 break;
02190
02191
02192 case ')':
02193 if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
02194
02195 if (COMPILE_STACK_EMPTY)
02196 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
02197 goto normal_backslash;
02198 else
02199 FREE_STACK_RETURN (REG_ERPAREN);
02200
02201 handle_close:
02202 if (fixup_alt_jump)
02203 {
02204
02205
02206
02207 BUF_PUSH (push_dummy_failure);
02208
02209
02210
02211 STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
02212 }
02213
02214
02215 if (COMPILE_STACK_EMPTY)
02216 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
02217 goto normal_char;
02218 else
02219 FREE_STACK_RETURN (REG_ERPAREN);
02220
02221
02222
02223 assert (compile_stack.avail != 0);
02224 {
02225
02226
02227
02228 regnum_t this_group_regnum;
02229
02230 compile_stack.avail--;
02231 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
02232 fixup_alt_jump
02233 = COMPILE_STACK_TOP.fixup_alt_jump
02234 ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
02235 : 0;
02236 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
02237 this_group_regnum = COMPILE_STACK_TOP.regnum;
02238
02239
02240
02241 pending_exact = 0;
02242
02243
02244
02245 if (this_group_regnum <= MAX_REGNUM)
02246 {
02247 unsigned char *inner_group_loc
02248 = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
02249
02250 *inner_group_loc = regnum - this_group_regnum;
02251 BUF_PUSH_3 (stop_memory, this_group_regnum,
02252 regnum - this_group_regnum);
02253 }
02254 }
02255 break;
02256
02257
02258 case '|':
02259 if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
02260 goto normal_backslash;
02261 handle_alt:
02262 if (syntax & RE_LIMITED_OPS)
02263 goto normal_char;
02264
02265
02266
02267 GET_BUFFER_SPACE (3);
02268 INSERT_JUMP (on_failure_jump, begalt, b + 6);
02269 pending_exact = 0;
02270 b += 3;
02271
02272
02273
02274
02275
02276
02277
02278
02279
02280
02281
02282
02283
02284
02285
02286
02287
02288 if (fixup_alt_jump)
02289 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
02290
02291
02292
02293
02294 fixup_alt_jump = b;
02295 GET_BUFFER_SPACE (3);
02296 b += 3;
02297
02298 laststart = 0;
02299 begalt = b;
02300 break;
02301
02302
02303 case '{':
02304
02305 if (!(syntax & RE_INTERVALS)
02306
02307
02308 || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
02309 || (p - 2 == pattern && p == pend))
02310 goto normal_backslash;
02311
02312 handle_interval:
02313 {
02314
02315
02316
02317 int lower_bound = -1, upper_bound = -1;
02318
02319 beg_interval = p - 1;
02320
02321 if (p == pend)
02322 {
02323 if (syntax & RE_NO_BK_BRACES)
02324 goto unfetch_interval;
02325 else
02326 FREE_STACK_RETURN (REG_EBRACE);
02327 }
02328
02329 GET_UNSIGNED_NUMBER (lower_bound);
02330
02331 if (c == ',')
02332 {
02333 GET_UNSIGNED_NUMBER (upper_bound);
02334 if (upper_bound < 0) upper_bound = RE_DUP_MAX;
02335 }
02336 else
02337
02338 upper_bound = lower_bound;
02339
02340 if (lower_bound < 0 || upper_bound > RE_DUP_MAX
02341 || lower_bound > upper_bound)
02342 {
02343 if (syntax & RE_NO_BK_BRACES)
02344 goto unfetch_interval;
02345 else
02346 FREE_STACK_RETURN (REG_BADBR);
02347 }
02348
02349 if (!(syntax & RE_NO_BK_BRACES))
02350 {
02351 if (c != '\\') FREE_STACK_RETURN (REG_EBRACE);
02352
02353 PATFETCH (c);
02354 }
02355
02356 if (c != '}')
02357 {
02358 if (syntax & RE_NO_BK_BRACES)
02359 goto unfetch_interval;
02360 else
02361 FREE_STACK_RETURN (REG_BADBR);
02362 }
02363
02364
02365
02366
02367 if (!laststart)
02368 {
02369 if (syntax & RE_CONTEXT_INVALID_OPS)
02370 FREE_STACK_RETURN (REG_BADRPT);
02371 else if (syntax & RE_CONTEXT_INDEP_OPS)
02372 laststart = b;
02373 else
02374 goto unfetch_interval;
02375 }
02376
02377
02378
02379
02380 if (upper_bound == 0)
02381 {
02382 GET_BUFFER_SPACE (3);
02383 INSERT_JUMP (jump, laststart, b + 3);
02384 b += 3;
02385 }
02386
02387
02388
02389
02390
02391
02392
02393
02394
02395
02396 else
02397 {
02398
02399 unsigned nbytes = 10 + (upper_bound > 1) * 10;
02400
02401 GET_BUFFER_SPACE (nbytes);
02402
02403
02404
02405
02406
02407
02408 INSERT_JUMP2 (succeed_n, laststart,
02409 b + 5 + (upper_bound > 1) * 5,
02410 lower_bound);
02411 b += 5;
02412
02413
02414
02415
02416
02417 insert_op2 (set_number_at, laststart, 5, lower_bound, b);
02418 b += 5;
02419
02420 if (upper_bound > 1)
02421 {
02422
02423
02424
02425
02426
02427
02428 STORE_JUMP2 (jump_n, b, laststart + 5,
02429 upper_bound - 1);
02430 b += 5;
02431
02432
02433
02434
02435
02436
02437
02438
02439
02440
02441
02442
02443
02444
02445
02446 insert_op2 (set_number_at, laststart, b - laststart,
02447 upper_bound - 1, b);
02448 b += 5;
02449 }
02450 }
02451 pending_exact = 0;
02452 beg_interval = NULL;
02453 }
02454 break;
02455
02456 unfetch_interval:
02457
02458 assert (beg_interval);
02459 p = beg_interval;
02460 beg_interval = NULL;
02461
02462
02463 PATFETCH (c);
02464
02465 if (!(syntax & RE_NO_BK_BRACES))
02466 {
02467 if (p > pattern && p[-1] == '\\')
02468 goto normal_backslash;
02469 }
02470 goto normal_char;
02471
02472 #ifdef emacs
02473
02474
02475 case '=':
02476 BUF_PUSH (at_dot);
02477 break;
02478
02479 case 's':
02480 laststart = b;
02481 PATFETCH (c);
02482 BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
02483 break;
02484
02485 case 'S':
02486 laststart = b;
02487 PATFETCH (c);
02488 BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
02489 break;
02490 #endif
02491
02492
02493 case 'w':
02494 laststart = b;
02495 BUF_PUSH (wordchar);
02496 break;
02497
02498
02499 case 'W':
02500 laststart = b;
02501 BUF_PUSH (notwordchar);
02502 break;
02503
02504
02505 case '<':
02506 BUF_PUSH (wordbeg);
02507 break;
02508
02509 case '>':
02510 BUF_PUSH (wordend);
02511 break;
02512
02513 case 'b':
02514 BUF_PUSH (wordbound);
02515 break;
02516
02517 case 'B':
02518 BUF_PUSH (notwordbound);
02519 break;
02520
02521 case '`':
02522 BUF_PUSH (begbuf);
02523 break;
02524
02525 case '\'':
02526 BUF_PUSH (endbuf);
02527 break;
02528
02529 case '1': case '2': case '3': case '4': case '5':
02530 case '6': case '7': case '8': case '9':
02531 if (syntax & RE_NO_BK_REFS)
02532 goto normal_char;
02533
02534 c1 = c - '0';
02535
02536 if (c1 > regnum)
02537 FREE_STACK_RETURN (REG_ESUBREG);
02538
02539
02540 if (group_in_compile_stack (compile_stack, c1))
02541 goto normal_char;
02542
02543 laststart = b;
02544 BUF_PUSH_2 (duplicate, c1);
02545 break;
02546
02547
02548 case '+':
02549 case '?':
02550 if (syntax & RE_BK_PLUS_QM)
02551 goto handle_plus;
02552 else
02553 goto normal_backslash;
02554
02555 default:
02556 normal_backslash:
02557
02558
02559
02560 c = TRANSLATE (c);
02561 goto normal_char;
02562 }
02563 break;
02564
02565
02566 default:
02567
02568 normal_char:
02569
02570 if (!pending_exact
02571
02572
02573 || pending_exact + *pending_exact + 1 != b
02574
02575
02576 || *pending_exact == (1 << BYTEWIDTH) - 1
02577
02578
02579 || *p == '*' || *p == '^'
02580 || ((syntax & RE_BK_PLUS_QM)
02581 ? *p == '\\' && (p[1] == '+' || p[1] == '?')
02582 : (*p == '+' || *p == '?'))
02583 || ((syntax & RE_INTERVALS)
02584 && ((syntax & RE_NO_BK_BRACES)
02585 ? *p == '{'
02586 : (p[0] == '\\' && p[1] == '{'))))
02587 {
02588
02589
02590 laststart = b;
02591
02592 BUF_PUSH_2 (exactn, 0);
02593 pending_exact = b - 1;
02594 }
02595
02596 BUF_PUSH (c);
02597 (*pending_exact)++;
02598 break;
02599 }
02600 }
02601
02602
02603
02604
02605 if (fixup_alt_jump)
02606 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
02607
02608 if (!COMPILE_STACK_EMPTY)
02609 FREE_STACK_RETURN (REG_EPAREN);
02610
02611
02612
02613 if (syntax & RE_NO_POSIX_BACKTRACKING)
02614 BUF_PUSH (succeed);
02615
02616 free (compile_stack.stack);
02617
02618
02619 bufp->used = b - bufp->buffer;
02620
02621 #ifdef DEBUG
02622 if (debug)
02623 {
02624 DEBUG_PRINT1 ("\nCompiled pattern: \n");
02625 print_compiled_pattern (bufp);
02626 }
02627 #endif
02628
02629 #ifndef MATCH_MAY_ALLOCATE
02630
02631
02632
02633 {
02634 int num_regs = bufp->re_nsub + 1;
02635
02636
02637
02638
02639 if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
02640 {
02641 fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
02642
02643 #ifdef emacs
02644 if (! fail_stack.stack)
02645 fail_stack.stack
02646 = (fail_stack_elt_t *) xmalloc (fail_stack.size
02647 * sizeof (fail_stack_elt_t));
02648 else
02649 fail_stack.stack
02650 = (fail_stack_elt_t *) xrealloc (fail_stack.stack,
02651 (fail_stack.size
02652 * sizeof (fail_stack_elt_t)));
02653 #else
02654 if (! fail_stack.stack)
02655 fail_stack.stack
02656 = (fail_stack_elt_t *) malloc (fail_stack.size
02657 * sizeof (fail_stack_elt_t));
02658 else
02659 fail_stack.stack
02660 = (fail_stack_elt_t *) realloc (fail_stack.stack,
02661 (fail_stack.size
02662 * sizeof (fail_stack_elt_t)));
02663 #endif
02664 }
02665
02666 regex_grow_registers (num_regs);
02667 }
02668 #endif
02669
02670 return REG_NOERROR;
02671 }
02672
02673
02674
02675
02676
02677 static void
02678 store_op1 (op, loc, arg)
02679 re_opcode_t op;
02680 unsigned char *loc;
02681 int arg;
02682 {
02683 *loc = (unsigned char) op;
02684 STORE_NUMBER (loc + 1, arg);
02685 }
02686
02687
02688
02689
02690 static void
02691 store_op2 (op, loc, arg1, arg2)
02692 re_opcode_t op;
02693 unsigned char *loc;
02694 int arg1, arg2;
02695 {
02696 *loc = (unsigned char) op;
02697 STORE_NUMBER (loc + 1, arg1);
02698 STORE_NUMBER (loc + 3, arg2);
02699 }
02700
02701
02702
02703
02704
02705 static void
02706 insert_op1 (op, loc, arg, end)
02707 re_opcode_t op;
02708 unsigned char *loc;
02709 int arg;
02710 unsigned char *end;
02711 {
02712 register unsigned char *pfrom = end;
02713 register unsigned char *pto = end + 3;
02714
02715 while (pfrom != loc)
02716 *--pto = *--pfrom;
02717
02718 store_op1 (op, loc, arg);
02719 }
02720
02721
02722
02723
02724 static void
02725 insert_op2 (op, loc, arg1, arg2, end)
02726 re_opcode_t op;
02727 unsigned char *loc;
02728 int arg1, arg2;
02729 unsigned char *end;
02730 {
02731 register unsigned char *pfrom = end;
02732 register unsigned char *pto = end + 5;
02733
02734 while (pfrom != loc)
02735 *--pto = *--pfrom;
02736
02737 store_op2 (op, loc, arg1, arg2);
02738 }
02739
02740
02741
02742
02743
02744
02745 static boolean
02746 at_begline_loc_p (pattern, p, syntax)
02747 const char *pattern, *p;
02748 reg_syntax_t syntax;
02749 {
02750 const char *prev = p - 2;
02751 boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
02752
02753 return
02754
02755 (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
02756
02757 || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
02758 }
02759
02760
02761
02762
02763
02764 static boolean
02765 at_endline_loc_p (p, pend, syntax)
02766 const char *p, *pend;
02767 int syntax;
02768 {
02769 const char *next = p;
02770 boolean next_backslash = *next == '\\';
02771 const char *next_next = p + 1 < pend ? p + 1 : 0;
02772
02773 return
02774
02775 (syntax & RE_NO_BK_PARENS ? *next == ')'
02776 : next_backslash && next_next && *next_next == ')')
02777
02778 || (syntax & RE_NO_BK_VBAR ? *next == '|'
02779 : next_backslash && next_next && *next_next == '|');
02780 }
02781
02782
02783
02784
02785
02786 static boolean
02787 group_in_compile_stack (compile_stack, regnum)
02788 compile_stack_type compile_stack;
02789 regnum_t regnum;
02790 {
02791 int this_element;
02792
02793 for (this_element = compile_stack.avail - 1;
02794 this_element >= 0;
02795 this_element--)
02796 if (compile_stack.stack[this_element].regnum == regnum)
02797 return true;
02798
02799 return false;
02800 }
02801
02802
02803
02804
02805
02806
02807
02808
02809
02810
02811
02812
02813
02814 static reg_errcode_t
02815 compile_range (p_ptr, pend, translate, syntax, b)
02816 const char **p_ptr, *pend;
02817 char *translate;
02818 reg_syntax_t syntax;
02819 unsigned char *b;
02820 {
02821 unsigned this_char;
02822
02823 const char *p = *p_ptr;
02824 int range_start, range_end;
02825
02826 if (p == pend)
02827 return REG_ERANGE;
02828
02829
02830
02831
02832
02833
02834
02835
02836
02837 range_start = ((const unsigned char *) p)[-2];
02838 range_end = ((const unsigned char *) p)[0];
02839
02840
02841
02842 (*p_ptr)++;
02843
02844
02845 if (range_start > range_end)
02846 return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
02847
02848
02849
02850
02851
02852 for (this_char = range_start; this_char <= range_end; this_char++)
02853 {
02854 SET_LIST_BIT (TRANSLATE (this_char));
02855 }
02856
02857 return REG_NOERROR;
02858 }
02859
02860
02861
02862
02863
02864
02865
02866
02867
02868
02869
02870
02871
02872
02873 int
02874 re_compile_fastmap (bufp)
02875 struct re_pattern_buffer *bufp;
02876 {
02877 int j, k;
02878 #ifdef MATCH_MAY_ALLOCATE
02879 fail_stack_type fail_stack;
02880 #endif
02881 #ifndef REGEX_MALLOC
02882 char *destination;
02883 #endif
02884
02885 unsigned num_regs = 0;
02886
02887 register char *fastmap = bufp->fastmap;
02888 unsigned char *pattern = bufp->buffer;
02889 unsigned long size = bufp->used;
02890 unsigned char *p = pattern;
02891 register unsigned char *pend = pattern + size;
02892
02893 #ifdef REL_ALLOC
02894
02895
02896 fail_stack_elt_t *failure_stack_ptr;
02897 #endif
02898
02899
02900
02901
02902
02903 boolean path_can_be_null = true;
02904
02905
02906 boolean succeed_n_p = false;
02907
02908 assert (fastmap != NULL && p != NULL);
02909
02910 INIT_FAIL_STACK ();
02911 bzero (fastmap, 1 << BYTEWIDTH);
02912 bufp->fastmap_accurate = 1;
02913 bufp->can_be_null = 0;
02914
02915 while (1)
02916 {
02917 if (p == pend || *p == succeed)
02918 {
02919
02920 if (!FAIL_STACK_EMPTY ())
02921 {
02922 bufp->can_be_null |= path_can_be_null;
02923
02924
02925 path_can_be_null = true;
02926
02927 p = fail_stack.stack[--fail_stack.avail].pointer;
02928
02929 continue;
02930 }
02931 else
02932 break;
02933 }
02934
02935
02936 assert (p < pend);
02937
02938 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
02939 {
02940
02941
02942
02943
02944
02945
02946 case duplicate:
02947 bufp->can_be_null = 1;
02948 goto done;
02949
02950
02951
02952
02953
02954 case exactn:
02955 fastmap[p[1]] = 1;
02956 break;
02957
02958
02959 case charset:
02960 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
02961 if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
02962 fastmap[j] = 1;
02963 break;
02964
02965
02966 case charset_not:
02967
02968 for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
02969 fastmap[j] = 1;
02970
02971 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
02972 if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
02973 fastmap[j] = 1;
02974 break;
02975
02976
02977 case wordchar:
02978 for (j = 0; j < (1 << BYTEWIDTH); j++)
02979 if (SYNTAX (j) == Sword)
02980 fastmap[j] = 1;
02981 break;
02982
02983
02984 case notwordchar:
02985 for (j = 0; j < (1 << BYTEWIDTH); j++)
02986 if (SYNTAX (j) != Sword)
02987 fastmap[j] = 1;
02988 break;
02989
02990
02991 case anychar:
02992 {
02993 int fastmap_newline = fastmap['\n'];
02994
02995
02996 for (j = 0; j < (1 << BYTEWIDTH); j++)
02997 fastmap[j] = 1;
02998
02999
03000 if (!(bufp->syntax & RE_DOT_NEWLINE))
03001 fastmap['\n'] = fastmap_newline;
03002
03003
03004
03005 else if (bufp->can_be_null)
03006 goto done;
03007
03008
03009 break;
03010 }
03011
03012 #ifdef emacs
03013 case syntaxspec:
03014 k = *p++;
03015 for (j = 0; j < (1 << BYTEWIDTH); j++)
03016 if (SYNTAX (j) == (enum syntaxcode) k)
03017 fastmap[j] = 1;
03018 break;
03019
03020
03021 case notsyntaxspec:
03022 k = *p++;
03023 for (j = 0; j < (1 << BYTEWIDTH); j++)
03024 if (SYNTAX (j) != (enum syntaxcode) k)
03025 fastmap[j] = 1;
03026 break;
03027
03028
03029
03030
03031
03032
03033 case before_dot:
03034 case at_dot:
03035 case after_dot:
03036 continue;
03037 #endif
03038
03039
03040 case no_op:
03041 case begline:
03042 case endline:
03043 case begbuf:
03044 case endbuf:
03045 case wordbound:
03046 case notwordbound:
03047 case wordbeg:
03048 case wordend:
03049 case push_dummy_failure:
03050 continue;
03051
03052
03053 case jump_n:
03054 case pop_failure_jump:
03055 case maybe_pop_jump:
03056 case jump:
03057 case jump_past_alt:
03058 case dummy_failure_jump:
03059 EXTRACT_NUMBER_AND_INCR (j, p);
03060 p += j;
03061 if (j > 0)
03062 continue;
03063
03064
03065
03066
03067
03068
03069 if ((re_opcode_t) *p != on_failure_jump
03070 && (re_opcode_t) *p != succeed_n)
03071 continue;
03072
03073 p++;
03074 EXTRACT_NUMBER_AND_INCR (j, p);
03075 p += j;
03076
03077
03078 if (!FAIL_STACK_EMPTY ()
03079 && fail_stack.stack[fail_stack.avail - 1].pointer == p)
03080 fail_stack.avail--;
03081
03082 continue;
03083
03084
03085 case on_failure_jump:
03086 case on_failure_keep_string_jump:
03087 handle_on_failure_jump:
03088 EXTRACT_NUMBER_AND_INCR (j, p);
03089
03090
03091
03092
03093
03094
03095
03096
03097 if (p + j < pend)
03098 {
03099 if (!PUSH_PATTERN_OP (p + j, fail_stack))
03100 {
03101 RESET_FAIL_STACK ();
03102 return -2;
03103 }
03104 }
03105 else
03106 bufp->can_be_null = 1;
03107
03108 if (succeed_n_p)
03109 {
03110 EXTRACT_NUMBER_AND_INCR (k, p);
03111 succeed_n_p = false;
03112 }
03113
03114 continue;
03115
03116
03117 case succeed_n:
03118
03119 p += 2;
03120
03121
03122 EXTRACT_NUMBER_AND_INCR (k, p);
03123 if (k == 0)
03124 {
03125 p -= 4;
03126 succeed_n_p = true;
03127 goto handle_on_failure_jump;
03128 }
03129 continue;
03130
03131
03132 case set_number_at:
03133 p += 4;
03134 continue;
03135
03136
03137 case start_memory:
03138 case stop_memory:
03139 p += 2;
03140 continue;
03141
03142
03143 default:
03144 abort ();
03145 }
03146
03147
03148
03149
03150
03151
03152
03153 path_can_be_null = false;
03154 p = pend;
03155 }
03156
03157
03158
03159 bufp->can_be_null |= path_can_be_null;
03160
03161 done:
03162 RESET_FAIL_STACK ();
03163 return 0;
03164 }
03165
03166
03167
03168
03169
03170
03171
03172
03173
03174
03175
03176
03177
03178
03179 void
03180 re_set_registers (bufp, regs, num_regs, starts, ends)
03181 struct re_pattern_buffer *bufp;
03182 struct re_registers *regs;
03183 unsigned num_regs;
03184 regoff_t *starts, *ends;
03185 {
03186 if (num_regs)
03187 {
03188 bufp->regs_allocated = REGS_REALLOCATE;
03189 regs->num_regs = num_regs;
03190 regs->start = starts;
03191 regs->end = ends;
03192 }
03193 else
03194 {
03195 bufp->regs_allocated = REGS_UNALLOCATED;
03196 regs->num_regs = 0;
03197 regs->start = regs->end = (regoff_t *) 0;
03198 }
03199 }
03200
03201
03202
03203
03204
03205
03206 int
03207 re_search (bufp, string, size, startpos, range, regs)
03208 struct re_pattern_buffer *bufp;
03209 const char *string;
03210 int size, startpos, range;
03211 struct re_registers *regs;
03212 {
03213 return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
03214 regs, size);
03215 }
03216
03217
03218
03219
03220
03221
03222
03223
03224
03225
03226
03227
03228
03229
03230
03231
03232
03233
03234
03235
03236
03237
03238
03239 int
03240 re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
03241 struct re_pattern_buffer *bufp;
03242 const char *string1, *string2;
03243 int size1, size2;
03244 int startpos;
03245 int range;
03246 struct re_registers *regs;
03247 int stop;
03248 {
03249 int val;
03250 register char *fastmap = bufp->fastmap;
03251 register char *translate = bufp->translate;
03252 int total_size = size1 + size2;
03253 int endpos = startpos + range;
03254
03255
03256 if (startpos < 0 || startpos > total_size)
03257 return -1;
03258
03259
03260
03261 if (endpos < -1)
03262 range = -1 - startpos;
03263 else if (endpos > total_size)
03264 range = total_size - startpos;
03265
03266
03267
03268 if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
03269 {
03270 if (startpos > 0)
03271 return -1;
03272 else
03273 range = 1;
03274 }
03275
03276
03277 if (fastmap && !bufp->fastmap_accurate)
03278 if (re_compile_fastmap (bufp) == -2)
03279 return -2;
03280
03281
03282 for (;;)
03283 {
03284
03285
03286
03287
03288 if (fastmap && startpos < total_size && !bufp->can_be_null)
03289 {
03290 if (range > 0)
03291 {
03292 register const char *d;
03293 register int lim = 0;
03294 int irange = range;
03295
03296 if (startpos < size1 && startpos + range >= size1)
03297 lim = range - (size1 - startpos);
03298
03299 d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
03300
03301
03302
03303 if (translate)
03304 while (range > lim
03305 && !fastmap[(unsigned char)
03306 translate[(unsigned char) *d++]])
03307 range--;
03308 else
03309 while (range > lim && !fastmap[(unsigned char) *d++])
03310 range--;
03311
03312 startpos += irange - range;
03313 }
03314 else
03315 {
03316 register char c = (size1 == 0 || startpos >= size1
03317 ? string2[startpos - size1]
03318 : string1[startpos]);
03319
03320 if (!fastmap[(unsigned char) TRANSLATE (c)])
03321 goto advance;
03322 }
03323 }
03324
03325
03326 if (range >= 0 && startpos == total_size && fastmap
03327 && !bufp->can_be_null)
03328 return -1;
03329
03330 val = re_match_2_internal (bufp, string1, size1, string2, size2,
03331 startpos, regs, stop);
03332 #ifndef REGEX_MALLOC
03333 #ifdef C_ALLOCA
03334 alloca (0);
03335 #endif
03336 #endif
03337
03338 if (val >= 0)
03339 return startpos;
03340
03341 if (val == -2)
03342 return -2;
03343
03344 advance:
03345 if (!range)
03346 break;
03347 else if (range > 0)
03348 {
03349 range--;
03350 startpos++;
03351 }
03352 else
03353 {
03354 range++;
03355 startpos--;
03356 }
03357 }
03358 return -1;
03359 }
03360
03361
03362
03363 static int bcmp_translate ();
03364 static boolean alt_match_null_string_p (),
03365 common_op_match_null_string_p (),
03366 group_match_null_string_p ();
03367
03368
03369
03370 #define POINTER_TO_OFFSET(ptr) \
03371 (FIRST_STRING_P (ptr) \
03372 ? ((regoff_t) ((ptr) - string1)) \
03373 : ((regoff_t) ((ptr) - string2 + size1)))
03374
03375
03376
03377 #define MATCHING_IN_FIRST_STRING (dend == end_match_1)
03378
03379
03380
03381 #define PREFETCH() \
03382 while (d == dend) \
03383 { \
03384 \
03385 if (dend == end_match_2) \
03386 goto fail; \
03387 \
03388 d = string2; \
03389 dend = end_match_2; \
03390 }
03391
03392
03393
03394
03395 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
03396 #define AT_STRINGS_END(d) ((d) == end2)
03397
03398
03399
03400
03401
03402
03403 #define WORDCHAR_P(d) \
03404 (SYNTAX ((d) == end1 ? *string2 \
03405 : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \
03406 == Sword)
03407
03408
03409
03410 #define AT_WORD_BOUNDARY(d) \
03411 (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \
03412 || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
03413
03414
03415
03416 #ifdef MATCH_MAY_ALLOCATE
03417 #define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL
03418 #define FREE_VARIABLES() \
03419 do { \
03420 REGEX_FREE_STACK (fail_stack.stack); \
03421 FREE_VAR (regstart); \
03422 FREE_VAR (regend); \
03423 FREE_VAR (old_regstart); \
03424 FREE_VAR (old_regend); \
03425 FREE_VAR (best_regstart); \
03426 FREE_VAR (best_regend); \
03427 FREE_VAR (reg_info); \
03428 FREE_VAR (reg_dummy); \
03429 FREE_VAR (reg_info_dummy); \
03430 } while (0)
03431 #else
03432 #define FREE_VARIABLES() ((void)0)
03433 #endif
03434
03435
03436
03437
03438
03439
03440
03441
03442 #define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
03443 #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
03444
03445
03446
03447 #ifndef emacs
03448
03449
03450 int
03451 re_match (bufp, string, size, pos, regs)
03452 struct re_pattern_buffer *bufp;
03453 const char *string;
03454 int size, pos;
03455 struct re_registers *regs;
03456 {
03457 int result = re_match_2_internal (bufp, NULL, 0, string, size,
03458 pos, regs, size);
03459 #ifndef REGEX_MALLOC
03460 alloca (0);
03461 #endif
03462 return result;
03463 }
03464 #endif
03465
03466
03467
03468
03469
03470
03471
03472
03473
03474
03475
03476
03477
03478
03479
03480 int
03481 re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
03482 struct re_pattern_buffer *bufp;
03483 const char *string1, *string2;
03484 int size1, size2;
03485 int pos;
03486 struct re_registers *regs;
03487 int stop;
03488 {
03489 int result = re_match_2_internal (bufp, string1, size1, string2, size2,
03490 pos, regs, stop);
03491 #ifndef REGEX_MALLOC
03492 alloca (0);
03493 #endif
03494 return result;
03495 }
03496
03497
03498
03499 static int
03500 re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
03501 struct re_pattern_buffer *bufp;
03502 const char *string1, *string2;
03503 int size1, size2;
03504 int pos;
03505 struct re_registers *regs;
03506 int stop;
03507 {
03508
03509 int mcnt;
03510 unsigned char *p1;
03511
03512
03513 const char *end1, *end2;
03514
03515
03516
03517 const char *end_match_1, *end_match_2;
03518
03519
03520 const char *d, *dend;
03521
03522
03523 unsigned char *p = bufp->buffer;
03524 register unsigned char *pend = p + bufp->used;
03525
03526
03527
03528 unsigned char *just_past_start_mem = 0;
03529
03530
03531 char *translate = bufp->translate;
03532
03533
03534
03535
03536
03537
03538
03539
03540
03541
03542 #ifdef MATCH_MAY_ALLOCATE
03543 fail_stack_type fail_stack;
03544 #endif
03545 #ifdef DEBUG
03546 static unsigned failure_id = 0;
03547 unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
03548 #endif
03549
03550 #ifdef REL_ALLOC
03551
03552
03553 fail_stack_elt_t *failure_stack_ptr;
03554 #endif
03555
03556
03557
03558
03559 unsigned num_regs = bufp->re_nsub + 1;
03560
03561
03562 unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG;
03563 unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG;
03564
03565
03566
03567
03568
03569
03570
03571
03572 #ifdef MATCH_MAY_ALLOCATE
03573 const char **regstart, **regend;
03574 #endif
03575
03576
03577
03578
03579
03580
03581 #ifdef MATCH_MAY_ALLOCATE
03582 const char **old_regstart, **old_regend;
03583 #endif
03584
03585
03586
03587
03588
03589
03590
03591 #ifdef MATCH_MAY_ALLOCATE
03592 register_info_type *reg_info;
03593 #endif
03594
03595
03596
03597
03598
03599 unsigned best_regs_set = false;
03600 #ifdef MATCH_MAY_ALLOCATE
03601 const char **best_regstart, **best_regend;
03602 #endif
03603
03604
03605
03606
03607
03608
03609
03610
03611
03612 const char *match_end = NULL;
03613
03614
03615 int set_regs_matched_done = 0;
03616
03617
03618 #ifdef MATCH_MAY_ALLOCATE
03619 const char **reg_dummy;
03620 register_info_type *reg_info_dummy;
03621 #endif
03622
03623 #ifdef DEBUG
03624
03625 unsigned num_regs_pushed = 0;
03626 #endif
03627
03628 DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
03629
03630 INIT_FAIL_STACK ();
03631
03632 #ifdef MATCH_MAY_ALLOCATE
03633
03634
03635
03636
03637
03638 if (bufp->re_nsub)
03639 {
03640 regstart = REGEX_TALLOC (num_regs, const char *);
03641 regend = REGEX_TALLOC (num_regs, const char *);
03642 old_regstart = REGEX_TALLOC (num_regs, const char *);
03643 old_regend = REGEX_TALLOC (num_regs, const char *);
03644 best_regstart = REGEX_TALLOC (num_regs, const char *);
03645 best_regend = REGEX_TALLOC (num_regs, const char *);
03646 reg_info = REGEX_TALLOC (num_regs, register_info_type);
03647 reg_dummy = REGEX_TALLOC (num_regs, const char *);
03648 reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
03649
03650 if (!(regstart && regend && old_regstart && old_regend && reg_info
03651 && best_regstart && best_regend && reg_dummy && reg_info_dummy))
03652 {
03653 FREE_VARIABLES ();
03654 return -2;
03655 }
03656 }
03657 else
03658 {
03659
03660
03661 regstart = regend = old_regstart = old_regend = best_regstart
03662 = best_regend = reg_dummy = NULL;
03663 reg_info = reg_info_dummy = (register_info_type *) NULL;
03664 }
03665 #endif
03666
03667
03668 if (pos < 0 || pos > size1 + size2)
03669 {
03670 FREE_VARIABLES ();
03671 return -1;
03672 }
03673
03674
03675
03676
03677 for (mcnt = 1; mcnt < num_regs; mcnt++)
03678 {
03679 regstart[mcnt] = regend[mcnt]
03680 = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
03681
03682 REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
03683 IS_ACTIVE (reg_info[mcnt]) = 0;
03684 MATCHED_SOMETHING (reg_info[mcnt]) = 0;
03685 EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
03686 }
03687
03688
03689
03690 if (size2 == 0 && string1 != NULL)
03691 {
03692 string2 = string1;
03693 size2 = size1;
03694 string1 = 0;
03695 size1 = 0;
03696 }
03697 end1 = string1 + size1;
03698 end2 = string2 + size2;
03699
03700
03701 if (stop <= size1)
03702 {
03703 end_match_1 = string1 + stop;
03704 end_match_2 = string2;
03705 }
03706 else
03707 {
03708 end_match_1 = end1;
03709 end_match_2 = string2 + stop - size1;
03710 }
03711
03712
03713
03714
03715
03716
03717
03718 if (size1 > 0 && pos <= size1)
03719 {
03720 d = string1 + pos;
03721 dend = end_match_1;
03722 }
03723 else
03724 {
03725 d = string2 + pos - size1;
03726 dend = end_match_2;
03727 }
03728
03729 DEBUG_PRINT1 ("The compiled pattern is: ");
03730 DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
03731 DEBUG_PRINT1 ("The string to match is: `");
03732 DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
03733 DEBUG_PRINT1 ("'\n");
03734
03735
03736
03737
03738 for (;;)
03739 {
03740 DEBUG_PRINT2 ("\n0x%x: ", p);
03741
03742 if (p == pend)
03743 {
03744 DEBUG_PRINT1 ("end of pattern ... ");
03745
03746
03747
03748 if (d != end_match_2)
03749 {
03750
03751
03752 boolean same_str_p = (FIRST_STRING_P (match_end)
03753 == MATCHING_IN_FIRST_STRING);
03754
03755 boolean best_match_p;
03756
03757
03758
03759 if (same_str_p)
03760 best_match_p = d > match_end;
03761 else
03762 best_match_p = !MATCHING_IN_FIRST_STRING;
03763
03764 DEBUG_PRINT1 ("backtracking.\n");
03765
03766 if (!FAIL_STACK_EMPTY ())
03767 {
03768
03769
03770 if (!best_regs_set || best_match_p)
03771 {
03772 best_regs_set = true;
03773 match_end = d;
03774
03775 DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
03776
03777 for (mcnt = 1; mcnt < num_regs; mcnt++)
03778 {
03779 best_regstart[mcnt] = regstart[mcnt];
03780 best_regend[mcnt] = regend[mcnt];
03781 }
03782 }
03783 goto fail;
03784 }
03785
03786
03787
03788
03789 else if (best_regs_set && !best_match_p)
03790 {
03791 restore_best_regs:
03792
03793
03794
03795
03796
03797 DEBUG_PRINT1 ("Restoring best registers.\n");
03798
03799 d = match_end;
03800 dend = ((d >= string1 && d <= end1)
03801 ? end_match_1 : end_match_2);
03802
03803 for (mcnt = 1; mcnt < num_regs; mcnt++)
03804 {
03805 regstart[mcnt] = best_regstart[mcnt];
03806 regend[mcnt] = best_regend[mcnt];
03807 }
03808 }
03809 }
03810
03811 succeed_label:
03812 DEBUG_PRINT1 ("Accepting match.\n");
03813
03814
03815 if (regs && !bufp->no_sub)
03816 {
03817
03818 if (bufp->regs_allocated == REGS_UNALLOCATED)
03819 {
03820
03821
03822 regs->num_regs = MAX (RE_NREGS, num_regs + 1);
03823 regs->start = TALLOC (regs->num_regs, regoff_t);
03824 regs->end = TALLOC (regs->num_regs, regoff_t);
03825 if (regs->start == NULL || regs->end == NULL)
03826 {
03827 FREE_VARIABLES ();
03828 return -2;
03829 }
03830 bufp->regs_allocated = REGS_REALLOCATE;
03831 }
03832 else if (bufp->regs_allocated == REGS_REALLOCATE)
03833 {
03834
03835
03836 if (regs->num_regs < num_regs + 1)
03837 {
03838 regs->num_regs = num_regs + 1;
03839 RETALLOC (regs->start, regs->num_regs, regoff_t);
03840 RETALLOC (regs->end, regs->num_regs, regoff_t);
03841 if (regs->start == NULL || regs->end == NULL)
03842 {
03843 FREE_VARIABLES ();
03844 return -2;
03845 }
03846 }
03847 }
03848 else
03849 {
03850
03851
03852 assert (bufp->regs_allocated == REGS_FIXED);
03853 }
03854
03855
03856
03857
03858 if (regs->num_regs > 0)
03859 {
03860 regs->start[0] = pos;
03861 regs->end[0] = (MATCHING_IN_FIRST_STRING
03862 ? ((regoff_t) (d - string1))
03863 : ((regoff_t) (d - string2 + size1)));
03864 }
03865
03866
03867
03868 for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++)
03869 {
03870 if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
03871 regs->start[mcnt] = regs->end[mcnt] = -1;
03872 else
03873 {
03874 regs->start[mcnt]
03875 = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
03876 regs->end[mcnt]
03877 = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
03878 }
03879 }
03880
03881
03882
03883
03884
03885
03886 for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
03887 regs->start[mcnt] = regs->end[mcnt] = -1;
03888 }
03889
03890 DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
03891 nfailure_points_pushed, nfailure_points_popped,
03892 nfailure_points_pushed - nfailure_points_popped);
03893 DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
03894
03895 mcnt = d - pos - (MATCHING_IN_FIRST_STRING
03896 ? string1
03897 : string2 - size1);
03898
03899 DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
03900
03901 FREE_VARIABLES ();
03902 return mcnt;
03903 }
03904
03905
03906 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
03907 {
03908
03909
03910 case no_op:
03911 DEBUG_PRINT1 ("EXECUTING no_op.\n");
03912 break;
03913
03914 case succeed:
03915 DEBUG_PRINT1 ("EXECUTING succeed.\n");
03916 goto succeed_label;
03917
03918
03919
03920
03921 case exactn:
03922 mcnt = *p++;
03923 DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
03924
03925
03926
03927 if (translate)
03928 {
03929 do
03930 {
03931 PREFETCH ();
03932 if (translate[(unsigned char) *d++] != (char) *p++)
03933 goto fail;
03934 }
03935 while (--mcnt);
03936 }
03937 else
03938 {
03939 do
03940 {
03941 PREFETCH ();
03942 if (*d++ != (char) *p++) goto fail;
03943 }
03944 while (--mcnt);
03945 }
03946 SET_REGS_MATCHED ();
03947 break;
03948
03949
03950
03951 case anychar:
03952 DEBUG_PRINT1 ("EXECUTING anychar.\n");
03953
03954 PREFETCH ();
03955
03956 if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
03957 || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
03958 goto fail;
03959
03960 SET_REGS_MATCHED ();
03961 DEBUG_PRINT2 (" Matched `%d'.\n", *d);
03962 d++;
03963 break;
03964
03965
03966 case charset:
03967 case charset_not:
03968 {
03969 register unsigned char c;
03970 boolean not = (re_opcode_t) *(p - 1) == charset_not;
03971
03972 DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
03973
03974 PREFETCH ();
03975 c = TRANSLATE (*d);
03976
03977
03978
03979 if (c < (unsigned) (*p * BYTEWIDTH)
03980 && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
03981 not = !not;
03982
03983 p += 1 + *p;
03984
03985 if (!not) goto fail;
03986
03987 SET_REGS_MATCHED ();
03988 d++;
03989 break;
03990 }
03991
03992
03993
03994
03995
03996
03997
03998 case start_memory:
03999 DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
04000
04001
04002 p1 = p;
04003
04004 if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
04005 REG_MATCH_NULL_STRING_P (reg_info[*p])
04006 = group_match_null_string_p (&p1, pend, reg_info);
04007
04008
04009
04010
04011
04012
04013 old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
04014 ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
04015 : regstart[*p];
04016 DEBUG_PRINT2 (" old_regstart: %d\n",
04017 POINTER_TO_OFFSET (old_regstart[*p]));
04018
04019 regstart[*p] = d;
04020 DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
04021
04022 IS_ACTIVE (reg_info[*p]) = 1;
04023 MATCHED_SOMETHING (reg_info[*p]) = 0;
04024
04025
04026 set_regs_matched_done = 0;
04027
04028
04029 highest_active_reg = *p;
04030
04031
04032
04033 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
04034 lowest_active_reg = *p;
04035
04036
04037 p += 2;
04038 just_past_start_mem = p;
04039
04040 break;
04041
04042
04043
04044
04045
04046 case stop_memory:
04047 DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
04048
04049
04050
04051
04052
04053
04054 old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
04055 ? REG_UNSET (regend[*p]) ? d : regend[*p]
04056 : regend[*p];
04057 DEBUG_PRINT2 (" old_regend: %d\n",
04058 POINTER_TO_OFFSET (old_regend[*p]));
04059
04060 regend[*p] = d;
04061 DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
04062
04063
04064 IS_ACTIVE (reg_info[*p]) = 0;
04065
04066
04067 set_regs_matched_done = 0;
04068
04069
04070
04071 if (lowest_active_reg == highest_active_reg)
04072 {
04073 lowest_active_reg = NO_LOWEST_ACTIVE_REG;
04074 highest_active_reg = NO_HIGHEST_ACTIVE_REG;
04075 }
04076 else
04077 {
04078
04079
04080
04081 unsigned char r = *p - 1;
04082 while (r > 0 && !IS_ACTIVE (reg_info[r]))
04083 r--;
04084
04085
04086
04087
04088
04089
04090
04091
04092 if (r == 0)
04093 {
04094 lowest_active_reg = NO_LOWEST_ACTIVE_REG;
04095 highest_active_reg = NO_HIGHEST_ACTIVE_REG;
04096 }
04097 else
04098 highest_active_reg = r;
04099 }
04100
04101
04102
04103
04104
04105
04106 if ((!MATCHED_SOMETHING (reg_info[*p])
04107 || just_past_start_mem == p - 1)
04108 && (p + 2) < pend)
04109 {
04110 boolean is_a_jump_n = false;
04111
04112 p1 = p + 2;
04113 mcnt = 0;
04114 switch ((re_opcode_t) *p1++)
04115 {
04116 case jump_n:
04117 is_a_jump_n = true;
04118 case pop_failure_jump:
04119 case maybe_pop_jump:
04120 case jump:
04121 case dummy_failure_jump:
04122 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
04123 if (is_a_jump_n)
04124 p1 += 2;
04125 break;
04126
04127 default:
04128 ;
04129 }
04130 p1 += mcnt;
04131
04132
04133
04134
04135
04136
04137 if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
04138 && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
04139 {
04140
04141
04142
04143
04144
04145
04146
04147
04148
04149
04150 if (EVER_MATCHED_SOMETHING (reg_info[*p]))
04151 {
04152 unsigned r;
04153
04154 EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
04155
04156
04157 for (r = *p; r < *p + *(p + 1); r++)
04158 {
04159 regstart[r] = old_regstart[r];
04160
04161
04162 if (old_regend[r] >= regstart[r])
04163 regend[r] = old_regend[r];
04164 }
04165 }
04166 p1++;
04167 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
04168 PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
04169
04170 goto fail;
04171 }
04172 }
04173
04174
04175 p += 2;
04176 break;
04177
04178
04179
04180
04181 case duplicate:
04182 {
04183 register const char *d2, *dend2;
04184 int regno = *p++;
04185 DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
04186
04187
04188 if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
04189 goto fail;
04190
04191
04192 d2 = regstart[regno];
04193
04194
04195
04196
04197
04198
04199 dend2 = ((FIRST_STRING_P (regstart[regno])
04200 == FIRST_STRING_P (regend[regno]))
04201 ? regend[regno] : end_match_1);
04202 for (;;)
04203 {
04204
04205
04206 while (d2 == dend2)
04207 {
04208 if (dend2 == end_match_2) break;
04209 if (dend2 == regend[regno]) break;
04210
04211
04212 d2 = string2;
04213 dend2 = regend[regno];
04214 }
04215
04216 if (d2 == dend2) break;
04217
04218
04219 PREFETCH ();
04220
04221
04222 mcnt = dend - d;
04223
04224
04225
04226 if (mcnt > dend2 - d2)
04227 mcnt = dend2 - d2;
04228
04229
04230
04231 if (translate
04232 ? bcmp_translate (d, d2, mcnt, translate)
04233 : bcmp (d, d2, mcnt))
04234 goto fail;
04235 d += mcnt, d2 += mcnt;
04236
04237
04238 SET_REGS_MATCHED ();
04239 }
04240 }
04241 break;
04242
04243
04244
04245
04246
04247 case begline:
04248 DEBUG_PRINT1 ("EXECUTING begline.\n");
04249
04250 if (AT_STRINGS_BEG (d))
04251 {
04252 if (!bufp->not_bol) break;
04253 }
04254 else if (d[-1] == '\n' && bufp->newline_anchor)
04255 {
04256 break;
04257 }
04258
04259 goto fail;
04260
04261
04262
04263 case endline:
04264 DEBUG_PRINT1 ("EXECUTING endline.\n");
04265
04266 if (AT_STRINGS_END (d))
04267 {
04268 if (!bufp->not_eol) break;
04269 }
04270
04271
04272 else if ((d == end1 ? *string2 : *d) == '\n'
04273 && bufp->newline_anchor)
04274 {
04275 break;
04276 }
04277 goto fail;
04278
04279
04280
04281 case begbuf:
04282 DEBUG_PRINT1 ("EXECUTING begbuf.\n");
04283 if (AT_STRINGS_BEG (d))
04284 break;
04285 goto fail;
04286
04287
04288
04289 case endbuf:
04290 DEBUG_PRINT1 ("EXECUTING endbuf.\n");
04291 if (AT_STRINGS_END (d))
04292 break;
04293 goto fail;
04294
04295
04296
04297
04298
04299
04300
04301
04302
04303
04304
04305
04306
04307
04308
04309
04310
04311
04312 case on_failure_keep_string_jump:
04313 DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
04314
04315 EXTRACT_NUMBER_AND_INCR (mcnt, p);
04316 DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
04317
04318 PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
04319 break;
04320
04321
04322
04323
04324
04325
04326
04327
04328
04329
04330
04331
04332
04333
04334 case on_failure_jump:
04335 on_failure:
04336 DEBUG_PRINT1 ("EXECUTING on_failure_jump");
04337
04338 EXTRACT_NUMBER_AND_INCR (mcnt, p);
04339 DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
04340
04341
04342
04343
04344
04345
04346
04347
04348
04349
04350 p1 = p;
04351
04352
04353
04354
04355
04356 while (p1 < pend && (re_opcode_t) *p1 == no_op)
04357 p1++;
04358
04359 if (p1 < pend && (re_opcode_t) *p1 == start_memory)
04360 {
04361
04362
04363
04364
04365 highest_active_reg = *(p1 + 1) + *(p1 + 2);
04366 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
04367 lowest_active_reg = *(p1 + 1);
04368 }
04369
04370 DEBUG_PRINT1 (":\n");
04371 PUSH_FAILURE_POINT (p + mcnt, d, -2);
04372 break;
04373
04374
04375
04376
04377 case maybe_pop_jump:
04378 EXTRACT_NUMBER_AND_INCR (mcnt, p);
04379 DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
04380 {
04381 register unsigned char *p2 = p;
04382
04383
04384
04385
04386
04387
04388
04389
04390
04391
04392
04393
04394
04395
04396
04397
04398
04399
04400 while (1)
04401 {
04402 if (p2 + 2 < pend
04403 && ((re_opcode_t) *p2 == stop_memory
04404 || (re_opcode_t) *p2 == start_memory))
04405 p2 += 3;
04406 else if (p2 + 6 < pend
04407 && (re_opcode_t) *p2 == dummy_failure_jump)
04408 p2 += 6;
04409 else
04410 break;
04411 }
04412
04413 p1 = p + mcnt;
04414
04415
04416
04417
04418
04419 if (p2 == pend)
04420 {
04421
04422
04423
04424 p[-3] = (unsigned char) pop_failure_jump;
04425 DEBUG_PRINT1
04426 (" End of pattern: change to `pop_failure_jump'.\n");
04427 }
04428
04429 else if ((re_opcode_t) *p2 == exactn
04430 || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
04431 {
04432 register unsigned char c
04433 = *p2 == (unsigned char) endline ? '\n' : p2[2];
04434
04435 if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
04436 {
04437 p[-3] = (unsigned char) pop_failure_jump;
04438 DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
04439 c, p1[5]);
04440 }
04441
04442 else if ((re_opcode_t) p1[3] == charset
04443 || (re_opcode_t) p1[3] == charset_not)
04444 {
04445 int not = (re_opcode_t) p1[3] == charset_not;
04446
04447 if (c < (unsigned char) (p1[4] * BYTEWIDTH)
04448 && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
04449 not = !not;
04450
04451
04452
04453 if (!not)
04454 {
04455 p[-3] = (unsigned char) pop_failure_jump;
04456 DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
04457 }
04458 }
04459 }
04460 else if ((re_opcode_t) *p2 == charset)
04461 {
04462 #ifdef DEBUG
04463 register unsigned char c
04464 = *p2 == (unsigned char) endline ? '\n' : p2[2];
04465 #endif
04466
04467 if ((re_opcode_t) p1[3] == exactn
04468 && ! ((int) p2[1] * BYTEWIDTH > (int) p1[4]
04469 && (p2[1 + p1[4] / BYTEWIDTH]
04470 & (1 << (p1[4] % BYTEWIDTH)))))
04471 {
04472 p[-3] = (unsigned char) pop_failure_jump;
04473 DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
04474 c, p1[5]);
04475 }
04476
04477 else if ((re_opcode_t) p1[3] == charset_not)
04478 {
04479 int idx;
04480
04481
04482 for (idx = 0; idx < (int) p2[1]; idx++)
04483 if (! (p2[2 + idx] == 0
04484 || (idx < (int) p1[4]
04485 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
04486 break;
04487
04488 if (idx == p2[1])
04489 {
04490 p[-3] = (unsigned char) pop_failure_jump;
04491 DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
04492 }
04493 }
04494 else if ((re_opcode_t) p1[3] == charset)
04495 {
04496 int idx;
04497
04498
04499 for (idx = 0;
04500 idx < (int) p2[1] && idx < (int) p1[4];
04501 idx++)
04502 if ((p2[2 + idx] & p1[5 + idx]) != 0)
04503 break;
04504
04505 if (idx == p2[1] || idx == p1[4])
04506 {
04507 p[-3] = (unsigned char) pop_failure_jump;
04508 DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
04509 }
04510 }
04511 }
04512 }
04513 p -= 2;
04514 if ((re_opcode_t) p[-1] != pop_failure_jump)
04515 {
04516 p[-1] = (unsigned char) jump;
04517 DEBUG_PRINT1 (" Match => jump.\n");
04518 goto unconditional_jump;
04519 }
04520
04521
04522
04523
04524
04525
04526
04527
04528
04529 case pop_failure_jump:
04530 {
04531
04532
04533
04534
04535
04536 unsigned dummy_low_reg, dummy_high_reg;
04537 unsigned char *pdummy;
04538 const char *sdummy;
04539
04540 DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
04541 POP_FAILURE_POINT (sdummy, pdummy,
04542 dummy_low_reg, dummy_high_reg,
04543 reg_dummy, reg_dummy, reg_info_dummy);
04544 }
04545
04546
04547
04548
04549 case jump:
04550 unconditional_jump:
04551 EXTRACT_NUMBER_AND_INCR (mcnt, p);
04552 DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
04553 p += mcnt;
04554 DEBUG_PRINT2 ("(to 0x%x).\n", p);
04555 break;
04556
04557
04558
04559
04560 case jump_past_alt:
04561 DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
04562 goto unconditional_jump;
04563
04564
04565
04566
04567
04568
04569
04570 case dummy_failure_jump:
04571 DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
04572
04573
04574 PUSH_FAILURE_POINT (0, 0, -2);
04575 goto unconditional_jump;
04576
04577
04578
04579
04580
04581
04582
04583 case push_dummy_failure:
04584 DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
04585
04586
04587 PUSH_FAILURE_POINT (0, 0, -2);
04588 break;
04589
04590
04591
04592 case succeed_n:
04593 EXTRACT_NUMBER (mcnt, p + 2);
04594 DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
04595
04596 assert (mcnt >= 0);
04597
04598 if (mcnt > 0)
04599 {
04600 mcnt--;
04601 p += 2;
04602 STORE_NUMBER_AND_INCR (p, mcnt);
04603 DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p, mcnt);
04604 }
04605 else if (mcnt == 0)
04606 {
04607 DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n", p+2);
04608 p[2] = (unsigned char) no_op;
04609 p[3] = (unsigned char) no_op;
04610 goto on_failure;
04611 }
04612 break;
04613
04614 case jump_n:
04615 EXTRACT_NUMBER (mcnt, p + 2);
04616 DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
04617
04618
04619 if (mcnt)
04620 {
04621 mcnt--;
04622 STORE_NUMBER (p + 2, mcnt);
04623 goto unconditional_jump;
04624 }
04625
04626 else
04627 p += 4;
04628 break;
04629
04630 case set_number_at:
04631 {
04632 DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
04633
04634 EXTRACT_NUMBER_AND_INCR (mcnt, p);
04635 p1 = p + mcnt;
04636 EXTRACT_NUMBER_AND_INCR (mcnt, p);
04637 DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt);
04638 STORE_NUMBER (p1, mcnt);
04639 break;
04640 }
04641
04642 case wordbound:
04643 DEBUG_PRINT1 ("EXECUTING wordbound.\n");
04644 if (AT_WORD_BOUNDARY (d))
04645 break;
04646 goto fail;
04647
04648 case notwordbound:
04649 DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
04650 if (AT_WORD_BOUNDARY (d))
04651 goto fail;
04652 break;
04653
04654 case wordbeg:
04655 DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
04656 if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
04657 break;
04658 goto fail;
04659
04660 case wordend:
04661 DEBUG_PRINT1 ("EXECUTING wordend.\n");
04662 if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
04663 && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
04664 break;
04665 goto fail;
04666
04667 #ifdef emacs
04668 case before_dot:
04669 DEBUG_PRINT1 ("EXECUTING before_dot.\n");
04670 if (PTR_CHAR_POS ((unsigned char *) d) >= point)
04671 goto fail;
04672 break;
04673
04674 case at_dot:
04675 DEBUG_PRINT1 ("EXECUTING at_dot.\n");
04676 if (PTR_CHAR_POS ((unsigned char *) d) != point)
04677 goto fail;
04678 break;
04679
04680 case after_dot:
04681 DEBUG_PRINT1 ("EXECUTING after_dot.\n");
04682 if (PTR_CHAR_POS ((unsigned char *) d) <= point)
04683 goto fail;
04684 break;
04685 #if 0
04686 case at_dot:
04687 DEBUG_PRINT1 ("EXECUTING at_dot.\n");
04688 if (PTR_CHAR_POS ((unsigned char *) d) + 1 != point)
04689 goto fail;
04690 break;
04691 #endif
04692
04693 case syntaxspec:
04694 DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
04695 mcnt = *p++;
04696 goto matchsyntax;
04697
04698 case wordchar:
04699 DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
04700 mcnt = (int) Sword;
04701 matchsyntax:
04702 PREFETCH ();
04703
04704 d++;
04705 if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
04706 goto fail;
04707 SET_REGS_MATCHED ();
04708 break;
04709
04710 case notsyntaxspec:
04711 DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
04712 mcnt = *p++;
04713 goto matchnotsyntax;
04714
04715 case notwordchar:
04716 DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
04717 mcnt = (int) Sword;
04718 matchnotsyntax:
04719 PREFETCH ();
04720
04721 d++;
04722 if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
04723 goto fail;
04724 SET_REGS_MATCHED ();
04725 break;
04726
04727 #else
04728 case wordchar:
04729 DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
04730 PREFETCH ();
04731 if (!WORDCHAR_P (d))
04732 goto fail;
04733 SET_REGS_MATCHED ();
04734 d++;
04735 break;
04736
04737 case notwordchar:
04738 DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
04739 PREFETCH ();
04740 if (WORDCHAR_P (d))
04741 goto fail;
04742 SET_REGS_MATCHED ();
04743 d++;
04744 break;
04745 #endif
04746
04747 default:
04748 abort ();
04749 }
04750 continue;
04751
04752
04753
04754 fail:
04755 if (!FAIL_STACK_EMPTY ())
04756 {
04757 DEBUG_PRINT1 ("\nFAIL:\n");
04758 POP_FAILURE_POINT (d, p,
04759 lowest_active_reg, highest_active_reg,
04760 regstart, regend, reg_info);
04761
04762
04763 if (!p)
04764 goto fail;
04765
04766
04767 assert (p <= pend);
04768 if (p < pend)
04769 {
04770 boolean is_a_jump_n = false;
04771
04772
04773
04774 switch ((re_opcode_t) *p)
04775 {
04776 case jump_n:
04777 is_a_jump_n = true;
04778 case maybe_pop_jump:
04779 case pop_failure_jump:
04780 case jump:
04781 p1 = p + 1;
04782 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
04783 p1 += mcnt;
04784
04785 if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
04786 || (!is_a_jump_n
04787 && (re_opcode_t) *p1 == on_failure_jump))
04788 goto fail;
04789 break;
04790 default:
04791 ;
04792 }
04793 }
04794
04795 if (d >= string1 && d <= end1)
04796 dend = end_match_1;
04797 }
04798 else
04799 break;
04800 }
04801
04802 if (best_regs_set)
04803 goto restore_best_regs;
04804
04805 FREE_VARIABLES ();
04806
04807 return -1;
04808 }
04809
04810
04811
04812
04813
04814
04815
04816
04817
04818
04819
04820
04821
04822
04823 static boolean
04824 group_match_null_string_p (p, end, reg_info)
04825 unsigned char **p, *end;
04826 register_info_type *reg_info;
04827 {
04828 int mcnt;
04829
04830 unsigned char *p1 = *p + 2;
04831
04832 while (p1 < end)
04833 {
04834
04835
04836
04837
04838 switch ((re_opcode_t) *p1)
04839 {
04840
04841 case on_failure_jump:
04842 p1++;
04843 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
04844
04845
04846
04847
04848 if (mcnt >= 0)
04849 {
04850
04851
04852
04853
04854
04855
04856
04857
04858
04859
04860
04861
04862
04863
04864
04865
04866
04867
04868 while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
04869 {
04870
04871
04872
04873
04874 if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
04875 reg_info))
04876 return false;
04877
04878
04879
04880 p1 += mcnt;
04881
04882
04883
04884 if ((re_opcode_t) *p1 != on_failure_jump)
04885 break;
04886
04887
04888
04889 p1++;
04890 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
04891 if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
04892 {
04893
04894 p1 -= 3;
04895 break;
04896 }
04897 }
04898
04899
04900
04901
04902 EXTRACT_NUMBER (mcnt, p1 - 2);
04903
04904 if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
04905 return false;
04906
04907 p1 += mcnt;
04908 }
04909 break;
04910
04911
04912 case stop_memory:
04913 assert (p1[1] == **p);
04914 *p = p1 + 2;
04915 return true;
04916
04917
04918 default:
04919 if (!common_op_match_null_string_p (&p1, end, reg_info))
04920 return false;
04921 }
04922 }
04923
04924 return false;
04925 }
04926
04927
04928
04929
04930
04931
04932 static boolean
04933 alt_match_null_string_p (p, end, reg_info)
04934 unsigned char *p, *end;
04935 register_info_type *reg_info;
04936 {
04937 int mcnt;
04938 unsigned char *p1 = p;
04939
04940 while (p1 < end)
04941 {
04942
04943
04944
04945 switch ((re_opcode_t) *p1)
04946 {
04947
04948 case on_failure_jump:
04949 p1++;
04950 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
04951 p1 += mcnt;
04952 break;
04953
04954 default:
04955 if (!common_op_match_null_string_p (&p1, end, reg_info))
04956 return false;
04957 }
04958 }
04959
04960 return true;
04961 }
04962
04963
04964
04965
04966
04967
04968
04969 static boolean
04970 common_op_match_null_string_p (p, end, reg_info)
04971 unsigned char **p, *end;
04972 register_info_type *reg_info;
04973 {
04974 int mcnt;
04975 boolean ret;
04976 int reg_no;
04977 unsigned char *p1 = *p;
04978
04979 switch ((re_opcode_t) *p1++)
04980 {
04981 case no_op:
04982 case begline:
04983 case endline:
04984 case begbuf:
04985 case endbuf:
04986 case wordbeg:
04987 case wordend:
04988 case wordbound:
04989 case notwordbound:
04990 #ifdef emacs
04991 case before_dot:
04992 case at_dot:
04993 case after_dot:
04994 #endif
04995 break;
04996
04997 case start_memory:
04998 reg_no = *p1;
04999 assert (reg_no > 0 && reg_no <= MAX_REGNUM);
05000 ret = group_match_null_string_p (&p1, end, reg_info);
05001
05002
05003
05004
05005 if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
05006 REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
05007
05008 if (!ret)
05009 return false;
05010 break;
05011
05012
05013 case jump:
05014 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
05015 if (mcnt >= 0)
05016 p1 += mcnt;
05017 else
05018 return false;
05019 break;
05020
05021 case succeed_n:
05022
05023 p1 += 2;
05024 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
05025
05026 if (mcnt == 0)
05027 {
05028 p1 -= 4;
05029 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
05030 p1 += mcnt;
05031 }
05032 else
05033 return false;
05034 break;
05035
05036 case duplicate:
05037 if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
05038 return false;
05039 break;
05040
05041 case set_number_at:
05042 p1 += 4;
05043
05044 default:
05045
05046 return false;
05047 }
05048
05049 *p = p1;
05050 return true;
05051 }
05052
05053
05054
05055
05056
05057 static int
05058 bcmp_translate (s1, s2, len, translate)
05059 unsigned char *s1, *s2;
05060 register int len;
05061 char *translate;
05062 {
05063 register unsigned char *p1 = s1, *p2 = s2;
05064 while (len)
05065 {
05066 if (translate[*p1++] != translate[*p2++]) return 1;
05067 len--;
05068 }
05069 return 0;
05070 }
05071
05072
05073
05074
05075
05076
05077
05078
05079
05080
05081
05082
05083 const char *
05084 re_compile_pattern (pattern, length, bufp)
05085 const char *pattern;
05086 int length;
05087 struct re_pattern_buffer *bufp;
05088 {
05089 reg_errcode_t ret;
05090
05091
05092
05093 bufp->regs_allocated = REGS_UNALLOCATED;
05094
05095
05096
05097
05098 bufp->no_sub = 0;
05099
05100
05101 bufp->newline_anchor = 1;
05102
05103 ret = regex_compile (pattern, length, re_syntax_options, bufp);
05104
05105 if (!ret)
05106 return NULL;
05107 return gettext (re_error_msgid[(int) ret]);
05108 }
05109
05110
05111
05112
05113 #ifdef _REGEX_RE_COMP
05114
05115
05116 static struct re_pattern_buffer re_comp_buf;
05117
05118 char *
05119 re_comp (s)
05120 const char *s;
05121 {
05122 reg_errcode_t ret;
05123
05124 if (!s)
05125 {
05126 if (!re_comp_buf.buffer)
05127 return gettext ("No previous regular expression");
05128 return 0;
05129 }
05130
05131 if (!re_comp_buf.buffer)
05132 {
05133 re_comp_buf.buffer = (unsigned char *) malloc (200);
05134 if (re_comp_buf.buffer == NULL)
05135 return gettext (re_error_msgid[(int) REG_ESPACE]);
05136 re_comp_buf.allocated = 200;
05137
05138 re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
05139 if (re_comp_buf.fastmap == NULL)
05140 return gettext (re_error_msgid[(int) REG_ESPACE]);
05141 }
05142
05143
05144
05145
05146
05147 re_comp_buf.newline_anchor = 1;
05148
05149 ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
05150
05151 if (!ret)
05152 return NULL;
05153
05154
05155 return (char *) gettext (re_error_msgid[(int) ret]);
05156 }
05157
05158
05159 int
05160 re_exec (s)
05161 const char *s;
05162 {
05163 const int len = strlen (s);
05164 return
05165 0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
05166 }
05167 #endif
05168
05169
05170
05171 #ifndef emacs
05172
05173
05174
05175
05176
05177
05178
05179
05180
05181
05182
05183
05184
05185
05186
05187
05188
05189
05190
05191
05192
05193
05194
05195
05196
05197
05198
05199
05200
05201
05202
05203
05204
05205
05206
05207 int
05208 regcomp (preg, pattern, cflags)
05209 regex_t *preg;
05210 const char *pattern;
05211 int cflags;
05212 {
05213 reg_errcode_t ret;
05214 unsigned syntax
05215 = (cflags & REG_EXTENDED) ?
05216 RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
05217
05218
05219 preg->buffer = 0;
05220 preg->allocated = 0;
05221 preg->used = 0;
05222
05223
05224
05225
05226
05227 preg->fastmap = 0;
05228
05229 if (cflags & REG_ICASE)
05230 {
05231 unsigned i;
05232
05233 preg->translate = (char *) malloc (CHAR_SET_SIZE);
05234 if (preg->translate == NULL)
05235 return (int) REG_ESPACE;
05236
05237
05238 for (i = 0; i < CHAR_SET_SIZE; i++)
05239 preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
05240 }
05241 else
05242 preg->translate = NULL;
05243
05244
05245 if (cflags & REG_NEWLINE)
05246 {
05247 syntax &= ~RE_DOT_NEWLINE;
05248 syntax |= RE_HAT_LISTS_NOT_NEWLINE;
05249
05250 preg->newline_anchor = 1;
05251 }
05252 else
05253 preg->newline_anchor = 0;
05254
05255 preg->no_sub = !!(cflags & REG_NOSUB);
05256
05257
05258
05259 ret = regex_compile (pattern, strlen (pattern), syntax, preg);
05260
05261
05262
05263 if (ret == REG_ERPAREN) ret = REG_EPAREN;
05264
05265 return (int) ret;
05266 }
05267
05268
05269
05270
05271
05272
05273
05274
05275
05276
05277
05278
05279
05280
05281
05282
05283 int
05284 regexec (preg, string, nmatch, pmatch, eflags)
05285 const regex_t *preg;
05286 const char *string;
05287 size_t nmatch;
05288 regmatch_t pmatch[];
05289 int eflags;
05290 {
05291 int ret;
05292 struct re_registers regs;
05293 regex_t private_preg;
05294 int len = strlen (string);
05295 boolean want_reg_info = !preg->no_sub && nmatch > 0;
05296
05297 private_preg = *preg;
05298
05299 private_preg.not_bol = !!(eflags & REG_NOTBOL);
05300 private_preg.not_eol = !!(eflags & REG_NOTEOL);
05301
05302
05303
05304
05305 private_preg.regs_allocated = REGS_FIXED;
05306
05307 if (want_reg_info)
05308 {
05309 regs.num_regs = nmatch;
05310 regs.start = TALLOC (nmatch, regoff_t);
05311 regs.end = TALLOC (nmatch, regoff_t);
05312 if (regs.start == NULL || regs.end == NULL)
05313 return (int) REG_NOMATCH;
05314 }
05315
05316
05317 ret = re_search (&private_preg, string, len,
05318 0, len,
05319 want_reg_info ? ®s : (struct re_registers *) 0);
05320
05321
05322 if (want_reg_info)
05323 {
05324 if (ret >= 0)
05325 {
05326 unsigned r;
05327
05328 for (r = 0; r < nmatch; r++)
05329 {
05330 pmatch[r].rm_so = regs.start[r];
05331 pmatch[r].rm_eo = regs.end[r];
05332 }
05333 }
05334
05335
05336 free (regs.start);
05337 free (regs.end);
05338 }
05339
05340
05341 return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
05342 }
05343
05344
05345
05346
05347
05348 size_t
05349 regerror (errcode, preg, errbuf, errbuf_size)
05350 int errcode;
05351 const regex_t *preg;
05352 char *errbuf;
05353 size_t errbuf_size;
05354 {
05355 const char *msg;
05356 size_t msg_size;
05357
05358 if (errcode < 0
05359 || errcode >= (sizeof (re_error_msgid) / sizeof (re_error_msgid[0])))
05360
05361
05362
05363
05364 abort ();
05365
05366 msg = gettext (re_error_msgid[errcode]);
05367
05368 msg_size = strlen (msg) + 1;
05369
05370 if (errbuf_size != 0)
05371 {
05372 if (msg_size > errbuf_size)
05373 {
05374 strncpy (errbuf, msg, errbuf_size - 1);
05375 errbuf[errbuf_size - 1] = 0;
05376 }
05377 else
05378 strcpy (errbuf, msg);
05379 }
05380
05381 return msg_size;
05382 }
05383
05384
05385
05386
05387 void
05388 regfree (preg)
05389 regex_t *preg;
05390 {
05391 if (preg->buffer != NULL)
05392 free (preg->buffer);
05393 preg->buffer = NULL;
05394
05395 preg->allocated = 0;
05396 preg->used = 0;
05397
05398 if (preg->fastmap != NULL)
05399 free (preg->fastmap);
05400 preg->fastmap = NULL;
05401 preg->fastmap_accurate = 0;
05402
05403 if (preg->translate != NULL)
05404 free (preg->translate);
05405 preg->translate = NULL;
05406 }
05407
05408 #endif
05409
05410
05411
05412
05413
05414
05415
05416