diff options
author | Christian Heim <phreak@gentoo.org> | 2007-02-10 17:39:39 +0000 |
---|---|---|
committer | Christian Heim <phreak@gentoo.org> | 2007-02-10 17:39:39 +0000 |
commit | b455e90ac068f73535299ae05ee13f14c95becc5 (patch) | |
tree | 11952eb46ad95d2b6db6fd95d1f125514d6416e8 /2.0 | |
parent | Updating the external PCRE patch, missed the changes in srclib/pcre/internal.h. (diff) | |
download | apache-b455e90ac068f73535299ae05ee13f14c95becc5.tar.gz apache-b455e90ac068f73535299ae05ee13f14c95becc5.tar.bz2 apache-b455e90ac068f73535299ae05ee13f14c95becc5.zip |
Updating the external PCRE patch, this time for good!
Diffstat (limited to '2.0')
-rw-r--r-- | 2.0/patches/00_all_allow-external-pcre.patch | 859 |
1 files changed, 7 insertions, 852 deletions
diff --git a/2.0/patches/00_all_allow-external-pcre.patch b/2.0/patches/00_all_allow-external-pcre.patch index 73029e0..b9c980a 100644 --- a/2.0/patches/00_all_allow-external-pcre.patch +++ b/2.0/patches/00_all_allow-external-pcre.patch @@ -66,858 +66,13 @@ Index: httpd-2.0.59/configure.in dnl APR should go after the other libs, so the right symbols can be picked up AP_LIBS="$AP_LIBS `$apu_config --link-libtool --libs` `$apr_config --link-libtool --libs`" APACHE_SUBST(AP_LIBS) -Index: httpd-2.0.59/srclib/pcre/internal.h +Index: httpd-2.0.59/srclib/Makefile.in =================================================================== ---- httpd-2.0.59.orig/srclib/pcre/internal.h -+++ httpd-2.0.59/srclib/pcre/internal.h -@@ -5,30 +5,38 @@ +--- httpd-2.0.59.orig/srclib/Makefile.in ++++ httpd-2.0.59/srclib/Makefile.in +@@ -1,5 +1,4 @@ - /* This is a library of functions to support regular expressions whose syntax - and semantics are as close as possible to those of the Perl 5 language. See --the file Tech.Notes for some information on the internals. -+the file doc/Tech.Notes for some information on the internals. +-SUBDIRS = pcre + BUILD_SUBDIRS = $(AP_BUILD_SRCLIB_DIRS) + CLEAN_SUBDIRS = $(AP_CLEAN_SRCLIB_DIRS) - Written by: Philip Hazel <ph10@cam.ac.uk> - -- Copyright (c) 1997-2001 University of Cambridge -+ Copyright (c) 1997-2004 University of Cambridge - - ----------------------------------------------------------------------------- --Permission is granted to anyone to use this software for any purpose on any --computer system, and to redistribute it freely, subject to the following --restrictions: -- --1. This software is distributed in the hope that it will be useful, -- but WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -- --2. The origin of this software must not be misrepresented, either by -- explicit claim or by omission. -- --3. Altered versions must be plainly marked as such, and must not be -- misrepresented as being the original software. -- --4. If PCRE is embedded in any software that is released under the GNU -- General Purpose Licence (GPL), then the terms of that licence shall -- supersede any condition above with which it is incompatible. -+Redistribution and use in source and binary forms, with or without -+modification, are permitted provided that the following conditions are met: -+ -+ * Redistributions of source code must retain the above copyright notice, -+ this list of conditions and the following disclaimer. -+ -+ * Redistributions in binary form must reproduce the above copyright -+ notice, this list of conditions and the following disclaimer in the -+ documentation and/or other materials provided with the distribution. -+ -+ * Neither the name of the University of Cambridge nor the names of its -+ contributors may be used to endorse or promote products derived from -+ this software without specific prior written permission. -+ -+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+POSSIBILITY OF SUCH DAMAGE. - ----------------------------------------------------------------------------- - */ - -@@ -39,6 +47,68 @@ modules, but which are not relevant to t - - #include "config.h" - -+/* Standard C headers plus the external interface definition. The only time -+setjmp and stdarg are used is when NO_RECURSE is set. */ -+ -+#include <ctype.h> -+#include <limits.h> -+#include <setjmp.h> -+#include <stdarg.h> -+#include <stddef.h> -+#include <stdio.h> -+#include <stdlib.h> -+#include <string.h> -+ -+#ifndef PCRE_SPY -+#define PCRE_DEFINITION /* Win32 __declspec(export) trigger for .dll */ -+#endif -+ -+/* We need to have types that specify unsigned 16-bit and 32-bit integers. We -+cannot determine these outside the compilation (e.g. by running a program as -+part of "configure") because PCRE is often cross-compiled for use on other -+systems. Instead we make use of the maximum sizes that are available at -+preprocessor time in standard C environments. */ -+ -+#if USHRT_MAX == 65535 -+ typedef unsigned short pcre_uint16; -+#elif UINT_MAX == 65535 -+ typedef unsigned int pcre_uint16; -+#else -+ #error Cannot determine a type for 16-bit unsigned integers -+#endif -+ -+#if UINT_MAX == 4294967295 -+ typedef unsigned int pcre_uint32; -+#elif ULONG_MAX == 4294967295 -+ typedef unsigned long int pcre_uint32; -+#else -+ #error Cannot determine a type for 32-bit unsigned integers -+#endif -+ -+/* All character handling must be done as unsigned characters. Otherwise there -+are problems with top-bit-set characters and functions such as isspace(). -+However, we leave the interface to the outside world as char *, because that -+should make things easier for callers. We define a short type for unsigned char -+to save lots of typing. I tried "uchar", but it causes problems on Digital -+Unix, where it is defined in sys/types, so use "uschar" instead. */ -+ -+typedef unsigned char uschar; -+ -+/* Include the public PCRE header */ -+ -+#include "pcre.h" -+ -+/* When compiling for use with the Virtual Pascal compiler, these functions -+need to have their names changed. PCRE must be compiled with the -DVPCOMPAT -+option on the command line. */ -+ -+#ifdef VPCOMPAT -+#define strncmp(s1,s2,m) _strncmp(s1,s2,m) -+#define memcpy(d,s,n) _memcpy(d,s,n) -+#define memmove(d,s,n) _memmove(d,s,n) -+#define memset(s,c,n) _memset(s,c,n) -+#else /* VPCOMPAT */ -+ - /* To cope with SunOS4 and other systems that lack memmove() but have bcopy(), - define a macro for memmove() if HAVE_MEMMOVE is false, provided that HAVE_BCOPY - is set. Otherwise, include an emulating function for those systems that have -@@ -50,7 +120,7 @@ case in PCRE. */ - #undef memmove /* some systems may have a macro */ - #if HAVE_BCOPY - #define memmove(a, b, c) bcopy(b, a, c) --#else -+#else /* HAVE_BCOPY */ - void * - pcre_memmove(unsigned char *dest, const unsigned char *src, size_t n) - { -@@ -60,18 +130,86 @@ src += n; - for (i = 0; i < n; ++i) *(--dest) = *(--src); - } - #define memmove(a, b, c) pcre_memmove(a, b, c) --#endif -+#endif /* not HAVE_BCOPY */ -+#endif /* not HAVE_MEMMOVE */ -+#endif /* not VPCOMPAT */ -+ -+ -+/* PCRE keeps offsets in its compiled code as 2-byte quantities (always stored -+in big-endian order) by default. These are used, for example, to link from the -+start of a subpattern to its alternatives and its end. The use of 2 bytes per -+offset limits the size of the compiled regex to around 64K, which is big enough -+for almost everybody. However, I received a request for an even bigger limit. -+For this reason, and also to make the code easier to maintain, the storing and -+loading of offsets from the byte string is now handled by the macros that are -+defined here. -+ -+The macros are controlled by the value of LINK_SIZE. This defaults to 2 in -+the config.h file, but can be overridden by using -D on the command line. This -+is automated on Unix systems via the "configure" command. */ -+ -+#if LINK_SIZE == 2 -+ -+#define PUT(a,n,d) \ -+ (a[n] = (d) >> 8), \ -+ (a[(n)+1] = (d) & 255) -+ -+#define GET(a,n) \ -+ (((a)[n] << 8) | (a)[(n)+1]) -+ -+#define MAX_PATTERN_SIZE (1 << 16) -+ -+ -+#elif LINK_SIZE == 3 -+ -+#define PUT(a,n,d) \ -+ (a[n] = (d) >> 16), \ -+ (a[(n)+1] = (d) >> 8), \ -+ (a[(n)+2] = (d) & 255) -+ -+#define GET(a,n) \ -+ (((a)[n] << 16) | ((a)[(n)+1] << 8) | (a)[(n)+2]) -+ -+#define MAX_PATTERN_SIZE (1 << 24) -+ -+ -+#elif LINK_SIZE == 4 -+ -+#define PUT(a,n,d) \ -+ (a[n] = (d) >> 24), \ -+ (a[(n)+1] = (d) >> 16), \ -+ (a[(n)+2] = (d) >> 8), \ -+ (a[(n)+3] = (d) & 255) -+ -+#define GET(a,n) \ -+ (((a)[n] << 24) | ((a)[(n)+1] << 16) | ((a)[(n)+2] << 8) | (a)[(n)+3]) -+ -+#define MAX_PATTERN_SIZE (1 << 30) /* Keep it positive */ -+ -+ -+#else -+#error LINK_SIZE must be either 2, 3, or 4 - #endif - --/* Standard C headers plus the external interface definition */ - --#include <ctype.h> --#include <limits.h> --#include <stddef.h> --#include <stdio.h> --#include <stdlib.h> --#include <string.h> --#include "pcre.h" -+/* Convenience macro defined in terms of the others */ -+ -+#define PUTINC(a,n,d) PUT(a,n,d), a += LINK_SIZE -+ -+ -+/* PCRE uses some other 2-byte quantities that do not change when the size of -+offsets changes. There are used for repeat counts and for other things such as -+capturing parenthesis numbers in back references. */ -+ -+#define PUT2(a,n,d) \ -+ a[n] = (d) >> 8; \ -+ a[(n)+1] = (d) & 255 -+ -+#define GET2(a,n) \ -+ (((a)[n] << 8) | (a)[(n)+1]) -+ -+#define PUT2INC(a,n,d) PUT2(a,n,d), a += 2 -+ - - /* In case there is no definition of offsetof() provided - though any proper - Standard C system should have one. */ -@@ -80,6 +218,7 @@ Standard C system should have one. */ - #define offsetof(p_type,field) ((size_t)&(((p_type *)0)->field)) - #endif - -+ - /* These are the public options that can change during matching. */ - - #define PCRE_IMS (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL) -@@ -88,13 +227,13 @@ Standard C system should have one. */ - but skip the top bit so we can use ints for convenience without getting tangled - with negative values. The public options defined in pcre.h start at the least - significant end. Make sure they don't overlap, though now that we have expanded --to four bytes there is plenty of space. */ -+to four bytes, there is plenty of space. */ - --#define PCRE_FIRSTSET 0x40000000 /* first_char is set */ --#define PCRE_REQCHSET 0x20000000 /* req_char is set */ -+#define PCRE_FIRSTSET 0x40000000 /* first_byte is set */ -+#define PCRE_REQCHSET 0x20000000 /* req_byte is set */ - #define PCRE_STARTLINE 0x10000000 /* start after \n for multiline */ --#define PCRE_INGROUP 0x08000000 /* compiling inside a group */ --#define PCRE_ICHANGED 0x04000000 /* i option changes within regex */ -+#define PCRE_ICHANGED 0x08000000 /* i option changes within regex */ -+#define PCRE_NOPARTIAL 0x04000000 /* can't use partial with this regex */ - - /* Options for the "extra" block produced by pcre_study(). */ - -@@ -105,10 +244,12 @@ time, run time or study time, respective - - #define PUBLIC_OPTIONS \ - (PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \ -- PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8) -+ PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \ -+ PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT) - - #define PUBLIC_EXEC_OPTIONS \ -- (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY) -+ (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK| \ -+ PCRE_PARTIAL) - - #define PUBLIC_STUDY_OPTIONS 0 /* None defined */ - -@@ -116,6 +257,17 @@ time, run time or study time, respective - - #define MAGIC_NUMBER 0x50435245UL /* 'PCRE' */ - -+/* Negative values for the firstchar and reqchar variables */ -+ -+#define REQ_UNSET (-2) -+#define REQ_NONE (-1) -+ -+/* Flags added to firstbyte or reqbyte; a "non-literal" item is either a -+variable-length repeat, or a anything other than literal characters. */ -+ -+#define REQ_CASELESS 0x0100 /* indicates caselessness */ -+#define REQ_VARY 0x0200 /* reqbyte followed non-literal item */ -+ - /* Miscellaneous definitions */ - - typedef int BOOL; -@@ -124,150 +276,267 @@ typedef int BOOL; - #define TRUE 1 - - /* Escape items that are just an encoding of a particular data value. Note that --ESC_N is defined as yet another macro, which is set in config.h to either \n -+ESC_n is defined as yet another macro, which is set in config.h to either \n - (the default) or \r (which some people want). */ - --#ifndef ESC_E --#define ESC_E 27 -+#ifndef ESC_e -+#define ESC_e 27 - #endif - --#ifndef ESC_F --#define ESC_F '\f' -+#ifndef ESC_f -+#define ESC_f '\f' - #endif - --#ifndef ESC_N --#define ESC_N NEWLINE -+#ifndef ESC_n -+#define ESC_n NEWLINE - #endif - --#ifndef ESC_R --#define ESC_R '\r' -+#ifndef ESC_r -+#define ESC_r '\r' - #endif - --#ifndef ESC_T --#define ESC_T '\t' -+/* We can't officially use ESC_t because it is a POSIX reserved identifier -+(presumably because of all the others like size_t). */ -+ -+#ifndef ESC_tee -+#define ESC_tee '\t' - #endif - - /* These are escaped items that aren't just an encoding of a particular data - value such as \n. They must have non-zero values, as check_escape() returns - their negation. Also, they must appear in the same order as in the opcode --definitions below, up to ESC_z. The final one must be ESC_REF as subsequent --values are used for \1, \2, \3, etc. There is a test in the code for an escape --greater than ESC_b and less than ESC_Z to detect the types that may be --repeated. If any new escapes are put in-between that don't consume a character, --that code will have to change. */ -+definitions below, up to ESC_z. There's a dummy for OP_ANY because it -+corresponds to "." rather than an escape sequence. The final one must be -+ESC_REF as subsequent values are used for \1, \2, \3, etc. There is are two -+tests in the code for an escape greater than ESC_b and less than ESC_Z to -+detect the types that may be repeated. These are the types that consume -+characters. If any new escapes are put in between that don't consume a -+character, that code will have to change. */ -+ -+enum { ESC_A = 1, ESC_G, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, ESC_W, -+ ESC_w, ESC_dum1, ESC_C, ESC_P, ESC_p, ESC_X, ESC_Z, ESC_z, ESC_E, -+ ESC_Q, ESC_REF }; -+ -+/* Flag bits and data types for the extended class (OP_XCLASS) for classes that -+contain UTF-8 characters with values greater than 255. */ -+ -+#define XCL_NOT 0x01 /* Flag: this is a negative class */ -+#define XCL_MAP 0x02 /* Flag: a 32-byte map is present */ -+ -+#define XCL_END 0 /* Marks end of individual items */ -+#define XCL_SINGLE 1 /* Single item (one multibyte char) follows */ -+#define XCL_RANGE 2 /* A range (two multibyte chars) follows */ -+#define XCL_PROP 3 /* Unicode property (one property code) follows */ -+#define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */ - --enum { ESC_A = 1, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, ESC_W, ESC_w, -- ESC_Z, ESC_z, ESC_REF }; - - /* Opcode table: OP_BRA must be last, as all values >= it are used for brackets - that extract substrings. Starting from 1 (i.e. after OP_END), the values up to --OP_EOD must correspond in order to the list of escapes immediately above. */ -+OP_EOD must correspond in order to the list of escapes immediately above. -+Note that whenever this list is updated, the two macro definitions that follow -+must also be updated to match. */ - - enum { -- OP_END, /* End of pattern */ -+ OP_END, /* 0 End of pattern */ - - /* Values corresponding to backslashed metacharacters */ - -- OP_SOD, /* Start of data: \A */ -- OP_NOT_WORD_BOUNDARY, /* \B */ -- OP_WORD_BOUNDARY, /* \b */ -- OP_NOT_DIGIT, /* \D */ -- OP_DIGIT, /* \d */ -- OP_NOT_WHITESPACE, /* \S */ -- OP_WHITESPACE, /* \s */ -- OP_NOT_WORDCHAR, /* \W */ -- OP_WORDCHAR, /* \w */ -- OP_EODN, /* End of data or \n at end of data: \Z. */ -- OP_EOD, /* End of data: \z */ -- -- OP_OPT, /* Set runtime options */ -- OP_CIRC, /* Start of line - varies with multiline switch */ -- OP_DOLL, /* End of line - varies with multiline switch */ -- OP_ANY, /* Match any character */ -- OP_CHARS, /* Match string of characters */ -- OP_NOT, /* Match anything but the following char */ -- -- OP_STAR, /* The maximizing and minimizing versions of */ -- OP_MINSTAR, /* all these opcodes must come in pairs, with */ -- OP_PLUS, /* the minimizing one second. */ -- OP_MINPLUS, /* This first set applies to single characters */ -- OP_QUERY, -- OP_MINQUERY, -- OP_UPTO, /* From 0 to n matches */ -- OP_MINUPTO, -- OP_EXACT, /* Exactly n matches */ -- -- OP_NOTSTAR, /* The maximizing and minimizing versions of */ -- OP_NOTMINSTAR, /* all these opcodes must come in pairs, with */ -- OP_NOTPLUS, /* the minimizing one second. */ -- OP_NOTMINPLUS, /* This first set applies to "not" single characters */ -- OP_NOTQUERY, -- OP_NOTMINQUERY, -- OP_NOTUPTO, /* From 0 to n matches */ -- OP_NOTMINUPTO, -- OP_NOTEXACT, /* Exactly n matches */ -- -- OP_TYPESTAR, /* The maximizing and minimizing versions of */ -- OP_TYPEMINSTAR, /* all these opcodes must come in pairs, with */ -- OP_TYPEPLUS, /* the minimizing one second. These codes must */ -- OP_TYPEMINPLUS, /* be in exactly the same order as those above. */ -- OP_TYPEQUERY, /* This set applies to character types such as \d */ -- OP_TYPEMINQUERY, -- OP_TYPEUPTO, /* From 0 to n matches */ -- OP_TYPEMINUPTO, -- OP_TYPEEXACT, /* Exactly n matches */ -- -- OP_CRSTAR, /* The maximizing and minimizing versions of */ -- OP_CRMINSTAR, /* all these opcodes must come in pairs, with */ -- OP_CRPLUS, /* the minimizing one second. These codes must */ -- OP_CRMINPLUS, /* be in exactly the same order as those above. */ -- OP_CRQUERY, /* These are for character classes and back refs */ -- OP_CRMINQUERY, -- OP_CRRANGE, /* These are different to the three seta above. */ -- OP_CRMINRANGE, -- -- OP_CLASS, /* Match a character class */ -- OP_REF, /* Match a back reference */ -- OP_RECURSE, /* Match this pattern recursively */ -- -- OP_ALT, /* Start of alternation */ -- OP_KET, /* End of group that doesn't have an unbounded repeat */ -- OP_KETRMAX, /* These two must remain together and in this */ -- OP_KETRMIN, /* order. They are for groups the repeat for ever. */ -+ OP_SOD, /* 1 Start of data: \A */ -+ OP_SOM, /* 2 Start of match (subject + offset): \G */ -+ OP_NOT_WORD_BOUNDARY, /* 3 \B */ -+ OP_WORD_BOUNDARY, /* 4 \b */ -+ OP_NOT_DIGIT, /* 5 \D */ -+ OP_DIGIT, /* 6 \d */ -+ OP_NOT_WHITESPACE, /* 7 \S */ -+ OP_WHITESPACE, /* 8 \s */ -+ OP_NOT_WORDCHAR, /* 9 \W */ -+ OP_WORDCHAR, /* 10 \w */ -+ OP_ANY, /* 11 Match any character */ -+ OP_ANYBYTE, /* 12 Match any byte (\C); different to OP_ANY for UTF-8 */ -+ OP_NOTPROP, /* 13 \P (not Unicode property) */ -+ OP_PROP, /* 14 \p (Unicode property) */ -+ OP_EXTUNI, /* 15 \X (extended Unicode sequence */ -+ OP_EODN, /* 16 End of data or \n at end of data: \Z. */ -+ OP_EOD, /* 17 End of data: \z */ -+ -+ OP_OPT, /* 18 Set runtime options */ -+ OP_CIRC, /* 19 Start of line - varies with multiline switch */ -+ OP_DOLL, /* 20 End of line - varies with multiline switch */ -+ OP_CHAR, /* 21 Match one character, casefully */ -+ OP_CHARNC, /* 22 Match one character, caselessly */ -+ OP_NOT, /* 23 Match anything but the following char */ -+ -+ OP_STAR, /* 24 The maximizing and minimizing versions of */ -+ OP_MINSTAR, /* 25 all these opcodes must come in pairs, with */ -+ OP_PLUS, /* 26 the minimizing one second. */ -+ OP_MINPLUS, /* 27 This first set applies to single characters */ -+ OP_QUERY, /* 28 */ -+ OP_MINQUERY, /* 29 */ -+ OP_UPTO, /* 30 From 0 to n matches */ -+ OP_MINUPTO, /* 31 */ -+ OP_EXACT, /* 32 Exactly n matches */ -+ -+ OP_NOTSTAR, /* 33 The maximizing and minimizing versions of */ -+ OP_NOTMINSTAR, /* 34 all these opcodes must come in pairs, with */ -+ OP_NOTPLUS, /* 35 the minimizing one second. */ -+ OP_NOTMINPLUS, /* 36 This set applies to "not" single characters */ -+ OP_NOTQUERY, /* 37 */ -+ OP_NOTMINQUERY, /* 38 */ -+ OP_NOTUPTO, /* 39 From 0 to n matches */ -+ OP_NOTMINUPTO, /* 40 */ -+ OP_NOTEXACT, /* 41 Exactly n matches */ -+ -+ OP_TYPESTAR, /* 42 The maximizing and minimizing versions of */ -+ OP_TYPEMINSTAR, /* 43 all these opcodes must come in pairs, with */ -+ OP_TYPEPLUS, /* 44 the minimizing one second. These codes must */ -+ OP_TYPEMINPLUS, /* 45 be in exactly the same order as those above. */ -+ OP_TYPEQUERY, /* 46 This set applies to character types such as \d */ -+ OP_TYPEMINQUERY, /* 47 */ -+ OP_TYPEUPTO, /* 48 From 0 to n matches */ -+ OP_TYPEMINUPTO, /* 49 */ -+ OP_TYPEEXACT, /* 50 Exactly n matches */ -+ -+ OP_CRSTAR, /* 51 The maximizing and minimizing versions of */ -+ OP_CRMINSTAR, /* 52 all these opcodes must come in pairs, with */ -+ OP_CRPLUS, /* 53 the minimizing one second. These codes must */ -+ OP_CRMINPLUS, /* 54 be in exactly the same order as those above. */ -+ OP_CRQUERY, /* 55 These are for character classes and back refs */ -+ OP_CRMINQUERY, /* 56 */ -+ OP_CRRANGE, /* 57 These are different to the three sets above. */ -+ OP_CRMINRANGE, /* 58 */ -+ -+ OP_CLASS, /* 59 Match a character class, chars < 256 only */ -+ OP_NCLASS, /* 60 Same, but the bitmap was created from a negative -+ class - the difference is relevant only when a UTF-8 -+ character > 255 is encountered. */ -+ -+ OP_XCLASS, /* 61 Extended class for handling UTF-8 chars within the -+ class. This does both positive and negative. */ -+ -+ OP_REF, /* 62 Match a back reference */ -+ OP_RECURSE, /* 63 Match a numbered subpattern (possibly recursive) */ -+ OP_CALLOUT, /* 64 Call out to external function if provided */ -+ -+ OP_ALT, /* 65 Start of alternation */ -+ OP_KET, /* 66 End of group that doesn't have an unbounded repeat */ -+ OP_KETRMAX, /* 67 These two must remain together and in this */ -+ OP_KETRMIN, /* 68 order. They are for groups the repeat for ever. */ - - /* The assertions must come before ONCE and COND */ - -- OP_ASSERT, /* Positive lookahead */ -- OP_ASSERT_NOT, /* Negative lookahead */ -- OP_ASSERTBACK, /* Positive lookbehind */ -- OP_ASSERTBACK_NOT, /* Negative lookbehind */ -- OP_REVERSE, /* Move pointer back - used in lookbehind assertions */ -+ OP_ASSERT, /* 69 Positive lookahead */ -+ OP_ASSERT_NOT, /* 70 Negative lookahead */ -+ OP_ASSERTBACK, /* 71 Positive lookbehind */ -+ OP_ASSERTBACK_NOT, /* 72 Negative lookbehind */ -+ OP_REVERSE, /* 73 Move pointer back - used in lookbehind assertions */ - - /* ONCE and COND must come after the assertions, with ONCE first, as there's - a test for >= ONCE for a subpattern that isn't an assertion. */ - -- OP_ONCE, /* Once matched, don't back up into the subpattern */ -- OP_COND, /* Conditional group */ -- OP_CREF, /* Used to hold an extraction string number (cond ref) */ -- -- OP_BRAZERO, /* These two must remain together and in this */ -- OP_BRAMINZERO, /* order. */ -- -- OP_BRANUMBER, /* Used for extracting brackets whose number is greater -- than can fit into an opcode. */ -- -- OP_BRA /* This and greater values are used for brackets that -- extract substrings up to a basic limit. After that, -- use is made of OP_BRANUMBER. */ -+ OP_ONCE, /* 74 Once matched, don't back up into the subpattern */ -+ OP_COND, /* 75 Conditional group */ -+ OP_CREF, /* 76 Used to hold an extraction string number (cond ref) */ -+ -+ OP_BRAZERO, /* 77 These two must remain together and in this */ -+ OP_BRAMINZERO, /* 78 order. */ -+ -+ OP_BRANUMBER, /* 79 Used for extracting brackets whose number is greater -+ than can fit into an opcode. */ -+ -+ OP_BRA /* 80 This and greater values are used for brackets that -+ extract substrings up to EXTRACT_BASIC_MAX. After -+ that, use is made of OP_BRANUMBER. */ - }; - -+/* WARNING WARNING WARNING: There is an implicit assumption in pcre.c and -+study.c that all opcodes are less than 128 in value. This makes handling UTF-8 -+character sequences easier. */ -+ - /* The highest extraction number before we have to start using additional - bytes. (Originally PCRE didn't have support for extraction counts highter than - this number.) The value is limited by the number of opcodes left after OP_BRA, - i.e. 255 - OP_BRA. We actually set it a bit lower to leave room for additional - opcodes. */ - --#define EXTRACT_BASIC_MAX 150 -+#define EXTRACT_BASIC_MAX 100 -+ -+ -+/* This macro defines textual names for all the opcodes. There are used only -+for debugging, in pcre.c when DEBUG is defined, and also in pcretest.c. The -+macro is referenced only in printint.c. */ -+ -+#define OP_NAME_LIST \ -+ "End", "\\A", "\\G", "\\B", "\\b", "\\D", "\\d", \ -+ "\\S", "\\s", "\\W", "\\w", "Any", "Anybyte", \ -+ "notprop", "prop", "extuni", \ -+ "\\Z", "\\z", \ -+ "Opt", "^", "$", "char", "charnc", "not", \ -+ "*", "*?", "+", "+?", "?", "??", "{", "{", "{", \ -+ "*", "*?", "+", "+?", "?", "??", "{", "{", "{", \ -+ "*", "*?", "+", "+?", "?", "??", "{", "{", "{", \ -+ "*", "*?", "+", "+?", "?", "??", "{", "{", \ -+ "class", "nclass", "xclass", "Ref", "Recurse", "Callout", \ -+ "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", \ -+ "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cond ref",\ -+ "Brazero", "Braminzero", "Branumber", "Bra" -+ -+ -+/* This macro defines the length of fixed length operations in the compiled -+regex. The lengths are used when searching for specific things, and also in the -+debugging printing of a compiled regex. We use a macro so that it can be -+incorporated both into pcre.c and pcretest.c without being publicly exposed. -+ -+As things have been extended, some of these are no longer fixed lenths, but are -+minima instead. For example, the length of a single-character repeat may vary -+in UTF-8 mode. The code that uses this table must know about such things. */ -+ -+#define OP_LENGTHS \ -+ 1, /* End */ \ -+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* \A, \G, \B, \B, \D, \d, \S, \s, \W, \w */ \ -+ 1, 1, /* Any, Anybyte */ \ -+ 2, 2, 1, /* NOTPROP, PROP, EXTUNI */ \ -+ 1, 1, 2, 1, 1, /* \Z, \z, Opt, ^, $ */ \ -+ 2, /* Char - the minimum length */ \ -+ 2, /* Charnc - the minimum length */ \ -+ 2, /* not */ \ -+ /* Positive single-char repeats ** These are */ \ -+ 2, 2, 2, 2, 2, 2, /* *, *?, +, +?, ?, ?? ** minima in */ \ -+ 4, 4, 4, /* upto, minupto, exact ** UTF-8 mode */ \ -+ /* Negative single-char repeats - only for chars < 256 */ \ -+ 2, 2, 2, 2, 2, 2, /* NOT *, *?, +, +?, ?, ?? */ \ -+ 4, 4, 4, /* NOT upto, minupto, exact */ \ -+ /* Positive type repeats */ \ -+ 2, 2, 2, 2, 2, 2, /* Type *, *?, +, +?, ?, ?? */ \ -+ 4, 4, 4, /* Type upto, minupto, exact */ \ -+ /* Character class & ref repeats */ \ -+ 1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */ \ -+ 5, 5, /* CRRANGE, CRMINRANGE */ \ -+ 33, /* CLASS */ \ -+ 33, /* NCLASS */ \ -+ 0, /* XCLASS - variable length */ \ -+ 3, /* REF */ \ -+ 1+LINK_SIZE, /* RECURSE */ \ -+ 2+2*LINK_SIZE, /* CALLOUT */ \ -+ 1+LINK_SIZE, /* Alt */ \ -+ 1+LINK_SIZE, /* Ket */ \ -+ 1+LINK_SIZE, /* KetRmax */ \ -+ 1+LINK_SIZE, /* KetRmin */ \ -+ 1+LINK_SIZE, /* Assert */ \ -+ 1+LINK_SIZE, /* Assert not */ \ -+ 1+LINK_SIZE, /* Assert behind */ \ -+ 1+LINK_SIZE, /* Assert behind not */ \ -+ 1+LINK_SIZE, /* Reverse */ \ -+ 1+LINK_SIZE, /* Once */ \ -+ 1+LINK_SIZE, /* COND */ \ -+ 3, /* CREF */ \ -+ 1, 1, /* BRAZERO, BRAMINZERO */ \ -+ 3, /* BRANUMBER */ \ -+ 1+LINK_SIZE /* BRA */ \ -+ -+ -+/* A magic value for OP_CREF to indicate the "in recursion" condition. */ -+ -+#define CREF_RECURSE 0xffff - - /* The texts of compile-time error messages are defined as macros here so that - they can be accessed by the POSIX wrapper and converted into error codes. Yes, -@@ -286,9 +555,9 @@ just to accommodate the POSIX wrapper. * - #define ERR10 "operand of unlimited repeat could match the empty string" - #define ERR11 "internal error: unexpected repeat" - #define ERR12 "unrecognized character after (?" --#define ERR13 "unused error" -+#define ERR13 "POSIX named classes are supported only within a class" - #define ERR14 "missing )" --#define ERR15 "back reference to non-existent subpattern" -+#define ERR15 "reference to non-existent subpattern" - #define ERR16 "erroffset passed as NULL" - #define ERR17 "unknown option bit(s) set" - #define ERR18 "missing ) after comment" -@@ -302,45 +571,69 @@ just to accommodate the POSIX wrapper. * - #define ERR26 "malformed number after (?(" - #define ERR27 "conditional group contains more than two branches" - #define ERR28 "assertion expected after (?(" --#define ERR29 "(?p must be followed by )" -+#define ERR29 "(?R or (?digits must be followed by )" - #define ERR30 "unknown POSIX class name" - #define ERR31 "POSIX collating elements are not supported" - #define ERR32 "this version of PCRE is not compiled with PCRE_UTF8 support" --#define ERR33 "characters with values > 255 are not yet supported in classes" -+#define ERR33 "spare error" - #define ERR34 "character value in \\x{...} sequence is too large" - #define ERR35 "invalid condition (?(0)" -- --/* All character handling must be done as unsigned characters. Otherwise there --are problems with top-bit-set characters and functions such as isspace(). --However, we leave the interface to the outside world as char *, because that --should make things easier for callers. We define a short type for unsigned char --to save lots of typing. I tried "uchar", but it causes problems on Digital --Unix, where it is defined in sys/types, so use "uschar" instead. */ -- --typedef unsigned char uschar; -- --/* The real format of the start of the pcre block; the actual code vector --runs on as long as necessary after the end. */ -+#define ERR36 "\\C not allowed in lookbehind assertion" -+#define ERR37 "PCRE does not support \\L, \\l, \\N, \\U, or \\u" -+#define ERR38 "number after (?C is > 255" -+#define ERR39 "closing ) for (?C expected" -+#define ERR40 "recursive call could loop indefinitely" -+#define ERR41 "unrecognized character after (?P" -+#define ERR42 "syntax error after (?P" -+#define ERR43 "two named groups have the same name" -+#define ERR44 "invalid UTF-8 string" -+#define ERR45 "support for \\P, \\p, and \\X has not been compiled" -+#define ERR46 "malformed \\P or \\p sequence" -+#define ERR47 "unknown property name after \\P or \\p" -+ -+/* The real format of the start of the pcre block; the index of names and the -+code vector run on as long as necessary after the end. We store an explicit -+offset to the name table so that if a regex is compiled on one host, saved, and -+then run on another where the size of pointers is different, all might still -+be well. For the case of compiled-on-4 and run-on-8, we include an extra -+pointer that is always NULL. For future-proofing, we also include a few dummy -+fields - even though you can never get this planning right! -+ -+NOTE NOTE NOTE: -+Because people can now save and re-use compiled patterns, any additions to this -+structure should be made at the end, and something earlier (e.g. a new -+flag in the options or one of the dummy fields) should indicate that the new -+fields are present. Currently PCRE always sets the dummy fields to zero. -+NOTE NOTE NOTE: -+*/ - - typedef struct real_pcre { -- unsigned long int magic_number; -- size_t size; -- const unsigned char *tables; -- unsigned long int options; -- unsigned short int top_bracket; -- unsigned short int top_backref; -- uschar first_char; -- uschar req_char; -- uschar code[1]; -+ pcre_uint32 magic_number; -+ pcre_uint32 size; /* Total that was malloced */ -+ pcre_uint32 options; -+ pcre_uint32 dummy1; /* For future use, maybe */ -+ -+ pcre_uint16 top_bracket; -+ pcre_uint16 top_backref; -+ pcre_uint16 first_byte; -+ pcre_uint16 req_byte; -+ pcre_uint16 name_table_offset; /* Offset to name table that follows */ -+ pcre_uint16 name_entry_size; /* Size of any name items */ -+ pcre_uint16 name_count; /* Number of name items */ -+ pcre_uint16 dummy2; /* For future use, maybe */ -+ -+ const unsigned char *tables; /* Pointer to tables or NULL for std */ -+ const unsigned char *nullpad; /* NULL padding */ - } real_pcre; - --/* The real format of the extra block returned by pcre_study(). */ -+/* The format of the block used to store data from pcre_study(). The same -+remark (see NOTE above) about extending this structure applies. */ - --typedef struct real_pcre_extra { -- uschar options; -+typedef struct pcre_study_data { -+ pcre_uint32 size; /* Total that was malloced */ -+ pcre_uint32 options; - uschar start_bits[32]; --} real_pcre_extra; -- -+} pcre_study_data; - - /* Structure for passing "static" information around between the functions - doing the compiling, so that they are thread-safe. */ -@@ -350,13 +643,53 @@ typedef struct compile_data { - const uschar *fcc; /* Points to case-flipping table */ - const uschar *cbits; /* Points to character type table */ - const uschar *ctypes; /* Points to table of type maps */ -+ const uschar *start_code; /* The start of the compiled code */ -+ const uschar *start_pattern; /* The start of the pattern */ -+ uschar *name_table; /* The name/number table */ -+ int names_found; /* Number of entries so far */ -+ int name_entry_size; /* Size of each entry */ -+ int top_backref; /* Maximum back reference */ -+ unsigned int backref_map; /* Bitmap of low back refs */ -+ int req_varyopt; /* "After variable item" flag for reqbyte */ -+ BOOL nopartial; /* Set TRUE if partial won't work */ - } compile_data; - -+/* Structure for maintaining a chain of pointers to the currently incomplete -+branches, for testing for left recursion. */ -+ -+typedef struct branch_chain { -+ struct branch_chain *outer; -+ uschar *current; -+} branch_chain; -+ -+/* Structure for items in a linked list that represents an explicit recursive -+call within the pattern. */ -+ -+typedef struct recursion_info { -+ struct recursion_info *prevrec; /* Previous recursion record (or NULL) */ -+ int group_num; /* Number of group that was called */ -+ const uschar *after_call; /* "Return value": points after the call in the expr */ -+ const uschar *save_start; /* Old value of md->start_match */ -+ int *offset_save; /* Pointer to start of saved offsets */ -+ int saved_max; /* Number of saved offsets */ -+} recursion_info; -+ -+/* When compiling in a mode that doesn't use recursive calls to match(), -+a structure is used to remember local variables on the heap. It is defined in -+pcre.c, close to the match() function, so that it is easy to keep it in step -+with any changes of local variable. However, the pointer to the current frame -+must be saved in some "static" place over a longjmp(). We declare the -+structure here so that we can put a pointer in the match_data structure. -+NOTE: This isn't used for a "normal" compilation of pcre. */ -+ -+struct heapframe; -+ - /* Structure for passing "static" information around between the functions - doing the matching, so that they are thread-safe. */ - - typedef struct match_data { -- int errorcode; /* As it says */ -+ unsigned long int match_call_count; /* As it says */ -+ unsigned long int match_limit;/* As it says */ - int *offset_vector; /* Offset vector */ - int offset_end; /* One past the end */ - int offset_max; /* The maximum usable for return data */ -@@ -368,12 +701,19 @@ typedef struct match_data { - BOOL utf8; /* UTF8 flag */ - BOOL endonly; /* Dollar not before final \n */ - BOOL notempty; /* Empty string match not wanted */ -- const uschar *start_pattern; /* For use when recursing */ -+ BOOL partial; /* PARTIAL flag */ -+ BOOL hitend; /* Hit the end of the subject at some point */ -+ const uschar *start_code; /* For use when recursing */ - const uschar *start_subject; /* Start of the subject string */ - const uschar *end_subject; /* End of the subject string */ - const uschar *start_match; /* Start of this match attempt */ - const uschar *end_match_ptr; /* Subject position at end match */ - int end_offset_top; /* Highwater mark at end of match */ -+ int capture_last; /* Most recent capture number */ -+ int start_offset; /* The start offset value */ -+ recursion_info *recursive; /* Linked list of recursion data */ -+ void *callout_data; /* To pass back to callouts */ -+ struct heapframe *thisframe; /* Used only when compiling for no recursion */ - } match_data; - - /* Bit definitions for entries in the pcre_ctypes table. */ |