Index: /icu/branches/maint/maint-3-8/source/i18n/regexcmp.cpp =================================================================== --- i18n/regexcmp.cpp (revision 21805) +++ i18n/regexcmp.cpp (revision 23292) @@ -3,5 +3,5 @@ // file: regexcmp.cpp // -// Copyright (C) 2002-2007 International Business Machines Corporation and others. +// Copyright (C) 2002-2008 International Business Machines Corporation and others. // All Rights Reserved. // @@ -1187,12 +1187,15 @@ // we fill the operand with the capture group number. At the end // of compilation, it will be changed to the variable's location. - U_ASSERT(groupNum > 0); - int32_t op; - if (fModeFlags & UREGEX_CASE_INSENSITIVE) { - op = URX_BUILD(URX_BACKREF_I, groupNum); + if (groupNum < 1) { + error(U_REGEX_INVALID_BACK_REF); } else { - op = URX_BUILD(URX_BACKREF, groupNum); - } - fRXPat->fCompiledPat->addElement(op, *fStatus); + int32_t op; + if (fModeFlags & UREGEX_CASE_INSENSITIVE) { + op = URX_BUILD(URX_BACKREF_I, groupNum); + } else { + op = URX_BUILD(URX_BACKREF, groupNum); + } + fRXPat->fCompiledPat->addElement(op, *fStatus); + } } break; Index: /icu/branches/maint/maint-3-8/source/i18n/rematch.cpp =================================================================== --- i18n/rematch.cpp (revision 21973) +++ i18n/rematch.cpp (revision 23292) @@ -1,5 +1,5 @@ /* ************************************************************************** -* Copyright (C) 2002-2007 International Business Machines Corporation * +* Copyright (C) 2002-2008 International Business Machines Corporation * * and others. All rights reserved. * ************************************************************************** @@ -30,4 +30,13 @@ U_NAMESPACE_BEGIN + +// Limit the size of the back track stack, to avoid system failures caused +// by heap exhaustion. Units are in 32 bit words, not bytes. 
+// This value puts ICU's limits higher than most other regexp implementations, +// which use recursion rather than the heap, and take more storage per +// backtrack point. +// This constant is _temporary_. Proper API to control the value will be added. +// +static const int32_t BACKTRACK_STACK_CAPACITY = 8000000; //----------------------------------------------------------------------------- @@ -54,6 +63,7 @@ if (fStack == NULL || fData == NULL) { fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; - } - + } else { + fStack->setMaxCapacity(BACKTRACK_STACK_CAPACITY); + } reset(RegexStaticSets::gStaticSets->fEmptyString); } @@ -79,4 +89,6 @@ if (fStack == NULL || fData == NULL) { status = U_MEMORY_ALLOCATION_ERROR; + } else { + fStack->setMaxCapacity(BACKTRACK_STACK_CAPACITY); } reset(input); @@ -103,4 +115,6 @@ if (fStack == NULL || fData == NULL) { status = U_MEMORY_ALLOCATION_ERROR; + } else { + fStack->setMaxCapacity(BACKTRACK_STACK_CAPACITY); } reset(RegexStaticSets::gStaticSets->fEmptyString); @@ -1015,4 +1029,12 @@ // push storage for a new frame. int32_t *newFP = fStack->reserveBlock(frameSize, status); + if (newFP == NULL) { + // Heap allocation error on attempted stack expansion. + // We need to return a writable stack frame, so just return the + // previous frame. The match operation will stop quickly + // because of the error status, after which the frame will never + // be looked at again. + return fp; + } fp = (REStackFrame *)(newFP - frameSize); // in case of realloc of stack. 
@@ -1030,6 +1052,6 @@ return (REStackFrame *)newFP; } - - + + //-------------------------------------------------------------------------------- // @@ -2262,4 +2284,5 @@ if (U_FAILURE(status)) { + isMatch = FALSE; break; } Index: /icu/branches/maint/maint-3-8/source/test/intltest/regextst.h =================================================================== --- test/intltest/regextst.h (revision 22001) +++ test/intltest/regextst.h (revision 23292) @@ -1,5 +1,5 @@ /******************************************************************** * COPYRIGHT: - * Copyright (c) 2002-2007, International Business Machines Corporation and + * Copyright (c) 2002-2008, International Business Machines Corporation and * others. All Rights Reserved. ********************************************************************/ @@ -31,4 +31,5 @@ virtual void Errors(); virtual void PerlTests(); + virtual void Bug6149(); // The following functions are internal to the regexp tests. Index: /icu/branches/maint/maint-3-8/source/test/intltest/regextst.cpp =================================================================== --- test/intltest/regextst.cpp (revision 22057) +++ test/intltest/regextst.cpp (revision 23292) @@ -1,5 +1,5 @@ /******************************************************************** * COPYRIGHT: - * Copyright (c) 2002-2007, International Business Machines Corporation and + * Copyright (c) 2002-2008, International Business Machines Corporation and * others. All Rights Reserved. ********************************************************************/ @@ -67,4 +67,8 @@ if (exec) PerlTests(); break; + case 7: name = "Bug 6149"; + if (exec) Bug6149(); + break; + @@ -1640,4 +1644,10 @@ // Ticket 5389 REGEX_ERR("*c", 1, 1, U_REGEX_RULE_SYNTAX); + + // Invalid Back Reference \0 + // For ICU 3.8 and earlier + // For ICU versions newer than 3.8, \0 introduces an octal escape. 
+ // + REGEX_ERR("(ab)\\0", 1, 6, U_REGEX_INVALID_BACK_REF); } @@ -2123,4 +2133,24 @@ +//-------------------------------------------------------------- +// +// Bug6149 Verify limits to heap expansion for backtrack stack. +// Use this pattern, +// "(a?){1,}" +// The zero-length match will repeat forever. +// (That this goes into a loop is another bug) +// +//--------------------------------------------------------------- +void RegexTest::Bug6149() { + UnicodeString pattern("(a?){1,}"); + UnicodeString s("xyz"); + uint32_t flags = 0; + UErrorCode status = U_ZERO_ERROR; + + RegexMatcher matcher(pattern, s, flags, status); + UBool result = false; + REGEX_ASSERT_FAIL(result=matcher.matches(status), U_BUFFER_OVERFLOW_ERROR); + REGEX_ASSERT(result == FALSE); + } #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */ Index: /icu/branches/maint/maint-3-8/source/common/uvectr32.cpp =================================================================== --- common/uvectr32.cpp (revision 12958) +++ common/uvectr32.cpp (revision 23292) @@ -1,5 +1,5 @@ /* ****************************************************************************** -* Copyright (C) 1999-2003, International Business Machines Corporation and * +* Copyright (C) 1999-2008, International Business Machines Corporation and * * others. All Rights Reserved. 
* ****************************************************************************** @@ -27,4 +27,5 @@ count(0), capacity(0), + maxCapacity(0), elements(NULL) { @@ -35,4 +36,5 @@ count(0), capacity(0), + maxCapacity(0), elements(0) { @@ -46,4 +48,7 @@ if (initialCapacity < 1) { initialCapacity = DEFUALT_CAPACITY; + } + if (maxCapacity>0 && maxCapacity= minimumCapacity) { return TRUE; - } else { - int32_t newCap = capacity * 2; - if (newCap < minimumCapacity) { - newCap = minimumCapacity; - } - int32_t* newElems = (int32_t *)uprv_malloc(sizeof(int32_t)*newCap); - if (newElems == 0) { - status = U_MEMORY_ALLOCATION_ERROR; - return FALSE; - } - uprv_memcpy(newElems, elements, sizeof(elements[0]) * count); - uprv_free(elements); - elements = newElems; - capacity = newCap; - return TRUE; + } + if (maxCapacity>0 && minimumCapacity>maxCapacity) { + status = U_BUFFER_OVERFLOW_ERROR; + return FALSE; + } + int32_t newCap = capacity * 2; + if (newCap < minimumCapacity) { + newCap = minimumCapacity; + } + if (maxCapacity > 0 && newCap > maxCapacity) { + newCap = maxCapacity; + } + int32_t* newElems = (int32_t *)uprv_malloc(sizeof(int32_t)*newCap); + if (newElems == 0) { + status = U_MEMORY_ALLOCATION_ERROR; + return FALSE; + } + uprv_memcpy(newElems, elements, sizeof(elements[0]) * count); + uprv_free(elements); + elements = newElems; + capacity = newCap; + return TRUE; +} + +void UVector32::setMaxCapacity(int32_t limit) { + U_ASSERT(limit >= 0); + maxCapacity = limit; + if (maxCapacity < 0) { + maxCapacity = 0; } } Index: /icu/branches/maint/maint-3-8/source/common/uvectr32.h =================================================================== --- common/uvectr32.h (revision 19000) +++ common/uvectr32.h (revision 23292) @@ -1,5 +1,5 @@ /* ********************************************************************** -* Copyright (C) 1999-2006, International Business Machines +* Copyright (C) 1999-2008, International Business Machines * Corporation and others. All Rights Reserved. 
********************************************************************** @@ -62,4 +62,6 @@ int32_t capacity; + + int32_t maxCapacity; // Limit beyond which capacity is not permitted to grow. int32_t* elements; @@ -161,4 +163,12 @@ */ int32_t *getBuffer() const; + + /** + * Set the maximum allowed buffer capacity for this vector/stack. + * Default with no limit set is unlimited, go until malloc() fails. + * A Limit of zero means unlimited capacity. + * Units are vector elements (32 bits each), not bytes. + */ + void setMaxCapacity(int32_t limit); /** @@ -222,5 +232,7 @@ inline int32_t *UVector32::reserveBlock(int32_t size, UErrorCode &status) { - ensureCapacity(count+size, status); + if (ensureCapacity(count+size, status) == FALSE) { + return NULL; + } int32_t *rp = elements+count; count += size;