diff --git a/builds/posix/make.defaults b/builds/posix/make.defaults index f2f62de76f..8351fc4579 100755 --- a/builds/posix/make.defaults +++ b/builds/posix/make.defaults @@ -307,7 +307,8 @@ endif LIB_PATH_OPTS = $(call LIB_LINK_RPATH,lib) $(call LIB_LINK_RPATH,intl) LIB_LINK_SONAME= -Wl,-soname,$(1) LIB_LINK_MAPFILE= -Wl,--version-script,$(1) -FIREBIRD_LIBRARY_LINK= -L$(LIB) -lfbclient $(MATHLIB) $(CRYPTLIB) +# FIXME: +FIREBIRD_LIBRARY_LINK= -L$(LIB) -lfbclient $(MATHLIB) $(CRYPTLIB) -lre2 EXE_LINK_OPTIONS= $(LDFLAGS) $(THR_FLAGS) $(UNDEF_FLAGS) $(LIB_PATH_OPTS) $(LINK_EMPTY_SYMBOLS) LIB_LINK_OPTIONS= $(LDFLAGS) $(THR_FLAGS) -shared diff --git a/doc/sql.extensions/README.similar_to.txt b/doc/sql.extensions/README.similar_to.txt index 2fdac63b29..6dc2c50d71 100644 --- a/doc/sql.extensions/README.similar_to.txt +++ b/doc/sql.extensions/README.similar_to.txt @@ -90,6 +90,7 @@ Note: , , , , , , , , , , or . +3) Since FB 4 the repeat factor low/high values cannot be greater than 1000. Syntax description and examples: @@ -174,7 +175,7 @@ Matches a character not identical to one of : Matches a character identical to one of but not identical to one of : - ... ... + ... ... '3' SIMILAR TO '[[:DIGIT:]^3]' -- false '4' SIMILAR TO '[[:DIGIT:]^3]' -- true @@ -220,3 +221,36 @@ insert into department values ('600', 'Engineering', '(408) 555-123'); -- check select * from department where phone not similar to '\([0-9]{3}\) 555\-%' escape '\'; + +Appendix: + +Since FB 4 SIMILAR TO and SUBSTRING...SIMILAR are implemented using the re2 library, +which has the following license: + +Copyright (c) 2009 The RE2 Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/src/burp/burp.cpp b/src/burp/burp.cpp index b1cc4a392f..75880da7ed 100644 --- a/src/burp/burp.cpp +++ b/src/burp/burp.cpp @@ -2542,15 +2542,11 @@ void BurpGlobals::setupSkipData(const Firebird::string& regexp) ISC_systemToUtf8(filter); BurpGlobals* tdgbl = BurpGlobals::getSpecific(); - if (!unicodeCollation) - unicodeCollation = FB_NEW_POOL(tdgbl->getPool()) UnicodeCollationHolder(tdgbl->getPool()); - Jrd::TextType* const textType = unicodeCollation->getTextType(); - - skipDataMatcher.reset(FB_NEW_POOL(tdgbl->getPool()) - Firebird::SimilarToMatcher > - (tdgbl->getPool(), textType, (const UCHAR*) filter.c_str(), - filter.length(), '\\', true)); + skipDataMatcher.reset(FB_NEW_POOL(tdgbl->getPool()) Firebird::SimilarToRegex( + tdgbl->getPool(), true, + filter.c_str(), filter.length(), + "\\", 1)); } } catch (const Firebird::Exception&) @@ -2571,18 +2567,12 @@ Firebird::string BurpGlobals::toSystem(const Firebird::PathName& from) bool BurpGlobals::skipRelation(const char* name) { if (gbl_sw_meta) - { return true; - } if (!skipDataMatcher) - { return false; - } - skipDataMatcher->reset(); - skipDataMatcher->process(reinterpret_cast(name), static_cast(strlen(name))); - return skipDataMatcher->result(); + return skipDataMatcher->matches(name, strlen(name)); } void BurpGlobals::read_stats(SINT64* stats) @@ -2703,39 +2693,6 @@ void BurpGlobals::print_stats_header() burp_output(false, "\n"); } -UnicodeCollationHolder::UnicodeCollationHolder(MemoryPool& pool) -{ - cs = FB_NEW_POOL(pool) charset; - tt = FB_NEW_POOL(pool) texttype; - - Firebird::IntlUtil::initUtf8Charset(cs); - - Firebird::string collAttributes("ICU-VERSION="); - collAttributes += Jrd::UnicodeUtil::getDefaultIcuVersion(); - Firebird::IntlUtil::setupIcuAttributes(cs, collAttributes, "", collAttributes); - - Firebird::UCharBuffer collAttributesBuffer; - collAttributesBuffer.push(reinterpret_cast(collAttributes.c_str()), - collAttributes.length()); - - if (!Firebird::IntlUtil::initUnicodeCollation(tt, 
cs, "UNICODE", 0, collAttributesBuffer, Firebird::string())) - Firebird::fatal_exception::raiseFmt("cannot initialize UNICODE collation to use in gbak"); - - charSet = Jrd::CharSet::createInstance(pool, 0, cs); - textType = FB_NEW_POOL(pool) Jrd::TextType(0, tt, charSet); -} - -UnicodeCollationHolder::~UnicodeCollationHolder() -{ - fb_assert(tt->texttype_fn_destroy); - - if (tt->texttype_fn_destroy) - tt->texttype_fn_destroy(tt); - - // cs should be deleted by texttype_fn_destroy call above - delete tt; -} - void BURP_makeSymbol(BurpGlobals* tdgbl, Firebird::string& name) // add double quotes to string { if (tdgbl->gbl_dialect < SQL_DIALECT_V6) diff --git a/src/burp/burp.h b/src/burp/burp.h index 20b77a5afb..53947dde1b 100644 --- a/src/burp/burp.h +++ b/src/burp/burp.h @@ -42,7 +42,7 @@ #include "../common/classes/array.h" #include "../common/classes/fb_pair.h" #include "../common/classes/MetaName.h" -#include "../jrd/SimilarToMatcher.h" +#include "../common/SimilarToRegex.h" #include "../common/status.h" #include "../common/sha.h" #include "../common/classes/ImplementHelper.h" @@ -894,26 +894,6 @@ static const char HDR_SPLIT_TAG6[] = "InterBase/gbak, "; const FB_UINT64 MIN_SPLIT_SIZE = FB_CONST64(2048); // bytes -// Copy&paste from TraceUnicodeUtils.h - fixme !!!!!!!! 
-class UnicodeCollationHolder -{ -private: - charset* cs; - texttype* tt; - Firebird::AutoPtr charSet; - Firebird::AutoPtr textType; - -public: - explicit UnicodeCollationHolder(Firebird::MemoryPool& pool); - ~UnicodeCollationHolder(); - - Jrd::TextType* getTextType() - { - return textType; - } -}; - - // Global switches and data struct BurpCrypt; @@ -1174,8 +1154,7 @@ public: bool flag_on_line; // indicates whether we will bring the database on-line bool firstMap; // this is the first time we entered get_mapping() bool stdIoMode; // stdin or stdout is used as backup file - Firebird::AutoPtr unicodeCollation; - Firebird::AutoPtr > > skipDataMatcher; + Firebird::AutoPtr skipDataMatcher; public: Firebird::string toSystem(const Firebird::PathName& from); diff --git a/src/common/SimilarToRegex.cpp b/src/common/SimilarToRegex.cpp new file mode 100644 index 0000000000..1d49478918 --- /dev/null +++ b/src/common/SimilarToRegex.cpp @@ -0,0 +1,821 @@ +/* + * The contents of this file are subject to the Initial + * Developer's Public License Version 1.0 (the "License"); + * you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * http://www.ibphoenix.com/main.nfs?a=ibphoenix&page=ibp_idpl. + * + * Software distributed under the License is distributed AS IS, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. + * See the License for the specific language governing rights + * and limitations under the License. + * + * The Original Code was created by Adriano dos Santos Fernandes + * for the Firebird Open Source RDBMS project. + * + * Copyright (c) 2019 Adriano dos Santos Fernandes + * and all contributors signed below. 
+ * + */ + +#include "firebird.h" +#include "../common/SimilarToRegex.h" +#include "../common/StatusArg.h" +#include + +using namespace Firebird; + +namespace +{ + static const unsigned FLAG_PREFER_FEWER = 0x01; + static const unsigned FLAG_CASE_INSENSITIVE = 0x02; + static const unsigned FLAG_GROUP_CAPTURE = 0x04; + + //// TODO: Verify usage of U8_NEXT_UNSAFE. + class SimilarToCompiler + { + public: + SimilarToCompiler(MemoryPool& pool, AutoPtr& regexp, unsigned aFlags, + const char* aPatternStr, unsigned aPatternLen, + const char* escapeStr, unsigned escapeLen) + : re2PatternStr(pool), + patternStr(aPatternStr), + patternPos(0), + patternLen(aPatternLen), + flags(aFlags), + useEscape(escapeStr != nullptr) + { + if (escapeStr) + { + int32_t escapePos = 0; + U8_NEXT_UNSAFE(escapeStr, escapePos, escapeChar); + + if (escapePos != escapeLen) + status_exception::raise(Arg::Gds(isc_escape_invalid)); + } + + if (flags & FLAG_CASE_INSENSITIVE) + re2PatternStr.append("(?i)"); + + if (flags & FLAG_GROUP_CAPTURE) + re2PatternStr.append("("); + + int parseFlags; + parseExpr(&parseFlags); + + if (flags & FLAG_GROUP_CAPTURE) + re2PatternStr.append(")"); + + // Check for proper termination. + if (patternPos < patternLen) + status_exception::raise(Arg::Gds(isc_invalid_similar_pattern)); + + RE2::Options options; + options.set_log_errors(false); + options.set_dot_nl(true); + + re2::StringPiece sp((const char*) re2PatternStr.c_str(), re2PatternStr.length()); + regexp = FB_NEW_POOL(pool) RE2(sp, options); + + if (!regexp->ok()) + status_exception::raise(Arg::Gds(isc_invalid_similar_pattern)); + } + + bool hasChar() + { + return patternPos < patternLen; + } + + UChar32 getChar() + { + fb_assert(hasChar()); + UChar32 c; + U8_NEXT_UNSAFE(patternStr, patternPos, c); + return c; + } + + UChar32 peekChar() + { + auto savePos = patternPos; + auto c = getChar(); + patternPos = savePos; + return c; + } + + bool isRep(UChar32 c) const + { + return c == '*' || c == '+' || c == '?' 
|| c == '{'; + } + + bool isSpecial(UChar32 c) + { + switch (c) + { + case '^': + case '-': + case '_': + case '%': + case '[': + case ']': + case '(': + case ')': + case '{': + case '}': + case '|': + case '?': + case '+': + case '*': + return true; + + default: + return false; + } + } + + bool isRe2Special(UChar32 c) + { + switch (c) + { + case '\\': + case '$': + case '.': + case '^': + case '-': + case '_': + case '[': + case ']': + case '(': + case ')': + case '{': + case '}': + case '|': + case '?': + case '+': + case '*': + return true; + + default: + return false; + } + } + + void parseExpr(int* parseFlagOut) + { + // Callers pass an uninitialized int; define it before the &= and |= updates below. + *parseFlagOut = 0; + + while (true) + { + int parseFlags; + parseTerm(&parseFlags); + *parseFlagOut &= ~(~parseFlags & PARSE_FLAG_NOT_EMPTY); + *parseFlagOut |= parseFlags; + + auto savePos = patternPos; + UChar32 c; + + if (!hasChar() || (c = getChar()) != '|') + { + patternPos = savePos; + break; + } + + re2PatternStr.append("|"); + } + } + + void parseTerm(int* parseFlagOut) + { + *parseFlagOut = 0; + + bool first = true; + + while (hasChar()) + { + auto c = peekChar(); + + if (c != '|' && c != ')') + { + int parseFlags; + parseFactor(&parseFlags); + + *parseFlagOut |= parseFlags & PARSE_FLAG_NOT_EMPTY; + + if (first) + { + *parseFlagOut |= parseFlags; + first = false; + } + } + else + break; + } + } + + void parseFactor(int* parseFlagOut) + { + int parseFlags; + parsePrimary(&parseFlags); + + UChar32 op; + + if (!hasChar() || !isRep((op = peekChar()))) + { + *parseFlagOut = parseFlags; + return; + } + + if (!(parseFlags & PARSE_FLAG_NOT_EMPTY) && op != '?') + status_exception::raise(Arg::Gds(isc_invalid_similar_pattern)); + + fb_assert(op == '*' || op == '+' || op == '?' || op == '{'); + + if (op == '*') + { + re2PatternStr.append((flags & FLAG_PREFER_FEWER) ? "*?" : "*"); + *parseFlagOut = 0; + ++patternPos; + } + else if (op == '+') + { + re2PatternStr.append((flags & FLAG_PREFER_FEWER) ? "+?" 
: "+"); + *parseFlagOut = PARSE_FLAG_NOT_EMPTY; + ++patternPos; + } + else if (op == '?') + { + re2PatternStr.append((flags & FLAG_PREFER_FEWER) ? "??" : "?"); + *parseFlagOut = 0; + ++patternPos; + } + else if (op == '{') + { + const auto repeatStart = patternPos++; + + bool comma = false; + string s1, s2; + + while (true) + { + if (!hasChar()) + status_exception::raise(Arg::Gds(isc_invalid_similar_pattern)); + + UChar32 c = getChar(); + + if (c == '}') + { + if (s1.isEmpty()) + status_exception::raise(Arg::Gds(isc_invalid_similar_pattern)); + break; + } + else if (c == ',') + { + if (comma) + status_exception::raise(Arg::Gds(isc_invalid_similar_pattern)); + comma = true; + } + else + { + if (c >= '0' && c <= '9') + { + if (comma) + s2 += (char) c; + else + s1 += (char) c; + } + else + status_exception::raise(Arg::Gds(isc_invalid_similar_pattern)); + } + } + + const int n1 = atoi(s1.c_str()); + *parseFlagOut = n1 == 0 ? 0 : PARSE_FLAG_NOT_EMPTY; + + re2PatternStr.append(patternStr + repeatStart, patternStr + patternPos); + + if (flags & FLAG_PREFER_FEWER) + re2PatternStr.append("?"); + } + + if (hasChar() && isRep(peekChar())) + status_exception::raise(Arg::Gds(isc_invalid_similar_pattern)); + } + + void parsePrimary(int* parseFlagOut) + { + *parseFlagOut = 0; + + fb_assert(hasChar()); + auto savePos = patternPos; + auto op = getChar(); + + if (op == '_') + { + *parseFlagOut |= PARSE_FLAG_NOT_EMPTY; + re2PatternStr.append("."); + return; + } + else if (op == '%') + { + re2PatternStr.append((flags & FLAG_PREFER_FEWER) ? ".*?" 
: ".*"); + return; + } + else if (op == '[') + { + struct + { + const char* similarClass; + const char* re2ClassInclude; + const char* re2ClassExclude; + } static const classes[] = + { + {"alnum", "[:alnum:]", "[:^alnum:]"}, + {"alpha", "[:alpha:]", "[:^alpha:]"}, + {"digit", "[:digit:]", "[:^digit:]"}, + {"lower", "[:lower:]", "[:^lower:]"}, + {"space", " ", "\\x00-\\x1F\\x21-\\x{10FFFF}"}, + {"upper", "[:upper:]", "[:^upper:]"}, + {"whitespace", "[:space:]", "[:^space:]"} + }; + + struct Item + { + int clazz; + unsigned firstStart, firstEnd, lastStart, lastEnd; + }; + Array items; + unsigned includeCount = 0; + bool exclude = false; + + do + { + if (!hasChar()) + status_exception::raise(Arg::Gds(isc_invalid_similar_pattern)); + + unsigned charSavePos = patternPos; + UChar32 c = getChar(); + bool range = false; + bool charClass = false; + + if (useEscape && c == escapeChar) + { + if (!hasChar()) + status_exception::raise(Arg::Gds(isc_escape_invalid)); + + charSavePos = patternPos; + c = getChar(); + + if (!(c == escapeChar || isSpecial(c))) + status_exception::raise(Arg::Gds(isc_escape_invalid)); + } + else + { + if (c == '[') + charClass = true; + else if (c == '^') + { + if (exclude) + status_exception::raise(Arg::Gds(isc_invalid_similar_pattern)); + + exclude = true; + + // 'continue' jumps straight to the loop condition, which peeks at the next char. + if (!hasChar()) + status_exception::raise(Arg::Gds(isc_invalid_similar_pattern)); + + continue; + } + } + + Item item; + + if (!exclude) + ++includeCount; + + if (charClass) + { + if (!hasChar() || getChar() != ':') + status_exception::raise(Arg::Gds(isc_invalid_similar_pattern)); + + charSavePos = patternPos; + + while (hasChar() && getChar() != ':') + ; + + const SLONG len = patternPos - charSavePos - 1; + + if (!hasChar() || getChar() != ']') + status_exception::raise(Arg::Gds(isc_invalid_similar_pattern)); + + for (item.clazz = 0; item.clazz < FB_NELEM(classes); ++item.clazz) + { + // The whole class name must match, not merely its first len characters. + if (len == (SLONG) strlen(classes[item.clazz].similarClass) && + fb_utils::strnicmp(patternStr + charSavePos, + classes[item.clazz].similarClass, len) == 0) + { + break; + } + } + + if (item.clazz >= FB_NELEM(classes)) + 
status_exception::raise(Arg::Gds(isc_invalid_similar_pattern)); + } + else + { + item.clazz = -1; + + item.firstStart = item.lastStart = charSavePos; + item.firstEnd = item.lastEnd = patternPos; + + if (hasChar() && peekChar() == '-') + { + getChar(); + + // A pattern that ends right after the '-' has no range end to read. + if (!hasChar()) + status_exception::raise(Arg::Gds(isc_invalid_similar_pattern)); + + charSavePos = patternPos; + c = getChar(); + + if (useEscape && c == escapeChar) + { + if (!hasChar()) + status_exception::raise(Arg::Gds(isc_escape_invalid)); + + charSavePos = patternPos; + c = getChar(); + + if (!(c == escapeChar || isSpecial(c))) + status_exception::raise(Arg::Gds(isc_escape_invalid)); + } + + item.lastStart = charSavePos; + item.lastEnd = patternPos; + } + } + + items.add(item); + + if (!hasChar()) + status_exception::raise(Arg::Gds(isc_invalid_similar_pattern)); + } while (peekChar() != ']'); + + auto appendItem = [&](const Item& item, bool negated) { + if (item.clazz != -1) + { + re2PatternStr.append(negated ? + classes[item.clazz].re2ClassExclude : + classes[item.clazz].re2ClassInclude); + } + else + { + if (negated) + { + UChar32 c; + char hex[20]; + + int32_t cPos = item.firstStart; + U8_NEXT_UNSAFE(patternStr, cPos, c); + + if (c > 0) + { + re2PatternStr.append("\\x00"); + re2PatternStr.append("-"); + + sprintf(hex, "\\x{%X}", (int) c - 1); + re2PatternStr.append(hex); + } + + cPos = item.lastStart; + U8_NEXT_UNSAFE(patternStr, cPos, c); + + if (c < 0x10FFFF) + { + sprintf(hex, "\\x{%X}", (int) c + 1); + re2PatternStr.append(hex); + re2PatternStr.append("-"); + re2PatternStr.append("\\x{10FFFF}"); + } + } + else + { + if (isRe2Special(patternStr[item.firstStart])) + re2PatternStr.append("\\"); + + re2PatternStr.append(patternStr + item.firstStart, patternStr + item.firstEnd); + + if (item.lastStart != item.firstStart) + { + re2PatternStr.append("-"); + + if (isRe2Special(patternStr[item.lastStart])) + re2PatternStr.append("\\"); + + re2PatternStr.append(patternStr + item.lastStart, patternStr + item.lastEnd); + } + } + } + }; + + if (exclude && includeCount > 1) + { + 
re2PatternStr.append("(?:"); + + for (unsigned i = 0; i < includeCount; ++i) + { + if (i != 0) + re2PatternStr.append("|"); + + re2PatternStr.append("["); + re2PatternStr.append("^"); + appendItem(items[i], true); + + for (unsigned j = includeCount; j < items.getCount(); ++j) + appendItem(items[j], false); + + re2PatternStr.append("]"); + } + + re2PatternStr.append(")"); + } + else + { + re2PatternStr.append("["); + + if (exclude) + re2PatternStr.append("^"); + + for (unsigned i = 0; i < items.getCount(); ++i) + appendItem(items[i], exclude && i < includeCount); + + re2PatternStr.append("]"); + } + + getChar(); + *parseFlagOut |= PARSE_FLAG_NOT_EMPTY; + } + else if (op == '(') + { + re2PatternStr.append(flags & FLAG_GROUP_CAPTURE ? "(" : "(?:"); + + int parseFlags; + parseExpr(&parseFlags); + + if (!hasChar() || getChar() != ')') + status_exception::raise(Arg::Gds(isc_invalid_similar_pattern)); + + re2PatternStr.append(")"); + + *parseFlagOut |= parseFlags & PARSE_FLAG_NOT_EMPTY; + } + else + { + patternPos = savePos; + + bool controlChar = false; + + do + { + auto charSavePos = patternPos; + op = getChar(); + + if (useEscape && op == escapeChar) + { + // The escape character must be followed by the character it escapes. + if (!hasChar()) + status_exception::raise(Arg::Gds(isc_escape_invalid)); + + charSavePos = patternPos; + op = getChar(); + + if (!isSpecial(op) && op != escapeChar) + status_exception::raise(Arg::Gds(isc_escape_invalid)); + } + else + { + if (isSpecial(op)) + { + controlChar = true; + patternPos = charSavePos; + } + } + + if (!controlChar) + { + if (isRe2Special(op)) + re2PatternStr.append("\\"); + + re2PatternStr.append(patternStr + charSavePos, patternStr + patternPos); + } + } while (!controlChar && hasChar()); + + if (patternPos == savePos) + status_exception::raise(Arg::Gds(isc_invalid_similar_pattern)); + + *parseFlagOut |= PARSE_FLAG_NOT_EMPTY; + } + } + + const string& getRe2PatternStr() const + { + return re2PatternStr; + } + + private: + static const int PARSE_FLAG_NOT_EMPTY = 1; // known never to match empty string + + string re2PatternStr; + const char* patternStr; + int32_t 
patternPos; + int32_t patternLen; + UChar32 escapeChar; + unsigned flags; + bool useEscape; + }; + + class SubstringSimilarCompiler + { + public: + SubstringSimilarCompiler(MemoryPool& pool, AutoPtr& regexp, unsigned flags, + const char* aPatternStr, unsigned aPatternLen, + const char* escapeStr, unsigned escapeLen) + : patternStr(aPatternStr), + patternPos(0), + patternLen(aPatternLen) + { + int32_t escapePos = 0; + U8_NEXT_UNSAFE(escapeStr, escapePos, escapeChar); + + if (escapePos != escapeLen) + status_exception::raise(Arg::Gds(isc_escape_invalid)); + + unsigned positions[2]; + unsigned part = 0; + + while (hasChar()) + { + auto c = getChar(); + + if (c != escapeChar) + continue; + + if (!hasChar()) + status_exception::raise(Arg::Gds(isc_invalid_similar_pattern)); + + c = getChar(); + + if (c == '"') + { + if (part >= 2) + status_exception::raise(Arg::Gds(isc_invalid_similar_pattern)); + + positions[part++] = patternPos; + } + } + + if (part != 2) + status_exception::raise(Arg::Gds(isc_invalid_similar_pattern)); + + AutoPtr regexp1, regexp2, regexp3; + + SimilarToCompiler compiler1(pool, regexp1, (flags & FLAG_CASE_INSENSITIVE) | FLAG_PREFER_FEWER, + aPatternStr, positions[0] - escapeLen - 1, escapeStr, escapeLen); + + SimilarToCompiler compiler2(pool, regexp2, (flags & FLAG_CASE_INSENSITIVE), + aPatternStr + positions[0], positions[1] - positions[0] - escapeLen - 1, escapeStr, escapeLen); + + SimilarToCompiler compiler3(pool, regexp3, (flags & FLAG_CASE_INSENSITIVE) | FLAG_PREFER_FEWER, + aPatternStr + positions[1], patternLen - positions[1], escapeStr, escapeLen); + + string finalRe2Pattern; + finalRe2Pattern.reserve( + 1 + // ( + compiler1.getRe2PatternStr().length() + + 2 + // )( + compiler2.getRe2PatternStr().length() + + 2 + // )( + compiler3.getRe2PatternStr().length() + + 1 // ) + ); + + finalRe2Pattern.append("("); + finalRe2Pattern.append(compiler1.getRe2PatternStr()); + finalRe2Pattern.append(")("); + 
finalRe2Pattern.append(compiler2.getRe2PatternStr()); + finalRe2Pattern.append(")("); + finalRe2Pattern.append(compiler3.getRe2PatternStr()); + finalRe2Pattern.append(")"); + + RE2::Options options; + options.set_log_errors(false); + options.set_dot_nl(true); + + re2::StringPiece sp((const char*) finalRe2Pattern.c_str(), finalRe2Pattern.length()); + regexp = FB_NEW_POOL(pool) RE2(sp, options); + + if (!regexp->ok()) + status_exception::raise(Arg::Gds(isc_invalid_similar_pattern)); + } + + bool hasChar() + { + return patternPos < patternLen; + } + + UChar32 getChar() + { + fb_assert(hasChar()); + UChar32 c; + U8_NEXT_UNSAFE(patternStr, patternPos, c); + return c; + } + + UChar32 peekChar() + { + auto savePos = patternPos; + auto c = getChar(); + patternPos = savePos; + return c; + } + + private: + const char* patternStr; + int32_t patternPos; + int32_t patternLen; + UChar32 escapeChar; + }; +} // namespace + +namespace Firebird { + + +SimilarToRegex::SimilarToRegex(MemoryPool& pool, bool caseInsensitive, + const char* patternStr, unsigned patternLen, const char* escapeStr, unsigned escapeLen) + : PermanentStorage(pool) +{ + SimilarToCompiler compiler(pool, regexp, + FLAG_GROUP_CAPTURE | FLAG_PREFER_FEWER | (caseInsensitive ? 
FLAG_CASE_INSENSITIVE : 0), + patternStr, patternLen, escapeStr, escapeLen); +} + +bool SimilarToRegex::matches(const char* buffer, unsigned bufferLen, Array* matchPosArray) +{ + re2::StringPiece sp(buffer, bufferLen); + + if (matchPosArray) + { + const int argsCount = regexp->NumberOfCapturingGroups(); + + Array resSps(argsCount); + resSps.resize(argsCount); + + Array args(argsCount); + args.resize(argsCount); + + Array argsPtr(argsCount); + + { // scope + auto resSp = resSps.begin(); + + for (auto& arg : args) + { + arg = resSp++; + argsPtr.push(&arg); + } + } + + if (RE2::FullMatchN(sp, *regexp.get(), argsPtr.begin(), argsCount)) + { + matchPosArray->clear(); + + for (const auto resSp : resSps) + { + matchPosArray->push(MatchPos{ + static_cast(resSp.data() - sp.begin()), + static_cast(resSp.length()) + }); + } + + return true; + } + else + return false; + } + else + return RE2::FullMatch(sp, *regexp.get()); +} + +//--------------------- + +SubstringSimilarRegex::SubstringSimilarRegex(MemoryPool& pool, bool caseInsensitive, + const char* patternStr, unsigned patternLen, const char* escapeStr, unsigned escapeLen) + : PermanentStorage(pool) +{ + SubstringSimilarCompiler compiler(pool, regexp, + (caseInsensitive ? 
FLAG_CASE_INSENSITIVE : 0), + patternStr, patternLen, escapeStr, escapeLen); +} + +bool SubstringSimilarRegex::matches(const char* buffer, unsigned bufferLen, + unsigned* resultStart, unsigned* resultLength) +{ + re2::StringPiece sp(buffer, bufferLen); + + re2::StringPiece spResult; + + if (RE2::FullMatch(sp, *regexp.get(), nullptr, &spResult, nullptr)) + { + *resultStart = spResult.begin() - buffer; + *resultLength = spResult.length(); + return true; + } + else + return false; +} + + +} // namespace Firebird diff --git a/src/common/SimilarToRegex.h b/src/common/SimilarToRegex.h new file mode 100644 index 0000000000..e1b2554bb9 --- /dev/null +++ b/src/common/SimilarToRegex.h @@ -0,0 +1,75 @@ +/* + * The contents of this file are subject to the Initial + * Developer's Public License Version 1.0 (the "License"); + * you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * http://www.ibphoenix.com/main.nfs?a=ibphoenix&page=ibp_idpl. + * + * Software distributed under the License is distributed AS IS, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. + * See the License for the specific language governing rights + * and limitations under the License. + * + * The Original Code was created by Adriano dos Santos Fernandes + * for the Firebird Open Source RDBMS project. + * + * Copyright (c) 2019 Adriano dos Santos Fernandes + * and all contributors signed below. + * + */ + +#ifndef COMMON_SIMILAR_TO_REGEX_H +#define COMMON_SIMILAR_TO_REGEX_H + +#include "firebird.h" +#include +#include "../common/classes/auto.h" +#include "../common/classes/array.h" +#include "../common/classes/fb_string.h" + +namespace Firebird { + + +//// FIXME: Leak re2::RE2 when destroyed by pool. 
+class SimilarToRegex : public PermanentStorage +{ +public: + struct MatchPos + { + unsigned start; + unsigned length; + }; + +public: + SimilarToRegex(MemoryPool& pool, bool caseInsensitive, + const char* patternStr, unsigned patternLen, const char* escapeStr, unsigned escapeLen); + +public: + bool matches(const char* buffer, unsigned bufferLen, Array* matchPosArray = nullptr); + +private: + AutoPtr regexp; +}; + +//// FIXME: Leak re2::RE2 when destroyed by pool. +// Given a regular expression R1#R2#R3 and the string S: +// - Find the shortest substring of S that matches R1 while the remainder (S23) matches R2R3; +// - Find the longest (S2) substring of S23 that matches R2 while the remainder matches R3; +// - Return S2. +class SubstringSimilarRegex : public PermanentStorage +{ +public: + SubstringSimilarRegex(MemoryPool& pool, bool caseInsensitive, + const char* patternStr, unsigned patternLen, const char* escapeStr, unsigned escapeLen); + +public: + bool matches(const char* buffer, unsigned bufferLen, unsigned* resultStart, unsigned* resultLength); + +private: + AutoPtr regexp; +}; + + +} // namespace Firebird + +#endif // COMMON_SIMILAR_TO_REGEX_H diff --git a/src/common/TextType.cpp b/src/common/TextType.cpp index ff48778f39..cd69aa5c80 100644 --- a/src/common/TextType.cpp +++ b/src/common/TextType.cpp @@ -169,33 +169,6 @@ TextType::TextType(TTYPE_ID _type, texttype *_tt, CharSet* _cs) memset(&canonicalChars[conversions[i].ch], 0, sizeof(ULONG)); } } - - struct Conversion2 - { - const char* str; - UCHAR* buffer; - }; - - const Conversion2 conversions2[] = - { - {"0123456789", reinterpret_cast(canonicalNumbers)}, - {"abcdefghijklmnopqrstuvwxyz", reinterpret_cast(canonicalLowerLetters)}, - {"ABCDEFGHIJKLMNOPQRSTUVWXYZ", reinterpret_cast(canonicalUpperLetters)}, - {" \t\v\r\n\f", reinterpret_cast(canonicalWhiteSpaces)} - }; - - for (int i = 0; i < FB_NELEM(conversions2); i++) - { - UCHAR temp[sizeof(ULONG)]; - - for (const char* p = conversions2[i].str; *p; ++p) 
- { - USHORT code = static_cast(*p); - ULONG length = getCharSet()->getConvFromUnicode().convert(sizeof(code), &code, sizeof(temp), temp); - const size_t pos = (p - conversions2[i].str) * getCanonicalWidth(); - canonical(length, temp, sizeof(ULONG), &conversions2[i].buffer[pos]); - } - } } diff --git a/src/common/TextType.h b/src/common/TextType.h index 469afb530b..bfe0e68a26 100644 --- a/src/common/TextType.h +++ b/src/common/TextType.h @@ -138,47 +138,8 @@ public: return reinterpret_cast(&canonicalChars[ch]); } - const UCHAR* getCanonicalNumbers(int* count = NULL) const - { - if (count) - *count = 10; - return reinterpret_cast(canonicalNumbers); - } - - const UCHAR* getCanonicalLowerLetters(int* count = NULL) const - { - if (count) - *count = 26; - return reinterpret_cast(canonicalLowerLetters); - } - - const UCHAR* getCanonicalUpperLetters(int* count = NULL) const - { - if (count) - *count = 26; - return reinterpret_cast(canonicalUpperLetters); - } - - const UCHAR* getCanonicalWhiteSpaces(int* count = NULL) const - { - if (count) - *count = 6; - return reinterpret_cast(canonicalWhiteSpaces); - } - - const UCHAR* getCanonicalSpace(int* count = NULL) const - { - if (count) - *count = 1; - return getCanonicalChar(CHAR_SPACE); - } - private: ULONG canonicalChars[CHAR_COUNT]; - ULONG canonicalNumbers[10]; - ULONG canonicalLowerLetters[26]; - ULONG canonicalUpperLetters[26]; - ULONG canonicalWhiteSpaces[6]; }; } // namespace Jrd diff --git a/src/common/unicode_util.cpp b/src/common/unicode_util.cpp index cf51d5b5cd..af5b3ff98f 100644 --- a/src/common/unicode_util.cpp +++ b/src/common/unicode_util.cpp @@ -1031,6 +1031,37 @@ INTL_BOOL UnicodeUtil::utf32WellFormed(ULONG len, const ULONG* str, ULONG* offen return true; // well-formed } +void UnicodeUtil::utf8Normalize(UCharBuffer& data) +{ + ICU* icu = loadICU("", ""); + + HalfStaticArray utf16Buffer(data.getCount()); + USHORT errCode; + ULONG errPosition; + ULONG utf16BufferLen = utf8ToUtf16(data.getCount(), 
data.begin(), data.getCount() * sizeof(USHORT), + utf16Buffer.getBuffer(data.getCount()), &errCode, &errPosition); + + UTransliterator* trans = icu->getCiAiTransliterator(); + + if (trans) + { + const int32_t capacity = utf16Buffer.getCount() * sizeof(USHORT); + int32_t len = utf16BufferLen / sizeof(USHORT); + int32_t limit = len; + + UErrorCode errorCode = U_ZERO_ERROR; + icu->utransTransUChars(trans, reinterpret_cast(utf16Buffer.begin()), + &len, capacity, 0, &limit, &errorCode); + icu->releaseCiAiTransliterator(trans); + + len = utf16ToUtf8(utf16BufferLen, utf16Buffer.begin(), + len * 4, data.getBuffer(len * 4, false), + &errCode, &errPosition); + + data.shrink(len); + } +} + UnicodeUtil::ICU* UnicodeUtil::loadICU(const string& icuVersion, const string& configInfo) { ObjectsArray versions; diff --git a/src/common/unicode_util.h b/src/common/unicode_util.h index ff2e30ef27..e57e9a9eed 100644 --- a/src/common/unicode_util.h +++ b/src/common/unicode_util.h @@ -177,6 +177,8 @@ public: static INTL_BOOL utf16WellFormed(ULONG len, const USHORT* str, ULONG* offending_position); static INTL_BOOL utf32WellFormed(ULONG len, const ULONG* str, ULONG* offending_position); + static void utf8Normalize(Firebird::UCharBuffer& data); + static ConversionICU& getConversionICU(); static ICU* loadICU(const Firebird::string& icuVersion, const Firebird::string& configInfo); static bool getCollVersion(const Firebird::string& icuVersion, diff --git a/src/dsql/BoolNodes.cpp b/src/dsql/BoolNodes.cpp index 205fc11cc2..5b6e16a9d1 100644 --- a/src/dsql/BoolNodes.cpp +++ b/src/dsql/BoolNodes.cpp @@ -945,7 +945,7 @@ bool ComparativeBoolNode::stringBoolean(thread_db* tdbb, jrd_req* request, dsc* else // nod_similar { impure->vlu_misc.vlu_invariant = evaluator = obj->createSimilarToMatcher( - *tdbb->getDefaultPool(), p2, l2, escape_str, escape_length); + tdbb, *tdbb->getDefaultPool(), p2, l2, escape_str, escape_length); } } else @@ -961,7 +961,7 @@ bool 
ComparativeBoolNode::stringBoolean(thread_db* tdbb, jrd_req* request, dsc* } else // nod_similar { - evaluator = obj->createSimilarToMatcher(*tdbb->getDefaultPool(), + evaluator = obj->createSimilarToMatcher(tdbb, *tdbb->getDefaultPool(), p2, l2, escape_str, escape_length); } @@ -1152,7 +1152,7 @@ bool ComparativeBoolNode::stringFunction(thread_db* tdbb, jrd_req* request, else // nod_similar { impure->vlu_misc.vlu_invariant = evaluator = obj->createSimilarToMatcher( - *tdbb->getDefaultPool(), p2, l2, escape_str, escape_length); + tdbb, *tdbb->getDefaultPool(), p2, l2, escape_str, escape_length); } } else @@ -1170,7 +1170,7 @@ bool ComparativeBoolNode::stringFunction(thread_db* tdbb, jrd_req* request, return obj->like(*tdbb->getDefaultPool(), p1, l1, p2, l2, escape_str, escape_length); // nod_similar - return obj->similarTo(*tdbb->getDefaultPool(), p1, l1, p2, l2, escape_str, escape_length); + return obj->similarTo(tdbb, *tdbb->getDefaultPool(), p1, l1, p2, l2, escape_str, escape_length); } // Handle MATCHES diff --git a/src/dsql/ExprNodes.cpp b/src/dsql/ExprNodes.cpp index b2208674f1..dd75a5d6b7 100644 --- a/src/dsql/ExprNodes.cpp +++ b/src/dsql/ExprNodes.cpp @@ -11873,7 +11873,7 @@ dsc* SubstringSimilarNode::execute(thread_db* tdbb, jrd_req* request) const delete impure->vlu_misc.vlu_invariant; impure->vlu_misc.vlu_invariant = evaluator = collation->createSubstringSimilarMatcher( - *tdbb->getDefaultPool(), patternStr, patternLen, escapeStr, escapeLen); + tdbb, *tdbb->getDefaultPool(), patternStr, patternLen, escapeStr, escapeLen); impure->vlu_flags |= VLU_computed; } @@ -11885,7 +11885,7 @@ dsc* SubstringSimilarNode::execute(thread_db* tdbb, jrd_req* request) const } else { - autoEvaluator = evaluator = collation->createSubstringSimilarMatcher(*tdbb->getDefaultPool(), + autoEvaluator = evaluator = collation->createSubstringSimilarMatcher(tdbb, *tdbb->getDefaultPool(), patternStr, patternLen, escapeStr, escapeLen); } diff --git a/src/jrd/Collation.cpp 
b/src/jrd/Collation.cpp index 46e56d887e..f7d2445163 100644 --- a/src/jrd/Collation.cpp +++ b/src/jrd/Collation.cpp @@ -99,16 +99,177 @@ #include "../jrd/intl_classes.h" #include "../jrd/lck_proto.h" #include "../jrd/intl_classes.h" +#include "../jrd/intl_proto.h" #include "../jrd/Collation.h" #include "../common/TextType.h" +#include "../common/SimilarToRegex.h" -#include "../jrd/SimilarToMatcher.h" - +using namespace Firebird; using namespace Jrd; namespace { +//// TODO: NONE / OCTETS. +class Re2SimilarMatcher : public PatternMatcher +{ +public: + Re2SimilarMatcher(thread_db* tdbb, MemoryPool& pool, TextType* textType, + const UCHAR* patternStr, SLONG patternLen, const UCHAR* escapeStr, SLONG escapeLen) + : PatternMatcher(pool, textType), + buffer(pool) + { + CsConvert converter = INTL_convert_lookup(tdbb, CS_UTF8, textType->getCharSet()->getId()); + + UCharBuffer patternBuffer, escapeBuffer; + + converter.convert(patternLen, patternStr, patternBuffer); + + if (textType->getFlags() & TEXTTYPE_ATTR_ACCENT_INSENSITIVE) + UnicodeUtil::utf8Normalize(patternBuffer); + + if (escapeStr) + { + converter.convert(escapeLen, escapeStr, escapeBuffer); + + if (textType->getFlags() & TEXTTYPE_ATTR_ACCENT_INSENSITIVE) + UnicodeUtil::utf8Normalize(escapeBuffer); + } + + regex = FB_NEW_POOL(pool) SimilarToRegex(pool, + (textType->getFlags() & TEXTTYPE_ATTR_CASE_INSENSITIVE), + (const char*) patternBuffer.begin(), patternBuffer.getCount(), + (escapeStr ? 
(const char*) escapeBuffer.begin() : nullptr), escapeBuffer.getCount()); + } + +public: + static Re2SimilarMatcher* create(thread_db* tdbb, MemoryPool& pool, TextType* textType, + const UCHAR* patternStr, SLONG patternLen, const UCHAR* escapeStr, SLONG escapeLen) + { + return FB_NEW_POOL(pool) Re2SimilarMatcher(tdbb, pool, textType, patternStr, patternLen, escapeStr, escapeLen); + } + + static bool evaluate(thread_db* tdbb, MemoryPool& pool, TextType* textType, const UCHAR* str, SLONG strLen, + const UCHAR* patternStr, SLONG patternLen, const UCHAR* escapeStr, SLONG escapeLen) + { + Re2SimilarMatcher matcher(tdbb, pool, textType, patternStr, patternLen, escapeStr, escapeLen); + matcher.process(str, strLen); + return matcher.result(); + } + +public: + virtual void reset() + { + buffer.shrink(0); + } + + virtual bool process(const UCHAR* data, SLONG dataLen) + { + const FB_SIZE_T pos = buffer.getCount(); + memcpy(buffer.getBuffer(pos + dataLen) + pos, data, dataLen); + return true; + } + + virtual bool result() + { + if (textType->getFlags() & TEXTTYPE_ATTR_ACCENT_INSENSITIVE) + UnicodeUtil::utf8Normalize(buffer); + + return regex->matches((const char*) buffer.begin(), buffer.getCount()); + } + +private: + AutoPtr<SimilarToRegex> regex; + UCharBuffer buffer; +}; + +class Re2SubstringSimilarMatcher : public BaseSubstringSimilarMatcher +{ +public: + Re2SubstringSimilarMatcher(thread_db* tdbb, MemoryPool& pool, TextType* textType, + const UCHAR* patternStr, SLONG patternLen, const UCHAR* escapeStr, SLONG escapeLen) + : BaseSubstringSimilarMatcher(pool, textType), + buffer(pool), + resultStart(0), + resultLength(0) + { + /* Destination charset first, source charset second - same order as Re2SimilarMatcher: the pattern and escape are converted from the collation's charset to UTF-8 before being handed to the regex. */ + CsConvert converter = INTL_convert_lookup(tdbb, CS_UTF8, textType->getCharSet()->getId()); + + UCharBuffer patternBuffer, escapeBuffer; + + converter.convert(patternLen, patternStr, patternBuffer); + + if (textType->getFlags() & TEXTTYPE_ATTR_ACCENT_INSENSITIVE) + UnicodeUtil::utf8Normalize(patternBuffer); + + if (escapeStr) + { + converter.convert(escapeLen, escapeStr, 
escapeBuffer); + + if (textType->getFlags() & TEXTTYPE_ATTR_ACCENT_INSENSITIVE) + UnicodeUtil::utf8Normalize(escapeBuffer); + } + + regex = FB_NEW_POOL(pool) SubstringSimilarRegex(pool, + (textType->getFlags() & TEXTTYPE_ATTR_CASE_INSENSITIVE), + (const char*) patternBuffer.begin(), patternBuffer.getCount(), + (escapeStr ? (const char*) escapeBuffer.begin() : nullptr), escapeBuffer.getCount()); + } + + virtual ~Re2SubstringSimilarMatcher() + { + } + +public: + static Re2SubstringSimilarMatcher* create(thread_db* tdbb, MemoryPool& pool, TextType* textType, + const UCHAR* patternStr, SLONG patternLen, const UCHAR* escapeStr, SLONG escapeLen) + { + return FB_NEW_POOL(pool) Re2SubstringSimilarMatcher(tdbb, pool, textType, + patternStr, patternLen, escapeStr, escapeLen); + } + + static bool evaluate(thread_db* tdbb, MemoryPool& pool, TextType* textType, const UCHAR* str, SLONG strLen, + const UCHAR* patternStr, SLONG patternLen, const UCHAR* escapeStr, SLONG escapeLen) + { + Re2SubstringSimilarMatcher matcher(tdbb, pool, textType, patternStr, patternLen, escapeStr, escapeLen); + matcher.process(str, strLen); + return matcher.result(); + } + +public: + virtual void reset() + { + buffer.shrink(0); + resultStart = resultLength = 0; + } + + virtual bool process(const UCHAR* data, SLONG dataLen) + { + const FB_SIZE_T pos = buffer.getCount(); + memcpy(buffer.getBuffer(pos + dataLen) + pos, data, dataLen); + return true; + } + + virtual bool result() + { + if (textType->getFlags() & TEXTTYPE_ATTR_ACCENT_INSENSITIVE) + UnicodeUtil::utf8Normalize(buffer); + + return regex->matches((const char*) buffer.begin(), buffer.getCount(), &resultStart, &resultLength); + } + + virtual void getResultInfo(unsigned* start, unsigned* length) + { + *start = resultStart; + *length = resultLength; + } + +private: + /* Owns the compiled SubstringSimilarRegex allocated in the constructor. */ + AutoPtr<SubstringSimilarRegex> regex; + UCharBuffer buffer; + unsigned resultStart, resultLength; +}; + // constants used in matches and sleuth const int CHAR_GDML_MATCH_ONE = TextType::CHAR_QUESTION_MARK; 
const int CHAR_GDML_MATCH_ANY = TextType::CHAR_ASTERISK; @@ -725,8 +886,6 @@ template < typename pStartsMatcher, typename pContainsMatcher, typename pLikeMatcher, - typename pSimilarToMatcher, - typename pSubstringSimilarMatcher, typename pMatchesMatcher, typename pSleuthMatcher > @@ -781,22 +940,22 @@ public: getCharSet()->getSqlMatchOne(), getCharSet()->getSqlMatchOneLength()); } - virtual bool similarTo(MemoryPool& pool, const UCHAR* s, SLONG sl, + virtual bool similarTo(thread_db* tdbb, MemoryPool& pool, const UCHAR* s, SLONG sl, const UCHAR* p, SLONG pl, const UCHAR* escape, SLONG escapeLen) { - return pSimilarToMatcher::evaluate(pool, this, s, sl, p, pl, escape, escapeLen); + return Re2SimilarMatcher::evaluate(tdbb, pool, this, s, sl, p, pl, escape, escapeLen); } - virtual PatternMatcher* createSimilarToMatcher(MemoryPool& pool, const UCHAR* p, SLONG pl, + virtual PatternMatcher* createSimilarToMatcher(thread_db* tdbb, MemoryPool& pool, const UCHAR* p, SLONG pl, const UCHAR* escape, SLONG escapeLen) { - return pSimilarToMatcher::create(pool, this, p, pl, escape, escapeLen); + return Re2SimilarMatcher::create(tdbb, pool, this, p, pl, escape, escapeLen); } - virtual BaseSubstringSimilarMatcher* createSubstringSimilarMatcher(MemoryPool& pool, + virtual BaseSubstringSimilarMatcher* createSubstringSimilarMatcher(thread_db* tdbb, MemoryPool& pool, const UCHAR* p, SLONG pl, const UCHAR* escape, SLONG escapeLen) { - return pSubstringSimilarMatcher::create(pool, this, p, pl, escape, escapeLen); + return Re2SubstringSimilarMatcher::create(tdbb, pool, this, p, pl, escape, escapeLen); } virtual bool contains(MemoryPool& pool, const UCHAR* s, SLONG sl, const UCHAR* p, SLONG pl) @@ -823,8 +982,6 @@ Collation* newCollation(MemoryPool& pool, TTYPE_ID id, texttype* tt, CharSet* cs StartsMatcherUCharDirect, ContainsMatcherUCharDirect, LikeMatcher, - SimilarToMatcher, - SubstringSimilarMatcher, MatchesMatcher, SleuthMatcher > DirectImpl; @@ -833,8 +990,6 @@ Collation* 
newCollation(MemoryPool& pool, TTYPE_ID id, texttype* tt, CharSet* cs StartsMatcherUCharCanonical, ContainsMatcher, LikeMatcher, - SimilarToMatcher, - SubstringSimilarMatcher, MatchesMatcher, SleuthMatcher > NonDirectImpl; diff --git a/src/jrd/Collation.h b/src/jrd/Collation.h index 6607f97c66..268411d5f2 100644 --- a/src/jrd/Collation.h +++ b/src/jrd/Collation.h @@ -66,12 +66,12 @@ public: virtual PatternMatcher* createLikeMatcher(MemoryPool& pool, const UCHAR* p, SLONG pl, const UCHAR* escape, SLONG escapeLen) = 0; - virtual bool similarTo(MemoryPool& pool, const UCHAR* s, SLONG sl, const UCHAR* p, SLONG pl, + virtual bool similarTo(thread_db* tdbb, MemoryPool& pool, const UCHAR* s, SLONG sl, const UCHAR* p, SLONG pl, const UCHAR* escape, SLONG escapeLen) = 0; - virtual PatternMatcher* createSimilarToMatcher(MemoryPool& pool, const UCHAR* p, SLONG pl, + virtual PatternMatcher* createSimilarToMatcher(thread_db* tdbb, MemoryPool& pool, const UCHAR* p, SLONG pl, const UCHAR* escape, SLONG escapeLen) = 0; - virtual BaseSubstringSimilarMatcher* createSubstringSimilarMatcher(MemoryPool& pool, + virtual BaseSubstringSimilarMatcher* createSubstringSimilarMatcher(thread_db* tdbb, MemoryPool& pool, const UCHAR* p, SLONG pl, const UCHAR* escape, SLONG escapeLen) = 0; virtual bool contains(MemoryPool& pool, const UCHAR* s, SLONG sl, const UCHAR* p, SLONG pl) = 0; diff --git a/src/jrd/IntlManager.cpp b/src/jrd/IntlManager.cpp index ccc9857e1d..4e4a517ac1 100644 --- a/src/jrd/IntlManager.cpp +++ b/src/jrd/IntlManager.cpp @@ -654,6 +654,7 @@ bool IntlManager::lookupCollation(const string& collationName, attributes, specificAttributes, specificAttributesLen, ignoreAttributes, collationExternalInfo.configInfo.c_str())) { + tt->texttype_flags = attributes; return true; } } diff --git a/src/jrd/SimilarToMatcher.h b/src/jrd/SimilarToMatcher.h deleted file mode 100644 index c931ac5c23..0000000000 --- a/src/jrd/SimilarToMatcher.h +++ /dev/null @@ -1,1919 +0,0 @@ -/* - * PROGRAM: JRD 
International support - * MODULE: SimilarToMatcher.h - * DESCRIPTION: SIMILAR TO predicate - * - * The contents of this file are subject to the Initial - * Developer's Public License Version 1.0 (the "License"); - * you may not use this file except in compliance with the - * License. You may obtain a copy of the License at - * http://www.ibphoenix.com/main.nfs?a=ibphoenix&page=ibp_idpl. - * - * Software distributed under the License is distributed AS IS, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. - * See the License for the specific language governing rights - * and limitations under the License. - * - * The Original Code was created by Adriano dos Santos Fernandes - * for the Firebird Open Source RDBMS project. - * - * Copyright (c) 2007 Adriano dos Santos Fernandes - * and all contributors signed below. - * - * All Rights Reserved. - * Contributor(s): ______________________________________. - */ - -#ifndef JRD_SIMILAR_TO_EVALUATOR_H -#define JRD_SIMILAR_TO_EVALUATOR_H - -#include "../jrd/intl_classes.h" -#include "../jrd/evl_string.h" - -// #define DEBUG_SIMILAR - -#ifdef DEBUG_SIMILAR -// #define RECURSIVE_SIMILAR // useless in production due to stack overflow -#endif - -namespace Firebird -{ - -template > -class SimilarToMatcher : public Jrd::PatternMatcher -{ -private: - typedef Jrd::CharSet CharSet; - typedef Jrd::TextType TextType; - - // This class is based on work of Zafir Anjum - // http://www.codeguru.com/Cpp/Cpp/string/regex/article.php/c2791 - // which has been derived from work by Henry Spencer. - // - // The original copyright notice follows: - // - // Copyright (c) 1986, 1993, 1995 by University of Toronto. - // Written by Henry Spencer. Not derived from licensed software. - // - // Permission is granted to anyone to use this software for any - // purpose on any computer system, and to redistribute it in any way, - // subject to the following restrictions: - // - // 1. 
The author is not responsible for the consequences of use of - // this software, no matter how awful, even if they arise - // from defects in it. - // - // 2. The origin of this software must not be misrepresented, either - // by explicit claim or by omission. - // - // 3. Altered versions must be plainly marked as such, and must not - // be misrepresented (by explicit claim or omission) as being - // the original software. - // - // 4. This notice must not be removed or altered. - class Evaluator : private StaticAllocator - { - public: - Evaluator(MemoryPool& pool, TextType* aTextType, - const UCHAR* patternStr, SLONG patternLen, - CharType aEscapeChar, bool aUseEscape); - - ~Evaluator() - { - delete[] branches; - } - - bool getResult(); - bool processNextChunk(const UCHAR* data, SLONG dataLen); - void reset(); - - private: - enum Op - { - opBranch, - opStart, - opEnd, - opRef, - opRepeatingRefStart, - opRepeatingRefEnd, - opNothing, - opAny, - opAnyOf, - opExactly, - opExactlyOne, // optimization for opExactly with a single character - // Implementation details of the non-recursive match - opRet, - opRepeatingRestore - // If new codes are added, shifts in MatchState codes may need to change. 
- }; - - struct Node - { - explicit Node(Op aOp, const CharType* aStr = NULL, SLONG aLen = 0) - : op(aOp), - str(aStr), - len(aLen), - str2(NULL), - len2(0), - str3(aStr), - len3(aLen), - str4(NULL), - len4(0), - ref(0), - branchNum(-1) - { - } - - Node(Op aOp, SLONG aLen1, SLONG aLen2, int aRef) - : op(aOp), - str(NULL), - len(aLen1), - str2(NULL), - len2(aLen2), - str3(NULL), - len3(0), - str4(NULL), - len4(0), - ref(aRef), - branchNum(-1) - { - } - - Node(Op aOp, int aRef) - : op(aOp), - str(NULL), - len(0), - str2(NULL), - len2(0), - str3(NULL), - len3(0), - str4(NULL), - len4(0), - ref(aRef), - branchNum(-1) - { - } - - Node(const Node& node) - : op(node.op), - str(node.str), - len(node.len), - str2(node.str2), - len2(node.len2), - str3(node.str3), - len3(node.len3), - str4(node.str4), - len4(node.len4), - ref(node.ref), - branchNum(node.branchNum) - { - } - -#ifdef DEBUG_SIMILAR - void dump(string& text, int i) const - { - string temp; - - switch (op) - { - case opBranch: - if (branchNum == -1) - temp.printf("opBranch(%d)", i + ref); - else - temp.printf("opBranch(%d, %d)", i + ref, branchNum); - break; - - case opStart: - temp = "opStart"; - break; - - case opEnd: - temp = "opEnd"; - break; - - case opRef: - if (branchNum == -1) - temp.printf("opRef(%d)", i + ref); - else - temp.printf("opRef(%d, %d)", i + ref, branchNum); - break; - - case opRepeatingRefStart: - temp.printf("opRepeatingRefStart(%d, %d)", i + ref, len); - break; - - case opRepeatingRefEnd: - temp.printf("opRepeatingRefEnd(%d)", i + ref); - break; - - case opNothing: - temp = "opNothing"; - break; - - case opAny: - temp = "opAny"; - break; - - case opAnyOf: - temp.printf("opAnyOf(%.*s, %d, %.*s, %d, %.*s, %d, %.*s, %d)", - len, str, len, len2, str2, len2, len3, str3, len3, len4, str4, len4); - break; - - case opExactly: - temp.printf("opExactly(%.*s, %d)", len, str, len); - break; - - case opExactlyOne: - temp.printf("opExactlyOne(%.*s)", len, str); - break; - - case opRet: - 
temp.printf("opRet"); - break; - - case opRepeatingRestore: - temp.printf("opRepeatingRestore"); - break; - - default: - temp = "unknown"; - break; - } - - text.printf("%d: %s", i, temp.c_str()); - } -#endif // DEBUG_SIMILAR - - Op op; - const CharType* str; - SLONG len; - const UCHAR* str2; - SLONG len2; - const CharType* str3; - SLONG len3; - const UCHAR* str4; - SLONG len4; - int ref; - int branchNum; - }; - -#ifndef RECURSIVE_SIMILAR - // Struct used to evaluate expressions without recursion. - // Represents local variables to implement a "virtual stack". - struct Scope - { - inline explicit Scope(const Node* ai) - : i(ai), - save(NULL) - { - } - - inline void operator =(const Node* ai) - { - i = ai; - save = NULL; - } - - const Node* i; - const CharType* save; - }; - - // Stack for recursion emulation. - template - class SimpleStack - { - public: - SimpleStack() - : size(INCREASE_FACTOR) - { - data = FB_NEW_POOL(*getDefaultMemoryPool()) UCHAR[(size + 1) * sizeof(T)]; - back = (T*) FB_ALIGN(data.get(), sizeof(T)); - end = back + size; - - // 'back' starts before initial element, then always points to the last pushed element. - --back; - } - - template - inline void push(T2 node) - { - // If the limit is reached, resize. 
- if (++back == end) - { - unsigned newSize = size + INCREASE_FACTOR; - UCHAR* newData = FB_NEW_POOL(*getDefaultMemoryPool()) UCHAR[(newSize + 1) * sizeof(T)]; - - T* p = (T*) FB_ALIGN(newData, sizeof(T)); - memcpy(p, end - size, size * sizeof(T)); - - back = p + size; - end = p + newSize; - size = newSize; - - data.reset(newData); - } - - *back = node; - } - - inline T pop() - { - fb_assert(getCount() > 0); - return *back--; - } - - inline T* begin() const - { - return (T*) FB_ALIGN(data.get(), sizeof(T)); - } - - inline FB_SIZE_T getCount() const - { - return (back + 1) - begin(); - } - - public: - T* back; - - private: - static const unsigned INCREASE_FACTOR = 50; - unsigned size; - AutoPtr data; - T* end; - }; -#endif // RECURSIVE_SIMILAR - - static const int FLAG_NOT_EMPTY = 1; // known never to match empty string - static const int FLAG_EXACTLY = 2; // non-escaped string - - private: - void parseExpr(int* flagp); - void parseTerm(int* flagp); - void parseFactor(int* flagp); - void parsePrimary(int* flagp); - bool isRep(CharType c) const; - - CharType canonicalChar(int ch) const - { - return *reinterpret_cast(textType->getCanonicalChar(ch)); - } - -#ifdef DEBUG_SIMILAR - void dump() const; -#endif - - private: -#ifdef RECURSIVE_SIMILAR - bool match(int start); -#else - bool match(); -#endif - - private: - static SLONG notInSet(const CharType* str, SLONG strLen, - const CharType* set, SLONG setLen); - - private: - struct Range - { - unsigned start; - unsigned length; - }; - -#ifdef DEBUG_SIMILAR - Array debugLog; - int debugLevel; -#endif - - TextType* textType; - CharType escapeChar; - bool useEscape; - HalfStaticArray buffer; - const UCHAR* originalPatternStr; - SLONG originalPatternLen; - StrConverter patternCvt; - CharSet* charSet; - Array nodes; - const CharType* patternStart; - const CharType* patternEnd; - const CharType* patternPos; - const CharType* bufferStart; - const CharType* bufferEnd; - const CharType* bufferPos; - CharType metaCharacters[15]; - 
- public: - unsigned branchNum; - Range* branches; - }; - -public: - SimilarToMatcher(MemoryPool& pool, TextType* ttype, const UCHAR* str, - SLONG strLen, CharType escape, bool useEscape) - : PatternMatcher(pool, ttype), - evaluator(pool, ttype, str, strLen, escape, useEscape) - { - } - - void reset() - { - evaluator.reset(); - } - - bool result() - { - return evaluator.getResult(); - } - - bool process(const UCHAR* str, SLONG length) - { - return evaluator.processNextChunk(str, length); - } - - unsigned getNumBranches() - { - return evaluator.branchNum; - } - - void getBranchInfo(unsigned n, unsigned* start, unsigned* length) - { - fb_assert(n <= evaluator.branchNum); - *start = evaluator.branches[n].start; - *length = evaluator.branches[n].length; - } - - static SimilarToMatcher* create(MemoryPool& pool, TextType* ttype, - const UCHAR* str, SLONG length, const UCHAR* escape, SLONG escapeLen) - { - StrConverter cvt_escape(pool, ttype, escape, escapeLen); - - return FB_NEW_POOL(pool) SimilarToMatcher(pool, ttype, str, length, - (escape ? *reinterpret_cast(escape) : 0), escapeLen != 0); - } - - static bool evaluate(MemoryPool& pool, TextType* ttype, const UCHAR* s, SLONG sl, - const UCHAR* p, SLONG pl, const UCHAR* escape, SLONG escapeLen) - { - StrConverter cvt_escape(pool, ttype, escape, escapeLen); - - Evaluator evaluator(pool, ttype, p, pl, - (escape ? 
*reinterpret_cast(escape) : 0), escapeLen != 0); - evaluator.processNextChunk(s, sl); - return evaluator.getResult(); - } - -private: - Evaluator evaluator; -}; - - -template -SimilarToMatcher::Evaluator::Evaluator( - MemoryPool& pool, TextType* aTextType, - const UCHAR* patternStr, SLONG patternLen, - CharType aEscapeChar, bool aUseEscape) - : StaticAllocator(pool), -#ifdef DEBUG_SIMILAR - debugLog(pool), - debugLevel(-1), -#endif - textType(aTextType), - escapeChar(aEscapeChar), - useEscape(aUseEscape), - buffer(pool), - originalPatternStr(patternStr), - originalPatternLen(patternLen), - patternCvt(pool, textType, patternStr, patternLen), - charSet(textType->getCharSet()), - nodes(pool), - branchNum(0) -{ - fb_assert(patternLen % sizeof(CharType) == 0); - patternLen /= sizeof(CharType); - - CharType* p = metaCharacters; - *p++ = canonicalChar(TextType::CHAR_CIRCUMFLEX); - *p++ = canonicalChar(TextType::CHAR_MINUS); - *p++ = canonicalChar(TextType::CHAR_UNDERLINE); - *p++ = canonicalChar(TextType::CHAR_PERCENT); - *p++ = canonicalChar(TextType::CHAR_OPEN_BRACKET); - *p++ = canonicalChar(TextType::CHAR_CLOSE_BRACKET); - *p++ = canonicalChar(TextType::CHAR_OPEN_PAREN); - *p++ = canonicalChar(TextType::CHAR_CLOSE_PAREN); - *p++ = canonicalChar(TextType::CHAR_OPEN_BRACE); - *p++ = canonicalChar(TextType::CHAR_CLOSE_BRACE); - *p++ = canonicalChar(TextType::CHAR_VERTICAL_BAR); - *p++ = canonicalChar(TextType::CHAR_QUESTION_MARK); - *p++ = canonicalChar(TextType::CHAR_PLUS); - *p++ = canonicalChar(TextType::CHAR_ASTERISK); - if (useEscape) - *p++ = escapeChar; - else - *p++ = canonicalChar(TextType::CHAR_ASTERISK); // just repeat something - fb_assert(p - metaCharacters == FB_NELEM(metaCharacters)); - - patternStart = patternPos = (const CharType*) patternStr; - patternEnd = patternStart + patternLen; - - nodes.push(Node(opStart)); - - int flags; - parseExpr(&flags); - - nodes.push(Node(opEnd)); - -#ifdef DEBUG_SIMILAR - dump(); -#endif - - // Check for proper 
termination. - if (patternPos < patternEnd) - status_exception::raise(Arg::Gds(isc_invalid_similar_pattern)); - - branches = FB_NEW_POOL(pool) Range[branchNum + 1]; - - reset(); -} - - -template -bool SimilarToMatcher::Evaluator::getResult() -{ - const UCHAR* str = buffer.begin(); - SLONG len = buffer.getCount(); - - // note that StrConverter changes str and len variables - StrConverter cvt(pool, textType, str, len); - fb_assert(len % sizeof(CharType) == 0); - - bufferStart = bufferPos = (const CharType*) str; - bufferEnd = bufferStart + len / sizeof(CharType); - -#ifdef DEBUG_SIMILAR - debugLog.clear(); - debugLevel = -1; -#endif - - const bool matched = -#ifdef RECURSIVE_SIMILAR - match(0); -#else - match(); -#endif - -#ifdef DEBUG_SIMILAR - if (matched) - { - for (unsigned i = 0; i <= branchNum; ++i) - { - string x; - x.printf("%d: %d, %d\n", i, branches[i].start, branches[i].length); - debugLog.add(x.c_str(), x.length()); - } - - debugLog.add('\0'); - - gds__log("\n%s", debugLog.begin()); - } -#endif // DEBUG_SIMILAR - - return matched; -} - - -template -bool SimilarToMatcher::Evaluator::processNextChunk(const UCHAR* data, SLONG dataLen) -{ - const FB_SIZE_T pos = buffer.getCount(); - memcpy(buffer.getBuffer(pos + dataLen) + pos, data, dataLen); - return true; -} - - -template -void SimilarToMatcher::Evaluator::reset() -{ - buffer.shrink(0); - - memset(branches, 0, sizeof(Range) * (branchNum + 1)); -} - - -template -void SimilarToMatcher::Evaluator::parseExpr(int* flagp) -{ - *flagp = FLAG_NOT_EMPTY; - - bool first = true; - Array refs; - int start; - - while (first || (patternPos < patternEnd && *patternPos == canonicalChar(TextType::CHAR_VERTICAL_BAR))) - { - if (first) - first = false; - else - ++patternPos; - - int thisBranchNum = branchNum; - start = nodes.getCount(); - nodes.push(Node(opBranch)); - nodes.back().branchNum = thisBranchNum; - - int flags; - parseTerm(&flags); - *flagp &= ~(~flags & FLAG_NOT_EMPTY); - *flagp |= flags; - - 
refs.push(nodes.getCount()); - nodes.push(Node(opRef)); - nodes.back().branchNum = thisBranchNum; - - nodes[start].ref = nodes.getCount() - start; - } - - nodes[start].ref = 0; - - for (Array::iterator i = refs.begin(); i != refs.end(); ++i) - nodes[*i].ref = nodes.getCount() - *i; -} - - -template -void SimilarToMatcher::Evaluator::parseTerm(int* flagp) -{ - *flagp = 0; - - bool first = true; - CharType c; - int flags; - - while ((patternPos < patternEnd) && - (c = *patternPos) != canonicalChar(TextType::CHAR_VERTICAL_BAR) && - c != canonicalChar(TextType::CHAR_CLOSE_PAREN)) - { - parseFactor(&flags); - - *flagp |= flags & FLAG_NOT_EMPTY; - - if (first) - { - *flagp |= flags; - first = false; - } - } - - if (first) - nodes.push(Node(opNothing)); -} - - -template -void SimilarToMatcher::Evaluator::parseFactor(int* flagp) -{ - int atomPos = nodes.getCount(); - - int flags; - parsePrimary(&flags); - - CharType op; - - if (patternPos >= patternEnd || !isRep((op = *patternPos))) - { - *flagp = flags; - return; - } - - if (!(flags & FLAG_NOT_EMPTY) && op != canonicalChar(TextType::CHAR_QUESTION_MARK)) - status_exception::raise(Arg::Gds(isc_invalid_similar_pattern)); - - // If the last primary is a string, split the last character - if (flags & FLAG_EXACTLY) - { - fb_assert(nodes.back().op == opExactly || nodes.back().op == opExactlyOne); - - if (nodes.back().op == opExactly && nodes.back().len > 1) - { - Node last = nodes.back(); - last.op = opExactlyOne; - last.str += nodes.back().len - 1; - last.len = 1; - - --nodes.back().len; - atomPos = nodes.getCount(); - nodes.push(last); - } - } - - fb_assert( - op == canonicalChar(TextType::CHAR_ASTERISK) || - op == canonicalChar(TextType::CHAR_PLUS) || - op == canonicalChar(TextType::CHAR_QUESTION_MARK) || - op == canonicalChar(TextType::CHAR_OPEN_BRACE)); - - if (op == canonicalChar(TextType::CHAR_ASTERISK)) - { - *flagp = 0; - nodes.insert(atomPos, Node(opBranch, nodes.getCount() - atomPos + 2)); - nodes.push(Node(opRef, 
atomPos - nodes.getCount())); - nodes.push(Node(opBranch)); - } - else if (op == canonicalChar(TextType::CHAR_PLUS)) - { - *flagp = FLAG_NOT_EMPTY; - nodes.push(Node(opBranch, 2)); - nodes.push(Node(opRef, atomPos - nodes.getCount())); - nodes.push(Node(opBranch)); - } - else if (op == canonicalChar(TextType::CHAR_QUESTION_MARK)) - { - *flagp = 0; - nodes.insert(atomPos, Node(opBranch, nodes.getCount() - atomPos + 1)); - nodes.push(Node(opBranch)); - } - else if (op == canonicalChar(TextType::CHAR_OPEN_BRACE)) - { - ++patternPos; - - UCharBuffer dummy; - const UCHAR* p = originalPatternStr + - charSet->substring(originalPatternLen, originalPatternStr, - originalPatternLen, dummy.getBuffer(originalPatternLen), - 1, patternPos - patternStart); - ULONG size = 0; - bool comma = false; - string s1, s2; - bool ok; - - while ((ok = IntlUtil::readOneChar(charSet, &p, originalPatternStr + originalPatternLen, &size))) - { - if (*patternPos == canonicalChar(TextType::CHAR_CLOSE_BRACE)) - { - if (s1.isEmpty()) - status_exception::raise(Arg::Gds(isc_invalid_similar_pattern)); - break; - } - else if (*patternPos == canonicalChar(TextType::CHAR_COMMA)) - { - if (comma) - status_exception::raise(Arg::Gds(isc_invalid_similar_pattern)); - comma = true; - } - else - { - ULONG ch = 0; - charSet->getConvToUnicode().convert(size, p, sizeof(ch), reinterpret_cast(&ch)); - - if (ch >= '0' && ch <= '9') - { - if (comma) - s2 += (char) ch; - else - s1 += (char) ch; - } - else - status_exception::raise(Arg::Gds(isc_invalid_similar_pattern)); - } - - ++patternPos; - } - - if (!ok || s1.length() > 9 || s2.length() > 9) - status_exception::raise(Arg::Gds(isc_invalid_similar_pattern)); - - const int n1 = atoi(s1.c_str()); - const int n2 = s2.isEmpty() ? (comma ? INT_MAX : n1) : atoi(s2.c_str()); - - if (n2 < n1) - status_exception::raise(Arg::Gds(isc_invalid_similar_pattern)); - - *flagp = n1 == 0 ? 
0 : FLAG_NOT_EMPTY; - - if (n1 == 0 && n2 == INT_MAX) - { - // Tranforms x{0,} to x* - nodes.insert(atomPos, Node(opBranch, nodes.getCount() - atomPos + 2)); - nodes.push(Node(opRef, atomPos - nodes.getCount())); - nodes.push(Node(opBranch)); - } - else - { - if (n1 == 0) - { - // Tranforms x{,n} to (x?){n} - nodes.insert(atomPos, Node(opBranch, nodes.getCount() - atomPos + 1)); - nodes.push(Node(opBranch)); - } - - int exprPos = atomPos + 1; - int exprSize = nodes.getCount() - exprPos + 1; - - nodes.insert(atomPos, Node(opRepeatingRefStart, (n1 == 0 ? n2 : n1), 0, - nodes.getCount() - atomPos + 1)); - nodes.push(Node(opRepeatingRefEnd, atomPos - nodes.getCount())); - - if (n2 != n1 && n1 != 0) - { - if (n2 == INT_MAX) - { - // Tranforms x{n,} to x{n}x* - - nodes.push(Node(opBranch, exprSize + 2)); - - for (int i = 0; i < exprSize; ++i) - { - Node copy(nodes[exprPos + i]); - nodes.push(copy); - } - - nodes.push(Node(opRef, -exprSize - 1)); - nodes.push(Node(opBranch)); - } - else - { - // Tranforms x{n,m} to x{n}(x?){m-n} - - nodes.push(Node(opRepeatingRefStart, n2 - n1, 0, exprSize + 3)); - nodes.push(Node(opBranch, exprSize + 1)); - - for (int i = 0; i < exprSize; ++i) - { - Node copy(nodes[exprPos + i]); - nodes.push(copy); - } - - nodes.push(Node(opBranch)); - nodes.push(Node(opRepeatingRefEnd, -exprSize -3)); - } - } - } - } - - ++patternPos; - - if (patternPos < patternEnd && isRep(*patternPos)) - status_exception::raise(Arg::Gds(isc_invalid_similar_pattern)); -} - - -template -void SimilarToMatcher::Evaluator::parsePrimary(int* flagp) -{ - *flagp = 0; - - const CharType op = *patternPos++; - - if (op == canonicalChar(TextType::CHAR_UNDERLINE)) - { - nodes.push(Node(opAny)); - *flagp |= FLAG_NOT_EMPTY; - } - else if (op == canonicalChar(TextType::CHAR_PERCENT)) - { - nodes.push(Node(opBranch, 3)); - nodes.push(Node(opAny)); - nodes.push(Node(opRef, -2)); - nodes.push(Node(opBranch)); - - *flagp = 0; - return; - } - else if (op == 
canonicalChar(TextType::CHAR_OPEN_BRACKET)) - { - nodes.push(Node(opAnyOf)); - - HalfStaticArray charsBuffer; - HalfStaticArray rangeBuffer; - - Node& node = nodes.back(); - const CharType** nodeChars = &node.str; - SLONG* nodeCharsLen = &node.len; - const UCHAR** nodeRange = &node.str2; - SLONG* nodeRangeLen = &node.len2; - - bool but = false; - - do - { - if (patternPos >= patternEnd) - status_exception::raise(Arg::Gds(isc_invalid_similar_pattern)); - - bool range = false; - bool charClass = false; - - if (useEscape && *patternPos == escapeChar) - { - if (++patternPos >= patternEnd) - status_exception::raise(Arg::Gds(isc_escape_invalid)); - - if (*patternPos != escapeChar && - notInSet(patternPos, 1, metaCharacters, FB_NELEM(metaCharacters)) != 0) - { - status_exception::raise(Arg::Gds(isc_escape_invalid)); - } - - if (patternPos + 1 < patternEnd) - range = (patternPos[1] == canonicalChar(TextType::CHAR_MINUS)); - } - else - { - if (*patternPos == canonicalChar(TextType::CHAR_OPEN_BRACKET)) - charClass = true; - else if (*patternPos == canonicalChar(TextType::CHAR_CIRCUMFLEX)) - { - if (but) - status_exception::raise(Arg::Gds(isc_invalid_similar_pattern)); - - but = true; - - CharType* p = (CharType*) alloc(charsBuffer.getCount() * sizeof(CharType)); - memcpy(p, charsBuffer.begin(), charsBuffer.getCount() * sizeof(CharType)); - *nodeChars = p; - - *nodeCharsLen = charsBuffer.getCount(); - - if (rangeBuffer.getCount() > 0) - { - UCHAR* p = (UCHAR*) alloc(rangeBuffer.getCount()); - memcpy(p, rangeBuffer.begin(), rangeBuffer.getCount()); - *nodeRange = p; - } - - *nodeRangeLen = rangeBuffer.getCount(); - - charsBuffer.clear(); - rangeBuffer.clear(); - - nodeChars = &node.str3; - nodeCharsLen = &node.len3; - nodeRange = &node.str4; - nodeRangeLen = &node.len4; - - ++patternPos; - continue; - } - else if (patternPos + 1 < patternEnd) - range = (patternPos[1] == canonicalChar(TextType::CHAR_MINUS)); - } - - if (charClass) - { - if (++patternPos >= patternEnd || 
*patternPos != canonicalChar(TextType::CHAR_COLON)) - status_exception::raise(Arg::Gds(isc_invalid_similar_pattern)); - - const CharType* start = ++patternPos; - - while (patternPos < patternEnd && *patternPos != canonicalChar(TextType::CHAR_COLON)) - ++patternPos; - - const SLONG len = patternPos++ - start; - - if (patternPos >= patternEnd || *patternPos++ != canonicalChar(TextType::CHAR_CLOSE_BRACKET)) - status_exception::raise(Arg::Gds(isc_invalid_similar_pattern)); - - typedef const UCHAR* (TextType::*GetCanonicalFunc)(int*) const; - - static const GetCanonicalFunc alNum[] = {&TextType::getCanonicalUpperLetters, - &TextType::getCanonicalLowerLetters, &TextType::getCanonicalNumbers, NULL}; - static const GetCanonicalFunc alpha[] = {&TextType::getCanonicalUpperLetters, - &TextType::getCanonicalLowerLetters, NULL}; - static const GetCanonicalFunc digit[] = {&TextType::getCanonicalNumbers, NULL}; - static const GetCanonicalFunc lower[] = {&TextType::getCanonicalLowerLetters, NULL}; - static const GetCanonicalFunc space[] = {&TextType::getCanonicalSpace, NULL}; - static const GetCanonicalFunc upper[] = {&TextType::getCanonicalUpperLetters, NULL}; - static const GetCanonicalFunc whitespace[] = {&TextType::getCanonicalWhiteSpaces, NULL}; - - struct - { - const GetCanonicalFunc* funcs; - const ULONG nameLen; // in bytes, not characters because all functions accept length in bytes - const USHORT name[10]; - } static const classes[] = - { // Names are in utf16 in order not to convert them every time for comparison and thus save some CPU - {alNum, 10, {'A','L','N','U','M'}}, - {alpha, 10, {'A','L','P','H','A'}}, - {digit, 10, {'D','I','G','I','T'}}, - {lower, 10, {'L','O','W','E','R'}}, - {space, 10, {'S','P','A','C','E'}}, - {upper, 10, {'U','P','P','E','R'}}, - {whitespace, 20, {'W','H','I','T','E','S','P','A','C','E'}} - }; - - // Get the exact original substring correspondent to the canonical bytes. 
- HalfStaticArray classNameStr( - len * charSet->maxBytesPerChar()); - ULONG classNameStrLen = charSet->substring(originalPatternLen, originalPatternStr, - classNameStr.getCapacity(), classNameStr.begin(), start - patternStart, len); - - // And then convert it to UTF-16. - HalfStaticArray classNameUtf16( - len * sizeof(ULONG)); - ULONG classNameUtf16Len = charSet->getConvToUnicode().convert( - classNameStrLen, classNameStr.begin(), - classNameUtf16.getCapacity() * sizeof(USHORT), classNameUtf16.begin()); - - // Bring class name to uppercase for case-insensitivity. - // Do it in UTF-16 because original collation can have no uppercase conversion. - classNameUtf16Len = Jrd::UnicodeUtil::utf16UpperCase( - classNameUtf16Len, classNameUtf16.begin(), - classNameUtf16.getCapacity() * sizeof(USHORT), classNameUtf16.begin(), NULL); - int classN; - - for (classN = 0; classN < FB_NELEM(classes); ++classN) - { - INTL_BOOL errorFlag; - - if (Jrd::UnicodeUtil::utf16Compare(classNameUtf16Len, classNameUtf16.begin(), - classes[classN].nameLen, classes[classN].name, &errorFlag) == 0) - { - for (const GetCanonicalFunc* func = classes[classN].funcs; *func; ++func) - { - int count; - const CharType* canonic = (const CharType*) (textType->**func)(&count); - charsBuffer.push(canonic, count); - } - - break; - } - } - - if (classN >= FB_NELEM(classes)) - status_exception::raise(Arg::Gds(isc_invalid_similar_pattern)); - } - else - { - charsBuffer.push(*patternPos++); - - if (range) - { - --patternPos; // go back to first char - - UCHAR c[sizeof(ULONG)]; - ULONG len = charSet->substring(originalPatternLen, originalPatternStr, - sizeof(c), c, patternPos - patternStart, 1); - - rangeBuffer.push(len); - FB_SIZE_T rangeCount = rangeBuffer.getCount(); - memcpy(rangeBuffer.getBuffer(rangeCount + len) + rangeCount, &c, len); - - ++patternPos; // character - ++patternPos; // minus - - if (patternPos >= patternEnd) - status_exception::raise(Arg::Gds(isc_invalid_similar_pattern)); - - if (useEscape && 
*patternPos == escapeChar) - { - if (++patternPos >= patternEnd) - status_exception::raise(Arg::Gds(isc_escape_invalid)); - - if (*patternPos != escapeChar && - notInSet(patternPos, 1, metaCharacters, FB_NELEM(metaCharacters)) != 0) - { - status_exception::raise(Arg::Gds(isc_escape_invalid)); - } - } - - len = charSet->substring(originalPatternLen, originalPatternStr, - sizeof(c), c, patternPos - patternStart, 1); - - rangeBuffer.push(len); - rangeCount = rangeBuffer.getCount(); - memcpy(rangeBuffer.getBuffer(rangeCount + len) + rangeCount, &c, len); - - charsBuffer.push(*patternPos++); - } - } - - if (patternPos >= patternEnd) - status_exception::raise(Arg::Gds(isc_invalid_similar_pattern)); - } while (*patternPos != canonicalChar(TextType::CHAR_CLOSE_BRACKET)); - - CharType* p = (CharType*) alloc(charsBuffer.getCount() * sizeof(CharType)); - memcpy(p, charsBuffer.begin(), charsBuffer.getCount() * sizeof(CharType)); - *nodeChars = p; - - *nodeCharsLen = charsBuffer.getCount(); - - if (rangeBuffer.getCount() > 0) - { - UCHAR* r = (UCHAR*) alloc(rangeBuffer.getCount()); - memcpy(r, rangeBuffer.begin(), rangeBuffer.getCount()); - *nodeRange = r; - } - - *nodeRangeLen = rangeBuffer.getCount(); - - ++patternPos; - *flagp |= FLAG_NOT_EMPTY; - } - else if (op == canonicalChar(TextType::CHAR_OPEN_PAREN)) - { - int flags; - parseExpr(&flags); - - ++branchNum; // This is used for the trace stuff. 
- - if (patternPos >= patternEnd || *patternPos++ != canonicalChar(TextType::CHAR_CLOSE_PAREN)) - status_exception::raise(Arg::Gds(isc_invalid_similar_pattern)); - - *flagp |= flags & FLAG_NOT_EMPTY; - } - else if (useEscape && op == escapeChar) - { - if (patternPos >= patternEnd) - status_exception::raise(Arg::Gds(isc_escape_invalid)); - - if (*patternPos != escapeChar && - notInSet(patternPos, 1, metaCharacters, FB_NELEM(metaCharacters)) != 0) - { - status_exception::raise(Arg::Gds(isc_escape_invalid)); - } - - nodes.push(Node(opExactlyOne, patternPos++, 1)); - *flagp |= FLAG_NOT_EMPTY; - } - else - { - --patternPos; - - const SLONG len = notInSet(patternPos, patternEnd - patternPos, - metaCharacters, FB_NELEM(metaCharacters)); - - if (len == 0) - status_exception::raise(Arg::Gds(isc_invalid_similar_pattern)); - - *flagp |= FLAG_NOT_EMPTY | FLAG_EXACTLY; - - nodes.push(Node((len == 1 ? opExactlyOne : opExactly), patternPos, len)); - patternPos += len; - } -} - - -template -bool SimilarToMatcher::Evaluator::isRep(CharType c) const -{ - return (c == canonicalChar(TextType::CHAR_ASTERISK) || - c == canonicalChar(TextType::CHAR_PLUS) || - c == canonicalChar(TextType::CHAR_QUESTION_MARK) || - c == canonicalChar(TextType::CHAR_OPEN_BRACE)); -} - - -#ifdef DEBUG_SIMILAR -template -void SimilarToMatcher::Evaluator::dump() const -{ - string text; - - for (unsigned i = 0; i < nodes.getCount(); ++i) - { - string type; - nodes[i].dump(type, i); - - string s; - s.printf("%s%s", (i > 0 ? 
", " : ""), type.c_str()); - - text += s; - } - - gds__log("%s", text.c_str()); -} -#endif // DEBUG_SIMILAR - - -template -#ifdef RECURSIVE_SIMILAR -bool SimilarToMatcher::Evaluator::match(int start) -{ -#ifdef DEBUG_SIMILAR - AutoSetRestore autoDebugLevel(&debugLevel, debugLevel + 1); -#endif - - for (int i = start;; ++i) - { - const Node* node = &nodes[i]; - -#ifdef DEBUG_SIMILAR - string s; - node->dump(s, i); - - for (int debugLevelI = 0; debugLevelI < debugLevel; ++debugLevelI) - s = " " + s; - - s = "\n" + s; - debugLog.add(s.c_str(), s.length()); -#endif - - switch (node->op) - { - case opBranch: - { - const CharType* const save = bufferPos; - - while (true) - { - if (node->branchNum != -1) - branches[node->branchNum].start = save - bufferStart; - - if (match(i + 1)) - return true; - - bufferPos = save; - - if (node->ref == 0) - return false; - - i += node->ref; - node = &nodes[i]; - - if (node->ref == 0) - break; - -#ifdef DEBUG_SIMILAR - node->dump(s, i); - - for (int debugLevelI = 0; debugLevelI < debugLevel; ++debugLevelI) - s = " " + s; - - s = "\n" + s; - debugLog.add(s.c_str(), s.length()); -#endif - } - - break; - } - - case opStart: - if (bufferPos != bufferStart) - return false; - break; - - case opEnd: - return (bufferPos == bufferEnd); - - case opRef: - if (node->branchNum != -1) - { - fb_assert(unsigned(node->branchNum) <= branchNum); - branches[node->branchNum].length = - bufferPos - bufferStart - branches[node->branchNum].start; - } - - if (node->ref == 1) // avoid recursion - break; - return match(i + node->ref); - - //// FIXME: opRepeatingRefStart, opRepeatingRefEnd - - case opNothing: - break; - - case opAny: -#ifdef DEBUG_SIMILAR - if (bufferPos >= bufferEnd) - s = " -> "; - else - s.printf(" -> %d", *bufferPos); - debugLog.add(s.c_str(), s.length()); -#endif - - if (bufferPos >= bufferEnd) - return false; - ++bufferPos; - break; - - case opAnyOf: -#ifdef DEBUG_SIMILAR - if (bufferPos >= bufferEnd) - s = " -> "; - else - s.printf(" -> %d", 
*bufferPos); - debugLog.add(s.c_str(), s.length()); -#endif - - if (bufferPos >= bufferEnd) - return false; - - if (notInSet(bufferPos, 1, node->str, node->len) != 0) - { - const UCHAR* const end = node->str2 + node->len2; - const UCHAR* p = node->str2; - - while (p < end) - { - UCHAR c[sizeof(ULONG)]; - ULONG len = charSet->substring(buffer.getCount(), buffer.begin(), - sizeof(c), c, bufferPos - bufferStart, 1); - - if (textType->compare(len, c, p[0], p + 1) >= 0 && - textType->compare(len, c, p[1 + p[0]], p + 2 + p[0]) <= 0) - { - break; - } - - p += 2 + p[0] + p[1 + p[0]]; - } - - if (node->len + node->len2 != 0 && p >= end) - return false; - } - - if (notInSet(bufferPos, 1, node->str3, node->len3) == 0) - return false; - else - { - const UCHAR* const end = node->str4 + node->len4; - const UCHAR* p = node->str4; - - while (p < end) - { - UCHAR c[sizeof(ULONG)]; - const ULONG len = charSet->substring(buffer.getCount(), buffer.begin(), - sizeof(c), c, bufferPos - bufferStart, 1); - - if (textType->compare(len, c, p[0], p + 1) >= 0 && - textType->compare(len, c, p[1 + p[0]], p + 2 + p[0]) <= 0) - { - break; - } - - p += 2 + p[0] + p[1 + p[0]]; - } - - if (p < end) - return false; - } - - ++bufferPos; - break; - - case opExactly: - if (bufferEnd - bufferPos >= node->len && - memcmp(node->str, bufferPos, node->len * sizeof(CharType)) == 0) - { - bufferPos += node->len; - break; - } - else - return false; - - case opExactlyOne: - if (bufferEnd - bufferPos >= 1 && *node->str == *bufferPos) - { - bufferPos += node->len; - break; - } - else - return false; - - default: - fb_assert(false); - return false; - } - } - - return true; -} -#else -bool SimilarToMatcher::Evaluator::match() -{ - // Left shift by 4 to OR MatchState's and Op's without additional runtime shifts. 
- static const unsigned MATCH_STATE_SHIFT = 4; - - enum MatchState - { - msIterating = 0x00 << MATCH_STATE_SHIFT, - msReturningFalse = 0x01 << MATCH_STATE_SHIFT, - msReturningTrue = 0x02 << MATCH_STATE_SHIFT, - msReturningMask = (msReturningFalse | msReturningTrue) - }; - - SimpleStack scopeStack; - - // Add special node to return without needing additional comparison after popping - // the stack on each return. - Node nodeRet(opRet); - scopeStack.push(&nodeRet); - - scopeStack.push(nodes.begin()); - - MatchState state = msIterating; - - SimpleStack repeatStack; - SLONG repeatCount = 0; - Node nodeRepeatingRestore(opRepeatingRestore); - - while (true) - { - fb_assert(scopeStack.getCount() > 0); - - Scope* const scope = scopeStack.back; - const Node* const node = scope->i; - -#ifdef DEBUG_SIMILAR - string debugText; - node->dump(debugText, (node == &nodeRet ? -1 : node - nodes.begin())); - - for (const CharType* p = bufferPos; p != bufferEnd; ++p) - { - string s; - s.printf(" %04d", *p); - debugText += s; - } - - debugText += "\nrepeat:"; - - for (const int* p = repeatStack.begin(); p <= repeatStack.back; ++p) - { - string s; - s.printf(" %d", *p); - debugText += s; - } - - { - string s; - s.printf(" %d", repeatCount); - debugText += s; - } - - debugText += "\nscope:"; - - for (const Scope* p = scopeStack.begin(); p <= scopeStack.back; ++p) - { - string s; - s.printf(" %d", - (p->i == &nodeRet ? - -1 : - (p->i == &nodeRepeatingRestore ? - -2 : - p->i - nodes.begin()))); - debugText += s; - } - - gds__log("%d, %s", state, debugText.c_str()); -#endif - -#define ENCODE_OP_STATE(op, state) ((op) | (state)) - - // Go directly to op and state with a single switch. 
- - switch (ENCODE_OP_STATE(node->op, state)) - { - case ENCODE_OP_STATE(opBranch, msIterating): - if (node->branchNum != -1) - branches[node->branchNum].start = bufferPos - bufferStart; - - scope->save = bufferPos; - - scopeStack.push(scope->i + 1); - continue; - - case ENCODE_OP_STATE(opBranch, msReturningFalse): - bufferPos = scope->save; - - if (node->ref != 0) - { - state = msIterating; - - scope->i += node->ref; - - if (scope->i->ref != 0) - { - scope->save = bufferPos; - - scopeStack.push(scope->i + 1); - continue; - } - } - - break; - - case ENCODE_OP_STATE(opBranch, msReturningTrue): - break; - - case ENCODE_OP_STATE(opStart, msIterating): - if (bufferPos != bufferStart) - state = msReturningFalse; - break; - - case ENCODE_OP_STATE(opEnd, msIterating): - state = (bufferPos == bufferEnd ? msReturningTrue : msReturningFalse); - break; - - case ENCODE_OP_STATE(opRef, msIterating): - if (node->branchNum != -1) - { - fb_assert(unsigned(node->branchNum) <= branchNum); - branches[node->branchNum].length = - bufferPos - bufferStart - branches[node->branchNum].start; - } - - scope->i += node->ref; - scope->save = NULL; - continue; - - case ENCODE_OP_STATE(opRef, msReturningFalse): - case ENCODE_OP_STATE(opRef, msReturningTrue): - break; - - case ENCODE_OP_STATE(opRepeatingRefStart, msIterating): - repeatStack.push(repeatCount); - repeatCount = node->len; - scopeStack.push(scope->i + node->ref); - continue; - - case ENCODE_OP_STATE(opRepeatingRefStart, msReturningFalse): - case ENCODE_OP_STATE(opRepeatingRefStart, msReturningTrue): - repeatCount = repeatStack.pop(); - break; - - case ENCODE_OP_STATE(opRepeatingRefEnd, msIterating): - if (repeatCount > 0) - { - --repeatCount; - scopeStack.push(scope->i + node->ref + 1); - } - else - { - repeatCount = repeatStack.pop(); - scopeStack.push(&nodeRepeatingRestore); - scopeStack.push(scope->i + 1); - } - - continue; - - case ENCODE_OP_STATE(opRepeatingRefEnd, msReturningFalse): - ++repeatCount; - break; - - case 
ENCODE_OP_STATE(opRepeatingRefEnd, msReturningTrue): - break; - - case ENCODE_OP_STATE(opRepeatingRestore, msReturningFalse): - case ENCODE_OP_STATE(opRepeatingRestore, msReturningTrue): - repeatStack.push(repeatCount); - repeatCount = -1; - break; - - case ENCODE_OP_STATE(opNothing, msIterating): - case ENCODE_OP_STATE(opNothing, msReturningFalse): - case ENCODE_OP_STATE(opNothing, msReturningTrue): - break; - - case ENCODE_OP_STATE(opAny, msIterating): - if (bufferPos >= bufferEnd) - state = msReturningFalse; - else - ++bufferPos; - break; - - case ENCODE_OP_STATE(opAnyOf, msIterating): - if (bufferPos >= bufferEnd) - state = msReturningFalse; - else - { - if (notInSet(bufferPos, 1, node->str, node->len) != 0) - { - const UCHAR* const end = node->str2 + node->len2; - const UCHAR* p = node->str2; - - while (p < end) - { - UCHAR c[sizeof(ULONG)]; - const ULONG len = charSet->substring(buffer.getCount(), buffer.begin(), - sizeof(c), c, bufferPos - bufferStart, 1); - - if (textType->compare(len, c, p[0], p + 1) >= 0 && - textType->compare(len, c, p[1 + p[0]], p + 2 + p[0]) <= 0) - { - break; - } - - p += 2 + p[0] + p[1 + p[0]]; - } - - if (node->len + node->len2 != 0 && p >= end) - { - state = msReturningFalse; - break; - } - } - - if (notInSet(bufferPos, 1, node->str3, node->len3) == 0) - state = msReturningFalse; - else - { - const UCHAR* const end = node->str4 + node->len4; - const UCHAR* p = node->str4; - - while (p < end) - { - UCHAR c[sizeof(ULONG)]; - const ULONG len = charSet->substring( - buffer.getCount(), buffer.begin(), - sizeof(c), c, bufferPos - bufferStart, 1); - - if (textType->compare(len, c, p[0], p + 1) >= 0 && - textType->compare(len, c, p[1 + p[0]], p + 2 + p[0]) <= 0) - { - break; - } - - p += 2 + p[0] + p[1 + p[0]]; - } - - if (p < end) - state = msReturningFalse; - } - } - - if (state == msIterating) - ++bufferPos; - break; - - case ENCODE_OP_STATE(opExactly, msIterating): - if (bufferEnd - bufferPos >= node->len && - memcmp(node->str, 
bufferPos, node->len * sizeof(CharType)) == 0) - { - bufferPos += node->len; - } - else - state = msReturningFalse; - break; - - case ENCODE_OP_STATE(opExactlyOne, msIterating): - if (bufferEnd - bufferPos >= 1 && *node->str == *bufferPos) - ++bufferPos; - else - state = msReturningFalse; - break; - - case ENCODE_OP_STATE(opRet, msReturningFalse): - case ENCODE_OP_STATE(opRet, msReturningTrue): - fb_assert(repeatStack.getCount() == 0); - return state == msReturningTrue; - - default: - fb_assert(false); - return false; - } - -#undef ENCODE_OP_STATE - - switch (state) - { - case msIterating: - ++scope->i; - break; - - case msReturningFalse: - case msReturningTrue: - scopeStack.pop(); - break; - - default: - break; - } - } - - fb_assert(false); - return false; -} -#endif - - -// Returns the number of characters up to first one present in set. -template -SLONG SimilarToMatcher::Evaluator::notInSet( - const CharType* str, SLONG strLen, const CharType* set, SLONG setLen) -{ - for (const CharType* begin = str; str - begin < strLen; ++str) - { - for (const CharType* p = set; p - set < setLen; ++p) - { - if (*p == *str) - return str - begin; - } - } - - return strLen; -} - - -// Given a regular expression R1#R2#R3 and the string S: -// - Find the shortest substring of S that matches R1 while the remainder (S23) matches R2R3; -// - Find the longest (S2) substring of S23 that matches R2 while the remainder matches R3; -// - Return S2. 
-template > -class SubstringSimilarMatcher : public Jrd::BaseSubstringSimilarMatcher -{ -private: - typedef Jrd::CharSet CharSet; - typedef Jrd::TextType TextType; - -public: - SubstringSimilarMatcher(MemoryPool& pool, TextType* ttype, - const UCHAR* patternStr, SLONG patternLen, CharType aEscapeChar) - : BaseSubstringSimilarMatcher(pool, ttype), - escapeChar(aEscapeChar), - originalPatternStr(patternStr), - originalPatternLen(patternLen), - patternCvt(pool, textType, patternStr, patternLen), - buffer(pool) - { - CharSet* charSet = textType->getCharSet(); - - // Make a new string without the . While doing it, get the byte - // length of each segment. - - UCharBuffer newExpr(originalPatternLen); - UCHAR* newExprPos = newExpr.begin(); - - const UCHAR* originalPatternEnd = originalPatternStr + originalPatternLen; - const UCHAR* originalPatternPos = originalPatternStr; - - const CharType* lastStart = reinterpret_cast(patternStr); - const CharType* end = lastStart + patternLen; - unsigned lengths[3]; - unsigned lengthsNum = 0; - UCHAR dummy[sizeof(ULONG) * 2]; - - for (const CharType* p = lastStart; p < end; ++p) - { - if (*p != escapeChar) - continue; - - if (++p >= end) - status_exception::raise(Arg::Gds(isc_invalid_similar_pattern)); - - if (*p == canonicalChar(TextType::CHAR_DOUBLE_QUOTE)) - { - if (lengthsNum >= 2) - status_exception::raise(Arg::Gds(isc_invalid_similar_pattern)); - - // Get the byte length since the last segment. - ULONG len = charSet->substring(originalPatternEnd - originalPatternPos, - originalPatternPos, newExpr.begin() + originalPatternLen - newExprPos, - newExprPos, 0, p - lastStart - 1); - - lengths[lengthsNum++] = len; - newExprPos += len; - originalPatternPos += len; - - // Advance two () characters. - originalPatternPos += charSet->substring(originalPatternEnd - originalPatternPos, - originalPatternPos, sizeof(dummy), dummy, 0, 2); - - lastStart = p + 1; // Register the start of the next segment. 
- } - } - - if (lengthsNum != 2) - status_exception::raise(Arg::Gds(isc_invalid_similar_pattern)); - - // Get the byte length of the last segment. - lengths[2] = charSet->substring(originalPatternEnd - originalPatternPos, - originalPatternPos, newExpr.begin() + originalPatternLen - newExprPos, - newExprPos, 0, end - lastStart); - - // Construct the needed regular expressions. - - r1 = FB_NEW_POOL(pool) SimilarToMatcher(pool, ttype, - newExpr.begin(), lengths[0], escapeChar, true); - - r2 = FB_NEW_POOL(pool) SimilarToMatcher(pool, ttype, - newExpr.begin() + lengths[0], lengths[1], escapeChar, true); - - r3 = FB_NEW_POOL(pool) SimilarToMatcher(pool, ttype, - newExpr.begin() + lengths[0] + lengths[1], lengths[2], escapeChar, true); - - r23 = FB_NEW_POOL(pool) SimilarToMatcher(pool, ttype, - newExpr.begin() + lengths[0], lengths[1] + lengths[2], escapeChar, true); - } - - static SubstringSimilarMatcher* create(MemoryPool& pool, TextType* ttype, - const UCHAR* str, SLONG length, const UCHAR* escape, SLONG escapeLen) - { - StrConverter cvt_escape(pool, ttype, escape, escapeLen); - - return FB_NEW_POOL(pool) SubstringSimilarMatcher(pool, ttype, str, length, - *reinterpret_cast(escape)); - } - - void reset() - { - buffer.shrink(0); - - r1->reset(); - r2->reset(); - r3->reset(); - r23->reset(); - } - - bool result() - { - CharSet* charSet = textType->getCharSet(); - const UCHAR* p = buffer.begin(); - UCharBuffer temp(buffer.getCount()); - UCHAR dummy[sizeof(ULONG)]; - - // Find the shortest substring that matches R1 while the full expression matches R1R2R3. - do - { - r1->reset(); - r1->process(buffer.begin(), p - buffer.begin()); - - if (r1->result()) - { - // We have a initial substring matching R1. Let's see if the remainder matches R2R3. - - r23->reset(); - r23->process(p, buffer.end() - p); - - if (r23->result()) - { - // Now we start to find the longest substring that matches R2 while the - // remainder matches R3. Once we found it, it's the result string. 
- - // We already know its start, based on the substring that matched R1. - matchedStart = p - buffer.begin(); - - const UCHAR* p3 = buffer.end(); - SLONG charLen23 = -1; - memcpy(temp.begin(), p, p3 - p); - - while (true) - { - r2->reset(); - r2->process(temp.begin(), p3 - p); - - if (r2->result()) - { - r3->reset(); - r3->process(p3, buffer.end() - p3); - - if (r3->result()) - { - matchedLength = p3 - buffer.begin() - matchedStart; - return true; - } - } - - if (charLen23 == -1) - charLen23 = charSet->length(p3 - p, p, true); - - if (charLen23-- == 0) - break; - - // Shrink in one character the string to match R2. - // Move back one character to match R3. - p3 = p + charSet->substring(buffer.end() - p, p, temp.getCapacity(), - temp.begin(), 0, charLen23); - } - } - } - - // Advance a character. - p += charSet->substring(buffer.end() - p, p, sizeof(dummy), dummy, 0, 1); - } while (p < buffer.end()); - - return false; - } - - bool process(const UCHAR* str, SLONG length) - { - const FB_SIZE_T pos = buffer.getCount(); - memcpy(buffer.getBuffer(pos + length) + pos, str, length); - return true; - } - - // We return byte-base start and length. 
- void getResultInfo(unsigned* start, unsigned* length) - { - *start = matchedStart; - *length = matchedLength; - } - -private: - CharType canonicalChar(int ch) const - { - return *reinterpret_cast(textType->getCanonicalChar(ch)); - } - -private: - CharType escapeChar; - const UCHAR* originalPatternStr; - SLONG originalPatternLen; - StrConverter patternCvt; - HalfStaticArray buffer; - AutoPtr r1, r2, r3, r23; - unsigned matchedStart; - unsigned matchedLength; -}; - - -} // namespace Firebird - -#endif // JRD_SIMILAR_TO_EVALUATOR_H diff --git a/src/jrd/intl_classes.h b/src/jrd/intl_classes.h index c3be9620f8..e7279ab0d4 100644 --- a/src/jrd/intl_classes.h +++ b/src/jrd/intl_classes.h @@ -87,23 +87,12 @@ public: UpcaseConverter(MemoryPool& pool, TextType* obj, const UCHAR*& str, SLONG& len) : PrevConverter(pool, obj, str, len) { - if (len > (int) sizeof(tempBuffer)) - out_str = FB_NEW_POOL(pool) UCHAR[len]; - else - out_str = tempBuffer; - obj->str_to_upper(len, str, len, out_str); - str = out_str; - } - - ~UpcaseConverter() - { - if (out_str != tempBuffer) - delete[] out_str; + obj->str_to_upper(len, str, len, tempBuffer.getBuffer(len, false)); + str = tempBuffer.begin(); } private: - UCHAR tempBuffer[100]; - UCHAR* out_str; + Firebird::UCharBuffer tempBuffer; }; template @@ -115,29 +104,17 @@ public: { const SLONG out_len = len / obj->getCharSet()->minBytesPerChar() * obj->getCanonicalWidth(); - if (out_len > (int) sizeof(tempBuffer)) - out_str = FB_NEW_POOL(pool) UCHAR[out_len]; - else - out_str = tempBuffer; - if (str) { - len = obj->canonical(len, str, out_len, out_str) * obj->getCanonicalWidth(); - str = out_str; + len = obj->canonical(len, str, out_len, tempBuffer.getBuffer(out_len, false)) * obj->getCanonicalWidth(); + str = tempBuffer.begin(); } else len = 0; } - ~CanonicalConverter() - { - if (out_str != tempBuffer) - delete[] out_str; - } - private: - UCHAR tempBuffer[100]; - UCHAR* out_str; + Firebird::UCharBuffer tempBuffer; }; } // namespace Jrd diff 
--git a/src/jrd/replication/Manager.cpp b/src/jrd/replication/Manager.cpp index 2ae0742fd6..a8960c8328 100644 --- a/src/jrd/replication/Manager.cpp +++ b/src/jrd/replication/Manager.cpp @@ -50,50 +50,23 @@ TableMatcher::TableMatcher(MemoryPool& pool, const string& excludeFilter) : m_tables(pool) { - m_cs = FB_NEW_POOL(pool) charset; - m_tt = FB_NEW_POOL(pool) texttype; - - IntlUtil::initUtf8Charset(m_cs); - - string collAttributes("ICU-VERSION="); - collAttributes += Jrd::UnicodeUtil::getDefaultIcuVersion(); - IntlUtil::setupIcuAttributes(m_cs, collAttributes, "", collAttributes); - - UCharBuffer collAttributesBuffer; - collAttributesBuffer.push(reinterpret_cast(collAttributes.c_str()), - collAttributes.length()); - - if (!IntlUtil::initUnicodeCollation(m_tt, m_cs, "UNICODE", 0, collAttributesBuffer, "")) - raiseError("Cannot initialize UNICODE collation"); - - m_charSet = CharSet::createInstance(pool, 0, m_cs); - m_textType = FB_NEW_POOL(pool) TextType(0, m_tt, m_charSet); - if (includeFilter.hasData()) { - m_includeMatcher.reset(FB_NEW_POOL(pool) SimilarMatcher( - pool, m_textType, - (const UCHAR*) includeFilter.c_str(), - includeFilter.length(), - '\\', true)); + m_includeMatcher.reset(FB_NEW_POOL(pool) SimilarToRegex( + pool, true, + includeFilter.c_str(), includeFilter.length(), + "\\", 1)); } if (excludeFilter.hasData()) { - m_excludeMatcher.reset(FB_NEW_POOL(pool) SimilarMatcher( - pool, m_textType, - (const UCHAR*) excludeFilter.c_str(), - excludeFilter.length(), - '\\', true)); + m_excludeMatcher.reset(FB_NEW_POOL(pool) SimilarToRegex( + pool, true, + excludeFilter.c_str(), excludeFilter.length(), + "\\", 1)); } } -TableMatcher::~TableMatcher() -{ - if (m_tt && m_tt->texttype_fn_destroy) - m_tt->texttype_fn_destroy(m_tt); -} - bool TableMatcher::matchTable(const MetaName& tableName) { try @@ -104,18 +77,10 @@ bool TableMatcher::matchTable(const MetaName& tableName) enabled = true; if (m_includeMatcher) - { - m_includeMatcher->reset(); - 
m_includeMatcher->process((const UCHAR*) tableName.c_str(), tableName.length()); - enabled = m_includeMatcher->result(); - } + enabled = m_includeMatcher->matches(tableName.c_str(), tableName.length()); if (enabled && m_excludeMatcher) - { - m_excludeMatcher->reset(); - m_excludeMatcher->process((const UCHAR*) tableName.c_str(), tableName.length()); - enabled = !m_excludeMatcher->result(); - } + enabled = !m_excludeMatcher->matches(tableName.c_str(), tableName.length()); m_tables.put(tableName, enabled); } diff --git a/src/jrd/replication/Manager.h b/src/jrd/replication/Manager.h index d56f1b1cef..9b0232c785 100644 --- a/src/jrd/replication/Manager.h +++ b/src/jrd/replication/Manager.h @@ -26,9 +26,9 @@ #include "../common/classes/array.h" #include "../common/classes/semaphore.h" +#include "../common/SimilarToRegex.h" #include "../common/os/guid.h" #include "../common/isc_s_proto.h" -#include "../../jrd/SimilarToMatcher.h" #include "../../jrd/intl_classes.h" #include "Config.h" @@ -38,25 +38,18 @@ namespace Replication { class TableMatcher { - typedef Jrd::UpcaseConverter SimilarConverter; - typedef Firebird::SimilarToMatcher SimilarMatcher; typedef Firebird::GenericMap > > TablePermissionMap; public: TableMatcher(MemoryPool& pool, const Firebird::string& includeFilter, const Firebird::string& excludeFilter); - ~TableMatcher(); bool matchTable(const Firebird::MetaName& tableName); private: - charset* m_cs; - Firebird::AutoPtr m_tt; - Firebird::AutoPtr m_charSet; - Firebird::AutoPtr m_textType; - Firebird::AutoPtr m_includeMatcher; - Firebird::AutoPtr m_excludeMatcher; + Firebird::AutoPtr m_includeMatcher; + Firebird::AutoPtr m_excludeMatcher; TablePermissionMap m_tables; }; diff --git a/src/jrd/validation.cpp b/src/jrd/validation.cpp index f902a19e75..409f824d0e 100644 --- a/src/jrd/validation.cpp +++ b/src/jrd/validation.cpp @@ -570,7 +570,6 @@ VI. 
ADDITIONAL NOTES #include "../common/db_alias.h" #include "../jrd/intl_proto.h" #include "../jrd/lck_proto.h" -#include "../jrd/Collation.h" #ifdef DEBUG_VAL_VERBOSE #include "../jrd/dmp_proto.h" @@ -592,18 +591,21 @@ static void print_rhd(USHORT, const rhd*); #endif -static PatternMatcher* createPatternMatcher(thread_db* tdbb, const char* pattern) +static SimilarToRegex* createPatternMatcher(thread_db* tdbb, const char* pattern) { - PatternMatcher* matcher = NULL; + SimilarToRegex* matcher = NULL; try { if (pattern) { const int len = strlen(pattern); - Collation* obj = INTL_texttype_lookup(tdbb, CS_UTF8); - matcher = obj->createSimilarToMatcher(*tdbb->getDefaultPool(), - (const UCHAR*) pattern, len, (UCHAR*) "\\", 1); + //// TODO: Should this be different than trace and replication + //// and use case sensitive matcher? + matcher = FB_NEW_POOL(*tdbb->getDefaultPool()) SimilarToRegex( + *tdbb->getDefaultPool(), false, + pattern, len, + "\\", 1); } } catch (const Exception& ex) @@ -870,8 +872,6 @@ Validation::Validation(thread_db* tdbb, UtilSvc* uSvc) : vdr_page_bitmap = NULL; vdr_service = uSvc; - vdr_tab_incl = vdr_tab_excl = NULL; - vdr_idx_incl = vdr_idx_excl = NULL; vdr_lock_tout = -10; if (uSvc) { @@ -882,11 +882,6 @@ Validation::Validation(thread_db* tdbb, UtilSvc* uSvc) : Validation::~Validation() { - delete vdr_tab_incl; - delete vdr_tab_excl; - delete vdr_idx_incl; - delete vdr_idx_excl; - output("Validation finished\n"); } @@ -1654,22 +1649,14 @@ void Validation::walk_database() if (vdr_tab_incl) { - vdr_tab_incl->reset(); - if (!vdr_tab_incl->process((UCHAR*) relation->rel_name.c_str(), relation->rel_name.length()) || - !vdr_tab_incl->result()) - { + if (!vdr_tab_incl->matches(relation->rel_name.c_str(), relation->rel_name.length())) continue; - } } if (vdr_tab_excl) { - vdr_tab_excl->reset(); - if (!vdr_tab_excl->process((UCHAR*) relation->rel_name.c_str(), relation->rel_name.length()) || - vdr_tab_excl->result()) - { + if 
(vdr_tab_excl->matches(relation->rel_name.c_str(), relation->rel_name.length())) continue; - } } // We can't realiable track double allocated page's when validating online. @@ -3163,15 +3150,13 @@ Validation::RTN Validation::walk_root(jrd_rel* relation) if (vdr_idx_incl) { - vdr_idx_incl->reset(); - if (!vdr_idx_incl->process((UCHAR*) index.c_str(), index.length()) || !vdr_idx_incl->result()) + if (!vdr_idx_incl->matches(index.c_str(), index.length())) continue; } if (vdr_idx_excl) { - vdr_idx_excl->reset(); - if (!vdr_idx_excl->process((UCHAR*) index.c_str(), index.length()) || vdr_idx_excl->result()) + if (vdr_idx_excl->matches(index.c_str(), index.length())) continue; } diff --git a/src/jrd/validation.h b/src/jrd/validation.h index c45a6d5197..a6aa2c5279 100644 --- a/src/jrd/validation.h +++ b/src/jrd/validation.h @@ -28,6 +28,7 @@ #include "fb_types.h" #include "../common/classes/array.h" +#include "../common/SimilarToRegex.h" #include "../jrd/ods.h" #include "../jrd/cch.h" #include "../jrd/sbm.h" @@ -150,10 +151,10 @@ private: ULONG vdr_err_counts[VAL_MAX_ERROR]; Firebird::UtilSvc* vdr_service; - PatternMatcher* vdr_tab_incl; - PatternMatcher* vdr_tab_excl; - PatternMatcher* vdr_idx_incl; - PatternMatcher* vdr_idx_excl; + Firebird::AutoPtr vdr_tab_incl; + Firebird::AutoPtr vdr_tab_excl; + Firebird::AutoPtr vdr_idx_incl; + Firebird::AutoPtr vdr_idx_excl; int vdr_lock_tout; void checkDPinPP(jrd_rel *relation, SLONG page_number); void checkDPinPIP(jrd_rel *relation, SLONG page_number); diff --git a/src/utilities/CMakeLists.txt b/src/utilities/CMakeLists.txt index b275a1e7e3..2330750bae 100644 --- a/src/utilities/CMakeLists.txt +++ b/src/utilities/CMakeLists.txt @@ -46,7 +46,6 @@ set(fbtrace_src ntrace/TraceConfiguration.cpp ntrace/traceplugin.cpp ntrace/TracePluginImpl.cpp - ntrace/TraceUnicodeUtils.cpp ntrace/os/platform.h ) @@ -70,11 +69,11 @@ if (WIN32) set(instreg_src install/install_reg.cpp
install/registry.cpp - + install/registry.h install/regis_proto.h ) - add_executable (instreg ${instreg_src} ${VERSION_RC}) + add_executable (instreg ${instreg_src} ${VERSION_RC}) ########################################################################### # EXECUTABLE instsvc @@ -86,7 +85,7 @@ if (WIN32) install/servi_proto.h ) add_executable (instsvc ${instsvc_src} ${VERSION_RC}) - target_link_libraries (instsvc common yvalve) + target_link_libraries (instsvc common yvalve) ########################################################################### # EXECUTABLE instclient diff --git a/src/utilities/ntrace/TraceConfiguration.cpp b/src/utilities/ntrace/TraceConfiguration.cpp index bdeb1d00bf..2c4136e1f3 100644 --- a/src/utilities/ntrace/TraceConfiguration.cpp +++ b/src/utilities/ntrace/TraceConfiguration.cpp @@ -26,9 +26,7 @@ */ #include "TraceConfiguration.h" -#include "TraceUnicodeUtils.h" -#include "../../jrd/evl_string.h" -#include "../../jrd/SimilarToMatcher.h" +#include "../../common/SimilarToRegex.h" #include "../../common/isc_f_proto.h" using namespace Firebird; @@ -67,26 +65,6 @@ void TraceCfgReader::readTraceConfiguration(const char* text, } -namespace -{ - template - class SystemToUtf8Converter : public PrevConverter - { - public: - SystemToUtf8Converter(MemoryPool& pool, Jrd::TextType* obj, const UCHAR*& str, SLONG& len) - : PrevConverter(pool, obj, str, len) - { - buffer.assign(reinterpret_cast(str), len); - ISC_systemToUtf8(buffer); - str = reinterpret_cast(buffer.c_str()); - len = buffer.length(); - } - - private: - string buffer; - }; -} - #define ERROR_PREFIX "error while parsing trace configuration\n\t" void TraceCfgReader::readConfig() @@ -156,31 +134,28 @@ void TraceCfgReader::readConfig() try { #ifdef WIN_NT // !CASE_SENSITIVITY - typedef Jrd::UpcaseConverter > SimilarConverter; + const bool caseInsensitive = true; #else - typedef SystemToUtf8Converter<> SimilarConverter; + const bool caseInsensitive = false; #endif + string utf8Pattern = 
pattern; + ISC_systemToUtf8(utf8Pattern); - UnicodeCollationHolder unicodeCollation(*getDefaultMemoryPool()); - Jrd::TextType* textType = unicodeCollation.getTextType(); - - SimilarToMatcher > matcher( - *getDefaultMemoryPool(), textType, (const UCHAR*) pattern.c_str(), - pattern.length(), '\\', true); + SimilarToRegex matcher(*getDefaultMemoryPool(), caseInsensitive, + utf8Pattern.c_str(), utf8Pattern.length(), "\\", 1); regExpOk = true; - matcher.process((const UCHAR*) m_databaseName.c_str(), m_databaseName.length()); - if (matcher.result()) - { - for (unsigned i = 0; - i <= matcher.getNumBranches() && i < FB_NELEM(m_subpatterns); ++i) - { - unsigned start, length; - matcher.getBranchInfo(i, &start, &length); + PathName utf8DatabaseName = m_databaseName; + ISC_systemToUtf8(utf8DatabaseName); + Array matchPosArray; - m_subpatterns[i].start = start; - m_subpatterns[i].end = start + length; + if (matcher.matches(utf8DatabaseName.c_str(), utf8DatabaseName.length(), &matchPosArray)) + { + for (unsigned i = 0; i < matchPosArray.getCount() && i < FB_NELEM(m_subpatterns); ++i) + { + m_subpatterns[i].start = matchPosArray[i].start; + m_subpatterns[i].end = matchPosArray[i].start + matchPosArray[i].length; } match = exactMatch = true; diff --git a/src/utilities/ntrace/TracePluginImpl.cpp b/src/utilities/ntrace/TracePluginImpl.cpp index d4793ebf81..214388638c 100644 --- a/src/utilities/ntrace/TracePluginImpl.cpp +++ b/src/utilities/ntrace/TracePluginImpl.cpp @@ -99,7 +99,6 @@ TracePluginImpl::TracePluginImpl(IPluginBase* plugin, transactions(getDefaultMemoryPool()), statements(getDefaultMemoryPool()), services(getDefaultMemoryPool()), - unicodeCollation(*getDefaultMemoryPool()), include_codes(*getDefaultMemoryPool()), exclude_codes(*getDefaultMemoryPool()) { @@ -124,8 +123,6 @@ TracePluginImpl::TracePluginImpl(IPluginBase* plugin, logWriter->addRef(); } - Jrd::TextType* textType = unicodeCollation.getTextType(); - // Compile filtering regular expressions const char* str = 
NULL; try @@ -136,9 +133,10 @@ TracePluginImpl::TracePluginImpl(IPluginBase* plugin, string filter(config.include_filter); ISC_systemToUtf8(filter); - include_matcher = FB_NEW TraceSimilarToMatcher( - *getDefaultMemoryPool(), textType, (const UCHAR*) filter.c_str(), - filter.length(), '\\', true); + include_matcher = FB_NEW SimilarToRegex( + *getDefaultMemoryPool(), true, + filter.c_str(), filter.length(), + "\\", 1); } if (config.exclude_filter.hasData()) @@ -147,9 +145,10 @@ TracePluginImpl::TracePluginImpl(IPluginBase* plugin, string filter(config.exclude_filter); ISC_systemToUtf8(filter); - exclude_matcher = FB_NEW TraceSimilarToMatcher( - *getDefaultMemoryPool(), textType, (const UCHAR*) filter.c_str(), - filter.length(), '\\', true); + exclude_matcher = FB_NEW SimilarToRegex( + *getDefaultMemoryPool(), true, + filter.c_str(), filter.length(), + "\\", 1); } } catch (const Exception&) @@ -1546,18 +1545,10 @@ void TracePluginImpl::register_sql_statement(ITraceSQLStatement* statement) return; if (config.include_filter.hasData()) - { - include_matcher->reset(); - include_matcher->process((const UCHAR*) sql, sql_length); - need_statement = include_matcher->result(); - } + need_statement = include_matcher->matches(sql, sql_length); if (need_statement && config.exclude_filter.hasData()) - { - exclude_matcher->reset(); - exclude_matcher->process((const UCHAR*) sql, sql_length); - need_statement = !exclude_matcher->result(); - } + need_statement = !exclude_matcher->matches(sql, sql_length); if (need_statement) { @@ -1949,18 +1940,10 @@ bool TracePluginImpl::checkServiceFilter(ITraceServiceConnection* service, bool bool enabled = true; if (config.include_filter.hasData()) - { - include_matcher->reset(); - include_matcher->process((const UCHAR*) svcName, svcNameLen); - enabled = include_matcher->result(); - } + enabled = include_matcher->matches(svcName, svcNameLen); if (enabled && config.exclude_filter.hasData()) - { - exclude_matcher->reset(); - 
exclude_matcher->process((const UCHAR*) svcName, svcNameLen); - enabled = !exclude_matcher->result(); - } + enabled = !exclude_matcher->matches(svcName, svcNameLen); if (data) { data->enabled = enabled; diff --git a/src/utilities/ntrace/TracePluginImpl.h b/src/utilities/ntrace/TracePluginImpl.h index 8824c3db4d..2c99be0d3d 100644 --- a/src/utilities/ntrace/TracePluginImpl.h +++ b/src/utilities/ntrace/TracePluginImpl.h @@ -32,11 +32,7 @@ #include "firebird.h" #include "../../jrd/ntrace.h" #include "TracePluginConfig.h" -#include "TraceUnicodeUtils.h" -#include "../../jrd/intl_classes.h" -#include "../../jrd/evl_string.h" -#include "../../common/TextType.h" -#include "../../jrd/SimilarToMatcher.h" +#include "../../common/SimilarToRegex.h" #include "../../common/classes/rwlock.h" #include "../../common/classes/GenericMap.h" #include "../../common/classes/locks.h" @@ -168,10 +164,7 @@ private: // Lock for log rotation Firebird::RWLock renameLock; - UnicodeCollationHolder unicodeCollation; - typedef Firebird::SimilarToMatcher > > - TraceSimilarToMatcher; - Firebird::AutoPtr include_matcher, exclude_matcher; + Firebird::AutoPtr include_matcher, exclude_matcher; // Filters for gds error codes typedef Firebird::SortedArray GdsCodesArray; diff --git a/src/utilities/ntrace/TraceUnicodeUtils.cpp b/src/utilities/ntrace/TraceUnicodeUtils.cpp deleted file mode 100644 index 80626af542..0000000000 --- a/src/utilities/ntrace/TraceUnicodeUtils.cpp +++ /dev/null @@ -1,65 +0,0 @@ -/* - * PROGRAM: Firebird Trace Services - * MODULE: TraceUnicodeUtils.cpp - * DESCRIPTION: Unicode support for trace needs - * - * The contents of this file are subject to the Initial - * Developer's Public License Version 1.0 (the "License"); - * you may not use this file except in compliance with the - * License. You may obtain a copy of the License at - * http://www.ibphoenix.com/main.nfs?a=ibphoenix&page=ibp_idpl. 
- * - * Software distributed under the License is distributed AS IS, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. - * See the License for the specific language governing rights - * and limitations under the License. - * - * The Original Code was created by Khorsun Vladyslav - * for the Firebird Open Source RDBMS project. - * - * Copyright (c) 2010 Khorsun Vladyslav - * and all contributors signed below. - * - * All Rights Reserved. - * Contributor(s): ______________________________________. - * Adriano dos Santos Fernandes - * - */ - - -#include "TraceUnicodeUtils.h" - -using namespace Firebird; - -UnicodeCollationHolder::UnicodeCollationHolder(MemoryPool& pool) -{ - cs = FB_NEW_POOL(pool) charset; - tt = FB_NEW_POOL(pool) texttype; - - IntlUtil::initUtf8Charset(cs); - - string collAttributes("ICU-VERSION="); - collAttributes += Jrd::UnicodeUtil::getDefaultIcuVersion(); - IntlUtil::setupIcuAttributes(cs, collAttributes, "", collAttributes); - - UCharBuffer collAttributesBuffer; - collAttributesBuffer.push(reinterpret_cast(collAttributes.c_str()), - collAttributes.length()); - - if (!IntlUtil::initUnicodeCollation(tt, cs, "UNICODE", 0, collAttributesBuffer, string())) - fatal_exception::raiseFmt("cannot initialize UNICODE collation to use in trace plugin"); - - charSet = Jrd::CharSet::createInstance(pool, 0, cs); - textType = FB_NEW_POOL(pool) Jrd::TextType(0, tt, charSet); -} - -UnicodeCollationHolder::~UnicodeCollationHolder() -{ - fb_assert(tt->texttype_fn_destroy); - - if (tt->texttype_fn_destroy) - tt->texttype_fn_destroy(tt); - - // cs should be deleted by texttype_fn_destroy call above - delete tt; -} diff --git a/src/utilities/ntrace/TraceUnicodeUtils.h b/src/utilities/ntrace/TraceUnicodeUtils.h deleted file mode 100644 index a66f428318..0000000000 --- a/src/utilities/ntrace/TraceUnicodeUtils.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * PROGRAM: Firebird Trace Services - * MODULE: TraceUnicodeUtils.h - * DESCRIPTION: Unicode support for trace 
needs - * - * The contents of this file are subject to the Initial - * Developer's Public License Version 1.0 (the "License"); - * you may not use this file except in compliance with the - * License. You may obtain a copy of the License at - * http://www.ibphoenix.com/main.nfs?a=ibphoenix&page=ibp_idpl. - * - * Software distributed under the License is distributed AS IS, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. - * See the License for the specific language governing rights - * and limitations under the License. - * - * The Original Code was created by Khorsun Vladyslav - * for the Firebird Open Source RDBMS project. - * - * Copyright (c) 2010 Khorsun Vladyslav - * and all contributors signed below. - * - * All Rights Reserved. - * Contributor(s): ______________________________________. - * - */ - -#ifndef TRACE_UNICODE_UTILS_H -#define TRACE_UNICODE_UTILS_H - -#include "firebird.h" -#include "../../common/classes/fb_string.h" -#include "../../jrd/intl_classes.h" -#include "../../common/TextType.h" -#include "../../common/unicode_util.h" - - -class UnicodeCollationHolder -{ -private: - charset* cs; - texttype* tt; - Firebird::AutoPtr charSet; - Firebird::AutoPtr textType; - -public: - explicit UnicodeCollationHolder(Firebird::MemoryPool& pool); - ~UnicodeCollationHolder(); - - Jrd::TextType* getTextType() - { - return textType; - } -}; - - -#endif // TRACE_UNICODE_UTILS_H