8
0
mirror of https://github.com/FirebirdSQL/firebird.git synced 2025-01-22 16:03:03 +01:00

WIP: Reimplementation of SIMILAR TO and SUBSTRING...SIMILAR using Google's re2 library.

This should fix:
CORE-4874
CORE-5664
CORE-3858
CORE-6088
CORE-3773
CORE-5931
CORE-6088
CORE-4893
This commit is contained in:
Adriano dos Santos Fernandes 2019-08-10 22:39:19 -03:00
parent 14d41d409e
commit 28e18749ff
27 changed files with 1221 additions and 2401 deletions

View File

@ -307,7 +307,8 @@ endif
LIB_PATH_OPTS = $(call LIB_LINK_RPATH,lib) $(call LIB_LINK_RPATH,intl)
LIB_LINK_SONAME= -Wl,-soname,$(1)
LIB_LINK_MAPFILE= -Wl,--version-script,$(1)
FIREBIRD_LIBRARY_LINK= -L$(LIB) -lfbclient $(MATHLIB) $(CRYPTLIB)
# FIXME:
FIREBIRD_LIBRARY_LINK= -L$(LIB) -lfbclient $(MATHLIB) $(CRYPTLIB) -lre2
EXE_LINK_OPTIONS= $(LDFLAGS) $(THR_FLAGS) $(UNDEF_FLAGS) $(LIB_PATH_OPTS) $(LINK_EMPTY_SYMBOLS)
LIB_LINK_OPTIONS= $(LDFLAGS) $(THR_FLAGS) -shared

View File

@ -90,6 +90,7 @@ Note:
<left paren>, <right paren>, <vertical bar>, <circumflex>, <minus sign>, <plus sign>, <asterisk>,
<underscore>, <percent>, <question mark>, <left brace> or <escape character>.
3) Since FB 4 the repeat factor low/high values cannot be greater than 1000.
Syntax description and examples:
@ -174,7 +175,7 @@ Matches a character not identical to one of <character enumeration>:
Matches a character identical to one of <character enumeration include> but not identical to one
of <character enumeration exclude>:
<left bracket> <character enumeration include>... <circumflex> <character enumeration exclude>...
<left bracket> <character enumeration include>... <circumflex> <character enumeration exclude>...
'3' SIMILAR TO '[[:DIGIT:]^3]' -- false
'4' SIMILAR TO '[[:DIGIT:]^3]' -- true
@ -220,3 +221,36 @@ insert into department values ('600', 'Engineering', '(408) 555-123'); -- check
select * from department
where phone not similar to '\([0-9]{3}\) 555\-%' escape '\';
Appendix:
Since FB 4 SIMILAR TO and SUBSTRING...SIMILAR are implemented using the re2 library,
which has the following license:
Copyright (c) 2009 The RE2 Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -2542,15 +2542,11 @@ void BurpGlobals::setupSkipData(const Firebird::string& regexp)
ISC_systemToUtf8(filter);
BurpGlobals* tdgbl = BurpGlobals::getSpecific();
if (!unicodeCollation)
unicodeCollation = FB_NEW_POOL(tdgbl->getPool()) UnicodeCollationHolder(tdgbl->getPool());
Jrd::TextType* const textType = unicodeCollation->getTextType();
skipDataMatcher.reset(FB_NEW_POOL(tdgbl->getPool())
Firebird::SimilarToMatcher<UCHAR, Jrd::UpcaseConverter<> >
(tdgbl->getPool(), textType, (const UCHAR*) filter.c_str(),
filter.length(), '\\', true));
skipDataMatcher.reset(FB_NEW_POOL(tdgbl->getPool()) Firebird::SimilarToRegex(
tdgbl->getPool(), true,
filter.c_str(), filter.length(),
"\\", 1));
}
}
catch (const Firebird::Exception&)
@ -2571,18 +2567,12 @@ Firebird::string BurpGlobals::toSystem(const Firebird::PathName& from)
bool BurpGlobals::skipRelation(const char* name)
{
if (gbl_sw_meta)
{
return true;
}
if (!skipDataMatcher)
{
return false;
}
skipDataMatcher->reset();
skipDataMatcher->process(reinterpret_cast<const UCHAR*>(name), static_cast<SLONG>(strlen(name)));
return skipDataMatcher->result();
return skipDataMatcher->matches(name, strlen(name));
}
void BurpGlobals::read_stats(SINT64* stats)
@ -2703,39 +2693,6 @@ void BurpGlobals::print_stats_header()
burp_output(false, "\n");
}
UnicodeCollationHolder::UnicodeCollationHolder(MemoryPool& pool)
{
cs = FB_NEW_POOL(pool) charset;
tt = FB_NEW_POOL(pool) texttype;
Firebird::IntlUtil::initUtf8Charset(cs);
Firebird::string collAttributes("ICU-VERSION=");
collAttributes += Jrd::UnicodeUtil::getDefaultIcuVersion();
Firebird::IntlUtil::setupIcuAttributes(cs, collAttributes, "", collAttributes);
Firebird::UCharBuffer collAttributesBuffer;
collAttributesBuffer.push(reinterpret_cast<const UCHAR*>(collAttributes.c_str()),
collAttributes.length());
if (!Firebird::IntlUtil::initUnicodeCollation(tt, cs, "UNICODE", 0, collAttributesBuffer, Firebird::string()))
Firebird::fatal_exception::raiseFmt("cannot initialize UNICODE collation to use in gbak");
charSet = Jrd::CharSet::createInstance(pool, 0, cs);
textType = FB_NEW_POOL(pool) Jrd::TextType(0, tt, charSet);
}
UnicodeCollationHolder::~UnicodeCollationHolder()
{
fb_assert(tt->texttype_fn_destroy);
if (tt->texttype_fn_destroy)
tt->texttype_fn_destroy(tt);
// cs should be deleted by texttype_fn_destroy call above
delete tt;
}
void BURP_makeSymbol(BurpGlobals* tdgbl, Firebird::string& name) // add double quotes to string
{
if (tdgbl->gbl_dialect < SQL_DIALECT_V6)

View File

@ -42,7 +42,7 @@
#include "../common/classes/array.h"
#include "../common/classes/fb_pair.h"
#include "../common/classes/MetaName.h"
#include "../jrd/SimilarToMatcher.h"
#include "../common/SimilarToRegex.h"
#include "../common/status.h"
#include "../common/sha.h"
#include "../common/classes/ImplementHelper.h"
@ -894,26 +894,6 @@ static const char HDR_SPLIT_TAG6[] = "InterBase/gbak, ";
const FB_UINT64 MIN_SPLIT_SIZE = FB_CONST64(2048); // bytes
// Copy&paste from TraceUnicodeUtils.h - fixme !!!!!!!!
class UnicodeCollationHolder
{
private:
charset* cs;
texttype* tt;
Firebird::AutoPtr<Jrd::CharSet> charSet;
Firebird::AutoPtr<Jrd::TextType> textType;
public:
explicit UnicodeCollationHolder(Firebird::MemoryPool& pool);
~UnicodeCollationHolder();
Jrd::TextType* getTextType()
{
return textType;
}
};
// Global switches and data
struct BurpCrypt;
@ -1174,8 +1154,7 @@ public:
bool flag_on_line; // indicates whether we will bring the database on-line
bool firstMap; // this is the first time we entered get_mapping()
bool stdIoMode; // stdin or stdout is used as backup file
Firebird::AutoPtr<UnicodeCollationHolder> unicodeCollation;
Firebird::AutoPtr<Firebird::SimilarToMatcher<UCHAR, Jrd::UpcaseConverter<> > > skipDataMatcher;
Firebird::AutoPtr<Firebird::SimilarToRegex> skipDataMatcher;
public:
Firebird::string toSystem(const Firebird::PathName& from);

View File

@ -0,0 +1,821 @@
/*
* The contents of this file are subject to the Initial
* Developer's Public License Version 1.0 (the "License");
* you may not use this file except in compliance with the
* License. You may obtain a copy of the License at
* http://www.ibphoenix.com/main.nfs?a=ibphoenix&page=ibp_idpl.
*
* Software distributed under the License is distributed AS IS,
* WITHOUT WARRANTY OF ANY KIND, either express or implied.
* See the License for the specific language governing rights
* and limitations under the License.
*
* The Original Code was created by Adriano dos Santos Fernandes
* for the Firebird Open Source RDBMS project.
*
* Copyright (c) 2019 Adriano dos Santos Fernandes <adrianosf at gmail.com>
* and all contributors signed below.
*
*/
#include "firebird.h"
#include "../common/SimilarToRegex.h"
#include "../common/StatusArg.h"
#include <string.h>
#include <unicode/utf8.h>
using namespace Firebird;
namespace
{
static const unsigned FLAG_PREFER_FEWER = 0x01;
static const unsigned FLAG_CASE_INSENSITIVE = 0x02;
static const unsigned FLAG_GROUP_CAPTURE = 0x04;
//// TODO: Verify usage of U8_NEXT_UNSAFE.
class SimilarToCompiler
{
public:
// Translates a SIMILAR TO pattern (UTF-8 bytes) into RE2 syntax and compiles it.
//   pool        - pool used for the translated pattern string and the RE2 object.
//   regexp      - receives the compiled RE2 object.
//   aFlags      - combination of FLAG_PREFER_FEWER, FLAG_CASE_INSENSITIVE, FLAG_GROUP_CAPTURE.
//   aPatternStr - pattern bytes; aPatternLen is its length in bytes.
//   escapeStr   - escape character bytes, or nullptr when no escape is in effect;
//                 escapeLen is its length in bytes.
// Raises isc_escape_invalid when the escape is not exactly one character and
// isc_invalid_similar_pattern when the pattern cannot be parsed or RE2 rejects it.
SimilarToCompiler(MemoryPool& pool, AutoPtr<RE2>& regexp, unsigned aFlags,
	const char* aPatternStr, unsigned aPatternLen,
	const char* escapeStr, unsigned escapeLen)
	: re2PatternStr(pool),
	  patternStr(aPatternStr),
	  patternPos(0),
	  patternLen(aPatternLen),
	  escapeChar(0),
	  flags(aFlags),
	  useEscape(escapeStr != nullptr)
{
	if (escapeStr)
	{
		// An empty escape can never be one character; reject it before
		// U8_NEXT_UNSAFE reads a byte that is not there.
		if (escapeLen == 0)
			status_exception::raise(Arg::Gds(isc_escape_invalid));

		int32_t escapePos = 0;
		U8_NEXT_UNSAFE(escapeStr, escapePos, escapeChar);

		// The escape must be exactly one code point long.
		if (static_cast<unsigned>(escapePos) != escapeLen)
			status_exception::raise(Arg::Gds(isc_escape_invalid));
	}

	if (flags & FLAG_CASE_INSENSITIVE)
		re2PatternStr.append("(?i)");

	if (flags & FLAG_GROUP_CAPTURE)
		re2PatternStr.append("(");

	// parseExpr() read-modify-writes its argument, so it must start with a defined value.
	int parseFlags = PARSE_FLAG_NOT_EMPTY;
	parseExpr(&parseFlags);

	if (flags & FLAG_GROUP_CAPTURE)
		re2PatternStr.append(")");

	// Check for proper termination.
	if (patternPos < patternLen)
		status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));

	RE2::Options options;
	options.set_log_errors(false);
	options.set_dot_nl(true);	// '_' and '%' are emitted as '.', which must also match newlines

	re2::StringPiece sp((const char*) re2PatternStr.c_str(), re2PatternStr.length());
	regexp = FB_NEW_POOL(pool) RE2(sp, options);

	if (!regexp->ok())
		status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));
}
// True while the read position has not reached the end of the pattern.
bool hasChar()
{
	const bool exhausted = patternPos >= patternLen;
	return !exhausted;
}
// Decodes the code point at the read position and advances past it.
// Uses the unchecked ICU macro, so the caller must ensure hasChar() first.
UChar32 getChar()
{
	fb_assert(hasChar());

	UChar32 decoded;
	U8_NEXT_UNSAFE(patternStr, patternPos, decoded);

	return decoded;
}
// Returns the next code point without consuming it: reads it with getChar()
// and then restores the read position.
UChar32 peekChar()
{
	const auto mark = patternPos;
	const auto next = getChar();

	patternPos = mark;
	return next;
}
// True when c starts a repetition suffix: '*', '+', '?' or '{'.
bool isRep(UChar32 c) const
{
	switch (c)
	{
		case '*':
		case '+':
		case '?':
		case '{':
			return true;

		default:
			return false;
	}
}
// True when c is a SIMILAR TO metacharacter, i.e. one that is only a literal
// in a pattern when it is escaped.
bool isSpecial(UChar32 c)
{
	return
		c == '^' || c == '-' || c == '_' || c == '%' ||
		c == '[' || c == ']' ||
		c == '(' || c == ')' ||
		c == '{' || c == '}' ||
		c == '|' || c == '?' || c == '+' || c == '*';
}
// True when c gets a backslash prefix before being copied as a literal into
// the generated RE2 pattern.
bool isRe2Special(UChar32 c)
{
	return
		c == '\\' || c == '$' || c == '.' ||
		c == '^' || c == '-' || c == '_' ||
		c == '[' || c == ']' ||
		c == '(' || c == ')' ||
		c == '{' || c == '}' ||
		c == '|' || c == '?' || c == '+' || c == '*';
}
// Parses an alternation: <term> { '|' <term> }...
// Each term is translated by parseTerm(); a "|" is appended to the RE2 pattern
// between terms. Parsing stops, without consuming it, at the first character
// that is not '|'.
// parseFlagOut receives the combined flags of the parsed terms.
void parseExpr(int* parseFlagOut)
{
	// Callers hand in an uninitialized int and the loop below only ever
	// read-modify-writes it, so give it a defined starting value first.
	*parseFlagOut = PARSE_FLAG_NOT_EMPTY;

	while (true)
	{
		int termFlags;
		parseTerm(&termFlags);

		// Clear NOT_EMPTY when this term may match the empty string, then merge its flags.
		*parseFlagOut &= ~(~termFlags & PARSE_FLAG_NOT_EMPTY);
		*parseFlagOut |= termFlags;

		const auto savePos = patternPos;

		if (!hasChar() || getChar() != '|')
		{
			// Not an alternation separator: leave it for the caller.
			patternPos = savePos;
			break;
		}

		re2PatternStr.append("|");
	}
}
void parseTerm(int* parseFlagOut)
{
*parseFlagOut = 0;
bool first = true;
while (hasChar())
{
auto c = peekChar();
if (c != '|' && c != ')')
{
int parseFlags;
parseFactor(&parseFlags);
*parseFlagOut |= parseFlags & PARSE_FLAG_NOT_EMPTY;
if (first)
{
*parseFlagOut |= parseFlags;
first = false;
}
}
else
break;
}
}
void parseFactor(int* parseFlagOut)
{
int parseFlags;
parsePrimary(&parseFlags);
UChar32 op;
if (!hasChar() || !isRep((op = peekChar())))
{
*parseFlagOut = parseFlags;
return;
}
if (!(parseFlags & PARSE_FLAG_NOT_EMPTY) && op != '?')
status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));
fb_assert(op == '*' || op == '+' || op == '?' || op == '{');
if (op == '*')
{
re2PatternStr.append((flags & FLAG_PREFER_FEWER) ? "*?" : "*");
*parseFlagOut = 0;
++patternPos;
}
else if (op == '+')
{
re2PatternStr.append((flags & FLAG_PREFER_FEWER) ? "+?" : "+");
*parseFlagOut = PARSE_FLAG_NOT_EMPTY;
++patternPos;
}
else if (op == '?')
{
re2PatternStr.append((flags & FLAG_PREFER_FEWER) ? "??" : "?");
*parseFlagOut = 0;
++patternPos;
}
else if (op == '{')
{
const auto repeatStart = patternPos++;
bool comma = false;
string s1, s2;
while (true)
{
if (!hasChar())
status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));
UChar32 c = getChar();
if (c == '}')
{
if (s1.isEmpty())
status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));
break;
}
else if (c == ',')
{
if (comma)
status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));
comma = true;
}
else
{
if (c >= '0' && c <= '9')
{
if (comma)
s2 += (char) c;
else
s1 += (char) c;
}
else
status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));
}
}
const int n1 = atoi(s1.c_str());
*parseFlagOut = n1 == 0 ? 0 : PARSE_FLAG_NOT_EMPTY;
re2PatternStr.append(patternStr + repeatStart, patternStr + patternPos);
if (flags & FLAG_PREFER_FEWER)
re2PatternStr.append("?");
}
if (hasChar() && isRep(peekChar()))
status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));
}
// Parses one <primary> and appends its RE2 equivalent to re2PatternStr:
//   '_'      -> "."
//   '%'      -> ".*" (".*?" with FLAG_PREFER_FEWER)
//   '[...]'  -> a character class (see below)
//   '(...)'  -> a capturing or non-capturing group around a nested expression
//   other    -> a run of literal characters, backslash-escaped where RE2 needs it
// parseFlagOut receives PARSE_FLAG_NOT_EMPTY for everything except '%' and a
// group whose nested expression did not report it.
// Raises isc_invalid_similar_pattern for malformed syntax and isc_escape_invalid
// when the escape character is not followed by a special or escape character.
void parsePrimary(int* parseFlagOut)
{
	*parseFlagOut = 0;

	fb_assert(hasChar());
	const auto savePos = patternPos;
	auto op = getChar();

	if (op == '_')
	{
		*parseFlagOut |= PARSE_FLAG_NOT_EMPTY;
		re2PatternStr.append(".");
		return;
	}
	else if (op == '%')
	{
		re2PatternStr.append((flags & FLAG_PREFER_FEWER) ? ".*?" : ".*");
		return;
	}
	else if (op == '[')
	{
		struct
		{
			const char* similarClass;
			const char* re2ClassInclude;
			const char* re2ClassExclude;
		} static const classes[] =
			{
				{"alnum", "[:alnum:]", "[:^alnum:]"},
				{"alpha", "[:alpha:]", "[:^alpha:]"},
				{"digit", "[:digit:]", "[:^digit:]"},
				{"lower", "[:lower:]", "[:^lower:]"},
				{"space", " ", "\\x00-\\x1F\\x21-\\x{10FFFF}"},
				{"upper", "[:upper:]", "[:^upper:]"},
				{"whitespace", "[:space:]", "[:^space:]"}
			};

		// One enumeration member: either a named class (clazz >= 0) or a character /
		// character range given as byte offsets into patternStr.
		struct Item
		{
			int clazz;
			unsigned firstStart, firstEnd, lastStart, lastEnd;
		};
		Array<Item> items;
		unsigned includeCount = 0;	// members seen before '^'
		bool exclude = false;		// '^' has been seen

		do
		{
			if (!hasChar())
				status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));

			unsigned charSavePos = patternPos;
			UChar32 c = getChar();
			bool charClass = false;

			if (useEscape && c == escapeChar)
			{
				if (!hasChar())
					status_exception::raise(Arg::Gds(isc_escape_invalid));

				charSavePos = patternPos;
				c = getChar();

				if (!(c == escapeChar || isSpecial(c)))
					status_exception::raise(Arg::Gds(isc_escape_invalid));
			}
			else if (c == '[')
				charClass = true;
			else if (c == '^')
			{
				if (exclude)
					status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));

				exclude = true;

				// `continue` in a do/while jumps straight to the loop condition, which
				// peeks at the next character - make sure there is one.
				if (!hasChar())
					status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));

				continue;
			}

			Item item = {-1, 0, 0, 0, 0};

			if (!exclude)
				++includeCount;

			if (charClass)
			{
				if (!hasChar() || getChar() != ':')
					status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));

				charSavePos = patternPos;

				while (hasChar() && getChar() != ':')
					;

				const SLONG len = patternPos - charSavePos - 1;

				if (!hasChar() || getChar() != ']')
					status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));

				const int classCount = static_cast<int>(FB_NELEM(classes));

				for (item.clazz = 0; item.clazz < classCount; ++item.clazz)
				{
					const char* const className = classes[item.clazz].similarClass;

					// The whole name must match: comparing only `len` characters would
					// accept any prefix (or an empty name) of a class.
					if (len == static_cast<SLONG>(strlen(className)) &&
						fb_utils::strnicmp(patternStr + charSavePos, className, len) == 0)
					{
						break;
					}
				}

				if (item.clazz >= classCount)
					status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));
			}
			else
			{
				item.clazz = -1;

				item.firstStart = item.lastStart = charSavePos;
				item.firstEnd = item.lastEnd = patternPos;

				if (hasChar() && peekChar() == '-')
				{
					getChar();

					// A range needs an end character.
					if (!hasChar())
						status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));

					charSavePos = patternPos;
					c = getChar();

					if (useEscape && c == escapeChar)
					{
						if (!hasChar())
							status_exception::raise(Arg::Gds(isc_escape_invalid));

						charSavePos = patternPos;
						c = getChar();

						if (!(c == escapeChar || isSpecial(c)))
							status_exception::raise(Arg::Gds(isc_escape_invalid));
					}

					item.lastStart = charSavePos;
					item.lastEnd = patternPos;
				}
			}

			items.add(item);

			if (!hasChar())
				status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));
		} while (peekChar() != ']');

		// Emits one member inside an RE2 bracket expression. With `negated`, the
		// complement of the member is emitted instead (everything below the first
		// character and everything above the last one).
		auto appendItem = [&](const Item& item, bool negated) {
			if (item.clazz != -1)
			{
				re2PatternStr.append(negated ?
					classes[item.clazz].re2ClassExclude :
					classes[item.clazz].re2ClassInclude);
			}
			else
			{
				if (negated)
				{
					UChar32 c;
					char hex[20];

					int32_t cPos = item.firstStart;
					U8_NEXT_UNSAFE(patternStr, cPos, c);

					if (c > 0)
					{
						re2PatternStr.append("\\x00");
						re2PatternStr.append("-");

						sprintf(hex, "\\x{%X}", (int) c - 1);
						re2PatternStr.append(hex);
					}

					cPos = item.lastStart;
					U8_NEXT_UNSAFE(patternStr, cPos, c);

					if (c < 0x10FFFF)
					{
						sprintf(hex, "\\x{%X}", (int) c + 1);
						re2PatternStr.append(hex);
						re2PatternStr.append("-");
						re2PatternStr.append("\\x{10FFFF}");
					}
				}
				else
				{
					if (isRe2Special(patternStr[item.firstStart]))
						re2PatternStr.append("\\");

					re2PatternStr.append(patternStr + item.firstStart, patternStr + item.firstEnd);

					if (item.lastStart != item.firstStart)
					{
						re2PatternStr.append("-");

						if (isRe2Special(patternStr[item.lastStart]))
							re2PatternStr.append("\\");

						re2PatternStr.append(patternStr + item.lastStart, patternStr + item.lastEnd);
					}
				}
			}
		};

		if (exclude && includeCount > 1)
		{
			// "[a b ^ x]" with several included members becomes an alternation with one
			// negated class per included member: (?:[^<not a>x]|[^<not b>x]).
			re2PatternStr.append("(?:");

			for (unsigned i = 0; i < includeCount; ++i)
			{
				if (i != 0)
					re2PatternStr.append("|");

				re2PatternStr.append("[");
				re2PatternStr.append("^");

				appendItem(items[i], true);

				for (unsigned j = includeCount; j < items.getCount(); ++j)
					appendItem(items[j], false);

				re2PatternStr.append("]");
			}

			re2PatternStr.append(")");
		}
		else
		{
			re2PatternStr.append("[");

			if (exclude)
				re2PatternStr.append("^");

			for (unsigned i = 0; i < items.getCount(); ++i)
				appendItem(items[i], exclude && i < includeCount);

			re2PatternStr.append("]");
		}

		getChar();	// consume the closing ']'
		*parseFlagOut |= PARSE_FLAG_NOT_EMPTY;
	}
	else if (op == '(')
	{
		re2PatternStr.append(flags & FLAG_GROUP_CAPTURE ? "(" : "(?:");

		int parseFlags;
		parseExpr(&parseFlags);

		if (!hasChar() || getChar() != ')')
			status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));

		re2PatternStr.append(")");

		*parseFlagOut |= parseFlags & PARSE_FLAG_NOT_EMPTY;
	}
	else
	{
		// Literal run: restart at the first character and copy characters until an
		// unescaped special character or the end of the pattern.
		patternPos = savePos;

		bool controlChar = false;

		do
		{
			auto charSavePos = patternPos;
			op = getChar();

			if (useEscape && op == escapeChar)
			{
				// The escape character must be followed by something.
				if (!hasChar())
					status_exception::raise(Arg::Gds(isc_escape_invalid));

				charSavePos = patternPos;
				op = getChar();

				if (!isSpecial(op) && op != escapeChar)
					status_exception::raise(Arg::Gds(isc_escape_invalid));
			}
			else if (isSpecial(op))
			{
				controlChar = true;
				patternPos = charSavePos;
			}

			if (!controlChar)
			{
				if (isRe2Special(op))
					re2PatternStr.append("\\");

				re2PatternStr.append(patternStr + charSavePos, patternStr + patternPos);
			}
		} while (!controlChar && hasChar());

		// Nothing consumed: the primary started with an unexpected special character.
		if (patternPos == savePos)
			status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));

		*parseFlagOut |= PARSE_FLAG_NOT_EMPTY;
	}
}
// Returns the RE2 pattern text built while parsing the SIMILAR TO pattern.
const string& getRe2PatternStr() const
{
return re2PatternStr;
}
private:
static const int PARSE_FLAG_NOT_EMPTY = 1; // known never to match empty string
string re2PatternStr;
const char* patternStr;
int32_t patternPos;
int32_t patternLen;
UChar32 escapeChar;
unsigned flags;
bool useEscape;
};
// Compiles a SUBSTRING ... SIMILAR pattern of the form R1<escape>"R2<escape>"R3.
// The pattern is split at the two <escape>" markers, each part is translated by
// SimilarToCompiler, and the three translations are joined as "(R1)(R2)(R3)" so
// that capture group 2 delimits the substring to return.
class SubstringSimilarCompiler
{
public:
	// regexp receives the compiled RE2 object. Only FLAG_CASE_INSENSITIVE is taken
	// from `flags`; R1 and R3 are additionally compiled with FLAG_PREFER_FEWER.
	// Raises isc_escape_invalid when the escape is missing or is not exactly one
	// character, and isc_invalid_similar_pattern when the pattern does not contain
	// exactly two <escape>" markers or a part fails to compile.
	SubstringSimilarCompiler(MemoryPool& pool, AutoPtr<RE2>& regexp, unsigned flags,
		const char* aPatternStr, unsigned aPatternLen,
		const char* escapeStr, unsigned escapeLen)
		: patternStr(aPatternStr),
		  patternPos(0),
		  patternLen(aPatternLen),
		  escapeChar(0)
	{
		// The markers cannot be located without an escape character, and
		// U8_NEXT_UNSAFE must not be pointed at a null or empty buffer.
		if (!escapeStr || escapeLen == 0)
			status_exception::raise(Arg::Gds(isc_escape_invalid));

		int32_t escapePos = 0;
		U8_NEXT_UNSAFE(escapeStr, escapePos, escapeChar);

		if (static_cast<unsigned>(escapePos) != escapeLen)
			status_exception::raise(Arg::Gds(isc_escape_invalid));

		// Byte offsets just past each <escape>" marker.
		unsigned positions[2] = {0, 0};
		unsigned part = 0;

		while (hasChar())
		{
			auto c = getChar();

			if (c != escapeChar)
				continue;

			if (!hasChar())
				status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));

			// Any other escaped character is skipped here; the per-part compilers
			// validate it later.
			c = getChar();

			if (c == '"')
			{
				if (part >= 2)
					status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));

				positions[part++] = patternPos;
			}
		}

		if (part != 2)
			status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));

		const unsigned caseFlag = flags & FLAG_CASE_INSENSITIVE;

		// Each marker occupies escapeLen bytes plus one byte for the quote.
		AutoPtr<RE2> regexp1, regexp2, regexp3;

		SimilarToCompiler compiler1(pool, regexp1, caseFlag | FLAG_PREFER_FEWER,
			aPatternStr, positions[0] - escapeLen - 1, escapeStr, escapeLen);

		SimilarToCompiler compiler2(pool, regexp2, caseFlag,
			aPatternStr + positions[0], positions[1] - positions[0] - escapeLen - 1, escapeStr, escapeLen);

		SimilarToCompiler compiler3(pool, regexp3, caseFlag | FLAG_PREFER_FEWER,
			aPatternStr + positions[1], patternLen - positions[1], escapeStr, escapeLen);

		string finalRe2Pattern;
		finalRe2Pattern.reserve(
			1 +	// (
			compiler1.getRe2PatternStr().length() +
			2 +	// )(
			compiler2.getRe2PatternStr().length() +
			2 +	// )(
			compiler3.getRe2PatternStr().length() +
			1	// )
		);

		finalRe2Pattern.append("(");
		finalRe2Pattern.append(compiler1.getRe2PatternStr());
		finalRe2Pattern.append(")(");
		finalRe2Pattern.append(compiler2.getRe2PatternStr());
		finalRe2Pattern.append(")(");
		finalRe2Pattern.append(compiler3.getRe2PatternStr());
		finalRe2Pattern.append(")");

		RE2::Options options;
		options.set_log_errors(false);
		options.set_dot_nl(true);

		re2::StringPiece sp((const char*) finalRe2Pattern.c_str(), finalRe2Pattern.length());
		regexp = FB_NEW_POOL(pool) RE2(sp, options);

		if (!regexp->ok())
			status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));
	}

	// True while the read position has not reached the end of the pattern.
	bool hasChar()
	{
		return patternPos < patternLen;
	}

	// Decodes the code point at the read position and advances past it.
	UChar32 getChar()
	{
		fb_assert(hasChar());
		UChar32 c;
		U8_NEXT_UNSAFE(patternStr, patternPos, c);
		return c;
	}

	// Returns the next code point without consuming it.
	UChar32 peekChar()
	{
		auto savePos = patternPos;
		auto c = getChar();
		patternPos = savePos;
		return c;
	}

private:
	const char* patternStr;
	int32_t patternPos;
	int32_t patternLen;
	UChar32 escapeChar;
};
} // namespace
namespace Firebird {
// Compiles a SIMILAR TO pattern into `regexp`. Groups are always captured and
// repetitions prefer fewer matches; case-insensitivity is optional.
SimilarToRegex::SimilarToRegex(MemoryPool& pool, bool caseInsensitive,
		const char* patternStr, unsigned patternLen, const char* escapeStr, unsigned escapeLen)
	: PermanentStorage(pool)
{
	unsigned compileFlags = FLAG_GROUP_CAPTURE | FLAG_PREFER_FEWER;

	if (caseInsensitive)
		compileFlags |= FLAG_CASE_INSENSITIVE;

	// The compiler does all its work in its constructor and stores the result in `regexp`.
	SimilarToCompiler compiler(pool, regexp, compileFlags,
		patternStr, patternLen, escapeStr, escapeLen);
}
bool SimilarToRegex::matches(const char* buffer, unsigned bufferLen, Array<MatchPos>* matchPosArray)
{
re2::StringPiece sp(buffer, bufferLen);
if (matchPosArray)
{
const int argsCount = regexp->NumberOfCapturingGroups();
Array<re2::StringPiece> resSps(argsCount);
resSps.resize(argsCount);
Array<RE2::Arg> args(argsCount);
args.resize(argsCount);
Array<RE2::Arg*> argsPtr(argsCount);
{ // scope
auto resSp = resSps.begin();
for (auto& arg : args)
{
arg = resSp++;
argsPtr.push(&arg);
}
}
if (RE2::FullMatchN(sp, *regexp.get(), argsPtr.begin(), argsCount))
{
matchPosArray->clear();
for (const auto resSp : resSps)
{
matchPosArray->push(MatchPos{
static_cast<unsigned>(resSp.data() - sp.begin()),
static_cast<unsigned>(resSp.length())
});
}
return true;
}
else
return false;
}
else
return RE2::FullMatch(sp, *regexp.get());
}
//---------------------
// Compiles a SUBSTRING ... SIMILAR pattern into `regexp`; the only option
// forwarded to the compiler is case-insensitivity.
SubstringSimilarRegex::SubstringSimilarRegex(MemoryPool& pool, bool caseInsensitive,
		const char* patternStr, unsigned patternLen, const char* escapeStr, unsigned escapeLen)
	: PermanentStorage(pool)
{
	unsigned compileFlags = 0;

	if (caseInsensitive)
		compileFlags = FLAG_CASE_INSENSITIVE;

	// The compiler does all its work in its constructor and stores the result in `regexp`.
	SubstringSimilarCompiler compiler(pool, regexp, compileFlags,
		patternStr, patternLen, escapeStr, escapeLen);
}
bool SubstringSimilarRegex::matches(const char* buffer, unsigned bufferLen,
unsigned* resultStart, unsigned* resultLength)
{
re2::StringPiece sp(buffer, bufferLen);
re2::StringPiece spResult;
if (RE2::FullMatch(sp, *regexp.get(), nullptr, &spResult, nullptr))
{
*resultStart = spResult.begin() - buffer;
*resultLength = spResult.length();
return true;
}
else
return false;
}
} // namespace Firebird

View File

@ -0,0 +1,75 @@
/*
* The contents of this file are subject to the Initial
* Developer's Public License Version 1.0 (the "License");
* you may not use this file except in compliance with the
* License. You may obtain a copy of the License at
* http://www.ibphoenix.com/main.nfs?a=ibphoenix&page=ibp_idpl.
*
* Software distributed under the License is distributed AS IS,
* WITHOUT WARRANTY OF ANY KIND, either express or implied.
* See the License for the specific language governing rights
* and limitations under the License.
*
* The Original Code was created by Adriano dos Santos Fernandes
* for the Firebird Open Source RDBMS project.
*
* Copyright (c) 2019 Adriano dos Santos Fernandes <adrianosf at gmail.com>
* and all contributors signed below.
*
*/
#ifndef COMMON_SIMILAR_TO_REGEX_H
#define COMMON_SIMILAR_TO_REGEX_H
#include "firebird.h"
#include <re2/re2.h>
#include "../common/classes/auto.h"
#include "../common/classes/array.h"
#include "../common/classes/fb_string.h"
namespace Firebird {
//// FIXME: Leak re2::RE2 when destroyed by pool.
// SIMILAR TO predicate backed by an re2::RE2 object compiled from the pattern.
class SimilarToRegex : public PermanentStorage
{
public:
// Position of one capturing group inside the matched buffer.
struct MatchPos
{
unsigned start;		// byte offset from the beginning of the buffer
unsigned length;	// length in bytes
};
public:
// Compiles patternStr (patternLen bytes). escapeStr may be nullptr when there is
// no escape character; otherwise it must hold exactly one character.
SimilarToRegex(MemoryPool& pool, bool caseInsensitive,
const char* patternStr, unsigned patternLen, const char* escapeStr, unsigned escapeLen);
public:
// Returns true when the whole buffer matches the pattern. When matchPosArray is
// given and the match succeeds, it is cleared and filled with one entry per
// capturing group.
bool matches(const char* buffer, unsigned bufferLen, Array<MatchPos>* matchPosArray = nullptr);
private:
AutoPtr<re2::RE2> regexp;	// compiled pattern
};
//// FIXME: Leak re2::RE2 when destroyed by pool.
// Given a regular expression R1<escape>"R2<escape>"R3 and the string S:
// - Find the shortest substring of S that matches R1 while the remainder (S23) matches R2R3;
// - Find the longest (S2) substring of S23 that matches R2 while the remainder matches R3;
// - Return S2.
class SubstringSimilarRegex : public PermanentStorage
{
public:
// Compiles patternStr (patternLen bytes), which must contain exactly two
// <escape>" markers separating R1, R2 and R3.
SubstringSimilarRegex(MemoryPool& pool, bool caseInsensitive,
const char* patternStr, unsigned patternLen, const char* escapeStr, unsigned escapeLen);
public:
// Returns true when the whole buffer matches; *resultStart and *resultLength then
// receive the byte offset and byte length of S2 inside buffer.
bool matches(const char* buffer, unsigned bufferLen, unsigned* resultStart, unsigned* resultLength);
private:
AutoPtr<re2::RE2> regexp;	// compiled "(R1)(R2)(R3)" expression
};
} // namespace Firebird
#endif // COMMON_SIMILAR_TO_REGEX_H

View File

@ -169,33 +169,6 @@ TextType::TextType(TTYPE_ID _type, texttype *_tt, CharSet* _cs)
memset(&canonicalChars[conversions[i].ch], 0, sizeof(ULONG));
}
}
struct Conversion2
{
const char* str;
UCHAR* buffer;
};
const Conversion2 conversions2[] =
{
{"0123456789", reinterpret_cast<UCHAR*>(canonicalNumbers)},
{"abcdefghijklmnopqrstuvwxyz", reinterpret_cast<UCHAR*>(canonicalLowerLetters)},
{"ABCDEFGHIJKLMNOPQRSTUVWXYZ", reinterpret_cast<UCHAR*>(canonicalUpperLetters)},
{" \t\v\r\n\f", reinterpret_cast<UCHAR*>(canonicalWhiteSpaces)}
};
for (int i = 0; i < FB_NELEM(conversions2); i++)
{
UCHAR temp[sizeof(ULONG)];
for (const char* p = conversions2[i].str; *p; ++p)
{
USHORT code = static_cast<USHORT>(*p);
ULONG length = getCharSet()->getConvFromUnicode().convert(sizeof(code), &code, sizeof(temp), temp);
const size_t pos = (p - conversions2[i].str) * getCanonicalWidth();
canonical(length, temp, sizeof(ULONG), &conversions2[i].buffer[pos]);
}
}
}

View File

@ -138,47 +138,8 @@ public:
return reinterpret_cast<const UCHAR*>(&canonicalChars[ch]);
}
const UCHAR* getCanonicalNumbers(int* count = NULL) const
{
if (count)
*count = 10;
return reinterpret_cast<const UCHAR*>(canonicalNumbers);
}
const UCHAR* getCanonicalLowerLetters(int* count = NULL) const
{
if (count)
*count = 26;
return reinterpret_cast<const UCHAR*>(canonicalLowerLetters);
}
const UCHAR* getCanonicalUpperLetters(int* count = NULL) const
{
if (count)
*count = 26;
return reinterpret_cast<const UCHAR*>(canonicalUpperLetters);
}
const UCHAR* getCanonicalWhiteSpaces(int* count = NULL) const
{
if (count)
*count = 6;
return reinterpret_cast<const UCHAR*>(canonicalWhiteSpaces);
}
const UCHAR* getCanonicalSpace(int* count = NULL) const
{
if (count)
*count = 1;
return getCanonicalChar(CHAR_SPACE);
}
private:
ULONG canonicalChars[CHAR_COUNT];
ULONG canonicalNumbers[10];
ULONG canonicalLowerLetters[26];
ULONG canonicalUpperLetters[26];
ULONG canonicalWhiteSpaces[6];
};
} // namespace Jrd

View File

@ -1031,6 +1031,37 @@ INTL_BOOL UnicodeUtil::utf32WellFormed(ULONG len, const ULONG* str, ULONG* offen
return true; // well-formed
}
// Rewrites `data` (UTF-8) in place through ICU's case/accent-insensitive
// transliterator: the text is converted to UTF-16, transliterated, and converted
// back to UTF-8.
// `data` is left unchanged when no transliterator is available or when the
// transliteration reports an error.
void UnicodeUtil::utf8Normalize(UCharBuffer& data)
{
	ICU* icu = loadICU("", "");

	HalfStaticArray<USHORT, BUFFER_MEDIUM> utf16Buffer(data.getCount());
	USHORT errCode;
	ULONG errPosition;
	ULONG utf16BufferLen = utf8ToUtf16(data.getCount(), data.begin(), data.getCount() * sizeof(USHORT),
		utf16Buffer.getBuffer(data.getCount()), &errCode, &errPosition);

	UTransliterator* trans = icu->getCiAiTransliterator();

	if (trans)
	{
		// ICU counts capacity, length and limit in UChars, not bytes. Passing a byte
		// count here would advertise twice the space the buffer really has.
		const int32_t capacity = utf16Buffer.getCount();
		int32_t len = utf16BufferLen / sizeof(USHORT);
		int32_t limit = len;

		UErrorCode errorCode = U_ZERO_ERROR;
		icu->utransTransUChars(trans, reinterpret_cast<UChar*>(utf16Buffer.begin()),
			&len, capacity, 0, &limit, &errorCode);
		icu->releaseCiAiTransliterator(trans);

		// On failure `len` may exceed the buffer (e.g. overflow reports the required
		// length), so the buffer content must not be converted back.
		if (U_FAILURE(errorCode))
			return;

		// Convert the transliterated text: its length is `len` UChars, which may
		// differ from the length before transliteration.
		len = utf16ToUtf8(len * sizeof(USHORT), utf16Buffer.begin(),
			len * 4, data.getBuffer(len * 4, false),
			&errCode, &errPosition);

		data.shrink(len);
	}
}
UnicodeUtil::ICU* UnicodeUtil::loadICU(const string& icuVersion, const string& configInfo)
{
ObjectsArray<string> versions;

View File

@ -177,6 +177,8 @@ public:
static INTL_BOOL utf16WellFormed(ULONG len, const USHORT* str, ULONG* offending_position);
static INTL_BOOL utf32WellFormed(ULONG len, const ULONG* str, ULONG* offending_position);
static void utf8Normalize(Firebird::UCharBuffer& data);
static ConversionICU& getConversionICU();
static ICU* loadICU(const Firebird::string& icuVersion, const Firebird::string& configInfo);
static bool getCollVersion(const Firebird::string& icuVersion,

View File

@ -945,7 +945,7 @@ bool ComparativeBoolNode::stringBoolean(thread_db* tdbb, jrd_req* request, dsc*
else // nod_similar
{
impure->vlu_misc.vlu_invariant = evaluator = obj->createSimilarToMatcher(
*tdbb->getDefaultPool(), p2, l2, escape_str, escape_length);
tdbb, *tdbb->getDefaultPool(), p2, l2, escape_str, escape_length);
}
}
else
@ -961,7 +961,7 @@ bool ComparativeBoolNode::stringBoolean(thread_db* tdbb, jrd_req* request, dsc*
}
else // nod_similar
{
evaluator = obj->createSimilarToMatcher(*tdbb->getDefaultPool(),
evaluator = obj->createSimilarToMatcher(tdbb, *tdbb->getDefaultPool(),
p2, l2, escape_str, escape_length);
}
@ -1152,7 +1152,7 @@ bool ComparativeBoolNode::stringFunction(thread_db* tdbb, jrd_req* request,
else // nod_similar
{
impure->vlu_misc.vlu_invariant = evaluator = obj->createSimilarToMatcher(
*tdbb->getDefaultPool(), p2, l2, escape_str, escape_length);
tdbb, *tdbb->getDefaultPool(), p2, l2, escape_str, escape_length);
}
}
else
@ -1170,7 +1170,7 @@ bool ComparativeBoolNode::stringFunction(thread_db* tdbb, jrd_req* request,
return obj->like(*tdbb->getDefaultPool(), p1, l1, p2, l2, escape_str, escape_length);
// nod_similar
return obj->similarTo(*tdbb->getDefaultPool(), p1, l1, p2, l2, escape_str, escape_length);
return obj->similarTo(tdbb, *tdbb->getDefaultPool(), p1, l1, p2, l2, escape_str, escape_length);
}
// Handle MATCHES

View File

@ -11873,7 +11873,7 @@ dsc* SubstringSimilarNode::execute(thread_db* tdbb, jrd_req* request) const
delete impure->vlu_misc.vlu_invariant;
impure->vlu_misc.vlu_invariant = evaluator = collation->createSubstringSimilarMatcher(
*tdbb->getDefaultPool(), patternStr, patternLen, escapeStr, escapeLen);
tdbb, *tdbb->getDefaultPool(), patternStr, patternLen, escapeStr, escapeLen);
impure->vlu_flags |= VLU_computed;
}
@ -11885,7 +11885,7 @@ dsc* SubstringSimilarNode::execute(thread_db* tdbb, jrd_req* request) const
}
else
{
autoEvaluator = evaluator = collation->createSubstringSimilarMatcher(*tdbb->getDefaultPool(),
autoEvaluator = evaluator = collation->createSubstringSimilarMatcher(tdbb, *tdbb->getDefaultPool(),
patternStr, patternLen, escapeStr, escapeLen);
}

View File

@ -99,16 +99,177 @@
#include "../jrd/intl_classes.h"
#include "../jrd/lck_proto.h"
#include "../jrd/intl_classes.h"
#include "../jrd/intl_proto.h"
#include "../jrd/Collation.h"
#include "../common/TextType.h"
#include "../common/SimilarToRegex.h"
#include "../jrd/SimilarToMatcher.h"
using namespace Firebird;
using namespace Jrd;
namespace {
//// TODO: NONE / OCTETS.
class Re2SimilarMatcher : public PatternMatcher
{
public:
Re2SimilarMatcher(thread_db* tdbb, MemoryPool& pool, TextType* textType,
const UCHAR* patternStr, SLONG patternLen, const UCHAR* escapeStr, SLONG escapeLen)
: PatternMatcher(pool, textType),
buffer(pool)
{
CsConvert converter = INTL_convert_lookup(tdbb, CS_UTF8, textType->getCharSet()->getId());
UCharBuffer patternBuffer, escapeBuffer;
converter.convert(patternLen, patternStr, patternBuffer);
if (textType->getFlags() & TEXTTYPE_ATTR_ACCENT_INSENSITIVE)
UnicodeUtil::utf8Normalize(patternBuffer);
if (escapeStr)
{
converter.convert(escapeLen, escapeStr, escapeBuffer);
if (textType->getFlags() & TEXTTYPE_ATTR_ACCENT_INSENSITIVE)
UnicodeUtil::utf8Normalize(escapeBuffer);
}
regex = FB_NEW_POOL(pool) SimilarToRegex(pool,
(textType->getFlags() & TEXTTYPE_ATTR_CASE_INSENSITIVE),
(const char*) patternBuffer.begin(), patternBuffer.getCount(),
(escapeStr ? (const char*) escapeBuffer.begin() : nullptr), escapeBuffer.getCount());
}
public:
static Re2SimilarMatcher* create(thread_db* tdbb, MemoryPool& pool, TextType* textType,
const UCHAR* patternStr, SLONG patternLen, const UCHAR* escapeStr, SLONG escapeLen)
{
return FB_NEW_POOL(pool) Re2SimilarMatcher(tdbb, pool, textType, patternStr, patternLen, escapeStr, escapeLen);
}
static bool evaluate(thread_db* tdbb, MemoryPool& pool, TextType* textType, const UCHAR* str, SLONG strLen,
const UCHAR* patternStr, SLONG patternLen, const UCHAR* escapeStr, SLONG escapeLen)
{
Re2SimilarMatcher matcher(tdbb, pool, textType, patternStr, patternLen, escapeStr, escapeLen);
matcher.process(str, strLen);
return matcher.result();
}
public:
virtual void reset()
{
buffer.shrink(0);
}
virtual bool process(const UCHAR* data, SLONG dataLen)
{
const FB_SIZE_T pos = buffer.getCount();
memcpy(buffer.getBuffer(pos + dataLen) + pos, data, dataLen);
return true;
}
virtual bool result()
{
if (textType->getFlags() & TEXTTYPE_ATTR_ACCENT_INSENSITIVE)
UnicodeUtil::utf8Normalize(buffer);
return regex->matches((const char*) buffer.begin(), buffer.getCount());
}
private:
AutoPtr<SimilarToRegex> regex;
UCharBuffer buffer;
};
class Re2SubstringSimilarMatcher : public BaseSubstringSimilarMatcher
{
public:
Re2SubstringSimilarMatcher(thread_db* tdbb, MemoryPool& pool, TextType* textType,
const UCHAR* patternStr, SLONG patternLen, const UCHAR* escapeStr, SLONG escapeLen)
: BaseSubstringSimilarMatcher(pool, textType),
buffer(pool),
resultStart(0),
resultLength(0)
{
CsConvert converter = INTL_convert_lookup(tdbb, textType->getCharSet()->getId(), CS_UTF8);
UCharBuffer patternBuffer, escapeBuffer;
converter.convert(patternLen, patternStr, patternBuffer);
if (textType->getFlags() & TEXTTYPE_ATTR_ACCENT_INSENSITIVE)
UnicodeUtil::utf8Normalize(patternBuffer);
if (escapeStr)
{
converter.convert(escapeLen, escapeStr, escapeBuffer);
if (textType->getFlags() & TEXTTYPE_ATTR_ACCENT_INSENSITIVE)
UnicodeUtil::utf8Normalize(escapeBuffer);
}
regex = FB_NEW_POOL(pool) SubstringSimilarRegex(pool,
(textType->getFlags() & TEXTTYPE_ATTR_CASE_INSENSITIVE),
(const char*) patternBuffer.begin(), patternBuffer.getCount(),
(escapeStr ? (const char*) escapeBuffer.begin() : nullptr), escapeBuffer.getCount());
}
virtual ~Re2SubstringSimilarMatcher()
{
}
public:
static Re2SubstringSimilarMatcher* create(thread_db* tdbb, MemoryPool& pool, TextType* textType,
const UCHAR* patternStr, SLONG patternLen, const UCHAR* escapeStr, SLONG escapeLen)
{
return FB_NEW_POOL(pool) Re2SubstringSimilarMatcher(tdbb, pool, textType,
patternStr, patternLen, escapeStr, escapeLen);
}
static bool evaluate(thread_db* tdbb, MemoryPool& pool, TextType* textType, const UCHAR* str, SLONG strLen,
const UCHAR* patternStr, SLONG patternLen, const UCHAR* escapeStr, SLONG escapeLen)
{
Re2SubstringSimilarMatcher matcher(tdbb, pool, textType, patternStr, patternLen, escapeStr, escapeLen);
matcher.process(str, strLen);
return matcher.result();
}
public:
virtual void reset()
{
buffer.shrink(0);
resultStart = resultLength = 0;
}
virtual bool process(const UCHAR* data, SLONG dataLen)
{
const FB_SIZE_T pos = buffer.getCount();
memcpy(buffer.getBuffer(pos + dataLen) + pos, data, dataLen);
return true;
}
virtual bool result()
{
if (textType->getFlags() & TEXTTYPE_ATTR_ACCENT_INSENSITIVE)
UnicodeUtil::utf8Normalize(buffer);
return regex->matches((const char*) buffer.begin(), buffer.getCount(), &resultStart, &resultLength);
}
virtual void getResultInfo(unsigned* start, unsigned* length)
{
*start = resultStart;
*length = resultLength;
}
private:
AutoPtr<SubstringSimilarRegex> regex;
UCharBuffer buffer;
unsigned resultStart, resultLength;
};
// constants used in matches and sleuth
const int CHAR_GDML_MATCH_ONE = TextType::CHAR_QUESTION_MARK;
const int CHAR_GDML_MATCH_ANY = TextType::CHAR_ASTERISK;
@ -725,8 +886,6 @@ template <
typename pStartsMatcher,
typename pContainsMatcher,
typename pLikeMatcher,
typename pSimilarToMatcher,
typename pSubstringSimilarMatcher,
typename pMatchesMatcher,
typename pSleuthMatcher
>
@ -781,22 +940,22 @@ public:
getCharSet()->getSqlMatchOne(), getCharSet()->getSqlMatchOneLength());
}
virtual bool similarTo(MemoryPool& pool, const UCHAR* s, SLONG sl,
virtual bool similarTo(thread_db* tdbb, MemoryPool& pool, const UCHAR* s, SLONG sl,
const UCHAR* p, SLONG pl, const UCHAR* escape, SLONG escapeLen)
{
return pSimilarToMatcher::evaluate(pool, this, s, sl, p, pl, escape, escapeLen);
return Re2SimilarMatcher::evaluate(tdbb, pool, this, s, sl, p, pl, escape, escapeLen);
}
virtual PatternMatcher* createSimilarToMatcher(MemoryPool& pool, const UCHAR* p, SLONG pl,
virtual PatternMatcher* createSimilarToMatcher(thread_db* tdbb, MemoryPool& pool, const UCHAR* p, SLONG pl,
const UCHAR* escape, SLONG escapeLen)
{
return pSimilarToMatcher::create(pool, this, p, pl, escape, escapeLen);
return Re2SimilarMatcher::create(tdbb, pool, this, p, pl, escape, escapeLen);
}
virtual BaseSubstringSimilarMatcher* createSubstringSimilarMatcher(MemoryPool& pool,
virtual BaseSubstringSimilarMatcher* createSubstringSimilarMatcher(thread_db* tdbb, MemoryPool& pool,
const UCHAR* p, SLONG pl, const UCHAR* escape, SLONG escapeLen)
{
return pSubstringSimilarMatcher::create(pool, this, p, pl, escape, escapeLen);
return Re2SubstringSimilarMatcher::create(tdbb, pool, this, p, pl, escape, escapeLen);
}
virtual bool contains(MemoryPool& pool, const UCHAR* s, SLONG sl, const UCHAR* p, SLONG pl)
@ -823,8 +982,6 @@ Collation* newCollation(MemoryPool& pool, TTYPE_ID id, texttype* tt, CharSet* cs
StartsMatcherUCharDirect,
ContainsMatcherUCharDirect,
LikeMatcher<T>,
SimilarToMatcher<T>,
SubstringSimilarMatcher<T>,
MatchesMatcher<T>,
SleuthMatcher<T>
> DirectImpl;
@ -833,8 +990,6 @@ Collation* newCollation(MemoryPool& pool, TTYPE_ID id, texttype* tt, CharSet* cs
StartsMatcherUCharCanonical,
ContainsMatcher<T>,
LikeMatcher<T>,
SimilarToMatcher<T>,
SubstringSimilarMatcher<T>,
MatchesMatcher<T>,
SleuthMatcher<T>
> NonDirectImpl;

View File

@ -66,12 +66,12 @@ public:
virtual PatternMatcher* createLikeMatcher(MemoryPool& pool, const UCHAR* p, SLONG pl,
const UCHAR* escape, SLONG escapeLen) = 0;
virtual bool similarTo(MemoryPool& pool, const UCHAR* s, SLONG sl, const UCHAR* p, SLONG pl,
virtual bool similarTo(thread_db* tdbb, MemoryPool& pool, const UCHAR* s, SLONG sl, const UCHAR* p, SLONG pl,
const UCHAR* escape, SLONG escapeLen) = 0;
virtual PatternMatcher* createSimilarToMatcher(MemoryPool& pool, const UCHAR* p, SLONG pl,
virtual PatternMatcher* createSimilarToMatcher(thread_db* tdbb, MemoryPool& pool, const UCHAR* p, SLONG pl,
const UCHAR* escape, SLONG escapeLen) = 0;
virtual BaseSubstringSimilarMatcher* createSubstringSimilarMatcher(MemoryPool& pool,
virtual BaseSubstringSimilarMatcher* createSubstringSimilarMatcher(thread_db* tdbb, MemoryPool& pool,
const UCHAR* p, SLONG pl, const UCHAR* escape, SLONG escapeLen) = 0;
virtual bool contains(MemoryPool& pool, const UCHAR* s, SLONG sl, const UCHAR* p, SLONG pl) = 0;

View File

@ -654,6 +654,7 @@ bool IntlManager::lookupCollation(const string& collationName,
attributes, specificAttributes, specificAttributesLen, ignoreAttributes,
collationExternalInfo.configInfo.c_str()))
{
tt->texttype_flags = attributes;
return true;
}
}

File diff suppressed because it is too large Load Diff

View File

@ -87,23 +87,12 @@ public:
UpcaseConverter(MemoryPool& pool, TextType* obj, const UCHAR*& str, SLONG& len)
: PrevConverter(pool, obj, str, len)
{
if (len > (int) sizeof(tempBuffer))
out_str = FB_NEW_POOL(pool) UCHAR[len];
else
out_str = tempBuffer;
obj->str_to_upper(len, str, len, out_str);
str = out_str;
}
~UpcaseConverter()
{
if (out_str != tempBuffer)
delete[] out_str;
obj->str_to_upper(len, str, len, tempBuffer.getBuffer(len, false));
str = tempBuffer.begin();
}
private:
UCHAR tempBuffer[100];
UCHAR* out_str;
Firebird::UCharBuffer tempBuffer;
};
template <typename PrevConverter = NullStrConverter>
@ -115,29 +104,17 @@ public:
{
const SLONG out_len = len / obj->getCharSet()->minBytesPerChar() * obj->getCanonicalWidth();
if (out_len > (int) sizeof(tempBuffer))
out_str = FB_NEW_POOL(pool) UCHAR[out_len];
else
out_str = tempBuffer;
if (str)
{
len = obj->canonical(len, str, out_len, out_str) * obj->getCanonicalWidth();
str = out_str;
len = obj->canonical(len, str, out_len, tempBuffer.getBuffer(out_len, false)) * obj->getCanonicalWidth();
str = tempBuffer.begin();
}
else
len = 0;
}
~CanonicalConverter()
{
if (out_str != tempBuffer)
delete[] out_str;
}
private:
UCHAR tempBuffer[100];
UCHAR* out_str;
Firebird::UCharBuffer tempBuffer;
};
} // namespace Jrd

View File

@ -50,50 +50,23 @@ TableMatcher::TableMatcher(MemoryPool& pool,
const string& excludeFilter)
: m_tables(pool)
{
m_cs = FB_NEW_POOL(pool) charset;
m_tt = FB_NEW_POOL(pool) texttype;
IntlUtil::initUtf8Charset(m_cs);
string collAttributes("ICU-VERSION=");
collAttributes += Jrd::UnicodeUtil::getDefaultIcuVersion();
IntlUtil::setupIcuAttributes(m_cs, collAttributes, "", collAttributes);
UCharBuffer collAttributesBuffer;
collAttributesBuffer.push(reinterpret_cast<const UCHAR*>(collAttributes.c_str()),
collAttributes.length());
if (!IntlUtil::initUnicodeCollation(m_tt, m_cs, "UNICODE", 0, collAttributesBuffer, ""))
raiseError("Cannot initialize UNICODE collation");
m_charSet = CharSet::createInstance(pool, 0, m_cs);
m_textType = FB_NEW_POOL(pool) TextType(0, m_tt, m_charSet);
if (includeFilter.hasData())
{
m_includeMatcher.reset(FB_NEW_POOL(pool) SimilarMatcher(
pool, m_textType,
(const UCHAR*) includeFilter.c_str(),
includeFilter.length(),
'\\', true));
m_includeMatcher.reset(FB_NEW_POOL(pool) SimilarToRegex(
pool, true,
includeFilter.c_str(), includeFilter.length(),
"\\", 1));
}
if (excludeFilter.hasData())
{
m_excludeMatcher.reset(FB_NEW_POOL(pool) SimilarMatcher(
pool, m_textType,
(const UCHAR*) excludeFilter.c_str(),
excludeFilter.length(),
'\\', true));
m_excludeMatcher.reset(FB_NEW_POOL(pool) SimilarToRegex(
pool, true,
excludeFilter.c_str(), excludeFilter.length(),
"\\", 1));
}
}
TableMatcher::~TableMatcher()
{
if (m_tt && m_tt->texttype_fn_destroy)
m_tt->texttype_fn_destroy(m_tt);
}
bool TableMatcher::matchTable(const MetaName& tableName)
{
try
@ -104,18 +77,10 @@ bool TableMatcher::matchTable(const MetaName& tableName)
enabled = true;
if (m_includeMatcher)
{
m_includeMatcher->reset();
m_includeMatcher->process((const UCHAR*) tableName.c_str(), tableName.length());
enabled = m_includeMatcher->result();
}
enabled = m_includeMatcher->matches(tableName.c_str(), tableName.length());
if (enabled && m_excludeMatcher)
{
m_excludeMatcher->reset();
m_excludeMatcher->process((const UCHAR*) tableName.c_str(), tableName.length());
enabled = !m_excludeMatcher->result();
}
enabled = !m_excludeMatcher->matches(tableName.c_str(), tableName.length());
m_tables.put(tableName, enabled);
}

View File

@ -26,9 +26,9 @@
#include "../common/classes/array.h"
#include "../common/classes/semaphore.h"
#include "../common/SimilarToRegex.h"
#include "../common/os/guid.h"
#include "../common/isc_s_proto.h"
#include "../../jrd/SimilarToMatcher.h"
#include "../../jrd/intl_classes.h"
#include "Config.h"
@ -38,25 +38,18 @@ namespace Replication
{
class TableMatcher
{
typedef Jrd::UpcaseConverter<Jrd::NullStrConverter> SimilarConverter;
typedef Firebird::SimilarToMatcher<UCHAR, SimilarConverter> SimilarMatcher;
typedef Firebird::GenericMap<Firebird::Pair<Firebird::Left<Firebird::MetaName, bool> > > TablePermissionMap;
public:
TableMatcher(MemoryPool& pool,
const Firebird::string& includeFilter,
const Firebird::string& excludeFilter);
~TableMatcher();
bool matchTable(const Firebird::MetaName& tableName);
private:
charset* m_cs;
Firebird::AutoPtr<texttype> m_tt;
Firebird::AutoPtr<Jrd::CharSet> m_charSet;
Firebird::AutoPtr<Jrd::TextType> m_textType;
Firebird::AutoPtr<SimilarMatcher> m_includeMatcher;
Firebird::AutoPtr<SimilarMatcher> m_excludeMatcher;
Firebird::AutoPtr<Firebird::SimilarToRegex> m_includeMatcher;
Firebird::AutoPtr<Firebird::SimilarToRegex> m_excludeMatcher;
TablePermissionMap m_tables;
};

View File

@ -570,7 +570,6 @@ VI. ADDITIONAL NOTES
#include "../common/db_alias.h"
#include "../jrd/intl_proto.h"
#include "../jrd/lck_proto.h"
#include "../jrd/Collation.h"
#ifdef DEBUG_VAL_VERBOSE
#include "../jrd/dmp_proto.h"
@ -592,18 +591,21 @@ static void print_rhd(USHORT, const rhd*);
#endif
static PatternMatcher* createPatternMatcher(thread_db* tdbb, const char* pattern)
static SimilarToRegex* createPatternMatcher(thread_db* tdbb, const char* pattern)
{
PatternMatcher* matcher = NULL;
SimilarToRegex* matcher = NULL;
try
{
if (pattern)
{
const int len = strlen(pattern);
Collation* obj = INTL_texttype_lookup(tdbb, CS_UTF8);
matcher = obj->createSimilarToMatcher(*tdbb->getDefaultPool(),
(const UCHAR*) pattern, len, (UCHAR*) "\\", 1);
//// TODO: Should this be different than trace and replication
//// and use case sensitive matcher?
matcher = FB_NEW_POOL(*tdbb->getDefaultPool()) SimilarToRegex(
*tdbb->getDefaultPool(), false,
pattern, len,
"\\", 1);
}
}
catch (const Exception& ex)
@ -870,8 +872,6 @@ Validation::Validation(thread_db* tdbb, UtilSvc* uSvc) :
vdr_page_bitmap = NULL;
vdr_service = uSvc;
vdr_tab_incl = vdr_tab_excl = NULL;
vdr_idx_incl = vdr_idx_excl = NULL;
vdr_lock_tout = -10;
if (uSvc) {
@ -882,11 +882,6 @@ Validation::Validation(thread_db* tdbb, UtilSvc* uSvc) :
Validation::~Validation()
{
delete vdr_tab_incl;
delete vdr_tab_excl;
delete vdr_idx_incl;
delete vdr_idx_excl;
output("Validation finished\n");
}
@ -1654,22 +1649,14 @@ void Validation::walk_database()
if (vdr_tab_incl)
{
vdr_tab_incl->reset();
if (!vdr_tab_incl->process((UCHAR*) relation->rel_name.c_str(), relation->rel_name.length()) ||
!vdr_tab_incl->result())
{
if (!vdr_tab_incl->matches(relation->rel_name.c_str(), relation->rel_name.length()))
continue;
}
}
if (vdr_tab_excl)
{
vdr_tab_excl->reset();
if (!vdr_tab_excl->process((UCHAR*) relation->rel_name.c_str(), relation->rel_name.length()) ||
vdr_tab_excl->result())
{
if (vdr_tab_excl->matches(relation->rel_name.c_str(), relation->rel_name.length()))
continue;
}
}
// We can't reliably track double allocated pages when validating online.
@ -3163,15 +3150,13 @@ Validation::RTN Validation::walk_root(jrd_rel* relation)
if (vdr_idx_incl)
{
vdr_idx_incl->reset();
if (!vdr_idx_incl->process((UCHAR*) index.c_str(), index.length()) || !vdr_idx_incl->result())
// The index include filter applies to the index name (as the replaced code did), not to the relation name.
if (!vdr_idx_incl->matches(index.c_str(), index.length()))
continue;
}
if (vdr_idx_excl)
{
vdr_idx_excl->reset();
if (!vdr_idx_excl->process((UCHAR*) index.c_str(), index.length()) || vdr_idx_excl->result())
// The index exclude filter applies to the index name (as the replaced code did), not to the relation name.
if (vdr_idx_excl->matches(index.c_str(), index.length()))
continue;
}

View File

@ -28,6 +28,7 @@
#include "fb_types.h"
#include "../common/classes/array.h"
#include "../common/SimilarToRegex.h"
#include "../jrd/ods.h"
#include "../jrd/cch.h"
#include "../jrd/sbm.h"
@ -150,10 +151,10 @@ private:
ULONG vdr_err_counts[VAL_MAX_ERROR];
Firebird::UtilSvc* vdr_service;
PatternMatcher* vdr_tab_incl;
PatternMatcher* vdr_tab_excl;
PatternMatcher* vdr_idx_incl;
PatternMatcher* vdr_idx_excl;
Firebird::AutoPtr<Firebird::SimilarToRegex> vdr_tab_incl;
Firebird::AutoPtr<Firebird::SimilarToRegex> vdr_tab_excl;
Firebird::AutoPtr<Firebird::SimilarToRegex> vdr_idx_incl;
Firebird::AutoPtr<Firebird::SimilarToRegex> vdr_idx_excl;
int vdr_lock_tout;
void checkDPinPP(jrd_rel *relation, SLONG page_number);
void checkDPinPIP(jrd_rel *relation, SLONG page_number);

View File

@ -46,7 +46,6 @@ set(fbtrace_src
ntrace/TraceConfiguration.cpp
ntrace/traceplugin.cpp
ntrace/TracePluginImpl.cpp
ntrace/TraceUnicodeUtils.cpp
ntrace/os/platform.h
)
@ -70,11 +69,11 @@ if (WIN32)
set(instreg_src
install/install_reg.cpp
install/registry.cpp
install/registry.h
install/regis_proto.h
)
add_executable (instreg ${instreg_src} ${VERSION_RC})
add_executable (instreg ${instreg_src} ${VERSION_RC})
###########################################################################
# EXECUTABLE instsvc
@ -86,7 +85,7 @@ if (WIN32)
install/servi_proto.h
)
add_executable (instsvc ${instsvc_src} ${VERSION_RC})
target_link_libraries (instsvc common yvalve)
target_link_libraries (instsvc common yvalve)
###########################################################################
# EXECUTABLE instclient

View File

@ -26,9 +26,7 @@
*/
#include "TraceConfiguration.h"
#include "TraceUnicodeUtils.h"
#include "../../jrd/evl_string.h"
#include "../../jrd/SimilarToMatcher.h"
#include "../../common/SimilarToRegex.h"
#include "../../common/isc_f_proto.h"
using namespace Firebird;
@ -67,26 +65,6 @@ void TraceCfgReader::readTraceConfiguration(const char* text,
}
namespace
{
template <typename PrevConverter = Jrd::NullStrConverter>
class SystemToUtf8Converter : public PrevConverter
{
public:
SystemToUtf8Converter(MemoryPool& pool, Jrd::TextType* obj, const UCHAR*& str, SLONG& len)
: PrevConverter(pool, obj, str, len)
{
buffer.assign(reinterpret_cast<const char*>(str), len);
ISC_systemToUtf8(buffer);
str = reinterpret_cast<const UCHAR*>(buffer.c_str());
len = buffer.length();
}
private:
string buffer;
};
}
#define ERROR_PREFIX "error while parsing trace configuration\n\t"
void TraceCfgReader::readConfig()
@ -156,31 +134,28 @@ void TraceCfgReader::readConfig()
try
{
#ifdef WIN_NT // !CASE_SENSITIVITY
typedef Jrd::UpcaseConverter<SystemToUtf8Converter<> > SimilarConverter;
const bool caseInsensitive = true;
#else
typedef SystemToUtf8Converter<> SimilarConverter;
const bool caseInsensitive = false;
#endif
string utf8Pattern = pattern;
ISC_systemToUtf8(utf8Pattern);
UnicodeCollationHolder unicodeCollation(*getDefaultMemoryPool());
Jrd::TextType* textType = unicodeCollation.getTextType();
SimilarToMatcher<ULONG, Jrd::CanonicalConverter<SimilarConverter> > matcher(
*getDefaultMemoryPool(), textType, (const UCHAR*) pattern.c_str(),
pattern.length(), '\\', true);
SimilarToRegex matcher(*getDefaultMemoryPool(), caseInsensitive,
utf8Pattern.c_str(), utf8Pattern.length(), "\\", 1);
regExpOk = true;
matcher.process((const UCHAR*) m_databaseName.c_str(), m_databaseName.length());
if (matcher.result())
{
for (unsigned i = 0;
i <= matcher.getNumBranches() && i < FB_NELEM(m_subpatterns); ++i)
{
unsigned start, length;
matcher.getBranchInfo(i, &start, &length);
PathName utf8DatabaseName = m_databaseName;
ISC_systemToUtf8(utf8DatabaseName);
Array<SimilarToRegex::MatchPos> matchPosArray;
m_subpatterns[i].start = start;
m_subpatterns[i].end = start + length;
if (matcher.matches(utf8DatabaseName.c_str(), utf8DatabaseName.length(), &matchPosArray))
{
for (unsigned i = 0; i < matchPosArray.getCount() && i < FB_NELEM(m_subpatterns); ++i)
{
m_subpatterns[i].start = matchPosArray[i].start;
m_subpatterns[i].end = matchPosArray[i].start + matchPosArray[i].length;
}
match = exactMatch = true;

View File

@ -99,7 +99,6 @@ TracePluginImpl::TracePluginImpl(IPluginBase* plugin,
transactions(getDefaultMemoryPool()),
statements(getDefaultMemoryPool()),
services(getDefaultMemoryPool()),
unicodeCollation(*getDefaultMemoryPool()),
include_codes(*getDefaultMemoryPool()),
exclude_codes(*getDefaultMemoryPool())
{
@ -124,8 +123,6 @@ TracePluginImpl::TracePluginImpl(IPluginBase* plugin,
logWriter->addRef();
}
Jrd::TextType* textType = unicodeCollation.getTextType();
// Compile filtering regular expressions
const char* str = NULL;
try
@ -136,9 +133,10 @@ TracePluginImpl::TracePluginImpl(IPluginBase* plugin,
string filter(config.include_filter);
ISC_systemToUtf8(filter);
include_matcher = FB_NEW TraceSimilarToMatcher(
*getDefaultMemoryPool(), textType, (const UCHAR*) filter.c_str(),
filter.length(), '\\', true);
include_matcher = FB_NEW SimilarToRegex(
*getDefaultMemoryPool(), true,
filter.c_str(), filter.length(),
"\\", 1);
}
if (config.exclude_filter.hasData())
@ -147,9 +145,10 @@ TracePluginImpl::TracePluginImpl(IPluginBase* plugin,
string filter(config.exclude_filter);
ISC_systemToUtf8(filter);
exclude_matcher = FB_NEW TraceSimilarToMatcher(
*getDefaultMemoryPool(), textType, (const UCHAR*) filter.c_str(),
filter.length(), '\\', true);
exclude_matcher = FB_NEW SimilarToRegex(
*getDefaultMemoryPool(), true,
filter.c_str(), filter.length(),
"\\", 1);
}
}
catch (const Exception&)
@ -1546,18 +1545,10 @@ void TracePluginImpl::register_sql_statement(ITraceSQLStatement* statement)
return;
if (config.include_filter.hasData())
{
include_matcher->reset();
include_matcher->process((const UCHAR*) sql, sql_length);
need_statement = include_matcher->result();
}
need_statement = include_matcher->matches(sql, sql_length);
if (need_statement && config.exclude_filter.hasData())
{
exclude_matcher->reset();
exclude_matcher->process((const UCHAR*) sql, sql_length);
need_statement = !exclude_matcher->result();
}
need_statement = !exclude_matcher->matches(sql, sql_length);
if (need_statement)
{
@ -1949,18 +1940,10 @@ bool TracePluginImpl::checkServiceFilter(ITraceServiceConnection* service, bool
bool enabled = true;
if (config.include_filter.hasData())
{
include_matcher->reset();
include_matcher->process((const UCHAR*) svcName, svcNameLen);
enabled = include_matcher->result();
}
enabled = include_matcher->matches(svcName, svcNameLen);
if (enabled && config.exclude_filter.hasData())
{
exclude_matcher->reset();
exclude_matcher->process((const UCHAR*) svcName, svcNameLen);
enabled = !exclude_matcher->result();
}
enabled = !exclude_matcher->matches(svcName, svcNameLen);
if (data) {
data->enabled = enabled;

View File

@ -32,11 +32,7 @@
#include "firebird.h"
#include "../../jrd/ntrace.h"
#include "TracePluginConfig.h"
#include "TraceUnicodeUtils.h"
#include "../../jrd/intl_classes.h"
#include "../../jrd/evl_string.h"
#include "../../common/TextType.h"
#include "../../jrd/SimilarToMatcher.h"
#include "../../common/SimilarToRegex.h"
#include "../../common/classes/rwlock.h"
#include "../../common/classes/GenericMap.h"
#include "../../common/classes/locks.h"
@ -168,10 +164,7 @@ private:
// Lock for log rotation
Firebird::RWLock renameLock;
UnicodeCollationHolder unicodeCollation;
typedef Firebird::SimilarToMatcher<ULONG, Jrd::UpcaseConverter<Jrd::CanonicalConverter<> > >
TraceSimilarToMatcher;
Firebird::AutoPtr<TraceSimilarToMatcher> include_matcher, exclude_matcher;
Firebird::AutoPtr<Firebird::SimilarToRegex> include_matcher, exclude_matcher;
// Filters for gds error codes
typedef Firebird::SortedArray<ISC_STATUS> GdsCodesArray;

View File

@ -1,65 +0,0 @@
/*
* PROGRAM: Firebird Trace Services
* MODULE: TraceUnicodeUtils.cpp
* DESCRIPTION: Unicode support for trace needs
*
* The contents of this file are subject to the Initial
* Developer's Public License Version 1.0 (the "License");
* you may not use this file except in compliance with the
* License. You may obtain a copy of the License at
* http://www.ibphoenix.com/main.nfs?a=ibphoenix&page=ibp_idpl.
*
* Software distributed under the License is distributed AS IS,
* WITHOUT WARRANTY OF ANY KIND, either express or implied.
* See the License for the specific language governing rights
* and limitations under the License.
*
* The Original Code was created by Khorsun Vladyslav
* for the Firebird Open Source RDBMS project.
*
* Copyright (c) 2010 Khorsun Vladyslav <hvlad@users.sourceforge.net>
* and all contributors signed below.
*
* All Rights Reserved.
* Contributor(s): ______________________________________.
* Adriano dos Santos Fernandes
*
*/
#include "TraceUnicodeUtils.h"
using namespace Firebird;
UnicodeCollationHolder::UnicodeCollationHolder(MemoryPool& pool)
{
cs = FB_NEW_POOL(pool) charset;
tt = FB_NEW_POOL(pool) texttype;
IntlUtil::initUtf8Charset(cs);
string collAttributes("ICU-VERSION=");
collAttributes += Jrd::UnicodeUtil::getDefaultIcuVersion();
IntlUtil::setupIcuAttributes(cs, collAttributes, "", collAttributes);
UCharBuffer collAttributesBuffer;
collAttributesBuffer.push(reinterpret_cast<const UCHAR*>(collAttributes.c_str()),
collAttributes.length());
if (!IntlUtil::initUnicodeCollation(tt, cs, "UNICODE", 0, collAttributesBuffer, string()))
fatal_exception::raiseFmt("cannot initialize UNICODE collation to use in trace plugin");
charSet = Jrd::CharSet::createInstance(pool, 0, cs);
textType = FB_NEW_POOL(pool) Jrd::TextType(0, tt, charSet);
}
UnicodeCollationHolder::~UnicodeCollationHolder()
{
fb_assert(tt->texttype_fn_destroy);
if (tt->texttype_fn_destroy)
tt->texttype_fn_destroy(tt);
// cs should be deleted by texttype_fn_destroy call above
delete tt;
}

View File

@ -1,57 +0,0 @@
/*
* PROGRAM: Firebird Trace Services
* MODULE: TraceUnicodeUtils.h
* DESCRIPTION: Unicode support for trace needs
*
* The contents of this file are subject to the Initial
* Developer's Public License Version 1.0 (the "License");
* you may not use this file except in compliance with the
* License. You may obtain a copy of the License at
* http://www.ibphoenix.com/main.nfs?a=ibphoenix&page=ibp_idpl.
*
* Software distributed under the License is distributed AS IS,
* WITHOUT WARRANTY OF ANY KIND, either express or implied.
* See the License for the specific language governing rights
* and limitations under the License.
*
* The Original Code was created by Khorsun Vladyslav
* for the Firebird Open Source RDBMS project.
*
* Copyright (c) 2010 Khorsun Vladyslav <hvlad@users.sourceforge.net>
* and all contributors signed below.
*
* All Rights Reserved.
* Contributor(s): ______________________________________.
*
*/
#ifndef TRACE_UNICODE_UTILS_H
#define TRACE_UNICODE_UTILS_H
#include "firebird.h"
#include "../../common/classes/fb_string.h"
#include "../../jrd/intl_classes.h"
#include "../../common/TextType.h"
#include "../../common/unicode_util.h"
class UnicodeCollationHolder
{
private:
charset* cs;
texttype* tt;
Firebird::AutoPtr<Jrd::CharSet> charSet;
Firebird::AutoPtr<Jrd::TextType> textType;
public:
explicit UnicodeCollationHolder(Firebird::MemoryPool& pool);
~UnicodeCollationHolder();
Jrd::TextType* getTextType()
{
return textType;
}
};
#endif // TRACE_UNICODE_UTILS_H