mirror of
https://github.com/FirebirdSQL/firebird.git
synced 2025-01-22 18:43:02 +01:00
WIP: Reimplementation of SIMILAR TO and SUBSTRING...SIMILAR using Google's re2 library.
This should fix: CORE-4874 CORE-5664 CORE-3858 CORE-6088 CORE-3773 CORE-5931 CORE-6088 CORE-4893
This commit is contained in:
parent
14d41d409e
commit
28e18749ff
@ -307,7 +307,8 @@ endif
|
||||
LIB_PATH_OPTS = $(call LIB_LINK_RPATH,lib) $(call LIB_LINK_RPATH,intl)
|
||||
LIB_LINK_SONAME= -Wl,-soname,$(1)
|
||||
LIB_LINK_MAPFILE= -Wl,--version-script,$(1)
|
||||
FIREBIRD_LIBRARY_LINK= -L$(LIB) -lfbclient $(MATHLIB) $(CRYPTLIB)
|
||||
# FIXME:
|
||||
FIREBIRD_LIBRARY_LINK= -L$(LIB) -lfbclient $(MATHLIB) $(CRYPTLIB) -lre2
|
||||
|
||||
EXE_LINK_OPTIONS= $(LDFLAGS) $(THR_FLAGS) $(UNDEF_FLAGS) $(LIB_PATH_OPTS) $(LINK_EMPTY_SYMBOLS)
|
||||
LIB_LINK_OPTIONS= $(LDFLAGS) $(THR_FLAGS) -shared
|
||||
|
@ -90,6 +90,7 @@ Note:
|
||||
<left paren>, <right paren>, <vertical bar>, <circumflex>, <minus sign>, <plus sign>, <asterisk>,
|
||||
<underscore>, <percent>, <question mark>, <left brace> or <escape character>.
|
||||
|
||||
3) Since FB 4, the low/high values of a repeat factor cannot be greater than 1000.
|
||||
|
||||
Syntax description and examples:
|
||||
|
||||
@ -220,3 +221,36 @@ insert into department values ('600', 'Engineering', '(408) 555-123'); -- check
|
||||
select * from department
|
||||
where phone not similar to '\([0-9]{3}\) 555\-%' escape '\';
|
||||
|
||||
|
||||
Appendix:
|
||||
|
||||
Since FB 4 SIMILAR TO and SUBSTRING...SIMILAR are implemented using the re2 library,
|
||||
which has the following license:
|
||||
|
||||
Copyright (c) 2009 The RE2 Authors. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
@ -2542,15 +2542,11 @@ void BurpGlobals::setupSkipData(const Firebird::string& regexp)
|
||||
ISC_systemToUtf8(filter);
|
||||
|
||||
BurpGlobals* tdgbl = BurpGlobals::getSpecific();
|
||||
if (!unicodeCollation)
|
||||
unicodeCollation = FB_NEW_POOL(tdgbl->getPool()) UnicodeCollationHolder(tdgbl->getPool());
|
||||
|
||||
Jrd::TextType* const textType = unicodeCollation->getTextType();
|
||||
|
||||
skipDataMatcher.reset(FB_NEW_POOL(tdgbl->getPool())
|
||||
Firebird::SimilarToMatcher<UCHAR, Jrd::UpcaseConverter<> >
|
||||
(tdgbl->getPool(), textType, (const UCHAR*) filter.c_str(),
|
||||
filter.length(), '\\', true));
|
||||
skipDataMatcher.reset(FB_NEW_POOL(tdgbl->getPool()) Firebird::SimilarToRegex(
|
||||
tdgbl->getPool(), true,
|
||||
filter.c_str(), filter.length(),
|
||||
"\\", 1));
|
||||
}
|
||||
}
|
||||
catch (const Firebird::Exception&)
|
||||
@ -2571,18 +2567,12 @@ Firebird::string BurpGlobals::toSystem(const Firebird::PathName& from)
|
||||
bool BurpGlobals::skipRelation(const char* name)
|
||||
{
|
||||
if (gbl_sw_meta)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!skipDataMatcher)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
skipDataMatcher->reset();
|
||||
skipDataMatcher->process(reinterpret_cast<const UCHAR*>(name), static_cast<SLONG>(strlen(name)));
|
||||
return skipDataMatcher->result();
|
||||
return skipDataMatcher->matches(name, strlen(name));
|
||||
}
|
||||
|
||||
void BurpGlobals::read_stats(SINT64* stats)
|
||||
@ -2703,39 +2693,6 @@ void BurpGlobals::print_stats_header()
|
||||
burp_output(false, "\n");
|
||||
}
|
||||
|
||||
UnicodeCollationHolder::UnicodeCollationHolder(MemoryPool& pool)
|
||||
{
|
||||
cs = FB_NEW_POOL(pool) charset;
|
||||
tt = FB_NEW_POOL(pool) texttype;
|
||||
|
||||
Firebird::IntlUtil::initUtf8Charset(cs);
|
||||
|
||||
Firebird::string collAttributes("ICU-VERSION=");
|
||||
collAttributes += Jrd::UnicodeUtil::getDefaultIcuVersion();
|
||||
Firebird::IntlUtil::setupIcuAttributes(cs, collAttributes, "", collAttributes);
|
||||
|
||||
Firebird::UCharBuffer collAttributesBuffer;
|
||||
collAttributesBuffer.push(reinterpret_cast<const UCHAR*>(collAttributes.c_str()),
|
||||
collAttributes.length());
|
||||
|
||||
if (!Firebird::IntlUtil::initUnicodeCollation(tt, cs, "UNICODE", 0, collAttributesBuffer, Firebird::string()))
|
||||
Firebird::fatal_exception::raiseFmt("cannot initialize UNICODE collation to use in gbak");
|
||||
|
||||
charSet = Jrd::CharSet::createInstance(pool, 0, cs);
|
||||
textType = FB_NEW_POOL(pool) Jrd::TextType(0, tt, charSet);
|
||||
}
|
||||
|
||||
UnicodeCollationHolder::~UnicodeCollationHolder()
|
||||
{
|
||||
fb_assert(tt->texttype_fn_destroy);
|
||||
|
||||
if (tt->texttype_fn_destroy)
|
||||
tt->texttype_fn_destroy(tt);
|
||||
|
||||
// cs should be deleted by texttype_fn_destroy call above
|
||||
delete tt;
|
||||
}
|
||||
|
||||
void BURP_makeSymbol(BurpGlobals* tdgbl, Firebird::string& name) // add double quotes to string
|
||||
{
|
||||
if (tdgbl->gbl_dialect < SQL_DIALECT_V6)
|
||||
|
@ -42,7 +42,7 @@
|
||||
#include "../common/classes/array.h"
|
||||
#include "../common/classes/fb_pair.h"
|
||||
#include "../common/classes/MetaName.h"
|
||||
#include "../jrd/SimilarToMatcher.h"
|
||||
#include "../common/SimilarToRegex.h"
|
||||
#include "../common/status.h"
|
||||
#include "../common/sha.h"
|
||||
#include "../common/classes/ImplementHelper.h"
|
||||
@ -894,26 +894,6 @@ static const char HDR_SPLIT_TAG6[] = "InterBase/gbak, ";
|
||||
const FB_UINT64 MIN_SPLIT_SIZE = FB_CONST64(2048); // bytes
|
||||
|
||||
|
||||
// Copy&paste from TraceUnicodeUtils.h - fixme !!!!!!!!
|
||||
class UnicodeCollationHolder
|
||||
{
|
||||
private:
|
||||
charset* cs;
|
||||
texttype* tt;
|
||||
Firebird::AutoPtr<Jrd::CharSet> charSet;
|
||||
Firebird::AutoPtr<Jrd::TextType> textType;
|
||||
|
||||
public:
|
||||
explicit UnicodeCollationHolder(Firebird::MemoryPool& pool);
|
||||
~UnicodeCollationHolder();
|
||||
|
||||
Jrd::TextType* getTextType()
|
||||
{
|
||||
return textType;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// Global switches and data
|
||||
|
||||
struct BurpCrypt;
|
||||
@ -1174,8 +1154,7 @@ public:
|
||||
bool flag_on_line; // indicates whether we will bring the database on-line
|
||||
bool firstMap; // this is the first time we entered get_mapping()
|
||||
bool stdIoMode; // stdin or stdout is used as backup file
|
||||
Firebird::AutoPtr<UnicodeCollationHolder> unicodeCollation;
|
||||
Firebird::AutoPtr<Firebird::SimilarToMatcher<UCHAR, Jrd::UpcaseConverter<> > > skipDataMatcher;
|
||||
Firebird::AutoPtr<Firebird::SimilarToRegex> skipDataMatcher;
|
||||
|
||||
public:
|
||||
Firebird::string toSystem(const Firebird::PathName& from);
|
||||
|
821
src/common/SimilarToRegex.cpp
Normal file
821
src/common/SimilarToRegex.cpp
Normal file
@ -0,0 +1,821 @@
|
||||
/*
|
||||
* The contents of this file are subject to the Initial
|
||||
* Developer's Public License Version 1.0 (the "License");
|
||||
* you may not use this file except in compliance with the
|
||||
* License. You may obtain a copy of the License at
|
||||
* http://www.ibphoenix.com/main.nfs?a=ibphoenix&page=ibp_idpl.
|
||||
*
|
||||
* Software distributed under the License is distributed AS IS,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing rights
|
||||
* and limitations under the License.
|
||||
*
|
||||
* The Original Code was created by Adriano dos Santos Fernandes
|
||||
* for the Firebird Open Source RDBMS project.
|
||||
*
|
||||
* Copyright (c) 2019 Adriano dos Santos Fernandes <adrianosf at gmail.com>
|
||||
* and all contributors signed below.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "firebird.h"
|
||||
#include "../common/SimilarToRegex.h"
|
||||
#include "../common/StatusArg.h"
|
||||
#include <unicode/utf8.h>
|
||||
|
||||
using namespace Firebird;
|
||||
|
||||
namespace
|
||||
{
|
||||
static const unsigned FLAG_PREFER_FEWER = 0x01;
|
||||
static const unsigned FLAG_CASE_INSENSITIVE = 0x02;
|
||||
static const unsigned FLAG_GROUP_CAPTURE = 0x04;
|
||||
|
||||
//// TODO: Verify usage of U8_NEXT_UNSAFE.
|
||||
class SimilarToCompiler
|
||||
{
|
||||
public:
|
||||
// Translates a SQL SIMILAR TO pattern (UTF-8, aPatternLen bytes) into RE2 syntax and
// compiles it, storing the resulting RE2 object in `regexp`.
//
// pool       - pool used for the translated pattern string and the RE2 object.
// regexp     - receives the compiled expression.
// aFlags     - combination of FLAG_PREFER_FEWER, FLAG_CASE_INSENSITIVE, FLAG_GROUP_CAPTURE.
// escapeStr  - optional escape character (nullptr means "no ESCAPE clause"); when given it
//              must be exactly one well-formed UTF-8 character of escapeLen bytes.
//
// Raises isc_escape_invalid for a bad escape and isc_invalid_similar_pattern when the
// pattern cannot be parsed or RE2 rejects the translated expression.
SimilarToCompiler(MemoryPool& pool, AutoPtr<RE2>& regexp, unsigned aFlags,
		const char* aPatternStr, unsigned aPatternLen,
		const char* escapeStr, unsigned escapeLen)
	: re2PatternStr(pool),
	  patternStr(aPatternStr),
	  patternPos(0),
	  patternLen(aPatternLen),
	  escapeChar(0),	// defined value even when no escape is in use
	  flags(aFlags),
	  useEscape(escapeStr != nullptr)
{
	if (escapeStr)
	{
		const int32_t escapeEnd = static_cast<int32_t>(escapeLen);
		int32_t escapePos = 0;

		// An empty escape must be rejected before decoding: there is no byte to read.
		if (escapeEnd <= 0)
			status_exception::raise(Arg::Gds(isc_escape_invalid));

		// Bounded decode: never reads beyond escapeLen and yields a negative
		// value for an ill-formed or truncated sequence.
		U8_NEXT(escapeStr, escapePos, escapeEnd, escapeChar);

		if (escapeChar < 0 || escapePos != escapeEnd)
			status_exception::raise(Arg::Gds(isc_escape_invalid));
	}

	if (flags & FLAG_CASE_INSENSITIVE)
		re2PatternStr.append("(?i)");

	if (flags & FLAG_GROUP_CAPTURE)
		re2PatternStr.append("(");

	// Initialized so that parseExpr never accumulates into an indeterminate value.
	int parseFlags = PARSE_FLAG_NOT_EMPTY;
	parseExpr(&parseFlags);

	if (flags & FLAG_GROUP_CAPTURE)
		re2PatternStr.append(")");

	// Check for proper termination: anything left over (e.g. an unbalanced ')')
	// means the pattern is malformed.
	if (patternPos < patternLen)
		status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));

	RE2::Options options;
	options.set_log_errors(false);
	options.set_dot_nl(true);	// '_' and '%' must also match newlines

	re2::StringPiece sp((const char*) re2PatternStr.c_str(), re2PatternStr.length());
	regexp = FB_NEW_POOL(pool) RE2(sp, options);

	if (!regexp->ok())
		status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));
}
|
||||
|
||||
bool hasChar()
|
||||
{
|
||||
return patternPos < patternLen;
|
||||
}
|
||||
|
||||
// Decodes the UTF-8 character at the cursor and advances the cursor past it.
// The caller must have checked hasChar().
//
// The decode is bounded by patternLen: a truncated or ill-formed sequence yields a
// negative code point instead of reading bytes beyond the end of the pattern.
UChar32 getChar()
{
	fb_assert(hasChar());

	UChar32 c;
	U8_NEXT(patternStr, patternPos, patternLen, c);
	return c;
}
|
||||
|
||||
// Returns the character at the cursor without consuming it.
UChar32 peekChar()
{
	const auto mark = patternPos;
	const auto upcoming = getChar();

	// Rewind so the next getChar() sees the same character again.
	patternPos = mark;
	return upcoming;
}
|
||||
|
||||
// True for the characters that start a repetition operator: * + ? {
bool isRep(UChar32 c) const
{
	switch (c)
	{
		case '*':
		case '+':
		case '?':
		case '{':
			return true;

		default:
			return false;
	}
}
|
||||
|
||||
// True when c is a metacharacter of the SIMILAR TO pattern syntax.
bool isSpecial(UChar32 c)
{
	static const char similarMeta[] =
		{'^', '-', '_', '%', '[', ']', '(', ')', '{', '}', '|', '?', '+', '*'};

	for (const char meta : similarMeta)
	{
		if (c == meta)
			return true;
	}

	return false;
}
|
||||
|
||||
// True when c is in the set of characters this compiler backslash-escapes
// before emitting them as literals into the RE2 pattern.
bool isRe2Special(UChar32 c)
{
	static const char re2Meta[] =
		{'\\', '$', '.', '^', '-', '_', '[', ']', '(', ')', '{', '}', '|', '?', '+', '*'};

	for (const char meta : re2Meta)
	{
		if (c == meta)
			return true;
	}

	return false;
}
|
||||
|
||||
void parseExpr(int* parseFlagOut)
|
||||
{
|
||||
while (true)
|
||||
{
|
||||
int parseFlags;
|
||||
parseTerm(&parseFlags);
|
||||
*parseFlagOut &= ~(~parseFlags & PARSE_FLAG_NOT_EMPTY);
|
||||
*parseFlagOut |= parseFlags;
|
||||
|
||||
auto savePos = patternPos;
|
||||
UChar32 c;
|
||||
|
||||
if (!hasChar() || (c = getChar()) != '|')
|
||||
{
|
||||
patternPos = savePos;
|
||||
break;
|
||||
}
|
||||
|
||||
re2PatternStr.append("|");
|
||||
}
|
||||
}
|
||||
|
||||
void parseTerm(int* parseFlagOut)
|
||||
{
|
||||
*parseFlagOut = 0;
|
||||
|
||||
bool first = true;
|
||||
|
||||
while (hasChar())
|
||||
{
|
||||
auto c = peekChar();
|
||||
|
||||
if (c != '|' && c != ')')
|
||||
{
|
||||
int parseFlags;
|
||||
parseFactor(&parseFlags);
|
||||
|
||||
*parseFlagOut |= parseFlags & PARSE_FLAG_NOT_EMPTY;
|
||||
|
||||
if (first)
|
||||
{
|
||||
*parseFlagOut |= parseFlags;
|
||||
first = false;
|
||||
}
|
||||
}
|
||||
else
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void parseFactor(int* parseFlagOut)
|
||||
{
|
||||
int parseFlags;
|
||||
parsePrimary(&parseFlags);
|
||||
|
||||
UChar32 op;
|
||||
|
||||
if (!hasChar() || !isRep((op = peekChar())))
|
||||
{
|
||||
*parseFlagOut = parseFlags;
|
||||
return;
|
||||
}
|
||||
|
||||
if (!(parseFlags & PARSE_FLAG_NOT_EMPTY) && op != '?')
|
||||
status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));
|
||||
|
||||
fb_assert(op == '*' || op == '+' || op == '?' || op == '{');
|
||||
|
||||
if (op == '*')
|
||||
{
|
||||
re2PatternStr.append((flags & FLAG_PREFER_FEWER) ? "*?" : "*");
|
||||
*parseFlagOut = 0;
|
||||
++patternPos;
|
||||
}
|
||||
else if (op == '+')
|
||||
{
|
||||
re2PatternStr.append((flags & FLAG_PREFER_FEWER) ? "+?" : "+");
|
||||
*parseFlagOut = PARSE_FLAG_NOT_EMPTY;
|
||||
++patternPos;
|
||||
}
|
||||
else if (op == '?')
|
||||
{
|
||||
re2PatternStr.append((flags & FLAG_PREFER_FEWER) ? "??" : "?");
|
||||
*parseFlagOut = 0;
|
||||
++patternPos;
|
||||
}
|
||||
else if (op == '{')
|
||||
{
|
||||
const auto repeatStart = patternPos++;
|
||||
|
||||
bool comma = false;
|
||||
string s1, s2;
|
||||
|
||||
while (true)
|
||||
{
|
||||
if (!hasChar())
|
||||
status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));
|
||||
|
||||
UChar32 c = getChar();
|
||||
|
||||
if (c == '}')
|
||||
{
|
||||
if (s1.isEmpty())
|
||||
status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));
|
||||
break;
|
||||
}
|
||||
else if (c == ',')
|
||||
{
|
||||
if (comma)
|
||||
status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));
|
||||
comma = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (c >= '0' && c <= '9')
|
||||
{
|
||||
if (comma)
|
||||
s2 += (char) c;
|
||||
else
|
||||
s1 += (char) c;
|
||||
}
|
||||
else
|
||||
status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));
|
||||
}
|
||||
}
|
||||
|
||||
const int n1 = atoi(s1.c_str());
|
||||
*parseFlagOut = n1 == 0 ? 0 : PARSE_FLAG_NOT_EMPTY;
|
||||
|
||||
re2PatternStr.append(patternStr + repeatStart, patternStr + patternPos);
|
||||
|
||||
if (flags & FLAG_PREFER_FEWER)
|
||||
re2PatternStr.append("?");
|
||||
}
|
||||
|
||||
if (hasChar() && isRep(peekChar()))
|
||||
status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));
|
||||
}
|
||||
|
||||
void parsePrimary(int* parseFlagOut)
|
||||
{
|
||||
*parseFlagOut = 0;
|
||||
|
||||
fb_assert(hasChar());
|
||||
auto savePos = patternPos;
|
||||
auto op = getChar();
|
||||
|
||||
if (op == '_')
|
||||
{
|
||||
*parseFlagOut |= PARSE_FLAG_NOT_EMPTY;
|
||||
re2PatternStr.append(".");
|
||||
return;
|
||||
}
|
||||
else if (op == '%')
|
||||
{
|
||||
re2PatternStr.append((flags & FLAG_PREFER_FEWER) ? ".*?" : ".*");
|
||||
return;
|
||||
}
|
||||
else if (op == '[')
|
||||
{
|
||||
struct
|
||||
{
|
||||
const char* similarClass;
|
||||
const char* re2ClassInclude;
|
||||
const char* re2ClassExclude;
|
||||
} static const classes[] =
|
||||
{
|
||||
{"alnum", "[:alnum:]", "[:^alnum:]"},
|
||||
{"alpha", "[:alpha:]", "[:^alpha:]"},
|
||||
{"digit", "[:digit:]", "[:^digit:]"},
|
||||
{"lower", "[:lower:]", "[:^lower:]"},
|
||||
{"space", " ", "\\x00-\\x1F\\x21-\\x{10FFFF}"},
|
||||
{"upper", "[:upper:]", "[:^upper:]"},
|
||||
{"whitespace", "[:space:]", "[:^space:]"}
|
||||
};
|
||||
|
||||
struct Item
|
||||
{
|
||||
int clazz;
|
||||
unsigned firstStart, firstEnd, lastStart, lastEnd;
|
||||
};
|
||||
Array<Item> items;
|
||||
unsigned includeCount = 0;
|
||||
bool exclude = false;
|
||||
|
||||
do
|
||||
{
|
||||
if (!hasChar())
|
||||
status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));
|
||||
|
||||
unsigned charSavePos = patternPos;
|
||||
UChar32 c = getChar();
|
||||
bool range = false;
|
||||
bool charClass = false;
|
||||
|
||||
if (useEscape && c == escapeChar)
|
||||
{
|
||||
if (!hasChar())
|
||||
status_exception::raise(Arg::Gds(isc_escape_invalid));
|
||||
|
||||
charSavePos = patternPos;
|
||||
c = getChar();
|
||||
|
||||
if (!(c == escapeChar || isSpecial(c)))
|
||||
status_exception::raise(Arg::Gds(isc_escape_invalid));
|
||||
}
|
||||
else
|
||||
{
|
||||
if (c == '[')
|
||||
charClass = true;
|
||||
else if (c == '^')
|
||||
{
|
||||
if (exclude)
|
||||
status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));
|
||||
|
||||
exclude = true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
Item item;
|
||||
|
||||
if (!exclude)
|
||||
++includeCount;
|
||||
|
||||
if (charClass)
|
||||
{
|
||||
if (!hasChar() || getChar() != ':')
|
||||
status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));
|
||||
|
||||
charSavePos = patternPos;
|
||||
|
||||
while (hasChar() && getChar() != ':')
|
||||
;
|
||||
|
||||
const SLONG len = patternPos - charSavePos - 1;
|
||||
|
||||
if (!hasChar() || getChar() != ']')
|
||||
status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));
|
||||
|
||||
for (item.clazz = 0; item.clazz < FB_NELEM(classes); ++item.clazz)
|
||||
{
|
||||
if (fb_utils::strnicmp(patternStr + charSavePos,
|
||||
classes[item.clazz].similarClass, len) == 0)
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (item.clazz >= FB_NELEM(classes))
|
||||
status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));
|
||||
}
|
||||
else
|
||||
{
|
||||
item.clazz = -1;
|
||||
|
||||
item.firstStart = item.lastStart = charSavePos;
|
||||
item.firstEnd = item.lastEnd = patternPos;
|
||||
|
||||
if (hasChar() && peekChar() == '-')
|
||||
{
|
||||
getChar();
|
||||
|
||||
charSavePos = patternPos;
|
||||
c = getChar();
|
||||
|
||||
if (useEscape && c == escapeChar)
|
||||
{
|
||||
if (!hasChar())
|
||||
status_exception::raise(Arg::Gds(isc_escape_invalid));
|
||||
|
||||
charSavePos = patternPos;
|
||||
c = getChar();
|
||||
|
||||
if (!(c == escapeChar || isSpecial(c)))
|
||||
status_exception::raise(Arg::Gds(isc_escape_invalid));
|
||||
}
|
||||
|
||||
item.lastStart = charSavePos;
|
||||
item.lastEnd = patternPos;
|
||||
}
|
||||
}
|
||||
|
||||
items.add(item);
|
||||
|
||||
if (!hasChar())
|
||||
status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));
|
||||
} while (peekChar() != ']');
|
||||
|
||||
auto appendItem = [&](const Item& item, bool negated) {
|
||||
if (item.clazz != -1)
|
||||
{
|
||||
re2PatternStr.append(negated ?
|
||||
classes[item.clazz].re2ClassExclude :
|
||||
classes[item.clazz].re2ClassInclude);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (negated)
|
||||
{
|
||||
UChar32 c;
|
||||
char hex[20];
|
||||
|
||||
int32_t cPos = item.firstStart;
|
||||
U8_NEXT_UNSAFE(patternStr, cPos, c);
|
||||
|
||||
if (c > 0)
|
||||
{
|
||||
re2PatternStr.append("\\x00");
|
||||
re2PatternStr.append("-");
|
||||
|
||||
sprintf(hex, "\\x{%X}", (int) c - 1);
|
||||
re2PatternStr.append(hex);
|
||||
}
|
||||
|
||||
cPos = item.lastStart;
|
||||
U8_NEXT_UNSAFE(patternStr, cPos, c);
|
||||
|
||||
if (c < 0x10FFFF)
|
||||
{
|
||||
sprintf(hex, "\\x{%X}", (int) c + 1);
|
||||
re2PatternStr.append(hex);
|
||||
re2PatternStr.append("-");
|
||||
re2PatternStr.append("\\x{10FFFF}");
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (isRe2Special(patternStr[item.firstStart]))
|
||||
re2PatternStr.append("\\");
|
||||
|
||||
re2PatternStr.append(patternStr + item.firstStart, patternStr + item.firstEnd);
|
||||
|
||||
if (item.lastStart != item.firstStart)
|
||||
{
|
||||
re2PatternStr.append("-");
|
||||
|
||||
if (isRe2Special(patternStr[item.lastStart]))
|
||||
re2PatternStr.append("\\");
|
||||
|
||||
re2PatternStr.append(patternStr + item.lastStart, patternStr + item.lastEnd);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
if (exclude && includeCount > 1)
|
||||
{
|
||||
re2PatternStr.append("(?:");
|
||||
|
||||
for (unsigned i = 0; i < includeCount; ++i)
|
||||
{
|
||||
if (i != 0)
|
||||
re2PatternStr.append("|");
|
||||
|
||||
re2PatternStr.append("[");
|
||||
re2PatternStr.append("^");
|
||||
appendItem(items[i], true);
|
||||
|
||||
for (unsigned j = includeCount; j < items.getCount(); ++j)
|
||||
appendItem(items[j], false);
|
||||
|
||||
re2PatternStr.append("]");
|
||||
}
|
||||
|
||||
re2PatternStr.append(")");
|
||||
}
|
||||
else
|
||||
{
|
||||
re2PatternStr.append("[");
|
||||
|
||||
if (exclude)
|
||||
re2PatternStr.append("^");
|
||||
|
||||
for (unsigned i = 0; i < items.getCount(); ++i)
|
||||
appendItem(items[i], exclude && i < includeCount);
|
||||
|
||||
re2PatternStr.append("]");
|
||||
}
|
||||
|
||||
getChar();
|
||||
*parseFlagOut |= PARSE_FLAG_NOT_EMPTY;
|
||||
}
|
||||
else if (op == '(')
|
||||
{
|
||||
re2PatternStr.append(flags & FLAG_GROUP_CAPTURE ? "(" : "(?:");
|
||||
|
||||
int parseFlags;
|
||||
parseExpr(&parseFlags);
|
||||
|
||||
if (!hasChar() || getChar() != ')')
|
||||
status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));
|
||||
|
||||
re2PatternStr.append(")");
|
||||
|
||||
*parseFlagOut |= parseFlags & PARSE_FLAG_NOT_EMPTY;
|
||||
}
|
||||
else
|
||||
{
|
||||
patternPos = savePos;
|
||||
|
||||
bool controlChar = false;
|
||||
|
||||
do
|
||||
{
|
||||
auto charSavePos = patternPos;
|
||||
op = getChar();
|
||||
|
||||
if (useEscape && op == escapeChar)
|
||||
{
|
||||
charSavePos = patternPos;
|
||||
op = getChar();
|
||||
|
||||
if (!isSpecial(op) && op != escapeChar)
|
||||
status_exception::raise(Arg::Gds(isc_escape_invalid));
|
||||
}
|
||||
else
|
||||
{
|
||||
if (isSpecial(op))
|
||||
{
|
||||
controlChar = true;
|
||||
patternPos = charSavePos;
|
||||
}
|
||||
}
|
||||
|
||||
if (!controlChar)
|
||||
{
|
||||
if (isRe2Special(op))
|
||||
re2PatternStr.append("\\");
|
||||
|
||||
re2PatternStr.append(patternStr + charSavePos, patternStr + patternPos);
|
||||
}
|
||||
} while (!controlChar && hasChar());
|
||||
|
||||
if (patternPos == savePos)
|
||||
status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));
|
||||
|
||||
*parseFlagOut |= PARSE_FLAG_NOT_EMPTY;
|
||||
}
|
||||
}
|
||||
|
||||
// Gives read-only access to the RE2 pattern text produced by the constructor.
const string& getRe2PatternStr() const
{
	const string& translated = re2PatternStr;
	return translated;
}
|
||||
|
||||
private:
|
||||
static const int PARSE_FLAG_NOT_EMPTY = 1; // known never to match empty string
|
||||
|
||||
string re2PatternStr;
|
||||
const char* patternStr;
|
||||
int32_t patternPos;
|
||||
int32_t patternLen;
|
||||
UChar32 escapeChar;
|
||||
unsigned flags;
|
||||
bool useEscape;
|
||||
};
|
||||
|
||||
class SubstringSimilarCompiler
|
||||
{
|
||||
public:
|
||||
SubstringSimilarCompiler(MemoryPool& pool, AutoPtr<RE2>& regexp, unsigned flags,
|
||||
const char* aPatternStr, unsigned aPatternLen,
|
||||
const char* escapeStr, unsigned escapeLen)
|
||||
: patternStr(aPatternStr),
|
||||
patternPos(0),
|
||||
patternLen(aPatternLen)
|
||||
{
|
||||
int32_t escapePos = 0;
|
||||
U8_NEXT_UNSAFE(escapeStr, escapePos, escapeChar);
|
||||
|
||||
if (escapePos != escapeLen)
|
||||
status_exception::raise(Arg::Gds(isc_escape_invalid));
|
||||
|
||||
unsigned positions[2];
|
||||
unsigned part = 0;
|
||||
|
||||
while (hasChar())
|
||||
{
|
||||
auto c = getChar();
|
||||
|
||||
if (c != escapeChar)
|
||||
continue;
|
||||
|
||||
if (!hasChar())
|
||||
status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));
|
||||
|
||||
c = getChar();
|
||||
|
||||
if (c == '"')
|
||||
{
|
||||
if (part >= 2)
|
||||
status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));
|
||||
|
||||
positions[part++] = patternPos;
|
||||
}
|
||||
}
|
||||
|
||||
if (part != 2)
|
||||
status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));
|
||||
|
||||
AutoPtr<RE2> regexp1, regexp2, regexp3;
|
||||
|
||||
SimilarToCompiler compiler1(pool, regexp1, (flags & FLAG_CASE_INSENSITIVE) | FLAG_PREFER_FEWER,
|
||||
aPatternStr, positions[0] - escapeLen - 1, escapeStr, escapeLen);
|
||||
|
||||
SimilarToCompiler compiler2(pool, regexp2, (flags & FLAG_CASE_INSENSITIVE),
|
||||
aPatternStr + positions[0], positions[1] - positions[0] - escapeLen - 1, escapeStr, escapeLen);
|
||||
|
||||
SimilarToCompiler compiler3(pool, regexp3, (flags & FLAG_CASE_INSENSITIVE) | FLAG_PREFER_FEWER,
|
||||
aPatternStr + positions[1], patternLen - positions[1], escapeStr, escapeLen);
|
||||
|
||||
string finalRe2Pattern;
|
||||
finalRe2Pattern.reserve(
|
||||
1 + // (
|
||||
compiler1.getRe2PatternStr().length() +
|
||||
2 + // )(
|
||||
compiler2.getRe2PatternStr().length() +
|
||||
2 + // )(
|
||||
compiler3.getRe2PatternStr().length() +
|
||||
1 // )
|
||||
);
|
||||
|
||||
finalRe2Pattern.append("(");
|
||||
finalRe2Pattern.append(compiler1.getRe2PatternStr());
|
||||
finalRe2Pattern.append(")(");
|
||||
finalRe2Pattern.append(compiler2.getRe2PatternStr());
|
||||
finalRe2Pattern.append(")(");
|
||||
finalRe2Pattern.append(compiler3.getRe2PatternStr());
|
||||
finalRe2Pattern.append(")");
|
||||
|
||||
RE2::Options options;
|
||||
options.set_log_errors(false);
|
||||
options.set_dot_nl(true);
|
||||
|
||||
re2::StringPiece sp((const char*) finalRe2Pattern.c_str(), finalRe2Pattern.length());
|
||||
regexp = FB_NEW_POOL(pool) RE2(sp, options);
|
||||
|
||||
if (!regexp->ok())
|
||||
status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));
|
||||
}
|
||||
|
||||
bool hasChar()
|
||||
{
|
||||
return patternPos < patternLen;
|
||||
}
|
||||
|
||||
UChar32 getChar()
|
||||
{
|
||||
fb_assert(hasChar());
|
||||
UChar32 c;
|
||||
U8_NEXT_UNSAFE(patternStr, patternPos, c);
|
||||
return c;
|
||||
}
|
||||
|
||||
UChar32 peekChar()
|
||||
{
|
||||
auto savePos = patternPos;
|
||||
auto c = getChar();
|
||||
patternPos = savePos;
|
||||
return c;
|
||||
}
|
||||
|
||||
private:
|
||||
const char* patternStr;
|
||||
int32_t patternPos;
|
||||
int32_t patternLen;
|
||||
UChar32 escapeChar;
|
||||
};
|
||||
} // namespace
|
||||
|
||||
namespace Firebird {
|
||||
|
||||
|
||||
// Builds the matcher by translating the SIMILAR TO pattern and compiling it into
// `regexp`. Groups are always compiled as capturing groups; case-insensitive
// matching is enabled on request.
SimilarToRegex::SimilarToRegex(MemoryPool& pool, bool caseInsensitive,
		const char* patternStr, unsigned patternLen, const char* escapeStr, unsigned escapeLen)
	: PermanentStorage(pool)
{
	unsigned compileFlags = FLAG_GROUP_CAPTURE | FLAG_PREFER_FEWER;

	if (caseInsensitive)
		compileFlags |= FLAG_CASE_INSENSITIVE;

	// The compiler does all its work in its constructor and stores the result in regexp.
	SimilarToCompiler compiler(pool, regexp, compileFlags,
		patternStr, patternLen, escapeStr, escapeLen);
}
|
||||
|
||||
bool SimilarToRegex::matches(const char* buffer, unsigned bufferLen, Array<MatchPos>* matchPosArray)
|
||||
{
|
||||
re2::StringPiece sp(buffer, bufferLen);
|
||||
|
||||
if (matchPosArray)
|
||||
{
|
||||
const int argsCount = regexp->NumberOfCapturingGroups();
|
||||
|
||||
Array<re2::StringPiece> resSps(argsCount);
|
||||
resSps.resize(argsCount);
|
||||
|
||||
Array<RE2::Arg> args(argsCount);
|
||||
args.resize(argsCount);
|
||||
|
||||
Array<RE2::Arg*> argsPtr(argsCount);
|
||||
|
||||
{ // scope
|
||||
auto resSp = resSps.begin();
|
||||
|
||||
for (auto& arg : args)
|
||||
{
|
||||
arg = resSp++;
|
||||
argsPtr.push(&arg);
|
||||
}
|
||||
}
|
||||
|
||||
if (RE2::FullMatchN(sp, *regexp.get(), argsPtr.begin(), argsCount))
|
||||
{
|
||||
matchPosArray->clear();
|
||||
|
||||
for (const auto resSp : resSps)
|
||||
{
|
||||
matchPosArray->push(MatchPos{
|
||||
static_cast<unsigned>(resSp.data() - sp.begin()),
|
||||
static_cast<unsigned>(resSp.length())
|
||||
});
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
else
|
||||
return false;
|
||||
}
|
||||
else
|
||||
return RE2::FullMatch(sp, *regexp.get());
|
||||
}
|
||||
|
||||
//---------------------
|
||||
|
||||
// Builds the matcher by compiling the three-part SUBSTRING ... SIMILAR pattern into
// `regexp`; case-insensitive matching is enabled on request.
SubstringSimilarRegex::SubstringSimilarRegex(MemoryPool& pool, bool caseInsensitive,
		const char* patternStr, unsigned patternLen, const char* escapeStr, unsigned escapeLen)
	: PermanentStorage(pool)
{
	unsigned compileFlags = 0;

	if (caseInsensitive)
		compileFlags = FLAG_CASE_INSENSITIVE;

	// The compiler does all its work in its constructor and stores the result in regexp.
	SubstringSimilarCompiler compiler(pool, regexp, compileFlags,
		patternStr, patternLen, escapeStr, escapeLen);
}
|
||||
|
||||
bool SubstringSimilarRegex::matches(const char* buffer, unsigned bufferLen,
|
||||
unsigned* resultStart, unsigned* resultLength)
|
||||
{
|
||||
re2::StringPiece sp(buffer, bufferLen);
|
||||
|
||||
re2::StringPiece spResult;
|
||||
|
||||
if (RE2::FullMatch(sp, *regexp.get(), nullptr, &spResult, nullptr))
|
||||
{
|
||||
*resultStart = spResult.begin() - buffer;
|
||||
*resultLength = spResult.length();
|
||||
return true;
|
||||
}
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
} // namespace Firebird
|
75
src/common/SimilarToRegex.h
Normal file
75
src/common/SimilarToRegex.h
Normal file
@ -0,0 +1,75 @@
|
||||
/*
|
||||
* The contents of this file are subject to the Initial
|
||||
* Developer's Public License Version 1.0 (the "License");
|
||||
* you may not use this file except in compliance with the
|
||||
* License. You may obtain a copy of the License at
|
||||
* http://www.ibphoenix.com/main.nfs?a=ibphoenix&page=ibp_idpl.
|
||||
*
|
||||
* Software distributed under the License is distributed AS IS,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing rights
|
||||
* and limitations under the License.
|
||||
*
|
||||
* The Original Code was created by Adriano dos Santos Fernandes
|
||||
* for the Firebird Open Source RDBMS project.
|
||||
*
|
||||
* Copyright (c) 2019 Adriano dos Santos Fernandes <adrianosf at gmail.com>
|
||||
* and all contributors signed below.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef COMMON_SIMILAR_TO_REGEX_H
|
||||
#define COMMON_SIMILAR_TO_REGEX_H
|
||||
|
||||
#include "firebird.h"
|
||||
#include <re2/re2.h>
|
||||
#include "../common/classes/auto.h"
|
||||
#include "../common/classes/array.h"
|
||||
#include "../common/classes/fb_string.h"
|
||||
|
||||
namespace Firebird {
|
||||
|
||||
|
||||
//// FIXME: Leak re2::RE2 when destroyed by pool.
|
||||
class SimilarToRegex : public PermanentStorage
|
||||
{
|
||||
public:
|
||||
struct MatchPos
|
||||
{
|
||||
unsigned start;
|
||||
unsigned length;
|
||||
};
|
||||
|
||||
public:
|
||||
SimilarToRegex(MemoryPool& pool, bool caseInsensitive,
|
||||
const char* patternStr, unsigned patternLen, const char* escapeStr, unsigned escapeLen);
|
||||
|
||||
public:
|
||||
bool matches(const char* buffer, unsigned bufferLen, Array<MatchPos>* matchPosArray = nullptr);
|
||||
|
||||
private:
|
||||
AutoPtr<re2::RE2> regexp;
|
||||
};
|
||||
|
||||
//// FIXME: Leak re2::RE2 when destroyed by pool.
|
||||
// Given a regular expression R1<escape>"R2<escape>"R3 and the string S:
|
||||
// - Find the shortest substring of S that matches R1 while the remainder (S23) matches R2R3;
|
||||
// - Find the longest (S2) substring of S23 that matches R2 while the remainder matches R3;
|
||||
// - Return S2.
|
||||
class SubstringSimilarRegex : public PermanentStorage
|
||||
{
|
||||
public:
|
||||
SubstringSimilarRegex(MemoryPool& pool, bool caseInsensitive,
|
||||
const char* patternStr, unsigned patternLen, const char* escapeStr, unsigned escapeLen);
|
||||
|
||||
public:
|
||||
bool matches(const char* buffer, unsigned bufferLen, unsigned* resultStart, unsigned* resultLength);
|
||||
|
||||
private:
|
||||
AutoPtr<re2::RE2> regexp;
|
||||
};
|
||||
|
||||
|
||||
} // namespace Firebird
|
||||
|
||||
#endif // COMMON_SIMILAR_TO_REGEX_H
|
@ -169,33 +169,6 @@ TextType::TextType(TTYPE_ID _type, texttype *_tt, CharSet* _cs)
|
||||
memset(&canonicalChars[conversions[i].ch], 0, sizeof(ULONG));
|
||||
}
|
||||
}
|
||||
|
||||
struct Conversion2
|
||||
{
|
||||
const char* str;
|
||||
UCHAR* buffer;
|
||||
};
|
||||
|
||||
const Conversion2 conversions2[] =
|
||||
{
|
||||
{"0123456789", reinterpret_cast<UCHAR*>(canonicalNumbers)},
|
||||
{"abcdefghijklmnopqrstuvwxyz", reinterpret_cast<UCHAR*>(canonicalLowerLetters)},
|
||||
{"ABCDEFGHIJKLMNOPQRSTUVWXYZ", reinterpret_cast<UCHAR*>(canonicalUpperLetters)},
|
||||
{" \t\v\r\n\f", reinterpret_cast<UCHAR*>(canonicalWhiteSpaces)}
|
||||
};
|
||||
|
||||
for (int i = 0; i < FB_NELEM(conversions2); i++)
|
||||
{
|
||||
UCHAR temp[sizeof(ULONG)];
|
||||
|
||||
for (const char* p = conversions2[i].str; *p; ++p)
|
||||
{
|
||||
USHORT code = static_cast<USHORT>(*p);
|
||||
ULONG length = getCharSet()->getConvFromUnicode().convert(sizeof(code), &code, sizeof(temp), temp);
|
||||
const size_t pos = (p - conversions2[i].str) * getCanonicalWidth();
|
||||
canonical(length, temp, sizeof(ULONG), &conversions2[i].buffer[pos]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -138,47 +138,8 @@ public:
|
||||
return reinterpret_cast<const UCHAR*>(&canonicalChars[ch]);
|
||||
}
|
||||
|
||||
const UCHAR* getCanonicalNumbers(int* count = NULL) const
|
||||
{
|
||||
if (count)
|
||||
*count = 10;
|
||||
return reinterpret_cast<const UCHAR*>(canonicalNumbers);
|
||||
}
|
||||
|
||||
const UCHAR* getCanonicalLowerLetters(int* count = NULL) const
|
||||
{
|
||||
if (count)
|
||||
*count = 26;
|
||||
return reinterpret_cast<const UCHAR*>(canonicalLowerLetters);
|
||||
}
|
||||
|
||||
const UCHAR* getCanonicalUpperLetters(int* count = NULL) const
|
||||
{
|
||||
if (count)
|
||||
*count = 26;
|
||||
return reinterpret_cast<const UCHAR*>(canonicalUpperLetters);
|
||||
}
|
||||
|
||||
const UCHAR* getCanonicalWhiteSpaces(int* count = NULL) const
|
||||
{
|
||||
if (count)
|
||||
*count = 6;
|
||||
return reinterpret_cast<const UCHAR*>(canonicalWhiteSpaces);
|
||||
}
|
||||
|
||||
const UCHAR* getCanonicalSpace(int* count = NULL) const
|
||||
{
|
||||
if (count)
|
||||
*count = 1;
|
||||
return getCanonicalChar(CHAR_SPACE);
|
||||
}
|
||||
|
||||
private:
|
||||
ULONG canonicalChars[CHAR_COUNT];
|
||||
ULONG canonicalNumbers[10];
|
||||
ULONG canonicalLowerLetters[26];
|
||||
ULONG canonicalUpperLetters[26];
|
||||
ULONG canonicalWhiteSpaces[6];
|
||||
};
|
||||
|
||||
} // namespace Jrd
|
||||
|
@ -1031,6 +1031,37 @@ INTL_BOOL UnicodeUtil::utf32WellFormed(ULONG len, const ULONG* str, ULONG* offen
|
||||
return true; // well-formed
|
||||
}
|
||||
|
||||
// Rewrites UTF-8 text in place after passing it through the ICU transliterator
// returned by ICU::getCiAiTransliterator().
//
// data: UTF-8 bytes on input; on success it holds the transliterated UTF-8 bytes.
//       It is left untouched when no transliterator is available or ICU reports an error.
void UnicodeUtil::utf8Normalize(UCharBuffer& data)
{
	// NOTE(review): the loadICU() result is dereferenced unchecked, as before - confirm it
	// cannot be NULL for the default ICU version.
	ICU* icu = loadICU("", "");

	// utf8ToUtf16 takes and returns lengths in bytes; one UTF-16 unit per input byte is enough room.
	HalfStaticArray<USHORT, BUFFER_MEDIUM> utf16Buffer(data.getCount());
	USHORT errCode;
	ULONG errPosition;
	const ULONG utf16BufferLen = utf8ToUtf16(data.getCount(), data.begin(),
		data.getCount() * sizeof(USHORT), utf16Buffer.getBuffer(data.getCount()),
		&errCode, &errPosition);

	UTransliterator* trans = icu->getCiAiTransliterator();

	if (!trans)
		return;

	// ICU expresses capacity, length and limit in UChars (UTF-16 code units), not in bytes.
	const int32_t capacity = static_cast<int32_t>(utf16Buffer.getCount());
	int32_t len = utf16BufferLen / sizeof(USHORT);
	int32_t limit = len;

	UErrorCode errorCode = U_ZERO_ERROR;
	icu->utransTransUChars(trans, reinterpret_cast<UChar*>(utf16Buffer.begin()),
		&len, capacity, 0, &limit, &errorCode);
	icu->releaseCiAiTransliterator(trans);

	// On failure (e.g. buffer overflow) the UTF-16 buffer content is not reliable: keep data as is.
	if (U_FAILURE(errorCode))
		return;

	// Convert exactly the transliterated text: len may differ from the original length.
	// Four output bytes per UTF-16 unit is a safe upper bound for UTF-8.
	const ULONG utf8Capacity = len * 4;
	const ULONG utf8Len = utf16ToUtf8(len * sizeof(USHORT), utf16Buffer.begin(),
		utf8Capacity, data.getBuffer(utf8Capacity, false),
		&errCode, &errPosition);

	data.shrink(utf8Len);
}
|
||||
|
||||
UnicodeUtil::ICU* UnicodeUtil::loadICU(const string& icuVersion, const string& configInfo)
|
||||
{
|
||||
ObjectsArray<string> versions;
|
||||
|
@ -177,6 +177,8 @@ public:
|
||||
static INTL_BOOL utf16WellFormed(ULONG len, const USHORT* str, ULONG* offending_position);
|
||||
static INTL_BOOL utf32WellFormed(ULONG len, const ULONG* str, ULONG* offending_position);
|
||||
|
||||
static void utf8Normalize(Firebird::UCharBuffer& data);
|
||||
|
||||
static ConversionICU& getConversionICU();
|
||||
static ICU* loadICU(const Firebird::string& icuVersion, const Firebird::string& configInfo);
|
||||
static bool getCollVersion(const Firebird::string& icuVersion,
|
||||
|
@ -945,7 +945,7 @@ bool ComparativeBoolNode::stringBoolean(thread_db* tdbb, jrd_req* request, dsc*
|
||||
else // nod_similar
|
||||
{
|
||||
impure->vlu_misc.vlu_invariant = evaluator = obj->createSimilarToMatcher(
|
||||
*tdbb->getDefaultPool(), p2, l2, escape_str, escape_length);
|
||||
tdbb, *tdbb->getDefaultPool(), p2, l2, escape_str, escape_length);
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -961,7 +961,7 @@ bool ComparativeBoolNode::stringBoolean(thread_db* tdbb, jrd_req* request, dsc*
|
||||
}
|
||||
else // nod_similar
|
||||
{
|
||||
evaluator = obj->createSimilarToMatcher(*tdbb->getDefaultPool(),
|
||||
evaluator = obj->createSimilarToMatcher(tdbb, *tdbb->getDefaultPool(),
|
||||
p2, l2, escape_str, escape_length);
|
||||
}
|
||||
|
||||
@ -1152,7 +1152,7 @@ bool ComparativeBoolNode::stringFunction(thread_db* tdbb, jrd_req* request,
|
||||
else // nod_similar
|
||||
{
|
||||
impure->vlu_misc.vlu_invariant = evaluator = obj->createSimilarToMatcher(
|
||||
*tdbb->getDefaultPool(), p2, l2, escape_str, escape_length);
|
||||
tdbb, *tdbb->getDefaultPool(), p2, l2, escape_str, escape_length);
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -1170,7 +1170,7 @@ bool ComparativeBoolNode::stringFunction(thread_db* tdbb, jrd_req* request,
|
||||
return obj->like(*tdbb->getDefaultPool(), p1, l1, p2, l2, escape_str, escape_length);
|
||||
|
||||
// nod_similar
|
||||
return obj->similarTo(*tdbb->getDefaultPool(), p1, l1, p2, l2, escape_str, escape_length);
|
||||
return obj->similarTo(tdbb, *tdbb->getDefaultPool(), p1, l1, p2, l2, escape_str, escape_length);
|
||||
}
|
||||
|
||||
// Handle MATCHES
|
||||
|
@ -11873,7 +11873,7 @@ dsc* SubstringSimilarNode::execute(thread_db* tdbb, jrd_req* request) const
|
||||
delete impure->vlu_misc.vlu_invariant;
|
||||
|
||||
impure->vlu_misc.vlu_invariant = evaluator = collation->createSubstringSimilarMatcher(
|
||||
*tdbb->getDefaultPool(), patternStr, patternLen, escapeStr, escapeLen);
|
||||
tdbb, *tdbb->getDefaultPool(), patternStr, patternLen, escapeStr, escapeLen);
|
||||
|
||||
impure->vlu_flags |= VLU_computed;
|
||||
}
|
||||
@ -11885,7 +11885,7 @@ dsc* SubstringSimilarNode::execute(thread_db* tdbb, jrd_req* request) const
|
||||
}
|
||||
else
|
||||
{
|
||||
autoEvaluator = evaluator = collation->createSubstringSimilarMatcher(*tdbb->getDefaultPool(),
|
||||
autoEvaluator = evaluator = collation->createSubstringSimilarMatcher(tdbb, *tdbb->getDefaultPool(),
|
||||
patternStr, patternLen, escapeStr, escapeLen);
|
||||
}
|
||||
|
||||
|
@ -99,16 +99,177 @@
|
||||
#include "../jrd/intl_classes.h"
|
||||
#include "../jrd/lck_proto.h"
|
||||
#include "../jrd/intl_classes.h"
|
||||
#include "../jrd/intl_proto.h"
|
||||
#include "../jrd/Collation.h"
|
||||
#include "../common/TextType.h"
|
||||
#include "../common/SimilarToRegex.h"
|
||||
|
||||
#include "../jrd/SimilarToMatcher.h"
|
||||
|
||||
using namespace Firebird;
|
||||
using namespace Jrd;
|
||||
|
||||
|
||||
namespace {
|
||||
|
||||
//// TODO: NONE / OCTETS.
|
||||
class Re2SimilarMatcher : public PatternMatcher
|
||||
{
|
||||
public:
|
||||
Re2SimilarMatcher(thread_db* tdbb, MemoryPool& pool, TextType* textType,
|
||||
const UCHAR* patternStr, SLONG patternLen, const UCHAR* escapeStr, SLONG escapeLen)
|
||||
: PatternMatcher(pool, textType),
|
||||
buffer(pool)
|
||||
{
|
||||
CsConvert converter = INTL_convert_lookup(tdbb, CS_UTF8, textType->getCharSet()->getId());
|
||||
|
||||
UCharBuffer patternBuffer, escapeBuffer;
|
||||
|
||||
converter.convert(patternLen, patternStr, patternBuffer);
|
||||
|
||||
if (textType->getFlags() & TEXTTYPE_ATTR_ACCENT_INSENSITIVE)
|
||||
UnicodeUtil::utf8Normalize(patternBuffer);
|
||||
|
||||
if (escapeStr)
|
||||
{
|
||||
converter.convert(escapeLen, escapeStr, escapeBuffer);
|
||||
|
||||
if (textType->getFlags() & TEXTTYPE_ATTR_ACCENT_INSENSITIVE)
|
||||
UnicodeUtil::utf8Normalize(escapeBuffer);
|
||||
}
|
||||
|
||||
regex = FB_NEW_POOL(pool) SimilarToRegex(pool,
|
||||
(textType->getFlags() & TEXTTYPE_ATTR_CASE_INSENSITIVE),
|
||||
(const char*) patternBuffer.begin(), patternBuffer.getCount(),
|
||||
(escapeStr ? (const char*) escapeBuffer.begin() : nullptr), escapeBuffer.getCount());
|
||||
}
|
||||
|
||||
public:
|
||||
static Re2SimilarMatcher* create(thread_db* tdbb, MemoryPool& pool, TextType* textType,
|
||||
const UCHAR* patternStr, SLONG patternLen, const UCHAR* escapeStr, SLONG escapeLen)
|
||||
{
|
||||
return FB_NEW_POOL(pool) Re2SimilarMatcher(tdbb, pool, textType, patternStr, patternLen, escapeStr, escapeLen);
|
||||
}
|
||||
|
||||
static bool evaluate(thread_db* tdbb, MemoryPool& pool, TextType* textType, const UCHAR* str, SLONG strLen,
|
||||
const UCHAR* patternStr, SLONG patternLen, const UCHAR* escapeStr, SLONG escapeLen)
|
||||
{
|
||||
Re2SimilarMatcher matcher(tdbb, pool, textType, patternStr, patternLen, escapeStr, escapeLen);
|
||||
matcher.process(str, strLen);
|
||||
return matcher.result();
|
||||
}
|
||||
|
||||
public:
|
||||
virtual void reset()
|
||||
{
|
||||
buffer.shrink(0);
|
||||
}
|
||||
|
||||
virtual bool process(const UCHAR* data, SLONG dataLen)
|
||||
{
|
||||
const FB_SIZE_T pos = buffer.getCount();
|
||||
memcpy(buffer.getBuffer(pos + dataLen) + pos, data, dataLen);
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual bool result()
|
||||
{
|
||||
if (textType->getFlags() & TEXTTYPE_ATTR_ACCENT_INSENSITIVE)
|
||||
UnicodeUtil::utf8Normalize(buffer);
|
||||
|
||||
return regex->matches((const char*) buffer.begin(), buffer.getCount());
|
||||
}
|
||||
|
||||
private:
|
||||
AutoPtr<SimilarToRegex> regex;
|
||||
UCharBuffer buffer;
|
||||
};
|
||||
|
||||
class Re2SubstringSimilarMatcher : public BaseSubstringSimilarMatcher
|
||||
{
|
||||
public:
|
||||
Re2SubstringSimilarMatcher(thread_db* tdbb, MemoryPool& pool, TextType* textType,
|
||||
const UCHAR* patternStr, SLONG patternLen, const UCHAR* escapeStr, SLONG escapeLen)
|
||||
: BaseSubstringSimilarMatcher(pool, textType),
|
||||
buffer(pool),
|
||||
resultStart(0),
|
||||
resultLength(0)
|
||||
{
|
||||
CsConvert converter = INTL_convert_lookup(tdbb, textType->getCharSet()->getId(), CS_UTF8);
|
||||
|
||||
UCharBuffer patternBuffer, escapeBuffer;
|
||||
|
||||
converter.convert(patternLen, patternStr, patternBuffer);
|
||||
|
||||
if (textType->getFlags() & TEXTTYPE_ATTR_ACCENT_INSENSITIVE)
|
||||
UnicodeUtil::utf8Normalize(patternBuffer);
|
||||
|
||||
if (escapeStr)
|
||||
{
|
||||
converter.convert(escapeLen, escapeStr, escapeBuffer);
|
||||
|
||||
if (textType->getFlags() & TEXTTYPE_ATTR_ACCENT_INSENSITIVE)
|
||||
UnicodeUtil::utf8Normalize(escapeBuffer);
|
||||
}
|
||||
|
||||
regex = FB_NEW_POOL(pool) SubstringSimilarRegex(pool,
|
||||
(textType->getFlags() & TEXTTYPE_ATTR_CASE_INSENSITIVE),
|
||||
(const char*) patternBuffer.begin(), patternBuffer.getCount(),
|
||||
(escapeStr ? (const char*) escapeBuffer.begin() : nullptr), escapeBuffer.getCount());
|
||||
}
|
||||
|
||||
virtual ~Re2SubstringSimilarMatcher()
|
||||
{
|
||||
}
|
||||
|
||||
public:
|
||||
static Re2SubstringSimilarMatcher* create(thread_db* tdbb, MemoryPool& pool, TextType* textType,
|
||||
const UCHAR* patternStr, SLONG patternLen, const UCHAR* escapeStr, SLONG escapeLen)
|
||||
{
|
||||
return FB_NEW_POOL(pool) Re2SubstringSimilarMatcher(tdbb, pool, textType,
|
||||
patternStr, patternLen, escapeStr, escapeLen);
|
||||
}
|
||||
|
||||
static bool evaluate(thread_db* tdbb, MemoryPool& pool, TextType* textType, const UCHAR* str, SLONG strLen,
|
||||
const UCHAR* patternStr, SLONG patternLen, const UCHAR* escapeStr, SLONG escapeLen)
|
||||
{
|
||||
Re2SubstringSimilarMatcher matcher(tdbb, pool, textType, patternStr, patternLen, escapeStr, escapeLen);
|
||||
matcher.process(str, strLen);
|
||||
return matcher.result();
|
||||
}
|
||||
|
||||
public:
|
||||
virtual void reset()
|
||||
{
|
||||
buffer.shrink(0);
|
||||
resultStart = resultLength = 0;
|
||||
}
|
||||
|
||||
virtual bool process(const UCHAR* data, SLONG dataLen)
|
||||
{
|
||||
const FB_SIZE_T pos = buffer.getCount();
|
||||
memcpy(buffer.getBuffer(pos + dataLen) + pos, data, dataLen);
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual bool result()
|
||||
{
|
||||
if (textType->getFlags() & TEXTTYPE_ATTR_ACCENT_INSENSITIVE)
|
||||
UnicodeUtil::utf8Normalize(buffer);
|
||||
|
||||
return regex->matches((const char*) buffer.begin(), buffer.getCount(), &resultStart, &resultLength);
|
||||
}
|
||||
|
||||
virtual void getResultInfo(unsigned* start, unsigned* length)
|
||||
{
|
||||
*start = resultStart;
|
||||
*length = resultLength;
|
||||
}
|
||||
|
||||
private:
|
||||
AutoPtr<SubstringSimilarRegex> regex;
|
||||
UCharBuffer buffer;
|
||||
unsigned resultStart, resultLength;
|
||||
};
|
||||
|
||||
// constants used in matches and sleuth
|
||||
const int CHAR_GDML_MATCH_ONE = TextType::CHAR_QUESTION_MARK;
|
||||
const int CHAR_GDML_MATCH_ANY = TextType::CHAR_ASTERISK;
|
||||
@ -725,8 +886,6 @@ template <
|
||||
typename pStartsMatcher,
|
||||
typename pContainsMatcher,
|
||||
typename pLikeMatcher,
|
||||
typename pSimilarToMatcher,
|
||||
typename pSubstringSimilarMatcher,
|
||||
typename pMatchesMatcher,
|
||||
typename pSleuthMatcher
|
||||
>
|
||||
@ -781,22 +940,22 @@ public:
|
||||
getCharSet()->getSqlMatchOne(), getCharSet()->getSqlMatchOneLength());
|
||||
}
|
||||
|
||||
virtual bool similarTo(MemoryPool& pool, const UCHAR* s, SLONG sl,
|
||||
virtual bool similarTo(thread_db* tdbb, MemoryPool& pool, const UCHAR* s, SLONG sl,
|
||||
const UCHAR* p, SLONG pl, const UCHAR* escape, SLONG escapeLen)
|
||||
{
|
||||
return pSimilarToMatcher::evaluate(pool, this, s, sl, p, pl, escape, escapeLen);
|
||||
return Re2SimilarMatcher::evaluate(tdbb, pool, this, s, sl, p, pl, escape, escapeLen);
|
||||
}
|
||||
|
||||
virtual PatternMatcher* createSimilarToMatcher(MemoryPool& pool, const UCHAR* p, SLONG pl,
|
||||
virtual PatternMatcher* createSimilarToMatcher(thread_db* tdbb, MemoryPool& pool, const UCHAR* p, SLONG pl,
|
||||
const UCHAR* escape, SLONG escapeLen)
|
||||
{
|
||||
return pSimilarToMatcher::create(pool, this, p, pl, escape, escapeLen);
|
||||
return Re2SimilarMatcher::create(tdbb, pool, this, p, pl, escape, escapeLen);
|
||||
}
|
||||
|
||||
virtual BaseSubstringSimilarMatcher* createSubstringSimilarMatcher(MemoryPool& pool,
|
||||
virtual BaseSubstringSimilarMatcher* createSubstringSimilarMatcher(thread_db* tdbb, MemoryPool& pool,
|
||||
const UCHAR* p, SLONG pl, const UCHAR* escape, SLONG escapeLen)
|
||||
{
|
||||
return pSubstringSimilarMatcher::create(pool, this, p, pl, escape, escapeLen);
|
||||
return Re2SubstringSimilarMatcher::create(tdbb, pool, this, p, pl, escape, escapeLen);
|
||||
}
|
||||
|
||||
virtual bool contains(MemoryPool& pool, const UCHAR* s, SLONG sl, const UCHAR* p, SLONG pl)
|
||||
@ -823,8 +982,6 @@ Collation* newCollation(MemoryPool& pool, TTYPE_ID id, texttype* tt, CharSet* cs
|
||||
StartsMatcherUCharDirect,
|
||||
ContainsMatcherUCharDirect,
|
||||
LikeMatcher<T>,
|
||||
SimilarToMatcher<T>,
|
||||
SubstringSimilarMatcher<T>,
|
||||
MatchesMatcher<T>,
|
||||
SleuthMatcher<T>
|
||||
> DirectImpl;
|
||||
@ -833,8 +990,6 @@ Collation* newCollation(MemoryPool& pool, TTYPE_ID id, texttype* tt, CharSet* cs
|
||||
StartsMatcherUCharCanonical,
|
||||
ContainsMatcher<T>,
|
||||
LikeMatcher<T>,
|
||||
SimilarToMatcher<T>,
|
||||
SubstringSimilarMatcher<T>,
|
||||
MatchesMatcher<T>,
|
||||
SleuthMatcher<T>
|
||||
> NonDirectImpl;
|
||||
|
@ -66,12 +66,12 @@ public:
|
||||
virtual PatternMatcher* createLikeMatcher(MemoryPool& pool, const UCHAR* p, SLONG pl,
|
||||
const UCHAR* escape, SLONG escapeLen) = 0;
|
||||
|
||||
virtual bool similarTo(MemoryPool& pool, const UCHAR* s, SLONG sl, const UCHAR* p, SLONG pl,
|
||||
virtual bool similarTo(thread_db* tdbb, MemoryPool& pool, const UCHAR* s, SLONG sl, const UCHAR* p, SLONG pl,
|
||||
const UCHAR* escape, SLONG escapeLen) = 0;
|
||||
virtual PatternMatcher* createSimilarToMatcher(MemoryPool& pool, const UCHAR* p, SLONG pl,
|
||||
virtual PatternMatcher* createSimilarToMatcher(thread_db* tdbb, MemoryPool& pool, const UCHAR* p, SLONG pl,
|
||||
const UCHAR* escape, SLONG escapeLen) = 0;
|
||||
|
||||
virtual BaseSubstringSimilarMatcher* createSubstringSimilarMatcher(MemoryPool& pool,
|
||||
virtual BaseSubstringSimilarMatcher* createSubstringSimilarMatcher(thread_db* tdbb, MemoryPool& pool,
|
||||
const UCHAR* p, SLONG pl, const UCHAR* escape, SLONG escapeLen) = 0;
|
||||
|
||||
virtual bool contains(MemoryPool& pool, const UCHAR* s, SLONG sl, const UCHAR* p, SLONG pl) = 0;
|
||||
|
@ -654,6 +654,7 @@ bool IntlManager::lookupCollation(const string& collationName,
|
||||
attributes, specificAttributes, specificAttributesLen, ignoreAttributes,
|
||||
collationExternalInfo.configInfo.c_str()))
|
||||
{
|
||||
tt->texttype_flags = attributes;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -87,23 +87,12 @@ public:
|
||||
UpcaseConverter(MemoryPool& pool, TextType* obj, const UCHAR*& str, SLONG& len)
|
||||
: PrevConverter(pool, obj, str, len)
|
||||
{
|
||||
if (len > (int) sizeof(tempBuffer))
|
||||
out_str = FB_NEW_POOL(pool) UCHAR[len];
|
||||
else
|
||||
out_str = tempBuffer;
|
||||
obj->str_to_upper(len, str, len, out_str);
|
||||
str = out_str;
|
||||
}
|
||||
|
||||
~UpcaseConverter()
|
||||
{
|
||||
if (out_str != tempBuffer)
|
||||
delete[] out_str;
|
||||
obj->str_to_upper(len, str, len, tempBuffer.getBuffer(len, false));
|
||||
str = tempBuffer.begin();
|
||||
}
|
||||
|
||||
private:
|
||||
UCHAR tempBuffer[100];
|
||||
UCHAR* out_str;
|
||||
Firebird::UCharBuffer tempBuffer;
|
||||
};
|
||||
|
||||
template <typename PrevConverter = NullStrConverter>
|
||||
@ -115,29 +104,17 @@ public:
|
||||
{
|
||||
const SLONG out_len = len / obj->getCharSet()->minBytesPerChar() * obj->getCanonicalWidth();
|
||||
|
||||
if (out_len > (int) sizeof(tempBuffer))
|
||||
out_str = FB_NEW_POOL(pool) UCHAR[out_len];
|
||||
else
|
||||
out_str = tempBuffer;
|
||||
|
||||
if (str)
|
||||
{
|
||||
len = obj->canonical(len, str, out_len, out_str) * obj->getCanonicalWidth();
|
||||
str = out_str;
|
||||
len = obj->canonical(len, str, out_len, tempBuffer.getBuffer(out_len, false)) * obj->getCanonicalWidth();
|
||||
str = tempBuffer.begin();
|
||||
}
|
||||
else
|
||||
len = 0;
|
||||
}
|
||||
|
||||
~CanonicalConverter()
|
||||
{
|
||||
if (out_str != tempBuffer)
|
||||
delete[] out_str;
|
||||
}
|
||||
|
||||
private:
|
||||
UCHAR tempBuffer[100];
|
||||
UCHAR* out_str;
|
||||
Firebird::UCharBuffer tempBuffer;
|
||||
};
|
||||
|
||||
} // namespace Jrd
|
||||
|
@ -50,50 +50,23 @@ TableMatcher::TableMatcher(MemoryPool& pool,
|
||||
const string& excludeFilter)
|
||||
: m_tables(pool)
|
||||
{
|
||||
m_cs = FB_NEW_POOL(pool) charset;
|
||||
m_tt = FB_NEW_POOL(pool) texttype;
|
||||
|
||||
IntlUtil::initUtf8Charset(m_cs);
|
||||
|
||||
string collAttributes("ICU-VERSION=");
|
||||
collAttributes += Jrd::UnicodeUtil::getDefaultIcuVersion();
|
||||
IntlUtil::setupIcuAttributes(m_cs, collAttributes, "", collAttributes);
|
||||
|
||||
UCharBuffer collAttributesBuffer;
|
||||
collAttributesBuffer.push(reinterpret_cast<const UCHAR*>(collAttributes.c_str()),
|
||||
collAttributes.length());
|
||||
|
||||
if (!IntlUtil::initUnicodeCollation(m_tt, m_cs, "UNICODE", 0, collAttributesBuffer, ""))
|
||||
raiseError("Cannot initialize UNICODE collation");
|
||||
|
||||
m_charSet = CharSet::createInstance(pool, 0, m_cs);
|
||||
m_textType = FB_NEW_POOL(pool) TextType(0, m_tt, m_charSet);
|
||||
|
||||
if (includeFilter.hasData())
|
||||
{
|
||||
m_includeMatcher.reset(FB_NEW_POOL(pool) SimilarMatcher(
|
||||
pool, m_textType,
|
||||
(const UCHAR*) includeFilter.c_str(),
|
||||
includeFilter.length(),
|
||||
'\\', true));
|
||||
m_includeMatcher.reset(FB_NEW_POOL(pool) SimilarToRegex(
|
||||
pool, true,
|
||||
includeFilter.c_str(), includeFilter.length(),
|
||||
"\\", 1));
|
||||
}
|
||||
|
||||
if (excludeFilter.hasData())
|
||||
{
|
||||
m_excludeMatcher.reset(FB_NEW_POOL(pool) SimilarMatcher(
|
||||
pool, m_textType,
|
||||
(const UCHAR*) excludeFilter.c_str(),
|
||||
excludeFilter.length(),
|
||||
'\\', true));
|
||||
m_excludeMatcher.reset(FB_NEW_POOL(pool) SimilarToRegex(
|
||||
pool, true,
|
||||
excludeFilter.c_str(), excludeFilter.length(),
|
||||
"\\", 1));
|
||||
}
|
||||
}
|
||||
|
||||
TableMatcher::~TableMatcher()
|
||||
{
|
||||
if (m_tt && m_tt->texttype_fn_destroy)
|
||||
m_tt->texttype_fn_destroy(m_tt);
|
||||
}
|
||||
|
||||
bool TableMatcher::matchTable(const MetaName& tableName)
|
||||
{
|
||||
try
|
||||
@ -104,18 +77,10 @@ bool TableMatcher::matchTable(const MetaName& tableName)
|
||||
enabled = true;
|
||||
|
||||
if (m_includeMatcher)
|
||||
{
|
||||
m_includeMatcher->reset();
|
||||
m_includeMatcher->process((const UCHAR*) tableName.c_str(), tableName.length());
|
||||
enabled = m_includeMatcher->result();
|
||||
}
|
||||
enabled = m_includeMatcher->matches(tableName.c_str(), tableName.length());
|
||||
|
||||
if (enabled && m_excludeMatcher)
|
||||
{
|
||||
m_excludeMatcher->reset();
|
||||
m_excludeMatcher->process((const UCHAR*) tableName.c_str(), tableName.length());
|
||||
enabled = !m_excludeMatcher->result();
|
||||
}
|
||||
enabled = !m_excludeMatcher->matches(tableName.c_str(), tableName.length());
|
||||
|
||||
m_tables.put(tableName, enabled);
|
||||
}
|
||||
|
@ -26,9 +26,9 @@
|
||||
|
||||
#include "../common/classes/array.h"
|
||||
#include "../common/classes/semaphore.h"
|
||||
#include "../common/SimilarToRegex.h"
|
||||
#include "../common/os/guid.h"
|
||||
#include "../common/isc_s_proto.h"
|
||||
#include "../../jrd/SimilarToMatcher.h"
|
||||
#include "../../jrd/intl_classes.h"
|
||||
|
||||
#include "Config.h"
|
||||
@ -38,25 +38,18 @@ namespace Replication
|
||||
{
|
||||
class TableMatcher
|
||||
{
|
||||
typedef Jrd::UpcaseConverter<Jrd::NullStrConverter> SimilarConverter;
|
||||
typedef Firebird::SimilarToMatcher<UCHAR, SimilarConverter> SimilarMatcher;
|
||||
typedef Firebird::GenericMap<Firebird::Pair<Firebird::Left<Firebird::MetaName, bool> > > TablePermissionMap;
|
||||
|
||||
public:
|
||||
TableMatcher(MemoryPool& pool,
|
||||
const Firebird::string& includeFilter,
|
||||
const Firebird::string& excludeFilter);
|
||||
~TableMatcher();
|
||||
|
||||
bool matchTable(const Firebird::MetaName& tableName);
|
||||
|
||||
private:
|
||||
charset* m_cs;
|
||||
Firebird::AutoPtr<texttype> m_tt;
|
||||
Firebird::AutoPtr<Jrd::CharSet> m_charSet;
|
||||
Firebird::AutoPtr<Jrd::TextType> m_textType;
|
||||
Firebird::AutoPtr<SimilarMatcher> m_includeMatcher;
|
||||
Firebird::AutoPtr<SimilarMatcher> m_excludeMatcher;
|
||||
Firebird::AutoPtr<Firebird::SimilarToRegex> m_includeMatcher;
|
||||
Firebird::AutoPtr<Firebird::SimilarToRegex> m_excludeMatcher;
|
||||
TablePermissionMap m_tables;
|
||||
};
|
||||
|
||||
|
@ -570,7 +570,6 @@ VI. ADDITIONAL NOTES
|
||||
#include "../common/db_alias.h"
|
||||
#include "../jrd/intl_proto.h"
|
||||
#include "../jrd/lck_proto.h"
|
||||
#include "../jrd/Collation.h"
|
||||
|
||||
#ifdef DEBUG_VAL_VERBOSE
|
||||
#include "../jrd/dmp_proto.h"
|
||||
@ -592,18 +591,21 @@ static void print_rhd(USHORT, const rhd*);
|
||||
#endif
|
||||
|
||||
|
||||
static PatternMatcher* createPatternMatcher(thread_db* tdbb, const char* pattern)
|
||||
static SimilarToRegex* createPatternMatcher(thread_db* tdbb, const char* pattern)
|
||||
{
|
||||
PatternMatcher* matcher = NULL;
|
||||
SimilarToRegex* matcher = NULL;
|
||||
try
|
||||
{
|
||||
if (pattern)
|
||||
{
|
||||
const int len = strlen(pattern);
|
||||
|
||||
Collation* obj = INTL_texttype_lookup(tdbb, CS_UTF8);
|
||||
matcher = obj->createSimilarToMatcher(*tdbb->getDefaultPool(),
|
||||
(const UCHAR*) pattern, len, (UCHAR*) "\\", 1);
|
||||
//// TODO: Should this be different than trace and replication
|
||||
//// and use case sensitive matcher?
|
||||
matcher = FB_NEW_POOL(*tdbb->getDefaultPool()) SimilarToRegex(
|
||||
*tdbb->getDefaultPool(), false,
|
||||
pattern, len,
|
||||
"\\", 1);
|
||||
}
|
||||
}
|
||||
catch (const Exception& ex)
|
||||
@ -870,8 +872,6 @@ Validation::Validation(thread_db* tdbb, UtilSvc* uSvc) :
|
||||
vdr_page_bitmap = NULL;
|
||||
|
||||
vdr_service = uSvc;
|
||||
vdr_tab_incl = vdr_tab_excl = NULL;
|
||||
vdr_idx_incl = vdr_idx_excl = NULL;
|
||||
vdr_lock_tout = -10;
|
||||
|
||||
if (uSvc) {
|
||||
@ -882,11 +882,6 @@ Validation::Validation(thread_db* tdbb, UtilSvc* uSvc) :
|
||||
|
||||
Validation::~Validation()
|
||||
{
|
||||
delete vdr_tab_incl;
|
||||
delete vdr_tab_excl;
|
||||
delete vdr_idx_incl;
|
||||
delete vdr_idx_excl;
|
||||
|
||||
output("Validation finished\n");
|
||||
}
|
||||
|
||||
@ -1654,23 +1649,15 @@ void Validation::walk_database()
|
||||
|
||||
if (vdr_tab_incl)
|
||||
{
|
||||
vdr_tab_incl->reset();
|
||||
if (!vdr_tab_incl->process((UCHAR*) relation->rel_name.c_str(), relation->rel_name.length()) ||
|
||||
!vdr_tab_incl->result())
|
||||
{
|
||||
if (!vdr_tab_incl->matches(relation->rel_name.c_str(), relation->rel_name.length()))
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (vdr_tab_excl)
|
||||
{
|
||||
vdr_tab_excl->reset();
|
||||
if (!vdr_tab_excl->process((UCHAR*) relation->rel_name.c_str(), relation->rel_name.length()) ||
|
||||
vdr_tab_excl->result())
|
||||
{
|
||||
if (vdr_tab_excl->matches(relation->rel_name.c_str(), relation->rel_name.length()))
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// We can't reliably track double-allocated pages when validating online.
|
||||
// All we can check is that page is not double allocated at the same relation.
|
||||
@ -3163,15 +3150,13 @@ Validation::RTN Validation::walk_root(jrd_rel* relation)
|
||||
|
||||
if (vdr_idx_incl)
|
||||
{
|
||||
vdr_idx_incl->reset();
|
||||
if (!vdr_idx_incl->process((UCHAR*) index.c_str(), index.length()) || !vdr_idx_incl->result())
|
||||
// The index include filter must be tested against the index name, not the relation name.
if (!vdr_idx_incl->matches(index.c_str(), index.length()))
|
||||
continue;
|
||||
}
|
||||
|
||||
if (vdr_idx_excl)
|
||||
{
|
||||
vdr_idx_excl->reset();
|
||||
if (!vdr_idx_excl->process((UCHAR*) index.c_str(), index.length()) || vdr_idx_excl->result())
|
||||
// The index exclude filter must be tested against the index name, not the relation name.
if (vdr_idx_excl->matches(index.c_str(), index.length()))
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -28,6 +28,7 @@
|
||||
#include "fb_types.h"
|
||||
|
||||
#include "../common/classes/array.h"
|
||||
#include "../common/SimilarToRegex.h"
|
||||
#include "../jrd/ods.h"
|
||||
#include "../jrd/cch.h"
|
||||
#include "../jrd/sbm.h"
|
||||
@ -150,10 +151,10 @@ private:
|
||||
ULONG vdr_err_counts[VAL_MAX_ERROR];
|
||||
|
||||
Firebird::UtilSvc* vdr_service;
|
||||
PatternMatcher* vdr_tab_incl;
|
||||
PatternMatcher* vdr_tab_excl;
|
||||
PatternMatcher* vdr_idx_incl;
|
||||
PatternMatcher* vdr_idx_excl;
|
||||
Firebird::AutoPtr<Firebird::SimilarToRegex> vdr_tab_incl;
|
||||
Firebird::AutoPtr<Firebird::SimilarToRegex> vdr_tab_excl;
|
||||
Firebird::AutoPtr<Firebird::SimilarToRegex> vdr_idx_incl;
|
||||
Firebird::AutoPtr<Firebird::SimilarToRegex> vdr_idx_excl;
|
||||
int vdr_lock_tout;
|
||||
void checkDPinPP(jrd_rel *relation, SLONG page_number);
|
||||
void checkDPinPIP(jrd_rel *relation, SLONG page_number);
|
||||
|
@ -46,7 +46,6 @@ set(fbtrace_src
|
||||
ntrace/TraceConfiguration.cpp
|
||||
ntrace/traceplugin.cpp
|
||||
ntrace/TracePluginImpl.cpp
|
||||
ntrace/TraceUnicodeUtils.cpp
|
||||
|
||||
ntrace/os/platform.h
|
||||
)
|
||||
|
@ -26,9 +26,7 @@
|
||||
*/
|
||||
|
||||
#include "TraceConfiguration.h"
|
||||
#include "TraceUnicodeUtils.h"
|
||||
#include "../../jrd/evl_string.h"
|
||||
#include "../../jrd/SimilarToMatcher.h"
|
||||
#include "../../common/SimilarToRegex.h"
|
||||
#include "../../common/isc_f_proto.h"
|
||||
|
||||
using namespace Firebird;
|
||||
@ -67,26 +65,6 @@ void TraceCfgReader::readTraceConfiguration(const char* text,
|
||||
}
|
||||
|
||||
|
||||
namespace
|
||||
{
|
||||
template <typename PrevConverter = Jrd::NullStrConverter>
|
||||
class SystemToUtf8Converter : public PrevConverter
|
||||
{
|
||||
public:
|
||||
SystemToUtf8Converter(MemoryPool& pool, Jrd::TextType* obj, const UCHAR*& str, SLONG& len)
|
||||
: PrevConverter(pool, obj, str, len)
|
||||
{
|
||||
buffer.assign(reinterpret_cast<const char*>(str), len);
|
||||
ISC_systemToUtf8(buffer);
|
||||
str = reinterpret_cast<const UCHAR*>(buffer.c_str());
|
||||
len = buffer.length();
|
||||
}
|
||||
|
||||
private:
|
||||
string buffer;
|
||||
};
|
||||
}
|
||||
|
||||
#define ERROR_PREFIX "error while parsing trace configuration\n\t"
|
||||
|
||||
void TraceCfgReader::readConfig()
|
||||
@ -156,31 +134,28 @@ void TraceCfgReader::readConfig()
|
||||
try
|
||||
{
|
||||
#ifdef WIN_NT // !CASE_SENSITIVITY
|
||||
typedef Jrd::UpcaseConverter<SystemToUtf8Converter<> > SimilarConverter;
|
||||
const bool caseInsensitive = true;
|
||||
#else
|
||||
typedef SystemToUtf8Converter<> SimilarConverter;
|
||||
const bool caseInsensitive = false;
|
||||
#endif
|
||||
string utf8Pattern = pattern;
|
||||
ISC_systemToUtf8(utf8Pattern);
|
||||
|
||||
UnicodeCollationHolder unicodeCollation(*getDefaultMemoryPool());
|
||||
Jrd::TextType* textType = unicodeCollation.getTextType();
|
||||
|
||||
SimilarToMatcher<ULONG, Jrd::CanonicalConverter<SimilarConverter> > matcher(
|
||||
*getDefaultMemoryPool(), textType, (const UCHAR*) pattern.c_str(),
|
||||
pattern.length(), '\\', true);
|
||||
SimilarToRegex matcher(*getDefaultMemoryPool(), caseInsensitive,
|
||||
utf8Pattern.c_str(), utf8Pattern.length(), "\\", 1);
|
||||
|
||||
regExpOk = true;
|
||||
|
||||
matcher.process((const UCHAR*) m_databaseName.c_str(), m_databaseName.length());
|
||||
if (matcher.result())
|
||||
{
|
||||
for (unsigned i = 0;
|
||||
i <= matcher.getNumBranches() && i < FB_NELEM(m_subpatterns); ++i)
|
||||
{
|
||||
unsigned start, length;
|
||||
matcher.getBranchInfo(i, &start, &length);
|
||||
PathName utf8DatabaseName = m_databaseName;
|
||||
ISC_systemToUtf8(utf8DatabaseName);
|
||||
Array<SimilarToRegex::MatchPos> matchPosArray;
|
||||
|
||||
m_subpatterns[i].start = start;
|
||||
m_subpatterns[i].end = start + length;
|
||||
if (matcher.matches(utf8DatabaseName.c_str(), utf8DatabaseName.length(), &matchPosArray))
|
||||
{
|
||||
for (unsigned i = 0; i < matchPosArray.getCount() && i < FB_NELEM(m_subpatterns); ++i)
|
||||
{
|
||||
m_subpatterns[i].start = matchPosArray[i].start;
|
||||
m_subpatterns[i].end = matchPosArray[i].start + matchPosArray[i].length;
|
||||
}
|
||||
|
||||
match = exactMatch = true;
|
||||
|
@ -99,7 +99,6 @@ TracePluginImpl::TracePluginImpl(IPluginBase* plugin,
|
||||
transactions(getDefaultMemoryPool()),
|
||||
statements(getDefaultMemoryPool()),
|
||||
services(getDefaultMemoryPool()),
|
||||
unicodeCollation(*getDefaultMemoryPool()),
|
||||
include_codes(*getDefaultMemoryPool()),
|
||||
exclude_codes(*getDefaultMemoryPool())
|
||||
{
|
||||
@ -124,8 +123,6 @@ TracePluginImpl::TracePluginImpl(IPluginBase* plugin,
|
||||
logWriter->addRef();
|
||||
}
|
||||
|
||||
Jrd::TextType* textType = unicodeCollation.getTextType();
|
||||
|
||||
// Compile filtering regular expressions
|
||||
const char* str = NULL;
|
||||
try
|
||||
@ -136,9 +133,10 @@ TracePluginImpl::TracePluginImpl(IPluginBase* plugin,
|
||||
string filter(config.include_filter);
|
||||
ISC_systemToUtf8(filter);
|
||||
|
||||
include_matcher = FB_NEW TraceSimilarToMatcher(
|
||||
*getDefaultMemoryPool(), textType, (const UCHAR*) filter.c_str(),
|
||||
filter.length(), '\\', true);
|
||||
include_matcher = FB_NEW SimilarToRegex(
|
||||
*getDefaultMemoryPool(), true,
|
||||
filter.c_str(), filter.length(),
|
||||
"\\", 1);
|
||||
}
|
||||
|
||||
if (config.exclude_filter.hasData())
|
||||
@ -147,9 +145,10 @@ TracePluginImpl::TracePluginImpl(IPluginBase* plugin,
|
||||
string filter(config.exclude_filter);
|
||||
ISC_systemToUtf8(filter);
|
||||
|
||||
exclude_matcher = FB_NEW TraceSimilarToMatcher(
|
||||
*getDefaultMemoryPool(), textType, (const UCHAR*) filter.c_str(),
|
||||
filter.length(), '\\', true);
|
||||
exclude_matcher = FB_NEW SimilarToRegex(
|
||||
*getDefaultMemoryPool(), true,
|
||||
filter.c_str(), filter.length(),
|
||||
"\\", 1);
|
||||
}
|
||||
}
|
||||
catch (const Exception&)
|
||||
@ -1546,18 +1545,10 @@ void TracePluginImpl::register_sql_statement(ITraceSQLStatement* statement)
|
||||
return;
|
||||
|
||||
if (config.include_filter.hasData())
|
||||
{
|
||||
include_matcher->reset();
|
||||
include_matcher->process((const UCHAR*) sql, sql_length);
|
||||
need_statement = include_matcher->result();
|
||||
}
|
||||
need_statement = include_matcher->matches(sql, sql_length);
|
||||
|
||||
if (need_statement && config.exclude_filter.hasData())
|
||||
{
|
||||
exclude_matcher->reset();
|
||||
exclude_matcher->process((const UCHAR*) sql, sql_length);
|
||||
need_statement = !exclude_matcher->result();
|
||||
}
|
||||
need_statement = !exclude_matcher->matches(sql, sql_length);
|
||||
|
||||
if (need_statement)
|
||||
{
|
||||
@ -1949,18 +1940,10 @@ bool TracePluginImpl::checkServiceFilter(ITraceServiceConnection* service, bool
|
||||
bool enabled = true;
|
||||
|
||||
if (config.include_filter.hasData())
|
||||
{
|
||||
include_matcher->reset();
|
||||
include_matcher->process((const UCHAR*) svcName, svcNameLen);
|
||||
enabled = include_matcher->result();
|
||||
}
|
||||
enabled = include_matcher->matches(svcName, svcNameLen);
|
||||
|
||||
if (enabled && config.exclude_filter.hasData())
|
||||
{
|
||||
exclude_matcher->reset();
|
||||
exclude_matcher->process((const UCHAR*) svcName, svcNameLen);
|
||||
enabled = !exclude_matcher->result();
|
||||
}
|
||||
enabled = !exclude_matcher->matches(svcName, svcNameLen);
|
||||
|
||||
if (data) {
|
||||
data->enabled = enabled;
|
||||
|
@ -32,11 +32,7 @@
|
||||
#include "firebird.h"
|
||||
#include "../../jrd/ntrace.h"
|
||||
#include "TracePluginConfig.h"
|
||||
#include "TraceUnicodeUtils.h"
|
||||
#include "../../jrd/intl_classes.h"
|
||||
#include "../../jrd/evl_string.h"
|
||||
#include "../../common/TextType.h"
|
||||
#include "../../jrd/SimilarToMatcher.h"
|
||||
#include "../../common/SimilarToRegex.h"
|
||||
#include "../../common/classes/rwlock.h"
|
||||
#include "../../common/classes/GenericMap.h"
|
||||
#include "../../common/classes/locks.h"
|
||||
@ -168,10 +164,7 @@ private:
|
||||
// Lock for log rotation
|
||||
Firebird::RWLock renameLock;
|
||||
|
||||
UnicodeCollationHolder unicodeCollation;
|
||||
typedef Firebird::SimilarToMatcher<ULONG, Jrd::UpcaseConverter<Jrd::CanonicalConverter<> > >
|
||||
TraceSimilarToMatcher;
|
||||
Firebird::AutoPtr<TraceSimilarToMatcher> include_matcher, exclude_matcher;
|
||||
Firebird::AutoPtr<Firebird::SimilarToRegex> include_matcher, exclude_matcher;
|
||||
|
||||
// Filters for gds error codes
|
||||
typedef Firebird::SortedArray<ISC_STATUS> GdsCodesArray;
|
||||
|
@ -1,65 +0,0 @@
|
||||
/*
|
||||
* PROGRAM: Firebird Trace Services
|
||||
* MODULE: TraceUnicodeUtils.cpp
|
||||
* DESCRIPTION: Unicode support for trace needs
|
||||
*
|
||||
* The contents of this file are subject to the Initial
|
||||
* Developer's Public License Version 1.0 (the "License");
|
||||
* you may not use this file except in compliance with the
|
||||
* License. You may obtain a copy of the License at
|
||||
* http://www.ibphoenix.com/main.nfs?a=ibphoenix&page=ibp_idpl.
|
||||
*
|
||||
* Software distributed under the License is distributed AS IS,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing rights
|
||||
* and limitations under the License.
|
||||
*
|
||||
* The Original Code was created by Khorsun Vladyslav
|
||||
* for the Firebird Open Source RDBMS project.
|
||||
*
|
||||
* Copyright (c) 2010 Khorsun Vladyslav <hvlad@users.sourceforge.net>
|
||||
* and all contributors signed below.
|
||||
*
|
||||
* All Rights Reserved.
|
||||
* Contributor(s): ______________________________________.
|
||||
* Adriano dos Santos Fernandes
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include "TraceUnicodeUtils.h"
|
||||
|
||||
using namespace Firebird;
|
||||
|
||||
// Builds the charset/texttype pair that the trace plugin uses for UNICODE
// collation, allocating everything from the supplied pool.
// Raises fatal_exception (via raiseFmt) when IntlUtil::initUnicodeCollation fails.
UnicodeCollationHolder::UnicodeCollationHolder(MemoryPool& pool)
|
||||
{
|
||||
// Raw low-level structures; filled in by the IntlUtil calls below.
cs = FB_NEW_POOL(pool) charset;
|
||||
tt = FB_NEW_POOL(pool) texttype;
|
||||
|
||||
IntlUtil::initUtf8Charset(cs);
|
||||
|
||||
// Collation attributes start as "ICU-VERSION=<default ICU version>".
string collAttributes("ICU-VERSION=");
|
||||
collAttributes += Jrd::UnicodeUtil::getDefaultIcuVersion();
|
||||
// collAttributes is passed both as an input and as the last argument;
// presumably the last argument receives the adjusted attributes - TODO confirm.
IntlUtil::setupIcuAttributes(cs, collAttributes, "", collAttributes);
|
||||
|
||||
// Copy the attribute string into a byte buffer for initUnicodeCollation.
UCharBuffer collAttributesBuffer;
|
||||
collAttributesBuffer.push(reinterpret_cast<const UCHAR*>(collAttributes.c_str()),
|
||||
collAttributes.length());
|
||||
|
||||
|
||||
if (!IntlUtil::initUnicodeCollation(tt, cs, "UNICODE", 0, collAttributesBuffer, string()))
|
||||
fatal_exception::raiseFmt("cannot initialize UNICODE collation to use in trace plugin");
|
||||
|
||||
|
||||
// Wrap the raw structures in the Jrd objects stored in the charSet/textType members.
charSet = Jrd::CharSet::createInstance(pool, 0, cs);
|
||||
textType = FB_NEW_POOL(pool) Jrd::TextType(0, tt, charSet);
|
||||
}
|
||||
|
||||
// Releases the texttype through its own destroy callback, then frees the
// texttype structure itself.
UnicodeCollationHolder::~UnicodeCollationHolder()
|
||||
{
|
||||
// A missing destroy callback is treated as a programming error under fb_assert ...
fb_assert(tt->texttype_fn_destroy);
|
||||
|
||||
// ... but the call is still guarded by an explicit null check.
if (tt->texttype_fn_destroy)
|
||||
tt->texttype_fn_destroy(tt);
|
||||
|
||||
// cs should be deleted by texttype_fn_destroy call above
|
||||
delete tt;
|
||||
}
|
@ -1,57 +0,0 @@
|
||||
/*
|
||||
* PROGRAM: Firebird Trace Services
|
||||
* MODULE: TraceUnicodeUtils.h
|
||||
* DESCRIPTION: Unicode support for trace needs
|
||||
*
|
||||
* The contents of this file are subject to the Initial
|
||||
* Developer's Public License Version 1.0 (the "License");
|
||||
* you may not use this file except in compliance with the
|
||||
* License. You may obtain a copy of the License at
|
||||
* http://www.ibphoenix.com/main.nfs?a=ibphoenix&page=ibp_idpl.
|
||||
*
|
||||
* Software distributed under the License is distributed AS IS,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing rights
|
||||
* and limitations under the License.
|
||||
*
|
||||
* The Original Code was created by Khorsun Vladyslav
|
||||
* for the Firebird Open Source RDBMS project.
|
||||
*
|
||||
* Copyright (c) 2010 Khorsun Vladyslav <hvlad@users.sourceforge.net>
|
||||
* and all contributors signed below.
|
||||
*
|
||||
* All Rights Reserved.
|
||||
* Contributor(s): ______________________________________.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef TRACE_UNICODE_UTILS_H
|
||||
#define TRACE_UNICODE_UTILS_H
|
||||
|
||||
#include "firebird.h"
|
||||
#include "../../common/classes/fb_string.h"
|
||||
#include "../../jrd/intl_classes.h"
|
||||
#include "../../common/TextType.h"
|
||||
#include "../../common/unicode_util.h"
|
||||
|
||||
|
||||
// Holds a charset/texttype pair together with their Jrd::CharSet and
// Jrd::TextType wrappers, and exposes the resulting TextType to callers.
class UnicodeCollationHolder
|
||||
{
|
||||
private:
|
||||
// Raw charset and texttype structures, kept as plain pointers.
charset* cs;
|
||||
texttype* tt;
|
||||
// Jrd-level wrappers, held in AutoPtr members.
Firebird::AutoPtr<Jrd::CharSet> charSet;
|
||||
Firebird::AutoPtr<Jrd::TextType> textType;
|
||||
|
||||
public:
|
||||
// pool: memory pool passed to the constructor.
explicit UnicodeCollationHolder(Firebird::MemoryPool& pool);
|
||||
~UnicodeCollationHolder();
|
||||
|
||||
// Returns the pointer held by the textType member.
Jrd::TextType* getTextType()
|
||||
{
|
||||
return textType;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
#endif // TRACE_UNICODE_UTILS_H
|
Loading…
Reference in New Issue
Block a user