mirror of
https://github.com/FirebirdSQL/firebird.git
synced 2025-01-22 22:03:03 +01:00
WIP: Reimplementation of SIMILAR TO and SUBSTRING...SIMILAR using Google's re2 library.
This should fix: CORE-4874 CORE-5664 CORE-3858 CORE-6088 CORE-3773 CORE-5931 CORE-6088 CORE-4893
This commit is contained in:
parent
14d41d409e
commit
28e18749ff
@ -307,7 +307,8 @@ endif
|
|||||||
LIB_PATH_OPTS = $(call LIB_LINK_RPATH,lib) $(call LIB_LINK_RPATH,intl)
|
LIB_PATH_OPTS = $(call LIB_LINK_RPATH,lib) $(call LIB_LINK_RPATH,intl)
|
||||||
LIB_LINK_SONAME= -Wl,-soname,$(1)
|
LIB_LINK_SONAME= -Wl,-soname,$(1)
|
||||||
LIB_LINK_MAPFILE= -Wl,--version-script,$(1)
|
LIB_LINK_MAPFILE= -Wl,--version-script,$(1)
|
||||||
FIREBIRD_LIBRARY_LINK= -L$(LIB) -lfbclient $(MATHLIB) $(CRYPTLIB)
|
# FIXME:
|
||||||
|
FIREBIRD_LIBRARY_LINK= -L$(LIB) -lfbclient $(MATHLIB) $(CRYPTLIB) -lre2
|
||||||
|
|
||||||
EXE_LINK_OPTIONS= $(LDFLAGS) $(THR_FLAGS) $(UNDEF_FLAGS) $(LIB_PATH_OPTS) $(LINK_EMPTY_SYMBOLS)
|
EXE_LINK_OPTIONS= $(LDFLAGS) $(THR_FLAGS) $(UNDEF_FLAGS) $(LIB_PATH_OPTS) $(LINK_EMPTY_SYMBOLS)
|
||||||
LIB_LINK_OPTIONS= $(LDFLAGS) $(THR_FLAGS) -shared
|
LIB_LINK_OPTIONS= $(LDFLAGS) $(THR_FLAGS) -shared
|
||||||
|
@ -90,6 +90,7 @@ Note:
|
|||||||
<left paren>, <right paren>, <vertical bar>, <circumflex>, <minus sign>, <plus sign>, <asterisk>,
|
<left paren>, <right paren>, <vertical bar>, <circumflex>, <minus sign>, <plus sign>, <asterisk>,
|
||||||
<underscore>, <percent>, <question mark>, <left brace> or <escape character>.
|
<underscore>, <percent>, <question mark>, <left brace> or <escape character>.
|
||||||
|
|
||||||
|
3) Since FB 4, the repeat factor low/high values cannot be greater than 1000.
|
||||||
|
|
||||||
Syntax description and examples:
|
Syntax description and examples:
|
||||||
|
|
||||||
@ -220,3 +221,36 @@ insert into department values ('600', 'Engineering', '(408) 555-123'); -- check
|
|||||||
select * from department
|
select * from department
|
||||||
where phone not similar to '\([0-9]{3}\) 555\-%' escape '\';
|
where phone not similar to '\([0-9]{3}\) 555\-%' escape '\';
|
||||||
|
|
||||||
|
|
||||||
|
Appendix:
|
||||||
|
|
||||||
|
Since FB 4 SIMILAR TO and SUBSTRING...SIMILAR are implemented using the re2 library,
|
||||||
|
which has the following license:
|
||||||
|
|
||||||
|
Copyright (c) 2009 The RE2 Authors. All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above
|
||||||
|
copyright notice, this list of conditions and the following disclaimer
|
||||||
|
in the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Google Inc. nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
@ -2542,15 +2542,11 @@ void BurpGlobals::setupSkipData(const Firebird::string& regexp)
|
|||||||
ISC_systemToUtf8(filter);
|
ISC_systemToUtf8(filter);
|
||||||
|
|
||||||
BurpGlobals* tdgbl = BurpGlobals::getSpecific();
|
BurpGlobals* tdgbl = BurpGlobals::getSpecific();
|
||||||
if (!unicodeCollation)
|
|
||||||
unicodeCollation = FB_NEW_POOL(tdgbl->getPool()) UnicodeCollationHolder(tdgbl->getPool());
|
|
||||||
|
|
||||||
Jrd::TextType* const textType = unicodeCollation->getTextType();
|
skipDataMatcher.reset(FB_NEW_POOL(tdgbl->getPool()) Firebird::SimilarToRegex(
|
||||||
|
tdgbl->getPool(), true,
|
||||||
skipDataMatcher.reset(FB_NEW_POOL(tdgbl->getPool())
|
filter.c_str(), filter.length(),
|
||||||
Firebird::SimilarToMatcher<UCHAR, Jrd::UpcaseConverter<> >
|
"\\", 1));
|
||||||
(tdgbl->getPool(), textType, (const UCHAR*) filter.c_str(),
|
|
||||||
filter.length(), '\\', true));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
catch (const Firebird::Exception&)
|
catch (const Firebird::Exception&)
|
||||||
@ -2571,18 +2567,12 @@ Firebird::string BurpGlobals::toSystem(const Firebird::PathName& from)
|
|||||||
bool BurpGlobals::skipRelation(const char* name)
|
bool BurpGlobals::skipRelation(const char* name)
|
||||||
{
|
{
|
||||||
if (gbl_sw_meta)
|
if (gbl_sw_meta)
|
||||||
{
|
|
||||||
return true;
|
return true;
|
||||||
}
|
|
||||||
|
|
||||||
if (!skipDataMatcher)
|
if (!skipDataMatcher)
|
||||||
{
|
|
||||||
return false;
|
return false;
|
||||||
}
|
|
||||||
|
|
||||||
skipDataMatcher->reset();
|
return skipDataMatcher->matches(name, strlen(name));
|
||||||
skipDataMatcher->process(reinterpret_cast<const UCHAR*>(name), static_cast<SLONG>(strlen(name)));
|
|
||||||
return skipDataMatcher->result();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void BurpGlobals::read_stats(SINT64* stats)
|
void BurpGlobals::read_stats(SINT64* stats)
|
||||||
@ -2703,39 +2693,6 @@ void BurpGlobals::print_stats_header()
|
|||||||
burp_output(false, "\n");
|
burp_output(false, "\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
UnicodeCollationHolder::UnicodeCollationHolder(MemoryPool& pool)
|
|
||||||
{
|
|
||||||
cs = FB_NEW_POOL(pool) charset;
|
|
||||||
tt = FB_NEW_POOL(pool) texttype;
|
|
||||||
|
|
||||||
Firebird::IntlUtil::initUtf8Charset(cs);
|
|
||||||
|
|
||||||
Firebird::string collAttributes("ICU-VERSION=");
|
|
||||||
collAttributes += Jrd::UnicodeUtil::getDefaultIcuVersion();
|
|
||||||
Firebird::IntlUtil::setupIcuAttributes(cs, collAttributes, "", collAttributes);
|
|
||||||
|
|
||||||
Firebird::UCharBuffer collAttributesBuffer;
|
|
||||||
collAttributesBuffer.push(reinterpret_cast<const UCHAR*>(collAttributes.c_str()),
|
|
||||||
collAttributes.length());
|
|
||||||
|
|
||||||
if (!Firebird::IntlUtil::initUnicodeCollation(tt, cs, "UNICODE", 0, collAttributesBuffer, Firebird::string()))
|
|
||||||
Firebird::fatal_exception::raiseFmt("cannot initialize UNICODE collation to use in gbak");
|
|
||||||
|
|
||||||
charSet = Jrd::CharSet::createInstance(pool, 0, cs);
|
|
||||||
textType = FB_NEW_POOL(pool) Jrd::TextType(0, tt, charSet);
|
|
||||||
}
|
|
||||||
|
|
||||||
UnicodeCollationHolder::~UnicodeCollationHolder()
|
|
||||||
{
|
|
||||||
fb_assert(tt->texttype_fn_destroy);
|
|
||||||
|
|
||||||
if (tt->texttype_fn_destroy)
|
|
||||||
tt->texttype_fn_destroy(tt);
|
|
||||||
|
|
||||||
// cs should be deleted by texttype_fn_destroy call above
|
|
||||||
delete tt;
|
|
||||||
}
|
|
||||||
|
|
||||||
void BURP_makeSymbol(BurpGlobals* tdgbl, Firebird::string& name) // add double quotes to string
|
void BURP_makeSymbol(BurpGlobals* tdgbl, Firebird::string& name) // add double quotes to string
|
||||||
{
|
{
|
||||||
if (tdgbl->gbl_dialect < SQL_DIALECT_V6)
|
if (tdgbl->gbl_dialect < SQL_DIALECT_V6)
|
||||||
|
@ -42,7 +42,7 @@
|
|||||||
#include "../common/classes/array.h"
|
#include "../common/classes/array.h"
|
||||||
#include "../common/classes/fb_pair.h"
|
#include "../common/classes/fb_pair.h"
|
||||||
#include "../common/classes/MetaName.h"
|
#include "../common/classes/MetaName.h"
|
||||||
#include "../jrd/SimilarToMatcher.h"
|
#include "../common/SimilarToRegex.h"
|
||||||
#include "../common/status.h"
|
#include "../common/status.h"
|
||||||
#include "../common/sha.h"
|
#include "../common/sha.h"
|
||||||
#include "../common/classes/ImplementHelper.h"
|
#include "../common/classes/ImplementHelper.h"
|
||||||
@ -894,26 +894,6 @@ static const char HDR_SPLIT_TAG6[] = "InterBase/gbak, ";
|
|||||||
const FB_UINT64 MIN_SPLIT_SIZE = FB_CONST64(2048); // bytes
|
const FB_UINT64 MIN_SPLIT_SIZE = FB_CONST64(2048); // bytes
|
||||||
|
|
||||||
|
|
||||||
// Copy&paste from TraceUnicodeUtils.h - fixme !!!!!!!!
|
|
||||||
class UnicodeCollationHolder
|
|
||||||
{
|
|
||||||
private:
|
|
||||||
charset* cs;
|
|
||||||
texttype* tt;
|
|
||||||
Firebird::AutoPtr<Jrd::CharSet> charSet;
|
|
||||||
Firebird::AutoPtr<Jrd::TextType> textType;
|
|
||||||
|
|
||||||
public:
|
|
||||||
explicit UnicodeCollationHolder(Firebird::MemoryPool& pool);
|
|
||||||
~UnicodeCollationHolder();
|
|
||||||
|
|
||||||
Jrd::TextType* getTextType()
|
|
||||||
{
|
|
||||||
return textType;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
// Global switches and data
|
// Global switches and data
|
||||||
|
|
||||||
struct BurpCrypt;
|
struct BurpCrypt;
|
||||||
@ -1174,8 +1154,7 @@ public:
|
|||||||
bool flag_on_line; // indicates whether we will bring the database on-line
|
bool flag_on_line; // indicates whether we will bring the database on-line
|
||||||
bool firstMap; // this is the first time we entered get_mapping()
|
bool firstMap; // this is the first time we entered get_mapping()
|
||||||
bool stdIoMode; // stdin or stdout is used as backup file
|
bool stdIoMode; // stdin or stdout is used as backup file
|
||||||
Firebird::AutoPtr<UnicodeCollationHolder> unicodeCollation;
|
Firebird::AutoPtr<Firebird::SimilarToRegex> skipDataMatcher;
|
||||||
Firebird::AutoPtr<Firebird::SimilarToMatcher<UCHAR, Jrd::UpcaseConverter<> > > skipDataMatcher;
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
Firebird::string toSystem(const Firebird::PathName& from);
|
Firebird::string toSystem(const Firebird::PathName& from);
|
||||||
|
821
src/common/SimilarToRegex.cpp
Normal file
821
src/common/SimilarToRegex.cpp
Normal file
@ -0,0 +1,821 @@
|
|||||||
|
/*
|
||||||
|
* The contents of this file are subject to the Initial
|
||||||
|
* Developer's Public License Version 1.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the
|
||||||
|
* License. You may obtain a copy of the License at
|
||||||
|
* http://www.ibphoenix.com/main.nfs?a=ibphoenix&page=ibp_idpl.
|
||||||
|
*
|
||||||
|
* Software distributed under the License is distributed AS IS,
|
||||||
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing rights
|
||||||
|
* and limitations under the License.
|
||||||
|
*
|
||||||
|
* The Original Code was created by Adriano dos Santos Fernandes
|
||||||
|
* for the Firebird Open Source RDBMS project.
|
||||||
|
*
|
||||||
|
* Copyright (c) 2019 Adriano dos Santos Fernandes <adrianosf at gmail.com>
|
||||||
|
* and all contributors signed below.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "firebird.h"
|
||||||
|
#include "../common/SimilarToRegex.h"
|
||||||
|
#include "../common/StatusArg.h"
|
||||||
|
#include <unicode/utf8.h>
|
||||||
|
|
||||||
|
using namespace Firebird;
|
||||||
|
|
||||||
|
namespace
|
||||||
|
{
|
||||||
|
// Translation flags accepted by SimilarToCompiler (bit mask).

// Emit the lazy form of every quantifier ("*?", "+?", "??", "{n,m}?", ".*?").
static const unsigned FLAG_PREFER_FEWER = 0x01;
|
||||||
|
// Prefix the generated RE2 pattern with "(?i)".
static const unsigned FLAG_CASE_INSENSITIVE = 0x02;
|
||||||
|
// Wrap the whole expression in a capturing group and translate "(" to a
// capturing "(" instead of the non-capturing "(?:".
static const unsigned FLAG_GROUP_CAPTURE = 0x04;
|
||||||
|
|
||||||
|
//// TODO: Verify usage of U8_NEXT_UNSAFE.
|
||||||
|
class SimilarToCompiler
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
// Translates a SQL SIMILAR TO pattern (UTF-8) into RE2 syntax and compiles it.
//
//   pool        - pool used for the translated pattern text and the RE2 object
//   regexp      - receives the compiled expression
//   aFlags      - combination of FLAG_PREFER_FEWER, FLAG_CASE_INSENSITIVE and
//                 FLAG_GROUP_CAPTURE
//   aPatternStr - pattern text, aPatternLen bytes long
//   escapeStr   - escape character (escapeLen bytes), or nullptr when no
//                 escape is in effect
//
// Raises isc_escape_invalid when the escape is not exactly one well-formed
// character, and isc_invalid_similar_pattern when the pattern is malformed or
// RE2 rejects the translation.
SimilarToCompiler(MemoryPool& pool, AutoPtr<RE2>& regexp, unsigned aFlags,
		const char* aPatternStr, unsigned aPatternLen,
		const char* escapeStr, unsigned escapeLen)
	: re2PatternStr(pool),
	  patternStr(aPatternStr),
	  patternPos(0),
	  patternLen(aPatternLen),
	  escapeChar(0),
	  flags(aFlags),
	  useEscape(escapeStr != nullptr)
{
	if (escapeStr)
	{
		// Decode with the bounded macro: the unsafe variant reads escapeStr[0]
		// (and possibly trail bytes) even when the escape string is empty or
		// ends in a truncated multi-byte sequence.
		const int32_t escapeEnd = static_cast<int32_t>(escapeLen);
		int32_t escapePos = 0;

		if (escapeEnd > 0)
			U8_NEXT(escapeStr, escapePos, escapeEnd, escapeChar);

		// Exactly one well-formed character must fill the whole escape string.
		if (escapeEnd <= 0 || escapeChar < 0 || escapePos != escapeEnd)
			status_exception::raise(Arg::Gds(isc_escape_invalid));
	}

	if (flags & FLAG_CASE_INSENSITIVE)
		re2PatternStr.append("(?i)");

	if (flags & FLAG_GROUP_CAPTURE)
		re2PatternStr.append("(");

	// Never hand an indeterminate value to the parser.
	int parseFlags = 0;
	parseExpr(&parseFlags);

	if (flags & FLAG_GROUP_CAPTURE)
		re2PatternStr.append(")");

	// Check for proper termination: anything left over (e.g. a stray ')')
	// means the pattern is malformed.
	if (patternPos < patternLen)
		status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));

	RE2::Options options;
	options.set_log_errors(false);
	options.set_dot_nl(true);	// '_' and '%' are translated to '.', which must match newlines too

	re2::StringPiece sp(re2PatternStr.c_str(), re2PatternStr.length());
	regexp = FB_NEW_POOL(pool) RE2(sp, options);

	if (!regexp->ok())
		status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));
}
|
||||||
|
|
||||||
|
bool hasChar()
|
||||||
|
{
|
||||||
|
return patternPos < patternLen;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Decodes the code point at the current position and advances past it.
// The caller must have checked hasChar() first.
UChar32 getChar()
{
	fb_assert(hasChar());

	UChar32 decoded;
	U8_NEXT_UNSAFE(patternStr, patternPos, decoded);

	return decoded;
}
|
||||||
|
|
||||||
|
// Returns the next code point without consuming it.
UChar32 peekChar()
{
	const auto mark = patternPos;
	const auto next = getChar();

	// Rewind so the character is read again by the next getChar().
	patternPos = mark;

	return next;
}
|
||||||
|
|
||||||
|
// Tells whether c starts a repetition operator: '*', '+', '?' or '{'.
bool isRep(UChar32 c) const
{
	switch (c)
	{
		case '*':
		case '+':
		case '?':
		case '{':
			return true;

		default:
			return false;
	}
}
|
||||||
|
|
||||||
|
// Tells whether c is one of the SIMILAR TO metacharacters.
bool isSpecial(UChar32 c)
{
	static const char specials[] = "^-_%[](){}|?+*";

	for (const char* p = specials; *p; ++p)
	{
		if (c == *p)
			return true;
	}

	return false;
}
|
||||||
|
|
||||||
|
// Tells whether c gets a backslash prefix when it is copied literally into
// the generated RE2 pattern.
bool isRe2Special(UChar32 c)
{
	static const char specials[] = "\\$.^-_[](){}|?+*";

	for (const char* p = specials; *p; ++p)
	{
		if (c == *p)
			return true;
	}

	return false;
}
|
||||||
|
|
||||||
|
void parseExpr(int* parseFlagOut)
|
||||||
|
{
|
||||||
|
while (true)
|
||||||
|
{
|
||||||
|
int parseFlags;
|
||||||
|
parseTerm(&parseFlags);
|
||||||
|
*parseFlagOut &= ~(~parseFlags & PARSE_FLAG_NOT_EMPTY);
|
||||||
|
*parseFlagOut |= parseFlags;
|
||||||
|
|
||||||
|
auto savePos = patternPos;
|
||||||
|
UChar32 c;
|
||||||
|
|
||||||
|
if (!hasChar() || (c = getChar()) != '|')
|
||||||
|
{
|
||||||
|
patternPos = savePos;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
re2PatternStr.append("|");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void parseTerm(int* parseFlagOut)
|
||||||
|
{
|
||||||
|
*parseFlagOut = 0;
|
||||||
|
|
||||||
|
bool first = true;
|
||||||
|
|
||||||
|
while (hasChar())
|
||||||
|
{
|
||||||
|
auto c = peekChar();
|
||||||
|
|
||||||
|
if (c != '|' && c != ')')
|
||||||
|
{
|
||||||
|
int parseFlags;
|
||||||
|
parseFactor(&parseFlags);
|
||||||
|
|
||||||
|
*parseFlagOut |= parseFlags & PARSE_FLAG_NOT_EMPTY;
|
||||||
|
|
||||||
|
if (first)
|
||||||
|
{
|
||||||
|
*parseFlagOut |= parseFlags;
|
||||||
|
first = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void parseFactor(int* parseFlagOut)
|
||||||
|
{
|
||||||
|
int parseFlags;
|
||||||
|
parsePrimary(&parseFlags);
|
||||||
|
|
||||||
|
UChar32 op;
|
||||||
|
|
||||||
|
if (!hasChar() || !isRep((op = peekChar())))
|
||||||
|
{
|
||||||
|
*parseFlagOut = parseFlags;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!(parseFlags & PARSE_FLAG_NOT_EMPTY) && op != '?')
|
||||||
|
status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));
|
||||||
|
|
||||||
|
fb_assert(op == '*' || op == '+' || op == '?' || op == '{');
|
||||||
|
|
||||||
|
if (op == '*')
|
||||||
|
{
|
||||||
|
re2PatternStr.append((flags & FLAG_PREFER_FEWER) ? "*?" : "*");
|
||||||
|
*parseFlagOut = 0;
|
||||||
|
++patternPos;
|
||||||
|
}
|
||||||
|
else if (op == '+')
|
||||||
|
{
|
||||||
|
re2PatternStr.append((flags & FLAG_PREFER_FEWER) ? "+?" : "+");
|
||||||
|
*parseFlagOut = PARSE_FLAG_NOT_EMPTY;
|
||||||
|
++patternPos;
|
||||||
|
}
|
||||||
|
else if (op == '?')
|
||||||
|
{
|
||||||
|
re2PatternStr.append((flags & FLAG_PREFER_FEWER) ? "??" : "?");
|
||||||
|
*parseFlagOut = 0;
|
||||||
|
++patternPos;
|
||||||
|
}
|
||||||
|
else if (op == '{')
|
||||||
|
{
|
||||||
|
const auto repeatStart = patternPos++;
|
||||||
|
|
||||||
|
bool comma = false;
|
||||||
|
string s1, s2;
|
||||||
|
|
||||||
|
while (true)
|
||||||
|
{
|
||||||
|
if (!hasChar())
|
||||||
|
status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));
|
||||||
|
|
||||||
|
UChar32 c = getChar();
|
||||||
|
|
||||||
|
if (c == '}')
|
||||||
|
{
|
||||||
|
if (s1.isEmpty())
|
||||||
|
status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
else if (c == ',')
|
||||||
|
{
|
||||||
|
if (comma)
|
||||||
|
status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));
|
||||||
|
comma = true;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (c >= '0' && c <= '9')
|
||||||
|
{
|
||||||
|
if (comma)
|
||||||
|
s2 += (char) c;
|
||||||
|
else
|
||||||
|
s1 += (char) c;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const int n1 = atoi(s1.c_str());
|
||||||
|
*parseFlagOut = n1 == 0 ? 0 : PARSE_FLAG_NOT_EMPTY;
|
||||||
|
|
||||||
|
re2PatternStr.append(patternStr + repeatStart, patternStr + patternPos);
|
||||||
|
|
||||||
|
if (flags & FLAG_PREFER_FEWER)
|
||||||
|
re2PatternStr.append("?");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (hasChar() && isRep(peekChar()))
|
||||||
|
status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));
|
||||||
|
}
|
||||||
|
|
||||||
|
void parsePrimary(int* parseFlagOut)
|
||||||
|
{
|
||||||
|
*parseFlagOut = 0;
|
||||||
|
|
||||||
|
fb_assert(hasChar());
|
||||||
|
auto savePos = patternPos;
|
||||||
|
auto op = getChar();
|
||||||
|
|
||||||
|
if (op == '_')
|
||||||
|
{
|
||||||
|
*parseFlagOut |= PARSE_FLAG_NOT_EMPTY;
|
||||||
|
re2PatternStr.append(".");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
else if (op == '%')
|
||||||
|
{
|
||||||
|
re2PatternStr.append((flags & FLAG_PREFER_FEWER) ? ".*?" : ".*");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
else if (op == '[')
|
||||||
|
{
|
||||||
|
struct
|
||||||
|
{
|
||||||
|
const char* similarClass;
|
||||||
|
const char* re2ClassInclude;
|
||||||
|
const char* re2ClassExclude;
|
||||||
|
} static const classes[] =
|
||||||
|
{
|
||||||
|
{"alnum", "[:alnum:]", "[:^alnum:]"},
|
||||||
|
{"alpha", "[:alpha:]", "[:^alpha:]"},
|
||||||
|
{"digit", "[:digit:]", "[:^digit:]"},
|
||||||
|
{"lower", "[:lower:]", "[:^lower:]"},
|
||||||
|
{"space", " ", "\\x00-\\x1F\\x21-\\x{10FFFF}"},
|
||||||
|
{"upper", "[:upper:]", "[:^upper:]"},
|
||||||
|
{"whitespace", "[:space:]", "[:^space:]"}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct Item
|
||||||
|
{
|
||||||
|
int clazz;
|
||||||
|
unsigned firstStart, firstEnd, lastStart, lastEnd;
|
||||||
|
};
|
||||||
|
Array<Item> items;
|
||||||
|
unsigned includeCount = 0;
|
||||||
|
bool exclude = false;
|
||||||
|
|
||||||
|
do
|
||||||
|
{
|
||||||
|
if (!hasChar())
|
||||||
|
status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));
|
||||||
|
|
||||||
|
unsigned charSavePos = patternPos;
|
||||||
|
UChar32 c = getChar();
|
||||||
|
bool range = false;
|
||||||
|
bool charClass = false;
|
||||||
|
|
||||||
|
if (useEscape && c == escapeChar)
|
||||||
|
{
|
||||||
|
if (!hasChar())
|
||||||
|
status_exception::raise(Arg::Gds(isc_escape_invalid));
|
||||||
|
|
||||||
|
charSavePos = patternPos;
|
||||||
|
c = getChar();
|
||||||
|
|
||||||
|
if (!(c == escapeChar || isSpecial(c)))
|
||||||
|
status_exception::raise(Arg::Gds(isc_escape_invalid));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (c == '[')
|
||||||
|
charClass = true;
|
||||||
|
else if (c == '^')
|
||||||
|
{
|
||||||
|
if (exclude)
|
||||||
|
status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));
|
||||||
|
|
||||||
|
exclude = true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Item item;
|
||||||
|
|
||||||
|
if (!exclude)
|
||||||
|
++includeCount;
|
||||||
|
|
||||||
|
if (charClass)
|
||||||
|
{
|
||||||
|
if (!hasChar() || getChar() != ':')
|
||||||
|
status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));
|
||||||
|
|
||||||
|
charSavePos = patternPos;
|
||||||
|
|
||||||
|
while (hasChar() && getChar() != ':')
|
||||||
|
;
|
||||||
|
|
||||||
|
const SLONG len = patternPos - charSavePos - 1;
|
||||||
|
|
||||||
|
if (!hasChar() || getChar() != ']')
|
||||||
|
status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));
|
||||||
|
|
||||||
|
for (item.clazz = 0; item.clazz < FB_NELEM(classes); ++item.clazz)
|
||||||
|
{
|
||||||
|
if (fb_utils::strnicmp(patternStr + charSavePos,
|
||||||
|
classes[item.clazz].similarClass, len) == 0)
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (item.clazz >= FB_NELEM(classes))
|
||||||
|
status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
item.clazz = -1;
|
||||||
|
|
||||||
|
item.firstStart = item.lastStart = charSavePos;
|
||||||
|
item.firstEnd = item.lastEnd = patternPos;
|
||||||
|
|
||||||
|
if (hasChar() && peekChar() == '-')
|
||||||
|
{
|
||||||
|
getChar();
|
||||||
|
|
||||||
|
charSavePos = patternPos;
|
||||||
|
c = getChar();
|
||||||
|
|
||||||
|
if (useEscape && c == escapeChar)
|
||||||
|
{
|
||||||
|
if (!hasChar())
|
||||||
|
status_exception::raise(Arg::Gds(isc_escape_invalid));
|
||||||
|
|
||||||
|
charSavePos = patternPos;
|
||||||
|
c = getChar();
|
||||||
|
|
||||||
|
if (!(c == escapeChar || isSpecial(c)))
|
||||||
|
status_exception::raise(Arg::Gds(isc_escape_invalid));
|
||||||
|
}
|
||||||
|
|
||||||
|
item.lastStart = charSavePos;
|
||||||
|
item.lastEnd = patternPos;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
items.add(item);
|
||||||
|
|
||||||
|
if (!hasChar())
|
||||||
|
status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));
|
||||||
|
} while (peekChar() != ']');
|
||||||
|
|
||||||
|
auto appendItem = [&](const Item& item, bool negated) {
|
||||||
|
if (item.clazz != -1)
|
||||||
|
{
|
||||||
|
re2PatternStr.append(negated ?
|
||||||
|
classes[item.clazz].re2ClassExclude :
|
||||||
|
classes[item.clazz].re2ClassInclude);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (negated)
|
||||||
|
{
|
||||||
|
UChar32 c;
|
||||||
|
char hex[20];
|
||||||
|
|
||||||
|
int32_t cPos = item.firstStart;
|
||||||
|
U8_NEXT_UNSAFE(patternStr, cPos, c);
|
||||||
|
|
||||||
|
if (c > 0)
|
||||||
|
{
|
||||||
|
re2PatternStr.append("\\x00");
|
||||||
|
re2PatternStr.append("-");
|
||||||
|
|
||||||
|
sprintf(hex, "\\x{%X}", (int) c - 1);
|
||||||
|
re2PatternStr.append(hex);
|
||||||
|
}
|
||||||
|
|
||||||
|
cPos = item.lastStart;
|
||||||
|
U8_NEXT_UNSAFE(patternStr, cPos, c);
|
||||||
|
|
||||||
|
if (c < 0x10FFFF)
|
||||||
|
{
|
||||||
|
sprintf(hex, "\\x{%X}", (int) c + 1);
|
||||||
|
re2PatternStr.append(hex);
|
||||||
|
re2PatternStr.append("-");
|
||||||
|
re2PatternStr.append("\\x{10FFFF}");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (isRe2Special(patternStr[item.firstStart]))
|
||||||
|
re2PatternStr.append("\\");
|
||||||
|
|
||||||
|
re2PatternStr.append(patternStr + item.firstStart, patternStr + item.firstEnd);
|
||||||
|
|
||||||
|
if (item.lastStart != item.firstStart)
|
||||||
|
{
|
||||||
|
re2PatternStr.append("-");
|
||||||
|
|
||||||
|
if (isRe2Special(patternStr[item.lastStart]))
|
||||||
|
re2PatternStr.append("\\");
|
||||||
|
|
||||||
|
re2PatternStr.append(patternStr + item.lastStart, patternStr + item.lastEnd);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
if (exclude && includeCount > 1)
|
||||||
|
{
|
||||||
|
re2PatternStr.append("(?:");
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < includeCount; ++i)
|
||||||
|
{
|
||||||
|
if (i != 0)
|
||||||
|
re2PatternStr.append("|");
|
||||||
|
|
||||||
|
re2PatternStr.append("[");
|
||||||
|
re2PatternStr.append("^");
|
||||||
|
appendItem(items[i], true);
|
||||||
|
|
||||||
|
for (unsigned j = includeCount; j < items.getCount(); ++j)
|
||||||
|
appendItem(items[j], false);
|
||||||
|
|
||||||
|
re2PatternStr.append("]");
|
||||||
|
}
|
||||||
|
|
||||||
|
re2PatternStr.append(")");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
re2PatternStr.append("[");
|
||||||
|
|
||||||
|
if (exclude)
|
||||||
|
re2PatternStr.append("^");
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < items.getCount(); ++i)
|
||||||
|
appendItem(items[i], exclude && i < includeCount);
|
||||||
|
|
||||||
|
re2PatternStr.append("]");
|
||||||
|
}
|
||||||
|
|
||||||
|
getChar();
|
||||||
|
*parseFlagOut |= PARSE_FLAG_NOT_EMPTY;
|
||||||
|
}
|
||||||
|
else if (op == '(')
|
||||||
|
{
|
||||||
|
re2PatternStr.append(flags & FLAG_GROUP_CAPTURE ? "(" : "(?:");
|
||||||
|
|
||||||
|
int parseFlags;
|
||||||
|
parseExpr(&parseFlags);
|
||||||
|
|
||||||
|
if (!hasChar() || getChar() != ')')
|
||||||
|
status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));
|
||||||
|
|
||||||
|
re2PatternStr.append(")");
|
||||||
|
|
||||||
|
*parseFlagOut |= parseFlags & PARSE_FLAG_NOT_EMPTY;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
patternPos = savePos;
|
||||||
|
|
||||||
|
bool controlChar = false;
|
||||||
|
|
||||||
|
do
|
||||||
|
{
|
||||||
|
auto charSavePos = patternPos;
|
||||||
|
op = getChar();
|
||||||
|
|
||||||
|
if (useEscape && op == escapeChar)
|
||||||
|
{
|
||||||
|
charSavePos = patternPos;
|
||||||
|
op = getChar();
|
||||||
|
|
||||||
|
if (!isSpecial(op) && op != escapeChar)
|
||||||
|
status_exception::raise(Arg::Gds(isc_escape_invalid));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (isSpecial(op))
|
||||||
|
{
|
||||||
|
controlChar = true;
|
||||||
|
patternPos = charSavePos;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!controlChar)
|
||||||
|
{
|
||||||
|
if (isRe2Special(op))
|
||||||
|
re2PatternStr.append("\\");
|
||||||
|
|
||||||
|
re2PatternStr.append(patternStr + charSavePos, patternStr + patternPos);
|
||||||
|
}
|
||||||
|
} while (!controlChar && hasChar());
|
||||||
|
|
||||||
|
if (patternPos == savePos)
|
||||||
|
status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));
|
||||||
|
|
||||||
|
*parseFlagOut |= PARSE_FLAG_NOT_EMPTY;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Gives read-only access to the RE2 pattern text generated by the constructor.
const string& getRe2PatternStr() const
{
	const string& translated = re2PatternStr;
	return translated;
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
// Flag reported through the parse*() out-parameters.
static const int PARSE_FLAG_NOT_EMPTY = 1; // known never to match empty string
|
||||||
|
|
||||||
|
// RE2 pattern text built up while the SIMILAR TO pattern is parsed.
string re2PatternStr;
|
||||||
|
// SIMILAR TO pattern being translated (not owned by this object).
const char* patternStr;
|
||||||
|
// Byte offset of the next unread character in patternStr.
int32_t patternPos;
|
||||||
|
// Length of patternStr in bytes.
int32_t patternLen;
|
||||||
|
// Decoded escape character; consulted only when useEscape is true.
UChar32 escapeChar;
|
||||||
|
// Combination of the FLAG_* translation flags.
unsigned flags;
|
||||||
|
// True when an escape string was supplied to the constructor.
bool useEscape;
|
||||||
|
};
|
||||||
|
|
||||||
|
class SubstringSimilarCompiler
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
// Compiles a SUBSTRING ... SIMILAR pattern.
//
// The pattern must contain exactly two <escape>" separators, which split it
// into three SIMILAR TO sub-patterns. Each part is translated separately and
// the results are concatenated as three capturing groups, "(p1)(p2)(p3)",
// before being compiled into `regexp`. The outer parts are translated with
// FLAG_PREFER_FEWER; only FLAG_CASE_INSENSITIVE is taken from `flags`.
//
// Raises isc_escape_invalid when the escape is missing or is not exactly one
// well-formed character, and isc_invalid_similar_pattern when the separators
// are wrong or any part is malformed.
SubstringSimilarCompiler(MemoryPool& pool, AutoPtr<RE2>& regexp, unsigned flags,
		const char* aPatternStr, unsigned aPatternLen,
		const char* escapeStr, unsigned escapeLen)
	: patternStr(aPatternStr),
	  patternPos(0),
	  patternLen(aPatternLen),
	  escapeChar(0)
{
	// Decode with the bounded macro and reject a null or empty escape: the
	// unsafe variant dereferences escapeStr unconditionally and may read past
	// its end.
	const int32_t escapeEnd = static_cast<int32_t>(escapeLen);
	int32_t escapePos = 0;

	if (escapeStr && escapeEnd > 0)
		U8_NEXT(escapeStr, escapePos, escapeEnd, escapeChar);

	if (!escapeStr || escapeEnd <= 0 || escapeChar < 0 || escapePos != escapeEnd)
		status_exception::raise(Arg::Gds(isc_escape_invalid));

	// Byte offsets just past each <escape>" separator.
	unsigned positions[2] = {0, 0};
	unsigned part = 0;

	while (hasChar())
	{
		auto c = getChar();

		if (c != escapeChar)
			continue;

		if (!hasChar())
			status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));

		// Consume the escaped character too, so that an escaped escape
		// followed by '"' is not mistaken for a separator.
		c = getChar();

		if (c == '"')
		{
			if (part >= 2)
				status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));

			positions[part++] = patternPos;
		}
	}

	if (part != 2)
		status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));

	AutoPtr<RE2> regexp1, regexp2, regexp3;

	// Each part ends where the next separator (escape + '"') begins.
	SimilarToCompiler compiler1(pool, regexp1, (flags & FLAG_CASE_INSENSITIVE) | FLAG_PREFER_FEWER,
		aPatternStr, positions[0] - escapeLen - 1, escapeStr, escapeLen);

	SimilarToCompiler compiler2(pool, regexp2, (flags & FLAG_CASE_INSENSITIVE),
		aPatternStr + positions[0], positions[1] - positions[0] - escapeLen - 1, escapeStr, escapeLen);

	SimilarToCompiler compiler3(pool, regexp3, (flags & FLAG_CASE_INSENSITIVE) | FLAG_PREFER_FEWER,
		aPatternStr + positions[1], patternLen - positions[1], escapeStr, escapeLen);

	string finalRe2Pattern;
	finalRe2Pattern.reserve(
		1 +	// (
		compiler1.getRe2PatternStr().length() +
		2 +	// )(
		compiler2.getRe2PatternStr().length() +
		2 +	// )(
		compiler3.getRe2PatternStr().length() +
		1	// )
	);

	finalRe2Pattern.append("(");
	finalRe2Pattern.append(compiler1.getRe2PatternStr());
	finalRe2Pattern.append(")(");
	finalRe2Pattern.append(compiler2.getRe2PatternStr());
	finalRe2Pattern.append(")(");
	finalRe2Pattern.append(compiler3.getRe2PatternStr());
	finalRe2Pattern.append(")");

	RE2::Options options;
	options.set_log_errors(false);
	options.set_dot_nl(true);

	re2::StringPiece sp(finalRe2Pattern.c_str(), finalRe2Pattern.length());
	regexp = FB_NEW_POOL(pool) RE2(sp, options);

	if (!regexp->ok())
		status_exception::raise(Arg::Gds(isc_invalid_similar_pattern));
}
|
||||||
|
|
||||||
|
bool hasChar()
|
||||||
|
{
|
||||||
|
return patternPos < patternLen;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Decodes the code point at the current position and advances past it.
// The caller must have checked hasChar() first.
UChar32 getChar()
{
	fb_assert(hasChar());

	UChar32 decoded;
	U8_NEXT_UNSAFE(patternStr, patternPos, decoded);

	return decoded;
}
|
||||||
|
|
||||||
|
// Returns the next code point without consuming it.
UChar32 peekChar()
{
	const auto mark = patternPos;
	const auto next = getChar();

	// Rewind so the character is read again by the next getChar().
	patternPos = mark;

	return next;
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
const char* patternStr;
|
||||||
|
int32_t patternPos;
|
||||||
|
int32_t patternLen;
|
||||||
|
UChar32 escapeChar;
|
||||||
|
};
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
namespace Firebird {
|
||||||
|
|
||||||
|
|
||||||
|
// Builds the matcher for a SIMILAR TO pattern.
//   pool            - pool used for this object and handed to the compiler
//   caseInsensitive - adds FLAG_CASE_INSENSITIVE to the compiler flags
//   patternStr/Len  - pattern text and its length in bytes
//   escapeStr/Len   - escape text and its length in bytes
// Group capturing and FLAG_PREFER_FEWER are always requested. The compiler object
// receives `regexp` and is only needed during construction.
SimilarToRegex::SimilarToRegex(MemoryPool& pool, bool caseInsensitive,
		const char* patternStr, unsigned patternLen, const char* escapeStr, unsigned escapeLen)
	: PermanentStorage(pool)
{
	const auto compilerFlags = FLAG_GROUP_CAPTURE | FLAG_PREFER_FEWER |
		(caseInsensitive ? FLAG_CASE_INSENSITIVE : 0);

	SimilarToCompiler compiler(pool, regexp, compilerFlags,
		patternStr, patternLen, escapeStr, escapeLen);
}
|
||||||
|
|
||||||
|
bool SimilarToRegex::matches(const char* buffer, unsigned bufferLen, Array<MatchPos>* matchPosArray)
|
||||||
|
{
|
||||||
|
re2::StringPiece sp(buffer, bufferLen);
|
||||||
|
|
||||||
|
if (matchPosArray)
|
||||||
|
{
|
||||||
|
const int argsCount = regexp->NumberOfCapturingGroups();
|
||||||
|
|
||||||
|
Array<re2::StringPiece> resSps(argsCount);
|
||||||
|
resSps.resize(argsCount);
|
||||||
|
|
||||||
|
Array<RE2::Arg> args(argsCount);
|
||||||
|
args.resize(argsCount);
|
||||||
|
|
||||||
|
Array<RE2::Arg*> argsPtr(argsCount);
|
||||||
|
|
||||||
|
{ // scope
|
||||||
|
auto resSp = resSps.begin();
|
||||||
|
|
||||||
|
for (auto& arg : args)
|
||||||
|
{
|
||||||
|
arg = resSp++;
|
||||||
|
argsPtr.push(&arg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (RE2::FullMatchN(sp, *regexp.get(), argsPtr.begin(), argsCount))
|
||||||
|
{
|
||||||
|
matchPosArray->clear();
|
||||||
|
|
||||||
|
for (const auto resSp : resSps)
|
||||||
|
{
|
||||||
|
matchPosArray->push(MatchPos{
|
||||||
|
static_cast<unsigned>(resSp.data() - sp.begin()),
|
||||||
|
static_cast<unsigned>(resSp.length())
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
return RE2::FullMatch(sp, *regexp.get());
|
||||||
|
}
|
||||||
|
|
||||||
|
//---------------------
|
||||||
|
|
||||||
|
// Builds the matcher for a SUBSTRING ... SIMILAR pattern.
//   pool            - pool used for this object and handed to the compiler
//   caseInsensitive - selects FLAG_CASE_INSENSITIVE as the only compiler flag
//   patternStr/Len  - pattern text and its length in bytes
//   escapeStr/Len   - escape text and its length in bytes
// The compiler object receives `regexp` and is only needed during construction.
SubstringSimilarRegex::SubstringSimilarRegex(MemoryPool& pool, bool caseInsensitive,
		const char* patternStr, unsigned patternLen, const char* escapeStr, unsigned escapeLen)
	: PermanentStorage(pool)
{
	const auto compilerFlags = (caseInsensitive ? FLAG_CASE_INSENSITIVE : 0);

	SubstringSimilarCompiler compiler(pool, regexp, compilerFlags,
		patternStr, patternLen, escapeStr, escapeLen);
}
|
||||||
|
|
||||||
|
bool SubstringSimilarRegex::matches(const char* buffer, unsigned bufferLen,
|
||||||
|
unsigned* resultStart, unsigned* resultLength)
|
||||||
|
{
|
||||||
|
re2::StringPiece sp(buffer, bufferLen);
|
||||||
|
|
||||||
|
re2::StringPiece spResult;
|
||||||
|
|
||||||
|
if (RE2::FullMatch(sp, *regexp.get(), nullptr, &spResult, nullptr))
|
||||||
|
{
|
||||||
|
*resultStart = spResult.begin() - buffer;
|
||||||
|
*resultLength = spResult.length();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace Firebird
|
75
src/common/SimilarToRegex.h
Normal file
75
src/common/SimilarToRegex.h
Normal file
@ -0,0 +1,75 @@
|
|||||||
|
/*
|
||||||
|
* The contents of this file are subject to the Initial
|
||||||
|
* Developer's Public License Version 1.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the
|
||||||
|
* License. You may obtain a copy of the License at
|
||||||
|
* http://www.ibphoenix.com/main.nfs?a=ibphoenix&page=ibp_idpl.
|
||||||
|
*
|
||||||
|
* Software distributed under the License is distributed AS IS,
|
||||||
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing rights
|
||||||
|
* and limitations under the License.
|
||||||
|
*
|
||||||
|
* The Original Code was created by Adriano dos Santos Fernandes
|
||||||
|
* for the Firebird Open Source RDBMS project.
|
||||||
|
*
|
||||||
|
* Copyright (c) 2019 Adriano dos Santos Fernandes <adrianosf at gmail.com>
|
||||||
|
* and all contributors signed below.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef COMMON_SIMILAR_TO_REGEX_H
|
||||||
|
#define COMMON_SIMILAR_TO_REGEX_H
|
||||||
|
|
||||||
|
#include "firebird.h"
|
||||||
|
#include <re2/re2.h>
|
||||||
|
#include "../common/classes/auto.h"
|
||||||
|
#include "../common/classes/array.h"
|
||||||
|
#include "../common/classes/fb_string.h"
|
||||||
|
|
||||||
|
namespace Firebird {
|
||||||
|
|
||||||
|
|
||||||
|
//// FIXME: Leak re2::RE2 when destroyed by pool.
|
||||||
|
class SimilarToRegex : public PermanentStorage
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
struct MatchPos
|
||||||
|
{
|
||||||
|
unsigned start;
|
||||||
|
unsigned length;
|
||||||
|
};
|
||||||
|
|
||||||
|
public:
|
||||||
|
SimilarToRegex(MemoryPool& pool, bool caseInsensitive,
|
||||||
|
const char* patternStr, unsigned patternLen, const char* escapeStr, unsigned escapeLen);
|
||||||
|
|
||||||
|
public:
|
||||||
|
bool matches(const char* buffer, unsigned bufferLen, Array<MatchPos>* matchPosArray = nullptr);
|
||||||
|
|
||||||
|
private:
|
||||||
|
AutoPtr<re2::RE2> regexp;
|
||||||
|
};
|
||||||
|
|
||||||
|
//// FIXME: Leak re2::RE2 when destroyed by pool.
|
||||||
|
// Given a regular expression R1<escape>#R2#<escape>R3 and the string S:
|
||||||
|
// - Find the shortest substring of S that matches R1 while the remainder (S23) matches R2R3;
|
||||||
|
// - Find the longest (S2) substring of S23 that matches R2 while the remainder matches R3;
|
||||||
|
// - Return S2.
|
||||||
|
class SubstringSimilarRegex : public PermanentStorage
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
SubstringSimilarRegex(MemoryPool& pool, bool caseInsensitive,
|
||||||
|
const char* patternStr, unsigned patternLen, const char* escapeStr, unsigned escapeLen);
|
||||||
|
|
||||||
|
public:
|
||||||
|
bool matches(const char* buffer, unsigned bufferLen, unsigned* resultStart, unsigned* resultLength);
|
||||||
|
|
||||||
|
private:
|
||||||
|
AutoPtr<re2::RE2> regexp;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace Firebird
|
||||||
|
|
||||||
|
#endif // COMMON_SIMILAR_TO_REGEX_H
|
@ -169,33 +169,6 @@ TextType::TextType(TTYPE_ID _type, texttype *_tt, CharSet* _cs)
|
|||||||
memset(&canonicalChars[conversions[i].ch], 0, sizeof(ULONG));
|
memset(&canonicalChars[conversions[i].ch], 0, sizeof(ULONG));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct Conversion2
|
|
||||||
{
|
|
||||||
const char* str;
|
|
||||||
UCHAR* buffer;
|
|
||||||
};
|
|
||||||
|
|
||||||
const Conversion2 conversions2[] =
|
|
||||||
{
|
|
||||||
{"0123456789", reinterpret_cast<UCHAR*>(canonicalNumbers)},
|
|
||||||
{"abcdefghijklmnopqrstuvwxyz", reinterpret_cast<UCHAR*>(canonicalLowerLetters)},
|
|
||||||
{"ABCDEFGHIJKLMNOPQRSTUVWXYZ", reinterpret_cast<UCHAR*>(canonicalUpperLetters)},
|
|
||||||
{" \t\v\r\n\f", reinterpret_cast<UCHAR*>(canonicalWhiteSpaces)}
|
|
||||||
};
|
|
||||||
|
|
||||||
for (int i = 0; i < FB_NELEM(conversions2); i++)
|
|
||||||
{
|
|
||||||
UCHAR temp[sizeof(ULONG)];
|
|
||||||
|
|
||||||
for (const char* p = conversions2[i].str; *p; ++p)
|
|
||||||
{
|
|
||||||
USHORT code = static_cast<USHORT>(*p);
|
|
||||||
ULONG length = getCharSet()->getConvFromUnicode().convert(sizeof(code), &code, sizeof(temp), temp);
|
|
||||||
const size_t pos = (p - conversions2[i].str) * getCanonicalWidth();
|
|
||||||
canonical(length, temp, sizeof(ULONG), &conversions2[i].buffer[pos]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -138,47 +138,8 @@ public:
|
|||||||
return reinterpret_cast<const UCHAR*>(&canonicalChars[ch]);
|
return reinterpret_cast<const UCHAR*>(&canonicalChars[ch]);
|
||||||
}
|
}
|
||||||
|
|
||||||
const UCHAR* getCanonicalNumbers(int* count = NULL) const
|
|
||||||
{
|
|
||||||
if (count)
|
|
||||||
*count = 10;
|
|
||||||
return reinterpret_cast<const UCHAR*>(canonicalNumbers);
|
|
||||||
}
|
|
||||||
|
|
||||||
const UCHAR* getCanonicalLowerLetters(int* count = NULL) const
|
|
||||||
{
|
|
||||||
if (count)
|
|
||||||
*count = 26;
|
|
||||||
return reinterpret_cast<const UCHAR*>(canonicalLowerLetters);
|
|
||||||
}
|
|
||||||
|
|
||||||
const UCHAR* getCanonicalUpperLetters(int* count = NULL) const
|
|
||||||
{
|
|
||||||
if (count)
|
|
||||||
*count = 26;
|
|
||||||
return reinterpret_cast<const UCHAR*>(canonicalUpperLetters);
|
|
||||||
}
|
|
||||||
|
|
||||||
const UCHAR* getCanonicalWhiteSpaces(int* count = NULL) const
|
|
||||||
{
|
|
||||||
if (count)
|
|
||||||
*count = 6;
|
|
||||||
return reinterpret_cast<const UCHAR*>(canonicalWhiteSpaces);
|
|
||||||
}
|
|
||||||
|
|
||||||
const UCHAR* getCanonicalSpace(int* count = NULL) const
|
|
||||||
{
|
|
||||||
if (count)
|
|
||||||
*count = 1;
|
|
||||||
return getCanonicalChar(CHAR_SPACE);
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
ULONG canonicalChars[CHAR_COUNT];
|
ULONG canonicalChars[CHAR_COUNT];
|
||||||
ULONG canonicalNumbers[10];
|
|
||||||
ULONG canonicalLowerLetters[26];
|
|
||||||
ULONG canonicalUpperLetters[26];
|
|
||||||
ULONG canonicalWhiteSpaces[6];
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace Jrd
|
} // namespace Jrd
|
||||||
|
@ -1031,6 +1031,37 @@ INTL_BOOL UnicodeUtil::utf32WellFormed(ULONG len, const ULONG* str, ULONG* offen
|
|||||||
return true; // well-formed
|
return true; // well-formed
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Rewrites `data` (UTF-8 bytes) in place after passing it through the ICU
// transliterator returned by getCiAiTransliterator(). The text is converted to
// UTF-16, transliterated and converted back to UTF-8.
// `data` is left unchanged when no transliterator is available or when ICU
// reports a failure (for example, the result does not fit the work buffer).
void UnicodeUtil::utf8Normalize(UCharBuffer& data)
{
	ICU* icu = loadICU("", "");

	HalfStaticArray<USHORT, BUFFER_MEDIUM> utf16Buffer(data.getCount());
	USHORT errCode;
	ULONG errPosition;

	// The returned length is in bytes (it is divided by sizeof(USHORT) below).
	ULONG utf16BufferLen = utf8ToUtf16(data.getCount(), data.begin(), data.getCount() * sizeof(USHORT),
		utf16Buffer.getBuffer(data.getCount()), &errCode, &errPosition);

	UTransliterator* trans = icu->getCiAiTransliterator();

	if (trans)
	{
		// ICU expects the capacity in UChar units, not in bytes. Passing a byte count
		// would overstate the buffer twofold and allow writes past its end.
		const int32_t capacity = utf16Buffer.getCount();
		int32_t len = utf16BufferLen / sizeof(USHORT);
		int32_t limit = len;

		UErrorCode errorCode = U_ZERO_ERROR;
		icu->utransTransUChars(trans, reinterpret_cast<UChar*>(utf16Buffer.begin()),
			&len, capacity, 0, &limit, &errorCode);
		icu->releaseCiAiTransliterator(trans);

		// On failure `len` may exceed the buffer, so the buffer content cannot be used.
		if (U_FAILURE(errorCode))
			return;

		// Convert exactly the transliterated text: `len` now holds its length in
		// UChars, which may differ from the length before transliteration.
		len = utf16ToUtf8(len * sizeof(USHORT), utf16Buffer.begin(),
			len * 4, data.getBuffer(len * 4, false),
			&errCode, &errPosition);

		data.shrink(len);
	}
}
|
||||||
|
|
||||||
UnicodeUtil::ICU* UnicodeUtil::loadICU(const string& icuVersion, const string& configInfo)
|
UnicodeUtil::ICU* UnicodeUtil::loadICU(const string& icuVersion, const string& configInfo)
|
||||||
{
|
{
|
||||||
ObjectsArray<string> versions;
|
ObjectsArray<string> versions;
|
||||||
|
@ -177,6 +177,8 @@ public:
|
|||||||
static INTL_BOOL utf16WellFormed(ULONG len, const USHORT* str, ULONG* offending_position);
|
static INTL_BOOL utf16WellFormed(ULONG len, const USHORT* str, ULONG* offending_position);
|
||||||
static INTL_BOOL utf32WellFormed(ULONG len, const ULONG* str, ULONG* offending_position);
|
static INTL_BOOL utf32WellFormed(ULONG len, const ULONG* str, ULONG* offending_position);
|
||||||
|
|
||||||
|
static void utf8Normalize(Firebird::UCharBuffer& data);
|
||||||
|
|
||||||
static ConversionICU& getConversionICU();
|
static ConversionICU& getConversionICU();
|
||||||
static ICU* loadICU(const Firebird::string& icuVersion, const Firebird::string& configInfo);
|
static ICU* loadICU(const Firebird::string& icuVersion, const Firebird::string& configInfo);
|
||||||
static bool getCollVersion(const Firebird::string& icuVersion,
|
static bool getCollVersion(const Firebird::string& icuVersion,
|
||||||
|
@ -945,7 +945,7 @@ bool ComparativeBoolNode::stringBoolean(thread_db* tdbb, jrd_req* request, dsc*
|
|||||||
else // nod_similar
|
else // nod_similar
|
||||||
{
|
{
|
||||||
impure->vlu_misc.vlu_invariant = evaluator = obj->createSimilarToMatcher(
|
impure->vlu_misc.vlu_invariant = evaluator = obj->createSimilarToMatcher(
|
||||||
*tdbb->getDefaultPool(), p2, l2, escape_str, escape_length);
|
tdbb, *tdbb->getDefaultPool(), p2, l2, escape_str, escape_length);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -961,7 +961,7 @@ bool ComparativeBoolNode::stringBoolean(thread_db* tdbb, jrd_req* request, dsc*
|
|||||||
}
|
}
|
||||||
else // nod_similar
|
else // nod_similar
|
||||||
{
|
{
|
||||||
evaluator = obj->createSimilarToMatcher(*tdbb->getDefaultPool(),
|
evaluator = obj->createSimilarToMatcher(tdbb, *tdbb->getDefaultPool(),
|
||||||
p2, l2, escape_str, escape_length);
|
p2, l2, escape_str, escape_length);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1152,7 +1152,7 @@ bool ComparativeBoolNode::stringFunction(thread_db* tdbb, jrd_req* request,
|
|||||||
else // nod_similar
|
else // nod_similar
|
||||||
{
|
{
|
||||||
impure->vlu_misc.vlu_invariant = evaluator = obj->createSimilarToMatcher(
|
impure->vlu_misc.vlu_invariant = evaluator = obj->createSimilarToMatcher(
|
||||||
*tdbb->getDefaultPool(), p2, l2, escape_str, escape_length);
|
tdbb, *tdbb->getDefaultPool(), p2, l2, escape_str, escape_length);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -1170,7 +1170,7 @@ bool ComparativeBoolNode::stringFunction(thread_db* tdbb, jrd_req* request,
|
|||||||
return obj->like(*tdbb->getDefaultPool(), p1, l1, p2, l2, escape_str, escape_length);
|
return obj->like(*tdbb->getDefaultPool(), p1, l1, p2, l2, escape_str, escape_length);
|
||||||
|
|
||||||
// nod_similar
|
// nod_similar
|
||||||
return obj->similarTo(*tdbb->getDefaultPool(), p1, l1, p2, l2, escape_str, escape_length);
|
return obj->similarTo(tdbb, *tdbb->getDefaultPool(), p1, l1, p2, l2, escape_str, escape_length);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Handle MATCHES
|
// Handle MATCHES
|
||||||
|
@ -11873,7 +11873,7 @@ dsc* SubstringSimilarNode::execute(thread_db* tdbb, jrd_req* request) const
|
|||||||
delete impure->vlu_misc.vlu_invariant;
|
delete impure->vlu_misc.vlu_invariant;
|
||||||
|
|
||||||
impure->vlu_misc.vlu_invariant = evaluator = collation->createSubstringSimilarMatcher(
|
impure->vlu_misc.vlu_invariant = evaluator = collation->createSubstringSimilarMatcher(
|
||||||
*tdbb->getDefaultPool(), patternStr, patternLen, escapeStr, escapeLen);
|
tdbb, *tdbb->getDefaultPool(), patternStr, patternLen, escapeStr, escapeLen);
|
||||||
|
|
||||||
impure->vlu_flags |= VLU_computed;
|
impure->vlu_flags |= VLU_computed;
|
||||||
}
|
}
|
||||||
@ -11885,7 +11885,7 @@ dsc* SubstringSimilarNode::execute(thread_db* tdbb, jrd_req* request) const
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
autoEvaluator = evaluator = collation->createSubstringSimilarMatcher(*tdbb->getDefaultPool(),
|
autoEvaluator = evaluator = collation->createSubstringSimilarMatcher(tdbb, *tdbb->getDefaultPool(),
|
||||||
patternStr, patternLen, escapeStr, escapeLen);
|
patternStr, patternLen, escapeStr, escapeLen);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -99,16 +99,177 @@
|
|||||||
#include "../jrd/intl_classes.h"
|
#include "../jrd/intl_classes.h"
|
||||||
#include "../jrd/lck_proto.h"
|
#include "../jrd/lck_proto.h"
|
||||||
#include "../jrd/intl_classes.h"
|
#include "../jrd/intl_classes.h"
|
||||||
|
#include "../jrd/intl_proto.h"
|
||||||
#include "../jrd/Collation.h"
|
#include "../jrd/Collation.h"
|
||||||
#include "../common/TextType.h"
|
#include "../common/TextType.h"
|
||||||
|
#include "../common/SimilarToRegex.h"
|
||||||
|
|
||||||
#include "../jrd/SimilarToMatcher.h"
|
using namespace Firebird;
|
||||||
|
|
||||||
using namespace Jrd;
|
using namespace Jrd;
|
||||||
|
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
|
//// TODO: NONE / OCTETS.
|
||||||
|
class Re2SimilarMatcher : public PatternMatcher
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
Re2SimilarMatcher(thread_db* tdbb, MemoryPool& pool, TextType* textType,
|
||||||
|
const UCHAR* patternStr, SLONG patternLen, const UCHAR* escapeStr, SLONG escapeLen)
|
||||||
|
: PatternMatcher(pool, textType),
|
||||||
|
buffer(pool)
|
||||||
|
{
|
||||||
|
CsConvert converter = INTL_convert_lookup(tdbb, CS_UTF8, textType->getCharSet()->getId());
|
||||||
|
|
||||||
|
UCharBuffer patternBuffer, escapeBuffer;
|
||||||
|
|
||||||
|
converter.convert(patternLen, patternStr, patternBuffer);
|
||||||
|
|
||||||
|
if (textType->getFlags() & TEXTTYPE_ATTR_ACCENT_INSENSITIVE)
|
||||||
|
UnicodeUtil::utf8Normalize(patternBuffer);
|
||||||
|
|
||||||
|
if (escapeStr)
|
||||||
|
{
|
||||||
|
converter.convert(escapeLen, escapeStr, escapeBuffer);
|
||||||
|
|
||||||
|
if (textType->getFlags() & TEXTTYPE_ATTR_ACCENT_INSENSITIVE)
|
||||||
|
UnicodeUtil::utf8Normalize(escapeBuffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
regex = FB_NEW_POOL(pool) SimilarToRegex(pool,
|
||||||
|
(textType->getFlags() & TEXTTYPE_ATTR_CASE_INSENSITIVE),
|
||||||
|
(const char*) patternBuffer.begin(), patternBuffer.getCount(),
|
||||||
|
(escapeStr ? (const char*) escapeBuffer.begin() : nullptr), escapeBuffer.getCount());
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
static Re2SimilarMatcher* create(thread_db* tdbb, MemoryPool& pool, TextType* textType,
|
||||||
|
const UCHAR* patternStr, SLONG patternLen, const UCHAR* escapeStr, SLONG escapeLen)
|
||||||
|
{
|
||||||
|
return FB_NEW_POOL(pool) Re2SimilarMatcher(tdbb, pool, textType, patternStr, patternLen, escapeStr, escapeLen);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool evaluate(thread_db* tdbb, MemoryPool& pool, TextType* textType, const UCHAR* str, SLONG strLen,
|
||||||
|
const UCHAR* patternStr, SLONG patternLen, const UCHAR* escapeStr, SLONG escapeLen)
|
||||||
|
{
|
||||||
|
Re2SimilarMatcher matcher(tdbb, pool, textType, patternStr, patternLen, escapeStr, escapeLen);
|
||||||
|
matcher.process(str, strLen);
|
||||||
|
return matcher.result();
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
virtual void reset()
|
||||||
|
{
|
||||||
|
buffer.shrink(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual bool process(const UCHAR* data, SLONG dataLen)
|
||||||
|
{
|
||||||
|
const FB_SIZE_T pos = buffer.getCount();
|
||||||
|
memcpy(buffer.getBuffer(pos + dataLen) + pos, data, dataLen);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual bool result()
|
||||||
|
{
|
||||||
|
if (textType->getFlags() & TEXTTYPE_ATTR_ACCENT_INSENSITIVE)
|
||||||
|
UnicodeUtil::utf8Normalize(buffer);
|
||||||
|
|
||||||
|
return regex->matches((const char*) buffer.begin(), buffer.getCount());
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
AutoPtr<SimilarToRegex> regex;
|
||||||
|
UCharBuffer buffer;
|
||||||
|
};
|
||||||
|
|
||||||
|
class Re2SubstringSimilarMatcher : public BaseSubstringSimilarMatcher
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
Re2SubstringSimilarMatcher(thread_db* tdbb, MemoryPool& pool, TextType* textType,
|
||||||
|
const UCHAR* patternStr, SLONG patternLen, const UCHAR* escapeStr, SLONG escapeLen)
|
||||||
|
: BaseSubstringSimilarMatcher(pool, textType),
|
||||||
|
buffer(pool),
|
||||||
|
resultStart(0),
|
||||||
|
resultLength(0)
|
||||||
|
{
|
||||||
|
CsConvert converter = INTL_convert_lookup(tdbb, textType->getCharSet()->getId(), CS_UTF8);
|
||||||
|
|
||||||
|
UCharBuffer patternBuffer, escapeBuffer;
|
||||||
|
|
||||||
|
converter.convert(patternLen, patternStr, patternBuffer);
|
||||||
|
|
||||||
|
if (textType->getFlags() & TEXTTYPE_ATTR_ACCENT_INSENSITIVE)
|
||||||
|
UnicodeUtil::utf8Normalize(patternBuffer);
|
||||||
|
|
||||||
|
if (escapeStr)
|
||||||
|
{
|
||||||
|
converter.convert(escapeLen, escapeStr, escapeBuffer);
|
||||||
|
|
||||||
|
if (textType->getFlags() & TEXTTYPE_ATTR_ACCENT_INSENSITIVE)
|
||||||
|
UnicodeUtil::utf8Normalize(escapeBuffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
regex = FB_NEW_POOL(pool) SubstringSimilarRegex(pool,
|
||||||
|
(textType->getFlags() & TEXTTYPE_ATTR_CASE_INSENSITIVE),
|
||||||
|
(const char*) patternBuffer.begin(), patternBuffer.getCount(),
|
||||||
|
(escapeStr ? (const char*) escapeBuffer.begin() : nullptr), escapeBuffer.getCount());
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual ~Re2SubstringSimilarMatcher()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
static Re2SubstringSimilarMatcher* create(thread_db* tdbb, MemoryPool& pool, TextType* textType,
|
||||||
|
const UCHAR* patternStr, SLONG patternLen, const UCHAR* escapeStr, SLONG escapeLen)
|
||||||
|
{
|
||||||
|
return FB_NEW_POOL(pool) Re2SubstringSimilarMatcher(tdbb, pool, textType,
|
||||||
|
patternStr, patternLen, escapeStr, escapeLen);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool evaluate(thread_db* tdbb, MemoryPool& pool, TextType* textType, const UCHAR* str, SLONG strLen,
|
||||||
|
const UCHAR* patternStr, SLONG patternLen, const UCHAR* escapeStr, SLONG escapeLen)
|
||||||
|
{
|
||||||
|
Re2SubstringSimilarMatcher matcher(tdbb, pool, textType, patternStr, patternLen, escapeStr, escapeLen);
|
||||||
|
matcher.process(str, strLen);
|
||||||
|
return matcher.result();
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
virtual void reset()
|
||||||
|
{
|
||||||
|
buffer.shrink(0);
|
||||||
|
resultStart = resultLength = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual bool process(const UCHAR* data, SLONG dataLen)
|
||||||
|
{
|
||||||
|
const FB_SIZE_T pos = buffer.getCount();
|
||||||
|
memcpy(buffer.getBuffer(pos + dataLen) + pos, data, dataLen);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual bool result()
|
||||||
|
{
|
||||||
|
if (textType->getFlags() & TEXTTYPE_ATTR_ACCENT_INSENSITIVE)
|
||||||
|
UnicodeUtil::utf8Normalize(buffer);
|
||||||
|
|
||||||
|
return regex->matches((const char*) buffer.begin(), buffer.getCount(), &resultStart, &resultLength);
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual void getResultInfo(unsigned* start, unsigned* length)
|
||||||
|
{
|
||||||
|
*start = resultStart;
|
||||||
|
*length = resultLength;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
AutoPtr<SubstringSimilarRegex> regex;
|
||||||
|
UCharBuffer buffer;
|
||||||
|
unsigned resultStart, resultLength;
|
||||||
|
};
|
||||||
|
|
||||||
// constants used in matches and sleuth
|
// constants used in matches and sleuth
|
||||||
const int CHAR_GDML_MATCH_ONE = TextType::CHAR_QUESTION_MARK;
|
const int CHAR_GDML_MATCH_ONE = TextType::CHAR_QUESTION_MARK;
|
||||||
const int CHAR_GDML_MATCH_ANY = TextType::CHAR_ASTERISK;
|
const int CHAR_GDML_MATCH_ANY = TextType::CHAR_ASTERISK;
|
||||||
@ -725,8 +886,6 @@ template <
|
|||||||
typename pStartsMatcher,
|
typename pStartsMatcher,
|
||||||
typename pContainsMatcher,
|
typename pContainsMatcher,
|
||||||
typename pLikeMatcher,
|
typename pLikeMatcher,
|
||||||
typename pSimilarToMatcher,
|
|
||||||
typename pSubstringSimilarMatcher,
|
|
||||||
typename pMatchesMatcher,
|
typename pMatchesMatcher,
|
||||||
typename pSleuthMatcher
|
typename pSleuthMatcher
|
||||||
>
|
>
|
||||||
@ -781,22 +940,22 @@ public:
|
|||||||
getCharSet()->getSqlMatchOne(), getCharSet()->getSqlMatchOneLength());
|
getCharSet()->getSqlMatchOne(), getCharSet()->getSqlMatchOneLength());
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual bool similarTo(MemoryPool& pool, const UCHAR* s, SLONG sl,
|
virtual bool similarTo(thread_db* tdbb, MemoryPool& pool, const UCHAR* s, SLONG sl,
|
||||||
const UCHAR* p, SLONG pl, const UCHAR* escape, SLONG escapeLen)
|
const UCHAR* p, SLONG pl, const UCHAR* escape, SLONG escapeLen)
|
||||||
{
|
{
|
||||||
return pSimilarToMatcher::evaluate(pool, this, s, sl, p, pl, escape, escapeLen);
|
return Re2SimilarMatcher::evaluate(tdbb, pool, this, s, sl, p, pl, escape, escapeLen);
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual PatternMatcher* createSimilarToMatcher(MemoryPool& pool, const UCHAR* p, SLONG pl,
|
virtual PatternMatcher* createSimilarToMatcher(thread_db* tdbb, MemoryPool& pool, const UCHAR* p, SLONG pl,
|
||||||
const UCHAR* escape, SLONG escapeLen)
|
const UCHAR* escape, SLONG escapeLen)
|
||||||
{
|
{
|
||||||
return pSimilarToMatcher::create(pool, this, p, pl, escape, escapeLen);
|
return Re2SimilarMatcher::create(tdbb, pool, this, p, pl, escape, escapeLen);
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual BaseSubstringSimilarMatcher* createSubstringSimilarMatcher(MemoryPool& pool,
|
virtual BaseSubstringSimilarMatcher* createSubstringSimilarMatcher(thread_db* tdbb, MemoryPool& pool,
|
||||||
const UCHAR* p, SLONG pl, const UCHAR* escape, SLONG escapeLen)
|
const UCHAR* p, SLONG pl, const UCHAR* escape, SLONG escapeLen)
|
||||||
{
|
{
|
||||||
return pSubstringSimilarMatcher::create(pool, this, p, pl, escape, escapeLen);
|
return Re2SubstringSimilarMatcher::create(tdbb, pool, this, p, pl, escape, escapeLen);
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual bool contains(MemoryPool& pool, const UCHAR* s, SLONG sl, const UCHAR* p, SLONG pl)
|
virtual bool contains(MemoryPool& pool, const UCHAR* s, SLONG sl, const UCHAR* p, SLONG pl)
|
||||||
@ -823,8 +982,6 @@ Collation* newCollation(MemoryPool& pool, TTYPE_ID id, texttype* tt, CharSet* cs
|
|||||||
StartsMatcherUCharDirect,
|
StartsMatcherUCharDirect,
|
||||||
ContainsMatcherUCharDirect,
|
ContainsMatcherUCharDirect,
|
||||||
LikeMatcher<T>,
|
LikeMatcher<T>,
|
||||||
SimilarToMatcher<T>,
|
|
||||||
SubstringSimilarMatcher<T>,
|
|
||||||
MatchesMatcher<T>,
|
MatchesMatcher<T>,
|
||||||
SleuthMatcher<T>
|
SleuthMatcher<T>
|
||||||
> DirectImpl;
|
> DirectImpl;
|
||||||
@ -833,8 +990,6 @@ Collation* newCollation(MemoryPool& pool, TTYPE_ID id, texttype* tt, CharSet* cs
|
|||||||
StartsMatcherUCharCanonical,
|
StartsMatcherUCharCanonical,
|
||||||
ContainsMatcher<T>,
|
ContainsMatcher<T>,
|
||||||
LikeMatcher<T>,
|
LikeMatcher<T>,
|
||||||
SimilarToMatcher<T>,
|
|
||||||
SubstringSimilarMatcher<T>,
|
|
||||||
MatchesMatcher<T>,
|
MatchesMatcher<T>,
|
||||||
SleuthMatcher<T>
|
SleuthMatcher<T>
|
||||||
> NonDirectImpl;
|
> NonDirectImpl;
|
||||||
|
@ -66,12 +66,12 @@ public:
|
|||||||
virtual PatternMatcher* createLikeMatcher(MemoryPool& pool, const UCHAR* p, SLONG pl,
|
virtual PatternMatcher* createLikeMatcher(MemoryPool& pool, const UCHAR* p, SLONG pl,
|
||||||
const UCHAR* escape, SLONG escapeLen) = 0;
|
const UCHAR* escape, SLONG escapeLen) = 0;
|
||||||
|
|
||||||
virtual bool similarTo(MemoryPool& pool, const UCHAR* s, SLONG sl, const UCHAR* p, SLONG pl,
|
virtual bool similarTo(thread_db* tdbb, MemoryPool& pool, const UCHAR* s, SLONG sl, const UCHAR* p, SLONG pl,
|
||||||
const UCHAR* escape, SLONG escapeLen) = 0;
|
const UCHAR* escape, SLONG escapeLen) = 0;
|
||||||
virtual PatternMatcher* createSimilarToMatcher(MemoryPool& pool, const UCHAR* p, SLONG pl,
|
virtual PatternMatcher* createSimilarToMatcher(thread_db* tdbb, MemoryPool& pool, const UCHAR* p, SLONG pl,
|
||||||
const UCHAR* escape, SLONG escapeLen) = 0;
|
const UCHAR* escape, SLONG escapeLen) = 0;
|
||||||
|
|
||||||
virtual BaseSubstringSimilarMatcher* createSubstringSimilarMatcher(MemoryPool& pool,
|
virtual BaseSubstringSimilarMatcher* createSubstringSimilarMatcher(thread_db* tdbb, MemoryPool& pool,
|
||||||
const UCHAR* p, SLONG pl, const UCHAR* escape, SLONG escapeLen) = 0;
|
const UCHAR* p, SLONG pl, const UCHAR* escape, SLONG escapeLen) = 0;
|
||||||
|
|
||||||
virtual bool contains(MemoryPool& pool, const UCHAR* s, SLONG sl, const UCHAR* p, SLONG pl) = 0;
|
virtual bool contains(MemoryPool& pool, const UCHAR* s, SLONG sl, const UCHAR* p, SLONG pl) = 0;
|
||||||
|
@ -654,6 +654,7 @@ bool IntlManager::lookupCollation(const string& collationName,
|
|||||||
attributes, specificAttributes, specificAttributesLen, ignoreAttributes,
|
attributes, specificAttributes, specificAttributesLen, ignoreAttributes,
|
||||||
collationExternalInfo.configInfo.c_str()))
|
collationExternalInfo.configInfo.c_str()))
|
||||||
{
|
{
|
||||||
|
tt->texttype_flags = attributes;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -87,23 +87,12 @@ public:
|
|||||||
UpcaseConverter(MemoryPool& pool, TextType* obj, const UCHAR*& str, SLONG& len)
|
UpcaseConverter(MemoryPool& pool, TextType* obj, const UCHAR*& str, SLONG& len)
|
||||||
: PrevConverter(pool, obj, str, len)
|
: PrevConverter(pool, obj, str, len)
|
||||||
{
|
{
|
||||||
if (len > (int) sizeof(tempBuffer))
|
obj->str_to_upper(len, str, len, tempBuffer.getBuffer(len, false));
|
||||||
out_str = FB_NEW_POOL(pool) UCHAR[len];
|
str = tempBuffer.begin();
|
||||||
else
|
|
||||||
out_str = tempBuffer;
|
|
||||||
obj->str_to_upper(len, str, len, out_str);
|
|
||||||
str = out_str;
|
|
||||||
}
|
|
||||||
|
|
||||||
~UpcaseConverter()
|
|
||||||
{
|
|
||||||
if (out_str != tempBuffer)
|
|
||||||
delete[] out_str;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
UCHAR tempBuffer[100];
|
Firebird::UCharBuffer tempBuffer;
|
||||||
UCHAR* out_str;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename PrevConverter = NullStrConverter>
|
template <typename PrevConverter = NullStrConverter>
|
||||||
@ -115,29 +104,17 @@ public:
|
|||||||
{
|
{
|
||||||
const SLONG out_len = len / obj->getCharSet()->minBytesPerChar() * obj->getCanonicalWidth();
|
const SLONG out_len = len / obj->getCharSet()->minBytesPerChar() * obj->getCanonicalWidth();
|
||||||
|
|
||||||
if (out_len > (int) sizeof(tempBuffer))
|
|
||||||
out_str = FB_NEW_POOL(pool) UCHAR[out_len];
|
|
||||||
else
|
|
||||||
out_str = tempBuffer;
|
|
||||||
|
|
||||||
if (str)
|
if (str)
|
||||||
{
|
{
|
||||||
len = obj->canonical(len, str, out_len, out_str) * obj->getCanonicalWidth();
|
len = obj->canonical(len, str, out_len, tempBuffer.getBuffer(out_len, false)) * obj->getCanonicalWidth();
|
||||||
str = out_str;
|
str = tempBuffer.begin();
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
len = 0;
|
len = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
~CanonicalConverter()
|
|
||||||
{
|
|
||||||
if (out_str != tempBuffer)
|
|
||||||
delete[] out_str;
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
UCHAR tempBuffer[100];
|
Firebird::UCharBuffer tempBuffer;
|
||||||
UCHAR* out_str;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace Jrd
|
} // namespace Jrd
|
||||||
|
@ -50,50 +50,23 @@ TableMatcher::TableMatcher(MemoryPool& pool,
|
|||||||
const string& excludeFilter)
|
const string& excludeFilter)
|
||||||
: m_tables(pool)
|
: m_tables(pool)
|
||||||
{
|
{
|
||||||
m_cs = FB_NEW_POOL(pool) charset;
|
|
||||||
m_tt = FB_NEW_POOL(pool) texttype;
|
|
||||||
|
|
||||||
IntlUtil::initUtf8Charset(m_cs);
|
|
||||||
|
|
||||||
string collAttributes("ICU-VERSION=");
|
|
||||||
collAttributes += Jrd::UnicodeUtil::getDefaultIcuVersion();
|
|
||||||
IntlUtil::setupIcuAttributes(m_cs, collAttributes, "", collAttributes);
|
|
||||||
|
|
||||||
UCharBuffer collAttributesBuffer;
|
|
||||||
collAttributesBuffer.push(reinterpret_cast<const UCHAR*>(collAttributes.c_str()),
|
|
||||||
collAttributes.length());
|
|
||||||
|
|
||||||
if (!IntlUtil::initUnicodeCollation(m_tt, m_cs, "UNICODE", 0, collAttributesBuffer, ""))
|
|
||||||
raiseError("Cannot initialize UNICODE collation");
|
|
||||||
|
|
||||||
m_charSet = CharSet::createInstance(pool, 0, m_cs);
|
|
||||||
m_textType = FB_NEW_POOL(pool) TextType(0, m_tt, m_charSet);
|
|
||||||
|
|
||||||
if (includeFilter.hasData())
|
if (includeFilter.hasData())
|
||||||
{
|
{
|
||||||
m_includeMatcher.reset(FB_NEW_POOL(pool) SimilarMatcher(
|
m_includeMatcher.reset(FB_NEW_POOL(pool) SimilarToRegex(
|
||||||
pool, m_textType,
|
pool, true,
|
||||||
(const UCHAR*) includeFilter.c_str(),
|
includeFilter.c_str(), includeFilter.length(),
|
||||||
includeFilter.length(),
|
"\\", 1));
|
||||||
'\\', true));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (excludeFilter.hasData())
|
if (excludeFilter.hasData())
|
||||||
{
|
{
|
||||||
m_excludeMatcher.reset(FB_NEW_POOL(pool) SimilarMatcher(
|
m_excludeMatcher.reset(FB_NEW_POOL(pool) SimilarToRegex(
|
||||||
pool, m_textType,
|
pool, true,
|
||||||
(const UCHAR*) excludeFilter.c_str(),
|
excludeFilter.c_str(), excludeFilter.length(),
|
||||||
excludeFilter.length(),
|
"\\", 1));
|
||||||
'\\', true));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
TableMatcher::~TableMatcher()
|
|
||||||
{
|
|
||||||
if (m_tt && m_tt->texttype_fn_destroy)
|
|
||||||
m_tt->texttype_fn_destroy(m_tt);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool TableMatcher::matchTable(const MetaName& tableName)
|
bool TableMatcher::matchTable(const MetaName& tableName)
|
||||||
{
|
{
|
||||||
try
|
try
|
||||||
@ -104,18 +77,10 @@ bool TableMatcher::matchTable(const MetaName& tableName)
|
|||||||
enabled = true;
|
enabled = true;
|
||||||
|
|
||||||
if (m_includeMatcher)
|
if (m_includeMatcher)
|
||||||
{
|
enabled = m_includeMatcher->matches(tableName.c_str(), tableName.length());
|
||||||
m_includeMatcher->reset();
|
|
||||||
m_includeMatcher->process((const UCHAR*) tableName.c_str(), tableName.length());
|
|
||||||
enabled = m_includeMatcher->result();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (enabled && m_excludeMatcher)
|
if (enabled && m_excludeMatcher)
|
||||||
{
|
enabled = !m_excludeMatcher->matches(tableName.c_str(), tableName.length());
|
||||||
m_excludeMatcher->reset();
|
|
||||||
m_excludeMatcher->process((const UCHAR*) tableName.c_str(), tableName.length());
|
|
||||||
enabled = !m_excludeMatcher->result();
|
|
||||||
}
|
|
||||||
|
|
||||||
m_tables.put(tableName, enabled);
|
m_tables.put(tableName, enabled);
|
||||||
}
|
}
|
||||||
|
@ -26,9 +26,9 @@
|
|||||||
|
|
||||||
#include "../common/classes/array.h"
|
#include "../common/classes/array.h"
|
||||||
#include "../common/classes/semaphore.h"
|
#include "../common/classes/semaphore.h"
|
||||||
|
#include "../common/SimilarToRegex.h"
|
||||||
#include "../common/os/guid.h"
|
#include "../common/os/guid.h"
|
||||||
#include "../common/isc_s_proto.h"
|
#include "../common/isc_s_proto.h"
|
||||||
#include "../../jrd/SimilarToMatcher.h"
|
|
||||||
#include "../../jrd/intl_classes.h"
|
#include "../../jrd/intl_classes.h"
|
||||||
|
|
||||||
#include "Config.h"
|
#include "Config.h"
|
||||||
@ -38,25 +38,18 @@ namespace Replication
|
|||||||
{
|
{
|
||||||
class TableMatcher
|
class TableMatcher
|
||||||
{
|
{
|
||||||
typedef Jrd::UpcaseConverter<Jrd::NullStrConverter> SimilarConverter;
|
|
||||||
typedef Firebird::SimilarToMatcher<UCHAR, SimilarConverter> SimilarMatcher;
|
|
||||||
typedef Firebird::GenericMap<Firebird::Pair<Firebird::Left<Firebird::MetaName, bool> > > TablePermissionMap;
|
typedef Firebird::GenericMap<Firebird::Pair<Firebird::Left<Firebird::MetaName, bool> > > TablePermissionMap;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
TableMatcher(MemoryPool& pool,
|
TableMatcher(MemoryPool& pool,
|
||||||
const Firebird::string& includeFilter,
|
const Firebird::string& includeFilter,
|
||||||
const Firebird::string& excludeFilter);
|
const Firebird::string& excludeFilter);
|
||||||
~TableMatcher();
|
|
||||||
|
|
||||||
bool matchTable(const Firebird::MetaName& tableName);
|
bool matchTable(const Firebird::MetaName& tableName);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
charset* m_cs;
|
Firebird::AutoPtr<Firebird::SimilarToRegex> m_includeMatcher;
|
||||||
Firebird::AutoPtr<texttype> m_tt;
|
Firebird::AutoPtr<Firebird::SimilarToRegex> m_excludeMatcher;
|
||||||
Firebird::AutoPtr<Jrd::CharSet> m_charSet;
|
|
||||||
Firebird::AutoPtr<Jrd::TextType> m_textType;
|
|
||||||
Firebird::AutoPtr<SimilarMatcher> m_includeMatcher;
|
|
||||||
Firebird::AutoPtr<SimilarMatcher> m_excludeMatcher;
|
|
||||||
TablePermissionMap m_tables;
|
TablePermissionMap m_tables;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -570,7 +570,6 @@ VI. ADDITIONAL NOTES
|
|||||||
#include "../common/db_alias.h"
|
#include "../common/db_alias.h"
|
||||||
#include "../jrd/intl_proto.h"
|
#include "../jrd/intl_proto.h"
|
||||||
#include "../jrd/lck_proto.h"
|
#include "../jrd/lck_proto.h"
|
||||||
#include "../jrd/Collation.h"
|
|
||||||
|
|
||||||
#ifdef DEBUG_VAL_VERBOSE
|
#ifdef DEBUG_VAL_VERBOSE
|
||||||
#include "../jrd/dmp_proto.h"
|
#include "../jrd/dmp_proto.h"
|
||||||
@ -592,18 +591,21 @@ static void print_rhd(USHORT, const rhd*);
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
static PatternMatcher* createPatternMatcher(thread_db* tdbb, const char* pattern)
|
static SimilarToRegex* createPatternMatcher(thread_db* tdbb, const char* pattern)
|
||||||
{
|
{
|
||||||
PatternMatcher* matcher = NULL;
|
SimilarToRegex* matcher = NULL;
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
if (pattern)
|
if (pattern)
|
||||||
{
|
{
|
||||||
const int len = strlen(pattern);
|
const int len = strlen(pattern);
|
||||||
|
|
||||||
Collation* obj = INTL_texttype_lookup(tdbb, CS_UTF8);
|
//// TODO: Should this be different than trace and replication
|
||||||
matcher = obj->createSimilarToMatcher(*tdbb->getDefaultPool(),
|
//// and use case sensitive matcher?
|
||||||
(const UCHAR*) pattern, len, (UCHAR*) "\\", 1);
|
matcher = FB_NEW_POOL(*tdbb->getDefaultPool()) SimilarToRegex(
|
||||||
|
*tdbb->getDefaultPool(), false,
|
||||||
|
pattern, len,
|
||||||
|
"\\", 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
catch (const Exception& ex)
|
catch (const Exception& ex)
|
||||||
@ -870,8 +872,6 @@ Validation::Validation(thread_db* tdbb, UtilSvc* uSvc) :
|
|||||||
vdr_page_bitmap = NULL;
|
vdr_page_bitmap = NULL;
|
||||||
|
|
||||||
vdr_service = uSvc;
|
vdr_service = uSvc;
|
||||||
vdr_tab_incl = vdr_tab_excl = NULL;
|
|
||||||
vdr_idx_incl = vdr_idx_excl = NULL;
|
|
||||||
vdr_lock_tout = -10;
|
vdr_lock_tout = -10;
|
||||||
|
|
||||||
if (uSvc) {
|
if (uSvc) {
|
||||||
@ -882,11 +882,6 @@ Validation::Validation(thread_db* tdbb, UtilSvc* uSvc) :
|
|||||||
|
|
||||||
Validation::~Validation()
|
Validation::~Validation()
|
||||||
{
|
{
|
||||||
delete vdr_tab_incl;
|
|
||||||
delete vdr_tab_excl;
|
|
||||||
delete vdr_idx_incl;
|
|
||||||
delete vdr_idx_excl;
|
|
||||||
|
|
||||||
output("Validation finished\n");
|
output("Validation finished\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1654,22 +1649,14 @@ void Validation::walk_database()
|
|||||||
|
|
||||||
if (vdr_tab_incl)
|
if (vdr_tab_incl)
|
||||||
{
|
{
|
||||||
vdr_tab_incl->reset();
|
if (!vdr_tab_incl->matches(relation->rel_name.c_str(), relation->rel_name.length()))
|
||||||
if (!vdr_tab_incl->process((UCHAR*) relation->rel_name.c_str(), relation->rel_name.length()) ||
|
|
||||||
!vdr_tab_incl->result())
|
|
||||||
{
|
|
||||||
continue;
|
continue;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (vdr_tab_excl)
|
if (vdr_tab_excl)
|
||||||
{
|
{
|
||||||
vdr_tab_excl->reset();
|
if (vdr_tab_excl->matches(relation->rel_name.c_str(), relation->rel_name.length()))
|
||||||
if (!vdr_tab_excl->process((UCHAR*) relation->rel_name.c_str(), relation->rel_name.length()) ||
|
|
||||||
vdr_tab_excl->result())
|
|
||||||
{
|
|
||||||
continue;
|
continue;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// We can't realiable track double allocated page's when validating online.
|
// We can't realiable track double allocated page's when validating online.
|
||||||
@ -3163,15 +3150,13 @@ Validation::RTN Validation::walk_root(jrd_rel* relation)
|
|||||||
|
|
||||||
if (vdr_idx_incl)
|
if (vdr_idx_incl)
|
||||||
{
|
{
|
||||||
vdr_idx_incl->reset();
|
if (!vdr_idx_incl->matches(index.c_str(), index.length()))	// the index include-filter is tested against the index name, not the relation name
|
||||||
if (!vdr_idx_incl->process((UCHAR*) index.c_str(), index.length()) || !vdr_idx_incl->result())
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (vdr_idx_excl)
|
if (vdr_idx_excl)
|
||||||
{
|
{
|
||||||
vdr_idx_excl->reset();
|
if (vdr_idx_excl->matches(index.c_str(), index.length()))	// the index exclude-filter is tested against the index name, not the relation name
|
||||||
if (!vdr_idx_excl->process((UCHAR*) index.c_str(), index.length()) || vdr_idx_excl->result())
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -28,6 +28,7 @@
|
|||||||
#include "fb_types.h"
|
#include "fb_types.h"
|
||||||
|
|
||||||
#include "../common/classes/array.h"
|
#include "../common/classes/array.h"
|
||||||
|
#include "../common/SimilarToRegex.h"
|
||||||
#include "../jrd/ods.h"
|
#include "../jrd/ods.h"
|
||||||
#include "../jrd/cch.h"
|
#include "../jrd/cch.h"
|
||||||
#include "../jrd/sbm.h"
|
#include "../jrd/sbm.h"
|
||||||
@ -150,10 +151,10 @@ private:
|
|||||||
ULONG vdr_err_counts[VAL_MAX_ERROR];
|
ULONG vdr_err_counts[VAL_MAX_ERROR];
|
||||||
|
|
||||||
Firebird::UtilSvc* vdr_service;
|
Firebird::UtilSvc* vdr_service;
|
||||||
PatternMatcher* vdr_tab_incl;
|
Firebird::AutoPtr<Firebird::SimilarToRegex> vdr_tab_incl;
|
||||||
PatternMatcher* vdr_tab_excl;
|
Firebird::AutoPtr<Firebird::SimilarToRegex> vdr_tab_excl;
|
||||||
PatternMatcher* vdr_idx_incl;
|
Firebird::AutoPtr<Firebird::SimilarToRegex> vdr_idx_incl;
|
||||||
PatternMatcher* vdr_idx_excl;
|
Firebird::AutoPtr<Firebird::SimilarToRegex> vdr_idx_excl;
|
||||||
int vdr_lock_tout;
|
int vdr_lock_tout;
|
||||||
void checkDPinPP(jrd_rel *relation, SLONG page_number);
|
void checkDPinPP(jrd_rel *relation, SLONG page_number);
|
||||||
void checkDPinPIP(jrd_rel *relation, SLONG page_number);
|
void checkDPinPIP(jrd_rel *relation, SLONG page_number);
|
||||||
|
@ -46,7 +46,6 @@ set(fbtrace_src
|
|||||||
ntrace/TraceConfiguration.cpp
|
ntrace/TraceConfiguration.cpp
|
||||||
ntrace/traceplugin.cpp
|
ntrace/traceplugin.cpp
|
||||||
ntrace/TracePluginImpl.cpp
|
ntrace/TracePluginImpl.cpp
|
||||||
ntrace/TraceUnicodeUtils.cpp
|
|
||||||
|
|
||||||
ntrace/os/platform.h
|
ntrace/os/platform.h
|
||||||
)
|
)
|
||||||
|
@ -26,9 +26,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include "TraceConfiguration.h"
|
#include "TraceConfiguration.h"
|
||||||
#include "TraceUnicodeUtils.h"
|
#include "../../common/SimilarToRegex.h"
|
||||||
#include "../../jrd/evl_string.h"
|
|
||||||
#include "../../jrd/SimilarToMatcher.h"
|
|
||||||
#include "../../common/isc_f_proto.h"
|
#include "../../common/isc_f_proto.h"
|
||||||
|
|
||||||
using namespace Firebird;
|
using namespace Firebird;
|
||||||
@ -67,26 +65,6 @@ void TraceCfgReader::readTraceConfiguration(const char* text,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
namespace
|
|
||||||
{
|
|
||||||
template <typename PrevConverter = Jrd::NullStrConverter>
|
|
||||||
class SystemToUtf8Converter : public PrevConverter
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
SystemToUtf8Converter(MemoryPool& pool, Jrd::TextType* obj, const UCHAR*& str, SLONG& len)
|
|
||||||
: PrevConverter(pool, obj, str, len)
|
|
||||||
{
|
|
||||||
buffer.assign(reinterpret_cast<const char*>(str), len);
|
|
||||||
ISC_systemToUtf8(buffer);
|
|
||||||
str = reinterpret_cast<const UCHAR*>(buffer.c_str());
|
|
||||||
len = buffer.length();
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
string buffer;
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
#define ERROR_PREFIX "error while parsing trace configuration\n\t"
|
#define ERROR_PREFIX "error while parsing trace configuration\n\t"
|
||||||
|
|
||||||
void TraceCfgReader::readConfig()
|
void TraceCfgReader::readConfig()
|
||||||
@ -156,31 +134,28 @@ void TraceCfgReader::readConfig()
|
|||||||
try
|
try
|
||||||
{
|
{
|
||||||
#ifdef WIN_NT // !CASE_SENSITIVITY
|
#ifdef WIN_NT // !CASE_SENSITIVITY
|
||||||
typedef Jrd::UpcaseConverter<SystemToUtf8Converter<> > SimilarConverter;
|
const bool caseInsensitive = true;
|
||||||
#else
|
#else
|
||||||
typedef SystemToUtf8Converter<> SimilarConverter;
|
const bool caseInsensitive = false;
|
||||||
#endif
|
#endif
|
||||||
|
string utf8Pattern = pattern;
|
||||||
|
ISC_systemToUtf8(utf8Pattern);
|
||||||
|
|
||||||
UnicodeCollationHolder unicodeCollation(*getDefaultMemoryPool());
|
SimilarToRegex matcher(*getDefaultMemoryPool(), caseInsensitive,
|
||||||
Jrd::TextType* textType = unicodeCollation.getTextType();
|
utf8Pattern.c_str(), utf8Pattern.length(), "\\", 1);
|
||||||
|
|
||||||
SimilarToMatcher<ULONG, Jrd::CanonicalConverter<SimilarConverter> > matcher(
|
|
||||||
*getDefaultMemoryPool(), textType, (const UCHAR*) pattern.c_str(),
|
|
||||||
pattern.length(), '\\', true);
|
|
||||||
|
|
||||||
regExpOk = true;
|
regExpOk = true;
|
||||||
|
|
||||||
matcher.process((const UCHAR*) m_databaseName.c_str(), m_databaseName.length());
|
PathName utf8DatabaseName = m_databaseName;
|
||||||
if (matcher.result())
|
ISC_systemToUtf8(utf8DatabaseName);
|
||||||
{
|
Array<SimilarToRegex::MatchPos> matchPosArray;
|
||||||
for (unsigned i = 0;
|
|
||||||
i <= matcher.getNumBranches() && i < FB_NELEM(m_subpatterns); ++i)
|
|
||||||
{
|
|
||||||
unsigned start, length;
|
|
||||||
matcher.getBranchInfo(i, &start, &length);
|
|
||||||
|
|
||||||
m_subpatterns[i].start = start;
|
if (matcher.matches(utf8DatabaseName.c_str(), utf8DatabaseName.length(), &matchPosArray))
|
||||||
m_subpatterns[i].end = start + length;
|
{
|
||||||
|
for (unsigned i = 0; i < matchPosArray.getCount() && i < FB_NELEM(m_subpatterns); ++i)
|
||||||
|
{
|
||||||
|
m_subpatterns[i].start = matchPosArray[i].start;
|
||||||
|
m_subpatterns[i].end = matchPosArray[i].start + matchPosArray[i].length;
|
||||||
}
|
}
|
||||||
|
|
||||||
match = exactMatch = true;
|
match = exactMatch = true;
|
||||||
|
@ -99,7 +99,6 @@ TracePluginImpl::TracePluginImpl(IPluginBase* plugin,
|
|||||||
transactions(getDefaultMemoryPool()),
|
transactions(getDefaultMemoryPool()),
|
||||||
statements(getDefaultMemoryPool()),
|
statements(getDefaultMemoryPool()),
|
||||||
services(getDefaultMemoryPool()),
|
services(getDefaultMemoryPool()),
|
||||||
unicodeCollation(*getDefaultMemoryPool()),
|
|
||||||
include_codes(*getDefaultMemoryPool()),
|
include_codes(*getDefaultMemoryPool()),
|
||||||
exclude_codes(*getDefaultMemoryPool())
|
exclude_codes(*getDefaultMemoryPool())
|
||||||
{
|
{
|
||||||
@ -124,8 +123,6 @@ TracePluginImpl::TracePluginImpl(IPluginBase* plugin,
|
|||||||
logWriter->addRef();
|
logWriter->addRef();
|
||||||
}
|
}
|
||||||
|
|
||||||
Jrd::TextType* textType = unicodeCollation.getTextType();
|
|
||||||
|
|
||||||
// Compile filtering regular expressions
|
// Compile filtering regular expressions
|
||||||
const char* str = NULL;
|
const char* str = NULL;
|
||||||
try
|
try
|
||||||
@ -136,9 +133,10 @@ TracePluginImpl::TracePluginImpl(IPluginBase* plugin,
|
|||||||
string filter(config.include_filter);
|
string filter(config.include_filter);
|
||||||
ISC_systemToUtf8(filter);
|
ISC_systemToUtf8(filter);
|
||||||
|
|
||||||
include_matcher = FB_NEW TraceSimilarToMatcher(
|
include_matcher = FB_NEW SimilarToRegex(
|
||||||
*getDefaultMemoryPool(), textType, (const UCHAR*) filter.c_str(),
|
*getDefaultMemoryPool(), true,
|
||||||
filter.length(), '\\', true);
|
filter.c_str(), filter.length(),
|
||||||
|
"\\", 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (config.exclude_filter.hasData())
|
if (config.exclude_filter.hasData())
|
||||||
@ -147,9 +145,10 @@ TracePluginImpl::TracePluginImpl(IPluginBase* plugin,
|
|||||||
string filter(config.exclude_filter);
|
string filter(config.exclude_filter);
|
||||||
ISC_systemToUtf8(filter);
|
ISC_systemToUtf8(filter);
|
||||||
|
|
||||||
exclude_matcher = FB_NEW TraceSimilarToMatcher(
|
exclude_matcher = FB_NEW SimilarToRegex(
|
||||||
*getDefaultMemoryPool(), textType, (const UCHAR*) filter.c_str(),
|
*getDefaultMemoryPool(), true,
|
||||||
filter.length(), '\\', true);
|
filter.c_str(), filter.length(),
|
||||||
|
"\\", 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
catch (const Exception&)
|
catch (const Exception&)
|
||||||
@ -1546,18 +1545,10 @@ void TracePluginImpl::register_sql_statement(ITraceSQLStatement* statement)
|
|||||||
return;
|
return;
|
||||||
|
|
||||||
if (config.include_filter.hasData())
|
if (config.include_filter.hasData())
|
||||||
{
|
need_statement = include_matcher->matches(sql, sql_length);
|
||||||
include_matcher->reset();
|
|
||||||
include_matcher->process((const UCHAR*) sql, sql_length);
|
|
||||||
need_statement = include_matcher->result();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (need_statement && config.exclude_filter.hasData())
|
if (need_statement && config.exclude_filter.hasData())
|
||||||
{
|
need_statement = !exclude_matcher->matches(sql, sql_length);
|
||||||
exclude_matcher->reset();
|
|
||||||
exclude_matcher->process((const UCHAR*) sql, sql_length);
|
|
||||||
need_statement = !exclude_matcher->result();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (need_statement)
|
if (need_statement)
|
||||||
{
|
{
|
||||||
@ -1949,18 +1940,10 @@ bool TracePluginImpl::checkServiceFilter(ITraceServiceConnection* service, bool
|
|||||||
bool enabled = true;
|
bool enabled = true;
|
||||||
|
|
||||||
if (config.include_filter.hasData())
|
if (config.include_filter.hasData())
|
||||||
{
|
enabled = include_matcher->matches(svcName, svcNameLen);
|
||||||
include_matcher->reset();
|
|
||||||
include_matcher->process((const UCHAR*) svcName, svcNameLen);
|
|
||||||
enabled = include_matcher->result();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (enabled && config.exclude_filter.hasData())
|
if (enabled && config.exclude_filter.hasData())
|
||||||
{
|
enabled = !exclude_matcher->matches(svcName, svcNameLen);
|
||||||
exclude_matcher->reset();
|
|
||||||
exclude_matcher->process((const UCHAR*) svcName, svcNameLen);
|
|
||||||
enabled = !exclude_matcher->result();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (data) {
|
if (data) {
|
||||||
data->enabled = enabled;
|
data->enabled = enabled;
|
||||||
|
@ -32,11 +32,7 @@
|
|||||||
#include "firebird.h"
|
#include "firebird.h"
|
||||||
#include "../../jrd/ntrace.h"
|
#include "../../jrd/ntrace.h"
|
||||||
#include "TracePluginConfig.h"
|
#include "TracePluginConfig.h"
|
||||||
#include "TraceUnicodeUtils.h"
|
#include "../../common/SimilarToRegex.h"
|
||||||
#include "../../jrd/intl_classes.h"
|
|
||||||
#include "../../jrd/evl_string.h"
|
|
||||||
#include "../../common/TextType.h"
|
|
||||||
#include "../../jrd/SimilarToMatcher.h"
|
|
||||||
#include "../../common/classes/rwlock.h"
|
#include "../../common/classes/rwlock.h"
|
||||||
#include "../../common/classes/GenericMap.h"
|
#include "../../common/classes/GenericMap.h"
|
||||||
#include "../../common/classes/locks.h"
|
#include "../../common/classes/locks.h"
|
||||||
@ -168,10 +164,7 @@ private:
|
|||||||
// Lock for log rotation
|
// Lock for log rotation
|
||||||
Firebird::RWLock renameLock;
|
Firebird::RWLock renameLock;
|
||||||
|
|
||||||
UnicodeCollationHolder unicodeCollation;
|
Firebird::AutoPtr<Firebird::SimilarToRegex> include_matcher, exclude_matcher;
|
||||||
typedef Firebird::SimilarToMatcher<ULONG, Jrd::UpcaseConverter<Jrd::CanonicalConverter<> > >
|
|
||||||
TraceSimilarToMatcher;
|
|
||||||
Firebird::AutoPtr<TraceSimilarToMatcher> include_matcher, exclude_matcher;
|
|
||||||
|
|
||||||
// Filters for gds error codes
|
// Filters for gds error codes
|
||||||
typedef Firebird::SortedArray<ISC_STATUS> GdsCodesArray;
|
typedef Firebird::SortedArray<ISC_STATUS> GdsCodesArray;
|
||||||
|
@ -1,65 +0,0 @@
|
|||||||
/*
|
|
||||||
* PROGRAM: Firebird Trace Services
|
|
||||||
* MODULE: TraceUnicodeUtils.cpp
|
|
||||||
* DESCRIPTION: Unicode support for trace needs
|
|
||||||
*
|
|
||||||
* The contents of this file are subject to the Initial
|
|
||||||
* Developer's Public License Version 1.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the
|
|
||||||
* License. You may obtain a copy of the License at
|
|
||||||
* http://www.ibphoenix.com/main.nfs?a=ibphoenix&page=ibp_idpl.
|
|
||||||
*
|
|
||||||
* Software distributed under the License is distributed AS IS,
|
|
||||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing rights
|
|
||||||
* and limitations under the License.
|
|
||||||
*
|
|
||||||
* The Original Code was created by Khorsun Vladyslav
|
|
||||||
* for the Firebird Open Source RDBMS project.
|
|
||||||
*
|
|
||||||
* Copyright (c) 2010 Khorsun Vladyslav <hvlad@users.sourceforge.net>
|
|
||||||
* and all contributors signed below.
|
|
||||||
*
|
|
||||||
* All Rights Reserved.
|
|
||||||
* Contributor(s): ______________________________________.
|
|
||||||
* Adriano dos Santos Fernandes
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
#include "TraceUnicodeUtils.h"
|
|
||||||
|
|
||||||
using namespace Firebird;
|
|
||||||
|
|
||||||
UnicodeCollationHolder::UnicodeCollationHolder(MemoryPool& pool)
|
|
||||||
{
|
|
||||||
cs = FB_NEW_POOL(pool) charset;
|
|
||||||
tt = FB_NEW_POOL(pool) texttype;
|
|
||||||
|
|
||||||
IntlUtil::initUtf8Charset(cs);
|
|
||||||
|
|
||||||
string collAttributes("ICU-VERSION=");
|
|
||||||
collAttributes += Jrd::UnicodeUtil::getDefaultIcuVersion();
|
|
||||||
IntlUtil::setupIcuAttributes(cs, collAttributes, "", collAttributes);
|
|
||||||
|
|
||||||
UCharBuffer collAttributesBuffer;
|
|
||||||
collAttributesBuffer.push(reinterpret_cast<const UCHAR*>(collAttributes.c_str()),
|
|
||||||
collAttributes.length());
|
|
||||||
|
|
||||||
if (!IntlUtil::initUnicodeCollation(tt, cs, "UNICODE", 0, collAttributesBuffer, string()))
|
|
||||||
fatal_exception::raiseFmt("cannot initialize UNICODE collation to use in trace plugin");
|
|
||||||
|
|
||||||
charSet = Jrd::CharSet::createInstance(pool, 0, cs);
|
|
||||||
textType = FB_NEW_POOL(pool) Jrd::TextType(0, tt, charSet);
|
|
||||||
}
|
|
||||||
|
|
||||||
UnicodeCollationHolder::~UnicodeCollationHolder()
|
|
||||||
{
|
|
||||||
fb_assert(tt->texttype_fn_destroy);
|
|
||||||
|
|
||||||
if (tt->texttype_fn_destroy)
|
|
||||||
tt->texttype_fn_destroy(tt);
|
|
||||||
|
|
||||||
// cs should be deleted by texttype_fn_destroy call above
|
|
||||||
delete tt;
|
|
||||||
}
|
|
@ -1,57 +0,0 @@
|
|||||||
/*
|
|
||||||
* PROGRAM: Firebird Trace Services
|
|
||||||
* MODULE: TraceUnicodeUtils.h
|
|
||||||
* DESCRIPTION: Unicode support for trace needs
|
|
||||||
*
|
|
||||||
* The contents of this file are subject to the Initial
|
|
||||||
* Developer's Public License Version 1.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the
|
|
||||||
* License. You may obtain a copy of the License at
|
|
||||||
* http://www.ibphoenix.com/main.nfs?a=ibphoenix&page=ibp_idpl.
|
|
||||||
*
|
|
||||||
* Software distributed under the License is distributed AS IS,
|
|
||||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing rights
|
|
||||||
* and limitations under the License.
|
|
||||||
*
|
|
||||||
* The Original Code was created by Khorsun Vladyslav
|
|
||||||
* for the Firebird Open Source RDBMS project.
|
|
||||||
*
|
|
||||||
* Copyright (c) 2010 Khorsun Vladyslav <hvlad@users.sourceforge.net>
|
|
||||||
* and all contributors signed below.
|
|
||||||
*
|
|
||||||
* All Rights Reserved.
|
|
||||||
* Contributor(s): ______________________________________.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef TRACE_UNICODE_UTILS_H
|
|
||||||
#define TRACE_UNICODE_UTILS_H
|
|
||||||
|
|
||||||
#include "firebird.h"
|
|
||||||
#include "../../common/classes/fb_string.h"
|
|
||||||
#include "../../jrd/intl_classes.h"
|
|
||||||
#include "../../common/TextType.h"
|
|
||||||
#include "../../common/unicode_util.h"
|
|
||||||
|
|
||||||
|
|
||||||
class UnicodeCollationHolder
|
|
||||||
{
|
|
||||||
private:
|
|
||||||
charset* cs;
|
|
||||||
texttype* tt;
|
|
||||||
Firebird::AutoPtr<Jrd::CharSet> charSet;
|
|
||||||
Firebird::AutoPtr<Jrd::TextType> textType;
|
|
||||||
|
|
||||||
public:
|
|
||||||
explicit UnicodeCollationHolder(Firebird::MemoryPool& pool);
|
|
||||||
~UnicodeCollationHolder();
|
|
||||||
|
|
||||||
Jrd::TextType* getTextType()
|
|
||||||
{
|
|
||||||
return textType;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
#endif // TRACE_UNICODE_UTILS_H
|
|
Loading…
Reference in New Issue
Block a user