8
0
mirror of https://github.com/FirebirdSQL/firebird.git synced 2025-01-22 20:03:02 +01:00

Fix #7715 - Alternative String Literals and multibyte (UTF8) alternatives. (#7742)

This commit is contained in:
Adriano dos Santos Fernandes 2023-09-18 07:36:12 -03:00 committed by GitHub
parent 93acbd1aef
commit fa9c6d4d3c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 76 additions and 28 deletions

View File

@ -27,6 +27,7 @@
#include "../dsql/chars.h"
#include "../jrd/jrd.h"
#include "../jrd/DataTypeUtil.h"
#include "../dsql/metd_proto.h"
#include "../jrd/intl_proto.h"
#ifdef HAVE_FLOAT_H
@ -40,7 +41,7 @@ using namespace Jrd;
Parser::Parser(thread_db* tdbb, MemoryPool& pool, MemoryPool* aStatementPool, DsqlCompilerScratch* aScratch,
USHORT aClientDialect, USHORT aDbDialect, const TEXT* string, size_t length, SSHORT characterSet)
USHORT aClientDialect, USHORT aDbDialect, const TEXT* string, size_t length, SSHORT charSetId)
: PermanentStorage(pool),
statementPool(aStatementPool),
scratch(aScratch),
@ -50,6 +51,8 @@ Parser::Parser(thread_db* tdbb, MemoryPool& pool, MemoryPool* aStatementPool, Ds
strMarks(pool),
stmt_ambiguous(false)
{
charSet = INTL_charset_lookup(tdbb, charSetId);
yyps = 0;
yypath = 0;
yylvals = 0;
@ -76,7 +79,7 @@ Parser::Parser(thread_db* tdbb, MemoryPool& pool, MemoryPool* aStatementPool, Ds
lex.line_start = lex.last_token = lex.ptr = lex.leadingPtr = string;
lex.end = string + length;
lex.lines = 1;
lex.att_charset = characterSet;
lex.charSetId = charSetId;
lex.line_start_bk = lex.line_start;
lex.lines_bk = lex.lines;
lex.param_number = 1;
@ -715,31 +718,66 @@ int Parser::yylexAux()
if ((c == 'q' || c == 'Q') && lex.ptr + 3 < lex.end && *lex.ptr == '\'')
{
auto currentCharSet = charSet;
if (introducerCharSetName)
{
const auto symbol = METD_get_charset(scratch->getTransaction(),
introducerCharSetName->length(), introducerCharSetName->c_str());
if (!symbol)
{
// character set name is not defined
ERRD_post(
Arg::Gds(isc_sqlerr) << Arg::Num(-504) <<
Arg::Gds(isc_charset_not_found) << *introducerCharSetName);
}
currentCharSet = INTL_charset_lookup(tdbb, symbol->intlsym_ttype);
}
StrMark mark;
mark.pos = lex.last_token - lex.start;
char endChar = *++lex.ptr;
switch (endChar)
const auto* endChar = ++lex.ptr;
ULONG endCharSize = 0;
if (!IntlUtil::readOneChar(currentCharSet, reinterpret_cast<const UCHAR**>(&lex.ptr),
reinterpret_cast<const UCHAR*>(lex.end), &endCharSize))
{
case '{':
endChar = '}';
break;
case '(':
endChar = ')';
break;
case '[':
endChar = ']';
break;
case '<':
endChar = '>';
break;
endCharSize = 1;
}
while (++lex.ptr + 1 < lex.end)
if (endCharSize == 1)
{
if (*lex.ptr == endChar && lex.ptr[1] == '\'')
switch (*endChar)
{
size_t len = ++lex.ptr - lex.last_token - 4;
case '{':
endChar = "}";
break;
case '(':
endChar = ")";
break;
case '[':
endChar = "]";
break;
case '<':
endChar = ">";
break;
}
}
const auto start = lex.ptr + endCharSize;
ULONG charSize = endCharSize;
while (IntlUtil::readOneChar(currentCharSet, reinterpret_cast<const UCHAR**>(&lex.ptr),
reinterpret_cast<const UCHAR*>(lex.end), &charSize))
{
if (charSize == endCharSize &&
memcmp(lex.ptr, endChar, endCharSize) == 0 &&
lex.ptr[endCharSize] == '\'')
{
size_t len = lex.ptr - start;
if (len > MAX_STR_SIZE)
{
@ -749,9 +787,9 @@ int Parser::yylexAux()
Arg::Num(MAX_STR_SIZE));
}
yylval.intlStringPtr = newIntlString(Firebird::string(lex.last_token + 3, len));
yylval.intlStringPtr = newIntlString(Firebird::string(start, len));
++lex.ptr;
lex.ptr += endCharSize + 1;
mark.length = lex.ptr - lex.last_token;
mark.str = yylval.intlStringPtr;

View File

@ -99,7 +99,7 @@ private:
const TEXT* line_start;
const TEXT* last_token_bk;
const TEXT* line_start_bk;
SSHORT att_charset;
SSHORT charSetId;
SLONG lines, lines_bk;
int prev_keyword;
USHORT param_number;
@ -131,7 +131,7 @@ public:
public:
Parser(thread_db* tdbb, MemoryPool& pool, MemoryPool* aStatementPool, DsqlCompilerScratch* aScratch,
USHORT aClientDialect, USHORT aDbDialect, const TEXT* string, size_t length, SSHORT characterSet);
USHORT aClientDialect, USHORT aDbDialect, const TEXT* string, size_t length, SSHORT charSetId);
~Parser();
public:
@ -375,6 +375,7 @@ private:
USHORT client_dialect;
USHORT db_dialect;
USHORT parser_version;
CharSet* charSet;
CharSet* metadataCharSet;
Firebird::string transformedString;
@ -382,6 +383,9 @@ private:
bool stmt_ambiguous;
DsqlStatement* parsedStatement;
// Parser feedback for lexer
MetaName* introducerCharSetName = nullptr;
// These value/posn are taken from the lexer
YYSTYPE yylval;
YYPOSN yyposn;

View File

@ -7747,7 +7747,7 @@ ul_numeric_constant
u_constant
: u_numeric_constant
| sql_string
{ $$ = MAKE_str_constant($1, lex.att_charset); }
{ $$ = MAKE_str_constant($1, lex.charSetId); }
| DATE STRING
{
if (client_dialect < SQL_DIALECT_V6_TRANSITION)
@ -7837,12 +7837,18 @@ error_context
%type <intlStringPtr> sql_string
sql_string
: STRING // string in current charset
| INTRODUCER STRING // string in specific charset
| INTRODUCER
[
// feedback for lexer
introducerCharSetName = $1;
]
STRING // string in specific charset
[ introducerCharSetName = nullptr; ]
{
$$ = $2;
$$ = $3;
$$->setCharSet(*$1);
StrMark* mark = strMarks.get($2);
StrMark* mark = strMarks.get($3);
if (mark) // hex string is not in strMarks
mark->introduced = true;
@ -8179,7 +8185,7 @@ window_frame_exclusion_opt
%type <valueExprNode> delimiter_opt
delimiter_opt
: /* nothing */ { $$ = MAKE_str_constant(newIntlString(","), lex.att_charset); }
: /* nothing */ { $$ = MAKE_str_constant(newIntlString(","), lex.charSetId); }
| ',' value { $$ = $2; }
;