From 56e8c6224c6587be89eaa4a511ebdc1883a8de5e Mon Sep 17 00:00:00 2001 From: MrTeeett <130389703+MrTeeett@users.noreply.github.com> Date: Mon, 25 Mar 2024 19:46:05 +0300 Subject: [PATCH] Add multi-character TRIM function (#8015) * Add multi-character TRIM function * Follow Adriano's suggestion Co-authored-by: Adriano dos Santos Fernandes <529415+asfernandes@users.noreply.github.com> --------- Co-authored-by: Dmitry Yemanov Co-authored-by: Adriano dos Santos Fernandes <529415+asfernandes@users.noreply.github.com> --- doc/sql.extensions/README.trim | 10 ++++ src/common/ParserTokens.h | 3 ++ src/dsql/ExprNodes.cpp | 96 +++++++++++++++++++++++++-------- src/dsql/ExprNodes.h | 3 +- src/dsql/parse.y | 43 +++++++++++++-- src/include/firebird/impl/blr.h | 1 + 6 files changed, 129 insertions(+), 27 deletions(-) diff --git a/doc/sql.extensions/README.trim b/doc/sql.extensions/README.trim index 67c992ab03..0bb5577a46 100644 --- a/doc/sql.extensions/README.trim +++ b/doc/sql.extensions/README.trim @@ -18,11 +18,16 @@ Format: <trim character> ::= <value expression> + <multi-character trim function> ::= + { BTRIM | LTRIM | RTRIM } <left paren> <value expression> [ <comma> <trim characters> ] <right paren> + Syntax Rules: 1) If <trim specification> is not specified, BOTH is assumed. 2) If <trim character> is not specified, ' ' is assumed. 3) If <trim specification> and/or <trim character> is specified, FROM should be specified. 4) If <trim specification> and <trim character> is not specified, FROM should not be specified. + 5) The multi-character trim functions accept a sequence of characters as the second argument and remove all + leading, trailing, or both leading and trailing occurrences of any of these characters, regardless of their order. 
Examples: A) @@ -36,3 +41,8 @@ B) trim(rdb$relation_name) || ' is a system table' from rdb$relations where rdb$system_flag = 1; + +C) + select + ltrim('baobab is a tree', 'aboe') + from rdb$database; diff --git a/src/common/ParserTokens.h b/src/common/ParserTokens.h index f478e17d61..ad2b682722 100644 --- a/src/common/ParserTokens.h +++ b/src/common/ParserTokens.h @@ -507,6 +507,9 @@ PARSER_TOKEN(TOK_TRANSACTION, "TRANSACTION", true) PARSER_TOKEN(TOK_TRAPS, "TRAPS", true) PARSER_TOKEN(TOK_TRIGGER, "TRIGGER", false) PARSER_TOKEN(TOK_TRIM, "TRIM", false) +PARSER_TOKEN(TOK_BTRIM, "BTRIM", false) +PARSER_TOKEN(TOK_LTRIM, "LTRIM", false) +PARSER_TOKEN(TOK_RTRIM, "RTRIM", false) PARSER_TOKEN(TOK_TRUE, "TRUE", false) PARSER_TOKEN(TOK_TRUNC, "TRUNC", true) PARSER_TOKEN(TOK_TRUSTED, "TRUSTED", true) diff --git a/src/dsql/ExprNodes.cpp b/src/dsql/ExprNodes.cpp index 67a200fd27..d503598f82 100644 --- a/src/dsql/ExprNodes.cpp +++ b/src/dsql/ExprNodes.cpp @@ -12531,9 +12531,10 @@ ValueExprNode* SysFuncCallNode::dsqlPass(DsqlCompilerScratch* dsqlScratch) static RegisterNode regTrimNode({blr_trim}); -TrimNode::TrimNode(MemoryPool& pool, UCHAR aWhere, ValueExprNode* aValue, ValueExprNode* aTrimChars) +TrimNode::TrimNode(MemoryPool& pool, UCHAR aWhere, UCHAR aWhat, ValueExprNode* aValue, ValueExprNode* aTrimChars) : TypedNode(pool), where(aWhere), + what(aWhat), value(aValue), trimChars(aTrimChars) { @@ -12544,9 +12545,9 @@ DmlNode* TrimNode::parse(thread_db* tdbb, MemoryPool& pool, CompilerScratch* csb UCHAR where = csb->csb_blr_reader.getByte(); UCHAR what = csb->csb_blr_reader.getByte(); - TrimNode* node = FB_NEW_POOL(pool) TrimNode(pool, where); + TrimNode* node = FB_NEW_POOL(pool) TrimNode(pool, where, what); - if (what == blr_trim_characters) + if (what == blr_trim_characters || what == blr_trim_multi_characters) node->trimChars = PAR_parse_value(tdbb, csb); node->value = PAR_parse_value(tdbb, csb); @@ -12567,7 +12568,7 @@ string TrimNode::internalPrint(NodePrinter& printer) 
const ValueExprNode* TrimNode::dsqlPass(DsqlCompilerScratch* dsqlScratch) { - TrimNode* node = FB_NEW_POOL(dsqlScratch->getPool()) TrimNode(dsqlScratch->getPool(), where, + TrimNode* node = FB_NEW_POOL(dsqlScratch->getPool()) TrimNode(dsqlScratch->getPool(), where, what, doDsqlPass(dsqlScratch, value), doDsqlPass(dsqlScratch, trimChars)); // Try to force trimChars to be same type as value: TRIM(? FROM FIELD) @@ -12595,7 +12596,7 @@ void TrimNode::genBlr(DsqlCompilerScratch* dsqlScratch) if (trimChars) { - dsqlScratch->appendUChar(blr_trim_characters); + dsqlScratch->appendUChar(what); GEN_expr(dsqlScratch, trimChars); } else @@ -12665,7 +12666,7 @@ void TrimNode::getDesc(thread_db* tdbb, CompilerScratch* csb, dsc* desc) ValueExprNode* TrimNode::copy(thread_db* tdbb, NodeCopier& copier) const { - TrimNode* node = FB_NEW_POOL(*tdbb->getDefaultPool()) TrimNode(*tdbb->getDefaultPool(), where); + TrimNode* node = FB_NEW_POOL(*tdbb->getDefaultPool()) TrimNode(*tdbb->getDefaultPool(), where, what); node->value = copier.copy(tdbb, value); if (trimChars) node->trimChars = copier.copy(tdbb, trimChars); @@ -12800,34 +12801,87 @@ dsc* TrimNode::execute(thread_db* tdbb, Request* request) const SLONG offsetLead = 0; SLONG offsetTrail = valueCanonicalLen; + const bool multi = what == blr_trim_multi_characters; // CVC: Avoid endless loop with zero length trim chars. if (charactersCanonicalLen) { - if (where == blr_trim_both || where == blr_trim_leading) + int charSize = charactersCanonical.getCount() / charactersLength; + if (!multi) { - // CVC: Prevent surprises with offsetLead < valueCanonicalLen; it may fail. - for (; offsetLead + charactersCanonicalLen <= valueCanonicalLen; - offsetLead += charactersCanonicalLen) + if (where == blr_trim_both || where == blr_trim_leading) { - if (memcmp(charactersCanonical.begin(), &valueCanonical[offsetLead], - charactersCanonicalLen) != 0) + // CVC: Prevent surprises with offsetLead < valueCanonicalLen; it may fail. 
+ for (; offsetLead + charactersCanonicalLen <= valueCanonicalLen; + offsetLead += charactersCanonicalLen) { - break; + if (memcmp(charactersCanonical.begin(), &valueCanonical[offsetLead], + charactersCanonicalLen) != 0) + { + break; + } + } + } + + if (where == blr_trim_both || where == blr_trim_trailing) + { + for (; offsetTrail - charactersCanonicalLen >= offsetLead; + offsetTrail -= charactersCanonicalLen) + { + if (memcmp(charactersCanonical.begin(), + &valueCanonical[offsetTrail - charactersCanonicalLen], + charactersCanonicalLen) != 0) + { + break; + } } } } - - if (where == blr_trim_both || where == blr_trim_trailing) + else { - for (; offsetTrail - charactersCanonicalLen >= offsetLead; - offsetTrail -= charactersCanonicalLen) + if (where == blr_trim_both || where == blr_trim_leading) { - if (memcmp(charactersCanonical.begin(), - &valueCanonical[offsetTrail - charactersCanonicalLen], - charactersCanonicalLen) != 0) + while (offsetLead < valueCanonicalLen) { - break; + bool found = false; + for (int i = 0; i < charactersCanonicalLen; i += charSize) + { + if (memcmp(&charactersCanonical[i], + &valueCanonical[offsetLead], + charSize) == 0) + { + found = true; + break; + } + } + if (!found) + { + break; + } + offsetLead += charSize; + } + } + + if (where == blr_trim_both || where == blr_trim_trailing) + { + while (offsetTrail - charSize >= offsetLead) + { + bool found = false; + for (int i = 0; i < charactersCanonicalLen; i += charSize) + { + if (memcmp(&charactersCanonical[i], + &valueCanonical[offsetTrail - charSize], + charSize) == 0) + { + found = true; + break; + } + } + if (!found) + { + break; + } + offsetTrail -= charSize; } } } diff --git a/src/dsql/ExprNodes.h b/src/dsql/ExprNodes.h index 51996cd8db..4684b95bfa 100644 --- a/src/dsql/ExprNodes.h +++ b/src/dsql/ExprNodes.h @@ -2107,7 +2107,7 @@ public: class TrimNode final : public TypedNode { public: - explicit TrimNode(MemoryPool& pool, UCHAR aWhere, + explicit TrimNode(MemoryPool& pool, UCHAR aWhere, 
UCHAR aWhat, ValueExprNode* aValue = NULL, ValueExprNode* aTrimChars = NULL); static DmlNode* parse(thread_db* tdbb, MemoryPool& pool, CompilerScratch* csb, const UCHAR blrOp); @@ -2137,6 +2137,7 @@ public: public: UCHAR where; + UCHAR what; NestConst value; NestConst trimChars; // may be NULL }; diff --git a/src/dsql/parse.y b/src/dsql/parse.y index 6e3b75306a..f0e45d8574 100644 --- a/src/dsql/parse.y +++ b/src/dsql/parse.y @@ -700,9 +700,12 @@ using namespace Firebird; // tokens added for Firebird 6.0 %token ANY_VALUE +%token BTRIM %token CALL %token FORMAT +%token LTRIM %token NAMED_ARG_ASSIGN +%token RTRIM // precedence declarations for expression evaluation @@ -4494,7 +4497,10 @@ keyword_or_column | VARBINARY | WINDOW | WITHOUT - | CALL // added in FB 6.0 + | BTRIM // added in FB 6.0 + | CALL + | LTRIM + | RTRIM ; col_opt @@ -8735,6 +8741,9 @@ of_first_last_day_part string_value_function : substring_function | trim_function + | btrim_function + | ltrim_function + | rtrim_function | UPPER '(' value ')' { $$ = newNode(blr_upcase, $3); } | LOWER '(' value ')' @@ -8766,13 +8775,13 @@ string_length_opt %type trim_function trim_function : TRIM '(' trim_specification value FROM value ')' - { $$ = newNode($3, $6, $4); } + { $$ = newNode($3, blr_trim_characters, $6, $4); } | TRIM '(' value FROM value ')' - { $$ = newNode(blr_trim_both, $5, $3); } + { $$ = newNode(blr_trim_both, blr_trim_characters, $5, $3); } | TRIM '(' trim_specification FROM value ')' - { $$ = newNode($3, $5); } + { $$ = newNode($3, blr_trim_spaces, $5); } | TRIM '(' value ')' - { $$ = newNode(blr_trim_both, $3); } + { $$ = newNode(blr_trim_both, blr_trim_spaces, $3); } ; %type trim_specification @@ -8782,6 +8791,30 @@ trim_specification | LEADING { $$ = blr_trim_leading; } ; +%type btrim_function +btrim_function + : BTRIM '(' value ',' value ')' + { $$ = newNode(blr_trim_both, blr_trim_multi_characters, $3, $5); } + | BTRIM '(' value ')' + { $$ = newNode(blr_trim_both, blr_trim_spaces, $3); } + ; + 
+%type ltrim_function +ltrim_function + : LTRIM '(' value ',' value ')' + { $$ = newNode(blr_trim_leading, blr_trim_multi_characters, $3, $5); } + | LTRIM '(' value ')' + { $$ = newNode(blr_trim_leading, blr_trim_spaces, $3); } + ; + +%type rtrim_function +rtrim_function + : RTRIM '(' value ',' value ')' + { $$ = newNode(blr_trim_trailing, blr_trim_multi_characters, $3, $5); } + | RTRIM '(' value ')' + { $$ = newNode(blr_trim_trailing, blr_trim_spaces, $3); } + ; + %type udf udf : symbol_UDF_call_name '(' argument_list_opt ')' diff --git a/src/include/firebird/impl/blr.h b/src/include/firebird/impl/blr.h index 09c4776279..ecad7bb11a 100644 --- a/src/include/firebird/impl/blr.h +++ b/src/include/firebird/impl/blr.h @@ -345,6 +345,7 @@ /* second sub parameter for blr_trim */ #define blr_trim_spaces (unsigned char)0 #define blr_trim_characters (unsigned char)1 +#define blr_trim_multi_characters (unsigned char)2 /* These codes are actions for cursors */