From fe178a14042c911a15062be544781d6e0f5fd502 Mon Sep 17 00:00:00 2001 From: Dmitry Yemanov Date: Wed, 15 Jan 2025 10:08:42 +0300 Subject: [PATCH] Frontport the reworked implementation of the full outer join --- src/jrd/optimizer/Optimizer.h | 2 +- src/jrd/optimizer/OuterJoin.cpp | 38 +++++++++++++++++++------------ src/jrd/recsrc/FullOuterJoin.cpp | 29 ++++++++++++++++++++--- src/jrd/recsrc/NestedLoopJoin.cpp | 7 +++--- src/jrd/recsrc/RecordSource.h | 6 +++-- 5 files changed, 59 insertions(+), 23 deletions(-) diff --git a/src/jrd/optimizer/Optimizer.h b/src/jrd/optimizer/Optimizer.h index fc12bd4dd5..c9c5c20f11 100644 --- a/src/jrd/optimizer/Optimizer.h +++ b/src/jrd/optimizer/Optimizer.h @@ -922,7 +922,7 @@ public: RecordSource* generate(); private: - RecordSource* process(const JoinType joinType); + RecordSource* process(StreamList* outerStreams = nullptr); thread_db* const tdbb; Optimizer* const optimizer; diff --git a/src/jrd/optimizer/OuterJoin.cpp b/src/jrd/optimizer/OuterJoin.cpp index 343ac1605f..876b7209eb 100644 --- a/src/jrd/optimizer/OuterJoin.cpp +++ b/src/jrd/optimizer/OuterJoin.cpp @@ -94,15 +94,25 @@ OuterJoin::OuterJoin(thread_db* aTdbb, Optimizer* opt, RecordSource* OuterJoin::generate() { - const auto outerJoinRsb = process(OUTER_JOIN); - if (!optimizer->isFullJoin()) - return outerJoinRsb; + { + fb_assert(optimizer->isLeftJoin()); + return process(); + } - // A FULL JOIN B is currently implemented similar to (A LEFT JOIN B) UNION ALL (B ANTI-JOIN A). + StreamList outerStreams; + const auto outerJoinRsb = process(&outerStreams); + + // A FULL JOIN B is currently implemented similar to: + // + // (A LEFT JOIN B) + // UNION ALL + // (B LEFT JOIN A WHERE A.* IS NULL) + // + // See also FullOuterJoin class implementation. // // At this point we already have the first part -- (A LEFT JOIN B) -- ready, - // so just swap the sides and make an anti-join. + // so just swap the sides and make the second (inverted) join. auto& outerStream = joinStreams[0]; auto& innerStream = joinStreams[1]; @@ -131,15 +141,15 @@ RecordSource* OuterJoin::generate() iter.reset(CMP_clone_node_opt(tdbb, csb, iter)); } - const auto antiJoinRsb = process(ANTI_JOIN); + const auto antiJoinRsb = process(); // Allocate and return the final join record source - return FB_NEW_POOL(getPool()) FullOuterJoin(csb, outerJoinRsb, antiJoinRsb); + return FB_NEW_POOL(getPool()) FullOuterJoin(csb, outerJoinRsb, antiJoinRsb, outerStreams); } -RecordSource* OuterJoin::process(const JoinType joinType) +RecordSource* OuterJoin::process(StreamList* outerStreams) { BoolExprNode* boolean = nullptr; @@ -153,8 +163,7 @@ RecordSource* OuterJoin::process(const JoinType joinType) { fb_assert(!outerStream.rsb); outerStream.rsb = optimizer->generateRetrieval(outerStream.number, - optimizer->isFullJoin() ? nullptr : sortPtr, - true, false, &boolean); + optimizer->isFullJoin() ? nullptr : sortPtr, true, false, &boolean); } else { @@ -173,13 +182,15 @@ RecordSource* OuterJoin::process(const JoinType joinType) boolean = optimizer->composeBoolean(); } + if (outerStreams) + outerStream.rsb->findUsedStreams(*outerStreams); + if (innerStream.number != INVALID_STREAM) { fb_assert(!innerStream.rsb); // AB: the sort clause for the inner stream of an OUTER JOIN // should never be used for the index retrieval - innerStream.rsb = optimizer->generateRetrieval(innerStream.number, nullptr, - false, (joinType == OUTER_JOIN) ? true : false); + innerStream.rsb = optimizer->generateRetrieval(innerStream.number, nullptr, false, true); } // Generate a parent filter record source for any remaining booleans that @@ -189,8 +200,7 @@ RecordSource* OuterJoin::process(const JoinType joinType) // Allocate and return the join record source - return FB_NEW_POOL(getPool()) - NestedLoopJoin(csb, outerStream.rsb, innerRsb, boolean, joinType); + return FB_NEW_POOL(getPool()) NestedLoopJoin(csb, outerStream.rsb, innerRsb, boolean); }; diff --git a/src/jrd/recsrc/FullOuterJoin.cpp b/src/jrd/recsrc/FullOuterJoin.cpp index 1578b1100b..d570fba14c 100644 --- a/src/jrd/recsrc/FullOuterJoin.cpp +++ b/src/jrd/recsrc/FullOuterJoin.cpp @@ -37,10 +37,13 @@ using namespace Jrd; // Data access: full outer join // ---------------------------- -FullOuterJoin::FullOuterJoin(CompilerScratch* csb, RecordSource* arg1, RecordSource* arg2) +FullOuterJoin::FullOuterJoin(CompilerScratch* csb, + RecordSource* arg1, RecordSource* arg2, + const StreamList& checkStreams) : RecordSource(csb), m_arg1(arg1), - m_arg2(arg2) + m_arg2(arg2), + m_checkStreams(csb->csb_pool, checkStreams) { fb_assert(m_arg1 && m_arg2); @@ -97,7 +100,27 @@ bool FullOuterJoin::internalGetRecord(thread_db* tdbb) const m_arg2->open(tdbb); } - return m_arg2->getRecord(tdbb); + // We should exclude matching records from the right-joined (second) record source, + // as they're already returned from the left-joined (first) record source + + while (m_arg2->getRecord(tdbb)) + { + bool matched = false; + + for (const auto stream : m_checkStreams) + { + if (request->req_rpb[stream].rpb_number.isValid()) + { + matched = true; + break; + } + } + + if (!matched) + return true; + } + + return false; } bool FullOuterJoin::refetchRecord(thread_db* /*tdbb*/) const diff --git a/src/jrd/recsrc/NestedLoopJoin.cpp b/src/jrd/recsrc/NestedLoopJoin.cpp index 2f89112807..9380d6a1d0 100644 --- a/src/jrd/recsrc/NestedLoopJoin.cpp +++ b/src/jrd/recsrc/NestedLoopJoin.cpp @@ -53,10 +53,11 @@ NestedLoopJoin::NestedLoopJoin(CompilerScratch* csb, FB_SIZE_T count, RecordSour } } -NestedLoopJoin::NestedLoopJoin(CompilerScratch* csb, RecordSource* outer, RecordSource* inner, - BoolExprNode* boolean, JoinType joinType) +NestedLoopJoin::NestedLoopJoin(CompilerScratch* csb, + RecordSource* outer, RecordSource* inner, + BoolExprNode* boolean) : RecordSource(csb), - m_joinType(joinType), + m_joinType(OUTER_JOIN), m_args(csb->csb_pool), m_boolean(boolean) { diff --git a/src/jrd/recsrc/RecordSource.h b/src/jrd/recsrc/RecordSource.h index ee65daf6f1..1fb0fc2974 100644 --- a/src/jrd/recsrc/RecordSource.h +++ b/src/jrd/recsrc/RecordSource.h @@ -1137,7 +1137,7 @@ namespace Jrd public: NestedLoopJoin(CompilerScratch* csb, FB_SIZE_T count, RecordSource* const* args); NestedLoopJoin(CompilerScratch* csb, RecordSource* outer, RecordSource* inner, - BoolExprNode* boolean, JoinType joinType); + BoolExprNode* boolean); void close(thread_db* tdbb) const override; @@ -1168,7 +1168,8 @@ namespace Jrd class FullOuterJoin : public RecordSource { public: - FullOuterJoin(CompilerScratch* csb, RecordSource* arg1, RecordSource* arg2); + FullOuterJoin(CompilerScratch* csb, RecordSource* arg1, RecordSource* arg2, + const StreamList& checkStreams); void close(thread_db* tdbb) const override; @@ -1191,6 +1192,7 @@ namespace Jrd private: NestConst m_arg1; NestConst m_arg2; + const StreamList m_checkStreams; }; class HashJoin : public RecordSource