8
0
mirror of https://github.com/FirebirdSQL/firebird.git synced 2025-02-02 09:20:39 +01:00

Refactored outer joins into a separate class

This commit is contained in:
Dmitry Yemanov 2024-02-09 18:07:30 +03:00
parent c40fe4e181
commit 1b4d390b0e
5 changed files with 266 additions and 187 deletions

View File

@ -108,6 +108,7 @@
<ClCompile Include="..\..\..\src\jrd\optimizer\Optimizer.cpp" />
<ClCompile Include="..\..\..\src\jrd\optimizer\Retrieval.cpp" />
<ClCompile Include="..\..\..\src\jrd\optimizer\InnerJoin.cpp" />
<ClCompile Include="..\..\..\src\jrd\optimizer\OuterJoin.cpp" />
<ClCompile Include="..\..\..\src\jrd\os\win32\winnt.cpp" />
<ClCompile Include="..\..\..\src\jrd\pag.cpp" />
<ClCompile Include="..\..\..\src\jrd\par.cpp" />

View File

@ -528,6 +528,9 @@
<ClCompile Include="..\..\..\src\jrd\optimizer\InnerJoin.cpp">
<Filter>Optimizer</Filter>
</ClCompile>
<ClCompile Include="..\..\..\src\jrd\optimizer\OuterJoin.cpp">
<Filter>Optimizer</Filter>
</ClCompile>
<ClCompile Include="..\..\..\src\jrd\optimizer\Optimizer.cpp">
<Filter>Optimizer</Filter>
</ClCompile>

View File

@ -785,8 +785,8 @@ RecordSource* Optimizer::compile(BoolExprNodeStack* parentStack)
StreamList rseStreams;
rse->computeRseStreams(rseStreams);
for (StreamList::iterator i = rseStreams.begin(); i != rseStreams.end(); ++i)
csb->csb_rpt[*i].deactivate();
for (const auto rseStream : rseStreams)
csb->csb_rpt[rseStream].deactivate();
// Find and collect booleans that are invariant in this context
// (i.e. independent from streams in the RseNode). We can do that
@ -902,7 +902,7 @@ RecordSource* Optimizer::compile(BoolExprNodeStack* parentStack)
// Outer joins are processed their own way
if (!isInnerJoin())
rsb = generateOuterJoin(rivers, &sort);
rsb = OuterJoin(tdbb, this, rse, rivers, &sort).generate();
else
{
// AB: If previous rsb's are already on the stack we can't use
@ -979,7 +979,7 @@ RecordSource* Optimizer::compile(BoolExprNodeStack* parentStack)
rsb = CrossJoin(csb, rivers).getRecordSource();
// Pick up any residual boolean that may have fallen thru the cracks
rsb = generateResidualBoolean(rsb);
rsb = applyResidualBoolean(rsb);
}
// Assign the sort node back if it wasn't used by the index navigation
@ -2499,169 +2499,6 @@ void Optimizer::generateInnerJoin(StreamList& streams,
}
//
// Generate a top level outer join. The "outer" and "inner" sub-streams must be
// handled differently from each other. The inner is like other streams.
// The outer one isn't because conjuncts may not eliminate records from the stream.
// They only determine if a join with an inner stream record is to be attempted.
//
// Parameters:
//   rivers     - stack of already compiled sub-streams; one river is popped for
//                every join operand that is neither a RelationSourceNode nor a
//                LocalTableSourceNode
//   sortClause - forwarded to the outer stream retrieval of a non-full join only;
//                the full join branch and every inner retrieval pass nullptr
// Returns a NestedLoopJoin (OUTER_JOIN) for a non-full join, or a FullOuterJoin
// wrapping an OUTER_JOIN and an ANTI_JOIN nested loop for a full join.
RecordSource* Optimizer::generateOuterJoin(RiverList& rivers,
SortNode** sortClause)
{
// Per-side bookkeeping. stream_rsb is either a ready record source taken from
// a river or nullptr when a retrieval still has to be generated. stream_num is
// assigned only for plain relation / local table operands and is left
// uninitialized for river-based operands, so it must not be read for those.
struct {
RecordSource* stream_rsb;
StreamType stream_num;
} stream_o, stream_i, *stream_ptr[2];
// Determine which stream should be outer and which is inner.
// In the case of a left join, the syntactically left stream is the
// outer, and the right stream is the inner. For all others, swap
// the sense of inner and outer, though for a full join it doesn't
// matter and we should probably try both orders to see which is
// more efficient.
// stream_ptr is indexed by the operand position in rse->rse_relations.
if (rse->rse_jointype != blr_left)
{
stream_ptr[1] = &stream_o;
stream_ptr[0] = &stream_i;
}
else
{
stream_ptr[0] = &stream_o;
stream_ptr[1] = &stream_i;
}
// Loop through the outer join sub-streams in
// reverse order because rivers may have been PUSHed
for (int i = 1; i >= 0; i--)
{
const auto node = rse->rse_relations[i];
if (nodeIs<RelationSourceNode>(node) || nodeIs<LocalTableSourceNode>(node))
{
// Plain stream: remember its number, the retrieval is generated later
stream_ptr[i]->stream_rsb = nullptr;
stream_ptr[i]->stream_num = node->getStream();
}
else
{
// Anything else has already been compiled into a river
River* const river = rivers.pop();
stream_ptr[i]->stream_rsb = river->getRecordSource();
}
}
if (!isFullJoin())
{
// Generate rsbs for the sub-streams.
// For the left sub-stream we also will get a boolean back.
BoolExprNode* boolean = nullptr;
if (!stream_o.stream_rsb)
{
stream_o.stream_rsb =
generateRetrieval(stream_o.stream_num, sortClause, true, false, &boolean);
}
if (!stream_i.stream_rsb)
{
// AB: the sort clause for the inner stream of an OUTER JOIN
// should never be used for the index retrieval
stream_i.stream_rsb =
generateRetrieval(stream_i.stream_num, nullptr, false, true);
}
// generate a parent boolean rsb for any remaining booleans that
// were not satisfied via an index lookup
stream_i.stream_rsb = generateResidualBoolean(stream_i.stream_rsb);
// Allocate and fill in the rsb
return FB_NEW_POOL(getPool())
NestedLoopJoin(csb, stream_o.stream_rsb, stream_i.stream_rsb,
boolean, OUTER_JOIN);
}
// Full join: build two nested loops (an outer join and an anti-join with the
// sides exchanged) and combine them. The flags below record which sides came
// from a river, because only sides without a river have a valid stream_num
// and can have their retrieval regenerated for the second pass.
bool hasOuterRsb = true, hasInnerRsb = true;
BoolExprNode* boolean = nullptr;
if (!stream_o.stream_rsb)
{
hasOuterRsb = false;
stream_o.stream_rsb =
generateRetrieval(stream_o.stream_num, nullptr, true, false, &boolean);
}
if (!stream_i.stream_rsb)
{
hasInnerRsb = false;
stream_i.stream_rsb =
generateRetrieval(stream_i.stream_num, nullptr, false, true);
}
const auto innerRsb = generateResidualBoolean(stream_i.stream_rsb);
// First half: outer stream joined to the (filtered) inner stream
const auto rsb1 = FB_NEW_POOL(getPool())
NestedLoopJoin(csb, stream_o.stream_rsb, innerRsb, boolean, OUTER_JOIN);
// Replace every conjunct consumed by the first pass with a clone.
// NOTE(review): presumably the clone can then be consumed again by the
// second pass -- confirm against ConjunctIterator::reset().
for (auto iter = getConjuncts(); iter.hasData(); ++iter)
{
if (iter & CONJUNCT_USED)
iter.reset(CMP_clone_node_opt(tdbb, csb, iter));
}
// Deactivate the streams whose retrievals were generated above,
// before their retrievals are generated a second time
if (!hasInnerRsb)
csb->csb_rpt[stream_i.stream_num].deactivate();
if (!hasOuterRsb)
csb->csb_rpt[stream_o.stream_num].deactivate();
boolean = nullptr;
// Second half: roles are exchanged, so the former inner stream is now
// retrieved with the flags used for the outer side and returns the boolean
if (!hasInnerRsb)
{
stream_i.stream_rsb =
generateRetrieval(stream_i.stream_num, nullptr, true, false, &boolean);
}
if (!hasOuterRsb)
{
stream_o.stream_rsb =
generateRetrieval(stream_o.stream_num, nullptr, false, false);
}
const auto outerRsb = generateResidualBoolean(stream_o.stream_rsb);
const auto rsb2 = FB_NEW_POOL(getPool())
NestedLoopJoin(csb, stream_i.stream_rsb, outerRsb, boolean, ANTI_JOIN);
// Combine both halves into the final full outer join record source
return FB_NEW_POOL(getPool()) FullOuterJoin(csb, rsb1, rsb2);
}
//
// Pick up any residual boolean remaining, meaning those that have not been used
// as part of some join. These booleans must still be applied to the result stream.
//
RecordSource* Optimizer::generateResidualBoolean(RecordSource* rsb)
{
	// Collect every base conjunct that nobody has consumed yet into a single
	// boolean, marking each one as used along the way.
	BoolExprNode* residual = nullptr;
	double factor = MAXIMUM_SELECTIVITY;

	for (auto conjunct = getBaseConjuncts(); conjunct.hasData(); ++conjunct)
	{
		if (conjunct & CONJUNCT_USED)
			continue;

		compose(getPool(), &residual, conjunct);
		conjunct |= CONJUNCT_USED;

		// Matched / joined conjuncts do not contribute to the filter selectivity
		if (!(conjunct & (CONJUNCT_MATCHED | CONJUNCT_JOINED)))
			factor *= getSelectivity(*conjunct);
	}

	// Nothing left over: hand the record source back untouched
	if (!residual)
		return rsb;

	return FB_NEW_POOL(getPool()) FilteredStream(csb, rsb, residual, factor);
}
//
// Compile a record retrieval source
//
@ -2774,22 +2611,11 @@ RecordSource* Optimizer::generateRetrieval(StreamType stream,
if (outerFlag)
{
fb_assert(returnBoolean);
*returnBoolean = nullptr;
// Now make another pass thru the outer conjuncts only, finding unused,
// computable booleans. When one is found, roll it into a final
// boolean and mark it used.
for (auto iter = getBaseConjuncts(); iter.hasData(); ++iter)
{
if (!(iter & CONJUNCT_USED) &&
!(iter->nodFlags & ExprNode::FLAG_RESIDUAL) &&
iter->computable(csb, INVALID_STREAM, false))
{
compose(getPool(), returnBoolean, iter);
iter |= CONJUNCT_USED;
}
}
fb_assert(returnBoolean);
*returnBoolean = composeBoolean();
}
// Now make another pass thru the conjuncts finding unused, computable
@ -2872,14 +2698,27 @@ RecordSource* Optimizer::applyLocalBoolean(RecordSource* rsb,
StreamStateHolder localHolder(csb, streams);
localHolder.activate(csb);
double selectivity = MAXIMUM_SELECTIVITY;
if (const auto boolean = composeBoolean(iter, &selectivity))
rsb = FB_NEW_POOL(getPool()) FilteredStream(csb, rsb, boolean, selectivity);
return rsb;
}
//
// Pick up any residual boolean remaining, meaning those that have not been used
// as part of some join. These booleans must still be applied to the result stream.
//
RecordSource* Optimizer::applyResidualBoolean(RecordSource* rsb)
{
BoolExprNode* boolean = nullptr;
double selectivity = MAXIMUM_SELECTIVITY;
for (iter.rewind(); iter.hasData(); ++iter)
for (auto iter = getBaseConjuncts(); iter.hasData(); ++iter)
{
if (!(iter & CONJUNCT_USED) &&
!(iter->nodFlags & ExprNode::FLAG_RESIDUAL) &&
iter->computable(csb, INVALID_STREAM, false))
if (!(iter & CONJUNCT_USED))
{
compose(getPool(), &boolean, iter);
iter |= CONJUNCT_USED;
@ -2893,6 +2732,28 @@ RecordSource* Optimizer::applyLocalBoolean(RecordSource* rsb,
}
// Walk the given conjuncts from the start and merge every unused, non-residual,
// computable one into a single boolean, marking each merged conjunct as used.
// When a selectivity accumulator is supplied, it is multiplied by the selectivity
// of every merged conjunct that is neither matched nor joined.
// Returns the composed boolean, or nullptr if no conjunct qualified.
BoolExprNode* Optimizer::composeBoolean(ConjunctIterator& iter, double* selectivity)
{
	BoolExprNode* composed = nullptr;

	iter.rewind();

	while (iter.hasData())
	{
		const bool eligible =
			!(iter & CONJUNCT_USED) &&
			!(iter->nodFlags & ExprNode::FLAG_RESIDUAL) &&
			iter->computable(csb, INVALID_STREAM, false);

		if (eligible)
		{
			compose(getPool(), &composed, iter);
			iter |= CONJUNCT_USED;

			if (!(iter & (CONJUNCT_MATCHED | CONJUNCT_JOINED)))
			{
				if (selectivity)
					*selectivity *= getSelectivity(*iter);
			}
		}

		++iter;
	}

	return composed;
}
//
// Check whether the given boolean can be involved in a equi-join relationship
//

View File

@ -40,6 +40,7 @@
#include "../dsql/ExprNodes.h"
#include "../jrd/RecordSourceNodes.h"
#include "../jrd/exe.h"
#include "../jrd/recsrc/RecordSource.h"
namespace Jrd {
@ -500,6 +501,17 @@ public:
RecordSource* applyLocalBoolean(RecordSource* rsb,
const StreamList& streams,
ConjunctIterator& iter);
RecordSource* applyResidualBoolean(RecordSource* rsb);
BoolExprNode* composeBoolean(ConjunctIterator& iter,
double* selectivity = nullptr);
BoolExprNode* composeBoolean(double* selectivity = nullptr)
{
	// Convenience overload: runs the iterator-based version over the base conjuncts.
	// A named local is required because the callee takes the iterator by non-const reference.
	auto baseConjuncts = getBaseConjuncts();
	return composeBoolean(baseConjuncts, selectivity);
}
bool checkEquiJoin(BoolExprNode* boolean);
bool getEquiJoinKeys(BoolExprNode* boolean,
NestConst<ValueExprNode>* node1,
@ -529,9 +541,6 @@ private:
RiverList& rivers,
SortNode** sortClause,
const PlanNode* planClause);
RecordSource* generateOuterJoin(RiverList& rivers,
SortNode** sortClause);
RecordSource* generateResidualBoolean(RecordSource* rsb);
bool getEquiJoinKeys(NestConst<ValueExprNode>& node1,
NestConst<ValueExprNode>& node2,
bool needCast);
@ -895,6 +904,31 @@ private:
JoinedStreamList bestStreams;
};
class OuterJoin : private Firebird::PermanentStorage
{
struct OuterJoinStream
{
RecordSource* rsb = nullptr;
StreamType number = INVALID_STREAM;
};
public:
OuterJoin(thread_db* tdbb, Optimizer* opt,
const RseNode* rse, RiverList& rivers,
SortNode** sortClause);
RecordSource* generate();
private:
RecordSource* process(const JoinType joinType);
thread_db* const tdbb;
Optimizer* const optimizer;
CompilerScratch* const csb;
SortNode** sortPtr;
OuterJoinStream joinStreams[2];
};
} // namespace Jrd
#endif // OPTIMIZER_H

View File

@ -0,0 +1,180 @@
/*
* The contents of this file are subject to the Initial
* Developer's Public License Version 1.0 (the "License");
* you may not use this file except in compliance with the
* License. You may obtain a copy of the License at
* http://www.ibphoenix.com/main.nfs?a=ibphoenix&page=ibp_idpl.
*
* Software distributed under the License is distributed AS IS,
* WITHOUT WARRANTY OF ANY KIND, either express or implied.
* See the License for the specific language governing rights
* and limitations under the License.
*
* The Original Code was created by Dmitry Yemanov
* for the Firebird Open Source RDBMS project.
*
* Copyright (c) 2023 Dmitry Yemanov <dimitr@firebirdsql.org>
* and all contributors signed below.
*
* All Rights Reserved.
* Contributor(s): ______________________________________.
*
*/
#include "firebird.h"
#include "../jrd/jrd.h"
#include "../jrd/cmp_proto.h"
#include "../jrd/RecordSourceNodes.h"
#include "../jrd/optimizer/Optimizer.h"
using namespace Firebird;
using namespace Jrd;
//
// Constructor
//
OuterJoin::OuterJoin(thread_db* aTdbb, Optimizer* opt,
					 const RseNode* rse, RiverList& rivers,
					 SortNode** sortClause)
	: PermanentStorage(*aTdbb->getDefaultPool()),
	  tdbb(aTdbb),
	  optimizer(opt),
	  csb(opt->getCompilerScratch()),
	  sortPtr(sortClause)
{
	fb_assert(rse->rse_relations.getCount() == 2);
	fb_assert(rivers.getCount() <= 2);

	// Rivers are handed over as a stack, hence the right-hand operand
	// has to be examined before the left-hand one
	for (int side = 2; side-- > 0; )
	{
		const auto source = rse->rse_relations[side];
		auto& slot = joinStreams[side];

		const bool isPlainStream =
			nodeIs<RelationSourceNode>(source) || nodeIs<LocalTableSourceNode>(source);

		if (!isPlainStream)
		{
			// Already compiled operand: take its record source from the stack
			slot.rsb = rivers.pop()->getRecordSource();
			continue;
		}

		// Plain stream: only remember its number, it must not be active yet
		const auto streamNumber = source->getStream();
		fb_assert(!(csb->csb_rpt[streamNumber].csb_flags & csb_active));
		slot.number = streamNumber;
	}

	fb_assert(rivers.isEmpty());

	// For a left join the syntactically left stream is the outer one and
	// the right stream is the inner one, which is how the slots are filled above.
	// For a full join the order does not matter, but historically it has been
	// reversed, so this is preserved for the time being.
	const bool isLeftJoin = (rse->rse_jointype == blr_left);

	if (!isLeftJoin)
	{
		// RIGHT JOIN is converted into LEFT JOIN by the BLR parser,
		// so it should never appear here
		fb_assert(rse->rse_jointype == blr_full);

		std::swap(joinStreams[0], joinStreams[1]);
	}
}
// Generate a top level outer join. The "outer" and "inner" sub-streams must be
// handled differently from each other. The inner is like other streams.
// The outer one isn't because conjuncts may not eliminate records from the stream.
// They only determine if a join with an inner stream record is to be attempted.
RecordSource* OuterJoin::generate()
{
	RecordSource* const leftJoinRsb = process(OUTER_JOIN);

	if (optimizer->isFullJoin())
	{
		// A FULL JOIN B is currently implemented similar to
		// (A LEFT JOIN B) UNION ALL (B ANTI-JOIN A).
		// The first part is ready by now, so exchange the sides
		// and build the anti-join part.
		std::swap(joinStreams[0], joinStreams[1]);

		// Bring stream-based sides back to their original state
		// (new outer side first, then the new inner side)
		for (auto& side : joinStreams)
		{
			if (side.number == INVALID_STREAM)
				continue;

			side.rsb = nullptr;
			csb->csb_rpt[side.number].deactivate();
		}

		// Conjuncts consumed by the first pass are replaced with clones
		// to make them re-usable for the anti-join
		for (auto conjunct = optimizer->getConjuncts(); conjunct.hasData(); ++conjunct)
		{
			if (conjunct & Optimizer::CONJUNCT_USED)
				conjunct.reset(CMP_clone_node_opt(tdbb, csb, conjunct));
		}

		RecordSource* const antiJoinRsb = process(ANTI_JOIN);

		// Combine both parts into the final join record source
		return FB_NEW_POOL(getPool()) FullOuterJoin(csb, leftJoinRsb, antiJoinRsb);
	}

	return leftJoinRsb;
}
// Build one nested loop join of the given type from the current stream pair.
// Retrievals are generated (outer side first) for the sides that are known
// only by their stream number; sides that already carry a record source are used as is.
RecordSource* OuterJoin::process(const JoinType joinType)
{
	auto& outer = joinStreams[0];
	auto& inner = joinStreams[1];

	// Filled in by the outer stream retrieval, if one is generated
	BoolExprNode* outerBoolean = nullptr;

	if (outer.number != INVALID_STREAM)
	{
		fb_assert(!outer.rsb);

		// The sort clause is offered to the outer retrieval only for non-full joins
		SortNode** const outerSort = optimizer->isFullJoin() ? nullptr : sortPtr;

		outer.rsb = optimizer->generateRetrieval(outer.number, outerSort,
			true, false, &outerBoolean);
	}

	if (inner.number != INVALID_STREAM)
	{
		fb_assert(!inner.rsb);

		// AB: the sort clause for the inner stream of an OUTER JOIN
		// should never be used for the index retrieval
		const bool innerFlag = (joinType == OUTER_JOIN);

		inner.rsb = optimizer->generateRetrieval(inner.number, nullptr,
			false, innerFlag);
	}

	// Whatever booleans were not satisfied via an index lookup
	// end up in a filter placed on top of the inner record source
	const auto filteredInner = optimizer->applyResidualBoolean(inner.rsb);

	return FB_NEW_POOL(getPool())
		NestedLoopJoin(csb, outer.rsb, filteredInner, outerBoolean, joinType);
}