8
0
mirror of https://github.com/FirebirdSQL/firebird.git synced 2025-01-22 20:03:02 +01:00

Adjustments for cardinality estimations. Minor optimizer refactoring.

This commit is contained in:
Dmitry Yemanov 2022-03-29 20:04:41 +03:00
parent c234434894
commit 343c1f97df
7 changed files with 117 additions and 118 deletions

View File

@ -59,15 +59,16 @@ using namespace Jrd;
InnerJoin::InnerJoin(thread_db* aTdbb, Optimizer* opt,
const StreamList& streams,
SortNode* sort_clause, bool hasPlan)
SortNode** sortClause, bool hasPlan)
: PermanentStorage(*aTdbb->getDefaultPool()),
tdbb(aTdbb),
optimizer(opt),
csb(opt->getCompilerScratch()),
sort(sort_clause),
sortPtr(sortClause),
plan(hasPlan),
innerStreams(getPool(), streams.getCount()),
joinedStreams(getPool())
joinedStreams(getPool()),
bestStreams(getPool())
{
joinedStreams.grow(streams.getCount());
@ -92,6 +93,8 @@ void InnerJoin::calculateStreamInfo()
optimizer->printf("Base stream info:\n");
#endif
const auto sort = sortPtr ? *sortPtr : nullptr;
for (auto innerStream : innerStreams)
{
streams.add(innerStream->stream);
@ -167,23 +170,25 @@ void InnerJoin::calculateStreamInfo()
// Estimate the cost for the stream
//
void InnerJoin::estimateCost(StreamType stream,
void InnerJoin::estimateCost(unsigned position,
const StreamInfo* stream,
double* cost,
double* resulting_cardinality,
bool start) const
double* resultingCardinality) const
{
const auto sort = (position == 0 && sortPtr) ? *sortPtr : nullptr;
// Create the optimizer retrieval generation class and calculate
// which indexes will be used and the total estimated selectivity will be returned
Retrieval retrieval(tdbb, optimizer, stream, false, false, (start ? sort : nullptr), true);
Retrieval retrieval(tdbb, optimizer, stream->stream, false, false, sort, true);
const auto candidate = retrieval.getInversion();
*cost = candidate->cost;
// Calculate cardinality
const auto tail = &csb->csb_rpt[stream];
const auto tail = &csb->csb_rpt[stream->stream];
const double cardinality = tail->csb_cardinality * candidate->selectivity;
*resulting_cardinality = MAX(cardinality, MINIMUM_CARDINALITY);
*resultingCardinality = MAX(cardinality, MINIMUM_CARDINALITY);
}
@ -193,7 +198,7 @@ void InnerJoin::estimateCost(StreamType stream,
// Next loop through the remaining streams and find the best order.
//
bool InnerJoin::findJoinOrder(StreamList& bestStreams)
bool InnerJoin::findJoinOrder()
{
bestStreams.clear();
bestCount = 0;
@ -298,7 +303,6 @@ void InnerJoin::findBestOrder(unsigned position,
double cost,
double cardinality)
{
const bool start = (position == 0);
const auto tail = &csb->csb_rpt[stream->stream];
// Do some initializations
@ -313,21 +317,21 @@ void InnerJoin::findBestOrder(unsigned position,
streamFlags.add(innerStream->used);
// Compute delta and total estimate cost to fetch this stream
double position_cost = 0, position_cardinality = 0, new_cost = 0, new_cardinality = 0;
double positionCost = 0, positionCardinality = 0, newCost = 0, newCardinality = 0;
if (!plan)
{
estimateCost(stream->stream, &position_cost, &position_cardinality, start);
new_cost = cost + cardinality * position_cost;
new_cardinality = position_cardinality * cardinality;
estimateCost(position, stream, &positionCost, &positionCardinality);
newCost = cost + cardinality * positionCost;
newCardinality = positionCardinality * cardinality;
}
// If the partial order is either longer than any previous partial order,
// or the same length and cheaper, save the order as "best"
if (position > bestCount || (position == bestCount && new_cost < bestCost))
if (position > bestCount || (position == bestCount && newCost < bestCost))
{
bestCount = position;
bestCost = new_cost;
bestCost = newCost;
const auto end = joinedStreams.begin() + position;
for (auto iter = joinedStreams.begin(); iter != end; ++iter)
@ -353,10 +357,24 @@ void InnerJoin::findBestOrder(unsigned position,
// If we already know a combination that uses all streams and the
// current cost is higher than that of the best one, we're done
if (bestCount == remainingStreams && bestCost < new_cost)
if (bestCount == remainingStreams && bestCost < newCost)
done = true;
if (!done && !plan)
if (plan)
{
// If an explicit PLAN was specified, pick the next relation.
// The order in innerStreams is expected to be exactly the order
// specified in the explicit PLAN.
for (auto nextStream : innerStreams)
{
if (!nextStream->used)
{
findBestOrder(position, nextStream, processList, newCost, newCardinality);
break;
}
}
}
else if (!done)
{
// Add these relations to the processing list
for (auto& relationship : stream->indexedRelationships)
@ -395,22 +413,7 @@ void InnerJoin::findBestOrder(unsigned position,
auto relationStreamInfo = getStreamInfo(nextRelationship.stream);
if (!relationStreamInfo->used)
{
findBestOrder(position, relationStreamInfo, processList, new_cost, new_cardinality);
break;
}
}
}
if (plan)
{
// If an explicit PLAN was specified, pick the next relation.
// The order in innerStreams is expected to be exactly the order
// specified in the explicit PLAN.
for (auto nextStream : innerStreams)
{
if (!nextStream->used)
{
findBestOrder(position, nextStream, processList, new_cost, new_cardinality);
findBestOrder(position, relationStreamInfo, processList, newCost, newCardinality);
break;
}
}
@ -423,6 +426,37 @@ void InnerJoin::findBestOrder(unsigned position,
}
//
// Form streams into rivers (combinations of streams)
//
River* InnerJoin::formRiver()
{
	// A best join order must already be recorded in bestStreams / bestCount
	fb_assert(bestCount);
	fb_assert(bestStreams.hasData());

	// The sort is dropped unless the best order covers every inner stream.
	// Note that sortPtr is a member, so clearing it here is permanent.
	const bool coversAllStreams = (bestStreams.getCount() == innerStreams.getCount());
	if (!coversAllStreams)
		sortPtr = nullptr;

	HalfStaticArray<RecordSource*, OPT_STATIC_ITEMS> retrievals;

	for (const auto bestStream : bestStreams)
	{
		retrievals.add(optimizer->generateRetrieval(bestStream, sortPtr, false, false));

		// Only the first retrieval is offered the sort
		sortPtr = nullptr;
	}

	// A single retrieval is used as is; several are wrapped into a nested loop join
	RecordSource* joinRsb = nullptr;

	if (retrievals.getCount() == 1)
		joinRsb = retrievals[0];
	else
		joinRsb = FB_NEW_POOL(getPool()) NestedLoopJoin(csb, retrievals.getCount(), retrievals.begin());

	// Allocate a river block holding the best order and return it deactivated
	const auto newRiver = FB_NEW_POOL(getPool()) River(csb, joinRsb, nullptr, bestStreams);
	newRiver->deactivate(csb);

	return newRiver;
}
//
// Check if the testStream can use an index when the baseStream is active. If so
// then we create an indexRelationship and fill it with the needed information.

View File

@ -2137,49 +2137,6 @@ void Optimizer::findDependentStreams(const StreamList& streams,
}
//
// Form streams into rivers (combinations of streams)
//
bool Optimizer::formRiver(unsigned streamCount,
StreamList& streams,
const StreamList& joinedStreams,
RiverList& rivers,
SortNode** sortClause)
{
const auto count = joinedStreams.getCount();
fb_assert(count);
if (count != streamCount)
sortClause = nullptr;
HalfStaticArray<RecordSource*, OPT_STATIC_ITEMS> rsbs(count);
for (const auto stream : joinedStreams)
{
rsbs.add(generateRetrieval(stream, sortClause, false, false, nullptr));
sortClause = nullptr;
// Remove already consumed streams from remainingStreams
FB_SIZE_T pos;
if (streams.find(stream, pos))
streams.remove(pos);
else
fb_assert(false);
}
const auto rsb = (count == 1) ? rsbs[0] :
FB_NEW_POOL(getPool()) NestedLoopJoin(csb, count, rsbs.begin());
// Allocate a river block and move the best order into it
const auto river = FB_NEW_POOL(getPool()) River(csb, rsb, nullptr, joinedStreams);
river->deactivate(csb);
rivers.push(river);
return streams.hasData();
}
//
// Form streams into rivers according to the user-specified plan
//
@ -2225,23 +2182,14 @@ void Optimizer::formRivers(const StreamList& streams,
// AB: Only form rivers when any retrieval node is seen, for
// example a MERGE on two JOINs will come with no retrievals
// at this point.
// CVC: Notice "plan_node" is pointing to the last element in the loop above.
// If the loop didn't execute, we had garbage in "planNode".
if (tempStreams.hasData())
{
const auto count = tempStreams.getCount();
InnerJoin innerJoin(tdbb, this, tempStreams,
(sortClause ? *sortClause : nullptr),
(planClause != nullptr));
sortClause, (planClause != nullptr));
StreamList joinedStreams;
while (innerJoin.findJoinOrder(joinedStreams))
{
if (!formRiver(count, tempStreams, joinedStreams, rivers, sortClause))
break;
}
while (innerJoin.findJoinOrder())
rivers.add(innerJoin.formRiver());
}
}
@ -2555,17 +2503,23 @@ void Optimizer::generateInnerJoin(StreamList& streams,
return;
}
const auto count = streams.getCount();
InnerJoin innerJoin(tdbb, this, streams,
(sortClause ? *sortClause : nullptr),
(planClause != nullptr));
sortClause, (planClause != nullptr));
StreamList joinedStreams;
while (innerJoin.findJoinOrder(joinedStreams))
while (innerJoin.findJoinOrder())
{
if (!formRiver(count, streams, joinedStreams, rivers, sortClause))
break;
const auto river = innerJoin.formRiver();
rivers.add(river);
// Remove already consumed streams from the source stream list
for (const auto stream : river->getStreams())
{
FB_SIZE_T pos;
if (streams.find(stream, pos))
streams.remove(pos);
else
fb_assert(false);
}
}
}
@ -2637,7 +2591,7 @@ RecordSource* Optimizer::generateOuterJoin(RiverList& rivers,
// AB: the sort clause for the inner stream of an OUTER JOIN
// should never be used for the index retrieval
stream_i.stream_rsb =
generateRetrieval(stream_i.stream_num, nullptr, false, true, nullptr);
generateRetrieval(stream_i.stream_num, nullptr, false, true);
}
// generate a parent boolean rsb for any remaining booleans that
@ -2664,7 +2618,7 @@ RecordSource* Optimizer::generateOuterJoin(RiverList& rivers,
{
hasInnerRsb = false;
stream_i.stream_rsb =
generateRetrieval(stream_i.stream_num, nullptr, false, true, nullptr);
generateRetrieval(stream_i.stream_num, nullptr, false, true);
}
const auto innerRsb = generateResidualBoolean(stream_i.stream_rsb);
@ -2695,7 +2649,7 @@ RecordSource* Optimizer::generateOuterJoin(RiverList& rivers,
if (!hasOuterRsb)
{
stream_o.stream_rsb =
generateRetrieval(stream_o.stream_num, nullptr, false, false, nullptr);
generateRetrieval(stream_o.stream_num, nullptr, false, false);
}
const auto outerRsb = generateResidualBoolean(stream_o.stream_rsb);

View File

@ -370,6 +370,11 @@ public:
void compileRelation(StreamType stream);
void generateAggregateDistincts(MapNode* map);
RecordSource* generateRetrieval(StreamType stream,
SortNode** sortClause,
bool outerFlag,
bool innerFlag,
BoolExprNode** returnBoolean = nullptr);
SortedStream* generateSort(const StreamList& streams,
const StreamList* dbkeyStreams,
RecordSource* rsb, SortNode* sort,
@ -421,11 +426,6 @@ private:
void findDependentStreams(const StreamList& streams,
StreamList& dependent_streams,
StreamList& free_streams);
bool formRiver(unsigned streamCount,
StreamList& streams,
const StreamList& joinedStreams,
RiverList& rivers,
SortNode** sortClause);
void formRivers(const StreamList& streams,
RiverList& rivers,
SortNode** sortClause,
@ -438,11 +438,6 @@ private:
RecordSource* generateOuterJoin(RiverList& rivers,
SortNode** sortClause);
RecordSource* generateResidualBoolean(RecordSource* rsb);
RecordSource* generateRetrieval(StreamType stream,
SortNode** sortClause,
bool outerFlag,
bool innerFlag,
BoolExprNode** returnBoolean);
BoolExprNode* makeInferenceNode(BoolExprNode* boolean,
ValueExprNode* arg1,
ValueExprNode* arg2);
@ -740,7 +735,7 @@ class InnerJoin : private Firebird::PermanentStorage
public:
InnerJoin(thread_db* tdbb, Optimizer* opt,
const StreamList& streams,
SortNode* sort_clause, bool hasPlan);
SortNode** sortClause, bool hasPlan);
~InnerJoin()
{
@ -748,11 +743,12 @@ public:
delete innerStream;
}
bool findJoinOrder(StreamList& bestStreams);
bool findJoinOrder();
River* formRiver();
protected:
void calculateStreamInfo();
void estimateCost(StreamType stream, double* cost, double* resulting_cardinality, bool start) const;
void estimateCost(unsigned position, const StreamInfo* stream, double* cost, double* resultingCardinality) const;
void findBestOrder(unsigned position, StreamInfo* stream,
IndexedRelationships& processList, double cost, double cardinality);
void getIndexedRelationships(StreamInfo* testStream);
@ -769,7 +765,7 @@ private:
thread_db* const tdbb;
Optimizer* const optimizer;
CompilerScratch* const csb;
SortNode* const sort;
SortNode** sortPtr;
const bool plan;
unsigned remainingStreams = 0;
@ -778,6 +774,7 @@ private:
StreamInfoList innerStreams;
JoinedStreamList joinedStreams;
StreamList bestStreams;
};
} // namespace Jrd

View File

@ -51,7 +51,15 @@ BaseAggWinStream<ThisType, NextType>::BaseAggWinStream(thread_db* tdbb, Compiler
fb_assert(m_next);
m_impure = csb->allocImpure<typename ThisType::Impure>();
m_cardinality = group ? next->getCardinality() * DEFAULT_SELECTIVITY : MINIMUM_CARDINALITY;
if (group)
{
m_cardinality = next->getCardinality();
for (auto count = group->getCount(); count; count--)
m_cardinality *= REDUCE_SELECTIVITY_FACTOR_EQUALITY;
}
else
m_cardinality = MINIMUM_CARDINALITY;
}
template <typename ThisType, typename NextType>

View File

@ -250,7 +250,9 @@ HashJoin::HashJoin(thread_db* tdbb, CompilerScratch* csb, FB_SIZE_T count,
RecordSource* const sub_rsb = args[i];
fb_assert(sub_rsb);
m_cardinality *= sub_rsb->getCardinality() * DEFAULT_SELECTIVITY;
m_cardinality *= sub_rsb->getCardinality();
for (auto keyCount = keys[i]->getCount(); keyCount; keyCount--)
m_cardinality *= REDUCE_SELECTIVITY_FACTOR_EQUALITY;
SubStream sub;
sub.buffer = FB_NEW_POOL(csb->csb_pool) BufferedStream(csb, sub_rsb);

View File

@ -52,7 +52,9 @@ MergeJoin::MergeJoin(CompilerScratch* csb, FB_SIZE_T count,
fb_assert(args[i]);
m_args[i] = args[i];
m_cardinality *= args[i]->getCardinality() * DEFAULT_SELECTIVITY;
m_cardinality *= args[i]->getCardinality();
for (auto keyCount = keys[i]->getCount(); keyCount; keyCount--)
m_cardinality *= REDUCE_SELECTIVITY_FACTOR_EQUALITY;
fb_assert(keys[i]);
m_keys[i] = keys[i];

View File

@ -60,6 +60,8 @@ NestedLoopJoin::NestedLoopJoin(CompilerScratch* csb, RecordSource* outer, Record
m_args.add(outer);
m_args.add(inner);
m_cardinality = outer->getCardinality() * inner->getCardinality();
}
void NestedLoopJoin::open(thread_db* tdbb) const