From 26e64e9c08f635d55ac7a111469498b3f0c7fe81 Mon Sep 17 00:00:00 2001
From: Dmitry Yemanov
Date: Thu, 14 Nov 2024 21:05:43 +0300
Subject: [PATCH] Cost-based decision between ORDER and SORT plans (#8316)

* Cost-based decision between ORDER and SORT plans

* Use inline constexpr as suggested by Adriano. Misc style changes.
---
 src/jrd/optimizer/Optimizer.cpp |  2 +-
 src/jrd/optimizer/Optimizer.h   | 31 +++++------
 src/jrd/optimizer/Retrieval.cpp | 92 ++++++++++++++++++++++++++++++---
 3 files changed, 103 insertions(+), 22 deletions(-)

diff --git a/src/jrd/optimizer/Optimizer.cpp b/src/jrd/optimizer/Optimizer.cpp
index 1dc090dd54..082d31962e 100644
--- a/src/jrd/optimizer/Optimizer.cpp
+++ b/src/jrd/optimizer/Optimizer.cpp
@@ -2646,7 +2646,7 @@ RecordSource* Optimizer::generateRetrieval(StreamType stream,
 		}
 	}
 
-	const auto navigation = retrieval.getNavigation();
+	const auto navigation = retrieval.getNavigation(candidate);
 
 	if (navigation)
 	{
diff --git a/src/jrd/optimizer/Optimizer.h b/src/jrd/optimizer/Optimizer.h
index 36c4a00a9e..fc12bd4dd5 100644
--- a/src/jrd/optimizer/Optimizer.h
+++ b/src/jrd/optimizer/Optimizer.h
@@ -46,29 +46,30 @@ namespace Jrd {
 
 // AB: 2005-11-05
 // Constants below needs some discussions and ideas
-const double REDUCE_SELECTIVITY_FACTOR_EQUALITY = 0.001;
-const double REDUCE_SELECTIVITY_FACTOR_BETWEEN = 0.0025;
-const double REDUCE_SELECTIVITY_FACTOR_LESS = 0.05;
-const double REDUCE_SELECTIVITY_FACTOR_GREATER = 0.05;
-const double REDUCE_SELECTIVITY_FACTOR_STARTING = 0.01;
-const double REDUCE_SELECTIVITY_FACTOR_OTHER = 0.01;
+inline constexpr double REDUCE_SELECTIVITY_FACTOR_EQUALITY = 0.001;
+inline constexpr double REDUCE_SELECTIVITY_FACTOR_BETWEEN = 0.0025;
+inline constexpr double REDUCE_SELECTIVITY_FACTOR_LESS = 0.05;
+inline constexpr double REDUCE_SELECTIVITY_FACTOR_GREATER = 0.05;
+inline constexpr double REDUCE_SELECTIVITY_FACTOR_STARTING = 0.01;
+inline constexpr double REDUCE_SELECTIVITY_FACTOR_OTHER = 0.01;
 
 // Cost of simple (CPU bound) operations is less than the page access cost
-const double COST_FACTOR_MEMCOPY = 0.5;
-const double COST_FACTOR_HASHING = 0.5;
+inline constexpr double COST_FACTOR_MEMCOPY = 0.5;
+inline constexpr double COST_FACTOR_HASHING = 0.5;
+inline constexpr double COST_FACTOR_QUICKSORT = 0.1;
 
-const double MAXIMUM_SELECTIVITY = 1.0;
-const double DEFAULT_SELECTIVITY = 0.1;
+inline constexpr double MAXIMUM_SELECTIVITY = 1.0;
+inline constexpr double DEFAULT_SELECTIVITY = 0.1;
 
-const double MINIMUM_CARDINALITY = 1.0;
-const double THRESHOLD_CARDINALITY = 5.0;
-const double DEFAULT_CARDINALITY = 1000.0;
+inline constexpr double MINIMUM_CARDINALITY = 1.0;
+inline constexpr double THRESHOLD_CARDINALITY = 5.0;
+inline constexpr double DEFAULT_CARDINALITY = 1000.0;
 
 // Default depth of an index tree (including one leaf page),
 // also representing the minimal cost of the index scan.
 // We assume that the root page would be always cached,
 // so it's not included here.
-const double DEFAULT_INDEX_COST = 3.0;
+inline const double DEFAULT_INDEX_COST = 3.0;
 
 struct index_desc;
 
@@ -684,7 +685,7 @@ public:
 	}
 
 	InversionCandidate* getInversion();
-	IndexTableScan* getNavigation();
+	IndexTableScan* getNavigation(const InversionCandidate* candidate);
 
 protected:
 	void analyzeNavigation(const InversionCandidateList& inversions);
diff --git a/src/jrd/optimizer/Retrieval.cpp b/src/jrd/optimizer/Retrieval.cpp
index e92b090d5a..22e3889306 100644
--- a/src/jrd/optimizer/Retrieval.cpp
+++ b/src/jrd/optimizer/Retrieval.cpp
@@ -49,6 +49,8 @@
 
 #include "../jrd/optimizer/Optimizer.h"
 
+#include <cmath>
+
 using namespace Firebird;
 using namespace Jrd;
 
@@ -366,25 +368,103 @@ InversionCandidate* Retrieval::getInversion()
 	return invCandidate;
 }
 
-IndexTableScan* Retrieval::getNavigation()
+IndexTableScan* Retrieval::getNavigation(const InversionCandidate* candidate)
 {
 	if (!navigationCandidate)
 		return nullptr;
 
-	IndexScratch* const scratch = navigationCandidate->scratch;
+	const auto scratch = navigationCandidate->scratch;
+
+	const auto streamCardinality = csb->csb_rpt[stream].csb_cardinality;
+
+	// If the table looks like empty during preparation time, we cannot be sure about
+	// its real cardinality during execution. So, unless we have some index-based
+	// filtering applied, let's better be pessimistic and avoid external sorting
+	// due to likely cardinality under-estimation.
+	const bool avoidSorting = (streamCardinality <= MINIMUM_CARDINALITY && !candidate->inversion);
+
+	if (!(scratch->index->idx_runtime_flags & idx_plan_navigate) && !avoidSorting)
+	{
+		// Check whether the navigational index scan is cheaper than the external sort
+		// and give up if it's not worth the efforts.
+		//
+		// We ignore candidate->cost in the calculations below as it belongs
+		// to both parts being compared.
+
+		fb_assert(candidate);
+
+		// Restore the original selectivity of the inversion,
+		// i.e. before the navigation candidate was accounted
+		auto selectivity = candidate->selectivity / navigationCandidate->selectivity;
+
+		// Non-indexed booleans are checked before sorting, so they improve the selectivity
+
+		double factor = MAXIMUM_SELECTIVITY;
+		for (auto iter = optimizer->getConjuncts(outerFlag, innerFlag); iter.hasData(); ++iter)
+		{
+			if (!(iter & Optimizer::CONJUNCT_USED) &&
+				!candidate->matches.exist(iter) &&
+				iter->computable(csb, stream, true) &&
+				iter->containsStream(stream))
+			{
+				factor *= Optimizer::getSelectivity(*iter);
+			}
+		}
+
+		Optimizer::adjustSelectivity(selectivity, factor, streamCardinality);
+
+		// Don't consider external sorting if optimization for first rows is requested
+		// and we have no local filtering applied
+
+		if (!optimizer->favorFirstRows() || selectivity < MAXIMUM_SELECTIVITY)
+		{
+			// Estimate amount of records to be sorted
+			const auto cardinality = streamCardinality * selectivity;
+
+			// We optimistically assume that records will be cached during sorting
+			const auto sortCost =
+				// record copying (to the sort buffer and back)
+				cardinality * COST_FACTOR_MEMCOPY * 2 +
+				// quicksort algorithm is O(n*log(n)) in average
+				cardinality * log2(cardinality) * COST_FACTOR_QUICKSORT;
+
+			// During navigation we fetch an index leaf page per every record being returned,
+			// thus add the estimated cardinality to the cost
+			auto navigationCost = navigationCandidate->cost +
+				streamCardinality * candidate->selectivity;
+
+			if (optimizer->favorFirstRows())
+			{
+				// Reset the cost to represent a single record retrieval
+				navigationCost = DEFAULT_INDEX_COST;
+
+				// We know that some local filtering is applied, so we need
+				// to adjust the cost as we need to walk the index
+				// until the first matching record is found
+				const auto fullIndexCost = navigationCandidate->scratch->cardinality;
+				const auto ratio = MAXIMUM_SELECTIVITY / selectivity;
+				const auto fraction = ratio / streamCardinality;
+				const auto walkCost = fullIndexCost * fraction * navigationCandidate->selectivity;
+				navigationCost += walkCost;
+			}
+
+			if (sortCost < navigationCost)
+				return nullptr;
+		}
+	}
 
 	// Looks like we can do a navigational walk. Flag that
 	// we have used this index for navigation, and allocate
 	// a navigational rsb for it.
 
 	scratch->index->idx_runtime_flags |= idx_navigate;
 
-	const USHORT key_length =
+	const auto indexNode = makeIndexScanNode(scratch);
+
+	const USHORT keyLength =
 		ROUNDUP(BTR_key_length(tdbb, relation, scratch->index), sizeof(SLONG));
 
-	InversionNode* const index_node = makeIndexScanNode(scratch);
-
 	return FB_NEW_POOL(getPool())
-		IndexTableScan(csb, getAlias(), stream, relation, index_node, key_length,
+		IndexTableScan(csb, getAlias(), stream, relation, indexNode, keyLength,
 			navigationCandidate->selectivity);
 }
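
The standalone sketch below is not part of the patch or of Firebird's sources; it only illustrates the comparison the new getNavigation() code performs. The external-sort estimate charges two record copies plus an O(n*log2(n)) quicksort term over the rows that survive all filters, while the navigational (ORDER) estimate charges a base index retrieval cost plus one leaf-page fetch per record the index returns. COST_FACTOR_MEMCOPY, COST_FACTOR_QUICKSORT and DEFAULT_INDEX_COST mirror the constants from Optimizer.h above; every other name and number (compare(), the 250-page leaf walk, the selectivities) is hypothetical.

// cost_sketch.cpp -- illustrative only, not Firebird code
#include <cmath>
#include <cstdio>

namespace {

constexpr double COST_FACTOR_MEMCOPY = 0.5;
constexpr double COST_FACTOR_QUICKSORT = 0.1;
constexpr double DEFAULT_INDEX_COST = 3.0;

// Cost of externally sorting `rows` records: each record is copied to the sort
// buffer and back, and quicksort averages O(n * log2(n)) comparisons.
double sortCost(double rows)
{
	return rows * COST_FACTOR_MEMCOPY * 2 +
		rows * std::log2(rows) * COST_FACTOR_QUICKSORT;
}

// Cost of the navigational (ORDER) scan: base index retrieval cost plus
// one leaf-page fetch per record returned via the index.
double navigationCost(double indexCost, double streamCardinality, double indexSelectivity)
{
	return indexCost + streamCardinality * indexSelectivity;
}

void compare(const char* what, double streamCardinality,
	double indexSelectivity, double totalSelectivity)
{
	// Rows that actually reach the sorter: everything surviving all filters,
	// including non-indexed booleans evaluated before sorting.
	const double rowsToSort = streamCardinality * totalSelectivity;

	const double sort = sortCost(rowsToSort);
	const double nav = navigationCost(
		DEFAULT_INDEX_COST + 250.0 /* made-up cost of walking the index */,
		streamCardinality, indexSelectivity);

	std::printf("%s: sort=%.0f nav=%.0f -> %s plan\n",
		what, sort, nav, sort < nav ? "SORT" : "ORDER");
}

} // namespace

int main()
{
	// The ORDER index provides no filtering, but other predicates keep only 1% of
	// the rows: sorting ~1000 records beats fetching a leaf page for each of 100000.
	compare("non-indexed filter", 100000.0, 1.0, 0.01);

	// The same index both filters and orders the stream: navigation wins.
	compare("indexed filter", 100000.0, 0.01, 0.01);

	return 0;
}

The real getNavigation() additionally refuses to sort when the table cardinality is unknown at preparation time and, under FIRST ROWS optimization, resets the navigation cost to a single-record retrieval plus a partial index walk; the sketch omits those branches.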