8
0
mirror of https://github.com/FirebirdSQL/firebird.git synced 2025-01-22 18:43:02 +01:00

Cost-based decision between ORDER and SORT plans (#8316)

* Cost-based decision between ORDER and SORT plans

* Use inline constexpr as suggested by Adriano. Misc style changes.
This commit is contained in:
Dmitry Yemanov 2024-11-14 21:05:43 +03:00 committed by GitHub
parent 65b80501e4
commit 26e64e9c08
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 103 additions and 22 deletions

View File

@ -2646,7 +2646,7 @@ RecordSource* Optimizer::generateRetrieval(StreamType stream,
} }
} }
const auto navigation = retrieval.getNavigation(); const auto navigation = retrieval.getNavigation(candidate);
if (navigation) if (navigation)
{ {

View File

@ -46,29 +46,30 @@ namespace Jrd {
// AB: 2005-11-05
// The constants below are heuristics and need further discussion and tuning.

// Selectivity reduction factors, one per kind of comparison
inline constexpr double REDUCE_SELECTIVITY_FACTOR_EQUALITY = 0.001;
inline constexpr double REDUCE_SELECTIVITY_FACTOR_BETWEEN = 0.0025;
inline constexpr double REDUCE_SELECTIVITY_FACTOR_LESS = 0.05;
inline constexpr double REDUCE_SELECTIVITY_FACTOR_GREATER = 0.05;
inline constexpr double REDUCE_SELECTIVITY_FACTOR_STARTING = 0.01;
inline constexpr double REDUCE_SELECTIVITY_FACTOR_OTHER = 0.01;

// Cost of simple (CPU bound) operations is less than the page access cost
inline constexpr double COST_FACTOR_MEMCOPY = 0.5;
inline constexpr double COST_FACTOR_HASHING = 0.5;
inline constexpr double COST_FACTOR_QUICKSORT = 0.1;

// Selectivity and cardinality bounds and defaults
inline constexpr double MAXIMUM_SELECTIVITY = 1.0;
inline constexpr double DEFAULT_SELECTIVITY = 0.1;

inline constexpr double MINIMUM_CARDINALITY = 1.0;
inline constexpr double THRESHOLD_CARDINALITY = 5.0;
inline constexpr double DEFAULT_CARDINALITY = 1000.0;

// Default depth of an index tree (including one leaf page),
// also representing the minimal cost of the index scan.
// We assume that the root page would be always cached,
// so it's not included here.
// Declared constexpr (not just const) to stay consistent with the other
// constants and to be usable in constant expressions.
inline constexpr double DEFAULT_INDEX_COST = 3.0;
struct index_desc; struct index_desc;
@ -684,7 +685,7 @@ public:
} }
InversionCandidate* getInversion(); InversionCandidate* getInversion();
IndexTableScan* getNavigation(); IndexTableScan* getNavigation(const InversionCandidate* candidate);
protected: protected:
void analyzeNavigation(const InversionCandidateList& inversions); void analyzeNavigation(const InversionCandidateList& inversions);

View File

@ -49,6 +49,8 @@
#include "../jrd/optimizer/Optimizer.h" #include "../jrd/optimizer/Optimizer.h"
#include <cmath>
using namespace Firebird; using namespace Firebird;
using namespace Jrd; using namespace Jrd;
@ -366,25 +368,103 @@ InversionCandidate* Retrieval::getInversion()
return invCandidate; return invCandidate;
} }
// Build the navigational (index-ordered) table scan for this stream.
//
// candidate - the inversion candidate chosen for this stream; it is
//             dereferenced here and must not be null.
//
// Returns nullptr when there is no navigation candidate, or when the estimated
// cost of sorting the filtered records is lower than the estimated cost of
// walking the index. Otherwise marks the index with idx_navigate and returns
// a newly allocated IndexTableScan.
IndexTableScan* Retrieval::getNavigation(const InversionCandidate* candidate)
{
	if (!navigationCandidate)
		return nullptr;

	// The candidate is dereferenced right below, so validate it up front
	// rather than after the first use.
	fb_assert(candidate);

	const auto scratch = navigationCandidate->scratch;
	const auto streamCardinality = csb->csb_rpt[stream].csb_cardinality;

	// If the table looks empty at preparation time, we cannot be sure about
	// its real cardinality during execution. So, unless we have some index-based
	// filtering applied, let's better be pessimistic and avoid external sorting
	// due to likely cardinality under-estimation.
	const bool avoidSorting =
		(streamCardinality <= MINIMUM_CARDINALITY && !candidate->inversion);

	if (!(scratch->index->idx_runtime_flags & idx_plan_navigate) && !avoidSorting)
	{
		// Check whether the navigational index scan is cheaper than the external sort
		// and give up if it's not worth the effort.
		//
		// We ignore candidate->cost in the calculations below as it belongs
		// to both parts being compared.

		// Restore the original selectivity of the inversion,
		// i.e. before the navigation candidate was accounted
		auto selectivity = candidate->selectivity / navigationCandidate->selectivity;

		// Non-indexed booleans are checked before sorting, so they improve the selectivity

		double factor = MAXIMUM_SELECTIVITY;
		for (auto iter = optimizer->getConjuncts(outerFlag, innerFlag); iter.hasData(); ++iter)
		{
			if (!(iter & Optimizer::CONJUNCT_USED) &&
				!candidate->matches.exist(iter) &&
				iter->computable(csb, stream, true) &&
				iter->containsStream(stream))
			{
				factor *= Optimizer::getSelectivity(*iter);
			}
		}

		Optimizer::adjustSelectivity(selectivity, factor, streamCardinality);

		// Don't consider external sorting if optimization for first rows is requested
		// and we have no local filtering applied

		if (!optimizer->favorFirstRows() || selectivity < MAXIMUM_SELECTIVITY)
		{
			// Estimate amount of records to be sorted
			const auto cardinality = streamCardinality * selectivity;

			// log2() is negative below 1 and -infinity at 0, which would make the
			// comparison term negative or turn the whole cost into NaN (0 * -inf).
			// Clamp the logarithm argument so that the term is never below zero.
			const auto sortedRecords =
				(cardinality > MINIMUM_CARDINALITY) ? cardinality : MINIMUM_CARDINALITY;

			// We optimistically assume that records will be cached during sorting
			const auto sortCost =
				// record copying (to the sort buffer and back)
				cardinality * COST_FACTOR_MEMCOPY * 2 +
				// quicksort algorithm is O(n*log(n)) in average
				cardinality * log2(sortedRecords) * COST_FACTOR_QUICKSORT;

			// During navigation we fetch an index leaf page per every record being returned,
			// thus add the estimated cardinality to the cost
			auto navigationCost = navigationCandidate->cost +
				streamCardinality * candidate->selectivity;

			if (optimizer->favorFirstRows())
			{
				// Reset the cost to represent a single record retrieval
				navigationCost = DEFAULT_INDEX_COST;

				// We know that some local filtering is applied, so we need
				// to adjust the cost as we need to walk the index
				// until the first matching record is found
				// NOTE(review): this divides by selectivity and streamCardinality;
				// confirm that neither can be zero on this path.
				const auto fullIndexCost = scratch->cardinality;
				const auto ratio = MAXIMUM_SELECTIVITY / selectivity;
				const auto fraction = ratio / streamCardinality;
				const auto walkCost = fullIndexCost * fraction * navigationCandidate->selectivity;
				navigationCost += walkCost;
			}

			if (sortCost < navigationCost)
				return nullptr;
		}
	}

	// Looks like we can do a navigational walk. Flag that
	// we have used this index for navigation, and allocate
	// a navigational rsb for it.
	scratch->index->idx_runtime_flags |= idx_navigate;

	const auto indexNode = makeIndexScanNode(scratch);

	const USHORT keyLength =
		ROUNDUP(BTR_key_length(tdbb, relation, scratch->index), sizeof(SLONG));

	return FB_NEW_POOL(getPool())
		IndexTableScan(csb, getAlias(), stream, relation, indexNode, keyLength,
			navigationCandidate->selectivity);
}