diff --git a/src/jrd/btr.cpp b/src/jrd/btr.cpp index f80f250f37..b98ce8211c 100644 --- a/src/jrd/btr.cpp +++ b/src/jrd/btr.cpp @@ -518,6 +518,7 @@ bool BTR_description(thread_db* tdbb, jrd_rel* relation, index_root_page* root, idx->idx_expression_statement = nullptr; idx->idx_condition = nullptr; idx->idx_condition_statement = nullptr; + idx->idx_fraction = 1.0; // pick up field ids and type descriptions for each of the fields const UCHAR* ptr = (UCHAR*) root + irt_desc->irt_desc; diff --git a/src/jrd/btr.h b/src/jrd/btr.h index da7eb4f697..290bea1838 100644 --- a/src/jrd/btr.h +++ b/src/jrd/btr.h @@ -70,7 +70,8 @@ struct index_desc dsc idx_expression_desc; // descriptor for expression result Statement* idx_expression_statement; // stored statement for expression evaluation BoolExprNode* idx_condition; // node tree for index condition - Statement* idx_condition_statement; // stored statement for index condition + Statement* idx_condition_statement; // stored statement for index condition + float idx_fraction; // fraction of keys included in the index // This structure should exactly match IRTD structure for current ODS struct idx_repeat { diff --git a/src/jrd/optimizer/Optimizer.h b/src/jrd/optimizer/Optimizer.h index 0701610789..416df6fd4b 100644 --- a/src/jrd/optimizer/Optimizer.h +++ b/src/jrd/optimizer/Optimizer.h @@ -45,13 +45,12 @@ namespace Jrd { // AB: 2005-11-05 // Constants below needs some discussions and ideas +const double REDUCE_SELECTIVITY_FACTOR_EQUALITY = 0.001; const double REDUCE_SELECTIVITY_FACTOR_BETWEEN = 0.0025; const double REDUCE_SELECTIVITY_FACTOR_LESS = 0.05; const double REDUCE_SELECTIVITY_FACTOR_GREATER = 0.05; const double REDUCE_SELECTIVITY_FACTOR_STARTING = 0.01; - -const double REDUCE_SELECTIVITY_FACTOR_EQUALITY = 0.1; -const double REDUCE_SELECTIVITY_FACTOR_INEQUALITY = 0.3; +const double REDUCE_SELECTIVITY_FACTOR_OTHER = 0.01; const double MAXIMUM_SELECTIVITY = 1.0; const double DEFAULT_SELECTIVITY = 0.1; @@ -347,11 +346,38 @@ public: static double getSelectivity(const BoolExprNode* node) { - const auto cmpNode = nodeAs(node); + if (const auto cmpNode = nodeAs(node)) + { + switch (cmpNode->blrOp) + { + case blr_eql: + case blr_equiv: + return REDUCE_SELECTIVITY_FACTOR_EQUALITY; - return (cmpNode && cmpNode->blrOp == blr_eql) ? - REDUCE_SELECTIVITY_FACTOR_EQUALITY : - REDUCE_SELECTIVITY_FACTOR_INEQUALITY; + case blr_gtr: + case blr_geq: + return REDUCE_SELECTIVITY_FACTOR_GREATER; + + case blr_lss: + case blr_leq: + return REDUCE_SELECTIVITY_FACTOR_LESS; + + case blr_between: + return REDUCE_SELECTIVITY_FACTOR_BETWEEN; + + case blr_starting: + return REDUCE_SELECTIVITY_FACTOR_STARTING; + + default: + break; + } + } + else if (nodeIs(node)) + { + return REDUCE_SELECTIVITY_FACTOR_EQUALITY; + } + + return REDUCE_SELECTIVITY_FACTOR_OTHER; } static void adjustSelectivity(double& selectivity, double factor, double cardinality) @@ -518,7 +544,7 @@ struct IndexScratchSegment struct IndexScratch { - IndexScratch(MemoryPool& p, index_desc* idx, double cardinality); + IndexScratch(MemoryPool& p, index_desc* idx); IndexScratch(MemoryPool& p, const IndexScratch& other); index_desc* index = nullptr; // index descriptor @@ -533,6 +559,7 @@ struct IndexScratch bool useMultiStartingKeys = false; // Use INTL_KEY_MULTI_STARTING Firebird::ObjectsArray segments; + MatchedBooleanList matches; // matched booleans (partial indices only) }; typedef Firebird::ObjectsArray IndexScratchList; @@ -590,9 +617,8 @@ public: protected: void analyzeNavigation(const InversionCandidateList& inversions); - bool betterInversion(const InversionCandidate* inv1, const InversionCandidate* inv2, - bool ignoreUnmatched) const; - bool checkIndexCondition(const index_desc& idx) const; + bool betterInversion(const InversionCandidate* inv1, const InversionCandidate* inv2) const; + bool checkIndexCondition(index_desc& idx, MatchedBooleanList& matches) const; bool checkIndexExpression(const index_desc* idx, ValueExprNode* node) const; InversionNode* composeInversion(InversionNode* node1, InversionNode* node2, InversionNode::Type node_type) const; diff --git a/src/jrd/optimizer/Retrieval.cpp b/src/jrd/optimizer/Retrieval.cpp index f0ec24d1a4..b4469a0073 100644 --- a/src/jrd/optimizer/Retrieval.cpp +++ b/src/jrd/optimizer/Retrieval.cpp @@ -107,8 +107,8 @@ namespace } // namespace -IndexScratch::IndexScratch(MemoryPool& p, index_desc* idx, double card) - : index(idx), cardinality(card), segments(p) +IndexScratch::IndexScratch(MemoryPool& p, index_desc* idx) + : index(idx), segments(p), matches(p) { segments.resize(index->idx_count); } @@ -124,7 +124,8 @@ IndexScratch::IndexScratch(MemoryPool& p, const IndexScratch& other) nonFullMatchedSegments(other.nonFullMatchedSegments), usePartialKey(other.usePartialKey), useMultiStartingKeys(other.useMultiStartingKeys), - segments(p, other.segments) + segments(p, other.segments), + matches(p, other.matches) {} @@ -144,19 +145,24 @@ Retrieval::Retrieval(thread_db* aTdbb, Optimizer* opt, StreamType streamNumber, indexScratches(getPool()), inversionCandidates(getPool()) { - const Database* const dbb = tdbb->getDatabase(); + const auto dbb = tdbb->getDatabase(); const auto tail = &csb->csb_rpt[stream]; - relation = tail->csb_relation; fb_assert(relation); if (!tail->csb_idx) return; + MatchedBooleanList matches; + for (auto& index : *tail->csb_idx) { - if ((index.idx_flags & idx_condition) && !checkIndexCondition(index)) + matches.clear(); + + index.idx_fraction = MAXIMUM_SELECTIVITY; + + if ((index.idx_flags & idx_condition) && !checkIndexCondition(index, matches)) continue; const auto length = ROUNDUP(BTR_key_length(tdbb, relation, &index), sizeof(SLONG)); @@ -171,12 +177,16 @@ Retrieval::Retrieval(thread_db* aTdbb, Optimizer* opt, StreamType streamNumber, // Compound indexes are generally less compressed. const double factor = (index.idx_count == 1) ? 0.5 : 0.7; - double cardinality = tail->csb_cardinality; + double cardinality = tail->csb_cardinality * index.idx_fraction; cardinality *= (2 + length * factor); cardinality /= (dbb->dbb_page_size - BTR_SIZE); cardinality = MAX(cardinality, MINIMUM_CARDINALITY); - indexScratches.add(IndexScratch(getPool(), &index, cardinality)); + IndexScratch scratch(getPool(), &index); + scratch.cardinality = cardinality; + scratch.matches.assign(matches); + + indexScratches.add(scratch); } } @@ -562,7 +572,7 @@ void Retrieval::analyzeNavigation(const InversionCandidateList& inversions) for (const auto otherMatch : otherCandidate->matches) { if (candidate->matches.exist(otherMatch) && - betterInversion(otherCandidate, candidate, true)) + betterInversion(otherCandidate, candidate)) { usableIndex = false; break; @@ -586,15 +596,14 @@ void Retrieval::analyzeNavigation(const InversionCandidateList& inversions) // If no inversion candidate is found, create a fake one representing full index scan candidate = FB_NEW_POOL(getPool()) InversionCandidate(getPool()); - candidate->cost = indexScratch.cardinality; + candidate->cost = DEFAULT_INDEX_COST + indexScratch.cardinality; candidate->indexes = 1; candidate->scratch = &indexScratch; - candidate->nonFullMatchedSegments = indexScratch.segments.getCount(); tempCandidates.add(candidate); } if (!bestCandidate || - betterInversion(candidate, bestCandidate, false)) + betterInversion(candidate, bestCandidate)) { bestCandidate = candidate; } @@ -611,8 +620,7 @@ void Retrieval::analyzeNavigation(const InversionCandidateList& inversions) } bool Retrieval::betterInversion(const InversionCandidate* inv1, - const InversionCandidate* inv2, - bool ignoreUnmatched) const + const InversionCandidate* inv2) const { // Return true if inversion1 is *better* than inversion2. // It's mostly about the retrieval cost, but other aspects are also taken into account. @@ -662,24 +670,23 @@ bool Retrieval::betterInversion(const InversionCandidate* inv1, { // If the "same" costs then compare with the nr of unmatched segments, // how many indexes and matched segments. First compare number of indexes. - int compareSelectivity = (inv1->indexes - inv2->indexes); + int diff = (inv1->indexes - inv2->indexes); - if (compareSelectivity == 0) + if (diff == 0) { // For the same number of indexes compare number of matched segments. // Note the inverted condition: the more matched segments the better. - compareSelectivity = (inv2->matchedSegments - inv1->matchedSegments); + diff = (inv2->matchedSegments - inv1->matchedSegments); - if (compareSelectivity == 0 && !ignoreUnmatched) + if (diff == 0) { // For the same number of matched segments // compare ones that aren't full matched - compareSelectivity = - (inv1->nonFullMatchedSegments - inv2->nonFullMatchedSegments); + diff = (inv1->nonFullMatchedSegments - inv2->nonFullMatchedSegments); } } - if (compareSelectivity < 0) + if (diff < 0) return true; } else if (cost1 < cost2) @@ -690,20 +697,22 @@ bool Retrieval::betterInversion(const InversionCandidate* inv1, return false; } -bool Retrieval::checkIndexCondition(const index_desc& idx) const +bool Retrieval::checkIndexCondition(index_desc& idx, MatchedBooleanList& matches) const { fb_assert(idx.idx_condition); if (!idx.idx_condition->containsStream(0, true)) return false; + fb_assert(matches.isEmpty()); + auto iter = optimizer->getConjuncts(outerFlag, innerFlag); BoolExprNodeStack idxConjuncts; const auto conjunctCount = optimizer->decomposeBoolean(idx.idx_condition, idxConjuncts); fb_assert(conjunctCount); - unsigned matchCount = 0; + idx.idx_fraction = MAXIMUM_SELECTIVITY; for (BoolExprNodeStack::const_iterator idxIter(idxConjuncts); idxIter.hasData(); ++idxIter) @@ -720,7 +729,7 @@ bool Retrieval::checkIndexCondition(const index_desc& idx) const if (binaryNode->arg1->sameAs(*iter, true) || binaryNode->arg2->sameAs(*iter, true)) { - matchCount++; + matches.add(*iter); break; } } @@ -741,14 +750,14 @@ bool Retrieval::checkIndexCondition(const index_desc& idx) const if (cmpNode->arg1->sameAs(missingNode->arg, true) || cmpNode->arg2->sameAs(missingNode->arg, true)) { - matchCount++; + matches.add(*iter); break; } if (cmpNode->arg3 && cmpNode->arg3->sameAs(missingNode->arg, true)) { - matchCount++; + matches.add(*iter); break; } } @@ -761,13 +770,15 @@ bool Retrieval::checkIndexCondition(const index_desc& idx) const { if (idxIter.object()->sameAs(*iter, true)) { - matchCount++; + matches.add(*iter); break; } } + + idx.idx_fraction *= optimizer->getSelectivity(boolean); } - return (matchCount >= conjunctCount); + return (matches.getCount() >= conjunctCount); } bool Retrieval::checkIndexExpression(const index_desc* idx, ValueExprNode* node) const @@ -813,13 +824,14 @@ void Retrieval::getInversionCandidates(InversionCandidateList& inversions, scratch.usePartialKey = false; scratch.useMultiStartingKeys = false; + const auto idx = scratch.index; + if (scratch.candidate) { - matches.clear(); - scratch.selectivity = MAXIMUM_SELECTIVITY; + matches.assign(scratch.matches); + scratch.selectivity = idx->idx_fraction; bool unique = false; - const auto idx = scratch.index; for (unsigned j = 0; j < scratch.segments.getCount(); j++) { @@ -988,6 +1000,17 @@ void Retrieval::getInversionCandidates(InversionCandidateList& inversions, inversions.add(invCandidate); } } + else if (idx->idx_flags & idx_condition) + { + const auto invCandidate = FB_NEW_POOL(getPool()) InversionCandidate(getPool()); + invCandidate->selectivity = idx->idx_fraction; + invCandidate->cost = DEFAULT_INDEX_COST + scratch.cardinality; + invCandidate->indexes = 1; + invCandidate->scratch = &scratch; + invCandidate->matches.assign(scratch.matches); + + inversions.add(invCandidate); + } } } @@ -1338,7 +1361,7 @@ InversionCandidate* Retrieval::makeInversion(InversionCandidateList& inversions) break; } - if (betterInversion(currentInv, bestCandidate, false)) + if (betterInversion(currentInv, bestCandidate)) bestCandidate = currentInv; } } @@ -1547,6 +1570,14 @@ bool Retrieval::matchBoolean(IndexScratch* indexScratch, const auto idx = indexScratch->index; + if (idx->idx_flags & idx_condition) + { + // If index condition matches the boolean, this should not be + // considered a match. Full index scan will be used instead. + if (idx->idx_condition->sameAs(boolean, true)) + return false; + } + if (idx->idx_flags & idx_expression) { // see if one side or the other is matchable to the index expression