Allow full index scans for partial indices

2025-01-22 18:03:03 +01:00 · 2022-07-22 12:11:04 +03:00 · 2022-07-22 12:11:04 +03:00 · 858a2828ab
commit 858a2828ab
parent 7ff8bd7ca0
4 changed files with 103 additions and 44 deletions
--- a/src/jrd/btr.cpp
+++ b/src/jrd/btr.cpp
@@ -518,6 +518,7 @@ bool BTR_description(thread_db* tdbb, jrd_rel* relation, index_root_page* root,
 	idx->idx_expression_statement = nullptr;
 	idx->idx_condition = nullptr;
 	idx->idx_condition_statement = nullptr;
+	idx->idx_fraction = 1.0;

 	// pick up field ids and type descriptions for each of the fields
 	const UCHAR* ptr = (UCHAR*) root + irt_desc->irt_desc;
--- a/src/jrd/btr.h
+++ b/src/jrd/btr.h
@@ -70,7 +70,8 @@ struct index_desc
 	dsc		idx_expression_desc;			// descriptor for expression result
 	Statement* idx_expression_statement;	// stored statement for expression evaluation
 	BoolExprNode* idx_condition;			// node tree for index condition
-	Statement* idx_condition_statement;	// stored statement for index condition
+	Statement* idx_condition_statement;		// stored statement for index condition
+	float idx_fraction;						// fraction of keys included in the index
 	// This structure should exactly match IRTD structure for current ODS
 	struct idx_repeat
 	{
--- a/src/jrd/optimizer/Optimizer.h
+++ b/src/jrd/optimizer/Optimizer.h
@@ -45,13 +45,12 @@ namespace Jrd {

 // AB: 2005-11-05
 // Constants below needs some discussions and ideas
+const double REDUCE_SELECTIVITY_FACTOR_EQUALITY = 0.001;
 const double REDUCE_SELECTIVITY_FACTOR_BETWEEN = 0.0025;
 const double REDUCE_SELECTIVITY_FACTOR_LESS = 0.05;
 const double REDUCE_SELECTIVITY_FACTOR_GREATER = 0.05;
 const double REDUCE_SELECTIVITY_FACTOR_STARTING = 0.01;
-
-const double REDUCE_SELECTIVITY_FACTOR_EQUALITY = 0.1;
-const double REDUCE_SELECTIVITY_FACTOR_INEQUALITY = 0.3;
+const double REDUCE_SELECTIVITY_FACTOR_OTHER = 0.01;

 const double MAXIMUM_SELECTIVITY = 1.0;
 const double DEFAULT_SELECTIVITY = 0.1;
@@ -347,11 +346,38 @@ public:

 	static double getSelectivity(const BoolExprNode* node)
 	{
-		const auto cmpNode = nodeAs<ComparativeBoolNode>(node);
+		if (const auto cmpNode = nodeAs<ComparativeBoolNode>(node))
+		{
+			switch (cmpNode->blrOp)
+			{
+			case blr_eql:
+			case blr_equiv:
+				return REDUCE_SELECTIVITY_FACTOR_EQUALITY;

-		return (cmpNode && cmpNode->blrOp == blr_eql) ?
-			REDUCE_SELECTIVITY_FACTOR_EQUALITY :
-			REDUCE_SELECTIVITY_FACTOR_INEQUALITY;
+			case blr_gtr:
+			case blr_geq:
+				return REDUCE_SELECTIVITY_FACTOR_GREATER;
+
+			case blr_lss:
+			case blr_leq:
+				return REDUCE_SELECTIVITY_FACTOR_LESS;
+
+			case blr_between:
+				return REDUCE_SELECTIVITY_FACTOR_BETWEEN;
+
+			case blr_starting:
+				return REDUCE_SELECTIVITY_FACTOR_STARTING;
+
+			default:
+				break;
+			}
+		}
+		else if (nodeIs<MissingBoolNode>(node))
+		{
+			return REDUCE_SELECTIVITY_FACTOR_EQUALITY;
+		}
+
+		return REDUCE_SELECTIVITY_FACTOR_OTHER;
 	}

 	static void adjustSelectivity(double& selectivity, double factor, double cardinality)
@@ -518,7 +544,7 @@ struct IndexScratchSegment

 struct IndexScratch
 {
-	IndexScratch(MemoryPool& p, index_desc* idx, double cardinality);
+	IndexScratch(MemoryPool& p, index_desc* idx);
 	IndexScratch(MemoryPool& p, const IndexScratch& other);

 	index_desc* index = nullptr;				// index descriptor
@@ -533,6 +559,7 @@ struct IndexScratch
 	bool useMultiStartingKeys = false;		// Use INTL_KEY_MULTI_STARTING

 	Firebird::ObjectsArray<IndexScratchSegment> segments;
+	MatchedBooleanList matches;					// matched booleans (partial indices only)
 };

 typedef Firebird::ObjectsArray<IndexScratch> IndexScratchList;
@@ -590,9 +617,8 @@ public:

 protected:
 	void analyzeNavigation(const InversionCandidateList& inversions);
-	bool betterInversion(const InversionCandidate* inv1, const InversionCandidate* inv2,
-		bool ignoreUnmatched) const;
-	bool checkIndexCondition(const index_desc& idx) const;
+	bool betterInversion(const InversionCandidate* inv1, const InversionCandidate* inv2) const;
+	bool checkIndexCondition(index_desc& idx, MatchedBooleanList& matches) const;
 	bool checkIndexExpression(const index_desc* idx, ValueExprNode* node) const;
 	InversionNode* composeInversion(InversionNode* node1, InversionNode* node2,
 		InversionNode::Type node_type) const;
--- a/src/jrd/optimizer/Retrieval.cpp
+++ b/src/jrd/optimizer/Retrieval.cpp
@@ -107,8 +107,8 @@ namespace
 } // namespace


-IndexScratch::IndexScratch(MemoryPool& p, index_desc* idx, double card)
-	: index(idx), cardinality(card), segments(p)
+IndexScratch::IndexScratch(MemoryPool& p, index_desc* idx)
+	: index(idx), segments(p), matches(p)
 {
 	segments.resize(index->idx_count);
 }
@@ -124,7 +124,8 @@ IndexScratch::IndexScratch(MemoryPool& p, const IndexScratch& other)
 	  nonFullMatchedSegments(other.nonFullMatchedSegments),
 	  usePartialKey(other.usePartialKey),
 	  useMultiStartingKeys(other.useMultiStartingKeys),
-	  segments(p, other.segments)
+	  segments(p, other.segments),
+	  matches(p, other.matches)
 {}


@@ -144,19 +145,24 @@ Retrieval::Retrieval(thread_db* aTdbb, Optimizer* opt, StreamType streamNumber,
 	  indexScratches(getPool()),
 	  inversionCandidates(getPool())
 {
-	const Database* const dbb = tdbb->getDatabase();
+	const auto dbb = tdbb->getDatabase();

 	const auto tail = &csb->csb_rpt[stream];
-
 	relation = tail->csb_relation;
 	fb_assert(relation);

 	if (!tail->csb_idx)
 		return;

+	MatchedBooleanList matches;
+
 	for (auto& index : *tail->csb_idx)
 	{
-		if ((index.idx_flags & idx_condition) && !checkIndexCondition(index))
+		matches.clear();
+
+		index.idx_fraction = MAXIMUM_SELECTIVITY;
+
+		if ((index.idx_flags & idx_condition) && !checkIndexCondition(index, matches))
 			continue;

 		const auto length = ROUNDUP(BTR_key_length(tdbb, relation, &index), sizeof(SLONG));
@@ -171,12 +177,16 @@ Retrieval::Retrieval(thread_db* aTdbb, Optimizer* opt, StreamType streamNumber,
 		// Compound indexes are generally less compressed.
 		const double factor = (index.idx_count == 1) ? 0.5 : 0.7;

-		double cardinality = tail->csb_cardinality;
+		double cardinality = tail->csb_cardinality * index.idx_fraction;
 		cardinality *= (2 + length * factor);
 		cardinality /= (dbb->dbb_page_size - BTR_SIZE);
 		cardinality = MAX(cardinality, MINIMUM_CARDINALITY);

-		indexScratches.add(IndexScratch(getPool(), &index, cardinality));
+		IndexScratch scratch(getPool(), &index);
+		scratch.cardinality = cardinality;
+		scratch.matches.assign(matches);
+
+		indexScratches.add(scratch);
 	}
 }

@@ -562,7 +572,7 @@ void Retrieval::analyzeNavigation(const InversionCandidateList& inversions)
 					for (const auto otherMatch : otherCandidate->matches)
 					{
 						if (candidate->matches.exist(otherMatch) &&
-							betterInversion(otherCandidate, candidate, true))
+							betterInversion(otherCandidate, candidate))
 						{
 							usableIndex = false;
 							break;
@@ -586,15 +596,14 @@ void Retrieval::analyzeNavigation(const InversionCandidateList& inversions)
 			// If no inversion candidate is found, create a fake one representing full index scan

 			candidate = FB_NEW_POOL(getPool()) InversionCandidate(getPool());
-			candidate->cost = indexScratch.cardinality;
+			candidate->cost = DEFAULT_INDEX_COST + indexScratch.cardinality;
 			candidate->indexes = 1;
 			candidate->scratch = &indexScratch;
-			candidate->nonFullMatchedSegments = indexScratch.segments.getCount();
 			tempCandidates.add(candidate);
 		}

 		if (!bestCandidate ||
-			betterInversion(candidate, bestCandidate, false))
+			betterInversion(candidate, bestCandidate))
 		{
 			bestCandidate = candidate;
 		}
@@ -611,8 +620,7 @@ void Retrieval::analyzeNavigation(const InversionCandidateList& inversions)
 }

 bool Retrieval::betterInversion(const InversionCandidate* inv1,
-								const InversionCandidate* inv2,
-								bool ignoreUnmatched) const
+								const InversionCandidate* inv2) const
 {
 	// Return true if inversion1 is *better* than inversion2.
 	// It's mostly about the retrieval cost, but other aspects are also taken into account.
@@ -662,24 +670,23 @@ bool Retrieval::betterInversion(const InversionCandidate* inv1,
 			{
 				// If the "same" costs then compare with the nr of unmatched segments,
 				// how many indexes and matched segments. First compare number of indexes.
-				int compareSelectivity = (inv1->indexes - inv2->indexes);
+				int diff = (inv1->indexes - inv2->indexes);

-				if (compareSelectivity == 0)
+				if (diff == 0)
 				{
 					// For the same number of indexes compare number of matched segments.
 					// Note the inverted condition: the more matched segments the better.
-					compareSelectivity = (inv2->matchedSegments - inv1->matchedSegments);
+					diff = (inv2->matchedSegments - inv1->matchedSegments);

-					if (compareSelectivity == 0 && !ignoreUnmatched)
+					if (diff == 0)
 					{
 						// For the same number of matched segments
 						// compare ones that aren't full matched
-						compareSelectivity =
-							(inv1->nonFullMatchedSegments - inv2->nonFullMatchedSegments);
+						diff = (inv1->nonFullMatchedSegments - inv2->nonFullMatchedSegments);
 					}
 				}

-				if (compareSelectivity < 0)
+				if (diff < 0)
 					return true;
 			}
 			else if (cost1 < cost2)
@@ -690,20 +697,22 @@ bool Retrieval::betterInversion(const InversionCandidate* inv1,
 	return false;
 }

-bool Retrieval::checkIndexCondition(const index_desc& idx) const
+bool Retrieval::checkIndexCondition(index_desc& idx, MatchedBooleanList& matches) const
 {
 	fb_assert(idx.idx_condition);

 	if (!idx.idx_condition->containsStream(0, true))
 		return false;

+	fb_assert(matches.isEmpty());
+
 	auto iter = optimizer->getConjuncts(outerFlag, innerFlag);

 	BoolExprNodeStack idxConjuncts;
 	const auto conjunctCount = optimizer->decomposeBoolean(idx.idx_condition, idxConjuncts);
 	fb_assert(conjunctCount);

-	unsigned matchCount = 0;
+	idx.idx_fraction = MAXIMUM_SELECTIVITY;

 	for (BoolExprNodeStack::const_iterator idxIter(idxConjuncts);
 		idxIter.hasData(); ++idxIter)
@@ -720,7 +729,7 @@ bool Retrieval::checkIndexCondition(const index_desc& idx) const
 				if (binaryNode->arg1->sameAs(*iter, true) ||
 					binaryNode->arg2->sameAs(*iter, true))
 				{
-					matchCount++;
+					matches.add(*iter);
 					break;
 				}
 			}
@@ -741,14 +750,14 @@ bool Retrieval::checkIndexCondition(const index_desc& idx) const
 					if (cmpNode->arg1->sameAs(missingNode->arg, true) ||
 						cmpNode->arg2->sameAs(missingNode->arg, true))
 					{
-						matchCount++;
+						matches.add(*iter);
 						break;
 					}

 					if (cmpNode->arg3 &&
 						cmpNode->arg3->sameAs(missingNode->arg, true))
 					{
-						matchCount++;
+						matches.add(*iter);
 						break;
 					}
 				}
@@ -761,13 +770,15 @@ bool Retrieval::checkIndexCondition(const index_desc& idx) const
 		{
 			if (idxIter.object()->sameAs(*iter, true))
 			{
-				matchCount++;
+				matches.add(*iter);
 				break;
 			}
 		}
+
+		idx.idx_fraction *= optimizer->getSelectivity(boolean);
 	}

-	return (matchCount >= conjunctCount);
+	return (matches.getCount() >= conjunctCount);
 }

 bool Retrieval::checkIndexExpression(const index_desc* idx, ValueExprNode* node) const
@@ -813,13 +824,14 @@ void Retrieval::getInversionCandidates(InversionCandidateList& inversions,
 		scratch.usePartialKey = false;
 		scratch.useMultiStartingKeys = false;

+		const auto idx = scratch.index;
+
 		if (scratch.candidate)
 		{
-			matches.clear();
-			scratch.selectivity = MAXIMUM_SELECTIVITY;
+			matches.assign(scratch.matches);
+			scratch.selectivity = idx->idx_fraction;

 			bool unique = false;
-			const auto idx = scratch.index;

 			for (unsigned j = 0; j < scratch.segments.getCount(); j++)
 			{
@@ -988,6 +1000,17 @@ void Retrieval::getInversionCandidates(InversionCandidateList& inversions,
 				inversions.add(invCandidate);
 			}
 		}
+		else if (idx->idx_flags & idx_condition)
+		{
+			const auto invCandidate = FB_NEW_POOL(getPool()) InversionCandidate(getPool());
+			invCandidate->selectivity = idx->idx_fraction;
+			invCandidate->cost = DEFAULT_INDEX_COST + scratch.cardinality;
+			invCandidate->indexes = 1;
+			invCandidate->scratch = &scratch;
+			invCandidate->matches.assign(scratch.matches);
+
+			inversions.add(invCandidate);
+		}
 	}
 }

@@ -1338,7 +1361,7 @@ InversionCandidate* Retrieval::makeInversion(InversionCandidateList& inversions)
 						break;
 					}

-					if (betterInversion(currentInv, bestCandidate, false))
+					if (betterInversion(currentInv, bestCandidate))
 						bestCandidate = currentInv;
 				}
 			}
@@ -1547,6 +1570,14 @@ bool Retrieval::matchBoolean(IndexScratch* indexScratch,

 	const auto idx = indexScratch->index;

+	if (idx->idx_flags & idx_condition)
+	{
+		// If index condition matches the boolean, this should not be
+		// considered a match. Full index scan will be used instead.
+		if (idx->idx_condition->sameAs(boolean, true))
+			return false;
+	}
+
 	if (idx->idx_flags & idx_expression)
 	{
 		// see if one side or the other is matchable to the index expression