8
0
mirror of https://github.com/FirebirdSQL/firebird.git synced 2025-01-31 04:03:03 +01:00
firebird-mirror/src/jrd/opt.cpp

3835 lines
105 KiB
C++
Raw Normal View History

2001-05-23 15:26:42 +02:00
/*
* PROGRAM: JRD Access Method
2003-10-16 10:51:06 +02:00
* MODULE: opt.cpp
2001-05-23 15:26:42 +02:00
* DESCRIPTION: Optimizer / record selection expression compiler
*
* The contents of this file are subject to the Interbase Public
* License Version 1.0 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy
* of the License at http://www.Inprise.com/IPL.html
*
* Software distributed under the License is distributed on an
* "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express
* or implied. See the License for the specific language governing
* rights and limitations under the License.
*
* The Original Code was created by Inprise Corporation
* and its predecessors. Portions created by Inprise Corporation are
* Copyright (C) Inprise Corporation.
*
* All Rights Reserved.
* Contributor(s): ______________________________________.
2005-05-28 00:45:31 +02:00
* 2002.10.12: Nickolay Samofatov: Fixed problems with wrong results produced by
* outer joins
2002-07-01 18:59:09 +02:00
* 2001.07.28: John Bellardo: Added code to handle rse_skip nodes.
* 2001.07.17 Claudio Valderrama: Stop crash with indices and recursive calls
* of OPT_compile: indicator csb_indices set to zero after used memory is
* returned to the free pool.
* 2001.02.15: Claudio Valderrama: Don't obfuscate the plan output if a selectable
* stored procedure doesn't access tables, views or other procedures directly.
2002-10-30 07:40:58 +01:00
* 2002.10.29 Sean Leyne - Removed obsolete "Netware" port
* 2002.10.30: Arno Brinkman: Changes made to gen_retrieval, OPT_compile and make_inversion.
* Procedure sort_indices added. The changes in gen_retrieval are that now
2005-05-28 00:45:31 +02:00
* an index with high field-count has priority to build an index from.
* Procedure make_inversion is changed so that it does not pick every index
* that comes along; doing so gave slow performance with bad-selectivity indices,
* most of which are foreign keys referencing only a few records.
* 2002.11.01: Arno Brinkman: Added match_indices for better support of OR handling
* in INNER JOIN (gen_join) statements.
* 2002.12.15: Arno Brinkman: Added find_used_streams, so that inside opt_compile all the
* streams are marked active. This causes that more indices can be used for
* a retrieval. With this change BUG SF #219525 is solved too.
2001-05-23 15:26:42 +02:00
*/
#include "firebird.h"
2004-03-22 12:38:23 +01:00
#include "../jrd/common.h"
2004-04-29 00:43:34 +02:00
#include <stdio.h>
2001-05-23 15:26:42 +02:00
#include <string.h>
2003-11-08 17:40:17 +01:00
#include "../jrd/ibase.h"
2001-05-23 15:26:42 +02:00
#include "../jrd/jrd.h"
#include "../jrd/align.h"
#include "../jrd/val.h"
#include "../jrd/req.h"
#include "../jrd/exe.h"
#include "../jrd/lls.h"
#include "../jrd/ods.h"
#include "../jrd/btr.h"
#include "../jrd/sort.h"
#include "../jrd/rse.h"
#include "../jrd/ini.h"
2001-05-23 15:26:42 +02:00
#include "../jrd/intl.h"
#include "../jrd/gdsassert.h"
#include "../jrd/btr_proto.h"
#include "../jrd/cch_proto.h"
#include "../jrd/cmp_proto.h"
#include "../jrd/dpm_proto.h"
#include "../jrd/dsc_proto.h"
2001-05-23 15:26:42 +02:00
#include "../jrd/err_proto.h"
#include "../jrd/ext_proto.h"
#include "../jrd/intl_proto.h"
#include "../jrd/lck_proto.h"
#include "../jrd/met_proto.h"
#include "../jrd/mov_proto.h"
#include "../jrd/opt_proto.h"
#include "../jrd/par_proto.h"
#include "../jrd/gds_proto.h"
#include "../jrd/dbg_proto.h"
#include "../jrd/DataTypeUtil.h"
#include "../jrd/RecordSourceNodes.h"
#include "../jrd/VirtualTable.h"
#include "../jrd/DatabaseSnapshot.h"
#include "../jrd/UserManagement.h"
2003-08-19 14:49:33 +02:00
#include "../common/classes/array.h"
#include "../common/classes/objects_array.h"
#include "../jrd/recsrc/RecordSource.h"
#include "../jrd/recsrc/Cursor.h"
#include "../jrd/Optimizer.h"
#include "../dsql/ExprNodes.h"
#include "../dsql/StmtNodes.h"
using namespace Jrd;
using namespace Firebird;
2003-03-07 01:36:44 +01:00
#ifdef DEV_BUILD
#define OPT_DEBUG
#endif
// Default traversal step shared by the visitors: dispatches on the node type and forwards
// the operands of value expressions to visit(). A null node and operand-less nodes yield
// false, class-based expression nodes are handed to call(), and node types that are not
// listed here yield returnOnOthers.
bool JrdNodeVisitor::visitChildren(jrd_nod* node)
{
	if (!node)
		return false;

	switch (node->nod_type)
	{
		case nod_class_exprnode_jrd:
			// nod_arg[0] carries the ExprNode object itself.
			return call(reinterpret_cast<ExprNode*>(node->nod_arg[0]));

		// Nodes with nothing to descend into.
		case nod_argument:
		case nod_current_date:
		case nod_current_role:
		case nod_current_time:
		case nod_current_timestamp:
		case nod_gen_id:
		case nod_gen_id2:
		case nod_internal_info:
		case nod_literal:
		case nod_null:
		case nod_user_name:
		case nod_variable:
			return false;

		// Nodes where a single, specific operand is inspected.
		case nod_cast:
			return visit(node->nod_arg[e_cast_source]);

		case nod_extract:
			return visit(node->nod_arg[e_extract_value]);

		case nod_strlen:
			return visit(node->nod_arg[e_strlen_value]);

		// Nodes whose nod_count operands are all inspected.
		case nod_add:
		case nod_add2:
		case nod_divide:
		case nod_divide2:
		case nod_multiply:
		case nod_multiply2:
		case nod_negate:
		case nod_subtract:
		case nod_subtract2:
		case nod_upcase:
		case nod_lowcase:
		case nod_substr:
		case nod_trim:
		case nod_derived_expr:
		{
			// Every operand is visited, even after one of them has already reported true.
			bool anyHit = false;
			jrd_nod* const* const stop = node->nod_arg + node->nod_count;

			for (jrd_nod* const* arg = node->nod_arg; arg < stop; ++arg)
			{
				if (visit(*arg))
					anyHit = true;
			}

			return anyHit;
		}

		default:
			return returnOnOthers;
	}
}
// Constructs the finder on top of JrdNodeVisitor, passing true and the address of
// ExprNode::jrdPossibleUnknownFinder to the base class.
// NOTE(review): the first argument presumably initialises returnOnOthers (the value
// visitChildren() reports for node types it does not handle) and the second is presumably
// the member invoked for class-based expression nodes - confirm in the class declaration.
PossibleUnknownFinder::PossibleUnknownFinder()
: JrdNodeVisitor(true, &ExprNode::jrdPossibleUnknownFinder)
{
}
// Visits one node for the finder.
//  - a null node, a field, a record version or a dbkey reports false;
//  - boolean connectives and comparison predicates report true as soon as one of their
//    operands reports true, otherwise false;
//  - every other node type is resolved by visitChildren().
bool PossibleUnknownFinder::visit(jrd_nod* node)
{
	DEV_BLKCHK(node, type_nod);

	if (!node)
		return false;

	switch (node->nod_type)
	{
		case nod_field:
		case nod_rec_version:
		case nod_dbkey:
			return false;

		case nod_or:
		case nod_and:
		case nod_like:
		case nod_between:
		case nod_contains:
		case nod_similar:
		case nod_starts:
		case nod_eql:
		case nod_neq:
		case nod_geq:
		case nod_gtr:
		case nod_lss:
		case nod_leq:
		{
			// Stop at the first operand that reports true.
			jrd_nod* const* const stop = node->nod_arg + node->nod_count;

			for (jrd_nod* const* arg = node->nod_arg; arg < stop; ++arg)
			{
				if (visit(*arg))
					return true;
			}

			return false;
		}

		default:
			break;
	}

	return visitChildren(node);
}
// Constructs a finder for the given stream number. The compiler scratch and the stream are
// stored in the members csb and stream; the base class receives true and the address of
// ExprNode::jrdStreamFinder.
// NOTE(review): the first base argument presumably initialises returnOnOthers - confirm in
// the class declaration.
StreamFinder::StreamFinder(CompilerScratch* aCsb, UCHAR aStream)
: JrdNodeVisitor(true, &ExprNode::jrdStreamFinder),
csb(aCsb),
stream(aStream)
{
}
// Reports whether the expression rooted at 'node' refers to the stream this finder was
// constructed with. A null node reports false; node types not listed here are resolved by
// visitChildren().
bool StreamFinder::visit(jrd_nod* node)
{
	DEV_BLKCHK(node, type_nod);

	if (!node)
		return false;

	switch (node->nod_type)
	{
		case nod_field:
		{
			const USHORT fieldStream = (USHORT)(IPTR) node->nod_arg[e_fld_stream];
			return fieldStream == stream;
		}

		case nod_rec_version:
		case nod_dbkey:
		{
			const USHORT nodeStream = (USHORT)(IPTR) node->nod_arg[0];
			return nodeStream == stream;
		}

		// Sub-query predicates: only the record selection expression is inspected.
		case nod_any:
		case nod_unique:
		case nod_ansi_any:
		case nod_ansi_all:
		case nod_exists:
			return visit(node->nod_arg[e_any_rse]);

		case nod_class_recsrcnode_jrd:
		{
			RecordSourceNode* const source = reinterpret_cast<RecordSourceNode*>(node->nod_arg[0]);

			if (source->type == RseNode::TYPE)
			{
				RseNode* const rse = static_cast<RseNode*>(source);

				if ((rse->rse_first && visit(rse->rse_first)) ||
					(rse->rse_skip && visit(rse->rse_skip)) ||
					(rse->rse_boolean && visit(rse->rse_boolean)))
				{
					return true;
				}

				// ASF: the legacy code visited rse_sorted and rse_projection, but since nod_sort
				// was never handled by the visitor, that visit always came back true. The mere
				// presence of either clause therefore counts as a match.
				if (rse->rse_sorted || rse->rse_projection)
					return true;

				return false;
			}

			if (source->type == ProcedureSourceNode::TYPE)
				return visit(static_cast<ProcedureSourceNode*>(source)->inputs);

			return visitChildren(node);
		}

		// Statistical nodes: inspect the record selection expression first, then the value.
		case nod_average:
		case nod_count:
		case nod_from:
		case nod_max:
		case nod_min:
		case nod_total:
		{
			jrd_nod* const statRse = node->nod_arg[e_stat_rse];
			if (statRse && visit(statRse))
				return true;

			jrd_nod* const statValue = node->nod_arg[e_stat_value];
			return statValue && visit(statValue);
		}

		case nod_like:
		case nod_between:
		case nod_contains:
		case nod_similar:
		case nod_starts:
		case nod_eql:
		case nod_neq:
		case nod_geq:
		case nod_gtr:
		case nod_lss:
		case nod_leq:
		case nod_sleuth:
		case nod_missing:
		case nod_value_if:
		case nod_matches:
		case nod_equiv:
		{
			// Stop at the first operand that refers to the stream.
			jrd_nod* const* const stop = node->nod_arg + node->nod_count;

			for (jrd_nod* const* arg = node->nod_arg; arg < stop; ++arg)
			{
				if (visit(*arg))
					return true;
			}

			return false;
		}

		default:
			return visitChildren(node);
	}
}
// Constructs a collector that records stream numbers into the caller-owned sorted array
// 'aStreams' (kept by reference in the member streams). The base class receives false and
// the address of ExprNode::jrdStreamsCollector.
// NOTE(review): the first base argument presumably initialises returnOnOthers - confirm in
// the class declaration.
StreamsCollector::StreamsCollector(SortedArray<int>& aStreams)
: JrdNodeVisitor(false, &ExprNode::jrdStreamsCollector),
streams(aStreams)
{
}
// Adds the stream number of every field, record version and dbkey node found below 'node'
// to the streams array (each number is stored once). The cases handled here report false;
// procedure sources and unlisted node types report whatever the nested visit() or
// visitChildren() call reports.
bool StreamsCollector::visit(jrd_nod* node)
{
	DEV_BLKCHK(node, type_nod);

	if (!node)
		return false;

	switch (node->nod_type)
	{
		case nod_field:
		case nod_rec_version:
		case nod_dbkey:
		{
			// Fields keep their stream in e_fld_stream, the other two in the first argument.
			const int argIndex = (node->nod_type == nod_field) ? e_fld_stream : 0;
			const int streamNumber = (int)(IPTR) node->nod_arg[argIndex];

			if (!streams.exist(streamNumber))
				streams.add(streamNumber);

			return false;
		}

		// Sub-query predicates: only the record selection expression is inspected.
		case nod_any:
		case nod_unique:
		case nod_ansi_any:
		case nod_ansi_all:
		case nod_exists:
			visit(node->nod_arg[e_any_rse]);
			return false;

		case nod_class_recsrcnode_jrd:
		{
			RecordSourceNode* const source = reinterpret_cast<RecordSourceNode*>(node->nod_arg[0]);

			if (source->type == RseNode::TYPE)
			{
				RseNode* const rse = static_cast<RseNode*>(source);

				visit(rse->rse_first);
				visit(rse->rse_skip);
				visit(rse->rse_boolean);

				// ASF: the legacy code also visited rse_sorted and rse_projection, but the
				// visitor never handled nod_sort, so those two are not visited here.
				return false;
			}

			if (source->type == ProcedureSourceNode::TYPE)
				return visit(static_cast<ProcedureSourceNode*>(source)->inputs);

			return visitChildren(node);
		}

		// Statistical nodes: the record selection expression first, then the value.
		case nod_average:
		case nod_count:
		case nod_from:
		case nod_max:
		case nod_min:
		case nod_total:
			visit(node->nod_arg[e_stat_rse]);
			visit(node->nod_arg[e_stat_value]);
			return false;

		case nod_like:
		case nod_between:
		case nod_contains:
		case nod_similar:
		case nod_starts:
		case nod_eql:
		case nod_neq:
		case nod_geq:
		case nod_gtr:
		case nod_lss:
		case nod_leq:
		case nod_sleuth:
		case nod_missing:
		case nod_value_if:
		case nod_matches:
		case nod_equiv:
		{
			// All operands are collected from; there is no early exit.
			jrd_nod* const* const stop = node->nod_arg + node->nod_count;

			for (jrd_nod* const* arg = node->nod_arg; arg < stop; ++arg)
				visit(*arg);

			return false;
		}

		default:
			return visitChildren(node);
	}
}
// Constructs a getter for the given map and shell stream. The initial state is:
// rootNode = true (the next visited node is treated as the root), invalid = false and
// nodeFound = NULL. The base class receives false and the address of
// ExprNode::jrdUnmappedNodeGetter.
// NOTE(review): the first base argument presumably initialises returnOnOthers - confirm in
// the class declaration.
UnmappedNodeGetter::UnmappedNodeGetter(const MapNode* aMap, UCHAR aShellStream)
: JrdNodeVisitor(false, &ExprNode::jrdUnmappedNodeGetter),
map(aMap),
shellStream(aShellStream),
rootNode(true),
invalid(false),
nodeFound(NULL)
{
DEV_BLKCHK(map, type_nod);
}
// Visits one node while looking for the expression behind a mapped field.
//
// A field of shellStream is only accepted as the root node: it is replaced by visiting the
// source expression of the matching map item. Any other occurrence of such a field, or a
// field id outside the map, marks the result invalid. For all other nodes nodeFound is set
// to the node; fields of other streams and operand-less value nodes are accepted as they
// are, the remaining node types are checked through visitChildren().
//
// Returns true while nothing invalid has been seen, false otherwise.
//
// NOTE(review): nodeFound is assigned on every non-mapped visit, including the nested ones
// started by visitChildren(), so after visiting a compound expression it refers to the last
// node visited rather than to the root - confirm this is what the caller expects.
bool UnmappedNodeGetter::visit(jrd_nod* node)
{
	// visitChildren() forwards operand slots without checking them and the other visitors
	// in this file all tolerate a null node. An absent operand has nothing to unmap, so it
	// neither changes the state nor invalidates the expression.
	if (!node)
		return !invalid;

	const bool wasRootNode = rootNode;
	rootNode = false;

	// Check if node is a mapping and if so unmap it, but only for root nodes (not contained in
	// another node). This can be expanded by checking complete expression (Then don't forget to
	// leave aggregate-functions alone in case of aggregate rse).
	// Because this is only to help using an index we keep it simple.
	if (node->nod_type == nod_field && (USHORT)(IPTR) node->nod_arg[e_fld_stream] == shellStream)
	{
		const USHORT fieldId = (USHORT)(IPTR) node->nod_arg[e_fld_id];

		if (!wasRootNode || fieldId >= map->items.getCount())
		{
			invalid = true;
			return false;
		}

		// Check also the expression inside the map, because aggregate
		// functions aren't allowed to be delivered to the WHERE clause.
		if (!visit(map->items[fieldId]->nod_arg[e_asgn_from]))
			invalid = true;

		return !invalid;
	}

	nodeFound = node;

	switch (node->nod_type)
	{
		// Accepted without looking any further.
		case nod_field:
		case nod_argument:
		case nod_current_date:
		case nod_current_role:
		case nod_current_time:
		case nod_current_timestamp:
		case nod_gen_id:
		case nod_gen_id2:
		case nod_internal_info:
		case nod_literal:
		case nod_null:
		case nod_user_name:
		case nod_variable:
			break;

		default:
			if (!visitChildren(node))
				invalid = true;
			break;
	}

	return !invalid;
}
namespace Jrd
{
class River
{
public:
River(CompilerScratch* csb, RecordSource* rsb, RecordSourceNode* node, size_t count, UCHAR* streams)
: m_rsb(rsb), m_node(node), m_streams(csb->csb_pool)
{
m_streams.resize(count);
memcpy(m_streams.begin(), streams, count);
}
River(CompilerScratch* csb, RecordSource* rsb, RiverList& rivers)
: m_rsb(rsb), m_node(NULL), m_streams(csb->csb_pool)
{
for (River** iter = rivers.begin(); iter < rivers.end(); iter++)
{
River* const sub_river = *iter;
const size_t count = m_streams.getCount();
const size_t delta = sub_river->m_streams.getCount();
m_streams.grow(count + delta);
memcpy(m_streams.begin() + count, sub_river->m_streams.begin(), delta);
}
}
RecordSource* getRecordSource() const
{
return m_rsb;
}
size_t getStreamCount() const
{
return m_streams.getCount();
}
const UCHAR* getStreams() const
{
return m_streams.begin();
}
void activate(CompilerScratch* csb)
{
for (const UCHAR* iter = m_streams.begin(); iter < m_streams.end(); iter++)
{
csb->csb_rpt[*iter].csb_flags |= csb_active;
}
}
void deactivate(CompilerScratch* csb)
{
for (const UCHAR* iter = m_streams.begin(); iter < m_streams.end(); iter++)
{
csb->csb_rpt[*iter].csb_flags &= ~csb_active;
}
}
2010-01-26 09:20:27 +01:00
bool isReferenced(const jrd_nod* node) const
{
bool field_found = false;
if (isReferenced(node, field_found))
return field_found;
2010-01-26 09:20:27 +01:00
return false;
}
bool isComputable(CompilerScratch* csb) const
{
return m_node ? m_node->computable(csb, -1, false, false, NULL) : true;
}
protected:
2010-01-26 09:20:27 +01:00
bool isReferenced(const jrd_nod* node, bool& field_found) const
{
if (node->nod_type == nod_field)
{
for (const UCHAR* iter = m_streams.begin(); iter < m_streams.end(); iter++)
{
if ((USHORT)(IPTR) node->nod_arg[e_fld_stream] == *iter)
{
field_found = true;
return true;
}
}
return false;
}
const jrd_nod* const* ptr = node->nod_arg;
for (const jrd_nod* const* const end = ptr + node->nod_count; ptr < end; ptr++)
{
if (!isReferenced(*ptr, field_found))
{
return false;
}
}
return true;
}
RecordSource* m_rsb;
RecordSourceNode* const m_node;
2010-01-13 12:46:54 +01:00
StreamList m_streams;
};
// A river that joins all rivers of a list: its stream list is the concatenation of theirs
// and its record source is either the single input's record source or a NestedLoopJoin
// over all of them. The input list is emptied by the constructor.
class CrossJoin : public River
{
public:
	CrossJoin(CompilerScratch* csb, RiverList& rivers)
		: River(csb, NULL, rivers)
	{
		const size_t riverCount = rivers.getCount();
		fb_assert(riverCount);

		if (riverCount == 1)
			m_rsb = rivers.front()->getRecordSource();
		else
		{
			HalfStaticArray<RecordSource*, OPT_STATIC_ITEMS> ordered;

			// Reorder the input rivers according to their possible inter-dependencies:
			// keep sweeping the list, taking every river that is computable by now and
			// activating its streams so that rivers depending on it can follow.
			// NOTE(review): a sweep that takes no river is not detected, so this relies on
			// every river eventually becoming computable - confirm with the callers.
			while (ordered.getCount() < riverCount)
			{
				for (River** riverPtr = rivers.begin(); riverPtr < rivers.end(); ++riverPtr)
				{
					River* const candidate = *riverPtr;
					RecordSource* const candidateRsb = candidate->getRecordSource();

					if (ordered.exist(candidateRsb) || !candidate->isComputable(csb))
						continue;

					ordered.add(candidateRsb);
					candidate->activate(csb);
				}
			}

			m_rsb = FB_NEW(csb->csb_pool) NestedLoopJoin(csb, riverCount, ordered.begin());
		}

		rivers.clear();
	}
};
} // namespace
static bool augment_stack(jrd_nod*, NodeStack&);
static void check_indices(const CompilerScratch::csb_repeat*);
static void check_sorts(RseNode*);
2004-01-13 10:52:19 +01:00
static void class_mask(USHORT, jrd_nod**, ULONG *);
2009-04-03 12:07:55 +02:00
static jrd_nod* compose(jrd_nod**, jrd_nod*, nod_t);
static bool check_for_nod_from(const jrd_nod*);
static SLONG decompose(thread_db*, jrd_nod*, NodeStack&, CompilerScratch*);
static USHORT distribute_equalities(NodeStack&, CompilerScratch*, USHORT);
2008-12-22 10:00:05 +01:00
static void find_index_relationship_streams(thread_db*, OptimizerBlk*, const UCHAR*, UCHAR*, UCHAR*);
2010-08-29 22:20:19 +02:00
static void form_rivers(thread_db*, OptimizerBlk*, const UCHAR*, RiverList&, SortNode**, PlanNode*);
static bool form_river(thread_db*, OptimizerBlk*, USHORT, USHORT, UCHAR*, RiverList&, SortNode**);
2010-08-29 22:20:19 +02:00
static void gen_join(thread_db*, OptimizerBlk*, const UCHAR*, RiverList&, SortNode**, PlanNode*);
static RecordSource* gen_outer(thread_db*, OptimizerBlk*, RseNode*, RiverList&, SortNode**);
static RecordSource* gen_residual_boolean(thread_db*, OptimizerBlk*, RecordSource*);
static RecordSource* gen_retrieval(thread_db*, OptimizerBlk*, SSHORT, SortNode**, bool, bool, jrd_nod**);
static bool gen_equi_join(thread_db*, OptimizerBlk*, RiverList&);
static jrd_nod* make_inference_node(CompilerScratch*, jrd_nod*, jrd_nod*, jrd_nod*);
static bool map_equal(const jrd_nod*, const jrd_nod*, const MapNode*);
2004-01-13 10:52:19 +01:00
static bool node_equality(const jrd_nod*, const jrd_nod*);
2005-05-28 00:45:31 +02:00
static jrd_nod* optimize_like(thread_db*, CompilerScratch*, jrd_nod*);
2004-01-13 10:52:19 +01:00
static USHORT river_count(USHORT, jrd_nod**);
2004-05-09 07:48:33 +02:00
static bool search_stack(const jrd_nod*, const NodeStack&);
static void set_direction(SortNode*, SortNode*);
static void set_position(const SortNode*, SortNode*, const MapNode*);
static void set_rse_inactive(CompilerScratch*, const RseNode*);
2001-05-23 15:26:42 +02:00
2009-08-20 12:19:02 +02:00
// debug levels and dependency-bitmap helpers (constants and inline functions)
2001-05-23 15:26:42 +02:00
// Optimizer tracing support, compiled only when OPT_DEBUG is defined.
#ifdef OPT_DEBUG
2004-05-03 23:43:56 +02:00
// Named values for opt_debug_flag. NOTE(review): presumably increasing verbosity levels -
// only the comparison against DEBUG_NONE is visible in this part of the file.
const int DEBUG_PUNT = 5;
const int DEBUG_RELATIONSHIPS = 4;
const int DEBUG_ALL = 3;
const int DEBUG_CANDIDATE = 2;
const int DEBUG_BEST = 1;
const int DEBUG_NONE = 0;
2001-05-23 15:26:42 +02:00
2004-04-29 00:43:34 +02:00
// Trace output file. OPT_compile opens "opt_debug.out" for writing into it when it is
// still null and opt_debug_flag is not DEBUG_NONE. It has external linkage.
FILE *opt_debug_file = 0;
2004-03-21 10:35:27 +01:00
// Current trace setting; tracing is off by default.
static int opt_debug_flag = DEBUG_NONE;
2001-05-23 15:26:42 +02:00
#endif
// Sets bit number 'bit' in a bitmap stored as an array of words holding 32 bits each.
// The caller must pass a non-negative bit number that lies inside the array.
inline void SET_DEP_BIT(ULONG* array, const SLONG bit)
{
	// The mask is built from an unsigned one: with a signed 1L, selecting bit 31 would
	// shift into the sign bit on platforms where long is 32 bits wide.
	array[bit / 32] |= (ULONG(1) << (bit % 32));
}
2001-05-23 15:26:42 +02:00
// Clears bit number 'bit' in a bitmap stored as an array of words holding 32 bits each.
// The caller must pass a non-negative bit number that lies inside the array.
inline void CLEAR_DEP_BIT(ULONG* array, const SLONG bit)
{
	// The mask is built from an unsigned one: with a signed 1L, selecting bit 31 would
	// shift into the sign bit on platforms where long is 32 bits wide.
	array[bit / 32] &= ~(ULONG(1) << (bit % 32));
}
inline bool TEST_DEP_BIT(const ULONG* array, const ULONG bit)
{
return (array[bit / 32] & (1L << (bit % 32))) != 0;
}
// Reports whether two bitmaps have at least one set bit in common. Exactly eight words of
// each array are compared, in ascending order, stopping at the first overlap.
inline bool TEST_DEP_ARRAYS(const ULONG* ar1, const ULONG* ar2)
{
	for (int word = 0; word < 8; ++word)
	{
		if (ar1[word] & ar2[word])
			return true;
	}

	return false;
}
2009-08-20 12:19:02 +02:00
// some arbitrary fudge factors for calculating costs, etc.--
// these could probably be better tuned someday
2001-05-23 15:26:42 +02:00
2004-05-07 09:57:46 +02:00
// Tuning constants of the optimizer. NOTE(review): apart from CACHE_PAGES_PER_STREAM none
// of them is used in the visible part of this file, so their exact meaning must be taken
// from the code that uses them.
const double ESTIMATED_SELECTIVITY = 0.01;
const int INVERSE_ESTIMATE = 10;
const double INDEX_COST = 30.0;
// OPT_compile multiplies the number of compiled streams by this value when it asks
// CCH_expand for cache pages (done only when more than five streams are compiled).
const int CACHE_PAGES_PER_STREAM = 15;
const int SELECTIVITY_THRESHOLD_FACTOR = 10;
const int OR_SELECTIVITY_THRESHOLD_FACTOR = 2000;
2007-11-12 15:26:44 +01:00
const FB_UINT64 LOWEST_PRIORITY_LEVEL = 0;
2001-05-23 15:26:42 +02:00
2009-08-20 12:19:02 +02:00
// enumeration of sort datatypes
2001-05-23 15:26:42 +02:00
// Sort key type (SKD_*) for each data type, 21 entries; zero marks a data type without a
// sort key type in this table.
// NOTE(review): presumably indexed by the numeric dtype_* code - the position of each
// entry is given on the left of its comment; confirm against the dtype_* definitions.
static const UCHAR sort_dtypes[] =
{
	0,				//  0: dtype_unknown
	SKD_text,		//  1: dtype_text
	SKD_cstring,	//  2: dtype_cstring
	SKD_varying,	//  3: dtype_varying
	0,				//  4: (not named in this table)
	0,				//  5: (not named in this table)
	0,				//  6: dtype_packed
	0,				//  7: dtype_byte
	SKD_short,		//  8: dtype_short
	SKD_long,		//  9: dtype_long
	SKD_quad,		// 10: dtype_quad
	SKD_float,		// 11: dtype_real
	SKD_double,		// 12: dtype_double
	SKD_double,		// 13: dtype_d_float
	SKD_sql_date,	// 14: dtype_sql_date
	SKD_sql_time,	// 15: dtype_sql_time
	SKD_timestamp,	// 16: dtype_timestamp
	SKD_quad,		// 17: dtype_blob
	0,				// 18: dtype_array
	SKD_int64,		// 19: dtype_int64
	SKD_text		// 20: dtype_dbkey - use text sort for backward compatibility
};
2009-11-25 09:38:52 +01:00
bool OPT_access_path(const jrd_req* request, UCHAR* buffer, SLONG buffer_length, ULONG* return_length)
2001-05-23 15:26:42 +02:00
{
/**************************************
*
* O P T _ a c c e s s _ p a t h
*
**************************************
*
* Functional description
* Returns a formatted access path for all
* RseNode's in the specified request.
2001-05-23 15:26:42 +02:00
*
**************************************/
DEV_BLKCHK(request, type_req);
thread_db* tdbb = JRD_get_thread_data();
if (!buffer || buffer_length < 0 || !return_length)
return false;
2009-08-20 12:19:02 +02:00
// loop through all RSEs in the request, and describe the rsb tree for that rsb
2001-05-23 15:26:42 +02:00
UCharBuffer infoBuffer;
Array<const RecordSource*>& fors = request->getStatement()->fors;
2010-04-29 07:13:03 +02:00
for (size_t i = 0; i < fors.getCount(); i++)
fors[i]->dump(tdbb, infoBuffer);
2001-05-23 15:26:42 +02:00
const size_t length = infoBuffer.getCount();
2009-12-16 10:31:36 +01:00
if (length > static_cast<ULONG>(buffer_length))
{
*return_length = 0;
return false;
}
2001-05-23 15:26:42 +02:00
*return_length = (ULONG) length;
memcpy(buffer, infoBuffer.begin(), length);
return true;
2001-05-23 15:26:42 +02:00
}
2010-08-25 03:35:33 +02:00
// Compile and optimize a record selection expression into a set of record source blocks (rsb's).
RecordSource* OPT_compile(thread_db* tdbb, CompilerScratch* csb, RseNode* rse,
NodeStack* const parent_stack)
2001-05-23 15:26:42 +02:00
{
DEV_BLKCHK(csb, type_csb);
DEV_BLKCHK(rse, type_nod);
SET_TDBB(tdbb);
#ifdef OPT_DEBUG
if (opt_debug_flag != DEBUG_NONE && !opt_debug_file)
2004-04-29 00:43:34 +02:00
opt_debug_file = fopen("opt_debug.out", "w");
2001-05-23 15:26:42 +02:00
#endif
2009-11-25 04:58:43 +01:00
// If there is a boolean, there is some work to be done. First,
// decompose the boolean into conjunctions. Then get descriptions
// of all indices for all relations in the RseNode. This will give
2009-11-25 04:58:43 +01:00
// us the info necessary to allocate a optimizer block big
// enough to hold this crud.
// Do not allocate the index_desc struct. Let BTR_all do the job. The allocated
// memory will then be in csb->csb_rpt[stream].csb_idx_allocation, which
// gets cleaned up before this function exits.
2001-05-23 15:26:42 +02:00
AutoPtr<OptimizerBlk> opt(FB_NEW(*tdbb->getDefaultPool()) OptimizerBlk(tdbb->getDefaultPool(),
rse, parent_stack));
opt->opt_streams.grow(csb->csb_n_stream);
2010-01-18 12:39:34 +01:00
RecordSource* rsb = NULL;
2001-05-23 15:26:42 +02:00
2001-12-24 03:51:06 +01:00
try {
opt->opt_csb = csb;
2001-05-23 15:26:42 +02:00
RiverList rivers;
2001-05-23 15:26:42 +02:00
check_sorts(rse);
SortNode* sort = rse->rse_sorted;
SortNode* project = rse->rse_projection;
SortNode* aggregate = rse->rse_aggregate;
2001-05-23 15:26:42 +02:00
2005-05-28 00:45:31 +02:00
// put any additional booleans on the conjunct stack, and see if we
// can generate additional booleans by associativity--this will help
// to utilize indices that we might not have noticed
if (rse->rse_boolean)
opt->conjunctCount = decompose(tdbb, rse->rse_boolean, opt->conjunctStack, csb);
2001-05-23 15:26:42 +02:00
opt->conjunctCount += distribute_equalities(opt->conjunctStack, csb, opt->conjunctCount);
2001-05-23 15:26:42 +02:00
// AB: If we have limit our retrieval with FIRST / SKIP syntax then
2005-05-28 00:45:31 +02:00
// we may not deliver above conditions (from higher rse's) to this
// rse, because the results should be consistent.
if (rse->rse_skip || rse->rse_first)
opt->parentStack = NULL;
// clear the csb_active flag of all streams in the RseNode
2001-05-23 15:26:42 +02:00
set_rse_inactive(csb, rse);
2005-05-28 00:45:31 +02:00
// go through the record selection expression generating
// record source blocks for all streams
2001-05-23 15:26:42 +02:00
2004-01-13 10:52:19 +01:00
// CVC: I defined this var here because it's assigned inside an if() shortly
// below but it's used later in the loop always, so I assume the idea is that
// iterations where nod_type != nod_rse are the ones that set up a new stream.
// Hope this isn't some kind of logic error.
SSHORT stream = -1;
NestConst<RecordSourceNode>* ptr = rse->rse_relations.begin();
for (NestConst<RecordSourceNode>* const end = rse->rse_relations.end(); ptr != end; ++ptr)
2001-12-24 03:51:06 +01:00
{
RecordSourceNode* node = *ptr;
2001-05-23 15:26:42 +02:00
opt->localStreams[0] = 0;
fb_assert(sort == rse->rse_sorted);
fb_assert(aggregate == rse->rse_aggregate);
2005-05-28 00:45:31 +02:00
// find the stream number and place it at the end of the beds array
// (if this is really a stream and not another RseNode)
rsb = node->compile(tdbb, opt, (ptr - rse->rse_relations.begin() == 1));
2001-05-23 15:26:42 +02:00
2003-02-28 00:07:15 +01:00
// if an rsb has been generated, we have a non-relation;
2005-05-28 00:45:31 +02:00
// so it forms a river of its own since it is separately
2003-02-28 00:07:15 +01:00
// optimized from the streams in this rsb
2001-05-23 15:26:42 +02:00
2009-06-21 12:34:33 +02:00
if (rsb)
{
2003-02-28 00:07:15 +01:00
// AB: Save all inner-part streams
2005-05-28 00:45:31 +02:00
if (rse->rse_jointype == blr_inner ||
(rse->rse_jointype == blr_left && (ptr - rse->rse_relations.begin()) == 0))
{
rsb->findUsedStreams(opt->subStreams);
2007-02-15 08:20:59 +01:00
// Save also the outer streams
if (rse->rse_jointype == blr_left)
rsb->findUsedStreams(opt->outerStreams);
}
const size_t count = opt->localStreams[0];
UCHAR* const streams = opt->localStreams + 1;
River* const river = FB_NEW(*tdbb->getDefaultPool()) River(csb, rsb, node, count, streams);
river->deactivate(csb);
rivers.add(river);
2001-05-23 15:26:42 +02:00
}
}
2005-05-28 00:45:31 +02:00
// this is an attempt to make sure we have a large enough cache to
// efficiently retrieve this query; make sure the cache has a minimum
// number of pages for each stream in the RseNode (the number is just a guess)
if (opt->compileStreams[0] > 5)
CCH_expand(tdbb, (ULONG) (opt->compileStreams[0] * CACHE_PAGES_PER_STREAM));
2001-05-23 15:26:42 +02:00
2005-05-28 00:45:31 +02:00
// At this point we are ready to start optimizing.
// We will use the opt block to hold information of
2005-05-28 00:45:31 +02:00
// a global nature, meaning that it needs to stick
// around for the rest of the optimization process.
2001-05-23 15:26:42 +02:00
// Set base-point before the parent/distributed nodes begin.
const USHORT base_count = (USHORT) opt->conjunctCount;
opt->opt_base_conjuncts = base_count;
2001-05-23 15:26:42 +02:00
// AB: Add parent conjunctions to opt->conjunctStack, keep in mind
// the outer-streams! For outer streams put missing (IS NULL)
// conjunctions in the missing_stack.
//
// opt_rpt[0..opt_base_conjuncts-1] = defined conjunctions to this stream
2005-05-28 00:45:31 +02:00
// opt_rpt[0..opt_base_parent_conjuncts-1] = defined conjunctions to this
// stream and allowed distributed conjunctions (with parent)
2005-05-28 00:45:31 +02:00
// opt_rpt[0..opt_base_missing_conjuncts-1] = defined conjunctions to this
// stream and allowed distributed conjunctions and allowed parent
// opt_rpt[0..opt_conjuncts_count-1] = all conjunctions
//
// allowed = booleans that can never evaluate to NULL/Unknown or turn
// NULL/Unknown into a True or False.
USHORT parent_count = 0, distributed_count = 0;
NodeStack missing_stack;
if (opt->parentStack)
{
for (NodeStack::iterator iter(*opt->parentStack);
iter.hasData() && opt->conjunctCount < MAX_CONJUNCTS; ++iter)
{
jrd_nod* const node = iter.object();
if (rse->rse_jointype != blr_inner && PossibleUnknownFinder::find(node))
{
2005-05-28 00:45:31 +02:00
// parent missing conjunctions shouldn't be
// distributed to FULL OUTER JOIN streams at all
2005-05-28 00:45:31 +02:00
if (rse->rse_jointype != blr_full)
{
missing_stack.push(node);
}
}
else
{
opt->conjunctStack.push(node);
opt->conjunctCount++;
parent_count++;
}
2005-05-28 00:45:31 +02:00
}
2005-05-28 00:45:31 +02:00
// We've now merged parent, try again to make more conjunctions.
distributed_count = distribute_equalities(opt->conjunctStack, csb, opt->conjunctCount);
opt->conjunctCount += distributed_count;
}
2005-05-28 00:45:31 +02:00
// The newly created conjunctions belong to the base conjunctions.
// After them are starting the parent conjunctions.
opt->opt_base_parent_conjuncts = opt->opt_base_conjuncts + distributed_count;
// Set base-point before the parent IS NULL nodes begin
opt->opt_base_missing_conjuncts = (USHORT) opt->conjunctCount;
2005-05-28 00:45:31 +02:00
// Check if size of optimizer block exceeded.
if (opt->conjunctCount > MAX_CONJUNCTS)
{
ERR_post(Arg::Gds(isc_optimizer_blk_exc));
2003-02-28 00:07:15 +01:00
// Msg442: size of optimizer block exceeded
}
2005-05-28 00:45:31 +02:00
// Put conjunctions in opt structure.
// Note that it's a stack and we get the nodes in reversed order from the stack.
opt->opt_conjuncts.grow(opt->conjunctCount);
SSHORT nodeBase = -1, j = -1;
for (SLONG i = opt->conjunctCount; i > 0; i--, j--)
{
jrd_nod* const node = opt->conjunctStack.pop();
2001-05-23 15:26:42 +02:00
if (i == base_count)
{
// The base conjunctions
j = base_count - 1;
nodeBase = 0;
}
else if (i == opt->conjunctCount - distributed_count)
{
// The parent conjunctions
j = parent_count - 1;
nodeBase = opt->opt_base_parent_conjuncts;
}
else if (i == opt->conjunctCount)
{
// The new conjunctions created by "distribution" from the stack
j = distributed_count - 1;
nodeBase = opt->opt_base_conjuncts;
}
fb_assert(nodeBase >= 0 && j >= 0);
opt->opt_conjuncts[nodeBase + j].opt_conjunct_node = node;
}
// Put the parent missing nodes on the stack
for (NodeStack::iterator iter(missing_stack);
iter.hasData() && opt->conjunctCount < MAX_CONJUNCTS; ++iter)
{
jrd_nod* const node = iter.object();
opt->opt_conjuncts.grow(opt->conjunctCount + 1);
opt->opt_conjuncts[opt->conjunctCount].opt_conjunct_node = node;
opt->conjunctCount++;
2003-02-28 00:07:15 +01:00
}
2001-05-23 15:26:42 +02:00
2004-11-17 20:33:11 +01:00
// Deoptimize some conjuncts in advance
for (size_t iter = 0; iter < opt->opt_conjuncts.getCount(); iter++)
2004-11-17 20:33:11 +01:00
{
if (opt->opt_conjuncts[iter].opt_conjunct_node->nod_flags & nod_deoptimize)
2004-11-17 20:33:11 +01:00
{
// Fake an index match for them
opt->opt_conjuncts[iter].opt_conjunct_flags |= opt_conjunct_matched;
2004-11-17 20:33:11 +01:00
}
}
// attempt to optimize aggregates via an index, if possible
if (aggregate && !sort)
2001-05-23 15:26:42 +02:00
sort = aggregate;
else
2001-05-23 15:26:42 +02:00
rse->rse_aggregate = aggregate = NULL;
// AB: Mark the previous used streams (sub-RseNode's) as active
for (StreamsArray::iterator i = opt->subStreams.begin(); i != opt->subStreams.end(); ++i)
csb->csb_rpt[*i].csb_flags |= csb_active;
// outer joins require some extra processing
if (rse->rse_jointype != blr_inner)
rsb = gen_outer(tdbb, opt, rse, rivers, &sort);
2009-06-21 12:34:33 +02:00
else
{
bool sort_can_be_used = true;
SortNode* const saved_sort_node = sort;
2003-02-28 00:07:15 +01:00
// AB: If previous rsb's are already on the stack we can't use
2005-05-12 08:51:33 +02:00
// a navigational-retrieval for an ORDER BY because the next
2004-05-09 07:48:33 +02:00
// streams are JOINed to the previous ones
if (rivers.hasData())
2009-06-21 12:34:33 +02:00
{
sort = NULL;
sort_can_be_used = false;
2003-02-28 00:07:15 +01:00
// AB: We could already have multiple rivers at this
// point so try to do some hashing or sort/merging now.
while (gen_equi_join(tdbb, opt, rivers))
;
// AB: Mark the previous used streams (sub-RseNode's) again
2005-05-12 08:51:33 +02:00
// as active, because a SORT/MERGE could reset the flags
for (StreamsArray::iterator i = opt->subStreams.begin(); i != opt->subStreams.end(); ++i)
csb->csb_rpt[*i].csb_flags |= csb_active;
}
fb_assert(opt->compileStreams[0] != 1 || csb->csb_rpt[opt->compileStreams[1]].csb_relation != 0);
while (true)
{
// AB: Determine which streams have an index relationship
// with the currently active rivers. This is needed so that
// no merge is made between a new cross river and the
// currently active rivers. Where in the new cross river
// a stream depends (index) on the active rivers.
stream_array_t dependent_streams, free_streams;
dependent_streams[0] = free_streams[0] = 0;
find_index_relationship_streams(tdbb, opt, opt->compileStreams, dependent_streams, free_streams);
// If we have dependent and free streams then we can't rely on
// the sort node to be used for index navigation.
2009-06-21 12:34:33 +02:00
if (dependent_streams[0] && free_streams[0])
{
sort = NULL;
sort_can_be_used = false;
}
2009-06-21 12:34:33 +02:00
if (dependent_streams[0])
{
// copy free streams
2009-06-27 08:23:36 +02:00
for (USHORT i = 0; i <= free_streams[0]; i++) {
opt->compileStreams[i] = free_streams[i];
}
// Make rivers from the dependent streams
gen_join(tdbb, opt, dependent_streams, rivers, &sort, rse->rse_plan);
// Generate one river which holds a cross join rsb between
// all currently available rivers
River* const river = FB_NEW(*tdbb->getDefaultPool()) CrossJoin(csb, rivers);
rivers.add(river);
}
else
{
2009-06-21 12:34:33 +02:00
if (free_streams[0])
{
2006-08-31 20:16:58 +02:00
// Deactivate streams from rivers on stack, because
// the remaining streams don't have any indexed relationship with them
for (River** iter = rivers.begin(); iter < rivers.end(); iter++)
{
(*iter)->deactivate(csb);
}
}
break;
2005-02-01 15:39:36 +01:00
}
}
2005-05-28 00:45:31 +02:00
// attempt to form joins in decreasing order of desirability
gen_join(tdbb, opt, opt->compileStreams, rivers, &sort, rse->rse_plan);
2001-05-23 15:26:42 +02:00
// If there are multiple rivers, try some hashing or sort/merging
while (gen_equi_join(tdbb, opt, rivers))
;
rsb = CrossJoin(csb, rivers).getRecordSource();
2001-05-23 15:26:42 +02:00
// Assign the sort node back if it wasn't used by the index navigation
if (saved_sort_node && !sort_can_be_used)
2005-03-20 05:26:10 +01:00
{
sort = saved_sort_node;
}
2003-02-28 00:07:15 +01:00
// Pick up any residual boolean that may have fallen thru the cracks
rsb = gen_residual_boolean(tdbb, opt, rsb);
2001-05-23 15:26:42 +02:00
}
2009-08-22 14:16:47 +02:00
// if the aggregate was not optimized via an index, get rid of the
// sort and flag the fact to the calling routine
2009-06-21 12:34:33 +02:00
if (aggregate && sort)
{
2001-05-23 15:26:42 +02:00
rse->rse_aggregate = NULL;
sort = NULL;
}
2009-08-22 14:16:47 +02:00
// check index usage in all the base streams to ensure
// that any user-specified access plan is followed
2001-05-23 15:26:42 +02:00
for (USHORT i = 1; i <= opt->compileStreams[0]; i++)
check_indices(&csb->csb_rpt[opt->compileStreams[i]]);
2001-05-23 15:26:42 +02:00
2009-06-21 12:34:33 +02:00
if (project || sort)
{
2003-02-28 00:07:15 +01:00
// Eliminate any duplicate dbkey streams
const UCHAR* const b_end = opt->beds + opt->beds[0];
const UCHAR* const k_end = opt->keyStreams + opt->keyStreams[0];
UCHAR* k = &opt->keyStreams[1];
2009-06-21 12:34:33 +02:00
for (const UCHAR* p2 = k; p2 <= k_end; p2++)
{
const UCHAR* q = &opt->beds[1];
2004-01-13 10:52:19 +01:00
while (q <= b_end && *q != *p2) {
q++;
}
if (q > b_end) {
*k++ = *p2;
}
2001-05-23 15:26:42 +02:00
}
opt->keyStreams[0] = k - &opt->keyStreams[1];
2001-05-23 15:26:42 +02:00
2010-01-14 08:13:38 +01:00
// Handle project clause, if present
if (project)
rsb = OPT_gen_sort(tdbb, opt->opt_csb, opt->beds, opt->keyStreams, rsb, project, true);
2001-05-23 15:26:42 +02:00
2003-02-28 00:07:15 +01:00
// Handle sort clause if present
if (sort)
rsb = OPT_gen_sort(tdbb, opt->opt_csb, opt->beds, opt->keyStreams, rsb, sort, false);
2001-05-23 15:26:42 +02:00
}
2003-02-28 00:07:15 +01:00
// Handle first and/or skip. The skip MUST (if present)
// appear in the rsb list AFTER the first. Since the gen_first and gen_skip
// functions add their nodes at the beginning of the rsb list we MUST call
// gen_skip before gen_first.
2002-07-01 18:59:09 +02:00
if (rse->rse_skip) {
2009-12-14 17:01:06 +01:00
rsb = FB_NEW(*tdbb->getDefaultPool()) SkipRowsStream(csb, rsb, rse->rse_skip);
}
2001-05-23 15:26:42 +02:00
if (rse->rse_first) {
2009-12-14 17:01:06 +01:00
rsb = FB_NEW(*tdbb->getDefaultPool()) FirstRowsStream(csb, rsb, rse->rse_first);
}
2001-05-23 15:26:42 +02:00
2006-09-14 11:40:58 +02:00
// release memory allocated for index descriptions
for (USHORT i = 1; i <= opt->compileStreams[0]; ++i)
2009-06-21 12:34:33 +02:00
{
const USHORT loopStream = opt->compileStreams[i];
2010-01-18 14:38:36 +01:00
delete csb->csb_rpt[loopStream].csb_idx;
csb->csb_rpt[loopStream].csb_idx = NULL;
2002-07-01 18:59:09 +02:00
2004-05-09 07:48:33 +02:00
// CVC: The following line added because OPT_compile is recursive, both directly
// and through gen_union(), too. Otherwise, we happen to step on deallocated memory
// and this is the cause of the crashes with indices that have plagued IB since v4.
2002-07-01 18:59:09 +02:00
2010-01-18 14:38:36 +01:00
csb->csb_rpt[loopStream].csb_indices = 0;
2001-05-23 15:26:42 +02:00
}
#ifdef OPT_DEBUG
2009-06-21 12:34:33 +02:00
if (opt_debug_file)
{
2004-04-29 00:43:34 +02:00
fflush(opt_debug_file);
//fclose(opt_debug_file);
//opt_debug_file = 0;
2001-05-23 15:26:42 +02:00
}
#endif
2001-12-24 03:51:06 +01:00
} // try
2009-06-21 12:34:33 +02:00
catch (const Firebird::Exception&)
{
for (USHORT i = 1; i <= opt->compileStreams[0]; ++i)
2009-06-21 12:34:33 +02:00
{
const USHORT loopStream = opt->compileStreams[i];
2010-01-18 14:38:36 +01:00
delete csb->csb_rpt[loopStream].csb_idx;
csb->csb_rpt[loopStream].csb_idx = NULL;
csb->csb_rpt[loopStream].csb_indices = 0; // Probably needed to be safe
2001-12-24 03:51:06 +01:00
}
2010-01-18 14:18:20 +01:00
throw;
2001-12-24 03:51:06 +01:00
}
2005-05-28 00:45:31 +02:00
if (rse->flags & RseNode::FLAG_WRITELOCK)
{
for (USHORT i = 1; i <= opt->compileStreams[0]; ++i)
{
const USHORT loopStream = opt->compileStreams[i];
2010-01-18 14:38:36 +01:00
csb->csb_rpt[loopStream].csb_flags |= csb_update;
}
}
2001-05-23 15:26:42 +02:00
return rsb;
}
static bool augment_stack(jrd_nod* node, NodeStack& stack)
{
/**************************************
 *
 *	a u g m e n t _ s t a c k
 *
 **************************************
 *
 * Functional description
 *	Push the node onto the stack, but only if no entry
 *	already on the stack compares equal to it (as decided
 *	by node_equality). Returns true when the node was
 *	pushed and false when an equal entry was found.
 *
 **************************************/
	DEV_BLKCHK(node, type_nod);

	// Scan the existing entries for an equivalent node
	NodeStack::const_iterator entry(stack);

	while (entry.hasData())
	{
		if (node_equality(node, entry.object()))
			return false;

		++entry;
	}

	// No equivalent entry exists, so the node is new to this stack
	stack.push(node);

	return true;
}
static void check_indices(const CompilerScratch::csb_repeat* csb_tail)
{
/**************************************
 *
 *	c h e c k _ i n d i c e s
 *
 **************************************
 *
 * Functional description
 *	Check to make sure that the user-specified
 *	indices were actually utilized by the optimizer.
 *
 *	Nothing is checked unless the stream carries a plan
 *	of type PlanNode::TYPE_RETRIEVE. An isc_index_unused
 *	error is posted when:
 *	  - the plan names indices but none were loaded
 *	    for the stream, or
 *	  - a loaded index is neither used nor marked as
 *	    "don't use", or
 *	  - an index requested for navigation was not
 *	    actually used for navigation.
 *
 **************************************/
	thread_db* tdbb = JRD_get_thread_data();

	const PlanNode* plan = csb_tail->csb_plan;
	if (!plan)
		return;

	if (plan->type != PlanNode::TYPE_RETRIEVE)
		return;

	const jrd_rel* relation = csb_tail->csb_relation;

	if (!csb_tail->csb_indices)
	{
		// if there were no indices fetched at all but the
		// user specified some, error out using the first index specified
		if (plan->accessType)
		{
			// index %s cannot be used in the specified plan
			ERR_post(Arg::Gds(isc_index_unused) << plan->accessType->items[0].indexName);
		}

		// There is nothing left to verify. Return before touching csb_idx:
		// the caller resets csb_idx to NULL together with csb_indices = 0,
		// so the descriptor array must not be dereferenced here.
		return;
	}

	// check to make sure that all indices are either used or marked not to be used,
	// and that there are no unused navigational indices
	MetaName index_name;

	const index_desc* idx = csb_tail->csb_idx->items;

	for (USHORT i = 0; i < csb_tail->csb_indices; i++, ++idx)
	{
		const bool unaccounted = !(idx->idx_runtime_flags & (idx_plan_dont_use | idx_used));
		const bool navigationIgnored = (idx->idx_runtime_flags & idx_plan_navigate) &&
			!(idx->idx_runtime_flags & idx_navigate);

		if (unaccounted || navigationIgnored)
		{
			// Resolve the index name for the message; without a relation
			// the name is reported as an empty string
			if (relation)
				MET_lookup_index(tdbb, index_name, relation->rel_name, (USHORT) (idx->idx_id + 1));
			else
				index_name = "";

			// index %s cannot be used in the specified plan
			ERR_post(Arg::Gds(isc_index_unused) << Arg::Str(index_name));
		}
	}
}
static void check_sorts(RseNode* rse)
{
/**************************************
 *
 *	c h e c k _ s o r t s
 *
 **************************************
 *
 * Functional description
 *	Remove sorts and projections from the RSE that are
 *	made redundant by other clauses:
 *	  1. a projection or sort already covered by the
 *	     GROUP BY of a single aggregate sub-stream;
 *	  2. an ORDER BY whose fields all appear in the DISTINCT;
 *	  3. an ORDER BY over a single stream, which is handed
 *	     down to the nested RSE that owns that stream.
 *
 **************************************/
	DEV_BLKCHK(rse, type_nod);

	SortNode* sort = rse->rse_sorted;
	SortNode* project = rse->rse_projection;

	// Step 1: look for a GROUP BY using the same fields as the project or sort.
	// It only applies when the RSE has exactly one relation and that relation
	// is an aggregate with a group clause.

	RecordSourceNode* sub_rse = NULL;

	if ((project || sort) && rse->rse_relations.getCount() == 1)
		sub_rse = rse->rse_relations[0];

	AggregateSourceNode* aggregate = NULL;

	if (sub_rse && sub_rse->type == AggregateSourceNode::TYPE)
		aggregate = static_cast<AggregateSourceNode*>(sub_rse);

	SortNode* group = NULL;

	if (aggregate)
		group = aggregate->group;

	if (group)
	{
		// When the project has as many fields as the group by and each of them
		// maps onto some group by field, the project is redundant.

		if (project && (project->expressions.getCount() == group->expressions.getCount()))
		{
			const NestConst<jrd_nod>* const project_end = project->expressions.end();
			const NestConst<jrd_nod>* const group_end = group->expressions.end();
			bool projectCovered = true;

			for (NestConst<jrd_nod>* item = project->expressions.begin();
				 projectCovered && item != project_end; ++item)
			{
				const NestConst<jrd_nod>* candidate = group->expressions.begin();

				while (candidate != group_end && !map_equal(*candidate, *item, aggregate->map))
					++candidate;

				projectCovered = (candidate != group_end);
			}

			// The project can be dropped, but the group by must inherit its
			// direction in case the project was descending due to an order by.

			if (projectCovered)
			{
				set_direction(project, group);
				project = rse->rse_projection = NULL;
			}
		}

		// Without a projection the same reasoning applies to the sort,
		// except that the sort is allowed to have fewer fields than the group by.

		if (!project && sort && (sort->expressions.getCount() <= group->expressions.getCount()))
		{
			const NestConst<jrd_nod>* const sort_end = sort->expressions.end();
			const NestConst<jrd_nod>* const group_end = group->expressions.end();
			bool sortCovered = true;

			for (const NestConst<jrd_nod>* item = sort->expressions.begin();
				 sortCovered && item != sort_end; ++item)
			{
				const NestConst<jrd_nod>* candidate = group->expressions.begin();

				while (candidate != group_end && !map_equal(*candidate, *item, aggregate->map))
					++candidate;

				sortCovered = (candidate != group_end);
			}

			// Every sort field was found in the group by: drop the sort and make
			// the group by follow the direction and position of the sort fields.

			if (sortCovered)
			{
				set_direction(sort, group);
				set_position(sort, group, aggregate->map);
				sort = rse->rse_sorted = NULL;
			}
		}
	}

	// Step 2: ORDER BY versus DISTINCT. If every ORDER BY item is a plain field
	// that also appears (same stream, same field id) in the DISTINCT list, the
	// ORDER BY is removed and the DISTINCT is rearranged to match it.

	if (sort && project && (sort->expressions.getCount() <= project->expressions.getCount()))
	{
		const NestConst<jrd_nod>* const sort_end = sort->expressions.end();
		const NestConst<jrd_nod>* const project_end = project->expressions.end();
		bool sortCovered = true;

		for (const NestConst<jrd_nod>* item = sort->expressions.begin();
			 sortCovered && item != sort_end; ++item)
		{
			const NestConst<jrd_nod>* candidate = project->expressions.begin();

			for (; candidate != project_end; ++candidate)
			{
				const bool bothFields = (*item)->nod_type == nod_field &&
					(*candidate)->nod_type == nod_field;

				if (bothFields &&
					(*item)->nod_arg[e_fld_stream] == (*candidate)->nod_arg[e_fld_stream] &&
					(*item)->nod_arg[e_fld_id] == (*candidate)->nod_arg[e_fld_id])
				{
					break;
				}
			}

			sortCovered = (candidate != project_end);
		}

		// The sort is implied by the project once the project
		// adopts the sort's direction and field positions.

		if (sortCovered)
		{
			set_direction(sort, project);
			set_position(sort, project, NULL);
			sort = rse->rse_sorted = NULL;
		}
	}

	// Step 3 (RP: optimize sort with OUTER JOIN). If all the sort items refer
	// to one stream, find the nested RSE holding that stream as its outermost
	// stream and move the sort there.

	if (sort && !project)
	{
		int sort_stream = 0;
		bool usableSort = true;

		NestConst<jrd_nod>* const sort_begin = sort->expressions.begin();
		const NestConst<jrd_nod>* const sort_end = sort->expressions.end();

		for (NestConst<jrd_nod>* item = sort_begin; usableSort && item != sort_end; ++item)
		{
			const bool leading = (item == sort_begin);

			if ((*item)->nod_type == nod_field)
			{
				// Stream referenced by the field at this position
				const int current_stream = (int)(IPTR)(*item)->nod_arg[e_fld_stream];

				if (leading)
				{
					// The first position defines the stream all others must match
					sort_stream = current_stream;
				}
				else if (current_stream != sort_stream)
				{
					// A different stream rules out an indexed order retrieval
					usableSort = false;
				}
			}
			else if (!leading)
			{
				// An expression is tolerated in the first position only:
				// two expressions cannot be mapped to a single index.
				usableSort = false;
			}
			else
			{
				// Not a simple field, so inspect the expression internals.
				// It is acceptable only if it references exactly one stream.
				SortedArray<int> streams;
				StreamsCollector::collect(*item, streams);

				if (streams.getCount() == 1)
					sort_stream = streams[0];
				else
					usableSort = false;
			}
		}

		if (usableSort)
		{
			RseNode* new_rse = NULL;
			RecordSourceNode* node = rse;

			while (node)
			{
				// Where the walk continues; NULL terminates it
				RecordSourceNode* next = NULL;

				if (node->type != RseNode::TYPE)
				{
					// Reached a leaf. If it is the sort stream and it sits below
					// a nested RSE, that RSE takes over the sort.
					if (node->type == RelationSourceNode::TYPE &&
						node->getStream() == sort_stream &&
						new_rse && new_rse != rse)
					{
						new_rse->rse_sorted = sort;
						sort = rse->rse_sorted = NULL;
					}
				}
				else
				{
					new_rse = static_cast<RseNode*>(node);

					// AB: Don't distribute the sort when a FIRST/SKIP is supplied,
					// because that will affect the behaviour from the deeper RSE.
					// dimitr: the same rule applies to explicit/implicit user-defined sorts.
					if (new_rse != rse &&
						(new_rse->rse_first || new_rse->rse_skip ||
						 new_rse->rse_sorted || new_rse->rse_projection))
					{
						break;
					}

					if (new_rse->rse_jointype == blr_left)
					{
						// Only the outer side of a left join keeps the ordering
						next = new_rse->rse_relations[0];
					}
					else if (new_rse->rse_jointype == blr_inner)
					{
						if (new_rse->rse_relations.getCount() == 1)
							next = new_rse->rse_relations[0];
						else
						{
							// Several relations: look for the sort stream among
							// them; the walk ends here whether or not it is found.
							for (size_t pos = 0; pos < new_rse->rse_relations.getCount(); pos++)
							{
								RecordSourceNode* subNode = new_rse->rse_relations[pos];

								if (subNode->type == RelationSourceNode::TYPE &&
									subNode->getStream() == sort_stream &&
									new_rse != rse)
								{
									// Hand the sort over to the RSE owning the
									// stream and clear the original sort
									new_rse->rse_sorted = sort;
									sort = rse->rse_sorted = NULL;
									break;
								}
							}
						}
					}
					// any other join type stops the walk
				}

				node = next;
			}
		}
	}
}
static void class_mask(USHORT count, jrd_nod** eq_class, ULONG* mask)
{
/**************************************
 *
 *	c l a s s _ m a s k
 *
 **************************************
 *
 * Functional description
 *	Given a sort/merge join equivalence class (a vector
 *	of "count" node pointers holding the representative
 *	value for each river), fill "mask" with a bit mask
 *	in which bit N is set when entry N of the class is
 *	not null. Posts isc_optimizer_blk_exc when count
 *	exceeds MAX_CONJUNCTS.
 *
 **************************************/
	if (*eq_class) {
		DEV_BLKCHK(*eq_class, type_nod);
	}

	if (count > MAX_CONJUNCTS)
	{
		// Msg442: size of optimizer block exceeded
		ERR_post(Arg::Gds(isc_optimizer_blk_exc));
	}

	// Start from an all-zero mask
	SLONG word = 0;
	while (word < OPT_STREAM_BITS)
		mask[word++] = 0;

	// Flag every position that holds a representative value
	for (SLONG position = 0; position < count; position++)
	{
		jrd_nod* const representative = eq_class[position];

		if (!representative)
			continue;

		SET_DEP_BIT(mask, position);
		DEV_BLKCHK(representative, type_nod);
	}
}
static jrd_nod* compose(jrd_nod** node1, jrd_nod* node2, nod_t node_type)
{
/**************************************
 *
 *	c o m p o s e
 *
 **************************************
 *
 * Functional description
 *	Combine two conjuncts into one binary node of the
 *	given type and store the result back into *node1.
 *	A null operand is ignored: the other operand is
 *	then the result as is. Returns the new *node1.
 *
 **************************************/
	DEV_BLKCHK(*node1, type_nod);
	DEV_BLKCHK(node2, type_nod);

	if (node2)
	{
		if (*node1)
			*node1 = OPT_make_binary_node(node_type, *node1, node2, false);
		else
			*node1 = node2;
	}

	return *node1;
}
2001-05-23 15:26:42 +02:00
static bool check_for_nod_from(const jrd_nod* node)
2001-05-23 15:26:42 +02:00
{
/**************************************
*
* c h e c k _ f o r _ n o d _ f r o m
2001-05-23 15:26:42 +02:00
*
**************************************
*
* Functional description
* Check for nod_from under >=0 nod_cast nodes.
2001-05-23 15:26:42 +02:00
*
**************************************/
2004-02-02 12:02:12 +01:00
switch (node->nod_type)
{
case nod_from:
return true;
case nod_cast:
return check_for_nod_from(node->nod_arg[e_cast_source]);
default:
return false;
}
}
2001-05-23 15:26:42 +02:00
static SLONG decompose(thread_db* tdbb, jrd_nod* boolean_node, NodeStack& stack, CompilerScratch* csb)
{
/**************************************
 *
 *	d e c o m p o s e
 *
 **************************************
 *
 * Functional description
 *	Break a boolean down into a stack of conjunctions
 *	and return the number of conjuncts pushed.
 *
 **************************************/
	DEV_BLKCHK(boolean_node, type_nod);
	DEV_BLKCHK(csb, type_csb);

	switch (boolean_node->nod_type)
	{
		case nod_and:
		{
			// Both operands are conjuncts in their own right
			const SLONG left = decompose(tdbb, boolean_node->nod_arg[0], stack, csb);
			const SLONG right = decompose(tdbb, boolean_node->nod_arg[1], stack, csb);
			return left + right;
		}

		case nod_between:
		{
			// A BETWEEN becomes (value >= low) AND (value <= high)
			jrd_nod* value = boolean_node->nod_arg[0];

			if (check_for_nod_from(value))
			{
				// Without this error the server was crashing with sub queries
				// under "between" predicate, Bug No. 73766
				// Msg 493: Unsupported field type specified in BETWEEN predicate
				ERR_post(Arg::Gds(isc_optimizer_between_err));
			}

			stack.push(OPT_make_binary_node(nod_geq, value, boolean_node->nod_arg[1], true));

			// The second comparison works on its own copy of the value
			value = CMP_clone_node_opt(tdbb, csb, value);
			stack.push(OPT_make_binary_node(nod_leq, value, boolean_node->nod_arg[2], true));

			return 2;
		}

		case nod_like:
		{
			// A LIKE whose pattern starts with anything other than a
			// pattern-matching character also yields a STARTING WITH
			jrd_nod* const prefix = optimize_like(tdbb, csb, boolean_node);

			if (prefix)
			{
				stack.push(OPT_make_binary_node(nod_starts, boolean_node->nod_arg[0], prefix, false));
				stack.push(boolean_node);
				return 2;
			}

			break;
		}

		case nod_or:
		{
			// Decompose each arm separately. An arm that splits into several
			// conjuncts is re-assembled as a chain of ANDs and put back in place.
			for (int arm = 0; arm < 2; ++arm)
			{
				NodeStack or_stack;

				if (decompose(tdbb, boolean_node->nod_arg[arm], or_stack, csb) >= 2)
				{
					jrd_nod* rebuilt = or_stack.pop();

					while (or_stack.hasData())
						rebuilt = OPT_make_binary_node(nod_and, or_stack.pop(), rebuilt, true);

					boolean_node->nod_arg[arm] = rebuilt;
				}
			}

			break;
		}

		default:
			break;
	}

	// Anything that reaches this point is a single conjunct
	stack.push(boolean_node);

	return 1;
}
static USHORT distribute_equalities(NodeStack& org_stack, CompilerScratch* csb, USHORT base_count)
{
/**************************************
 *
 *	d i s t r i b u t e _ e q u a l i t i e s
 *
 **************************************
 *
 * Functional description
 *	Given a stack of conjunctions, generate some simple
 *	inferences.  In general, find classes of equalities,
 *	then find operations based on members of those classes.
 *	If we find any, generate additional conjunctions.  In
 *	short:
 *
 *		If (a == b) and (a $ c) --> (b $ c) for any
 *		operation '$'.
 *
 *	New conjuncts are pushed onto org_stack as long as
 *	base_count plus the number already added stays below
 *	MAX_CONJUNCTS. Returns the number of conjuncts added.
 *
 **************************************/
	Firebird::ObjectsArray<NodeStack> classes;
	Firebird::ObjectsArray<NodeStack>::iterator eq_class;

	DEV_BLKCHK(csb, type_csb);

	// Zip thru stack of booleans looking for field equalities

	for (NodeStack::iterator stack1(org_stack); stack1.hasData(); ++stack1)
	{
		jrd_nod* boolean = stack1.object();

		if (boolean->nod_flags & nod_deoptimize)
			continue;

		if (boolean->nod_type != nod_eql)
			continue;

		jrd_nod* node1 = boolean->nod_arg[0];
		if (node1->nod_type != nod_field)
			continue;

		jrd_nod* node2 = boolean->nod_arg[1];
		if (node2->nod_type != nod_field)
			continue;

		// Attach the pair to the first class that already knows either field

		for (eq_class = classes.begin(); eq_class != classes.end(); ++eq_class)
		{
			if (search_stack(node1, *eq_class))
			{
				augment_stack(node2, *eq_class);
				break;
			}
			else if (search_stack(node2, *eq_class))
			{
				eq_class->push(node1);
				break;
			}
		}

		// Neither field is known yet, so the pair starts a class of its own

		if (eq_class == classes.end())
		{
			NodeStack& s = classes.add();
			s.push(node1);
			s.push(node2);
		}
	}

	if (classes.getCount() == 0)
		return 0;

	// Make another pass looking for any equality relationships that may have crept
	// in between classes (this could result from the sequence (A = B, C = D, B = C)

	for (eq_class = classes.begin(); eq_class != classes.end(); ++eq_class)
	{
		for (NodeStack::const_iterator stack2(*eq_class); stack2.hasData(); ++stack2)
		{
			for (Firebird::ObjectsArray<NodeStack>::iterator eq_class2(eq_class);
				 ++eq_class2 != classes.end();)
			{
				if (search_stack(stack2.object(), *eq_class2))
				{
					// The later class shares a member: drain it into this one
					while (eq_class2->hasData()) {
						augment_stack(eq_class2->pop(), *eq_class);
					}
				}
			}
		}
	}

	USHORT count = 0;

	// Start by making a pass distributing field equalities

	for (eq_class = classes.begin(); eq_class != classes.end(); ++eq_class)
	{
		if (eq_class->hasMore(2))
		{
			for (NodeStack::iterator outer(*eq_class); outer.hasData(); ++outer)
			{
				for (NodeStack::iterator inner(outer); (++inner).hasData(); )
				{
					jrd_nod* boolean =
						OPT_make_binary_node(nod_eql, outer.object(), inner.object(), true);

					if ((base_count + count < MAX_CONJUNCTS) && augment_stack(boolean, org_stack))
					{
						count++;
					}
					else
					{
						delete boolean;
					}
				}
			}
		}
	}

	// Now make a second pass looking for non-field equalities

	for (NodeStack::iterator stack3(org_stack); stack3.hasData(); ++stack3)
	{
		jrd_nod* boolean = stack3.object();

		if (boolean->nod_type != nod_eql &&
			boolean->nod_type != nod_gtr &&
			boolean->nod_type != nod_geq &&
			boolean->nod_type != nod_leq &&
			boolean->nod_type != nod_lss &&
			boolean->nod_type != nod_matches &&
			boolean->nod_type != nod_contains &&
			boolean->nod_type != nod_like &&
			boolean->nod_type != nod_similar)
		{
			continue;
		}

		// Normalize so that node1 is the field operand; "reverse" remembers
		// that the field was originally the second argument

		const jrd_nod* node1 = boolean->nod_arg[0];
		const jrd_nod* node2 = boolean->nod_arg[1];
		bool reverse = false;

		if (node1->nod_type != nod_field)
		{
			const jrd_nod* swap_node = node1;
			node1 = node2;
			node2 = swap_node;
			reverse = true;
		}

		if (node1->nod_type != nod_field) {
			continue;
		}

		// The other operand must be a literal, a variable or an argument

		if (node2->nod_type != nod_literal &&
			node2->nod_type != nod_variable &&
			node2->nod_type != nod_argument)
		{
			continue;
		}

		for (eq_class = classes.begin(); eq_class != classes.end(); ++eq_class)
		{
			if (search_stack(node1, *eq_class))
			{
				for (NodeStack::iterator temp(*eq_class); temp.hasData(); ++temp)
				{
					if (!node_equality(node1, temp.object()))
					{
						jrd_nod* arg1;
						jrd_nod* arg2;

						if (reverse)
						{
							arg1 = boolean->nod_arg[0];
							arg2 = temp.object();
						}
						else
						{
							arg1 = temp.object();
							arg2 = boolean->nod_arg[1];
						}

						// From the conjuncts X(A,B) and A=C, infer the conjunct X(C,B)
						jrd_nod* new_node = make_inference_node(csb, boolean, arg1, arg2);

						if ((base_count + count < MAX_CONJUNCTS) && augment_stack(new_node, org_stack))
						{
							count++;
						}
						else
						{
							// Rejected (limit reached or duplicate): release the
							// node, as the field-equality pass above does
							delete new_node;
						}
					}
				}

				break;
			}
		}
	}

	return count;
}
static void find_index_relationship_streams(thread_db* tdbb,
											OptimizerBlk* opt,
											const UCHAR* streams,
											UCHAR* dependent_streams,
											UCHAR* free_streams)
{
/**************************************
 *
 *	f i n d _ i n d e x _ r e l a t i o n s h i p _ s t r e a m s
 *
 **************************************
 *
 * Functional description
 *	Split the given streams into those that can use an
 *	index together with the currently active streams
 *	(appended to dependent_streams) and those that
 *	cannot (appended to free_streams). All three arrays
 *	keep their element count in slot zero.
 *
 **************************************/
	DEV_BLKCHK(opt, type_opt);
	SET_TDBB(tdbb);

	CompilerScratch* const csb = opt->opt_csb;
	const USHORT total = streams[0];

	for (USHORT position = 1; position <= total; position++)
	{
		const UCHAR stream = streams[position];
		CompilerScratch::csb_repeat* const csb_tail = &csb->csb_rpt[stream];

		// The stream is made active only for the duration of the cost estimate
		csb_tail->csb_flags |= csb_active;

		bool dependsOnActive = false;

		if (opt->opt_conjuncts.getCount())
		{
			// Calculate the inversion for this stream.
			// The returned candidate lists the streams that would be used for
			// index retrieval. If there are any, this stream depends on already
			// active streams and can not be used in a separate SORT/MERGE.
			OptimizerRetrieval optimizerRetrieval(*tdbb->getDefaultPool(), opt, stream, false, false, NULL);
			AutoPtr<InversionCandidate> candidate(optimizerRetrieval.getCost());

			dependsOnActive = candidate->dependentFromStreams.hasData();
		}

		// Append the stream to whichever list it belongs to
		UCHAR* const target = dependsOnActive ? dependent_streams : free_streams;
		const UCHAR slot = ++target[0];
		target[slot] = stream;

		// Put the active flag back to its cleared state
		csb_tail->csb_flags &= ~csb_active;
	}
}
2001-05-23 15:26:42 +02:00
static void form_rivers(thread_db* tdbb,
OptimizerBlk* opt,
2004-12-07 01:33:16 +01:00
const UCHAR* streams,
RiverList& river_list,
SortNode** sort_clause,
2010-08-29 22:20:19 +02:00
PlanNode* plan_clause)
2001-05-23 15:26:42 +02:00
{
/**************************************
*
* f o r m _ r i v e r s
*
**************************************
*
* Functional description
* Form streams into rivers according
* to the user-specified plan.
*
**************************************/
SET_TDBB(tdbb);
DEV_BLKCHK(opt, type_opt);
2003-09-02 18:39:58 +02:00
2004-01-13 10:52:19 +01:00
stream_array_t temp;
2001-05-23 15:26:42 +02:00
temp[0] = 0;
2003-09-02 18:39:58 +02:00
// this must be a join or a merge node, so go through
// the substreams and place them into the temp vector
// for formation into a river.
2010-08-29 22:20:19 +02:00
PlanNode* plan_node = NULL;
NestConst<PlanNode>* ptr = plan_clause->subNodes.begin();
for (const NestConst<PlanNode>* const end = plan_clause->subNodes.end(); ptr != end; ++ptr)
2009-06-21 12:34:33 +02:00
{
2001-05-23 15:26:42 +02:00
plan_node = *ptr;
2010-08-29 22:20:19 +02:00
if (plan_node->type == PlanNode::TYPE_JOIN)
2009-06-21 12:34:33 +02:00
{
form_rivers(tdbb, opt, streams, river_list, sort_clause, plan_node);
2001-05-23 15:26:42 +02:00
continue;
}
2003-09-02 18:39:58 +02:00
// at this point we must have a retrieval node, so put
// the stream into the river.
2010-08-29 22:20:19 +02:00
fb_assert(plan_node->type == PlanNode::TYPE_RETRIEVE);
2010-08-29 22:20:19 +02:00
const UCHAR stream = plan_node->relationNode->getStream();
// dimitr: the plan may contain more retrievals than the "streams"
// array (some streams could already be joined to the active
// rivers), so we populate the "temp" array only with the
// streams that appear in both the plan and the "streams"
// array.
2010-08-29 22:20:19 +02:00
const UCHAR* ptr_stream = streams + 1;
const UCHAR* const end_stream = ptr_stream + streams[0];
2010-08-29 22:20:19 +02:00
2009-06-21 12:34:33 +02:00
while (ptr_stream < end_stream)
{
if (*ptr_stream++ == stream)
{
temp[0]++;
temp[temp[0]] = stream;
break;
}
}
2001-05-23 15:26:42 +02:00
}
2005-05-28 00:45:31 +02:00
// just because the user specified a join does not mean that
2003-09-02 18:39:58 +02:00
// we are able to form a river; thus form as many rivers out
// of the join are as necessary to exhaust the streams.
// AB: Only form rivers when any retrieval node is seen, for
// example a MERGE on two JOINs will come with no retrievals
// at this point.
2004-01-13 10:52:19 +01:00
// CVC: Notice "plan_node" is pointing to the last element in the loop above.
// If the loop didn't execute, we had garbage in "plan_node".
2009-06-21 12:34:33 +02:00
if (temp[0] != 0)
{
OptimizerInnerJoin* const innerJoin = FB_NEW(*tdbb->getDefaultPool())
OptimizerInnerJoin(*tdbb->getDefaultPool(), opt, temp, sort_clause, plan_clause);
2010-08-29 22:20:19 +02:00
USHORT count;
do {
count = innerJoin->findJoinOrder();
} while (form_river(tdbb, opt, count, streams[0], temp, river_list, sort_clause));
delete innerJoin;
}
}
2001-05-23 15:26:42 +02:00
static bool form_river(thread_db*		tdbb,
					   OptimizerBlk*	opt,
					   USHORT			count,
					   USHORT			stream_count,
					   UCHAR*			temp,
					   RiverList&		river_list,
					   SortNode**		sort_clause)
{
/**************************************
 *
 *	f o r m _ r i v e r
 *
 **************************************
 *
 * Functional description
 *	Form streams into rivers (combinations of streams).
 *
 *	The first "count" entries of opt->opt_streams give
 *	the chosen join order. A retrieval is generated for
 *	each of them, the retrievals are combined into a
 *	single river which is pushed onto river_list, and
 *	the consumed streams are removed from "temp" (whose
 *	slot zero holds the element count).
 *
 *	Returns true when streams are left over in "temp",
 *	false when all of them have been consumed.
 *
 **************************************/
	fb_assert(count);

	DEV_BLKCHK(opt, type_opt);

	SET_TDBB(tdbb);

	CompilerScratch* const csb = opt->opt_csb;

	HalfStaticArray<RecordSource*, OPT_STATIC_ITEMS> rsbs;
	rsbs.resize(count);
	RecordSource** ptr = rsbs.begin();

	StreamList streams;
	streams.resize(count);
	UCHAR* stream = streams.begin();

	// The sort clause is offered to the retrieval only when this river
	// covers every stream
	if (count != stream_count)
	{
		sort_clause = NULL;
	}

	const OptimizerBlk::opt_stream* const opt_end = opt->opt_streams.begin() + count;

	for (OptimizerBlk::opt_stream* tail = opt->opt_streams.begin();
		 tail < opt_end; tail++, stream++, ptr++)
	{
		*stream = (UCHAR) tail->opt_best_stream;
		*ptr = gen_retrieval(tdbb, opt, *stream, sort_clause, false, false, NULL);

		// Only the first stream of the river may receive the sort clause
		sort_clause = NULL;
	}

	// A single retrieval stands for itself, several are nested-loop joined
	RecordSource* const rsb = (count == 1) ? rsbs[0] :
		FB_NEW(*tdbb->getDefaultPool()) NestedLoopJoin(csb, count, rsbs.begin());

	// Allocate a river block and move the best order into it
	River* const river = FB_NEW(*tdbb->getDefaultPool()) River(csb, rsb, NULL, count, streams.begin());
	river->deactivate(csb);
	river_list.push(river);

	// Remember the old extent of "temp" before shrinking its count
	stream = temp + 1;
	const UCHAR* const end_stream = stream + temp[0];

	if (!(temp[0] -= count))
	{
		return false;
	}

	// Reform "temp" from streams not consumed

	for (const UCHAR* t2 = stream; t2 < end_stream; t2++)
	{
		bool used = false;

		for (OptimizerBlk::opt_stream* tail = opt->opt_streams.begin(); tail < opt_end; tail++)
		{
			if (*t2 == tail->opt_best_stream)
			{
				used = true;
				break;
			}
		}

		if (!used)
		{
			*stream++ = *t2;
		}
	}

	return true;
}
// Walk the assignments of the map and, for every source that is a DISTINCT
// aggregate, build the AggregateSort block describing the sort used to find
// its distinct values. The block is attached to the aggregate node itself.
// Note that this should be optimized to use indices if possible.
void OPT_gen_aggregate_distincts(thread_db* tdbb, CompilerScratch* csb, MapNode* map)
{
	DSC keyDesc;

	const NestConst<jrd_nod>* const itemsEnd = map->items.end();

	for (NestConst<jrd_nod>* item = map->items.begin(); item != itemsEnd; ++item)
	{
		jrd_nod* source = (*item)->nod_arg[e_asgn_from];
		AggNode* const aggregate = ExprNode::as<AggNode>(source);

		if (!aggregate || !aggregate->distinct)
			continue;

		// Build the sort key definition. Turn cstrings into varying text.
		CMP_get_desc(tdbb, csb, aggregate->arg, &keyDesc);

		if (keyDesc.dsc_dtype == dtype_cstring)
		{
			keyDesc.dsc_dtype = dtype_varying;
			keyDesc.dsc_length++;
		}

		AggregateSort* const sortBlock = FB_NEW(*tdbb->getDefaultPool()) AggregateSort(
			*tdbb->getDefaultPool());

		// Text with a real character set/collation is sorted by a computed key
		sortBlock->intl = keyDesc.isText() && keyDesc.getTextType() != ttype_none &&
			keyDesc.getTextType() != ttype_binary && keyDesc.getTextType() != ttype_ascii;

		sort_key_def* key = sortBlock->keyItems.getBuffer(sortBlock->intl ? 2 : 1);
		key->skd_offset = 0;

		if (sortBlock->intl)
		{
			// First key item: the international key, placed at the start of the record
			const USHORT key_length = ROUNDUP(INTL_key_length(tdbb,
				INTL_TEXT_TO_INDEX(keyDesc.getTextType()), keyDesc.getStringLength()), sizeof(SINT64));

			key->skd_dtype = SKD_bytes;
			key->skd_flags = SKD_ascending;
			key->skd_length = key_length;
			key->skd_offset = 0;
			key->skd_vary_offset = 0;

			// Second key item: the value itself, stored right after the key
			++key;
			sortBlock->length = key->skd_offset = key_length;
		}

		fb_assert(keyDesc.dsc_dtype < FB_NELEM(sort_dtypes));
		key->skd_dtype = sort_dtypes[keyDesc.dsc_dtype];

		if (!key->skd_dtype)
		{
			ERR_post(Arg::Gds(isc_invalid_sort_datatype) << Arg::Str(DSC_dtype_tostring(keyDesc.dsc_dtype)));
		}

		key->skd_length = keyDesc.dsc_length;

		if (keyDesc.dsc_dtype != dtype_varying)
			sortBlock->length += key->skd_length;
		else
		{
			// allocate space to store varying length
			key->skd_vary_offset = key->skd_offset + ROUNDUP(keyDesc.dsc_length, sizeof(SLONG));
			sortBlock->length = key->skd_vary_offset + sizeof(USHORT);
		}

		key->skd_flags = SKD_ascending;

		sortBlock->impure = CMP_impure(csb, sizeof(impure_agg_sort));
		sortBlock->desc = keyDesc;

		aggregate->asb = sortBlock;
	}
}
static void gen_join(thread_db* tdbb,
OptimizerBlk* opt,
2008-02-04 18:01:36 +01:00
const UCHAR* streams,
RiverList& river_list,
SortNode** sort_clause,
2010-08-29 22:20:19 +02:00
PlanNode* plan_clause)
2001-05-23 15:26:42 +02:00
{
/**************************************
*
* g e n _ j o i n
*
**************************************
*
* Functional description
2005-05-28 00:45:31 +02:00
* Find all indexed relationships between streams,
* then form streams into rivers (combinations of
* streams).
2001-05-23 15:26:42 +02:00
*
**************************************/
DEV_BLKCHK(opt, type_opt);
SET_TDBB(tdbb);
if (!streams[0])
2001-05-23 15:26:42 +02:00
return;
if (plan_clause && streams[0] > 1)
2009-06-21 12:34:33 +02:00
{
// this routine expects a join/merge
form_rivers(tdbb, opt, streams, river_list, sort_clause, plan_clause);
2001-05-23 15:26:42 +02:00
return;
}
OptimizerInnerJoin* const innerJoin = FB_NEW(*tdbb->getDefaultPool())
OptimizerInnerJoin(*tdbb->getDefaultPool(), opt, streams, sort_clause, plan_clause);
2003-09-02 18:39:58 +02:00
stream_array_t temp;
memcpy(temp, streams, streams[0] + 1);
2001-05-23 15:26:42 +02:00
USHORT count;
do {
count = innerJoin->findJoinOrder();
} while (form_river(tdbb, opt, count, streams[0], temp, river_list, sort_clause));
delete innerJoin;
2001-05-23 15:26:42 +02:00
}
static RecordSource* gen_outer(thread_db* tdbb, OptimizerBlk* opt, RseNode* rse,
	RiverList& river_list, SortNode** sort_clause)
{
/**************************************
 *
 *	g e n _ o u t e r
 *
 **************************************
 *
 * Functional description
 *	Generate a top level outer join. The "outer" and "inner"
 *	sub-streams must be handled differently from each other.
 *	The inner is like other streams. The outer stream isn't
 *	because conjuncts may not eliminate records from the
 *	stream. They only determine if a join with an inner
 *	stream record is to be attempted.
 *
 *	For a full join two nested loop joins are built (outer
 *	driving inner, then inner driving outer) and combined
 *	by a FullOuterJoin.
 *
 **************************************/
	struct {
		RecordSource* stream_rsb;
		USHORT stream_num;
	} outerSide, innerSide, *sides[2];

	DEV_BLKCHK(opt, type_opt);
	DEV_BLKCHK(rse, type_nod);
	SET_TDBB(tdbb);

	// Determine which stream should be outer and which is inner.
	// In the case of a left join, the syntactically left stream is the
	// outer, and the right stream is the inner. For all others, swap
	// the sense of inner and outer, though for a full join it doesn't
	// matter and we should probably try both orders to see which is
	// more efficient.
	if (rse->rse_jointype == blr_left)
	{
		sides[0] = &outerSide;
		sides[1] = &innerSide;
	}
	else
	{
		sides[0] = &innerSide;
		sides[1] = &outerSide;
	}

	// Loop through the outer join sub-streams in
	// reverse order because rivers may have been PUSHed
	for (int pos = 1; pos >= 0; --pos)
	{
		const RecordSourceNode* node = rse->rse_relations[pos];

		if (node->type != RelationSourceNode::TYPE)
		{
			// Anything but a plain relation was already compiled into a river
			River* const river = river_list.pop();
			sides[pos]->stream_rsb = river->getRecordSource();
		}
		else
		{
			sides[pos]->stream_rsb = NULL;
			sides[pos]->stream_num = node->getStream();
		}
	}

	CompilerScratch* const csb = opt->opt_csb;
	const bool isFullJoin = (rse->rse_jointype == blr_full);

	// Remember which sides still need a retrieval to be generated here
	const bool hasOuterRsb = (outerSide.stream_rsb != NULL);
	const bool hasInnerRsb = (innerSide.stream_rsb != NULL);

	// The sort clause is offered to the outer stream only for non-full joins
	SortNode** outerSort = sort_clause;
	if (isFullJoin)
		outerSort = NULL;

	// Generate rsbs for the sub-streams.
	// For the left sub-stream we also will get a boolean back.
	jrd_nod* boolean = NULL;

	if (!hasOuterRsb)
	{
		outerSide.stream_rsb =
			gen_retrieval(tdbb, opt, outerSide.stream_num, outerSort, true, false, &boolean);
	}

	if (!hasInnerRsb)
	{
		// AB: the sort clause for the inner stream of an OUTER JOIN
		//	   should never be used for the index retrieval
		innerSide.stream_rsb =
			gen_retrieval(tdbb, opt, innerSide.stream_num, NULL, false, true, NULL);
	}

	// generate a parent boolean rsb for any remaining booleans that
	// were not satisfied via an index lookup
	innerSide.stream_rsb = gen_residual_boolean(tdbb, opt, innerSide.stream_rsb);

	// Allocate and fill in the rsb
	RecordSource* const rsb1 = FB_NEW(*tdbb->getDefaultPool())
		NestedLoopJoin(csb, outerSide.stream_rsb, innerSide.stream_rsb, boolean, false, false);

	if (!isFullJoin)
		return rsb1;

	// Full join: the conjuncts consumed above are cloned and made available
	// again, so that they can be applied to the second (reversed) join
	for (size_t i = 0; i < opt->opt_conjuncts.getCount(); i++)
	{
		OptimizerBlk::opt_conjunct& conjunct = opt->opt_conjuncts[i];

		if (conjunct.opt_conjunct_flags & opt_conjunct_used)
		{
			jrd_nod* const org_node = conjunct.opt_conjunct_node;
			conjunct.opt_conjunct_node = CMP_clone_node_opt(tdbb, csb, org_node);
			conjunct.opt_conjunct_flags = 0;
		}
	}

	// Deactivate both streams before either of them is retrieved again
	if (!hasInnerRsb)
		csb->csb_rpt[innerSide.stream_num].csb_flags &= ~csb_active;

	if (!hasOuterRsb)
		csb->csb_rpt[outerSide.stream_num].csb_flags &= ~csb_active;

	if (!hasInnerRsb)
	{
		innerSide.stream_rsb =
			gen_retrieval(tdbb, opt, innerSide.stream_num, NULL, false, false, NULL);
	}

	if (!hasOuterRsb)
	{
		outerSide.stream_rsb =
			gen_retrieval(tdbb, opt, outerSide.stream_num, NULL, false, false, NULL);
	}

	outerSide.stream_rsb = gen_residual_boolean(tdbb, opt, outerSide.stream_rsb);

	RecordSource* const rsb2 = FB_NEW(*tdbb->getDefaultPool())
		NestedLoopJoin(csb, innerSide.stream_rsb, outerSide.stream_rsb, NULL, false, true);

	return FB_NEW(*tdbb->getDefaultPool()) FullOuterJoin(csb, rsb1, rsb2);
}
2009-11-25 09:38:52 +01:00
static RecordSource* gen_residual_boolean(thread_db* tdbb, OptimizerBlk* opt, RecordSource* prior_rsb)
{
/**************************************
 *
 *	g e n _ r e s i d u a l _ b o o l e a n
 *
 **************************************
 *
 * Functional description
 *	Collect every base conjunct that is not yet marked
 *	as used, AND them together and mark them used.
 *	If anything was collected, the prior record source
 *	is wrapped into a FilteredStream applying that
 *	boolean; otherwise the prior record source is
 *	returned unchanged.
 *
 **************************************/
	SET_TDBB(tdbb);
	DEV_BLKCHK(opt, type_opt);
	DEV_BLKCHK(prior_rsb, type_rsb);

	jrd_nod* residual = NULL;

	OptimizerBlk::opt_conjunct* conjunct = opt->opt_conjuncts.begin();
	const OptimizerBlk::opt_conjunct* const last = conjunct + opt->opt_base_conjuncts;

	for (; conjunct < last; ++conjunct)
	{
		if (conjunct->opt_conjunct_flags & opt_conjunct_used)
			continue;

		compose(&residual, conjunct->opt_conjunct_node, nod_and);
		conjunct->opt_conjunct_flags |= opt_conjunct_used;
	}

	if (!residual)
		return prior_rsb;

	return FB_NEW(*tdbb->getDefaultPool()) FilteredStream(opt->opt_csb, prior_rsb, residual);
}
static RecordSource* gen_retrieval(thread_db* tdbb,
								   OptimizerBlk* opt,
								   SSHORT stream,
								   SortNode** sort_ptr,
								   bool outer_flag,
								   bool inner_flag,
								   jrd_nod** return_boolean)
{
/**************************************
 *
 *	g e n _ r e t r i e v a l
 *
 **************************************
 *
 * Functional description
 *	Compile and optimize a record selection expression into a
 *	set of record source blocks (rsb's).
 *
 *	The stream is marked active. External and virtual tables
 *	get their dedicated scan; for persistent tables an
 *	OptimizerRetrieval is asked for an inversion and/or a
 *	navigational index scan. Unused computable conjuncts are
 *	then collected (and marked used) and applied on top of
 *	the scan through a FilteredStream.
 *
 *	outer_flag: the stream is the outer side of an outer join;
 *		"return_boolean" must then be non-NULL and receives
 *		the computable, unused base conjuncts.
 *	inner_flag: the stream is the inner side of an outer join;
 *		only the first opt_base_missing_conjuncts conjuncts
 *		are considered.
 *
 **************************************/
	SET_TDBB(tdbb);

	DEV_BLKCHK(opt, type_opt);
	if (return_boolean)
		DEV_BLKCHK(*return_boolean, type_nod);

	CompilerScratch* const csb = opt->opt_csb;
	CompilerScratch::csb_repeat* const csb_tail = &csb->csb_rpt[stream];
	jrd_rel* const relation = csb_tail->csb_relation;

	fb_assert(relation);

	const string alias = OPT_make_alias(tdbb, csb, csb_tail);

	csb_tail->csb_flags |= csb_active;

	// Time to find inversions. For each index on the relation
	// match all unused booleans against the index looking for upper
	// and lower bounds that can be computed by the index. When
	// all unused conjunctions are exhausted, see if there is enough
	// information for an index retrieval. If so, build up an
	// inversion component of the boolean.

	RecordSource* rsb = NULL;
	IndexTableScan* nav_rsb = NULL;
	jrd_nod* inversion = NULL;

	if (relation->rel_file)
	{
		// External table
		rsb = FB_NEW(*tdbb->getDefaultPool()) ExternalTableScan(csb, alias, stream);
	}
	else if (relation->isVirtual())
	{
		// Virtual table: monitoring or security
		if (relation->rel_id == rel_sec_users)
		{
			rsb = FB_NEW(*tdbb->getDefaultPool()) UsersTableScan(csb, alias, stream);
		}
		else
		{
			rsb = FB_NEW(*tdbb->getDefaultPool()) MonitoringTableScan(csb, alias, stream);
		}
	}
	else
	{
		// Persistent table
		OptimizerRetrieval optimizerRetrieval(*tdbb->getDefaultPool(),
			opt, stream, outer_flag, inner_flag, sort_ptr);

		// The candidate is released at the end of this scope; only the
		// inversion node pointer is carried out of it
		AutoPtr<InversionCandidate> candidate(optimizerRetrieval.getInversion(&nav_rsb));

		if (candidate && candidate->inversion)
		{
			inversion = candidate->inversion;
		}
	}

	OptimizerBlk::opt_conjunct* tail;

	if (outer_flag)
	{
		fb_assert(return_boolean);

		// Now make another pass thru the outer conjuncts only, finding unused,
		// computable booleans. When one is found, roll it into a final
		// boolean and mark it used.
		*return_boolean = NULL;

		const OptimizerBlk::opt_conjunct* const outer_end =
			opt->opt_conjuncts.begin() + opt->opt_base_conjuncts;

		for (tail = opt->opt_conjuncts.begin(); tail < outer_end; tail++)
		{
			jrd_nod* node = tail->opt_conjunct_node;

			if (!(tail->opt_conjunct_flags & opt_conjunct_used) &&
				OPT_computable(csb, node, -1, false, false))
			{
				compose(return_boolean, node, nod_and);
				tail->opt_conjunct_flags |= opt_conjunct_used;
			}
		}
	}

	// Now make another pass thru the conjuncts finding unused, computable
	// booleans. When one is found, roll it into a final boolean and mark
	// it used. If a computable boolean didn't match against an index then
	// mark the stream to denote unmatched booleans.
	jrd_nod* boolean = NULL;

	const OptimizerBlk::opt_conjunct* const opt_end = opt->opt_conjuncts.begin() +
		(inner_flag ? opt->opt_base_missing_conjuncts : opt->opt_conjuncts.getCount());

	tail = opt->opt_conjuncts.begin();

	if (outer_flag)
	{
		// The leading opt_base_parent_conjuncts entries are not considered here
		tail += opt->opt_base_parent_conjuncts;
	}

	for (; tail < opt_end; tail++)
	{
		jrd_nod* const node = tail->opt_conjunct_node;

		if (!(tail->opt_conjunct_flags & opt_conjunct_used) &&
			OPT_computable(csb, node, -1, false, false))
		{
			// If no index is used then leave other nodes alone, because they
			// could be used for building a SORT/MERGE.
			if ((inversion && StreamFinder::find(csb, stream, node)) ||
				(!inversion && OPT_computable(csb, node, stream, false, true)))
			{
				compose(&boolean, node, nod_and);
				tail->opt_conjunct_flags |= opt_conjunct_used;

				if (!outer_flag && !(tail->opt_conjunct_flags & opt_conjunct_matched))
				{
					csb_tail->csb_flags |= csb_unmatched;
				}
			}
		}
	}

	if (nav_rsb)
	{
		// Navigational index scan: it carries the inversion itself
		nav_rsb->setInversion(inversion);
		fb_assert(!rsb);
		rsb = nav_rsb;
	}

	if (!rsb)
	{
		if (inversion)
		{
			rsb = FB_NEW(*tdbb->getDefaultPool()) BitmapTableScan(csb, alias, stream, inversion);
		}
		else
		{
			rsb = FB_NEW(*tdbb->getDefaultPool()) FullTableScan(csb, alias, stream);

			if (boolean)
			{
				csb->csb_rpt[stream].csb_flags |= csb_unmatched;
			}
		}
	}

	return boolean ? FB_NEW(*tdbb->getDefaultPool()) FilteredStream(csb, rsb, boolean) : rsb;
}
2001-05-23 15:26:42 +02:00
SortedStream* OPT_gen_sort(thread_db* tdbb, CompilerScratch* csb, const UCHAR* streams,
	const UCHAR* dbkey_streams, RecordSource* prior_rsb, SortNode* sort, bool project_flag)
{
/**************************************
 *
 *	O P T _ g e n _ s o r t
 *
 **************************************
 *
 * Functional description
 *	Generate a record source block to handle either a sort or a project.
 *	The two cases are virtually identical -- the only difference is that
 *	project eliminates duplicates.  However, since duplicates are
 *	recognized and handled by sort, the JRD processing is identical.
 *
 *	"streams" and the optional "dbkey_streams" are counted lists:
 *	element [0] is the number of streams, followed by the stream numbers.
 *	"project_flag" and sort->unique are recorded as flags of the sort map.
 *
 *	Errors posted: isc_imp_exc (too many items), isc_invalid_sort_datatype,
 *	message 157 (field does not exist) and isc_sort_rec_size_err.
 *
 **************************************/
	DEV_BLKCHK(prior_rsb, type_rsb);
	SET_TDBB(tdbb);

	/* We already know the number of keys, but we also need to compute the
	total number of fields, keys and non-keys, to be pumped thru sort.  Starting
	with the number of keys, count the other field referenced.  Since a field
	is often a key, check for overlap to keep the length of the sort record
	down. */

	/* Along with the record number, the transaction id of the
	 * record will also be stored in the sort file.  This will
	 * be used to detect update conflict in read committed
	 * transactions. */

	const UCHAR* ptr;
	dsc descriptor;

	// Per stream: dbkey, transaction id and dbkey-valid flag.
	// Per dbkey stream: dbkey and dbkey-valid flag.
	ULONG items = sort->expressions.getCount() +
		(streams[0] * 3) + 2 * (dbkey_streams ? dbkey_streams[0] : 0);

	const UCHAR* const end_ptr = streams + streams[0];
	const NestConst<jrd_nod>* const end_node = sort->expressions.end();

	// Parallel stacks of (field id, stream) for the non-key fields to be carried
	HalfStaticArray<SLONG, OPT_STATIC_ITEMS> id_list;
	StreamList stream_list;

	for (ptr = &streams[1]; ptr <= end_ptr; ptr++)
	{
		UInt32Bitmap::Accessor accessor(csb->csb_rpt[*ptr].csb_fields);

		if (accessor.getFirst())
		{
			do
			{
				const ULONG id = accessor.current();
				items++;
				id_list.push(id);
				stream_list.push(*ptr);

				for (NestConst<jrd_nod>* node_ptr = sort->expressions.begin();
					 node_ptr != end_node;
					 ++node_ptr)
				{
					NestConst<jrd_nod> node = *node_ptr;

					if (node->nod_type == nod_field &&
						(USHORT)(IPTR) node->nod_arg[e_fld_stream] == *ptr &&
						(USHORT)(IPTR) node->nod_arg[e_fld_id] == id)
					{
						dsc* desc = &descriptor;
						CMP_get_desc(tdbb, csb, node, desc);

						// International type text has a computed key,
						// so the field itself must still be carried
						if (IS_INTL_DATA(desc))
							break;

						// The field is already a sort key: do not repeat it
						--items;
						id_list.pop();
						stream_list.pop();

						break;
					}
				}
			} while (accessor.getNext());
		}
	}

	if (items > MAX_USHORT)
		ERR_post(Arg::Gds(isc_imp_exc));

	// Now that we know the number of items, allocate a sort map block.

	SortedStream::SortMap* map =
		FB_NEW(*tdbb->getDefaultPool()) SortedStream::SortMap(*tdbb->getDefaultPool());

	if (project_flag)
		map->flags |= SortedStream::FLAG_PROJECT;

	if (sort->unique)
		map->flags |= SortedStream::FLAG_UNIQUE;

	ULONG map_length = 0;

	// Loop thru sort keys building sort keys.  Actually, to handle null values
	// correctly, two sort keys are made for each field, one for the null flag
	// and one for field itself.

	SortedStream::SortMap::Item* map_item = map->items.getBuffer((USHORT) items);
	sort_key_def* sort_key = map->keyItems.getBuffer(2 * sort->expressions.getCount());
	int* nullOrder = sort->nullOrder.begin();
	bool* descending = sort->descending.begin();

	for (NestConst<jrd_nod>* node_ptr = sort->expressions.begin();
		 node_ptr != end_node;
		 ++node_ptr, ++nullOrder, ++descending, ++map_item)
	{
		// Pick up sort key expression.

		NestConst<jrd_nod> node = *node_ptr;
		dsc* desc = &descriptor;
		CMP_get_desc(tdbb, csb, node, desc);

		// Allow for "key" forms of International text to grow
		if (IS_INTL_DATA(desc))
		{
			// Turn varying text and cstrings into text.

			if (desc->dsc_dtype == dtype_varying)
			{
				desc->dsc_dtype = dtype_text;
				desc->dsc_length -= sizeof(USHORT);
			}
			else if (desc->dsc_dtype == dtype_cstring)
			{
				desc->dsc_dtype = dtype_text;
				desc->dsc_length--;
			}

			desc->dsc_length = INTL_key_length(tdbb, INTL_INDEX_TYPE(desc), desc->dsc_length);
		}

		// Make key for null flag

#ifndef WORDS_BIGENDIAN
		map_length = ROUNDUP(map_length, sizeof(SLONG));
#endif
		const USHORT flag_offset = (USHORT) map_length++;
		sort_key->skd_offset = flag_offset;
		sort_key->skd_dtype = SKD_text;
		sort_key->skd_length = 1;

		// Handle nulls placement
		sort_key->skd_flags = SKD_ascending;

		// Have SQL-compliant nulls ordering for ODS11+
		if ((*nullOrder == rse_nulls_default && !*descending) || *nullOrder == rse_nulls_first)
			sort_key->skd_flags |= SKD_descending;

		++sort_key;

		// Make key for sort key proper
#ifndef WORDS_BIGENDIAN
		map_length = ROUNDUP(map_length, sizeof(SLONG));
#else
		if (desc->dsc_dtype >= dtype_aligned)
			map_length = FB_ALIGN(map_length, type_alignments[desc->dsc_dtype]);
#endif

		sort_key->skd_offset = (USHORT) map_length;
		sort_key->skd_flags = SKD_ascending;
		if (*descending)
			sort_key->skd_flags |= SKD_descending;

		fb_assert(desc->dsc_dtype < FB_NELEM(sort_dtypes));
		sort_key->skd_dtype = sort_dtypes[desc->dsc_dtype];

		if (!sort_key->skd_dtype)
			ERR_post(Arg::Gds(isc_invalid_sort_datatype) << Arg::Str(DSC_dtype_tostring(desc->dsc_dtype)));

		if (sort_key->skd_dtype == SKD_varying || sort_key->skd_dtype == SKD_cstring)
		{
			if (desc->dsc_ttype() == ttype_binary)
				sort_key->skd_flags |= SKD_binary;
		}

		sort_key->skd_length = desc->dsc_length;
		++sort_key;

		map_item->clear();
		map_item->node = node;
		map_item->flagOffset = flag_offset;
		map_item->desc = *desc;
		// dsc_address holds the offset of the item within the sort record
		map_item->desc.dsc_address = (UCHAR*)(IPTR) map_length;
		map_length += desc->dsc_length;

		if (node->nod_type == nod_field)
		{
			map_item->stream = (USHORT)(IPTR) node->nod_arg[e_fld_stream];
			map_item->fieldId = (USHORT)(IPTR) node->nod_arg[e_fld_id];
		}
	}

	map_length = ROUNDUP(map_length, sizeof(SLONG));
	map->keyLength = (USHORT) map_length >> SHIFTLONG;

	// One flag byte is reserved for every non-key item
	USHORT flag_offset = (USHORT) map_length;
	map_length += items - sort->expressions.getCount();

	// Now go back and process all the fields involved with the sort.  If the
	// field has already been mentioned as a sort key, don't bother to repeat it.

	while (stream_list.hasData())
	{
		const SLONG id = id_list.pop();
		const UCHAR stream = stream_list.pop();

		const Format* format = CMP_format(tdbb, csb, stream);

		// Validate the field id before the format descriptor array is indexed
		const dsc* const desc = (id < format->fmt_count) ? &format->fmt_desc[id] : NULL;

		if (!desc || desc->dsc_dtype == dtype_unknown)
			IBERROR(157);		// msg 157 cannot sort on a field that does not exist

		if (desc->dsc_dtype >= dtype_aligned)
			map_length = FB_ALIGN(map_length, type_alignments[desc->dsc_dtype]);

		map_item->clear();
		map_item->fieldId = (SSHORT) id;
		map_item->stream = stream;
		map_item->flagOffset = flag_offset++;
		map_item->desc = *desc;
		map_item->desc.dsc_address = (UCHAR*)(IPTR) map_length;
		map_length += desc->dsc_length;
		map_item++;
	}

	// Make fields for record numbers record for all streams

	map_length = ROUNDUP(map_length, sizeof(SINT64));

	for (ptr = &streams[1]; ptr <= end_ptr; ptr++, map_item++)
	{
		map_item->clear();
		map_item->fieldId = SortedStream::ID_DBKEY;
		map_item->stream = *ptr;
		dsc* desc = &map_item->desc;
		desc->dsc_dtype = dtype_int64;
		desc->dsc_length = sizeof(SINT64);
		desc->dsc_address = (UCHAR*)(IPTR) map_length;
		map_length += desc->dsc_length;
	}

	// Make fields for transaction id of record for all streams

	for (ptr = &streams[1]; ptr <= end_ptr; ptr++, map_item++)
	{
		map_item->clear();
		map_item->fieldId = SortedStream::ID_TRANS;
		map_item->stream = *ptr;
		dsc* desc = &map_item->desc;
		desc->dsc_dtype = dtype_long;
		desc->dsc_length = sizeof(SLONG);
		desc->dsc_address = (UCHAR*)(IPTR) map_length;
		map_length += desc->dsc_length;
	}

	if (dbkey_streams)
	{
		const UCHAR* const end_ptrL = dbkey_streams + dbkey_streams[0];

		// Record numbers of the additional dbkey streams

		map_length = ROUNDUP(map_length, sizeof(SINT64));

		for (ptr = &dbkey_streams[1]; ptr <= end_ptrL; ptr++, map_item++)
		{
			map_item->clear();
			map_item->fieldId = SortedStream::ID_DBKEY;
			map_item->stream = *ptr;
			dsc* desc = &map_item->desc;
			desc->dsc_dtype = dtype_int64;
			desc->dsc_length = sizeof(SINT64);
			desc->dsc_address = (UCHAR*)(IPTR) map_length;
			map_length += desc->dsc_length;
		}

		// One-byte dbkey-valid items of the additional dbkey streams

		for (ptr = &dbkey_streams[1]; ptr <= end_ptrL; ptr++, map_item++)
		{
			map_item->clear();
			map_item->fieldId = SortedStream::ID_DBKEY_VALID;
			map_item->stream = *ptr;
			dsc* desc = &map_item->desc;
			desc->dsc_dtype = dtype_text;
			desc->dsc_ttype() = CS_BINARY;
			desc->dsc_length = 1;
			desc->dsc_address = (UCHAR*)(IPTR) map_length;
			map_length += desc->dsc_length;
		}
	}

	// One-byte dbkey-valid items for all streams

	for (ptr = &streams[1]; ptr <= end_ptr; ptr++, map_item++)
	{
		map_item->clear();
		map_item->fieldId = SortedStream::ID_DBKEY_VALID;
		map_item->stream = *ptr;
		dsc* desc = &map_item->desc;
		desc->dsc_dtype = dtype_text;
		desc->dsc_ttype() = CS_BINARY;
		desc->dsc_length = 1;
		desc->dsc_address = (UCHAR*)(IPTR) map_length;
		map_length += desc->dsc_length;
	}

	// Every item and key slot allocated above must have been filled in
	fb_assert(map_item - map->items.begin() == USHORT(map->items.getCount()));
	fb_assert(sort_key - map->keyItems.begin() == USHORT(map->keyItems.getCount()));

	map_length = ROUNDUP(map_length, sizeof(SLONG));

	// Make fields to store varying and cstring length.

	const sort_key_def* const end_key = sort_key;

	for (sort_key = map->keyItems.begin(); sort_key < end_key; sort_key++)
	{
		fb_assert(sort_key->skd_dtype != 0);

		if (sort_key->skd_dtype == SKD_varying || sort_key->skd_dtype == SKD_cstring)
		{
			sort_key->skd_vary_offset = (USHORT) map_length;
			map_length += sizeof(USHORT);
		}
	}

	if (map_length > MAX_SORT_RECORD)
	{
		ERR_post(Arg::Gds(isc_sort_rec_size_err) << Arg::Num(map_length));
		// Msg438: sort record size of %ld bytes is too big
	}

	map->length = (USHORT) map_length;

	// That was most unpleasant.  Nevertheless, it's done (except for the debugging).
	// All that remains is to build the record source block for the sort.
	return FB_NEW(*tdbb->getDefaultPool()) SortedStream(csb, prior_rsb, map);
}
static bool gen_equi_join(thread_db* tdbb, OptimizerBlk* opt, RiverList& org_rivers)
{
/**************************************
 *
 *	g e n _ e q u i _ j o i n
 *
 **************************************
 *
 * Functional description
 *	We've got a set of rivers that may or may not be amenable to
 *	a hash join or a sort/merge join, and it's time to find out.
 *	If they are, build an appropriate join RecordSource, wrap it
 *	into a new river and insert that river into org_rivers in
 *	place of the rivers that were joined.
 *	If two or more rivers were successfully joined, return true.
 *	If the whole thing is a moby no-op, return false.
 *
 *	The work is done in four steps:
 *	1. group the unused equality conjuncts into equivalence classes
 *	   (one slot per river in every class);
 *	2. select the class covering the most rivers, plus every other
 *	   class covering at least the same rivers;
 *	3. for each selected river build its (optionally filtered)
 *	   record source and the list of its join keys;
 *	4. build the join, apply the booleans that became computable
 *	   and replace the joined rivers with the merged one.
 *
 **************************************/
	ULONG selected_rivers[OPT_STREAM_BITS], selected_rivers2[OPT_STREAM_BITS];
	jrd_nod** eq_class;

	DEV_BLKCHK(opt, type_opt);
	SET_TDBB(tdbb);

	CompilerScratch* const csb = opt->opt_csb;

	// Count the number of "rivers" involved in the operation, then allocate
	// a scratch block large enough to hold values to compute equality
	// classes.

	const USHORT cnt = (USHORT) org_rivers.getCount();

	if (cnt < 2)
	{
		return false;
	}

	// One row of "cnt" slots per potential class. Empty slots are expected
	// to be NULL (river_count() and node_equality() test for that), so this
	// presumably relies on grow() zero-filling the new items - TODO confirm.
	Firebird::HalfStaticArray<jrd_nod*, OPT_STATIC_ITEMS> scratch;
	scratch.grow(opt->opt_base_conjuncts * cnt);
	jrd_nod** classes = scratch.begin();

	// Compute equivalence classes among streams. This involves finding groups
	// of streams joined by field equalities.

	jrd_nod** last_class = classes;
	OptimizerBlk::opt_conjunct* tail = opt->opt_conjuncts.begin();
	const OptimizerBlk::opt_conjunct* const end = tail + opt->opt_base_conjuncts;

	for (; tail < end; tail++)
	{
		if (tail->opt_conjunct_flags & opt_conjunct_used)
		{
			continue;
		}

		jrd_nod* const node = tail->opt_conjunct_node;

		if (node->nod_type != nod_eql && node->nod_type != nod_equiv)
		{
			continue;
		}

		jrd_nod* node1 = node->nod_arg[0];
		jrd_nod* node2 = node->nod_arg[1];

		// Both sides must have equivalent descriptors and must not be blobs
		dsc desc1, desc2;
		CMP_get_desc(tdbb, csb, node1, &desc1);
		CMP_get_desc(tdbb, csb, node2, &desc2);

		if (!DSC_EQUIV(&desc1, &desc2, true) || desc1.isBlob() || desc2.isBlob())
		{
			continue;
		}

		USHORT number1 = 0;
		for (River** iter1 = org_rivers.begin(); iter1 < org_rivers.end(); iter1++, number1++)
		{
			River* const river1 = *iter1;

			if (!river1->isReferenced(node1))
			{
				if (!river1->isReferenced(node2))
				{
					continue;
				}

				// Keep node1 as the side that belongs to river1
				jrd_nod* const temp = node1;
				node1 = node2;
				node2 = temp;
			}

			USHORT number2 = number1 + 1;
			for (River** iter2 = iter1 + 1; iter2 < org_rivers.end(); iter2++, number2++)
			{
				River* const river2 = *iter2;

				if (river2->isReferenced(node2))
				{
					// Look for an existing class that already holds one of
					// the two operands in the slot of its river. The class
					// being examined is "eq_class": testing the slots of the
					// very first class ("classes") on every iteration would
					// make it impossible to ever extend any other class.
					for (eq_class = classes; eq_class < last_class; eq_class += cnt)
					{
						if (node_equality(node1, eq_class[number1]) ||
							node_equality(node2, eq_class[number2]))
						{
							break;
						}
					}

					// Either extend the class found or start a new one
					// in the first free row
					eq_class[number1] = node1;
					eq_class[number2] = node2;

					if (eq_class == last_class)
					{
						last_class += cnt;
					}
				}
			}
		}
	}

	// Pick both a set of classes and a set of rivers on which to join.
	// Obviously, if the set of classes is empty, return false
	// to indicate that nothing could be done.

	USHORT river_cnt = 0;
	HalfStaticArray<jrd_nod**, OPT_STATIC_ITEMS> selected_classes(cnt);

	for (eq_class = classes; eq_class < last_class; eq_class += cnt)
	{
		USHORT i = river_count(cnt, eq_class);

		if (i > river_cnt)
		{
			// Better class found: restart the selection from it
			river_cnt = i;
			selected_classes.shrink(0);
			selected_classes.add(eq_class);
			class_mask(cnt, eq_class, selected_rivers);
		}
		else
		{
			// Keep this class as an additional key only if it covers
			// every river of the currently selected set
			class_mask(cnt, eq_class, selected_rivers2);

			for (i = 0; i < OPT_STREAM_BITS; i++)
			{
				if ((selected_rivers[i] & selected_rivers2[i]) != selected_rivers[i])
				{
					break;
				}
			}

			if (i == OPT_STREAM_BITS)
			{
				selected_classes.add(eq_class);
			}
		}
	}

	if (!river_cnt)
	{
		return false;
	}

	// AB: Inactivate currently all streams from every river, because we
	// need to know which nodes are computable between the rivers used
	// for the merge.

	USHORT flag_vector[MAX_STREAMS + 1], *fv;
	UCHAR stream_nr;

	for (stream_nr = 0, fv = flag_vector; stream_nr < csb->csb_n_stream; stream_nr++)
	{
		*fv++ = csb->csb_rpt[stream_nr].csb_flags & csb_active;
		csb->csb_rpt[stream_nr].csb_flags &= ~csb_active;
	}

	HalfStaticArray<RecordSource*, OPT_STATIC_ITEMS> rsbs;
	HalfStaticArray<LegacyNodeArray*, OPT_STATIC_ITEMS> keys;

	// Unconditionally disable merge joins in favor of hash joins.
	// This is a temporary debugging measure.
	bool prefer_merge_over_hash = false;

	// AB: Get the lowest river position from the rivers that are merged

	RiverList rivers_to_merge;
	USHORT lowest_river_position = MAX_USHORT;
	USHORT number = 0;

	// "number" is the position of the river in the original list, while
	// "iter" is advanced explicitly only when the river is left in place:
	// after remove() it already designates the next river.
	for (River** iter = org_rivers.begin(); iter < org_rivers.end(); number++)
	{
		River* const river = *iter;

		if (!(TEST_DEP_BIT(selected_rivers, number)))
		{
			iter++;
			continue;
		}

		if (number < lowest_river_position)
		{
			lowest_river_position = number;
		}

		rivers_to_merge.add(river);
		org_rivers.remove(iter);

		RecordSource* rsb = river->getRecordSource();

		// Apply local river booleans, if any

		river->activate(csb);

		jrd_nod* river_boolean = NULL;

		for (tail = opt->opt_conjuncts.begin(); tail < end; tail++)
		{
			jrd_nod* const node = tail->opt_conjunct_node;

			if (!(tail->opt_conjunct_flags & opt_conjunct_used) &&
				OPT_computable(csb, node, -1, false, false))
			{
				compose(&river_boolean, node, nod_and);
				tail->opt_conjunct_flags |= opt_conjunct_used;
			}
		}

		river->deactivate(csb);

		if (river_boolean)
			rsb = FB_NEW(*tdbb->getDefaultPool()) FilteredStream(csb, rsb, river_boolean);

		// Collect RSBs and keys to join

		SortNode* key = FB_NEW(*tdbb->getDefaultPool()) SortNode(*tdbb->getDefaultPool());

		if (prefer_merge_over_hash)
		{
			// Merge join: every input has to be sorted by its keys
			jrd_nod*** selected_class;

			for (selected_class = selected_classes.begin();
				 selected_class != selected_classes.end(); ++selected_class)
			{
				key->descending.add(false);	// Ascending sort
				key->nullOrder.add(rse_nulls_default);	// Default nulls placement
				key->expressions.add((*selected_class)[number]);
			}

			// Build the counted stream list expected by OPT_gen_sort
			const size_t stream_count = river->getStreamCount();
			fb_assert(stream_count <= MAX_STREAMS);
			stream_array_t streams;
			streams[0] = (UCHAR) stream_count;
			memcpy(streams + 1, river->getStreams(), stream_count);

			rsb = OPT_gen_sort(tdbb, opt->opt_csb, streams, NULL, rsb, key, false);
		}
		else
		{
			// Hash join: only the key expressions are needed
			jrd_nod*** selected_class;

			for (selected_class = selected_classes.begin();
				 selected_class != selected_classes.end(); ++selected_class)
			{
				key->expressions.add((*selected_class)[number]);
			}
		}

		// It seems that rivers are already sorted by their cardinality.
		// For a hash join, we need to choose the smallest ones as inner sub-streams,
		// hence we reverse the order when storing them in the temporary arrays.

		if (prefer_merge_over_hash)
		{
			rsbs.add(rsb);
			keys.add(&key->expressions);
		}
		else
		{
			rsbs.insert(0, rsb);
			keys.insert(0, &key->expressions);
		}
	}

	fb_assert(rsbs.getCount() == keys.getCount());

	// Build a join stream

	RecordSource* rsb = NULL;

	if (prefer_merge_over_hash)
	{
		rsb = FB_NEW(*tdbb->getDefaultPool())
			MergeJoin(csb, rsbs.getCount(), (SortedStream**) rsbs.begin(), keys.begin());
	}
	else
	{
		rsb = FB_NEW(*tdbb->getDefaultPool())
			HashJoin(csb, rsbs.getCount(), rsbs.begin(), keys.begin());
	}

	// Activate streams of all the rivers being merged

	for (River** iter = rivers_to_merge.begin(); iter < rivers_to_merge.end(); iter++)
		(*iter)->activate(csb);

	// Pick up any boolean that may apply

	jrd_nod* boolean = NULL;

	for (tail = opt->opt_conjuncts.begin(); tail < end; tail++)
	{
		jrd_nod* const node = tail->opt_conjunct_node;

		if (!(tail->opt_conjunct_flags & opt_conjunct_used) &&
			OPT_computable(csb, node, -1, false, false))
		{
			compose(&boolean, node, nod_and);
			tail->opt_conjunct_flags |= opt_conjunct_used;
		}
	}

	if (boolean)
	{
		rsb = FB_NEW(*tdbb->getDefaultPool()) FilteredStream(csb, rsb, boolean);
	}

	// Reset all the streams to their original state

	for (stream_nr = 0, fv = flag_vector; stream_nr < csb->csb_n_stream; stream_nr++)
	{
		csb->csb_rpt[stream_nr].csb_flags |= *fv++;
	}

	// Replace the joined rivers with the merged one, placed where
	// the first of them used to be

	River* const merged_river =
		FB_NEW(*tdbb->getDefaultPool()) River(csb, rsb, rivers_to_merge);

	org_rivers.insert(lowest_river_position, merged_river);

	return true;
}
static jrd_nod* make_inference_node(CompilerScratch* csb, jrd_nod* boolean,
									jrd_nod* arg1, jrd_nod* arg2)
{
/**************************************
 *
 *	m a k e _ i n f e r e n c e _ n o d e
 *
 **************************************
 *
 * Defined
 *	1996-Jan-15 David Schnepper
 *
 * Functional description
 *	Build a new predicate of the same kind as "boolean" whose
 *	first two operands are copies of arg1 and arg2; any further
 *	operands are copied from "boolean" itself.
 *
 *	This is used when the engine knows A<B and A=C, and
 *	creates a new node to represent the inferred knowledge C<B.
 *
 *	Note that this may be sometimes incorrect with 3-value
 *	logic (per Chris Date's Object & Relations seminar).
 *	Later stages of query evaluation evaluate exactly
 *	the originally specified query, so 3-value issues are
 *	caught there. Making this inference might cause us to
 *	examine more records than needed, but would not result
 *	in incorrect results.
 *
 *	Note that some nodes, specifically nod_like, have
 *	more than two parameters for a boolean operation
 *	(nod_like has an optional 3rd parameter for the ESCAPE
 *	character option of SQL).
 *	Nod_sleuth also has an optional 3rd parameter (for the GDML
 *	matching ESCAPE character language). But nod_sleuth is
 *	(apparently) not considered during optimization.
 *
 **************************************/
	thread_db* tdbb = JRD_get_thread_data();

	DEV_BLKCHK(csb, type_csb);
	DEV_BLKCHK(boolean, type_nod);
	DEV_BLKCHK(arg1, type_nod);
	DEV_BLKCHK(arg2, type_nod);
	fb_assert(boolean->nod_count >= 2);	// must be a conjunction boolean

	// Start from an empty node of the same kind and arity as the source

	jrd_nod* inferred = PAR_make_node(tdbb, boolean->nod_count);
	inferred->nod_type = boolean->nod_type;

	// We may safely copy invariantness flag because
	// (1) we only distribute field equalities
	// (2) invariantness of second argument of STARTING WITH or LIKE is solely
	//    determined by its dependency on any of the fields
	// If provisions above change the line below will have to be modified
	inferred->nod_flags = boolean->nod_flags;

	// The two leading operands come from the caller...
	inferred->nod_arg[0] = CMP_clone_node_opt(tdbb, csb, arg1);
	inferred->nod_arg[1] = CMP_clone_node_opt(tdbb, csb, arg2);

	// ...while the remaining ones (eg: LIKE ESCAPE clause) are copies of
	// the operands of the source predicate
	USHORT position = 2;
	while (position < boolean->nod_count)
	{
		inferred->nod_arg[position] = CMP_clone_node_opt(tdbb, csb, boolean->nod_arg[position]);
		++position;
	}

	// Share impure area for cached invariant value used to hold pre-compiled
	// pattern for new LIKE and CONTAINING algorithms.
	// Proper cloning of impure area for this node would require careful accounting
	// of new invariant dependencies - we avoid such hassles via using single
	// cached pattern value for all node clones. This is faster too.
	const bool is_invariant = (inferred->nod_flags & nod_invariant) != 0;

	if (is_invariant)
		inferred->nod_impure = boolean->nod_impure;

	return inferred;
}
static bool map_equal(const jrd_nod* field1, const jrd_nod* field2, const MapNode* map)
2001-05-23 15:26:42 +02:00
{
/**************************************
*
* m a p _ e q u a l
*
**************************************
*
* Functional description
2005-05-28 00:45:31 +02:00
* Test to see if two fields are equal, where the fields
2001-05-23 15:26:42 +02:00
* are in two different streams possibly mapped to each other.
* Order of the input fields is important.
*
**************************************/
DEV_BLKCHK(field1, type_nod);
DEV_BLKCHK(field2, type_nod);
if (field1->nod_type != nod_field)
2003-09-02 18:39:58 +02:00
return false;
if (field2->nod_type != nod_field)
2003-09-02 18:39:58 +02:00
return false;
// look through the mapping and see if we can find an equivalence.
const NestConst<jrd_nod>* map_ptr = map->items.begin();
for (const NestConst<jrd_nod>* const map_end = map->items.end(); map_ptr != map_end; ++map_ptr)
2003-09-02 18:39:58 +02:00
{
2004-01-13 10:52:19 +01:00
const jrd_nod* map_from = (*map_ptr)->nod_arg[e_asgn_from];
const jrd_nod* map_to = (*map_ptr)->nod_arg[e_asgn_to];
if (map_from->nod_type != nod_field || map_to->nod_type != nod_field)
2001-05-23 15:26:42 +02:00
continue;
2008-12-18 12:29:10 +01:00
if (field1->nod_arg[e_fld_stream] != map_from->nod_arg[e_fld_stream] ||
field1->nod_arg[e_fld_id] != map_from->nod_arg[e_fld_id])
2003-09-02 18:39:58 +02:00
{
2001-05-23 15:26:42 +02:00
continue;
2003-09-02 18:39:58 +02:00
}
2008-12-18 12:29:10 +01:00
if (field2->nod_arg[e_fld_stream] != map_to->nod_arg[e_fld_stream] ||
field2->nod_arg[e_fld_id] != map_to->nod_arg[e_fld_id])
2003-09-02 18:39:58 +02:00
{
2001-05-23 15:26:42 +02:00
continue;
2003-09-02 18:39:58 +02:00
}
2003-09-02 18:39:58 +02:00
return true;
2001-05-23 15:26:42 +02:00
}
2003-09-02 18:39:58 +02:00
return false;
2001-05-23 15:26:42 +02:00
}
2004-01-13 10:52:19 +01:00
static bool node_equality(const jrd_nod* node1, const jrd_nod* node2)
2001-05-23 15:26:42 +02:00
{
/**************************************
*
* n o d e _ e q u a l i t y
*
**************************************
*
* Functional description
* Test two field node pointers for symbolic equality.
*
**************************************/
DEV_BLKCHK(node1, type_nod);
DEV_BLKCHK(node2, type_nod);
2003-09-02 18:39:58 +02:00
if (!node1 || !node2) {
return false;
}
if (node1->nod_type != node2->nod_type) {
return false;
}
if (node1 == node2) {
return true;
}
2009-01-20 09:33:59 +01:00
switch (node1->nod_type)
{
2003-09-02 18:39:58 +02:00
case nod_field:
2008-12-18 12:29:10 +01:00
return (node1->nod_arg[e_fld_stream] == node2->nod_arg[e_fld_stream] &&
node1->nod_arg[e_fld_id] == node2->nod_arg[e_fld_id]);
case nod_equiv:
2003-09-02 18:39:58 +02:00
case nod_eql:
2008-12-18 12:29:10 +01:00
if (node_equality(node1->nod_arg[0], node2->nod_arg[0]) &&
node_equality(node1->nod_arg[1], node2->nod_arg[1]))
2003-09-02 18:39:58 +02:00
{
return true;
}
2008-12-18 12:29:10 +01:00
if (node_equality(node1->nod_arg[0], node2->nod_arg[1]) &&
node_equality(node1->nod_arg[1], node2->nod_arg[0]))
2003-09-02 18:39:58 +02:00
{
return true;
}
return false;
default:
break;
2001-05-23 15:26:42 +02:00
}
2003-09-02 18:39:58 +02:00
return false;
2001-05-23 15:26:42 +02:00
}
2005-05-28 00:45:31 +02:00
static jrd_nod* optimize_like(thread_db* tdbb, CompilerScratch* csb, jrd_nod* like_node)
{
/**************************************
 *
 *	o p t i m i z e _ l i k e
 *
 **************************************
 *
 * Functional description
 *	Try to derive, from a LIKE expression, the literal to be
 *	used by an additional "starting with" predicate: the part
 *	of the pattern that precedes its first wildcard. That allows
 *	an index to be used for the starting with, while the LIKE
 *	just tags along for the ride.
 *	But on the ride it does useful work, consider
 *	match LIKE "ab%c". This is optimized by adding
 *	AND starting_with "ab", but the LIKE clause is
 *	still needed.
 *
 *	Returns the new literal node, or NULL when the expression
 *	cannot be optimized.
 *
 **************************************/
	SET_TDBB(tdbb);
	DEV_BLKCHK(like_node, type_nod);

	jrd_nod* const value_arg = like_node->nod_arg[0];
	jrd_nod* const pattern_arg = like_node->nod_arg[1];
	jrd_nod* const escape_arg = (like_node->nod_count > 2) ? like_node->nod_arg[2] : NULL;

	// if the pattern string or the escape string can't be
	// evaluated at compile time, forget it

	if (pattern_arg->nod_type != nod_literal)
		return NULL;

	if (escape_arg && escape_arg->nod_type != nod_literal)
		return NULL;

	dsc value_desc;
	CMP_get_desc(tdbb, csb, value_arg, &value_desc);

	dsc* const pattern_dsc = &((Literal*) pattern_arg)->lit_desc;
	dsc* const escape_dsc = escape_arg ? &((Literal*) escape_arg)->lit_desc : NULL;

	// if either is not a character expression, forget it

	if (value_desc.dsc_dtype > dtype_any_text)
		return NULL;

	if (pattern_dsc->dsc_dtype > dtype_any_text)
		return NULL;

	if (escape_arg && escape_dsc->dsc_dtype > dtype_any_text)
		return NULL;

	TextType* const valueTextType = INTL_texttype_lookup(tdbb, INTL_TTYPE(&value_desc));
	CharSet* const valueCharset = valueTextType->getCharSet();
	TextType* const patternTextType = INTL_texttype_lookup(tdbb, INTL_TTYPE(pattern_dsc));
	CharSet* const patternCharset = patternTextType->getCharSet();

	UCHAR escape_canonic[sizeof(ULONG)];
	UCHAR leading_char[sizeof(ULONG)];
	ULONG leading_len;
	UCHAR* text;
	USHORT text_len;

	// Get the escape character, if any

	if (escape_arg)
	{
		// Ensure escape string is same character set as match string

		MoveBuffer escape_buffer;

		text_len = MOV_make_string2(tdbb, escape_dsc, INTL_TTYPE(&value_desc), &text, escape_buffer);

		leading_len = valueCharset->substring(text_len, text, sizeof(leading_char), leading_char, 0, 1);
		valueTextType->canonical(leading_len, text, sizeof(escape_canonic), escape_canonic);
	}

	// Convert the pattern to the character set of the match string and
	// get the canonical form of its first character

	MoveBuffer pattern_buffer;

	text_len = MOV_make_string2(tdbb, pattern_dsc, INTL_TTYPE(&value_desc), &text, pattern_buffer);

	leading_len = valueCharset->substring(text_len, text, sizeof(leading_char), leading_char, 0, 1);

	UCHAR leading_canonic[sizeof(ULONG)];
	valueTextType->canonical(leading_len, text, sizeof(leading_canonic), leading_canonic);

	const BYTE canWidth = valueTextType->getCanonicalWidth();

	// Canonical forms of the two SQL wildcards
	const void* const match_one = valueTextType->getCanonicalChar(TextType::CHAR_SQL_MATCH_ONE);
	const void* const match_any = valueTextType->getCanonicalChar(TextType::CHAR_SQL_MATCH_ANY);

	// If the first character is a wildcard char, forget it.

	const bool leading_is_escape =
		escape_arg && memcmp(leading_canonic, escape_canonic, canWidth) == 0;

	if (!leading_is_escape &&
		(memcmp(leading_canonic, match_one, canWidth) == 0 ||
		 memcmp(leading_canonic, match_any, canWidth) == 0))
	{
		return NULL;
	}

	// allocate a literal node to store the starting with string;
	// assume it will be shorter than the pattern string
	// CVC: This assumption may not be true if we use "value like field".

	const SSHORT count = lit_delta + (pattern_dsc->dsc_length + sizeof(jrd_nod*) - 1) / sizeof(jrd_nod*);

	jrd_nod* const result = PAR_make_node(tdbb, count);
	result->nod_type = nod_literal;
	result->nod_count = 0;

	Literal* const literal = (Literal*) result;
	literal->lit_desc = *pattern_dsc;

	UCHAR* out = reinterpret_cast<UCHAR*>(literal->lit_data);
	literal->lit_desc.dsc_address = out;

	// copy the string into the starting with literal, up to the first wildcard character

	Firebird::HalfStaticArray<UCHAR, BUFFER_SMALL> patternCanonical;
	const ULONG canonicalCapacity = text_len / valueCharset->minBytesPerChar() * canWidth;

	// The scan below is bounded by the buffer itself, hence the value
	// returned by canonical() is not needed
	valueTextType->canonical(text_len, text,
		canonicalCapacity, patternCanonical.getBuffer(canonicalCapacity));

	UCHAR* scan = patternCanonical.begin();

	while (scan < patternCanonical.end())
	{
		// "current" is the canonical character to be copied by this iteration
		const UCHAR* current = scan;
		scan += canWidth;

		if (escape_arg && memcmp(current, escape_canonic, canWidth) == 0)
		{
			// if there are escape characters, skip past them and
			// don't treat the next char as a wildcard

			// Check for Escape character at end of string
			if (scan >= patternCanonical.end())
				break;

			current = scan;
			scan += canWidth;
		}
		else if (memcmp(current, match_one, canWidth) == 0 ||
				 memcmp(current, match_any, canWidth) == 0)
		{
			break;
		}

		// Append the character of the original pattern found at the same
		// position as the canonical character just examined
		out += patternCharset->substring(pattern_dsc->dsc_length, pattern_dsc->dsc_address,
			literal->lit_desc.dsc_length - (out - literal->lit_desc.dsc_address),
			out, (current - patternCanonical.begin()) / canWidth, 1);
	}

	// The literal is as long as what has been copied into it
	literal->lit_desc.dsc_length = out - literal->lit_desc.dsc_address;

	return result;
}
2004-01-13 10:52:19 +01:00
static USHORT river_count(USHORT count, jrd_nod** eq_class)
{
/**************************************
 *
 *	r i v e r _ c o u n t
 *
 **************************************
 *
 * Functional description
 *	Given a sort/merge join equivalence class (vector of "count"
 *	node pointers, one representative value per river), return
 *	the number of rivers that do have a value, i.e. the number
 *	of non-null entries of the vector.
 *
 **************************************/
	if (*eq_class) {
		DEV_BLKCHK(*eq_class, type_nod);
	}

	USHORT populated = 0;
	jrd_nod** const slots_end = eq_class + count;

	for (jrd_nod** slot = eq_class; slot != slots_end; ++slot)
	{
		if (!*slot)
			continue;

		DEV_BLKCHK(*slot, type_nod);
		++populated;
	}

	return populated;
}
2004-05-09 07:48:33 +02:00
static bool search_stack(const jrd_nod* node, const NodeStack& stack)
2001-05-23 15:26:42 +02:00
{
/**************************************
*
* s e a r c h _ s t a c k
*
**************************************
*
* Functional description
* Search a stack for the presence of a particular value.
*
**************************************/
DEV_BLKCHK(node, type_nod);
2009-06-21 12:34:33 +02:00
for (NodeStack::const_iterator iter(stack); iter.hasData(); ++iter)
{
if (node_equality(node, iter.object())) {
2003-09-02 18:39:58 +02:00
return true;
}
}
return false;
2001-05-23 15:26:42 +02:00
}
static void set_direction(SortNode* fromClause, SortNode* toClause)
{
/**************************************
 *
 *	s e t _ d i r e c t i o n
 *
 **************************************
 *
 * Functional description
 *	Copy the direction and the nulls placement of every item of
 *	a GROUP BY, DISTINCT, or ORDER BY clause to the item found
 *	at the same position in another clause.
 *	The target clause is expected to have at least as many
 *	items as the source one.
 *
 **************************************/
	const size_t fromCount = fromClause->expressions.getCount();
	const size_t toCount = toClause->expressions.getCount();

	fb_assert(fromCount <= toCount);
	fb_assert(fromCount == fromClause->descending.getCount() &&
		fromCount == fromClause->nullOrder.getCount());
	fb_assert(toCount == toClause->descending.getCount() &&
		toCount == toClause->nullOrder.getCount());

	size_t position = 0;

	while (position < fromCount)
	{
		toClause->descending[position] = fromClause->descending[position];
		toClause->nullOrder[position] = fromClause->nullOrder[position];
		++position;
	}
}
static void set_position(const SortNode* from_clause, SortNode* to_clause, const MapNode* map)
2001-05-23 15:26:42 +02:00
{
/**************************************
*
* s e t _ p o s i t i o n
*
**************************************
*
* Functional description
2005-05-28 00:45:31 +02:00
* Update the fields in a GROUP BY, DISTINCT, or ORDER BY
* clause to the same position as another clause, possibly
2001-05-23 15:26:42 +02:00
* using a mapping between the streams.
*
**************************************/
DEV_BLKCHK(from_clause, type_nod);
2009-08-20 12:19:02 +02:00
// Track the position in the from list with "to_swap", and find the corresponding
// field in the from list with "to_ptr", then swap the two fields. By the time
// we get to the end of the from list, all fields in the to list will be reordered.
NestConst<jrd_nod>* to_swap = to_clause->expressions.begin();
const NestConst<jrd_nod>* from_ptr = from_clause->expressions.begin();
for (const NestConst<jrd_nod>* const from_end = from_clause->expressions.end();
from_ptr != from_end; ++from_ptr)
2004-01-13 10:52:19 +01:00
{
NestConst<jrd_nod>* to_ptr = to_clause->expressions.begin();
for (const NestConst<jrd_nod>* const to_end = to_clause->expressions.end();
to_ptr != to_end; ++to_ptr)
2004-01-13 10:52:19 +01:00
{
2008-12-18 12:29:10 +01:00
if ((map && map_equal(*to_ptr, *from_ptr, map)) ||
(!map &&
(*from_ptr)->nod_arg[e_fld_stream] == (*to_ptr)->nod_arg[e_fld_stream] &&
(*from_ptr)->nod_arg[e_fld_id] == (*to_ptr)->nod_arg[e_fld_id]))
2004-01-13 10:52:19 +01:00
{
jrd_nod* swap = *to_swap;
2001-05-23 15:26:42 +02:00
*to_swap = *to_ptr;
*to_ptr = swap;
}
}
++to_swap;
2001-05-23 15:26:42 +02:00
}
}
static void set_rse_inactive(CompilerScratch* csb, const RseNode* rse)
{
/***************************************************
 *
 *  s e t _ r s e _ i n a c t i v e
 *
 ***************************************************
 *
 * Functional Description:
 *    Clear the csb_active flag of all the streams involved
 *    in an RseNode, descending recursively into nested RseNodes.
 *
 ***************************************************/
	const NestConst<RecordSourceNode>* const sources_end = rse->rse_relations.end();

	for (const NestConst<RecordSourceNode>* source = rse->rse_relations.begin();
		 source != sources_end; ++source)
	{
		const RecordSourceNode* const sub = *source;

		if (sub->type == RseNode::TYPE)
		{
			// Nested record selection: handle its own sources
			set_rse_inactive(csb, static_cast<const RseNode*>(sub));
			continue;
		}

		// Any other source: deactivate every stream it reports
		StreamsArray sourceStreams;
		sub->getStreams(sourceStreams);

		for (StreamsArray::iterator stream = sourceStreams.begin();
			 stream != sourceStreams.end(); ++stream)
		{
			csb->csb_rpt[*stream].csb_flags &= ~csb_active;
		}
	}
}