8
0
mirror of https://github.com/FirebirdSQL/firebird.git synced 2025-01-22 20:43:02 +01:00

Feature CORE-4722 - Aggregate linear regression functions.

This commit is contained in:
asfernandes 2015-03-25 17:51:54 +00:00
parent 11fedd72e7
commit 4554ee24f5
8 changed files with 500 additions and 0 deletions

View File

@ -291,6 +291,14 @@
Contributor(s): Contributor(s):
Alex Peshkov <peshkoff at mail.ru> Alex Peshkov <peshkoff at mail.ru>
* New feature CORE-4722
Aggregate linear regression functions
See also:
/doc/sql.extensions/README.regr_functions.txt
Contributor(s):
Hajime Nakagami <nakagami at gmail.com>
Adriano dos Santos Fernandes <adrianosf at gmail.com>
* New feature CORE-4717 * New feature CORE-4717
Aggregate statistical functions COVAR_SAMP, COVAR_POP and CORR Aggregate statistical functions COVAR_SAMP, COVAR_POP and CORR
See also: See also:

View File

@ -0,0 +1,38 @@
---------------------------
Linear Regression Functions
---------------------------
REGR_* functions analyze the relationship between two numeric sets of data.
These functions take into account only the records in which both values are not NULL.
Syntax:
<regr function> ::= <function name>(<expr1>, <expr2>)
<function name> ::= { REGR_AVGX | REGR_AVGY | REGR_COUNT | REGR_INTERCEPT |
REGR_R2 | REGR_SLOPE | REGR_SXX | REGR_SXY | REGR_SYY }
The formulas use the variables below.
Y: <expr1> (<expr1> IS NOT NULL AND <expr2> IS NOT NULL).
X: <expr2> (<expr1> IS NOT NULL AND <expr2> IS NOT NULL).
N: COUNT of the records, excluding those where <expr1> IS NULL OR <expr2> IS NULL.
Formula:
REGR_AVGX(Y, X) = SUM(X) / N
REGR_AVGY(Y, X) = SUM(Y) / N
REGR_COUNT(Y, X) = N
REGR_INTERCEPT(Y, X) = REGR_AVGY(Y, X) - REGR_SLOPE(Y, X) * REGR_AVGX(Y, X)
REGR_R2(Y, X) = POWER(CORR(Y, X),2)
REGR_SLOPE(Y, X) = COVAR_POP(Y, X) / VAR_POP(X)
REGR_SXX(Y, X) = N * VAR_POP(X)
REGR_SXY(Y, X) = N * COVAR_POP(Y, X)
REGR_SYY(Y, X) = N * VAR_POP(Y)
Author:
Hajime Nakagami <nakagami@gmail.com>
Note:
The functions return NULL if N = 0, except REGR_COUNT().
REGR_SLOPE, REGR_INTERCEPT and REGR_R2 also return NULL if VAR_POP(X) = 0.

View File

@ -1220,6 +1220,7 @@ DmlNode* StdDevAggNode::parse(thread_db* tdbb, MemoryPool& pool, CompilerScratch
default: default:
fb_assert(false); fb_assert(false);
return NULL;
} }
return FB_NEW(pool) StdDevAggNode(pool, type, PAR_parse_value(tdbb, csb)); return FB_NEW(pool) StdDevAggNode(pool, type, PAR_parse_value(tdbb, csb));
@ -1359,6 +1360,7 @@ DmlNode* CorrAggNode::parse(thread_db* tdbb, MemoryPool& pool, CompilerScratch*
default: default:
fb_assert(false); fb_assert(false);
return NULL;
} }
ValueExprNode* a1 = PAR_parse_value(tdbb, csb); ValueExprNode* a1 = PAR_parse_value(tdbb, csb);
@ -1488,4 +1490,322 @@ AggNode* CorrAggNode::dsqlCopy(DsqlCompilerScratch* dsqlScratch) /*const*/
} }
//--------------------
// Registration objects for the REGR_* aggregates implemented by RegrAggNode.
// Each one pairs the function name with its BLR opcode; the RegrAggNode
// constructor picks the matching object according to the regression type.
static AggNode::Register<RegrAggNode> regrAvgxAggInfo("REGR_AVGX", blr_agg_regr_avgx);
static AggNode::Register<RegrAggNode> regrAvgyAggInfo("REGR_AVGY", blr_agg_regr_avgy);
static AggNode::Register<RegrAggNode> regrInterceptAggInfo("REGR_INTERCEPT", blr_agg_regr_intercept);
static AggNode::Register<RegrAggNode> regrR2AggInfo("REGR_R2", blr_agg_regr_r2);
static AggNode::Register<RegrAggNode> regrSlopeAggInfo("REGR_SLOPE", blr_agg_regr_slope);
static AggNode::Register<RegrAggNode> regrSxxAggInfo("REGR_SXX", blr_agg_regr_sxx);
static AggNode::Register<RegrAggNode> regrSxyAggInfo("REGR_SXY", blr_agg_regr_sxy);
static AggNode::Register<RegrAggNode> regrSyyAggInfo("REGR_SYY", blr_agg_regr_syy);
RegrAggNode::RegrAggNode(MemoryPool& pool, RegrType aType, ValueExprNode* aArg, ValueExprNode* aArg2)
: AggNode(pool,
(aType == RegrAggNode::TYPE_REGR_AVGX ? regrAvgxAggInfo :
aType == RegrAggNode::TYPE_REGR_AVGY ? regrAvgyAggInfo :
aType == RegrAggNode::TYPE_REGR_INTERCEPT ? regrInterceptAggInfo :
aType == RegrAggNode::TYPE_REGR_R2 ? regrR2AggInfo :
aType == RegrAggNode::TYPE_REGR_SLOPE ? regrSlopeAggInfo :
aType == RegrAggNode::TYPE_REGR_SXX ? regrSxxAggInfo :
aType == RegrAggNode::TYPE_REGR_SXY ? regrSxyAggInfo :
aType == RegrAggNode::TYPE_REGR_SYY ? regrSyyAggInfo :
regrSyyAggInfo),
false, false, aArg),
type(aType),
arg2(aArg2),
impure2Offset(0)
{
addChildNode(arg2, arg2);
}
// Runs the base-class post-RSE step first, then asks CMP_impure for
// sizeof(RegrImpure) bytes and stores the returned value in impure2Offset.
// That offset is what aggInit/aggPass/aggExecute use to reach the running sums.
void RegrAggNode::aggPostRse(thread_db* tdbb, CompilerScratch* csb)
{
AggNode::aggPostRse(tdbb, csb);
impure2Offset = CMP_impure(csb, sizeof(RegrImpure));
}
// Builds a RegrAggNode from BLR: blrOp selects the regression type and the
// two value expressions that follow are parsed, in order, as the arguments.
// An opcode that is not one of the REGR_* opcodes asserts and returns NULL
// without parsing anything.
DmlNode* RegrAggNode::parse(thread_db* tdbb, MemoryPool& pool, CompilerScratch* csb, const UCHAR blrOp)
{
	// BLR opcode -> regression type.
	static const struct
	{
		UCHAR op;
		RegrType kind;
	} opTable[] =
	{
		{blr_agg_regr_avgx, TYPE_REGR_AVGX},
		{blr_agg_regr_avgy, TYPE_REGR_AVGY},
		{blr_agg_regr_intercept, TYPE_REGR_INTERCEPT},
		{blr_agg_regr_r2, TYPE_REGR_R2},
		{blr_agg_regr_slope, TYPE_REGR_SLOPE},
		{blr_agg_regr_sxx, TYPE_REGR_SXX},
		{blr_agg_regr_sxy, TYPE_REGR_SXY},
		{blr_agg_regr_syy, TYPE_REGR_SYY}
	};

	const unsigned entries = sizeof(opTable) / sizeof(opTable[0]);

	for (unsigned i = 0; i < entries; ++i)
	{
		if (opTable[i].op != blrOp)
			continue;

		// The two arguments must be read in stream order.
		ValueExprNode* const first = PAR_parse_value(tdbb, csb);
		ValueExprNode* const second = PAR_parse_value(tdbb, csb);

		return FB_NEW(pool) RegrAggNode(pool, opTable[i].kind, first, second);
	}

	fb_assert(false);
	return NULL;
}
void RegrAggNode::make(DsqlCompilerScratch* dsqlScratch, dsc* desc)
{
desc->makeDouble();
desc->setNullable(true);
}
void RegrAggNode::getDesc(thread_db* tdbb, CompilerScratch* csb, dsc* desc)
{
desc->makeDouble();
}
// Creates a node of the same regression type in the thread's default pool,
// carries over nodScale and copies both arguments through the copier.
ValueExprNode* RegrAggNode::copy(thread_db* tdbb, NodeCopier& copier) const
{
	MemoryPool& targetPool = *tdbb->getDefaultPool();

	RegrAggNode* const clone = FB_NEW(targetPool) RegrAggNode(targetPool, type);
	clone->nodScale = nodScale;

	// Arguments are copied in the same order they were parsed.
	clone->arg = copier.copy(tdbb, arg);
	clone->arg2 = copier.copy(tdbb, arg2);

	return clone;
}
// Resets the aggregate state: after the base-class initialisation the
// result slot becomes a double 0 and every running sum is cleared.
void RegrAggNode::aggInit(thread_db* tdbb, jrd_req* request) const
{
	AggNode::aggInit(tdbb, request);

	impure_value_ex* const result = request->getImpure<impure_value_ex>(impureOffset);
	result->make_double(0);

	RegrImpure* const sums = request->getImpure<RegrImpure>(impure2Offset);
	sums->xy = 0.0;
	sums->y2 = 0.0;
	sums->y = 0.0;
	sums->x2 = 0.0;
	sums->x = 0.0;
}
// Feeds one row into the aggregate. Rows where either argument evaluates to
// NULL are skipped (returns false); otherwise the row counter and the running
// sums of x, x*x, y, y*y and x*y are updated and true is returned.
// The first argument is Y, the second is X.
bool RegrAggNode::aggPass(thread_db* tdbb, jrd_req* request) const
{
	dsc* const yDesc = EVL_expr(tdbb, request, arg);
	if (request->req_flags & req_null)
		return false;

	dsc* const xDesc = EVL_expr(tdbb, request, arg2);
	if (request->req_flags & req_null)
		return false;

	impure_value_ex* const result = request->getImpure<impure_value_ex>(impureOffset);
	++result->vlux_count;

	const double y = MOV_get_double(yDesc);
	const double x = MOV_get_double(xDesc);

	RegrImpure* const sums = request->getImpure<RegrImpure>(impure2Offset);
	sums->x += x;
	sums->x2 += x * x;
	sums->y += y;
	sums->y2 += y * y;
	sums->xy += x * y;

	return true;
}
// Descriptor-based overload of aggPass. It is not expected to be called for
// this aggregate: the body only asserts. All accumulation happens in the
// two-parameter aggPass overload.
void RegrAggNode::aggPass(thread_db* /*tdbb*/, jrd_req* /*request*/, dsc* /*desc*/) const
{
fb_assert(false);
}
// Produces the final value of the regression aggregate from the running sums.
//
// Returns NULL when no row contributed (count is 0). REGR_SLOPE,
// REGR_INTERCEPT and REGR_R2 also return NULL when the population variance
// of X is 0; REGR_R2 is 1 when only the variance of Y is 0. In every other
// case the result is stored as a double in the node's impure area and a
// pointer to its descriptor is returned.
//
// Quotients are computed only inside the branch that needs them, after the
// zero checks, so no division by zero is performed for the other functions.
dsc* RegrAggNode::aggExecute(thread_db* tdbb, jrd_req* request) const
{
	impure_value_ex* impure = request->getImpure<impure_value_ex>(impureOffset);
	RegrImpure* impure2 = request->getImpure<RegrImpure>(impure2Offset);

	if (impure->vlux_count == 0)
		return NULL;

	// Number of rows with both arguments non-NULL, as a double for the formulas below.
	const double n = impure->vlux_count;

	const double varPopX = (impure2->x2 - impure2->x * impure2->x / n) / n;
	const double varPopY = (impure2->y2 - impure2->y * impure2->y / n) / n;
	const double covarPop = (impure2->xy - impure2->y * impure2->x / n) / n;
	const double avgX = impure2->x / n;
	const double avgY = impure2->y / n;

	double d = 0.0;

	switch (type)
	{
		case TYPE_REGR_AVGX:
			d = avgX;
			break;

		case TYPE_REGR_AVGY:
			d = avgY;
			break;

		case TYPE_REGR_INTERCEPT:
			if (varPopX == 0.0)
				return NULL;

			d = avgY - (covarPop / varPopX) * avgX;
			break;

		case TYPE_REGR_R2:
		{
			if (varPopX == 0.0)
				return NULL;

			if (varPopY == 0.0)
			{
				d = 1.0;
				break;
			}

			const double sq = sqrt(varPopX) * sqrt(varPopY);

			if (sq == 0.0)
				return NULL;

			const double corr = covarPop / sq;
			d = corr * corr;
			break;
		}

		case TYPE_REGR_SLOPE:
			if (varPopX == 0.0)
				return NULL;

			d = covarPop / varPopX;
			break;

		case TYPE_REGR_SXX:
			d = n * varPopX;
			break;

		case TYPE_REGR_SXY:
			d = n * covarPop;
			break;

		case TYPE_REGR_SYY:
			d = n * varPopY;
			break;

		default:
			// Unknown regression type: never hand back an unset value.
			fb_assert(false);
			return NULL;
	}

	dsc temp;
	temp.makeDouble(&d);
	EVL_make_value(tdbb, &temp, impure);

	return &impure->vlu_desc;
}
// Returns a new node of the same regression type, allocated in this node's
// pool, whose arguments are the DSQL-passed versions of arg and arg2.
AggNode* RegrAggNode::dsqlCopy(DsqlCompilerScratch* dsqlScratch) /*const*/
{
	MemoryPool& ownPool = getPool();

	return FB_NEW(ownPool) RegrAggNode(ownPool, type,
		doDsqlPass(dsqlScratch, arg), doDsqlPass(dsqlScratch, arg2));
}
//--------------------
// Registration object for REGR_COUNT: pairs the function name with its BLR
// opcode and is passed to the AggNode base by the RegrCountAggNode constructor.
static AggNode::Register<RegrCountAggNode> regrCountAggInfo("REGR_COUNT", blr_agg_regr_count);
// Builds a REGR_COUNT node over (aArg, aArg2). aArg is handed to the AggNode
// base together with the REGR_COUNT registration; aArg2 is kept in arg2.
RegrCountAggNode::RegrCountAggNode(MemoryPool& pool, ValueExprNode* aArg, ValueExprNode* aArg2)
: AggNode(pool, regrCountAggInfo, false, false, aArg),
arg2(aArg2)
{
// NOTE(review): presumably makes arg2 visible to generic child-node traversal - confirm against AggNode.
addChildNode(arg2, arg2);
}
// Builds a RegrCountAggNode from BLR by parsing the two value expressions
// that follow the opcode, in stream order. The opcode itself is not inspected.
DmlNode* RegrCountAggNode::parse(thread_db* tdbb, MemoryPool& pool, CompilerScratch* csb, const UCHAR /*blrOp*/)
{
	ValueExprNode* const first = PAR_parse_value(tdbb, csb);
	ValueExprNode* const second = PAR_parse_value(tdbb, csb);

	RegrCountAggNode* const node = FB_NEW(pool) RegrCountAggNode(pool, first, second);
	return node;
}
void RegrCountAggNode::make(DsqlCompilerScratch* dsqlScratch, dsc* desc)
{
desc->makeInt64(0);
}
void RegrCountAggNode::getDesc(thread_db* tdbb, CompilerScratch* csb, dsc* desc)
{
desc->makeInt64(0);
}
// Creates a REGR_COUNT node in the thread's default pool, carries over
// nodScale and copies both arguments through the copier.
ValueExprNode* RegrCountAggNode::copy(thread_db* tdbb, NodeCopier& copier) const
{
	MemoryPool& targetPool = *tdbb->getDefaultPool();

	RegrCountAggNode* const clone = FB_NEW(targetPool) RegrCountAggNode(targetPool);
	clone->nodScale = nodScale;

	// Arguments are copied in the same order they were parsed.
	clone->arg = copier.copy(tdbb, arg);
	clone->arg2 = copier.copy(tdbb, arg2);

	return clone;
}
// Resets the aggregate state: after the base-class initialisation the
// result slot becomes a 64-bit integer 0, which aggPass then increments.
void RegrCountAggNode::aggInit(thread_db* tdbb, jrd_req* request) const
{
	AggNode::aggInit(tdbb, request);

	impure_value_ex* const counter = request->getImpure<impure_value_ex>(impureOffset);
	counter->make_int64(0);
}
// Feeds one row into the counter. Both arguments are evaluated only to learn
// whether they are NULL; a row where either one is NULL is skipped (returns
// false), otherwise the 64-bit counter is incremented and true is returned.
bool RegrCountAggNode::aggPass(thread_db* tdbb, jrd_req* request) const
{
	// The values themselves are not needed, only the NULL flag they leave behind.
	EVL_expr(tdbb, request, arg);
	if (request->req_flags & req_null)
		return false;

	EVL_expr(tdbb, request, arg2);
	if (request->req_flags & req_null)
		return false;

	impure_value_ex* const counter = request->getImpure<impure_value_ex>(impureOffset);
	++counter->vlu_misc.vlu_int64;

	return true;
}
// Descriptor-based overload of aggPass. It is not expected to be called for
// this aggregate: the body only asserts. Counting happens in the
// two-parameter aggPass overload.
void RegrCountAggNode::aggPass(thread_db* /*tdbb*/, jrd_req* /*request*/, dsc* /*desc*/) const
{
fb_assert(false);
}
// Returns the descriptor of the accumulated count, or NULL when the result
// slot has no data type set.
dsc* RegrCountAggNode::aggExecute(thread_db* /*tdbb*/, jrd_req* request) const
{
	impure_value_ex* const counter = request->getImpure<impure_value_ex>(impureOffset);

	if (counter->vlu_desc.dsc_dtype)
		return &counter->vlu_desc;

	return NULL;
}
// Returns a new REGR_COUNT node, allocated in this node's pool, whose
// arguments are the DSQL-passed versions of arg and arg2.
AggNode* RegrCountAggNode::dsqlCopy(DsqlCompilerScratch* dsqlScratch) /*const*/
{
	MemoryPool& ownPool = getPool();

	return FB_NEW(ownPool) RegrCountAggNode(ownPool,
		doDsqlPass(dsqlScratch, arg), doDsqlPass(dsqlScratch, arg2));
}
} // namespace Jrd } // namespace Jrd

View File

@ -234,6 +234,77 @@ private:
ULONG impure2Offset; ULONG impure2Offset;
}; };
// Aggregate node for the two-argument linear regression functions
// REGR_AVGX, REGR_AVGY, REGR_INTERCEPT, REGR_R2, REGR_SLOPE, REGR_SXX,
// REGR_SXY and REGR_SYY. REGR_COUNT is handled by a separate node class.
class RegrAggNode : public AggNode
{
public:
// Selects which regression statistic the node produces.
enum RegrType
{
TYPE_REGR_AVGX,
TYPE_REGR_AVGY,
TYPE_REGR_INTERCEPT,
TYPE_REGR_R2,
TYPE_REGR_SLOPE,
TYPE_REGR_SXX,
TYPE_REGR_SXY,
TYPE_REGR_SYY
};
// Running sums kept per request: sum of x, x*x, y, y*y and x*y over the
// rows where both arguments are non-NULL.
struct RegrImpure
{
double x, x2, y, y2, xy;
};
// aArg is the first (Y) argument, aArg2 the second (X) argument.
explicit RegrAggNode(MemoryPool& pool, RegrType aType,
ValueExprNode* aArg = NULL, ValueExprNode* aArg2 = NULL);
// Obtains the offset of the RegrImpure area.
virtual void aggPostRse(thread_db* tdbb, CompilerScratch* csb);
// Builds a node from BLR; blrOp selects the regression type.
static DmlNode* parse(thread_db* tdbb, MemoryPool& pool, CompilerScratch* csb, const UCHAR blrOp);
// Both descriptor functions report a double result.
virtual void make(DsqlCompilerScratch* dsqlScratch, dsc* desc);
virtual void getDesc(thread_db* tdbb, CompilerScratch* csb, dsc* desc);
virtual ValueExprNode* copy(thread_db* tdbb, NodeCopier& copier) const;
// aggInit clears the state, the two-parameter aggPass accumulates one row,
// aggExecute computes the final value (NULL when no row contributed).
virtual void aggInit(thread_db* tdbb, jrd_req* request) const;
virtual bool aggPass(thread_db* tdbb, jrd_req* request) const;
// Not used by this aggregate; the implementation only asserts.
virtual void aggPass(thread_db* tdbb, jrd_req* request, dsc* desc) const;
virtual dsc* aggExecute(thread_db* tdbb, jrd_req* request) const;
protected:
virtual AggNode* dsqlCopy(DsqlCompilerScratch* dsqlScratch) /*const*/;
public:
const RegrType type;
// Second argument (X); the first argument is passed to the AggNode base.
NestConst<ValueExprNode> arg2;
private:
// Offset of the RegrImpure area, set in aggPostRse().
ULONG impure2Offset;
};
// Aggregate node for REGR_COUNT: counts the rows in which both arguments
// are non-NULL and reports the count as a 64-bit integer.
class RegrCountAggNode : public AggNode
{
public:
// aArg is the first argument, aArg2 the second one.
explicit RegrCountAggNode(MemoryPool& pool,
ValueExprNode* aArg = NULL, ValueExprNode* aArg2 = NULL);
// Builds a node from BLR by parsing the two argument expressions.
static DmlNode* parse(thread_db* tdbb, MemoryPool& pool, CompilerScratch* csb, const UCHAR blrOp);
// Both descriptor functions report an int64 result with scale 0.
virtual void make(DsqlCompilerScratch* dsqlScratch, dsc* desc);
virtual void getDesc(thread_db* tdbb, CompilerScratch* csb, dsc* desc);
virtual ValueExprNode* copy(thread_db* tdbb, NodeCopier& copier) const;
// aggInit zeroes the counter, the two-parameter aggPass increments it for
// each qualifying row, aggExecute returns the counter's descriptor.
virtual void aggInit(thread_db* tdbb, jrd_req* request) const;
virtual bool aggPass(thread_db* tdbb, jrd_req* request) const;
// Not used by this aggregate; the implementation only asserts.
virtual void aggPass(thread_db* tdbb, jrd_req* request, dsc* desc) const;
virtual dsc* aggExecute(thread_db* tdbb, jrd_req* request) const;
protected:
virtual AggNode* dsqlCopy(DsqlCompilerScratch* dsqlScratch) /*const*/;
public:
// Second argument; the first argument is passed to the AggNode base.
NestConst<ValueExprNode> arg2;
};
} // namespace } // namespace
#endif // DSQL_AGG_NODES_H #endif // DSQL_AGG_NODES_H

View File

@ -579,6 +579,15 @@ using namespace Firebird;
%token <metaNamePtr> COVAR_SAMP %token <metaNamePtr> COVAR_SAMP
%token <metaNamePtr> COVAR_POP %token <metaNamePtr> COVAR_POP
%token <metaNamePtr> CORR %token <metaNamePtr> CORR
%token <metaNamePtr> REGR_AVGX
%token <metaNamePtr> REGR_AVGY
%token <metaNamePtr> REGR_COUNT
%token <metaNamePtr> REGR_INTERCEPT
%token <metaNamePtr> REGR_R2
%token <metaNamePtr> REGR_SLOPE
%token <metaNamePtr> REGR_SXX
%token <metaNamePtr> REGR_SXY
%token <metaNamePtr> REGR_SYY
// precedence declarations for expression evaluation // precedence declarations for expression evaluation
@ -3824,6 +3833,15 @@ keyword_or_column
| KW_FALSE | KW_FALSE
| OFFSET | OFFSET
| OVER | OVER
| REGR_AVGX
| REGR_AVGY
| REGR_COUNT
| REGR_INTERCEPT
| REGR_R2
| REGR_SLOPE
| REGR_SXX
| REGR_SXY
| REGR_SYY
| RETURN | RETURN
| RDB_RECORD_VERSION | RDB_RECORD_VERSION
| ROW | ROW
@ -6817,6 +6835,24 @@ aggregate_function
{ $$ = newNode<CorrAggNode>(CorrAggNode::TYPE_COVAR_POP, $3, $5); } { $$ = newNode<CorrAggNode>(CorrAggNode::TYPE_COVAR_POP, $3, $5); }
| CORR '(' value ',' value ')' | CORR '(' value ',' value ')'
{ $$ = newNode<CorrAggNode>(CorrAggNode::TYPE_CORR, $3, $5); } { $$ = newNode<CorrAggNode>(CorrAggNode::TYPE_CORR, $3, $5); }
| REGR_AVGX '(' value ',' value ')'
{ $$ = newNode<RegrAggNode>(RegrAggNode::TYPE_REGR_AVGX, $3, $5); }
| REGR_AVGY '(' value ',' value ')'
{ $$ = newNode<RegrAggNode>(RegrAggNode::TYPE_REGR_AVGY, $3, $5); }
| REGR_COUNT '(' value ',' value ')'
{ $$ = newNode<RegrCountAggNode>($3, $5); }
| REGR_INTERCEPT '(' value ',' value ')'
{ $$ = newNode<RegrAggNode>(RegrAggNode::TYPE_REGR_INTERCEPT, $3, $5); }
| REGR_R2 '(' value ',' value ')'
{ $$ = newNode<RegrAggNode>(RegrAggNode::TYPE_REGR_R2, $3, $5); }
| REGR_SLOPE '(' value ',' value ')'
{ $$ = newNode<RegrAggNode>(RegrAggNode::TYPE_REGR_SLOPE, $3, $5); }
| REGR_SXX '(' value ',' value ')'
{ $$ = newNode<RegrAggNode>(RegrAggNode::TYPE_REGR_SXX, $3, $5); }
| REGR_SXY '(' value ',' value ')'
{ $$ = newNode<RegrAggNode>(RegrAggNode::TYPE_REGR_SXY, $3, $5); }
| REGR_SYY '(' value ',' value ')'
{ $$ = newNode<RegrAggNode>(RegrAggNode::TYPE_REGR_SYY, $3, $5); }
; ;
%type <aggNode> window_function %type <aggNode> window_function

View File

@ -248,5 +248,14 @@ static const struct
{"agg_covar_samp", two}, {"agg_covar_samp", two},
{"agg_covar_pop", two}, {"agg_covar_pop", two},
{"agg_corr", two}, {"agg_corr", two},
// Printable names of the aggregate linear regression opcodes. They are spelled
// without the "blr_" prefix, exactly like the agg_* entries next to them.
// NOTE(review): presumably the BLR printer prepends "blr_" itself - confirm.
{"agg_regr_avgx", two},
{"agg_regr_avgy", two},
{"agg_regr_count", two},
{"agg_regr_intercept", two},
{"agg_regr_r2", two},
{"agg_regr_slope", two},
{"agg_regr_sxx", two},
{"agg_regr_sxy", two},
{"agg_regr_syy", two},
{0, 0} {0, 0}
}; };

View File

@ -414,5 +414,14 @@
#define blr_agg_covar_samp (unsigned char) 215 #define blr_agg_covar_samp (unsigned char) 215
#define blr_agg_covar_pop (unsigned char) 216 #define blr_agg_covar_pop (unsigned char) 216
#define blr_agg_corr (unsigned char) 217 #define blr_agg_corr (unsigned char) 217
// BLR opcodes of the aggregate linear regression functions (CORE-4722).
// They continue the numbering directly after blr_agg_corr (217).
#define blr_agg_regr_avgx (unsigned char) 218
#define blr_agg_regr_avgy (unsigned char) 219
#define blr_agg_regr_count (unsigned char) 220
#define blr_agg_regr_intercept (unsigned char) 221
#define blr_agg_regr_r2 (unsigned char) 222
#define blr_agg_regr_slope (unsigned char) 223
#define blr_agg_regr_sxx (unsigned char) 224
#define blr_agg_regr_sxy (unsigned char) 225
#define blr_agg_regr_syy (unsigned char) 226
#endif // JRD_BLR_H #endif // JRD_BLR_H

View File

@ -333,6 +333,15 @@ static const TOK tokens[] =
{RECREATE, "RECREATE", 2, false}, {RECREATE, "RECREATE", 2, false},
{RECURSIVE, "RECURSIVE", 2, false}, {RECURSIVE, "RECURSIVE", 2, false},
{REFERENCES, "REFERENCES", 1, false}, {REFERENCES, "REFERENCES", 1, false},
{REGR_AVGX, "REGR_AVGX", 2, false},
{REGR_AVGY, "REGR_AVGY", 2, false},
{REGR_COUNT, "REGR_COUNT", 2, false},
{REGR_INTERCEPT, "REGR_INTERCEPT", 2, false},
{REGR_R2, "REGR_R2", 2, false},
{REGR_SLOPE, "REGR_SLOPE", 2, false},
{REGR_SXX, "REGR_SXX", 2, false},
{REGR_SXY, "REGR_SXY", 2, false},
{REGR_SYY, "REGR_SYY", 2, false},
{KW_RELATIVE, "RELATIVE", 2, true}, {KW_RELATIVE, "RELATIVE", 2, true},
{RELEASE, "RELEASE", 2, false}, {RELEASE, "RELEASE", 2, false},
{REPLACE, "REPLACE", 2, false}, {REPLACE, "REPLACE", 2, false},