8
0
mirror of https://github.com/FirebirdSQL/firebird.git synced 2025-01-22 20:43:02 +01:00

Feature CORE-4717 - Aggregate statistical functions COVAR_SAMP, COVAR_POP and CORR - contributed by Hajime Nakagami.

This commit is contained in:
asfernandes 2015-03-23 02:55:42 +00:00
parent 8d1cbc7e32
commit 64d146a06f
8 changed files with 273 additions and 9 deletions

View File

@ -291,6 +291,14 @@
Contributor(s): Contributor(s):
Alex Peshkov <peshkoff at mail.ru> Alex Peshkov <peshkoff at mail.ru>
* New feature CORE-4717
Aggregate statistical functions COVAR_SAMP, COVAR_POP and CORR
See also:
/doc/sql.extensions/README.statistical_functions.txt
Contributor(s):
Hajime Nakagami <nakagami at gmail.com>
Adriano dos Santos Fernandes <adrianosf at gmail.com>
* New feature CORE-4714 * New feature CORE-4714
Aggregate statistical functions STDDEV_POP, STDDEV_SAMP, VAR_POP and VAR_SAMP Aggregate statistical functions STDDEV_POP, STDDEV_SAMP, VAR_POP and VAR_SAMP
See also: See also:

View File

@ -5,22 +5,40 @@ Statistical Functions
By the SQL specification, some statistical functions are defined. By the SQL specification, some statistical functions are defined.
Function about variance and standard deviation are bellow. Function about variance and standard deviation are bellow.
VAR_POP: return the population variance.
VAR_SAMP: return the sample variance. VAR_SAMP: return the sample variance.
STDDEV_SAMP: return the sample standard deviation . eq. (SUM(<expr> ^ 2) - SUM(<expr>) ^ 2 / COUNT(<expr>)) / (COUNT(<expr>) - 1)
STDDEV_POP: return the population standard deviation.
VAR_POP(<expr>) is equivalent to (SUM(<expr> ^ 2) - SUM(<expr>) ^ 2 / COUNT(<expr>)) / COUNT(<expr>). VAR_POP: return the population variance.
VAR_SAMP(<expr>) is equivalent to (SUM(<expr> ^ 2) - SUM(<expr>) ^ 2 / COUNT(<expr>)) / (COUNT(<expr>) - 1). eq. (SUM(<expr> ^ 2) - SUM(<expr>) ^ 2 / COUNT(<expr>)) / COUNT(<expr>)
STDDEV_POP(<expr>) is equivalent to SQRT(VAR_POP(<expr>)).
STDDEV_SAMP(<expr>) is equivalent to SQRT(VAR_SAMP(<expr)). STDDEV_SAMP: return the sample standard deviation.
eq. SQRT(VAR_SAMP(<expr>))
STDDEV_POP: return the population standard deviation.
eq. SQRT(VAR_POP(<expr>))
COVAR_SAMP: return the sample covariance.
eq. (SUM(<expr1> * <expr2>) - SUM(<expr1>) * SUM(<expr2>) / COUNT(*)) / (COUNT(*) - 1)
COVAR_POP: return the population covariance.
eq. (SUM(<expr1> * <expr2>) - SUM(<expr1>) * SUM(<expr2>) / COUNT(*)) / COUNT(*)
CORR: returns the coefficient of correlation.
eq. COVAR_POP(<expr1>, <expr2>) / (STDDEV_POP(<expr2>) * STDDEV_POP(<expr1>))
Author: Author:
Hajime Nakagami <nakagami@gmail.com> Hajime Nakagami <nakagami@gmail.com>
Syntax: Syntax:
<statistical function> ::= <statistical function name>(<expr>) <single param statistical function> ::= <single param statistical function name>(<expr>)
<statistical function name> := { VAR_POP | VAR_SAMP | STDDEV_POP | STDDEV_SAMP } <single param statistical function name> := { VAR_POP | VAR_SAMP | STDDEV_POP | STDDEV_SAMP }
<dual param statistical function> ::= <dual param statistical function name>(<expr1>, <expr2>)
<dual param statistical function name> := { COVAR_POP | COVAR_SAMP | CORR }
Note:
VAR_SAMP, STDDEV_SAMP and COVAR_SAMP return NULL when the result count is 0 or 1.
VAR_POP, STDDEV_POP, COVAR_POP and CORR return NULL when the result count is 0.
CORR also returns NULL when STDDEV_POP(<expr1>) * STDDEV_POP(<expr2>) is 0.
Example: Example:
SELECT STDDEV_SAMP(salary) FROM employees; SELECT STDDEV_SAMP(salary) FROM employees;

View File

@ -1313,4 +1313,179 @@ AggNode* StdDevAggNode::dsqlCopy(DsqlCompilerScratch* dsqlScratch) /*const*/
} }
//--------------------
// Registration objects for the three two-argument statistical aggregates.
// Each one pairs the SQL function name with its BLR opcode; the CorrAggNode
// constructor hands the object matching its CorrType to the AggNode base class.
static AggNode::Register<CorrAggNode> coVarSampAggInfo("COVAR_SAMP", blr_agg_covar_samp);
static AggNode::Register<CorrAggNode> coVarPopAggInfo("COVAR_POP", blr_agg_covar_pop);
static AggNode::Register<CorrAggNode> corrAggInfo("CORR", blr_agg_corr);
// Constructs a covariance/correlation aggregate node.
//   aType - which function is computed; it also selects the registration object
//           passed to AggNode (any value other than TYPE_COVAR_SAMP or
//           TYPE_COVAR_POP falls through to corrAggInfo).
//   aArg  - first operand, forwarded to the AggNode constructor.
//   aArg2 - second operand, stored in arg2.
CorrAggNode::CorrAggNode(MemoryPool& pool, CorrType aType, ValueExprNode* aArg, ValueExprNode* aArg2)
: AggNode(pool,
(aType == CorrAggNode::TYPE_COVAR_SAMP ? coVarSampAggInfo :
aType == CorrAggNode::TYPE_COVAR_POP ? coVarPopAggInfo :
corrAggInfo),
false, false, aArg),
type(aType),
arg2(aArg2),
impure2Offset(0)
{
// Add the second operand as a child node; the first operand was handed to AggNode above.
addChildNode(arg2, arg2);
}
// Runs the base-class step, then asks CMP_impure for a CorrImpure-sized area
// and remembers the returned offset so the running sums can be located later.
void CorrAggNode::aggPostRse(thread_db* tdbb, CompilerScratch* csb)
{
AggNode::aggPostRse(tdbb, csb);
impure2Offset = CMP_impure(csb, sizeof(CorrImpure));
}
// Builds a CorrAggNode from BLR.
//   blrOp - opcode that was just read; it selects the CorrType.
// The two operand expressions are parsed right after the opcode, first operand
// first. Returns the newly allocated node.
DmlNode* CorrAggNode::parse(thread_db* tdbb, MemoryPool& pool, CompilerScratch* csb, const UCHAR blrOp)
{
	// Start from a defined value so an unexpected opcode can never leave `type`
	// indeterminate in builds where fb_assert does not abort. TYPE_CORR mirrors the
	// constructor, which also treats every non-covariance type as CORR.
	CorrType type = TYPE_CORR;

	switch (blrOp)
	{
		case blr_agg_covar_samp:
			type = TYPE_COVAR_SAMP;
			break;

		case blr_agg_covar_pop:
			type = TYPE_COVAR_POP;
			break;

		case blr_agg_corr:
			type = TYPE_CORR;
			break;

		default:
			// Only the three opcodes above are expected here.
			fb_assert(false);
			break;
	}

	// Operand order matters: the first parsed value becomes arg, the second arg2.
	ValueExprNode* a1 = PAR_parse_value(tdbb, csb);
	ValueExprNode* a2 = PAR_parse_value(tdbb, csb);

	return FB_NEW(pool) CorrAggNode(pool, type, a1, a2);
}
// Describes the DSQL result: a double that is marked nullable.
void CorrAggNode::make(DsqlCompilerScratch* dsqlScratch, dsc* desc)
{
desc->makeDouble();
desc->setNullable(true);
}
// Describes the result as a double. Unlike make(), nullability is not touched here.
void CorrAggNode::getDesc(thread_db* tdbb, CompilerScratch* csb, dsc* desc)
{
desc->makeDouble();
}
// Clones this node into the thread's default pool: same CorrType and scale,
// with both operands duplicated through the supplied copier.
ValueExprNode* CorrAggNode::copy(thread_db* tdbb, NodeCopier& copier) const
{
	MemoryPool& targetPool = *tdbb->getDefaultPool();

	// The clone starts without operands; they are filled in below.
	CorrAggNode* const clone = FB_NEW(targetPool) CorrAggNode(targetPool, type);

	clone->nodScale = nodScale;
	clone->arg = copier.copy(tdbb, arg);
	clone->arg2 = copier.copy(tdbb, arg2);

	return clone;
}
// Resets the per-request state before aggregation starts: the base-class state,
// the result slot (set to a double 0) and all five running sums.
void CorrAggNode::aggInit(thread_db* tdbb, jrd_req* request) const
{
	AggNode::aggInit(tdbb, request);

	request->getImpure<impure_value_ex>(impureOffset)->make_double(0);

	CorrImpure* const sums = request->getImpure<CorrImpure>(impure2Offset);
	sums->xy = 0.0;
	sums->y2 = 0.0;
	sums->y = 0.0;
	sums->x2 = 0.0;
	sums->x = 0.0;
}
// Feeds one row into the aggregate.
// Returns false, leaving the state untouched, when either operand evaluates to
// NULL; otherwise bumps the row count, updates the running sums and returns true.
bool CorrAggNode::aggPass(thread_db* tdbb, jrd_req* request) const
{
	// The first operand is evaluated first; the second is not evaluated at all
	// if the first one is NULL.
	dsc* const firstDesc = EVL_expr(tdbb, request, arg);

	if (request->req_flags & req_null)
		return false;

	dsc* const secondDesc = EVL_expr(tdbb, request, arg2);

	if (request->req_flags & req_null)
		return false;

	// The row is counted before the values are converted to double.
	impure_value_ex* const counter = request->getImpure<impure_value_ex>(impureOffset);
	++counter->vlux_count;

	// The first operand plays the role of y, the second that of x.
	const double yValue = MOV_get_double(firstDesc);
	const double xValue = MOV_get_double(secondDesc);

	CorrImpure* const sums = request->getImpure<CorrImpure>(impure2Offset);

	sums->x += xValue;
	sums->x2 += xValue * xValue;
	sums->y += yValue;
	sums->y2 += yValue * yValue;
	sums->xy += xValue * yValue;

	return true;
}
// Single-descriptor overload: not expected to be called for this node, which
// evaluates both of its operands itself in the two-parameter aggPass above.
void CorrAggNode::aggPass(thread_db* /*tdbb*/, jrd_req* /*request*/, dsc* /*desc*/) const
{
fb_assert(false);
}
// Produces the final value from the accumulated row count and running sums.
// Returns NULL when there are too few rows (fewer than two for COVAR_SAMP, none
// for COVAR_POP and CORR), and for CORR when the product of the two population
// standard deviations is zero. Otherwise returns the descriptor of the result slot.
dsc* CorrAggNode::aggExecute(thread_db* tdbb, jrd_req* request) const
{
	impure_value_ex* const result = request->getImpure<impure_value_ex>(impureOffset);
	const CorrImpure* const sums = request->getImpure<CorrImpure>(impure2Offset);

	double value;

	if (type == TYPE_COVAR_SAMP)
	{
		// The sample form divides by (count - 1).
		if (result->vlux_count < 2)
			return NULL;

		value = (sums->xy - sums->y * sums->x / result->vlux_count) / (result->vlux_count - 1);
	}
	else if (type == TYPE_COVAR_POP)
	{
		if (result->vlux_count == 0)
			return NULL;

		value = (sums->xy - sums->y * sums->x / result->vlux_count) / result->vlux_count;
	}
	else if (type == TYPE_CORR)
	{
		// COVAR_POP(Y, X) / (STDDEV_POP(X) * STDDEV_POP(Y))
		if (result->vlux_count == 0)
			return NULL;

		const double covariance = (sums->xy - sums->y * sums->x / result->vlux_count) /
			result->vlux_count;
		const double varianceX = (sums->x2 - sums->x * sums->x / result->vlux_count) /
			result->vlux_count;
		const double varianceY = (sums->y2 - sums->y * sums->y / result->vlux_count) /
			result->vlux_count;

		const double denominator = sqrt(varianceX) * sqrt(varianceY);

		// No correlation can be reported when the denominator is zero.
		if (denominator == 0.0)
			return NULL;

		value = covariance / denominator;
	}

	// Store the double in the result slot and hand back its descriptor.
	dsc valueDesc;
	valueDesc.makeDouble(&value);
	EVL_make_value(tdbb, &valueDesc, result);

	return &result->vlu_desc;
}
// Creates a new CorrAggNode of the same type in this node's pool, with both
// operands run through doDsqlPass.
AggNode* CorrAggNode::dsqlCopy(DsqlCompilerScratch* dsqlScratch) /*const*/
{
return FB_NEW(getPool()) CorrAggNode(getPool(), type,
doDsqlPass(dsqlScratch, arg), doDsqlPass(dsqlScratch, arg2));
}
} // namespace Jrd } // namespace Jrd

View File

@ -192,6 +192,48 @@ private:
ULONG impure2Offset; ULONG impure2Offset;
}; };
// Aggregate node for the two-argument statistical functions
// COVAR_SAMP, COVAR_POP and CORR.
class CorrAggNode : public AggNode
{
public:
// Selects which of the three functions a node computes.
enum CorrType
{
TYPE_COVAR_SAMP,
TYPE_COVAR_POP,
TYPE_CORR
};
// Running sums kept per request: sum of x, x*x, y, y*y and x*y.
// In aggPass, y is taken from the first argument and x from the second.
struct CorrImpure
{
double x, x2, y, y2, xy;
};
// Both operands default to NULL; copy() relies on this and assigns them afterwards.
explicit CorrAggNode(MemoryPool& pool, CorrType aType,
ValueExprNode* aArg = NULL, ValueExprNode* aArg2 = NULL);
// Obtains the offset of the CorrImpure area in addition to the base-class work.
virtual void aggPostRse(thread_db* tdbb, CompilerScratch* csb);
// Builds a node from BLR; blrOp determines the CorrType.
static DmlNode* parse(thread_db* tdbb, MemoryPool& pool, CompilerScratch* csb, const UCHAR blrOp);
// Both describe the result as a double; make() additionally marks it nullable.
virtual void make(DsqlCompilerScratch* dsqlScratch, dsc* desc);
virtual void getDesc(thread_db* tdbb, CompilerScratch* csb, dsc* desc);
virtual ValueExprNode* copy(thread_db* tdbb, NodeCopier& copier) const;
// Aggregation life cycle: reset state, accumulate one row, compute the result.
virtual void aggInit(thread_db* tdbb, jrd_req* request) const;
virtual bool aggPass(thread_db* tdbb, jrd_req* request) const;
// Single-descriptor overload; its implementation only asserts.
virtual void aggPass(thread_db* tdbb, jrd_req* request, dsc* desc) const;
virtual dsc* aggExecute(thread_db* tdbb, jrd_req* request) const;
protected:
virtual AggNode* dsqlCopy(DsqlCompilerScratch* dsqlScratch) /*const*/;
public:
// Function computed by this node; fixed at construction.
const CorrType type;
// Second operand (the first one is held by AggNode as arg).
NestConst<ValueExprNode> arg2;
private:
// Offset of the CorrImpure area; 0 until aggPostRse assigns it.
ULONG impure2Offset;
};
} // namespace } // namespace
#endif // DSQL_AGG_NODES_H #endif // DSQL_AGG_NODES_H

View File

@ -576,6 +576,9 @@ using namespace Firebird;
%token <metaNamePtr> STDDEV_POP %token <metaNamePtr> STDDEV_POP
%token <metaNamePtr> VAR_SAMP %token <metaNamePtr> VAR_SAMP
%token <metaNamePtr> VAR_POP %token <metaNamePtr> VAR_POP
%token <metaNamePtr> COVAR_SAMP
%token <metaNamePtr> COVAR_POP
%token <metaNamePtr> CORR
// precedence declarations for expression evaluation // precedence declarations for expression evaluation
@ -3814,6 +3817,9 @@ keyword_or_column
| START | START
| SIMILAR // added in FB 2.5 | SIMILAR // added in FB 2.5
| KW_BOOLEAN // added in FB 3.0 | KW_BOOLEAN // added in FB 3.0
| CORR
| COVAR_POP
| COVAR_SAMP
| DETERMINISTIC | DETERMINISTIC
| KW_FALSE | KW_FALSE
| OFFSET | OFFSET
@ -6805,6 +6811,12 @@ aggregate_function
{ $$ = newNode<StdDevAggNode>(StdDevAggNode::TYPE_VAR_SAMP, $3); } { $$ = newNode<StdDevAggNode>(StdDevAggNode::TYPE_VAR_SAMP, $3); }
| VAR_POP '(' value ')' | VAR_POP '(' value ')'
{ $$ = newNode<StdDevAggNode>(StdDevAggNode::TYPE_VAR_POP, $3); } { $$ = newNode<StdDevAggNode>(StdDevAggNode::TYPE_VAR_POP, $3); }
| COVAR_SAMP '(' value ',' value ')'
{ $$ = newNode<CorrAggNode>(CorrAggNode::TYPE_COVAR_SAMP, $3, $5); }
| COVAR_POP '(' value ',' value ')'
{ $$ = newNode<CorrAggNode>(CorrAggNode::TYPE_COVAR_POP, $3, $5); }
| CORR '(' value ',' value ')'
{ $$ = newNode<CorrAggNode>(CorrAggNode::TYPE_CORR, $3, $5); }
; ;
%type <aggNode> window_function %type <aggNode> window_function

View File

@ -245,5 +245,8 @@ static const struct
{"agg_stddev_pop", one}, {"agg_stddev_pop", one},
{"agg_var_samp", one}, {"agg_var_samp", one},
{"agg_var_pop", one}, {"agg_var_pop", one},
{"agg_covar_samp", two},
{"agg_covar_pop", two},
{"agg_corr", two},
{0, 0} {0, 0}
}; };

View File

@ -411,5 +411,8 @@
#define blr_agg_stddev_pop (unsigned char) 212 #define blr_agg_stddev_pop (unsigned char) 212
#define blr_agg_var_samp (unsigned char) 213 #define blr_agg_var_samp (unsigned char) 213
#define blr_agg_var_pop (unsigned char) 214 #define blr_agg_var_pop (unsigned char) 214
#define blr_agg_covar_samp (unsigned char) 215
#define blr_agg_covar_pop (unsigned char) 216
#define blr_agg_corr (unsigned char) 217
#endif // JRD_BLR_H #endif // JRD_BLR_H

View File

@ -129,10 +129,13 @@ static const TOK tokens[] =
{CONSTRAINT, "CONSTRAINT", 1, false}, {CONSTRAINT, "CONSTRAINT", 1, false},
{CONTAINING, "CONTAINING", 1, false}, {CONTAINING, "CONTAINING", 1, false},
{CONTINUE, "CONTINUE", 2, true}, {CONTINUE, "CONTINUE", 2, true},
{CORR, "CORR", 2, false},
{COS, "COS", 2, false}, {COS, "COS", 2, false},
{COSH, "COSH", 2, false}, {COSH, "COSH", 2, false},
{COT, "COT", 2, false}, {COT, "COT", 2, false},
{COUNT, "COUNT", 1, false}, {COUNT, "COUNT", 1, false},
{COVAR_POP, "COVAR_POP", 2, false},
{COVAR_SAMP, "COVAR_SAMP", 2, false},
{CREATE, "CREATE", 1, false}, {CREATE, "CREATE", 1, false},
{CROSS, "CROSS", 2, false}, {CROSS, "CROSS", 2, false},
{CSTRING, "CSTRING", 1, false}, {CSTRING, "CSTRING", 1, false},