mirror of
https://github.com/FirebirdSQL/firebird.git
synced 2025-01-22 17:23:03 +01:00
Feature CORE-4717 - Aggregate statistical functions COVAR_SAMP, COVAR_POP and CORR - contributed by Hajime Nakagami.
This commit is contained in:
parent
8d1cbc7e32
commit
64d146a06f
@ -291,6 +291,14 @@
|
|||||||
Contributor(s):
|
Contributor(s):
|
||||||
Alex Peshkov <peshkoff at mail.ru>
|
Alex Peshkov <peshkoff at mail.ru>
|
||||||
|
|
||||||
|
* New feature CORE-4717
|
||||||
|
Aggregate statistical functions COVAR_SAMP, COVAR_POP and CORR
|
||||||
|
See also:
|
||||||
|
/doc/sql.extensions/README.statistical_functions.txt
|
||||||
|
Contributor(s):
|
||||||
|
Hajime Nakagami <nakagami at gmail.com>
|
||||||
|
Adriano dos Santos Fernandes <adrianosf at gmail.com>
|
||||||
|
|
||||||
* New feature CORE-4714
|
* New feature CORE-4714
|
||||||
Aggregate statistical functions STDDEV_POP, STDDEV_SAMP, VAR_POP and VAR_SAMP
|
Aggregate statistical functions STDDEV_POP, STDDEV_SAMP, VAR_POP and VAR_SAMP
|
||||||
See also:
|
See also:
|
||||||
|
@ -5,22 +5,40 @@ Statistical Functions
|
|||||||
By the SQL specification, some statistical functions are defined.
|
By the SQL specification, some statistical functions are defined.
|
||||||
Functions related to variance and standard deviation are listed below.
|
Functions related to variance and standard deviation are listed below.
|
||||||
|
|
||||||
VAR_POP: return the population variance.
|
|
||||||
VAR_SAMP: return the sample variance.
|
VAR_SAMP: return the sample variance.
|
||||||
STDDEV_SAMP: return the sample standard deviation .
|
eq. (SUM(<expr> ^ 2) - SUM(<expr>) ^ 2 / COUNT(<expr>)) / (COUNT(<expr>) - 1)
|
||||||
STDDEV_POP: return the population standard deviation.
|
|
||||||
|
|
||||||
VAR_POP(<expr>) is equivalent to (SUM(<expr> ^ 2) - SUM(<expr>) ^ 2 / COUNT(<expr>)) / COUNT(<expr>).
|
VAR_POP: return the population variance.
|
||||||
VAR_SAMP(<expr>) is equivalent to (SUM(<expr> ^ 2) - SUM(<expr>) ^ 2 / COUNT(<expr>)) / (COUNT(<expr>) - 1).
|
eq. (SUM(<expr> ^ 2) - SUM(<expr>) ^ 2 / COUNT(<expr>)) / COUNT(<expr>)
|
||||||
STDDEV_POP(<expr>) is equivalent to SQRT(VAR_POP(<expr>)).
|
|
||||||
STDDEV_SAMP(<expr>) is equivalent to SQRT(VAR_SAMP(<expr)).
|
STDDEV_SAMP: return the sample standard deviation.
|
||||||
|
eq. SQRT(VAR_SAMP(<expr>))
|
||||||
|
|
||||||
|
STDDEV_POP: return the population standard deviation.
|
||||||
|
eq. SQRT(VAR_POP(<expr>))
|
||||||
|
|
||||||
|
COVAR_SAMP: return the sample covariance.
|
||||||
|
eq. (SUM(<expr1> * <expr2>) - SUM(<expr1>) * SUM(<expr2>) / COUNT(*)) / (COUNT(*) - 1)
|
||||||
|
|
||||||
|
COVAR_POP: return the population covariance.
|
||||||
|
eq. (SUM(<expr1> * <expr2>) - SUM(<expr1>) * SUM(<expr2>) / COUNT(*)) / COUNT(*)
|
||||||
|
|
||||||
|
CORR: returns the coefficient of correlation.
|
||||||
|
eq. COVAR_POP(<expr1>, <expr2>) / (STDDEV_POP(<expr2>) * STDDEV_POP(<expr1>))
|
||||||
|
|
||||||
Author:
|
Author:
|
||||||
Hajime Nakagami <nakagami@gmail.com>
|
Hajime Nakagami <nakagami@gmail.com>
|
||||||
|
|
||||||
Syntax:
|
Syntax:
|
||||||
<statistical function> ::= <statistical function name>(<expr>)
|
<single param statistical function> ::= <single param statistical function name>(<expr>)
|
||||||
<statistical function name> := { VAR_POP | VAR_SAMP | STDDEV_POP | STDDEV_SAMP }
|
<single param statistical function name> ::= { VAR_POP | VAR_SAMP | STDDEV_POP | STDDEV_SAMP }
|
||||||
|
|
||||||
|
<dual param statistical function> ::= <dual param statistical function name>(<expr1>, <expr2>)
|
||||||
|
<dual param statistical function name> ::= { COVAR_POP | COVAR_SAMP | CORR }
|
||||||
|
|
||||||
|
Note:
|
||||||
|
For VAR_SAMP, STDDEV_SAMP and COVAR_SAMP, NULL is returned if the result count is 0 or 1.
|
||||||
|
For VAR_POP, STDDEV_POP, COVAR_POP and CORR, NULL is returned if the result count is 0.
CORR also returns NULL if STDDEV_POP(<expr1>) * STDDEV_POP(<expr2>) is 0.
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
SELECT STDDEV_SAMP(salary) FROM employees;
|
SELECT STDDEV_SAMP(salary) FROM employees;
|
||||||
|
@ -1313,4 +1313,179 @@ AggNode* StdDevAggNode::dsqlCopy(DsqlCompilerScratch* dsqlScratch) /*const*/
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//--------------------
|
||||||
|
|
||||||
|
|
||||||
|
// Registration entries for the three aggregates implemented by CorrAggNode. Each one ties
// a function name to its BLR operator; the CorrAggNode constructor selects one of them
// according to the requested CorrType.
static AggNode::Register<CorrAggNode> coVarSampAggInfo("COVAR_SAMP", blr_agg_covar_samp);
|
||||||
|
static AggNode::Register<CorrAggNode> coVarPopAggInfo("COVAR_POP", blr_agg_covar_pop);
|
||||||
|
static AggNode::Register<CorrAggNode> corrAggInfo("CORR", blr_agg_corr);
|
||||||
|
|
||||||
|
// Builds a two-argument statistical aggregate node (COVAR_SAMP, COVAR_POP or CORR).
// aArg is passed to the AggNode base as its argument; aArg2 is stored in arg2 and
// registered as an additional child node.
CorrAggNode::CorrAggNode(MemoryPool& pool, CorrType aType, ValueExprNode* aArg, ValueExprNode* aArg2)
	: AggNode(pool,
		// Pick the registration entry matching the requested function; any value that
		// is neither covariance flavour ends up using the CORR entry.
		(aType == CorrAggNode::TYPE_COVAR_POP ? coVarPopAggInfo :
			aType == CorrAggNode::TYPE_COVAR_SAMP ? coVarSampAggInfo :
			corrAggInfo),
		false, false, aArg),
	  type(aType),
	  arg2(aArg2),
	  impure2Offset(0)	// real offset is assigned in aggPostRse()
{
	addChildNode(arg2, arg2);
}
|
||||||
|
|
||||||
|
// Performs the base-class post-RSE processing and then asks CMP_impure() for an extra
// impure block of sizeof(CorrImpure) bytes, remembering the returned offset so the
// running sums can be located at execution time.
void CorrAggNode::aggPostRse(thread_db* tdbb, CompilerScratch* csb)
{
	// Base class first: the order of the two calls is kept exactly as it was.
	AggNode::aggPostRse(tdbb, csb);

	impure2Offset = CMP_impure(csb, sizeof(CorrImpure));
}
|
||||||
|
|
||||||
|
// Parses a BLR covariance/correlation aggregate.
//
// blrOp selects the function (blr_agg_covar_samp, blr_agg_covar_pop or blr_agg_corr);
// the two argument expressions are then read, in order, with PAR_parse_value().
// Returns a newly allocated CorrAggNode created in 'pool'.
DmlNode* CorrAggNode::parse(thread_db* tdbb, MemoryPool& pool, CompilerScratch* csb, const UCHAR blrOp)
{
	// Give 'type' a defined value up front. If fb_assert() below does not stop execution
	// (NOTE(review): presumably it compiles to nothing outside debug builds - confirm),
	// an unexpected blrOp would otherwise pass an indeterminate value to the constructor.
	// TYPE_CORR mirrors the constructor, which also treats "anything else" as CORR.
	CorrType type = TYPE_CORR;

	switch (blrOp)
	{
		case blr_agg_covar_samp:
			type = TYPE_COVAR_SAMP;
			break;

		case blr_agg_covar_pop:
			type = TYPE_COVAR_POP;
			break;

		case blr_agg_corr:
			type = TYPE_CORR;
			break;

		default:
			// Only the three operators above are registered for this node.
			fb_assert(false);
			break;
	}

	// The arguments must be read in stream order: first argument, then second.
	ValueExprNode* a1 = PAR_parse_value(tdbb, csb);
	ValueExprNode* a2 = PAR_parse_value(tdbb, csb);

	return FB_NEW(pool) CorrAggNode(pool, type, a1, a2);
}
|
||||||
|
|
||||||
|
void CorrAggNode::make(DsqlCompilerScratch* dsqlScratch, dsc* desc)
|
||||||
|
{
|
||||||
|
desc->makeDouble();
|
||||||
|
desc->setNullable(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
void CorrAggNode::getDesc(thread_db* tdbb, CompilerScratch* csb, dsc* desc)
|
||||||
|
{
|
||||||
|
desc->makeDouble();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Creates a copy of this node in the thread's default pool. The function type and
// nodScale are carried over and both argument sub-trees are copied through 'copier'.
ValueExprNode* CorrAggNode::copy(thread_db* tdbb, NodeCopier& copier) const
{
	MemoryPool& pool = *tdbb->getDefaultPool();

	// Arguments are left at their NULL defaults here and filled in just below.
	CorrAggNode* const clone = FB_NEW(pool) CorrAggNode(pool, type);
	clone->nodScale = nodScale;
	clone->arg = copier.copy(tdbb, arg);
	clone->arg2 = copier.copy(tdbb, arg2);

	return clone;
}
|
||||||
|
|
||||||
|
// Resets the per-request state before a new group is aggregated: runs the base-class
// initialisation, sets the result value to a double 0 and clears all running sums.
void CorrAggNode::aggInit(thread_db* tdbb, jrd_req* request) const
{
	AggNode::aggInit(tdbb, request);

	impure_value_ex* const result = request->getImpure<impure_value_ex>(impureOffset);
	result->make_double(0);

	// Second impure block: sums of x, x*x, y, y*y and x*y accumulated by aggPass().
	CorrImpure* const sums = request->getImpure<CorrImpure>(impure2Offset);
	sums->x = 0.0;
	sums->x2 = 0.0;
	sums->y = 0.0;
	sums->y2 = 0.0;
	sums->xy = 0.0;
}
|
||||||
|
|
||||||
|
// Accumulates one input row. Both arguments are evaluated; if either one is NULL the
// row is ignored and false is returned. Otherwise the row counter and the running sums
// are updated and true is returned.
bool CorrAggNode::aggPass(thread_db* tdbb, jrd_req* request) const
{
	dsc* const firstDesc = EVL_expr(tdbb, request, arg);
	if (request->req_flags & req_null)
		return false;

	// The second argument is evaluated only when the first one is not NULL.
	dsc* const secondDesc = EVL_expr(tdbb, request, arg2);
	if (request->req_flags & req_null)
		return false;

	impure_value_ex* const result = request->getImpure<impure_value_ex>(impureOffset);
	++result->vlux_count;

	// The first argument plays the role of Y, the second one the role of X.
	const double y = MOV_get_double(firstDesc);
	const double x = MOV_get_double(secondDesc);

	CorrImpure* const sums = request->getImpure<CorrImpure>(impure2Offset);
	sums->x += x;
	sums->x2 += x * x;
	sums->y += y;
	sums->y2 += y * y;
	sums->xy += x * y;

	return true;
}
|
||||||
|
|
||||||
|
// Descriptor-based accumulation is not expected to be called for this node: all the work
// is done by the two-argument bool aggPass(thread_db*, jrd_req*) overload, so reaching
// this one trips an assertion.
void CorrAggNode::aggPass(thread_db* /*tdbb*/, jrd_req* /*request*/, dsc* /*desc*/) const
{
	fb_assert(false);
}
|
||||||
|
|
||||||
|
// Produces the final value of the aggregate from the sums gathered by aggPass().
//
// Returns NULL when the function is undefined for the accumulated rows:
//   COVAR_SAMP - fewer than two rows;
//   COVAR_POP  - no rows;
//   CORR       - no rows, or the population variance of either argument is not positive.
// Otherwise the result is stored in the node's impure value and a pointer to its
// descriptor is returned.
dsc* CorrAggNode::aggExecute(thread_db* tdbb, jrd_req* request) const
{
	impure_value_ex* impure = request->getImpure<impure_value_ex>(impureOffset);
	const CorrImpure* impure2 = request->getImpure<CorrImpure>(impure2Offset);
	double d = 0.0;

	switch (type)
	{
		case TYPE_COVAR_SAMP:
			if (impure->vlux_count < 2)
				return NULL;

			d = (impure2->xy - impure2->y * impure2->x / impure->vlux_count) / (impure->vlux_count - 1);
			break;

		case TYPE_COVAR_POP:
			if (impure->vlux_count == 0)
				return NULL;

			d = (impure2->xy - impure2->y * impure2->x / impure->vlux_count) / impure->vlux_count;
			break;

		case TYPE_CORR:
		{
			// COVAR_POP(Y, X) / (STDDEV_POP(X) * STDDEV_POP(Y))
			if (impure->vlux_count == 0)
				return NULL;

			const double covarPop = (impure2->xy - impure2->y * impure2->x / impure->vlux_count) /
				impure->vlux_count;
			const double varPopX = (impure2->x2 - impure2->x * impure2->x / impure->vlux_count) /
				impure->vlux_count;
			const double varPopY = (impure2->y2 - impure2->y * impure2->y / impure->vlux_count) /
				impure->vlux_count;

			// Rounding in the running sums can leave a variance that is mathematically
			// zero slightly below zero. sqrt() of such a value is NaN, which would not
			// compare equal to 0.0 and would be returned as the result. Treat every
			// non-positive variance as "correlation undefined".
			if (varPopX <= 0.0 || varPopY <= 0.0)
				return NULL;

			const double divisor = sqrt(varPopX) * sqrt(varPopY);

			// Still possible if the product of two tiny deviations underflows.
			if (divisor == 0.0)
				return NULL;

			d = covarPop / divisor;
			break;
		}

		default:
			// CorrType has no other values; never hand back an undefined result.
			fb_assert(false);
			return NULL;
	}

	dsc temp;
	temp.makeDouble(&d);

	// Copies the value into the impure area, so pointing at the local 'd' is fine.
	EVL_make_value(tdbb, &temp, impure);

	return &impure->vlu_desc;
}
|
||||||
|
|
||||||
|
// Creates the DSQL-pass copy of this node in the node's own pool: same function type,
// with both arguments run through doDsqlPass().
AggNode* CorrAggNode::dsqlCopy(DsqlCompilerScratch* dsqlScratch) /*const*/
{
	MemoryPool& pool = getPool();

	return FB_NEW(pool) CorrAggNode(pool, type,
		doDsqlPass(dsqlScratch, arg), doDsqlPass(dsqlScratch, arg2));
}
|
||||||
|
|
||||||
|
|
||||||
} // namespace Jrd
|
} // namespace Jrd
|
||||||
|
@ -192,6 +192,48 @@ private:
|
|||||||
ULONG impure2Offset;
|
ULONG impure2Offset;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class CorrAggNode : public AggNode
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
enum CorrType
|
||||||
|
{
|
||||||
|
TYPE_COVAR_SAMP,
|
||||||
|
TYPE_COVAR_POP,
|
||||||
|
TYPE_CORR
|
||||||
|
};
|
||||||
|
|
||||||
|
struct CorrImpure
|
||||||
|
{
|
||||||
|
double x, x2, y, y2, xy;
|
||||||
|
};
|
||||||
|
|
||||||
|
explicit CorrAggNode(MemoryPool& pool, CorrType aType,
|
||||||
|
ValueExprNode* aArg = NULL, ValueExprNode* aArg2 = NULL);
|
||||||
|
|
||||||
|
virtual void aggPostRse(thread_db* tdbb, CompilerScratch* csb);
|
||||||
|
|
||||||
|
static DmlNode* parse(thread_db* tdbb, MemoryPool& pool, CompilerScratch* csb, const UCHAR blrOp);
|
||||||
|
|
||||||
|
virtual void make(DsqlCompilerScratch* dsqlScratch, dsc* desc);
|
||||||
|
virtual void getDesc(thread_db* tdbb, CompilerScratch* csb, dsc* desc);
|
||||||
|
virtual ValueExprNode* copy(thread_db* tdbb, NodeCopier& copier) const;
|
||||||
|
|
||||||
|
virtual void aggInit(thread_db* tdbb, jrd_req* request) const;
|
||||||
|
virtual bool aggPass(thread_db* tdbb, jrd_req* request) const;
|
||||||
|
virtual void aggPass(thread_db* tdbb, jrd_req* request, dsc* desc) const;
|
||||||
|
virtual dsc* aggExecute(thread_db* tdbb, jrd_req* request) const;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
virtual AggNode* dsqlCopy(DsqlCompilerScratch* dsqlScratch) /*const*/;
|
||||||
|
|
||||||
|
public:
|
||||||
|
const CorrType type;
|
||||||
|
NestConst<ValueExprNode> arg2;
|
||||||
|
|
||||||
|
private:
|
||||||
|
ULONG impure2Offset;
|
||||||
|
};
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
#endif // DSQL_AGG_NODES_H
|
#endif // DSQL_AGG_NODES_H
|
||||||
|
@ -576,6 +576,9 @@ using namespace Firebird;
|
|||||||
%token <metaNamePtr> STDDEV_POP
|
%token <metaNamePtr> STDDEV_POP
|
||||||
%token <metaNamePtr> VAR_SAMP
|
%token <metaNamePtr> VAR_SAMP
|
||||||
%token <metaNamePtr> VAR_POP
|
%token <metaNamePtr> VAR_POP
|
||||||
|
%token <metaNamePtr> COVAR_SAMP
|
||||||
|
%token <metaNamePtr> COVAR_POP
|
||||||
|
%token <metaNamePtr> CORR
|
||||||
|
|
||||||
// precedence declarations for expression evaluation
|
// precedence declarations for expression evaluation
|
||||||
|
|
||||||
@ -3814,6 +3817,9 @@ keyword_or_column
|
|||||||
| START
|
| START
|
||||||
| SIMILAR // added in FB 2.5
|
| SIMILAR // added in FB 2.5
|
||||||
| KW_BOOLEAN // added in FB 3.0
|
| KW_BOOLEAN // added in FB 3.0
|
||||||
|
| CORR
|
||||||
|
| COVAR_POP
|
||||||
|
| COVAR_SAMP
|
||||||
| DETERMINISTIC
|
| DETERMINISTIC
|
||||||
| KW_FALSE
|
| KW_FALSE
|
||||||
| OFFSET
|
| OFFSET
|
||||||
@ -6805,6 +6811,12 @@ aggregate_function
|
|||||||
{ $$ = newNode<StdDevAggNode>(StdDevAggNode::TYPE_VAR_SAMP, $3); }
|
{ $$ = newNode<StdDevAggNode>(StdDevAggNode::TYPE_VAR_SAMP, $3); }
|
||||||
| VAR_POP '(' value ')'
|
| VAR_POP '(' value ')'
|
||||||
{ $$ = newNode<StdDevAggNode>(StdDevAggNode::TYPE_VAR_POP, $3); }
|
{ $$ = newNode<StdDevAggNode>(StdDevAggNode::TYPE_VAR_POP, $3); }
|
||||||
|
| COVAR_SAMP '(' value ',' value ')'
|
||||||
|
{ $$ = newNode<CorrAggNode>(CorrAggNode::TYPE_COVAR_SAMP, $3, $5); }
|
||||||
|
| COVAR_POP '(' value ',' value ')'
|
||||||
|
{ $$ = newNode<CorrAggNode>(CorrAggNode::TYPE_COVAR_POP, $3, $5); }
|
||||||
|
| CORR '(' value ',' value ')'
|
||||||
|
{ $$ = newNode<CorrAggNode>(CorrAggNode::TYPE_CORR, $3, $5); }
|
||||||
;
|
;
|
||||||
|
|
||||||
%type <aggNode> window_function
|
%type <aggNode> window_function
|
||||||
|
@ -245,5 +245,8 @@ static const struct
|
|||||||
{"agg_stddev_pop", one},
|
{"agg_stddev_pop", one},
|
||||||
{"agg_var_samp", one},
|
{"agg_var_samp", one},
|
||||||
{"agg_var_pop", one},
|
{"agg_var_pop", one},
|
||||||
|
{"agg_covar_samp", two},
|
||||||
|
{"agg_covar_pop", two},
|
||||||
|
{"agg_corr", two},
|
||||||
{0, 0}
|
{0, 0}
|
||||||
};
|
};
|
||||||
|
@ -411,5 +411,8 @@
|
|||||||
#define blr_agg_stddev_pop (unsigned char) 212
|
#define blr_agg_stddev_pop (unsigned char) 212
|
||||||
#define blr_agg_var_samp (unsigned char) 213
|
#define blr_agg_var_samp (unsigned char) 213
|
||||||
#define blr_agg_var_pop (unsigned char) 214
|
#define blr_agg_var_pop (unsigned char) 214
|
||||||
|
#define blr_agg_covar_samp (unsigned char) 215
|
||||||
|
#define blr_agg_covar_pop (unsigned char) 216
|
||||||
|
#define blr_agg_corr (unsigned char) 217
|
||||||
|
|
||||||
#endif // JRD_BLR_H
|
#endif // JRD_BLR_H
|
||||||
|
@ -129,10 +129,13 @@ static const TOK tokens[] =
|
|||||||
{CONSTRAINT, "CONSTRAINT", 1, false},
|
{CONSTRAINT, "CONSTRAINT", 1, false},
|
||||||
{CONTAINING, "CONTAINING", 1, false},
|
{CONTAINING, "CONTAINING", 1, false},
|
||||||
{CONTINUE, "CONTINUE", 2, true},
|
{CONTINUE, "CONTINUE", 2, true},
|
||||||
|
{CORR, "CORR", 2, false},
|
||||||
{COS, "COS", 2, false},
|
{COS, "COS", 2, false},
|
||||||
{COSH, "COSH", 2, false},
|
{COSH, "COSH", 2, false},
|
||||||
{COT, "COT", 2, false},
|
{COT, "COT", 2, false},
|
||||||
{COUNT, "COUNT", 1, false},
|
{COUNT, "COUNT", 1, false},
|
||||||
|
{COVAR_POP, "COVAR_POP", 2, false},
|
||||||
|
{COVAR_SAMP, "COVAR_SAMP", 2, false},
|
||||||
{CREATE, "CREATE", 1, false},
|
{CREATE, "CREATE", 1, false},
|
||||||
{CROSS, "CROSS", 2, false},
|
{CROSS, "CROSS", 2, false},
|
||||||
{CSTRING, "CSTRING", 1, false},
|
{CSTRING, "CSTRING", 1, false},
|
||||||
|
Loading…
Reference in New Issue
Block a user