diff --git a/doc/WhatsNew b/doc/WhatsNew index 65714369dc..33b44cd2cd 100644 --- a/doc/WhatsNew +++ b/doc/WhatsNew @@ -291,6 +291,14 @@ Contributor(s): Alex Peshkov + * New feature CORE-4717 + Aggregate statistical functions COVAR_SAMP, COVAR_POP and CORR + See also: + /doc/sql.extensions/README.statistical_functions.txt + Contributor(s): + Hajime Nakagami + Adriano dos Santos Fernandes + * New feature CORE-4714 Aggregate statistical functions STDDEV_POP, STDDEV_SAMP, VAR_POP and VAR_SAMP See also: diff --git a/doc/sql.extensions/README.statistical_functions.txt b/doc/sql.extensions/README.statistical_functions.txt index aa79903c18..c76de13707 100644 --- a/doc/sql.extensions/README.statistical_functions.txt +++ b/doc/sql.extensions/README.statistical_functions.txt @@ -5,22 +5,40 @@ Statistical Functions By the SQL specification, some statistical functions are defined. Function about variance and standard deviation are bellow. -VAR_POP: return the population variance. VAR_SAMP: return the sample variance. -STDDEV_SAMP: return the sample standard deviation . -STDDEV_POP: return the population standard deviation. + eq. (SUM( ^ 2) - SUM() ^ 2 / COUNT()) / (COUNT() - 1) -VAR_POP() is equivalent to (SUM( ^ 2) - SUM() ^ 2 / COUNT()) / COUNT(). -VAR_SAMP() is equivalent to (SUM( ^ 2) - SUM() ^ 2 / COUNT()) / (COUNT() - 1). -STDDEV_POP() is equivalent to SQRT(VAR_POP()). -STDDEV_SAMP() is equivalent to SQRT(VAR_SAMP( ^ 2) - SUM() ^ 2 / COUNT()) / COUNT() + +STDDEV_SAMP: return the sample standard deviation. + eq. SQRT(VAR_SAMP()) + +COVAR_SAMP: return the sample population. + eq. (SUM( * ) - SUM() * SUM() / COUNT(*)) / (COUNT(*) - 1) + +COVAR_POP: return the population covariance. + eq. (SUM( * ) - SUM() * SUM() / COUNT(*)) / COUNT(*) + +CORR: returns the coefficient of correlation. + eq. COVAR_POP(, ) / (STDDEV_POP() * STDDEV_POP()) Author: Hajime Nakagami Syntax: - ::= () - := { VAR_POP | VAR_SAMP | STDDEV_POP | STDDEV_SAMP } + ::= () + := { VAR_POP | VAR_SAMP | STDDEV_POP | STDDEV_SAMP } + + ::= (, ) + := { COVAR_POP | COVAR_SAMP | CORR } + +Note: + If VAR_SAMP, STDDEV_SAMP, COVAR_SAMP and result count is 0 or 1, return NULL. + If VAR_POP, STDDEV_POP, COVAR_POP, CORR and result count is 0, return NULL. Example: SELECT STDDEV_SAMP(salary) FROM employees; diff --git a/src/dsql/AggNodes.cpp b/src/dsql/AggNodes.cpp index c8d17f6806..ab11d5a8b8 100644 --- a/src/dsql/AggNodes.cpp +++ b/src/dsql/AggNodes.cpp @@ -1313,4 +1313,179 @@ AggNode* StdDevAggNode::dsqlCopy(DsqlCompilerScratch* dsqlScratch) /*const*/ } +//-------------------- + + +static AggNode::Register coVarSampAggInfo("COVAR_SAMP", blr_agg_covar_samp); +static AggNode::Register coVarPopAggInfo("COVAR_POP", blr_agg_covar_pop); +static AggNode::Register corrAggInfo("CORR", blr_agg_corr); + +CorrAggNode::CorrAggNode(MemoryPool& pool, CorrType aType, ValueExprNode* aArg, ValueExprNode* aArg2) + : AggNode(pool, + (aType == CorrAggNode::TYPE_COVAR_SAMP ? coVarSampAggInfo : + aType == CorrAggNode::TYPE_COVAR_POP ? coVarPopAggInfo : + corrAggInfo), + false, false, aArg), + type(aType), + arg2(aArg2), + impure2Offset(0) +{ + addChildNode(arg2, arg2); +} + +void CorrAggNode::aggPostRse(thread_db* tdbb, CompilerScratch* csb) +{ + AggNode::aggPostRse(tdbb, csb); + impure2Offset = CMP_impure(csb, sizeof(CorrImpure)); +} + +DmlNode* CorrAggNode::parse(thread_db* tdbb, MemoryPool& pool, CompilerScratch* csb, const UCHAR blrOp) +{ + CorrType type; + + switch (blrOp) + { + case blr_agg_covar_samp: + type = TYPE_COVAR_SAMP; + break; + + case blr_agg_covar_pop: + type = TYPE_COVAR_POP; + break; + + case blr_agg_corr: + type = TYPE_CORR; + break; + + default: + fb_assert(false); + } + + ValueExprNode* a1 = PAR_parse_value(tdbb, csb); + ValueExprNode* a2 = PAR_parse_value(tdbb, csb); + return FB_NEW(pool) CorrAggNode(pool, type, a1, a2); +} + +void CorrAggNode::make(DsqlCompilerScratch* dsqlScratch, dsc* desc) +{ + desc->makeDouble(); + desc->setNullable(true); +} + +void CorrAggNode::getDesc(thread_db* tdbb, CompilerScratch* csb, dsc* desc) +{ + desc->makeDouble(); +} + +ValueExprNode* CorrAggNode::copy(thread_db* tdbb, NodeCopier& copier) const +{ + CorrAggNode* node = FB_NEW(*tdbb->getDefaultPool()) CorrAggNode(*tdbb->getDefaultPool(), type); + node->nodScale = nodScale; + node->arg = copier.copy(tdbb, arg); + node->arg2 = copier.copy(tdbb, arg2); + return node; +} + +void CorrAggNode::aggInit(thread_db* tdbb, jrd_req* request) const +{ + AggNode::aggInit(tdbb, request); + + impure_value_ex* impure = request->getImpure(impureOffset); + impure->make_double(0); + + CorrImpure* impure2 = request->getImpure(impure2Offset); + impure2->x = impure2->x2 = impure2->y = impure2->y2 = impure2->xy = 0.0; +} + +bool CorrAggNode::aggPass(thread_db* tdbb, jrd_req* request) const +{ + dsc* desc = NULL; + dsc* desc2 = NULL; + + desc = EVL_expr(tdbb, request, arg); + if (request->req_flags & req_null) + return false; + + desc2 = EVL_expr(tdbb, request, arg2); + if (request->req_flags & req_null) + return false; + + impure_value_ex* impure = request->getImpure(impureOffset); + ++impure->vlux_count; + + const double y = MOV_get_double(desc); + const double x = MOV_get_double(desc2); + + CorrImpure* impure2 = request->getImpure(impure2Offset); + impure2->x += x; + impure2->x2 += x * x; + impure2->y += y; + impure2->y2 += y * y; + impure2->xy += x * y; + + return true; +} + +void CorrAggNode::aggPass(thread_db* /*tdbb*/, jrd_req* /*request*/, dsc* /*desc*/) const +{ + fb_assert(false); +} + +dsc* CorrAggNode::aggExecute(thread_db* tdbb, jrd_req* request) const +{ + impure_value_ex* impure = request->getImpure(impureOffset); + CorrImpure* impure2 = request->getImpure(impure2Offset); + double d; + + switch (type) + { + case TYPE_COVAR_SAMP: + if (impure->vlux_count < 2) + return NULL; + d = (impure2->xy - impure2->y * impure2->x / impure->vlux_count) / (impure->vlux_count - 1); + break; + + case TYPE_COVAR_POP: + if (impure->vlux_count == 0) + return NULL; + d = (impure2->xy - impure2->y * impure2->x / impure->vlux_count) / impure->vlux_count; + break; + + case TYPE_CORR: + { + // COVAR_POP(Y, X) / (STDDEV_POP(X) * STDDEV_POP(Y)) + if (impure->vlux_count == 0) + return NULL; + + const double covarPop = (impure2->xy - impure2->y * impure2->x / impure->vlux_count) / + impure->vlux_count; + const double varPopX = (impure2->x2 - impure2->x * impure2->x / impure->vlux_count) / + impure->vlux_count; + const double varPopY = (impure2->y2 - impure2->y * impure2->y / impure->vlux_count) / + impure->vlux_count; + const double divisor = sqrt(varPopX) * sqrt(varPopY); + + if (divisor == 0.0) + return NULL; + + d = covarPop / divisor; + break; + } + } + + dsc temp; + temp.makeDouble(&d); + + EVL_make_value(tdbb, &temp, impure); + + return &impure->vlu_desc; +} + +AggNode* CorrAggNode::dsqlCopy(DsqlCompilerScratch* dsqlScratch) /*const*/ +{ + return FB_NEW(getPool()) CorrAggNode(getPool(), type, + doDsqlPass(dsqlScratch, arg), doDsqlPass(dsqlScratch, arg2)); +} + + } // namespace Jrd diff --git a/src/dsql/AggNodes.h b/src/dsql/AggNodes.h index 354e6eed6d..dcd3193b84 100644 --- a/src/dsql/AggNodes.h +++ b/src/dsql/AggNodes.h @@ -192,6 +192,48 @@ private: ULONG impure2Offset; }; +class CorrAggNode : public AggNode +{ +public: + enum CorrType + { + TYPE_COVAR_SAMP, + TYPE_COVAR_POP, + TYPE_CORR + }; + + struct CorrImpure + { + double x, x2, y, y2, xy; + }; + + explicit CorrAggNode(MemoryPool& pool, CorrType aType, + ValueExprNode* aArg = NULL, ValueExprNode* aArg2 = NULL); + + virtual void aggPostRse(thread_db* tdbb, CompilerScratch* csb); + + static DmlNode* parse(thread_db* tdbb, MemoryPool& pool, CompilerScratch* csb, const UCHAR blrOp); + + virtual void make(DsqlCompilerScratch* dsqlScratch, dsc* desc); + virtual void getDesc(thread_db* tdbb, CompilerScratch* csb, dsc* desc); + virtual ValueExprNode* copy(thread_db* tdbb, NodeCopier& copier) const; + + virtual void aggInit(thread_db* tdbb, jrd_req* request) const; + virtual bool aggPass(thread_db* tdbb, jrd_req* request) const; + virtual void aggPass(thread_db* tdbb, jrd_req* request, dsc* desc) const; + virtual dsc* aggExecute(thread_db* tdbb, jrd_req* request) const; + +protected: + virtual AggNode* dsqlCopy(DsqlCompilerScratch* dsqlScratch) /*const*/; + +public: + const CorrType type; + NestConst arg2; + +private: + ULONG impure2Offset; +}; + } // namespace #endif // DSQL_AGG_NODES_H diff --git a/src/dsql/parse.y b/src/dsql/parse.y index 65b1e32ba4..c703b046bd 100644 --- a/src/dsql/parse.y +++ b/src/dsql/parse.y @@ -576,6 +576,9 @@ using namespace Firebird; %token STDDEV_POP %token VAR_SAMP %token VAR_POP +%token COVAR_SAMP +%token COVAR_POP +%token CORR // precedence declarations for expression evaluation @@ -3814,6 +3817,9 @@ keyword_or_column | START | SIMILAR // added in FB 2.5 | KW_BOOLEAN // added in FB 3.0 + | CORR + | COVAR_POP + | COVAR_SAMP | DETERMINISTIC | KW_FALSE | OFFSET @@ -6805,6 +6811,12 @@ aggregate_function { $$ = newNode(StdDevAggNode::TYPE_VAR_SAMP, $3); } | VAR_POP '(' value ')' { $$ = newNode(StdDevAggNode::TYPE_VAR_POP, $3); } + | COVAR_SAMP '(' value ',' value ')' + { $$ = newNode(CorrAggNode::TYPE_COVAR_SAMP, $3, $5); } + | COVAR_POP '(' value ',' value ')' + { $$ = newNode(CorrAggNode::TYPE_COVAR_POP, $3, $5); } + | CORR '(' value ',' value ')' + { $$ = newNode(CorrAggNode::TYPE_CORR, $3, $5); } ; %type window_function diff --git a/src/jrd/blp.h b/src/jrd/blp.h index d831268020..adfeb75417 100644 --- a/src/jrd/blp.h +++ b/src/jrd/blp.h @@ -245,5 +245,8 @@ static const struct {"agg_stddev_pop", one}, {"agg_var_samp", one}, {"agg_var_pop", one}, + {"agg_covar_samp", two}, + {"agg_covar_pop", two}, + {"agg_corr", two}, {0, 0} }; diff --git a/src/jrd/blr.h b/src/jrd/blr.h index efec70d267..94e71d31a3 100644 --- a/src/jrd/blr.h +++ b/src/jrd/blr.h @@ -411,5 +411,8 @@ #define blr_agg_stddev_pop (unsigned char) 212 #define blr_agg_var_samp (unsigned char) 213 #define blr_agg_var_pop (unsigned char) 214 +#define blr_agg_covar_samp (unsigned char) 215 +#define blr_agg_covar_pop (unsigned char) 216 +#define blr_agg_corr (unsigned char) 217 #endif // JRD_BLR_H diff --git a/src/yvalve/keywords.cpp b/src/yvalve/keywords.cpp index 380ed763b7..4e4c6736ec 100644 --- a/src/yvalve/keywords.cpp +++ b/src/yvalve/keywords.cpp @@ -129,10 +129,13 @@ static const TOK tokens[] = {CONSTRAINT, "CONSTRAINT", 1, false}, {CONTAINING, "CONTAINING", 1, false}, {CONTINUE, "CONTINUE", 2, true}, + {CORR, "CORR", 2, false}, {COS, "COS", 2, false}, {COSH, "COSH", 2, false}, {COT, "COT", 2, false}, {COUNT, "COUNT", 1, false}, + {COVAR_POP, "COVAR_POP", 2, false}, + {COVAR_SAMP, "COVAR_SAMP", 2, false}, {CREATE, "CREATE", 1, false}, {CROSS, "CROSS", 2, false}, {CSTRING, "CSTRING", 1, false},