mirror of
https://github.com/FirebirdSQL/firebird.git
synced 2025-01-22 20:43:02 +01:00
Feature CORE-4714 - Aggregate statistical functions stddev_pop, stddev_samp, var_pop and var_samp - contributed by Hajime Nakagami.
This commit is contained in:
parent
5dfd0ab774
commit
2bd863a353
26
doc/sql.extensions/README.statistical_functions.txt
Normal file
26
doc/sql.extensions/README.statistical_functions.txt
Normal file
@ -0,0 +1,26 @@
|
||||
---------------------
|
||||
Statistical Functions
|
||||
---------------------
|
||||
|
||||
By the SQL specification, some statistical functions are defined.
|
||||
Function about variance and standard deviation are bellow.
|
||||
|
||||
VAR_POP: return the population variance.
|
||||
VAR_SAMP: return the sample variance.
|
||||
STDDEV_SAMP: return the sample standard deviation .
|
||||
STDDEV_POP: return the population standard deviation.
|
||||
|
||||
VAR_POP(<expr>) is equivalent to (SUM(<expr> ^ 2) - SUM(<expr>) ^ 2 / COUNT(<expr>)) / COUNT(<expr>).
|
||||
VAR_SAMP(<expr>) is equivalent to (SUM(<expr> ^ 2) - SUM(<expr>) ^ 2 / COUNT(<expr>)) / (COUNT(<expr>) - 1).
|
||||
STDDEV_POP(<expr>) is equivalent to SQRT(VAR_POP(<expr>)).
|
||||
STDDEV_SAMP(<expr>) is equivalent to SQRT(VAR_SAMP(<expr)).
|
||||
|
||||
Author:
|
||||
Hajime Nakagami <nakagami@gmail.com>
|
||||
|
||||
Syntax:
|
||||
<statistical function> ::= <statistical function name>(<expr>)
|
||||
<statistical function name> := { VAR_POP | VAR_SAMP | STDDEV_POP | STDDEV_SAMP }
|
||||
|
||||
Example:
|
||||
SELECT STDDEV_SAMP(salary) FROM employees;
|
@ -40,6 +40,7 @@
|
||||
#include "../dsql/pass1_proto.h"
|
||||
#include "../dsql/utld_proto.h"
|
||||
#include "../jrd/DataTypeUtil.h"
|
||||
#include <math.h>
|
||||
|
||||
using namespace Firebird;
|
||||
using namespace Jrd;
|
||||
@ -1169,4 +1170,147 @@ AggNode* MaxMinAggNode::dsqlCopy(DsqlCompilerScratch* dsqlScratch) /*const*/
|
||||
}
|
||||
|
||||
|
||||
//--------------------
|
||||
|
||||
|
||||
static AggNode::Register<StdDevAggNode> stdDevSampAggInfo("STDDEV_SAMP", blr_agg_stddev_samp);
|
||||
static AggNode::Register<StdDevAggNode> stdDevPopAggInfo("STDDEV_POP", blr_agg_stddev_pop);
|
||||
static AggNode::Register<StdDevAggNode> varSampAggInfo("VAR_SAMP", blr_agg_var_samp);
|
||||
static AggNode::Register<StdDevAggNode> varPopAggInfo("VAR_POP", blr_agg_var_pop);
|
||||
|
||||
StdDevAggNode::StdDevAggNode(MemoryPool& pool, StdDevType aType, ValueExprNode* aArg)
|
||||
: AggNode(pool,
|
||||
(aType == StdDevAggNode::TYPE_STDDEV_SAMP ? stdDevSampAggInfo :
|
||||
aType == StdDevAggNode::TYPE_STDDEV_POP ? stdDevPopAggInfo :
|
||||
aType == StdDevAggNode::TYPE_VAR_SAMP ? varSampAggInfo :
|
||||
varPopAggInfo),
|
||||
false, false, aArg),
|
||||
type(aType),
|
||||
impure2Offset(0)
|
||||
{
|
||||
}
|
||||
|
||||
void StdDevAggNode::aggPostRse(thread_db* tdbb, CompilerScratch* csb)
|
||||
{
|
||||
AggNode::aggPostRse(tdbb, csb);
|
||||
impure2Offset = CMP_impure(csb, sizeof(StdDevImpure));
|
||||
}
|
||||
|
||||
DmlNode* StdDevAggNode::parse(thread_db* tdbb, MemoryPool& pool, CompilerScratch* csb, const UCHAR blrOp)
|
||||
{
|
||||
StdDevType type;
|
||||
|
||||
switch (blrOp)
|
||||
{
|
||||
case blr_agg_stddev_samp:
|
||||
type = TYPE_STDDEV_SAMP;
|
||||
break;
|
||||
|
||||
case blr_agg_stddev_pop:
|
||||
type = TYPE_STDDEV_POP;
|
||||
break;
|
||||
|
||||
case blr_agg_var_samp:
|
||||
type = TYPE_VAR_SAMP;
|
||||
break;
|
||||
|
||||
case blr_agg_var_pop:
|
||||
type = TYPE_VAR_POP;
|
||||
break;
|
||||
|
||||
default:
|
||||
fb_assert(false);
|
||||
}
|
||||
|
||||
return FB_NEW(pool) StdDevAggNode(pool, type, PAR_parse_value(tdbb, csb));
|
||||
}
|
||||
|
||||
void StdDevAggNode::make(DsqlCompilerScratch* dsqlScratch, dsc* desc)
|
||||
{
|
||||
desc->makeDouble();
|
||||
desc->setNullable(true);
|
||||
}
|
||||
|
||||
void StdDevAggNode::getDesc(thread_db* tdbb, CompilerScratch* csb, dsc* desc)
|
||||
{
|
||||
desc->makeDouble();
|
||||
}
|
||||
|
||||
ValueExprNode* StdDevAggNode::copy(thread_db* tdbb, NodeCopier& copier) const
|
||||
{
|
||||
StdDevAggNode* node = FB_NEW(*tdbb->getDefaultPool()) StdDevAggNode(*tdbb->getDefaultPool(), type);
|
||||
node->nodScale = nodScale;
|
||||
node->arg = copier.copy(tdbb, arg);
|
||||
return node;
|
||||
}
|
||||
|
||||
void StdDevAggNode::aggInit(thread_db* tdbb, jrd_req* request) const
|
||||
{
|
||||
AggNode::aggInit(tdbb, request);
|
||||
|
||||
impure_value_ex* impure = request->getImpure<impure_value_ex>(impureOffset);
|
||||
impure->make_double(0);
|
||||
|
||||
StdDevImpure* impure2 = request->getImpure<StdDevImpure>(impure2Offset);
|
||||
impure2->x = impure2->x2 = 0.0;
|
||||
}
|
||||
|
||||
void StdDevAggNode::aggPass(thread_db* tdbb, jrd_req* request, dsc* desc) const
|
||||
{
|
||||
impure_value_ex* impure = request->getImpure<impure_value_ex>(impureOffset);
|
||||
++impure->vlux_count;
|
||||
|
||||
const double d = MOV_get_double(desc);
|
||||
|
||||
StdDevImpure* impure2 = request->getImpure<StdDevImpure>(impure2Offset);
|
||||
impure2->x += d;
|
||||
impure2->x2 += d * d;
|
||||
}
|
||||
|
||||
dsc* StdDevAggNode::aggExecute(thread_db* tdbb, jrd_req* request) const
|
||||
{
|
||||
impure_value_ex* impure = request->getImpure<impure_value_ex>(impureOffset);
|
||||
StdDevImpure* impure2 = request->getImpure<StdDevImpure>(impure2Offset);
|
||||
double d;
|
||||
|
||||
switch (type)
|
||||
{
|
||||
case TYPE_STDDEV_SAMP:
|
||||
case TYPE_VAR_SAMP:
|
||||
if (impure->vlux_count < 2)
|
||||
return NULL;
|
||||
|
||||
d = (impure2->x2 - impure2->x * impure2->x / impure->vlux_count) /
|
||||
(impure->vlux_count - 1);
|
||||
|
||||
if (type == TYPE_STDDEV_SAMP)
|
||||
d = sqrt(d);
|
||||
break;
|
||||
|
||||
case TYPE_STDDEV_POP:
|
||||
case TYPE_VAR_POP:
|
||||
if (impure->vlux_count == 0)
|
||||
return NULL;
|
||||
|
||||
d = (impure2->x2 - impure2->x * impure2->x / impure->vlux_count) /
|
||||
impure->vlux_count;
|
||||
|
||||
if (type == TYPE_STDDEV_POP)
|
||||
d = sqrt(d);
|
||||
break;
|
||||
}
|
||||
|
||||
dsc temp;
|
||||
temp.makeDouble(&d);
|
||||
EVL_make_value(tdbb, &temp, impure);
|
||||
|
||||
return &impure->vlu_desc;
|
||||
}
|
||||
|
||||
AggNode* StdDevAggNode::dsqlCopy(DsqlCompilerScratch* dsqlScratch) /*const*/
|
||||
{
|
||||
return FB_NEW(getPool()) StdDevAggNode(getPool(), type, doDsqlPass(dsqlScratch, arg));
|
||||
}
|
||||
|
||||
|
||||
} // namespace Jrd
|
||||
|
@ -153,6 +153,44 @@ public:
|
||||
const MaxMinType type;
|
||||
};
|
||||
|
||||
class StdDevAggNode : public AggNode
|
||||
{
|
||||
public:
|
||||
enum StdDevType
|
||||
{
|
||||
TYPE_STDDEV_SAMP,
|
||||
TYPE_STDDEV_POP,
|
||||
TYPE_VAR_SAMP,
|
||||
TYPE_VAR_POP
|
||||
};
|
||||
|
||||
struct StdDevImpure
|
||||
{
|
||||
double x, x2;
|
||||
};
|
||||
|
||||
explicit StdDevAggNode(MemoryPool& pool, StdDevType aType, ValueExprNode* aArg = NULL);
|
||||
virtual void aggPostRse(thread_db* tdbb, CompilerScratch* csb);
|
||||
|
||||
static DmlNode* parse(thread_db* tdbb, MemoryPool& pool, CompilerScratch* csb, const UCHAR blrOp);
|
||||
|
||||
virtual void make(DsqlCompilerScratch* dsqlScratch, dsc* desc);
|
||||
virtual void getDesc(thread_db* tdbb, CompilerScratch* csb, dsc* desc);
|
||||
virtual ValueExprNode* copy(thread_db* tdbb, NodeCopier& copier) const;
|
||||
|
||||
virtual void aggInit(thread_db* tdbb, jrd_req* request) const;
|
||||
virtual void aggPass(thread_db* tdbb, jrd_req* request, dsc* desc) const;
|
||||
virtual dsc* aggExecute(thread_db* tdbb, jrd_req* request) const;
|
||||
|
||||
protected:
|
||||
virtual AggNode* dsqlCopy(DsqlCompilerScratch* dsqlScratch) /*const*/;
|
||||
|
||||
public:
|
||||
const StdDevType type;
|
||||
|
||||
private:
|
||||
ULONG impure2Offset;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
|
@ -572,6 +572,10 @@ using namespace Firebird;
|
||||
%token <metaNamePtr> TRUSTED
|
||||
%token <metaNamePtr> ROW
|
||||
%token <metaNamePtr> OFFSET
|
||||
%token <metaNamePtr> STDDEV_SAMP
|
||||
%token <metaNamePtr> STDDEV_POP
|
||||
%token <metaNamePtr> VAR_SAMP
|
||||
%token <metaNamePtr> VAR_POP
|
||||
|
||||
// precedence declarations for expression evaluation
|
||||
|
||||
@ -3819,8 +3823,12 @@ keyword_or_column
|
||||
| ROW
|
||||
| SCROLL
|
||||
| SQLSTATE
|
||||
| STDDEV_SAMP
|
||||
| STDDEV_POP
|
||||
| KW_TRUE
|
||||
| UNKNOWN
|
||||
| VAR_SAMP
|
||||
| VAR_POP
|
||||
;
|
||||
|
||||
col_opt
|
||||
@ -6789,6 +6797,14 @@ aggregate_function
|
||||
{ $$ = newNode<ListAggNode>(false, $4, $5); }
|
||||
| LIST '(' DISTINCT value delimiter_opt ')'
|
||||
{ $$ = newNode<ListAggNode>(true, $4, $5); }
|
||||
| STDDEV_SAMP '(' value ')'
|
||||
{ $$ = newNode<StdDevAggNode>(StdDevAggNode::TYPE_STDDEV_SAMP, $3); }
|
||||
| STDDEV_POP '(' value ')'
|
||||
{ $$ = newNode<StdDevAggNode>(StdDevAggNode::TYPE_STDDEV_POP, $3); }
|
||||
| VAR_SAMP '(' value ')'
|
||||
{ $$ = newNode<StdDevAggNode>(StdDevAggNode::TYPE_VAR_SAMP, $3); }
|
||||
| VAR_POP '(' value ')'
|
||||
{ $$ = newNode<StdDevAggNode>(StdDevAggNode::TYPE_VAR_POP, $3); }
|
||||
;
|
||||
|
||||
%type <aggNode> window_function
|
||||
|
@ -241,5 +241,9 @@ static const struct
|
||||
{"subfunc", function},
|
||||
{"record_version2", byte_line},
|
||||
{"gen_id2", gen_id2}, // 210
|
||||
{"agg_stddev_samp", one},
|
||||
{"agg_stddev_pop", one},
|
||||
{"agg_var_samp", one},
|
||||
{"agg_var_pop", one},
|
||||
{0, 0}
|
||||
};
|
||||
|
@ -407,4 +407,9 @@
|
||||
#define blr_record_version2 (unsigned char) 209
|
||||
#define blr_gen_id2 (unsigned char) 210 // NEXT VALUE FOR generator
|
||||
|
||||
#define blr_agg_stddev_samp (unsigned char) 211
|
||||
#define blr_agg_stddev_pop (unsigned char) 212
|
||||
#define blr_agg_var_samp (unsigned char) 213
|
||||
#define blr_agg_var_pop (unsigned char) 214
|
||||
|
||||
#endif // JRD_BLR_H
|
||||
|
@ -93,6 +93,7 @@ struct impure_value
|
||||
|
||||
void make_long(const SLONG val, const signed char scale = 0);
|
||||
void make_int64(const SINT64 val, const signed char scale = 0);
|
||||
void make_double(const double val);
|
||||
};
|
||||
|
||||
// Do not use these methods where dsc_sub_type is not explicitly set to zero.
|
||||
@ -116,12 +117,23 @@ inline void impure_value::make_int64(const SINT64 val, const signed char scale)
|
||||
this->vlu_desc.dsc_address = reinterpret_cast<UCHAR*>(&this->vlu_misc.vlu_int64);
|
||||
}
|
||||
|
||||
inline void impure_value::make_double(const double val)
|
||||
{
|
||||
this->vlu_misc.vlu_double = val;
|
||||
this->vlu_desc.dsc_dtype = DEFAULT_DOUBLE;
|
||||
this->vlu_desc.dsc_length = sizeof(double);
|
||||
this->vlu_desc.dsc_scale = 0;
|
||||
this->vlu_desc.dsc_sub_type = 0;
|
||||
this->vlu_desc.dsc_address = reinterpret_cast<UCHAR*>(&this->vlu_misc.vlu_double);
|
||||
}
|
||||
|
||||
struct impure_value_ex : public impure_value
|
||||
{
|
||||
SINT64 vlux_count;
|
||||
blb* vlu_blob;
|
||||
};
|
||||
|
||||
|
||||
const int VLU_computed = 1; // An invariant sub-query has been computed
|
||||
const int VLU_null = 2; // An invariant sub-query computed to null
|
||||
const int VLU_checked = 4; // Constraint already checked in first read or assignment to argument/variable
|
||||
|
@ -389,6 +389,8 @@ static const TOK tokens[] =
|
||||
{STARTING, "STARTS", 1, false}, // Alias of STARTING
|
||||
{STATEMENT, "STATEMENT", 2, true},
|
||||
{STATISTICS, "STATISTICS", 1, false},
|
||||
{STDDEV_POP, "STDDEV_POP", 2, false},
|
||||
{STDDEV_SAMP, "STDDEV_SAMP", 2, false},
|
||||
{SUBSTRING, "SUBSTRING", 2, true},
|
||||
{SUB_TYPE, "SUB_TYPE", 1, false},
|
||||
{SUM, "SUM", 1, false},
|
||||
@ -426,6 +428,8 @@ static const TOK tokens[] =
|
||||
{UUID_TO_CHAR, "UUID_TO_CHAR", 2, false},
|
||||
{KW_VALUE, "VALUE", 1, false},
|
||||
{VALUES, "VALUES", 1, false},
|
||||
{VAR_POP, "VAR_POP", 2, false},
|
||||
{VAR_SAMP, "VAR_SAMP", 2, false},
|
||||
{VARCHAR, "VARCHAR", 1, false},
|
||||
{VARIABLE, "VARIABLE", 1, false},
|
||||
{VARYING, "VARYING", 1, false},
|
||||
|
Loading…
Reference in New Issue
Block a user