123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219 |
- /*
- ** 2013-05-28
- **
- ** The author disclaims copyright to this source code. In place of
- ** a legal notice, here is a blessing:
- **
- ** May you do good and not evil.
- ** May you find forgiveness for yourself and forgive others.
- ** May you share freely, never taking more than you give.
- **
- ******************************************************************************
- **
- ** This file contains code to implement the percentile(Y,P) SQL function
- ** as described below:
- **
- ** (1) The percentile(Y,P) function is an aggregate function taking
- ** exactly two arguments.
- **
- ** (2) If the P argument to percentile(Y,P) is not the same for every
- ** row in the aggregate then an error is thrown. The word "same"
- ** in the previous sentence means that the value differ by less
- ** than 0.001.
- **
- ** (3) If the P argument to percentile(Y,P) evaluates to anything other
- ** than a number in the range of 0.0 to 100.0 inclusive then an
- ** error is thrown.
- **
- ** (4) If any Y argument to percentile(Y,P) evaluates to a value that
- ** is not NULL and is not numeric then an error is thrown.
- **
- ** (5) If any Y argument to percentile(Y,P) evaluates to plus or minus
- ** infinity then an error is thrown. (SQLite always interprets NaN
- ** values as NULL.)
- **
- ** (6) Both Y and P in percentile(Y,P) can be arbitrary expressions,
- ** including CASE WHEN expressions.
- **
- ** (7) The percentile(Y,P) aggregate is able to handle inputs of at least
- ** one million (1,000,000) rows.
- **
- ** (8) If there are no non-NULL values for Y, then percentile(Y,P)
- ** returns NULL.
- **
- ** (9) If there is exactly one non-NULL value for Y, the percentile(Y,P)
- ** returns the one Y value.
- **
- ** (10) If there N non-NULL values of Y where N is two or more and
- ** the Y values are ordered from least to greatest and a graph is
- ** drawn from 0 to N-1 such that the height of the graph at J is
- ** the J-th Y value and such that straight lines are drawn between
- ** adjacent Y values, then the percentile(Y,P) function returns
- ** the height of the graph at P*(N-1)/100.
- **
- ** (11) The percentile(Y,P) function always returns either a floating
- ** point number or NULL.
- **
- ** (12) The percentile(Y,P) is implemented as a single C99 source-code
- ** file that compiles into a shared-library or DLL that can be loaded
- ** into SQLite using the sqlite3_load_extension() interface.
- */
- #include "sqlite3ext.h"
- SQLITE_EXTENSION_INIT1
- #include <assert.h>
- #include <string.h>
- #include <stdlib.h>
- /* The following object is the session context for a single percentile()
- ** function. We have to remember all input Y values until the very end.
- ** Those values are accumulated in the Percentile.a[] array.
- */
- typedef struct Percentile Percentile;
- struct Percentile {
- unsigned nAlloc; /* Number of slots allocated for a[] */
- unsigned nUsed; /* Number of slots actually used in a[] */
- double rPct; /* 1.0 more than the value for P */
- double *a; /* Array of Y values */
- };
- /*
- ** Return TRUE if the input floating-point number is an infinity.
- */
- static int isInfinity(double r){
- sqlite3_uint64 u;
- assert( sizeof(u)==sizeof(r) );
- memcpy(&u, &r, sizeof(u));
- return ((u>>52)&0x7ff)==0x7ff;
- }
- /*
- ** Return TRUE if two doubles differ by 0.001 or less
- */
- static int sameValue(double a, double b){
- a -= b;
- return a>=-0.001 && a<=0.001;
- }
- /*
- ** The "step" function for percentile(Y,P) is called once for each
- ** input row.
- */
- static void percentStep(sqlite3_context *pCtx, int argc, sqlite3_value **argv){
- Percentile *p;
- double rPct;
- int eType;
- double y;
- assert( argc==2 );
- /* Requirement 3: P must be a number between 0 and 100 */
- eType = sqlite3_value_numeric_type(argv[1]);
- rPct = sqlite3_value_double(argv[1]);
- if( (eType!=SQLITE_INTEGER && eType!=SQLITE_FLOAT) ||
- ((rPct = sqlite3_value_double(argv[1]))<0.0 || rPct>100.0) ){
- sqlite3_result_error(pCtx, "2nd argument to percentile() is not "
- "a number between 0.0 and 100.0", -1);
- return;
- }
- /* Allocate the session context. */
- p = (Percentile*)sqlite3_aggregate_context(pCtx, sizeof(*p));
- if( p==0 ) return;
- /* Remember the P value. Throw an error if the P value is different
- ** from any prior row, per Requirement (2). */
- if( p->rPct==0.0 ){
- p->rPct = rPct+1.0;
- }else if( !sameValue(p->rPct,rPct+1.0) ){
- sqlite3_result_error(pCtx, "2nd argument to percentile() is not the "
- "same for all input rows", -1);
- return;
- }
- /* Ignore rows for which Y is NULL */
- eType = sqlite3_value_type(argv[0]);
- if( eType==SQLITE_NULL ) return;
- /* If not NULL, then Y must be numeric. Otherwise throw an error.
- ** Requirement 4 */
- if( eType!=SQLITE_INTEGER && eType!=SQLITE_FLOAT ){
- sqlite3_result_error(pCtx, "1st argument to percentile() is not "
- "numeric", -1);
- return;
- }
- /* Throw an error if the Y value is infinity or NaN */
- y = sqlite3_value_double(argv[0]);
- if( isInfinity(y) ){
- sqlite3_result_error(pCtx, "Inf input to percentile()", -1);
- return;
- }
- /* Allocate and store the Y */
- if( p->nUsed>=p->nAlloc ){
- unsigned n = p->nAlloc*2 + 250;
- double *a = sqlite3_realloc(p->a, sizeof(double)*n);
- if( a==0 ){
- sqlite3_free(p->a);
- memset(p, 0, sizeof(*p));
- sqlite3_result_error_nomem(pCtx);
- return;
- }
- p->nAlloc = n;
- p->a = a;
- }
- p->a[p->nUsed++] = y;
- }
- /*
- ** Compare to doubles for sorting using qsort()
- */
- static int doubleCmp(const void *pA, const void *pB){
- double a = *(double*)pA;
- double b = *(double*)pB;
- if( a==b ) return 0;
- if( a<b ) return -1;
- return +1;
- }
- /*
- ** Called to compute the final output of percentile() and to clean
- ** up all allocated memory.
- */
- static void percentFinal(sqlite3_context *pCtx){
- Percentile *p;
- unsigned i1, i2;
- double v1, v2;
- double ix, vx;
- p = (Percentile*)sqlite3_aggregate_context(pCtx, 0);
- if( p==0 ) return;
- if( p->a==0 ) return;
- if( p->nUsed ){
- qsort(p->a, p->nUsed, sizeof(double), doubleCmp);
- ix = (p->rPct-1.0)*(p->nUsed-1)*0.01;
- i1 = (unsigned)ix;
- i2 = ix==(double)i1 || i1==p->nUsed-1 ? i1 : i1+1;
- v1 = p->a[i1];
- v2 = p->a[i2];
- vx = v1 + (v2-v1)*(ix-i1);
- sqlite3_result_double(pCtx, vx);
- }
- sqlite3_free(p->a);
- memset(p, 0, sizeof(*p));
- }
- #ifdef _WIN32
- __declspec(dllexport)
- #endif
- int sqlite3_percentile_init(
- sqlite3 *db,
- char **pzErrMsg,
- const sqlite3_api_routines *pApi
- ){
- int rc = SQLITE_OK;
- SQLITE_EXTENSION_INIT2(pApi);
- (void)pzErrMsg; /* Unused parameter */
- rc = sqlite3_create_function(db, "percentile", 2, SQLITE_UTF8, 0,
- 0, percentStep, percentFinal);
- return rc;
- }
|