Browse Source

SQL: Introduce MAD (MedianAbsoluteDeviation) aggregation (#40048)

Add Median Absolute Deviation aggregation

Fix #39597
Costin Leau 6 years ago
parent
commit
4f09613942

+ 24 - 0
docs/reference/sql/functions/aggs.asciidoc

@@ -407,6 +407,30 @@ https://en.wikipedia.org/wiki/Kurtosis[Quantify] the shape of the distribution o
 include-tagged::{sql-specs}/docs.csv-spec[aggKurtosis]
 --------------------------------------------------
 
+[[sql-functions-aggs-mad]]
+===== `MAD`
+
+.Synopsis:
+[source, sql]
+--------------------------------------------------
+MAD(field_name<1>)
+--------------------------------------------------
+
+*Input*:
+
+<1> a numeric field
+
+*Output*: `double` numeric value
+
+.Description:
+
+https://en.wikipedia.org/wiki/Median_absolute_deviation[Measure] the variability of the input values in the field `field_name`.
+
+["source","sql",subs="attributes,macros"]
+--------------------------------------------------
+include-tagged::{sql-specs}/docs.csv-spec[aggMad]
+--------------------------------------------------
+
 [[sql-functions-aggs-percentile]]
 ===== `PERCENTILE`
 

+ 44 - 0
x-pack/plugin/sql/qa/src/main/resources/agg.csv-spec

@@ -469,3 +469,47 @@ null           |   1           |   10
 F              |   4           |   6
 M              |   1           |   4
 ;
+
+medianAbsoluteDeviation
+schema::gender:s|mad:d
+SELECT gender, MAD(salary) AS mad FROM test_emp GROUP BY gender ORDER BY gender;
+
+    gender     |      mad      
+---------------+---------------
+null           |10789.0        
+F              |12719.0        
+M              |8905.0         
+;
+
+medianAbsoluteDeviationOnTwoFields
+schema::gender:s|avg:l|mad_s:l|mad_l:d
+SELECT gender, FLOOR(AVG(salary)) AS avg, FLOOR(MAD(salary)) AS mad_s, MAD(languages) AS mad_l FROM test_emp GROUP BY gender ORDER BY gender;
+
+    gender     |      avg      |     mad_s     |     mad_l     
+---------------+---------------+---------------+---------------
+null           |48760          |10789          |2.0            
+F              |50490          |12719          |1.5            
+M              |46860          |8905           |1.0     
+;
+
+medianAbsoluteDeviationOnSecondaryFieldWithOrder
+schema::gender:s|mad:d
+SELECT gender, MAD(salary) AS mad FROM test_emp GROUP BY gender ORDER BY mad ASC;
+
+    gender     |      mad      
+---------------+---------------
+M              |8905.0         
+null           |10789.0        
+F              |12719.0         
+;
+
+
+medianAbsoluteDeviationOnSecondaryFieldWithOrderAndHaving
+schema::gender:s|mad:d
+SELECT gender, MAD(salary) AS mad FROM test_emp GROUP BY gender HAVING mad > 10000 ORDER BY mad ASC;
+
+    gender     |      mad      
+---------------+---------------
+null           |10789.0        
+F              |12719.0         
+;

+ 2 - 1
x-pack/plugin/sql/qa/src/main/resources/command.csv-spec

@@ -16,7 +16,8 @@ LAST_VALUE       |AGGREGATE
 MAX              |AGGREGATE
 MIN              |AGGREGATE      
 SUM              |AGGREGATE      
-KURTOSIS         |AGGREGATE      
+KURTOSIS         |AGGREGATE
+MAD              |AGGREGATE      
 PERCENTILE       |AGGREGATE      
 PERCENTILE_RANK  |AGGREGATE      
 SKEWNESS         |AGGREGATE      

+ 12 - 1
x-pack/plugin/sql/qa/src/main/resources/docs.csv-spec

@@ -193,7 +193,8 @@ LAST_VALUE       |AGGREGATE
 MAX              |AGGREGATE      
 MIN              |AGGREGATE      
 SUM              |AGGREGATE      
-KURTOSIS         |AGGREGATE      
+KURTOSIS         |AGGREGATE
+MAD              |AGGREGATE      
 PERCENTILE       |AGGREGATE      
 PERCENTILE_RANK  |AGGREGATE      
 SKEWNESS         |AGGREGATE      
@@ -1183,6 +1184,16 @@ SELECT MIN(salary) AS min, MAX(salary) AS max, KURTOSIS(salary) AS k FROM emp;
 // end::aggKurtosis
 ;
 
+aggMad
+// tag::aggMad
+SELECT MIN(salary) AS min, MAX(salary) AS max, AVG(salary) AS avg, MAD(salary) AS mad FROM emp;
+
+      min      |      max      |      avg      |      mad      
+---------------+---------------+---------------+---------------
+25324          |74999          |48248.55       |10096.5   
+// end::aggMad
+;
+
 aggPercentile
 // tag::aggPercentile
 SELECT languages, PERCENTILE(salary, 95) AS "95th" FROM emp 

+ 8 - 4
x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/FunctionRegistry.java

@@ -14,6 +14,7 @@ import org.elasticsearch.xpack.sql.expression.function.aggregate.First;
 import org.elasticsearch.xpack.sql.expression.function.aggregate.Kurtosis;
 import org.elasticsearch.xpack.sql.expression.function.aggregate.Last;
 import org.elasticsearch.xpack.sql.expression.function.aggregate.Max;
+import org.elasticsearch.xpack.sql.expression.function.aggregate.MedianAbsoluteDeviation;
 import org.elasticsearch.xpack.sql.expression.function.aggregate.Min;
 import org.elasticsearch.xpack.sql.expression.function.aggregate.Percentile;
 import org.elasticsearch.xpack.sql.expression.function.aggregate.PercentileRank;
@@ -153,13 +154,16 @@ public class FunctionRegistry {
                 def(Min.class, Min::new, "MIN"),
                 def(Sum.class, Sum::new, "SUM"));
         // Statistics
-        addToMap(def(StddevPop.class, StddevPop::new, "STDDEV_POP"),
-                def(VarPop.class, VarPop::new,"VAR_POP"),
+        addToMap(
+                def(Kurtosis.class, Kurtosis::new, "KURTOSIS"),
+                def(MedianAbsoluteDeviation.class, MedianAbsoluteDeviation::new, "MAD"),
                 def(Percentile.class, Percentile::new, "PERCENTILE"),
                 def(PercentileRank.class, PercentileRank::new, "PERCENTILE_RANK"),
-                def(SumOfSquares.class, SumOfSquares::new, "SUM_OF_SQUARES"),
                 def(Skewness.class, Skewness::new, "SKEWNESS"),
-                def(Kurtosis.class, Kurtosis::new, "KURTOSIS"));
+                def(StddevPop.class, StddevPop::new, "STDDEV_POP"),
+                def(SumOfSquares.class, SumOfSquares::new, "SUM_OF_SQUARES"),
+                def(VarPop.class, VarPop::new,"VAR_POP")
+                );
         // histogram
         addToMap(def(Histogram.class, Histogram::new, "HISTOGRAM"));
         // Scalar functions

+ 39 - 0
x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/MedianAbsoluteDeviation.java

@@ -0,0 +1,39 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+
+package org.elasticsearch.xpack.sql.expression.function.aggregate;
+
+import org.elasticsearch.xpack.sql.expression.Expression;
+import org.elasticsearch.xpack.sql.tree.NodeInfo;
+import org.elasticsearch.xpack.sql.tree.Source;
+import org.elasticsearch.xpack.sql.type.DataType;
+
+import java.util.List;
+
+public class MedianAbsoluteDeviation extends NumericAggregate { // aggregate function node; FunctionRegistry registers it under the SQL name "MAD"
+
+    public MedianAbsoluteDeviation(Source source, Expression field) { // both arguments are forwarded unchanged to NumericAggregate
+        super(source, field);
+    }
+
+    @Override
+    public DataType dataType() {
+        return DataType.DOUBLE; // result type is a fixed DOUBLE; it is not derived from the field
+    }
+
+    @Override
+    public Expression replaceChildren(List<Expression> newChildren) { // returns a new node built from source() and the single replacement child
+        if (newChildren.size() != 1) { // any size other than one is rejected with IllegalArgumentException
+            throw new IllegalArgumentException("expected [1] child but received [" + newChildren.size() + "]");
+        }
+        return new MedianAbsoluteDeviation(source(), newChildren.get(0));
+    }
+
+    @Override
+    protected NodeInfo<? extends Expression> info() {
+        return NodeInfo.create(this, MedianAbsoluteDeviation::new, field()); // NOTE(review): presumably lets tree utilities re-create the node via the constructor; confirm against NodeInfo
+    }
+}

+ 11 - 1
x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/planner/QueryTranslator.java

@@ -24,6 +24,7 @@ import org.elasticsearch.xpack.sql.expression.function.aggregate.Count;
 import org.elasticsearch.xpack.sql.expression.function.aggregate.ExtendedStats;
 import org.elasticsearch.xpack.sql.expression.function.aggregate.First;
 import org.elasticsearch.xpack.sql.expression.function.aggregate.Last;
+import org.elasticsearch.xpack.sql.expression.function.aggregate.MedianAbsoluteDeviation;
 import org.elasticsearch.xpack.sql.expression.function.aggregate.MatrixStats;
 import org.elasticsearch.xpack.sql.expression.function.aggregate.Max;
 import org.elasticsearch.xpack.sql.expression.function.aggregate.Min;
@@ -74,6 +75,7 @@ import org.elasticsearch.xpack.sql.querydsl.agg.GroupByValue;
 import org.elasticsearch.xpack.sql.querydsl.agg.LeafAgg;
 import org.elasticsearch.xpack.sql.querydsl.agg.MatrixStatsAgg;
 import org.elasticsearch.xpack.sql.querydsl.agg.MaxAgg;
+import org.elasticsearch.xpack.sql.querydsl.agg.MedianAbsoluteDeviationAgg;
 import org.elasticsearch.xpack.sql.querydsl.agg.MinAgg;
 import org.elasticsearch.xpack.sql.querydsl.agg.OrAggFilter;
 import org.elasticsearch.xpack.sql.querydsl.agg.PercentileRanksAgg;
@@ -144,7 +146,8 @@ final class QueryTranslator {
             new CountAggs(),
             new DateTimes(),
             new Firsts(),
-            new Lasts()
+            new Lasts(), 
+            new MADs()
             );
 
     static class QueryTranslation {
@@ -833,6 +836,13 @@ final class QueryTranslator {
         }
     }
 
+    static class MADs extends SingleValueAggTranslator<MedianAbsoluteDeviation> { // translator for MedianAbsoluteDeviation; a new MADs() instance is listed next to Firsts and Lasts in this file
+        @Override
+        protected LeafAgg toAgg(String id, MedianAbsoluteDeviation m) { // wraps the function in a MedianAbsoluteDeviationAgg carrying the given id
+            return new MedianAbsoluteDeviationAgg(id, field(m)); // field(...) is declared outside this hunk; presumably yields the target field name for m (confirm)
+        }
+    }
+
     static class Firsts extends TopHitsAggTranslator<First> {
 
         @Override

+ 23 - 0
x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/querydsl/agg/MedianAbsoluteDeviationAgg.java

@@ -0,0 +1,23 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+
+package org.elasticsearch.xpack.sql.querydsl.agg;
+
+import org.elasticsearch.search.aggregations.AggregationBuilder;
+
+import static org.elasticsearch.search.aggregations.AggregationBuilders.medianAbsoluteDeviation;
+
+public class MedianAbsoluteDeviationAgg extends LeafAgg { // LeafAgg whose builder comes from AggregationBuilders.medianAbsoluteDeviation
+
+    public MedianAbsoluteDeviationAgg(String id, String fieldName) { // id and fieldName are passed straight to LeafAgg
+        super(id, fieldName);
+    }
+
+    @Override
+    AggregationBuilder toBuilder() {
+        return medianAbsoluteDeviation(id()).field(fieldName()); // builder is created with id() and then given fieldName() as its field
+    }
+}