Browse Source

ESQL: SpatialCentroid aggregation tests and docs (#111236)

Iván Cea Fontenla 1 year ago
parent
commit
595d907f61

+ 2 - 2
docs/reference/esql/functions/aggregation-functions.asciidoc

@@ -16,7 +16,7 @@ The <<esql-stats-by>> command supports these aggregate functions:
 * <<esql-agg-median-absolute-deviation>>
 * <<esql-min>>
 * <<esql-percentile>>
-* experimental:[] <<esql-agg-st-centroid>>
+* experimental:[] <<esql-st_centroid_agg>>
 * <<esql-sum>>
 * <<esql-top>>
 * <<esql-agg-values>>
@@ -27,11 +27,11 @@ include::count.asciidoc[]
 include::count-distinct.asciidoc[]
 include::median.asciidoc[]
 include::median-absolute-deviation.asciidoc[]
-include::st_centroid_agg.asciidoc[]
 include::layout/avg.asciidoc[]
 include::layout/max.asciidoc[]
 include::layout/min.asciidoc[]
 include::layout/percentile.asciidoc[]
+include::layout/st_centroid_agg.asciidoc[]
 include::layout/sum.asciidoc[]
 include::layout/top.asciidoc[]
 include::values.asciidoc[]

+ 5 - 0
docs/reference/esql/functions/description/st_centroid_agg.asciidoc

@@ -0,0 +1,5 @@
+// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+*Description*
+
+Calculate the spatial centroid over a field with spatial point geometry type.

+ 13 - 0
docs/reference/esql/functions/examples/st_centroid_agg.asciidoc

@@ -0,0 +1,13 @@
+// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+*Example*
+
+[source.merge.styled,esql]
+----
+include::{esql-specs}/spatial.csv-spec[tag=st_centroid_agg-airports]
+----
+[%header.monospaced.styled,format=dsv,separator=|]
+|===
+include::{esql-specs}/spatial.csv-spec[tag=st_centroid_agg-airports-result]
+|===
+

+ 35 - 0
docs/reference/esql/functions/kibana/definition/st_centroid_agg.json

@@ -0,0 +1,35 @@
+{
+  "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it.",
+  "type" : "agg",
+  "name" : "st_centroid_agg",
+  "description" : "Calculate the spatial centroid over a field with spatial point geometry type.",
+  "signatures" : [
+    {
+      "params" : [
+        {
+          "name" : "field",
+          "type" : "cartesian_point",
+          "optional" : false,
+          "description" : ""
+        }
+      ],
+      "variadic" : false,
+      "returnType" : "cartesian_point"
+    },
+    {
+      "params" : [
+        {
+          "name" : "field",
+          "type" : "geo_point",
+          "optional" : false,
+          "description" : ""
+        }
+      ],
+      "variadic" : false,
+      "returnType" : "geo_point"
+    }
+  ],
+  "examples" : [
+    "FROM airports\n| STATS centroid=ST_CENTROID_AGG(location)"
+  ]
+}

+ 11 - 0
docs/reference/esql/functions/kibana/docs/st_centroid_agg.md

@@ -0,0 +1,11 @@
+<!--
+This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+-->
+
+### ST_CENTROID_AGG
+Calculate the spatial centroid over a field with spatial point geometry type.
+
+```
+FROM airports
+| STATS centroid=ST_CENTROID_AGG(location)
+```

+ 15 - 0
docs/reference/esql/functions/layout/st_centroid_agg.asciidoc

@@ -0,0 +1,15 @@
+// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+[discrete]
+[[esql-st_centroid_agg]]
+=== `ST_CENTROID_AGG`
+
+*Syntax*
+
+[.text-center]
+image::esql/functions/signature/st_centroid_agg.svg[Embedded,opts=inline]
+
+include::../parameters/st_centroid_agg.asciidoc[]
+include::../description/st_centroid_agg.asciidoc[]
+include::../types/st_centroid_agg.asciidoc[]
+include::../examples/st_centroid_agg.asciidoc[]

+ 6 - 0
docs/reference/esql/functions/parameters/st_centroid_agg.asciidoc

@@ -0,0 +1,6 @@
+// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+*Parameters*
+
+`field`::
+

+ 1 - 0
docs/reference/esql/functions/signature/st_centroid_agg.svg

@@ -0,0 +1 @@
+<svg version="1.1" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns="http://www.w3.org/2000/svg" width="384" height="46" viewBox="0 0 384 46"><defs><style type="text/css">#guide .c{fill:none;stroke:#222222;}#guide .k{fill:#000000;font-family:Roboto Mono,Sans-serif;font-size:20px;}#guide .s{fill:#e4f4ff;stroke:#222222;}#guide .syn{fill:#8D8D8D;font-family:Roboto Mono,Sans-serif;font-size:20px;}</style></defs><path class="c" d="M0 31h5m200 0h10m32 0h10m80 0h10m32 0h5"/><rect class="s" x="5" y="5" width="200" height="36"/><text class="k" x="15" y="31">ST_CENTROID_AGG</text><rect class="s" x="215" y="5" width="32" height="36" rx="7"/><text class="syn" x="225" y="31">(</text><rect class="s" x="257" y="5" width="80" height="36" rx="7"/><text class="k" x="267" y="31">field</text><rect class="s" x="347" y="5" width="32" height="36" rx="7"/><text class="syn" x="357" y="31">)</text></svg>

+ 10 - 0
docs/reference/esql/functions/types/st_centroid_agg.asciidoc

@@ -0,0 +1,10 @@
+// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+*Supported types*
+
+[%header.monospaced.styled,format=dsv,separator=|]
+|===
+field | result
+cartesian_point | cartesian_point
+geo_point | geo_point
+|===

+ 1 - 1
x-pack/plugin/esql/qa/testFixtures/src/main/resources/meta.csv-spec

@@ -314,7 +314,7 @@ sin           |Returns the {wikipedia}/Sine_and_cosine[Sine] trigonometric funct
 sinh          |Returns the {wikipedia}/Hyperbolic_functions[hyperbolic sine] of an angle.
 split         |Split a single valued string into multiple strings.
 sqrt          |Returns the square root of a number. The input can be any numeric value, the return value is always a double. Square roots of negative numbers and infinities are null.
-st_centroid_ag|The centroid of a spatial field.
+st_centroid_ag|Calculate the spatial centroid over a field with spatial point geometry type.
 st_contains   |Returns whether the first geometry contains the second geometry. This is the inverse of the <<esql-st_within,ST_WITHIN>> function.
 st_disjoint   |Returns whether the two geometries or geometry columns are disjoint. This is the inverse of the <<esql-st_intersects,ST_INTERSECTS>> function. In mathematical terms: ST_Disjoint(A, B) ⇔ A ⋂ B = ∅
 st_distance   |Computes the distance between two points. For cartesian geometries, this is the pythagorean distance in the same units as the original coordinates. For geographic geometries, this is the circular distance along the great circle in meters.

+ 7 - 1
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialCentroid.java

@@ -18,6 +18,7 @@ import org.elasticsearch.xpack.esql.core.expression.Expression;
 import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
 import org.elasticsearch.xpack.esql.core.tree.Source;
 import org.elasticsearch.xpack.esql.core.type.DataType;
+import org.elasticsearch.xpack.esql.expression.function.Example;
 import org.elasticsearch.xpack.esql.expression.function.FunctionInfo;
 import org.elasticsearch.xpack.esql.expression.function.Param;
 import org.elasticsearch.xpack.esql.planner.ToAggregator;
@@ -38,7 +39,12 @@ public class SpatialCentroid extends SpatialAggregateFunction implements ToAggre
         SpatialCentroid::new
     );
 
-    @FunctionInfo(returnType = { "geo_point", "cartesian_point" }, description = "The centroid of a spatial field.", isAggregation = true)
+    @FunctionInfo( // Metadata for ST_CENTROID_AGG; description and example match the generated st_centroid_agg docs.
+        returnType = { "geo_point", "cartesian_point" },
+        description = "Calculate the spatial centroid over a field with spatial point geometry type.",
+        isAggregation = true,
+        examples = @Example(file = "spatial", tag = "st_centroid_agg-airports") // tagged query in spatial.csv-spec
+    )
     public SpatialCentroid(Source source, @Param(name = "field", type = { "geo_point", "cartesian_point" }) Expression field) {
         super(source, field, false); // NOTE(review): the trailing `false` is interpreted by SpatialAggregateFunction - confirm its meaning
     }

+ 1 - 1
x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/FunctionName.java

@@ -13,7 +13,7 @@ import java.lang.annotation.RetentionPolicy;
 import java.lang.annotation.Target;
 
 /**
- * Tests that extend {@link AbstractScalarFunctionTestCase} can use this annotation to specify the name of the function
+ * Tests that extend {@link AbstractFunctionTestCase} can use this annotation to specify the name of the function
  * to use when generating documentation files while running tests.
  * If this is not used, the name will be deduced from the test class name, by removing the "Tests" suffix, and converting
  * the class name to snake case. This annotation can be used to override that behavior, for cases where the deduced name

+ 60 - 0
x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/MultiRowTestCaseSupplier.java

@@ -10,6 +10,8 @@ package org.elasticsearch.xpack.esql.expression.function;
 import org.apache.lucene.document.InetAddressPoint;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.common.network.InetAddresses;
+import org.elasticsearch.geo.GeometryTestUtils;
+import org.elasticsearch.geo.ShapeTestUtils;
 import org.elasticsearch.test.ESTestCase;
 import org.elasticsearch.xpack.esql.core.type.DataType;
 
@@ -18,6 +20,8 @@ import java.util.List;
 
 import static org.elasticsearch.test.ESTestCase.randomBoolean;
 import static org.elasticsearch.test.ESTestCase.randomList;
+import static org.elasticsearch.xpack.esql.core.util.SpatialCoordinateTypes.CARTESIAN;
+import static org.elasticsearch.xpack.esql.core.util.SpatialCoordinateTypes.GEO;
 import static org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier.TypedDataSupplier;
 
 /**
@@ -321,4 +325,60 @@ public final class MultiRowTestCaseSupplier {
             )
         );
     }
+
+    public static List<TypedDataSupplier> geoPointCases(int minRows, int maxRows, boolean withAltitude) {
+        List<TypedDataSupplier> cases = new ArrayList<>();
+
+        cases.add(
+            new TypedDataSupplier(
+                "<no alt geo_points>",
+                () -> randomList(minRows, maxRows, () -> GEO.asWkb(GeometryTestUtils.randomPoint(false))),
+                DataType.GEO_POINT,
+                false,
+                true
+            )
+        );
+
+        if (withAltitude) {
+            cases.add(
+                new TypedDataSupplier(
+                    "<with alt geo_points>",
+                    () -> randomList(minRows, maxRows, () -> GEO.asWkb(GeometryTestUtils.randomPoint(true))),
+                    DataType.GEO_POINT,
+                    false,
+                    true
+                )
+            );
+        }
+
+        return cases;
+    }
+
+    public static List<TypedDataSupplier> cartesianPointCases(int minRows, int maxRows, boolean withAltitude) {
+        List<TypedDataSupplier> cases = new ArrayList<>();
+
+        cases.add(
+            new TypedDataSupplier(
+                "<no alt cartesian_points>",
+                () -> randomList(minRows, maxRows, () -> CARTESIAN.asWkb(ShapeTestUtils.randomPoint(false))),
+                DataType.CARTESIAN_POINT,
+                false,
+                true
+            )
+        );
+
+        if (withAltitude) {
+            cases.add(
+                new TypedDataSupplier(
+                    "<with alt cartesian_points>",
+                    () -> randomList(minRows, maxRows, () -> CARTESIAN.asWkb(ShapeTestUtils.randomPoint(true))),
+                    DataType.CARTESIAN_POINT,
+                    false,
+                    true
+                )
+            );
+        }
+
+        return cases;
+    }
 }

+ 12 - 0
x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/TestCaseSupplier.java

@@ -1076,12 +1076,24 @@ public record TestCaseSupplier(String name, List<DataType> types, Supplier<TestC
         return cartesianShapeCases(ESTestCase::randomBoolean);
     }
 
+    /**
+     * Generate cases for {@link DataType#GEO_POINT}.
+     * <p>
+     *     The result holds a single supplier, named {@code <geo_point>}, whose values are produced by
+     *     {@code GeometryTestUtils.randomPoint} and encoded with {@code GEO.asWkb}. {@code hasAlt} is
+     *     queried each time a value is generated, and its answer is passed straight to {@code randomPoint}.
+     * </p>
+     * <p>
+     *     For multi-row parameters, see {@link MultiRowTestCaseSupplier#geoPointCases}.
+     * </p>
+     *
+     * @param hasAlt supplies the flag given to {@code randomPoint} for every generated value
+     * @return an immutable, single-element list of suppliers
+     */
     public static List<TypedDataSupplier> geoPointCases(Supplier<Boolean> hasAlt) {
         return List.of(
             new TypedDataSupplier("<geo_point>", () -> GEO.asWkb(GeometryTestUtils.randomPoint(hasAlt.get())), DataType.GEO_POINT)
         );
     }
 
+    /**
+     * Generate cases for {@link DataType#CARTESIAN_POINT}.
+     * <p>
+     *     For multi-row parameters, see {@link MultiRowTestCaseSupplier#cartesianPointCases}.
+     * </p>
+     */
     public static List<TypedDataSupplier> cartesianPointCases(Supplier<Boolean> hasAlt) {
         return List.of(
             new TypedDataSupplier(

+ 89 - 0
x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialCentroidTests.java

@@ -0,0 +1,89 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.esql.expression.function.aggregate;
+
+import com.carrotsearch.randomizedtesting.annotations.Name;
+import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
+
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.geometry.Point;
+import org.elasticsearch.geometry.utils.GeometryValidator;
+import org.elasticsearch.geometry.utils.WellKnownBinary;
+import org.elasticsearch.search.aggregations.metrics.CompensatedSum;
+import org.elasticsearch.xpack.esql.core.expression.Expression;
+import org.elasticsearch.xpack.esql.core.tree.Source;
+import org.elasticsearch.xpack.esql.core.type.DataType;
+import org.elasticsearch.xpack.esql.expression.function.AbstractAggregationTestCase;
+import org.elasticsearch.xpack.esql.expression.function.FunctionName;
+import org.elasticsearch.xpack.esql.expression.function.MultiRowTestCaseSupplier;
+import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier;
+
+import java.nio.ByteOrder;
+import java.util.List;
+import java.util.function.Supplier;
+import java.util.stream.Stream;
+
+import static org.hamcrest.Matchers.equalTo;
+
+@FunctionName("st_centroid_agg")
+public class SpatialCentroidTests extends AbstractAggregationTestCase {
+    public SpatialCentroidTests(@Name("TestCase") Supplier<TestCaseSupplier.TestCase> testCaseSupplier) {
+        this.testCase = testCaseSupplier.get();
+    }
+
+    @ParametersFactory
+    public static Iterable<Object[]> parameters() {
+        var suppliers = Stream.of(
+            MultiRowTestCaseSupplier.geoPointCases(1, 1000, true),
+            MultiRowTestCaseSupplier.cartesianPointCases(1, 1000, true)
+        ).flatMap(List::stream).map(SpatialCentroidTests::makeSupplier).toList();
+
+        // The withNoRowsExpectingNull() cases don't work here, as this aggregator doesn't return nulls.
+        // return parameterSuppliersFromTypedDataWithDefaultChecks(suppliers);
+        return parameterSuppliersFromTypedData(randomizeBytesRefsOffset(suppliers));
+    }
+
+    @Override
+    protected Expression build(Source source, List<Expression> args) {
+        return new SpatialCentroid(source, args.get(0));
+    }
+
+    private static TestCaseSupplier makeSupplier(TestCaseSupplier.TypedDataSupplier fieldSupplier) {
+        if (fieldSupplier.type() != DataType.CARTESIAN_POINT && fieldSupplier.type() != DataType.GEO_POINT) {
+            throw new IllegalStateException("Unexpected type: " + fieldSupplier.type());
+        }
+
+        return new TestCaseSupplier(List.of(fieldSupplier.type()), () -> {
+            var fieldTypedData = fieldSupplier.get();
+            var values = fieldTypedData.multiRowData();
+
+            var xSum = new CompensatedSum(0, 0);
+            var ySum = new CompensatedSum(0, 0);
+            long count = 0;
+
+            for (var value : values) {
+                var wkb = (BytesRef) value;
+                var point = (Point) WellKnownBinary.fromWKB(GeometryValidator.NOOP, false, wkb.bytes, wkb.offset, wkb.length);
+                xSum.add(point.getX());
+                ySum.add(point.getY());
+                count++;
+            }
+
+            var expected = new BytesRef(
+                WellKnownBinary.toWKB(new Point(xSum.value() / count, ySum.value() / count), ByteOrder.LITTLE_ENDIAN)
+            );
+
+            return new TestCaseSupplier.TestCase(
+                List.of(fieldTypedData),
+                "SpatialCentroid[field=Attribute[channel=0]]",
+                fieldTypedData.type(),
+                equalTo(expected)
+            );
+        });
+    }
+}