Przeglądaj źródła

ES|QL: add geo tests for mv_dedupe (#109342)

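Add more unit tests for the MV_DEDUPE function, covering geo_point,
geo_shape, cartesian_point and cartesian_shape. This also declares those
four types in the function's signature annotations and updates the
generated docs (type table, Kibana definition) and the meta.csv-spec
expectations to match.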

Fixes https://github.com/elastic/elasticsearch/issues/108982
Luigi Dell'Aquila 1 rok temu
rodzic
commit
21952c7e36

+ 48 - 0
docs/reference/esql/functions/kibana/definition/mv_dedupe.json

@@ -17,6 +17,30 @@
       "variadic" : false,
       "returnType" : "boolean"
     },
+    {
+      "params" : [
+        {
+          "name" : "field",
+          "type" : "cartesian_point",
+          "optional" : false,
+          "description" : "Multivalue expression."
+        }
+      ],
+      "variadic" : false,
+      "returnType" : "cartesian_point"
+    },
+    {
+      "params" : [
+        {
+          "name" : "field",
+          "type" : "cartesian_shape",
+          "optional" : false,
+          "description" : "Multivalue expression."
+        }
+      ],
+      "variadic" : false,
+      "returnType" : "cartesian_shape"
+    },
     {
       "params" : [
         {
@@ -41,6 +65,30 @@
       "variadic" : false,
       "returnType" : "double"
     },
+    {
+      "params" : [
+        {
+          "name" : "field",
+          "type" : "geo_point",
+          "optional" : false,
+          "description" : "Multivalue expression."
+        }
+      ],
+      "variadic" : false,
+      "returnType" : "geo_point"
+    },
+    {
+      "params" : [
+        {
+          "name" : "field",
+          "type" : "geo_shape",
+          "optional" : false,
+          "description" : "Multivalue expression."
+        }
+      ],
+      "variadic" : false,
+      "returnType" : "geo_shape"
+    },
     {
       "params" : [
         {

+ 4 - 0
docs/reference/esql/functions/types/mv_dedupe.asciidoc

@@ -6,8 +6,12 @@
 |===
 field | result
 boolean | boolean
+cartesian_point | cartesian_point
+cartesian_shape | cartesian_shape
 datetime | datetime
 double | double
+geo_point | geo_point
+geo_shape | geo_shape
 integer | integer
 ip | ip
 keyword | keyword

+ 3 - 3
x-pack/plugin/esql/qa/testFixtures/src/main/resources/meta.csv-spec

@@ -45,7 +45,7 @@ double e()
 "double mv_avg(number:double|integer|long|unsigned_long)"
 "keyword mv_concat(string:text|keyword, delim:text|keyword)"
 "integer mv_count(field:boolean|cartesian_point|cartesian_shape|date|double|geo_point|geo_shape|integer|ip|keyword|long|text|unsigned_long|version)"
-"boolean|date|double|integer|ip|keyword|long|text|version mv_dedupe(field:boolean|date|double|integer|ip|keyword|long|text|version)"
+"boolean|cartesian_point|cartesian_shape|date|double|geo_point|geo_shape|integer|ip|keyword|long|text|version mv_dedupe(field:boolean|cartesian_point|cartesian_shape|date|double|geo_point|geo_shape|integer|ip|keyword|long|text|version)"
 "boolean|cartesian_point|cartesian_shape|date|double|geo_point|geo_shape|integer|ip|keyword|long|text|unsigned_long|version mv_first(field:boolean|cartesian_point|cartesian_shape|date|double|geo_point|geo_shape|integer|ip|keyword|long|text|unsigned_long|version)"
 "boolean|cartesian_point|cartesian_shape|date|double|geo_point|geo_shape|integer|ip|keyword|long|text|unsigned_long|version mv_last(field:boolean|cartesian_point|cartesian_shape|date|double|geo_point|geo_shape|integer|ip|keyword|long|text|unsigned_long|version)"
 "boolean|date|double|integer|ip|keyword|long|text|unsigned_long|version mv_max(field:boolean|date|double|integer|ip|keyword|long|text|unsigned_long|version)"
@@ -160,7 +160,7 @@ min           |number                              |"double|integer|long"
 mv_avg        |number                              |"double|integer|long|unsigned_long"                                                                                               |Multivalue expression.
 mv_concat     |[string, delim]                     |["text|keyword", "text|keyword"]                                                                                                  |[Multivalue expression., Delimiter.]
 mv_count      |field                               |"boolean|cartesian_point|cartesian_shape|date|double|geo_point|geo_shape|integer|ip|keyword|long|text|unsigned_long|version"      |Multivalue expression.
-mv_dedupe     |field                               |"boolean|date|double|integer|ip|keyword|long|text|version"                                                                        |Multivalue expression.
+mv_dedupe     |field                               |"boolean|cartesian_point|cartesian_shape|date|double|geo_point|geo_shape|integer|ip|keyword|long|text|version"                    |Multivalue expression.
 mv_first      |field                               |"boolean|cartesian_point|cartesian_shape|date|double|geo_point|geo_shape|integer|ip|keyword|long|text|unsigned_long|version"      |Multivalue expression.
 mv_last       |field                               |"boolean|cartesian_point|cartesian_shape|date|double|geo_point|geo_shape|integer|ip|keyword|long|text|unsigned_long|version"      |Multivalue expression.
 mv_max        |field                               |"boolean|date|double|integer|ip|keyword|long|text|unsigned_long|version"                                                          |Multivalue expression.
@@ -393,7 +393,7 @@ min           |"double|integer|long"
 mv_avg        |double                                                                                                                      |false                       |false           |false
 mv_concat     |keyword                                                                                                                     |[false, false]              |false           |false
 mv_count      |integer                                                                                                                     |false                       |false           |false
-mv_dedupe     |"boolean|date|double|integer|ip|keyword|long|text|version"                                                                  |false                       |false           |false
+mv_dedupe     |"boolean|cartesian_point|cartesian_shape|date|double|geo_point|geo_shape|integer|ip|keyword|long|text|version"              |false                       |false           |false
 mv_first      |"boolean|cartesian_point|cartesian_shape|date|double|geo_point|geo_shape|integer|ip|keyword|long|text|unsigned_long|version"|false                       |false           |false
 mv_last       |"boolean|cartesian_point|cartesian_shape|date|double|geo_point|geo_shape|integer|ip|keyword|long|text|unsigned_long|version"|false                       |false           |false
 mv_max        |"boolean|date|double|integer|ip|keyword|long|text|unsigned_long|version"                                                    |false                       |false           |false

+ 29 - 3
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvDedupe.java

@@ -26,9 +26,22 @@ import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isTyp
  * Removes duplicate values from a multivalued field.
  */
 public class MvDedupe extends AbstractMultivalueFunction {
-    // @TODO: add cartesian_point, geo_point, unsigned_long
+    // @TODO: add unsigned_long
     @FunctionInfo(
-        returnType = { "boolean", "date", "double", "integer", "ip", "keyword", "long", "text", "version" },
+        returnType = {
+            "boolean",
+            "cartesian_point",
+            "cartesian_shape",
+            "date",
+            "double",
+            "geo_point",
+            "geo_shape",
+            "integer",
+            "ip",
+            "keyword",
+            "long",
+            "text",
+            "version" },
         description = "Remove duplicate values from a multivalued field.",
         note = "`MV_DEDUPE` may, but won't always, sort the values in the column.",
         examples = @Example(file = "string", tag = "mv_dedupe")
@@ -37,7 +50,20 @@ public class MvDedupe extends AbstractMultivalueFunction {
         Source source,
         @Param(
             name = "field",
-            type = { "boolean", "date", "double", "integer", "ip", "keyword", "long", "text", "version" },
+            type = {
+                "boolean",
+                "cartesian_point",
+                "cartesian_shape",
+                "date",
+                "double",
+                "geo_point",
+                "geo_shape",
+                "integer",
+                "ip",
+                "keyword",
+                "long",
+                "text",
+                "version" },
             description = "Multivalue expression."
         ) Expression field
     ) {

+ 6 - 0
x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvDedupeTests.java

@@ -13,6 +13,7 @@ import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
 import org.elasticsearch.xpack.esql.core.expression.Expression;
 import org.elasticsearch.xpack.esql.core.tree.Source;
 import org.elasticsearch.xpack.esql.core.type.DataType;
+import org.elasticsearch.xpack.esql.core.type.DataTypes;
 import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier;
 import org.hamcrest.Matcher;
 import org.hamcrest.Matchers;
@@ -42,6 +43,11 @@ public class MvDedupeTests extends AbstractMultivalueFunctionTestCase {
         doubles(cases, "mv_dedupe", "MvDedupe", (size, values) -> getMatcher(values.mapToObj(Double::valueOf)));
         ints(cases, "mv_dedupe", "MvDedupe", (size, values) -> getMatcher(values.mapToObj(Integer::valueOf)));
         longs(cases, "mv_dedupe", "MvDedupe", (size, values) -> getMatcher(values.mapToObj(Long::valueOf)));
+        cartesianPoints(cases, "mv_dedupe", "MvDedupe", (size, values) -> getMatcher(values));
+        cartesianShape(cases, "mv_dedupe", "MvDedupe", DataTypes.CARTESIAN_SHAPE, (size, values) -> getMatcher(values));
+        geoPoints(cases, "mv_dedupe", "MvDedupe", (size, values) -> getMatcher(values));
+        geoShape(cases, "mv_dedupe", "MvDedupe", DataTypes.GEO_SHAPE, (size, values) -> getMatcher(values));
+
         // TODO switch extraction to BigInteger so this just works.
         // unsignedLongs(cases, "mv_dedupe", "MvDedupe", (size, values) -> getMatcher(values));
         return parameterSuppliersFromTypedData(cases);