1
0
Эх сурвалжийг харах

Support text field in count_distinct (#101334)

Previously, we did not load text fields and did not register them to the 
count_distinct operator. However, we have now loaded the text fields and
should support them with count_distinct.

Closes #101330
Nhat Nguyen 2 жил өмнө
parent
commit
76d1ab6509

+ 28 - 0
x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/EsqlActionIT.java

@@ -35,11 +35,13 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Comparator;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 import java.util.OptionalDouble;
+import java.util.Set;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.TimeUnit;
@@ -1303,6 +1305,32 @@ public class EsqlActionIT extends AbstractEsqlIntegTestCase {
         }
     }
 
+    public void testCountTextField() {
+        assertAcked(client().admin().indices().prepareCreate("test_count").setMapping("name", "type=text"));
+        int numDocs = between(10, 1000);
+        Set<String> names = new HashSet<>();
+        for (int i = 0; i < numDocs; i++) {
+            String name = "name-" + randomIntBetween(1, 100);
+            names.add(name);
+            IndexRequestBuilder indexRequest = client().prepareIndex("test_count").setSource("name", name);
+            if (randomInt(100) < 5) {
+                indexRequest.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE);
+            }
+            indexRequest.get();
+        }
+        client().admin().indices().prepareRefresh("test_count").get();
+        try (EsqlQueryResponse resp = run("FROM test_count | stats COUNT_DISTINCT(name)")) {
+            Iterator<Object> row = resp.values().next();
+            assertThat(row.next(), equalTo((long) names.size()));
+            assertFalse(row.hasNext());
+        }
+        try (EsqlQueryResponse resp = run("FROM test_count | stats COUNT(name)")) {
+            Iterator<Object> row = resp.values().next();
+            assertThat(row.next(), equalTo((long) numDocs));
+            assertFalse(row.hasNext());
+        }
+    }
+
     private void createNestedMappingIndex(String indexName) throws IOException {
         XContentBuilder builder = JsonXContent.contentBuilder();
         builder.startObject();

+ 1 - 1
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/CountDistinct.java

@@ -87,7 +87,7 @@ public class CountDistinct extends AggregateFunction implements OptionalArgument
         if (type == DataTypes.DOUBLE) {
             return new CountDistinctDoubleAggregatorFunctionSupplier(bigArrays, inputChannels, precision);
         }
-        if (type == DataTypes.KEYWORD || type == DataTypes.IP) {
+        if (type == DataTypes.KEYWORD || type == DataTypes.IP || type == DataTypes.TEXT) {
             return new CountDistinctBytesRefAggregatorFunctionSupplier(bigArrays, inputChannels, precision);
         }
         throw EsqlIllegalArgumentException.illegalDataType(type);

+ 1 - 1
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AggregateMapper.java

@@ -225,7 +225,7 @@ public class AggregateMapper {
             return "Long";
         } else if (type.equals(DataTypes.DOUBLE)) {
             return "Double";
-        } else if (type.equals(DataTypes.KEYWORD) || type.equals(DataTypes.IP)) {
+        } else if (type.equals(DataTypes.KEYWORD) || type.equals(DataTypes.IP) || type.equals(DataTypes.TEXT)) {
             return "BytesRef";
         } else {
             throw new EsqlIllegalArgumentException("illegal agg type: " + type.typeName());