Browse Source

Add scripting, supported-type tests to ValueCount (#53500)

Also adds a few small notes to the documentation regarding potentially
unintuitive behavior
Zachary Tong 5 years ago
parent
commit
84a59f8447

+ 7 - 0
docs/reference/aggregations/metrics/valuecount-aggregation.asciidoc

@@ -6,6 +6,9 @@ These values can be extracted either from specific fields in the documents, or b
 this aggregator will be used in conjunction with other single-value aggregations. For example, when computing the `avg`
 one might be interested in the number of values the average is computed over.
 
+`value_count` does not de-duplicate values, so even if a field has duplicates (or a script generates multiple
+identical values for a single document), each value will be counted individually.
+
 [source,console]
 --------------------------------------------------
 POST /sales/_search?size=0
@@ -77,3 +80,7 @@ POST /sales/_search?size=0
 }
 --------------------------------------------------
 // TEST[setup:sales,stored_example_script]
+
+NOTE: Because `value_count` is designed to work with any field it internally treats all values as simple bytes.
+Due to this implementation, if the `_value` script variable is used to fetch a value instead of accessing the field
+directly (e.g. a "value script"), the field value will be returned as a string instead of its native format.

+ 151 - 0
server/src/test/java/org/elasticsearch/search/aggregations/metrics/ValueCountAggregatorTests.java

@@ -20,10 +20,12 @@
 package org.elasticsearch.search.aggregations.metrics;
 
 import org.apache.lucene.document.BinaryDocValuesField;
+import org.apache.lucene.document.Document;
 import org.apache.lucene.document.IntPoint;
 import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.document.SortedDocValuesField;
 import org.apache.lucene.document.SortedNumericDocValuesField;
+import org.apache.lucene.document.SortedSetDocValuesField;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.RandomIndexWriter;
@@ -35,6 +37,7 @@ import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.common.CheckedConsumer;
 import org.elasticsearch.common.geo.GeoPoint;
+import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.index.mapper.BooleanFieldMapper;
 import org.elasticsearch.index.mapper.DateFieldMapper;
 import org.elasticsearch.index.mapper.GeoPointFieldMapper;
@@ -44,14 +47,28 @@ import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.mapper.NumberFieldMapper;
 import org.elasticsearch.index.mapper.RangeFieldMapper;
 import org.elasticsearch.index.mapper.RangeType;
+import org.elasticsearch.script.MockScriptEngine;
+import org.elasticsearch.script.Script;
+import org.elasticsearch.script.ScriptEngine;
+import org.elasticsearch.script.ScriptModule;
+import org.elasticsearch.script.ScriptService;
+import org.elasticsearch.script.ScriptType;
+import org.elasticsearch.search.aggregations.AggregationBuilder;
 import org.elasticsearch.search.aggregations.AggregatorTestCase;
 import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper;
+import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
 import org.elasticsearch.search.aggregations.support.ValueType;
+import org.elasticsearch.search.aggregations.support.ValuesSourceType;
 
 import java.io.IOException;
 import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
 import java.util.Set;
 import java.util.function.Consumer;
+import java.util.function.Function;
 
 import static java.util.Collections.singleton;
 
@@ -59,6 +76,41 @@ public class ValueCountAggregatorTests extends AggregatorTestCase {
 
     private static final String FIELD_NAME = "field";
 
+    /** Script that parses the {@code _value} provided by the aggs framework as a double and returns it plus one. */
+    private static final String VALUE_SCRIPT = "_value";
+    private static final String SINGLE_SCRIPT = "single";
+
+    /**
+     * Creates the aggregation used for the type test: a {@link ValueCountAggregationBuilder} named "foo"
+     * that targets {@code fieldName}. The {@code fieldType} argument is not consulted.
+     */
+    @Override
+    protected AggregationBuilder createAggBuilderForTypeTest(MappedFieldType fieldType, String fieldName) {
+        ValueCountAggregationBuilder valueCount = new ValueCountAggregationBuilder("foo", null).field(fieldName);
+        return valueCount;
+    }
+
+    @Override
+    protected List<ValuesSourceType> getSupportedValuesSourceTypes() {
+        return List.of(
+            CoreValuesSourceType.NUMERIC,
+            CoreValuesSourceType.BYTES,
+            CoreValuesSourceType.GEOPOINT,
+            CoreValuesSourceType.RANGE,
+            CoreValuesSourceType.HISTOGRAM
+        );
+    }
+
+    /**
+     * Builds a {@link ScriptService} whose only engine is a {@link MockScriptEngine} that knows the
+     * {@code VALUE_SCRIPT} and {@code SINGLE_SCRIPT} scripts referenced by the tests in this class.
+     */
+    @Override
+    protected ScriptService getMockScriptService() {
+        Map<String, Function<Map<String, Object>, Object>> mockScripts = new HashMap<>();
+
+        // Reads "_value" as a String, parses it as a double and returns that number plus one.
+        mockScripts.put(VALUE_SCRIPT, params -> {
+            String rawValue = (String) params.get("_value");
+            return Double.valueOf(rawValue) + 1;
+        });
+        // Ignores its variables and always returns 1.
+        mockScripts.put(SINGLE_SCRIPT, params -> 1);
+
+        MockScriptEngine engine = new MockScriptEngine(MockScriptEngine.NAME, mockScripts, Collections.emptyMap());
+        Map<String, ScriptEngine> enginesByType = Collections.singletonMap(engine.getType(), engine);
+
+        return new ScriptService(Settings.EMPTY, enginesByType, ScriptModule.CORE_CONTEXTS);
+    }
+
     public void testNoDocs() throws IOException {
         for (ValueType valueType : ValueType.values()) {
             testCase(new MatchAllDocsQuery(), valueType, iw -> {
@@ -185,6 +237,105 @@ public class ValueCountAggregatorTests extends AggregatorTestCase {
         }, fieldType);
     }
 
+    /**
+     * Applies the {@code VALUE_SCRIPT} value script to a numeric field. Three documents are indexed with one
+     * value each (7, 8 and 9) and the aggregation is expected to report a count of 3.
+     */
+    public void testValueScriptNumber() throws IOException {
+        Script valueScript = new Script(ScriptType.INLINE, MockScriptEngine.NAME, VALUE_SCRIPT, Collections.emptyMap());
+        ValueCountAggregationBuilder builder = new ValueCountAggregationBuilder("name", null)
+            .field(FIELD_NAME)
+            .script(valueScript);
+
+        MappedFieldType numericField = createMappedFieldType(ValueType.NUMERIC);
+        numericField.setName(FIELD_NAME);
+        numericField.setHasDocValues(true);
+
+        testCase(builder, new MatchAllDocsQuery(), writer -> {
+            // One document per value: 7, 8, 9.
+            for (long value = 7; value <= 9; value++) {
+                writer.addDocument(singleton(new NumericDocValuesField(FIELD_NAME, value)));
+            }
+        }, count -> {
+            assertEquals(3, count.getValue(), 0);
+            assertTrue(AggregationInspectionHelper.hasValue(count));
+        }, numericField);
+    }
+
+    /**
+     * Applies the {@code SINGLE_SCRIPT} script (which always returns 1) to a multi-valued numeric field.
+     * Each of the three documents holds the same number twice, so six values are indexed in total and the
+     * aggregation is expected to count all six, because {@code value_count} does not de-duplicate.
+     */
+    public void testSingleScriptNumber() throws IOException {
+        // Attach the "single" script; without it the test never exercises the script it is named after.
+        ValueCountAggregationBuilder aggregationBuilder = new ValueCountAggregationBuilder("name", null)
+            .field(FIELD_NAME)
+            .script(new Script(ScriptType.INLINE, MockScriptEngine.NAME, SINGLE_SCRIPT, Collections.emptyMap()));
+
+        MappedFieldType fieldType = createMappedFieldType(ValueType.NUMERIC);
+        fieldType.setName(FIELD_NAME);
+        fieldType.setHasDocValues(true);
+
+        testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> {
+            Document doc = new Document();
+            doc.add(new SortedNumericDocValuesField(FIELD_NAME, 7));
+            doc.add(new SortedNumericDocValuesField(FIELD_NAME, 7));
+            iw.addDocument(doc);
+
+            doc = new Document();
+            doc.add(new SortedNumericDocValuesField(FIELD_NAME, 8));
+            doc.add(new SortedNumericDocValuesField(FIELD_NAME, 8));
+            iw.addDocument(doc);
+
+            doc = new Document();
+            doc.add(new SortedNumericDocValuesField(FIELD_NAME, 1));
+            doc.add(new SortedNumericDocValuesField(FIELD_NAME, 1));
+            iw.addDocument(doc);
+        }, card -> {
+            // note: this is 6, even though the script always returns the same value.  ValueCount does not de-dupe
+            assertEquals(6, card.getValue(), 0);
+            assertTrue(AggregationInspectionHelper.hasValue(card));
+        }, fieldType);
+    }
+
+    /**
+     * Applies the {@code VALUE_SCRIPT} value script to a string field. Three documents are indexed with one
+     * term each ("1", "2" and "3") and the aggregation is expected to report a count of 3.
+     */
+    public void testValueScriptString() throws IOException {
+        Script valueScript = new Script(ScriptType.INLINE, MockScriptEngine.NAME, VALUE_SCRIPT, Collections.emptyMap());
+        ValueCountAggregationBuilder builder = new ValueCountAggregationBuilder("name", null)
+            .field(FIELD_NAME)
+            .script(valueScript);
+
+        MappedFieldType stringField = createMappedFieldType(ValueType.STRING);
+        stringField.setName(FIELD_NAME);
+        stringField.setHasDocValues(true);
+
+        testCase(builder, new MatchAllDocsQuery(), writer -> {
+            // One document per term; the terms are numeric strings so the value script can parse them.
+            for (String term : new String[] { "1", "2", "3" }) {
+                writer.addDocument(singleton(new SortedDocValuesField(FIELD_NAME, new BytesRef(term))));
+            }
+        }, count -> {
+            assertEquals(3, count.getValue(), 0);
+            assertTrue(AggregationInspectionHelper.hasValue(count));
+        }, stringField);
+    }
+
+    /**
+     * Applies the {@code SINGLE_SCRIPT} script (which always returns 1) to a multi-valued string field.
+     * Three documents with two distinct terms each are indexed, six values in total, and the aggregation is
+     * expected to count all six, because {@code value_count} does not de-duplicate.
+     */
+    public void testSingleScriptString() throws IOException {
+        // Attach the "single" script; without it the test never exercises the script it is named after.
+        ValueCountAggregationBuilder aggregationBuilder = new ValueCountAggregationBuilder("name", null)
+            .field(FIELD_NAME)
+            .script(new Script(ScriptType.INLINE, MockScriptEngine.NAME, SINGLE_SCRIPT, Collections.emptyMap()));
+
+        MappedFieldType fieldType = createMappedFieldType(ValueType.STRING);
+        fieldType.setName(FIELD_NAME);
+        fieldType.setHasDocValues(true);
+
+        testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> {
+            Document doc = new Document();
+            // Note: unlike numerics, lucene de-dupes strings so we increment here
+            doc.add(new SortedSetDocValuesField(FIELD_NAME, new BytesRef("1")));
+            doc.add(new SortedSetDocValuesField(FIELD_NAME, new BytesRef("2")));
+            iw.addDocument(doc);
+
+            doc = new Document();
+            doc.add(new SortedSetDocValuesField(FIELD_NAME, new BytesRef("3")));
+            doc.add(new SortedSetDocValuesField(FIELD_NAME, new BytesRef("4")));
+            iw.addDocument(doc);
+
+            doc = new Document();
+            doc.add(new SortedSetDocValuesField(FIELD_NAME, new BytesRef("5")));
+            doc.add(new SortedSetDocValuesField(FIELD_NAME, new BytesRef("6")));
+            iw.addDocument(doc);
+        }, card -> {
+            // note: this is 6, even though the script always returns the same value.  ValueCount does not de-dupe
+            assertEquals(6, card.getValue(), 0);
+            assertTrue(AggregationInspectionHelper.hasValue(card));
+        }, fieldType);
+    }
+
     private void testCase(Query query,
                           ValueType valueType,
                           CheckedConsumer<RandomIndexWriter, IOException> indexer,