|
@@ -9,20 +9,27 @@
|
|
|
package org.elasticsearch.search.aggregations.metrics;
|
|
|
|
|
|
import org.apache.lucene.document.BinaryDocValuesField;
|
|
|
+import org.apache.lucene.document.Field;
|
|
|
+import org.apache.lucene.document.InetAddressPoint;
|
|
|
import org.apache.lucene.document.IntPoint;
|
|
|
import org.apache.lucene.document.NumericDocValuesField;
|
|
|
import org.apache.lucene.document.SortedDocValuesField;
|
|
|
import org.apache.lucene.document.SortedNumericDocValuesField;
|
|
|
import org.apache.lucene.document.SortedSetDocValuesField;
|
|
|
+import org.apache.lucene.document.StringField;
|
|
|
+import org.apache.lucene.index.Term;
|
|
|
import org.apache.lucene.search.FieldExistsQuery;
|
|
|
import org.apache.lucene.search.MatchAllDocsQuery;
|
|
|
import org.apache.lucene.search.Query;
|
|
|
+import org.apache.lucene.search.TermQuery;
|
|
|
import org.apache.lucene.tests.index.RandomIndexWriter;
|
|
|
import org.apache.lucene.util.BytesRef;
|
|
|
import org.elasticsearch.common.geo.GeoPoint;
|
|
|
+import org.elasticsearch.common.network.InetAddresses;
|
|
|
import org.elasticsearch.common.settings.Settings;
|
|
|
import org.elasticsearch.core.CheckedConsumer;
|
|
|
import org.elasticsearch.index.fielddata.ScriptDocValues;
|
|
|
+import org.elasticsearch.index.mapper.IpFieldMapper;
|
|
|
import org.elasticsearch.index.mapper.KeywordFieldMapper;
|
|
|
import org.elasticsearch.index.mapper.MappedFieldType;
|
|
|
import org.elasticsearch.index.mapper.NumberFieldMapper;
|
|
@@ -36,6 +43,7 @@ import org.elasticsearch.script.ScriptService;
|
|
|
import org.elasticsearch.script.ScriptType;
|
|
|
import org.elasticsearch.search.aggregations.AggregationBuilder;
|
|
|
import org.elasticsearch.search.aggregations.AggregationBuilders;
|
|
|
+import org.elasticsearch.search.aggregations.Aggregator;
|
|
|
import org.elasticsearch.search.aggregations.AggregatorTestCase;
|
|
|
import org.elasticsearch.search.aggregations.InternalAggregation;
|
|
|
import org.elasticsearch.search.aggregations.bucket.global.Global;
|
|
@@ -47,6 +55,8 @@ import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
|
|
|
import org.elasticsearch.search.aggregations.support.ValuesSourceType;
|
|
|
|
|
|
import java.io.IOException;
|
|
|
+import java.net.InetAddress;
|
|
|
+import java.util.Arrays;
|
|
|
import java.util.Collections;
|
|
|
import java.util.HashMap;
|
|
|
import java.util.List;
|
|
@@ -58,6 +68,9 @@ import java.util.function.Function;
|
|
|
|
|
|
import static java.util.Collections.emptyMap;
|
|
|
import static java.util.Collections.singleton;
|
|
|
+import static org.elasticsearch.test.MapMatcher.assertMap;
|
|
|
+import static org.elasticsearch.test.MapMatcher.matchesMap;
|
|
|
+import static org.hamcrest.Matchers.greaterThanOrEqualTo;
|
|
|
|
|
|
public class CardinalityAggregatorTests extends AggregatorTestCase {
|
|
|
|
|
@@ -218,6 +231,120 @@ public class CardinalityAggregatorTests extends AggregatorTestCase {
|
|
|
}, mappedFieldTypes);
|
|
|
}
|
|
|
|
|
|
+ public void testIndexedSingleValuedString() throws IOException {
|
|
|
+ // Indexing enables dynamic pruning optimizations
|
|
|
+ final CardinalityAggregationBuilder aggregationBuilder = new CardinalityAggregationBuilder("name").field("str_value");
|
|
|
+ final MappedFieldType mappedFieldTypes = new KeywordFieldMapper.KeywordFieldType("str_value");
|
|
|
+
|
|
|
+ CheckedConsumer<RandomIndexWriter, IOException> buildIndex = iw -> {
|
|
|
+ iw.addDocument(
|
|
|
+ Arrays.asList(
|
|
|
+ new StringField("str_value", "one", Field.Store.NO),
|
|
|
+ new SortedDocValuesField("str_value", new BytesRef("one"))
|
|
|
+ )
|
|
|
+ );
|
|
|
+ iw.addDocument(
|
|
|
+ Arrays.asList(
|
|
|
+ new StringField("unrelatedField", "two", Field.Store.NO),
|
|
|
+ new SortedDocValuesField("unrelatedField", new BytesRef("two"))
|
|
|
+ )
|
|
|
+ );
|
|
|
+ iw.addDocument(
|
|
|
+ Arrays.asList(
|
|
|
+ new StringField("str_value", "three", Field.Store.NO),
|
|
|
+ new SortedDocValuesField("str_value", new BytesRef("three"))
|
|
|
+ )
|
|
|
+ );
|
|
|
+ iw.addDocument(
|
|
|
+ Arrays.asList(
|
|
|
+ new StringField("str_value", "one", Field.Store.NO),
|
|
|
+ new SortedDocValuesField("str_value", new BytesRef("one"))
|
|
|
+ )
|
|
|
+ );
|
|
|
+ };
|
|
|
+
|
|
|
+ testAggregation(aggregationBuilder, new MatchAllDocsQuery(), buildIndex, card -> {
|
|
|
+ assertEquals(2, card.getValue(), 0);
|
|
|
+ assertTrue(AggregationInspectionHelper.hasValue(card));
|
|
|
+ }, mappedFieldTypes);
|
|
|
+
|
|
|
+ // Enforce auto-detection of the execution mode
|
|
|
+ aggregationBuilder.executionHint(null);
|
|
|
+ debugTestCase(
|
|
|
+ aggregationBuilder,
|
|
|
+ new MatchAllDocsQuery(),
|
|
|
+ buildIndex,
|
|
|
+ (InternalCardinality card, Class<? extends Aggregator> impl, Map<String, Map<String, Object>> debug) -> {
|
|
|
+ assertEquals(2, card.getValue(), 0);
|
|
|
+ assertEquals(GlobalOrdCardinalityAggregator.class, impl);
|
|
|
+ assertMap(
|
|
|
+ debug,
|
|
|
+ matchesMap().entry(
|
|
|
+ "name",
|
|
|
+ matchesMap().entry("dynamic_pruning_used", greaterThanOrEqualTo(1))
|
|
|
+ .entry("dynamic_pruning_attempted", greaterThanOrEqualTo(1))
|
|
|
+ .entry("skipped_due_to_no_data", 0)
|
|
|
+ .entry("brute_force_used", 0)
|
|
|
+ )
|
|
|
+ );
|
|
|
+ },
|
|
|
+ mappedFieldTypes
|
|
|
+ );
|
|
|
+ }
|
|
|
+
|
|
|
+ public void testIndexedSingleValuedIP() throws IOException {
|
|
|
+ // IP addresses are interesting to test because they use sorted doc values like keywords, but index data using points rather than an
|
|
|
+ // inverted index, so this triggers a different code path to disable dynamic pruning
|
|
|
+ final CardinalityAggregationBuilder aggregationBuilder = new CardinalityAggregationBuilder("name").field("ip_value");
|
|
|
+ final MappedFieldType mappedFieldTypes = new IpFieldMapper.IpFieldType("ip_value");
|
|
|
+
|
|
|
+ CheckedConsumer<RandomIndexWriter, IOException> buildIndex = iw -> {
|
|
|
+ InetAddress value = InetAddresses.forString("::1");
|
|
|
+ byte[] encodedValue = InetAddressPoint.encode(value);
|
|
|
+ iw.addDocument(
|
|
|
+ Arrays.asList(new InetAddressPoint("ip_value", value), new SortedDocValuesField("ip_value", new BytesRef(encodedValue)))
|
|
|
+ );
|
|
|
+ value = InetAddresses.forString("192.168.0.1");
|
|
|
+ encodedValue = InetAddressPoint.encode(value);
|
|
|
+ iw.addDocument(
|
|
|
+ Arrays.asList(new InetAddressPoint("ip_value", value), new SortedDocValuesField("ip_value", new BytesRef(encodedValue)))
|
|
|
+ );
|
|
|
+ value = InetAddresses.forString("::1");
|
|
|
+ encodedValue = InetAddressPoint.encode(value);
|
|
|
+ iw.addDocument(
|
|
|
+ Arrays.asList(new InetAddressPoint("ip_value", value), new SortedDocValuesField("ip_value", new BytesRef(encodedValue)))
|
|
|
+ );
|
|
|
+ };
|
|
|
+
|
|
|
+ testAggregation(aggregationBuilder, new MatchAllDocsQuery(), buildIndex, card -> {
|
|
|
+ assertEquals(2, card.getValue(), 0);
|
|
|
+ assertTrue(AggregationInspectionHelper.hasValue(card));
|
|
|
+ }, mappedFieldTypes);
|
|
|
+
|
|
|
+ // Enforce auto-detection of the execution mode
|
|
|
+ aggregationBuilder.executionHint(null);
|
|
|
+ debugTestCase(
|
|
|
+ aggregationBuilder,
|
|
|
+ new MatchAllDocsQuery(),
|
|
|
+ buildIndex,
|
|
|
+ (InternalCardinality card, Class<? extends Aggregator> impl, Map<String, Map<String, Object>> debug) -> {
|
|
|
+ assertEquals(2, card.getValue(), 0);
|
|
|
+ assertEquals(GlobalOrdCardinalityAggregator.class, impl);
|
|
|
+ assertMap(
|
|
|
+ debug,
|
|
|
+ matchesMap().entry(
|
|
|
+ "name",
|
|
|
+ matchesMap().entry("dynamic_pruning_used", 0)
|
|
|
+ .entry("dynamic_pruning_attempted", 0)
|
|
|
+ .entry("skipped_due_to_no_data", 0)
|
|
|
+ .entry("brute_force_used", greaterThanOrEqualTo(1))
|
|
|
+ )
|
|
|
+ );
|
|
|
+ },
|
|
|
+ mappedFieldTypes
|
|
|
+ );
|
|
|
+ }
|
|
|
+
|
|
|
public void testSingleValuedStringValueScript() throws IOException {
|
|
|
final CardinalityAggregationBuilder aggregationBuilder = new CardinalityAggregationBuilder("name").field("str_value")
|
|
|
.script(new Script(ScriptType.INLINE, MockScriptEngine.NAME, "_value", emptyMap()));
|
|
@@ -377,6 +504,83 @@ public class CardinalityAggregatorTests extends AggregatorTestCase {
|
|
|
}, mappedFieldTypes);
|
|
|
}
|
|
|
|
|
|
+ public void testIndexedMultiValuedString() throws IOException {
|
|
|
+ // Indexing enables dynamic pruning optimizations
|
|
|
+ final CardinalityAggregationBuilder aggregationBuilder = new CardinalityAggregationBuilder("name").field("str_values");
|
|
|
+ final MappedFieldType mappedFieldTypes = new KeywordFieldMapper.KeywordFieldType("str_values");
|
|
|
+
|
|
|
+ CheckedConsumer<RandomIndexWriter, IOException> buildIndex = iw -> {
|
|
|
+ iw.addDocument(
|
|
|
+ List.of(
|
|
|
+ new StringField("str_values", "one", Field.Store.NO),
|
|
|
+ new SortedSetDocValuesField("str_values", new BytesRef("one")),
|
|
|
+ new StringField("str_values", "two", Field.Store.NO),
|
|
|
+ new SortedSetDocValuesField("str_values", new BytesRef("two"))
|
|
|
+ )
|
|
|
+ );
|
|
|
+ iw.addDocument(
|
|
|
+ List.of(
|
|
|
+ new StringField("str_values", "one", Field.Store.NO),
|
|
|
+ new SortedSetDocValuesField("str_values", new BytesRef("one")),
|
|
|
+ new StringField("str_values", "three", Field.Store.NO),
|
|
|
+ new SortedSetDocValuesField("str_values", new BytesRef("three"))
|
|
|
+ )
|
|
|
+ );
|
|
|
+ iw.addDocument(
|
|
|
+ List.of(
|
|
|
+ new StringField("str_values", "three", Field.Store.NO),
|
|
|
+ new SortedSetDocValuesField("str_values", new BytesRef("three")),
|
|
|
+ new StringField("str_values", "two", Field.Store.NO),
|
|
|
+ new SortedSetDocValuesField("str_values", new BytesRef("two"))
|
|
|
+ )
|
|
|
+ );
|
|
|
+ iw.addDocument(
|
|
|
+ List.of(
|
|
|
+ new StringField("str_values", "three", Field.Store.NO),
|
|
|
+ new SortedSetDocValuesField("str_values", new BytesRef("three")),
|
|
|
+ new StringField("str_values", "two", Field.Store.NO),
|
|
|
+ new SortedSetDocValuesField("str_values", new BytesRef("two"))
|
|
|
+ )
|
|
|
+ );
|
|
|
+ iw.addDocument(
|
|
|
+ List.of(
|
|
|
+ new StringField("str_values", "two", Field.Store.NO),
|
|
|
+ new SortedSetDocValuesField("str_values", new BytesRef("two")),
|
|
|
+ new StringField("str_values", "three", Field.Store.NO),
|
|
|
+ new SortedSetDocValuesField("str_values", new BytesRef("three"))
|
|
|
+ )
|
|
|
+ );
|
|
|
+ };
|
|
|
+
|
|
|
+ testAggregation(aggregationBuilder, new MatchAllDocsQuery(), buildIndex, card -> {
|
|
|
+ assertEquals(3, card.getValue(), 0);
|
|
|
+ assertTrue(AggregationInspectionHelper.hasValue(card));
|
|
|
+ }, mappedFieldTypes);
|
|
|
+
|
|
|
+ // Enforce auto-detection of the execution mode
|
|
|
+ aggregationBuilder.executionHint(null);
|
|
|
+ debugTestCase(
|
|
|
+ aggregationBuilder,
|
|
|
+ new MatchAllDocsQuery(),
|
|
|
+ buildIndex,
|
|
|
+ (InternalCardinality card, Class<? extends Aggregator> impl, Map<String, Map<String, Object>> debug) -> {
|
|
|
+ assertEquals(3, card.getValue(), 0);
|
|
|
+ assertEquals(GlobalOrdCardinalityAggregator.class, impl);
|
|
|
+ assertMap(
|
|
|
+ debug,
|
|
|
+ matchesMap().entry(
|
|
|
+ "name",
|
|
|
+ matchesMap().entry("dynamic_pruning_used", greaterThanOrEqualTo(1))
|
|
|
+ .entry("dynamic_pruning_attempted", greaterThanOrEqualTo(1))
|
|
|
+ .entry("skipped_due_to_no_data", 0)
|
|
|
+ .entry("brute_force_used", 0)
|
|
|
+ )
|
|
|
+ );
|
|
|
+ },
|
|
|
+ mappedFieldTypes
|
|
|
+ );
|
|
|
+ }
|
|
|
+
|
|
|
public void testUnmappedMissingString() throws IOException {
|
|
|
CardinalityAggregationBuilder aggregationBuilder = new CardinalityAggregationBuilder("name").field("number").missing("🍌🍌🍌");
|
|
|
|
|
@@ -593,6 +797,113 @@ public class CardinalityAggregatorTests extends AggregatorTestCase {
|
|
|
}, new AggTestConfig(aggregationBuilder, mappedFieldTypes));
|
|
|
}
|
|
|
|
|
|
+ public void testIndexedWithMissingValues() throws IOException {
|
|
|
+ // Indexing enables dynamic pruning optimizations
|
|
|
+ final CardinalityAggregationBuilder aggregationBuilder = new CardinalityAggregationBuilder("name").field("str_value");
|
|
|
+ final MappedFieldType mappedFieldTypes = new KeywordFieldMapper.KeywordFieldType("str_value");
|
|
|
+
|
|
|
+ testAggregation(aggregationBuilder, new MatchAllDocsQuery(), iw -> {
|
|
|
+ iw.addDocument(Collections.emptySet());
|
|
|
+ iw.addDocument(Collections.emptySet());
|
|
|
+ iw.addDocument(
|
|
|
+ Arrays.asList(
|
|
|
+ new StringField("str_value", "one", Field.Store.NO),
|
|
|
+ new SortedDocValuesField("str_value", new BytesRef("one"))
|
|
|
+ )
|
|
|
+ );
|
|
|
+ iw.addDocument(Collections.emptySet());
|
|
|
+ iw.addDocument(
|
|
|
+ Arrays.asList(
|
|
|
+ new StringField("unrelatedField", "two", Field.Store.NO),
|
|
|
+ new SortedDocValuesField("unrelatedField", new BytesRef("two"))
|
|
|
+ )
|
|
|
+ );
|
|
|
+ iw.addDocument(Collections.emptySet());
|
|
|
+ iw.addDocument(Collections.emptySet());
|
|
|
+ iw.addDocument(
|
|
|
+ Arrays.asList(
|
|
|
+ new StringField("str_value", "three", Field.Store.NO),
|
|
|
+ new SortedDocValuesField("str_value", new BytesRef("three"))
|
|
|
+ )
|
|
|
+ );
|
|
|
+ iw.addDocument(Collections.emptySet());
|
|
|
+ iw.addDocument(Collections.emptySet());
|
|
|
+ iw.addDocument(Collections.emptySet());
|
|
|
+ iw.addDocument(
|
|
|
+ Arrays.asList(
|
|
|
+ new StringField("str_value", "one", Field.Store.NO),
|
|
|
+ new SortedDocValuesField("str_value", new BytesRef("one"))
|
|
|
+ )
|
|
|
+ );
|
|
|
+ }, card -> {
|
|
|
+ assertEquals(2, card.getValue(), 0);
|
|
|
+ assertTrue(AggregationInspectionHelper.hasValue(card));
|
|
|
+ }, mappedFieldTypes);
|
|
|
+ }
|
|
|
+
|
|
|
+ public void testMoreThan128UniqueStringValues() throws IOException {
|
|
|
+ // Indexing enables dynamic pruning optimizations
|
|
|
+ // Fields with more than 128 unique values exercise slightly different code paths
|
|
|
+ final CardinalityAggregationBuilder aggregationBuilder = new CardinalityAggregationBuilder("name").field("str_value");
|
|
|
+ final MappedFieldType mappedFieldTypes = new KeywordFieldMapper.KeywordFieldType("str_value");
|
|
|
+
|
|
|
+ CheckedConsumer<RandomIndexWriter, IOException> buildIndex = iw -> {
|
|
|
+ for (int i = 0; i < 200; ++i) {
|
|
|
+ final String value = Integer.toString(i);
|
|
|
+ iw.addDocument(
|
|
|
+ Arrays.asList(
|
|
|
+ new StringField("keyword1", Integer.toString(i % 2), Field.Store.NO),
|
|
|
+ new StringField("keyword2", Integer.toString(i % 5), Field.Store.NO),
|
|
|
+ new StringField("str_value", value, Field.Store.NO),
|
|
|
+ new SortedDocValuesField("str_value", new BytesRef(value))
|
|
|
+ )
|
|
|
+ );
|
|
|
+ }
|
|
|
+ };
|
|
|
+
|
|
|
+ debugTestCase(
|
|
|
+ aggregationBuilder,
|
|
|
+ new TermQuery(new Term("keyword1", "0")),
|
|
|
+ buildIndex,
|
|
|
+ (InternalCardinality card, Class<? extends Aggregator> impl, Map<String, Map<String, Object>> debug) -> {
|
|
|
+ assertEquals(100, card.getValue(), 0);
|
|
|
+ assertEquals(GlobalOrdCardinalityAggregator.class, impl);
|
|
|
+ assertMap(
|
|
|
+ debug,
|
|
|
+ matchesMap().entry(
|
|
|
+ "name",
|
|
|
+ matchesMap().entry("dynamic_pruning_used", greaterThanOrEqualTo(1))
|
|
|
+ .entry("dynamic_pruning_attempted", greaterThanOrEqualTo(1))
|
|
|
+ .entry("skipped_due_to_no_data", 0)
|
|
|
+ .entry("brute_force_used", 0)
|
|
|
+ )
|
|
|
+ );
|
|
|
+ },
|
|
|
+ mappedFieldTypes
|
|
|
+ );
|
|
|
+
|
|
|
+ debugTestCase(
|
|
|
+ aggregationBuilder,
|
|
|
+ new TermQuery(new Term("keyword2", "0")),
|
|
|
+ buildIndex,
|
|
|
+ (InternalCardinality card, Class<? extends Aggregator> impl, Map<String, Map<String, Object>> debug) -> {
|
|
|
+ assertEquals(40, card.getValue(), 0);
|
|
|
+ assertEquals(GlobalOrdCardinalityAggregator.class, impl);
|
|
|
+ assertMap(
|
|
|
+ debug,
|
|
|
+ matchesMap().entry(
|
|
|
+ "name",
|
|
|
+ matchesMap().entry("dynamic_pruning_used", 0)
|
|
|
+ .entry("dynamic_pruning_attempted", greaterThanOrEqualTo(1))
|
|
|
+ .entry("skipped_due_to_no_data", 0)
|
|
|
+ .entry("brute_force_used", 0)
|
|
|
+ )
|
|
|
+ );
|
|
|
+ },
|
|
|
+ mappedFieldTypes
|
|
|
+ );
|
|
|
+ }
|
|
|
+
|
|
|
private void testAggregation(
|
|
|
Query query,
|
|
|
CheckedConsumer<RandomIndexWriter, IOException> buildIndex,
|