|
@@ -21,6 +21,8 @@ package org.elasticsearch.search.aggregations.bucket.terms;
|
|
|
|
|
|
import org.apache.lucene.search.IndexSearcher;
|
|
|
import org.elasticsearch.common.ParseField;
|
|
|
+import org.elasticsearch.common.logging.DeprecationLogger;
|
|
|
+import org.elasticsearch.common.logging.Loggers;
|
|
|
import org.elasticsearch.search.DocValueFormat;
|
|
|
import org.elasticsearch.search.aggregations.AggregationExecutionException;
|
|
|
import org.elasticsearch.search.aggregations.Aggregator;
|
|
@@ -41,18 +43,18 @@ import org.elasticsearch.search.aggregations.support.ValuesSourceConfig;
|
|
|
import org.elasticsearch.search.internal.SearchContext;
|
|
|
|
|
|
import java.io.IOException;
|
|
|
-import java.util.Arrays;
|
|
|
import java.util.List;
|
|
|
import java.util.Map;
|
|
|
|
|
|
public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<ValuesSource, TermsAggregatorFactory> {
|
|
|
+ private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger(Loggers.getLogger(TermsAggregatorFactory.class));
|
|
|
|
|
|
private final BucketOrder order;
|
|
|
private final IncludeExclude includeExclude;
|
|
|
private final String executionHint;
|
|
|
private final SubAggCollectionMode collectMode;
|
|
|
private final TermsAggregator.BucketCountThresholds bucketCountThresholds;
|
|
|
- private boolean showTermDocCountError;
|
|
|
+ private final boolean showTermDocCountError;
|
|
|
|
|
|
TermsAggregatorFactory(String name,
|
|
|
ValuesSourceConfig<ValuesSource> config,
|
|
@@ -124,61 +126,15 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
|
|
|
if (valuesSource instanceof ValuesSource.Bytes) {
|
|
|
ExecutionMode execution = null;
|
|
|
if (executionHint != null) {
|
|
|
- execution = ExecutionMode.fromString(executionHint);
|
|
|
+ execution = ExecutionMode.fromString(executionHint, DEPRECATION_LOGGER);
|
|
|
}
|
|
|
-
|
|
|
// In some cases, using ordinals is just not supported: override it
|
|
|
- if (!(valuesSource instanceof ValuesSource.Bytes.WithOrdinals)) {
|
|
|
+ if (valuesSource instanceof ValuesSource.Bytes.WithOrdinals == false) {
|
|
|
execution = ExecutionMode.MAP;
|
|
|
}
|
|
|
-
|
|
|
- final long maxOrd;
|
|
|
- final double ratio;
|
|
|
- if (execution == null || execution.needsGlobalOrdinals()) {
|
|
|
- ValuesSource.Bytes.WithOrdinals valueSourceWithOrdinals = (ValuesSource.Bytes.WithOrdinals) valuesSource;
|
|
|
- IndexSearcher indexSearcher = context.searcher();
|
|
|
- maxOrd = valueSourceWithOrdinals.globalMaxOrd(indexSearcher);
|
|
|
- ratio = maxOrd / ((double) indexSearcher.getIndexReader().numDocs());
|
|
|
- } else {
|
|
|
- maxOrd = -1;
|
|
|
- ratio = -1;
|
|
|
- }
|
|
|
-
|
|
|
- // Let's try to use a good default
|
|
|
+ final long maxOrd = getMaxOrd(valuesSource, context.searcher());
|
|
|
if (execution == null) {
|
|
|
- // if there is a parent bucket aggregator the number of
|
|
|
- // instances of this aggregator is going
|
|
|
- // to be unbounded and most instances may only aggregate few
|
|
|
- // documents, so use hashed based
|
|
|
- // global ordinals to keep the bucket ords dense.
|
|
|
-
|
|
|
- // Additionally, if using partitioned terms the regular global
|
|
|
- // ordinals would be sparse so we opt for hash
|
|
|
-
|
|
|
- // Finally if we are sorting by sub aggregations, then these
|
|
|
- // aggregations cannot be deferred, so global_ordinals_hash is
|
|
|
- // a safer choice as we won't use memory for sub aggregations
|
|
|
- // for buckets that are not collected.
|
|
|
- if (Aggregator.descendsFromBucketAggregator(parent) ||
|
|
|
- (includeExclude != null && includeExclude.isPartitionBased()) ||
|
|
|
- isAggregationSort(order)) {
|
|
|
- execution = ExecutionMode.GLOBAL_ORDINALS_HASH;
|
|
|
- } else {
|
|
|
- if (factories == AggregatorFactories.EMPTY) {
|
|
|
- if (ratio <= 0.5 && maxOrd <= 2048) {
|
|
|
- // 0.5: At least we need reduce the number of global
|
|
|
- // ordinals look-ups by half
|
|
|
- // 2048: GLOBAL_ORDINALS_LOW_CARDINALITY has
|
|
|
- // additional memory usage, which directly linked to
|
|
|
- // maxOrd, so we need to limit.
|
|
|
- execution = ExecutionMode.GLOBAL_ORDINALS_LOW_CARDINALITY;
|
|
|
- } else {
|
|
|
- execution = ExecutionMode.GLOBAL_ORDINALS;
|
|
|
- }
|
|
|
- } else {
|
|
|
- execution = ExecutionMode.GLOBAL_ORDINALS;
|
|
|
- }
|
|
|
- }
|
|
|
+ execution = ExecutionMode.GLOBAL_ORDINALS;
|
|
|
}
|
|
|
SubAggCollectionMode cm = collectMode;
|
|
|
if (cm == null) {
|
|
@@ -247,6 +203,19 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
|
|
|
return SubAggCollectionMode.DEPTH_FIRST;
|
|
|
}
|
|
|
|
|
|
+ /**
|
|
|
+ * Get the maximum global ordinal value for the provided {@link ValuesSource} or -1
|
|
|
+ * if the values source is not an instance of {@link ValuesSource.Bytes.WithOrdinals}.
|
|
|
+ */
|
|
|
+ static long getMaxOrd(ValuesSource source, IndexSearcher searcher) throws IOException {
|
|
|
+ if (source instanceof ValuesSource.Bytes.WithOrdinals) {
|
|
|
+ ValuesSource.Bytes.WithOrdinals valueSourceWithOrdinals = (ValuesSource.Bytes.WithOrdinals) source;
|
|
|
+ return valueSourceWithOrdinals.globalMaxOrd(searcher);
|
|
|
+ } else {
|
|
|
+ return -1;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
public enum ExecutionMode {
|
|
|
|
|
|
MAP(new ParseField("map")) {
|
|
@@ -265,18 +234,10 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
|
|
|
boolean showTermDocCountError,
|
|
|
List<PipelineAggregator> pipelineAggregators,
|
|
|
Map<String, Object> metaData) throws IOException {
|
|
|
-
|
|
|
final IncludeExclude.StringFilter filter = includeExclude == null ? null : includeExclude.convertToStringFilter(format);
|
|
|
return new StringTermsAggregator(name, factories, valuesSource, order, format, bucketCountThresholds, filter,
|
|
|
context, parent, subAggCollectMode, showTermDocCountError, pipelineAggregators, metaData);
|
|
|
-
|
|
|
}
|
|
|
-
|
|
|
- @Override
|
|
|
- boolean needsGlobalOrdinals() {
|
|
|
- return false;
|
|
|
- }
|
|
|
-
|
|
|
},
|
|
|
GLOBAL_ORDINALS(new ParseField("global_ordinals")) {
|
|
|
|
|
@@ -294,91 +255,61 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
|
|
|
List<PipelineAggregator> pipelineAggregators,
|
|
|
Map<String, Object> metaData) throws IOException {
|
|
|
|
|
|
- final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(format);
|
|
|
- return new GlobalOrdinalsStringTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals) valuesSource, order,
|
|
|
- format, bucketCountThresholds, filter, context, parent, false, subAggCollectMode, showTermDocCountError,
|
|
|
+ final long maxOrd = getMaxOrd(valuesSource, context.searcher());
|
|
|
+ assert maxOrd != -1;
|
|
|
+ final double ratio = maxOrd / ((double) context.searcher().getIndexReader().numDocs());
|
|
|
+ if (factories == AggregatorFactories.EMPTY &&
|
|
|
+ includeExclude == null &&
|
|
|
+ Aggregator.descendsFromBucketAggregator(parent) == false &&
|
|
|
+ ratio <= 0.5 && maxOrd <= 2048) {
|
|
|
+ /**
|
|
|
+ * We can use the low cardinality execution mode iff this aggregator:
|
|
|
+ * - has no sub-aggregator AND
|
|
|
+ * - is not a child of a bucket aggregator AND
|
|
|
+ * - At least we reduce the number of global ordinals look-ups by half (ration <= 0.5) AND
|
|
|
+ * - the maximum global ordinal is less than 2048 (LOW_CARDINALITY has additional memory usage,
|
|
|
+ * which directly linked to maxOrd, so we need to limit).
|
|
|
+ */
|
|
|
+ return new GlobalOrdinalsStringTermsAggregator.LowCardinality(name, factories, (ValuesSource.Bytes.WithOrdinals) valuesSource, order,
|
|
|
+ format, bucketCountThresholds, context, parent, false, subAggCollectMode, showTermDocCountError,
|
|
|
pipelineAggregators, metaData);
|
|
|
|
|
|
- }
|
|
|
-
|
|
|
- @Override
|
|
|
- boolean needsGlobalOrdinals() {
|
|
|
- return true;
|
|
|
- }
|
|
|
-
|
|
|
- },
|
|
|
- GLOBAL_ORDINALS_HASH(new ParseField("global_ordinals_hash")) {
|
|
|
-
|
|
|
- @Override
|
|
|
- Aggregator create(String name,
|
|
|
- AggregatorFactories factories,
|
|
|
- ValuesSource valuesSource,
|
|
|
- BucketOrder order,
|
|
|
- DocValueFormat format,
|
|
|
- TermsAggregator.BucketCountThresholds bucketCountThresholds,
|
|
|
- IncludeExclude includeExclude,
|
|
|
- SearchContext context,
|
|
|
- Aggregator parent,
|
|
|
- SubAggCollectionMode subAggCollectMode,
|
|
|
- boolean showTermDocCountError,
|
|
|
- List<PipelineAggregator> pipelineAggregators,
|
|
|
- Map<String, Object> metaData) throws IOException {
|
|
|
-
|
|
|
+ }
|
|
|
final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(format);
|
|
|
- return new GlobalOrdinalsStringTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals) valuesSource,
|
|
|
- order, format, bucketCountThresholds, filter, context, parent, true, subAggCollectMode,
|
|
|
- showTermDocCountError, pipelineAggregators, metaData);
|
|
|
-
|
|
|
- }
|
|
|
-
|
|
|
- @Override
|
|
|
- boolean needsGlobalOrdinals() {
|
|
|
- return true;
|
|
|
- }
|
|
|
- },
|
|
|
- GLOBAL_ORDINALS_LOW_CARDINALITY(new ParseField("global_ordinals_low_cardinality")) {
|
|
|
-
|
|
|
- @Override
|
|
|
- Aggregator create(String name,
|
|
|
- AggregatorFactories factories,
|
|
|
- ValuesSource valuesSource,
|
|
|
- BucketOrder order,
|
|
|
- DocValueFormat format,
|
|
|
- TermsAggregator.BucketCountThresholds bucketCountThresholds,
|
|
|
- IncludeExclude includeExclude,
|
|
|
- SearchContext context,
|
|
|
- Aggregator parent,
|
|
|
- SubAggCollectionMode subAggCollectMode,
|
|
|
- boolean showTermDocCountError,
|
|
|
- List<PipelineAggregator> pipelineAggregators,
|
|
|
- Map<String, Object> metaData) throws IOException {
|
|
|
-
|
|
|
- if (includeExclude != null || factories.countAggregators() > 0
|
|
|
- // we need the FieldData impl to be able to extract the
|
|
|
- // segment to global ord mapping
|
|
|
- || valuesSource.getClass() != ValuesSource.Bytes.FieldData.class) {
|
|
|
- return GLOBAL_ORDINALS.create(name, factories, valuesSource, order, format, bucketCountThresholds, includeExclude,
|
|
|
- context, parent, subAggCollectMode, showTermDocCountError, pipelineAggregators, metaData);
|
|
|
+ boolean remapGlobalOrds = true;
|
|
|
+ if (includeExclude == null &&
|
|
|
+ Aggregator.descendsFromBucketAggregator(parent) == false &&
|
|
|
+ (factories == AggregatorFactories.EMPTY ||
|
|
|
+ (isAggregationSort(order) == false && subAggCollectMode == SubAggCollectionMode.BREADTH_FIRST))) {
|
|
|
+ /**
|
|
|
+ * We don't need to remap global ords iff this aggregator:
|
|
|
+ * - has no include/exclude rules AND
|
|
|
+ * - is not a child of a bucket aggregator AND
|
|
|
+ * - has no sub-aggregator or only sub-aggregator that can be deferred ({@link SubAggCollectionMode#BREADTH_FIRST}).
|
|
|
+ **/
|
|
|
+ remapGlobalOrds = false;
|
|
|
}
|
|
|
- return new GlobalOrdinalsStringTermsAggregator.LowCardinality(name, factories,
|
|
|
- (ValuesSource.Bytes.WithOrdinals) valuesSource, order, format, bucketCountThresholds, context, parent,
|
|
|
- false, subAggCollectMode, showTermDocCountError, pipelineAggregators, metaData);
|
|
|
-
|
|
|
- }
|
|
|
-
|
|
|
- @Override
|
|
|
- boolean needsGlobalOrdinals() {
|
|
|
- return true;
|
|
|
+ return new GlobalOrdinalsStringTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals) valuesSource, order,
|
|
|
+ format, bucketCountThresholds, filter, context, parent, remapGlobalOrds, subAggCollectMode, showTermDocCountError,
|
|
|
+ pipelineAggregators, metaData);
|
|
|
}
|
|
|
};
|
|
|
|
|
|
- public static ExecutionMode fromString(String value) {
|
|
|
- for (ExecutionMode mode : values()) {
|
|
|
- if (mode.parseField.match(value)) {
|
|
|
- return mode;
|
|
|
- }
|
|
|
+ public static ExecutionMode fromString(String value, final DeprecationLogger deprecationLogger) {
|
|
|
+ switch (value) {
|
|
|
+ case "global_ordinals":
|
|
|
+ return GLOBAL_ORDINALS;
|
|
|
+ case "global_ordinals_hash":
|
|
|
+ deprecationLogger.deprecated("[global_ordinals_hash] is deprecated. Please use [global_ordinals] instead.");
|
|
|
+ return GLOBAL_ORDINALS;
|
|
|
+ case "global_ordinals_low_cardinality":
|
|
|
+ deprecationLogger.deprecated("[global_ordinals_low_cardinality] is deprecated. Please use [global_ordinals] instead.");
|
|
|
+ return GLOBAL_ORDINALS;
|
|
|
+ case "map":
|
|
|
+ return MAP;
|
|
|
+ default:
|
|
|
+ throw new IllegalArgumentException("Unknown `execution_hint`: [" + value + "], expected any of [map, global_ordinals]");
|
|
|
}
|
|
|
- throw new IllegalArgumentException("Unknown `execution_hint`: [" + value + "], expected any of " + Arrays.toString(values()));
|
|
|
}
|
|
|
|
|
|
private final ParseField parseField;
|
|
@@ -401,8 +332,6 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
|
|
|
List<PipelineAggregator> pipelineAggregators,
|
|
|
Map<String, Object> metaData) throws IOException;
|
|
|
|
|
|
- abstract boolean needsGlobalOrdinals();
|
|
|
-
|
|
|
@Override
|
|
|
public String toString() {
|
|
|
return parseField.getPreferredName();
|