|
@@ -20,21 +20,8 @@
|
|
|
package org.elasticsearch.search.aggregations.bucket.terms;
|
|
|
|
|
|
import org.apache.logging.log4j.LogManager;
|
|
|
-import org.apache.lucene.index.IndexReader;
|
|
|
-import org.apache.lucene.index.PostingsEnum;
|
|
|
-import org.apache.lucene.index.Term;
|
|
|
-import org.apache.lucene.search.BooleanClause.Occur;
|
|
|
-import org.apache.lucene.search.BooleanQuery;
|
|
|
-import org.apache.lucene.search.IndexSearcher;
|
|
|
-import org.apache.lucene.search.Query;
|
|
|
-import org.apache.lucene.search.TermQuery;
|
|
|
-import org.apache.lucene.util.BytesRef;
|
|
|
import org.elasticsearch.common.ParseField;
|
|
|
-import org.elasticsearch.common.lease.Releasable;
|
|
|
import org.elasticsearch.common.logging.DeprecationLogger;
|
|
|
-import org.elasticsearch.common.lucene.index.FilterableTermsEnum;
|
|
|
-import org.elasticsearch.common.lucene.index.FreqTermsEnum;
|
|
|
-import org.elasticsearch.index.mapper.MappedFieldType;
|
|
|
import org.elasticsearch.index.query.QueryBuilder;
|
|
|
import org.elasticsearch.index.query.QueryShardContext;
|
|
|
import org.elasticsearch.search.DocValueFormat;
|
|
@@ -60,21 +47,10 @@ import java.io.IOException;
|
|
|
import java.util.List;
|
|
|
import java.util.Map;
|
|
|
|
|
|
-public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFactory implements Releasable {
|
|
|
+public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFactory {
|
|
|
private static final DeprecationLogger deprecationLogger = new DeprecationLogger(
|
|
|
LogManager.getLogger(SignificantTermsAggregatorFactory.class));
|
|
|
|
|
|
- private final IncludeExclude includeExclude;
|
|
|
- private final String executionHint;
|
|
|
- private String indexedFieldName;
|
|
|
- private MappedFieldType fieldType;
|
|
|
- private FilterableTermsEnum termsEnum;
|
|
|
- private int numberOfAggregatorsCreated;
|
|
|
- final Query filter;
|
|
|
- private final int supersetNumDocs;
|
|
|
- private final TermsAggregator.BucketCountThresholds bucketCountThresholds;
|
|
|
- private final SignificanceHeuristic significanceHeuristic;
|
|
|
-
|
|
|
static void registerAggregators(ValuesSourceRegistry.Builder builder) {
|
|
|
builder.register(SignificantTermsAggregationBuilder.NAME,
|
|
|
List.of(CoreValuesSourceType.BYTES, CoreValuesSourceType.IP),
|
|
@@ -102,7 +78,7 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
|
|
|
SearchContext context,
|
|
|
Aggregator parent,
|
|
|
SignificanceHeuristic significanceHeuristic,
|
|
|
- SignificantTermsAggregatorFactory sigTermsFactory,
|
|
|
+ SignificanceLookup lookup,
|
|
|
boolean collectsFromSingleBucket,
|
|
|
Map<String, Object> metadata) throws IOException {
|
|
|
|
|
@@ -124,7 +100,7 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
|
|
|
}
|
|
|
|
|
|
return execution.create(name, factories, valuesSource, format, bucketCountThresholds, includeExclude, context, parent,
|
|
|
- significanceHeuristic, sigTermsFactory, collectsFromSingleBucket, metadata);
|
|
|
+ significanceHeuristic, lookup, collectsFromSingleBucket, metadata);
|
|
|
}
|
|
|
};
|
|
|
}
|
|
@@ -146,7 +122,7 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
|
|
|
SearchContext context,
|
|
|
Aggregator parent,
|
|
|
SignificanceHeuristic significanceHeuristic,
|
|
|
- SignificantTermsAggregatorFactory sigTermsFactory,
|
|
|
+ SignificanceLookup lookup,
|
|
|
boolean collectsFromSingleBucket,
|
|
|
Map<String, Object> metadata) throws IOException {
|
|
|
|
|
@@ -167,18 +143,24 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
|
|
|
}
|
|
|
|
|
|
return new NumericTermsAggregator(name, factories,
|
|
|
- agg -> agg.new SignificantLongTermsResults(sigTermsFactory, significanceHeuristic, collectsFromSingleBucket),
|
|
|
+ agg -> agg.new SignificantLongTermsResults(lookup, significanceHeuristic, collectsFromSingleBucket),
|
|
|
numericValuesSource, format, null, bucketCountThresholds, context, parent, SubAggCollectionMode.BREADTH_FIRST,
|
|
|
longFilter, collectsFromSingleBucket, metadata);
|
|
|
}
|
|
|
};
|
|
|
}
|
|
|
|
|
|
+ private final IncludeExclude includeExclude;
|
|
|
+ private final String executionHint;
|
|
|
+ private final QueryBuilder backgroundFilter;
|
|
|
+ private final TermsAggregator.BucketCountThresholds bucketCountThresholds;
|
|
|
+ private final SignificanceHeuristic significanceHeuristic;
|
|
|
+
|
|
|
SignificantTermsAggregatorFactory(String name,
|
|
|
ValuesSourceConfig config,
|
|
|
IncludeExclude includeExclude,
|
|
|
String executionHint,
|
|
|
- QueryBuilder filterBuilder,
|
|
|
+ QueryBuilder backgroundFilter,
|
|
|
TermsAggregator.BucketCountThresholds bucketCountThresholds,
|
|
|
SignificanceHeuristic significanceHeuristic,
|
|
|
QueryShardContext queryShardContext,
|
|
@@ -192,85 +174,15 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
|
|
|
throw new IllegalArgumentException("SignificantText aggregation requires fields to be searchable, but ["
|
|
|
+ config.fieldContext().fieldType().name() + "] is not");
|
|
|
}
|
|
|
-
|
|
|
- this.fieldType = config.fieldContext().fieldType();
|
|
|
- this.indexedFieldName = fieldType.name();
|
|
|
}
|
|
|
|
|
|
this.includeExclude = includeExclude;
|
|
|
this.executionHint = executionHint;
|
|
|
- this.filter = filterBuilder == null
|
|
|
- ? null
|
|
|
- : filterBuilder.toQuery(queryShardContext);
|
|
|
- IndexSearcher searcher = queryShardContext.searcher();
|
|
|
- /*
|
|
|
- * We need to use a superset size that includes deleted docs or we
|
|
|
- * could end up blowing up with bad statistics that cause us to blow
|
|
|
- * up later on.
|
|
|
- */
|
|
|
- this.supersetNumDocs = filter == null ? searcher.getIndexReader().maxDoc() : searcher.count(filter);
|
|
|
+ this.backgroundFilter = backgroundFilter;
|
|
|
this.bucketCountThresholds = bucketCountThresholds;
|
|
|
this.significanceHeuristic = significanceHeuristic;
|
|
|
}
|
|
|
|
|
|
- /**
|
|
|
- * Get the number of docs in the superset.
|
|
|
- */
|
|
|
- long getSupersetNumDocs() {
|
|
|
- return supersetNumDocs;
|
|
|
- }
|
|
|
-
|
|
|
- private FilterableTermsEnum getTermsEnum(String field) throws IOException {
|
|
|
- // TODO this method helps because of asMultiBucketAggregator. Once we remove it we can move this logic into the aggregators.
|
|
|
- if (termsEnum != null) {
|
|
|
- return termsEnum;
|
|
|
- }
|
|
|
- IndexReader reader = queryShardContext.getIndexReader();
|
|
|
- if (numberOfAggregatorsCreated > 1) {
|
|
|
- termsEnum = new FreqTermsEnum(reader, field, true, false, filter, queryShardContext.bigArrays());
|
|
|
- } else {
|
|
|
- termsEnum = new FilterableTermsEnum(reader, indexedFieldName, PostingsEnum.NONE, filter);
|
|
|
- }
|
|
|
- return termsEnum;
|
|
|
- }
|
|
|
-
|
|
|
- private long getBackgroundFrequency(String value) throws IOException {
|
|
|
- // fieldType can be null if the field is unmapped, but theoretically this method should only be called
|
|
|
- // when constructing buckets. Assert to ensure this is the case
|
|
|
- // TODO this is a bad setup and it should be refactored
|
|
|
- assert fieldType != null;
|
|
|
- Query query = fieldType.termQuery(value, queryShardContext);
|
|
|
- if (query instanceof TermQuery) {
|
|
|
- // for types that use the inverted index, we prefer using a caching terms
|
|
|
- // enum that will do a better job at reusing index inputs
|
|
|
- Term term = ((TermQuery) query).getTerm();
|
|
|
- FilterableTermsEnum termsEnum = getTermsEnum(term.field());
|
|
|
- if (termsEnum.seekExact(term.bytes())) {
|
|
|
- return termsEnum.docFreq();
|
|
|
- } else {
|
|
|
- return 0;
|
|
|
- }
|
|
|
- }
|
|
|
- // otherwise do it the naive way
|
|
|
- if (filter != null) {
|
|
|
- query = new BooleanQuery.Builder()
|
|
|
- .add(query, Occur.FILTER)
|
|
|
- .add(filter, Occur.FILTER)
|
|
|
- .build();
|
|
|
- }
|
|
|
- return queryShardContext.searcher().count(query);
|
|
|
- }
|
|
|
-
|
|
|
- long getBackgroundFrequency(BytesRef termBytes) throws IOException {
|
|
|
- String value = config.format().format(termBytes).toString();
|
|
|
- return getBackgroundFrequency(value);
|
|
|
- }
|
|
|
-
|
|
|
- long getBackgroundFrequency(long termNum) throws IOException {
|
|
|
- String value = config.format().format(termNum).toString();
|
|
|
- return getBackgroundFrequency(value);
|
|
|
- }
|
|
|
-
|
|
|
@Override
|
|
|
protected Aggregator createUnmapped(SearchContext searchContext,
|
|
|
Aggregator parent,
|
|
@@ -299,7 +211,6 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
|
|
|
}
|
|
|
SignificantTermsAggregatorSupplier sigTermsAggregatorSupplier = (SignificantTermsAggregatorSupplier) aggregatorSupplier;
|
|
|
|
|
|
- numberOfAggregatorsCreated++;
|
|
|
BucketCountThresholds bucketCountThresholds = new BucketCountThresholds(this.bucketCountThresholds);
|
|
|
if (bucketCountThresholds.getShardSize() == SignificantTermsAggregationBuilder.DEFAULT_BUCKET_COUNT_THRESHOLDS.getShardSize()) {
|
|
|
// The user has not made a shardSize selection .
|
|
@@ -317,10 +228,11 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
|
|
|
bucketCountThresholds.setShardSize(2 * BucketUtils.suggestShardSideQueueSize(bucketCountThresholds.getRequiredSize()));
|
|
|
}
|
|
|
|
|
|
- // TODO we should refactor so that we don't need to use this Factory as a singleton (e.g. stop passing `this` to the aggregators)
|
|
|
+ SignificanceLookup lookup = new SignificanceLookup(queryShardContext, config, backgroundFilter);
|
|
|
+
|
|
|
return sigTermsAggregatorSupplier.build(name, factories, valuesSource, config.format(),
|
|
|
bucketCountThresholds, includeExclude, executionHint, searchContext, parent,
|
|
|
- significanceHeuristic, this, collectsFromSingleBucket, metadata);
|
|
|
+ significanceHeuristic, lookup, collectsFromSingleBucket, metadata);
|
|
|
}
|
|
|
|
|
|
public enum ExecutionMode {
|
|
@@ -337,7 +249,7 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
|
|
|
SearchContext aggregationContext,
|
|
|
Aggregator parent,
|
|
|
SignificanceHeuristic significanceHeuristic,
|
|
|
- SignificantTermsAggregatorFactory termsAggregatorFactory,
|
|
|
+ SignificanceLookup lookup,
|
|
|
boolean collectsFromSingleBucket,
|
|
|
Map<String, Object> metadata) throws IOException {
|
|
|
|
|
@@ -345,7 +257,7 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
|
|
|
return new MapStringTermsAggregator(
|
|
|
name,
|
|
|
factories,
|
|
|
- a -> a.new SignificantTermsResults(termsAggregatorFactory, significanceHeuristic),
|
|
|
+ a -> a.new SignificantTermsResults(lookup, significanceHeuristic, collectsFromSingleBucket),
|
|
|
valuesSource,
|
|
|
null,
|
|
|
format,
|
|
@@ -374,7 +286,7 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
|
|
|
SearchContext aggregationContext,
|
|
|
Aggregator parent,
|
|
|
SignificanceHeuristic significanceHeuristic,
|
|
|
- SignificantTermsAggregatorFactory termsAggregatorFactory,
|
|
|
+ SignificanceLookup lookup,
|
|
|
boolean collectsFromSingleBucket,
|
|
|
Map<String, Object> metadata) throws IOException {
|
|
|
|
|
@@ -391,11 +303,11 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
|
|
|
**/
|
|
|
remapGlobalOrd = false;
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
return new GlobalOrdinalsStringTermsAggregator(
|
|
|
name,
|
|
|
factories,
|
|
|
- a -> a.new SignificantTermsResults(termsAggregatorFactory, significanceHeuristic),
|
|
|
+ a -> a.new SignificantTermsResults(lookup, significanceHeuristic, collectsFromSingleBucket),
|
|
|
(ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource,
|
|
|
null,
|
|
|
format,
|
|
@@ -440,7 +352,7 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
|
|
|
SearchContext aggregationContext,
|
|
|
Aggregator parent,
|
|
|
SignificanceHeuristic significanceHeuristic,
|
|
|
- SignificantTermsAggregatorFactory termsAggregatorFactory,
|
|
|
+ SignificanceLookup lookup,
|
|
|
boolean collectsFromSingleBucket,
|
|
|
Map<String, Object> metadata) throws IOException;
|
|
|
|
|
@@ -449,15 +361,4 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
|
|
|
return parseField.getPreferredName();
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
- @Override
|
|
|
- public void close() {
|
|
|
- try {
|
|
|
- if (termsEnum instanceof Releasable) {
|
|
|
- ((Releasable) termsEnum).close();
|
|
|
- }
|
|
|
- } finally {
|
|
|
- termsEnum = null;
|
|
|
- }
|
|
|
- }
|
|
|
}
|