|
@@ -21,10 +21,9 @@ package org.elasticsearch.index.fielddata.plain;
|
|
|
import org.apache.logging.log4j.LogManager;
|
|
|
import org.apache.logging.log4j.Logger;
|
|
|
import org.apache.lucene.index.DirectoryReader;
|
|
|
-import org.apache.lucene.index.FilteredTermsEnum;
|
|
|
-import org.apache.lucene.index.LeafReader;
|
|
|
import org.apache.lucene.index.LeafReaderContext;
|
|
|
import org.apache.lucene.index.OrdinalMap;
|
|
|
+import org.apache.lucene.index.SortedSetDocValues;
|
|
|
import org.apache.lucene.index.Terms;
|
|
|
import org.apache.lucene.index.TermsEnum;
|
|
|
import org.apache.lucene.util.BytesRef;
|
|
@@ -32,35 +31,47 @@ import org.elasticsearch.ElasticsearchException;
|
|
|
import org.elasticsearch.index.fielddata.IndexFieldDataCache;
|
|
|
import org.elasticsearch.index.fielddata.IndexOrdinalsFieldData;
|
|
|
import org.elasticsearch.index.fielddata.LeafOrdinalsFieldData;
|
|
|
+import org.elasticsearch.index.fielddata.RamAccountingTermsEnum;
|
|
|
+import org.elasticsearch.index.fielddata.ScriptDocValues;
|
|
|
import org.elasticsearch.index.fielddata.ordinals.GlobalOrdinalsBuilder;
|
|
|
import org.elasticsearch.index.fielddata.ordinals.GlobalOrdinalsIndexFieldData;
|
|
|
import org.elasticsearch.indices.breaker.CircuitBreakerService;
|
|
|
import org.elasticsearch.search.aggregations.support.ValuesSourceType;
|
|
|
|
|
|
import java.io.IOException;
|
|
|
+import java.util.function.Function;
|
|
|
|
|
|
-public abstract class AbstractIndexOrdinalsFieldData extends AbstractIndexFieldData<LeafOrdinalsFieldData>
|
|
|
- implements IndexOrdinalsFieldData {
|
|
|
+public abstract class AbstractIndexOrdinalsFieldData implements IndexOrdinalsFieldData {
|
|
|
private static final Logger logger = LogManager.getLogger(AbstractBinaryDVLeafFieldData.class);
|
|
|
|
|
|
- private final double minFrequency, maxFrequency;
|
|
|
- private final int minSegmentSize;
|
|
|
+ private final String fieldName;
|
|
|
+ private final ValuesSourceType valuesSourceType;
|
|
|
+ private final IndexFieldDataCache cache;
|
|
|
protected final CircuitBreakerService breakerService;
|
|
|
+ protected final Function<SortedSetDocValues, ScriptDocValues<?>> scriptFunction;
|
|
|
|
|
|
protected AbstractIndexOrdinalsFieldData(
|
|
|
String fieldName,
|
|
|
ValuesSourceType valuesSourceType,
|
|
|
IndexFieldDataCache cache,
|
|
|
CircuitBreakerService breakerService,
|
|
|
- double minFrequency,
|
|
|
- double maxFrequency,
|
|
|
- int minSegmentSize
|
|
|
+ Function<SortedSetDocValues, ScriptDocValues<?>> scriptFunction
|
|
|
) {
|
|
|
- super(fieldName, valuesSourceType, cache);
|
|
|
+ this.fieldName = fieldName;
|
|
|
+ this.valuesSourceType = valuesSourceType;
|
|
|
+ this.cache = cache;
|
|
|
this.breakerService = breakerService;
|
|
|
- this.minFrequency = minFrequency;
|
|
|
- this.maxFrequency = maxFrequency;
|
|
|
- this.minSegmentSize = minSegmentSize;
|
|
|
+ this.scriptFunction = scriptFunction;
|
|
|
+ }
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public String getFieldName() {
|
|
|
+ return this.fieldName;
|
|
|
+ }
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public ValuesSourceType getValuesSourceType() {
|
|
|
+ return valuesSourceType;
|
|
|
}
|
|
|
|
|
|
@Override
|
|
@@ -68,6 +79,27 @@ public abstract class AbstractIndexOrdinalsFieldData extends AbstractIndexFieldD
|
|
|
return null;
|
|
|
}
|
|
|
|
|
|
+ @Override
|
|
|
+ public LeafOrdinalsFieldData load(LeafReaderContext context) {
|
|
|
+ if (context.reader().getFieldInfos().fieldInfo(fieldName) == null) {
|
|
|
+ // Some leaf readers may be wrapped and report different set of fields and use the same cache key.
|
|
|
+ // If a field can't be found then it doesn't mean it isn't there,
|
|
|
+ // so if a field doesn't exist then we don't cache it and just return an empty field data instance.
|
|
|
+ // The next time the field is found, we do cache.
|
|
|
+ return AbstractLeafOrdinalsFieldData.empty();
|
|
|
+ }
|
|
|
+
|
|
|
+ try {
|
|
|
+ return cache.load(context, this);
|
|
|
+ } catch (Exception e) {
|
|
|
+ if (e instanceof ElasticsearchException) {
|
|
|
+ throw (ElasticsearchException) e;
|
|
|
+ } else {
|
|
|
+ throw new ElasticsearchException(e);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
@Override
|
|
|
public IndexOrdinalsFieldData loadGlobal(DirectoryReader indexReader) {
|
|
|
IndexOrdinalsFieldData fieldData = loadGlobalInternal(indexReader);
|
|
@@ -121,60 +153,49 @@ public abstract class AbstractIndexOrdinalsFieldData extends AbstractIndexFieldD
|
|
|
this,
|
|
|
breakerService,
|
|
|
logger,
|
|
|
- AbstractLeafOrdinalsFieldData.DEFAULT_SCRIPT_FUNCTION
|
|
|
+ scriptFunction
|
|
|
);
|
|
|
}
|
|
|
|
|
|
- @Override
|
|
|
- protected LeafOrdinalsFieldData empty(int maxDoc) {
|
|
|
- return AbstractLeafOrdinalsFieldData.empty();
|
|
|
- }
|
|
|
-
|
|
|
- protected TermsEnum filter(Terms terms, TermsEnum iterator, LeafReader reader) throws IOException {
|
|
|
- if (iterator == null) {
|
|
|
- return null;
|
|
|
- }
|
|
|
- int docCount = terms.getDocCount();
|
|
|
- if (docCount == -1) {
|
|
|
- docCount = reader.maxDoc();
|
|
|
- }
|
|
|
- if (docCount >= minSegmentSize) {
|
|
|
- final int minFreq = minFrequency > 1.0
|
|
|
- ? (int) minFrequency
|
|
|
- : (int)(docCount * minFrequency);
|
|
|
- final int maxFreq = maxFrequency > 1.0
|
|
|
- ? (int) maxFrequency
|
|
|
- : (int)(docCount * maxFrequency);
|
|
|
- if (minFreq > 1 || maxFreq < docCount) {
|
|
|
- iterator = new FrequencyFilter(iterator, minFreq, maxFreq);
|
|
|
- }
|
|
|
- }
|
|
|
- return iterator;
|
|
|
- }
|
|
|
-
|
|
|
@Override
|
|
|
public boolean supportsGlobalOrdinalsMapping() {
|
|
|
return false;
|
|
|
}
|
|
|
|
|
|
- private static final class FrequencyFilter extends FilteredTermsEnum {
|
|
|
-
|
|
|
- private int minFreq;
|
|
|
- private int maxFreq;
|
|
|
- FrequencyFilter(TermsEnum delegate, int minFreq, int maxFreq) {
|
|
|
- super(delegate, false);
|
|
|
- this.minFreq = minFreq;
|
|
|
- this.maxFreq = maxFreq;
|
|
|
- }
|
|
|
-
|
|
|
- @Override
|
|
|
- protected AcceptStatus accept(BytesRef arg0) throws IOException {
|
|
|
- int docFreq = docFreq();
|
|
|
- if (docFreq >= minFreq && docFreq <= maxFreq) {
|
|
|
- return AcceptStatus.YES;
|
|
|
- }
|
|
|
- return AcceptStatus.NO;
|
|
|
- }
|
|
|
+ /**
|
|
|
+ * A {@code PerValueEstimator} is a sub-class that can be used to estimate
|
|
|
+ * the memory overhead for loading the data. Each field data
|
|
|
+ * implementation should implement its own {@code PerValueEstimator} if it
|
|
|
+ * intends to take advantage of the CircuitBreaker.
|
|
|
+ * <p>
|
|
|
+ * Note that the .beforeLoad(...) and .afterLoad(...) methods must be
|
|
|
+ * manually called.
|
|
|
+ */
|
|
|
+ public interface PerValueEstimator {
|
|
|
+
|
|
|
+ /**
|
|
|
+ * @return the number of bytes for the given term
|
|
|
+ */
|
|
|
+ long bytesPerValue(BytesRef term);
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Execute any pre-loading estimations for the terms. May also
|
|
|
+ * optionally wrap a {@link TermsEnum} in a
|
|
|
+ * {@link RamAccountingTermsEnum}
|
|
|
+ * which will estimate the memory on a per-term basis.
|
|
|
+ *
|
|
|
+ * @param terms terms to be estimated
|
|
|
+ * @return A TermsEnum for the given terms
|
|
|
+ */
|
|
|
+ TermsEnum beforeLoad(Terms terms) throws IOException;
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Possibly adjust a circuit breaker after field data has been loaded,
|
|
|
+ * now that the actual amount of memory used by the field data is known
|
|
|
+ *
|
|
|
+ * @param termsEnum terms that were loaded
|
|
|
+ * @param actualUsed actual field data memory usage
|
|
|
+ */
|
|
|
+ void afterLoad(TermsEnum termsEnum, long actualUsed);
|
|
|
}
|
|
|
-
|
|
|
}
|