|
@@ -23,6 +23,7 @@ import com.google.common.base.Preconditions;
|
|
import org.apache.lucene.index.AtomicReader;
|
|
import org.apache.lucene.index.AtomicReader;
|
|
import org.apache.lucene.index.AtomicReaderContext;
|
|
import org.apache.lucene.index.AtomicReaderContext;
|
|
import org.apache.lucene.index.Terms;
|
|
import org.apache.lucene.index.Terms;
|
|
|
|
+import org.apache.lucene.index.TermsEnum;
|
|
import org.apache.lucene.util.BytesRef;
|
|
import org.apache.lucene.util.BytesRef;
|
|
import org.apache.lucene.util.BytesRefIterator;
|
|
import org.apache.lucene.util.BytesRefIterator;
|
|
import org.apache.lucene.util.FixedBitSet;
|
|
import org.apache.lucene.util.FixedBitSet;
|
|
@@ -30,8 +31,10 @@ import org.apache.lucene.util.NumericUtils;
|
|
import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
|
|
import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
|
|
import org.apache.lucene.util.packed.PackedInts;
|
|
import org.apache.lucene.util.packed.PackedInts;
|
|
import org.elasticsearch.common.Nullable;
|
|
import org.elasticsearch.common.Nullable;
|
|
|
|
+import org.elasticsearch.common.breaker.MemoryCircuitBreaker;
|
|
import org.elasticsearch.common.settings.Settings;
|
|
import org.elasticsearch.common.settings.Settings;
|
|
import org.elasticsearch.index.Index;
|
|
import org.elasticsearch.index.Index;
|
|
|
|
+import org.elasticsearch.index.fielddata.RamAccountingTermsEnum;
|
|
import org.elasticsearch.index.fielddata.*;
|
|
import org.elasticsearch.index.fielddata.*;
|
|
import org.elasticsearch.index.fielddata.fieldcomparator.LongValuesComparatorSource;
|
|
import org.elasticsearch.index.fielddata.fieldcomparator.LongValuesComparatorSource;
|
|
import org.elasticsearch.index.fielddata.fieldcomparator.SortMode;
|
|
import org.elasticsearch.index.fielddata.fieldcomparator.SortMode;
|
|
@@ -40,7 +43,9 @@ import org.elasticsearch.index.fielddata.ordinals.Ordinals.Docs;
|
|
import org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder;
|
|
import org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder;
|
|
import org.elasticsearch.index.mapper.FieldMapper;
|
|
import org.elasticsearch.index.mapper.FieldMapper;
|
|
import org.elasticsearch.index.settings.IndexSettings;
|
|
import org.elasticsearch.index.settings.IndexSettings;
|
|
|
|
+import org.elasticsearch.indices.fielddata.breaker.CircuitBreakerService;
|
|
|
|
|
|
|
|
+import java.io.IOException;
|
|
import java.util.EnumSet;
|
|
import java.util.EnumSet;
|
|
|
|
|
|
/**
|
|
/**
|
|
@@ -58,18 +63,23 @@ public class PackedArrayIndexFieldData extends AbstractIndexFieldData<AtomicNume
|
|
}
|
|
}
|
|
|
|
|
|
@Override
|
|
@Override
|
|
- public IndexFieldData<AtomicNumericFieldData> build(Index index, @IndexSettings Settings indexSettings, FieldMapper<?> mapper, IndexFieldDataCache cache) {
|
|
|
|
- return new PackedArrayIndexFieldData(index, indexSettings, mapper.names(), mapper.fieldDataType(), cache, numericType);
|
|
|
|
|
|
+ public IndexFieldData<AtomicNumericFieldData> build(Index index, @IndexSettings Settings indexSettings, FieldMapper<?> mapper,
|
|
|
|
+ IndexFieldDataCache cache, CircuitBreakerService breakerService) {
|
|
|
|
+ return new PackedArrayIndexFieldData(index, indexSettings, mapper.names(), mapper.fieldDataType(), cache, numericType, breakerService);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
private final NumericType numericType;
|
|
private final NumericType numericType;
|
|
|
|
+ private final CircuitBreakerService breakerService;
|
|
|
|
|
|
- public PackedArrayIndexFieldData(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType fieldDataType, IndexFieldDataCache cache, NumericType numericType) {
|
|
|
|
|
|
+ public PackedArrayIndexFieldData(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames,
|
|
|
|
+ FieldDataType fieldDataType, IndexFieldDataCache cache, NumericType numericType,
|
|
|
|
+ CircuitBreakerService breakerService) {
|
|
super(index, indexSettings, fieldNames, fieldDataType, cache);
|
|
super(index, indexSettings, fieldNames, fieldDataType, cache);
|
|
Preconditions.checkNotNull(numericType);
|
|
Preconditions.checkNotNull(numericType);
|
|
Preconditions.checkArgument(EnumSet.of(NumericType.BYTE, NumericType.SHORT, NumericType.INT, NumericType.LONG).contains(numericType), getClass().getSimpleName() + " only supports integer types, not " + numericType);
|
|
Preconditions.checkArgument(EnumSet.of(NumericType.BYTE, NumericType.SHORT, NumericType.INT, NumericType.LONG).contains(numericType), getClass().getSimpleName() + " only supports integer types, not " + numericType);
|
|
this.numericType = numericType;
|
|
this.numericType = numericType;
|
|
|
|
+ this.breakerService = breakerService;
|
|
}
|
|
}
|
|
|
|
|
|
@Override
|
|
@Override
|
|
@@ -88,8 +98,12 @@ public class PackedArrayIndexFieldData extends AbstractIndexFieldData<AtomicNume
|
|
public AtomicNumericFieldData loadDirect(AtomicReaderContext context) throws Exception {
|
|
public AtomicNumericFieldData loadDirect(AtomicReaderContext context) throws Exception {
|
|
AtomicReader reader = context.reader();
|
|
AtomicReader reader = context.reader();
|
|
Terms terms = reader.terms(getFieldNames().indexName());
|
|
Terms terms = reader.terms(getFieldNames().indexName());
|
|
|
|
+ PackedArrayAtomicFieldData data = null;
|
|
|
|
+ PackedArrayEstimator estimator = new PackedArrayEstimator(breakerService.getBreaker(), getNumericType());
|
|
if (terms == null) {
|
|
if (terms == null) {
|
|
- return PackedArrayAtomicFieldData.empty(reader.maxDoc());
|
|
|
|
|
|
+ data = PackedArrayAtomicFieldData.empty(reader.maxDoc());
|
|
|
|
+ estimator.adjustForNoTerms(data.getMemorySizeInBytes());
|
|
|
|
+ return data;
|
|
}
|
|
}
|
|
// TODO: how can we guess the number of terms? numerics end up creating more terms per value...
|
|
// TODO: how can we guess the number of terms? numerics end up creating more terms per value...
|
|
// Lucene encodes numeric data so that the lexicographical (encoded) order matches the integer order so we know the sequence of
|
|
// Lucene encodes numeric data so that the lexicographical (encoded) order matches the integer order so we know the sequence of
|
|
@@ -98,8 +112,10 @@ public class PackedArrayIndexFieldData extends AbstractIndexFieldData<AtomicNume
|
|
|
|
|
|
final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO);
|
|
final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO);
|
|
OrdinalsBuilder builder = new OrdinalsBuilder(-1, reader.maxDoc(), acceptableTransientOverheadRatio);
|
|
OrdinalsBuilder builder = new OrdinalsBuilder(-1, reader.maxDoc(), acceptableTransientOverheadRatio);
|
|
|
|
+ TermsEnum termsEnum = estimator.beforeLoad(terms);
|
|
|
|
+ boolean success = false;
|
|
try {
|
|
try {
|
|
- BytesRefIterator iter = builder.buildFromTerms(getNumericType().wrapTermsEnum(terms.iterator(null)));
|
|
|
|
|
|
+ BytesRefIterator iter = builder.buildFromTerms(termsEnum);
|
|
BytesRef term;
|
|
BytesRef term;
|
|
assert !getNumericType().isFloatingPoint();
|
|
assert !getNumericType().isFloatingPoint();
|
|
final boolean indexedAsLong = getNumericType().requiredBits() > 32;
|
|
final boolean indexedAsLong = getNumericType().requiredBits() > 32;
|
|
@@ -156,28 +172,38 @@ public class PackedArrayIndexFieldData extends AbstractIndexFieldData<AtomicNume
|
|
final long ordinalsSize = build.getMemorySizeInBytes();
|
|
final long ordinalsSize = build.getMemorySizeInBytes();
|
|
|
|
|
|
if (uniqueValuesSize + ordinalsSize < singleValuesSize) {
|
|
if (uniqueValuesSize + ordinalsSize < singleValuesSize) {
|
|
- return new PackedArrayAtomicFieldData.WithOrdinals(values, reader.maxDoc(), build);
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- final PackedInts.Mutable sValues = PackedInts.getMutable(reader.maxDoc(), bitsRequired, acceptableOverheadRatio);
|
|
|
|
- if (missingValue != 0) {
|
|
|
|
- sValues.fill(0, sValues.size(), missingValue);
|
|
|
|
- }
|
|
|
|
- for (int i = 0; i < reader.maxDoc(); i++) {
|
|
|
|
- final long ord = ordinals.getOrd(i);
|
|
|
|
- if (ord != Ordinals.MISSING_ORDINAL) {
|
|
|
|
- sValues.set(i, values.get(ord - 1) - minValue);
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- if (set == null) {
|
|
|
|
- return new PackedArrayAtomicFieldData.Single(sValues, minValue, reader.maxDoc(), ordinals.getNumOrds());
|
|
|
|
|
|
+ data = new PackedArrayAtomicFieldData.WithOrdinals(values, reader.maxDoc(), build);
|
|
} else {
|
|
} else {
|
|
- return new PackedArrayAtomicFieldData.SingleSparse(sValues, minValue, reader.maxDoc(), missingValue, ordinals.getNumOrds());
|
|
|
|
|
|
+ final PackedInts.Mutable sValues = PackedInts.getMutable(reader.maxDoc(), bitsRequired, acceptableOverheadRatio);
|
|
|
|
+ if (missingValue != 0) {
|
|
|
|
+ sValues.fill(0, sValues.size(), missingValue);
|
|
|
|
+ }
|
|
|
|
+ for (int i = 0; i < reader.maxDoc(); i++) {
|
|
|
|
+ final long ord = ordinals.getOrd(i);
|
|
|
|
+ if (ord != Ordinals.MISSING_ORDINAL) {
|
|
|
|
+ sValues.set(i, values.get(ord - 1) - minValue);
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ if (set == null) {
|
|
|
|
+ data = new PackedArrayAtomicFieldData.Single(sValues, minValue, reader.maxDoc(), ordinals.getNumOrds());
|
|
|
|
+ } else {
|
|
|
|
+ data = new PackedArrayAtomicFieldData.SingleSparse(sValues, minValue, reader.maxDoc(), missingValue, ordinals.getNumOrds());
|
|
|
|
+ }
|
|
}
|
|
}
|
|
} else {
|
|
} else {
|
|
- return new PackedArrayAtomicFieldData.WithOrdinals(values, reader.maxDoc(), build);
|
|
|
|
|
|
+ data = new PackedArrayAtomicFieldData.WithOrdinals(values, reader.maxDoc(), build);
|
|
}
|
|
}
|
|
|
|
+
|
|
|
|
+ success = true;
|
|
|
|
+ return data;
|
|
} finally {
|
|
} finally {
|
|
|
|
+ if (!success) {
|
|
|
|
+ // If something went wrong, unwind any current estimations we've made
|
|
|
|
+ estimator.afterLoad(termsEnum, 0);
|
|
|
|
+ } else {
|
|
|
|
+ // Adjust as usual, based on the actual size of the field data
|
|
|
|
+ estimator.afterLoad(termsEnum, data.getMemorySizeInBytes());
|
|
|
|
+ }
|
|
builder.close();
|
|
builder.close();
|
|
}
|
|
}
|
|
|
|
|
|
@@ -187,4 +213,62 @@ public class PackedArrayIndexFieldData extends AbstractIndexFieldData<AtomicNume
|
|
public XFieldComparatorSource comparatorSource(@Nullable Object missingValue, SortMode sortMode) {
|
|
public XFieldComparatorSource comparatorSource(@Nullable Object missingValue, SortMode sortMode) {
|
|
return new LongValuesComparatorSource(this, missingValue, sortMode);
|
|
return new LongValuesComparatorSource(this, missingValue, sortMode);
|
|
}
|
|
}
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * Estimator that wraps numeric field data loading in a
|
|
|
|
+ * RamAccountingTermsEnum, adjusting the breaker after data has been
|
|
|
|
+ * loaded
|
|
|
|
+ */
|
|
|
|
+ public class PackedArrayEstimator implements PerValueEstimator {
|
|
|
|
+
|
|
|
|
+ private final MemoryCircuitBreaker breaker;
|
|
|
|
+ private final NumericType type;
|
|
|
|
+
|
|
|
|
+ public PackedArrayEstimator(MemoryCircuitBreaker breaker, NumericType type) {
|
|
|
|
+ this.breaker = breaker;
|
|
|
|
+ this.type = type;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * @return number of bytes per term, based on the NumericValue.requiredBits()
|
|
|
|
+ */
|
|
|
|
+ @Override
|
|
|
|
+ public long bytesPerValue(BytesRef term) {
|
|
|
|
+ // Estimate about about 0.8 (8 / 10) compression ratio for
|
|
|
|
+ // numbers, but at least 4 bytes
|
|
|
|
+ return Math.max(type.requiredBits() / 10, 4);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * @return A TermsEnum wrapped in a RamAccountingTermsEnum
|
|
|
|
+ * @throws IOException
|
|
|
|
+ */
|
|
|
|
+ @Override
|
|
|
|
+ public TermsEnum beforeLoad(Terms terms) throws IOException {
|
|
|
|
+ return new RamAccountingTermsEnum(type.wrapTermsEnum(terms.iterator(null)), breaker, this);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * Adjusts the breaker based on the aggregated value from the RamAccountingTermsEnum
|
|
|
|
+ *
|
|
|
|
+ * @param termsEnum terms that were wrapped and loaded
|
|
|
|
+ * @param actualUsed actual field data memory usage
|
|
|
|
+ */
|
|
|
|
+ @Override
|
|
|
|
+ public void afterLoad(TermsEnum termsEnum, long actualUsed) {
|
|
|
|
+ assert termsEnum instanceof RamAccountingTermsEnum;
|
|
|
|
+ long estimatedBytes = ((RamAccountingTermsEnum) termsEnum).getTotalBytes();
|
|
|
|
+ breaker.addWithoutBreaking(-(estimatedBytes - actualUsed));
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * Adjust the breaker when no terms were actually loaded, but the field
|
|
|
|
+ * data takes up space regardless. For instance, when ordinals are
|
|
|
|
+ * used.
|
|
|
|
+ * @param actualUsed bytes actually used
|
|
|
|
+ */
|
|
|
|
+ public void adjustForNoTerms(long actualUsed) {
|
|
|
|
+ breaker.addWithoutBreaking(actualUsed);
|
|
|
|
+ }
|
|
|
|
+ }
|
|
}
|
|
}
|