@@ -23,6 +23,7 @@ import org.apache.logging.log4j.Logger;
 import org.apache.lucene.codecs.blocktree.BlockTreeTermsReader;
 import org.apache.lucene.codecs.blocktree.BlockTreeTermsReader.FSTLoadMode;
 import org.apache.lucene.document.Field;
+import org.apache.lucene.document.LongPoint;
 import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexCommit;
@@ -31,17 +32,23 @@ import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.LiveIndexWriterConfig;
 import org.apache.lucene.index.MergePolicy;
 import org.apache.lucene.index.SegmentCommitInfo;
 import org.apache.lucene.index.SegmentInfos;
 import org.apache.lucene.index.SoftDeletesRetentionMergePolicy;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ReferenceManager;
+import org.apache.lucene.search.ScoreMode;
+import org.apache.lucene.search.Scorer;
 import org.apache.lucene.search.SearcherFactory;
 import org.apache.lucene.search.SearcherManager;
 import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.Weight;
 import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FilterDirectory;
@@ -68,12 +75,14 @@ import org.elasticsearch.common.util.concurrent.ReleasableLock;
 import org.elasticsearch.core.internal.io.IOUtils;
 import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.VersionType;
+import org.elasticsearch.index.fieldvisitor.IdOnlyFieldVisitor;
 import org.elasticsearch.index.mapper.IdFieldMapper;
 import org.elasticsearch.index.mapper.MapperService;
 import org.elasticsearch.index.mapper.ParseContext;
 import org.elasticsearch.index.mapper.ParsedDocument;
 import org.elasticsearch.index.mapper.SeqNoFieldMapper;
 import org.elasticsearch.index.mapper.SourceFieldMapper;
+import org.elasticsearch.index.mapper.Uid;
 import org.elasticsearch.index.merge.MergeStats;
 import org.elasticsearch.index.merge.OnGoingMerge;
 import org.elasticsearch.index.seqno.LocalCheckpointTracker;
@@ -94,7 +103,6 @@ import java.io.Closeable;
 import java.io.IOException;
 import java.nio.file.Path;
 import java.util.Arrays;
-import java.util.Collection;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -109,7 +117,7 @@ import java.util.concurrent.locks.Lock;
 import java.util.concurrent.locks.ReentrantLock;
 import java.util.function.BiFunction;
 import java.util.function.LongSupplier;
-import java.util.function.Supplier;
+import java.util.stream.Collectors;
 import java.util.stream.Stream;
 
 public class InternalEngine extends Engine {
@@ -203,6 +211,7 @@ public class InternalEngine extends Engine {
             this.softDeletesPolicy = newSoftDeletesPolicy();
             this.combinedDeletionPolicy =
                 new CombinedDeletionPolicy(logger, translogDeletionPolicy, softDeletesPolicy, translog::getLastSyncedGlobalCheckpoint);
+            this.localCheckpointTracker = createLocalCheckpointTracker(localCheckpointTrackerSupplier);
             writer = createWriter();
             bootstrapAppendOnlyInfoFromWriter(writer);
             historyUUID = loadHistoryUUID(writer);
@@ -232,11 +241,17 @@ public class InternalEngine extends Engine {
             for (ReferenceManager.RefreshListener listener: engineConfig.getInternalRefreshListener()) {
                 this.internalSearcherManager.addListener(listener);
             }
-            this.localCheckpointTracker = createLocalCheckpointTracker(engineConfig, lastCommittedSegmentInfos, logger,
-                () -> acquireSearcher("create_local_checkpoint_tracker", SearcherScope.INTERNAL), localCheckpointTrackerSupplier);
             this.lastRefreshedCheckpointListener = new LastRefreshedCheckpointListener(localCheckpointTracker.getCheckpoint());
             this.internalSearcherManager.addListener(lastRefreshedCheckpointListener);
             maxSeqNoOfUpdatesOrDeletes = new AtomicLong(SequenceNumbers.max(localCheckpointTracker.getMaxSeqNo(), translog.getMaxSeqNo()));
+            if (softDeleteEnabled && localCheckpointTracker.getCheckpoint() < localCheckpointTracker.getMaxSeqNo()) {
+                try (Searcher searcher = acquireSearcher("restore_version_map_and_checkpoint_tracker", SearcherScope.INTERNAL)) {
+                    restoreVersionMapAndCheckpointTracker(Lucene.wrapAllDocsLive(searcher.getDirectoryReader()));
+                } catch (IOException e) {
+                    throw new EngineCreationFailureException(config().getShardId(),
+                        "failed to restore version map and local checkpoint tracker", e);
+                }
+            }
             success = true;
         } finally {
             if (success == false) {
@@ -250,30 +265,16 @@ public class InternalEngine extends Engine {
         logger.trace("created new InternalEngine");
     }
 
-    private static LocalCheckpointTracker createLocalCheckpointTracker(EngineConfig engineConfig, SegmentInfos lastCommittedSegmentInfos,
-        Logger logger, Supplier<Searcher> searcherSupplier, BiFunction<Long, Long, LocalCheckpointTracker> localCheckpointTrackerSupplier) {
-        try {
-            final SequenceNumbers.CommitInfo seqNoStats =
-                SequenceNumbers.loadSeqNoInfoFromLuceneCommit(lastCommittedSegmentInfos.userData.entrySet());
-            final long maxSeqNo = seqNoStats.maxSeqNo;
-            final long localCheckpoint = seqNoStats.localCheckpoint;
-            logger.trace("recovered maximum sequence number [{}] and local checkpoint [{}]", maxSeqNo, localCheckpoint);
-            final LocalCheckpointTracker tracker = localCheckpointTrackerSupplier.apply(maxSeqNo, localCheckpoint);
-            // Operations that are optimized using max_seq_no_of_updates optimization must not be processed twice; otherwise, they will
-            // create duplicates in Lucene. To avoid this we check the LocalCheckpointTracker to see if an operation was already processed.
-            // Thus, we need to restore the LocalCheckpointTracker bit by bit to ensure the consistency between LocalCheckpointTracker and
-            // Lucene index. This is not the only solution since we can bootstrap max_seq_no_of_updates with max_seq_no of the commit to
-            // disable the MSU optimization during recovery. Here we prefer to maintain the consistency of LocalCheckpointTracker.
-            if (localCheckpoint < maxSeqNo && engineConfig.getIndexSettings().isSoftDeleteEnabled()) {
-                try (Searcher searcher = searcherSupplier.get()) {
-                    Lucene.scanSeqNosInReader(searcher.getDirectoryReader(), localCheckpoint + 1, maxSeqNo,
-                        tracker::markSeqNoAsCompleted);
-                }
-            }
-            return tracker;
-        } catch (IOException ex) {
-            throw new EngineCreationFailureException(engineConfig.getShardId(), "failed to create local checkpoint tracker", ex);
-        }
+    private LocalCheckpointTracker createLocalCheckpointTracker(
+        BiFunction<Long, Long, LocalCheckpointTracker> localCheckpointTrackerSupplier) throws IOException {
+        final long maxSeqNo;
+        final long localCheckpoint;
+        final SequenceNumbers.CommitInfo seqNoStats =
+            SequenceNumbers.loadSeqNoInfoFromLuceneCommit(store.readLastCommittedSegmentsInfo().userData.entrySet());
+        maxSeqNo = seqNoStats.maxSeqNo;
+        localCheckpoint = seqNoStats.localCheckpoint;
+        logger.trace("recovered maximum sequence number [{}] and local checkpoint [{}]", maxSeqNo, localCheckpoint);
+        return localCheckpointTrackerSupplier.apply(maxSeqNo, localCheckpoint);
     }
 
     private SoftDeletesPolicy newSoftDeletesPolicy() throws IOException {
@@ -676,21 +677,26 @@ public class InternalEngine extends Engine {
         LUCENE_DOC_NOT_FOUND
     }
 
+    private static OpVsLuceneDocStatus compareOpToVersionMapOnSeqNo(String id, long seqNo, long primaryTerm, VersionValue versionValue) {
+        Objects.requireNonNull(versionValue);
+        if (seqNo > versionValue.seqNo) {
+            return OpVsLuceneDocStatus.OP_NEWER;
+        } else if (seqNo == versionValue.seqNo) {
+            assert versionValue.term == primaryTerm : "primary term not matched; id=" + id + " seq_no=" + seqNo
+                + " op_term=" + primaryTerm + " existing_term=" + versionValue.term;
+            return OpVsLuceneDocStatus.OP_STALE_OR_EQUAL;
+        } else {
+            return OpVsLuceneDocStatus.OP_STALE_OR_EQUAL;
+        }
+    }
+
     private OpVsLuceneDocStatus compareOpToLuceneDocBasedOnSeqNo(final Operation op) throws IOException {
         assert op.seqNo() != SequenceNumbers.UNASSIGNED_SEQ_NO : "resolving ops based on seq# but no seqNo is found";
         final OpVsLuceneDocStatus status;
         VersionValue versionValue = getVersionFromMap(op.uid().bytes());
         assert incrementVersionLookup();
         if (versionValue != null) {
-            if (op.seqNo() > versionValue.seqNo) {
-                status = OpVsLuceneDocStatus.OP_NEWER;
-            } else if (op.seqNo() == versionValue.seqNo) {
-                assert versionValue.term == op.primaryTerm() : "primary term not matched; id=" + op.id() + " seq_no=" + op.seqNo()
-                    + " op_term=" + op.primaryTerm() + " existing_term=" + versionValue.term;
-                status = OpVsLuceneDocStatus.OP_STALE_OR_EQUAL;
-            } else {
-                status = OpVsLuceneDocStatus.OP_STALE_OR_EQUAL;
-            }
+            status = compareOpToVersionMapOnSeqNo(op.id(), op.seqNo(), op.primaryTerm(), versionValue);
         } else {
             // load from index
             assert incrementIndexVersionLookup();
@@ -1875,8 +1881,9 @@ public class InternalEngine extends Engine {
     }
 
     // for testing
-    final Collection<DeleteVersionValue> getDeletedTombstones() {
-        return versionMap.getAllTombstones().values();
+    final Map<BytesRef, VersionValue> getVersionMap() {
+        return Stream.concat(versionMap.getAllCurrent().entrySet().stream(), versionMap.getAllTombstones().entrySet().stream())
+            .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
     }
 
     @Override
@@ -2507,10 +2514,6 @@ public class InternalEngine extends Engine {
         return true;
     }
 
-    int getVersionMapSize() {
-        return versionMap.getAllCurrent().size();
-    }
-
     boolean isSafeAccessRequired() {
         return versionMap.isSafeAccessRequired();
     }
@@ -2773,4 +2776,57 @@ public class InternalEngine extends Engine {
         final long minRetainedTranslogGen = Translog.readMinTranslogGeneration(translogPath, translogUUID);
         store.trimUnsafeCommits(globalCheckpoint, minRetainedTranslogGen, engineConfig.getIndexSettings().getIndexVersionCreated());
     }
+
+    /**
+     * Restores the live version map and local checkpoint of this engine using documents (including soft-deleted)
+     * after the local checkpoint in the safe commit. This step ensures the live version map and checkpoint tracker
+     * are in sync with the Lucene commit.
+     */
+    private void restoreVersionMapAndCheckpointTracker(DirectoryReader directoryReader) throws IOException {
+        final IndexSearcher searcher = new IndexSearcher(directoryReader);
+        searcher.setQueryCache(null);
+        final Query query = LongPoint.newRangeQuery(SeqNoFieldMapper.NAME, getLocalCheckpoint() + 1, Long.MAX_VALUE);
+        final Weight weight = searcher.createWeight(query, ScoreMode.COMPLETE_NO_SCORES, 1.0f);
+        for (LeafReaderContext leaf : directoryReader.leaves()) {
+            final Scorer scorer = weight.scorer(leaf);
+            if (scorer == null) {
+                continue;
+            }
+            final CombinedDocValues dv = new CombinedDocValues(leaf.reader());
+            final IdOnlyFieldVisitor idFieldVisitor = new IdOnlyFieldVisitor();
+            final DocIdSetIterator iterator = scorer.iterator();
+            int docId;
+            while ((docId = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+                final long primaryTerm = dv.docPrimaryTerm(docId);
+                if (primaryTerm == -1L) {
+                    continue; // skip children docs which do not have primary term
+                }
+                final long seqNo = dv.docSeqNo(docId);
+                localCheckpointTracker.markSeqNoAsCompleted(seqNo);
+                idFieldVisitor.reset();
+                leaf.reader().document(docId, idFieldVisitor);
+                if (idFieldVisitor.getId() == null) {
+                    assert dv.isTombstone(docId);
+                    continue;
+                }
+                final BytesRef uid = new Term(IdFieldMapper.NAME, Uid.encodeId(idFieldVisitor.getId())).bytes();
+                try (Releasable ignored = versionMap.acquireLock(uid)) {
+                    final VersionValue curr = versionMap.getUnderLock(uid);
+                    if (curr == null ||
+                        compareOpToVersionMapOnSeqNo(idFieldVisitor.getId(), seqNo, primaryTerm, curr) == OpVsLuceneDocStatus.OP_NEWER) {
+                        if (dv.isTombstone(docId)) {
+                            // use 0L for the start time so we can prune this delete tombstone quickly
+                            // when the local checkpoint advances (i.e., after a recovery completed).
+                            final long startTime = 0L;
+                            versionMap.putDeleteUnderLock(uid, new DeleteVersionValue(dv.docVersion(docId), seqNo, primaryTerm, startTime));
+                        } else {
+                            versionMap.putIndexUnderLock(uid, new IndexVersionValue(null, dv.docVersion(docId), seqNo, primaryTerm));
+                        }
+                    }
+                }
+            }
+        }
+        // remove live entries in the version map
+        refresh("restore_version_map_and_checkpoint_tracker", SearcherScope.INTERNAL, true);
+    }
 }
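
For readers less familiar with the Lucene plumbing used by the new restoreVersionMapAndCheckpointTracker method, here is a minimal standalone sketch of the same per-leaf Weight/Scorer traversal over a LongPoint range query. This is not Elasticsearch code: the throwaway index, the "_seq_no" field wiring, and the checkpoint value are made up for illustration, and the real method additionally reads primary term, version, and tombstone flags via CombinedDocValues and updates the version map under lock.

// Standalone sketch (assumptions noted above); Lucene 8.x APIs only.
import org.apache.lucene.document.Document;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class SeqNoScanSketch {
    public static void main(String[] args) throws Exception {
        try (Directory dir = new ByteBuffersDirectory()) {
            try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig())) {
                for (long seqNo = 0; seqNo < 10; seqNo++) {
                    Document doc = new Document();
                    // index the sequence number both as a point (for the range query)
                    // and as doc values (for per-document lookup during the scan)
                    doc.add(new LongPoint("_seq_no", seqNo));
                    doc.add(new NumericDocValuesField("_seq_no", seqNo));
                    writer.addDocument(doc);
                }
            }
            long localCheckpoint = 5; // pretend the safe commit covers seq_no 0..5
            try (DirectoryReader reader = DirectoryReader.open(dir)) {
                IndexSearcher searcher = new IndexSearcher(reader);
                searcher.setQueryCache(null); // one-off scan, caching buys nothing
                Query query = LongPoint.newRangeQuery("_seq_no", localCheckpoint + 1, Long.MAX_VALUE);
                Weight weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1.0f);
                for (LeafReaderContext leaf : reader.leaves()) {
                    Scorer scorer = weight.scorer(leaf);
                    if (scorer == null) {
                        continue; // no matching docs in this segment
                    }
                    NumericDocValues seqNoDv = leaf.reader().getNumericDocValues("_seq_no");
                    DocIdSetIterator iterator = scorer.iterator();
                    for (int docId = iterator.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = iterator.nextDoc()) {
                        if (seqNoDv.advanceExact(docId)) {
                            // here the engine would mark the seq_no as completed and
                            // restore the corresponding version map entry
                            System.out.println("doc " + docId + " has seq_no " + seqNoDv.longValue());
                        }
                    }
                }
            }
        }
    }
}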