@@ -24,6 +24,7 @@ import org.apache.logging.log4j.message.ParameterizedMessage;
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.index.CheckIndex;
 import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexCommit;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.IndexFormatTooNewException;
@@ -75,6 +76,7 @@ import org.elasticsearch.env.NodeEnvironment;
 import org.elasticsearch.env.ShardLock;
 import org.elasticsearch.env.ShardLockObtainFailedException;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.engine.CombinedDeletionPolicy;
 import org.elasticsearch.index.engine.Engine;
 import org.elasticsearch.index.engine.InternalEngine;
 import org.elasticsearch.index.seqno.SequenceNumbers;
@@ -1463,7 +1465,7 @@ public class Store extends AbstractIndexShardComponent implements Closeable, Ref
      */
     public void createEmpty() throws IOException {
         metadataLock.writeLock().lock();
-        try (IndexWriter writer = newIndexWriter(IndexWriterConfig.OpenMode.CREATE, directory)) {
+        try (IndexWriter writer = newIndexWriter(IndexWriterConfig.OpenMode.CREATE, directory, null)) {
             final Map<String, String> map = new HashMap<>();
             map.put(Engine.HISTORY_UUID_KEY, UUIDs.randomBase64UUID());
             map.put(SequenceNumbers.LOCAL_CHECKPOINT_KEY, Long.toString(SequenceNumbers.NO_OPS_PERFORMED));
@@ -1482,7 +1484,7 @@ public class Store extends AbstractIndexShardComponent implements Closeable, Ref
      */
     public void bootstrapNewHistory() throws IOException {
         metadataLock.writeLock().lock();
-        try (IndexWriter writer = newIndexWriter(IndexWriterConfig.OpenMode.APPEND, directory)) {
+        try (IndexWriter writer = newIndexWriter(IndexWriterConfig.OpenMode.APPEND, directory, null)) {
             final Map<String, String> userData = getUserData(writer);
             final long maxSeqNo = Long.parseLong(userData.get(SequenceNumbers.MAX_SEQ_NO));
             final Map<String, String> map = new HashMap<>();
@@ -1501,7 +1503,7 @@ public class Store extends AbstractIndexShardComponent implements Closeable, Ref
      */
     public void associateIndexWithNewTranslog(final String translogUUID) throws IOException {
         metadataLock.writeLock().lock();
-        try (IndexWriter writer = newIndexWriter(IndexWriterConfig.OpenMode.APPEND, directory)) {
+        try (IndexWriter writer = newIndexWriter(IndexWriterConfig.OpenMode.APPEND, directory, null)) {
             if (translogUUID.equals(getUserData(writer).get(Translog.TRANSLOG_UUID_KEY))) {
                 throw new IllegalArgumentException("a new translog uuid can't be equal to existing one. got [" + translogUUID + "]");
             }
@@ -1520,7 +1522,7 @@ public class Store extends AbstractIndexShardComponent implements Closeable, Ref
      */
     public void ensureIndexHasHistoryUUID() throws IOException {
         metadataLock.writeLock().lock();
-        try (IndexWriter writer = newIndexWriter(IndexWriterConfig.OpenMode.APPEND, directory)) {
+        try (IndexWriter writer = newIndexWriter(IndexWriterConfig.OpenMode.APPEND, directory, null)) {
             final Map<String, String> userData = getUserData(writer);
             if (userData.containsKey(Engine.HISTORY_UUID_KEY) == false) {
                 updateCommitData(writer, Collections.singletonMap(Engine.HISTORY_UUID_KEY, UUIDs.randomBase64UUID()));
@@ -1530,6 +1532,82 @@ public class Store extends AbstractIndexShardComponent implements Closeable, Ref
         }
     }
 
+    /**
+     * Keeping existing unsafe commits when opening an engine can be problematic because these commits are not safe
+     * at recovery time but they can suddenly become safe in the future.
+     * The following issues can happen if unsafe commits are kept on init.
+     * <p>
+     * 1. Replica can use unsafe commit in peer-recovery. This happens when a replica with a safe commit c1(max_seqno=1)
+     * and an unsafe commit c2(max_seqno=2) recovers from a primary with c1(max_seqno=1). If a new document(seqno=2)
+     * is added without flushing, the global checkpoint is advanced to 2; and when the replica recovers again, it will use
+     * the unsafe commit c2(max_seqno=2 <= gcp=2) as the starting commit for sequence-based recovery even though the
+     * commit c2 contains a stale operation and the document(with seqno=2) will not be replicated to the replica.
+     * <p>
+     * 2. Min translog gen for recovery can go backwards in peer-recovery. This happens when a replica has a safe commit
+     * c1(local_checkpoint=1, recovery_translog_gen=1) and an unsafe commit c2(local_checkpoint=2, recovery_translog_gen=2).
+     * The replica recovers from a primary, keeps c2 as the last commit, then sets last_translog_gen to 2. Flushing a new
+     * commit on the replica will cause an exception as the new last commit c3 will have recovery_translog_gen=1. The recovery
+     * translog generation of a commit is calculated based on the current local checkpoint. The local checkpoint of c3 is 1
+     * while the local checkpoint of c2 is 2.
+     * <p>
+     * 3. Commit without translog can be used in recovery. An old index, which was created before multiple commits were
+     * introduced (v6.2), may not have a safe commit. If that index has a snapshotted commit without translog and an unsafe
+     * commit, the policy can consider the snapshotted commit as a safe commit for recovery even though the commit does not
+     * have a translog.
+     */
+    public void trimUnsafeCommits(final long lastSyncedGlobalCheckpoint, final long minRetainedTranslogGen,
+                                  final org.elasticsearch.Version indexVersionCreated) throws IOException {
+        metadataLock.writeLock().lock();
+        try {
+            final List<IndexCommit> existingCommits = DirectoryReader.listCommits(directory);
+            if (existingCommits.isEmpty()) {
+                throw new IllegalArgumentException("No index found to trim");
+            }
+            final String translogUUID = existingCommits.get(existingCommits.size() - 1).getUserData().get(Translog.TRANSLOG_UUID_KEY);
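+            // remember the translog uuid of the current last commit; the chosen starting commit must refer to the
+            // same translog, which is verified further below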
+            final IndexCommit startingIndexCommit;
+            // We may not have a safe commit if an index was created before v6.2; and if there is a snapshotted commit whose translog
+            // is not retained but whose max_seqno is at most the global checkpoint, we may mistakenly select it as a starting commit.
+            // To avoid this issue, we only select index commits whose translog is fully retained.
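+            // CombinedDeletionPolicy.findSafeCommitPoint is expected to pick the most recent commit whose max_seqno
+            // is at most the given global checkpoint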
+            if (indexVersionCreated.before(org.elasticsearch.Version.V_6_2_0)) {
+                final List<IndexCommit> recoverableCommits = new ArrayList<>();
+                for (IndexCommit commit : existingCommits) {
+                    if (minRetainedTranslogGen <= Long.parseLong(commit.getUserData().get(Translog.TRANSLOG_GENERATION_KEY))) {
+                        recoverableCommits.add(commit);
+                    }
+                }
+                assert recoverableCommits.isEmpty() == false : "No commit point with translog found; " +
+                    "commits [" + existingCommits + "], minRetainedTranslogGen [" + minRetainedTranslogGen + "]";
+                startingIndexCommit = CombinedDeletionPolicy.findSafeCommitPoint(recoverableCommits, lastSyncedGlobalCheckpoint);
+            } else {
+                // TODO: Assert that the starting commit is a safe commit once peer-recovery sets the global checkpoint.
+                startingIndexCommit = CombinedDeletionPolicy.findSafeCommitPoint(existingCommits, lastSyncedGlobalCheckpoint);
+            }
+
+            if (translogUUID.equals(startingIndexCommit.getUserData().get(Translog.TRANSLOG_UUID_KEY)) == false) {
+                throw new IllegalStateException("starting commit translog uuid ["
+                    + startingIndexCommit.getUserData().get(Translog.TRANSLOG_UUID_KEY) + "] is not equal to last commit's translog uuid ["
+                    + translogUUID + "]");
+            }
+            if (startingIndexCommit.equals(existingCommits.get(existingCommits.size() - 1)) == false) {
+                try (IndexWriter writer = newIndexWriter(IndexWriterConfig.OpenMode.APPEND, directory, startingIndexCommit)) {
+                    // this achieves two things:
+                    // - by committing a new commit based on the starting commit, it makes sure the starting commit will be opened
+                    // - it deletes any other commit (by the standard Lucene deletion policy)
+                    //
+                    // note that we can't just use IndexCommit.delete() as we really want to make sure that those files won't be used,
+                    // even if a virus scanner causes them not to be deleted.
+
+                    // The new commit will use segment files from the starting commit but userData from the last commit by default.
+                    // Thus, we need to manually set the userData from the starting commit to the new commit.
+                    writer.setLiveCommitData(startingIndexCommit.getUserData().entrySet());
+                    writer.commit();
+                }
+            }
+        } finally {
+            metadataLock.writeLock().unlock();
+        }
+    }
+
+
     private void updateCommitData(IndexWriter writer, Map<String, String> keysToUpdate) throws IOException {
         final Map<String, String> userData = getUserData(writer);
         userData.putAll(keysToUpdate);
@@ -1543,9 +1621,12 @@ public class Store extends AbstractIndexShardComponent implements Closeable, Ref
         return userData;
     }
 
-    private IndexWriter newIndexWriter(IndexWriterConfig.OpenMode openMode, final Directory dir) throws IOException {
+    private static IndexWriter newIndexWriter(final IndexWriterConfig.OpenMode openMode, final Directory dir, final IndexCommit commit)
+        throws IOException {
+        assert openMode == IndexWriterConfig.OpenMode.APPEND || commit == null : "can't specify create flag with a commit";
         IndexWriterConfig iwc = new IndexWriterConfig(null)
             .setCommitOnClose(false)
+            .setIndexCommit(commit)
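+            // a non-null commit makes the writer open from that specific commit rather than the most recent one
+            // (only valid with OpenMode.APPEND, hence the assertion above)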
             // we don't want merges to happen here - we call maybe merge on the engine
             // later once we stared it up otherwise we would need to wait for it here
             // we also don't specify a codec here and merges should use the engines for this index