|
@@ -43,6 +43,7 @@ import org.elasticsearch.Version;
|
|
|
import org.elasticsearch.action.ActionListener;
|
|
|
import org.elasticsearch.action.index.IndexRequest;
|
|
|
import org.elasticsearch.action.support.PlainActionFuture;
|
|
|
+import org.elasticsearch.action.support.SubscribableListener;
|
|
|
import org.elasticsearch.cluster.metadata.DataStream;
|
|
|
import org.elasticsearch.common.lucene.LoggerInfoStream;
|
|
|
import org.elasticsearch.common.lucene.Lucene;
|
|
@@ -520,25 +521,24 @@ public class InternalEngine extends Engine {
|
|
|
}
|
|
|
|
|
|
@Override
|
|
|
- public InternalEngine recoverFromTranslog(TranslogRecoveryRunner translogRecoveryRunner, long recoverUpToSeqNo) throws IOException {
|
|
|
- try (ReleasableLock lock = readLock.acquire()) {
|
|
|
- ensureOpen();
|
|
|
- if (pendingTranslogRecovery.get() == false) {
|
|
|
- throw new IllegalStateException("Engine has already been recovered");
|
|
|
- }
|
|
|
- try {
|
|
|
- recoverFromTranslogInternal(translogRecoveryRunner, recoverUpToSeqNo);
|
|
|
- } catch (Exception e) {
|
|
|
- try {
|
|
|
- pendingTranslogRecovery.set(true); // just play safe and never allow commits on this see #ensureCanFlush
|
|
|
- failEngine("failed to recover from translog", e);
|
|
|
- } catch (Exception inner) {
|
|
|
- e.addSuppressed(inner);
|
|
|
+ public void recoverFromTranslog(TranslogRecoveryRunner translogRecoveryRunner, long recoverUpToSeqNo, ActionListener<Void> listener) {
|
|
|
+ ActionListener.run(listener, l -> {
|
|
|
+ try (ReleasableLock lock = readLock.acquire()) {
|
|
|
+ ensureOpen();
|
|
|
+ if (pendingTranslogRecovery.get() == false) {
|
|
|
+ throw new IllegalStateException("Engine has already been recovered");
|
|
|
}
|
|
|
- throw e;
|
|
|
+ recoverFromTranslogInternal(translogRecoveryRunner, recoverUpToSeqNo, l.delegateResponse((ll, e) -> {
|
|
|
+ try {
|
|
|
+ pendingTranslogRecovery.set(true); // just play safe and never allow commits on this see #ensureCanFlush
|
|
|
+ failEngine("failed to recover from translog", e);
|
|
|
+ } catch (Exception inner) {
|
|
|
+ e.addSuppressed(inner);
|
|
|
+ }
|
|
|
+ ll.onFailure(e);
|
|
|
+ }));
|
|
|
}
|
|
|
- }
|
|
|
- return this;
|
|
|
+ });
|
|
|
}
|
|
|
|
|
|
@Override
|
|
@@ -547,33 +547,45 @@ public class InternalEngine extends Engine {
|
|
|
pendingTranslogRecovery.set(false); // we are good - now we can commit
|
|
|
}
|
|
|
|
|
|
- private void recoverFromTranslogInternal(TranslogRecoveryRunner translogRecoveryRunner, long recoverUpToSeqNo) throws IOException {
|
|
|
- final int opsRecovered;
|
|
|
- final long localCheckpoint = getProcessedLocalCheckpoint();
|
|
|
- if (localCheckpoint < recoverUpToSeqNo) {
|
|
|
- try (Translog.Snapshot snapshot = newTranslogSnapshot(localCheckpoint + 1, recoverUpToSeqNo)) {
|
|
|
- opsRecovered = translogRecoveryRunner.run(this, snapshot);
|
|
|
- } catch (Exception e) {
|
|
|
- throw new EngineException(shardId, "failed to recover from translog", e);
|
|
|
- }
|
|
|
- } else {
|
|
|
- opsRecovered = 0;
|
|
|
- }
|
|
|
- // flush if we recovered something or if we have references to older translogs
|
|
|
- // note: if opsRecovered == 0 and we have older translogs it means they are corrupted or 0 length.
|
|
|
- assert pendingTranslogRecovery.get() : "translogRecovery is not pending but should be";
|
|
|
- pendingTranslogRecovery.set(false); // we are good - now we can commit
|
|
|
- logger.trace(
|
|
|
- () -> format(
|
|
|
- "flushing post recovery from translog: ops recovered [%s], current translog generation [%s]",
|
|
|
- opsRecovered,
|
|
|
- translog.currentFileGeneration()
|
|
|
- )
|
|
|
- );
|
|
|
- PlainActionFuture<FlushResult> future = PlainActionFuture.newFuture();
|
|
|
- flush(false, true, future);
|
|
|
- future.actionGet();
|
|
|
- translog.trimUnreferencedReaders();
|
|
|
+ private void recoverFromTranslogInternal(
|
|
|
+ TranslogRecoveryRunner translogRecoveryRunner,
|
|
|
+ long recoverUpToSeqNo,
|
|
|
+ ActionListener<Void> listener
|
|
|
+ ) {
|
|
|
+ ActionListener.run(listener, l -> {
|
|
|
+ final int opsRecovered;
|
|
|
+ final long localCheckpoint = getProcessedLocalCheckpoint();
|
|
|
+ if (localCheckpoint < recoverUpToSeqNo) {
|
|
|
+ try (Translog.Snapshot snapshot = newTranslogSnapshot(localCheckpoint + 1, recoverUpToSeqNo)) {
|
|
|
+ opsRecovered = translogRecoveryRunner.run(this, snapshot);
|
|
|
+ } catch (Exception e) {
|
|
|
+ throw new EngineException(shardId, "failed to recover from translog", e);
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ opsRecovered = 0;
|
|
|
+ }
|
|
|
+ // flush if we recovered something or if we have references to older translogs
|
|
|
+ // note: if opsRecovered == 0 and we have older translogs it means they are corrupted or 0 length.
|
|
|
+ assert pendingTranslogRecovery.get() : "translogRecovery is not pending but should be";
|
|
|
+ pendingTranslogRecovery.set(false); // we are good - now we can commit
|
|
|
+ logger.trace(
|
|
|
+ () -> format(
|
|
|
+ "flushing post recovery from translog: ops recovered [%s], current translog generation [%s]",
|
|
|
+ opsRecovered,
|
|
|
+ translog.currentFileGeneration()
|
|
|
+ )
|
|
|
+ );
|
|
|
+
|
|
|
+ // flush might do something async and complete the listener on a different thread, from which we must fork back to a generic
|
|
|
+ // thread to continue with recovery, but if it doesn't do anything async then there's no need to fork, hence why we use a
|
|
|
+ // SubscribableListener here
|
|
|
+ final var flushListener = new SubscribableListener<FlushResult>();
|
|
|
+ flush(false, true, flushListener);
|
|
|
+ flushListener.addListener(l.delegateFailureAndWrap((ll, r) -> {
|
|
|
+ translog.trimUnreferencedReaders();
|
|
|
+ ll.onResponse(null);
|
|
|
+ }), engineConfig.getThreadPool().generic(), null);
|
|
|
+ });
|
|
|
}
|
|
|
|
|
|
protected Translog.Snapshot newTranslogSnapshot(long fromSeqNo, long toSeqNo) throws IOException {
|