Browse Source

[ENGINE] Simplify Engine construction and ref counting

Today the internal engine closes itself it the engine hits an exception
it can not recover from. This complicates a lot of refcounting issues
if such an exception happens during engine creation. This commit
only markes the engine as failed and let the user close it once the exception
bubbles up. Additionally it rolls back the indexwriter to prevent any changes after
the engine is failed.
Simon Willnauer 10 years ago
parent
commit
d2277d70ff

+ 94 - 112
src/main/java/org/elasticsearch/index/engine/internal/InternalEngine.java

@@ -26,10 +26,7 @@ import org.apache.lucene.index.IndexWriter.IndexReaderWarmer;
 import org.apache.lucene.search.*;
 import org.apache.lucene.search.*;
 import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.store.LockObtainFailedException;
 import org.apache.lucene.store.LockObtainFailedException;
-import org.apache.lucene.util.Accountable;
-import org.apache.lucene.util.Accountables;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.*;
 import org.elasticsearch.ElasticsearchException;
 import org.elasticsearch.ElasticsearchException;
 import org.elasticsearch.ElasticsearchIllegalStateException;
 import org.elasticsearch.ElasticsearchIllegalStateException;
 import org.elasticsearch.ExceptionsHelper;
 import org.elasticsearch.ExceptionsHelper;
@@ -109,7 +106,8 @@ public class InternalEngine implements Engine {
     private final SearcherFactory searcherFactory = new SearchFactory();
     private final SearcherFactory searcherFactory = new SearchFactory();
     private volatile SearcherManager searcherManager;
     private volatile SearcherManager searcherManager;
 
 
-    private volatile boolean closed = false;
+    private final AtomicBoolean isClosed = new AtomicBoolean(false);
+    private volatile boolean closedOrFailed = false;
     // flag indicating if a dirty operation has occurred since the last refresh
     // flag indicating if a dirty operation has occurred since the last refresh
     private volatile boolean dirty = false;
     private volatile boolean dirty = false;
 
 
@@ -121,8 +119,6 @@ public class InternalEngine implements Engine {
     private final Lock flushLock = new ReentrantLock();
     private final Lock flushLock = new ReentrantLock();
 
 
     protected final RecoveryCounter onGoingRecoveries = new RecoveryCounter();
     protected final RecoveryCounter onGoingRecoveries = new RecoveryCounter();
-
-
     // A uid (in the form of BytesRef) to the version map
     // A uid (in the form of BytesRef) to the version map
     // we use the hashed variant since we iterate over it and check removal and additions on existing keys
     // we use the hashed variant since we iterate over it and check removal and additions on existing keys
     private final LiveVersionMap versionMap;
     private final LiveVersionMap versionMap;
@@ -131,7 +127,7 @@ public class InternalEngine implements Engine {
 
 
     private final Object refreshMutex = new Object();
     private final Object refreshMutex = new Object();
 
 
-    private Throwable failedEngine = null;
+    private volatile Throwable failedEngine = null;
     private final Lock failEngineLock = new ReentrantLock();
     private final Lock failEngineLock = new ReentrantLock();
     private final FailedEngineListener failedEngineListener;
     private final FailedEngineListener failedEngineListener;
 
 
@@ -146,47 +142,55 @@ public class InternalEngine implements Engine {
         Preconditions.checkNotNull(engineConfig.getStore(), "Store must be provided to the engine");
         Preconditions.checkNotNull(engineConfig.getStore(), "Store must be provided to the engine");
         Preconditions.checkNotNull(engineConfig.getDeletionPolicy(), "Snapshot deletion policy must be provided to the engine");
         Preconditions.checkNotNull(engineConfig.getDeletionPolicy(), "Snapshot deletion policy must be provided to the engine");
         Preconditions.checkNotNull(engineConfig.getTranslog(), "Translog must be provided to the engine");
         Preconditions.checkNotNull(engineConfig.getTranslog(), "Translog must be provided to the engine");
-
-        this.shardId = engineConfig.getShardId();
-        this.logger = Loggers.getLogger(getClass(), engineConfig.getIndexSettings(), shardId);
-        this.lastDeleteVersionPruneTimeMSec = engineConfig.getThreadPool().estimatedTimeInMillis();
-        this.indexingService = engineConfig.getIndexingService();
-        this.warmer = engineConfig.getWarmer();
         this.store = engineConfig.getStore();
         this.store = engineConfig.getStore();
-        this.deletionPolicy = engineConfig.getDeletionPolicy();
-        this.translog = engineConfig.getTranslog();
-        this.mergePolicyProvider = engineConfig.getMergePolicyProvider();
-        this.mergeScheduler = engineConfig.getMergeScheduler();
-        this.versionMap = new LiveVersionMap();
-        this.dirtyLocks = new Object[engineConfig.getIndexConcurrency() * 50]; // we multiply it to have enough...
-        for (int i = 0; i < dirtyLocks.length; i++) {
-            dirtyLocks[i] = new Object();
-        }
-
-        this.mergeSchedulerFailureListener = new FailEngineOnMergeFailure();
-        this.mergeSchedulerListener = new MergeSchedulerListener();
-        this.mergeScheduler.addListener(mergeSchedulerListener);
-        this.mergeScheduler.addFailureListener(mergeSchedulerFailureListener);
-
-        this.failedEngineListener = engineConfig.getFailedEngineListener();
-        throttle = new IndexThrottle();
-        this.engineConfig = engineConfig;
-        listener = new EngineConfig.EngineSettingsListener(logger, engineConfig) {
-            @Override
-            protected void onChange() {
-                updateSettings();
+        store.incRef();
+        boolean success = false;
+        try {
+            this.shardId = engineConfig.getShardId();
+            this.logger = Loggers.getLogger(getClass(), engineConfig.getIndexSettings(), shardId);
+            this.lastDeleteVersionPruneTimeMSec = engineConfig.getThreadPool().estimatedTimeInMillis();
+            this.indexingService = engineConfig.getIndexingService();
+            this.warmer = engineConfig.getWarmer();
+            this.deletionPolicy = engineConfig.getDeletionPolicy();
+            this.translog = engineConfig.getTranslog();
+            this.mergePolicyProvider = engineConfig.getMergePolicyProvider();
+            this.mergeScheduler = engineConfig.getMergeScheduler();
+            this.versionMap = new LiveVersionMap();
+            this.dirtyLocks = new Object[engineConfig.getIndexConcurrency() * 50]; // we multiply it to have enough...
+            for (int i = 0; i < dirtyLocks.length; i++) {
+                dirtyLocks[i] = new Object();
+            }
+
+
+            this.failedEngineListener = engineConfig.getFailedEngineListener();
+            throttle = new IndexThrottle();
+            this.engineConfig = engineConfig;
+            listener = new EngineConfig.EngineSettingsListener(logger, engineConfig) {
+                @Override
+                protected void onChange() {
+                    updateSettings();
+                }
+            };
+            engineConfig.getIndexSettingsService().addListener(listener);
+            final IndexWriter writer = start();
+            assert indexWriter == null : "IndexWriter already initialized";
+            indexWriter = writer;
+            this.mergeSchedulerFailureListener = new FailEngineOnMergeFailure();
+            this.mergeSchedulerListener = new MergeSchedulerListener();
+            this.mergeScheduler.addListener(mergeSchedulerListener);
+            this.mergeScheduler.addFailureListener(mergeSchedulerFailureListener);
+            success = true;
+        } finally {
+            if (success == false) {
+                // failure we need to dec the store reference
+                store.decRef();
             }
             }
-        };
-        engineConfig.getIndexSettingsService().addListener(listener);
-        final IndexWriter writer = start();
-        assert indexWriter == null : "IndexWriter already initialized";
-        indexWriter = writer;
+        }
     }
     }
 
 
     @Override
     @Override
     public void updateIndexingBufferSize(ByteSizeValue indexingBufferSize) {
     public void updateIndexingBufferSize(ByteSizeValue indexingBufferSize) {
         ByteSizeValue preValue = engineConfig.getIndexingBufferSize();
         ByteSizeValue preValue = engineConfig.getIndexingBufferSize();
-
         try (InternalLock _ = readLock.acquire()) {
         try (InternalLock _ = readLock.acquire()) {
             ensureOpen();
             ensureOpen();
             engineConfig.setIndexingBufferSize(indexingBufferSize);
             engineConfig.setIndexingBufferSize(indexingBufferSize);
@@ -213,7 +217,6 @@ public class InternalEngine implements Engine {
     }
     }
 
 
     private IndexWriter start() throws EngineException {
     private IndexWriter start() throws EngineException {
-        store.incRef();
         boolean success = false;
         boolean success = false;
         IndexWriter indexWriter = null;
         IndexWriter indexWriter = null;
         SearcherManager searcherManager = null;
         SearcherManager searcherManager = null;
@@ -257,14 +260,13 @@ public class InternalEngine implements Engine {
             }
             }
         } finally {
         } finally {
             if (success == false) { // release everything we created on a failure
             if (success == false) { // release everything we created on a failure
-                store.decRef();
-                IOUtils.closeWhileHandlingException(indexWriter, searcherManager);
+                IOUtils.closeWhileHandlingException(searcherManager, indexWriter);
             }
             }
         }
         }
     }
     }
 
 
     private void updateSettings() {
     private void updateSettings() {
-        if (closed == false) {
+        if (closedOrFailed == false) {
             final LiveIndexWriterConfig iwc = indexWriter.getConfig();
             final LiveIndexWriterConfig iwc = indexWriter.getConfig();
             iwc.setUseCompoundFile(engineConfig.isCompoundOnFlush());
             iwc.setUseCompoundFile(engineConfig.isCompoundOnFlush());
             final boolean concurrencyNeedsUpdate = iwc.getMaxThreadStates() != engineConfig.getIndexConcurrency();
             final boolean concurrencyNeedsUpdate = iwc.getMaxThreadStates() != engineConfig.getIndexConcurrency();
@@ -460,7 +462,7 @@ public class InternalEngine implements Engine {
         // TODO: we force refresh when versionMap is using > 25% of IW's RAM buffer; should we make this separately configurable?
         // TODO: we force refresh when versionMap is using > 25% of IW's RAM buffer; should we make this separately configurable?
         if (versionMap.ramBytesUsedForRefresh() > 0.25 * engineConfig.getIndexingBufferSize().bytes() && versionMapRefreshPending.getAndSet(true) == false) {
         if (versionMap.ramBytesUsedForRefresh() > 0.25 * engineConfig.getIndexingBufferSize().bytes() && versionMapRefreshPending.getAndSet(true) == false) {
             try {
             try {
-                if (closed) {
+                if (closedOrFailed) {
                     // no point...
                     // no point...
                     return;
                     return;
                 }
                 }
@@ -694,7 +696,6 @@ public class InternalEngine implements Engine {
 
 
     @Override
     @Override
     public void refresh(String source, boolean force) throws EngineException {
     public void refresh(String source, boolean force) throws EngineException {
-        ensureOpen();
         // we obtain a read lock here, since we don't want a flush to happen while we are refreshing
         // we obtain a read lock here, since we don't want a flush to happen while we are refreshing
         // since it flushes the index as well (though, in terms of concurrency, we are allowed to do it)
         // since it flushes the index as well (though, in terms of concurrency, we are allowed to do it)
         try (InternalLock _ = readLock.acquire()) {
         try (InternalLock _ = readLock.acquire()) {
@@ -712,7 +713,7 @@ public class InternalEngine implements Engine {
                 }
                 }
             }
             }
         } catch (AlreadyClosedException e) {
         } catch (AlreadyClosedException e) {
-            // an index writer got replaced on us, ignore
+            ensureOpen();
         } catch (EngineClosedException e) {
         } catch (EngineClosedException e) {
             throw e;
             throw e;
         } catch (Throwable t) {
         } catch (Throwable t) {
@@ -856,7 +857,7 @@ public class InternalEngine implements Engine {
                 ensureOpen();
                 ensureOpen();
                 lastCommittedSegmentInfos = store.readLastCommittedSegmentsInfo();
                 lastCommittedSegmentInfos = store.readLastCommittedSegmentsInfo();
             } catch (Throwable e) {
             } catch (Throwable e) {
-                if (!closed) {
+                if (closedOrFailed == false) {
                     logger.warn("failed to read latest segment infos on flush", e);
                     logger.warn("failed to read latest segment infos on flush", e);
                     if (Lucene.isCorruptionException(e)) {
                     if (Lucene.isCorruptionException(e)) {
                         throw new FlushFailedEngineException(shardId, e);
                         throw new FlushFailedEngineException(shardId, e);
@@ -873,7 +874,7 @@ public class InternalEngine implements Engine {
     }
     }
 
 
     private void ensureOpen() {
     private void ensureOpen() {
-        if (closed) {
+        if (closedOrFailed) {
             throw new EngineClosedException(shardId, failedEngine);
             throw new EngineClosedException(shardId, failedEngine);
         }
         }
     }
     }
@@ -884,9 +885,10 @@ public class InternalEngine implements Engine {
      * @throws EngineClosedException if the engine is already closed
      * @throws EngineClosedException if the engine is already closed
      */
      */
     private IndexWriter currentIndexWriter() {
     private IndexWriter currentIndexWriter() {
+        ensureOpen();
         final IndexWriter writer = indexWriter;
         final IndexWriter writer = indexWriter;
         if (writer == null) {
         if (writer == null) {
-            assert closed : "Engine is not closed but writer is null";
+            assert closedOrFailed : "Engine is not closed but writer is null";
             throw new EngineClosedException(shardId, failedEngine);
             throw new EngineClosedException(shardId, failedEngine);
         }
         }
         return writer;
         return writer;
@@ -1006,8 +1008,8 @@ public class InternalEngine implements Engine {
         // take a write lock here so it won't happen while a flush is in progress
         // take a write lock here so it won't happen while a flush is in progress
         // this means that next commits will not be allowed once the lock is released
         // this means that next commits will not be allowed once the lock is released
         try (InternalLock _ = writeLock.acquire()) {
         try (InternalLock _ = writeLock.acquire()) {
-            if (closed) {
-                throw new EngineClosedException(shardId);
+            if (closedOrFailed) {
+                throw new EngineClosedException(shardId, failedEngine);
             }
             }
             onGoingRecoveries.startRecovery();
             onGoingRecoveries.startRecovery();
         }
         }
@@ -1077,7 +1079,10 @@ public class InternalEngine implements Engine {
     }
     }
 
 
     private Throwable wrapIfClosed(Throwable t) {
     private Throwable wrapIfClosed(Throwable t) {
-        if (closed) {
+        if (closedOrFailed) {
+            if (t != failedEngine && failedEngine != null) {
+                t.addSuppressed(failedEngine);
+            }
             return new EngineClosedException(shardId, t);
             return new EngineClosedException(shardId, t);
         }
         }
         return t;
         return t;
@@ -1205,9 +1210,9 @@ public class InternalEngine implements Engine {
         logger.debug("close now acquire writeLock");
         logger.debug("close now acquire writeLock");
         try (InternalLock _ = writeLock.acquire()) {
         try (InternalLock _ = writeLock.acquire()) {
             logger.debug("close acquired writeLock");
             logger.debug("close acquired writeLock");
-            if (!closed) {
+            if (isClosed.compareAndSet(false, true)) {
                 try {
                 try {
-                    closed = true;
+                    closedOrFailed = true;
                     this.versionMap.clear();
                     this.versionMap.clear();
                     logger.debug("close searcherManager");
                     logger.debug("close searcherManager");
                     try {
                     try {
@@ -1228,8 +1233,8 @@ public class InternalEngine implements Engine {
                 } catch (Throwable e) {
                 } catch (Throwable e) {
                     logger.warn("failed to rollback writer on close", e);
                     logger.warn("failed to rollback writer on close", e);
                 } finally {
                 } finally {
-                    store.decRef();
                     indexWriter = null;
                     indexWriter = null;
+                    store.decRef();
                     this.mergeScheduler.removeListener(mergeSchedulerListener);
                     this.mergeScheduler.removeListener(mergeSchedulerListener);
                     this.mergeScheduler.removeFailureListener(mergeSchedulerFailureListener);
                     this.mergeScheduler.removeFailureListener(mergeSchedulerFailureListener);
                     engineConfig.getIndexSettingsService().removeListener(listener);
                     engineConfig.getIndexSettingsService().removeListener(listener);
@@ -1239,8 +1244,7 @@ public class InternalEngine implements Engine {
     }
     }
 
 
     LiveIndexWriterConfig currentIndexWriterConfig() {
     LiveIndexWriterConfig currentIndexWriterConfig() {
-        ensureOpen();
-        return this.indexWriter.getConfig();
+        return currentIndexWriter().getConfig();
     }
     }
 
 
     @Override
     @Override
@@ -1248,33 +1252,41 @@ public class InternalEngine implements Engine {
         assert failure != null;
         assert failure != null;
         if (failEngineLock.tryLock()) {
         if (failEngineLock.tryLock()) {
             try {
             try {
-                // we first mark the store as corrupted before we notify any listeners
-                // this must happen first otherwise we might try to reallocate so quickly
-                // on the same node that we don't see the corrupted marker file when
-                // the shard is initializing
-                if (Lucene.isCorruptionException(failure)) {
-                    try {
-                        store.markStoreCorrupted(ExceptionsHelper.unwrapCorruption(failure));
-                    } catch (IOException e) {
-                        logger.warn("Couldn't marks store corrupted", e);
-                    }
-                }
-            } finally {
-                assert !readLock.assertLockIsHeld() : "readLock is held by a thread that tries to fail the engine";
-                if (failedEngine != null) {
-                    logger.debug("tried to fail engine but engine is already failed. ignoring. [{}]", reason, failure);
-                    return;
-                }
                 try {
                 try {
+                    // we first mark the store as corrupted before we notify any listeners
+                    // this must happen first otherwise we might try to reallocate so quickly
+                    // on the same node that we don't see the corrupted marker file when
+                    // the shard is initializing
+                    if (Lucene.isCorruptionException(failure)) {
+                        try {
+                            store.markStoreCorrupted(ExceptionsHelper.unwrapCorruption(failure));
+                        } catch (IOException e) {
+                            logger.warn("Couldn't marks store corrupted", e);
+                        }
+                    }
+                } finally {
+                    if (failedEngine != null) {
+                        logger.debug("tried to fail engine but engine is already failed. ignoring. [{}]", reason, failure);
+                        return;
+                    }
                     logger.warn("failed engine [{}]", failure, reason);
                     logger.warn("failed engine [{}]", failure, reason);
                     // we must set a failure exception, generate one if not supplied
                     // we must set a failure exception, generate one if not supplied
                     failedEngine = failure;
                     failedEngine = failure;
                     failedEngineListener.onFailedEngine(shardId, reason, failure);
                     failedEngineListener.onFailedEngine(shardId, reason, failure);
-                } finally {
+                }
+            } catch (Throwable t) {
+                // don't bubble up these exceptions up
+                logger.warn("failEngine threw exception", t);
+            } finally {
+                closedOrFailed = true;
+                try (InternalLock _ = readLock.acquire()) {
+                    // we take the readlock here to ensure nobody replaces this IW concurrently.
                     if (indexWriter != null) {
                     if (indexWriter != null) {
-                        // we might be not yet be fully constructed - don't call close
-                        close();
+                        indexWriter.rollback();
                     }
                     }
+                } catch (Throwable t) {
+                    logger.warn("Rolling back indexwriter on engine failure failed", t);
+                    // to be on the safe side we just rollback the IW
                 }
                 }
             }
             }
         } else {
         } else {
@@ -1347,7 +1359,7 @@ public class InternalEngine implements Engine {
                         }
                         }
                     } catch (Throwable t) {
                     } catch (Throwable t) {
                         // Don't fail a merge if the warm-up failed
                         // Don't fail a merge if the warm-up failed
-                        if (!closed) {
+                        if (closedOrFailed == false) {
                             logger.warn("Warm-up failed", t);
                             logger.warn("Warm-up failed", t);
                         }
                         }
                         if (t instanceof Error) {
                         if (t instanceof Error) {
@@ -1471,7 +1483,7 @@ public class InternalEngine implements Engine {
                     }
                     }
                     warmer.warmTopReader(new IndicesWarmer.WarmerContext(shardId, new SimpleSearcher("warmer", searcher)));
                     warmer.warmTopReader(new IndicesWarmer.WarmerContext(shardId, new SimpleSearcher("warmer", searcher)));
                 } catch (Throwable e) {
                 } catch (Throwable e) {
-                    if (!closed) {
+                    if (closedOrFailed == false) {
                         logger.warn("failed to prepare/warm", e);
                         logger.warn("failed to prepare/warm", e);
                     }
                     }
                 } finally {
                 } finally {
@@ -1510,51 +1522,21 @@ public class InternalEngine implements Engine {
     }
     }
 
 
     private static final class InternalLock implements Releasable {
     private static final class InternalLock implements Releasable {
-        private final ThreadLocal<AtomicInteger> lockIsHeld;
         private final Lock lock;
         private final Lock lock;
 
 
         InternalLock(Lock lock) {
         InternalLock(Lock lock) {
-            ThreadLocal<AtomicInteger> tl = null;
-            assert (tl = new ThreadLocal<>()) != null;
-            lockIsHeld = tl;
             this.lock = lock;
             this.lock = lock;
         }
         }
 
 
         @Override
         @Override
         public void close() {
         public void close() {
             lock.unlock();
             lock.unlock();
-            assert onAssertRelease();
         }
         }
 
 
         InternalLock acquire() throws EngineException {
         InternalLock acquire() throws EngineException {
             lock.lock();
             lock.lock();
-            assert onAssertLock();
             return this;
             return this;
         }
         }
-
-
-        protected boolean onAssertRelease() {
-            AtomicInteger count = lockIsHeld.get();
-            if (count.decrementAndGet() == 0) {
-                lockIsHeld.remove();
-            }
-            return true;
-        }
-
-        protected boolean onAssertLock() {
-            AtomicInteger count = lockIsHeld.get();
-            if (count == null) {
-                count = new AtomicInteger(0);
-                lockIsHeld.set(count);
-            }
-            count.incrementAndGet();
-            return true;
-        }
-
-        boolean assertLockIsHeld() {
-            AtomicInteger count = lockIsHeld.get();
-            return count != null && count.get() > 0;
-        }
     }
     }
 
 
     public void activateThrottling() {
     public void activateThrottling() {

+ 2 - 1
src/main/java/org/elasticsearch/index/shard/IndexShard.java

@@ -1053,7 +1053,8 @@ public class IndexShard extends AbstractIndexShardComponent implements IndexShar
                     }
                     }
                 }
                 }
             } finally {
             } finally {
-                IOUtils.closeWhileHandlingException(engineSafe()); // we need to close ourself - we failed all bets are off
+                // close the engine all bets are off... don't use engineSafe() here it can throw an exception
+                IOUtils.closeWhileHandlingException(currentEngineReference.get());
             }
             }
         }
         }
     }
     }