Browse Source

Handle delete document level failures (#46100)

Today we assume that document failures can not occur for deletes. This
assumption is bogus, as they can fail for a variety of reasons such as
the Lucene index having reached the document limit. Because of this
assumption, we were asserting that such a document-level failure would
never happen. When this bogus assertion is violated, we fail the node, a
catastrophe. Instead, we need to treat this as a fatal engine exception.
Jason Tedor 6 years ago
parent
commit
a2d4b81b6b

+ 13 - 2
server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java

@@ -102,6 +102,7 @@ import java.nio.file.Path;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
+import java.util.Locale;
 import java.util.Map;
 import java.util.Objects;
 import java.util.Optional;
@@ -1406,9 +1407,19 @@ public class InternalEngine extends Engine {
             }
             return new DeleteResult(
                 plan.versionOfDeletion, delete.primaryTerm(), delete.seqNo(), plan.currentlyDeleted == false);
-        } catch (Exception ex) {
+        } catch (final Exception ex) {
+            /*
+             * Document level failures when deleting are unexpected, we likely hit something fatal such as the Lucene index being corrupt,
+             * or the Lucene document limit. We have already issued a sequence number here so this is fatal, fail the engine.
+             */
             if (ex instanceof AlreadyClosedException == false && indexWriter.getTragicException() == null) {
-                throw new AssertionError("delete operation should never fail at document level", ex);
+                final String reason = String.format(
+                    Locale.ROOT,
+                    "delete id[%s] origin [%s] seq#[%d] failed at the document level",
+                    delete.id(),
+                    delete.origin(),
+                    delete.seqNo());
+                failEngine(reason, ex);
             }
             throw ex;
         }

+ 55 - 5
server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java

@@ -81,6 +81,7 @@ import org.elasticsearch.cluster.routing.IndexShardRoutingTable;
 import org.elasticsearch.cluster.routing.ShardRouting;
 import org.elasticsearch.cluster.routing.ShardRoutingState;
 import org.elasticsearch.cluster.routing.TestShardRouting;
+import org.elasticsearch.common.CheckedBiConsumer;
 import org.elasticsearch.common.CheckedRunnable;
 import org.elasticsearch.common.Randomness;
 import org.elasticsearch.common.Strings;
@@ -5990,11 +5991,16 @@ public class InternalEngineTests extends EngineTestCase {
             IndexMetaData.builder(defaultSettings.getIndexMetaData()).settings(settings).build());
         try (Store store = createStore();
              Engine engine = createEngine((dir, iwc) -> new IndexWriter(dir, iwc) {
-            @Override
-            public long addDocument(Iterable<? extends IndexableField> doc) throws IOException {
-                throw new IllegalArgumentException("fatal");
-            }
-        }, null, null, config(indexSettings, store, createTempDir(), NoMergePolicy.INSTANCE, null))) {
+
+                     @Override
+                     public long addDocument(Iterable<? extends IndexableField> doc) throws IOException {
+                         throw new IllegalArgumentException("fatal");
+                     }
+
+                 },
+                 null,
+                 null,
+                 config(indexSettings, store, createTempDir(), NoMergePolicy.INSTANCE, null))) {
             final Engine.NoOp op = new Engine.NoOp(0, 0, PRIMARY, System.currentTimeMillis(), "test");
             final IllegalArgumentException e = expectThrows(IllegalArgumentException. class, () -> engine.noOp(op));
             assertThat(e.getMessage(), equalTo("fatal"));
@@ -6005,4 +6011,48 @@ public class InternalEngineTests extends EngineTestCase {
         }
     }
 
+    public void testDeleteFailureSoftDeletesEnabledDocAlreadyDeleted() throws IOException {
+        runTestDeleteFailure(true, InternalEngine::delete);
+    }
+
+    public void testDeleteFailureSoftDeletesEnabled() throws IOException {
+        runTestDeleteFailure(true, (engine, op) -> {});
+    }
+
+    public void testDeleteFailureSoftDeletesDisabled() throws IOException {
+        runTestDeleteFailure(false, (engine, op) -> {});
+    }
+
+    private void runTestDeleteFailure(
+        final boolean softDeletesEnabled,
+        final CheckedBiConsumer<InternalEngine, Engine.Delete, IOException> consumer) throws IOException {
+        engine.close();
+        final Settings settings = Settings.builder()
+            .put(defaultSettings.getSettings())
+            .put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), softDeletesEnabled).build();
+        final IndexSettings indexSettings = IndexSettingsModule.newIndexSettings(
+            IndexMetaData.builder(defaultSettings.getIndexMetaData()).settings(settings).build());
+        final AtomicReference<ThrowingIndexWriter> iw = new AtomicReference<>();
+        try (Store store = createStore();
+             InternalEngine engine = createEngine(
+                 (dir, iwc) -> {
+                     iw.set(new ThrowingIndexWriter(dir, iwc));
+                     return iw.get();
+                 },
+                 null,
+                 null,
+                 config(indexSettings, store, createTempDir(), NoMergePolicy.INSTANCE, null))) {
+            engine.index(new Engine.Index(newUid("0"), primaryTerm.get(), InternalEngineTests.createParsedDoc("0", null)));
+            final Engine.Delete op = new Engine.Delete("_doc", "0", newUid("0"), primaryTerm.get());
+            consumer.accept(engine, op);
+            iw.get().setThrowFailure(() -> new IllegalArgumentException("fatal"));
+            final IllegalArgumentException e = expectThrows(IllegalArgumentException. class, () -> engine.delete(op));
+            assertThat(e.getMessage(), equalTo("fatal"));
+            assertTrue(engine.isClosed.get());
+            assertThat(engine.failedEngine.get(), not(nullValue()));
+            assertThat(engine.failedEngine.get(), instanceOf(IllegalArgumentException.class));
+            assertThat(engine.failedEngine.get().getMessage(), equalTo("fatal"));
+        }
+    }
+
 }