浏览代码

Shard CLI tool always checks shards (#41480)

The shard CLI tool would not do anything if a corruption marker was not
present. However, a corruption marker is only added when corruption is
detected during indexing/writing, not when a search or other read fails.

Changed the tool to always check shards regardless of corruption marker
presence.

Related to #41298
Henning Andersen 6 年之前
父节点
当前提交
d992b1da00

+ 4 - 13
server/src/main/java/org/elasticsearch/index/shard/RemoveCorruptedLuceneSegmentsAction.java

@@ -38,9 +38,7 @@ public class RemoveCorruptedLuceneSegmentsAction {
                                                                                      Lock writeLock,
                                                                                      PrintStream printStream,
                                                                                      boolean verbose) throws IOException {
-        if (RemoveCorruptedShardDataCommand.isCorruptMarkerFileIsPresent(indexDirectory) == false) {
-            return Tuple.tuple(RemoveCorruptedShardDataCommand.CleanStatus.CLEAN, null);
-        }
+        boolean markedCorrupted = RemoveCorruptedShardDataCommand.isCorruptMarkerFileIsPresent(indexDirectory);
 
         final CheckIndex.Status status;
         try (CheckIndex checker = new CheckIndex(indexDirectory, writeLock)) {
@@ -55,7 +53,9 @@ public class RemoveCorruptedLuceneSegmentsAction {
             }
 
             return status.clean
-                ? Tuple.tuple(RemoveCorruptedShardDataCommand.CleanStatus.CLEAN_WITH_CORRUPTED_MARKER, null)
+                ? Tuple.tuple(markedCorrupted
+                    ? RemoveCorruptedShardDataCommand.CleanStatus.CLEAN_WITH_CORRUPTED_MARKER
+                    : RemoveCorruptedShardDataCommand.CleanStatus.CLEAN, null)
                 : Tuple.tuple(RemoveCorruptedShardDataCommand.CleanStatus.CORRUPTED,
                     "Corrupted Lucene index segments found - " + status.totLoseDocCount + " documents will be lost.");
         }
@@ -67,8 +67,6 @@ public class RemoveCorruptedLuceneSegmentsAction {
                         Lock writeLock,
                         PrintStream printStream,
                         boolean verbose) throws IOException {
-        checkCorruptMarkerFileIsPresent(indexDirectory);
-
         final CheckIndex.Status status;
         try (CheckIndex checker = new CheckIndex(indexDirectory, writeLock)) {
 
@@ -90,11 +88,4 @@ public class RemoveCorruptedLuceneSegmentsAction {
             }
         }
     }
-
-    protected void checkCorruptMarkerFileIsPresent(Directory directory) throws IOException {
-        if (RemoveCorruptedShardDataCommand.isCorruptMarkerFileIsPresent(directory) == false) {
-            throw new ElasticsearchException("There is no corruption file marker");
-        }
-    }
-
 }

+ 75 - 15
server/src/test/java/org/elasticsearch/index/shard/RemoveCorruptedShardDataCommandTests.java

@@ -76,6 +76,9 @@ public class RemoveCorruptedShardDataCommandTests extends IndexShardTestCase {
     private Path translogPath;
     private Path indexPath;
 
+    private static final Pattern NUM_CORRUPT_DOCS_PATTERN =
+        Pattern.compile("Corrupted Lucene index segments found -\\s+(?<docs>\\d+) documents will be lost.");
+
     @Before
     public void setup() throws IOException {
         shardId = new ShardId("index0", "_na_", 0);
@@ -154,11 +157,13 @@ public class RemoveCorruptedShardDataCommandTests extends IndexShardTestCase {
         final boolean corruptSegments = randomBoolean();
         CorruptionUtils.corruptIndex(random(), indexPath, corruptSegments);
 
-        // test corrupted shard
-        final IndexShard corruptedShard = reopenIndexShard(true);
-        allowShardFailures();
-        expectThrows(IndexShardRecoveryException.class, () -> newStartedShard(p -> corruptedShard, true));
-        closeShards(corruptedShard);
+        if (randomBoolean()) {
+            // test corrupted shard and add corruption marker
+            final IndexShard corruptedShard = reopenIndexShard(true);
+            allowShardFailures();
+            expectThrows(IndexShardRecoveryException.class, () -> newStartedShard(p -> corruptedShard, true));
+            closeShards(corruptedShard);
+        }
 
         final RemoveCorruptedShardDataCommand command = new RemoveCorruptedShardDataCommand();
         final MockTerminal t = new MockTerminal();
@@ -196,8 +201,7 @@ public class RemoveCorruptedShardDataCommandTests extends IndexShardTestCase {
 
             final Set<String> shardDocUIDs = getShardDocUIDs(newShard);
 
-            final Pattern pattern = Pattern.compile("Corrupted Lucene index segments found -\\s+(?<docs>\\d+) documents will be lost.");
-            final Matcher matcher = pattern.matcher(output);
+            final Matcher matcher = NUM_CORRUPT_DOCS_PATTERN.matcher(output);
             assertThat(matcher.find(), equalTo(true));
             final int expectedNumDocs = numDocs - Integer.parseInt(matcher.group("docs"));
 
@@ -272,12 +276,13 @@ public class RemoveCorruptedShardDataCommandTests extends IndexShardTestCase {
 
         CorruptionUtils.corruptIndex(random(), indexPath, false);
 
-        // test corrupted shard
-        final IndexShard corruptedShard = reopenIndexShard(true);
-        allowShardFailures();
-        expectThrows(IndexShardRecoveryException.class, () -> newStartedShard(p -> corruptedShard, true));
-        closeShards(corruptedShard);
-
+        if (randomBoolean()) {
+            // test corrupted shard and add corruption marker
+            final IndexShard corruptedShard = reopenIndexShard(true);
+            allowShardFailures();
+            expectThrows(IndexShardRecoveryException.class, () -> newStartedShard(p -> corruptedShard, true));
+            closeShards(corruptedShard);
+        }
         TestTranslog.corruptRandomTranslogFile(logger, random(), Arrays.asList(translogPath));
 
         final RemoveCorruptedShardDataCommand command = new RemoveCorruptedShardDataCommand();
@@ -313,8 +318,7 @@ public class RemoveCorruptedShardDataCommandTests extends IndexShardTestCase {
 
         final Set<String> shardDocUIDs = getShardDocUIDs(newShard);
 
-        final Pattern pattern = Pattern.compile("Corrupted Lucene index segments found -\\s+(?<docs>\\d+) documents will be lost.");
-        final Matcher matcher = pattern.matcher(output);
+        final Matcher matcher = NUM_CORRUPT_DOCS_PATTERN.matcher(output);
         assertThat(matcher.find(), equalTo(true));
         final int expectedNumDocs = numDocsToKeep - Integer.parseInt(matcher.group("docs"));
 
@@ -347,6 +351,62 @@ public class RemoveCorruptedShardDataCommandTests extends IndexShardTestCase {
             shardPath -> assertThat(shardPath.resolveIndex(), equalTo(indexPath)));
     }
 
+    public void testCleanWithCorruptionMarker() throws Exception {
+        // Scenario: the shard is marked as corrupted, but no index file is actually damaged. Index some docs in several segments first.
+        final int numDocs = indexDocs(indexShard, true);
+        // mark the store as corrupted; unlike the other tests here, no CorruptionUtils call damages the files
+        indexShard.store().markStoreCorrupted(null);
+
+        closeShards(indexShard);
+        // with the store marked corrupted, starting the reopened shard is expected to fail recovery
+        allowShardFailures();
+        final IndexShard corruptedShard = reopenIndexShard(true);
+        expectThrows(IndexShardRecoveryException.class, () -> newStartedShard(p -> corruptedShard, true));
+        closeShards(corruptedShard);
+
+        final RemoveCorruptedShardDataCommand command = new RemoveCorruptedShardDataCommand();
+        final MockTerminal t = new MockTerminal();
+        final OptionParser parser = command.getParser();
+
+        final OptionSet options = parser.parse("-d", translogPath.toString());
+        // first run: dry run, i.e. decline the confirmation prompt
+        t.addTextInput("n"); // answering "n" means dry run
+        t.addTextInput("n"); // second "n" in case a further prompt is shown (TODO confirm which prompt consumes it)
+        t.setVerbosity(Terminal.Verbosity.VERBOSE);
+        try {
+            command.execute(t, options, environment);
+            fail(); // declining the prompt must abort the command with an exception
+        } catch (ElasticsearchException e) {
+            assertThat(e.getMessage(), containsString("aborted by user"));
+            assertThat(t.getOutput(), containsString("Continue and remove corrupted data from the shard ?"));
+            assertThat(t.getOutput(), containsString("Lucene index is marked corrupted, but no corruption detected"));
+        }
+
+        logger.info("--> output:\n{}", t.getOutput());
+
+        // second run: not a dry run, confirm the prompts with "y"
+        t.reset();
+        t.addTextInput("y");
+        t.addTextInput("y");
+        command.execute(t, options, environment);
+
+        final String output = t.getOutput();
+        logger.info("--> output:\n{}", output);
+        // after the tool has run, the shard must start again without any shard failure
+        failOnShardFailures();
+        final IndexShard newShard = newStartedShard(p -> reopenIndexShard(false), true);
+
+        final Set<String> shardDocUIDs = getShardDocUIDs(newShard);
+        assertEquals(numDocs, shardDocUIDs.size()); // every indexed document is still present
+
+        assertThat(t.getOutput(), containsString("This shard has been marked as corrupted but no corruption can now be detected."));
+        // the "... documents will be lost" message must not be printed for a clean index
+        final Matcher matcher = NUM_CORRUPT_DOCS_PATTERN.matcher(output);
+        assertFalse(matcher.find());
+
+        closeShards(newShard);
+    }
+
     private IndexShard reopenIndexShard(boolean corrupted) throws IOException {
         // open shard with the same location
         final ShardRouting shardRouting = ShardRoutingHelper.initWithSameId(indexShard.routingEntry(),