1
0
View source

Improve check_on_startup docs and logging (#74233)

Today we don't really describe why using `index.shard.check_on_startup`
is such a bad idea, or what to do instead. This commit expands the docs
to clarify what it does, why it's not really necessary and what to do
instead. It also now logs a warning every time the startup checks run to
encourage users to stop using this setting.
David Turner 4 years ago
parent
commit
31e1b1ae22

+ 39 - 13
docs/reference/index-modules.asciidoc

@@ -69,19 +69,6 @@ document distribution. See the
 breaking change].
 ====
 
-`index.shard.check_on_startup`::
-
-Whether or not shards should be checked for corruption before opening. When
-corruption is detected, it will prevent the shard from being opened.
-Accepts:
-`false`::: (default) Don't check for corruption when opening a shard.
-`checksum`::: Check for physical corruption.
-`true`::: Check for both physical and logical corruption. This is much more
-expensive in terms of CPU and memory usage.
-+
-WARNING: Expert only. Checking shards may take a lot of time on large
-indices.
-
 [[index-codec]] `index.codec`::
 
     The +default+ value compresses stored data with LZ4
@@ -130,6 +117,45 @@ to incomplete history on the leader. Defaults to `12h`.
     per request through the use of the `expand_wildcards` parameter. Possible values are
     `true` and `false` (default).
 
+[[index-shard-check-on-startup]] `index.shard.check_on_startup`::
++
+====
+WARNING: Expert users only. This setting enables some very expensive processing
+at shard startup and is only ever useful while diagnosing a problem in your
+cluster. If you do use it, you should do so only temporarily and remove it
+once it is no longer needed.
+
+{es} automatically performs integrity checks on the contents of shards at
+various points during their lifecycle. For instance, it verifies the checksum
+of every file transferred when recovering a replica or taking a snapshot. It
+also verifies the integrity of many important files when opening a shard, which
+happens when starting up a node and when finishing a shard recovery or
+relocation. You can therefore manually verify the integrity of a whole shard
+while it is running by taking a snapshot of it into a fresh repository or by
+recovering it onto a fresh node.
+
+This setting determines whether {es} performs additional integrity checks while
+opening a shard. If these checks detect corruption then they will prevent the
+shard from being opened. It accepts the following values:
+
+`false`::: Don't perform additional checks for corruption when opening a shard.
+This is the default and recommended behaviour.
+
+`checksum`::: Verify that the checksum of every file in the shard matches its
+contents. This will detect cases where the data read from disk differ from the
+data that {es} originally wrote, for instance due to undetected disk corruption
+or other hardware failures. These checks require reading the entire shard from
+disk which takes substantial time and IO bandwidth and may affect cluster
+performance by evicting important data from your filesystem cache.
+
+`true`::: Perform the same checks as `checksum` and also check for logical
+inconsistencies in the shard, which could for instance be caused by the data
+being corrupted while it was being written due to faulty RAM or other hardware
+failures. These checks require reading the entire shard from disk which takes
+substantial time and IO bandwidth, and then performing various checks on the
+contents of the shard which take substantial time, CPU and memory.
+====
+
 [discrete]
 [[dynamic-index-settings]]
 === Dynamic index settings

+ 24 - 11
server/src/main/java/org/elasticsearch/index/shard/IndexShard.java

@@ -46,27 +46,27 @@ import org.elasticsearch.cluster.routing.RecoverySource;
 import org.elasticsearch.cluster.routing.RecoverySource.SnapshotRecoverySource;
 import org.elasticsearch.cluster.routing.ShardRouting;
 import org.elasticsearch.cluster.routing.allocation.decider.DiskThresholdDecider;
-import org.elasticsearch.core.Booleans;
-import org.elasticsearch.core.CheckedConsumer;
-import org.elasticsearch.core.CheckedFunction;
-import org.elasticsearch.core.CheckedRunnable;
-import org.elasticsearch.core.Nullable;
-import org.elasticsearch.core.Tuple;
 import org.elasticsearch.common.io.stream.BytesStreamOutput;
-import org.elasticsearch.core.Releasable;
-import org.elasticsearch.core.Releasables;
 import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.common.lucene.index.ElasticsearchDirectoryReader;
 import org.elasticsearch.common.metrics.CounterMetric;
 import org.elasticsearch.common.metrics.MeanMetric;
 import org.elasticsearch.common.settings.Settings;
-import org.elasticsearch.core.TimeValue;
 import org.elasticsearch.common.util.BigArrays;
 import org.elasticsearch.common.util.CollectionUtils;
 import org.elasticsearch.common.util.concurrent.AbstractRunnable;
 import org.elasticsearch.common.util.concurrent.AsyncIOProcessor;
 import org.elasticsearch.common.util.concurrent.ThreadContext;
 import org.elasticsearch.common.xcontent.XContentHelper;
+import org.elasticsearch.core.Booleans;
+import org.elasticsearch.core.CheckedConsumer;
+import org.elasticsearch.core.CheckedFunction;
+import org.elasticsearch.core.CheckedRunnable;
+import org.elasticsearch.core.Nullable;
+import org.elasticsearch.core.Releasable;
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.core.TimeValue;
+import org.elasticsearch.core.Tuple;
 import org.elasticsearch.core.internal.io.IOUtils;
 import org.elasticsearch.gateway.WriteStateException;
 import org.elasticsearch.index.Index;
@@ -2544,6 +2544,11 @@ public class IndexShard extends AbstractIndexShardComponent implements IndicesCl
     public void maybeCheckIndex() {
         recoveryState.setStage(RecoveryState.Stage.VERIFY_INDEX);
         if (Booleans.isTrue(checkIndexOnStartup) || "checksum".equals(checkIndexOnStartup)) {
+            logger.warn(
+                "performing expensive diagnostic checks during shard startup [{}={}]; " +
+                    "these checks should only be enabled temporarily, you must remove this index setting as soon as possible",
+                IndexSettings.INDEX_CHECK_ON_STARTUP.getKey(),
+                checkIndexOnStartup);
             try {
                 checkIndex();
             } catch (IOException ex) {
@@ -2576,7 +2581,13 @@ public class IndexShard extends AbstractIndexShardComponent implements IndicesCl
         if ("checksum".equals(checkIndexOnStartup)) {
             // physical verification only: verify all checksums for the latest commit
             IOException corrupt = null;
-            MetadataSnapshot metadata = snapshotStoreMetadata();
+            final MetadataSnapshot metadata;
+            try {
+                metadata = snapshotStoreMetadata();
+            } catch (IOException e) {
+                logger.warn("check index [failure]", e);
+                throw e;
+            }
             for (Map.Entry<String, StoreFileMetadata> entry : metadata.asMap().entrySet()) {
                 try {
                     Store.checkIntegrity(entry.getValue(), store.directory());
@@ -2601,7 +2612,9 @@ public class IndexShard extends AbstractIndexShardComponent implements IndicesCl
                     // ignore if closed....
                     return;
                 }
-                logger.warn("check index [failure]\n{}", os.bytes().utf8ToString());
+                logger.warn("check index [failure]");
+                // report details in a separate message, it might contain control characters which mess up detection of the failure message
+                logger.warn("{}", os.bytes().utf8ToString());
                 throw new IOException("index check failure");
             }
         }

+ 35 - 8
server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java

@@ -7,6 +7,8 @@
  */
 package org.elasticsearch.index.shard;
 
+import org.apache.logging.log4j.Level;
+import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.DirectoryReader;
@@ -44,20 +46,16 @@ import org.elasticsearch.cluster.routing.ShardRoutingHelper;
 import org.elasticsearch.cluster.routing.ShardRoutingState;
 import org.elasticsearch.cluster.routing.TestShardRouting;
 import org.elasticsearch.cluster.routing.UnassignedInfo;
-import org.elasticsearch.core.CheckedFunction;
 import org.elasticsearch.common.Randomness;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.UUIDs;
 import org.elasticsearch.common.breaker.CircuitBreaker;
 import org.elasticsearch.common.bytes.BytesArray;
-import org.elasticsearch.core.Tuple;
 import org.elasticsearch.common.io.stream.BytesStreamOutput;
 import org.elasticsearch.common.io.stream.StreamInput;
-import org.elasticsearch.core.Releasable;
-import org.elasticsearch.core.Releasables;
+import org.elasticsearch.common.logging.Loggers;
 import org.elasticsearch.common.settings.IndexScopedSettings;
 import org.elasticsearch.common.settings.Settings;
-import org.elasticsearch.core.TimeValue;
 import org.elasticsearch.common.util.concurrent.AbstractRunnable;
 import org.elasticsearch.common.util.concurrent.AtomicArray;
 import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
@@ -65,6 +63,11 @@ import org.elasticsearch.common.xcontent.NamedXContentRegistry;
 import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.common.xcontent.XContentFactory;
 import org.elasticsearch.common.xcontent.XContentType;
+import org.elasticsearch.core.CheckedFunction;
+import org.elasticsearch.core.Releasable;
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.core.TimeValue;
+import org.elasticsearch.core.Tuple;
 import org.elasticsearch.core.internal.io.IOUtils;
 import org.elasticsearch.env.NodeEnvironment;
 import org.elasticsearch.index.IndexModule;
@@ -117,6 +120,7 @@ import org.elasticsearch.snapshots.SnapshotId;
 import org.elasticsearch.test.CorruptionUtils;
 import org.elasticsearch.test.DummyShardLock;
 import org.elasticsearch.test.FieldMaskingReader;
+import org.elasticsearch.test.MockLogAppender;
 import org.elasticsearch.test.store.MockFSDirectoryFactory;
 import org.elasticsearch.threadpool.ThreadPool;
 import org.junit.Assert;
@@ -3089,9 +3093,32 @@ public class IndexShardTests extends IndexShardTestCase {
         IndexShard corruptedShard = newShard(shardRouting, shardPath, indexMetadata, null, null, indexShard.engineFactory,
             indexShard.getGlobalCheckpointSyncer(), indexShard.getRetentionLeaseSyncer(), EMPTY_EVENT_LISTENER);
 
-        final IndexShardRecoveryException indexShardRecoveryException =
-            expectThrows(IndexShardRecoveryException.class, () -> newStartedShard(p -> corruptedShard, true));
-        assertThat(indexShardRecoveryException.getMessage(), equalTo("failed recovery"));
+        final MockLogAppender appender = new MockLogAppender();
+        appender.start();
+        Loggers.addAppender(LogManager.getLogger(IndexShard.class), appender);
+        try {
+            appender.addExpectation(new MockLogAppender.SeenEventExpectation(
+                "expensive checks warning",
+                "org.elasticsearch.index.shard.IndexShard",
+                Level.WARN,
+                "performing expensive diagnostic checks during shard startup [index.shard.check_on_startup=*]; these checks should only " +
+                    "be enabled temporarily, you must remove this index setting as soon as possible"));
+
+            appender.addExpectation(new MockLogAppender.SeenEventExpectation(
+                "failure message",
+                "org.elasticsearch.index.shard.IndexShard",
+                Level.WARN,
+                "check index [failure]*"));
+
+            final IndexShardRecoveryException indexShardRecoveryException =
+                expectThrows(IndexShardRecoveryException.class, () -> newStartedShard(p -> corruptedShard, true));
+            assertThat(indexShardRecoveryException.getMessage(), equalTo("failed recovery"));
+
+            appender.assertAllExpectationsMatched();
+        } finally {
+            Loggers.removeAppender(LogManager.getLogger(IndexShard.class), appender);
+            appender.stop();
+        }
 
         // check that corrupt marker is there
         Files.walkFileTree(indexPath, corruptedVisitor);