Browse Source

Store Disk Threshold Ignore Setting in IndexMetadata (#78672)

Speed up the disk threshold allocation decider by storing the ignore
flag directly in the index metadata. Also, make the relocating
shards size estimate more efficient.
Armin Braun 4 years ago
parent
commit
3cbb265f9d

+ 13 - 2
server/src/main/java/org/elasticsearch/cluster/metadata/IndexMetadata.java

@@ -25,6 +25,7 @@ import org.elasticsearch.cluster.block.ClusterBlockLevel;
 import org.elasticsearch.cluster.node.DiscoveryNodeFilters;
 import org.elasticsearch.cluster.routing.IndexRouting;
 import org.elasticsearch.cluster.routing.allocation.IndexMetadataUpdater;
+import org.elasticsearch.cluster.routing.allocation.decider.DiskThresholdDecider;
 import org.elasticsearch.common.collect.ImmutableOpenIntMap;
 import org.elasticsearch.common.collect.ImmutableOpenMap;
 import org.elasticsearch.common.collect.MapBuilder;
@@ -397,6 +398,8 @@ public class IndexMetadata implements Diffable<IndexMetadata>, ToXContentFragmen
 
     private final long creationDate;
 
+    private final boolean ignoreDiskWatermarks;
+
     private IndexMetadata(
             final Index index,
             final long version,
@@ -425,7 +428,9 @@ public class IndexMetadata implements Diffable<IndexMetadata>, ToXContentFragmen
             final boolean isHidden,
             final IndexLongFieldRange timestampRange,
             final int priority,
-            final long creationDate) {
+            final long creationDate,
+            final boolean ignoreDiskWatermarks
+    ) {
 
         this.index = index;
         this.version = version;
@@ -462,6 +467,7 @@ public class IndexMetadata implements Diffable<IndexMetadata>, ToXContentFragmen
         this.timestampRange = timestampRange;
         this.priority = priority;
         this.creationDate = creationDate;
+        this.ignoreDiskWatermarks = ignoreDiskWatermarks;
         assert numberOfShards * routingFactor == routingNumShards :  routingNumShards + " must be a multiple of " + numberOfShards;
     }
 
@@ -556,6 +562,10 @@ public class IndexMetadata implements Diffable<IndexMetadata>, ToXContentFragmen
         return waitForActiveShards;
     }
 
+    public boolean ignoreDiskWatermarks() {
+        return ignoreDiskWatermarks;
+    }
+
     public Settings getSettings() {
         return settings;
     }
@@ -1329,7 +1339,8 @@ public class IndexMetadata implements Diffable<IndexMetadata>, ToXContentFragmen
                     INDEX_HIDDEN_SETTING.get(settings),
                     timestampRange,
                     IndexMetadata.INDEX_PRIORITY_SETTING.get(settings),
-                    settings.getAsLong(SETTING_CREATION_DATE, -1L)
+                    settings.getAsLong(SETTING_CREATION_DATE, -1L),
+                    DiskThresholdDecider.SETTING_IGNORE_DISK_WATERMARKS.get(settings)
             );
         }
 

+ 6 - 6
server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDecider.java

@@ -35,7 +35,6 @@ import org.elasticsearch.index.Index;
 import org.elasticsearch.index.shard.ShardId;
 import org.elasticsearch.snapshots.SnapshotShardSizeInfo;
 
-import java.util.List;
 import java.util.Set;
 
 import static org.elasticsearch.cluster.routing.allocation.DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING;
@@ -113,15 +112,16 @@ public class DiskThresholdDecider extends AllocationDecider {
         // no longer initializing because their recovery failed or was cancelled.
 
         // Where reserved space is unavailable (e.g. stats are out-of-sync) compute a conservative estimate for initialising shards
-        final List<ShardRouting> initializingShards = node.shardsWithState(ShardRoutingState.INITIALIZING);
-        initializingShards.removeIf(shardRouting -> reservedSpace.containsShardId(shardRouting.shardId()));
-        for (ShardRouting routing : initializingShards) {
+        for (ShardRouting routing : node.shardsWithState(ShardRoutingState.INITIALIZING)) {
             if (routing.relocatingNodeId() == null) {
                 // in practice the only initializing-but-not-relocating shards with a nonzero expected shard size will be ones created
                 // by a resize (shrink/split/clone) operation which we expect to happen using hard links, so they shouldn't be taking
                 // any additional space and can be ignored here
                 continue;
             }
+            if (reservedSpace.containsShardId(routing.shardId())) {
+                continue;
+            }
 
             final String actualPath = clusterInfo.getDataPath(routing);
             // if we don't yet know the actual path of the incoming shard then conservatively assume it's going to the path with the least
@@ -161,7 +161,7 @@ public class DiskThresholdDecider extends AllocationDecider {
             return decision;
         }
 
-        if (SETTING_IGNORE_DISK_WATERMARKS.get(allocation.metadata().index(shardRouting.index()).getSettings())) {
+        if (allocation.metadata().index(shardRouting.index()).ignoreDiskWatermarks()) {
             return YES_DISK_WATERMARKS_IGNORED;
         }
 
@@ -332,7 +332,7 @@ public class DiskThresholdDecider extends AllocationDecider {
             return decision;
         }
 
-        if (SETTING_IGNORE_DISK_WATERMARKS.get(allocation.metadata().index(shardRouting.index()).getSettings())) {
+        if (allocation.metadata().index(shardRouting.index()).ignoreDiskWatermarks()) {
             return YES_DISK_WATERMARKS_IGNORED;
         }