Browse Source

Do not report negative free bytes for DiskThresholdDecider#canAllocate (#33641)

Do not report negative free bytes for DiskThresholdDecider#canAllocate (#33641)

Closes #33596
Vladimir Dolzhenko 7 years ago
parent
commit
4d0bea705c

+ 16 - 14
server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDecider.java

@@ -121,6 +121,7 @@ public class DiskThresholdDecider extends AllocationDecider {
         // Cache the used disk percentage for displaying disk percentages consistent with documentation
         double usedDiskPercentage = usage.getUsedDiskAsPercentage();
         long freeBytes = usage.getFreeBytes();
+        ByteSizeValue freeBytesValue = new ByteSizeValue(freeBytes);
         if (logger.isTraceEnabled()) {
             logger.trace("node [{}] has {}% used disk", node.nodeId(), usedDiskPercentage);
         }
@@ -134,22 +135,22 @@ public class DiskThresholdDecider extends AllocationDecider {
         if (freeBytes < diskThresholdSettings.getFreeBytesThresholdLow().getBytes()) {
             if (skipLowTresholdChecks == false) {
                 if (logger.isDebugEnabled()) {
-                    logger.debug("less than the required {} free bytes threshold ({} bytes free) on node {}, preventing allocation",
-                            diskThresholdSettings.getFreeBytesThresholdLow(), freeBytes, node.nodeId());
+                    logger.debug("less than the required {} free bytes threshold ({} free) on node {}, preventing allocation",
+                            diskThresholdSettings.getFreeBytesThresholdLow(), freeBytesValue, node.nodeId());
                 }
                 return allocation.decision(Decision.NO, NAME,
                     "the node is above the low watermark cluster setting [%s=%s], having less than the minimum required [%s] free " +
                     "space, actual free: [%s]",
                     CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK_SETTING.getKey(),
                     diskThresholdSettings.getLowWatermarkRaw(),
-                    diskThresholdSettings.getFreeBytesThresholdLow(), new ByteSizeValue(freeBytes));
+                    diskThresholdSettings.getFreeBytesThresholdLow(), freeBytesValue);
             } else if (freeBytes > diskThresholdSettings.getFreeBytesThresholdHigh().getBytes()) {
                 // Allow the shard to be allocated because it is primary that
                 // has never been allocated if it's under the high watermark
                 if (logger.isDebugEnabled()) {
-                    logger.debug("less than the required {} free bytes threshold ({} bytes free) on node {}, " +
+                    logger.debug("less than the required {} free bytes threshold ({} free) on node {}, " +
                                     "but allowing allocation because primary has never been allocated",
-                            diskThresholdSettings.getFreeBytesThresholdLow(), freeBytes, node.nodeId());
+                            diskThresholdSettings.getFreeBytesThresholdLow(), freeBytesValue, node.nodeId());
                 }
                 return allocation.decision(Decision.YES, NAME,
                         "the node is above the low watermark, but less than the high watermark, and this primary shard has " +
@@ -158,16 +159,16 @@ public class DiskThresholdDecider extends AllocationDecider {
                 // Even though the primary has never been allocated, the node is
                 // above the high watermark, so don't allow allocating the shard
                 if (logger.isDebugEnabled()) {
-                    logger.debug("less than the required {} free bytes threshold ({} bytes free) on node {}, " +
+                    logger.debug("less than the required {} free bytes threshold ({} free) on node {}, " +
                                     "preventing allocation even though primary has never been allocated",
-                            diskThresholdSettings.getFreeBytesThresholdHigh(), freeBytes, node.nodeId());
+                            diskThresholdSettings.getFreeBytesThresholdHigh(), freeBytesValue, node.nodeId());
                 }
                 return allocation.decision(Decision.NO, NAME,
                     "the node is above the high watermark cluster setting [%s=%s], having less than the minimum required [%s] free " +
                     "space, actual free: [%s]",
                     CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(),
                     diskThresholdSettings.getHighWatermarkRaw(),
-                    diskThresholdSettings.getFreeBytesThresholdHigh(), new ByteSizeValue(freeBytes));
+                    diskThresholdSettings.getFreeBytesThresholdHigh(), freeBytesValue);
             }
         }
 
@@ -219,15 +220,16 @@ public class DiskThresholdDecider extends AllocationDecider {
         double freeSpaceAfterShard = freeDiskPercentageAfterShardAssigned(usage, shardSize);
         long freeBytesAfterShard = freeBytes - shardSize;
         if (freeBytesAfterShard < diskThresholdSettings.getFreeBytesThresholdHigh().getBytes()) {
-            logger.warn("after allocating, node [{}] would have less than the required " +
-                    "{} free bytes threshold ({} bytes free), preventing allocation",
-                    node.nodeId(), diskThresholdSettings.getFreeBytesThresholdHigh(), freeBytesAfterShard);
+            logger.warn("after allocating, node [{}] would have less than the required threshold of " +
+                    "{} free (currently {} free, estimated shard size is {}), preventing allocation",
+                    node.nodeId(), diskThresholdSettings.getFreeBytesThresholdHigh(), freeBytesValue, new ByteSizeValue(shardSize));
             return allocation.decision(Decision.NO, NAME,
                 "allocating the shard to this node will bring the node above the high watermark cluster setting [%s=%s] " +
-                    "and cause it to have less than the minimum required [%s] of free space (free bytes after shard added: [%s])",
+                    "and cause it to have less than the minimum required [%s] of free space (free: [%s], estimated shard size: [%s])",
                 CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(),
                 diskThresholdSettings.getHighWatermarkRaw(),
-                diskThresholdSettings.getFreeBytesThresholdHigh(), new ByteSizeValue(freeBytesAfterShard));
+                diskThresholdSettings.getFreeBytesThresholdHigh(),
+                freeBytesValue, new ByteSizeValue(shardSize));
         }
         if (freeSpaceAfterShard < diskThresholdSettings.getFreeDiskThresholdHigh()) {
             logger.warn("after allocating, node [{}] would have more than the allowed " +
@@ -243,7 +245,7 @@ public class DiskThresholdDecider extends AllocationDecider {
 
         return allocation.decision(Decision.YES, NAME,
                 "enough disk for shard on node, free: [%s], shard size: [%s], free after allocating shard: [%s]",
-                new ByteSizeValue(freeBytes),
+                freeBytesValue,
                 new ByteSizeValue(shardSize),
                 new ByteSizeValue(freeBytesAfterShard));
     }

+ 58 - 1
server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderUnitTests.java

@@ -21,6 +21,7 @@ package org.elasticsearch.cluster.routing.allocation.decider;
 
 import org.elasticsearch.Version;
 import org.elasticsearch.cluster.ClusterInfo;
+import org.elasticsearch.cluster.ClusterName;
 import org.elasticsearch.cluster.ClusterState;
 import org.elasticsearch.cluster.DiskUsage;
 import org.elasticsearch.cluster.ESAllocationTestCase;
@@ -79,7 +80,8 @@ public class DiskThresholdDeciderUnitTests extends ESAllocationTestCase {
                 .addAsNew(metaData.index("test"))
                 .build();
 
-        ClusterState clusterState = ClusterState.builder(org.elasticsearch.cluster.ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)).metaData(metaData).routingTable(routingTable).build();
+        ClusterState clusterState = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY))
+            .metaData(metaData).routingTable(routingTable).build();
 
         clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder()
                         .add(node_0)
@@ -110,6 +112,61 @@ public class DiskThresholdDeciderUnitTests extends ESAllocationTestCase {
             "disk space than the maximum allowed [90.0%]"));
     }
 
+    public void testCannotAllocateDueToLackOfDiskResources() {
+        ClusterSettings nss = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS);
+        DiskThresholdDecider decider = new DiskThresholdDecider(Settings.EMPTY, nss);
+
+        MetaData metaData = MetaData.builder()
+            .put(IndexMetaData.builder("test").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(1))
+            .build();
+
+        final Index index = metaData.index("test").getIndex();
+
+        ShardRouting test_0 = ShardRouting.newUnassigned(new ShardId(index, 0), true, EmptyStoreRecoverySource.INSTANCE,
+            new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo"));
+        DiscoveryNode node_0 = new DiscoveryNode("node_0", buildNewFakeTransportAddress(), Collections.emptyMap(),
+            new HashSet<>(Arrays.asList(DiscoveryNode.Role.values())), Version.CURRENT);
+        DiscoveryNode node_1 = new DiscoveryNode("node_1", buildNewFakeTransportAddress(), Collections.emptyMap(),
+            new HashSet<>(Arrays.asList(DiscoveryNode.Role.values())), Version.CURRENT);
+
+        RoutingTable routingTable = RoutingTable.builder()
+            .addAsNew(metaData.index("test"))
+            .build();
+
+        ClusterState clusterState = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY))
+            .metaData(metaData).routingTable(routingTable).build();
+
+        clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder()
+            .add(node_0)
+            .add(node_1)
+        ).build();
+
+        // actual test -- after all that bloat :)
+
+        ImmutableOpenMap.Builder<String, DiskUsage> leastAvailableUsages = ImmutableOpenMap.builder();
+        leastAvailableUsages.put("node_0", new DiskUsage("node_0", "node_0", "_na_", 100, 0)); // all full
+        ImmutableOpenMap.Builder<String, DiskUsage> mostAvailableUsage = ImmutableOpenMap.builder();
+        final int freeBytes = randomIntBetween(20, 100);
+        mostAvailableUsage.put("node_0", new DiskUsage("node_0", "node_0", "_na_", 100, freeBytes));
+
+        ImmutableOpenMap.Builder<String, Long> shardSizes = ImmutableOpenMap.builder();
+        // way bigger than available space
+        final long shardSize = randomIntBetween(110, 1000);
+        shardSizes.put("[test][0][p]", shardSize);
+        ClusterInfo clusterInfo = new ClusterInfo(leastAvailableUsages.build(), mostAvailableUsage.build(), shardSizes.build(), ImmutableOpenMap.of());
+        RoutingAllocation allocation = new RoutingAllocation(new AllocationDeciders(Settings.EMPTY, Collections.singleton(decider)),
+            clusterState.getRoutingNodes(), clusterState, clusterInfo, System.nanoTime());
+        allocation.debugDecision(true);
+        Decision decision = decider.canAllocate(test_0, new RoutingNode("node_0", node_0), allocation);
+        assertEquals(Decision.Type.NO, decision.type());
+
+        assertThat(decision.getExplanation(), containsString(
+            "allocating the shard to this node will bring the node above the high watermark cluster setting "
+                +"[cluster.routing.allocation.disk.watermark.high=90%] "
+                + "and cause it to have less than the minimum required [0b] of free space "
+                + "(free: [" + freeBytes + "b], estimated shard size: [" + shardSize + "b])"));
+    }
+
     public void testCanRemainUsesLeastAvailableSpace() {
         ClusterSettings nss = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS);
         DiskThresholdDecider decider = new DiskThresholdDecider(Settings.EMPTY, nss);

+ 10 - 0
server/src/test/java/org/elasticsearch/common/unit/ByteSizeUnitTests.java

@@ -81,6 +81,16 @@ public class ByteSizeUnitTests extends ESTestCase {
         assertThat(PB.toPB(1), equalTo(1L));
     }
 
+    public void testToString() {
+        int v = randomIntBetween(1, 1023);
+        assertThat(new ByteSizeValue(PB.toBytes(v)).toString(), equalTo(v + "pb"));
+        assertThat(new ByteSizeValue(TB.toBytes(v)).toString(), equalTo(v + "tb"));
+        assertThat(new ByteSizeValue(GB.toBytes(v)).toString(), equalTo(v + "gb"));
+        assertThat(new ByteSizeValue(MB.toBytes(v)).toString(), equalTo(v + "mb"));
+        assertThat(new ByteSizeValue(KB.toBytes(v)).toString(), equalTo(v + "kb"));
+        assertThat(new ByteSizeValue(BYTES.toBytes(v)).toString(), equalTo(v + "b"));
+    }
+
     public void testSerialization() throws IOException {
         for (ByteSizeUnit unit : ByteSizeUnit.values()) {
             try (BytesStreamOutput out = new BytesStreamOutput()) {