1
0
Эх сурвалжийг харах

Fix _cluster/stats .nodes.fs deduplication (#94798)

Joe Gallo 2 жил өмнө
parent
commit
caeacd450e

+ 6 - 0
docs/changelog/94798.yaml

@@ -0,0 +1,6 @@
+pr: 94798
+summary: Fix _cluster/stats `.nodes.fs` deduplication
+area: Stats
+type: bug
+issues:
+ - 24472

+ 27 - 6
server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsNodes.java

@@ -845,19 +845,40 @@ public class ClusterStatsNodes implements ToXContentFragment {
 
     static class ClusterFsStatsDeduplicator {
 
-        private final Set<InetAddress> seenAddresses;
+        private record DedupEntry(InetAddress inetAddress, String mount, String path) {}
+
+        private final Set<DedupEntry> seenAddressesMountsPaths;
+
         private final FsInfo.Path total = new FsInfo.Path();
 
         ClusterFsStatsDeduplicator(int expectedSize) {
-            seenAddresses = Sets.newHashSetWithExpectedSize(expectedSize);
+            // each address+mount is stored twice (once without a path, and once with a path), thus 2x
+            seenAddressesMountsPaths = Sets.newHashSetWithExpectedSize(2 * expectedSize);
         }
 
         public void add(InetAddress inetAddress, FsInfo fsInfo) {
-            if (seenAddresses.add(inetAddress) == false) {
-                return;
-            }
             if (fsInfo != null) {
-                total.add(fsInfo.getTotal());
+                for (FsInfo.Path p : fsInfo) {
+                    final String mount = p.getMount();
+                    final String path = p.getPath();
+
+                    // this deduplication logic is hard to get right. it might be impossible to make it work correctly in
+                    // *all* circumstances. this is best-effort only, but it's aimed at trying to solve 90%+ of cases.
+
+                    // rule 0: we want to sum the unique mounts for each ip address, so if we *haven't* seen a particular
+                    // address and mount, then definitely add that to the total.
+
+                    // rule 1: however, as a special case, if we see the same address+mount+path triple more than once, then we
+                    // override the ip+mount de-duplication logic -- using that as indicator that we're seeing a special
+                    // containerization situation, in which case we assume the operator is maintaining different disks for each node.
+
+                    boolean seenAddressMount = seenAddressesMountsPaths.add(new DedupEntry(inetAddress, mount, null)) == false;
+                    boolean seenAddressMountPath = seenAddressesMountsPaths.add(new DedupEntry(inetAddress, mount, path)) == false;
+
+                    if ((seenAddressMount == false) || seenAddressMountPath) {
+                        total.add(p);
+                    }
+                }
             }
         }
 

+ 53 - 6
server/src/test/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsNodesTests.java

@@ -222,7 +222,24 @@ public class ClusterStatsNodesTests extends ESTestCase {
         }
 
         {
-            // two nodes, different ip addresses, same data paths, same device
+            // two nodes, same ip address, but different data paths on different devices
+            InetAddress address1 = InetAddresses.forString("192.168.0.1");
+            FsInfo.Path path1 = new FsInfo.Path("/data/a", "/dev/sda", 3, 2, 1);
+            InetAddress address2 = InetAddresses.forString("192.168.0.1");
+            FsInfo.Path path2 = new FsInfo.Path("/data/b", "/dev/sdb", 3, 2, 1);
+            ClusterStatsNodes.ClusterFsStatsDeduplicator deduplicator = new ClusterStatsNodes.ClusterFsStatsDeduplicator(1);
+            deduplicator.add(address1, newFsInfo(path1));
+            deduplicator.add(address2, newFsInfo(path2));
+            FsInfo.Path total = deduplicator.getTotal();
+
+            // since they're different devices, they sum
+            assertThat(total.getTotal().getBytes(), equalTo(6L));
+            assertThat(total.getFree().getBytes(), equalTo(4L));
+            assertThat(total.getAvailable().getBytes(), equalTo(2L));
+        }
+
+        {
+            // two nodes, different ip addresses, different data paths, same device
             InetAddress address1 = InetAddresses.forString("192.168.0.1");
             FsInfo.Path path1 = new FsInfo.Path("/data/a", "/dev/sda", 3, 2, 1);
             InetAddress address2 = InetAddresses.forString("192.168.0.2");
@@ -239,7 +256,7 @@ public class ClusterStatsNodesTests extends ESTestCase {
         }
 
         {
-            // two nodes, same ip addresses, same data paths, same device
+            // two nodes, same ip address, same data path, same device
             InetAddress address1 = InetAddresses.forString("192.168.0.1");
             FsInfo.Path path1 = new FsInfo.Path("/app/data", "/app (/dev/mapper/lxc-data)", 3, 2, 1);
             InetAddress address2 = InetAddresses.forString("192.168.0.1");
@@ -251,14 +268,44 @@ public class ClusterStatsNodesTests extends ESTestCase {
 
             // wait a second, this is the super-special case -- you can't actually have two nodes doing this unless something
             // very interesting is happening, so they sum (i.e. we assume the operator is doing smart things)
-            // assertThat(total.getTotal().getBytes(), equalTo(6L));
-            // assertThat(total.getFree().getBytes(), equalTo(4L));
-            // assertThat(total.getAvailable().getBytes(), equalTo(2L));
-            // BUG 1: we don't sum in this super-special case, we just dedup by ip address
+            assertThat(total.getTotal().getBytes(), equalTo(6L));
+            assertThat(total.getFree().getBytes(), equalTo(4L));
+            assertThat(total.getAvailable().getBytes(), equalTo(2L));
+        }
+
+        {
+            // two nodes, same ip address, different data paths, same device
+            InetAddress address1 = InetAddresses.forString("192.168.0.1");
+            FsInfo.Path path1 = new FsInfo.Path("/app/data1", "/dev/sda", 3, 2, 1);
+            InetAddress address2 = InetAddresses.forString("192.168.0.1");
+            FsInfo.Path path2 = new FsInfo.Path("/app/data2", "/dev/sda", 3, 2, 1);
+            ClusterStatsNodes.ClusterFsStatsDeduplicator deduplicator = new ClusterStatsNodes.ClusterFsStatsDeduplicator(1);
+            deduplicator.add(address1, newFsInfo(path1));
+            deduplicator.add(address2, newFsInfo(path2));
+            FsInfo.Path total = deduplicator.getTotal();
+
+            // since the paths aren't the same, it doesn't trigger the special case -- it's just the same device and doesn't sum
             assertThat(total.getTotal().getBytes(), equalTo(3L));
             assertThat(total.getFree().getBytes(), equalTo(2L));
             assertThat(total.getAvailable().getBytes(), equalTo(1L));
         }
+
+        {
+            // two nodes, same ip address, same data path, different devices
+            InetAddress address1 = InetAddresses.forString("192.168.0.1");
+            FsInfo.Path path1 = new FsInfo.Path("/app/data", "/dev/sda", 3, 2, 1);
+            InetAddress address2 = InetAddresses.forString("192.168.0.1");
+            FsInfo.Path path2 = new FsInfo.Path("/app/data", "/dev/sdb", 3, 2, 1);
+            ClusterStatsNodes.ClusterFsStatsDeduplicator deduplicator = new ClusterStatsNodes.ClusterFsStatsDeduplicator(1);
+            deduplicator.add(address1, newFsInfo(path1));
+            deduplicator.add(address2, newFsInfo(path2));
+            FsInfo.Path total = deduplicator.getTotal();
+
+            // having the same path isn't special in this case, it's just unique ip/mount pairs, so they sum
+            assertThat(total.getTotal().getBytes(), equalTo(6L));
+            assertThat(total.getFree().getBytes(), equalTo(4L));
+            assertThat(total.getAvailable().getBytes(), equalTo(2L));
+        }
     }
 
     private static FsInfo newFsInfo(FsInfo.Path... paths) {

+ 2 - 1
x-pack/plugin/monitoring/src/test/java/org/elasticsearch/xpack/monitoring/collector/cluster/ClusterStatsMonitoringDocTests.java

@@ -32,6 +32,7 @@ import org.elasticsearch.cluster.routing.RecoverySource;
 import org.elasticsearch.cluster.routing.ShardRouting;
 import org.elasticsearch.cluster.routing.UnassignedInfo;
 import org.elasticsearch.common.bytes.BytesReference;
+import org.elasticsearch.common.collect.Iterators;
 import org.elasticsearch.common.network.NetworkModule;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.transport.BoundTransportAddress;
@@ -371,7 +372,7 @@ public class ClusterStatsMonitoringDocTests extends BaseMonitoringDocTestCase<Cl
 
         final FsInfo mockFsInfo = mock(FsInfo.class);
         when(mockNodeStats.getFs()).thenReturn(mockFsInfo);
-        when(mockFsInfo.getTotal()).thenReturn(new FsInfo.Path("_fs_path", "_fs_mount", 100L, 49L, 51L));
+        when(mockFsInfo.iterator()).thenReturn(Iterators.single(new FsInfo.Path("_fs_path", "_fs_mount", 100L, 49L, 51L)));
 
         final OsStats mockOsStats = mock(OsStats.class);
         when(mockNodeStats.getOs()).thenReturn(mockOsStats);