Browse Source

Avoiding the use of dynamic map keys in the cluster_formation results of the stable master health indicator (#89842)

Part of the stable master history health indicator's results (the
`cluster_formation` section within `details`) used dynamic keys in a
map. This gets rid of that. So now instead of:

```
"details": {
                "current_master": {
                    "node_id": null,
                    "name": null
                },
                "recent_masters": [
                    {
                        "node_id": "31WBm9iTTRuMyWnBhWNUGA",
                        "name": "master-node-3"
                    }
                ],
                "cluster_formation": {
                    "31WBm9iTTRuMyWnBhWNUGA": "master not discovered or elected yet, an election requires at least 2 nodes with ids from [nADkAeGsT-q12gw89Ga1FA, 31WBm9iTTRuMyWnBhWNUGA, w8v48JvuRsuDCjwBn8KbRw], have only discovered non-quorum [{master-node-3}{31WBm9iTTRuMyWnBhWNUGA}{lJmGYiTPS_W7AJU7csG_gQ}{master-node-3}{127.0.0.1}{127.0.0.1:9301}{dm}]; discovery will continue using [127.0.0.1:9300, 127.0.0.1:9302, 127.0.0.1:9303, 127.0.0.1:9304, 127.0.0.1:9305, [::1]:9300, [::1]:9302, [::1]:9303, [::1]:9304, [::1]:9305] from hosts providers and [{master-node-2}{nADkAeGsT-q12gw89Ga1FA}{logzEHuuTpqwJp-RWssBPw}{master-node-2}{127.0.0.1}{127.0.0.1:9300}{dm}, {master-node-3}{31WBm9iTTRuMyWnBhWNUGA}{lJmGYiTPS_W7AJU7csG_gQ}{master-node-3}{127.0.0.1}{127.0.0.1:9301}{dm}] from last-known cluster state; node term 39, last-accepted version 461 in term 39"
                }
}
```

We will have:

```
"details": {
                "current_master": {
                    "node_id": null,
                    "name": null
                },
                "recent_masters": [
                    {
                        "node_id": "31WBm9iTTRuMyWnBhWNUGA",
                        "name": "master-node-3"
                    }
                ],
                "cluster_formation": [
                    {
                        "node_id": "31WBm9iTTRuMyWnBhWNUGA",
                        "cluster_formation_message": "master not discovered or elected yet, an election requires at least 2 nodes with ids from [nADkAeGsT-q12gw89Ga1FA, 31WBm9iTTRuMyWnBhWNUGA, w8v48JvuRsuDCjwBn8KbRw], have only discovered non-quorum [{master-node-3}{31WBm9iTTRuMyWnBhWNUGA}{lJmGYiTPS_W7AJU7csG_gQ}{master-node-3}{127.0.0.1}{127.0.0.1:9301}{dm}]; discovery will continue using [127.0.0.1:9300, 127.0.0.1:9302, 127.0.0.1:9303, 127.0.0.1:9304, 127.0.0.1:9305, [::1]:9300, [::1]:9302, [::1]:9303, [::1]:9304, [::1]:9305] from hosts providers and [{master-node-2}{nADkAeGsT-q12gw89Ga1FA}{logzEHuuTpqwJp-RWssBPw}{master-node-2}{127.0.0.1}{127.0.0.1:9300}{dm}, {master-node-3}{31WBm9iTTRuMyWnBhWNUGA}{lJmGYiTPS_W7AJU7csG_gQ}{master-node-3}{127.0.0.1}{127.0.0.1:9301}{dm}] from last-known cluster state; node term 39, last-accepted version 461 in term 39"
                    }
                ]
}
```
Keith Massey 3 years ago
parent
commit
06cfa747be

+ 8 - 6
docs/reference/health/health.asciidoc

@@ -262,16 +262,18 @@ details have contents and a structure that is unique to each indicator.
 ====
 
 `cluster_formation`::
-    (Optional, map) If there has been no elected master node recently, the node being queried attempts to
+    (Optional, array) If there has been no elected master node recently, the node being queried attempts to
     gather information about why the cluster has been unable to form, or why the node being queried has been
-    unable to join the cluster if it has formed. This could contain each master eligibles
-    node view on the cluster formation. The node id is the key in the map and its 
-    cluster formation information is the value.
+    unable to join the cluster if it has formed. This array could contain an entry for each master-eligible
+    node's view of cluster formation.
 +
 .Properties of `cluster_formation`
 [%collapsible%open]
 ====
-`description`::
+`node_id`::
+    (string) The node id of a master-eligible node.
+
+`cluster_formation_message`::
     (string) A detailed description explaining what went wrong with cluster formation, or why this node was
     unable to join the cluster if it has formed.
 ====
@@ -342,7 +344,7 @@ details have contents and a structure that is unique to each indicator.
 
 `unhealthy_policies`::
     (map) A detailed view on the policies that are considered unhealthy due to having
-    several consecutive unssuccesful invocations. 
+    several consecutive unsuccessful invocations.
     The `count` key represents the number of unhealthy policies (int).
     The `invocations_since_last_success` key will report a map where the unhealthy policy
     name is the key and its corresponding number of failed invocations is the value.

+ 10 - 1
server/src/main/java/org/elasticsearch/cluster/coordination/StableMasterHealthIndicatorService.java

@@ -19,6 +19,7 @@ import org.elasticsearch.health.ImpactArea;
 
 import java.util.Collection;
 import java.util.List;
+import java.util.Map;
 
 /**
  * This indicator reports the health of master stability.
@@ -53,6 +54,7 @@ public class StableMasterHealthIndicatorService implements HealthIndicatorServic
     private static final String DETAILS_RECENT_MASTERS = "recent_masters";
     private static final String DETAILS_EXCEPTION_FETCHING_HISTORY = "exception_fetching_history";
     private static final String CLUSTER_FORMATION = "cluster_formation";
+    private static final String CLUSTER_FORMATION_MESSAGE = "cluster_formation_message";
 
     // Impacts of having an unstable master:
     private static final String UNSTABLE_MASTER_INGEST_IMPACT = "The cluster cannot create, delete, or rebalance indices, and cannot "
@@ -155,7 +157,14 @@ public class StableMasterHealthIndicatorService implements HealthIndicatorServic
                 });
             }
             if (coordinationDiagnosticsDetails.nodeToClusterFormationDescriptionMap() != null) {
-                builder.field(CLUSTER_FORMATION, coordinationDiagnosticsDetails.nodeToClusterFormationDescriptionMap());
+                builder.field(
+                    CLUSTER_FORMATION,
+                    coordinationDiagnosticsDetails.nodeToClusterFormationDescriptionMap()
+                        .entrySet()
+                        .stream()
+                        .map(entry -> Map.of("node_id", entry.getKey(), CLUSTER_FORMATION_MESSAGE, entry.getValue()))
+                        .toList()
+                );
             }
             return builder.endObject();
         };

+ 9 - 4
server/src/test/java/org/elasticsearch/cluster/coordination/StableMasterHealthIndicatorServiceTests.java

@@ -38,6 +38,7 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.UUID;
@@ -134,10 +135,14 @@ public class StableMasterHealthIndicatorServiceTests extends AbstractCoordinator
             assertThat(recentMasterMap.get("name"), not(emptyOrNullString()));
             assertThat(recentMasterMap.get("node_id"), not(emptyOrNullString()));
         }
-        Map<String, String> clusterFormationMap = (Map<String, String>) detailsMap.get("cluster_formation");
-        assertThat(clusterFormationMap.size(), equalTo(2));
-        assertThat(clusterFormationMap.get(node1.getId()), equalTo(node1ClusterFormation));
-        assertThat(clusterFormationMap.get(node2.getId()), equalTo(node2ClusterFormation));
+        List<Map<String, String>> clusterFormations = (List<Map<String, String>>) detailsMap.get("cluster_formation");
+        assertThat(clusterFormations.size(), equalTo(2));
+        Map<String, String> nodeToClusterFormationMap = new HashMap<>();
+        for (Map<String, String> clusterFormationMap : clusterFormations) {
+            nodeToClusterFormationMap.put(clusterFormationMap.get("node_id"), clusterFormationMap.get("cluster_formation_message"));
+        }
+        assertThat(nodeToClusterFormationMap.get(node1.getId()), equalTo(node1ClusterFormation));
+        assertThat(nodeToClusterFormationMap.get(node2.getId()), equalTo(node2ClusterFormation));
         List<Diagnosis> diagnosis = result.diagnosisList();
         assertThat(diagnosis.size(), equalTo(1));
         assertThat(diagnosis.get(0), is(StableMasterHealthIndicatorService.CONTACT_SUPPORT_USER_ACTION));