Browse Source

HealthPeriodicLogger disabled by default (#97722)

Co-authored-by: Matt Culbreth <matt.culbreth@elastic.co>
Mary Gouseti 2 years ago
parent
commit
9aed799b51

+ 4 - 0
docs/reference/settings/health-diagnostic-settings.asciidoc

@@ -45,6 +45,10 @@ comprise its local health such as its disk usage.
 `health.ilm.max_retries_per_step`::
 (<<cluster-update-settings,Dynamic>>) The minimum amount of times an index has retried by an {ilm-init} step before it is considered stagnant. Defaults to `100`
 
+`health.periodic_logger.enabled`::
+(<<cluster-update-settings,Dynamic>>) Enables the health periodic logger, which logs the health statuses of each health indicator along with the top level one as observed by the Health API.
+Defaults to `false`.
+
 `health.periodic_logger.poll_interval`::
 (<<cluster-update-settings,Dynamic>>, <<time-units, time unit value>>) How often {es} logs the health status of the cluster and of each health indicator as observed by the Health API.
 Defaults to `60s` (60 seconds).

+ 2 - 1
server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java

@@ -568,7 +568,8 @@ public final class ClusterSettings extends AbstractScopedSettings {
         TcpTransport.isUntrustedRemoteClusterEnabled() ? RemoteClusterPortSettings.TCP_NO_DELAY : null,
         TcpTransport.isUntrustedRemoteClusterEnabled() ? RemoteClusterPortSettings.TCP_REUSE_ADDRESS : null,
         TcpTransport.isUntrustedRemoteClusterEnabled() ? RemoteClusterPortSettings.TCP_SEND_BUFFER_SIZE : null,
-        HealthPeriodicLogger.HEALTH_PERIODIC_LOGGER_POLL_INTERVAL_SETTING,
+        HealthPeriodicLogger.POLL_INTERVAL_SETTING,
+        HealthPeriodicLogger.ENABLED_SETTING,
         DataStreamLifecycle.isEnabled() ? DataStreamLifecycle.CLUSTER_LIFECYCLE_DEFAULT_ROLLOVER_SETTING : null,
         IndicesClusterStateService.SHARD_LOCK_RETRY_INTERVAL_SETTING,
         IndicesClusterStateService.SHARD_LOCK_RETRY_TIMEOUT_SETTING,

+ 33 - 8
server/src/main/java/org/elasticsearch/health/HealthPeriodicLogger.java

@@ -42,15 +42,21 @@ import java.util.concurrent.atomic.AtomicBoolean;
 public class HealthPeriodicLogger implements ClusterStateListener, Closeable, SchedulerEngine.Listener {
     public static final String HEALTH_FIELD_PREFIX = "elasticsearch.health";
 
-    public static final String HEALTH_PERIODIC_LOGGER_POLL_INTERVAL = "health.periodic_logger.poll_interval";
-    public static final Setting<TimeValue> HEALTH_PERIODIC_LOGGER_POLL_INTERVAL_SETTING = Setting.timeSetting(
-        HEALTH_PERIODIC_LOGGER_POLL_INTERVAL,
+    public static final Setting<TimeValue> POLL_INTERVAL_SETTING = Setting.timeSetting(
+        "health.periodic_logger.poll_interval",
         TimeValue.timeValueSeconds(60),
         TimeValue.timeValueSeconds(15),
         Setting.Property.Dynamic,
         Setting.Property.NodeScope
     );
 
+    public static final Setting<Boolean> ENABLED_SETTING = Setting.boolSetting(
+        "health.periodic_logger.enabled",
+        false,
+        Setting.Property.Dynamic,
+        Setting.Property.NodeScope
+    );
+
     /**
      * Name constant for the job HealthService schedules
      */
@@ -71,6 +77,7 @@ public class HealthPeriodicLogger implements ClusterStateListener, Closeable, Sc
 
     private final SetOnce<SchedulerEngine> scheduler = new SetOnce<>();
     private volatile TimeValue pollInterval;
+    private volatile boolean enabled;
 
     private static final Logger logger = LogManager.getLogger(HealthPeriodicLogger.class);
 
@@ -89,16 +96,19 @@ public class HealthPeriodicLogger implements ClusterStateListener, Closeable, Sc
         this.client = client;
         this.healthService = healthService;
         this.clock = Clock.systemUTC();
-        this.pollInterval = HEALTH_PERIODIC_LOGGER_POLL_INTERVAL_SETTING.get(settings);
+        this.pollInterval = POLL_INTERVAL_SETTING.get(settings);
+        this.enabled = ENABLED_SETTING.get(settings);
     }
 
     /**
      * Initializer method to avoid the publication of a self reference in the constructor.
      */
     public void init() {
-        clusterService.addListener(this);
-        clusterService.getClusterSettings()
-            .addSettingsUpdateConsumer(HEALTH_PERIODIC_LOGGER_POLL_INTERVAL_SETTING, this::updatePollInterval);
+        if (this.enabled) {
+            clusterService.addListener(this);
+        }
+        clusterService.getClusterSettings().addSettingsUpdateConsumer(ENABLED_SETTING, this::enable);
+        clusterService.getClusterSettings().addSettingsUpdateConsumer(POLL_INTERVAL_SETTING, this::updatePollInterval);
     }
 
     @Override
@@ -137,7 +147,7 @@ public class HealthPeriodicLogger implements ClusterStateListener, Closeable, Sc
 
     @Override
     public void triggered(SchedulerEngine.Event event) {
-        if (event.getJobName().equals(HEALTH_PERIODIC_LOGGER_JOB_NAME)) {
+        if (event.getJobName().equals(HEALTH_PERIODIC_LOGGER_JOB_NAME) && this.enabled) {
             this.tryToLogHealth();
         }
     }
@@ -229,6 +239,10 @@ public class HealthPeriodicLogger implements ClusterStateListener, Closeable, Sc
             return;
         }
 
+        if (this.enabled == false) {
+            return;
+        }
+
         // don't schedule the job if the node is shutting down
         if (isClusterServiceStoppedOrClosed()) {
             logger.trace(
@@ -257,6 +271,17 @@ public class HealthPeriodicLogger implements ClusterStateListener, Closeable, Sc
         }
     }
 
+    private void enable(boolean enabled) {
+        this.enabled = enabled;
+        if (enabled) {
+            clusterService.addListener(this);
+            maybeScheduleJob();
+        } else {
+            clusterService.removeListener(this);
+            maybeCancelJob();
+        }
+    }
+
     private void updatePollInterval(TimeValue newInterval) {
         this.pollInterval = newInterval;
         maybeScheduleJob();

+ 88 - 61
server/src/test/java/org/elasticsearch/health/HealthPeriodicLoggerTests.java

@@ -24,8 +24,8 @@ import org.elasticsearch.cluster.service.ClusterService;
 import org.elasticsearch.common.logging.Loggers;
 import org.elasticsearch.common.scheduler.SchedulerEngine;
 import org.elasticsearch.common.settings.ClusterSettings;
-import org.elasticsearch.common.settings.Setting;
 import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.core.TimeValue;
 import org.elasticsearch.test.ESTestCase;
 import org.elasticsearch.test.MockLogAppender;
 import org.elasticsearch.threadpool.TestThreadPool;
@@ -34,7 +34,6 @@ import org.junit.After;
 import org.junit.Before;
 
 import java.util.ArrayList;
-import java.util.HashSet;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
@@ -62,7 +61,12 @@ public class HealthPeriodicLoggerTests extends ESTestCase {
     private ClusterService clusterService;
 
     private HealthPeriodicLogger testHealthPeriodicLogger;
-    private ClusterService testClusterService;
+    private ClusterSettings clusterSettings;
+    private final DiscoveryNode node1 = DiscoveryNodeUtils.builder("node_1").roles(Set.of(DiscoveryNodeRole.MASTER_ROLE)).build();
+    private final DiscoveryNode node2 = DiscoveryNodeUtils.builder("node_2")
+        .roles(Set.of(DiscoveryNodeRole.MASTER_ROLE, DiscoveryNodeRole.DATA_ROLE))
+        .build();
+    private ClusterState stateWithLocalHealthNode;
 
     private NodeClient getTestClient() {
         return mock(NodeClient.class);
@@ -75,22 +79,15 @@ public class HealthPeriodicLoggerTests extends ESTestCase {
     @Before
     public void setupServices() {
         threadPool = new TestThreadPool(getTestName());
-
-        Set<Setting<?>> builtInClusterSettings = new HashSet<>(ClusterSettings.BUILT_IN_CLUSTER_SETTINGS);
-        builtInClusterSettings.add(HealthPeriodicLogger.HEALTH_PERIODIC_LOGGER_POLL_INTERVAL_SETTING);
-        ClusterSettings clusterSettings = new ClusterSettings(Settings.EMPTY, builtInClusterSettings);
-        this.clusterService = createClusterService(this.threadPool, clusterSettings);
-
+        stateWithLocalHealthNode = ClusterStateCreationUtils.state(node2, node1, node2, new DiscoveryNode[] { node1, node2 });
+        this.clusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS);
+        this.clusterService = createClusterService(stateWithLocalHealthNode, this.threadPool, clusterSettings);
         this.client = getTestClient();
-
     }
 
     @After
     public void cleanup() {
         clusterService.close();
-        if (testClusterService != null) {
-            testClusterService.close();
-        }
         if (testHealthPeriodicLogger != null) {
             testHealthPeriodicLogger.close();
         }
@@ -98,18 +95,6 @@ public class HealthPeriodicLoggerTests extends ESTestCase {
         client.close();
     }
 
-    private List<HealthIndicatorResult> getTestIndicatorResults() {
-        var networkLatency = new HealthIndicatorResult("network_latency", GREEN, null, null, null, null);
-        var slowTasks = new HealthIndicatorResult("slow_task_assignment", YELLOW, null, null, null, null);
-        var shardsAvailable = new HealthIndicatorResult("shards_availability", GREEN, null, null, null, null);
-
-        return List.of(networkLatency, slowTasks, shardsAvailable);
-    }
-
-    private String makeHealthStatusString(String key) {
-        return String.format(Locale.ROOT, "%s.%s.status", HealthPeriodicLogger.HEALTH_FIELD_PREFIX, key);
-    }
-
     public void testConvertToLoggedFields() {
         var results = getTestIndicatorResults();
         var overallStatus = HealthStatus.merge(results.stream().map(HealthIndicatorResult::status));
@@ -137,52 +122,76 @@ public class HealthPeriodicLoggerTests extends ESTestCase {
 
     public void testHealthNodeIsSelected() {
         HealthService testHealthService = this.getMockedHealthService();
-
-        // create a cluster topology
-        final DiscoveryNode node1 = DiscoveryNodeUtils.builder("node_1").roles(Set.of(DiscoveryNodeRole.MASTER_ROLE)).build();
-        final DiscoveryNode node2 = DiscoveryNodeUtils.builder("node_2")
-            .roles(Set.of(DiscoveryNodeRole.MASTER_ROLE, DiscoveryNodeRole.DATA_ROLE))
-            .build();
-        ClusterState current = ClusterStateCreationUtils.state(node2, node1, node2, new DiscoveryNode[] { node1, node2 });
-
-        testClusterService = createClusterService(current, this.threadPool);
-        testHealthPeriodicLogger = new HealthPeriodicLogger(Settings.EMPTY, testClusterService, client, testHealthService);
-        testHealthPeriodicLogger.init();
+        testHealthPeriodicLogger = createAndInitHealthPeriodicLogger(clusterService, testHealthService, true);
 
         // test that it knows that it's not initially the health node
         assertFalse(testHealthPeriodicLogger.isHealthNode);
 
         // trigger a cluster change and recheck
-        testHealthPeriodicLogger.clusterChanged(new ClusterChangedEvent("test", current, ClusterState.EMPTY_STATE));
+        testHealthPeriodicLogger.clusterChanged(new ClusterChangedEvent("test", stateWithLocalHealthNode, ClusterState.EMPTY_STATE));
         assertTrue(testHealthPeriodicLogger.isHealthNode);
     }
 
     public void testJobScheduling() {
         HealthService testHealthService = this.getMockedHealthService();
 
-        // create a cluster topology
-        final DiscoveryNode node1 = DiscoveryNodeUtils.builder("node_1").roles(Set.of(DiscoveryNodeRole.MASTER_ROLE)).build();
-        final DiscoveryNode node2 = DiscoveryNodeUtils.builder("node_2")
-            .roles(Set.of(DiscoveryNodeRole.MASTER_ROLE, DiscoveryNodeRole.DATA_ROLE))
-            .build();
-        ClusterState current = ClusterStateCreationUtils.state(node2, node1, node2, new DiscoveryNode[] { node1, node2 });
-
-        testClusterService = createClusterService(current, this.threadPool);
-        testHealthPeriodicLogger = new HealthPeriodicLogger(Settings.EMPTY, testClusterService, client, testHealthService);
-        testHealthPeriodicLogger.init();
+        testHealthPeriodicLogger = createAndInitHealthPeriodicLogger(clusterService, testHealthService, true);
 
-        testHealthPeriodicLogger.clusterChanged(new ClusterChangedEvent("test", current, ClusterState.EMPTY_STATE));
+        testHealthPeriodicLogger.clusterChanged(new ClusterChangedEvent("test", stateWithLocalHealthNode, ClusterState.EMPTY_STATE));
         assertTrue(testHealthPeriodicLogger.isHealthNode);
 
         SchedulerEngine scheduler = testHealthPeriodicLogger.getScheduler();
+        assertNotNull(scheduler);
         assertTrue(scheduler.scheduledJobIds().contains(HealthPeriodicLogger.HEALTH_PERIODIC_LOGGER_JOB_NAME));
 
         ClusterState noHealthNode = ClusterStateCreationUtils.state(node2, node1, new DiscoveryNode[] { node1, node2 });
-        testHealthPeriodicLogger.clusterChanged(new ClusterChangedEvent("test", noHealthNode, current));
+        testHealthPeriodicLogger.clusterChanged(new ClusterChangedEvent("test", noHealthNode, stateWithLocalHealthNode));
         assertFalse(testHealthPeriodicLogger.isHealthNode);
         assertFalse(scheduler.scheduledJobIds().contains(HealthPeriodicLogger.HEALTH_PERIODIC_LOGGER_JOB_NAME));
     }
 
+    public void testEnabled() {
+        HealthService testHealthService = this.getMockedHealthService();
+        testHealthPeriodicLogger = createAndInitHealthPeriodicLogger(clusterService, testHealthService, true);
+
+        testHealthPeriodicLogger.clusterChanged(new ClusterChangedEvent("test", stateWithLocalHealthNode, ClusterState.EMPTY_STATE));
+        assertTrue(testHealthPeriodicLogger.isHealthNode);
+
+        // disable it and then verify that the job is gone
+        {
+            this.clusterSettings.applySettings(Settings.builder().put(HealthPeriodicLogger.ENABLED_SETTING.getKey(), false).build());
+            assertFalse(
+                testHealthPeriodicLogger.getScheduler().scheduledJobIds().contains(HealthPeriodicLogger.HEALTH_PERIODIC_LOGGER_JOB_NAME)
+            );
+        }
+
+        // enable it and then verify that the job is created
+        {
+            this.clusterSettings.applySettings(Settings.builder().put(HealthPeriodicLogger.ENABLED_SETTING.getKey(), true).build());
+            assertTrue(
+                testHealthPeriodicLogger.getScheduler().scheduledJobIds().contains(HealthPeriodicLogger.HEALTH_PERIODIC_LOGGER_JOB_NAME)
+            );
+        }
+    }
+
+    public void testUpdatePollInterval() {
+        HealthService testHealthService = this.getMockedHealthService();
+
+        testHealthPeriodicLogger = createAndInitHealthPeriodicLogger(clusterService, testHealthService, false);
+        assertNull(testHealthPeriodicLogger.getScheduler());
+
+        testHealthPeriodicLogger.clusterChanged(new ClusterChangedEvent("test", stateWithLocalHealthNode, ClusterState.EMPTY_STATE));
+        assertTrue(testHealthPeriodicLogger.isHealthNode);
+
+        // verify that updating the polling interval doesn't schedule the job
+        {
+            this.clusterSettings.applySettings(
+                Settings.builder().put(HealthPeriodicLogger.POLL_INTERVAL_SETTING.getKey(), TimeValue.timeValueSeconds(15)).build()
+            );
+            assertNull(testHealthPeriodicLogger.getScheduler());
+        }
+    }
+
     public void testTriggeredJobCallsTryToLogHealth() throws Exception {
         AtomicBoolean listenerCalled = new AtomicBoolean(false);
         AtomicBoolean failureCalled = new AtomicBoolean(false);
@@ -199,8 +208,7 @@ public class HealthPeriodicLoggerTests extends ESTestCase {
         };
         HealthService testHealthService = this.getMockedHealthService();
 
-        testHealthPeriodicLogger = new HealthPeriodicLogger(Settings.EMPTY, clusterService, client, testHealthService);
-        testHealthPeriodicLogger.init();
+        testHealthPeriodicLogger = createAndInitHealthPeriodicLogger(this.clusterService, testHealthService, true);
 
         HealthPeriodicLogger spyHealthPeriodicLogger = spy(testHealthPeriodicLogger);
         spyHealthPeriodicLogger.isHealthNode = true;
@@ -221,8 +229,7 @@ public class HealthPeriodicLoggerTests extends ESTestCase {
 
         HealthService testHealthService = this.getMockedHealthService();
 
-        testHealthPeriodicLogger = new HealthPeriodicLogger(Settings.EMPTY, clusterService, client, testHealthService);
-        testHealthPeriodicLogger.init();
+        testHealthPeriodicLogger = createAndInitHealthPeriodicLogger(this.clusterService, testHealthService, true);
 
         HealthPeriodicLogger spyHealthPeriodicLogger = spy(testHealthPeriodicLogger);
         spyHealthPeriodicLogger.isHealthNode = true;
@@ -266,8 +273,7 @@ public class HealthPeriodicLoggerTests extends ESTestCase {
             return null;
         }).when(testHealthService).getHealth(any(), isNull(), anyBoolean(), anyInt(), any());
 
-        testHealthPeriodicLogger = new HealthPeriodicLogger(Settings.EMPTY, clusterService, client, testHealthService);
-        testHealthPeriodicLogger.init();
+        testHealthPeriodicLogger = createAndInitHealthPeriodicLogger(this.clusterService, testHealthService, true);
 
         HealthPeriodicLogger spyHealthPeriodicLogger = spy(testHealthPeriodicLogger);
         spyHealthPeriodicLogger.isHealthNode = true;
@@ -301,8 +307,7 @@ public class HealthPeriodicLoggerTests extends ESTestCase {
             return null;
         }).when(testHealthService).getHealth(any(), isNull(), anyBoolean(), anyInt(), any());
 
-        testHealthPeriodicLogger = new HealthPeriodicLogger(Settings.EMPTY, clusterService, client, testHealthService);
-        testHealthPeriodicLogger.init();
+        testHealthPeriodicLogger = createAndInitHealthPeriodicLogger(this.clusterService, testHealthService, true);
 
         HealthPeriodicLogger spyHealthPeriodicLogger = spy(testHealthPeriodicLogger);
         spyHealthPeriodicLogger.isHealthNode = true;
@@ -329,8 +334,7 @@ public class HealthPeriodicLoggerTests extends ESTestCase {
 
         HealthService testHealthService = this.getMockedHealthService();
 
-        testHealthPeriodicLogger = new HealthPeriodicLogger(Settings.EMPTY, clusterService, client, testHealthService);
-        testHealthPeriodicLogger.init();
+        testHealthPeriodicLogger = createAndInitHealthPeriodicLogger(this.clusterService, testHealthService, true);
 
         HealthPeriodicLogger spyHealthPeriodicLogger = spy(testHealthPeriodicLogger);
         spyHealthPeriodicLogger.isHealthNode = true;
@@ -398,8 +402,7 @@ public class HealthPeriodicLoggerTests extends ESTestCase {
 
         HealthService testHealthService = this.getMockedHealthService();
 
-        testHealthPeriodicLogger = new HealthPeriodicLogger(Settings.EMPTY, clusterService, client, testHealthService);
-        testHealthPeriodicLogger.init();
+        testHealthPeriodicLogger = createAndInitHealthPeriodicLogger(this.clusterService, testHealthService, true);
 
         HealthPeriodicLogger spyHealthPeriodicLogger = spy(testHealthPeriodicLogger);
         spyHealthPeriodicLogger.isHealthNode = true;
@@ -420,4 +423,28 @@ public class HealthPeriodicLoggerTests extends ESTestCase {
 
     }
 
+    private List<HealthIndicatorResult> getTestIndicatorResults() {
+        var networkLatency = new HealthIndicatorResult("network_latency", GREEN, null, null, null, null);
+        var slowTasks = new HealthIndicatorResult("slow_task_assignment", YELLOW, null, null, null, null);
+        var shardsAvailable = new HealthIndicatorResult("shards_availability", GREEN, null, null, null, null);
+
+        return List.of(networkLatency, slowTasks, shardsAvailable);
+    }
+
+    private String makeHealthStatusString(String key) {
+        return String.format(Locale.ROOT, "%s.%s.status", HealthPeriodicLogger.HEALTH_FIELD_PREFIX, key);
+    }
+
+    private HealthPeriodicLogger createAndInitHealthPeriodicLogger(
+        ClusterService clusterService,
+        HealthService testHealthService,
+        boolean enabled
+    ) {
+        testHealthPeriodicLogger = new HealthPeriodicLogger(Settings.EMPTY, clusterService, this.client, testHealthService);
+        testHealthPeriodicLogger.init();
+        if (enabled) {
+            clusterSettings.applySettings(Settings.builder().put(HealthPeriodicLogger.ENABLED_SETTING.getKey(), true).build());
+        }
+        return testHealthPeriodicLogger;
+    }
 }