|
@@ -11,8 +11,11 @@ package org.elasticsearch.health.node;
|
|
|
import org.apache.logging.log4j.LogManager;
|
|
|
import org.apache.logging.log4j.Logger;
|
|
|
import org.elasticsearch.Version;
|
|
|
+import org.elasticsearch.action.ActionListener;
|
|
|
import org.elasticsearch.action.admin.cluster.node.stats.NodeStats;
|
|
|
import org.elasticsearch.action.admin.indices.stats.CommonStatsFlags;
|
|
|
+import org.elasticsearch.action.support.master.AcknowledgedResponse;
|
|
|
+import org.elasticsearch.client.internal.Client;
|
|
|
import org.elasticsearch.cluster.ClusterChangedEvent;
|
|
|
import org.elasticsearch.cluster.ClusterState;
|
|
|
import org.elasticsearch.cluster.ClusterStateListener;
|
|
@@ -24,19 +27,30 @@ import org.elasticsearch.common.settings.ClusterSettings;
|
|
|
import org.elasticsearch.common.settings.Setting;
|
|
|
import org.elasticsearch.common.settings.Settings;
|
|
|
import org.elasticsearch.common.unit.ByteSizeValue;
|
|
|
+import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException;
|
|
|
+import org.elasticsearch.common.util.concurrent.RunOnce;
|
|
|
+import org.elasticsearch.core.Nullable;
|
|
|
import org.elasticsearch.core.TimeValue;
|
|
|
import org.elasticsearch.health.HealthStatus;
|
|
|
import org.elasticsearch.health.metadata.HealthMetadata;
|
|
|
+import org.elasticsearch.health.node.action.HealthNodeNotDiscoveredException;
|
|
|
+import org.elasticsearch.health.node.selection.HealthNode;
|
|
|
import org.elasticsearch.health.node.selection.HealthNodeTaskExecutor;
|
|
|
import org.elasticsearch.node.NodeService;
|
|
|
+import org.elasticsearch.threadpool.Scheduler;
|
|
|
import org.elasticsearch.threadpool.ThreadPool;
|
|
|
+import org.elasticsearch.transport.NodeNotConnectedException;
|
|
|
|
|
|
-import java.util.concurrent.atomic.AtomicBoolean;
|
|
|
+import java.util.Objects;
|
|
|
+import java.util.concurrent.atomic.AtomicReference;
|
|
|
+
|
|
|
+import static org.elasticsearch.core.Strings.format;
|
|
|
|
|
|
/**
|
|
|
* This class monitors the health of the node regarding the load on several resources.
|
|
|
* Currently, it only checks for available disk space. Furthermore, it informs the health
|
|
|
- * node about the local health upon change or when a new node is detected.
|
|
|
+ * node about the local health upon change or when a new node is detected or when the
|
|
|
+ * master node changed.
|
|
|
*/
|
|
|
public class LocalHealthMonitor implements ClusterStateListener {
|
|
|
|
|
@@ -53,23 +67,35 @@ public class LocalHealthMonitor implements ClusterStateListener {
|
|
|
private final ClusterService clusterService;
|
|
|
private final ThreadPool threadPool;
|
|
|
private final DiskCheck diskCheck;
|
|
|
+ private final Client client;
|
|
|
|
|
|
private volatile TimeValue monitorInterval;
|
|
|
private volatile boolean enabled;
|
|
|
- // Signals that all the prerequisites have been fulfilled and the monitoring task can be scheduled.
|
|
|
+
|
|
|
+ // Signals that all the prerequisites have been fulfilled and the monitoring can be started.
|
|
|
private volatile boolean prerequisitesFulfilled;
|
|
|
- // Ensures that only one monitoring task will be in progress at any moment in time.
|
|
|
- // It removes the need to synchronize scheduling since at the event that there are two
|
|
|
- // monitoring tasks scheduled, one of them will be no-op.
|
|
|
- private final AtomicBoolean inProgress = new AtomicBoolean();
|
|
|
- // Keeps the latest health state that was successfully reported.
|
|
|
- private volatile DiskHealthInfo lastReportedDiskHealthInfo = null;
|
|
|
-
|
|
|
- private LocalHealthMonitor(Settings settings, ClusterService clusterService, NodeService nodeService, ThreadPool threadPool) {
|
|
|
+
|
|
|
+ // Keeps the latest health state that was successfully reported to the current health node.
|
|
|
+ private final AtomicReference<DiskHealthInfo> lastReportedDiskHealthInfo = new AtomicReference<>();
|
|
|
+ // Keeps the last seen health node. We use this variable to ensure that there wasn't a health node
|
|
|
+ // change between the time we send an update until the time we update the lastReportedDiskHealthInfo.
|
|
|
+ private final AtomicReference<String> lastSeenHealthNode = new AtomicReference<>();
|
|
|
+ // Using a volatile reference to ensure that there is a single instance of monitoring running at all times.
|
|
|
+ // No need for extra synchronization because all the writes are executed on the cluster applier thread.
|
|
|
+ private volatile Monitoring monitoring;
|
|
|
+
|
|
|
+ private LocalHealthMonitor(
|
|
|
+ Settings settings,
|
|
|
+ ClusterService clusterService,
|
|
|
+ NodeService nodeService,
|
|
|
+ ThreadPool threadPool,
|
|
|
+ Client client
|
|
|
+ ) {
|
|
|
this.threadPool = threadPool;
|
|
|
this.monitorInterval = POLL_INTERVAL_SETTING.get(settings);
|
|
|
this.enabled = HealthNodeTaskExecutor.ENABLED_SETTING.get(settings);
|
|
|
this.clusterService = clusterService;
|
|
|
+ this.client = client;
|
|
|
this.diskCheck = new DiskCheck(nodeService);
|
|
|
}
|
|
|
|
|
@@ -77,9 +103,10 @@ public class LocalHealthMonitor implements ClusterStateListener {
|
|
|
Settings settings,
|
|
|
ClusterService clusterService,
|
|
|
NodeService nodeService,
|
|
|
- ThreadPool threadPool
|
|
|
+ ThreadPool threadPool,
|
|
|
+ Client client
|
|
|
) {
|
|
|
- LocalHealthMonitor localHealthMonitor = new LocalHealthMonitor(settings, clusterService, nodeService, threadPool);
|
|
|
+ LocalHealthMonitor localHealthMonitor = new LocalHealthMonitor(settings, clusterService, nodeService, threadPool, client);
|
|
|
localHealthMonitor.registerListeners();
|
|
|
return localHealthMonitor;
|
|
|
}
|
|
@@ -91,63 +118,283 @@ public class LocalHealthMonitor implements ClusterStateListener {
|
|
|
clusterService.addListener(this);
|
|
|
}
|
|
|
|
|
|
+ // When the monitoring interval changes, we restart the health monitoring with the new interval.
|
|
|
void setMonitorInterval(TimeValue monitorInterval) {
|
|
|
this.monitorInterval = monitorInterval;
|
|
|
- maybeScheduleNow();
|
|
|
+ stopMonitoring();
|
|
|
+ startMonitoringIfNecessary();
|
|
|
}
|
|
|
|
|
|
+ // When the health node is enabled we try to start monitoring if it is not
|
|
|
+ // already running, no need to restart it since there was no configuration
|
|
|
+ // change. When the health node is disabled we stop monitoring.
|
|
|
void setEnabled(boolean enabled) {
|
|
|
this.enabled = enabled;
|
|
|
- maybeScheduleNow();
|
|
|
+ if (enabled) {
|
|
|
+ startMonitoringIfNecessary();
|
|
|
+ } else {
|
|
|
+ stopMonitoring();
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
- /**
|
|
|
- * We always check if the prerequisites are fulfilled and if the health node
|
|
|
- * is enabled before we schedule a monitoring task.
|
|
|
- */
|
|
|
- private void maybeScheduleNextRun(TimeValue time) {
|
|
|
+ private void stopMonitoring() {
|
|
|
+ // If there is an existing schedule, cancel it
|
|
|
+ Scheduler.Cancellable currentMonitoring = monitoring;
|
|
|
+ if (currentMonitoring != null) {
|
|
|
+ currentMonitoring.cancel();
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ private void startMonitoringIfNecessary() {
|
|
|
if (prerequisitesFulfilled && enabled) {
|
|
|
- threadPool.scheduleUnlessShuttingDown(time, ThreadPool.Names.MANAGEMENT, this::monitorHealth);
|
|
|
+ if (isMonitorRunning() == false) {
|
|
|
+ monitoring = Monitoring.start(
|
|
|
+ monitorInterval,
|
|
|
+ threadPool,
|
|
|
+ lastReportedDiskHealthInfo,
|
|
|
+ lastSeenHealthNode,
|
|
|
+ diskCheck,
|
|
|
+ clusterService,
|
|
|
+ client
|
|
|
+ );
|
|
|
+ logger.debug("Local health monitoring started {}", monitoring);
|
|
|
+ } else {
|
|
|
+ logger.debug("Local health monitoring already started {}, skipping", monitoring);
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- // Helper method that starts the monitoring without a delay.
|
|
|
- private void maybeScheduleNow() {
|
|
|
- maybeScheduleNextRun(TimeValue.ZERO);
|
|
|
+ private boolean isMonitorRunning() {
|
|
|
+ Scheduler.Cancellable scheduled = this.monitoring;
|
|
|
+ return scheduled != null && scheduled.isCancelled() == false;
|
|
|
}
|
|
|
|
|
|
@Override
|
|
|
public void clusterChanged(ClusterChangedEvent event) {
|
|
|
- if (prerequisitesFulfilled == false) {
|
|
|
- prerequisitesFulfilled = event.state().nodesIfRecovered().getMinNodeVersion().onOrAfter(Version.V_8_5_0)
|
|
|
- && HealthMetadata.getFromClusterState(event.state()) != null;
|
|
|
- maybeScheduleNow();
|
|
|
+ DiscoveryNode currentHealthNode = HealthNode.findHealthNode(event.state());
|
|
|
+ DiscoveryNode currentMasterNode = event.state().nodes().getMasterNode();
|
|
|
+ boolean healthNodeChanged = hasHealthNodeChanged(currentHealthNode, event);
|
|
|
+ boolean masterNodeChanged = hasMasterNodeChanged(currentMasterNode, event);
|
|
|
+ if (healthNodeChanged || masterNodeChanged) {
|
|
|
+ // On health node or on master node changes, the health node might be reset so the reported
|
|
|
+ // health info gets reset to null, to ensure it will be resent.
|
|
|
+ lastSeenHealthNode.set(currentHealthNode == null ? null : currentHealthNode.getId());
|
|
|
+ lastReportedDiskHealthInfo.set(null);
|
|
|
+ if (logger.isDebugEnabled()) {
|
|
|
+ String reason;
|
|
|
+ if (healthNodeChanged && masterNodeChanged) {
|
|
|
+ reason = "the master node and the health node";
|
|
|
+ } else if (healthNodeChanged) {
|
|
|
+ reason = "the health node";
|
|
|
+ } else {
|
|
|
+ reason = "the master node";
|
|
|
+ }
|
|
|
+ logger.debug(
|
|
|
+ "Resetting the health monitoring because {} changed, current health node is {}.",
|
|
|
+ reason,
|
|
|
+ currentHealthNode == null ? null : format("[%s][%s]", currentHealthNode.getName(), currentHealthNode.getId())
|
|
|
+ );
|
|
|
+ }
|
|
|
+ }
|
|
|
+ prerequisitesFulfilled = event.state().nodesIfRecovered().getMinNodeVersion().onOrAfter(Version.V_8_5_0)
|
|
|
+ && HealthMetadata.getFromClusterState(event.state()) != null
|
|
|
+ && currentHealthNode != null
|
|
|
+ && currentMasterNode != null;
|
|
|
+ if (prerequisitesFulfilled == false || healthNodeChanged || masterNodeChanged) {
|
|
|
+ stopMonitoring();
|
|
|
+ }
|
|
|
+ if (prerequisitesFulfilled) {
|
|
|
+ startMonitoringIfNecessary();
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- // Visible for testing
|
|
|
- void monitorHealth() {
|
|
|
- if (inProgress.compareAndSet(false, true)) {
|
|
|
- ClusterState clusterState = clusterService.state();
|
|
|
- HealthMetadata healthMetadata = HealthMetadata.getFromClusterState(clusterState);
|
|
|
- assert healthMetadata != null : "health metadata should have been initialized.";
|
|
|
- DiskHealthInfo previousHealth = this.lastReportedDiskHealthInfo;
|
|
|
- DiskHealthInfo currentHealth = diskCheck.getHealth(healthMetadata, clusterState);
|
|
|
- if (currentHealth.equals(previousHealth) == false) {
|
|
|
- logger.debug("Health status changed from {} to {}", previousHealth, currentHealth);
|
|
|
- this.lastReportedDiskHealthInfo = currentHealth;
|
|
|
- }
|
|
|
- inProgress.set(false);
|
|
|
- // Scheduling happens after the flag inProgress is false, this ensures that
|
|
|
- // if the feature is enabled after the following schedule statement, the setEnabled
|
|
|
- // method will be able to schedule the next run, and it will not be a no-op.
|
|
|
- // We prefer to err towards an extra scheduling than miss the enabling of this feature alltogether.
|
|
|
- maybeScheduleNextRun(monitorInterval);
|
|
|
+ private boolean hasMasterNodeChanged(DiscoveryNode currentMasterNode, ClusterChangedEvent event) {
|
|
|
+ DiscoveryNode previousMasterNode = event.previousState().nodes().getMasterNode();
|
|
|
+ if (currentMasterNode == null || previousMasterNode == null) {
|
|
|
+ return currentMasterNode != previousMasterNode;
|
|
|
}
|
|
|
+ return previousMasterNode.getEphemeralId().equals(currentMasterNode.getEphemeralId()) == false;
|
|
|
}
|
|
|
|
|
|
+ // We compare the current health node against both the last seen health node from this node and the
|
|
|
+ // health node reported in the previous cluster state to be safe that we do not miss any change due to
|
|
|
+ // a flaky state.
|
|
|
+ private boolean hasHealthNodeChanged(DiscoveryNode currentHealthNode, ClusterChangedEvent event) {
|
|
|
+ DiscoveryNode previousHealthNode = HealthNode.findHealthNode(event.previousState());
|
|
|
+ return Objects.equals(lastSeenHealthNode.get(), currentHealthNode == null ? null : currentHealthNode.getId()) == false
|
|
|
+ || Objects.equals(previousHealthNode, currentHealthNode) == false;
|
|
|
+ }
|
|
|
+
|
|
|
+ @Nullable
|
|
|
DiskHealthInfo getLastReportedDiskHealthInfo() {
|
|
|
- return lastReportedDiskHealthInfo;
|
|
|
+ return lastReportedDiskHealthInfo.get();
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * This class is responsible for running the health monitoring. It evaluates and checks the health info of this node
|
|
|
+ * in the configured intervals. The first run happens upon initialization. If there is an exception, it will log it
|
|
|
+ * and continue to schedule the next run.
|
|
|
+ */
|
|
|
+ static class Monitoring implements Runnable, Scheduler.Cancellable {
|
|
|
+
|
|
|
+ private final TimeValue interval;
|
|
|
+ private final String executor;
|
|
|
+ private final Scheduler scheduler;
|
|
|
+ private final ClusterService clusterService;
|
|
|
+ private final DiskCheck diskCheck;
|
|
|
+ private final Client client;
|
|
|
+
|
|
|
+ private final AtomicReference<DiskHealthInfo> lastReportedDiskHealthInfo;
|
|
|
+ private final AtomicReference<String> lastSeenHealthNode;
|
|
|
+
|
|
|
+ private volatile boolean cancelled = false;
|
|
|
+ private volatile Scheduler.ScheduledCancellable scheduledRun;
|
|
|
+
|
|
|
+ private Monitoring(
|
|
|
+ TimeValue interval,
|
|
|
+ Scheduler scheduler,
|
|
|
+ String executor,
|
|
|
+ AtomicReference<DiskHealthInfo> lastReportedDiskHealthInfo,
|
|
|
+ AtomicReference<String> lastSeenHealthNode,
|
|
|
+ DiskCheck diskCheck,
|
|
|
+ ClusterService clusterService,
|
|
|
+ Client client
|
|
|
+ ) {
|
|
|
+ this.interval = interval;
|
|
|
+ this.executor = executor;
|
|
|
+ this.scheduler = scheduler;
|
|
|
+ this.lastReportedDiskHealthInfo = lastReportedDiskHealthInfo;
|
|
|
+ this.lastSeenHealthNode = lastSeenHealthNode;
|
|
|
+ this.clusterService = clusterService;
|
|
|
+ this.diskCheck = diskCheck;
|
|
|
+ this.client = client;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Creates a monitoring instance and starts the schedules the first run.
|
|
|
+ */
|
|
|
+ static Monitoring start(
|
|
|
+ TimeValue interval,
|
|
|
+ Scheduler scheduler,
|
|
|
+ AtomicReference<DiskHealthInfo> lastReportedDiskHealthInfo,
|
|
|
+ AtomicReference<String> lastSeenHealthNode,
|
|
|
+ DiskCheck diskCheck,
|
|
|
+ ClusterService clusterService,
|
|
|
+ Client client
|
|
|
+ ) {
|
|
|
+ Monitoring monitoring = new Monitoring(
|
|
|
+ interval,
|
|
|
+ scheduler,
|
|
|
+ ThreadPool.Names.MANAGEMENT,
|
|
|
+ lastReportedDiskHealthInfo,
|
|
|
+ lastSeenHealthNode,
|
|
|
+ diskCheck,
|
|
|
+ clusterService,
|
|
|
+ client
|
|
|
+ );
|
|
|
+ monitoring.scheduledRun = scheduler.schedule(monitoring, TimeValue.ZERO, monitoring.executor);
|
|
|
+ return monitoring;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Attempts to cancel monitoring. This method has no effect if
|
|
|
+ * the monitoring is already cancelled. If the {@code scheduledRun}
|
|
|
+ * has not started when {@code cancel} is called, this run should
|
|
|
+ * never run. If the {@code scheduledRun} is already running, then
|
|
|
+ * it will not be interrupted but the next run will not be scheduled.
|
|
|
+ *
|
|
|
+ * @return false, if the {@code scheduledRun} was already cancelled; true
|
|
|
+ * otherwise.
|
|
|
+ */
|
|
|
+ @Override
|
|
|
+ public boolean cancel() {
|
|
|
+ if (cancelled) {
|
|
|
+ // already cancelled
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+ cancelled = true;
|
|
|
+ scheduledRun.cancel();
|
|
|
+ return true;
|
|
|
+ }
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public boolean isCancelled() {
|
|
|
+ return cancelled;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * This method evaluates the health info of this node and if there is a change it sends an update request to the health node.
|
|
|
+ */
|
|
|
+ @Override
|
|
|
+ public void run() {
|
|
|
+ if (cancelled) {
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ boolean nextRunScheduled = false;
|
|
|
+ Runnable scheduleNextRun = new RunOnce(this::scheduleNextRunIfNecessary);
|
|
|
+ try {
|
|
|
+ ClusterState clusterState = clusterService.state();
|
|
|
+ HealthMetadata healthMetadata = HealthMetadata.getFromClusterState(clusterState);
|
|
|
+ if (healthMetadata != null) {
|
|
|
+ DiskHealthInfo previousHealth = this.lastReportedDiskHealthInfo.get();
|
|
|
+ DiskHealthInfo currentHealth = diskCheck.getHealth(healthMetadata, clusterState);
|
|
|
+ if (currentHealth.equals(previousHealth) == false) {
|
|
|
+ String nodeId = clusterService.localNode().getId();
|
|
|
+ String healthNodeId = lastSeenHealthNode.get();
|
|
|
+ ActionListener<AcknowledgedResponse> listener = ActionListener.wrap(response -> {
|
|
|
+ // Update the last reported value only if the health node hasn't changed.
|
|
|
+ if (Objects.equals(healthNodeId, lastSeenHealthNode.get())
|
|
|
+ && lastReportedDiskHealthInfo.compareAndSet(previousHealth, currentHealth)) {
|
|
|
+ logger.debug(
|
|
|
+ "Health info [{}] successfully sent, last reported value: {}.",
|
|
|
+ currentHealth,
|
|
|
+ lastReportedDiskHealthInfo.get()
|
|
|
+ );
|
|
|
+ }
|
|
|
+ }, e -> {
|
|
|
+ if (e.getCause() instanceof NodeNotConnectedException
|
|
|
+ || e.getCause() instanceof HealthNodeNotDiscoveredException) {
|
|
|
+ logger.debug("Failed to connect to the health node [{}], will try again.", e.getCause().getMessage());
|
|
|
+ } else {
|
|
|
+ logger.debug(
|
|
|
+ () -> format("Failed to send health info [%s] to health node, will try again.", currentHealth),
|
|
|
+ e
|
|
|
+ );
|
|
|
+ }
|
|
|
+ });
|
|
|
+ client.execute(
|
|
|
+ UpdateHealthInfoCacheAction.INSTANCE,
|
|
|
+ new UpdateHealthInfoCacheAction.Request(nodeId, currentHealth),
|
|
|
+ ActionListener.runAfter(listener, scheduleNextRun)
|
|
|
+ );
|
|
|
+ nextRunScheduled = true;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ } catch (Exception e) {
|
|
|
+ logger.warn(() -> format("Failed to run scheduled health monitoring on thread pool [%s]", executor), e);
|
|
|
+ } finally {
|
|
|
+ // If the next run isn't scheduled because for example the health info hasn't changed, we schedule it here.
|
|
|
+ if (nextRunScheduled == false) {
|
|
|
+ scheduleNextRun.run();
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ private void scheduleNextRunIfNecessary() {
|
|
|
+ if (cancelled) {
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ try {
|
|
|
+ scheduledRun = scheduler.schedule(this, interval, executor);
|
|
|
+ } catch (final EsRejectedExecutionException e) {
|
|
|
+ logger.debug(() -> format("Scheduled health monitoring was rejected on thread pool [%s]", executor), e);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public String toString() {
|
|
|
+ return "Monitoring{interval=" + interval + ", cancelled=" + cancelled + "}";
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
/**
|
|
@@ -173,7 +420,7 @@ public class LocalHealthMonitor implements ClusterStateListener {
|
|
|
if (node.isDedicatedFrozenNode()) {
|
|
|
long frozenFloodStageThreshold = diskMetadata.getFreeBytesFrozenFloodStageWatermark(totalBytes).getBytes();
|
|
|
if (usage.getFreeBytes() < frozenFloodStageThreshold) {
|
|
|
- logger.debug("flood stage disk watermark [{}] exceeded on {}", frozenFloodStageThreshold, usage);
|
|
|
+ logger.debug("Flood stage disk watermark [{}] exceeded on {}", frozenFloodStageThreshold, usage);
|
|
|
return new DiskHealthInfo(HealthStatus.RED, DiskHealthInfo.Cause.FROZEN_NODE_OVER_FLOOD_STAGE_THRESHOLD);
|
|
|
}
|
|
|
return new DiskHealthInfo(HealthStatus.GREEN);
|
|
@@ -181,11 +428,13 @@ public class LocalHealthMonitor implements ClusterStateListener {
|
|
|
|
|
|
long floodStageThreshold = diskMetadata.getFreeBytesFloodStageWatermark(totalBytes).getBytes();
|
|
|
if (usage.getFreeBytes() < floodStageThreshold) {
|
|
|
+ logger.debug("Flood stage disk watermark [{}] exceeded on {}", floodStageThreshold, usage);
|
|
|
return new DiskHealthInfo(HealthStatus.RED, DiskHealthInfo.Cause.NODE_OVER_THE_FLOOD_STAGE_THRESHOLD);
|
|
|
}
|
|
|
|
|
|
long highThreshold = diskMetadata.getFreeBytesHighWatermark(totalBytes).getBytes();
|
|
|
if (usage.getFreeBytes() < highThreshold && hasRelocatingShards(clusterState, node.getId()) == false) {
|
|
|
+ logger.debug("High disk watermark [{}] exceeded on {}", highThreshold, usage);
|
|
|
return new DiskHealthInfo(HealthStatus.YELLOW, DiskHealthInfo.Cause.NODE_OVER_HIGH_THRESHOLD);
|
|
|
}
|
|
|
return new DiskHealthInfo(HealthStatus.GREEN);
|