瀏覽代碼

ILM/SLM health indicator services (#83440)

Adds health indicator implementations that track the health of the ILM and SLM services,
based on each service's operation mode and its number of configured policies.
Ievgen Degtiarenko 3 年之前
父節點
當前提交
a170b936c0

+ 5 - 0
docs/changelog/83440.yaml

@@ -0,0 +1,5 @@
+pr: 83440
+summary: ILM/SLM health indicator services
+area: Health
+type: enhancement
+issues: []

+ 8 - 0
server/src/main/java/org/elasticsearch/health/HealthIndicatorService.java

@@ -13,5 +13,13 @@ package org.elasticsearch.health;
  */
 public interface HealthIndicatorService {
 
+    /**
+     * @return the name of this indicator; {@link #createIndicator} passes it as the first argument of the
+     *         {@link HealthIndicatorResult} it builds
+     */
+    String name();
+
+    /**
+     * @return the component this indicator belongs to; {@link #createIndicator} passes it as the second argument of the
+     *         {@link HealthIndicatorResult} it builds
+     */
+    String component();
+
+    /**
+     * @return the {@link HealthIndicatorResult} produced by this indicator
+     */
     HealthIndicatorResult calculate();
+
+    /**
+     * Convenience factory that fills in {@link #name()} and {@link #component()} so implementations only supply the
+     * parts of the result that vary.
+     *
+     * @param status  the status to report
+     * @param summary the summary text to report
+     * @param details the details to attach to the result
+     * @return a new {@link HealthIndicatorResult} built from this indicator's name and component plus the given arguments
+     */
+    default HealthIndicatorResult createIndicator(HealthStatus status, String summary, HealthIndicatorDetails details) {
+        return new HealthIndicatorResult(name(), component(), status, summary, details);
+    }
 }

+ 14 - 0
server/src/main/java/org/elasticsearch/health/ServerHealthComponents.java

@@ -0,0 +1,14 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.health;
+
+/**
+ * String constants naming the components that health indicators report under
+ * (the values returned from {@code HealthIndicatorService#component()}).
+ */
+public class ServerHealthComponents {
+    // Component name used by the ILM health indicator.
+    public static final String DATA = "data";
+    // Component name used by the SLM health indicator.
+    public static final String SNAPSHOT = "snapshot";
+}

+ 3 - 5
server/src/main/java/org/elasticsearch/health/SimpleHealthIndicatorDetails.java

@@ -14,12 +14,10 @@ import java.io.IOException;
 import java.util.Map;
 import java.util.Objects;
 
-public class SimpleHealthIndicatorDetails implements HealthIndicatorDetails {
+public record SimpleHealthIndicatorDetails(Map<String, Object> details) implements HealthIndicatorDetails {
 
-    private final Map<String, Object> details;
-
-    public SimpleHealthIndicatorDetails(Map<String, Object> details) {
-        this.details = Objects.requireNonNull(details);
+    public SimpleHealthIndicatorDetails {
+        Objects.requireNonNull(details);
     }
 
     @Override

+ 69 - 0
x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/IlmHealthIndicatorService.java

@@ -0,0 +1,69 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.ilm;
+
+import org.elasticsearch.cluster.service.ClusterService;
+import org.elasticsearch.health.HealthIndicatorDetails;
+import org.elasticsearch.health.HealthIndicatorResult;
+import org.elasticsearch.health.HealthIndicatorService;
+import org.elasticsearch.health.SimpleHealthIndicatorDetails;
+import org.elasticsearch.xpack.core.ilm.IndexLifecycleMetadata;
+import org.elasticsearch.xpack.core.ilm.OperationMode;
+
+import java.util.Map;
+
+import static org.elasticsearch.health.HealthStatus.GREEN;
+import static org.elasticsearch.health.HealthStatus.YELLOW;
+import static org.elasticsearch.health.ServerHealthComponents.DATA;
+
+/**
+ * Health indicator for the index lifecycle management (ILM) component.
+ * <p>
+ * The indicator reports YELLOW when ILM is not running while policies are configured, and GREEN otherwise.
+ * Constant indexing could eventually use the entire disk space on the hot topology in such cases.
+ * <p>
+ * ILM must be running to clear the warning reported by this indicator.
+ */
+public class IlmHealthIndicatorService implements HealthIndicatorService {
+
+    /** Indicator name returned by {@link #name()}. */
+    public static final String NAME = "ILM";
+
+    // Source of the cluster state that calculate() reads on every call.
+    private final ClusterService clusterService;
+
+    public IlmHealthIndicatorService(ClusterService clusterService) {
+        this.clusterService = clusterService;
+    }
+
+    @Override
+    public String name() {
+        return NAME;
+    }
+
+    @Override
+    public String component() {
+        return DATA;
+    }
+
+    /**
+     * Derives the ILM health from the ILM custom metadata in the current cluster state.
+     * <ul>
+     *   <li>GREEN, "No policies configured" - the policy map is empty</li>
+     *   <li>YELLOW, "ILM is not running" - policies exist and the operation mode is not {@code RUNNING}</li>
+     *   <li>GREEN, "ILM is running" - policies exist and the operation mode is {@code RUNNING}</li>
+     * </ul>
+     *
+     * @return the indicator result, with the details produced by {@code createDetails}
+     */
+    @Override
+    public HealthIndicatorResult calculate() {
+        // Falls back to IndexLifecycleMetadata.EMPTY when the cluster state carries no ILM custom metadata.
+        var ilmMetadata = clusterService.state().metadata().custom(IndexLifecycleMetadata.TYPE, IndexLifecycleMetadata.EMPTY);
+        if (ilmMetadata.getPolicyMetadatas().isEmpty()) {
+            return createIndicator(GREEN, "No policies configured", createDetails(ilmMetadata));
+        } else if (ilmMetadata.getOperationMode() != OperationMode.RUNNING) {
+            return createIndicator(YELLOW, "ILM is not running", createDetails(ilmMetadata));
+        } else {
+            return createIndicator(GREEN, "ILM is running", createDetails(ilmMetadata));
+        }
+    }
+
+    /**
+     * Builds the details attached to every result: the operation mode under "ilm-status" and the policy count under
+     * "policies".
+     */
+    private static HealthIndicatorDetails createDetails(IndexLifecycleMetadata metadata) {
+        // NOTE(review): calculate() checks emptiness via getPolicyMetadatas() while the count here comes from
+        // getPolicies(); presumably both always have the same size - confirm.
+        return new SimpleHealthIndicatorDetails(
+            Map.of("ilm-status", metadata.getOperationMode(), "policies", metadata.getPolicies().size())
+        );
+    }
+}

+ 69 - 0
x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/slm/SlmHealthIndicatorService.java

@@ -0,0 +1,69 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.slm;
+
+import org.elasticsearch.cluster.service.ClusterService;
+import org.elasticsearch.health.HealthIndicatorDetails;
+import org.elasticsearch.health.HealthIndicatorResult;
+import org.elasticsearch.health.HealthIndicatorService;
+import org.elasticsearch.health.SimpleHealthIndicatorDetails;
+import org.elasticsearch.xpack.core.ilm.OperationMode;
+import org.elasticsearch.xpack.core.slm.SnapshotLifecycleMetadata;
+
+import java.util.Map;
+
+import static org.elasticsearch.health.HealthStatus.GREEN;
+import static org.elasticsearch.health.HealthStatus.YELLOW;
+import static org.elasticsearch.health.ServerHealthComponents.SNAPSHOT;
+
+/**
+ * Health indicator for the snapshot lifecycle management (SLM) component.
+ * <p>
+ * The indicator reports YELLOW when SLM is not running while policies are configured, and GREEN otherwise.
+ * Data might not be backed up in a timely manner in such cases.
+ * <p>
+ * SLM must be running to clear the warning reported by this indicator.
+ */
+public class SlmHealthIndicatorService implements HealthIndicatorService {
+
+    /** Indicator name returned by {@link #name()}. */
+    public static final String NAME = "SLM";
+
+    // Source of the cluster state that calculate() reads on every call.
+    private final ClusterService clusterService;
+
+    public SlmHealthIndicatorService(ClusterService clusterService) {
+        this.clusterService = clusterService;
+    }
+
+    @Override
+    public String name() {
+        return NAME;
+    }
+
+    @Override
+    public String component() {
+        return SNAPSHOT;
+    }
+
+    /**
+     * Derives the SLM health from the SLM custom metadata in the current cluster state.
+     * <ul>
+     *   <li>GREEN, "No policies configured" - there are no snapshot configurations</li>
+     *   <li>YELLOW, "SLM is not running" - policies exist and the operation mode is not {@code RUNNING}</li>
+     *   <li>GREEN, "SLM is running" - policies exist and the operation mode is {@code RUNNING}</li>
+     * </ul>
+     *
+     * @return the indicator result, with the details produced by {@code createDetails}
+     */
+    @Override
+    public HealthIndicatorResult calculate() {
+        // Falls back to SnapshotLifecycleMetadata.EMPTY when the cluster state carries no SLM custom metadata.
+        var slmMetadata = clusterService.state().metadata().custom(SnapshotLifecycleMetadata.TYPE, SnapshotLifecycleMetadata.EMPTY);
+        if (slmMetadata.getSnapshotConfigurations().isEmpty()) {
+            return createIndicator(GREEN, "No policies configured", createDetails(slmMetadata));
+        } else if (slmMetadata.getOperationMode() != OperationMode.RUNNING) {
+            return createIndicator(YELLOW, "SLM is not running", createDetails(slmMetadata));
+        } else {
+            return createIndicator(GREEN, "SLM is running", createDetails(slmMetadata));
+        }
+    }
+
+    /**
+     * Builds the details attached to every result: the operation mode under "slm-status" and the number of snapshot
+     * configurations under "policies".
+     */
+    private static HealthIndicatorDetails createDetails(SnapshotLifecycleMetadata metadata) {
+        return new SimpleHealthIndicatorDetails(
+            Map.of("slm-status", metadata.getOperationMode(), "policies", metadata.getSnapshotConfigurations().size())
+        );
+    }
+}

+ 130 - 0
x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/ilm/IlmHealthIndicatorServiceTests.java

@@ -0,0 +1,130 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.ilm;
+
+import org.elasticsearch.cluster.ClusterName;
+import org.elasticsearch.cluster.ClusterState;
+import org.elasticsearch.cluster.metadata.Metadata;
+import org.elasticsearch.cluster.service.ClusterService;
+import org.elasticsearch.health.HealthIndicatorResult;
+import org.elasticsearch.health.SimpleHealthIndicatorDetails;
+import org.elasticsearch.test.ESTestCase;
+import org.elasticsearch.xpack.core.ilm.IndexLifecycleMetadata;
+import org.elasticsearch.xpack.core.ilm.LifecyclePolicy;
+import org.elasticsearch.xpack.core.ilm.LifecyclePolicyMetadata;
+
+import java.util.Map;
+
+import static org.elasticsearch.health.HealthStatus.GREEN;
+import static org.elasticsearch.health.HealthStatus.YELLOW;
+import static org.elasticsearch.health.ServerHealthComponents.DATA;
+import static org.elasticsearch.xpack.core.ilm.OperationMode.RUNNING;
+import static org.elasticsearch.xpack.core.ilm.OperationMode.STOPPED;
+import static org.elasticsearch.xpack.core.ilm.OperationMode.STOPPING;
+import static org.elasticsearch.xpack.ilm.IlmHealthIndicatorService.NAME;
+import static org.hamcrest.Matchers.equalTo;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+/**
+ * Unit tests for {@link IlmHealthIndicatorService}: each test builds a cluster state, runs {@code calculate()} against
+ * a mocked {@link ClusterService} and compares the complete {@link HealthIndicatorResult}.
+ */
+public class IlmHealthIndicatorServiceTests extends ESTestCase {
+
+    // One policy and mode RUNNING: expects GREEN with summary "ILM is running".
+    public void testIsGreenWhenRunningAndPoliciesConfigured() {
+        var clusterState = createClusterStateWith(new IndexLifecycleMetadata(createIlmPolicy(), RUNNING));
+        var service = createIlmHealthIndicatorService(clusterState);
+
+        assertThat(
+            service.calculate(),
+            equalTo(
+                new HealthIndicatorResult(
+                    NAME,
+                    DATA,
+                    GREEN,
+                    "ILM is running",
+                    new SimpleHealthIndicatorDetails(Map.of("ilm-status", RUNNING, "policies", 1))
+                )
+            )
+        );
+    }
+
+    // One policy and a mode randomly picked from STOPPED/STOPPING: expects YELLOW.
+    public void testIsYellowWhenNotRunningAndPoliciesConfigured() {
+        var status = randomFrom(STOPPED, STOPPING);
+        var clusterState = createClusterStateWith(new IndexLifecycleMetadata(createIlmPolicy(), status));
+        var service = createIlmHealthIndicatorService(clusterState);
+
+        assertThat(
+            service.calculate(),
+            equalTo(
+                new HealthIndicatorResult(
+                    NAME,
+                    DATA,
+                    YELLOW,
+                    "ILM is not running",
+                    new SimpleHealthIndicatorDetails(Map.of("ilm-status", status, "policies", 1))
+                )
+            )
+        );
+    }
+
+    // No policies and a mode randomly picked from STOPPED/STOPPING: still GREEN, the mode only shows up in the details.
+    public void testIsGreenWhenNotRunningAndNoPolicies() {
+        var status = randomFrom(STOPPED, STOPPING);
+        var clusterState = createClusterStateWith(new IndexLifecycleMetadata(Map.of(), status));
+        var service = createIlmHealthIndicatorService(clusterState);
+
+        assertThat(
+            service.calculate(),
+            equalTo(
+                new HealthIndicatorResult(
+                    NAME,
+                    DATA,
+                    GREEN,
+                    "No policies configured",
+                    new SimpleHealthIndicatorDetails(Map.of("ilm-status", status, "policies", 0))
+                )
+            )
+        );
+    }
+
+    // Cluster state without any ILM custom metadata: the expected details (RUNNING, 0 policies) are what the service
+    // reports from its IndexLifecycleMetadata.EMPTY fallback.
+    public void testIsGreenWhenNoMetadata() {
+        var clusterState = createClusterStateWith(null);
+        var service = createIlmHealthIndicatorService(clusterState);
+
+        assertThat(
+            service.calculate(),
+            equalTo(
+                new HealthIndicatorResult(
+                    NAME,
+                    DATA,
+                    GREEN,
+                    "No policies configured",
+                    new SimpleHealthIndicatorDetails(Map.of("ilm-status", RUNNING, "policies", 0))
+                )
+            )
+        );
+    }
+
+    // Builds a "test-cluster" state; the ILM custom metadata is attached only when metadata is non-null.
+    private static ClusterState createClusterStateWith(IndexLifecycleMetadata metadata) {
+        var builder = new ClusterState.Builder(new ClusterName("test-cluster"));
+        if (metadata != null) {
+            builder.metadata(new Metadata.Builder().putCustom(IndexLifecycleMetadata.TYPE, metadata));
+        }
+        return builder.build();
+    }
+
+    // Returns a single-entry policy map keyed by "test-policy".
+    private static Map<String, LifecyclePolicyMetadata> createIlmPolicy() {
+        return Map.of(
+            "test-policy",
+            new LifecyclePolicyMetadata(new LifecyclePolicy("test-policy", Map.of()), Map.of(), 1L, System.currentTimeMillis())
+        );
+    }
+
+    // Creates the service under test on top of a mocked ClusterService whose state() returns the given cluster state.
+    private static IlmHealthIndicatorService createIlmHealthIndicatorService(ClusterState clusterState) {
+        var clusterService = mock(ClusterService.class);
+        when(clusterService.state()).thenReturn(clusterState);
+        return new IlmHealthIndicatorService(clusterService);
+    }
+}

+ 134 - 0
x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/slm/SlmHealthIndicatorServiceTests.java

@@ -0,0 +1,134 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.slm;
+
+import org.elasticsearch.cluster.ClusterName;
+import org.elasticsearch.cluster.ClusterState;
+import org.elasticsearch.cluster.metadata.Metadata;
+import org.elasticsearch.cluster.service.ClusterService;
+import org.elasticsearch.health.HealthIndicatorResult;
+import org.elasticsearch.health.SimpleHealthIndicatorDetails;
+import org.elasticsearch.test.ESTestCase;
+import org.elasticsearch.xpack.core.slm.SnapshotLifecycleMetadata;
+import org.elasticsearch.xpack.core.slm.SnapshotLifecyclePolicy;
+import org.elasticsearch.xpack.core.slm.SnapshotLifecyclePolicyMetadata;
+
+import java.util.Map;
+
+import static org.elasticsearch.health.HealthStatus.GREEN;
+import static org.elasticsearch.health.HealthStatus.YELLOW;
+import static org.elasticsearch.health.ServerHealthComponents.SNAPSHOT;
+import static org.elasticsearch.xpack.core.ilm.OperationMode.RUNNING;
+import static org.elasticsearch.xpack.core.ilm.OperationMode.STOPPED;
+import static org.elasticsearch.xpack.core.ilm.OperationMode.STOPPING;
+import static org.elasticsearch.xpack.slm.SlmHealthIndicatorService.NAME;
+import static org.hamcrest.Matchers.equalTo;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+/**
+ * Unit tests for {@link SlmHealthIndicatorService}: each test builds a cluster state, runs {@code calculate()} against
+ * a mocked {@link ClusterService} and compares the complete {@link HealthIndicatorResult}.
+ */
+public class SlmHealthIndicatorServiceTests extends ESTestCase {
+
+    // One policy and mode RUNNING: expects GREEN with summary "SLM is running".
+    public void testIsGreenWhenRunningAndPoliciesConfigured() {
+        var clusterState = createClusterStateWith(new SnapshotLifecycleMetadata(createSlmPolicy(), RUNNING, null));
+        var service = createSlmHealthIndicatorService(clusterState);
+
+        assertThat(
+            service.calculate(),
+            equalTo(
+                new HealthIndicatorResult(
+                    NAME,
+                    SNAPSHOT,
+                    GREEN,
+                    "SLM is running",
+                    new SimpleHealthIndicatorDetails(Map.of("slm-status", RUNNING, "policies", 1))
+                )
+            )
+        );
+    }
+
+    // One policy and a mode randomly picked from STOPPED/STOPPING: expects YELLOW.
+    public void testIsYellowWhenNotRunningAndPoliciesConfigured() {
+        var status = randomFrom(STOPPED, STOPPING);
+        var clusterState = createClusterStateWith(new SnapshotLifecycleMetadata(createSlmPolicy(), status, null));
+        var service = createSlmHealthIndicatorService(clusterState);
+
+        assertThat(
+            service.calculate(),
+            equalTo(
+                new HealthIndicatorResult(
+                    NAME,
+                    SNAPSHOT,
+                    YELLOW,
+                    "SLM is not running",
+                    new SimpleHealthIndicatorDetails(Map.of("slm-status", status, "policies", 1))
+                )
+            )
+        );
+    }
+
+    // No policies and a mode randomly picked from STOPPED/STOPPING: still GREEN, the mode only shows up in the details.
+    public void testIsGreenWhenNotRunningAndNoPolicies() {
+        var status = randomFrom(STOPPED, STOPPING);
+        var clusterState = createClusterStateWith(new SnapshotLifecycleMetadata(Map.of(), status, null));
+        var service = createSlmHealthIndicatorService(clusterState);
+
+        assertThat(
+            service.calculate(),
+            equalTo(
+                new HealthIndicatorResult(
+                    NAME,
+                    SNAPSHOT,
+                    GREEN,
+                    "No policies configured",
+                    new SimpleHealthIndicatorDetails(Map.of("slm-status", status, "policies", 0))
+                )
+            )
+        );
+    }
+
+    // Cluster state without any SLM custom metadata: the expected details (RUNNING, 0 policies) are what the service
+    // reports from its SnapshotLifecycleMetadata.EMPTY fallback.
+    public void testIsGreenWhenNoMetadata() {
+        var clusterState = createClusterStateWith(null);
+        var service = createSlmHealthIndicatorService(clusterState);
+
+        assertThat(
+            service.calculate(),
+            equalTo(
+                new HealthIndicatorResult(
+                    NAME,
+                    SNAPSHOT,
+                    GREEN,
+                    "No policies configured",
+                    new SimpleHealthIndicatorDetails(Map.of("slm-status", RUNNING, "policies", 0))
+                )
+            )
+        );
+    }
+
+    // Builds a "test-cluster" state; the SLM custom metadata is attached only when metadata is non-null.
+    private static ClusterState createClusterStateWith(SnapshotLifecycleMetadata metadata) {
+        var builder = new ClusterState.Builder(new ClusterName("test-cluster"));
+        if (metadata != null) {
+            builder.metadata(new Metadata.Builder().putCustom(SnapshotLifecycleMetadata.TYPE, metadata));
+        }
+        return builder.build();
+    }
+
+    // Returns a single-entry policy map keyed by "test-policy".
+    private static Map<String, SnapshotLifecyclePolicyMetadata> createSlmPolicy() {
+        return Map.of(
+            "test-policy",
+            SnapshotLifecyclePolicyMetadata.builder()
+                .setPolicy(new SnapshotLifecyclePolicy("id", "test-policy", "", "test-repository", null, null))
+                .setVersion(1L)
+                .setModifiedDate(System.currentTimeMillis())
+                .build()
+        );
+    }
+
+    // Creates the service under test on top of a mocked ClusterService whose state() returns the given cluster state.
+    private static SlmHealthIndicatorService createSlmHealthIndicatorService(ClusterState clusterState) {
+        var clusterService = mock(ClusterService.class);
+        when(clusterService.state()).thenReturn(clusterState);
+        return new SlmHealthIndicatorService(clusterService);
+    }
+}