瀏覽代碼

Allow ILM to transition to implicit cached steps (#91779)

ILM tries to honour the cached phase however, some steps are implicit
(e.g. injected actions or the terminal policy/phase step)
Currently, ILM would throw an exception if the currently cached phase was removed:
```
step [{"phase":"warm","action":"complete","name":"complete"}] for index [index]
with policy [my-policy] does not exist
```
ILM would currently also throw an exception if the next step was an
implicit one and the phase was removed from the underlying policy e.g. if the
index is on the `migrate` step and looking to transition to the
`check-migration` step, whilt the `warm` phase doesn't exist in the
policy anymore
```
step [{"phase":"warm","action":"migrate","name":"check-migration"}] for index
[index] with policy [my-policy] does not exist
```

This fixes these scenarios by enhancing the
`PolicyStepsRegistry#parseStepKeysFromPhase` method to compute all the
steps in the phase (including all implicit steps)
Andrei Dan 2 年之前
父節點
當前提交
dc726e60a9

+ 6 - 0
docs/changelog/91779.yaml

@@ -0,0 +1,6 @@
+pr: 91779
+summary: Allow ILM to transition to implicit cached steps
+area: ILM+SLM
+type: bug
+issues:
+ - 91749

+ 1 - 1
x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/PhaseCacheManagement.java

@@ -263,7 +263,7 @@ public final class PhaseCacheManagement {
      * information, returns null.
      */
     @Nullable
-    public static Set<Step.StepKey> readStepKeys(
+    static Set<Step.StepKey> readStepKeys(
         final NamedXContentRegistry xContentRegistry,
         final Client client,
         final String phaseDef,

+ 3 - 2
x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/IndexLifecycleTransition.java

@@ -85,8 +85,9 @@ public final class IndexLifecycleTransition {
         }
 
         final Set<Step.StepKey> cachedStepKeys = stepRegistry.parseStepKeysFromPhase(
-            lifecycleState.phaseDefinition(),
-            lifecycleState.phase()
+            policyName,
+            lifecycleState.phase(),
+            lifecycleState.phaseDefinition()
         );
         boolean isNewStepCached = cachedStepKeys != null && cachedStepKeys.contains(newStepKey);
 

+ 46 - 6
x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/PolicyStepsRegistry.java

@@ -33,7 +33,6 @@ import org.elasticsearch.xpack.core.ilm.InitializePolicyContextStep;
 import org.elasticsearch.xpack.core.ilm.LifecyclePolicy;
 import org.elasticsearch.xpack.core.ilm.LifecyclePolicyMetadata;
 import org.elasticsearch.xpack.core.ilm.Phase;
-import org.elasticsearch.xpack.core.ilm.PhaseCacheManagement;
 import org.elasticsearch.xpack.core.ilm.PhaseExecutionInfo;
 import org.elasticsearch.xpack.core.ilm.Step;
 import org.elasticsearch.xpack.core.ilm.TerminalPolicyStep;
@@ -42,12 +41,14 @@ import java.io.IOException;
 import java.util.HashMap;
 import java.util.LinkedHashMap;
 import java.util.List;
+import java.util.Locale;
 import java.util.Map;
-import java.util.Optional;
+import java.util.Objects;
 import java.util.Set;
 import java.util.SortedMap;
 import java.util.TreeMap;
 import java.util.concurrent.ConcurrentHashMap;
+import java.util.stream.Collectors;
 
 public class PolicyStepsRegistry {
     private static final Logger logger = LogManager.getLogger(PolicyStepsRegistry.class);
@@ -245,13 +246,50 @@ public class PolicyStepsRegistry {
 
     /*
      * Parses the step keys from the {@code phaseDef} for the given phase.
+     * ILM makes use of some implicit steps that belong to actions that we automatically inject
+     * (eg. unfollow and migrate) or special purpose steps like the phase `complete` step.
+     *
+     * The {@code phaseDef} is *mostly* a valid json we store in the lifecycle execution state. However,
+     * we have a few of exceptional cases:
+     * - null is treated as the `new` phase (see {@code InitializePolicyContextStep})
+     * - the `new` phase is not stored as json but ... "new"
+     * - there's a legacy step, the {@code TerminalPolicyStep} which is also not stored as json but as "completed"
+     * (note: this step exists only for BWC reasons as these days we move to the {@code PhaseCompleteStep} when reaching
+     * the end of the phase)
+     *
+     * This method returns **all** the steps that are part of the phase definition including the implicit steps.
+     *
      * Returns null if there's a parsing error.
      */
     @Nullable
-    public Set<Step.StepKey> parseStepKeysFromPhase(String phaseDef, String currentPhase) {
-        return PhaseCacheManagement.readStepKeys(xContentRegistry, client, phaseDef, currentPhase, licenseState);
+    public Set<Step.StepKey> parseStepKeysFromPhase(String policy, String currentPhase, String phaseDef) {
+        try {
+            String phaseDefNonNull = Objects.requireNonNullElse(phaseDef, InitializePolicyContextStep.INITIALIZATION_PHASE);
+            return parseStepsFromPhase(policy, currentPhase, phaseDefNonNull).stream().map(Step::getKey).collect(Collectors.toSet());
+        } catch (IOException e) {
+            logger.trace(
+                () -> String.format(
+                    Locale.ROOT,
+                    "unable to parse steps for policy [{}], phase [{}], and phase definition [{}]",
+                    policy,
+                    currentPhase,
+                    phaseDef
+                ),
+                e
+            );
+            return null;
+        }
     }
 
+    /**
+     * The {@code phaseDef} is *mostly* a valid json we store in the lifecycle execution state. However,
+     * we have a few of exceptional cases:
+     * - null is treated as the `new` phase (see {@code InitializePolicyContextStep})
+     * - the `new` phase is not stored as json but ... "new"
+     * - there's a legacy step, the {@code TerminalPolicyStep} which is also not stored as json but as "completed"
+     * (note: this step exists only for BWC reasons as these days we move to the {@code PhaseCompleteStep} when reaching
+     * the end of the phase)
+     */
     private List<Step> parseStepsFromPhase(String policy, String currentPhase, String phaseDef) throws IOException {
         final PhaseExecutionInfo phaseExecutionInfo;
         LifecyclePolicyMetadata policyMetadata = lifecyclePolicyMap.get(policy);
@@ -341,8 +379,10 @@ public class PolicyStepsRegistry {
         }
 
         // parse phase steps from the phase definition in the index settings
-        final String phaseJson = Optional.ofNullable(indexMetadata.getLifecycleExecutionState().phaseDefinition())
-            .orElse(InitializePolicyContextStep.INITIALIZATION_PHASE);
+        final String phaseJson = Objects.requireNonNullElse(
+            indexMetadata.getLifecycleExecutionState().phaseDefinition(),
+            InitializePolicyContextStep.INITIALIZATION_PHASE
+        );
 
         final List<Step> phaseSteps;
         try {

+ 60 - 0
x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/ilm/IndexLifecycleTransitionTests.java

@@ -684,6 +684,66 @@ public class IndexLifecycleTransitionTests extends ESTestCase {
         }
     }
 
+    public void testValidateTransitionToInjectedMissingStep() {
+        // we'll test the case when the warm phase was deleted and the next step is an injected one
+
+        LifecycleExecutionState.Builder executionState = LifecycleExecutionState.builder()
+            .setPhase("warm")
+            .setAction("migrate")
+            .setStep("migrate")
+            .setPhaseDefinition("""
+                {
+                  "policy" : "my-policy",
+                  "phase_definition" : {
+                    "min_age" : "20m",
+                    "actions" : {
+                      "set_priority" : {
+                        "priority" : 150
+                      }
+                    }
+                  },
+                  "version" : 1,
+                  "modified_date_in_millis" : 1578521007076
+                }""");
+
+        IndexMetadata meta = buildIndexMetadata("my-policy", executionState);
+
+        try (Client client = new NoOpClient(getTestName())) {
+            Step.StepKey currentStepKey = new Step.StepKey("warm", MigrateAction.NAME, MigrateAction.NAME);
+            Step.StepKey nextStepKey = new Step.StepKey("warm", MigrateAction.NAME, DataTierMigrationRoutedStep.NAME);
+
+            Step.StepKey waitForRolloverStepKey = new Step.StepKey("hot", RolloverAction.NAME, WaitForRolloverReadyStep.NAME);
+            Step.StepKey rolloverStepKey = new Step.StepKey("hot", RolloverAction.NAME, RolloverStep.NAME);
+            Step waitForRolloverReadyStep = new WaitForRolloverReadyStep(
+                waitForRolloverStepKey,
+                rolloverStepKey,
+                client,
+                null,
+                null,
+                null,
+                1L,
+                null,
+                null,
+                null,
+                null,
+                null,
+                null
+            );
+
+            try {
+                IndexLifecycleTransition.validateTransition(
+                    meta,
+                    currentStepKey,
+                    nextStepKey,
+                    createOneStepPolicyStepRegistry("my-policy", waitForRolloverReadyStep)
+                );
+            } catch (Exception e) {
+                logger.error(e.getMessage(), e);
+                fail("validateTransition should not throw exception on valid transitions");
+            }
+        }
+    }
+
     public void testMoveClusterStateToFailedStep() {
         String indexName = "my_index";
         String policyName = "my_policy";

+ 52 - 4
x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/ilm/MoveToNextStepUpdateTaskTests.java

@@ -8,6 +8,7 @@ package org.elasticsearch.xpack.ilm;
 
 import org.apache.lucene.util.SetOnce;
 import org.elasticsearch.Version;
+import org.elasticsearch.client.internal.Client;
 import org.elasticsearch.cluster.ClusterName;
 import org.elasticsearch.cluster.ClusterState;
 import org.elasticsearch.cluster.metadata.IndexMetadata;
@@ -27,20 +28,34 @@ import org.elasticsearch.xpack.core.ilm.Step;
 import org.elasticsearch.xpack.core.ilm.Step.StepKey;
 import org.junit.Before;
 
+import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
+import java.util.Locale;
+import java.util.Map;
 import java.util.concurrent.atomic.AtomicBoolean;
 
 import static org.elasticsearch.cluster.metadata.LifecycleExecutionState.ILM_CUSTOM_METADATA_KEY;
 import static org.hamcrest.Matchers.equalTo;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
 
 public class MoveToNextStepUpdateTaskTests extends ESTestCase {
 
+    private static final NamedXContentRegistry REGISTRY;
+
     String policy;
     ClusterState clusterState;
     Index index;
     LifecyclePolicy lifecyclePolicy;
 
+    static {
+        try (IndexLifecycle indexLifecycle = new IndexLifecycle(Settings.EMPTY)) {
+            List<NamedXContentRegistry.Entry> entries = new ArrayList<>(indexLifecycle.getNamedXContent());
+            REGISTRY = new NamedXContentRegistry(entries);
+        }
+    }
+
     @Before
     public void setupClusterState() {
         policy = randomAlphaOfLength(10);
@@ -75,13 +90,22 @@ public class MoveToNextStepUpdateTaskTests extends ESTestCase {
         setStateToKey(currentStepKey, now);
 
         AtomicBoolean changed = new AtomicBoolean(false);
+        Client client = mock(Client.class);
+        when(client.settings()).thenReturn(Settings.EMPTY);
+        AlwaysExistingStepRegistry stepRegistry = new AlwaysExistingStepRegistry(client);
+        stepRegistry.update(
+            new IndexLifecycleMetadata(
+                Map.of(policy, new LifecyclePolicyMetadata(lifecyclePolicy, Collections.emptyMap(), 2L, 2L)),
+                OperationMode.RUNNING
+            )
+        );
         MoveToNextStepUpdateTask task = new MoveToNextStepUpdateTask(
             index,
             policy,
             currentStepKey,
             nextStepKey,
             () -> now,
-            new AlwaysExistingStepRegistry(),
+            stepRegistry,
             state -> changed.set(true)
         );
         ClusterState newState = task.execute(clusterState);
@@ -140,13 +164,22 @@ public class MoveToNextStepUpdateTaskTests extends ESTestCase {
         setStateToKey(currentStepKey, now);
 
         SetOnce<Boolean> changed = new SetOnce<>();
+        Client client = mock(Client.class);
+        when(client.settings()).thenReturn(Settings.EMPTY);
+        AlwaysExistingStepRegistry stepRegistry = new AlwaysExistingStepRegistry(client);
+        stepRegistry.update(
+            new IndexLifecycleMetadata(
+                Map.of(policy, new LifecyclePolicyMetadata(lifecyclePolicy, Collections.emptyMap(), 2L, 2L)),
+                OperationMode.RUNNING
+            )
+        );
         MoveToNextStepUpdateTask task = new MoveToNextStepUpdateTask(
             index,
             policy,
             currentStepKey,
             invalidNextStep,
             () -> now,
-            new AlwaysExistingStepRegistry(),
+            stepRegistry,
             s -> changed.set(true)
         );
         ClusterState newState = task.execute(clusterState);
@@ -186,7 +219,11 @@ public class MoveToNextStepUpdateTaskTests extends ESTestCase {
     private static class AlwaysExistingStepRegistry extends PolicyStepsRegistry {
 
         AlwaysExistingStepRegistry() {
-            super(new NamedXContentRegistry(Collections.emptyList()), null, null);
+            this(null);
+        }
+
+        AlwaysExistingStepRegistry(Client client) {
+            super(REGISTRY, client, null);
         }
 
         @Override
@@ -215,7 +252,18 @@ public class MoveToNextStepUpdateTaskTests extends ESTestCase {
         lifecycleState.setActionTime(now);
         lifecycleState.setStep(stepKey.name());
         lifecycleState.setStepTime(now);
-        lifecycleState.setPhaseDefinition("{\"actions\":{\"TEST_ACTION\":{}}}");
+
+        lifecycleState.setPhaseDefinition(String.format(Locale.ROOT, """
+            {
+              "policy" : "%s",
+              "phase_definition" : {
+                "min_age" : "20m",
+                "actions" : {
+                }
+              },
+              "version" : 1,
+              "modified_date_in_millis" : 1578521007076
+            }""", policy));
         clusterState = ClusterState.builder(clusterState)
             .metadata(
                 Metadata.builder(clusterState.getMetadata())