Browse Source

[ILM] Fix race condition in test (#35143)

Previously, testRunStateChangePolicyWithNextStep asserted that the
ClusterState before and after running the steps were equal. The test
only passed due to a race condition: The latch would be triggered by the
step execution, but the cluster state update thread would continue
running before committing the change to the cluster state. This allowed
the test to read the old cluster state and pass the equality check about
99.99% of the time.

The test now waits for the new cluster state to be committed before
checking that it is _not_ equal to the old cluster state.
Gordon Brown 7 years ago
parent
commit
b3da3eae08

+ 13 - 5
x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/indexlifecycle/IndexLifecycleRunnerTests.java

@@ -255,18 +255,26 @@ public class IndexLifecycleRunnerTests extends ESTestCase {
                 .localNodeId(node.getId()))
             .build();
         ClusterServiceUtils.setState(clusterService, state);
-        IndexLifecycleRunner runner = new IndexLifecycleRunner(stepRegistry, clusterService, () -> 0L);
+        long stepTime = randomLong();
+        IndexLifecycleRunner runner = new IndexLifecycleRunner(stepRegistry, clusterService, () -> stepTime);
 
         ClusterState before = clusterService.state();
         CountDownLatch latch = new CountDownLatch(1);
-        step.setLatch(latch);
+        nextStep.setLatch(latch);
         runner.runPolicyAfterStateChange(policyName, indexMetaData);
 
-        latch.await(5, TimeUnit.SECONDS);
-        ClusterState after = clusterService.state();
+        assertTrue(latch.await(5, TimeUnit.SECONDS));
 
-        assertEquals(before, after);
+        // The cluster state can take a few extra milliseconds to update after the steps are executed
+        assertBusy(() -> assertNotEquals(before, clusterService.state()));
+        LifecycleExecutionState newExecutionState = LifecycleExecutionState
+            .fromIndexMetadata(clusterService.state().metaData().index(indexMetaData.getIndex()));
+        assertThat(newExecutionState.getPhase(), equalTo("phase"));
+        assertThat(newExecutionState.getAction(), equalTo("action"));
+        assertThat(newExecutionState.getStep(), equalTo("next_cluster_state_action_step"));
+        assertThat(newExecutionState.getStepTime(), equalTo(stepTime));
         assertThat(step.getExecuteCount(), equalTo(1L));
+        assertThat(nextStep.getExecuteCount(), equalTo(1L));
         clusterService.close();
         threadPool.shutdownNow();
     }