Forráskód Böngészése

[Transform] Don't fail a transform due to ILM closing an index (#90396)

A transform can fail due to a ClusterBlockException that reports itself as non-retryable. This is a special kind of race condition where the initial checks pass, but something changes between the check and the action. In this particular case, a wildcard index pattern was resolved to concrete index names, and one of the indices was closed (by ILM) before the transform ran the search operation. Pragmatically, we should handle a cluster block exception as a retryable error.

fixes #89802
Hendrik Muhs 3 éve
szülő
commit
09eafed22f

+ 6 - 0
docs/changelog/90396.yaml

@@ -0,0 +1,6 @@
+pr: 90396
+summary: Don't fail a transform on a ClusterBlockException, this may be due to ILM closing an index
+area: Transform
+type: bug
+issues:
+ - 89802

+ 4 - 0
x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/transforms/TransformFailureHandler.java

@@ -11,6 +11,7 @@ import org.apache.logging.log4j.Level;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 import org.elasticsearch.ElasticsearchException;
+import org.elasticsearch.cluster.block.ClusterBlockException;
 import org.elasticsearch.common.breaker.CircuitBreakingException;
 import org.elasticsearch.script.ScriptException;
 import org.elasticsearch.xpack.core.transform.TransformMessages;
@@ -65,6 +66,9 @@ class TransformFailureHandler {
             handleScriptException(scriptException, unattended);
         } else if (unwrappedException instanceof BulkIndexingException bulkIndexingException) {
             handleBulkIndexingException(bulkIndexingException, unattended, getNumFailureRetries(settingsConfig));
+        } else if (unwrappedException instanceof ClusterBlockException clusterBlockException) {
+            // gh#89802 always retry for a cluster block exception, because a cluster block should be temporary.
+            retry(clusterBlockException, clusterBlockException.getDetailedMessage(), unattended, getNumFailureRetries(settingsConfig));
         } else if (unwrappedException instanceof ElasticsearchException elasticsearchException) {
             handleElasticsearchException(elasticsearchException, unattended, getNumFailureRetries(settingsConfig));
         } else if (unwrappedException instanceof IllegalArgumentException illegalArgumentException) {

+ 33 - 0
x-pack/plugin/transform/src/test/java/org/elasticsearch/xpack/transform/transforms/TransformFailureHandlerTests.java

@@ -11,6 +11,8 @@ import org.elasticsearch.ElasticsearchStatusException;
 import org.elasticsearch.action.ActionListener;
 import org.elasticsearch.action.search.SearchPhaseExecutionException;
 import org.elasticsearch.action.search.ShardSearchFailure;
+import org.elasticsearch.cluster.block.ClusterBlockException;
+import org.elasticsearch.cluster.metadata.MetadataIndexStateService;
 import org.elasticsearch.common.breaker.CircuitBreaker;
 import org.elasticsearch.common.breaker.CircuitBreakingException;
 import org.elasticsearch.rest.RestStatus;
@@ -20,6 +22,9 @@ import org.elasticsearch.xpack.core.transform.transforms.SettingsConfig;
 import org.elasticsearch.xpack.core.transform.transforms.TransformTaskState;
 import org.elasticsearch.xpack.transform.notifications.MockTransformAuditor;
 
+import java.util.Map;
+import java.util.Set;
+
 import static java.util.Collections.singletonList;
 
 public class TransformFailureHandlerTests extends ESTestCase {
@@ -113,6 +118,34 @@ public class TransformFailureHandlerTests extends ESTestCase {
         assertNoFailure(handler, new NullPointerException("NPE"), contextListener, settings);
     }
 
+    public void testClusterBlock() { // checks that a ClusterBlockException is retried before the transform is marked failed
+        String transformId = randomAlphaOfLength(10);
+        SettingsConfig settings = new SettingsConfig.Builder().setNumFailureRetries(2).build(); // configure 2 failure retries
+        // handler under test, wired to a mock auditor and a listener that is inspected by the assertions below
+        MockTransformAuditor auditor = MockTransformAuditor.createMockAuditor();
+        MockTransformContextListener contextListener = new MockTransformContextListener();
+        TransformContext context = new TransformContext(TransformTaskState.STARTED, "", 0, contextListener);
+        context.setPageSize(500);
+
+        TransformFailureHandler handler = new TransformFailureHandler(auditor, context, transformId);
+        // cluster block built from INDEX_CLOSED_BLOCK on "test-index", i.e. the closed-index case from gh#89802
+        final ClusterBlockException clusterBlock = new ClusterBlockException(
+            Map.of("test-index", Set.of(MetadataIndexStateService.INDEX_CLOSED_BLOCK))
+        );
+        // 1st failure: expected to be counted but not to fail the transform
+        handler.handleIndexerFailure(clusterBlock, settings);
+        assertFalse(contextListener.getFailed());
+        assertEquals(1, contextListener.getFailureCountChangedCounter());
+        // 2nd failure: still expected to be tolerated with 2 configured retries
+        handler.handleIndexerFailure(clusterBlock, settings);
+        assertFalse(contextListener.getFailed());
+        assertEquals(2, contextListener.getFailureCountChangedCounter());
+        // 3rd failure: expected to mark the transform as failed
+        handler.handleIndexerFailure(clusterBlock, settings);
+        assertTrue(contextListener.getFailed());
+        assertEquals(3, contextListener.getFailureCountChangedCounter());
+    }
+
     private void assertNoFailure(
         TransformFailureHandler handler,
         Exception e,