浏览代码

Retry on RepositoryException in SLM tests (#48548)

Due to a bug, GETing a snapshot can cause a RespositoryException to be
thrown. This error is transient and should be retried, rather than
causing the test to fail. This commit converts those
RepositoryExceptions into AssertionErrors so that they will be retried
in code wrapped in assertBusy.
Gordon Brown 6 年之前
父节点
当前提交
5f72407ce7

+ 2 - 1
client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/ILMDocumentationIT.java

@@ -765,7 +765,6 @@ public class ILMDocumentationIT extends ESRestHighLevelClientTestCase {
         assertTrue(latch.await(30L, TimeUnit.SECONDS));
         assertTrue(latch.await(30L, TimeUnit.SECONDS));
     }
     }
 
 
-    @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/46021")
     public void testAddSnapshotLifecyclePolicy() throws Exception {
     public void testAddSnapshotLifecyclePolicy() throws Exception {
         RestHighLevelClient client = highLevelClient();
         RestHighLevelClient client = highLevelClient();
 
 
@@ -1057,6 +1056,8 @@ public class ILMDocumentationIT extends ESRestHighLevelClientTestCase {
             } catch (Exception e) {
             } catch (Exception e) {
                 if (e.getMessage().contains("snapshot_missing_exception")) {
                 if (e.getMessage().contains("snapshot_missing_exception")) {
                     fail("snapshot does not exist: " + snapshotName);
                     fail("snapshot does not exist: " + snapshotName);
+                } else if (e.getMessage().contains("repository_exception")) {
+                    fail("got a respository_exception, retrying. original message: " + e.getMessage());
                 }
                 }
                 throw e;
                 throw e;
             }
             }

+ 14 - 10
x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/slm/SLMSnapshotBlockingIntegTests.java

@@ -6,7 +6,6 @@
 
 
 package org.elasticsearch.xpack.slm;
 package org.elasticsearch.xpack.slm;
 
 
-import org.apache.lucene.util.LuceneTestCase;
 import org.elasticsearch.action.ActionFuture;
 import org.elasticsearch.action.ActionFuture;
 import org.elasticsearch.action.admin.cluster.snapshots.get.GetSnapshotsResponse;
 import org.elasticsearch.action.admin.cluster.snapshots.get.GetSnapshotsResponse;
 import org.elasticsearch.action.admin.cluster.snapshots.status.SnapshotStatus;
 import org.elasticsearch.action.admin.cluster.snapshots.status.SnapshotStatus;
@@ -124,7 +123,6 @@ public class SLMSnapshotBlockingIntegTests extends ESIntegTestCase {
         }
         }
     }
     }
 
 
-    @LuceneTestCase.AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/48441")
     public void testRetentionWhileSnapshotInProgress() throws Exception {
     public void testRetentionWhileSnapshotInProgress() throws Exception {
         final String indexName = "test";
         final String indexName = "test";
         final String policyId = "slm-policy";
         final String policyId = "slm-policy";
@@ -144,8 +142,7 @@ public class SLMSnapshotBlockingIntegTests extends ESIntegTestCase {
         logger.info("--> kicked off snapshot {}", completedSnapshotName);
         logger.info("--> kicked off snapshot {}", completedSnapshotName);
         assertBusy(() -> {
         assertBusy(() -> {
             try {
             try {
-                SnapshotsStatusResponse s =
-                    client().admin().cluster().prepareSnapshotStatus(REPO).setSnapshots(completedSnapshotName).get();
+                SnapshotsStatusResponse s = getSnapshotStatus(completedSnapshotName);
                 assertThat("expected a snapshot but none were returned", s.getSnapshots().size(), equalTo(1));
                 assertThat("expected a snapshot but none were returned", s.getSnapshots().size(), equalTo(1));
                 SnapshotStatus status = s.getSnapshots().get(0);
                 SnapshotStatus status = s.getSnapshots().get(0);
                 logger.info("--> waiting for snapshot {} to be completed, got: {}", completedSnapshotName, status.getState());
                 logger.info("--> waiting for snapshot {} to be completed, got: {}", completedSnapshotName, status.getState());
@@ -213,13 +210,8 @@ public class SLMSnapshotBlockingIntegTests extends ESIntegTestCase {
                 client().admin().cluster().prepareReroute().get();
                 client().admin().cluster().prepareReroute().get();
                 logger.info("--> waiting for snapshot to be deleted");
                 logger.info("--> waiting for snapshot to be deleted");
                 try {
                 try {
-                    SnapshotsStatusResponse s =
-                        client().admin().cluster().prepareSnapshotStatus(REPO).setSnapshots(completedSnapshotName).get();
+                    SnapshotsStatusResponse s = getSnapshotStatus(completedSnapshotName);
                     assertNull("expected no snapshot but one was returned", s.getSnapshots().get(0));
                     assertNull("expected no snapshot but one was returned", s.getSnapshots().get(0));
-                } catch (RepositoryException e) {
-                    // Concurrent status calls and write operations may lead to failures in determining the current repository generation
-                    // TODO: Remove this hack once tracking the current repository generation has been made consistent
-                    throw new AssertionError(e);
                 } catch (SnapshotMissingException e) {
                 } catch (SnapshotMissingException e) {
                     // Great, we wanted it to be deleted!
                     // Great, we wanted it to be deleted!
                 }
                 }
@@ -383,6 +375,18 @@ public class SLMSnapshotBlockingIntegTests extends ESIntegTestCase {
         }
         }
     }
     }
 
 
+    private SnapshotsStatusResponse getSnapshotStatus(String snapshotName) {
+        try {
+            return client().admin().cluster().prepareSnapshotStatus(REPO).setSnapshots(snapshotName).get();
+        } catch (RepositoryException e) {
+            // Convert this to an AssertionError so that it can be retried in an assertBusy - this is often a transient error because
+            // concurrent status calls and write operations may lead to failures in determining the current repository generation
+            // TODO: Remove this hack once tracking the current repository generation has been made consistent
+            logger.warn(e);
+            throw new AssertionError(e);
+        }
+    }
+
     private void createAndPopulateIndex(String indexName) throws InterruptedException {
     private void createAndPopulateIndex(String indexName) throws InterruptedException {
         logger.info("--> creating and populating index [{}]", indexName);
         logger.info("--> creating and populating index [{}]", indexName);
         assertAcked(prepareCreate(indexName, 0, Settings.builder()
         assertAcked(prepareCreate(indexName, 0, Settings.builder()