Browse Source

Retry when the server can't be resolved (#123852)

Nick Tindall 7 months ago
parent
commit
74d61a4052

+ 5 - 0
docs/changelog/123852.yaml

@@ -0,0 +1,5 @@
+pr: 123852
+summary: Retry when the server can't be resolved (Google Cloud Storage)
+area: Snapshot/Restore
+type: enhancement
+issues: []

+ 17 - 1
modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageService.java

@@ -14,6 +14,7 @@ import com.google.api.client.http.HttpRequestInitializer;
 import com.google.api.client.http.HttpTransport;
 import com.google.api.client.http.javanet.NetHttpTransport;
 import com.google.api.client.util.SecurityUtils;
+import com.google.api.gax.retrying.ResultRetryAlgorithm;
 import com.google.auth.oauth2.GoogleCredentials;
 import com.google.auth.oauth2.ServiceAccountCredentials;
 import com.google.cloud.ServiceOptions;
@@ -24,6 +25,7 @@ import com.google.cloud.storage.StorageRetryStrategy;
 
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
+import org.elasticsearch.ExceptionsHelper;
 import org.elasticsearch.cluster.node.DiscoveryNode;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.blobstore.OperationPurpose;
@@ -41,6 +43,7 @@ import java.net.HttpURLConnection;
 import java.net.Proxy;
 import java.net.URI;
 import java.net.URL;
+import java.net.UnknownHostException;
 import java.security.KeyStore;
 import java.util.Map;
 import java.util.stream.Collectors;
@@ -207,7 +210,7 @@ public class GoogleCloudStorageService {
         final HttpTransportOptions httpTransportOptions
     ) {
         final StorageOptions.Builder storageOptionsBuilder = StorageOptions.newBuilder()
-            .setStorageRetryStrategy(StorageRetryStrategy.getLegacyStorageRetryStrategy())
+            .setStorageRetryStrategy(getRetryStrategy())
             .setTransportOptions(httpTransportOptions)
             .setHeaderProvider(() -> {
                 return Strings.hasLength(gcsClientSettings.getApplicationName())
@@ -264,6 +267,19 @@ public class GoogleCloudStorageService {
         return storageOptionsBuilder.build();
     }
 
+    protected StorageRetryStrategy getRetryStrategy() {
+        return ShouldRetryDecorator.decorate(
+            StorageRetryStrategy.getLegacyStorageRetryStrategy(),
+            (Throwable prevThrowable, Object prevResponse, ResultRetryAlgorithm<Object> delegate) -> {
+                // Retry in the event of an unknown host exception
+                if (ExceptionsHelper.unwrap(prevThrowable, UnknownHostException.class) != null) {
+                    return true;
+                }
+                return delegate.shouldRetry(prevThrowable, prevResponse);
+            }
+        );
+    }
+
     /**
      * This method imitates what MetadataConfig.getProjectId() does, but does not have the endpoint hardcoded.
      */

+ 81 - 0
modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/ShouldRetryDecorator.java

@@ -0,0 +1,81 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.repositories.gcs;
+
+import com.google.api.gax.retrying.ResultRetryAlgorithm;
+import com.google.api.gax.retrying.TimedAttemptSettings;
+import com.google.cloud.storage.StorageRetryStrategy;
+
+import java.util.concurrent.CancellationException;
+
+public class ShouldRetryDecorator<T> implements StorageRetryStrategy {
+
+    private final ResultRetryAlgorithm<T> idempotentRetryAlgorithm;
+    private final ResultRetryAlgorithm<T> nonIdempotentRetryAlgorithm;
+
+    /**
+     * Decorate the should-retry logic for the specified {@link StorageRetryStrategy}
+     *
+     * @param delegate The underling storage retry strategy
+     * @param shouldRetryDecorator The decorated behaviour of {@link ResultRetryAlgorithm#shouldRetry(Throwable, Object)}
+     * @return A decorated {@link StorageRetryStrategy}
+     */
+    public static StorageRetryStrategy decorate(StorageRetryStrategy delegate, Decorator<?> shouldRetryDecorator) {
+        return new ShouldRetryDecorator<>(delegate, shouldRetryDecorator);
+    }
+
+    /**
+     * The logic to use for {@link ResultRetryAlgorithm#shouldRetry(Throwable, Object)}
+     */
+    public interface Decorator<R> {
+        boolean shouldRetry(Throwable prevThrowable, R prevResponse, ResultRetryAlgorithm<R> delegate);
+    }
+
+    /**
+     * @param delegate The delegate {@link StorageRetryStrategy}
+     * @param shouldRetryDecorator The function to call for shouldRetry for idempotent and non-idempotent requests
+     */
+    @SuppressWarnings("unchecked")
+    private ShouldRetryDecorator(StorageRetryStrategy delegate, Decorator<T> shouldRetryDecorator) {
+        this.idempotentRetryAlgorithm = new DelegatingResultRetryAlgorithm<>(
+            (ResultRetryAlgorithm<T>) delegate.getIdempotentHandler(),
+            shouldRetryDecorator
+        );
+        this.nonIdempotentRetryAlgorithm = new DelegatingResultRetryAlgorithm<>(
+            (ResultRetryAlgorithm<T>) delegate.getNonidempotentHandler(),
+            shouldRetryDecorator
+        );
+    }
+
+    @Override
+    public ResultRetryAlgorithm<?> getIdempotentHandler() {
+        return idempotentRetryAlgorithm;
+    }
+
+    @Override
+    public ResultRetryAlgorithm<?> getNonidempotentHandler() {
+        return nonIdempotentRetryAlgorithm;
+    }
+
+    private record DelegatingResultRetryAlgorithm<R>(ResultRetryAlgorithm<R> delegate, Decorator<R> shouldRetryDecorator)
+        implements
+            ResultRetryAlgorithm<R> {
+
+        @Override
+        public TimedAttemptSettings createNextAttempt(Throwable prevThrowable, R prevResponse, TimedAttemptSettings prevSettings) {
+            return delegate.createNextAttempt(prevThrowable, prevResponse, prevSettings);
+        }
+
+        @Override
+        public boolean shouldRetry(Throwable prevThrowable, R prevResponse) throws CancellationException {
+            return shouldRetryDecorator.shouldRetry(prevThrowable, prevResponse, delegate);
+        }
+    }
+}

+ 59 - 5
modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobContainerRetriesTests.java

@@ -11,11 +11,13 @@ package org.elasticsearch.repositories.gcs;
 import fixture.gcs.FakeOAuth2HttpHandler;
 import fixture.gcs.GoogleCloudStorageHttpHandler;
 
+import com.google.api.client.http.HttpExecuteInterceptor;
+import com.google.api.client.http.HttpRequestInitializer;
 import com.google.api.gax.retrying.RetrySettings;
+import com.google.cloud.ServiceOptions;
 import com.google.cloud.http.HttpTransportOptions;
 import com.google.cloud.storage.StorageException;
 import com.google.cloud.storage.StorageOptions;
-import com.google.cloud.storage.StorageRetryStrategy;
 import com.sun.net.httpserver.HttpHandler;
 
 import org.apache.http.HttpStatus;
@@ -61,6 +63,7 @@ import java.util.Map;
 import java.util.Objects;
 import java.util.Optional;
 import java.util.Queue;
+import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentLinkedQueue;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
@@ -89,12 +92,19 @@ import static org.hamcrest.Matchers.notNullValue;
 @SuppressForbidden(reason = "use a http server")
 public class GoogleCloudStorageBlobContainerRetriesTests extends AbstractBlobContainerRetriesTestCase {
 
+    private final Map<String, AtomicInteger> requestCounters = new ConcurrentHashMap<>();
+    private String endpointUrlOverride;
+
     private String httpServerUrl() {
         assertThat(httpServer, notNullValue());
         InetSocketAddress address = httpServer.getAddress();
         return "http://" + InetAddresses.toUriString(address.getAddress()) + ":" + address.getPort();
     }
 
+    private String getEndpointUrl() {
+        return endpointUrlOverride != null ? endpointUrlOverride : httpServerUrl();
+    }
+
     @Override
     protected String downloadStorageEndpoint(BlobContainer container, String blob) {
         return "/download/storage/v1/b/bucket/o/" + container.path().buildAsString() + blob;
@@ -120,7 +130,7 @@ public class GoogleCloudStorageBlobContainerRetriesTests extends AbstractBlobCon
     ) {
         final Settings.Builder clientSettings = Settings.builder();
         final String client = randomAlphaOfLength(5).toLowerCase(Locale.ROOT);
-        clientSettings.put(ENDPOINT_SETTING.getConcreteSettingForNamespace(client).getKey(), httpServerUrl());
+        clientSettings.put(ENDPOINT_SETTING.getConcreteSettingForNamespace(client).getKey(), getEndpointUrl());
         clientSettings.put(TOKEN_URI_SETTING.getConcreteSettingForNamespace(client).getKey(), httpServerUrl() + "/token");
         if (readTimeout != null) {
             clientSettings.put(READ_TIMEOUT_SETTING.getConcreteSettingForNamespace(client).getKey(), readTimeout);
@@ -136,8 +146,33 @@ public class GoogleCloudStorageBlobContainerRetriesTests extends AbstractBlobCon
                 final GoogleCloudStorageClientSettings gcsClientSettings,
                 final HttpTransportOptions httpTransportOptions
             ) {
-                StorageOptions options = super.createStorageOptions(gcsClientSettings, httpTransportOptions);
-                RetrySettings.Builder retrySettingsBuilder = RetrySettings.newBuilder()
+                final HttpTransportOptions requestCountingHttpTransportOptions = new HttpTransportOptions(
+                    HttpTransportOptions.newBuilder()
+                        .setConnectTimeout(httpTransportOptions.getConnectTimeout())
+                        .setHttpTransportFactory(httpTransportOptions.getHttpTransportFactory())
+                        .setReadTimeout(httpTransportOptions.getReadTimeout())
+                ) {
+                    @Override
+                    public HttpRequestInitializer getHttpRequestInitializer(ServiceOptions<?, ?> serviceOptions) {
+                        // Add initializer/interceptor without interfering with any pre-existing ones
+                        HttpRequestInitializer httpRequestInitializer = super.getHttpRequestInitializer(serviceOptions);
+                        return request -> {
+                            if (httpRequestInitializer != null) {
+                                httpRequestInitializer.initialize(request);
+                            }
+                            HttpExecuteInterceptor interceptor = request.getInterceptor();
+                            request.setInterceptor(req -> {
+                                if (interceptor != null) {
+                                    interceptor.intercept(req);
+                                }
+                                requestCounters.computeIfAbsent(request.getUrl().getRawPath(), (url) -> new AtomicInteger())
+                                    .incrementAndGet();
+                            });
+                        };
+                    }
+                };
+                final StorageOptions options = super.createStorageOptions(gcsClientSettings, requestCountingHttpTransportOptions);
+                final RetrySettings.Builder retrySettingsBuilder = RetrySettings.newBuilder()
                     .setTotalTimeout(options.getRetrySettings().getTotalTimeout())
                     .setInitialRetryDelay(Duration.ofMillis(10L))
                     .setRetryDelayMultiplier(1.0d)
@@ -150,7 +185,7 @@ public class GoogleCloudStorageBlobContainerRetriesTests extends AbstractBlobCon
                     retrySettingsBuilder.setMaxAttempts(maxRetries + 1);
                 }
                 return options.toBuilder()
-                    .setStorageRetryStrategy(StorageRetryStrategy.getLegacyStorageRetryStrategy())
+                    .setStorageRetryStrategy(getRetryStrategy())
                     .setHost(options.getHost())
                     .setCredentials(options.getCredentials())
                     .setRetrySettings(retrySettingsBuilder.build())
@@ -173,6 +208,25 @@ public class GoogleCloudStorageBlobContainerRetriesTests extends AbstractBlobCon
         return new GoogleCloudStorageBlobContainer(randomBoolean() ? BlobPath.EMPTY : BlobPath.EMPTY.add("foo"), blobStore);
     }
 
+    public void testShouldRetryOnConnectionRefused() {
+        // port 1 should never be open
+        endpointUrlOverride = "http://127.0.0.1:1";
+        executeListBlobsAndAssertRetries();
+    }
+
+    public void testShouldRetryOnUnresolvableHost() {
+        // https://www.rfc-editor.org/rfc/rfc2606.html#page-2
+        endpointUrlOverride = "http://unresolvable.invalid";
+        executeListBlobsAndAssertRetries();
+    }
+
+    private void executeListBlobsAndAssertRetries() {
+        final int maxRetries = randomIntBetween(3, 5);
+        final BlobContainer blobContainer = createBlobContainer(maxRetries, null, null, null, null);
+        expectThrows(StorageException.class, () -> blobContainer.listBlobs(randomPurpose()));
+        assertEquals(maxRetries + 1, requestCounters.get("/storage/v1/b/bucket/o").get());
+    }
+
     public void testReadLargeBlobWithRetries() throws Exception {
         final int maxRetries = randomIntBetween(2, 10);
         final AtomicInteger countDown = new AtomicInteger(maxRetries);