Browse Source

Return 200 OK response code for a cluster health timeout (#78968)

Returning 408 for a cluster health timeout was deprecated in #78180 and backported to 7.x in #78940

Now we can do a breaking change in 8.0 respecting the user choice to run ES in 7.x compatible mode via the REST Compatibility layer.

Fixes #70849
Artem Prigoda 3 years ago
parent
commit
a2c3daead1

+ 1 - 6
client/rest-high-level/src/test/java/org/elasticsearch/client/ClusterClientIT.java

@@ -334,14 +334,9 @@ public class ClusterClientIT extends ESRestHighLevelClientTestCase {
 
         assertThat(response, notNullValue());
         assertThat(response.isTimedOut(), equalTo(true));
-        assertThat(response.status(), equalTo(RestStatus.REQUEST_TIMEOUT));
+        assertThat(response.status(), equalTo(RestStatus.OK));
         assertThat(response.getStatus(), equalTo(ClusterHealthStatus.RED));
         assertNoIndices(response);
-        assertCriticalWarnings(
-            "The HTTP status code for a cluster health timeout will be changed from 408 to 200 in a "
-                + "future version. Set the [return_200_for_cluster_health_timeout] query parameter to [true] to suppress this message and "
-                + "opt in to the future behaviour now."
-        );
     }
 
     public void testRemoteInfo() throws Exception {

+ 20 - 0
docs/changelog/78968.yaml

@@ -0,0 +1,20 @@
+pr: 78968
+summary: HTTP Status code has changed for the Cluster Health API in case of a server timeout
+area: CRUD
+type: breaking
+issues: []
+breaking:
+  title: HTTP Status code has changed for the Cluster Health API in case of a server timeout
+  area: API
+  details: |-
+    The cluster health API includes options for waiting
+    for certain health conditions to be satisfied. If the requested conditions are
+    not satisfied within a timeout then ES will send back a normal response
+    including the field `"timed_out": true`. In earlier versions it would also use
+    the HTTP response code `408 Request timeout` if the request timed out, and `200
+    OK` otherwise. The `408 Request timeout` response code is not appropriate for
+    this situation, so from version 8.0.0 ES will use the response code `200 OK`
+    for both cases.
+  impact: |-
+    To detect a server timeout, check the `timed_out` field of the JSON response.
+  notable: true

+ 17 - 0
docs/reference/migration/migrate_8_0/cluster.asciidoc

@@ -32,4 +32,21 @@ time out.
 *Impact* +
 Do not set `cluster.join.timeout` in your `elasticsearch.yml` file.
 ====
+
+.HTTP Status code has changed for the Cluster Health API in case of a server timeout.
+[%collapsible]
+====
+*Details* +
+The {ref}/cluster-health.html[cluster health API] includes options for waiting
+for certain health conditions to be satisfied. If the requested conditions are
+not satisfied within a timeout then {es} will send back a normal response
+including the field `"timed_out": true`. In earlier versions it would also use
+the HTTP response code `408 Request timeout` if the request timed out, and `200
+OK` otherwise. The `408 Request timeout` response code is not appropriate for
+this situation, so from version 8.0.0 {es} will use the response code `200 OK`
+for both cases.
+
+*Impact* +
+To detect a server timeout, check the `timed_out` field of the JSON response.
+====
 // end::notable-breaking-changes[]

+ 6 - 2
rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/cluster.health/20_request_timeout.yml

@@ -1,7 +1,9 @@
 ---
 "cluster health request timeout on waiting for nodes":
+  - skip:
+      version: " - 8.0.99"
+      reason: "Set for 7.99.99 when back-ported to 8.0"
   - do:
-      catch: request_timeout
       cluster.health:
         wait_for_nodes: 10
         timeout: 1ms
@@ -19,8 +21,10 @@
 
 ---
 "cluster health request timeout waiting for active shards":
+  - skip:
+      version: " - 8.0.99"
+      reason: "Set for 7.99.99 when back-ported to 8.0"
   - do:
-      catch: request_timeout
       cluster.health:
         timeout: 1ms
         wait_for_active_shards: 5

+ 9 - 16
server/src/main/java/org/elasticsearch/action/admin/cluster/health/ClusterHealthResponse.java

@@ -16,11 +16,10 @@ import org.elasticsearch.cluster.health.ClusterStateHealth;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
-import org.elasticsearch.common.logging.DeprecationLogger;
 import org.elasticsearch.common.xcontent.StatusToXContentObject;
+import org.elasticsearch.core.RestApiVersion;
 import org.elasticsearch.core.TimeValue;
 import org.elasticsearch.rest.RestStatus;
-import org.elasticsearch.rest.action.search.RestSearchAction;
 import org.elasticsearch.xcontent.ConstructingObjectParser;
 import org.elasticsearch.xcontent.ObjectParser;
 import org.elasticsearch.xcontent.ParseField;
@@ -57,7 +56,6 @@ public class ClusterHealthResponse extends ActionResponse implements StatusToXCo
     private static final String INITIALIZING_SHARDS = "initializing_shards";
     private static final String UNASSIGNED_SHARDS = "unassigned_shards";
     private static final String INDICES = "indices";
-    private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(RestSearchAction.class);
 
     private static final ConstructingObjectParser<ClusterHealthResponse, Void> PARSER = new ConstructingObjectParser<>(
         "cluster_health_response",
@@ -122,12 +120,6 @@ public class ClusterHealthResponse extends ActionResponse implements StatusToXCo
         XContentParser parser,
         Void context,
         String index) -> ClusterIndexHealth.innerFromXContent(parser, index);
-    static final String ES_CLUSTER_HEALTH_REQUEST_TIMEOUT_200_KEY = "return_200_for_cluster_health_timeout";
-    static final String CLUSTER_HEALTH_REQUEST_TIMEOUT_DEPRECATION_MSG = "The HTTP status code for a cluster health timeout "
-        + "will be changed from 408 to 200 in a future version. Set the ["
-        + ES_CLUSTER_HEALTH_REQUEST_TIMEOUT_200_KEY
-        + "] "
-        + "query parameter to [true] to suppress this message and opt in to the future behaviour now.";
 
     static {
         // ClusterStateHealth fields
@@ -351,15 +343,16 @@ public class ClusterHealthResponse extends ActionResponse implements StatusToXCo
 
     @Override
     public RestStatus status() {
-        if (isTimedOut() == false) {
-            return RestStatus.OK;
-        }
-        if (return200ForClusterHealthTimeout) {
-            return RestStatus.OK;
-        } else {
-            deprecationLogger.compatibleCritical("cluster_health_request_timeout", CLUSTER_HEALTH_REQUEST_TIMEOUT_DEPRECATION_MSG);
+        return status(RestApiVersion.current());
+    }
+
+    @Override
+    public RestStatus status(RestApiVersion restApiVersion) {
+        // Legacy behaviour
+        if (isTimedOut() && restApiVersion == RestApiVersion.V_7 && return200ForClusterHealthTimeout == false) {
             return RestStatus.REQUEST_TIMEOUT;
         }
+        return RestStatus.OK;
     }
 
     @Override

+ 5 - 0
server/src/main/java/org/elasticsearch/common/xcontent/StatusToXContentObject.java

@@ -7,6 +7,7 @@
  */
 package org.elasticsearch.common.xcontent;
 
+import org.elasticsearch.core.RestApiVersion;
 import org.elasticsearch.rest.RestStatus;
 import org.elasticsearch.xcontent.ToXContentObject;
 
@@ -20,4 +21,8 @@ public interface StatusToXContentObject extends ToXContentObject {
      * Returns the REST status to make sure it is returned correctly
      */
     RestStatus status();
+
+    default RestStatus status(RestApiVersion restApiVersion) {
+        return status();
+    }
 }

+ 1 - 1
server/src/main/java/org/elasticsearch/rest/action/RestStatusToXContentListener.java

@@ -44,7 +44,7 @@ public class RestStatusToXContentListener<Response extends StatusToXContentObjec
     public RestResponse buildResponse(Response response, XContentBuilder builder) throws Exception {
         assert response.isFragment() == false; // would be nice if we could make default methods final
         response.toXContent(builder, channel.request());
-        RestResponse restResponse = new BytesRestResponse(response.status(), builder);
+        RestResponse restResponse = new BytesRestResponse(response.status(builder.getRestApiVersion()), builder);
         if (RestStatus.CREATED == restResponse.status()) {
             final String location = extractLocation.apply(response);
             if (location != null) {

+ 18 - 3
server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestClusterHealthAction.java

@@ -15,9 +15,12 @@ import org.elasticsearch.client.node.NodeClient;
 import org.elasticsearch.cluster.health.ClusterHealthStatus;
 import org.elasticsearch.common.Priority;
 import org.elasticsearch.common.Strings;
+import org.elasticsearch.common.logging.DeprecationCategory;
+import org.elasticsearch.common.logging.DeprecationLogger;
 import org.elasticsearch.rest.BaseRestHandler;
 import org.elasticsearch.rest.RestRequest;
 import org.elasticsearch.rest.action.RestStatusToXContentListener;
+import org.elasticsearch.rest.action.search.RestSearchAction;
 
 import java.io.IOException;
 import java.util.Collections;
@@ -30,6 +33,12 @@ import static org.elasticsearch.rest.RestRequest.Method.GET;
 
 public class RestClusterHealthAction extends BaseRestHandler {
 
+    private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(RestSearchAction.class);
+    private static final String RETURN_200_FOR_CLUSTER_HEALTH_TIMEOUT = "return_200_for_cluster_health_timeout";
+    private static final String CLUSTER_HEALTH_REQUEST_TIMEOUT_DEPRECATION_MSG = "the ["
+        + RETURN_200_FOR_CLUSTER_HEALTH_TIMEOUT
+        + "] parameter is deprecated and will be removed in a future release.";
+
     @Override
     public List<Route> routes() {
         return List.of(new Route(GET, "/_cluster/health"), new Route(GET, "/_cluster/health/{index}"));
@@ -81,9 +90,15 @@ public class RestClusterHealthAction extends BaseRestHandler {
         if (request.param("wait_for_events") != null) {
             clusterHealthRequest.waitForEvents(Priority.valueOf(request.param("wait_for_events").toUpperCase(Locale.ROOT)));
         }
-        clusterHealthRequest.return200ForClusterHealthTimeout(
-            request.paramAsBoolean("return_200_for_cluster_health_timeout", clusterHealthRequest.doesReturn200ForClusterHealthTimeout())
-        );
+        String return200ForClusterHealthTimeout = request.param(RETURN_200_FOR_CLUSTER_HEALTH_TIMEOUT);
+        if (return200ForClusterHealthTimeout != null) {
+            deprecationLogger.warn(
+                DeprecationCategory.API,
+                "cluster_health_request_timeout",
+                CLUSTER_HEALTH_REQUEST_TIMEOUT_DEPRECATION_MSG
+            );
+        }
+        clusterHealthRequest.return200ForClusterHealthTimeout(Boolean.parseBoolean(return200ForClusterHealthTimeout));
         return clusterHealthRequest;
     }
 

+ 24 - 8
server/src/test/java/org/elasticsearch/action/admin/cluster/health/ClusterHealthResponsesTests.java

@@ -20,6 +20,7 @@ import org.elasticsearch.common.io.stream.BytesStreamOutput;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.Writeable;
 import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.core.RestApiVersion;
 import org.elasticsearch.core.TimeValue;
 import org.elasticsearch.rest.RestStatus;
 import org.elasticsearch.test.AbstractSerializingTestCase;
@@ -43,16 +44,11 @@ import static org.hamcrest.Matchers.lessThanOrEqualTo;
 public class ClusterHealthResponsesTests extends AbstractSerializingTestCase<ClusterHealthResponse> {
     private final ClusterHealthRequest.Level level = randomFrom(ClusterHealthRequest.Level.values());
 
-    public void testIsTimeout() {
+    public void testIsTimeoutReturns200ByDefault() {
         ClusterHealthResponse res = new ClusterHealthResponse("", Strings.EMPTY_ARRAY, ClusterState.EMPTY_STATE, false);
         for (int i = 0; i < 5; i++) {
             res.setTimedOut(randomBoolean());
-            if (res.isTimedOut()) {
-                assertEquals(RestStatus.REQUEST_TIMEOUT, res.status());
-                assertCriticalWarnings(ClusterHealthResponse.CLUSTER_HEALTH_REQUEST_TIMEOUT_DEPRECATION_MSG);
-            } else {
-                assertEquals(RestStatus.OK, res.status());
-            }
+            assertEquals(RestStatus.OK, res.status(RestApiVersion.V_8));
         }
     }
 
@@ -60,7 +56,27 @@ public class ClusterHealthResponsesTests extends AbstractSerializingTestCase<Clu
         ClusterHealthResponse res = new ClusterHealthResponse("", Strings.EMPTY_ARRAY, ClusterState.EMPTY_STATE, true);
         for (int i = 0; i < 5; i++) {
             res.setTimedOut(randomBoolean());
-            assertEquals(RestStatus.OK, res.status());
+            assertEquals(RestStatus.OK, res.status(RestApiVersion.V_8));
+        }
+    }
+
+    public void testTimeoutReturns200InIfOptedInV7CompatibilityMode() {
+        ClusterHealthResponse res = new ClusterHealthResponse("", Strings.EMPTY_ARRAY, ClusterState.EMPTY_STATE, true);
+        for (int i = 0; i < 5; i++) {
+            res.setTimedOut(randomBoolean());
+            assertEquals(RestStatus.OK, res.status(RestApiVersion.V_7));
+        }
+    }
+
+    public void testTimeoutReturns408InV7CompatibilityMode() {
+        ClusterHealthResponse res = new ClusterHealthResponse("", Strings.EMPTY_ARRAY, ClusterState.EMPTY_STATE, false);
+        for (int i = 0; i < 5; i++) {
+            res.setTimedOut(randomBoolean());
+            if (res.isTimedOut()) {
+                assertEquals(RestStatus.REQUEST_TIMEOUT, res.status(RestApiVersion.V_7));
+            } else {
+                assertEquals(RestStatus.OK, res.status(RestApiVersion.V_7));
+            }
         }
     }
 

+ 9 - 0
server/src/test/java/org/elasticsearch/rest/action/admin/cluster/RestClusterHealthActionTests.java

@@ -8,6 +8,7 @@
 
 package org.elasticsearch.rest.action.admin.cluster;
 
+import org.apache.logging.log4j.Level;
 import org.elasticsearch.action.admin.cluster.health.ClusterHealthRequest;
 import org.elasticsearch.action.support.ActiveShardCount;
 import org.elasticsearch.cluster.health.ClusterHealthStatus;
@@ -68,6 +69,14 @@ public class RestClusterHealthActionTests extends ESTestCase {
         assertThat(clusterHealthRequest.waitForNodes(), equalTo(waitForNodes));
         assertThat(clusterHealthRequest.waitForEvents(), equalTo(waitForEvents));
         assertThat(clusterHealthRequest.doesReturn200ForClusterHealthTimeout(), equalTo(requestTimeout200));
+
+        assertWarnings(
+            true,
+            new DeprecationWarning(
+                Level.WARN,
+                "the [return_200_for_cluster_health_timeout] parameter is deprecated and will be removed in a future release."
+            )
+        );
     }
 
     private FakeRestRequest buildRestRequest(Map<String, String> params) {

+ 0 - 2
x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/rollup/security_tests.yml

@@ -126,7 +126,6 @@ teardown:
 
   # this is a hacky way to sleep for 5s, since we will never have 10 nodes
   - do:
-      catch: request_timeout
       cluster.health:
         wait_for_nodes: 10
         timeout: "5s"
@@ -286,7 +285,6 @@ teardown:
 
   # this is a hacky way to sleep for 5s, since we will never have 10 nodes
   - do:
-      catch: request_timeout
       cluster.health:
         wait_for_nodes: 10
         timeout: "5s"