浏览代码

Change skip_unavailable default value to true (#105792)

In order to improve the experience of cross-cluster search, we are changing
the default value of the remote cluster `skip_unavailable` setting from `false` to `true`.

This setting causes any cross-cluster _search (or _async_search) to entirely fail when
any remote cluster with `skip_unavailable=false` is either unavailable (connection to it fails)
or if the search on it fails on all shards.

Setting `skip_unavailable=true` allows partial results from other clusters to be
returned. In that case, the search response cluster metadata will show a `skipped`
status, so the user can see that no data came in from that cluster. Kibana also
now leverages this metadata in the cross-cluster search responses to allow users
to see how many clusters returned data and drill down into which clusters did not
(including failure messages).

Currently, the user/admin has to specifically set the value to `true` in the configs, like so:

```
cluster:
    remote:
        remote1:
            seeds: 10.10.10.10:9300
            skip_unavailable: true
```

even though that is probably what search admins want in the vast majority of cases.

Setting `skip_unavailable=false` should be a conscious (and probably rare) choice
by an Elasticsearch admin that a particular cluster's results are so essential to a
search (or visualization in dashboard or Discover panel) that no results at all should
be shown if it cannot return any results.
Michael Peterson 1 年之前
父节点
当前提交
a451511e3a
共有 22 个文件被更改,包括 254 次插入107 次删除
  1. 18 0
      docs/changelog/105792.yaml
  2. 1 1
      docs/reference/ccr/getting-started.asciidoc
  3. 9 9
      docs/reference/modules/cluster/remote-clusters-connect.asciidoc
  4. 14 3
      docs/reference/modules/cluster/remote-clusters-settings.asciidoc
  5. 34 20
      docs/reference/search/search-your-data/search-across-clusters.asciidoc
  6. 6 0
      modules/reindex/src/internalClusterTest/java/org/elasticsearch/index/reindex/CrossClusterReindexIT.java
  7. 1 0
      qa/ccs-common-rest/src/yamlRestTest/java/org/elasticsearch/test/rest/yaml/CcsCommonYamlTestSuiteIT.java
  8. 1 0
      qa/ccs-common-rest/src/yamlRestTest/java/org/elasticsearch/test/rest/yaml/RcsCcsCommonYamlTestSuiteIT.java
  9. 1 0
      qa/multi-cluster-search/build.gradle
  10. 1 1
      qa/multi-cluster-search/src/test/resources/rest-api-spec/test/multi_cluster/10_basic.yml
  11. 8 8
      qa/multi-cluster-search/src/test/resources/rest-api-spec/test/multi_cluster/20_info.yml
  12. 1 1
      server/src/main/java/org/elasticsearch/transport/RemoteClusterService.java
  13. 26 14
      server/src/test/java/org/elasticsearch/action/search/TransportSearchActionTests.java
  14. 1 0
      server/src/test/java/org/elasticsearch/transport/RemoteClusterClientTests.java
  15. 1 1
      server/src/test/java/org/elasticsearch/transport/RemoteClusterServiceTests.java
  16. 1 1
      server/src/test/java/org/elasticsearch/transport/RemoteClusterSettingsTests.java
  17. 3 1
      x-pack/plugin/security/qa/multi-cluster/src/javaRestTest/java/org/elasticsearch/xpack/remotecluster/RemoteClusterSecurityApiKeyRestIT.java
  18. 15 3
      x-pack/plugin/security/qa/multi-cluster/src/javaRestTest/java/org/elasticsearch/xpack/remotecluster/RemoteClusterSecurityEsqlIT.java
  19. 38 16
      x-pack/plugin/security/qa/multi-cluster/src/javaRestTest/java/org/elasticsearch/xpack/remotecluster/RemoteClusterSecurityFcActionAuthorizationIT.java
  20. 2 0
      x-pack/plugin/security/qa/multi-cluster/src/javaRestTest/java/org/elasticsearch/xpack/remotecluster/RemoteClusterSecurityLicensingAndFeatureUsageRestIT.java
  21. 71 28
      x-pack/plugin/security/qa/multi-cluster/src/javaRestTest/java/org/elasticsearch/xpack/remotecluster/RemoteClusterSecurityRestIT.java
  22. 1 0
      x-pack/qa/multi-cluster-search-security/legacy-with-basic-license/build.gradle

+ 18 - 0
docs/changelog/105792.yaml

@@ -0,0 +1,18 @@
+pr: 105792
+summary: "Change `skip_unavailable` remote cluster setting default value to true"
+area: Search
+type: breaking
+issues: []
+breaking:
+  title: "Change `skip_unavailable` remote cluster setting default value to true"
+  area: Cluster and node setting
+  details: The default value of the `skip_unavailable` setting is now set to true.
+    All existing and future remote clusters that do not define this setting will use the new default.
+    This setting only affects cross-cluster searches using the _search or _async_search API.
+  impact: Unavailable remote clusters in a cross-cluster search will no longer cause the search to fail unless
+    skip_unavailable is configured to be `false` in elasticsearch.yml or via the `_cluster/settings` API. 
+    Unavailable clusters with `skip_unavailable`=`true` (either explicitly or by using the new default) are marked 
+    as SKIPPED in the search response metadata section and do not fail the entire search. If users want to ensure that a
+    search returns a failure when a particular remote cluster is not available, `skip_unavailable` must be now be 
+    set explicitly.    
+  notable: false

+ 1 - 1
docs/reference/ccr/getting-started.asciidoc

@@ -147,7 +147,7 @@ cluster with cluster alias `leader`.
     "num_nodes_connected" : 1, <1>
     "max_connections_per_cluster" : 3,
     "initial_connect_timeout" : "30s",
-    "skip_unavailable" : false,
+    "skip_unavailable" : true,
     "mode" : "sniff"
   }
 }

+ 9 - 9
docs/reference/modules/cluster/remote-clusters-connect.asciidoc

@@ -37,7 +37,7 @@ clusters on individual nodes in the local cluster, define static settings in
 `elasticsearch.yml` for each node.
 
 The following request adds a remote cluster with an alias of `cluster_one`. This
-_cluster alias_ is a unique identifier that represents the connection to the 
+_cluster alias_ is a unique identifier that represents the connection to the
 remote cluster and is used to distinguish between local and remote indices.
 
 [source,console,subs=attributes+]
@@ -60,7 +60,7 @@ PUT /_cluster/settings
 // TEST[setup:host]
 // TEST[s/127.0.0.1:\{remote-interface-default-port\}/\${transport_host}/]
 <1> The cluster alias of this remote cluster is `cluster_one`.
-<2> Specifies the hostname and {remote-interface} port of a seed node in the 
+<2> Specifies the hostname and {remote-interface} port of a seed node in the
 remote cluster.
 
 You can use the <<cluster-remote-info,remote cluster info API>> to verify that
@@ -86,7 +86,7 @@ cluster with the cluster alias `cluster_one`:
     "num_nodes_connected" : 1,  <1>
     "max_connections_per_cluster" : 3,
     "initial_connect_timeout" : "30s",
-    "skip_unavailable" : false, <2>
+    "skip_unavailable" : true, <2>
 ifeval::["{trust-mechanism}"=="api-key"]
     "cluster_credentials": "::es_redacted::", <3>
 endif::[]
@@ -103,7 +103,7 @@ connected to.
 <2> Indicates whether to skip the remote cluster if searched through {ccs} but
 no nodes are available.
 ifeval::["{trust-mechanism}"=="api-key"]
-<3> If present, indicates the remote cluster has connected using API key 
+<3> If present, indicates the remote cluster has connected using API key
 authentication.
 endif::[]
 
@@ -187,7 +187,7 @@ PUT _cluster/settings
 
 You can delete a remote cluster from the cluster settings by passing `null`
 values for each remote cluster setting. The following request removes
-`cluster_two` from the cluster settings, leaving `cluster_one` and 
+`cluster_two` from the cluster settings, leaving `cluster_one` and
 `cluster_three` intact:
 
 [source,console]
@@ -212,15 +212,15 @@ PUT _cluster/settings
 
 ===== Statically configure remote clusters
 If you specify settings in `elasticsearch.yml`, only the nodes with
-those settings can connect to the remote cluster and serve remote cluster 
+those settings can connect to the remote cluster and serve remote cluster
 requests.
 
-NOTE: Remote cluster settings that are specified using the 
+NOTE: Remote cluster settings that are specified using the
 <<cluster-update-settings,cluster update settings API>> take precedence over
 settings that you specify in `elasticsearch.yml` for individual nodes.
 
-In the following example, `cluster_one`, `cluster_two`, and `cluster_three` are 
-arbitrary cluster aliases representing the connection to each cluster. These 
+In the following example, `cluster_one`, `cluster_two`, and `cluster_three` are
+arbitrary cluster aliases representing the connection to each cluster. These
 names are subsequently used to distinguish between local and remote indices.
 
 [source,yaml,subs=attributes+]

+ 14 - 3
docs/reference/modules/cluster/remote-clusters-settings.asciidoc

@@ -28,9 +28,20 @@ mode are described separately.
 
   Per cluster boolean setting that allows to skip specific clusters when no
   nodes belonging to them are available and they are the target of a remote
-  cluster request. Default is `false`, meaning that all clusters are mandatory
-  by default, but they can selectively be made optional by setting this setting
-  to `true`.
+  cluster request.
+
+IMPORTANT: In Elasticsearch 8.15, the default value for `skip_unavailable` was
+changed from `false` to `true`. Before Elasticsearch 8.15, if you want a cluster
+to be treated as optional for a {ccs}, then you need to set that configuration.
+From Elasticsearch 8.15 forward, you need to set the configuration in order to
+make a cluster required for the {ccs}. Once you upgrade the local ("querying")
+cluster search coordinator node (the node you send CCS requests to) to 8.15 or later,
+any remote clusters that do not have an explicit setting for `skip_unavailable` will
+immediately change over to using the new default of true. This is true regardless of
+whether you have upgraded the remote clusters to 8.15, as the `skip_unavailable`
+search behavior is entirely determined by the setting on the local cluster where
+you configure the remotes.
+
 
 `cluster.remote.<cluster_alias>.transport.ping_schedule`::
 

+ 34 - 20
docs/reference/search/search-your-data/search-across-clusters.asciidoc

@@ -1178,7 +1178,13 @@ gathered from all 3 clusters and the total shard count on each cluster is listed
 By default, a {ccs} fails if a remote cluster in the request is unavailable
 or returns an error where the search on all shards failed. Use the
 `skip_unavailable` cluster setting to mark a specific remote cluster as
-optional for {ccs}.
+either optional or required for {ccs}.
+
+IMPORTANT: In Elasticsearch 8.15, the default value for `skip_unavailable` was
+changed from `false` to `true`. Before Elasticsearch 8.15, if you want a cluster
+to be treated as optional for a {ccs}, then you need to set that configuration.
+From Elasticsearch 8.15 forward, you need to set the configuration in order to
+make a cluster required for the {ccs}.
 
 If `skip_unavailable` is `true`, a {ccs}:
 
@@ -1196,25 +1202,33 @@ parameter and the related `search.default_allow_partial_results` cluster setting
 when searching the remote cluster. This means searches on the remote cluster may
 return partial results.
 
-The following <<cluster-update-settings,cluster update settings>>
-API request changes `skip_unavailable` setting to `true` for `cluster_two`.
-
-[source,console]
---------------------------------
-PUT _cluster/settings
-{
-  "persistent": {
-    "cluster.remote.cluster_two.skip_unavailable": true
-  }
-}
---------------------------------
-// TEST[continued]
-
-If `cluster_two` is disconnected or unavailable during a {ccs}, {es} won't
-include matching documents from that cluster in the final results. If at
-least one shard provides results, those results will be used and the
-search will return partial data. (If doing {ccs} using async search,
-the `is_partial` field will be set to `true` to indicate partial results.)
+You can modify the `skip_unavailable` setting by editing the `cluster.remote.<cluster_alias>`
+settings in the elasticsearch.yml config file. For example:
+
+```
+cluster:
+    remote:
+        cluster_one:
+            seeds: 35.238.149.1:9300
+            skip_unavailable: false
+        cluster_two:
+            seeds: 35.238.149.2:9300
+            skip_unavailable: true
+```
+
+Or you can set the cluster.remote settings via the
+<<cluster-update-settings,cluster update settings>> API as shown
+<<ccs-remote-cluster-setup, here>>.
+
+When a remote cluster configured with `skip_unavailable: true` (such as
+`cluster_two` above) is disconnected or unavailable during a {ccs}, {es} won't
+include matching documents from that cluster in the final results and the
+search will be considered successful (HTTP status 200 OK).
+
+If at least one shard from a cluster provides search results, those results will
+be used and the search will return partial data. This is true regardless of
+the `skip_unavailable` setting of the remote cluster. (If doing {ccs} using async
+search, the `is_partial` field will be set to `true` to indicate partial results.)
 
 [discrete]
 [[ccs-network-delays]]

+ 6 - 0
modules/reindex/src/internalClusterTest/java/org/elasticsearch/index/reindex/CrossClusterReindexIT.java

@@ -19,6 +19,7 @@ import org.elasticsearch.test.AbstractMultiClustersTestCase;
 
 import java.util.Collection;
 import java.util.List;
+import java.util.Map;
 
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
 import static org.hamcrest.Matchers.containsString;
@@ -38,6 +39,11 @@ public class CrossClusterReindexIT extends AbstractMultiClustersTestCase {
         return List.of(REMOTE_CLUSTER);
     }
 
+    @Override
+    protected Map<String, Boolean> skipUnavailableForRemoteClusters() {
+        return Map.of(REMOTE_CLUSTER, false);
+    }
+
     @Override
     protected Collection<Class<? extends Plugin>> nodePlugins(String clusterAlias) {
         return List.of(ReindexPlugin.class);

+ 1 - 0
qa/ccs-common-rest/src/yamlRestTest/java/org/elasticsearch/test/rest/yaml/CcsCommonYamlTestSuiteIT.java

@@ -101,6 +101,7 @@ public class CcsCommonYamlTestSuiteIT extends ESClientYamlSuiteTestCase {
         .setting("node.roles", "[data,ingest,master,remote_cluster_client]")
         .setting("cluster.remote.remote_cluster.seeds", () -> "\"" + remoteCluster.getTransportEndpoint(0) + "\"")
         .setting("cluster.remote.connections_per_cluster", "1")
+        .setting("cluster.remote.remote_cluster.skip_unavailable", "false")
         .apply(commonClusterConfig)
         .build();
 

+ 1 - 0
qa/ccs-common-rest/src/yamlRestTest/java/org/elasticsearch/test/rest/yaml/RcsCcsCommonYamlTestSuiteIT.java

@@ -246,6 +246,7 @@ public class RcsCcsCommonYamlTestSuiteIT extends ESClientYamlSuiteTestCase {
 
     private static void configureRemoteCluster() throws IOException {
         final Settings.Builder builder = Settings.builder();
+        builder.put("cluster.remote." + REMOTE_CLUSTER_NAME + ".skip_unavailable", "false");
         if (randomBoolean()) {
             builder.put("cluster.remote." + REMOTE_CLUSTER_NAME + ".mode", "proxy")
                 .put("cluster.remote." + REMOTE_CLUSTER_NAME + ".proxy_address", fulfillingCluster.getRemoteClusterServerEndpoint(0));

+ 1 - 0
qa/multi-cluster-search/build.gradle

@@ -48,6 +48,7 @@ BuildParams.bwcVersions.withWireCompatible(ccsSupportedVersion) { bwcVersion, ba
     setting 'cluster.remote.connections_per_cluster', '1'
     setting 'cluster.remote.my_remote_cluster.seeds',
       { "\"${remoteCluster.get().getAllTransportPortURI().get(0)}\"" }
+    setting 'cluster.remote.my_remote_cluster.skip_unavailable', 'false'
   }
 
   tasks.register("${baseName}#remote-cluster", RestIntegTestTask) {

+ 1 - 1
qa/multi-cluster-search/src/test/resources/rest-api-spec/test/multi_cluster/10_basic.yml

@@ -249,7 +249,7 @@
           persistent:
             cluster.remote.test_remote_cluster.seeds: $remote_ip
 
-  - match: {persistent: {cluster.remote.test_remote_cluster.seeds: $remote_ip}}
+  - match: {persistent.cluster\.remote\.test_remote_cluster\.seeds: $remote_ip}
 
   - do:
       search:

+ 8 - 8
qa/multi-cluster-search/src/test/resources/rest-api-spec/test/multi_cluster/20_info.yml

@@ -113,33 +113,33 @@
 
   - do:
       cluster.remote_info: {}
-  - is_false: remote1.skip_unavailable
+  - is_true: remote1.skip_unavailable
 
   - do:
       cluster.put_settings:
         body:
           persistent:
-            cluster.remote.remote1.skip_unavailable: true
+            cluster.remote.remote1.skip_unavailable: false
 
-  - is_true: persistent.cluster.remote.remote1.skip_unavailable
+  - is_false: persistent.cluster.remote.remote1.skip_unavailable
 
   - do:
       cluster.remote_info: {}
 
-  - is_true: remote1.skip_unavailable
+  - is_false: remote1.skip_unavailable
 
   - do:
       cluster.put_settings:
         body:
           persistent:
-            cluster.remote.remote1.skip_unavailable: false
+            cluster.remote.remote1.skip_unavailable: true
 
-  - is_false: persistent.cluster.remote.remote1.skip_unavailable
+  - is_true: persistent.cluster.remote.remote1.skip_unavailable
 
   - do:
       cluster.remote_info: {}
 
-  - is_false: remote1.skip_unavailable
+  - is_true: remote1.skip_unavailable
 
   - do:
       cluster.put_settings:
@@ -152,7 +152,7 @@
   - do:
       cluster.remote_info: {}
 
-  - is_false: remote1.skip_unavailable
+  - is_true: remote1.skip_unavailable
 
   - do:
       cluster.put_settings:

+ 1 - 1
server/src/main/java/org/elasticsearch/transport/RemoteClusterService.java

@@ -87,7 +87,7 @@ public final class RemoteClusterService extends RemoteClusterAware implements Cl
     public static final Setting.AffixSetting<Boolean> REMOTE_CLUSTER_SKIP_UNAVAILABLE = Setting.affixKeySetting(
         "cluster.remote.",
         "skip_unavailable",
-        (ns, key) -> boolSetting(key, false, new RemoteConnectionEnabled<>(ns, key), Setting.Property.Dynamic, Setting.Property.NodeScope)
+        (ns, key) -> boolSetting(key, true, new RemoteConnectionEnabled<>(ns, key), Setting.Property.Dynamic, Setting.Property.NodeScope)
     );
 
     public static final Setting.AffixSetting<TimeValue> REMOTE_CLUSTER_PING_SCHEDULE = Setting.affixKeySetting(

+ 26 - 14
server/src/test/java/org/elasticsearch/action/search/TransportSearchActionTests.java

@@ -469,7 +469,8 @@ public class TransportSearchActionTests extends ESTestCase {
         int numClusters,
         DiscoveryNode[] nodes,
         Map<String, OriginalIndices> remoteIndices,
-        Settings.Builder settingsBuilder
+        Settings.Builder settingsBuilder,
+        boolean skipUnavailable
     ) {
         MockTransportService[] mockTransportServices = new MockTransportService[numClusters];
         for (int i = 0; i < numClusters; i++) {
@@ -486,6 +487,7 @@ public class TransportSearchActionTests extends ESTestCase {
             knownNodes.add(remoteSeedNode);
             nodes[i] = remoteSeedNode;
             settingsBuilder.put("cluster.remote.remote" + i + ".seeds", remoteSeedNode.getAddress().toString());
+            settingsBuilder.put("cluster.remote.remote" + i + ".skip_unavailable", Boolean.toString(skipUnavailable));
             remoteIndices.put("remote" + i, new OriginalIndices(new String[] { "index" }, IndicesOptions.lenientExpandOpen()));
         }
         return mockTransportServices;
@@ -496,7 +498,8 @@ public class TransportSearchActionTests extends ESTestCase {
         DiscoveryNode[] nodes = new DiscoveryNode[numClusters];
         Map<String, OriginalIndices> remoteIndicesByCluster = new HashMap<>();
         Settings.Builder builder = Settings.builder();
-        MockTransportService[] mockTransportServices = startTransport(numClusters, nodes, remoteIndicesByCluster, builder);
+        boolean skipUnavailable = randomBoolean();
+        MockTransportService[] mockTransportServices = startTransport(numClusters, nodes, remoteIndicesByCluster, builder, skipUnavailable);
         Settings settings = builder.build();
         boolean local = randomBoolean();
         OriginalIndices localIndices = local ? new OriginalIndices(new String[] { "index" }, SearchRequest.DEFAULT_INDICES_OPTIONS) : null;
@@ -566,7 +569,8 @@ public class TransportSearchActionTests extends ESTestCase {
         DiscoveryNode[] nodes = new DiscoveryNode[numClusters];
         Map<String, OriginalIndices> remoteIndicesByCluster = new HashMap<>();
         Settings.Builder builder = Settings.builder();
-        MockTransportService[] mockTransportServices = startTransport(numClusters, nodes, remoteIndicesByCluster, builder);
+        boolean skipUnavailable = randomBoolean();
+        MockTransportService[] mockTransportServices = startTransport(numClusters, nodes, remoteIndicesByCluster, builder, skipUnavailable);
         Settings settings = builder.build();
         boolean local = randomBoolean();
         OriginalIndices localIndices = local ? new OriginalIndices(new String[] { "index" }, SearchRequest.DEFAULT_INDICES_OPTIONS) : null;
@@ -709,7 +713,8 @@ public class TransportSearchActionTests extends ESTestCase {
         DiscoveryNode[] nodes = new DiscoveryNode[numClusters];
         Map<String, OriginalIndices> remoteIndicesByCluster = new HashMap<>();
         Settings.Builder builder = Settings.builder();
-        MockTransportService[] mockTransportServices = startTransport(numClusters, nodes, remoteIndicesByCluster, builder);
+        boolean skipUnavailable = randomBoolean();
+        MockTransportService[] mockTransportServices = startTransport(numClusters, nodes, remoteIndicesByCluster, builder, skipUnavailable);
         Settings settings = builder.build();
         boolean local = randomBoolean();
         OriginalIndices localIndices = local ? new OriginalIndices(new String[] { "index" }, SearchRequest.DEFAULT_INDICES_OPTIONS) : null;
@@ -734,10 +739,13 @@ public class TransportSearchActionTests extends ESTestCase {
                 final CountDownLatch latch = new CountDownLatch(1);
                 SetOnce<Tuple<SearchRequest, ActionListener<SearchResponse>>> setOnce = new SetOnce<>();
                 AtomicReference<Exception> failure = new AtomicReference<>();
-                LatchedActionListener<SearchResponse> listener = new LatchedActionListener<>(
-                    ActionListener.wrap(r -> fail("no response expected"), failure::set),
-                    latch
-                );
+                LatchedActionListener<SearchResponse> listener = new LatchedActionListener<>(ActionListener.wrap(r -> {
+                    if (skipUnavailable) {
+                        assertThat(r.getClusters().getClusterStateCount(SearchResponse.Cluster.Status.SKIPPED), equalTo(numClusters));
+                    } else {
+                        fail("no response expected");  // failure should be returned, not SearchResponse
+                    }
+                }, failure::set), latch);
 
                 TaskId parentTaskId = new TaskId("n", 1);
                 SearchTask task = new SearchTask(2, "search", "search", () -> "desc", parentTaskId, Collections.emptyMap());
@@ -763,10 +771,14 @@ public class TransportSearchActionTests extends ESTestCase {
                     resolveWithEmptySearchResponse(tuple);
                 }
                 awaitLatch(latch, 5, TimeUnit.SECONDS);
-                assertNotNull(failure.get());
-                assertThat(failure.get(), instanceOf(RemoteTransportException.class));
-                RemoteTransportException remoteTransportException = (RemoteTransportException) failure.get();
-                assertEquals(RestStatus.NOT_FOUND, remoteTransportException.status());
+                if (skipUnavailable) {
+                    assertNull(failure.get());
+                } else {
+                    assertNotNull(failure.get());
+                    assertThat(failure.get(), instanceOf(RemoteTransportException.class));
+                    RemoteTransportException remoteTransportException = (RemoteTransportException) failure.get();
+                    assertEquals(RestStatus.NOT_FOUND, remoteTransportException.status());
+                }
             }
 
         } finally {
@@ -781,7 +793,7 @@ public class TransportSearchActionTests extends ESTestCase {
         DiscoveryNode[] nodes = new DiscoveryNode[numClusters];
         Map<String, OriginalIndices> remoteIndicesByCluster = new HashMap<>();
         Settings.Builder builder = Settings.builder();
-        MockTransportService[] mockTransportServices = startTransport(numClusters, nodes, remoteIndicesByCluster, builder);
+        MockTransportService[] mockTransportServices = startTransport(numClusters, nodes, remoteIndicesByCluster, builder, false);
         Settings settings = builder.build();
         boolean local = randomBoolean();
         OriginalIndices localIndices = local ? new OriginalIndices(new String[] { "index" }, SearchRequest.DEFAULT_INDICES_OPTIONS) : null;
@@ -1035,7 +1047,7 @@ public class TransportSearchActionTests extends ESTestCase {
         DiscoveryNode[] nodes = new DiscoveryNode[numClusters];
         Map<String, OriginalIndices> remoteIndicesByCluster = new HashMap<>();
         Settings.Builder builder = Settings.builder();
-        MockTransportService[] mockTransportServices = startTransport(numClusters, nodes, remoteIndicesByCluster, builder);
+        MockTransportService[] mockTransportServices = startTransport(numClusters, nodes, remoteIndicesByCluster, builder, false);
         Settings settings = builder.build();
         try (
             MockTransportService service = MockTransportService.createNewService(

+ 1 - 0
server/src/test/java/org/elasticsearch/transport/RemoteClusterClientTests.java

@@ -145,6 +145,7 @@ public class RemoteClusterClientTests extends ESTestCase {
             Settings localSettings = Settings.builder()
                 .put(onlyRole(DiscoveryNodeRole.REMOTE_CLUSTER_CLIENT_ROLE))
                 .put("cluster.remote.test.seeds", remoteNode.getAddress().getAddress() + ":" + remoteNode.getAddress().getPort())
+                .put("cluster.remote.test.skip_unavailable", "false") // ensureConnected is only true for skip_unavailable=false
                 .build();
             try (
                 MockTransportService service = MockTransportService.createNewService(

+ 1 - 1
server/src/test/java/org/elasticsearch/transport/RemoteClusterServiceTests.java

@@ -1282,7 +1282,7 @@ public class RemoteClusterServiceTests extends ESTestCase {
                 service.start();
                 service.acceptIncomingRequests();
 
-                assertFalse(service.getRemoteClusterService().isSkipUnavailable("cluster1"));
+                assertTrue(service.getRemoteClusterService().isSkipUnavailable("cluster1"));
 
                 if (randomBoolean()) {
                     updateSkipUnavailable(service.getRemoteClusterService(), "cluster1", false);

+ 1 - 1
server/src/test/java/org/elasticsearch/transport/RemoteClusterSettingsTests.java

@@ -68,7 +68,7 @@ public class RemoteClusterSettingsTests extends ESTestCase {
 
     public void testSkipUnavailableDefault() {
         final String alias = randomAlphaOfLength(8);
-        assertFalse(REMOTE_CLUSTER_SKIP_UNAVAILABLE.getConcreteSettingForNamespace(alias).get(Settings.EMPTY));
+        assertTrue(REMOTE_CLUSTER_SKIP_UNAVAILABLE.getConcreteSettingForNamespace(alias).get(Settings.EMPTY));
     }
 
     public void testSeedsDefault() {

+ 3 - 1
x-pack/plugin/security/qa/multi-cluster/src/javaRestTest/java/org/elasticsearch/xpack/remotecluster/RemoteClusterSecurityApiKeyRestIT.java

@@ -320,14 +320,16 @@ public class RemoteClusterSecurityApiKeyRestIT extends AbstractRemoteClusterSecu
                 )
             );
 
-            // Check that authentication fails if we use a non-existent cross cluster access API key
+            // Check that authentication fails if we use a non-existent cross cluster access API key (when skip_unavailable=false)
             updateClusterSettings(
                 randomBoolean()
                     ? Settings.builder()
                         .put("cluster.remote.invalid_remote.seeds", fulfillingCluster.getRemoteClusterServerEndpoint(0))
+                        .put("cluster.remote.invalid_remote.skip_unavailable", "false")
                         .build()
                     : Settings.builder()
                         .put("cluster.remote.invalid_remote.mode", "proxy")
+                        .put("cluster.remote.invalid_remote.skip_unavailable", "false")
                         .put("cluster.remote.invalid_remote.proxy_address", fulfillingCluster.getRemoteClusterServerEndpoint(0))
                         .build()
             );

+ 15 - 3
x-pack/plugin/security/qa/multi-cluster/src/javaRestTest/java/org/elasticsearch/xpack/remotecluster/RemoteClusterSecurityEsqlIT.java

@@ -499,12 +499,18 @@ public class RemoteClusterSecurityEsqlIT extends AbstractRemoteClusterSecurityTe
         configureRemoteCluster();
         populateData();
 
+        final boolean skipUnavailable = randomBoolean();
+
         // avoids getting 404 errors
         updateClusterSettings(
             randomBoolean()
-                ? Settings.builder().put("cluster.remote.invalid_remote.seeds", fulfillingCluster.getRemoteClusterServerEndpoint(0)).build()
+                ? Settings.builder()
+                    .put("cluster.remote.invalid_remote.seeds", fulfillingCluster.getRemoteClusterServerEndpoint(0))
+                    .put("cluster.remote.invalid_remote.skip_unavailable", Boolean.toString(skipUnavailable))
+                    .build()
                 : Settings.builder()
                     .put("cluster.remote.invalid_remote.mode", "proxy")
+                    .put("cluster.remote.invalid_remote.skip_unavailable", Boolean.toString(skipUnavailable))
                     .put("cluster.remote.invalid_remote.proxy_address", fulfillingCluster.getRemoteClusterServerEndpoint(0))
                     .build()
         );
@@ -520,8 +526,14 @@ public class RemoteClusterSecurityEsqlIT extends AbstractRemoteClusterSecurityTe
             var q2 = "FROM invalid_remote:employees |  SORT emp_id DESC | LIMIT 10";
             performRequestWithRemoteSearchUser(esqlRequest(q2));
         });
-        assertThat(error.getResponse().getStatusLine().getStatusCode(), equalTo(401));
-        assertThat(error.getMessage(), containsString("unable to find apikey"));
+
+        if (skipUnavailable == false) {
+            assertThat(error.getResponse().getStatusLine().getStatusCode(), equalTo(401));
+            assertThat(error.getMessage(), containsString("unable to find apikey"));
+        } else {
+            assertThat(error.getResponse().getStatusLine().getStatusCode(), equalTo(500));
+            assertThat(error.getMessage(), containsString("Unable to connect to [invalid_remote]"));
+        }
     }
 
     @SuppressWarnings("unchecked")

+ 38 - 16
x-pack/plugin/security/qa/multi-cluster/src/javaRestTest/java/org/elasticsearch/xpack/remotecluster/RemoteClusterSecurityFcActionAuthorizationIT.java

@@ -47,6 +47,7 @@ import org.elasticsearch.test.rest.ObjectPath;
 import org.elasticsearch.test.transport.MockTransportService;
 import org.elasticsearch.threadpool.TestThreadPool;
 import org.elasticsearch.threadpool.ThreadPool;
+import org.elasticsearch.transport.ConnectTransportException;
 import org.elasticsearch.transport.RemoteClusterService;
 import org.elasticsearch.transport.RemoteConnectionInfo;
 import org.elasticsearch.xpack.ccr.action.repositories.ClearCcrRestoreSessionAction;
@@ -82,6 +83,7 @@ import static org.hamcrest.Matchers.containsString;
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.greaterThanOrEqualTo;
 import static org.hamcrest.Matchers.hasSize;
+import static org.hamcrest.Matchers.instanceOf;
 import static org.hamcrest.Matchers.is;
 
 public class RemoteClusterSecurityFcActionAuthorizationIT extends ESRestTestCase {
@@ -176,7 +178,9 @@ public class RemoteClusterSecurityFcActionAuthorizationIT extends ESRestTestCase
         }
 
         // Simulate QC behaviours by directly connecting to the FC using a transport service
-        try (MockTransportService service = startTransport("node", threadPool, (String) crossClusterApiKeyMap.get("encoded"))) {
+        final String apiKey = (String) crossClusterApiKeyMap.get("encoded");
+        final boolean skipUnavailable = randomBoolean();
+        try (MockTransportService service = startTransport("node", threadPool, apiKey, skipUnavailable)) {
             final RemoteClusterService remoteClusterService = service.getRemoteClusterService();
             final List<RemoteConnectionInfo> remoteConnectionInfos = remoteClusterService.getRemoteConnectionInfos().toList();
             assertThat(remoteConnectionInfos, hasSize(1));
@@ -328,28 +332,35 @@ public class RemoteClusterSecurityFcActionAuthorizationIT extends ESRestTestCase
         final Response createApiKeyResponse = adminClient().performRequest(createApiKeyRequest);
         assertOK(createApiKeyResponse);
         final Map<String, Object> apiKeyMap = responseAsMap(createApiKeyResponse);
-        try (MockTransportService service = startTransport("node", threadPool, (String) apiKeyMap.get("encoded"))) {
+        final String apiKey = (String) apiKeyMap.get("encoded");
+        final boolean skipUnavailable = randomBoolean();
+        try (MockTransportService service = startTransport("node", threadPool, apiKey, skipUnavailable)) {
             final RemoteClusterService remoteClusterService = service.getRemoteClusterService();
             final var remoteClusterClient = remoteClusterService.getRemoteClusterClient(
                 "my_remote_cluster",
                 EsExecutors.DIRECT_EXECUTOR_SERVICE,
                 RemoteClusterService.DisconnectedStrategy.RECONNECT_UNLESS_SKIP_UNAVAILABLE
             );
-
-            final ElasticsearchSecurityException e = expectThrows(
-                ElasticsearchSecurityException.class,
+            final Exception e = expectThrows(
+                Exception.class,
                 () -> executeRemote(
                     remoteClusterClient,
                     RemoteClusterNodesAction.REMOTE_TYPE,
                     RemoteClusterNodesAction.Request.REMOTE_CLUSTER_SERVER_NODES
                 )
             );
-            assertThat(
-                e.getMessage(),
-                containsString(
-                    "authentication expected API key type of [cross_cluster], but API key [" + apiKeyMap.get("id") + "] has type [rest]"
-                )
-            );
+            if (skipUnavailable) {
+                assertThat(e, instanceOf(ConnectTransportException.class));
+                assertThat(e.getMessage(), containsString("Unable to connect to [my_remote_cluster]"));
+            } else {
+                assertThat(e, instanceOf(ElasticsearchSecurityException.class));
+                assertThat(
+                    e.getMessage(),
+                    containsString(
+                        "authentication expected API key type of [cross_cluster], but API key [" + apiKeyMap.get("id") + "] has type [rest]"
+                    )
+                );
+            }
         }
     }
 
@@ -392,12 +403,14 @@ public class RemoteClusterSecurityFcActionAuthorizationIT extends ESRestTestCase
         final FieldCapabilitiesRequest request = new FieldCapabilitiesRequest().indices("index").fields("name");
 
         // Perform cross-cluster requests
+        boolean skipUnavailable = randomBoolean();
         try (
             MockTransportService service = startTransport(
                 "node",
                 threadPool,
                 (String) crossClusterApiKeyMap.get("encoded"),
-                Map.of(TransportFieldCapabilitiesAction.NAME, crossClusterAccessSubjectInfo)
+                Map.of(TransportFieldCapabilitiesAction.NAME, crossClusterAccessSubjectInfo),
+                skipUnavailable
             )
         ) {
             final RemoteClusterService remoteClusterService = service.getRemoteClusterService();
@@ -508,7 +521,8 @@ public class RemoteClusterSecurityFcActionAuthorizationIT extends ESRestTestCase
                 "node",
                 threadPool,
                 (String) crossClusterApiKeyMap.get("encoded"),
-                Map.of(TransportGetAction.TYPE.name() + "[s]", buildCrossClusterAccessSubjectInfo(indexA))
+                Map.of(TransportGetAction.TYPE.name() + "[s]", buildCrossClusterAccessSubjectInfo(indexA)),
+                randomBoolean()
             )
         ) {
             final RemoteClusterService remoteClusterService = service.getRemoteClusterService();
@@ -552,15 +566,21 @@ public class RemoteClusterSecurityFcActionAuthorizationIT extends ESRestTestCase
         );
     }
 
-    private static MockTransportService startTransport(final String nodeName, final ThreadPool threadPool, String encodedApiKey) {
-        return startTransport(nodeName, threadPool, encodedApiKey, Map.of());
+    private static MockTransportService startTransport(
+        final String nodeName,
+        final ThreadPool threadPool,
+        String encodedApiKey,
+        boolean skipUnavailable
+    ) {
+        return startTransport(nodeName, threadPool, encodedApiKey, Map.of(), skipUnavailable);
     }
 
     private static MockTransportService startTransport(
         final String nodeName,
         final ThreadPool threadPool,
         String encodedApiKey,
-        Map<String, CrossClusterAccessSubjectInfo> subjectInfoLookup
+        Map<String, CrossClusterAccessSubjectInfo> subjectInfoLookup,
+        boolean skipUnavailable
     ) {
         final String remoteClusterServerEndpoint = testCluster.getRemoteClusterServerEndpoint(0);
 
@@ -573,9 +593,11 @@ public class RemoteClusterSecurityFcActionAuthorizationIT extends ESRestTestCase
         builder.setSecureSettings(secureSettings);
         if (randomBoolean()) {
             builder.put("cluster.remote.my_remote_cluster.mode", "sniff")
+                .put("cluster.remote.my_remote_cluster.skip_unavailable", Boolean.toString(skipUnavailable))
                 .put("cluster.remote.my_remote_cluster.seeds", remoteClusterServerEndpoint);
         } else {
             builder.put("cluster.remote.my_remote_cluster.mode", "proxy")
+                .put("cluster.remote.my_remote_cluster.skip_unavailable", Boolean.toString(skipUnavailable))
                 .put("cluster.remote.my_remote_cluster.proxy_address", remoteClusterServerEndpoint);
         }
 

+ 2 - 0
x-pack/plugin/security/qa/multi-cluster/src/javaRestTest/java/org/elasticsearch/xpack/remotecluster/RemoteClusterSecurityLicensingAndFeatureUsageRestIT.java

@@ -105,9 +105,11 @@ public class RemoteClusterSecurityLicensingAndFeatureUsageRestIT extends Abstrac
         final Settings.Builder builder = Settings.builder();
         if (isProxyMode) {
             builder.put("cluster.remote.my_remote_cluster.mode", "proxy")
+                .put("cluster.remote.my_remote_cluster.skip_unavailable", "false")
                 .put("cluster.remote.my_remote_cluster.proxy_address", fulfillingCluster.getRemoteClusterServerEndpoint(0));
         } else {
             builder.put("cluster.remote.my_remote_cluster.mode", "sniff")
+                .put("cluster.remote.my_remote_cluster.skip_unavailable", "false")
                 .putList("cluster.remote.my_remote_cluster.seeds", fulfillingCluster.getRemoteClusterServerEndpoint(0));
         }
         updateClusterSettings(builder.build());

+ 71 - 28
x-pack/plugin/security/qa/multi-cluster/src/javaRestTest/java/org/elasticsearch/xpack/remotecluster/RemoteClusterSecurityRestIT.java

@@ -7,6 +7,7 @@
 
 package org.elasticsearch.xpack.remotecluster;
 
+import org.apache.http.util.EntityUtils;
 import org.elasticsearch.action.search.SearchResponse;
 import org.elasticsearch.client.Request;
 import org.elasticsearch.client.RequestOptions;
@@ -331,66 +332,108 @@ public class RemoteClusterSecurityRestIT extends AbstractRemoteClusterSecurityTe
                 )
             );
 
-            // Check that authentication fails if we use a non-existent API key
+            // Check that authentication fails if we use a non-existent API key (when skip_unavailable=false)
+            boolean skipUnavailable = randomBoolean();
             updateClusterSettings(
                 randomBoolean()
                     ? Settings.builder()
                         .put("cluster.remote.invalid_remote.seeds", fulfillingCluster.getRemoteClusterServerEndpoint(0))
+                        .put("cluster.remote.invalid_remote.skip_unavailable", Boolean.toString(skipUnavailable))
                         .build()
                     : Settings.builder()
                         .put("cluster.remote.invalid_remote.mode", "proxy")
+                        .put("cluster.remote.invalid_remote.skip_unavailable", Boolean.toString(skipUnavailable))
                         .put("cluster.remote.invalid_remote.proxy_address", fulfillingCluster.getRemoteClusterServerEndpoint(0))
                         .build()
             );
-            final ResponseException exception4 = expectThrows(
-                ResponseException.class,
-                () -> performRequestWithRemoteSearchUser(new Request("GET", "/invalid_remote:index1/_search"))
-            );
-            assertThat(exception4.getResponse().getStatusLine().getStatusCode(), equalTo(401));
-            assertThat(exception4.getMessage(), containsString("unable to find apikey"));
+            if (skipUnavailable) {
+                /*
+                  when skip_unavailable=true, response should be something like:
+                  {"took":1,"timed_out":false,"num_reduce_phases":0,"_shards":{"total":0,"successful":0,"skipped":0,"failed":0},
+                   "_clusters":{"total":1,"successful":0,"skipped":1,"running":0,"partial":0,"failed":0,
+                                "details":{"invalid_remote":{"status":"skipped","indices":"index1","timed_out":false,
+                                "failures":[{"shard":-1,"index":null,"reason":{"type":"connect_transport_exception",
+                                             "reason":"Unable to connect to [invalid_remote]"}}]}}},
+                    "hits":{"total":{"value":0,"relation":"eq"},"max_score":null,"hits":[]}}
+                 */
+                Response invalidRemoteResponse = performRequestWithRemoteSearchUser(new Request("GET", "/invalid_remote:index1/_search"));
+                assertThat(invalidRemoteResponse.getStatusLine().getStatusCode(), equalTo(200));
+                String responseJson = EntityUtils.toString(invalidRemoteResponse.getEntity());
+                assertThat(responseJson, containsString("\"status\":\"skipped\""));
+                assertThat(responseJson, containsString("connect_transport_exception"));
+            } else {
+                final ResponseException exception4 = expectThrows(
+                    ResponseException.class,
+                    () -> performRequestWithRemoteSearchUser(new Request("GET", "/invalid_remote:index1/_search"))
+                );
+                assertThat(exception4.getResponse().getStatusLine().getStatusCode(), equalTo(401));
+                assertThat(exception4.getMessage(), containsString("unable to find apikey"));
+            }
 
-            // check that REST API key is not supported by cross cluster access
+            // check that REST API key is not supported by cross cluster access (when skip_unavailable=false)
+            skipUnavailable = randomBoolean();
             updateClusterSettings(
                 randomBoolean()
                     ? Settings.builder()
                         .put("cluster.remote.wrong_api_key_type.seeds", fulfillingCluster.getRemoteClusterServerEndpoint(0))
+                        .put("cluster.remote.wrong_api_key_type.skip_unavailable", Boolean.toString(skipUnavailable))
                         .build()
                     : Settings.builder()
                         .put("cluster.remote.wrong_api_key_type.mode", "proxy")
+                        .put("cluster.remote.wrong_api_key_type.skip_unavailable", Boolean.toString(skipUnavailable))
                         .put("cluster.remote.wrong_api_key_type.proxy_address", fulfillingCluster.getRemoteClusterServerEndpoint(0))
                         .build()
             );
-            final ResponseException exception5 = expectThrows(
-                ResponseException.class,
-                () -> performRequestWithRemoteSearchUser(new Request("GET", "/wrong_api_key_type:*/_search"))
-            );
-            assertThat(exception5.getResponse().getStatusLine().getStatusCode(), equalTo(401));
-            assertThat(
-                exception5.getMessage(),
-                containsString(
-                    "authentication expected API key type of [cross_cluster], but API key ["
-                        + REST_API_KEY_MAP_REF.get().get("id")
-                        + "] has type [rest]"
-                )
-            );
+            if (skipUnavailable) {
+                Response invalidRemoteResponse = performRequestWithRemoteSearchUser(new Request("GET", "/wrong_api_key_type:*/_search"));
+                assertThat(invalidRemoteResponse.getStatusLine().getStatusCode(), equalTo(200));
+                String responseJson = EntityUtils.toString(invalidRemoteResponse.getEntity());
+                assertThat(responseJson, containsString("\"status\":\"skipped\""));
+                assertThat(responseJson, containsString("connect_transport_exception"));
+            } else {
+                final ResponseException exception5 = expectThrows(
+                    ResponseException.class,
+                    () -> performRequestWithRemoteSearchUser(new Request("GET", "/wrong_api_key_type:*/_search"))
+                );
+                assertThat(exception5.getResponse().getStatusLine().getStatusCode(), equalTo(401));
+                assertThat(
+                    exception5.getMessage(),
+                    containsString(
+                        "authentication expected API key type of [cross_cluster], but API key ["
+                            + REST_API_KEY_MAP_REF.get().get("id")
+                            + "] has type [rest]"
+                    )
+                );
+            }
 
-            // Check invalid cross-cluster API key length is rejected
+            // Check invalid cross-cluster API key length is rejected (and gets security error when skip_unavailable=false)
+            skipUnavailable = randomBoolean();
             updateClusterSettings(
                 randomBoolean()
                     ? Settings.builder()
                         .put("cluster.remote.invalid_secret_length.seeds", fulfillingCluster.getRemoteClusterServerEndpoint(0))
+                        .put("cluster.remote.invalid_secret_length.skip_unavailable", Boolean.toString(skipUnavailable))
                         .build()
                     : Settings.builder()
                         .put("cluster.remote.invalid_secret_length.mode", "proxy")
+                        .put("cluster.remote.invalid_secret_length.skip_unavailable", Boolean.toString(skipUnavailable))
                         .put("cluster.remote.invalid_secret_length.proxy_address", fulfillingCluster.getRemoteClusterServerEndpoint(0))
                         .build()
             );
-            final ResponseException exception6 = expectThrows(
-                ResponseException.class,
-                () -> performRequestWithRemoteSearchUser(new Request("GET", "/invalid_secret_length:*/_search"))
-            );
-            assertThat(exception6.getResponse().getStatusLine().getStatusCode(), equalTo(401));
-            assertThat(exception6.getMessage(), containsString("invalid cross-cluster API key value"));
+            if (skipUnavailable) {
+                Response invalidRemoteResponse = performRequestWithRemoteSearchUser(new Request("GET", "/invalid_secret_length:*/_search"));
+                assertThat(invalidRemoteResponse.getStatusLine().getStatusCode(), equalTo(200));
+                String responseJson = EntityUtils.toString(invalidRemoteResponse.getEntity());
+                assertThat(responseJson, containsString("\"status\":\"skipped\""));
+                assertThat(responseJson, containsString("connect_transport_exception"));
+            } else {
+                final ResponseException exception6 = expectThrows(
+                    ResponseException.class,
+                    () -> performRequestWithRemoteSearchUser(new Request("GET", "/invalid_secret_length:*/_search"))
+                );
+                assertThat(exception6.getResponse().getStatusLine().getStatusCode(), equalTo(401));
+                assertThat(exception6.getMessage(), containsString("invalid cross-cluster API key value"));
+            }
         }
     }
 

+ 1 - 0
x-pack/qa/multi-cluster-search-security/legacy-with-basic-license/build.gradle

@@ -45,6 +45,7 @@ def queryingCluster = testClusters.register('querying-cluster') {
   setting 'cluster.remote.connections_per_cluster', "1"
   user username: "test_user", password: "x-pack-test-password"
 
+  setting 'cluster.remote.my_remote_cluster.skip_unavailable', 'false'
   if (proxyMode) {
     setting 'cluster.remote.my_remote_cluster.mode', 'proxy'
     setting 'cluster.remote.my_remote_cluster.proxy_address', {