
Display effective retention in the relevant data stream APIs (#112019)

Mary Gouseti 1 year ago
parent
commit
ed60470518
16 changed files with 595 additions and 58 deletions
  1. docs/changelog/112019.yaml (+5 -0)
  2. docs/reference/data-streams/lifecycle/apis/get-lifecycle.asciidoc (+6 -2)
  3. docs/reference/data-streams/lifecycle/index.asciidoc (+11 -7)
  4. docs/reference/data-streams/lifecycle/tutorial-manage-data-stream-retention.asciidoc (+215 -0)
  5. docs/reference/data-streams/lifecycle/tutorial-manage-new-data-stream.asciidoc (+8 -5)
  6. docs/reference/data-streams/lifecycle/tutorial-migrate-data-stream-from-ilm-to-dsl.asciidoc (+21 -17)
  7. modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/lifecycle/DataStreamGlobalRetentionIT.java (+190 -0)
  8. modules/data-streams/src/main/java/org/elasticsearch/datastreams/lifecycle/rest/RestExplainDataStreamLifecycleAction.java (+7 -0)
  9. modules/data-streams/src/main/java/org/elasticsearch/datastreams/lifecycle/rest/RestGetDataStreamLifecycleAction.java (+7 -0)
  10. modules/data-streams/src/main/java/org/elasticsearch/datastreams/rest/RestGetDataStreamsAction.java (+7 -0)
  11. modules/data-streams/src/yamlRestTest/resources/rest-api-spec/test/data_stream/lifecycle/40_effective_retention.yml (+104 -0)
  12. server/src/main/java/org/elasticsearch/action/datastreams/GetDataStreamAction.java (+1 -1)
  13. server/src/main/java/org/elasticsearch/action/datastreams/lifecycle/ExplainDataStreamLifecycleAction.java (+1 -1)
  14. server/src/main/java/org/elasticsearch/action/datastreams/lifecycle/GetDataStreamLifecycleAction.java (+1 -1)
  15. server/src/main/java/org/elasticsearch/cluster/metadata/DataStreamLifecycle.java (+6 -8)
  16. server/src/test/java/org/elasticsearch/cluster/metadata/DataStreamLifecycleTests.java (+5 -16)

+ 5 - 0
docs/changelog/112019.yaml

@@ -0,0 +1,5 @@
+pr: 112019
+summary: Display effective retention in the relevant data stream APIs
+area: Data streams
+type: enhancement
+issues: []

+ 6 - 2
docs/reference/data-streams/lifecycle/apis/get-lifecycle.asciidoc

@@ -128,14 +128,18 @@ The response will look like the following:
       "name": "my-data-stream-1",
       "lifecycle": {
         "enabled": true,
-        "data_retention": "7d"
+        "data_retention": "7d",
+        "effective_retention": "7d",
+        "retention_determined_by": "data_stream_configuration"
       }
     },
     {
       "name": "my-data-stream-2",
       "lifecycle": {
         "enabled": true,
-        "data_retention": "7d"
+        "data_retention": "7d",
+        "effective_retention": "7d",
+        "retention_determined_by": "data_stream_configuration"
       }
     }
   ]

+ 11 - 7
docs/reference/data-streams/lifecycle/index.asciidoc

@@ -14,10 +14,11 @@ To achieve that, it supports:
 * Automatic <<index-rollover,rollover>>, which chunks your incoming data in smaller pieces to facilitate better performance
 and backwards incompatible mapping changes.
 * Configurable retention, which allows you to configure the time period for which your data is guaranteed to be stored.
-{es} is allowed at a later time to delete data older than this time period.
+{es} is allowed at a later time to delete data older than this time period. Retention can be configured on the data stream level
+or on a global level. Read more about the different options in this <<tutorial-manage-data-stream-retention,tutorial>>.
 
 A data stream lifecycle also supports downsampling the data stream backing indices.
-See <<data-streams-put-lifecycle-downsampling-example, the downsampling example>> for 
+See <<data-streams-put-lifecycle-downsampling-example, the downsampling example>> for
 more details.
 
 [discrete]
@@ -33,16 +34,17 @@ each data stream and performs the following steps:
 3. After an index is not the write index anymore (i.e. the data stream has been rolled over),
 automatically tail merges the index. Data stream lifecycle executes a merge operation that only targets
 the long tail of small segments instead of the whole shard. As the segments are organised
-into tiers of exponential sizes, merging the long tail of small segments is only a 
+into tiers of exponential sizes, merging the long tail of small segments is only a
 fraction of the cost of force merging to a single segment. The small segments would usually
 hold the most recent data so tail merging will focus the merging resources on the higher-value
 data that is most likely to keep being queried.
-4. If <<data-streams-put-lifecycle-downsampling-example, downsampling>> is configured it will execute 
+4. If <<data-streams-put-lifecycle-downsampling-example, downsampling>> is configured it will execute
 all the configured downsampling rounds.
 5. Applies retention to the remaining backing indices. This means deleting the backing indices whose
-`generation_time` is longer than the configured retention period. The `generation_time` is only applicable to rolled over backing
-indices and it is either the time since the backing index got rolled over, or the time optionally configured in the
-<<index-data-stream-lifecycle-origination-date,`index.lifecycle.origination_date`>> setting.
+`generation_time` is longer than the effective retention period (read more about the
+<<effective-retention-calculation, effective retention calculation>>). The `generation_time` is only applicable to rolled
+over backing indices and it is either the time since the backing index got rolled over, or the time optionally configured
+in the <<index-data-stream-lifecycle-origination-date,`index.lifecycle.origination_date`>> setting.
 
 IMPORTANT: We use the `generation_time` instead of the creation time because this ensures that all data in the backing
 index have passed the retention period. As a result, the retention period is not the exact time data gets deleted, but
@@ -75,4 +77,6 @@ include::tutorial-manage-new-data-stream.asciidoc[]
 
 include::tutorial-manage-existing-data-stream.asciidoc[]
 
+include::tutorial-manage-data-stream-retention.asciidoc[]
+
 include::tutorial-migrate-data-stream-from-ilm-to-dsl.asciidoc[]

+ 215 - 0
docs/reference/data-streams/lifecycle/tutorial-manage-data-stream-retention.asciidoc

@@ -0,0 +1,215 @@
+[role="xpack"]
+[[tutorial-manage-data-stream-retention]]
+=== Tutorial: Data stream retention
+
+In this tutorial, we are going to go over data stream lifecycle retention; we will define it, go over how it can be configured,
+and how it gets applied. Keep in mind that the following options apply only to data streams that are managed by the data stream lifecycle.
+
+. <<what-is-retention>>
+. <<retention-configuration>>
+. <<effective-retention-calculation>>
+. <<effective-retention-application>>
+
+You can verify if a data stream is managed by the data stream lifecycle via the <<data-streams-get-lifecycle,get data stream lifecycle API>>:
+
+////
+[source,console]
+----
+PUT /_index_template/template
+{
+  "index_patterns": ["my-data-stream*"],
+  "template": {
+    "lifecycle": {}
+  },
+  "data_stream": { }
+}
+
+PUT /_data_stream/my-data-stream
+----
+// TESTSETUP
+////
+
+////
+[source,console]
+----
+DELETE /_data_stream/my-data-stream*
+DELETE /_index_template/template
+PUT /_cluster/settings
+{
+  "persistent" : {
+    "data_streams.lifecycle.retention.*" : null
+  }
+}
+----
+// TEARDOWN
+////
+
+[source,console]
+--------------------------------------------------
+GET _data_stream/my-data-stream/_lifecycle
+--------------------------------------------------
+
+The result should look like this:
+
+[source,console-result]
+--------------------------------------------------
+{
+  "data_streams": [
+    {
+      "name": "my-data-stream",                                   <1>
+      "lifecycle": {
+        "enabled": true                                           <2>
+      }
+    }
+  ]
+}
+--------------------------------------------------
+// TESTRESPONSE[skip:the result is for illustrating purposes only]
+<1> The name of your data stream.
+<2> Ensure that the lifecycle is enabled, meaning this should be `true`.
+
+[discrete]
+[[what-is-retention]]
+==== What is data stream retention?
+
+We define retention as the minimum amount of time the data of a data stream are going to be kept in {es}. After this time period
+has passed, {es} is allowed to remove these data to free up space and/or manage costs.
+
+NOTE: Retention does not define when the data will be removed, but the minimum time period for which they will be kept.
+
+We define four different types of retention:
+
+* The data stream retention, or `data_retention`, which is the retention configured on the data stream level. It can be
+set via an <<index-templates,index template>> for future data streams or via the <<data-streams-put-lifecycle, PUT data
+stream lifecycle API>> for an existing data stream. When the data stream retention is not set, it implies that the data
+need to be kept forever.
+* The global default retention, let's call it `default_retention`, which is a retention configured via the cluster setting
+<<data-streams-lifecycle-retention-default, `data_streams.lifecycle.retention.default`>> and will be
+applied to all data streams managed by data stream lifecycle that do not have `data_retention` configured. Effectively,
+it ensures that there will be no data streams keeping their data forever. This can be set via the
+<<cluster-update-settings, update cluster settings API>>.
+* The global max retention, let's call it `max_retention`, which is a retention configured via the cluster setting
+<<data-streams-lifecycle-retention-max, `data_streams.lifecycle.retention.max`>> and will be applied to
+all data streams managed by data stream lifecycle. Effectively, it ensures that there will be no data streams whose retention
+will exceed this time period. This can be set via the <<cluster-update-settings, update cluster settings API>>.
+* The effective retention, or `effective_retention`, which is the retention applied to a data stream at a given moment.
+Effective retention cannot be set directly; it is derived by taking into account all the configured retention options
+listed above and is calculated as described <<effective-retention-calculation,here>>.
+
+[discrete]
+[[retention-configuration]]
+==== How to configure retention?
+
+- By setting the `data_retention` on the data stream level. This retention can be configured in two ways:
++
+-- For new data streams, it can be defined in the index template that would be applied during the data stream's creation.
+You can use the <<indices-put-template,create index template API>>, for example:
++
+[source,console]
+--------------------------------------------------
+PUT _index_template/template
+{
+  "index_patterns": ["my-data-stream*"],
+  "data_stream": { },
+  "priority": 500,
+  "template": {
+    "lifecycle": {
+      "data_retention": "7d"
+    }
+  },
+  "_meta": {
+    "description": "Template with data stream lifecycle"
+  }
+}
+--------------------------------------------------
+-- For an existing data stream, it can be set via the <<data-streams-put-lifecycle, PUT lifecycle API>>.
++
+[source,console]
+----
+PUT _data_stream/my-data-stream/_lifecycle
+{
+  "data_retention": "30d" <1>
+}
+----
+// TEST[continued]
+<1> The retention period of this data stream is set to 30 days.
+
+- By setting the global retention via the `data_streams.lifecycle.retention.default` and/or `data_streams.lifecycle.retention.max`
+settings, which are configured on the cluster level. They can be set via the <<cluster-update-settings, update cluster settings API>>. For example:
++
+[source,console]
+--------------------------------------------------
+PUT /_cluster/settings
+{
+  "persistent" : {
+    "data_streams.lifecycle.retention.default" : "7d",
+    "data_streams.lifecycle.retention.max" : "90d"
+  }
+}
+--------------------------------------------------
+// TEST[continued]
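+
+To verify which global retention settings are currently in effect, you can query the cluster settings. This is a minimal
+example; the `filter_path` query parameter is used here only to narrow the response down to the retention settings:
+
+[source,console]
+----
+GET /_cluster/settings?filter_path=persistent.data_streams.lifecycle.retention.*
+----
+// TEST[continued]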
+
+[discrete]
+[[effective-retention-calculation]]
+==== How is the effective retention calculated?
+The effective retention is calculated in the following way:
+
+- The `effective_retention` is the `default_retention`, when `default_retention` is defined and the data stream does not
+have `data_retention`.
+- The `effective_retention` is the `data_retention`, when `data_retention` is defined and, if `max_retention` is also
+defined, `data_retention` does not exceed `max_retention`.
+- The `effective_retention` is the `max_retention`, when `max_retention` is defined, and the data stream has either no
+`data_retention` or its `data_retention` is greater than the `max_retention`.
+
+The above is demonstrated in the examples below:
+
+|===
+|`default_retention`    |`max_retention`    |`data_retention`   |`effective_retention`  |Retention determined by
+
+|Not set                |Not set            |Not set            |Infinite               |N/A
+|Not relevant           |12 months          |**30 days**        |30 days                |`data_retention`
+|Not relevant           |Not set            |**30 days**        |30 days                |`data_retention`
+|**30 days**            |12 months          |Not set            |30 days                |`default_retention`
+|**30 days**            |30 days            |Not set            |30 days                |`default_retention`
+|Not relevant           |**30 days**        |12 months          |30 days                |`max_retention`
+|Not set                |**30 days**        |Not set            |30 days                |`max_retention`
+|===
+
+Considering our example, if we retrieve the lifecycle of `my-data-stream`:
+[source,console]
+----
+GET _data_stream/my-data-stream/_lifecycle
+----
+// TEST[continued]
+
+We see that it remains the same as what the user configured:
+[source,console-result]
+----
+{
+  "data_streams": [
+    {
+      "name": "my-data-stream",
+      "lifecycle": {
+        "enabled": true,
+        "data_retention": "30d",
+        "effective_retention": "30d",
+        "retention_determined_by": "data_stream_configuration"
+      }
+    }
+  ]
+}
+----
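+
+As an illustration of the third rule of the calculation, consider a hypothetical follow-up in which we lower the global
+max retention below the data stream retention of `my-data-stream`:
+
+[source,console]
+----
+PUT /_cluster/settings
+{
+  "persistent" : {
+    "data_streams.lifecycle.retention.max" : "14d"
+  }
+}
+----
+// TEST[continued]
+
+If we retrieve the lifecycle again, the effective retention is now capped by the max retention:
+
+[source,console]
+----
+GET _data_stream/my-data-stream/_lifecycle
+----
+// TEST[continued]
+
+[source,console-result]
+----
+{
+  "data_streams": [
+    {
+      "name": "my-data-stream",
+      "lifecycle": {
+        "enabled": true,
+        "data_retention": "30d",
+        "effective_retention": "14d",
+        "retention_determined_by": "max_global_retention"
+      }
+    }
+  ]
+}
+----
+// TESTRESPONSE[skip:the result is for illustrating purposes only]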
+
+[discrete]
+[[effective-retention-application]]
+==== How is the effective retention applied?
+
+Retention is applied to the remaining backing indices of a data stream as the last step of
+<<data-streams-lifecycle-how-it-works, a data stream lifecycle run>>. Data stream lifecycle will retrieve the backing indices
+whose `generation_time` is longer than the effective retention period and delete them. The `generation_time` is only
+applicable to rolled over backing indices and it is either the time since the backing index got rolled over, or the time
+optionally configured in the <<index-data-stream-lifecycle-origination-date,`index.lifecycle.origination_date`>> setting.
+
+IMPORTANT: We use the `generation_time` instead of the creation time because this ensures that all data in the backing
+index have passed the retention period. As a result, the retention period is not the exact time data gets deleted, but
+the minimum time data will be stored.
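+
+If you want to inspect how retention will be applied to each backing index of a data stream, you can use the
+<<data-streams-explain-lifecycle,explain data stream lifecycle API>>; its response reports, per backing index, the same
+lifecycle information including the `effective_retention` and `retention_determined_by` fields:
+
+[source,console]
+----
+GET my-data-stream/_lifecycle/explain
+----
+// TEST[continued]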

+ 8 - 5
docs/reference/data-streams/lifecycle/tutorial-manage-new-data-stream.asciidoc

@@ -91,10 +91,12 @@ The result will look like this:
 {
   "data_streams": [
     {
-      "name": "my-data-stream",<1>
+      "name": "my-data-stream",                                     <1>
       "lifecycle": {
-        "enabled": true,       <2>
-        "data_retention": "7d" <3>
+        "enabled": true,                                            <2>
+        "data_retention": "7d",                                     <3>
+        "effective_retention": "7d",                                <4>
+        "retention_determined_by": "data_stream_configuration"
       }
     }
   ]
@@ -102,8 +104,9 @@ The result will look like this:
 --------------------------------------------------
 <1> The name of your data stream.
 <2> Shows if the data stream lifecycle is enabled for this data stream.
-<3> The retention period of the data indexed in this data stream, this means that the data in this data stream will
-be kept at least for 7 days. After that {es} can delete it at its own discretion.
+<3> The retention period of the data indexed in this data stream, as configured by the user.
+<4> The retention period that will be applied by the data stream lifecycle. This means that the data in this data stream will
+    be kept for at least 7 days. After that {es} can delete it at its own discretion.
 
 If you want to see more information about how the data stream lifecycle is applied on individual backing indices use the
 <<data-streams-explain-lifecycle,explain data stream lifecycle API>>:

+ 21 - 17
docs/reference/data-streams/lifecycle/tutorial-migrate-data-stream-from-ilm-to-dsl.asciidoc

@@ -1,14 +1,14 @@
 [role="xpack"]
 [[tutorial-migrate-data-stream-from-ilm-to-dsl]]
-=== Tutorial: Migrate ILM managed data stream to data stream lifecycle 
+=== Tutorial: Migrate ILM managed data stream to data stream lifecycle
 
 In this tutorial we'll look at migrating an existing data stream from <<index-lifecycle-management,Index Lifecycle Management ({ilm-init})>> to
-<<data-stream-lifecycle,data stream lifecycle>>. The existing {ilm-init} managed backing indices will continue 
+<<data-stream-lifecycle,data stream lifecycle>>. The existing {ilm-init} managed backing indices will continue
 to be managed by {ilm-init} until they age out and get deleted by {ilm-init}; however,
-the new backing indices will be managed by data stream lifecycle. 
-This way, a data stream is gradually migrated away from being managed by {ilm-init} to 
+the new backing indices will be managed by data stream lifecycle.
+This way, a data stream is gradually migrated away from being managed by {ilm-init} to
 being managed by data stream lifecycle. As we'll see, {ilm-init} and data stream lifecycle
-can co-manage a data stream; however, an index can only be managed by one system at 
+can co-manage a data stream; however, an index can only be managed by one system at
 a time.
 
 [discrete]
@@ -17,7 +17,7 @@ a time.
 To migrate a data stream from {ilm-init} to data stream lifecycle we'll have to execute
 two steps:
 
-1. Update the index template that's backing the data stream to set <<index-lifecycle-prefer-ilm, prefer_ilm>> 
+1. Update the index template that's backing the data stream to set <<index-lifecycle-prefer-ilm, prefer_ilm>>
 to `false`, and to configure data stream lifecycle.
 2. Configure the data stream lifecycle for the _existing_ data stream using
 the <<data-streams-put-lifecycle, lifecycle API>>.
@@ -174,8 +174,8 @@ in the index template).
 To migrate the `dsl-data-stream` to data stream lifecycle we'll have to execute
 two steps:
 
-1. Update the index template that's backing the data stream to set <<index-lifecycle-prefer-ilm, prefer_ilm>> 
-to `false`, and to configure data stream lifecycle. 
+1. Update the index template that's backing the data stream to set <<index-lifecycle-prefer-ilm, prefer_ilm>>
+to `false`, and to configure data stream lifecycle.
 2. Configure the data stream lifecycle for the _existing_ `dsl-data-stream` using
 the <<data-streams-put-lifecycle, lifecycle API>>.
 
@@ -209,9 +209,9 @@ PUT _index_template/dsl-data-stream-template
 // TEST[continued]
 
 <1> The `prefer_ilm` setting will now be configured on the **new** backing indices
-(created by rolling over the data stream) such that {ilm-init} does _not_ take 
+(created by rolling over the data stream) such that {ilm-init} does _not_ take
 precedence over data stream lifecycle.
-<2> We're configuring the data stream lifecycle so _new_ data streams will be 
+<2> We're configuring the data stream lifecycle so _new_ data streams will be
 managed by data stream lifecycle.
 
 We've now made sure that new data streams will be managed by data stream lifecycle.
@@ -227,7 +227,7 @@ PUT _data_stream/dsl-data-stream/_lifecycle
 ----
 // TEST[continued]
 
-We can inspect the data stream to check that the next generation will indeed be 
+We can inspect the data stream to check that the next generation will indeed be
 managed by data stream lifecycle:
 
 [source,console]
@@ -266,7 +266,9 @@ GET _data_stream/dsl-data-stream
       "template": "dsl-data-stream-template",
       "lifecycle": {
         "enabled": true,
-        "data_retention": "7d"
+        "data_retention": "7d",
+        "effective_retention": "7d",
+        "retention_determined_by": "data_stream_configuration"
       },
       "ilm_policy": "pre-dsl-ilm-policy",
       "next_generation_managed_by": "Data stream lifecycle",         <3>
@@ -292,7 +294,7 @@ GET _data_stream/dsl-data-stream
 <4> The `prefer_ilm` setting value we configured in the index template is reflected
 and will be configured accordingly for new backing indices.
 
-We'll now rollover the data stream to see the new generation index being managed by 
+We'll now rollover the data stream to see the new generation index being managed by
 data stream lifecycle:
 
 [source,console]
@@ -344,7 +346,9 @@ GET _data_stream/dsl-data-stream
       "template": "dsl-data-stream-template",
       "lifecycle": {
         "enabled": true,
-        "data_retention": "7d"
+        "data_retention": "7d",
+        "effective_retention": "7d",
+        "retention_determined_by": "data_stream_configuration"
       },
       "ilm_policy": "pre-dsl-ilm-policy",
       "next_generation_managed_by": "Data stream lifecycle",
@@ -375,9 +379,9 @@ in the index template
 [discrete]
 [[migrate-from-dsl-to-ilm]]
 ==== Migrate data stream back to ILM
-We can easily change this data stream to be managed by {ilm-init} because we didn't remove 
-the {ilm-init} policy when we <<update-index-template-for-dsl, updated 
-the index template>>. 
+We can easily change this data stream to be managed by {ilm-init} because we didn't remove
+the {ilm-init} policy when we <<update-index-template-for-dsl, updated
+the index template>>.
 
 We can achieve this in two ways:
 

+ 190 - 0
modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/lifecycle/DataStreamGlobalRetentionIT.java

@@ -0,0 +1,190 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+package org.elasticsearch.datastreams.lifecycle;
+
+import org.elasticsearch.client.Request;
+import org.elasticsearch.client.Response;
+import org.elasticsearch.client.WarningFailureException;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.datastreams.DisabledSecurityDataStreamTestCase;
+import org.junit.After;
+import org.junit.Before;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.TimeUnit;
+
+import static org.hamcrest.Matchers.containsString;
+import static org.hamcrest.Matchers.is;
+import static org.hamcrest.Matchers.nullValue;
+
+public class DataStreamGlobalRetentionIT extends DisabledSecurityDataStreamTestCase {
+
+    @Before
+    public void setup() throws IOException {
+        updateClusterSettings(
+            Settings.builder()
+                .put("data_streams.lifecycle.poll_interval", "1s")
+                .put("cluster.lifecycle.default.rollover", "min_docs=1,max_docs=1")
+                .build()
+        );
+        // Create a template with the default lifecycle
+        Request putComposableIndexTemplateRequest = new Request("POST", "/_index_template/1");
+        putComposableIndexTemplateRequest.setJsonEntity("""
+            {
+              "index_patterns": ["my-data-stream*"],
+              "data_stream": {},
+              "template": {
+                "lifecycle": {}
+              }
+            }
+            """);
+        assertOK(client().performRequest(putComposableIndexTemplateRequest));
+
+        // Create a data stream with one doc
+        Request createDocRequest = new Request("POST", "/my-data-stream/_doc?refresh=true");
+        createDocRequest.setJsonEntity("{ \"@timestamp\": \"2022-12-12\"}");
+        assertOK(client().performRequest(createDocRequest));
+    }
+
+    @After
+    public void cleanUp() throws IOException {
+        adminClient().performRequest(new Request("DELETE", "_data_stream/*"));
+        updateClusterSettings(
+            Settings.builder().putNull("data_streams.lifecycle.retention.default").putNull("data_streams.lifecycle.retention.max").build()
+        );
+    }
+
+    @SuppressWarnings("unchecked")
+    public void testDataStreamRetention() throws Exception {
+        // Set global retention and add retention to the data stream
+        {
+            updateClusterSettings(
+                Settings.builder()
+                    .put("data_streams.lifecycle.retention.default", "7d")
+                    .put("data_streams.lifecycle.retention.default", "90d")
+                    .build()
+            );
+            Request request = new Request("PUT", "_data_stream/my-data-stream/_lifecycle");
+            request.setJsonEntity("""
+                {
+                  "data_retention": "10s"
+                }""");
+            assertAcknowledged(client().performRequest(request));
+        }
+
+        // Verify that the effective retention matches the data stream retention
+        {
+            Request request = new Request("GET", "/_data_stream/my-data-stream");
+            Response response = client().performRequest(request);
+            List<Object> dataStreams = (List<Object>) entityAsMap(response).get("data_streams");
+            assertThat(dataStreams.size(), is(1));
+            Map<String, Object> dataStream = (Map<String, Object>) dataStreams.get(0);
+            assertThat(dataStream.get("name"), is("my-data-stream"));
+            Map<String, Object> lifecycle = (Map<String, Object>) dataStream.get("lifecycle");
+            assertThat(lifecycle.get("effective_retention"), is("10s"));
+            assertThat(lifecycle.get("retention_determined_by"), is("data_stream_configuration"));
+            assertThat(lifecycle.get("data_retention"), is("10s"));
+        }
+
+        // Verify that the first generation index was removed
+        assertBusy(() -> {
+            Response response = client().performRequest(new Request("GET", "/_data_stream/my-data-stream"));
+            Map<String, Object> dataStream = ((List<Map<String, Object>>) entityAsMap(response).get("data_streams")).get(0);
+            assertThat(dataStream.get("name"), is("my-data-stream"));
+            List<Object> backingIndices = (List<Object>) dataStream.get("indices");
+            assertThat(backingIndices.size(), is(1));
+            // 2 backing indices created + 1 for the deleted index
+            assertThat(dataStream.get("generation"), is(3));
+        }, 20, TimeUnit.SECONDS);
+    }
+
+    @SuppressWarnings("unchecked")
+    public void testDefaultRetention() throws Exception {
+        // Set default global retention
+        updateClusterSettings(Settings.builder().put("data_streams.lifecycle.retention.default", "10s").build());
+
+        // Verify that the effective retention matches the default retention
+        {
+            Request request = new Request("GET", "/_data_stream/my-data-stream");
+            Response response = client().performRequest(request);
+            List<Object> dataStreams = (List<Object>) entityAsMap(response).get("data_streams");
+            assertThat(dataStreams.size(), is(1));
+            Map<String, Object> dataStream = (Map<String, Object>) dataStreams.get(0);
+            assertThat(dataStream.get("name"), is("my-data-stream"));
+            Map<String, Object> lifecycle = (Map<String, Object>) dataStream.get("lifecycle");
+            assertThat(lifecycle.get("effective_retention"), is("10s"));
+            assertThat(lifecycle.get("retention_determined_by"), is("default_global_retention"));
+            assertThat(lifecycle.get("data_retention"), nullValue());
+        }
+
+        // Verify that the first generation index was removed
+        assertBusy(() -> {
+            Response response = client().performRequest(new Request("GET", "/_data_stream/my-data-stream"));
+            Map<String, Object> dataStream = ((List<Map<String, Object>>) entityAsMap(response).get("data_streams")).get(0);
+            assertThat(dataStream.get("name"), is("my-data-stream"));
+            List<Object> backingIndices = (List<Object>) dataStream.get("indices");
+            assertThat(backingIndices.size(), is(1));
+            // 2 backing indices created + 1 for the deleted index
+            assertThat(dataStream.get("generation"), is(3));
+        }, 20, TimeUnit.SECONDS);
+    }
+
+    @SuppressWarnings("unchecked")
+    public void testMaxRetention() throws Exception {
+        // Set max global retention
+        updateClusterSettings(Settings.builder().put("data_streams.lifecycle.retention.max", "10s").build());
+        boolean withDataStreamLevelRetention = randomBoolean();
+        if (withDataStreamLevelRetention) {
+            try {
+                Request request = new Request("PUT", "_data_stream/my-data-stream/_lifecycle");
+                request.setJsonEntity("""
+                    {
+                      "data_retention": "30d"
+                    }""");
+                assertAcknowledged(client().performRequest(request));
+                fail("Should have returned a warning about data retention exceeding the max retention");
+            } catch (WarningFailureException warningFailureException) {
+                assertThat(
+                    warningFailureException.getMessage(),
+                    containsString("The retention provided [30d] is exceeding the max allowed data retention of this project [10s]")
+                );
+            }
+        }
+
+        // Verify that the effective retention matches the max retention
+        {
+            Request request = new Request("GET", "/_data_stream/my-data-stream");
+            Response response = client().performRequest(request);
+            List<Object> dataStreams = (List<Object>) entityAsMap(response).get("data_streams");
+            assertThat(dataStreams.size(), is(1));
+            Map<String, Object> dataStream = (Map<String, Object>) dataStreams.get(0);
+            assertThat(dataStream.get("name"), is("my-data-stream"));
+            Map<String, Object> lifecycle = (Map<String, Object>) dataStream.get("lifecycle");
+            assertThat(lifecycle.get("effective_retention"), is("10s"));
+            assertThat(lifecycle.get("retention_determined_by"), is("max_global_retention"));
+            if (withDataStreamLevelRetention) {
+                assertThat(lifecycle.get("data_retention"), is("30d"));
+            } else {
+                assertThat(lifecycle.get("data_retention"), nullValue());
+            }
+        }
+
+        // Verify that the first generation index was removed
+        assertBusy(() -> {
+            Response response = client().performRequest(new Request("GET", "/_data_stream/my-data-stream"));
+            Map<String, Object> dataStream = ((List<Map<String, Object>>) entityAsMap(response).get("data_streams")).get(0);
+            assertThat(dataStream.get("name"), is("my-data-stream"));
+            List<Object> backingIndices = (List<Object>) dataStream.get("indices");
+            assertThat(backingIndices.size(), is(1));
+            // 2 backing indices created + 1 for the deleted index
+            assertThat(dataStream.get("generation"), is(3));
+        }, 20, TimeUnit.SECONDS);
+    }
+}

+ 7 - 0
modules/data-streams/src/main/java/org/elasticsearch/datastreams/lifecycle/rest/RestExplainDataStreamLifecycleAction.java

@@ -11,6 +11,7 @@ package org.elasticsearch.datastreams.lifecycle.rest;
 import org.elasticsearch.action.datastreams.lifecycle.ExplainDataStreamLifecycleAction;
 import org.elasticsearch.action.support.IndicesOptions;
 import org.elasticsearch.client.internal.node.NodeClient;
+import org.elasticsearch.cluster.metadata.DataStreamLifecycle;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.rest.BaseRestHandler;
 import org.elasticsearch.rest.RestRequest;
@@ -19,6 +20,7 @@ import org.elasticsearch.rest.ServerlessScope;
 import org.elasticsearch.rest.action.RestRefCountedChunkedToXContentListener;
 
 import java.util.List;
+import java.util.Set;
 
 import static org.elasticsearch.rest.RestRequest.Method.GET;
 import static org.elasticsearch.rest.RestUtils.getMasterNodeTimeout;
@@ -56,4 +58,9 @@ public class RestExplainDataStreamLifecycleAction extends BaseRestHandler {
     public boolean allowSystemIndexAccessByDefault() {
         return true;
     }
+
+    @Override
+    public Set<String> supportedCapabilities() {
+        return Set.of(DataStreamLifecycle.EFFECTIVE_RETENTION_REST_API_CAPABILITY);
+    }
 }

+ 7 - 0
modules/data-streams/src/main/java/org/elasticsearch/datastreams/lifecycle/rest/RestGetDataStreamLifecycleAction.java

@@ -10,6 +10,7 @@ package org.elasticsearch.datastreams.lifecycle.rest;
 import org.elasticsearch.action.datastreams.lifecycle.GetDataStreamLifecycleAction;
 import org.elasticsearch.action.support.IndicesOptions;
 import org.elasticsearch.client.internal.node.NodeClient;
+import org.elasticsearch.cluster.metadata.DataStreamLifecycle;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.rest.BaseRestHandler;
 import org.elasticsearch.rest.RestRequest;
@@ -19,6 +20,7 @@ import org.elasticsearch.rest.ServerlessScope;
 import org.elasticsearch.rest.action.RestRefCountedChunkedToXContentListener;
 
 import java.util.List;
+import java.util.Set;
 
 import static org.elasticsearch.rest.RestRequest.Method.GET;
 
@@ -54,4 +56,9 @@ public class RestGetDataStreamLifecycleAction extends BaseRestHandler {
     public boolean allowSystemIndexAccessByDefault() {
         return true;
     }
+
+    @Override
+    public Set<String> supportedCapabilities() {
+        return Set.of(DataStreamLifecycle.EFFECTIVE_RETENTION_REST_API_CAPABILITY);
+    }
 }

+ 7 - 0
modules/data-streams/src/main/java/org/elasticsearch/datastreams/rest/RestGetDataStreamsAction.java

@@ -10,6 +10,7 @@ package org.elasticsearch.datastreams.rest;
 import org.elasticsearch.action.datastreams.GetDataStreamAction;
 import org.elasticsearch.action.support.IndicesOptions;
 import org.elasticsearch.client.internal.node.NodeClient;
+import org.elasticsearch.cluster.metadata.DataStreamLifecycle;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.rest.BaseRestHandler;
 import org.elasticsearch.rest.RestRequest;
@@ -19,6 +20,7 @@ import org.elasticsearch.rest.ServerlessScope;
 import org.elasticsearch.rest.action.RestToXContentListener;
 
 import java.util.List;
+import java.util.Set;
 
 import static org.elasticsearch.rest.RestRequest.Method.GET;
 
@@ -50,4 +52,9 @@ public class RestGetDataStreamsAction extends BaseRestHandler {
     public boolean allowSystemIndexAccessByDefault() {
         return true;
     }
+
+    @Override
+    public Set<String> supportedCapabilities() {
+        return Set.of(DataStreamLifecycle.EFFECTIVE_RETENTION_REST_API_CAPABILITY);
+    }
 }

+ 104 - 0
modules/data-streams/src/yamlRestTest/resources/rest-api-spec/test/data_stream/lifecycle/40_effective_retention.yml

@@ -0,0 +1,104 @@
+setup:
+  - requires:
+      cluster_features: [ "gte_v8.11.0" ]
+      reason: "Data stream lifecycle was released as tech preview in 8.11"
+      test_runner_features: allowed_warnings
+  - do:
+      allowed_warnings:
+        - "index template [template-with-lifecycle] has index patterns [managed-data-stream] matching patterns from existing older templates [global] with patterns (global => [*]); this template [template-with-lifecycle] will take precedence during new index creation"
+      indices.put_index_template:
+        name: template-with-lifecycle
+        body:
+          index_patterns: [ managed-data-stream ]
+          template:
+            settings:
+              index.number_of_replicas: 0
+            lifecycle:
+              data_retention: "30d"
+          data_stream: { }
+  - do:
+      indices.create_data_stream:
+        name: managed-data-stream
+---
+teardown:
+  - do:
+      cluster.put_settings:
+        body:
+          persistent:
+            data_streams.lifecycle.retention.max: null
+            data_streams.lifecycle.retention.default: null
+
+---
+"Retrieve effective retention via the data stream API":
+  - requires:
+      reason: "Effective retention was exposed in 8.16+"
+      test_runner_features: [capabilities]
+      capabilities:
+        - method: GET
+          path: /_data_stream/{index}
+          capabilities: [ 'data_stream_lifecycle_effective_retention' ]
+  - do:
+      indices.get_data_stream:
+        name: "managed-data-stream"
+  - match: { data_streams.0.name: managed-data-stream }
+  - match: { data_streams.0.lifecycle.data_retention: '30d' }
+  - match: { data_streams.0.lifecycle.effective_retention: '30d'}
+  - match: { data_streams.0.lifecycle.retention_determined_by: 'data_stream_configuration'}
+
+---
+"Retrieve effective retention with explain":
+  - requires:
+      reason: "Effective retention was exposed in 8.16+"
+      test_runner_features: [capabilities]
+      capabilities:
+        - method: GET
+          path: /{index}/_lifecycle/explain
+          capabilities: [ 'data_stream_lifecycle_effective_retention' ]
+  - do:
+      cluster.put_settings:
+        body:
+          persistent:
+            data_streams.lifecycle.retention.max: "7d"
+  - is_true: acknowledged
+  - do:
+      indices.get_data_stream:
+        name: "managed-data-stream"
+  - match: { data_streams.0.name: managed-data-stream }
+  - set:
+      data_streams.0.indices.0.index_name: backing_index
+
+  - do:
+      indices.explain_data_lifecycle:
+        index: managed-data-stream
+        include_defaults: true
+  - match: { indices.$backing_index.managed_by_lifecycle: true }
+  - match: { indices.$backing_index.lifecycle.data_retention: '30d' }
+  - match: { indices.$backing_index.lifecycle.effective_retention: '7d' }
+  - match: { indices.$backing_index.lifecycle.retention_determined_by: 'max_global_retention' }
+
+---
+"Retrieve effective retention with data stream lifecycle":
+  - requires:
+      reason: "Effective retention was exposed in 8.16+"
+      test_runner_features: [capabilities]
+      capabilities:
+        - method: GET
+          path: /_data_stream/{index}/_lifecycle
+          capabilities: [ 'data_stream_lifecycle_effective_retention' ]
+  - do:
+      indices.put_data_lifecycle:
+        name: "managed-data-stream"
+        body: {}
+  - is_true: acknowledged
+  - do:
+      cluster.put_settings:
+        body:
+          persistent:
+            data_streams.lifecycle.retention.default: "7d"
+  - do:
+      indices.get_data_lifecycle:
+        name: "managed-data-stream"
+  - length: { data_streams: 1}
+  - match: { data_streams.0.name: managed-data-stream }
+  - match: { data_streams.0.lifecycle.effective_retention: '7d' }
+  - match: { data_streams.0.lifecycle.retention_determined_by: 'default_global_retention' }

+ 1 - 1
server/src/main/java/org/elasticsearch/action/datastreams/GetDataStreamAction.java

@@ -556,7 +556,7 @@ public class GetDataStreamAction extends ActionType<GetDataStreamAction.Response
             for (DataStreamInfo dataStream : dataStreams) {
                 dataStream.toXContent(
                     builder,
-                    DataStreamLifecycle.maybeAddEffectiveRetentionParams(params),
+                    DataStreamLifecycle.addEffectiveRetentionParams(params),
                     rolloverConfiguration,
                     globalRetention
                 );

+ 1 - 1
server/src/main/java/org/elasticsearch/action/datastreams/lifecycle/ExplainDataStreamLifecycleAction.java

@@ -217,7 +217,7 @@ public class ExplainDataStreamLifecycleAction {
                 builder.field(explainIndexDataLifecycle.getIndex());
                 explainIndexDataLifecycle.toXContent(
                     builder,
-                    DataStreamLifecycle.maybeAddEffectiveRetentionParams(outerParams),
+                    DataStreamLifecycle.addEffectiveRetentionParams(outerParams),
                     rolloverConfiguration,
                     globalRetention
                 );

+ 1 - 1
server/src/main/java/org/elasticsearch/action/datastreams/lifecycle/GetDataStreamLifecycleAction.java

@@ -187,7 +187,7 @@ public class GetDataStreamLifecycleAction {
                     builder.field(LIFECYCLE_FIELD.getPreferredName());
                     lifecycle.toXContent(
                         builder,
-                        org.elasticsearch.cluster.metadata.DataStreamLifecycle.maybeAddEffectiveRetentionParams(params),
+                        org.elasticsearch.cluster.metadata.DataStreamLifecycle.addEffectiveRetentionParams(params),
                         rolloverConfiguration,
                         isSystemDataStream ? null : globalRetention
                     );

+ 6 - 8
server/src/main/java/org/elasticsearch/cluster/metadata/DataStreamLifecycle.java

@@ -24,7 +24,6 @@ import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.core.Nullable;
 import org.elasticsearch.core.TimeValue;
 import org.elasticsearch.core.Tuple;
-import org.elasticsearch.rest.RestRequest;
 import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramInterval;
 import org.elasticsearch.xcontent.AbstractObjectParser;
 import org.elasticsearch.xcontent.ConstructingObjectParser;
@@ -55,6 +54,7 @@ public class DataStreamLifecycle implements SimpleDiffable<DataStreamLifecycle>,
 
     // Versions over the wire
     public static final TransportVersion ADDED_ENABLED_FLAG_VERSION = TransportVersions.V_8_10_X;
+    public static final String EFFECTIVE_RETENTION_REST_API_CAPABILITY = "data_stream_lifecycle_effective_retention";
 
     public static final String DATA_STREAMS_LIFECYCLE_ONLY_SETTING_NAME = "data_streams.lifecycle_only.mode";
     // The following XContent params are used to enrich the DataStreamLifecycle json with effective retention information
@@ -367,14 +367,12 @@ public class DataStreamLifecycle implements SimpleDiffable<DataStreamLifecycle>,
     }
 
     /**
-     * Adds a retention param to signal that this serialisation should include the effective retention metadata
+     * Adds a retention param to signal that this serialisation should include the effective retention metadata.
+     * @param params the XContent params to be extended with the new flag
+     * @return XContent params with `include_effective_retention` set to true. If the flag already exists, it will be overridden.
      */
-    public static ToXContent.Params maybeAddEffectiveRetentionParams(ToXContent.Params params) {
-        boolean shouldAddEffectiveRetention = Objects.equals(params.param(RestRequest.PATH_RESTRICTED), "serverless");
-        return new DelegatingMapParams(
-            Map.of(INCLUDE_EFFECTIVE_RETENTION_PARAM_NAME, Boolean.toString(shouldAddEffectiveRetention)),
-            params
-        );
+    public static ToXContent.Params addEffectiveRetentionParams(ToXContent.Params params) {
+        return new DelegatingMapParams(INCLUDE_EFFECTIVE_RETENTION_PARAMS, params);
     }
 
     public static Builder newBuilder(DataStreamLifecycle lifecycle) {

+ 5 - 16
server/src/test/java/org/elasticsearch/cluster/metadata/DataStreamLifecycleTests.java

@@ -39,7 +39,6 @@ import java.util.stream.Stream;
 import static org.elasticsearch.cluster.metadata.DataStreamLifecycle.RetentionSource.DATA_STREAM_CONFIGURATION;
 import static org.elasticsearch.cluster.metadata.DataStreamLifecycle.RetentionSource.DEFAULT_GLOBAL_RETENTION;
 import static org.elasticsearch.cluster.metadata.DataStreamLifecycle.RetentionSource.MAX_GLOBAL_RETENTION;
-import static org.elasticsearch.rest.RestRequest.PATH_RESTRICTED;
 import static org.hamcrest.Matchers.containsString;
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.not;
@@ -348,21 +347,11 @@ public class DataStreamLifecycleTests extends AbstractXContentSerializingTestCas
     }
 
     public void testEffectiveRetentionParams() {
-        {
-            ToXContent.Params params = DataStreamLifecycle.maybeAddEffectiveRetentionParams(new ToXContent.MapParams(Map.of()));
-            assertThat(params.paramAsBoolean(DataStreamLifecycle.INCLUDE_EFFECTIVE_RETENTION_PARAM_NAME, false), equalTo(false));
-        }
-        {
-            ToXContent.Params params = DataStreamLifecycle.maybeAddEffectiveRetentionParams(
-                new ToXContent.MapParams(Map.of(PATH_RESTRICTED, "not-serverless"))
-            );
-            assertThat(params.paramAsBoolean(DataStreamLifecycle.INCLUDE_EFFECTIVE_RETENTION_PARAM_NAME, false), equalTo(false));
-        }
-        {
-            ToXContent.Params params = DataStreamLifecycle.maybeAddEffectiveRetentionParams(
-                new ToXContent.MapParams(Map.of(PATH_RESTRICTED, "serverless"))
-            );
-            assertThat(params.paramAsBoolean(DataStreamLifecycle.INCLUDE_EFFECTIVE_RETENTION_PARAM_NAME, false), equalTo(true));
+        Map<String, String> initialParams = randomMap(0, 10, () -> Tuple.tuple(randomAlphaOfLength(10), randomAlphaOfLength(10)));
+        ToXContent.Params params = DataStreamLifecycle.addEffectiveRetentionParams(new ToXContent.MapParams(initialParams));
+        assertThat(params.paramAsBoolean(DataStreamLifecycle.INCLUDE_EFFECTIVE_RETENTION_PARAM_NAME, false), equalTo(true));
+        for (String key : initialParams.keySet()) {
+            assertThat(initialParams.get(key), equalTo(params.param(key)));
         }
     }