Browse Source

[ML] make composite aggs in datafeeds Generally Available (#88589)

This commit makes composite aggregations in datafeeds generally available.
Benjamin Trent 3 years ago
parent
commit
a044b5c01e

+ 4 - 8
docs/reference/ml/anomaly-detection/ml-configuring-aggregations.asciidoc

@@ -13,8 +13,7 @@ reduces the volume of data that must be considered while detecting anomalies.
 
 TIP: If you use a terms aggregation and the cardinality of a term is high but
 still significantly less than your total number of documents, use
-{ref}/search-aggregations-bucket-composite-aggregation.html[composite aggregations]
-experimental:[Support for composite aggregations inside datafeeds is currently experimental].
+{ref}/search-aggregations-bucket-composite-aggregation.html[composite aggregations].
 
 [discrete]
 [[aggs-limits-dfeeds]]
@@ -78,7 +77,7 @@ PUT _ml/anomaly_detectors/farequote
   },
   "data_description": {
     "time_field":"time"  <1>
-  }, 
+  },
   "datafeed_config":{
     "indices": ["farequote"],
     "aggregations": {
@@ -137,8 +136,6 @@ includes all the values of the field instead of the top values per bucket.
 [[aggs-using-composite]]
 === Using composite aggregations in {anomaly-jobs}
 
-experimental::[]
-
 For `composite` aggregation support, there must be exactly one `date_histogram` value
 source. That value source must not be sorted in descending order. Additional
 `composite` aggregation value sources are allowed, such as `terms`.
@@ -147,7 +144,7 @@ NOTE: A {dfeed} that uses composite aggregations may not be as performant as
 {dfeeds} that use scrolling or date histogram aggregations. Composite
 aggregations are optimized for queries that are either `match_all` or `range`
 filters. Other types of
-queries may cause the `composite` aggregation to be ineffecient.
+queries may cause the `composite` aggregation to be inefficient.
 
 Here is an example that uses a `composite` aggregation instead of a
 `date_histogram`.
@@ -429,8 +426,7 @@ different values of a field.
 
 IMPORTANT: If you use a terms aggregation, by default it returns buckets for
 the top ten terms. Thus if the cardinality of the term is greater than 10, not
-all terms are analyzed. In this case, consider using `composite` aggregations
-experimental:[Support for composite aggregations inside datafeeds is currently experimental].
+all terms are analyzed. In this case, consider using `composite` aggregations.
 
 You can change this behavior by setting the `size` parameter. To
 determine the cardinality of your data, you can run searches such as:

+ 1 - 1
x-pack/qa/rolling-upgrade/build.gradle

@@ -1,4 +1,3 @@
-import org.elasticsearch.gradle.Version
 import org.elasticsearch.gradle.internal.BwcVersions
 import org.elasticsearch.gradle.internal.info.BuildParams
 import org.elasticsearch.gradle.testclusters.StandaloneRestIntegTestTask
@@ -146,6 +145,7 @@ BuildParams.bwcVersions.withWireCompatible { bwcVersion, baseName ->
       'mixed_cluster/40_ml_datafeed_crud/Put job and datafeed in mixed cluster',
       'mixed_cluster/40_ml_datafeed_crud/Put job and datafeed without aggs in mixed cluster',
       'mixed_cluster/40_ml_datafeed_crud/Put job and datafeed with aggs in mixed cluster',
+      'mixed_cluster/40_ml_datafeed_crud/Put job and datafeed with composite aggs in mixed cluster',
       'mixed_cluster/80_transform_jobs_crud/Test put batch transform on mixed cluster',
       'mixed_cluster/80_transform_jobs_crud/Test put continuous transform on mixed cluster',
       'mixed_cluster/90_ml_data_frame_analytics_crud/Put an outlier_detection job on the mixed cluster',

+ 86 - 0
x-pack/qa/rolling-upgrade/src/test/resources/rest-api-spec/test/mixed_cluster/40_ml_datafeed_crud.yml

@@ -39,7 +39,20 @@
         datafeed_id: old-cluster-datafeed-with-aggs
   - match: { datafeeds.0.state: "stopped"}
   - is_false: datafeeds.0.node
+---
+"Test old cluster datafeed with comp aggs":
+  - do:
+      ml.get_datafeeds:
+        datafeed_id: old-cluster-datafeed-with-comp-aggs
+  - match: { datafeeds.0.datafeed_id: "old-cluster-datafeed-with-comp-aggs"}
+  - length: { datafeeds.0.indices: 1 }
+  - is_false: datafeeds.0.script_fields
 
+  - do:
+      ml.get_datafeed_stats:
+        datafeed_id: old-cluster-datafeed-with-comp-aggs
+  - match: { datafeeds.0.state: "stopped"}
+  - is_false: datafeeds.0.node
 ---
 "Put job and datafeed without aggs in mixed cluster":
   - do:
@@ -148,3 +161,76 @@
         datafeed_id: mixed-cluster-datafeed-with-aggs
   - match: { datafeeds.0.state: stopped}
   - is_false: datafeeds.0.node
+---
+"Put job and datafeed with composite aggs in mixed cluster":
+  - do:
+      ml.put_job:
+        job_id: mixed-cluster-datafeed-job-with-comp-aggs
+        body:  >
+          {
+            "description":"Cluster upgrade",
+            "analysis_config" : {
+                "bucket_span": "60s",
+                "summary_count_field_name": "doc_count",
+                "detectors" :[{"function":"count"}]
+            },
+            "analysis_limits" : {
+                "model_memory_limit": "50mb"
+            },
+            "data_description" : {
+                "format":"xcontent",
+                "time_field":"time"
+            }
+          }
+  - match: { job_id: mixed-cluster-datafeed-job-with-comp-aggs }
+
+  - do:
+      ml.put_datafeed:
+        datafeed_id: mixed-cluster-datafeed-with-comp-aggs
+        body:  >
+          {
+            "job_id":"mixed-cluster-datafeed-job-with-comp-aggs",
+            "indices":["airline-data"],
+            "aggregations": {
+              "buckets": {
+                "composite": {
+                  "size": 2000,
+                  "sources": [
+                    {
+                      "time_bucket": {
+                       "date_histogram": {
+                         "field": "time",
+                         "fixed_interval": "30s",
+                         "time_zone": "UTC"
+                       }
+                      }
+                    }
+                  ]
+                },
+                "aggregations": {
+                  "time": {
+                    "max": {"field": "time"}
+                  },
+                  "airline": {
+                    "terms": {
+                      "field": "airline",
+                      "size": 100
+                    },
+                    "aggregations": {
+                      "responsetime": {
+                        "avg": {
+                          "field": "responsetime"
+                        }
+                      }
+                    }
+                  }
+                }
+              }
+            }
+          }
+
+  - do:
+      ml.get_datafeed_stats:
+        datafeed_id: mixed-cluster-datafeed-with-comp-aggs
+  - match: { datafeeds.0.state: stopped}
+  - is_false: datafeeds.0.node

+ 73 - 0
x-pack/qa/rolling-upgrade/src/test/resources/rest-api-spec/test/old_cluster/40_ml_datafeed_crud.yml

@@ -259,3 +259,76 @@
         datafeed_id: old-cluster-datafeed-with-aggs
   - match: { datafeeds.0.state: stopped}
   - is_false: datafeeds.0.node
+---
+"Put job and datafeed with composite aggs":
+  - do:
+      ml.put_job:
+        job_id: old-cluster-datafeed-job-with-comp-aggs
+        body:  >
+          {
+            "description":"Cluster upgrade",
+            "analysis_config" : {
+                "bucket_span": "60s",
+                "summary_count_field_name": "doc_count",
+                "detectors" :[{"function":"count"}]
+            },
+            "analysis_limits" : {
+                "model_memory_limit": "50mb"
+            },
+            "data_description" : {
+                "format":"xcontent",
+                "time_field":"time"
+            }
+          }
+  - match: { job_id: old-cluster-datafeed-job-with-comp-aggs }
+
+  - do:
+      ml.put_datafeed:
+        datafeed_id: old-cluster-datafeed-with-comp-aggs
+        body:  >
+          {
+            "job_id":"old-cluster-datafeed-job-with-comp-aggs",
+            "indices":["airline-data"],
+            "aggregations": {
+              "buckets": {
+                "composite": {
+                  "size": 2000,
+                  "sources": [
+                    {
+                      "time_bucket": {
+                       "date_histogram": {
+                         "field": "time",
+                         "fixed_interval": "30s",
+                         "time_zone": "UTC"
+                       }
+                      }
+                    }
+                  ]
+                },
+                "aggregations": {
+                  "time": {
+                    "max": {"field": "time"}
+                  },
+                  "airline": {
+                    "terms": {
+                      "field": "airline",
+                      "size": 100
+                    },
+                    "aggregations": {
+                      "responsetime": {
+                        "avg": {
+                          "field": "responsetime"
+                        }
+                      }
+                    }
+                  }
+                }
+              }
+            }
+          }
+
+  - do:
+      ml.get_datafeed_stats:
+        datafeed_id: old-cluster-datafeed-with-comp-aggs
+  - match: { datafeeds.0.state: stopped}
+  - is_false: datafeeds.0.node

+ 90 - 0
x-pack/qa/rolling-upgrade/src/test/resources/rest-api-spec/test/upgraded_cluster/40_ml_datafeed_crud.yml

@@ -200,6 +200,96 @@ setup:
         job_id: mixed-cluster-datafeed-job-with-aggs
   - match: { acknowledged: true }
 
+  - do:
+      indices.delete:
+        index: airline-data
+---
+"Test old and mixed cluster datafeeds with composite aggs":
+  - do:
+      indices.create:
+        index: airline-data
+        body:
+          mappings:
+            properties:
+              time:
+                type: date
+
+  - do:
+      ml.get_datafeeds:
+        datafeed_id: old-cluster-datafeed-with-comp-aggs
+  - match: { datafeeds.0.datafeed_id: "old-cluster-datafeed-with-comp-aggs"}
+  - length: { datafeeds.0.indices: 1 }
+
+  - do:
+      ml.get_datafeed_stats:
+        datafeed_id: old-cluster-datafeed-with-comp-aggs
+  - match: { datafeeds.0.state: "stopped"}
+  - is_false: datafeeds.0.node
+
+  - do:
+      ml.get_datafeeds:
+        datafeed_id: mixed-cluster-datafeed-with-comp-aggs
+  - match: { datafeeds.0.datafeed_id: "mixed-cluster-datafeed-with-comp-aggs"}
+  - length: { datafeeds.0.indices: 1 }
+
+  - do:
+      ml.get_datafeed_stats:
+        datafeed_id: mixed-cluster-datafeed-with-comp-aggs
+  - match: { datafeeds.0.state: "stopped"}
+  - is_false: datafeeds.0.node
+
+  - do:
+      ml.open_job:
+        job_id: old-cluster-datafeed-job-with-comp-aggs
+
+  - do:
+      ml.start_datafeed:
+        datafeed_id: old-cluster-datafeed-with-comp-aggs
+        start: 0
+
+  - do:
+      ml.stop_datafeed:
+        datafeed_id: old-cluster-datafeed-with-comp-aggs
+
+  - do:
+      ml.close_job:
+        job_id: old-cluster-datafeed-job-with-comp-aggs
+
+  - do:
+      ml.delete_datafeed:
+        datafeed_id: old-cluster-datafeed-with-comp-aggs
+
+  - do:
+      ml.delete_job:
+        job_id: old-cluster-datafeed-job-with-comp-aggs
+  - match: { acknowledged: true }
+
+  - do:
+      ml.open_job:
+        job_id: mixed-cluster-datafeed-job-with-comp-aggs
+
+  - do:
+      ml.start_datafeed:
+        datafeed_id: mixed-cluster-datafeed-with-comp-aggs
+        start: 0
+
+  - do:
+      ml.stop_datafeed:
+        datafeed_id: mixed-cluster-datafeed-with-comp-aggs
+
+  - do:
+      ml.close_job:
+        job_id: mixed-cluster-datafeed-job-with-comp-aggs
+
+  - do:
+      ml.delete_datafeed:
+        datafeed_id: mixed-cluster-datafeed-with-comp-aggs
+
+  - do:
+      ml.delete_job:
+        job_id: mixed-cluster-datafeed-job-with-comp-aggs
+  - match: { acknowledged: true }
+
   - do:
       indices.delete:
         index: airline-data