Prechádzať zdrojové kódy

[ML] fixing datafeed preview after allowing datafeed_config in job_config (#75625)

Since PR: https://github.com/elastic/elasticsearch/pull/74265

It is valid to supply a `datafeed_config` inside of a job config. But, the `_preview` API allows BOTH
a `datafeed_config` and `job_config` to be top-level objects. This can cause confusion and allowing
both a top level `datafeed_config` and an internal `job_config.datafeed_config` is unsupported 
behavior.

This commit protects against these weird scenarios and also strictly allows only a `job_config` with
a nested `datafeed_config` to be provided.

Consequently, on preview, the valid combinations are:

- Datafeed config ID only
- top level datafeed_config and top level job_config (without a nested datafeed_config)
- top level job_config with a nested datafeed_config

everything else is considered invalid
Benjamin Trent 4 rokov pred
rodič
commit
cc25792390

+ 13 - 2
x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/PreviewDatafeedAction.java

@@ -178,8 +178,19 @@ public class PreviewDatafeedAction extends ActionType<PreviewDatafeedAction.Resp
                 }
                 if (jobBuilder != null) {
                     jobBuilder.setId("preview_job_id");
-                    if (datafeedBuilder == null) {
-                        throw new IllegalArgumentException("[datafeed_config] must be present when a [job_config] is provided");
+                    if (datafeedBuilder == null && jobBuilder.getDatafeedConfig() == null) {
+                        throw new IllegalArgumentException(
+                            "[datafeed_config] must be present when a [job_config.datafeed_config] is not present"
+                        );
+                    }
+                    if (datafeedBuilder != null && jobBuilder.getDatafeedConfig() != null) {
+                        throw new IllegalArgumentException(
+                            "[datafeed_config] must not be present when a [job_config.datafeed_config] is present"
+                        );
+                    }
+                    // If the datafeed_config has been provided via the jobBuilder, set it here for easier serialization and use
+                    if (jobBuilder.getDatafeedConfig() != null) {
+                        datafeedBuilder = jobBuilder.getDatafeedConfig().setJobId(jobBuilder.getId()).setId(jobBuilder.getId());
                     }
                 }
                 if (datafeedId != null && (datafeedBuilder != null || jobBuilder != null)) {

+ 4 - 0
x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/config/Job.java

@@ -928,6 +928,10 @@ public class Job extends AbstractDiffable<Job> implements Writeable, ToXContentO
             return this;
         }
 
+        public DatafeedConfig.Builder getDatafeedConfig() {
+            return datafeedConfig;
+        }
+
         /**
          * This is used for parsing. If the datafeed_config exists AND its indices options are `null`, we set them to these options
          *

+ 39 - 6
x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/PreviewDatafeedActionRequestTests.java

@@ -14,6 +14,7 @@ import org.elasticsearch.search.SearchModule;
 import org.elasticsearch.test.AbstractWireSerializingTestCase;
 import org.elasticsearch.xpack.core.ml.action.PreviewDatafeedAction.Request;
 import org.elasticsearch.xpack.core.ml.datafeed.DatafeedConfig;
+import org.elasticsearch.xpack.core.ml.datafeed.DatafeedConfigBuilderTests;
 import org.elasticsearch.xpack.core.ml.datafeed.DatafeedConfigTests;
 import org.elasticsearch.xpack.core.ml.job.config.JobTests;
 
@@ -27,12 +28,25 @@ public class PreviewDatafeedActionRequestTests extends AbstractWireSerializingTe
     @Override
     protected Request createTestInstance() {
         String jobId = randomAlphaOfLength(10);
-        return randomBoolean() ?
-            new Request(randomAlphaOfLength(10)) :
-            new Request(
-                DatafeedConfigTests.createRandomizedDatafeedConfig(jobId),
-                randomBoolean() ? JobTests.buildJobBuilder(jobId) : null
-            );
+        switch (randomInt(2)) {
+            case 0:
+                return new Request(randomAlphaOfLength(10));
+            case 1:
+                return new Request(
+                    DatafeedConfigTests.createRandomizedDatafeedConfig(jobId),
+                    randomBoolean() ? JobTests.buildJobBuilder(jobId) : null
+                );
+            case 2:
+                return new Request.Builder()
+                    .setJobBuilder(
+                        JobTests.buildJobBuilder(jobId).setDatafeed(DatafeedConfigBuilderTests.createRandomizedDatafeedConfigBuilder(
+                            null,
+                            null,
+                            3600000
+                ))).build();
+            default:
+                throw new IllegalArgumentException("Unexpected test state");
+        }
     }
 
     @Override
@@ -62,6 +76,25 @@ public class PreviewDatafeedActionRequestTests extends AbstractWireSerializingTe
         ex = expectThrows(IllegalArgumentException.class, requestBuilder::build);
         assertThat(ex.getMessage(),
             containsString("[datafeed_config.job_id] must be set or a [job_config] must be provided"));
+
+        requestBuilder
+            .setJobBuilder(
+                JobTests.buildJobBuilder(jobId)
+                    .setDatafeed(new DatafeedConfig.Builder(DatafeedConfigTests.createRandomizedDatafeedConfig(jobId)))
+            )
+            .setDatafeedId(null)
+            .setDatafeedBuilder(new DatafeedConfig.Builder());
+        ex = expectThrows(IllegalArgumentException.class, requestBuilder::build);
+        assertThat(ex.getMessage(),
+            containsString("[datafeed_config] must not be present when a [job_config.datafeed_config] is present"));
+
+        requestBuilder
+            .setJobBuilder(JobTests.buildJobBuilder(jobId))
+            .setDatafeedId(null)
+            .setDatafeedBuilder(null);
+        ex = expectThrows(IllegalArgumentException.class, requestBuilder::build);
+        assertThat(ex.getMessage(),
+            containsString("[datafeed_config] must be present when a [job_config.datafeed_config] is not present"));
     }
 
     @Override

+ 1 - 1
x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/datafeed/DatafeedConfigBuilderTests.java

@@ -34,7 +34,7 @@ import static org.elasticsearch.xpack.core.ml.utils.QueryProviderTests.createRan
 
 public class DatafeedConfigBuilderTests extends AbstractWireSerializingTestCase<DatafeedConfig.Builder> {
 
-    static DatafeedConfig.Builder createRandomizedDatafeedConfigBuilder(String jobId, String datafeedId, long bucketSpanMillis) {
+    public static DatafeedConfig.Builder createRandomizedDatafeedConfigBuilder(String jobId, String datafeedId, long bucketSpanMillis) {
         DatafeedConfig.Builder builder = new DatafeedConfig.Builder();
         if (jobId != null) {
             builder.setJobId(jobId);

+ 1 - 0
x-pack/plugin/ml/qa/ml-with-security/build.gradle

@@ -196,6 +196,7 @@ tasks.named("yamlRestTest").configure {
     'ml/preview_datafeed/Test preview missing datafeed',
     'ml/preview_datafeed/Test preview with datafeed_id and job config',
     'ml/preview_datafeed/Test preview with datafeed id and config',
+    'ml/preview_datafeed/Test preview with datafeed config and job config with datafeed config',
     'ml/reset_job/Test reset given job is open',
     'ml/reset_job/Test reset given unknown job id',
     'ml/revert_model_snapshot/Test revert model with invalid snapshotId',

+ 114 - 0
x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/preview_datafeed.yml

@@ -182,6 +182,37 @@ setup:
   - match: { 3.time: 1487379660000 }
   - match: { 3.airline: foo }
   - match: { 3.responsetime: 42.0 }
+
+  - do:
+      ml.preview_datafeed:
+        body: >
+          {
+            "job_config": {
+              "analysis_config": {
+                "bucket_span": "1h",
+                "detectors": [{"function":"sum","field_name":"responsetime","by_field_name":"airline"}]
+              },
+              "data_description": {
+                "time_field":"time"
+              },
+              "datafeed_config": {
+                "indexes":"airline-data"
+              }
+            }
+          }
+  - length: { $body: 4 }
+  - match: { 0.time: 1487376000000 }
+  - match: { 0.airline: foo }
+  - match: { 0.responsetime: 1.0 }
+  - match: { 1.time: 1487377800000 }
+  - match: { 1.airline: foo }
+  - match: { 1.responsetime: 1.0 }
+  - match: { 2.time: 1487379600000 }
+  - match: { 2.airline: bar }
+  - match: { 2.responsetime: 42.0 }
+  - match: { 3.time: 1487379660000 }
+  - match: { 3.airline: foo }
+  - match: { 3.responsetime: 42.0 }
 ---
 "Test preview aggregation datafeed with doc_count":
 
@@ -313,6 +344,65 @@ setup:
   - match: { 2.responsetime: 42.0 }
   - match: { 2.doc_count: 1 }
 
+  - do:
+      ml.preview_datafeed:
+        body: >
+          {
+            "job_config": {
+              "analysis_config" : {
+                  "bucket_span": "1h",
+                  "summary_count_field_name": "doc_count",
+                  "detectors" :[{"function":"sum","field_name":"responsetime","by_field_name":"airline"}]
+              },
+              "data_description" : {
+                  "time_field":"time"
+              },
+              "datafeed_config": {
+                "indexes":"airline-data",
+                "aggregations": {
+                  "buckets": {
+                    "histogram": {
+                      "field": "time",
+                      "interval": 3600000
+                    },
+                    "aggregations": {
+                      "time": {
+                        "max": {
+                          "field": "time"
+                        }
+                      },
+                      "airline": {
+                        "terms": {
+                          "field": "airline",
+                          "size": 100
+                        },
+                        "aggregations": {
+                          "responsetime": {
+                            "sum": {
+                               "field": "responsetime"
+                            }
+                          }
+                        }
+                      }
+                    }
+                  }
+                }
+              }
+            }
+          }
+  - length: { $body: 3 }
+  - match: { 0.time: 1487377800000 }
+  - match: { 0.airline: foo }
+  - match: { 0.responsetime: 2.0 }
+  - match: { 0.doc_count: 2 }
+  - match: { 1.time: 1487379660000 }
+  - match: { 1.airline: bar }
+  - match: { 1.responsetime: 42.0 }
+  - match: { 1.doc_count: 1 }
+  - match: { 1.time: 1487379660000 }
+  - match: { 2.airline: foo }
+  - match: { 2.responsetime: 42.0 }
+  - match: { 2.doc_count: 1 }
 ---
 "Test preview single metric aggregation datafeed with different summary count field":
 
@@ -496,6 +586,30 @@ setup:
              }
           }
 ---
+"Test preview with datafeed config and job config with datafeed config":
+
+  - do:
+      catch: bad_request
+      ml.preview_datafeed:
+        body: >
+          {
+            "datafeed_config": {
+              "indexes":"airline-data"
+            },
+            "job_config": {
+              "analysis_config" : {
+                "bucket_span": "1h",
+                "detectors" :[{"function":"sum","field_name":"responsetime","by_field_name":"airline"}]
+              },
+              "data_description" : {
+                "time_field":"time"
+              },
+              "datafeed_config": {
+                "indexes":"airline-data"
+              }
+            }
+          }
+---
 "Test preview datafeed with unavailable index":
 
   - do: