Jelajahi Sumber

[ML] Warn when creating job with an unusual bucket span (#82145)

Emit deprecation warning when creating new jobs with bucket spans that
aren't an integral divisor or multiple of a day.

Relates #81645

Co-authored-by: lcawl <lcawley@elastic.co>
Ed Savage 3 tahun lalu
induk
melakukan
e8a46649c5

+ 8 - 4
docs/reference/ml/ml-shared.asciidoc

@@ -169,10 +169,14 @@ end::bucket-count-anomaly-jobs[]
 
 tag::bucket-span[]
 The size of the interval that the analysis is aggregated into, typically between
-`5m` and `1h`. The default value is `5m`. If the {anomaly-job} uses a {dfeed}
-with {ml-docs}/ml-configuring-aggregation.html[aggregations], this value must be
-divisible by the interval of the date histogram aggregation. For more
-information, see {ml-docs}/ml-ad-finding-anomalies.html#ml-ad-bucket-span[Bucket span].
+`5m` and `1h`. This value should be either a whole number of days or equate to a
+whole number of buckets in one day;
+deprecated:[8.1, Values that do not meet these recommendations are deprecated and will be disallowed in a future version].
+If the {anomaly-job} uses a {dfeed} with
+{ml-docs}/ml-configuring-aggregation.html[aggregations], this value must also be
+divisible by the interval of the date histogram aggregation. The default value
+is `5m`. For more information, see
+{ml-docs}/ml-ad-finding-anomalies.html#ml-ad-bucket-span[Bucket span].
 end::bucket-span[]
 
 tag::bucket-span-results[]

+ 1 - 0
x-pack/plugin/build.gradle

@@ -117,6 +117,7 @@ tasks.named("yamlRestTestV7CompatTransform").configure{ task ->
   task.replaceValueInMatch("_type", "_doc")
   task.addAllowedWarningRegex("\\[types removal\\].*")
   task.addAllowedWarningRegexForTest("Including \\[accept_enterprise\\] in get license.*", "Installing enterprise license")
+  task.addAllowedWarningRegex("bucket_span .* is not an integral .* of the number of seconds in 1d.* This is now deprecated.*")
 
   task.replaceValueTextByKeyValue("catch",
     'bad_request',

+ 30 - 1
x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/config/Job.java

@@ -14,6 +14,8 @@ import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.io.stream.Writeable;
+import org.elasticsearch.common.logging.DeprecationCategory;
+import org.elasticsearch.common.logging.DeprecationLogger;
 import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.core.Nullable;
 import org.elasticsearch.core.TimeValue;
@@ -62,6 +64,8 @@ public class Job extends AbstractDiffable<Job> implements Writeable, ToXContentO
 
     public static final String ANOMALY_DETECTOR_JOB_TYPE = "anomaly_detector";
 
+    private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(Job.class);
+
     /*
      * Field names used in serialization
      */
@@ -1207,6 +1211,32 @@ public class Job extends AbstractDiffable<Job> implements Writeable, ToXContentO
                 && modelSnapshotRetentionDays > DEFAULT_DAILY_MODEL_SNAPSHOT_RETENTION_AFTER_DAYS) {
                 dailyModelSnapshotRetentionAfterDays = DEFAULT_DAILY_MODEL_SNAPSHOT_RETENTION_AFTER_DAYS;
             }
+
+            final long SECONDS_IN_A_DAY = 86400;
+            if (analysisConfig.getBucketSpan().seconds() > SECONDS_IN_A_DAY) {
+                if (analysisConfig.getBucketSpan().seconds() % SECONDS_IN_A_DAY != 0) {
+                    deprecationLogger.critical(
+                        DeprecationCategory.OTHER,
+                        "bucket_span",
+                        "bucket_span {} [{}s] is not an integral multiple of the number of seconds in 1d [{}s]. This is now deprecated.",
+                        analysisConfig.getBucketSpan().toString(),
+                        analysisConfig.getBucketSpan().seconds(),
+                        SECONDS_IN_A_DAY
+                    );
+                }
+            } else {
+                if (SECONDS_IN_A_DAY % analysisConfig.getBucketSpan().seconds() != 0) {
+                    deprecationLogger.critical(
+                        DeprecationCategory.OTHER,
+                        "bucket_span",
+                        "bucket_span {} [{}s] is not an integral divisor of the number of seconds in 1d [{}s]. This is now deprecated.",
+                        analysisConfig.getBucketSpan().toString(),
+                        analysisConfig.getBucketSpan().seconds(),
+                        SECONDS_IN_A_DAY
+                    );
+                }
+            }
+
             if (analysisConfig.getModelPruneWindow() == null) {
                 long modelPruneWindowSeconds = analysisConfig.getBucketSpan().seconds() / 2 + AnalysisConfig.DEFAULT_MODEL_PRUNE_WINDOW
                     .seconds();
@@ -1217,7 +1247,6 @@ public class Job extends AbstractDiffable<Job> implements Writeable, ToXContentO
                 modelPruneWindowSeconds = Math.max(20 * analysisConfig.getBucketSpan().seconds(), modelPruneWindowSeconds);
 
                 AnalysisConfig.Builder analysisConfigBuilder = new AnalysisConfig.Builder(analysisConfig);
-                final long SECONDS_IN_A_DAY = 86400;
                 final long SECONDS_IN_AN_HOUR = 3600;
                 final long SECONDS_IN_A_MINUTE = 60;
                 if (modelPruneWindowSeconds % SECONDS_IN_A_DAY == 0) {

+ 58 - 0
x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/jobs_crud.yml

@@ -38,6 +38,56 @@
         job_id: "missing-*"
         allow_no_match: false
 
+---
+"Test put job deprecated bucket span":
+  - skip:
+      features:
+        - "warnings"
+
+  - do:
+      warnings:
+        - 'bucket_span 1.5d [129600s] is not an integral multiple of the number of seconds in 1d [86400s]. This is now deprecated.'
+      ml.put_job:
+        job_id: job-deprecated-bucket-span-1.5d
+        body:  >
+          {
+            "description":"Analysis of response time by airline",
+            "analysis_config" : {
+                "bucket_span": "36h",
+                "detectors" :[{"function":"metric","field_name":"responsetime","by_field_name":"airline"}]
+            },
+            "data_description" : {
+                "time_field":"time",
+                "time_format":"yyyy-MM-dd HH:mm:ssX"
+            }
+          }
+  - match: { job_id: "job-deprecated-bucket-span-1.5d" }
+  - match: { analysis_config.bucket_span: "36h" }
+  - match: { analysis_limits.model_memory_limit: "1024mb" }
+  - match: { analysis_limits.categorization_examples_limit: 4 }
+
+  - do:
+      warnings:
+        - 'bucket_span 19m [1140s] is not an integral divisor of the number of seconds in 1d [86400s]. This is now deprecated.'
+      ml.put_job:
+        job_id: job-deprecated-bucket-span-19m
+        body:  >
+          {
+            "description":"Analysis of response time by airline",
+            "analysis_config" : {
+                "bucket_span": "19m",
+                "detectors" :[{"function":"metric","field_name":"responsetime","by_field_name":"airline"}]
+            },
+            "data_description" : {
+                "time_field":"time",
+                "time_format":"yyyy-MM-dd HH:mm:ssX"
+            }
+          }
+  - match: { job_id: "job-deprecated-bucket-span-19m" }
+  - match: { analysis_config.bucket_span: "19m" }
+  - match: { analysis_limits.model_memory_limit: "1024mb" }
+  - match: { analysis_limits.categorization_examples_limit: 4 }
+
 ---
 "Test job crud apis":
 
@@ -96,6 +146,10 @@
 
 ---
 "Test put job with model_prune_window":
+  - skip:
+      features:
+        - "warnings"
+
   - do:
       ml.put_job:
         job_id: job-model-prune-window
@@ -132,6 +186,8 @@
   - is_true: create_time
 
   - do:
+      warnings:
+        - 'bucket_span 1.6d [144000s] is not an integral multiple of the number of seconds in 1d [86400s]. This is now deprecated.'
       ml.put_job:
         job_id: job-default-model-prune-window-40h
         body: >
@@ -183,6 +239,8 @@
 # is the closest multiple of the bucket span greater than or equal to 30
 # days (43200m)
   - do:
+      warnings:
+        - 'bucket_span 17m [1020s] is not an integral divisor of the number of seconds in 1d [86400s]. This is now deprecated.'
       ml.put_job:
         job_id: job-default-model-prune-window_with_odd_bucket_span
         body: >

+ 6 - 1
x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/jobs_get_result_overall_buckets.yml

@@ -1,6 +1,9 @@
 setup:
   - skip:
-      features: headers
+      features:
+        - "warnings"
+        - "headers"
+
   - do:
       headers:
         Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. the test setup superuser
@@ -36,6 +39,8 @@ setup:
           }
 
   - do:
+      warnings:
+        - 'bucket_span 17m [1020s] is not an integral divisor of the number of seconds in 1d [86400s]. This is now deprecated.'
       headers:
         Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. the test setup superuser
       ml.put_job:

+ 1 - 0
x-pack/qa/xpack-prefix-rest-compat/build.gradle

@@ -169,6 +169,7 @@ tasks.named("yamlRestTestV7CompatTransform").configure{ task ->
   task.replaceKeyInDo("ml.validate", "xpack-ml.validate")
   task.replaceKeyInDo("ml.validate_detector", "xpack-ml.validate_detector")
   task.addAllowedWarningRegex(".*_xpack/ml.* is deprecated.*")
+  task.addAllowedWarningRegex("bucket_span .* is not an integral .* of the number of seconds in 1d.* This is now deprecated.*")
 
   task.replaceKeyInDo("ssl.certificates", "xpack-ssl.certificates", "Test get SSL certificates")
   task.addAllowedWarningRegexForTest(".*_xpack/ssl.* is deprecated.*", "Test get SSL certificates")