Browse Source

[ML] Explain anomaly score factors (#90675)

This PR surfaces new information about the impact of the factors on the initial anomaly score in the anomaly record:

- single bucket impact is determined by the deviation between actual and typical in the current bucket.
- multi-bucket impact is determined by the deviation between actual and typical in the past 12 buckets.
- anomaly characteristics are statistical properties of the current anomaly compared to the historical observations.
- high variance penalty is the reduction of anomaly score in the buckets with large confidence intervals.
- incomplete bucket penalty is the reduction of anomaly score in the buckets with fewer samples than historically expected.

Additionally, we compute lower and upper confidence bounds and the typical value for the anomaly records. This improves the explainability of cases where the model plot is not activated, at only a slight performance overhead (1-2%).
Valeriy Khakhutskyy 3 years ago
parent
commit
95758e88a2

+ 5 - 0
docs/changelog/90675.yaml

@@ -0,0 +1,5 @@
+pr: 90675
+summary: Provide additional information about anomaly score factors
+area: Machine Learning
+type: enhancement
+issues: []

+ 42 - 2
docs/reference/ml/anomaly-detection/apis/get-record.asciidoc

@@ -15,7 +15,7 @@ Retrieves anomaly records for an {anomaly-job}.
 [[ml-get-record-prereqs]]
 == {api-prereq-title}
 
-Requires the `monitor_ml` cluster privilege. This privilege is included in the 
+Requires the `monitor_ml` cluster privilege. This privilege is included in the
 `machine_learning_user` built-in role.
 
 [[ml-get-record-desc]]
@@ -104,6 +104,46 @@ The API returns an array of record objects, which have the following properties:
 `actual`::
 (array) The actual value for the bucket.
 
+//Begin anomaly_score_explanation
+`anomaly_score_explanation`::
+(object) When present, it provides information about the factors impacting the
+initial anomaly score.
++
+.Properties of `anomaly_score_explanation`
+[%collapsible%open]
+====
+`anomaly_characteristics_impact`::::
+(Optional, integer) Impact of the statistical properties of the detected anomalous interval.
+
+`anomaly_length`::::
+(Optional, integer) Length of the detected anomaly in the number of buckets.
+
+`anomaly_type`::::
+(Optional, string) Type of the detected anomaly: spike or dip.
+
+`high_variance_penalty`::::
+(Optional, boolean) Indicates reduction of anomaly score for the buckets with large confidence intervals.
+
+`incomplete_bucket_penalty`::::
+(Optional, boolean) Indicates reduction of anomaly score if the bucket contains fewer samples than historically expected.
+
+`lower_confidence_bound`::::
+(Optional, double) Lower bound of the 95% confidence interval.
+
+`multi_bucket_impact`::::
+(Optional, integer) Impact of the deviation between actual and typical in the past 12 buckets.
+
+`single_bucket_impact`::::
+(Optional, integer) Impact of the deviation between actual and typical in the current bucket.
+
+`typical_value`::::
+(Optional, double) Typical (expected) value for this bucket.
+
+`upper_confidence_bound`::::
+(Optional, double) Upper bound of the 95% confidence interval.
+====
+//End anomaly_score_explanation
+
 `bucket_span`::
 (number)
 include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=bucket-span-results]
@@ -123,7 +163,7 @@ this field is not present. This sub-resource contains the most anomalous records
 for the `over_field_name`. For scalability reasons, a maximum of the 10 most
 significant causes of the anomaly are returned. As part of the core analytical modeling, these low-level anomaly records are aggregated for their parent over
 field record. The causes resource contains similar elements to the record
-resource, namely `actual`, `typical`, `geo_results.actual_point`, 
+resource, namely `actual`, `typical`, `geo_results.actual_point`,
 `geo_results.typical_point`, `*_field_name` and `*_field_value`. Probability and
 scores are not applicable to causes.
 

+ 29 - 2
x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/results/AnomalyRecord.java

@@ -6,6 +6,7 @@
  */
 package org.elasticsearch.xpack.core.ml.job.results;
 
+import org.elasticsearch.Version;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
@@ -36,7 +37,6 @@ import java.util.Objects;
  * can be returned if the members have not been set.
  */
 public class AnomalyRecord implements ToXContentObject, Writeable {
-
     /**
      * Result type
      */
@@ -58,6 +58,7 @@ public class AnomalyRecord implements ToXContentObject, Writeable {
     public static final ParseField INFLUENCERS = new ParseField("influencers");
     public static final ParseField BUCKET_SPAN = new ParseField("bucket_span");
     public static final ParseField GEO_RESULTS = new ParseField("geo_results");
+    public static final ParseField ANOMALY_SCORE_EXPLANATION = new ParseField("anomaly_score_explanation");
 
     // Used for QueryPage
     public static final ParseField RESULTS_FIELD = new ParseField("records");
@@ -134,6 +135,11 @@ public class AnomalyRecord implements ToXContentObject, Writeable {
             ignoreUnknownFields ? GeoResults.LENIENT_PARSER : GeoResults.STRICT_PARSER,
             GEO_RESULTS
         );
+        parser.declareObject(
+            AnomalyRecord::setAnomalyScoreExplanation,
+            ignoreUnknownFields ? AnomalyScoreExplanation.LENIENT_PARSER : AnomalyScoreExplanation.STRICT_PARSER,
+            ANOMALY_SCORE_EXPLANATION
+        );
 
         return parser;
     }
@@ -154,6 +160,7 @@ public class AnomalyRecord implements ToXContentObject, Writeable {
     private boolean isInterim;
     private GeoResults geoResults;
 
+    private AnomalyScoreExplanation anomalyScoreExplanation;
     private String fieldName;
 
     private String overFieldName;
@@ -180,6 +187,7 @@ public class AnomalyRecord implements ToXContentObject, Writeable {
         jobId = in.readString();
         detectorIndex = in.readInt();
         probability = in.readDouble();
+
         multiBucketImpact = in.readOptionalDouble();
         byFieldName = in.readOptionalString();
         byFieldValue = in.readOptionalString();
@@ -209,6 +217,9 @@ public class AnomalyRecord implements ToXContentObject, Writeable {
             influences = in.readList(Influence::new);
         }
         geoResults = in.readOptionalWriteable(GeoResults::new);
+        if (in.getVersion().onOrAfter(Version.V_8_6_0)) {
+            anomalyScoreExplanation = in.readOptionalWriteable(AnomalyScoreExplanation::new);
+        }
     }
 
     @Override
@@ -253,6 +264,9 @@ public class AnomalyRecord implements ToXContentObject, Writeable {
             out.writeList(influences);
         }
         out.writeOptionalWriteable(geoResults);
+        if (out.getVersion().onOrAfter(Version.V_8_6_0)) {
+            out.writeOptionalWriteable(anomalyScoreExplanation);
+        }
     }
 
     @Override
@@ -315,6 +329,9 @@ public class AnomalyRecord implements ToXContentObject, Writeable {
         if (geoResults != null) {
             builder.field(GEO_RESULTS.getPreferredName(), geoResults);
         }
+        if (anomalyScoreExplanation != null) {
+            builder.field(ANOMALY_SCORE_EXPLANATION.getPreferredName(), anomalyScoreExplanation);
+        }
 
         Map<String, LinkedHashSet<String>> inputFields = inputFieldMap();
         for (String inputFieldName : inputFields.keySet()) {
@@ -568,6 +585,14 @@ public class AnomalyRecord implements ToXContentObject, Writeable {
         this.geoResults = geoResults;
     }
 
+    public AnomalyScoreExplanation getAnomalyScoreExplanation() {
+        return anomalyScoreExplanation;
+    }
+
+    public void setAnomalyScoreExplanation(AnomalyScoreExplanation anomalyScoreExplanation) {
+        this.anomalyScoreExplanation = anomalyScoreExplanation;
+    }
+
     @Override
     public int hashCode() {
         return Objects.hash(
@@ -595,7 +620,8 @@ public class AnomalyRecord implements ToXContentObject, Writeable {
             causes,
             influences,
             jobId,
-            geoResults
+            geoResults,
+            anomalyScoreExplanation
         );
     }
 
@@ -634,6 +660,7 @@ public class AnomalyRecord implements ToXContentObject, Writeable {
             && Objects.equals(this.isInterim, that.isInterim)
             && Objects.equals(this.causes, that.causes)
             && Objects.equals(this.geoResults, that.geoResults)
+            && Objects.equals(this.anomalyScoreExplanation, that.anomalyScoreExplanation)
             && Objects.equals(this.influences, that.influences);
     }
 

+ 252 - 0
x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/results/AnomalyScoreExplanation.java

@@ -0,0 +1,252 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.core.ml.job.results;
+
+import org.elasticsearch.common.io.stream.StreamInput;
+import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.common.io.stream.Writeable;
+import org.elasticsearch.xcontent.ObjectParser;
+import org.elasticsearch.xcontent.ParseField;
+import org.elasticsearch.xcontent.ToXContentObject;
+import org.elasticsearch.xcontent.XContentBuilder;
+
+import java.io.IOException;
+import java.util.Objects;
+
+public class AnomalyScoreExplanation implements ToXContentObject, Writeable {
+    public static final ParseField ANOMALY_SCORE_EXPLANATION = new ParseField("anomaly_score_explanation");
+
+    public static final ParseField ANOMALY_TYPE = new ParseField("anomaly_type");
+    public static final ParseField ANOMALY_LENGTH = new ParseField("anomaly_length");
+    public static final ParseField SINGLE_BUCKET_IMPACT = new ParseField("single_bucket_impact");
+    public static final ParseField MULTI_BUCKET_IMPACT = new ParseField("multi_bucket_impact");
+    public static final ParseField ANOMALY_CHARACTERISTICS_IMPACT = new ParseField("anomaly_characteristics_impact");
+    public static final ParseField LOWER_CONFIDENCE_BOUND = new ParseField("lower_confidence_bound");
+    public static final ParseField TYPICAL_VALUE = new ParseField("typical_value");
+    public static final ParseField UPPER_CONFIDENCE_BOUND = new ParseField("upper_confidence_bound");
+    public static final ParseField HIGH_VARIANCE_PENALTY = new ParseField("high_variance_penalty");
+    public static final ParseField INCOMPLETE_BUCKET_PENALTY = new ParseField("incomplete_bucket_penalty");
+
+    public static final ObjectParser<AnomalyScoreExplanation, Void> STRICT_PARSER = createParser(false);
+    public static final ObjectParser<AnomalyScoreExplanation, Void> LENIENT_PARSER = createParser(true);
+
+    private static ObjectParser<AnomalyScoreExplanation, Void> createParser(Boolean ignoreUnknownFields) {
+        ObjectParser<AnomalyScoreExplanation, Void> parser = new ObjectParser<>(
+            ANOMALY_SCORE_EXPLANATION.getPreferredName(),
+            ignoreUnknownFields,
+            AnomalyScoreExplanation::new
+        );
+        parser.declareString(AnomalyScoreExplanation::setAnomalyType, ANOMALY_TYPE);
+        parser.declareInt(AnomalyScoreExplanation::setAnomalyLength, ANOMALY_LENGTH);
+        parser.declareInt(AnomalyScoreExplanation::setSingleBucketImpact, SINGLE_BUCKET_IMPACT);
+        parser.declareInt(AnomalyScoreExplanation::setMultiBucketImpact, MULTI_BUCKET_IMPACT);
+        parser.declareInt(AnomalyScoreExplanation::setAnomalyCharacteristicsImpact, ANOMALY_CHARACTERISTICS_IMPACT);
+        parser.declareDouble(AnomalyScoreExplanation::setLowerConfidenceBound, LOWER_CONFIDENCE_BOUND);
+        parser.declareDouble(AnomalyScoreExplanation::setTypicalValue, TYPICAL_VALUE);
+        parser.declareDouble(AnomalyScoreExplanation::setUpperConfidenceBound, UPPER_CONFIDENCE_BOUND);
+        parser.declareBoolean(AnomalyScoreExplanation::setHighVariancePenalty, HIGH_VARIANCE_PENALTY);
+        parser.declareBoolean(AnomalyScoreExplanation::setIncompleteBucketPenalty, INCOMPLETE_BUCKET_PENALTY);
+        return parser;
+    }
+
+    private String anomalyType;
+    private Integer anomalyLength;
+    private Integer singleBucketImpact;
+    private Integer multiBucketImpact;
+    private Integer anomalyCharacteristicsImpact;
+    private Double lowerConfidenceBound;
+    private Double typicalValue;
+    private Double upperConfidenceBound;
+    private Boolean highVariancePenalty;
+    private Boolean incompleteBucketPenalty;
+
+    AnomalyScoreExplanation() {}
+
+    public AnomalyScoreExplanation(StreamInput in) throws IOException {
+        this.anomalyType = in.readOptionalString();
+        this.anomalyLength = in.readOptionalInt();
+        this.singleBucketImpact = in.readOptionalInt();
+        this.multiBucketImpact = in.readOptionalInt();
+        this.anomalyCharacteristicsImpact = in.readOptionalInt();
+        this.lowerConfidenceBound = in.readOptionalDouble();
+        this.typicalValue = in.readOptionalDouble();
+        this.upperConfidenceBound = in.readOptionalDouble();
+        this.highVariancePenalty = in.readOptionalBoolean();
+        this.incompleteBucketPenalty = in.readOptionalBoolean();
+    }
+
+    @Override
+    public void writeTo(StreamOutput out) throws IOException {
+        out.writeOptionalString(anomalyType);
+        out.writeOptionalInt(anomalyLength);
+        out.writeOptionalInt(singleBucketImpact);
+        out.writeOptionalInt(multiBucketImpact);
+        out.writeOptionalInt(anomalyCharacteristicsImpact);
+        out.writeOptionalDouble(lowerConfidenceBound);
+        out.writeOptionalDouble(typicalValue);
+        out.writeOptionalDouble(upperConfidenceBound);
+        out.writeOptionalBoolean(highVariancePenalty);
+        out.writeOptionalBoolean(incompleteBucketPenalty);
+    }
+
+    @Override
+    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+        builder.startObject();
+        if (anomalyType != null) {
+            builder.field(ANOMALY_TYPE.getPreferredName(), anomalyType);
+        }
+        if (anomalyLength != null) {
+            builder.field(ANOMALY_LENGTH.getPreferredName(), anomalyLength);
+        }
+        if (singleBucketImpact != null) {
+            builder.field(SINGLE_BUCKET_IMPACT.getPreferredName(), singleBucketImpact);
+        }
+        if (multiBucketImpact != null) {
+            builder.field(MULTI_BUCKET_IMPACT.getPreferredName(), multiBucketImpact);
+        }
+        if (anomalyCharacteristicsImpact != null) {
+            builder.field(ANOMALY_CHARACTERISTICS_IMPACT.getPreferredName(), anomalyCharacteristicsImpact);
+        }
+        if (lowerConfidenceBound != null) {
+            builder.field(LOWER_CONFIDENCE_BOUND.getPreferredName(), lowerConfidenceBound);
+        }
+        if (typicalValue != null) {
+            builder.field(TYPICAL_VALUE.getPreferredName(), typicalValue);
+        }
+        if (upperConfidenceBound != null) {
+            builder.field(UPPER_CONFIDENCE_BOUND.getPreferredName(), upperConfidenceBound);
+        }
+        if (highVariancePenalty != null) {
+            builder.field(HIGH_VARIANCE_PENALTY.getPreferredName(), highVariancePenalty);
+        }
+        if (incompleteBucketPenalty != null) {
+            builder.field(INCOMPLETE_BUCKET_PENALTY.getPreferredName(), incompleteBucketPenalty);
+        }
+        builder.endObject();
+        return builder;
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(
+            anomalyType,
+            anomalyLength,
+            singleBucketImpact,
+            multiBucketImpact,
+            anomalyCharacteristicsImpact,
+            lowerConfidenceBound,
+            typicalValue,
+            upperConfidenceBound,
+            highVariancePenalty,
+            incompleteBucketPenalty
+        );
+    }
+
+    @Override
+    public boolean equals(Object other) {
+        if (this == other) {
+            return true;
+        }
+
+        if (other == null || getClass() != other.getClass()) {
+            return false;
+        }
+        AnomalyScoreExplanation that = (AnomalyScoreExplanation) other;
+        return Objects.equals(this.anomalyType, that.anomalyType)
+            && Objects.equals(this.anomalyLength, that.anomalyLength)
+            && Objects.equals(this.singleBucketImpact, that.singleBucketImpact)
+            && Objects.equals(this.multiBucketImpact, that.multiBucketImpact)
+            && Objects.equals(this.anomalyCharacteristicsImpact, that.anomalyCharacteristicsImpact)
+            && Objects.equals(this.lowerConfidenceBound, that.lowerConfidenceBound)
+            && Objects.equals(this.typicalValue, that.typicalValue)
+            && Objects.equals(this.upperConfidenceBound, that.upperConfidenceBound)
+            && Objects.equals(this.highVariancePenalty, that.highVariancePenalty)
+            && Objects.equals(this.incompleteBucketPenalty, that.incompleteBucketPenalty);
+    }
+
+    public String getAnomalyType() {
+        return anomalyType;
+    }
+
+    public void setAnomalyType(String anomalyType) {
+        this.anomalyType = anomalyType;
+    }
+
+    public Integer getAnomalyLength() {
+        return anomalyLength;
+    }
+
+    public void setAnomalyLength(Integer anomalyLength) {
+        this.anomalyLength = anomalyLength;
+    }
+
+    public Integer getSingleBucketImpact() {
+        return singleBucketImpact;
+    }
+
+    public void setSingleBucketImpact(Integer singleBucketImpact) {
+        this.singleBucketImpact = singleBucketImpact;
+    }
+
+    public Integer getMultiBucketImpact() {
+        return multiBucketImpact;
+    }
+
+    public void setMultiBucketImpact(Integer multiBucketImpact) {
+        this.multiBucketImpact = multiBucketImpact;
+    }
+
+    public Integer getAnomalyCharacteristicsImpact() {
+        return anomalyCharacteristicsImpact;
+    }
+
+    public void setAnomalyCharacteristicsImpact(Integer anomalyCharacteristicsImpact) {
+        this.anomalyCharacteristicsImpact = anomalyCharacteristicsImpact;
+    }
+
+    public Double getLowerConfidenceBound() {
+        return lowerConfidenceBound;
+    }
+
+    public void setLowerConfidenceBound(Double lowerConfidenceBound) {
+        this.lowerConfidenceBound = lowerConfidenceBound;
+    }
+
+    public Double getTypicalValue() {
+        return typicalValue;
+    }
+
+    public void setTypicalValue(Double typicalValue) {
+        this.typicalValue = typicalValue;
+    }
+
+    public Double getUpperConfidenceBound() {
+        return upperConfidenceBound;
+    }
+
+    public void setUpperConfidenceBound(Double upperConfidenceBound) {
+        this.upperConfidenceBound = upperConfidenceBound;
+    }
+
+    public Boolean isHighVariancePenalty() {
+        return highVariancePenalty;
+    }
+
+    public void setHighVariancePenalty(Boolean highVariancePenalty) {
+        this.highVariancePenalty = highVariancePenalty;
+    }
+
+    public Boolean isIncompleteBucketPenalty() {
+        return incompleteBucketPenalty;
+    }
+
+    public void setIncompleteBucketPenalty(Boolean incompleteBucketPenalty) {
+        this.incompleteBucketPenalty = incompleteBucketPenalty;
+    }
+
+}

+ 12 - 0
x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/results/ReservedFieldNames.java

@@ -75,6 +75,7 @@ public final class ReservedFieldNames {
         AnomalyRecord.TYPICAL.getPreferredName(),
         AnomalyRecord.ACTUAL.getPreferredName(),
         AnomalyRecord.GEO_RESULTS.getPreferredName(),
+        AnomalyRecord.ANOMALY_SCORE_EXPLANATION.getPreferredName(),
         AnomalyRecord.INFLUENCERS.getPreferredName(),
         AnomalyRecord.FIELD_NAME.getPreferredName(),
         AnomalyRecord.OVER_FIELD_NAME.getPreferredName(),
@@ -84,6 +85,17 @@ public final class ReservedFieldNames {
         AnomalyRecord.INITIAL_RECORD_SCORE.getPreferredName(),
         AnomalyRecord.BUCKET_SPAN.getPreferredName(),
 
+        AnomalyScoreExplanation.ANOMALY_TYPE.getPreferredName(),
+        AnomalyScoreExplanation.ANOMALY_LENGTH.getPreferredName(),
+        AnomalyScoreExplanation.SINGLE_BUCKET_IMPACT.getPreferredName(),
+        AnomalyScoreExplanation.MULTI_BUCKET_IMPACT.getPreferredName(),
+        AnomalyScoreExplanation.ANOMALY_CHARACTERISTICS_IMPACT.getPreferredName(),
+        AnomalyScoreExplanation.LOWER_CONFIDENCE_BOUND.getPreferredName(),
+        AnomalyScoreExplanation.TYPICAL_VALUE.getPreferredName(),
+        AnomalyScoreExplanation.UPPER_CONFIDENCE_BOUND.getPreferredName(),
+        AnomalyScoreExplanation.HIGH_VARIANCE_PENALTY.getPreferredName(),
+        AnomalyScoreExplanation.INCOMPLETE_BUCKET_PENALTY.getPreferredName(),
+
         GeoResults.TYPICAL_POINT.getPreferredName(),
         GeoResults.ACTUAL_POINT.getPreferredName(),
 

+ 34 - 0
x-pack/plugin/core/src/main/resources/org/elasticsearch/xpack/core/ml/anomalydetection/results_index_mappings.json

@@ -240,6 +240,40 @@
         }
       }
     },
+    "anomaly_score_explanation": {
+      "properties": {
+        "anomaly_type": {
+          "type": "keyword"
+        },
+        "anomaly_length": {
+          "type": "integer"
+        },
+        "single_bucket_impact": {
+          "type": "integer"
+        },
+        "multi_bucket_impact": {
+          "type": "integer"
+        },
+        "anomaly_characteristics_impact": {
+          "type": "integer"
+        },
+        "lower_confidence_bound": {
+          "type": "double"
+        },
+        "typical_value": {
+          "type": "double"
+        },
+        "upper_confidence_bound": {
+          "type": "double"
+        },
+        "high_variance_penalty": {
+          "type": "boolean"
+        },
+        "incomplete_bucket_penalty": {
+          "type": "boolean"
+        }
+      }
+    },
     "influencer_field_name" : {
       "type" : "keyword"
     },

+ 12 - 0
x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/results/AnomalyRecordTests.java

@@ -89,6 +89,18 @@ public class AnomalyRecordTests extends AbstractXContentSerializingTestCase<Anom
             }
             anomalyRecord.setCauses(causes);
         }
+        if (randomBoolean()) {
+            AnomalyScoreExplanation anomalyScoreExplanation = new AnomalyScoreExplanation();
+            anomalyScoreExplanation.setAnomalyType(randomAlphaOfLength(12));
+            anomalyScoreExplanation.setAnomalyLength(randomInt());
+            anomalyScoreExplanation.setSingleBucketImpact(randomInt());
+            anomalyScoreExplanation.setMultiBucketImpact(randomInt());
+            anomalyScoreExplanation.setLowerConfidenceBound(randomDouble());
+            anomalyScoreExplanation.setTypicalValue(randomDouble());
+            anomalyScoreExplanation.setUpperConfidenceBound(randomDouble());
+            anomalyScoreExplanation.setHighVariancePenalty(randomBoolean());
+            anomalyScoreExplanation.setIncompleteBucketPenalty(randomBoolean());
+        }
 
         return anomalyRecord;
     }