Browse Source

[ML] adding result_type and mlcategory fields to category definitions (#63326)

To ease correlation between anomaly results and category definitions, this commit adds a new keyword-mapped field, `mlcategory`.

This field always has the same value as the `category_id` field (which is mapped as a long). But since anomaly results store
`mlcategory` as a keyword, queries are simpler if category definitions also have this field as a keyword.

In the stored JSON, the `mlcategory` value is written as a `string`.

Additionally, this commit adds a `result_type: category_definition` entry to category definition documents.

This will help simplify and unify result queries in the future.

closes https://github.com/elastic/elasticsearch/issues/60108
Benjamin Trent 5 years ago
parent
commit
0fe9787929

+ 7 - 0
x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/results/CategoryDefinition.java

@@ -40,6 +40,7 @@ public class CategoryDefinition implements ToXContentObject, Writeable {
     public static final ParseField GROK_PATTERN = new ParseField("grok_pattern");
     public static final ParseField NUM_MATCHES = new ParseField("num_matches");
     public static final ParseField PREFERRED_TO_CATEGORIES = new ParseField("preferred_to_categories");
+    public static final ParseField MLCATEGORY = new ParseField("mlcategory");
 
     // Used for QueryPage
     public static final ParseField RESULTS_FIELD = new ParseField("categories");
@@ -62,6 +63,8 @@ public class CategoryDefinition implements ToXContentObject, Writeable {
         parser.declareString(CategoryDefinition::setGrokPattern, GROK_PATTERN);
         parser.declareLongArray(CategoryDefinition::setPreferredToCategories, PREFERRED_TO_CATEGORIES);
         parser.declareLong(CategoryDefinition::setNumMatches, NUM_MATCHES);
+        parser.declareString((cd, rt) -> { /*Ignore as it is always category_definition*/ }, Result.RESULT_TYPE);
+        parser.declareString((cd, mc) -> { /*Ignore as it is always equal to category_id*/ }, MLCATEGORY);
         return parser;
     }
 
@@ -246,6 +249,10 @@ public class CategoryDefinition implements ToXContentObject, Writeable {
         if (partitionFieldName != null && partitionFieldValue != null && ReservedFieldNames.isValidFieldName(partitionFieldName)) {
             builder.field(partitionFieldName, partitionFieldValue);
         }
+        // Even though category_definitions now have a result type, queries for category definition values
+        // still need to be done by looking for the category_id field. At least until 9.x
+        builder.field(Result.RESULT_TYPE.getPreferredName(), TYPE.getPreferredName());
+        builder.field(MLCATEGORY.getPreferredName(), String.valueOf(categoryId));
 
         builder.endObject();
         return builder;

+ 3 - 0
x-pack/plugin/core/src/main/resources/org/elasticsearch/xpack/core/ml/anomalydetection/results_index_mappings.json

@@ -333,6 +333,9 @@
       "missing_field_count" : {
         "type" : "long"
       },
+      "mlcategory": {
+        "type": "keyword"
+      },
       "model_bytes" : {
         "type" : "long"
       },

+ 3 - 0
x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/persistence/ElasticsearchMappingsTests.java

@@ -95,6 +95,9 @@ public class ElasticsearchMappingsTests extends ESTestCase {
         overridden.add(Quantiles.TYPE.getPreferredName());
         overridden.add(TimingStats.TYPE.getPreferredName());
         overridden.add(DatafeedTimingStats.TYPE.getPreferredName());
+        // This is a special case so that categorical job results can be paired easily with anomaly results
+        // This is acceptable as both mappings are keyword for the results documents and for category definitions
+        overridden.add(CategoryDefinition.MLCATEGORY.getPreferredName());
 
         Set<String> expected = collectResultsDocFieldNames();
         expected.removeAll(overridden);

+ 2 - 0
x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/persistence/JobResultsProvider.java

@@ -825,6 +825,8 @@ public class JobResultsProvider {
         if (categoryId != null) {
             categoryIdQuery = QueryBuilders.termQuery(CategoryDefinition.CATEGORY_ID.getPreferredName(), categoryId);
         } else if (from != null && size != null) {
+            // Note: Even though category definitions currently have a result_type field, this was not the case for older versions
+            // So, until at least 9.x, this existsQuery is still the preferred way to gather category definition objects
             categoryIdQuery = QueryBuilders.existsQuery(CategoryDefinition.CATEGORY_ID.getPreferredName());
             sourceBuilder.from(from).size(size)
                     .sort(new FieldSortBuilder(CategoryDefinition.CATEGORY_ID.getPreferredName()).order(SortOrder.ASC));