Browse Source

[ML][Data Frame] adding dest.index and id validations (#43053)

* [ML][Data Frame] adding dest.index and id validations

* adjusting message format

* Adjusting id validity pattern

* Update DataFrameStrings.java
Benjamin Trent 6 years ago
parent
commit
c68ea83efb

+ 4 - 0
x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/dataframe/DataFrameMessages.java

@@ -65,6 +65,10 @@ public class DataFrameMessages {
     public static final String FAILED_TO_PARSE_TRANSFORM_CHECKPOINTS =
             "Failed to parse transform checkpoints for [{0}]";
 
+
+    public static final String ID_TOO_LONG = "The id cannot contain more than {0} characters.";
+    public static final String INVALID_ID = "Invalid {0}; ''{1}'' can contain lowercase alphanumeric (a-z and 0-9), hyphens or " +
+        "underscores; must start and end with alphanumeric";
     private DataFrameMessages() {
     }
 

+ 25 - 0
x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/dataframe/action/PutDataFrameTransformAction.java

@@ -15,12 +15,18 @@ import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.xcontent.ToXContentObject;
 import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.indices.InvalidIndexNameException;
+import org.elasticsearch.xpack.core.dataframe.DataFrameField;
 import org.elasticsearch.xpack.core.dataframe.transforms.DataFrameTransformConfig;
+import org.elasticsearch.xpack.core.dataframe.utils.DataFrameStrings;
+import org.elasticsearch.xpack.core.dataframe.DataFrameMessages;
 
 import java.io.IOException;
+import java.util.Locale;
 import java.util.Objects;
 
 import static org.elasticsearch.action.ValidateActions.addValidationError;
+import static org.elasticsearch.cluster.metadata.MetaDataCreateIndexService.validateIndexOrAliasName;
 
 public class PutDataFrameTransformAction extends Action<AcknowledgedResponse> {
 
@@ -67,6 +73,25 @@ public class PutDataFrameTransformAction extends Action<AcknowledgedResponse> {
             for(String failure : config.getPivotConfig().aggFieldValidation()) {
                 validationException = addValidationError(failure, validationException);
             }
+            String destIndex = config.getDestination().getIndex();
+            try {
+                validateIndexOrAliasName(destIndex, InvalidIndexNameException::new);
+                if (!destIndex.toLowerCase(Locale.ROOT).equals(destIndex)) {
+                    validationException = addValidationError("dest.index [" + destIndex +"] must be lowercase", validationException);
+                }
+            } catch (InvalidIndexNameException ex) {
+                validationException = addValidationError(ex.getMessage(), validationException);
+            }
+            if (DataFrameStrings.isValidId(config.getId()) == false) {
+                validationException = addValidationError(
+                    DataFrameMessages.getMessage(DataFrameMessages.INVALID_ID, DataFrameField.ID.getPreferredName(), config.getId()),
+                    validationException);
+            }
+            if (DataFrameStrings.hasValidLengthForId(config.getId()) == false) {
+                validationException = addValidationError(
+                    DataFrameMessages.getMessage(DataFrameMessages.ID_TOO_LONG, DataFrameStrings.ID_LENGTH_LIMIT),
+                    validationException);
+            }
             return validationException;
         }
 

+ 46 - 0
x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/dataframe/utils/DataFrameStrings.java

@@ -0,0 +1,46 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+package org.elasticsearch.xpack.core.dataframe.utils;
+
+import org.elasticsearch.cluster.metadata.MetaData;
+
+import java.util.regex.Pattern;
+
+/**
+ * Yet Another String utilities class.
+ */
+public final class DataFrameStrings {
+
+    /**
+     * Valid user id pattern.
+     * Matches a string that contains characters, digits, hyphens, underscores or dots.
+     * The string may start and end only in characters or digits.
+     * Note that '.' is allowed but not documented.
+     */
+    private static final Pattern VALID_ID_CHAR_PATTERN = Pattern.compile("[a-zA-Z0-9](?:[a-zA-Z0-9_\\-\\.]*[a-zA-Z0-9])?");
+
+    public static final int ID_LENGTH_LIMIT = 64;
+
+    private DataFrameStrings() {
+    }
+
+    public static boolean isValidId(String id) {
+        return id != null && VALID_ID_CHAR_PATTERN.matcher(id).matches() && !MetaData.ALL.equals(id);
+    }
+
+    /**
+     * Checks if the given {@code id} has a valid length.
+     * We keep IDs in a length shorter or equal than {@link #ID_LENGTH_LIMIT}
+     * in order to avoid unfriendly errors when storing docs with
+     * more than 512 bytes.
+     *
+     * @param id the id
+     * @return {@code true} if the id has a valid length
+     */
+    public static boolean hasValidLengthForId(String id) {
+        return id.length() <= ID_LENGTH_LIMIT;
+    }
+}

+ 1 - 1
x-pack/plugin/data-frame/src/main/java/org/elasticsearch/xpack/dataframe/action/TransportPutDataFrameTransformAction.java

@@ -153,7 +153,7 @@ public class TransportPutDataFrameTransformAction
         final String[] concreteDest =
             indexNameExpressionResolver.concreteIndexNames(clusterState, IndicesOptions.lenientExpandOpen(), destIndex);
 
-        if (concreteDest.length > 1 || Regex.isSimpleMatchPattern(destIndex)) {
+        if (concreteDest.length > 1) {
             listener.onFailure(new ElasticsearchStatusException(
                 DataFrameMessages.getMessage(DataFrameMessages.REST_PUT_DATA_FRAME_DEST_SINGLE_INDEX, destIndex),
                 RestStatus.BAD_REQUEST

+ 64 - 17
x-pack/plugin/src/test/resources/rest-api-spec/test/data_frame/transforms_crud.yml

@@ -208,23 +208,6 @@ setup:
             }
           }
 ---
-"Test transform where dest is a simple index pattern":
-  - do:
-      catch: /Destination index .* should refer to a single index/
-      data_frame.put_data_frame_transform:
-        transform_id: "airline-transform"
-        body: >
-          {
-            "source": {
-              "index": ["airline-data*"]
-            },
-            "dest": { "index": "destination*" },
-            "pivot": {
-              "group_by": { "airline": {"terms": {"field": "airline"}}},
-              "aggs": {"avg_response": {"avg": {"field": "responsetime"}}}
-            }
-          }
----
 "Test alias scenarios":
   - do:
       indices.create:
@@ -364,3 +347,67 @@ setup:
               "aggs": {"airline.responsetime": {"avg": {"field": "responsetime"}}}
             }
           }
+---
+"Test invalid data frame id":
+  - do:
+      catch: /can contain lowercase alphanumeric \(a-z and 0-9\), hyphens or underscores; must start and end with alphanumeric/
+      data_frame.put_data_frame_transform:
+        transform_id: "!@#$%^&*(duplicate-field-transform"
+        body: >
+          {
+            "source": {
+              "index": "source-index"
+            },
+            "dest": { "index": "dest-index" },
+            "pivot": {
+              "group_by": { "airline.id": {"terms": {"field": "airline"}}},
+              "aggs": {"airline.response": {"avg": {"field": "responsetime"}}}
+            }
+          }
+  - do:
+      catch: /The id cannot contain more than 64 character/
+      data_frame.put_data_frame_transform:
+        transform_id: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+        body: >
+          {
+            "source": {
+              "index": "source-index"
+            },
+            "dest": { "index": "dest-index" },
+            "pivot": {
+              "group_by": { "airline.id": {"terms": {"field": "airline"}}},
+              "aggs": {"airline.response": {"avg": {"field": "responsetime"}}}
+            }
+          }
+---
+"Test invalid destination index name":
+  - do:
+      catch: /dest\.index \[DeStInAtIoN\] must be lowercase/
+      data_frame.put_data_frame_transform:
+        transform_id: "airline-transform"
+        body: >
+          {
+            "source": {
+              "index": "airline-data"
+            },
+            "dest": { "index": "DeStInAtIoN" },
+            "pivot": {
+              "group_by": { "airline": {"terms": {"field": "airline"}}},
+              "aggs": {"avg_response": {"avg": {"field": "responsetime"}}}
+            }
+          }
+  - do:
+      catch: /Invalid index name \[destination#dest\], must not contain \'#\'/
+      data_frame.put_data_frame_transform:
+        transform_id: "airline-transform"
+        body: >
+          {
+            "source": {
+              "index": "airline-data"
+            },
+            "dest": { "index": "destination#dest" },
+            "pivot": {
+              "group_by": { "airline": {"terms": {"field": "airline"}}},
+              "aggs": {"avg_response": {"avg": {"field": "responsetime"}}}
+            }
+          }