1
0
Эх сурвалжийг харах

Introduce the dissect library (#32297)

The dissect library will be used for the ingest node as an alternative
to Grok to split a string based on a pattern. Dissect differs from
Grok in that regular expressions are not used to split the string.
Note - Regular expressions are used during construction of the
objects, but not in the hot path.

A dissect pattern takes the form of: '%{a} %{b},%{c}' which is
composed of 3 keys (a,b,c) and two delimiters (space and comma).
This dissect pattern will match a string of the form: 'foo bar,baz'
and will result in a key/value pairing of 'a=foo, b=bar, and c=baz'.
See the comments in DissectParser for a full explanation.

This commit does not include the ingest node processor that will consume
it. However, the consumption should be a trivial mapping between the
key/value pairing returned by the parser and the key/value pairing
needed for the IngestDocument.
Jake Landis 7 жил өмнө
parent
commit
be62092060

+ 50 - 0
libs/dissect/build.gradle

@@ -0,0 +1,50 @@
+import org.elasticsearch.gradle.precommit.PrecommitTasks
+
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+// Base name used for the archives produced by this project.
+archivesBaseName = 'elasticsearch-dissect'
+
+dependencies {
+     // The test framework is added for regular builds and for the Eclipse ":libs:dissect-tests" project,
+     // but not for the Eclipse project that only holds the main sources.
+     if (isEclipse == false || project.path == ":libs:dissect-tests") {
+        testCompile("org.elasticsearch.test:framework:${version}") {
+            // NOTE(review): presumably excluded so the test framework does not drag in a second copy of this library - confirm
+            exclude group: 'org.elasticsearch', module: 'dissect'
+        }
+    }
+    // Jackson is only placed on the test classpath.
+    testCompile "com.fasterxml.jackson.core:jackson-core:${versions.jackson}"
+    testCompile("com.fasterxml.jackson.core:jackson-annotations:${versions.jackson}")
+    testCompile("com.fasterxml.jackson.core:jackson-databind:${versions.jackson}")
+}
+
+// The forbidden API check of the main sources uses only the JDK signatures file.
+forbiddenApisMain {
+    signaturesURLs = [PrecommitTasks.getResource('/forbidden/jdk-signatures.txt')]
+}
+
+if (isEclipse) {
+    // in eclipse the project is under a fake root, we need to change around the source sets
+    sourceSets {
+        if (project.path == ":libs:dissect") {
+            // the main project only sees the main sources and resources
+            main.java.srcDirs = ['java']
+            main.resources.srcDirs = ['resources']
+        } else {
+            // any other path (the tests project) only sees the test sources and resources
+            test.java.srcDirs = ['java']
+            test.resources.srcDirs = ['resources']
+        }
+    }
+}

+ 3 - 0
libs/dissect/src/main/eclipse-build.gradle

@@ -0,0 +1,3 @@
+
+// This is just a shell Gradle file so that Eclipse can have separate projects for the dissect sources and tests.
+apply from: '../../build.gradle'

+ 57 - 0
libs/dissect/src/main/java/org/elasticsearch/dissect/DissectException.java

@@ -0,0 +1,57 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.dissect;
+
+/**
+ * Base type of every exception raised by the dissect library. Callers can catch this type to handle any dissect failure, or a more
+ * specific nested subclass where it is accessible.
+ */
+public abstract class DissectException extends RuntimeException {
+    DissectException(String message) {
+        super(message);
+    }
+
+    /**
+     * Raised when a dissect pattern can not be parsed.
+     */
+    static class PatternParse extends DissectException {
+        PatternParse(String pattern, String reason) {
+            super(describe(pattern, reason));
+        }
+
+        // builds the message from the offending pattern and the reason it was rejected
+        private static String describe(String pattern, String reason) {
+            return "Unable to parse pattern: " + pattern + " Reason: " + reason;
+        }
+    }
+
+    /**
+     * Raised when a single dissect key can not be parsed.
+     */
+    static class KeyParse extends DissectException {
+        KeyParse(String key, String reason) {
+            super(describe(key, reason));
+        }
+
+        // builds the message from the offending key and the reason it was rejected
+        private static String describe(String key, String reason) {
+            return "Unable to parse key: " + key + " Reason: " + reason;
+        }
+    }
+
+    /**
+     * Raised when the source string does not match the dissect pattern.
+     */
+    static class FindMatch extends DissectException {
+        FindMatch(String pattern, String source) {
+            super(describe(pattern, source));
+        }
+
+        // builds the message from the pattern and the source string it was applied to
+        private static String describe(String pattern, String source) {
+            return "Unable to find match for dissect pattern: " + pattern + " against source: " + source;
+        }
+    }
+}

+ 191 - 0
libs/dissect/src/main/java/org/elasticsearch/dissect/DissectKey.java

@@ -0,0 +1,191 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.dissect;
+
+import java.util.EnumSet;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * <p>A Key of a dissect pattern. This class models the name and modifiers and provides some validation.</p>
+ * <p>For dissect pattern of {@code %{a} %{+a} %{b}} the dissect keys are:
+ * <ul>
+ * <li>{@code a}</li>
+ * <li>{@code +a}</li>
+ * <li>{@code b}</li>
+ * </ul>
+ * This class represents a single key.
+ * <p>A single key is composed of a name and its modifiers. For the key {@code +a}, {@code a} is the name and {@code +} is the modifier.
+ * <p>Instances are immutable once constructed.
+ * @see DissectParser
+ */
+public final class DissectKey {
+    //a left modifier, the name, and an optional trailing '->'
+    private static final Pattern LEFT_MODIFIER_PATTERN = Pattern.compile("([+*&?])(.*?)(->)?$", Pattern.DOTALL);
+    //the name and an optional trailing '->'
+    private static final Pattern RIGHT_PADDING_PATTERN = Pattern.compile("^(.*?)(->)?$", Pattern.DOTALL);
+    //'+', the name, '/', the numeric append position, and an optional trailing '->'
+    private static final Pattern APPEND_WITH_ORDER_PATTERN = Pattern.compile("[+](.*?)(/)([0-9]+)(->)?$", Pattern.DOTALL);
+    private final Modifier modifier;
+    private final boolean skip;
+    private final boolean skipRightPadding;
+    private final int appendPosition;
+    private final String name;
+
+    /**
+     * Constructor - parses the String key into its name and modifier(s)
+     *
+     * @param key The key without the leading <code>%{</code> or trailing <code>}</code>, for example {@code a->}. A {@code null} key is
+     *            treated like the empty key, i.e. a key that is skipped.
+     * @throws DissectException.KeyParse if more than one modifier is present or if no key name can be determined
+     * @throws NumberFormatException if the append position after {@code /} does not fit into a {@code short}
+     */
+    DissectKey(String key) {
+        final String rawKey = key == null ? "" : key;
+        modifier = Modifier.findModifier(rawKey);
+        String parsedName = null;
+        boolean parsedSkip = false;
+        boolean parsedSkipRightPadding = false;
+        int parsedAppendPosition = 0;
+        Matcher matcher;
+        switch (modifier) {
+            case NONE:
+                matcher = RIGHT_PADDING_PATTERN.matcher(rawKey);
+                if (matcher.find()) {
+                    parsedName = matcher.group(1);
+                    parsedSkipRightPadding = matcher.group(2) != null;
+                    //a key without a name and without a modifier is skipped
+                    parsedSkip = parsedName.isEmpty();
+                }
+                break;
+            case NAMED_SKIP:
+            case APPEND:
+            case FIELD_NAME:
+            case FIELD_VALUE:
+                //all single character left modifiers share the same layout
+                matcher = LEFT_MODIFIER_PATTERN.matcher(rawKey);
+                if (matcher.find()) {
+                    parsedName = matcher.group(2);
+                    parsedSkipRightPadding = matcher.group(3) != null;
+                }
+                parsedSkip = modifier == Modifier.NAMED_SKIP;
+                break;
+            case APPEND_WITH_ORDER:
+                matcher = APPEND_WITH_ORDER_PATTERN.matcher(rawKey);
+                if (matcher.find()) {
+                    parsedName = matcher.group(1);
+                    parsedAppendPosition = Short.parseShort(matcher.group(3));
+                    parsedSkipRightPadding = matcher.group(4) != null;
+                }
+                break;
+        }
+
+        //only skipped keys are allowed to have an empty name
+        if (parsedName == null || (parsedName.isEmpty() && !parsedSkip)) {
+            throw new DissectException.KeyParse(key, "The key name could not be determined");
+        }
+        this.name = parsedName;
+        this.skip = parsedSkip;
+        this.skipRightPadding = parsedSkipRightPadding;
+        this.appendPosition = parsedAppendPosition;
+    }
+
+    /**
+     * Copy constructor to explicitly override the modifier.
+     * @param key The key to copy (except for the modifier)
+     * @param modifier the modifier to use for this copy
+     */
+    DissectKey(DissectKey key, DissectKey.Modifier modifier){
+        this.modifier = modifier;
+        this.skipRightPadding = key.skipRightPadding;
+        this.skip = key.skip;
+        this.name = key.name;
+        this.appendPosition = key.appendPosition;
+    }
+
+    /** @return the modifier of this key, {@link Modifier#NONE} if the key has none */
+    Modifier getModifier() {
+        return modifier;
+    }
+
+    /** @return true if the value matched for this key should not be part of the results */
+    boolean skip() {
+        return skip;
+    }
+
+    /** @return true if the key ended with {@code ->} */
+    boolean skipRightPadding() {
+        return skipRightPadding;
+    }
+
+    /** @return the position given after {@code /}, or 0 if the key has no explicit append position */
+    int getAppendPosition() {
+        return appendPosition;
+    }
+
+    /** @return the key name without any modifiers */
+    String getName() {
+        return name;
+    }
+
+    //generated
+    @Override
+    public String toString() {
+        return "DissectKey{" +
+            "modifier=" + modifier +
+            ", skip=" + skip +
+            ", appendPosition=" + appendPosition +
+            ", name='" + name + '\'' +
+            '}';
+    }
+
+    /**
+     * The modifiers that can be part of a key. Each constant carries the character that represents it inside of a key.
+     */
+    public enum Modifier {
+        NONE(""), APPEND_WITH_ORDER("/"), APPEND("+"), FIELD_NAME("*"), FIELD_VALUE("&"), NAMED_SKIP("?");
+
+        private static final Pattern MODIFIER_PATTERN = Pattern.compile("[/+*&?]");
+
+        private final String modifier;
+
+        @Override
+        public String toString() {
+            return modifier;
+        }
+
+        Modifier(final String modifier) {
+            this.modifier = modifier;
+        }
+
+        /**
+         * Looks up the constant for the given modifier character.
+         * @throws IllegalArgumentException if no constant uses the given string
+         */
+        //package private for testing
+        static Modifier fromString(String modifier) {
+            return EnumSet.allOf(Modifier.class).stream().filter(km -> km.modifier.equals(modifier))
+                .findFirst().orElseThrow(() -> new IllegalArgumentException("Found invalid modifier.")); //throw should never happen
+        }
+
+        /**
+         * Scans the key for modifier characters. The only combination of two modifiers that is accepted is {@code +} followed by
+         * {@code /}, which results in {@link #APPEND_WITH_ORDER}.
+         * @return the modifier found in the key, {@link #NONE} for a null or empty key or a key without modifier characters
+         * @throws DissectException.KeyParse if any other combination of modifiers is found
+         */
+        private static Modifier findModifier(String key) {
+            Modifier modifier = Modifier.NONE;
+            if (key != null && !key.isEmpty()) {
+                Matcher matcher = MODIFIER_PATTERN.matcher(key);
+                int matches = 0;
+                while (matcher.find()) {
+                    Modifier priorModifier = modifier;
+                    modifier = Modifier.fromString(matcher.group());
+                    if (++matches > 1 && !(APPEND.equals(priorModifier) && APPEND_WITH_ORDER.equals(modifier))) {
+                        throw new DissectException.KeyParse(key, "multiple modifiers are not allowed.");
+                    }
+                }
+            }
+            return modifier;
+        }
+    }
+}

+ 198 - 0
libs/dissect/src/main/java/org/elasticsearch/dissect/DissectMatch.java

@@ -0,0 +1,198 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.dissect;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+/**
+ * Collects the values found during a {@link DissectParser#parse(String)} call and assembles the final key/value map, applying the
+ * append and reference handling requested by the modifier of each key.
+ */
+final class DissectMatch {
+
+    private final String appendSeparator;
+    private final Map<String, String> results;
+    private final Map<String, String> simpleResults;
+    private final Map<String, ReferenceResult> referenceResults;
+    private final Map<String, AppendResult> appendResults;
+    private int implicitAppendOrder = -1000;
+    private final int maxMatches;
+    private final int maxResults;
+    private final int appendCount;
+    private final int referenceCount;
+    private final int simpleCount;
+    private int matches = 0;
+
+    DissectMatch(String appendSeparator, int maxMatches, int maxResults, int appendCount, int referenceCount) {
+        if (maxMatches <= 0 || maxResults <= 0) {
+            throw new IllegalArgumentException("Expected results are zero, can not construct DissectMatch");//should never happen
+        }
+        this.appendSeparator = appendSeparator;
+        this.maxMatches = maxMatches;
+        this.maxResults = maxResults;
+        this.appendCount = appendCount;
+        this.referenceCount = referenceCount;
+        this.simpleCount = maxMatches - referenceCount - appendCount;
+        this.results = new HashMap<>(maxResults);
+        //a holder is only allocated when the counts say it can be needed
+        this.simpleResults = mapOrNull(simpleCount);
+        this.referenceResults = mapOrNull(referenceCount);
+        this.appendResults = mapOrNull(appendCount);
+    }
+
+    //returns a map sized for the expected number of entries, or null when no entries are expected
+    private static <T> Map<String, T> mapOrNull(int expectedEntries) {
+        if (expectedEntries > 0) {
+            return new HashMap<>(expectedEntries);
+        }
+        return null;
+    }
+
+    /**
+     * Records a value that the parser found for a key. Every call counts as a match, values of skipped keys are dropped.
+     * @param key the {@link DissectKey}
+     * @param value the discovered value for the key
+     */
+    void add(DissectKey key, String value) {
+        matches++;
+        if (!key.skip()) {
+            store(key, value);
+        }
+    }
+
+    //files the value into the holder that belongs to the modifier of the key
+    private void store(DissectKey key, String value) {
+        final String keyName = key.getName();
+        switch (key.getModifier()) {
+            case NONE:
+                simpleResults.put(keyName, value);
+                break;
+            case APPEND:
+                //appends without an explicit position keep the order in which they were found
+                appendResultFor(keyName).addValue(value, implicitAppendOrder++);
+                break;
+            case APPEND_WITH_ORDER:
+                appendResultFor(keyName).addValue(value, key.getAppendPosition());
+                break;
+            case FIELD_NAME:
+                referenceResultFor(keyName).setKey(value);
+                break;
+            case FIELD_VALUE:
+                referenceResultFor(keyName).setValue(value);
+                break;
+        }
+    }
+
+    private AppendResult appendResultFor(String keyName) {
+        return appendResults.computeIfAbsent(keyName, k -> new AppendResult(appendSeparator));
+    }
+
+    private ReferenceResult referenceResultFor(String keyName) {
+        return referenceResults.computeIfAbsent(keyName, k -> new ReferenceResult());
+    }
+
+    /**
+     * @return true if a value has been added for every key of the pattern
+     */
+    boolean fullyMatched() {
+        return maxMatches == matches;
+    }
+
+    /**
+     * Checks if results are valid.
+     * @param results the results to check
+     * @return true if all dissect keys have been matched and the results are of the expected size.
+     */
+    boolean isValid(Map<String, String> results) {
+        if (!fullyMatched()) {
+            return false;
+        }
+        return results.size() == maxResults;
+    }
+
+    /**
+     * Builds the map of everything matched so far. The same map instance is cleared and refilled on every call. Pass the returned map
+     * to {@link #isValid(Map)} to find out if the match was fully successful.
+     *
+     * @return the map of the results.
+     */
+    Map<String, String> getResults() {
+        results.clear();
+        if (simpleCount > 0) {
+            results.putAll(simpleResults);
+        }
+        if (referenceCount > 0) {
+            for (ReferenceResult reference : referenceResults.values()) {
+                results.put(reference.getKey(), reference.getValue());
+            }
+        }
+        if (appendCount > 0) {
+            for (Map.Entry<String, AppendResult> appended : appendResults.entrySet()) {
+                results.put(appended.getKey(), appended.getValue().getAppendResult());
+            }
+        }
+
+        return results;
+    }
+
+    /**
+     * All the values that have to be joined into the single result of one key.
+     */
+    private static final class AppendResult {
+        private final List<AppendValue> values = new ArrayList<>();
+        private final String appendSeparator;
+
+        private AppendResult(String appendSeparator) {
+            this.appendSeparator = appendSeparator;
+        }
+
+        private void addValue(String value, int order) {
+            values.add(new AppendValue(value, order));
+        }
+
+        //sorts the values by their order and joins them with the separator
+        private String getAppendResult() {
+            Collections.sort(values);
+            return values.stream().map(AppendValue::getValue).collect(Collectors.joining(appendSeparator));
+        }
+    }
+
+    /**
+     * A single value of an append operation, sortable by the order it was given.
+     */
+    private static final class AppendValue implements Comparable<AppendValue> {
+        private final String value;
+        private final int order;
+
+        private AppendValue(String value, int order) {
+            this.value = value;
+            this.order = order;
+        }
+
+        private String getValue() {
+            return value;
+        }
+
+        private int getOrder() {
+            return order;
+        }
+
+        @Override
+        public int compareTo(AppendValue other) {
+            return Integer.compare(order, other.getOrder());
+        }
+    }
+
+    /**
+     * Holds the two halves of a reference: the value that becomes the key of the result and the value that becomes its value.
+     */
+    private static final class ReferenceResult {
+
+        private String key;
+        private String value;
+
+        private String getKey() {
+            return key;
+        }
+
+        private void setKey(String key) {
+            this.key = key;
+        }
+
+        private String getValue() {
+            return value;
+        }
+
+        private void setValue(String value) {
+            this.value = value;
+        }
+    }
+}

+ 310 - 0
libs/dissect/src/main/java/org/elasticsearch/dissect/DissectParser.java

@@ -0,0 +1,310 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.dissect;
+
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.EnumSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.function.Function;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+/**
+ * <p>Splits (dissects) a string into its parts based on a pattern.</p><p>A dissect pattern is composed of a set of keys and delimiters.
+ * For example the dissect pattern: <pre>%{a} %{b},%{c}</pre> has 3 keys (a,b,c) and two delimiters (space and comma). This pattern will
+ * match a string of the form: <pre>foo bar,baz</pre> and will result in a key/value pairing of <pre>a=foo, b=bar, and c=baz.</pre>
+ * <p>Matches are all or nothing. For example, the same pattern will NOT match <pre>foo bar baz</pre> since all of the delimiters did not
+ * match. (the comma did not match) Likewise the dissect pattern of <pre>%{a},%{b}:%{c}</pre> would not match <pre>foo,bar,baz</pre>
+ * since the colon never matches.
+ * <p>Dissect patterns can optionally have modifiers. These modifiers instruct the parser to change its behavior.
+ * <p>Modifiers appear to the left or the right of the key name. The supported modifiers are:
+ * <ul>
+ * <li>{@code ->} Instructs the parser to ignore repeating delimiters to the right of the key. Example: <pre>
+ * pattern: {@code %{a->} %{b} %{c}}
+ * string: {@code foo         bar baz}
+ * result: {@code a=foo, b=bar, c=baz}
+ * </pre></li>
+ * <li>{@code +} Instructs the parser to append this key's value to the value of the prior key with the same name. The values are
+ * joined with the append separator given to the constructor. Example (with an empty append separator): <pre>
+ * pattern: {@code %{a} %{+a} %{+a}}
+ * string: {@code foo bar baz}
+ * result: {@code a=foobarbaz}
+ * </pre></li>
+ * <li>{@code /} Instructs the parser to append this key's value to the value of a key based on the order specified after the
+ * {@code /}. Requires the {@code +} modifier to also be present in the key. Example: <pre>
+ * pattern: {@code %{a} %{+a/2} %{+a/1}}
+ * string: {@code foo bar baz}
+ * result: {@code a=foobazbar}
+ * </pre>
+ * </li>
+ * <li>{@code *} Instructs the parser to ignore the name of this key, instead use the value of key as the key name.
+ * Requires another key with the same name and the {@code &} modifier to be the value. Example: <pre>
+ * pattern: {@code %{*a} %{b} %{&a}}
+ * string: {@code foo bar baz}
+ * result: {@code foo=baz, b=bar}
+ * </pre></li>
+ * <li>{@code &} Instructs the parser to ignore this key and place the matched value to a key of the same name with the {@code *} modifier.
+ * Requires another key with the same name and the {@code *} modifier.
+ * Example: <pre>
+ * pattern: {@code %{*a} %{b} %{&a}}
+ * string: {@code foo bar baz}
+ * result: {@code foo=baz, b=bar}
+ * </pre></li>
+ * <li>{@code ?} Instructs the parser to ignore this key. The key name exists only for the purpose of human readability. Example
+ * <pre>
+ *  pattern: {@code %{a} %{?skipme} %{c}}
+ *  string: {@code foo bar baz}
+ *  result: {@code a=foo, c=baz}
+ * </pre>
+ * </ul>
+ * <p>Empty key names patterns are also supported. They behave just like the {@code ?} modifier, except the name is not required.
+ * The result will simply be ignored. Example
+ * <pre>
+ * pattern: {@code %{a} %{} %{c}}
+ * string: {@code foo bar baz}
+ * result: {@code a=foo, c=baz}
+ * </pre>
+ *
+ * <p>
+ * Inspired by the Logstash Dissect Filter by Guy Boertje
+ */
+public final class DissectParser {
+    //everything in front of the first '%' of the pattern
+    private static final Pattern LEADING_DELIMITER_PATTERN = Pattern.compile("^(.*?)%");
+    //a key (group 1) followed by its delimiter (group 2), the delimiter runs up to the next '%'
+    private static final Pattern KEY_DELIMITER_FIELD_PATTERN = Pattern.compile("%\\{([^}]*?)}([^%]*)", Pattern.DOTALL);
+    private static final EnumSet<DissectKey.Modifier> ASSOCIATE_MODIFIERS = EnumSet.of(
+        DissectKey.Modifier.FIELD_NAME,
+        DissectKey.Modifier.FIELD_VALUE);
+    private static final EnumSet<DissectKey.Modifier> APPEND_MODIFIERS = EnumSet.of(
+        DissectKey.Modifier.APPEND,
+        DissectKey.Modifier.APPEND_WITH_ORDER);
+    private static final Function<DissectPair, String> KEY_NAME = val -> val.getKey().getName();
+    private final List<DissectPair> matchPairs;
+    private final String pattern;
+    private final String leadingDelimiter;
+    private final int maxMatches;
+    private final int maxResults;
+    private final int appendCount;
+    private final int referenceCount;
+    private final String appendSeparator;
+
+    /**
+     * Parses and validates the dissect pattern. All regular expression work happens here, {@link #parse(String)} does not use any.
+     *
+     * @param pattern the dissect pattern, for example {@code %{a} %{b},%{c}}
+     * @param appendSeparator the separator placed between appended values, {@code null} is treated as the empty string
+     * @throws DissectException.PatternParse if the pattern has no keys, has only skipped keys, or if a {@code *} or {@code &} key
+     *         is not part of a pair
+     * @throws DissectException.KeyParse if one of the keys can not be parsed
+     */
+    public DissectParser(String pattern, String appendSeparator) {
+        this.pattern = pattern;
+        this.appendSeparator = appendSeparator == null ? "" : appendSeparator;
+        Matcher matcher = LEADING_DELIMITER_PATTERN.matcher(pattern);
+        //the expression is anchored to the start of the pattern, so there is at most one match
+        this.leadingDelimiter = matcher.find() ? matcher.group(1) : "";
+        List<DissectPair> matchPairs = new ArrayList<>();
+        matcher = KEY_DELIMITER_FIELD_PATTERN.matcher(pattern.substring(leadingDelimiter.length()));
+        while (matcher.find()) {
+            DissectKey key = new DissectKey(matcher.group(1));
+            String delimiter = matcher.group(2);
+            matchPairs.add(new DissectPair(key, delimiter));
+        }
+        this.maxMatches = matchPairs.size();
+        //the number of distinct names of the keys that are not skipped
+        this.maxResults = (int) matchPairs.stream()
+            .filter(dissectPair -> !dissectPair.getKey().skip()).map(KEY_NAME).distinct().count();
+        if (this.maxMatches == 0 || maxResults == 0) {
+            throw new DissectException.PatternParse(pattern, "Unable to find any keys or delimiters.");
+        }
+        //append validation - look through all of the keys to see if there are any keys that need to participate in an append operation
+        // but don't have the '+' defined
+        Set<String> appendKeyNames = matchPairs.stream()
+            .filter(dissectPair -> APPEND_MODIFIERS.contains(dissectPair.getKey().getModifier()))
+            .map(KEY_NAME).collect(Collectors.toSet());
+        if (appendKeyNames.size() > 0) {
+            List<DissectPair> modifiedMatchPairs = new ArrayList<>(matchPairs.size());
+            for (DissectPair p : matchPairs) {
+                if (p.getKey().getModifier().equals(DissectKey.Modifier.NONE) && appendKeyNames.contains(p.getKey().getName())) {
+                    //a plain key that shares its name with an append key becomes an append key itself
+                    modifiedMatchPairs.add(new DissectPair(new DissectKey(p.getKey(), DissectKey.Modifier.APPEND), p.getDelimiter()));
+                } else {
+                    modifiedMatchPairs.add(p);
+                }
+            }
+            matchPairs = modifiedMatchPairs;
+        }
+        appendCount = appendKeyNames.size();
+
+        //reference validation - ensure that '*' and '&' come in pairs
+        Map<String, List<DissectPair>> referenceGroupings = matchPairs.stream()
+            .filter(dissectPair -> ASSOCIATE_MODIFIERS.contains(dissectPair.getKey().getModifier()))
+            .collect(Collectors.groupingBy(KEY_NAME));
+        for (Map.Entry<String, List<DissectPair>> entry : referenceGroupings.entrySet()) {
+            if (entry.getValue().size() != 2) {
+                throw new DissectException.PatternParse(pattern, "Found invalid key/reference associations: '"
+                    + entry.getValue().stream().map(KEY_NAME).collect(Collectors.joining(",")) +
+                    "' Please ensure each '*<key>' is matched with a matching '&<key>");
+            }
+        }
+
+        referenceCount = referenceGroupings.size() * 2;
+        this.matchPairs = Collections.unmodifiableList(matchPairs);
+    }
+
+
+    /**
+     * <p>Entry point to dissect a string into its parts.</p>
+     *
+     * @param inputString The string to dissect
+     * @return the key/value Map of the results
+     * @throws DissectException if unable to dissect a pair into its parts.
+     */
+    public Map<String, String> parse(String inputString) {
+        /*
+         * This implements a naive string matching algorithm. The string is walked left to right, comparing each byte against
+         * another string's bytes looking for matches. If the bytes match, then a second cursor looks ahead to see if all the bytes
+         * of the other string match. If they all match, record it and advance the primary cursor to the match point. If it can not
+         * match all of the bytes then progress the main cursor. Repeat till the end of the input string. Since the string being
+         * searched for (the delimiter) is generally small and rare the naive approach is efficient.
+         *
+         * In this case the string that is walked is the input string, and the string being searched for is the current delimiter.
+         * For example for a dissect pattern of %{a},%{b}:%{c} the delimiters (comma then colon) are searched for in the
+         * input string. At class construction the list of keys+delimiters are found (dissectPairs), which allows the use of that
+         * ordered list to know which delimiter to use for the search. The delimiter is progressed once the current delimiter is matched.
+         *
+         * There are two special cases that require additional parsing beyond the standard naive algorithm. Consecutive delimiters
+         * should result in empty matches unless the -> is provided. For example given the dissect pattern of
+         * %{a},%{b},%{c},%{d} and input string of foo,,, the match should be successful with empty values for b,c and d.
+         * However, if the key modifier -> is present it will simply skip over any delimiters just to the right of the key
+         * without assigning any values. For example %{a->},%{b} will match the input string of foo,,,,,,bar with a=foo and
+         * b=bar.
+         */
+        DissectMatch dissectMatch = new DissectMatch(appendSeparator, maxMatches, maxResults, appendCount, referenceCount);
+        Iterator<DissectPair> it = matchPairs.iterator();
+        //ensure leading delimiter matches
+        if (inputString != null && inputString.length() > leadingDelimiter.length() && inputString.startsWith(leadingDelimiter)) {
+            byte[] input = inputString.getBytes(StandardCharsets.UTF_8);
+            //grab the first key/delimiter pair
+            DissectPair dissectPair = it.next();
+            DissectKey key = dissectPair.getKey();
+            byte[] delimiter = dissectPair.getDelimiter().getBytes(StandardCharsets.UTF_8);
+            //start dissection after the leading delimiter, the cursor walks bytes so the encoded length is needed, not the char count
+            int i = leadingDelimiter.getBytes(StandardCharsets.UTF_8).length;
+            int valueStart = i;
+            //start walking the input string byte by byte, look ahead for matches where needed
+            //if a match is found jump forward to the end of the match
+            while (i < input.length) {
+                if (delimiter.length == 0 || !matchesAt(input, i, delimiter)) {
+                    i++;
+                    continue;
+                }
+                //found a full delimiter match, record the key/value tuple
+                byte[] value = Arrays.copyOfRange(input, valueStart, i);
+                dissectMatch.add(key, new String(value, StandardCharsets.UTF_8));
+                //jump to the end of the match
+                i += delimiter.length;
+                //look for consecutive delimiters (e.g. a,,,,d,e)
+                while (i < input.length && matchesAt(input, i, delimiter)) {
+                    //jump to the end of the match
+                    i += delimiter.length;
+                    if (!key.skipRightPadding()) {
+                        //progress the keys/delimiter if possible
+                        if (!it.hasNext()) {
+                            break; //the inner while loop
+                        }
+                        dissectPair = it.next();
+                        key = dissectPair.getKey();
+                        //add the key with an empty value for the empty delimiter
+                        dissectMatch.add(key, "");
+                    }
+                }
+                //progress the keys/delimiter if possible
+                if (!it.hasNext()) {
+                    break; //the outer while loop
+                }
+                dissectPair = it.next();
+                key = dissectPair.getKey();
+                delimiter = dissectPair.getDelimiter().getBytes(StandardCharsets.UTF_8);
+                //i is one byte after the last found delimiter, aka the start of the next value. The cursor is intentionally not
+                //advanced here so that a delimiter directly at the start of the next value (an empty value) is still found
+                valueStart = i;
+            }
+            //the last key, grab the rest of the input (unless consecutive delimiters already grabbed the last key)
+            //and there is no trailing delimiter
+            if (!dissectMatch.fullyMatched() && delimiter.length == 0 ) {
+                byte[] value = Arrays.copyOfRange(input, valueStart, input.length);
+                String valueString = new String(value, StandardCharsets.UTF_8);
+                dissectMatch.add(key, valueString);
+            }
+        }
+        Map<String, String> results = dissectMatch.getResults();
+
+        if (!dissectMatch.isValid(results)) {
+            throw new DissectException.FindMatch(pattern, inputString);
+        }
+        return results;
+    }
+
+    /**
+     * Checks if all the bytes of the delimiter are found in the input, starting at the given offset.
+     */
+    private static boolean matchesAt(byte[] input, int offset, byte[] delimiter) {
+        if (offset + delimiter.length > input.length) {
+            return false;
+        }
+        for (int j = 0; j < delimiter.length; j++) {
+            if (input[offset + j] != delimiter[j]) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * A tuple class to hold the dissect key and the delimiter that follows it in the pattern
+     */
+    private static final class DissectPair {
+
+        private final DissectKey key;
+        private final String delimiter;
+
+        private DissectPair(DissectKey key, String delimiter) {
+            this.key = key;
+            this.delimiter = delimiter;
+        }
+
+        private DissectKey getKey() {
+            return key;
+        }
+
+        private String getDelimiter() {
+            return delimiter;
+        }
+    }
+
+}
+
+
+

+ 7 - 0
libs/dissect/src/test/eclipse-build.gradle

@@ -0,0 +1,7 @@
+
+// this is just a shell gradle file for eclipse to have separate projects for dissect src and tests
+apply from: '../../build.gradle'
+
+dependencies {
+  testCompile project(':libs:dissect')
+}

+ 178 - 0
libs/dissect/src/test/java/org/elasticsearch/dissect/DissectKeyTests.java

@@ -0,0 +1,178 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.dissect;
+
+import org.elasticsearch.test.ESTestCase;
+import org.hamcrest.CoreMatchers;
+
+import java.util.EnumSet;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.is;
+
+public class DissectKeyTests extends ESTestCase {
+
+    public void testNoModifier() {
+        String keyName = randomAlphaOfLengthBetween(1, 10);
+        DissectKey dissectKey = new DissectKey(keyName);
+        assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.NONE));
+        assertThat(dissectKey.skip(), is(false));
+        assertThat(dissectKey.skipRightPadding(), is(false));
+        assertThat(dissectKey.getAppendPosition(), equalTo(0));
+        assertThat(dissectKey.getName(), equalTo(keyName));
+    }
+
+    public void testAppendModifier() {
+        String keyName = randomAlphaOfLengthBetween(1, 10);
+        DissectKey dissectKey = new DissectKey("+" + keyName);
+        assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.APPEND));
+        assertThat(dissectKey.skip(), is(false));
+        assertThat(dissectKey.skipRightPadding(), is(false));
+        assertThat(dissectKey.getAppendPosition(), equalTo(0));
+        assertThat(dissectKey.getName(), equalTo(keyName));
+    }
+
+    public void testAppendWithOrderModifier() {
+        String keyName = randomAlphaOfLengthBetween(1, 10);
+        int length = randomIntBetween(1, 100);
+        DissectKey dissectKey = new DissectKey("+" + keyName + "/" + length);
+        assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.APPEND_WITH_ORDER));
+        assertThat(dissectKey.skip(), is(false));
+        assertThat(dissectKey.skipRightPadding(), is(false));
+        assertThat(dissectKey.getAppendPosition(), equalTo(length));
+        assertThat(dissectKey.getName(), equalTo(keyName));
+    }
+
+    public void testAppendWithOrderModifierNoName() {
+        int length = randomIntBetween(1, 100);
+        DissectException e = expectThrows(DissectException.class, () -> new DissectKey("+/" + length));
+        assertThat(e.getMessage(), CoreMatchers.containsString("Unable to parse key"));
+    }
+
+    public void testOrderModifierWithoutAppend() {
+        String keyName = randomAlphaOfLengthBetween(1, 10);
+        int length = randomIntBetween(1, 100);
+        DissectException e = expectThrows(DissectException.class, () -> new DissectKey(keyName + "/" + length));
+        assertThat(e.getMessage(), CoreMatchers.containsString("Unable to parse key"));
+    }
+
+    public void testFieldNameModifier() {
+        String keyName = randomAlphaOfLengthBetween(1, 10);
+        DissectKey dissectKey = new DissectKey("*" + keyName);
+        assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.FIELD_NAME));
+        assertThat(dissectKey.skip(), is(false));
+        assertThat(dissectKey.skipRightPadding(), is(false));
+        assertThat(dissectKey.getAppendPosition(), equalTo(0));
+        assertThat(dissectKey.getName(), equalTo(keyName));
+    }
+
+    public void testFieldValueModifiers() {
+        String keyName = randomAlphaOfLengthBetween(1, 10);
+        DissectKey dissectKey = new DissectKey("&" + keyName);
+        assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.FIELD_VALUE));
+        assertThat(dissectKey.skip(), is(false));
+        assertThat(dissectKey.skipRightPadding(), is(false));
+        assertThat(dissectKey.getAppendPosition(), equalTo(0));
+        assertThat(dissectKey.getName(), equalTo(keyName));
+    }
+
+    public void testRightPaddingModifiers() {
+        String keyName = randomAlphaOfLengthBetween(1, 10);
+        DissectKey dissectKey = new DissectKey(keyName + "->");
+        assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.NONE));
+        assertThat(dissectKey.skip(), is(false));
+        assertThat(dissectKey.skipRightPadding(), is(true));
+        assertThat(dissectKey.getAppendPosition(), equalTo(0));
+        assertThat(dissectKey.getName(), equalTo(keyName));
+
+        dissectKey = new DissectKey("*" + keyName + "->");
+        assertThat(dissectKey.skipRightPadding(), is(true));
+
+        dissectKey = new DissectKey("&" + keyName + "->");
+        assertThat(dissectKey.skipRightPadding(), is(true));
+
+        dissectKey = new DissectKey("+" + keyName + "->");
+        assertThat(dissectKey.skipRightPadding(), is(true));
+
+        dissectKey = new DissectKey("?" + keyName + "->");
+        assertThat(dissectKey.skipRightPadding(), is(true));
+
+        dissectKey = new DissectKey("+" + keyName + "/2->");
+        assertThat(dissectKey.skipRightPadding(), is(true));
+    }
+
+    public void testMultipleLeftModifiers() {
+        String keyName = randomAlphaOfLengthBetween(1, 10);
+        List<String> validModifiers = EnumSet.allOf(DissectKey.Modifier.class).stream()
+            .filter(m -> !m.equals(DissectKey.Modifier.NONE))
+            .map(DissectKey.Modifier::toString)
+            .collect(Collectors.toList());
+        String modifier1 = randomFrom(validModifiers);
+        String modifier2 = randomFrom(validModifiers);
+        DissectException e = expectThrows(DissectException.class, () -> new DissectKey(modifier1 + modifier2 + keyName));
+        assertThat(e.getMessage(), CoreMatchers.containsString("Unable to parse key"));
+    }
+
+    public void testSkipKey() {
+        String keyName = "";
+        DissectKey dissectKey = new DissectKey(keyName);
+        assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.NONE));
+        assertThat(dissectKey.skip(), is(true));
+        assertThat(dissectKey.skipRightPadding(), is(false));
+        assertThat(dissectKey.getAppendPosition(), equalTo(0));
+        assertThat(dissectKey.getName(), equalTo(keyName));
+    }
+    public void testNamedSkipKey() {
+        String keyName = "myname";
+        DissectKey dissectKey = new DissectKey("?" +keyName);
+        assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.NAMED_SKIP));
+        assertThat(dissectKey.skip(), is(true));
+        assertThat(dissectKey.skipRightPadding(), is(false));
+        assertThat(dissectKey.getAppendPosition(), equalTo(0));
+        assertThat(dissectKey.getName(), equalTo(keyName));
+    }
+
+    public void testSkipKeyWithPadding() {
+        String keyName = "";
+        DissectKey dissectKey = new DissectKey(keyName  + "->");
+        assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.NONE));
+        assertThat(dissectKey.skip(), is(true));
+        assertThat(dissectKey.skipRightPadding(), is(true));
+        assertThat(dissectKey.getAppendPosition(), equalTo(0));
+        assertThat(dissectKey.getName(), equalTo(keyName));
+    }
+    public void testNamedEmptySkipKeyWithPadding() {
+        String keyName = "";
+        DissectKey dissectKey = new DissectKey("?" +keyName + "->");
+        assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.NAMED_SKIP));
+        assertThat(dissectKey.skip(), is(true));
+        assertThat(dissectKey.skipRightPadding(), is(true));
+        assertThat(dissectKey.getAppendPosition(), equalTo(0));
+        assertThat(dissectKey.getName(), equalTo(keyName));
+    }
+
+    public void testInvalidModifiers() {
+        //should never happen due to regex
+        IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> DissectKey.Modifier.fromString("x"));
+        assertThat(e.getMessage(), CoreMatchers.containsString("invalid modifier"));
+    }
+}

+ 93 - 0
libs/dissect/src/test/java/org/elasticsearch/dissect/DissectMatchTests.java

@@ -0,0 +1,93 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.dissect;
+
+import org.elasticsearch.common.collect.MapBuilder;
+import org.elasticsearch.test.ESTestCase;
+
+import java.nio.charset.StandardCharsets;
+import java.util.Map;
+import java.util.stream.IntStream;
+
+import static org.hamcrest.Matchers.equalTo;
+
+public class DissectMatchTests extends ESTestCase {
+
+    public void testIllegalArgs() {
+        expectThrows(IllegalArgumentException.class, () -> new DissectMatch("", 0, 1, 0, 0));
+        expectThrows(IllegalArgumentException.class, () -> new DissectMatch("", 1, 0, 0, 0));
+    }
+
+    public void testValidAndFullyMatched() {
+        int expectedMatches = randomIntBetween(1, 26);
+        DissectMatch dissectMatch = new DissectMatch("", expectedMatches, expectedMatches, 0, 0);
+        IntStream.range(97, 97 + expectedMatches)  //allow for a-z values
+            .forEach(i -> dissectMatch.add(new DissectKey(new String(new byte[]{(byte) i}, StandardCharsets.UTF_8)), ""));
+        assertThat(dissectMatch.fullyMatched(), equalTo(true));
+        assertThat(dissectMatch.isValid(dissectMatch.getResults()), equalTo(true));
+    }
+
+    public void testNotValidAndFullyMatched() {
+        int expectedMatches = randomIntBetween(1, 26);
+        DissectMatch dissectMatch = new DissectMatch("", expectedMatches, expectedMatches, 0, 0);
+        IntStream.range(97, 97 + expectedMatches - 1)  //allow for a-z values
+            .forEach(i -> dissectMatch.add(new DissectKey(new String(new byte[]{(byte) i}, StandardCharsets.UTF_8)), ""));
+        assertThat(dissectMatch.fullyMatched(), equalTo(false));
+        assertThat(dissectMatch.isValid(dissectMatch.getResults()), equalTo(false));
+    }
+
+    public void testGetResultsIdempotent(){
+        int expectedMatches = randomIntBetween(1, 26);
+        DissectMatch dissectMatch = new DissectMatch("", expectedMatches, expectedMatches, 0, 0);
+        IntStream.range(97, 97 + expectedMatches)  //allow for a-z values
+            .forEach(i -> dissectMatch.add(new DissectKey(new String(new byte[]{(byte) i}, StandardCharsets.UTF_8)), ""));
+        assertThat(dissectMatch.getResults(), equalTo(dissectMatch.getResults()));
+    }
+
+    public void testAppend(){
+        DissectMatch dissectMatch = new DissectMatch("-", 3, 1, 3, 0);
+        dissectMatch.add(new DissectKey("+a"), "x");
+        dissectMatch.add(new DissectKey("+a"), "y");
+        dissectMatch.add(new DissectKey("+a"), "z");
+        Map<String, String> results = dissectMatch.getResults();
+        assertThat(dissectMatch.isValid(results), equalTo(true));
+        assertThat(results, equalTo(MapBuilder.newMapBuilder().put("a", "x-y-z").map()));
+    }
+
+    public void testAppendWithOrder(){
+        DissectMatch dissectMatch = new DissectMatch("-", 3, 1, 3, 0);
+        dissectMatch.add(new DissectKey("+a/3"), "x");
+        dissectMatch.add(new DissectKey("+a"), "y");
+        dissectMatch.add(new DissectKey("+a/1"), "z");
+        Map<String, String> results = dissectMatch.getResults();
+        assertThat(dissectMatch.isValid(results), equalTo(true));
+        assertThat(results, equalTo(MapBuilder.newMapBuilder().put("a", "y-z-x").map()));
+    }
+
+    public void testReference(){
+        DissectMatch dissectMatch = new DissectMatch("-", 2, 1, 0, 1);
+        dissectMatch.add(new DissectKey("&a"), "x");
+        dissectMatch.add(new DissectKey("*a"), "y");
+        Map<String, String> results = dissectMatch.getResults();
+        assertThat(dissectMatch.isValid(results), equalTo(true));
+        assertThat(results, equalTo(MapBuilder.newMapBuilder().put("y", "x").map()));
+    }
+
+}

+ 386 - 0
libs/dissect/src/test/java/org/elasticsearch/dissect/DissectParserTests.java

@@ -0,0 +1,386 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.dissect;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import org.elasticsearch.test.ESTestCase;
+import org.hamcrest.CoreMatchers;
+import org.hamcrest.Matchers;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+import static com.carrotsearch.randomizedtesting.RandomizedTest.randomAsciiAlphanumOfLengthBetween;
+
+public class DissectParserTests extends ESTestCase {
+
+    public void testJavaDocExamples() {
+        assertMatch("%{a} %{b},%{c}", "foo bar,baz", Arrays.asList("a", "b", "c"), Arrays.asList("foo", "bar", "baz"));
+        assertMiss("%{a},%{b}:%{c}", "foo,bar,baz");
+        assertMatch("%{a->} %{b} %{c}", "foo         bar baz", Arrays.asList("a", "b", "c"), Arrays.asList("foo", "bar", "baz"));
+        assertMatch("%{a} %{+a} %{+a}", "foo bar baz", Arrays.asList("a"), Arrays.asList("foobarbaz"));
+        assertMatch("%{a} %{+a/2} %{+a/1}", "foo bar baz", Arrays.asList("a"), Arrays.asList("foobazbar"));
+        assertMatch("%{*a} %{b} %{&a}", "foo bar baz", Arrays.asList("foo", "b"), Arrays.asList("baz", "bar"));
+        assertMatch("%{a} %{} %{c}", "foo bar baz", Arrays.asList("a", "c"), Arrays.asList("foo", "baz"));
+        assertMatch("%{a} %{?skipme} %{c}", "foo bar baz", Arrays.asList("a", "c"), Arrays.asList("foo", "baz"));
+        assertMatch("%{a},%{b},%{c},%{d}", "foo,,,", Arrays.asList("a", "b", "c", "d"), Arrays.asList("foo", "", "", ""));
+        assertMatch("%{a->},%{b}", "foo,,,,,,bar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar"));
+    }
+
+    /**
+     * Borrowed from Logstash's test cases:
+     * https://github.com/logstash-plugins/logstash-filter-dissect/blob/master/src/test/java/org/logstash/dissect/DissectorTest.java
+     * Append Note - Logstash appends with the delimiter as the separator between values, this uses a user defined separator
+     */
+    public void testLogstashSpecs() {
+        assertMatch("%{a} %{b->} %{c}", "foo bar   baz", Arrays.asList("a", "b", "c"), Arrays.asList("foo", "bar", "baz"));
+        assertMiss("%{a}%{b} %{c}", null);
+        assertMiss("%{a} %{b}%{c} %{d}", "foo bar baz");
+        assertMiss("%{a} %{b} %{c}%{d}", "foo bar baz quux");
+        assertMatch("%{a} %{b->} %{c}", "foo bar   baz", Arrays.asList("a", "b", "c"), Arrays.asList("foo", "bar", "baz"));
+        assertMatch("%{a} %{} %{c}", "foo bar baz", Arrays.asList("a", "c"), Arrays.asList("foo", "baz"));
+        assertMatch("%{a} %{b} %{+b} %{z}", "foo bar baz quux", Arrays.asList("a", "b", "z"), Arrays.asList("foo", "bar baz", "quux"), " ");
+        assertMatch("%{a}------->%{b}", "foo------->bar baz quux", Arrays.asList("a", "b"), Arrays.asList("foo", "bar baz quux"));
+        assertMatch("%{a}------->%{}", "foo------->bar baz quux", Arrays.asList("a"), Arrays.asList("foo"));
+        assertMatch("%{a} » %{b}»%{c}€%{d}", "foo » bar»baz€quux",
+            Arrays.asList("a", "b", "c", "d"), Arrays.asList("foo", "bar", "baz", "quux"));
+        assertMatch("%{a} %{b} %{+a}", "foo bar baz quux", Arrays.asList("a", "b"), Arrays.asList("foo baz quux", "bar"), " ");
+        //Logstash supports implicit ordering anchored by the key without the '+'
+        //This implementation will only honor implicit ordering for appending right to left else explicit order (/N) is required.
+        //The results of this test differ from Logstash.
+        assertMatch("%{+a} %{a} %{+a} %{b}", "December 31 1999 quux",
+            Arrays.asList("a", "b"), Arrays.asList("December 31 1999", "quux"), " ");
+        //Same test as above, but with same result as Logstash using explicit ordering in the pattern
+        assertMatch("%{+a/1} %{a} %{+a/2} %{b}", "December 31 1999 quux",
+            Arrays.asList("a", "b"), Arrays.asList("31 December 1999", "quux"), " ");
+        assertMatch("%{+a/2} %{+a/4} %{+a/1} %{+a/3}", "bar quux foo baz", Arrays.asList("a"), Arrays.asList("foo bar baz quux"), " ");
+        assertMatch("%{+a} %{b}", "foo bar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar"));
+        assertMatch("%{+a} %{b} %{+a} %{c}", "foo bar baz quux",
+            Arrays.asList("a", "b", "c"), Arrays.asList("foo baz", "bar", "quux"), " ");
+        assertMatch("%{} %{syslog_timestamp} %{hostname} %{rt}: %{reason} %{+reason} %{src_ip}/%{src_port}->%{dst_ip}/%{dst_port} " +
+                "%{polrt} %{+polrt} %{+polrt} %{from_zone} %{to_zone} %{rest}",
+            "42 2016-05-25T14:47:23Z host.name.com RT_FLOW - RT_FLOW_SESSION_DENY: session denied 2.2.2.20/60000->1.1.1.10/8090 None " +
+                "6(0) DEFAULT-DENY ZONE-UNTRUST ZONE-DMZ UNKNOWN UNKNOWN N/A(N/A) ge-0/0/0.0",
+            Arrays.asList("syslog_timestamp", "hostname", "rt", "reason", "src_ip", "src_port", "dst_ip", "dst_port", "polrt"
+                , "from_zone", "to_zone", "rest"),
+            Arrays.asList("2016-05-25T14:47:23Z", "host.name.com", "RT_FLOW - RT_FLOW_SESSION_DENY", "session denied", "2.2.2.20", "60000"
+                , "1.1.1.10", "8090", "None 6(0) DEFAULT-DENY", "ZONE-UNTRUST", "ZONE-DMZ", "UNKNOWN UNKNOWN N/A(N/A) ge-0/0/0.0"), " ");
+        assertBadKey("%{+/2}");
+        assertBadKey("%{&+a_field}");
+        assertMatch("%{a->}   %{b->}---%{c}", "foo            bar------------baz",
+            Arrays.asList("a", "b", "c"), Arrays.asList("foo", "bar", "baz"));
+        assertMatch("%{->}-%{a}", "-----666", Arrays.asList("a"), Arrays.asList("666"));
+        assertMatch("%{?skipme->}-%{a}", "-----666", Arrays.asList("a"), Arrays.asList("666"));
+        assertMatch("%{a},%{b},%{c},%{d},%{e},%{f}", "111,,333,,555,666",
+            Arrays.asList("a", "b", "c", "d", "e", "f"), Arrays.asList("111", "", "333", "", "555", "666"));
+        assertMatch("%{a}.࿏.%{b}", "⟳༒.࿏.༒⟲", Arrays.asList("a", "b"), Arrays.asList("⟳༒", "༒⟲"));
+        assertMatch("%{a}", "子", Arrays.asList("a"), Arrays.asList("子"));
+        assertMatch("%{a}{\n}%{b}", "aaa{\n}bbb", Arrays.asList("a", "b"), Arrays.asList("aaa", "bbb"));
+        assertMiss("MACHINE[%{a}] %{b}", "1234567890 MACHINE[foo] bar");
+        assertMiss("%{a} %{b} %{c}", "foo:bar:baz");
+        assertMatch("/var/%{key1}/log/%{key2}.log", "/var/foo/log/bar.log", Arrays.asList("key1", "key2"), Arrays.asList("foo", "bar"));
+        assertMatch("%{a->}   %{b}-.-%{c}-%{d}-..-%{e}-%{f}-%{g}-%{h}", "foo            bar-.-baz-1111-..-22-333-4444-55555",
+            Arrays.asList("a", "b", "c", "d", "e", "f", "g", "h"),
+            Arrays.asList("foo", "bar", "baz", "1111", "22", "333", "4444", "55555"));
+    }
+
+    public void testBasicMatch() {
+        String valueFirstInput = "";
+        String keyFirstPattern = "";
+        String delimiterFirstInput = "";
+        String delimiterFirstPattern = "";
+        //parallel lists: expectedKeys.get(i) is the key for expectedValues.get(i)
+        List<String> expectedKeys = Arrays.asList(generateRandomStringArray(100, 10, false, false));
+        List<String> expectedValues = new ArrayList<>(expectedKeys.size());
+        for (String key : expectedKeys) {
+            String value = randomAsciiAlphanumOfLengthBetween(1, 100);
+            String delimiter = Integer.toString(randomInt()); //int to ensure values and delimiters don't overlap, else validation can fail
+            keyFirstPattern += "%{" + key + "}" + delimiter;
+            valueFirstInput += value + delimiter;
+            delimiterFirstPattern += delimiter + "%{" + key + "}";
+            delimiterFirstInput += delimiter + value;
+            expectedValues.add(value);
+        }
+        assertMatch(keyFirstPattern, valueFirstInput, expectedKeys, expectedValues);
+        assertMatch(delimiterFirstPattern, delimiterFirstInput, expectedKeys, expectedValues);
+    }
+
+    public void testBasicMatchUnicode() {
+        String valueFirstInput = "";
+        String keyFirstPattern = "";
+        String delimiterFirstInput = "";
+        String delimiterFirstPattern = "";
+        //parallel lists: expectedKeys.get(i) is the key for expectedValues.get(i)
+        List<String> expectedKeys = new ArrayList<>();
+        List<String> expectedValues = new ArrayList<>();
+        for (int i = 0, pairCount = randomIntBetween(1, 100); i < pairCount; i++) { //draw the bound once, not on every iteration
+            String key = randomAsciiAlphanumOfLengthBetween(1, 100);
+            String value = randomRealisticUnicodeOfCodepointLengthBetween(1, 100);
+            String delimiter = Integer.toString(randomInt()); //int to ensure values and delimiters don't overlap, else validation can fail
+            keyFirstPattern += "%{" + key + "}" + delimiter;
+            valueFirstInput += value + delimiter;
+            delimiterFirstPattern += delimiter + "%{" + key + "}";
+            delimiterFirstInput += delimiter + value;
+            expectedKeys.add(key);
+            expectedValues.add(value);
+        }
+        assertMatch(keyFirstPattern, valueFirstInput, expectedKeys, expectedValues);
+        assertMatch(delimiterFirstPattern, delimiterFirstInput, expectedKeys, expectedValues);
+    }
+
+    public void testMatchUnicode() {
+        assertMatch("%{a} %{b}", "foo 子", Arrays.asList("a", "b"), Arrays.asList("foo", "子"));
+        assertMatch("%{a}࿏%{b} %{c}", "⟳༒࿏༒⟲ 子", Arrays.asList("a", "b", "c"), Arrays.asList("⟳༒", "༒⟲", "子"));
+        assertMatch("%{a}࿏%{+a} %{+a}", "⟳༒࿏༒⟲ 子", Arrays.asList("a"), Arrays.asList("⟳༒༒⟲子"));
+        assertMatch("%{a}࿏%{+a/2} %{+a/1}", "⟳༒࿏༒⟲ 子", Arrays.asList("a"), Arrays.asList("⟳༒子༒⟲"));
+        assertMatch("%{a->}࿏%{b}", "⟳༒࿏࿏࿏࿏࿏༒⟲", Arrays.asList("a", "b"), Arrays.asList("⟳༒", "༒⟲"));
+        assertMatch("%{*a}࿏%{&a}", "⟳༒࿏༒⟲", Arrays.asList("⟳༒"), Arrays.asList("༒⟲"));
+        assertMatch("%{}࿏%{a}", "⟳༒࿏༒⟲", Arrays.asList("a"), Arrays.asList("༒⟲"));
+    }
+
+    public void testMatchRemainder() {
+        assertMatch("%{a}", "foo bar the rest", Arrays.asList("a"), Arrays.asList("foo bar the rest"));
+        assertMatch("%{a} %{b}", "foo bar the rest", Arrays.asList("a", "b"), Arrays.asList("foo", "bar the rest"));
+        assertMatch("%{} %{b}", "foo bar the rest", Arrays.asList("b"), Arrays.asList("bar the rest"));
+        assertMatch("%{a} %{b->}", "foo bar the rest", Arrays.asList("a", "b"), Arrays.asList("foo", "bar the rest"));
+        assertMatch("%{*a} %{&a}", "foo bar the rest", Arrays.asList("foo"), Arrays.asList("bar the rest"));
+        assertMatch("%{a} %{+a}", "foo bar the rest", Arrays.asList("a"), Arrays.asList("foo bar the rest"), " ");
+    }
+
+    public void testAppend() {
+        assertMatch("%{a} %{+a} %{+a}", "foo bar baz", Arrays.asList("a"), Arrays.asList("foobarbaz"));
+        assertMatch("%{a} %{+a} %{b} %{+b}", "foo bar baz lol", Arrays.asList("a", "b"), Arrays.asList("foobar", "bazlol"));
+        assertMatch("%{a} %{+a/2} %{+a/1}", "foo bar baz", Arrays.asList("a"), Arrays.asList("foobazbar"));
+        assertMatch("%{a} %{+a/2} %{+a/1}", "foo bar baz", Arrays.asList("a"), Arrays.asList("foo baz bar"), " ");
+    }
+
+    public void testAssociate() {
+        assertMatch("%{*a} %{&a}", "foo bar", Arrays.asList("foo"), Arrays.asList("bar"));
+        assertMatch("%{&a} %{*a}", "foo bar", Arrays.asList("bar"), Arrays.asList("foo"));
+        assertMatch("%{*a} %{&a} %{*b} %{&b}", "foo bar baz lol", Arrays.asList("foo", "baz"), Arrays.asList("bar", "lol"));
+        assertMatch("%{*a} %{&a} %{c} %{*b} %{&b}", "foo bar x baz lol",
+            Arrays.asList("foo", "baz", "c"), Arrays.asList("bar", "lol", "x"));
+        assertBadPattern("%{*a} %{a}");
+        assertBadPattern("%{a} %{&a}");
+        assertMiss("%{*a} %{&a} {a} %{*b} %{&b}", "foo bar x baz lol");
+    }
+
+    public void testAppendAndAssociate() {
+        assertMatch("%{a} %{+a} %{*b} %{&b}", "foo bar baz lol", Arrays.asList("a", "baz"), Arrays.asList("foobar", "lol"));
+        assertMatch("%{a->} %{+a/2} %{+a/1} %{*b} %{&b}", "foo      bar baz lol x",
+            Arrays.asList("a", "lol"), Arrays.asList("foobazbar", "x"));
+    }
+
+    public void testEmptyKey() {
+        assertMatch("%{} %{b}", "foo bar", Arrays.asList("b"), Arrays.asList("bar"));
+        assertMatch("%{a} %{}", "foo bar", Arrays.asList("a"), Arrays.asList("foo"));
+        assertMatch("%{->} %{b}", "foo        bar", Arrays.asList("b"), Arrays.asList("bar"));
+        assertMatch("%{->} %{b}", "        bar", Arrays.asList("b"), Arrays.asList("bar"));
+        assertMatch("%{a} %{->}", "foo  bar       ", Arrays.asList("a"), Arrays.asList("foo"));
+    }
+
+    public void testNamedSkipKey() {
+        assertMatch("%{?foo} %{b}", "foo bar", Arrays.asList("b"), Arrays.asList("bar"));
+        assertMatch("%{?} %{b}", "foo bar", Arrays.asList("b"), Arrays.asList("bar"));
+        assertMatch("%{a} %{?bar}", "foo bar", Arrays.asList("a"), Arrays.asList("foo"));
+        assertMatch("%{?foo->} %{b}", "foo        bar", Arrays.asList("b"), Arrays.asList("bar"));
+        assertMatch("%{?->} %{b}", "foo        bar", Arrays.asList("b"), Arrays.asList("bar"));
+        assertMatch("%{?foo->} %{b}", "        bar", Arrays.asList("b"), Arrays.asList("bar"));
+        assertMatch("%{a} %{->?bar}", "foo  bar       ", Arrays.asList("a"), Arrays.asList("foo"));
+        assertMatch("%{a} %{?skipme} %{?skipme}", "foo  bar  baz", Arrays.asList("a"), Arrays.asList("foo"));
+        assertMatch("%{a} %{?} %{?}", "foo  bar  baz", Arrays.asList("a"), Arrays.asList("foo"));
+    }
+
+    public void testConsecutiveDelimiters() {
+        //leading
+        assertMatch("%{->},%{a}", ",,,,,foo", Arrays.asList("a"), Arrays.asList("foo"));
+        assertMatch("%{a->},%{b}", ",,,,,foo", Arrays.asList("a", "b"), Arrays.asList("", "foo"));
+        //trailing
+        assertMatch("%{a->},", "foo,,,,,", Arrays.asList("a"), Arrays.asList("foo"));
+        assertMatch("%{a} %{b},", "foo bar,,,,,", Arrays.asList("a", "b"), Arrays.asList("foo", "bar"));
+        assertMatch("%{a} %{b->},", "foo bar,,,,,", Arrays.asList("a", "b"), Arrays.asList("foo", "bar"));
+        //middle
+        assertMatch("%{a->},%{b}", "foo,,,,,bar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar"));
+        assertMatch("%{a->} %{b}", "foo     bar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar"));
+        assertMatch("%{a->}x%{b}", "fooxxxxxbar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar"));
+        assertMatch("%{a->} xyz%{b}", "foo xyz xyz xyz xyz xyzbar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar"));
+        //skipped with empty values
+        assertMatch("%{a},%{b},%{c},%{d}", "foo,,,", Arrays.asList("a", "b", "c", "d"), Arrays.asList("foo", "", "", ""));
+        assertMatch("%{a},%{b},%{c},%{d}", "foo,,bar,baz", Arrays.asList("a", "b", "c", "d"), Arrays.asList("foo", "", "bar", "baz"));
+        assertMatch("%{a},%{b},%{c},%{d}", "foo,,,baz", Arrays.asList("a", "b", "c", "d"), Arrays.asList("foo", "", "", "baz"));
+        assertMatch("%{a},%{b},%{c},%{d}", ",bar,,baz", Arrays.asList("a", "b", "c", "d"), Arrays.asList("", "bar", "", "baz"));
+        assertMatch("%{->},%{a->},%{b}", ",,,bar,,baz", Arrays.asList("a", "b"), Arrays.asList("bar", "baz"));
+    }
+
+    public void testAppendWithConsecutiveDelimiters() {
+        assertMatch("%{+a/1},%{+a/3}-%{+a/2} %{b}", "foo,bar----baz lol", Arrays.asList("a", "b"), Arrays.asList("foobar", ""));
+        assertMatch("%{+a/1},%{+a/3->}-%{+a/2} %{b}", "foo,bar----baz lol", Arrays.asList("a", "b"), Arrays.asList("foobazbar", "lol"));
+    }
+
+    public void testSkipRightPadding() {
+        assertMatch("%{a->} %{b}", "foo bar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar"));
+        assertMatch("%{a->} %{b}", "foo            bar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar"));
+        assertMatch("%{->} %{a}", "foo            bar", Arrays.asList("a"), Arrays.asList("bar"));
+        assertMatch("%{a->} %{+a->} %{*b->} %{&b->} %{c}", "foo       bar    baz  lol    x",
+            Arrays.asList("a", "baz", "c"), Arrays.asList("foobar", "lol", "x"));
+    }
+
+    public void testTrimmedEnd() {
+        assertMatch("%{a} %{b}", "foo bar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar"));
+        assertMatch("%{a} %{b->} ", "foo bar        ", Arrays.asList("a", "b"), Arrays.asList("foo", "bar"));
+        //only whitespace is trimmed in the absence of trailing characters
+        assertMatch("%{a} %{b->}", "foo bar,,,,,,", Arrays.asList("a", "b"), Arrays.asList("foo", "bar,,,,,,"));
+        //consecutive delimiters + right padding can be used to skip over the trailing delimiters
+        assertMatch("%{a} %{b->},", "foo bar,,,,,,", Arrays.asList("a", "b"), Arrays.asList("foo", "bar"));
+    }
+
+    public void testLeadingDelimiter() {
+        assertMatch(",,,%{a} %{b}", ",,,foo bar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar"));
+        assertMatch(",%{a} %{b}", ",,foo bar", Arrays.asList("a", "b"), Arrays.asList(",foo", "bar"));
+    }
+
+    /**
+     * Runtime errors
+     */
+    public void testMiss() {
+        assertMiss("%{a}%{b}", "foo");
+        assertMiss("%{a},%{b}", "foo bar");
+        assertMiss("%{a}, %{b}", "foo,bar");
+        assertMiss("x%{a},%{b}", "foo,bar");
+        assertMiss("x%{},%{b}", "foo,bar");
+        assertMiss("leading_delimiter_long%{a}", "foo");
+        assertMiss("%{a}trailing_delimiter_long", "foo");
+        assertMiss("leading_delimiter_long%{a}trailing_delimiter_long", "foo");
+        assertMiss("%{a}x", "foo");
+        assertMiss("%{a},%{b}x", "foo,bar");
+    }
+
+    /**
+     * Construction errors: patterns that must be rejected as a whole, followed by
+     * patterns whose key must be rejected.
+     */
+    public void testBadPatternOrKey() {
+        for (String badPattern : Arrays.asList("", "{}", "%{*a} %{&b}")) {
+            assertBadPattern(badPattern);
+        }
+        for (String patternWithBadKey : Arrays.asList("%{*}", "%{++}")) {
+            assertBadKey(patternWithBadKey);
+        }
+    }
+
+    /**
+     * Dissects a syslog line; the three timestamp parts are appended into one key using a space separator.
+     */
+    public void testSyslog() {
+        final String pattern = "%{timestamp} %{+timestamp} %{+timestamp} %{logsource} %{program}[%{pid}]: %{message}";
+        final String line = "Mar 16 00:01:25 evita postfix/smtpd[1713]: connect from camomile.cloud9.net[168.100.1.3]";
+        final List<String> keys = Arrays.asList("timestamp", "logsource", "program", "pid", "message");
+        final List<String> values =
+            Arrays.asList("Mar 16 00:01:25", "evita", "postfix/smtpd", "1713", "connect from camomile.cloud9.net[168.100.1.3]");
+        assertMatch(pattern, line, keys, values, " ");
+    }
+
+    /**
+     * Dissects an Apache access log line; the final unnamed right-padded key in the pattern
+     * contributes no entry to the expected result.
+     */
+    public void testApacheLog() {
+        final String pattern =
+            "%{clientip} %{ident} %{auth} [%{timestamp}] \"%{verb} %{request} HTTP/%{httpversion}\" %{response} %{bytes}" +
+                " \"%{referrer}\" \"%{agent}\" %{->}";
+        final String line =
+            "31.184.238.164 - - [24/Jul/2014:05:35:37 +0530] \"GET /logs/access.log HTTP/1.0\" 200 69849 " +
+                "\"http://8rursodiol.enjin.com\" \"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) " +
+                "Chrome/30.0.1599.12785 YaBrowser/13.12.1599.12785 Safari/537.36\" \"www.dlwindianrailways.com\"";
+        final List<String> keys = Arrays.asList(
+            "clientip", "ident", "auth", "timestamp", "verb", "request", "httpversion", "response", "bytes",
+            "referrer", "agent");
+        final List<String> values = Arrays.asList(
+            "31.184.238.164", "-", "-", "24/Jul/2014:05:35:37 +0530", "GET", "/logs/access.log", "1.0", "200", "69849",
+            "http://8rursodiol.enjin.com", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36" +
+                " (KHTML, like Gecko) Chrome/30.0.1599.12785 YaBrowser/13.12.1599.12785 Safari/537.36");
+        assertMatch(pattern, line, keys, values);
+    }
+
+    /**
+     * Shared specification between Beats, Logstash, and Ingest node.
+     * <p>
+     * Reads {@code /specification/tests.json} from the test classpath and runs every entry in it:
+     * entries flagged {@code skip} are ignored, entries flagged {@code fail} must raise a
+     * {@link DissectException}, and all other entries must yield the {@code expected} keys and values
+     * when {@code msg} is dissected with {@code tok} and the entry's {@code append} separator.
+     *
+     * @throws Exception if the specification cannot be read or parsed as JSON
+     */
+    public void testJsonSpecification() throws Exception {
+        ObjectMapper mapper = new ObjectMapper();
+        JsonNode rootNode;
+        // Own the stream explicitly so it is closed no matter how the mapper is configured, and
+        // fail with a clear message (instead of an error from inside Jackson) if the resource is missing.
+        try (java.io.InputStream specification = this.getClass().getResourceAsStream("/specification/tests.json")) {
+            assertNotNull("test resource /specification/tests.json not found on the classpath", specification);
+            rootNode = mapper.readTree(specification);
+        }
+        for (JsonNode test : rootNode) {
+            if (test.path("skip").asBoolean()) {
+                continue;
+            }
+            String name = test.path("name").asText();
+            logger.debug("Running Json specification: " + name);
+            String pattern = test.path("tok").asText();
+            String input = test.path("msg").asText();
+            if (test.path("fail").asBoolean()) {
+                assertFail(pattern, input);
+                continue;
+            }
+            // keys and values are collected in the same iteration order as the "expected" object
+            List<String> expectedKeys = new ArrayList<>();
+            List<String> expectedValues = new ArrayList<>();
+            Iterator<Map.Entry<String, JsonNode>> expected = test.path("expected").fields();
+            expected.forEachRemaining(entry -> {
+                expectedKeys.add(entry.getKey());
+                expectedValues.add(entry.getValue().asText());
+            });
+            String append = test.path("append").asText();
+            assertMatch(pattern, input, expectedKeys, expectedValues, append);
+        }
+    }
+
+    private DissectException assertFail(String pattern, String input){
+        return expectThrows(DissectException.class, () -> new DissectParser(pattern, null).parse(input));
+    }
+
+    private void assertMiss(String pattern, String input) {
+        DissectException e = assertFail(pattern, input);
+        assertThat(e.getMessage(), CoreMatchers.containsString("Unable to find match for dissect pattern"));
+        assertThat(e.getMessage(), CoreMatchers.containsString(pattern));
+        assertThat(e.getMessage(), input == null ? CoreMatchers.containsString("null") : CoreMatchers.containsString(input));
+    }
+
+    private void assertBadPattern(String pattern) {
+        DissectException e = assertFail(pattern, null);
+        assertThat(e.getMessage(), CoreMatchers.containsString("Unable to parse pattern"));
+        assertThat(e.getMessage(), CoreMatchers.containsString(pattern));
+    }
+
+    private void assertBadKey(String pattern, String key) {
+        DissectException e = assertFail(pattern, null);
+        assertThat(e.getMessage(), CoreMatchers.containsString("Unable to parse key"));
+        assertThat(e.getMessage(), CoreMatchers.containsString(key));
+    }
+
+    private void assertBadKey(String pattern) {
+        assertBadKey(pattern, pattern.replace("%{", "").replace("}", ""));
+    }
+
+    private void assertMatch(String pattern, String input, List<String> expectedKeys, List<String> expectedValues) {
+        assertMatch(pattern, input, expectedKeys, expectedValues, null);
+    }
+
+    /**
+     * Dissects {@code input} with {@code pattern} and asserts that the resulting keys and values
+     * equal the expected ones, ignoring order.
+     * <p>
+     * Keys and values are each sorted and compared as separate lists, so this verifies the set of keys
+     * and the set of values but not which value belongs to which key.
+     *
+     * @param pattern         the dissect pattern under test
+     * @param input           the string to dissect
+     * @param expectedKeys    the keys the result must contain; not modified
+     * @param expectedValues  the values the result must contain; not modified
+     * @param appendSeperator the append separator handed to the {@link DissectParser} constructor
+     */
+    private void assertMatch(String pattern, String input, List<String> expectedKeys, List<String> expectedValues, String appendSeperator) {
+        Map<String, String> results = new DissectParser(pattern, appendSeperator).parse(input);
+        List<String> foundKeys = new ArrayList<>(results.keySet());
+        List<String> foundValues = new ArrayList<>(results.values());
+        // Sort copies rather than the arguments: callers keep their lists untouched and may pass
+        // unmodifiable lists.
+        List<String> sortedExpectedKeys = new ArrayList<>(expectedKeys);
+        List<String> sortedExpectedValues = new ArrayList<>(expectedValues);
+        Collections.sort(foundKeys);
+        Collections.sort(foundValues);
+        Collections.sort(sortedExpectedKeys);
+        Collections.sort(sortedExpectedValues);
+        assertThat(foundKeys, Matchers.equalTo(sortedExpectedKeys));
+        assertThat(foundValues, Matchers.equalTo(sortedExpectedValues));
+    }
+}

+ 363 - 0
libs/dissect/src/test/resources/specification/tests.json

@@ -0,0 +1,363 @@
+[
+  {
+    "name": "When all the defined fields are captured but we have remaining data",
+    "tok": "level=%{level} ts=%{timestamp} caller=%{caller} msg=\"%{message}\"",
+    "msg": "level=info ts=2018-06-27T17:19:13.036579993Z caller=main.go:222 msg=\"Starting OK\" version=\"(version=2.3.1, branch=HEAD, revision=188ca45bd85ce843071e768d855722a9d9dabe03)\"}",
+    "expected": {
+      "caller": "main.go:222",
+      "level": "info",
+      "message": "Starting OK",
+      "timestamp": "2018-06-27T17:19:13.036579993Z"
+    },
+    "skip": false,
+    "fail": false,
+    "append": ""
+  },
+  {
+    "name": "Complex stack trace",
+    "tok": "%{day}-%{month}-%{year} %{hour} %{severity} [%{thread_id}] %{origin} %{message}",
+    "msg": "18-Apr-2018 06:53:20.411 INFO [http-nio-8080-exec-1] org.apache.coyote.http11.Http11Processor.service Error parsing HTTP request header\n Note: further occurrences of HTTP header parsing errors will be logged at DEBUG level.\n java.lang.IllegalArgumentException: Invalid character found in method name. HTTP method names must be tokens\n    at org.apache.coyote.http11.Http11InputBuffer.parseRequestLine(Http11InputBuffer.java:426)\n    at org.apache.coyote.http11.Http11Processor.service(Http11Processor.java:687)\n    at org.apache.coyote.AbstractProcessorLight.process(AbstractProcessorLight.java:66)\n    at org.apache.coyote.AbstractProtocol$ConnectionHandler.process(AbstractProtocol.java:790)\n    at org.apache.tomcat.util.net.NioEndpoint$SocketProcessor.doRun(NioEndpoint.java:1459)\n    at org.apache.tomcat.util.net.SocketProcessorBase.run(SocketProcessorBase.java:49)\n    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)\n    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)\n    at org.apache.tomcat.util.threads.TaskThread$WrappingRunnable.run(TaskThread.java:61)\n    at java.lang.Thread.run(Thread.java:748)",
+    "expected": {
+      "day": "18",
+      "hour": "06:53:20.411",
+      "message": "Error parsing HTTP request header\n Note: further occurrences of HTTP header parsing errors will be logged at DEBUG level.\n java.lang.IllegalArgumentException: Invalid character found in method name. HTTP method names must be tokens\n    at org.apache.coyote.http11.Http11InputBuffer.parseRequestLine(Http11InputBuffer.java:426)\n    at org.apache.coyote.http11.Http11Processor.service(Http11Processor.java:687)\n    at org.apache.coyote.AbstractProcessorLight.process(AbstractProcessorLight.java:66)\n    at org.apache.coyote.AbstractProtocol$ConnectionHandler.process(AbstractProtocol.java:790)\n    at org.apache.tomcat.util.net.NioEndpoint$SocketProcessor.doRun(NioEndpoint.java:1459)\n    at org.apache.tomcat.util.net.SocketProcessorBase.run(SocketProcessorBase.java:49)\n    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)\n    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)\n    at org.apache.tomcat.util.threads.TaskThread$WrappingRunnable.run(TaskThread.java:61)\n    at java.lang.Thread.run(Thread.java:748)",
+      "month": "Apr",
+      "origin": "org.apache.coyote.http11.Http11Processor.service",
+      "severity": "INFO",
+      "thread_id": "http-nio-8080-exec-1",
+      "year": "2018"
+    },
+    "skip": false,
+    "fail": false,
+    "append": ""
+  },
+  {
+    "name": "success when delimiter found at the beginning and end of the string",
+    "tok": "/var/log/%{key}.log",
+    "msg": "/var/log/foobar.log",
+    "expected": {
+      "key": "foobar"
+    },
+    "skip": false,
+    "fail": false,
+    "append": ""
+  },
+  {
+    "name": "fails when delimiter is not found at the beginning of the string",
+    "tok": "/var/log/%{key}.log",
+    "msg": "foobar",
+    "expected": null,
+    "skip": false,
+    "fail": true,
+    "append": ""
+  },
+  {
+    "name": "fails when delimiter is not found after the key",
+    "tok": "/var/log/%{key}.log",
+    "msg": "/var/log/foobar",
+    "expected": null,
+    "skip": false,
+    "fail": true,
+    "append": ""
+  },
+  {
+    "name": "simple dissect",
+    "tok": "%{key}",
+    "msg": "foobar",
+    "expected": {
+      "key": "foobar"
+    },
+    "skip": false,
+    "fail": false,
+    "append": ""
+  },
+  {
+    "name": "dissect two replacement",
+    "tok": "%{key1} %{key2}",
+    "msg": "foo bar",
+    "expected": {
+      "key1": "foo",
+      "key2": "bar"
+    },
+    "skip": false,
+    "fail": false,
+    "append": ""
+  },
+  {
+    "name": "fail on partial match",
+    "tok": "%{key1} %{key2} %{key3}",
+    "msg": "foo bar",
+    "expected": null,
+    "skip": false,
+    "fail": true,
+    "append": ""
+  },
+  {
+    "name": "one level dissect not end of string",
+    "tok": "/var/%{key}/log",
+    "msg": "/var/foobar/log",
+    "expected": {
+      "key": "foobar"
+    },
+    "skip": false,
+    "fail": false,
+    "append": ""
+  },
+  {
+    "name": "one level dissect",
+    "tok": "/var/%{key}",
+    "msg": "/var/foobar/log",
+    "expected": {
+      "key": "foobar/log"
+    },
+    "skip": false,
+    "fail": false,
+    "append": ""
+  },
+  {
+    "name": "multiple keys dissect end of string",
+    "tok": "/var/%{key}/log/%{key1}",
+    "msg": "/var/foobar/log/apache",
+    "expected": {
+      "key": "foobar",
+      "key1": "apache"
+    },
+    "skip": false,
+    "fail": false,
+    "append": ""
+  },
+  {
+    "name": "multiple keys not end of string",
+    "tok": "/var/%{key}/log/%{key1}.log",
+    "msg": "/var/foobar/log/apache.log",
+    "expected": {
+      "key": "foobar",
+      "key1": "apache"
+    },
+    "skip": false,
+    "fail": false,
+    "append": ""
+  },
+  {
+    "name": "append with order",
+    "tok": "%{+key/3} %{+key/1} %{+key/2}",
+    "msg": "1 2 3",
+    "expected": {
+      "key": "231"
+    },
+    "skip": false,
+    "fail": false,
+    "append": ""
+  },
+  {
+    "name": "append with order and separator",
+    "tok": "%{+key/3} %{+key/1} %{+key/2}",
+    "msg": "1 2 3",
+    "expected": {
+      "key": "2::3::1"
+    },
+    "skip": false,
+    "fail": false,
+    "append": "::"
+  },
+  {
+    "name": "append with order and right padding",
+    "tok": "%{+key/3} %{+key/1-\u003e} %{+key/2}",
+    "msg": "1 2              3",
+    "expected": {
+      "key": "231"
+    },
+    "skip": false,
+    "fail": false,
+    "append": ""
+  },
+  {
+    "name": "simple append",
+    "tok": "%{key}-%{+key}-%{+key}",
+    "msg": "1-2-3",
+    "expected": {
+      "key": "123"
+    },
+    "skip": false,
+    "fail": false,
+    "append": ""
+  },
+  {
+    "name": "simple append with separator",
+    "tok": "%{key}-%{+key}-%{+key}",
+    "msg": "1-2-3",
+    "expected": {
+      "key": "1,2,3"
+    },
+    "skip": false,
+    "fail": false,
+    "append": ","
+  },
+  {
+    "name": "reference field",
+    "tok": "%{*key} %{\u0026key}",
+    "msg": "hello world",
+    "expected": {
+      "hello": "world"
+    },
+    "skip": false,
+    "fail": false,
+    "append": ""
+  },
+  {
+    "name": "reference field alt order",
+    "tok": "%{\u0026key} %{*key}",
+    "msg": "hello world",
+    "expected": {
+      "world": "hello"
+    },
+    "skip": false,
+    "fail": false,
+    "append": ""
+  },
+  {
+    "name": "nameless skip field",
+    "tok": "%{} %{key}",
+    "msg": "hello world",
+    "expected": {
+      "key": "world"
+    },
+    "skip": false,
+    "fail": false,
+    "append": ""
+  },
+  {
+    "name": "named skip field",
+    "tok": "%{?skipme} %{key}",
+    "msg": "hello world",
+    "expected": {
+      "key": "world"
+    },
+    "skip": false,
+    "fail": false,
+    "append": ""
+  },
+  {
+    "name": "reference without pairing",
+    "tok": "%{key} %{\u0026key}",
+    "msg": "hello world",
+    "expected": null,
+    "skip": false,
+    "fail": true,
+    "append": ""
+  },
+  {
+    "name": "missing fields (consecutive delimiters)",
+    "tok": "%{name},%{addr1},%{addr2},%{addr3},%{city},%{zip}",
+    "msg": "Jane Doe,4321 Fifth Avenue,,,New York,87432",
+    "expected": {
+      "addr1": "4321 Fifth Avenue",
+      "addr2": "",
+      "addr3": "",
+      "city": "New York",
+      "name": "Jane Doe",
+      "zip": "87432"
+    },
+    "skip": false,
+    "fail": false,
+    "append": ""
+  },
+  {
+    "name": "missing fields with right padding (consecutive delimiters)",
+    "tok": "%{name},%{addr1-\u003e},%{city},%{zip}",
+    "msg": "Jane Doe,4321 Fifth Avenue,,,New York,87432",
+    "expected": {
+      "addr1": "4321 Fifth Avenue",
+      "city": "New York",
+      "name": "Jane Doe",
+      "zip": "87432"
+    },
+    "skip": false,
+    "fail": false,
+    "append": ""
+  },
+  {
+    "name": "ignore right padding",
+    "tok": "%{id} %{function-\u003e} %{server}",
+    "msg": "00000043 ViewReceive     machine-321",
+    "expected": {
+      "function": "ViewReceive",
+      "id": "00000043",
+      "server": "machine-321"
+    },
+    "skip": false,
+    "fail": false,
+    "append": ""
+  },
+  {
+    "name": "padding on the last key needs a delimiter",
+    "tok": "%{id} %{function} %{server-\u003e} ",
+    "msg": "00000043 ViewReceive machine-321    ",
+    "expected": {
+      "function": "ViewReceive",
+      "id": "00000043",
+      "server": "machine-321"
+    },
+    "skip": false,
+    "fail": false,
+    "append": ""
+  },
+  {
+    "name": "ignore left padding",
+    "tok": "%{id-\u003e} %{function} %{server}",
+    "msg": "00000043    ViewReceive machine-321",
+    "expected": {
+      "function": "ViewReceive",
+      "id": "00000043",
+      "server": "machine-321"
+    },
+    "skip": false,
+    "fail": false,
+    "append": ""
+  },
+  {
+    "name": "when the delimiters contain `{` and `}`",
+    "tok": "{%{a}}{%{b}} %{rest}",
+    "msg": "{c}{d} anything",
+    "expected": {
+      "a": "c",
+      "b": "d",
+      "rest": "anything"
+    },
+    "skip": false,
+    "fail": false,
+    "append": ""
+  },
+  {
+    "name": "no keys defined",
+    "tok": "anything",
+    "msg": "anything",
+    "expected": null,
+    "skip": false,
+    "fail": true,
+    "append": ""
+  },
+  {
+    "name": "invalid key",
+    "tok": "%{some?thing}",
+    "msg": "anything",
+    "expected": null,
+    "skip": false,
+    "fail": true,
+    "append": ""
+  },
+  {
+    "name": "matches non-ascii",
+    "tok": "%{a}࿏%{b} %{c}",
+    "msg": "⟳༒࿏༒⟲ 子",
+    "expected": {
+      "a": "⟳༒",
+      "b": "༒⟲",
+      "c": "子"
+    },
+    "skip": false,
+    "fail": false,
+    "append": ""
+  }
+
+]