Browse Source

New wildcard field optimised for wildcard queries (#49993)

Indexes values using size 3 ngrams and also stores the full original as a binary doc value.
Wildcard queries operate by using a cheap approximation query on the ngram field followed up by a more expensive verification query using an automaton on the binary doc values.  Also supports aggregations and sorting.
markharwood 5 years ago
parent
commit
a2a4756736

+ 3 - 1
docs/reference/mapping/types.asciidoc

@@ -7,7 +7,7 @@ document:
 [float]
 === Core datatypes
 
-string::         <<text,`text`>> and <<keyword,`keyword`>>
+string::         <<text,`text`>>, <<keyword,`keyword`>> and <<wildcard,`wildcard`>>
 <<number>>::     `long`, `integer`, `short`, `byte`, `double`, `float`, `half_float`, `scaled_float`
 <<date>>::       `date`
 <<date_nanos>>:: `date_nanos`
@@ -131,3 +131,5 @@ include::types/token-count.asciidoc[]
 include::types/shape.asciidoc[]
 
 include::types/constant-keyword.asciidoc[]
+
+include::types/wildcard.asciidoc[]

+ 53 - 0
docs/reference/mapping/types/wildcard.asciidoc

@@ -0,0 +1,53 @@
+[role="xpack"]
+[testenv="basic"]
+[[wildcard]]
+=== Wildcard datatype
+++++
+<titleabbrev>Wildcard</titleabbrev>
+++++
+
+A `wildcard` field stores values optimised for wildcard grep-like queries.
+Wildcard queries are possible on other field types but suffer from constraints:
+* `text` fields limit matching of any wildcard expressions to individual tokens rather than the original whole value held in a field
+* `keyword` fields are untokenized but slow at performing wildcard queries (especially patterns with leading wildcards).
+
+Internally the `wildcard` field indexes the whole field value using ngrams and stores the full string.
+The index is used as a rough filter to cut down the number of values that are then checked by retrieving and checking the full values.
+This field is especially well suited to run grep-like queries on log lines. Storage costs are typically lower than those of `keyword`
+fields but search speeds for exact matches on full terms are slower.
+
+You index and search a wildcard field as follows:
+
+[source,console]
+--------------------------------------------------
+PUT my_index
+{
+  "mappings": {
+    "properties": {
+      "my_wildcard": {
+        "type": "wildcard"
+      }
+    }
+  }
+}
+
+PUT my_index/_doc/1
+{
+  "my_wildcard" : "This string can be quite lengthy"
+}
+
+POST my_index/_search
+{
+  "query": {
+      "wildcard" : { "my_wildcard" : { "value" : "*quite*lengthy" } }
+  }
+}
+
+
+--------------------------------------------------
+
+
+==== Limitations
+
+* `wildcard` fields are untokenized like keyword fields, so do not support queries that rely on word positions such as phrase queries.
+

+ 1 - 16
server/src/main/java/org/elasticsearch/index/fielddata/plain/BinaryDVIndexFieldData.java

@@ -21,8 +21,6 @@ package org.elasticsearch.index.fielddata.plain;
 
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.search.SortField;
-import org.apache.lucene.search.SortedSetSortField;
-import org.apache.lucene.search.SortedSetSelector;
 import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.util.BigArrays;
 import org.elasticsearch.index.Index;
@@ -54,20 +52,7 @@ public class BinaryDVIndexFieldData extends DocValuesIndexFieldData implements I
     public SortField sortField(@Nullable Object missingValue, MultiValueMode sortMode, XFieldComparatorSource.Nested nested,
             boolean reverse) {
         XFieldComparatorSource source = new BytesRefFieldComparatorSource(this, missingValue, sortMode, nested);
-        /**
-         * Check if we can use a simple {@link SortedSetSortField} compatible with index sorting and
-         * returns a custom sort field otherwise.
-         */
-        if (nested != null ||
-                (sortMode != MultiValueMode.MAX && sortMode != MultiValueMode.MIN) ||
-                (source.sortMissingFirst(missingValue) == false && source.sortMissingLast(missingValue) == false)) {
-            return new SortField(getFieldName(), source, reverse);
-        }
-        SortField sortField = new SortedSetSortField(fieldName, reverse,
-            sortMode == MultiValueMode.MAX ? SortedSetSelector.Type.MAX : SortedSetSelector.Type.MIN);
-        sortField.setMissingValue(source.sortMissingLast(missingValue) ^ reverse ?
-            SortedSetSortField.STRING_LAST : SortedSetSortField.STRING_FIRST);
-        return sortField;
+        return new SortField(getFieldName(), source, reverse);
     }
 
     @Override

+ 10 - 0
x-pack/plugin/core/src/main/java/org/elasticsearch/license/XPackLicenseState.java

@@ -613,6 +613,16 @@ public class XPackLicenseState {
     public boolean isVectorsAllowed() {
         return allowForAllLicenses();
     }
+    
+
+    /**
+     * Determine if Wildcard support should be enabled.
+     * <p>
+     *  Wildcard is available for all license types except {@link OperationMode#MISSING}
+     */
+    public synchronized boolean isWildcardAllowed() {
+        return status.active;
+    }    
 
     public boolean isOdbcAllowed() {
         return isAllowedByLicense(OperationMode.PLATINUM);

+ 218 - 0
x-pack/plugin/src/test/resources/rest-api-spec/test/wildcard/10_wildcard_basic.yml

@@ -0,0 +1,218 @@
+setup:
+  - skip:
+      features: headers
+      version: " - 7.9.99"
+      reason: "wildcard fields were added from 8.0"
+
+  - do:
+      indices.create:
+        index: test-index
+        body:
+          settings:
+            number_of_replicas: 0
+          mappings:
+            properties:
+              my_wildcard:
+                 type: wildcard
+  - do:
+      index:
+        index: test-index
+        id: 1
+        body:
+          my_wildcard: hello world
+  - do:
+      index:
+        index: test-index
+        id: 2
+        body:
+          my_wildcard: goodbye world
+
+  - do:
+      indices.refresh: {}
+
+---
+"Short prefix query":
+  - do:
+      search:
+        body:
+          track_total_hits: true
+          query:
+            wildcard:
+              my_wildcard: {value: "hel*" }
+
+
+  - match: {hits.total.value: 1}
+
+---
+"Long prefix query":
+  - do:
+      search:
+        body:
+          track_total_hits: true
+          query:
+            wildcard:
+              my_wildcard: {value: "hello wor*" }
+
+
+  - match: {hits.total.value: 1}
+
+---
+"Short unrooted query":
+  - do:
+      search:
+        body:
+          track_total_hits: true
+          query:
+            wildcard:
+              my_wildcard: {value: "*ello*" }
+
+
+  - match: {hits.total.value: 1}
+
+---
+"Long unrooted query":
+  - do:
+      search:
+        body:
+          track_total_hits: true
+          query:
+            wildcard:
+              my_wildcard: {value: "*ello worl*" }
+
+
+  - match: {hits.total.value: 1}
+
+---
+"Short suffix query":
+  - do:
+      search:
+        body:
+          track_total_hits: true
+          query:
+            wildcard:
+              my_wildcard: {value: "*ld" }
+
+
+  - match: {hits.total.value: 2}
+
+---
+"Long suffix query":
+  - do:
+      search:
+        body:
+          track_total_hits: true
+          query:
+            wildcard:
+              my_wildcard: {value: "*ello world" }
+
+
+  - match: {hits.total.value: 1}
+
+---
+"No wildcard wildcard query":
+  - do:
+      search:
+        body:
+          track_total_hits: true
+          query:
+            wildcard:
+              my_wildcard: {value: "hello world" }
+
+
+  - match: {hits.total.value: 1}
+
+---
+"Term query on wildcard field":
+  - do:
+      search:
+        body:
+          track_total_hits: true
+          query:
+            term:
+              my_wildcard: "hello world"
+
+
+  - match: {hits.total.value: 1}
+
+---
+"Terms query on wildcard field":
+  - do:
+      search:
+        body:
+          track_total_hits: true
+          query:
+            terms:
+              my_wildcard: ["hello world", "does not exist"]
+
+
+  - match: {hits.total.value: 1}
+
+---
+"Prefix query on wildcard field":
+  - do:
+      search:
+        body:
+          track_total_hits: true
+          query:
+            prefix:
+              my_wildcard:
+                value: "hell*"
+
+
+  - match: {hits.total.value: 1}
+
+---
+"Sequence fail":
+  - do:
+      search:
+        body:
+          track_total_hits: true
+          query:
+            wildcard:
+              my_wildcard: {value: "*world*hello*" }
+
+
+  - match: {hits.total.value: 0}
+
+---
+"Aggs work":
+  - do:
+      search:
+        body:
+          track_total_hits: true
+          query:
+            wildcard:
+              my_wildcard: {value: "*world*" }
+          aggs:
+            top_vals:
+              terms: {field: "my_wildcard" }
+
+
+  - match: {hits.total.value: 2}
+  - length: { aggregations.top_vals.buckets: 2 }
+
+---
+"Sort works":
+  - do:
+      search:
+        body:
+          track_total_hits: true
+          sort: [ { "my_wildcard": "desc" } ]
+
+  - match: { hits.total.value: 2 }
+  - length: { hits.hits: 2 }
+  - match: { hits.hits.0._id: "1" }
+  - match: { hits.hits.1._id: "2" }
+
+  - do:
+      search:
+        body:
+          track_total_hits: true
+          sort: [ { "my_wildcard": "asc" } ]
+
+  - match: { hits.total.value: 2 }
+  - length: { hits.hits: 2 }
+  - match: { hits.hits.0._id: "2" }
+  - match: { hits.hits.1._id: "1" }
+
+

+ 18 - 0
x-pack/plugin/wildcard/build.gradle

@@ -0,0 +1,18 @@
+evaluationDependsOn(xpackModule('core'))
+
+apply plugin: 'elasticsearch.esplugin'
+
+esplugin {
+  name 'wildcard'
+  description 'A plugin for a keyword field type with efficient wildcard search'
+  classname 'org.elasticsearch.xpack.wildcard.Wildcard'
+  extendedPlugins = ['x-pack-core']
+}
+archivesBaseName = 'x-pack-wildcard'
+
+dependencies {
+  compileOnly project(path: xpackModule('core'), configuration: 'default')
+  testCompile project(path: xpackModule('core'), configuration: 'testArtifacts')
+}
+
+integTest.enabled = false

+ 31 - 0
x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/Wildcard.java

@@ -0,0 +1,31 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+
+package org.elasticsearch.xpack.wildcard;
+
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.index.mapper.Mapper;
+import org.elasticsearch.plugins.MapperPlugin;
+import org.elasticsearch.plugins.Plugin;
+import org.elasticsearch.xpack.wildcard.mapper.WildcardFieldMapper;
+
+import java.util.Collections;
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+public class Wildcard extends Plugin implements MapperPlugin {
+
+
+    public Wildcard(Settings settings) {
+    }
+
+    @Override
+    public Map<String, Mapper.TypeParser> getMappers() {
+        Map<String, Mapper.TypeParser> mappers = new LinkedHashMap<>();
+        mappers.put(WildcardFieldMapper.CONTENT_TYPE, new WildcardFieldMapper.TypeParser());
+        return Collections.unmodifiableMap(mappers);
+    }
+}

+ 112 - 0
x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/AutomatonQueryOnBinaryDv.java

@@ -0,0 +1,112 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+
+package org.elasticsearch.xpack.wildcard.mapper;
+
+import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.ConstantScoreScorer;
+import org.apache.lucene.search.ConstantScoreWeight;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreMode;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.TwoPhaseIterator;
+import org.apache.lucene.search.Weight;
+import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.ByteRunAutomaton;
+
+import java.io.IOException;
+import java.util.Objects;
+
+/**
+ * Query that runs an Automaton across all binary doc values.
+ * Expensive to run so normally used in conjunction with more selective query clauses.
+ */
+public class AutomatonQueryOnBinaryDv extends Query {
+
+    private final String field;
+    private final String matchPattern;
+    private final Automaton automaton;
+
+    public AutomatonQueryOnBinaryDv(String field, String matchPattern, Automaton automaton) {
+        this.field = field;
+        this.matchPattern = matchPattern;
+        this.automaton = automaton;
+    }
+
+    @Override
+    public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
+
+        ByteRunAutomaton bytesMatcher = new ByteRunAutomaton(automaton);
+
+        return new ConstantScoreWeight(this, boost) {
+
+            @Override
+            public Scorer scorer(LeafReaderContext context) throws IOException {
+                ByteArrayDataInput badi = new ByteArrayDataInput();
+                final BinaryDocValues values = DocValues.getBinary(context.reader(), field);
+                TwoPhaseIterator twoPhase = new TwoPhaseIterator(values) {
+                    @Override
+                    public boolean matches() throws IOException {
+                        BytesRef arrayOfValues = values.binaryValue();
+                        badi.reset(arrayOfValues.bytes);
+                        badi.setPosition(arrayOfValues.offset);
+
+                        int size = badi.readVInt();
+                        for (int i=0; i< size; i++) {
+                            int valLength = badi.readVInt();
+                            if (bytesMatcher.run(arrayOfValues.bytes, badi.getPosition(), valLength)) {
+                                return true;
+                            }
+                            badi.skipBytes(valLength);
+                        }
+                        return false;
+                    }
+
+                    @Override
+                    public float matchCost() {
+                        // TODO: how can we compute this?
+                        return 1000f;
+                    }
+                };
+                return new ConstantScoreScorer(this, score(), scoreMode, twoPhase);
+            }
+
+            @Override
+            public boolean isCacheable(LeafReaderContext ctx) {
+                return true;
+            }
+        };
+    }
+
+    @Override
+    public String toString(String field) {
+        return field + ":" + matchPattern;
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+        // Lucene compares queries against arbitrary Query instances (e.g. during
+        // BooleanQuery deduplication and query-cache lookups), so guard against
+        // null and other classes before casting, per the Query equals contract.
+        if (sameClassAs(obj) == false) {
+            return false;
+        }
+        AutomatonQueryOnBinaryDv other = (AutomatonQueryOnBinaryDv) obj;
+        return Objects.equals(field, other.field) && Objects.equals(matchPattern, other.matchPattern);
+    }
+
+    @Override
+    public int hashCode() {
+        // Seed with classHash() to stay consistent with sameClassAs()-based equals.
+        return 31 * classHash() + Objects.hash(field, matchPattern);
+    }
+
+}

+ 575 - 0
x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java

@@ -0,0 +1,575 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+
+
+package org.elasticsearch.xpack.wildcard.mapper;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.ngram.NGramTokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.DocValuesFieldExistsQuery;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.MultiTermQuery;
+import org.apache.lucene.search.MultiTermQuery.RewriteMethod;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.WildcardQuery;
+import org.apache.lucene.util.automaton.Automaton;
+import org.elasticsearch.ElasticsearchParseException;
+import org.elasticsearch.common.lucene.BytesRefs;
+import org.elasticsearch.common.lucene.Lucene;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.common.xcontent.support.XContentMapValues;
+import org.elasticsearch.index.Index;
+import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AnalyzerScope;
+import org.elasticsearch.index.analysis.NamedAnalyzer;
+import org.elasticsearch.index.fielddata.IndexFieldData;
+import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested;
+import org.elasticsearch.index.fielddata.IndexFieldDataCache;
+import org.elasticsearch.index.fielddata.fieldcomparator.BytesRefFieldComparatorSource;
+import org.elasticsearch.index.fielddata.plain.BytesBinaryDVIndexFieldData;
+import org.elasticsearch.index.mapper.BinaryFieldMapper.CustomBinaryDocValuesField;
+import org.elasticsearch.index.mapper.FieldMapper;
+import org.elasticsearch.index.mapper.MappedFieldType;
+import org.elasticsearch.index.mapper.Mapper;
+import org.elasticsearch.index.mapper.MapperParsingException;
+import org.elasticsearch.index.mapper.MapperService;
+import org.elasticsearch.index.mapper.ParseContext;
+import org.elasticsearch.index.mapper.ParseContext.Document;
+import org.elasticsearch.index.query.QueryShardContext;
+import org.elasticsearch.index.similarity.SimilarityProvider;
+import org.elasticsearch.indices.breaker.CircuitBreakerService;
+import org.elasticsearch.search.MultiValueMode;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+import static org.elasticsearch.index.mapper.TypeParsers.parseField;
+
+/**
+ * A {@link FieldMapper} for indexing fields with ngrams for efficient wildcard matching
+ */
+public class WildcardFieldMapper extends FieldMapper {
+
+    public static final String CONTENT_TYPE = "wildcard";
+    public static short MAX_CLAUSES_IN_APPROXIMATION_QUERY = 10; 
+    public static final int NGRAM_SIZE = 3;        
+    static final NamedAnalyzer WILDCARD_ANALYZER = new NamedAnalyzer("_wildcard", AnalyzerScope.GLOBAL, new Analyzer() {
+        @Override
+        public TokenStreamComponents createComponents(String fieldName) {
+            Tokenizer tokenizer = new NGramTokenizer(NGRAM_SIZE, NGRAM_SIZE);
+            return new TokenStreamComponents(tokenizer);
+        }
+    });    
+
+    public static class Defaults {
+        public static final MappedFieldType FIELD_TYPE = new WildcardFieldType();
+
+        static {
+            FIELD_TYPE.setTokenized(false);
+            FIELD_TYPE.setIndexAnalyzer(WILDCARD_ANALYZER);
+            FIELD_TYPE.setSearchAnalyzer(Lucene.KEYWORD_ANALYZER);
+            FIELD_TYPE.setIndexOptions(IndexOptions.DOCS);
+            FIELD_TYPE.setStoreTermVectorOffsets(false);
+            FIELD_TYPE.setOmitNorms(true);
+            FIELD_TYPE.freeze();
+        }
+        public static final int IGNORE_ABOVE = Integer.MAX_VALUE;        
+    }
+
+    public static class Builder extends FieldMapper.Builder<Builder, WildcardFieldMapper> {
+        protected int ignoreAbove = Defaults.IGNORE_ABOVE;
+
+        public Builder(String name) {
+            super(name, Defaults.FIELD_TYPE, Defaults.FIELD_TYPE);
+            builder = this;
+        }
+
+        @Override
+        public Builder docValues(boolean docValues) {
+            if (docValues == false) {
+                throw new MapperParsingException("The field [" + name + "] cannot have doc values = false");                
+            }
+            return this;
+        }
+
+        @Override
+        public Builder indexOptions(IndexOptions indexOptions) {
+            if (indexOptions != IndexOptions.DOCS) {
+                throw new MapperParsingException("The field [" + name + "] cannot have indexOptions = " + indexOptions);
+            }
+            return this;
+        }
+
+        @Override
+        public Builder store(boolean store) {
+            if (store) {
+                throw new MapperParsingException("The field [" + name + "] cannot have store = true");                
+            }
+            return this;
+        }
+
+        @Override
+        public Builder similarity(SimilarityProvider similarity) {
+            throw new MapperParsingException("The field [" + name + "] cannot have custom similarities");                
+        }
+
+        @Override
+        public Builder index(boolean index) {
+            if (index == false) {
+                throw new MapperParsingException("The field [" + name + "] cannot have index = false");                
+            }
+            return this;
+        }
+        
+        public Builder ignoreAbove(int ignoreAbove) {
+            if (ignoreAbove < 0) {
+                throw new IllegalArgumentException("[ignore_above] must be positive, got " + ignoreAbove);
+            }
+            this.ignoreAbove = ignoreAbove;
+            return this;
+        }        
+          
+        
+        @Override
+        protected void setupFieldType(BuilderContext context) {
+            super.setupFieldType(context);
+            fieldType().setHasDocValues(true);
+            fieldType().setTokenized(false);
+            fieldType().setIndexOptions(IndexOptions.DOCS);                
+        }
+
+        @Override
+        public WildcardFieldType fieldType() {
+            return (WildcardFieldType) super.fieldType();
+        }
+
+        @Override
+        public WildcardFieldMapper build(BuilderContext context) {
+            setupFieldType(context);            
+            return new WildcardFieldMapper(
+                    name, fieldType, defaultFieldType, ignoreAbove, 
+                    context.indexSettings(), multiFieldsBuilder.build(this, context), copyTo);
+        }
+    }
+
+    public static class TypeParser implements Mapper.TypeParser {
+        @Override
+        public Mapper.Builder<?, ?> parse(String name, Map<String, Object> node, ParserContext parserContext)
+                throws MapperParsingException {
+            WildcardFieldMapper.Builder builder = new WildcardFieldMapper.Builder(name);
+            parseField(builder, name, node, parserContext);
+            
+            for (Iterator<Map.Entry<String, Object>> iterator = node.entrySet().iterator(); iterator.hasNext();) {
+                Map.Entry<String, Object> entry = iterator.next();
+                String propName = entry.getKey();
+                Object propNode = entry.getValue();
+                if (propName.equals("ignore_above")) {
+                    builder.ignoreAbove(XContentMapValues.nodeIntegerValue(propNode, -1));
+                    iterator.remove();
+                }
+            }            
+            
+            return builder;
+        }
+    }
+    
+     public static final char TOKEN_START_OR_END_CHAR = 0;    
+    
+     public static final class WildcardFieldType extends MappedFieldType {
+
+        public WildcardFieldType() {            
+            setIndexAnalyzer(Lucene.KEYWORD_ANALYZER);
+            setSearchAnalyzer(Lucene.KEYWORD_ANALYZER);            
+        }
+
+        protected WildcardFieldType(WildcardFieldType ref) {
+            super(ref);
+        }
+
+        public WildcardFieldType clone() {
+            WildcardFieldType result = new WildcardFieldType(this);
+            return result;
+        }
+        
+                
+        // Holds parsed information about the wildcard pattern
+        static class PatternStructure {
+            boolean openStart, openEnd, hasSymbols;            
+            int lastGap =0;
+            int wildcardCharCount, wildcardStringCount;
+            String[] fragments;
+            Integer []  precedingGapSizes;
+            final String pattern;
+            
+            @SuppressWarnings("fallthrough") // Intentionally uses fallthrough mirroring implementation in Lucene's WildcardQuery
+            PatternStructure (String wildcardText) {
+                this.pattern = wildcardText;
+                ArrayList<String> fragmentList = new ArrayList<>();
+                ArrayList<Integer> precedingGapSizeList = new ArrayList<>();
+                StringBuilder sb = new StringBuilder();               
+                for (int i = 0; i < wildcardText.length();) {
+                    final int c = wildcardText.codePointAt(i);
+                    int length = Character.charCount(c);
+                    switch (c) {
+                    case WildcardQuery.WILDCARD_STRING:
+                        if (i == 0) {
+                            openStart = true;
+                        }
+                        openEnd = true;                        
+                        hasSymbols = true;
+                        wildcardStringCount++;
+                        
+                        if (sb.length() > 0) {
+                            precedingGapSizeList.add(lastGap);
+                            fragmentList.add(sb.toString());
+                            sb = new StringBuilder();
+                        }
+                        lastGap = Integer.MAX_VALUE;
+                        break;
+                    case WildcardQuery.WILDCARD_CHAR:
+                        if (i == 0) {
+                            openStart = true;
+                        }
+                        hasSymbols = true;
+                        wildcardCharCount++;
+                        openEnd = true;
+                        if (sb.length() > 0) {
+                            precedingGapSizeList.add(lastGap);
+                            fragmentList.add(sb.toString());
+                            sb = new StringBuilder();       
+                            lastGap = 0;
+                        }
+                        
+                        if (lastGap != Integer.MAX_VALUE) {
+                            lastGap++;
+                        }
+                        break;
+                    case WildcardQuery.WILDCARD_ESCAPE:
+                        // add the next codepoint instead, if it exists
+                        if (i + length < wildcardText.length()) {
+                            final int nextChar = wildcardText.codePointAt(i + length);
+                            length += Character.charCount(nextChar);
+                            sb.append(Character.toChars(nextChar));
+                            openEnd = false;
+                            break;
+                        } // else fallthru, lenient parsing with a trailing \
+                    default:
+                        openEnd = false;
+                        sb.append(Character.toChars(c));
+                    }
+                    i += length;
+                }
+                if (sb.length() > 0) {
+                    precedingGapSizeList.add(lastGap);
+                    fragmentList.add(sb.toString());
+                    lastGap = 0;
+                }
+                fragments = fragmentList.toArray(new String[0]);
+                precedingGapSizes = precedingGapSizeList.toArray(new Integer[0]);
+                
+            }
+            
+            public boolean needsVerification() {
+                // Return true if term queries are not enough evidence
+                if (fragments.length == 1 && wildcardCharCount == 0) {
+                    // The one case where we don't need verification is when 
+                    // we have a single fragment and no ? characters
+                    return false;
+                }
+                return true;
+            }
+            
+            // Returns number of positions for last gap (Integer.MAX means unlimited gap)
+            public int getPrecedingGapSize(int fragmentNum) {
+                return precedingGapSizes[fragmentNum];
+            }
+
+            public boolean isMatchAll() {
+                return fragments.length == 0 && wildcardStringCount >0 && wildcardCharCount ==0;
+            }
+
+            @Override
+            public int hashCode() {
+                return pattern.hashCode();
+            }
+
+            @Override
+            public boolean equals(Object obj) {
+                PatternStructure other = (PatternStructure) obj;
+                return pattern.equals(other.pattern);
+            }
+            
+            
+        }
+        
+
+        @Override
+        public Query wildcardQuery(String wildcardPattern, RewriteMethod method, QueryShardContext context) {
+            PatternStructure patternStructure = new PatternStructure(wildcardPattern);            
+            ArrayList<String> tokens = new ArrayList<>();
+
+            for (int i = 0; i < patternStructure.fragments.length; i++) {
+                String fragment = patternStructure.fragments[i];
+                int fLength = fragment.length();
+                if (fLength == 0) {
+                    continue;
+                }
+                
+                // Add any start/end of string character
+                if (i == 0 && patternStructure.openStart == false) {
+                    // Start-of-string anchored (is not a leading wildcard)
+                    fragment = TOKEN_START_OR_END_CHAR + fragment;
+                }
+                if (patternStructure.openEnd == false && i == patternStructure.fragments.length - 1) {
+                    // End-of-string anchored (is not a trailing wildcard)
+                    fragment = fragment + TOKEN_START_OR_END_CHAR + TOKEN_START_OR_END_CHAR;
+                }
+                if (fragment.codePointCount(0, fragment.length()) <= NGRAM_SIZE) {
+                    tokens.add(fragment);
+                } else {
+                    // Break fragment into multiple Ngrams                
+                    TokenStream tokenizer = WILDCARD_ANALYZER.tokenStream(name(), fragment);
+                    CharTermAttribute termAtt = tokenizer.addAttribute(CharTermAttribute.class);
+                    String lastUnusedToken = null;
+                    try {
+                        tokenizer.reset();
+                        boolean takeThis = true;
+                        // minimise number of terms searched - eg for "12345" and 3grams we only need terms
+                        // `123` and `345` - no need to search for 234. We take every other ngram.
+                        while (tokenizer.incrementToken()) {
+                            String tokenValue = termAtt.toString();
+                            if (takeThis) {
+                                tokens.add(tokenValue);
+                            } else {
+                                lastUnusedToken = tokenValue;                                
+                            }
+                            // alternate
+                            takeThis = !takeThis;
+                        }
+                        if (lastUnusedToken != null) {
+                            // given `cake` and 3 grams the loop above would output only `cak` and we need to add trailing
+                            // `ake` to complete the logic.
+                            tokens.add(lastUnusedToken);
+                        }
+                        tokenizer.end();
+                        tokenizer.close();
+                    } catch (IOException ioe) {
+                        throw new ElasticsearchParseException("Error parsing wildcard query pattern fragment [" + fragment + "]");
+                    }
+                }
+            }
+
+            if (patternStructure.isMatchAll()) {
+                return new MatchAllDocsQuery();
+            } 
+            BooleanQuery approximation = createApproximationQuery(tokens);
+            if (approximation.clauses().size() > 1 || patternStructure.needsVerification()) {
+                BooleanQuery.Builder verifyingBuilder = new BooleanQuery.Builder();
+                verifyingBuilder.add(new BooleanClause(approximation, Occur.MUST));
+                Automaton automaton = WildcardQuery.toAutomaton(new Term(name(), wildcardPattern));
+                verifyingBuilder.add(new BooleanClause(new AutomatonQueryOnBinaryDv(name(), wildcardPattern, automaton), Occur.MUST));
+                return verifyingBuilder.build();
+            }
+            return approximation;
+        }                
+
+        /**
+         * Builds the cheap ngram-based "approximation" query for a wildcard pattern.
+         * All clauses are MUST, so this query can only ever over-match; a separate,
+         * more expensive verification query (automaton on binary doc values) is
+         * responsible for exact checking of the candidates it lets through.
+         *
+         * If there are more tokens than MAX_CLAUSES_IN_APPROXIMATION_QUERY the list
+         * is thinned by sampling at evenly spaced positions — dropping clauses only
+         * weakens the filter, never loses matches.
+         *
+         * @param tokens ngram (or shorter-than-ngram) fragments derived from the pattern
+         * @return a conjunction of term/wildcard clauses approximating the pattern
+         */
+        private BooleanQuery createApproximationQuery(ArrayList<String> tokens) {
+            BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder();
+            if (tokens.size() <= MAX_CLAUSES_IN_APPROXIMATION_QUERY) {
+                for (String token : tokens) {
+                    addClause(token, bqBuilder);
+                }
+                return bqBuilder.build();
+            }
+            // Thin out the number of clauses using a selection spread evenly across the range
+            // NOTE(review): assumes MAX_CLAUSES_IN_APPROXIMATION_QUERY > 1 (else divide-by-zero) — confirm constant
+            float step = (float) (tokens.size() - 1) / (float) (MAX_CLAUSES_IN_APPROXIMATION_QUERY - 1); // set step size
+            for (int i = 0; i < MAX_CLAUSES_IN_APPROXIMATION_QUERY; i++) {
+                addClause(tokens.get(Math.round(step * i)), bqBuilder); // add each element of a position which is a multiple of step
+            }
+            // TODO we can be smarter about pruning here. e.g.
+            // * Avoid wildcard queries if there are sufficient numbers of other terms that are full 3grams that are cheaper term queries
+            // * We can select terms on their scarcity rather than even spreads across the search string.
+            
+            return bqBuilder.build();
+        }
+
+        private void addClause(String token, BooleanQuery.Builder bqBuilder) {
+            assert token.codePointCount(0, token.length()) <= NGRAM_SIZE;
+            if (token.codePointCount(0, token.length()) == NGRAM_SIZE) {
+                TermQuery tq = new TermQuery(new Term(name(), token));
+                bqBuilder.add(new BooleanClause(tq, Occur.MUST));
+            } else {
+                WildcardQuery wq = new WildcardQuery(new Term(name(), token + "*"));
+                wq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_REWRITE);
+                bqBuilder.add(new BooleanClause(wq, Occur.MUST));
+            }
+
+        }
+
+        /** Returns this field type's content type identifier (shared with the mapper). */
+        @Override
+        public String typeName() {
+            return CONTENT_TYPE;
+        }
+
+        /**
+         * Exists checks are driven off the binary doc values, which createFields
+         * populates for every value that is actually indexed.
+         */
+        @Override
+        public Query existsQuery(QueryShardContext context) {
+            return new DocValuesFieldExistsQuery(name());
+        }
+
+        @Override
+        public Query termQuery(Object value, QueryShardContext context) {
+            return wildcardQuery(BytesRefs.toString(value), MultiTermQuery.CONSTANT_SCORE_REWRITE, context);
+        }
+        
+        @Override
+        public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, QueryShardContext context) {
+            return wildcardQuery(value + "*", method, context);
+        }        
+
+        /**
+         * A terms query is the disjunction of the individual term queries, wrapped
+         * in a constant score since relevance ranking is not meaningful here.
+         */
+        @Override
+        public Query termsQuery(List<?> values, QueryShardContext context) {
+            BooleanQuery.Builder disjunction = new BooleanQuery.Builder();
+            for (Object v : values) {
+                disjunction.add(termQuery(v, context), Occur.SHOULD);
+            }
+            return new ConstantScoreQuery(disjunction.build());
+        }
+                
+        /**
+         * Fielddata (backing sorting and aggregations) is built over the binary doc
+         * values that hold the original, un-ngrammed string values. Doc values
+         * cannot be disabled for this field type, so failIfNoDocValues() is a
+         * defensive check.
+         */
+        @Override
+        public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName) {
+            failIfNoDocValues();
+            return new IndexFieldData.Builder() {
+
+                @Override
+                public IndexFieldData<?> build(IndexSettings indexSettings, MappedFieldType fieldType, IndexFieldDataCache cache,
+                        CircuitBreakerService breakerService, MapperService mapperService) {
+                    return new WildcardBytesBinaryDVIndexFieldData(indexSettings.getIndex(), fieldType.name());
+                }};
+        }        
+    }
+     
+    /**
+     * Fielddata implementation over the field's binary doc values that adds
+     * support for sorting on the stored (original) values.
+     */
+    static class WildcardBytesBinaryDVIndexFieldData extends BytesBinaryDVIndexFieldData {
+
+        WildcardBytesBinaryDVIndexFieldData(Index index, String fieldName) {
+            super(index, fieldName);
+        }
+
+        @Override
+        public SortField sortField(Object missingValue, MultiValueMode sortMode, Nested nested, boolean reverse) {
+            XFieldComparatorSource comparatorSource =
+                    new BytesRefFieldComparatorSource(this, missingValue, sortMode, nested);
+            return new SortField(getFieldName(), comparatorSource, reverse);
+        }
+    }
+
+    // Values longer than this many chars are silently skipped at index time
+    private int ignoreAbove;
+
+    /**
+     * Private constructor - instances are created via the mapper's Builder.
+     * Derives the tokenized ngram field type from the supplied (untokenized)
+     * field type; the ngram index is what wildcard approximation queries run on.
+     */
+    private WildcardFieldMapper(String simpleName, MappedFieldType fieldType, MappedFieldType defaultFieldType,
+                int ignoreAbove, Settings indexSettings, MultiFields multiFields, CopyTo copyTo) {
+        super(simpleName, fieldType, defaultFieldType, indexSettings, multiFields, copyTo);
+        this.ignoreAbove = ignoreAbove;
+        assert fieldType.indexOptions() == IndexOptions.DOCS;
+        
+        // Clone so the ngram variant can be tokenized without mutating the shared field type
+        ngramFieldType = fieldType.clone();
+        ngramFieldType.setTokenized(true);                    
+        ngramFieldType.freeze();
+    }
+
+    /** Values that have more chars than the return value of this method will
+     *  be skipped at parsing time (pkg-private for testing). */
+    int ignoreAbove() {
+        return ignoreAbove;
+    }    
+    
+    /** Covariant override narrowing the return type of the mapper clone. */
+    @Override
+    protected WildcardFieldMapper clone() {
+        return (WildcardFieldMapper) super.clone();
+    }
+
+    /** Narrows the inherited field type accessor to {@link WildcardFieldType}. */
+    @Override
+    public WildcardFieldType fieldType() {
+        return (WildcardFieldType) super.fieldType();
+    }
+
+    /**
+     * Serializes mapper settings; ignore_above is only emitted when it differs
+     * from the default, or when defaults are explicitly requested.
+     */
+    @Override
+    protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, Params params) throws IOException {
+        super.doXContentBody(builder, includeDefaults, params);
+        if (includeDefaults || ignoreAbove != Defaults.IGNORE_ABOVE) {
+            builder.field("ignore_above", ignoreAbove);
+        }
+    }
+    
+    /**
+     * Extracts the string value from the parse context (external value, explicit
+     * null token, or document text) and delegates the actual index/doc-value
+     * field creation to createFields.
+     */
+    @Override
+    protected void parseCreateField(ParseContext context, List<IndexableField> fields) throws IOException {
+        String value;
+        if (context.externalValueSet()) {
+            value = context.externalValue().toString();
+        } else {
+            XContentParser parser = context.parser();
+            value = parser.currentToken() == XContentParser.Token.VALUE_NULL
+                    ? fieldType().nullValueAsString()
+                    : parser.textOrNull();
+        }
+        createFields(value, context.doc(), fields);
+    }
+    
+    // For internal use by Lucene only - used to define ngram index
+    final MappedFieldType ngramFieldType;
+    
+    /**
+     * Indexes one value: an ngram-analyzed copy of the value wrapped in the
+     * start/end anchor chars, plus the raw UTF-8 bytes appended to the
+     * per-document binary doc values field. Null values and values longer than
+     * ignore_above are skipped entirely.
+     */
+    void createFields(String value, Document parseDoc, List<IndexableField> fields) {
+        if (value == null || value.length() > ignoreAbove) {
+            return;
+        }
+        // Anchor chars let patterns match the start/end of string; the end char is
+        // doubled, mirroring the anchoring applied on the query side.
+        String anchoredValue = TOKEN_START_OR_END_CHAR + value + TOKEN_START_OR_END_CHAR + TOKEN_START_OR_END_CHAR;
+        fields.add(new Field(fieldType().name(), anchoredValue, ngramFieldType));
+        
+        byte[] rawBytes = value.getBytes(StandardCharsets.UTF_8);
+        // Multi-valued docs share one binary doc values field, keyed by field name
+        CustomBinaryDocValuesField dvField = (CustomBinaryDocValuesField) parseDoc.getByKey(fieldType().name());
+        if (dvField == null) {
+            parseDoc.addWithKey(fieldType().name(), new CustomBinaryDocValuesField(fieldType().name(), rawBytes));
+        } else {
+            dvField.add(rawBytes);
+        }
+    }
+
+    /** Returns the content type name this mapper is registered under. */
+    @Override
+    protected String contentType() {
+        return CONTENT_TYPE;
+    }
+    
+
+    /**
+     * Applies a mapping update; ignore_above is the only locally-managed setting
+     * and is taken from the incoming mapper.
+     */
+    @Override
+    protected void doMerge(Mapper mergeWith) {
+        super.doMerge(mergeWith);
+        this.ignoreAbove = ((WildcardFieldMapper) mergeWith).ignoreAbove;
+    }    
+}

+ 331 - 0
x-pack/plugin/wildcard/src/test/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapperTests.java

@@ -0,0 +1,331 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+
+package org.elasticsearch.xpack.wildcard.mapper;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.SortedSetDocValuesField;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.WildcardQuery;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.Version;
+import org.elasticsearch.cluster.metadata.IndexMetaData;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.index.Index;
+import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.cache.bitset.BitsetFilterCache;
+import org.elasticsearch.index.fielddata.IndexFieldData;
+import org.elasticsearch.index.fielddata.IndexFieldDataCache;
+import org.elasticsearch.index.mapper.ContentPath;
+import org.elasticsearch.index.mapper.KeywordFieldMapper;
+import org.elasticsearch.index.mapper.MappedFieldType;
+import org.elasticsearch.index.mapper.Mapper;
+import org.elasticsearch.index.mapper.MapperParsingException;
+import org.elasticsearch.index.mapper.ParseContext;
+import org.elasticsearch.index.query.QueryShardContext;
+import org.elasticsearch.search.sort.FieldSortBuilder;
+import org.elasticsearch.test.ESTestCase;
+import org.elasticsearch.test.IndexSettingsModule;
+import org.elasticsearch.xpack.wildcard.mapper.WildcardFieldMapper.Builder;
+import org.junit.Before;
+import org.mockito.Mockito;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.function.BiFunction;
+
+import static org.hamcrest.Matchers.equalTo;
+
+/**
+ * Unit tests for the wildcard field mapper. The main strategy is differential
+ * testing: the same random "a"/"b" strings are indexed both as a wildcard field
+ * and as a keyword field, and random wildcard patterns must produce identical
+ * hits (and identical sort orders) on both fields.
+ */
+public class WildcardFieldMapperTests extends ESTestCase {
+
+    private static final String KEYWORD_FIELD_NAME = "keyword_field";
+    private static final String WILDCARD_FIELD_NAME = "wildcard_field";
+    static final int MAX_FIELD_LENGTH = 100;
+    // NOTE(review): despite the "...FieldType" names these hold mappers, not field types
+    static WildcardFieldMapper wildcardFieldType;
+    static KeywordFieldMapper keywordFieldType;
+
+    @Override
+    @Before
+    public void setUp() throws Exception {
+        // Build a wildcard mapper (with ignore_above) and a keyword mapper to compare against
+        Builder builder = new WildcardFieldMapper.Builder(WILDCARD_FIELD_NAME);
+        builder.ignoreAbove(MAX_FIELD_LENGTH);        
+        wildcardFieldType = builder.build(new Mapper.BuilderContext(createIndexSettings().getSettings(), new ContentPath(0)));
+        
+        
+        org.elasticsearch.index.mapper.KeywordFieldMapper.Builder kwBuilder = new KeywordFieldMapper.Builder(KEYWORD_FIELD_NAME);
+        keywordFieldType = kwBuilder.build(new Mapper.BuilderContext(createIndexSettings().getSettings(), new ContentPath(0)));        
+        super.setUp();
+    }
+    
+    // doc_values cannot be disabled - the stored values are required for query verification
+    public void testIllegalDocValuesArgument() {
+        Builder ft = new WildcardFieldMapper.Builder("test");
+        MapperParsingException e = expectThrows(MapperParsingException.class,
+                () -> ft.docValues(false));
+        assertEquals("The field [test] cannot have doc values = false", e.getMessage());
+    }        
+
+    // indexing cannot be disabled - the ngram index drives the approximation query
+    public void testIllegalIndexedArgument() {
+        Builder ft = new WildcardFieldMapper.Builder("test");
+        MapperParsingException e = expectThrows(MapperParsingException.class,
+                () -> ft.index(false));
+        assertEquals("The field [test] cannot have index = false", e.getMessage());
+    }
+    
+    // Values longer than ignore_above must be skipped entirely, so even *a* finds nothing
+    public void testTooBigKeywordField() throws IOException {
+        Directory dir = newDirectory();
+        IndexWriterConfig iwc = newIndexWriterConfig(WildcardFieldMapper.WILDCARD_ANALYZER);
+        iwc.setMergePolicy(newTieredMergePolicy(random()));
+        RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+        
+        // Create a string that is too large and will not be indexed
+        String docContent = randomABString(MAX_FIELD_LENGTH + 1);
+        Document doc = new Document();
+        ParseContext.Document parseDoc = new ParseContext.Document();                        
+        addFields(parseDoc, doc, docContent);
+        indexDoc(parseDoc, doc, iw);        
+
+        iw.forceMerge(1);
+        DirectoryReader reader = iw.getReader();
+        IndexSearcher searcher = newSearcher(reader);
+        iw.close();
+
+        Query wildcardFieldQuery = wildcardFieldType.fieldType().wildcardQuery("*a*", null, null);
+        TopDocs wildcardFieldTopDocs = searcher.search(wildcardFieldQuery, 10, Sort.INDEXORDER);
+        assertThat(wildcardFieldTopDocs.totalHits.value, equalTo(0L));
+                
+        reader.close();
+        dir.close();        
+    }
+    
+    //Test long query strings don't cause exceptions
+    public void testTooBigQueryField() throws IOException {
+        Directory dir = newDirectory();
+        IndexWriterConfig iwc = newIndexWriterConfig(WildcardFieldMapper.WILDCARD_ANALYZER);
+        iwc.setMergePolicy(newTieredMergePolicy(random()));
+        RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+        
+        // Create a string that is too large and will not be indexed
+        String docContent = randomABString(10);
+        Document doc = new Document();
+        ParseContext.Document parseDoc = new ParseContext.Document();                        
+        addFields(parseDoc, doc, docContent);
+        indexDoc(parseDoc, doc, iw);        
+
+        iw.forceMerge(1);
+        DirectoryReader reader = iw.getReader();
+        IndexSearcher searcher = newSearcher(reader);
+        iw.close();
+
+        // A pattern needing more clauses than BooleanQuery allows must be thinned, not throw
+        String queryString = randomABString((BooleanQuery.getMaxClauseCount() * 2) + 1);
+        Query wildcardFieldQuery = wildcardFieldType.fieldType().wildcardQuery(queryString, null, null);
+        TopDocs wildcardFieldTopDocs = searcher.search(wildcardFieldQuery, 10, Sort.INDEXORDER);
+        assertThat(wildcardFieldTopDocs.totalHits.value, equalTo(0L));
+                
+        reader.close();
+        dir.close();        
+    }    
+    
+    
+    // Differential test: wildcard field must agree with keyword field on hits and sort order
+    public void testSearchResultsVersusKeywordField() throws IOException {
+        Directory dir = newDirectory();
+        IndexWriterConfig iwc = newIndexWriterConfig(WildcardFieldMapper.WILDCARD_ANALYZER);
+        iwc.setMergePolicy(newTieredMergePolicy(random()));
+        RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+
+        int numDocs = 100;
+        HashSet<String> values = new HashSet<>();
+        for (int i = 0; i < numDocs; i++) {
+            Document doc = new Document();
+            ParseContext.Document parseDoc = new ParseContext.Document();                        
+            String docContent = randomABString(1 + randomInt(MAX_FIELD_LENGTH - 1));
+            // Keep values unique so hit counts line up one-to-one across both fields
+            if (values.contains(docContent) == false) {                
+                addFields(parseDoc, doc, docContent);
+                values.add(docContent);
+            }
+            // Occasionally add a multi-value field
+            if (randomBoolean()) {
+                docContent = randomABString(1 + randomInt(MAX_FIELD_LENGTH - 1));
+                if (values.contains(docContent) == false) {                
+                    addFields(parseDoc, doc, docContent);
+                    values.add(docContent);
+                }                
+            }
+            indexDoc(parseDoc, doc, iw);
+            
+        }
+
+        iw.forceMerge(1);
+        DirectoryReader reader = iw.getReader();
+        IndexSearcher searcher = newSearcher(reader);
+        iw.close();
+
+        int numSearches = 100;
+        for (int i = 0; i < numSearches; i++) {
+            String randomWildcardPattern = getRandomWildcardPattern();
+
+            Query wildcardFieldQuery = wildcardFieldType.fieldType().wildcardQuery(randomWildcardPattern, null, null);
+            TopDocs wildcardFieldTopDocs = searcher.search(wildcardFieldQuery, values.size() + 1, Sort.INDEXORDER);
+
+            // Reference result: a plain Lucene WildcardQuery on the keyword field
+            Query keywordFieldQuery = new WildcardQuery(new Term(KEYWORD_FIELD_NAME, randomWildcardPattern));
+            TopDocs kwTopDocs = searcher.search(keywordFieldQuery, values.size() + 1, Sort.INDEXORDER);
+
+            assertThat(kwTopDocs.totalHits.value, equalTo(wildcardFieldTopDocs.totalHits.value));
+
+            // The doc id sets must match exactly, not just the counts
+            HashSet<Integer> expectedDocs = new HashSet<>();
+            for (ScoreDoc topDoc : kwTopDocs.scoreDocs) {
+                expectedDocs.add(topDoc.doc);
+            }
+            for (ScoreDoc wcTopDoc : wildcardFieldTopDocs.scoreDocs) {
+                assertTrue(expectedDocs.remove(wcTopDoc.doc));
+            }
+            assertThat(expectedDocs.size(), equalTo(0));
+        }
+        
+        
+        //Test keyword and wildcard sort operations are also equivalent
+        QueryShardContext shardContextMock = createMockShardContext();
+        
+        FieldSortBuilder wildcardSortBuilder = new FieldSortBuilder(WILDCARD_FIELD_NAME);
+        SortField wildcardSortField = wildcardSortBuilder.build(shardContextMock).field;        
+        ScoreDoc[] wildcardHits = searcher.search(new MatchAllDocsQuery(), numDocs, new Sort(wildcardSortField)).scoreDocs;
+
+        FieldSortBuilder keywordSortBuilder = new FieldSortBuilder(KEYWORD_FIELD_NAME);
+        SortField keywordSortField = keywordSortBuilder.build(shardContextMock).field;        
+        ScoreDoc[] keywordHits = searcher.search(new MatchAllDocsQuery(), numDocs, new Sort(keywordSortField)).scoreDocs;
+        
+        assertThat(wildcardHits.length, equalTo(keywordHits.length));
+        for (int i = 0; i < wildcardHits.length; i++) {
+            assertThat(wildcardHits[i].doc, equalTo(keywordHits[i].doc));            
+        }
+        
+        reader.close();
+        dir.close();
+    }
+
+    
+    
+    // Resolves field names for the mock shard context used by sort building
+    protected MappedFieldType provideMappedFieldType(String name) {
+        if (name.equals(WILDCARD_FIELD_NAME)) {
+            return wildcardFieldType.fieldType();            
+        } else {
+            return keywordFieldType.fieldType();
+        }
+    }    
+    
+    // Minimal QueryShardContext: only fielddata lookup and field mapping are functional
+    protected final QueryShardContext createMockShardContext() {
+        Index index = new Index(randomAlphaOfLengthBetween(1, 10), "_na_");
+        IndexSettings idxSettings = IndexSettingsModule.newIndexSettings(index,
+            Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build());
+        BitsetFilterCache bitsetFilterCache = new BitsetFilterCache(idxSettings, Mockito.mock(BitsetFilterCache.Listener.class));
+        BiFunction<MappedFieldType, String, IndexFieldData<?>> indexFieldDataLookup = (fieldType, fieldIndexName) -> {
+            IndexFieldData.Builder builder = fieldType.fielddataBuilder(fieldIndexName);
+            return builder.build(idxSettings, fieldType, new IndexFieldDataCache.None(), null, null);
+        };
+        return new QueryShardContext(0, idxSettings, BigArrays.NON_RECYCLING_INSTANCE, bitsetFilterCache, indexFieldDataLookup,
+                null, null, null, xContentRegistry(), null, null, null,
+                () -> randomNonNegativeLong(), null, null, () -> true) {
+
+            @Override
+            public MappedFieldType fieldMapper(String name) {
+                return provideMappedFieldType(name);
+            }
+        };
+    }       
+    
+    // Adds the same content as both a wildcard field (via the mapper) and a keyword field
+    private void addFields(ParseContext.Document parseDoc, Document doc, String docContent) throws IOException {
+        ArrayList<IndexableField> fields = new ArrayList<>();
+        wildcardFieldType.createFields(docContent, parseDoc, fields);
+
+        for (IndexableField indexableField : fields) {
+            doc.add(indexableField);
+        }
+        // Add keyword fields too
+        doc.add(new SortedSetDocValuesField(KEYWORD_FIELD_NAME, new BytesRef(docContent)));        
+        doc.add(new StringField(KEYWORD_FIELD_NAME, docContent, Field.Store.YES));
+    }
+
+    // Copies the mapper-created binary doc values field (if any) into the Lucene doc
+    private void indexDoc(ParseContext.Document parseDoc, Document doc, RandomIndexWriter iw) throws IOException {
+        IndexableField field = parseDoc.getByKey(wildcardFieldType.name());
+        if (field != null) {
+            doc.add(field);
+        }
+        iw.addDocument(doc);
+    }
+
+    protected IndexSettings createIndexSettings() {
+        return new IndexSettings(
+                IndexMetaData.builder("_index").settings(Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT))
+                        .numberOfShards(1).numberOfReplicas(0).creationDate(System.currentTimeMillis()).build(),
+                Settings.EMPTY);
+    }
+
+
+    // Random string over a two-char alphabet - maximizes ngram collisions between values
+    static String randomABString(int minLength) {
+        StringBuilder sb = new StringBuilder();
+        while (sb.length() < minLength) {
+            if (randomBoolean()) {
+                sb.append("a");
+            } else {
+                sb.append("b");
+            }
+        }
+        return sb.toString();
+    }
+
+    // Appends a random wildcard operator, or an escaped (literal) one
+    private void randomSyntaxChar(StringBuilder sb) {
+        switch (randomInt(3)) {
+        case 0:
+            sb.append(WildcardQuery.WILDCARD_CHAR);
+            break;
+        case 1:
+            sb.append(WildcardQuery.WILDCARD_STRING);
+            break;
+        case 2:
+            sb.append(WildcardQuery.WILDCARD_ESCAPE);
+            sb.append(WildcardQuery.WILDCARD_STRING);
+            break;
+        case 3:
+            sb.append(WildcardQuery.WILDCARD_ESCAPE);
+            sb.append(WildcardQuery.WILDCARD_CHAR);
+            break;
+        }
+    }
+
+    // Builds 1-5 random fragments joined (and optionally bracketed) by wildcard syntax
+    private String getRandomWildcardPattern() {
+        StringBuilder sb = new StringBuilder();
+        int numFragments = 1 + randomInt(4);
+        if (randomInt(10) == 1) {
+            randomSyntaxChar(sb);
+        }
+        for (int i = 0; i < numFragments; i++) {
+            if (i > 0) {
+                randomSyntaxChar(sb);
+            }
+            sb.append(randomABString(1 + randomInt(6)));
+        }
+        if (randomInt(10) == 1) {
+            randomSyntaxChar(sb);
+        }
+        return sb.toString();
+    }
+}

+ 19 - 0
x-pack/plugin/wildcard/src/test/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldTypeTests.java

@@ -0,0 +1,19 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+
+
+package org.elasticsearch.xpack.wildcard.mapper;
+
+import org.elasticsearch.index.mapper.FieldTypeTestCase;
+import org.elasticsearch.index.mapper.MappedFieldType;
+
+/**
+ * Runs the shared MappedFieldType contract checks (from FieldTypeTestCase)
+ * against the wildcard field type.
+ */
+public class WildcardFieldTypeTests extends FieldTypeTestCase {
+
+    @Override
+    protected MappedFieldType createDefaultFieldType() {
+        return new WildcardFieldMapper.WildcardFieldType();
+    }  
+}