
Wildcard field - add normalisation of ngram tokens to reduce disk space. (#63120)

Adds normalisation of ngram tokens to reduce disk space.
All punctuation becomes the / char, and for a-z0-9 chars even codepoints are turned into the prior odd one, e.g. aab becomes aaa

Closes #62817
markharwood 5 years ago
parent
commit
c23061345a
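
As a quick illustration of the folding rule above, here is a minimal standalone sketch (FoldSketch and fold() are hypothetical names for this page, not part of the change; the shipped implementation is PunctuationFoldingFilter.normalize in the diff below, which additionally leaves TOKEN_START_OR_END_CHAR untouched):

public class FoldSketch {
    static String fold(String s) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < s.length(); i++) {
            char c = Character.toLowerCase(s.charAt(i));
            if (Character.isLetterOrDigit(c) == false) {
                sb.append('/');                // all punctuation folds to the / char
            } else if (c > 48 && c <= 128 && c % 2 == 0) {
                sb.append((char) (c - 1));     // even codepoint -> prior odd, e.g. 'b'(98) -> 'a'(97)
            } else {
                sb.append(c);                  // odd codepoints are kept as-is
            }
        }
        return sb.toString();
    }

    public static void main(String[] args) {
        System.out.println(fold("aab"));    // aaa
        System.out.println(fold("foobar")); // eooaaq
        System.out.println(fold("C:/tmp")); // c//smo
    }
}

Folding halves the character alphabet for a-z0-9 and collapses all punctuation to a single char, so far fewer distinct trigrams reach the index; the approximation query becomes less selective, but its results are still verified against the doc-values automaton.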

+ 129 - 58
x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java

@@ -8,6 +8,8 @@
 package org.elasticsearch.xpack.wildcard.mapper;
 
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.ngram.NGramTokenizer;
@@ -37,6 +39,7 @@ import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.RegExp;
 import org.apache.lucene.util.automaton.RegExp.Kind;
 import org.elasticsearch.ElasticsearchParseException;
+import org.elasticsearch.Version;
 import org.elasticsearch.common.geo.ShapeRelation;
 import org.elasticsearch.common.lucene.BytesRefs;
 import org.elasticsearch.common.lucene.Lucene;
@@ -91,13 +94,94 @@ public class WildcardFieldMapper extends FieldMapper {
     public static final String CONTENT_TYPE = "wildcard";
     public static short MAX_CLAUSES_IN_APPROXIMATION_QUERY = 10;
     public static final int NGRAM_SIZE = 3;
-    static final NamedAnalyzer WILDCARD_ANALYZER = new NamedAnalyzer("_wildcard", AnalyzerScope.GLOBAL, new Analyzer() {
+    static final NamedAnalyzer WILDCARD_ANALYZER_7_10 = new NamedAnalyzer("_wildcard_7_10", AnalyzerScope.GLOBAL, new Analyzer() {
         @Override
         public TokenStreamComponents createComponents(String fieldName) {
             Tokenizer tokenizer = new NGramTokenizer(NGRAM_SIZE, NGRAM_SIZE);
-            return new TokenStreamComponents(tokenizer);
+            
+            TokenStream tok = new LowerCaseFilter(tokenizer);
+            tok = new PunctuationFoldingFilter(tok);
+            
+            return new TokenStreamComponents(r -> {
+                tokenizer.setReader(r);
+            }, tok);
         }
     });
+    
+    // @deprecated - used for BWC with Elasticsearch 7.9
+    static final NamedAnalyzer WILDCARD_ANALYZER_7_9 = new NamedAnalyzer("_wildcard", AnalyzerScope.GLOBAL, new Analyzer() {
+        @Override
+        public TokenStreamComponents createComponents(String fieldName) {
+            Tokenizer tokenizer = new NGramTokenizer(NGRAM_SIZE, NGRAM_SIZE);
+            TokenStream tok = new LowerCaseFilter(tokenizer);
+            return new TokenStreamComponents(r -> {
+                tokenizer.setReader(r);
+            }, tok);            
+        }
+    });
+    
+    public static class PunctuationFoldingFilter extends TokenFilter {
+        private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+        
+        /**
+         * Create a new PunctuationFoldingFilter that normalizes token text such that even-numbered ascii values
+         * are made odd and punctuation is replaced with /
+         * 
+         * @param in TokenStream to filter
+         */
+        public PunctuationFoldingFilter(TokenStream in) {
+          super(in);
+        }
+        
+        @Override
+        public final boolean incrementToken() throws IOException {
+            if (input.incrementToken()) {
+                normalize(termAtt.buffer(), 0, termAtt.length());
+                return true;
+            } else {
+                return false;
+            }
+        }
+        
+        public static String normalize(String s) {
+            char[] chars = s.toCharArray();
+            normalize(chars, 0, chars.length);
+            return new String(chars);            
+        }
+        
+        /**
+         * Normalizes a token
+         */
+        public static void normalize(final char[] buffer, final int offset, final int limit) {
+            assert buffer.length >= limit;
+            assert 0 <= offset && offset <= buffer.length;
+            for (int i = offset; i < limit;) {
+                int codepoint = Character.codePointAt(buffer, i, limit);
+                i += Character.toChars(normalize(codepoint), buffer, i);
+            }
+        }
+
+        private static int normalize(int codepoint) {
+            if (codepoint == TOKEN_START_OR_END_CHAR) {
+                return codepoint;
+            }
+            if (Character.isLetterOrDigit(codepoint) == false) {
+                // Replace non letters or digits with /
+                return 47;
+            }
+            // For all other characters, fold even ascii codepoints to the prior odd one.
+            if (codepoint > 48 && codepoint <= 128 && codepoint % 2 == 0) {
+                // Even ascii chars in the 0-9 a-z range become the preceding odd char.
+                return codepoint - 1;
+            } else {
+                // Odd ascii chars and non-ascii chars are returned unchanged.
+                return codepoint;
+            }
+        }
+    }
 
     public static class Defaults {
         public static final FieldType FIELD_TYPE = new FieldType();
@@ -172,8 +256,14 @@ public class WildcardFieldMapper extends FieldMapper {
         @Override
         public WildcardFieldMapper build(BuilderContext context) {
             return new WildcardFieldMapper(
-                    name, fieldType, new WildcardFieldType(buildFullName(context), fieldType, meta), ignoreAbove,
-                    multiFieldsBuilder.build(this, context), copyTo, nullValue);
+                name,
+                fieldType,
+                new WildcardFieldType(buildFullName(context), fieldType, meta, context.indexCreatedVersion()),
+                ignoreAbove,
+                multiFieldsBuilder.build(this, context),
+                copyTo,
+                nullValue
+            );
         }
     }
 
@@ -212,17 +302,21 @@ public class WildcardFieldMapper extends FieldMapper {
 
         static Analyzer lowercaseNormalizer = new LowercaseNormalizer();
 
-        private WildcardFieldType(String name, FieldType fieldType, Map<String, String> meta) {
+        private WildcardFieldType(String name, FieldType fieldType, Map<String, String> meta, Version version) {
             super(name, true, fieldType.stored(), true,
                 new TextSearchInfo(fieldType, null, Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER), meta);
-            setIndexAnalyzer(WILDCARD_ANALYZER);
+            
+            if (version.onOrAfter(Version.V_7_10_0)) {
+                setIndexAnalyzer(WILDCARD_ANALYZER_7_10);
+            } else {
+                setIndexAnalyzer(WILDCARD_ANALYZER_7_9);
+            }
         }
 
         @Override
         public Query wildcardQuery(String wildcardPattern, RewriteMethod method, boolean caseInsensitive, QueryShardContext context) {
 
-            String ngramIndexPattern = addLineEndChars(toLowerCase(wildcardPattern));
-
+            String ngramIndexPattern = addLineEndChars(wildcardPattern);
             // Break search term into tokens
             Set<String> tokens = new LinkedHashSet<>();
             StringBuilder sequence = new StringBuilder();
@@ -305,8 +399,8 @@ public class WildcardFieldMapper extends FieldMapper {
             if (value.length() == 0) {
                 return new MatchNoDocsQuery();
             }

-            RegExp ngramRegex = new RegExp(addLineEndChars(toLowerCase(value)), syntaxFlags, matchFlags);
+            RegExp ngramRegex = new RegExp(addLineEndChars(value), syntaxFlags, matchFlags);
 
             Query approxBooleanQuery = toApproximationQuery(ngramRegex);
             Query approxNgramQuery = rewriteBoolToNgramQuery(approxBooleanQuery);
@@ -590,7 +684,7 @@ public class WildcardFieldMapper extends FieldMapper {
             return q instanceof MatchAllDocsQuery || q instanceof MatchAllButRequireVerificationQuery;
         }
 
-        protected String firstNgramToken(String fragment) {
+        protected String firstNgramToken(String fragment, Analyzer analyzer) {
             LinkedHashSet<String> tokens = new LinkedHashSet<>();
             getNgramTokens(tokens, fragment);
             return tokens.iterator().next();
@@ -603,41 +697,30 @@ public class WildcardFieldMapper extends FieldMapper {
                 return;
             }
             // Break fragment into multiple Ngrams
-            TokenStream tokenizer = WILDCARD_ANALYZER.tokenStream(name(), fragment);
+            TokenStream tokenizer = indexAnalyzer().tokenStream(name(), fragment);
             CharTermAttribute termAtt = tokenizer.addAttribute(CharTermAttribute.class);
-            // If fragment length < NGRAM_SIZE then it is not emitted by token stream so need
-            // to initialise with the value here
-            String lastUnusedToken = fragment;
+            int foundTokens = 0;
             try {
                 tokenizer.reset();
-                boolean takeThis = true;
-                // minimise number of terms searched - eg for "12345" and 3grams we only need terms
-                // `123` and `345` - no need to search for 234. We take every other ngram.
                 while (tokenizer.incrementToken()) {
                     String tokenValue = termAtt.toString();
-                    if (takeThis) {
-                        tokens.add(tokenValue);
-                        lastUnusedToken = null;
-                    } else {
-                        lastUnusedToken = tokenValue;
-                    }
-                    // alternate
-                    takeThis = !takeThis;
-                    if (tokens.size() >= MAX_CLAUSES_IN_APPROXIMATION_QUERY) {
-                        lastUnusedToken = null;
-                        break;
-                    }
-                }
-                if (lastUnusedToken != null) {
-                    // given `cake` and 3 grams the loop above would output only `cak` and we need to add trailing
-                    // `ake` to complete the logic.
-                    tokens.add(lastUnusedToken);
+                    tokens.add(tokenValue);
+                    foundTokens++;
                 }
                 tokenizer.end();
                 tokenizer.close();
             } catch (IOException ioe) {
                 throw new ElasticsearchParseException("Error parsing wildcard regex pattern fragment [" + fragment + "]");
             }
+            
+            if (foundTokens == 0 && fragment.length() > 0) {
+                // fragment length must have been less than NGRAM_SIZE - add a placeholder which may be used in a prefix query e.g. ab*
+                fragment = toLowerCase(fragment);
+                if (indexAnalyzer() == WILDCARD_ANALYZER_7_10) {
+                    fragment = PunctuationFoldingFilter.normalize(fragment);
+                }
+                tokens.add(fragment);
+            }
         }
 
 
@@ -678,8 +761,8 @@ public class WildcardFieldMapper extends FieldMapper {
                 // Long common prefixes e.g. "C:/Program Files/a,txt" to "C:/Program Files/z,txt"
                 // can be accelerated by searching for all the common leading ngrams e.g. c:/, /pr, rog, gra etc
                 StringBuilder commonPrefix = new StringBuilder();
-                String lowerS = addLineEndChars(toLowerCase(lower.utf8ToString()));
-                String upperS = addLineEndChars(toLowerCase(upper.utf8ToString()));
+                String lowerS = addLineEndChars(lower.utf8ToString());
+                String upperS = addLineEndChars(upper.utf8ToString());
                 for (int i = 0; i < Math.min(lowerS.length(), upperS.length());) {
                     final int cL = lowerS.codePointAt(i);
                     final int cU = upperS.codePointAt(i);
@@ -717,23 +800,15 @@ public class WildcardFieldMapper extends FieldMapper {
                     }
                 }
             }
-            if (accelerationQuery == null) {
-                // Fallback - if there is no common prefix sequence then we look for the range of ngrams that appear at the start
-                // of the string e.g. given 100 to 999 we would search for ngrams in the range
-                //   TOKEN_START_OR_END_CHAR + "10" to
-                //   TOKEN_START_OR_END_CHAR + "99"
-                BytesRef lowerNgram = lower == null ? null : new BytesRef(firstNgramToken(
-                    addLineEndChars(toLowerCase(lower.utf8ToString()))));
-                BytesRef upperNgram = upper == null ? null : new BytesRef(firstNgramToken(
-                    addLineEndChars(toLowerCase(upper.utf8ToString()))));
-                accelerationQuery = new TermRangeQuery(name(), lowerNgram, upperNgram, true, true);
-            }
-
             Supplier <Automaton> deferredAutomatonSupplier = ()->{
                 return TermRangeQuery.toAutomaton(lower, upper, includeLower, includeUpper);
             };
             AutomatonQueryOnBinaryDv slowQuery = new AutomatonQueryOnBinaryDv(name(), lower + "-" + upper, deferredAutomatonSupplier);
 
+            if (accelerationQuery == null) {
+                return slowQuery;
+            }
+
             BooleanQuery.Builder qBuilder = new BooleanQuery.Builder();
             qBuilder.add(accelerationQuery, Occur.MUST);
             qBuilder.add(slowQuery, Occur.MUST);
@@ -750,26 +825,25 @@ public class WildcardFieldMapper extends FieldMapper {
             QueryShardContext context
         ) {
             String searchTerm = BytesRefs.toString(value);
-            String lowerSearchTerm = toLowerCase(searchTerm);
             try {
                 BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder();
                 //The approximation query can have a prefix and any number of ngrams.
                 BooleanQuery.Builder approxBuilder = new BooleanQuery.Builder();
 
-                String postPrefixString = lowerSearchTerm;
+                String postPrefixString = searchTerm;
 
                 // Add all content prior to prefixLength as a MUST clause to the ngram index query
                 if (prefixLength > 0) {
                     Set<String> prefixTokens = new LinkedHashSet<>();
-                    postPrefixString = lowerSearchTerm.substring(prefixLength);
-                    String prefixCandidate = TOKEN_START_OR_END_CHAR + lowerSearchTerm.substring(0,  prefixLength);
+                    postPrefixString = searchTerm.substring(prefixLength);
+                    String prefixCandidate = TOKEN_START_OR_END_CHAR + searchTerm.substring(0,  prefixLength);
                     getNgramTokens(prefixTokens, prefixCandidate);
                     for (String prefixToken : prefixTokens) {
                         addClause(prefixToken, approxBuilder, Occur.MUST);
                     }
                 }
                 // Tokenize all content after the prefix
-                TokenStream tokenizer = WILDCARD_ANALYZER.tokenStream(name(), postPrefixString);
+                TokenStream tokenizer = indexAnalyzer().tokenStream(name(), postPrefixString);
                 CharTermAttribute termAtt = tokenizer.addAttribute(CharTermAttribute.class);
                 ArrayList<String> postPrefixTokens = new ArrayList<>();
                 String firstToken = null;
@@ -985,10 +1059,7 @@ public class WildcardFieldMapper extends FieldMapper {
         if (value == null || value.length() > ignoreAbove) {
             return;
         }
-        // Always lower case the ngram index and value - helps with
-        // a) speed (less ngram variations to explore on disk and in RAM-based automaton) and
-        // b) uses less disk space
-        String ngramValue = addLineEndChars(WildcardFieldType.toLowerCase(value));
+        String ngramValue = addLineEndChars(value);
         Field ngramField = new Field(fieldType().name(), ngramValue, ngramFieldType);
         fields.add(ngramField);
 

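Aside: to see why the expected query strings in the tests below changed, one can dump what either analyzer emits. A hedged sketch using the standard Lucene TokenStream API (the field name is arbitrary; note that at index time the value is first wrapped with TOKEN_START_OR_END_CHAR by addLineEndChars, so the '_'-anchored grams in the expectations will not appear when analyzing a raw value):

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class DumpNgrams {
    // Prints every ngram the analyzer emits for a value, e.g.
    // dump(WildcardFieldMapper.WILDCARD_ANALYZER_7_10, "foobar") -> eoo ooa oaa aaq
    static void dump(Analyzer analyzer, String value) throws IOException {
        try (TokenStream ts = analyzer.tokenStream("wildcard_field", value)) {
            CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
            ts.reset();
            while (ts.incrementToken()) {
                System.out.println(termAtt.toString());
            }
            ts.end();
        }
    }
}
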
+ 85 - 37
x-pack/plugin/wildcard/src/test/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapperTests.java

@@ -78,18 +78,21 @@ import static org.mockito.Mockito.when;
 
 public class WildcardFieldMapperTests extends ESTestCase {
 
-    static QueryShardContext createMockQueryShardContext(boolean allowExpensiveQueries) {
+    static QueryShardContext createMockQueryShardContext(boolean allowExpensiveQueries, Version version) {
         QueryShardContext queryShardContext = mock(QueryShardContext.class);
         when(queryShardContext.allowExpensiveQueries()).thenReturn(allowExpensiveQueries);
+        when(queryShardContext.indexVersionCreated()).thenReturn(version);        
         return queryShardContext;
    }
 
     private static final String KEYWORD_FIELD_NAME = "keyword_field";
     private static final String WILDCARD_FIELD_NAME = "wildcard_field";
-    public static final QueryShardContext MOCK_QSC = createMockQueryShardContext(true);
+    public static final QueryShardContext MOCK_QSC = createMockQueryShardContext(true, Version.CURRENT);
+    public static final QueryShardContext MOCK_7_9_QSC = createMockQueryShardContext(true, Version.V_7_9_0);
 
     static final int MAX_FIELD_LENGTH = 30;
     static WildcardFieldMapper wildcardFieldType;
+    static WildcardFieldMapper wildcardFieldType79;
     static KeywordFieldMapper keywordFieldType;
 
     @Override
@@ -97,11 +100,17 @@ public class WildcardFieldMapperTests extends ESTestCase {
     public void setUp() throws Exception {
         Builder builder = new WildcardFieldMapper.Builder(WILDCARD_FIELD_NAME);
         builder.ignoreAbove(MAX_FIELD_LENGTH);
-        wildcardFieldType = builder.build(new Mapper.BuilderContext(createIndexSettings().getSettings(), new ContentPath(0)));
-
+        wildcardFieldType = builder.build(
+            new Mapper.BuilderContext(createIndexSettings(Version.CURRENT).getSettings(), new ContentPath(0))
+        );
+        wildcardFieldType79 = builder.build(
+            new Mapper.BuilderContext(createIndexSettings(Version.V_7_9_0).getSettings(), new ContentPath(0))
+        );
 
         org.elasticsearch.index.mapper.KeywordFieldMapper.Builder kwBuilder = new KeywordFieldMapper.Builder(KEYWORD_FIELD_NAME);
-        keywordFieldType = kwBuilder.build(new Mapper.BuilderContext(createIndexSettings().getSettings(), new ContentPath(0)));
+        keywordFieldType = kwBuilder.build(
+            new Mapper.BuilderContext(createIndexSettings(Version.CURRENT).getSettings(), new ContentPath(0))
+        );
         super.setUp();
     }
 
@@ -121,7 +130,7 @@ public class WildcardFieldMapperTests extends ESTestCase {
 
     public void testTooBigKeywordField() throws IOException {
         Directory dir = newDirectory();
-        IndexWriterConfig iwc = newIndexWriterConfig(WildcardFieldMapper.WILDCARD_ANALYZER);
+        IndexWriterConfig iwc = newIndexWriterConfig(WildcardFieldMapper.WILDCARD_ANALYZER_7_10);
         iwc.setMergePolicy(newTieredMergePolicy(random()));
         RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
 
@@ -144,11 +153,44 @@ public class WildcardFieldMapperTests extends ESTestCase {
         reader.close();
         dir.close();
     }
+    
+    public void testBWCIndexVersion() throws IOException {
+        // Create an old-format index using the wildcard ngram analyzer that shipped in 7.9
+        Directory dir = newDirectory();
+        IndexWriterConfig iwc = newIndexWriterConfig(WildcardFieldMapper.WILDCARD_ANALYZER_7_9);
+        iwc.setMergePolicy(newTieredMergePolicy(random()));
+        RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+
+        Document doc = new Document();
+        ParseContext.Document parseDoc = new ParseContext.Document();
+        addFields(parseDoc, doc, "a b");
+        indexDoc(parseDoc, doc, iw);
+
+        iw.forceMerge(1);
+        DirectoryReader reader = iw.getReader();
+        IndexSearcher searcher = newSearcher(reader);
+        iw.close();
+
+        // Unnatural circumstance - verify that querying with the new analyzer finds nothing in the old-format index
+        Query oldWildcardFieldQuery = wildcardFieldType.fieldType().wildcardQuery("a b", null, null);
+        TopDocs oldWildcardFieldTopDocs = searcher.search(oldWildcardFieldQuery, 10, Sort.INDEXORDER);
+        assertThat(oldWildcardFieldTopDocs.totalHits.value, equalTo(0L));
+
+        // Natural circumstance - verify that we revert to the old analyzer for old indices
+        Query wildcardFieldQuery = wildcardFieldType79.fieldType().wildcardQuery("a b", null, null);
+        TopDocs wildcardFieldTopDocs = searcher.search(wildcardFieldQuery, 10, Sort.INDEXORDER);
+        assertThat(wildcardFieldTopDocs.totalHits.value, equalTo(1L));
+
+        reader.close();
+        dir.close();
+    }    
 
     //Test long query strings don't cause exceptions
     public void testTooBigQueryField() throws IOException {
         Directory dir = newDirectory();
-        IndexWriterConfig iwc = newIndexWriterConfig(WildcardFieldMapper.WILDCARD_ANALYZER);
+        IndexWriterConfig iwc = newIndexWriterConfig(WildcardFieldMapper.WILDCARD_ANALYZER_7_10);
         iwc.setMergePolicy(newTieredMergePolicy(random()));
         RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
 
@@ -183,7 +225,7 @@ public class WildcardFieldMapperTests extends ESTestCase {
     
     public void testTermAndPrefixQueryIgnoreWildcardSyntax() throws IOException {
         Directory dir = newDirectory();
-        IndexWriterConfig iwc = newIndexWriterConfig(WildcardFieldMapper.WILDCARD_ANALYZER);
+        IndexWriterConfig iwc = newIndexWriterConfig(WildcardFieldMapper.WILDCARD_ANALYZER_7_10);
         iwc.setMergePolicy(newTieredMergePolicy(random()));
         RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
 
@@ -224,7 +266,7 @@ public class WildcardFieldMapperTests extends ESTestCase {
 
     public void testSearchResultsVersusKeywordField() throws IOException {
         Directory dir = newDirectory();
-        IndexWriterConfig iwc = newIndexWriterConfig(WildcardFieldMapper.WILDCARD_ANALYZER);
+        IndexWriterConfig iwc = newIndexWriterConfig(WildcardFieldMapper.WILDCARD_ANALYZER_7_10);
         iwc.setMergePolicy(newTieredMergePolicy(random()));
         RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
 
@@ -362,7 +404,7 @@ public class WildcardFieldMapperTests extends ESTestCase {
 
     public void testRangeQueryVersusKeywordField() throws IOException {
         Directory dir = newDirectory();
-        IndexWriterConfig iwc = newIndexWriterConfig(WildcardFieldMapper.WILDCARD_ANALYZER);
+        IndexWriterConfig iwc = newIndexWriterConfig(WildcardFieldMapper.WILDCARD_ANALYZER_7_10);
         iwc.setMergePolicy(newTieredMergePolicy(random()));
         RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
 
@@ -375,6 +417,10 @@ public class WildcardFieldMapperTests extends ESTestCase {
         indexDoc(iw, "a.txt");
         indexDoc(iw, "n.txt");
         indexDoc(iw, "z.txt");
+        indexDoc(iw, "A.txt");
+        indexDoc(iw, "N.txt");
+        indexDoc(iw, "^.txt");
+        indexDoc(iw, "Z.txt");
 
         iw.forceMerge(1);
         DirectoryReader reader = iw.getReader();
@@ -391,7 +437,8 @@ public class WildcardFieldMapperTests extends ESTestCase {
             {"a.txt", "z.txt"},
             {"a.txt", "n.txt"},
             {null, "z.txt"},
-            {"a.txt", null}
+            {"a.txt", null},
+            {"A.txt", "z.txt"}
         };
 
         for (String[] bounds : rangeTests) {
@@ -437,15 +484,16 @@ public class WildcardFieldMapperTests extends ESTestCase {
 
         // All of these regexes should be accelerated as the equivalent of the given QueryString query
         String acceleratedTests[][] = {
-            {".*foo.*", "foo"},
-            {"..foobar","+foo +oba +ar_ +r__"},
-            {"(maynotexist)?foobar","+foo +oba +ar_ +r__"},
-            {".*/etc/passw.*", "+\\/et +tc\\/ +\\/pa +ass +ssw"},
-            {".*etc/passwd",  "+etc +c\\/p +pas +ssw +wd_ +d__"},
-            {"(http|ftp)://foo.*",  "+((+htt +ttp) ftp) +(+\\:\\/\\/ +\\/fo +foo)"},
-            {"[Pp][Oo][Ww][Ee][Rr][Ss][Hh][Ee][Ll][Ll]\\.[Ee][Xx][Ee]",  "+_po +owe +ers +she +ell +l\\.e +exe +e__"},
-            {"foo<1-100>bar",  "+(+_fo +foo) +(+bar +r__ )"},
-            {"(aaa.+&.+bbb)cat", "+cat +t__"},
+            {".*foo.*", "eoo"},
+            {"..foobar","+eoo +ooa +oaa +aaq +aq_ +q__"},
+            {"(maynotexist)?foobar","+eoo +ooa +oaa +aaq +aq_ +q__"},
+            {".*/etc/passw.*", "+\\/es +esc +sc\\/ +c\\/o +\\/oa +oas +ass +ssw"},
+            {".*etc/passwd",  " +esc +sc\\/ +c\\/o +\\/oa +oas +ass +ssw +swc +wc_ +c__"},
+            {"(http|ftp)://foo.*",  "+((+gss +sso) eso) +(+\\/\\/\\/ +\\/\\/e +\\/eo +eoo)"},
+            {"[Pp][Oo][Ww][Ee][Rr][Ss][Hh][Ee][Ll][Ll]\\.[Ee][Xx][Ee]",
+                "+_oo +oow +owe +weq +eqs +qsg +sge +gek +ekk +kk\\/ +k\\/e +\\/ew +ewe +we_ +e__"},
+            {"foo<1-100>bar",  "+(+_eo +eoo) +(+aaq +aq_ +q__)"},
+            {"(aaa.+&.+bbb)cat", "+cas +as_ +s__"},
             {".a", "a__"}
             };
         for (String[] test : acceleratedTests) {
@@ -470,7 +518,7 @@ public class WildcardFieldMapperTests extends ESTestCase {
         String suboptimalTests[][] = {
             // TODO short wildcards like a* OR b* aren't great so we just drop them.
             // Ideally we would attach to successors to create (acd OR bcd)
-            { "[ab]cd",  "+cd_ +d__"}
+            { "[ab]cd",  "+cc_ +c__"}
             };
         for (String[] test : suboptimalTests) {
             String regex = test[0];
@@ -500,13 +548,13 @@ public class WildcardFieldMapperTests extends ESTestCase {
 
         // All of these patterns should be accelerated.
         String tests[][] = {
-            { "*foobar", "+foo +oba +ar_ +r__" },
-            { "foobar*", "+_fo +oob +bar" },
-            { "foo\\*bar*", "+_fo +oo\\* +\\*ba +bar" },
-            { "foo\\?bar*", "+_fo +oo\\? +\\?ba +bar" },
-            { "foo*bar", "+_fo +foo +bar +r__" },
-            { "foo?bar", "+_fo +foo +bar +r__" },
-            { "?foo*bar?", "+foo +bar" },
+            { "*foobar", "+eoo +ooa +oaa +aaq +aq_ +q__" },
+            { "foobar*", "+_eo +eoo +ooa +oaa +aaq" },
+            { "foo\\*bar*", "+_eo +eoo +oo\\/ +o\\/a +\\/aa +aaq" },
+            { "foo\\?bar*", "+_eo +eoo +oo\\/ +o\\/a +\\/aa +aaq" },
+            { "foo*bar", "+_eo +eoo +aaq +aq_ +q__" },
+            { "foo?bar", "+_eo +eoo +aaq +aq_ +q__" },
+            { "?foo*bar?", "+eoo +aaq" },
             { "*c", "+c__" } };
         for (String[] test : tests) {
             String pattern = test[0];
@@ -602,10 +650,10 @@ public class WildcardFieldMapperTests extends ESTestCase {
     public void testFuzzyAcceleration() throws IOException, ParseException {
 
         FuzzyTest[] tests = {
-            new FuzzyTest("123456", 0, Fuzziness.ONE, null, 1, "123 456"),
-            new FuzzyTest("1234567890", 2, Fuzziness.ONE, "_12", 1, "345 678"),
-            new FuzzyTest("12345678901", 2, Fuzziness.ONE, "_12", 2, "345 678 901"),
-            new FuzzyTest("12345678", 4, Fuzziness.ONE, "_12 234", 0, null)
+            new FuzzyTest("123456", 0, Fuzziness.ONE, null, 1, "113 355"),
+            new FuzzyTest("1234567890", 2, Fuzziness.ONE, "_11", 1, "335 577"),
+            new FuzzyTest("12345678901", 2, Fuzziness.ONE, "_11", 2, "335 577 901"),
+            new FuzzyTest("12345678", 4, Fuzziness.ONE, "_11 113 133", 0, null)
         };
         for (FuzzyTest test : tests) {
             Query wildcardFieldQuery = test.getFuzzyQuery();
@@ -651,8 +699,8 @@ public class WildcardFieldMapperTests extends ESTestCase {
     public void testRangeAcceleration() throws IOException, ParseException {
 
         RangeTest[] tests = {
-            new RangeTest("c:/a.txt", "c:/z.txt", "_c: c:/"),
-            new RangeTest("C:/ProgramFiles/a.txt", "C:/ProgramFiles/z.txt", "_c: :/p pro ogr ram mfi ile es/"),
+            new RangeTest("c:/a.txt", "c:/z.txt", "_c/ c//"),
+            new RangeTest("C:/ProgramFiles/a.txt", "C:/ProgramFiles/z/txt", "_c/ c// //o /oq oqo qog ogq gqa qam ame mei eik ike kes es/"),
         };
         for (RangeTest test : tests) {
             Query wildcardFieldQuery = test.getRangeQuery();
@@ -894,9 +942,9 @@ public class WildcardFieldMapperTests extends ESTestCase {
         iw.addDocument(doc);
     }
 
-    protected IndexSettings createIndexSettings() {
+    protected IndexSettings createIndexSettings(Version version) {
         return new IndexSettings(
-                IndexMetadata.builder("_index").settings(Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT))
+                IndexMetadata.builder("_index").settings(Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, version))
                         .numberOfShards(1).numberOfReplicas(0).creationDate(System.currentTimeMillis()).build(),
                 Settings.EMPTY);
     }