Browse Source

Core: add max_determinized_states to query_string and regexp query/filter

This prevents too-difficult regular expressions from consuming
excessive RAM/CPU; the default max_determinized_states is 10,000 (same
as Lucene) but query_string and regexp query/filter can override
per-request.

This also upgrades to a new Lucene 5.0.0 snapshot.

Closes #8386

Closes #8357
Michael McCandless 11 năm trước cách đây
mục cha
commit
8aebb9656b
30 tập tin đã thay đổi với 261 bổ sung và 75 xóa
  1. 0 2
      dev-tools/forbidden/all-signatures.txt
  2. 10 2
      docs/reference/query-dsl/filters/regexp-filter.asciidoc
  3. 5 1
      docs/reference/query-dsl/queries/query-string-query.asciidoc
  4. 20 0
      docs/reference/query-dsl/queries/regexp-query.asciidoc
  5. 6 2
      pom.xml
  6. 4 2
      src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java
  7. 13 0
      src/main/java/org/apache/lucene/queryparser/classic/QueryParserSettings.java
  8. 7 3
      src/main/java/org/apache/lucene/search/suggest/analyzing/XAnalyzingSuggester.java
  9. 5 2
      src/main/java/org/apache/lucene/search/suggest/analyzing/XFuzzySuggester.java
  10. 2 3
      src/main/java/org/elasticsearch/common/http/client/HttpDownloadHelper.java
  11. 0 16
      src/main/java/org/elasticsearch/common/io/FileSystemUtils.java
  12. 8 7
      src/main/java/org/elasticsearch/common/lucene/search/RegexpFilter.java
  13. 3 2
      src/main/java/org/elasticsearch/index/mapper/FieldMapper.java
  14. 5 4
      src/main/java/org/elasticsearch/index/mapper/core/AbstractFieldMapper.java
  15. 12 8
      src/main/java/org/elasticsearch/index/mapper/internal/IdFieldMapper.java
  16. 15 0
      src/main/java/org/elasticsearch/index/query/QueryStringQueryBuilder.java
  17. 3 0
      src/main/java/org/elasticsearch/index/query/QueryStringQueryParser.java
  18. 18 3
      src/main/java/org/elasticsearch/index/query/RegexpFilterBuilder.java
  19. 8 4
      src/main/java/org/elasticsearch/index/query/RegexpFilterParser.java
  20. 16 1
      src/main/java/org/elasticsearch/index/query/RegexpQueryBuilder.java
  21. 8 4
      src/main/java/org/elasticsearch/index/query/RegexpQueryParser.java
  22. 1 2
      src/main/java/org/elasticsearch/index/translog/fs/RafReference.java
  23. 5 1
      src/main/java/org/elasticsearch/search/suggest/context/ContextMapping.java
  24. 1 2
      src/test/java/org/elasticsearch/benchmark/fs/FsAppendBenchmark.java
  25. 48 1
      src/test/java/org/elasticsearch/index/query/SimpleIndexQueryParserTests.java
  26. 7 0
      src/test/java/org/elasticsearch/index/query/query-regexp-max-determinized-states.json
  27. 6 0
      src/test/java/org/elasticsearch/index/query/query-regexp-too-many-determinized-states.json
  28. 17 0
      src/test/java/org/elasticsearch/index/query/regexp-filter-max-determinized-states.json
  29. 6 0
      src/test/java/org/elasticsearch/index/query/regexp-max-determinized-states.json
  30. 2 3
      src/test/java/org/elasticsearch/snapshots/SharedClusterSnapshotRestoreTests.java

+ 0 - 2
dev-tools/forbidden/all-signatures.txt

@@ -7,5 +7,3 @@ java.io.File#delete() @ use Files.delete for real exception, IOUtils.deleteFiles
 # temporary situation, until we upgrade with LUCENE-6051 fix 
 # (at which point forbidden apis will fail and we remove this section)
 @defaultMessage Use FileSystemUtils methods for now to workaround LUCENE-6051
-org.apache.lucene.util.IOUtils#deleteFilesIgnoringExceptions(java.lang.Iterable)
-org.apache.lucene.util.IOUtils#deleteFilesIfExist(java.lang.Iterable)

+ 10 - 2
docs/reference/query-dsl/filters/regexp-filter.asciidoc

@@ -27,7 +27,14 @@ See <<regexp-syntax>> for details of the supported regular expression language.
 You can also select the cache name and use the same regexp flags in the
 filter as in the query.
 
-*Note*: You have to enable caching explicitly in order to have the
+Regular expressions are dangerous because it's easy to accidentally
+create an innocuous looking one that requires an exponential number of
+internal determinized automaton states (and corresponding RAM and CPU)
+for Lucene to execute.  Lucene prevents these using the
+`max_determinized_states` setting (defaults to 10000).  You can raise
+this limit to allow more complex regular expressions to execute.
+
+You have to enable caching explicitly in order to have the
 `regexp` filter cached.
 
 [source,js]
@@ -41,7 +48,8 @@ filter as in the query.
             "regexp":{
                 "name.first" : {
                     "value" : "s.*y",
-                    "flags" : "INTERSECTION|COMPLEMENT|EMPTY"
+                    "flags" : "INTERSECTION|COMPLEMENT|EMPTY",
+                    "max_determinized_states": 20000
                 },
                 "_name":"test",
                 "_cache" : true,

+ 5 - 1
docs/reference/query-dsl/queries/query-string-query.asciidoc

@@ -61,7 +61,11 @@ phrase matches are required. Default value is `0`.
 not analyzed. By setting this value to `true`, a best effort will be
 made to analyze those as well.
 
-|`auto_generate_phrase_queries` |Default to `false`.
+|`auto_generate_phrase_queries` |Defaults to `false`.
+
+|`max_determinized_states` |Limit on how many automaton states regexp
+queries are allowed to create.  This protects against too-difficult
+(e.g. exponentially hard) regexps.  Defaults to 10000.
 
 |`minimum_should_match` |A value controlling how many "should" clauses
 in the resulting boolean query should match. It can be an absolute value

+ 20 - 0
docs/reference/query-dsl/queries/regexp-query.asciidoc

@@ -55,5 +55,25 @@ Possible flags are `ALL`, `ANYSTRING`, `AUTOMATON`, `COMPLEMENT`,
 http://lucene.apache.org/core/4_9_0/core/org/apache/lucene/util/automaton/RegExp.html[Lucene
 documentation] for their meaning
 
+Regular expressions are dangerous because it's easy to accidentally
+create an innocuous looking one that requires an exponential number of
+internal determinized automaton states (and corresponding RAM and CPU)
+for Lucene to execute.  Lucene prevents these using the
+`max_determinized_states` setting (defaults to 10000).  You can raise
+this limit to allow more complex regular expressions to execute.
+
+[source,js]
+--------------------------------------------------
+{
+    "regexp":{
+        "name.first": {
+            "value": "s.*y",
+            "flags" : "INTERSECTION|COMPLEMENT|EMPTY",
+            "max_determinized_states": 20000
+        }
+    }
+}
+--------------------------------------------------
+
 
 include::regexp-syntax.asciidoc[]

+ 6 - 2
pom.xml

@@ -32,7 +32,7 @@
 
     <properties>
         <lucene.version>5.0.0</lucene.version>
-        <lucene.maven.version>5.0.0-snapshot-1636426</lucene.maven.version>
+        <lucene.maven.version>5.0.0-snapshot-1637347</lucene.maven.version>
         <tests.jvms>auto</tests.jvms>
         <tests.shuffle>true</tests.shuffle>
         <tests.output>onerror</tests.output>
@@ -52,7 +52,11 @@
         </repository>
         <repository>
             <id>Lucene snapshots</id>
-            <url>https://download.elasticsearch.org/lucenesnapshots/maven/</url> 
+            <url>https://download.elasticsearch.org/lucenesnapshots/1637347</url> 
+        </repository>
+        <repository>
+            <id>Temporary</id>
+            <url>http://people.apache.org/~mikemccand/fake_staging_area/repo/</url> 
         </repository>
     </repositories>
 

+ 4 - 2
src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java

@@ -21,6 +21,7 @@ package org.apache.lucene.queryparser.classic;
 
 import com.google.common.base.Objects;
 import com.google.common.collect.ImmutableMap;
+
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -123,6 +124,7 @@ public class MapperQueryParser extends QueryParser {
         setMultiTermRewriteMethod(settings.rewriteMethod());
         setEnablePositionIncrements(settings.enablePositionIncrements());
         setAutoGeneratePhraseQueries(settings.autoGeneratePhraseQueries());
+        setMaxDeterminizedStates(settings.maxDeterminizedStates());
         setAllowLeadingWildcard(settings.allowLeadingWildcard());
         setLowercaseExpandedTerms(settings.lowercaseExpandedTerms());
         setPhraseSlop(settings.phraseSlop());
@@ -814,12 +816,12 @@ public class MapperQueryParser extends QueryParser {
                         if (fieldMappers.explicitTypeInNameWithDocMapper()) {
                             String[] previousTypes = QueryParseContext.setTypesWithPrevious(new String[]{fieldMappers.docMapper().type()});
                             try {
-                                query = currentMapper.regexpQuery(termStr, RegExp.ALL, multiTermRewriteMethod, parseContext);
+                                query = currentMapper.regexpQuery(termStr, RegExp.ALL, maxDeterminizedStates, multiTermRewriteMethod, parseContext);
                             } finally {
                                 QueryParseContext.setTypes(previousTypes);
                             }
                         } else {
-                            query = currentMapper.regexpQuery(termStr, RegExp.ALL, multiTermRewriteMethod, parseContext);
+                            query = currentMapper.regexpQuery(termStr, RegExp.ALL, maxDeterminizedStates, multiTermRewriteMethod, parseContext);
                         }
                     }
                     if (query == null) {

+ 13 - 0
src/main/java/org/apache/lucene/queryparser/classic/QueryParserSettings.java

@@ -20,9 +20,11 @@
 package org.apache.lucene.queryparser.classic;
 
 import com.carrotsearch.hppc.ObjectFloatOpenHashMap;
+
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.search.FuzzyQuery;
 import org.apache.lucene.search.MultiTermQuery;
+import org.apache.lucene.util.automaton.Operations;
 import org.joda.time.DateTimeZone;
 
 import java.util.Collection;
@@ -50,6 +52,7 @@ public class QueryParserSettings {
     private float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity;
     private int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength;
     private int fuzzyMaxExpansions = FuzzyQuery.defaultMaxExpansions;
+    private int maxDeterminizedStates = Operations.DEFAULT_MAX_DETERMINIZED_STATES;
     private MultiTermQuery.RewriteMethod fuzzyRewriteMethod = null;
     private boolean analyzeWildcard = DEFAULT_ANALYZE_WILDCARD;
     private boolean escape = false;
@@ -115,6 +118,14 @@ public class QueryParserSettings {
         this.autoGeneratePhraseQueries = autoGeneratePhraseQueries;
     }
 
+    public int maxDeterminizedStates() {
+        return maxDeterminizedStates;
+    }
+
+    public void maxDeterminizedStates(int maxDeterminizedStates) {
+        this.maxDeterminizedStates = maxDeterminizedStates;
+    }
+
     public boolean allowLeadingWildcard() {
         return allowLeadingWildcard;
     }
@@ -323,6 +334,7 @@ public class QueryParserSettings {
         QueryParserSettings that = (QueryParserSettings) o;
 
         if (autoGeneratePhraseQueries != that.autoGeneratePhraseQueries()) return false;
+        if (maxDeterminizedStates != that.maxDeterminizedStates()) return false;
         if (allowLeadingWildcard != that.allowLeadingWildcard) return false;
         if (Float.compare(that.boost, boost) != 0) return false;
         if (enablePositionIncrements != that.enablePositionIncrements) return false;
@@ -378,6 +390,7 @@ public class QueryParserSettings {
         result = 31 * result + (boost != +0.0f ? Float.floatToIntBits(boost) : 0);
         result = 31 * result + (defaultOperator != null ? defaultOperator.hashCode() : 0);
         result = 31 * result + (autoGeneratePhraseQueries ? 1 : 0);
+        result = 31 * result + maxDeterminizedStates;
         result = 31 * result + (allowLeadingWildcard ? 1 : 0);
         result = 31 * result + (lowercaseExpandedTerms ? 1 : 0);
         result = 31 * result + (enablePositionIncrements ? 1 : 0);

+ 7 - 3
src/main/java/org/apache/lucene/search/suggest/analyzing/XAnalyzingSuggester.java

@@ -19,6 +19,7 @@
 package org.apache.lucene.search.suggest.analyzing;
 
 import com.carrotsearch.hppc.ObjectIntOpenHashMap;
+
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.TokenStreamToAutomaton;
@@ -312,7 +313,8 @@ public class XAnalyzingSuggester extends Lookup {
   protected Automaton convertAutomaton(Automaton a) {
     if (queryPrefix != null) {
       a = Operations.concatenate(Arrays.asList(queryPrefix, a));
-      a = Operations.determinize(a);
+      // This automaton should not blow up during determinize:
+      a = Operations.determinize(a, Integer.MAX_VALUE);
     }
     return a;
   }
@@ -952,14 +954,16 @@ public class XAnalyzingSuggester extends Lookup {
       try {
           automaton = getTokenStreamToAutomaton().toAutomaton(ts);
       } finally {
-        IOUtils.closeWhileHandlingException(ts);
+          IOUtils.closeWhileHandlingException(ts);
       }
 
       automaton = replaceSep(automaton);
 
       // TODO: we can optimize this somewhat by determinizing
       // while we convert
-      automaton = Operations.determinize(automaton);
+
+      // This automaton should not blow up during determinize:
+      automaton = Operations.determinize(automaton, Integer.MAX_VALUE);
       return automaton;
   }
   

+ 5 - 2
src/main/java/org/apache/lucene/search/suggest/analyzing/XFuzzySuggester.java

@@ -205,7 +205,8 @@ public final class XFuzzySuggester extends XAnalyzingSuggester {
       if (unicodeAware) {
         // FLORIAN EDIT: get converted Automaton from superclass
         Automaton utf8automaton = new UTF32ToUTF8().convert(super.convertAutomaton(a));
-        utf8automaton = Operations.determinize(utf8automaton);
+        // This automaton should not blow up during determinize:
+        utf8automaton = Operations.determinize(utf8automaton, Integer.MAX_VALUE);
         return utf8automaton;
       } else {
         return super.convertAutomaton(a);
@@ -253,7 +254,9 @@ public final class XFuzzySuggester extends XAnalyzingSuggester {
           Automaton a = Operations.union(Arrays.asList(subs));
           // TODO: we could call toLevenshteinAutomata() before det? 
           // this only happens if you have multiple paths anyway (e.g. synonyms)
-          return Operations.determinize(a);
+
+          // This automaton should not blow up during determinize:
+          return Operations.determinize(a, Integer.MAX_VALUE);
         }
       }
 }

+ 2 - 3
src/main/java/org/elasticsearch/common/http/client/HttpDownloadHelper.java

@@ -22,7 +22,6 @@ package org.elasticsearch.common.http.client;
 import org.apache.lucene.util.IOUtils;
 import org.elasticsearch.ElasticsearchTimeoutException;
 import org.elasticsearch.common.Nullable;
-import org.elasticsearch.common.io.FileSystemUtils;
 import org.elasticsearch.common.unit.TimeValue;
 
 import java.io.*;
@@ -347,7 +346,7 @@ public class HttpDownloadHelper {
                     // Try to delete the garbage we'd otherwise leave
                     // behind.
                     IOUtils.closeWhileHandlingException(os, is);
-                    FileSystemUtils.deleteFilesIgnoringExceptions(dest.toPath());
+                    IOUtils.deleteFilesIgnoringExceptions(dest.toPath());
                 } else {
                     IOUtils.close(os, is);
                 }
@@ -386,7 +385,7 @@ public class HttpDownloadHelper {
             } else {
                 IOUtils.closeWhileHandlingException(is, os);
                 if (dest != null && dest.exists()) {
-                    FileSystemUtils.deleteFilesIgnoringExceptions(dest.toPath());
+                    IOUtils.deleteFilesIgnoringExceptions(dest.toPath());
                 }
             }
         }

+ 0 - 16
src/main/java/org/elasticsearch/common/io/FileSystemUtils.java

@@ -131,22 +131,6 @@ public class FileSystemUtils {
 
     private FileSystemUtils() {}
 
-    /**
-     * Temporary solution until LUCENE-6051 is fixed
-     * @see org.apache.lucene.util.IOUtils#deleteFilesIgnoringExceptions(java.nio.file.Path...)
-     */
-    public static void deleteFilesIgnoringExceptions(Path... files) {
-        for (Path name : files) {
-            if (name != null) {
-                try {
-                    Files.delete(name);
-                } catch (Throwable ignored) {
-                    // ignore
-                }
-            }
-        }
-    }
-
     /**
      * This utility copy a full directory content (excluded) under
      * a new directory but without overwriting existing files.

+ 8 - 7
src/main/java/org/elasticsearch/common/lucene/search/RegexpFilter.java

@@ -25,6 +25,7 @@ import org.apache.lucene.search.Filter;
 import org.apache.lucene.search.MultiTermQueryWrapperFilter;
 import org.apache.lucene.search.RegexpQuery;
 import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.automaton.Operations;
 import org.apache.lucene.util.automaton.RegExp;
 
 import java.io.IOException;
@@ -47,7 +48,11 @@ public class RegexpFilter extends Filter {
     }
 
     public RegexpFilter(Term term, int flags) {
-        filter = new InternalFilter(term, flags);
+        this(term, flags, Operations.DEFAULT_MAX_DETERMINIZED_STATES);
+    }
+
+    public RegexpFilter(Term term, int flags, int maxDeterminizedStates) {
+        filter = new InternalFilter(term, flags, maxDeterminizedStates);
         this.term = term;
         this.flags = flags;
     }
@@ -97,12 +102,8 @@ public class RegexpFilter extends Filter {
 
     static class InternalFilter extends MultiTermQueryWrapperFilter<RegexpQuery> {
 
-        public InternalFilter(Term term) {
-            super(new RegexpQuery(term));
-        }
-
-        public InternalFilter(Term term, int flags) {
-            super(new RegexpQuery(term, flags));
+        public InternalFilter(Term term, int flags, int maxDeterminizedStates) {
+            super(new RegexpQuery(term, flags, maxDeterminizedStates));
         }
     }
 

+ 3 - 2
src/main/java/org/elasticsearch/index/mapper/FieldMapper.java

@@ -20,6 +20,7 @@
 package org.elasticsearch.index.mapper;
 
 import com.google.common.base.Strings;
+
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.index.Term;
@@ -260,9 +261,9 @@ public interface FieldMapper<T> extends Mapper {
 
     Filter prefixFilter(Object value, @Nullable QueryParseContext context);
 
-    Query regexpQuery(Object value, int flags, @Nullable MultiTermQuery.RewriteMethod method, @Nullable QueryParseContext context);
+    Query regexpQuery(Object value, int flags, int maxDeterminizedStates, @Nullable MultiTermQuery.RewriteMethod method, @Nullable QueryParseContext context);
 
-    Filter regexpFilter(Object value, int flags, @Nullable QueryParseContext parseContext);
+    Filter regexpFilter(Object value, int flags, int maxDeterminizedStates, @Nullable QueryParseContext parseContext);
 
     /**
      * A term query to use when parsing a query string. Can return <tt>null</tt>.

+ 5 - 4
src/main/java/org/elasticsearch/index/mapper/core/AbstractFieldMapper.java

@@ -24,6 +24,7 @@ import com.carrotsearch.hppc.cursors.ObjectCursor;
 import com.carrotsearch.hppc.cursors.ObjectObjectCursor;
 import com.google.common.base.Objects;
 import com.google.common.collect.ImmutableList;
+
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
@@ -553,8 +554,8 @@ public abstract class AbstractFieldMapper<T> implements FieldMapper<T> {
     }
 
     @Override
-    public Query regexpQuery(Object value, int flags, @Nullable MultiTermQuery.RewriteMethod method, @Nullable QueryParseContext context) {
-        RegexpQuery query = new RegexpQuery(names().createIndexNameTerm(indexedValueForSearch(value)), flags);
+    public Query regexpQuery(Object value, int flags, int maxDeterminizedStates, @Nullable MultiTermQuery.RewriteMethod method, @Nullable QueryParseContext context) {
+        RegexpQuery query = new RegexpQuery(names().createIndexNameTerm(indexedValueForSearch(value)), flags, maxDeterminizedStates);
         if (method != null) {
             query.setRewriteMethod(method);
         }
@@ -562,8 +563,8 @@ public abstract class AbstractFieldMapper<T> implements FieldMapper<T> {
     }
 
     @Override
-    public Filter regexpFilter(Object value, int flags, @Nullable QueryParseContext parseContext) {
-        return new RegexpFilter(names().createIndexNameTerm(indexedValueForSearch(value)), flags);
+    public Filter regexpFilter(Object value, int flags, int maxDeterminizedStates, @Nullable QueryParseContext parseContext) {
+        return new RegexpFilter(names().createIndexNameTerm(indexedValueForSearch(value)), flags, maxDeterminizedStates);
     }
 
     @Override

+ 12 - 8
src/main/java/org/elasticsearch/index/mapper/internal/IdFieldMapper.java

@@ -20,6 +20,7 @@
 package org.elasticsearch.index.mapper.internal;
 
 import com.google.common.collect.Iterables;
+
 import org.apache.lucene.document.BinaryDocValuesField;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
@@ -235,13 +236,14 @@ public class IdFieldMapper extends AbstractFieldMapper<String> implements Intern
     }
 
     @Override
-    public Query regexpQuery(Object value, int flags, @Nullable MultiTermQuery.RewriteMethod method, @Nullable QueryParseContext context) {
+    public Query regexpQuery(Object value, int flags, int maxDeterminizedStates, @Nullable MultiTermQuery.RewriteMethod method, @Nullable QueryParseContext context) {
         if (fieldType.indexOptions() != IndexOptions.NONE || context == null) {
-            return super.regexpQuery(value, flags, method, context);
+            return super.regexpQuery(value, flags, maxDeterminizedStates, method, context);
         }
         Collection<String> queryTypes = context.queryTypes();
         if (queryTypes.size() == 1) {
-            RegexpQuery regexpQuery = new RegexpQuery(new Term(UidFieldMapper.NAME, Uid.createUidAsBytes(Iterables.getFirst(queryTypes, null), BytesRefs.toBytesRef(value))), flags);
+            RegexpQuery regexpQuery = new RegexpQuery(new Term(UidFieldMapper.NAME, Uid.createUidAsBytes(Iterables.getFirst(queryTypes, null), BytesRefs.toBytesRef(value))),
+                                                      flags, maxDeterminizedStates);
             if (method != null) {
                 regexpQuery.setRewriteMethod(method);
             }
@@ -249,7 +251,7 @@ public class IdFieldMapper extends AbstractFieldMapper<String> implements Intern
         }
         BooleanQuery query = new BooleanQuery();
         for (String queryType : queryTypes) {
-            RegexpQuery regexpQuery = new RegexpQuery(new Term(UidFieldMapper.NAME, Uid.createUidAsBytes(queryType, BytesRefs.toBytesRef(value))), flags);
+            RegexpQuery regexpQuery = new RegexpQuery(new Term(UidFieldMapper.NAME, Uid.createUidAsBytes(queryType, BytesRefs.toBytesRef(value))), flags, maxDeterminizedStates);
             if (method != null) {
                 regexpQuery.setRewriteMethod(method);
             }
@@ -258,17 +260,19 @@ public class IdFieldMapper extends AbstractFieldMapper<String> implements Intern
         return query;
     }
 
-    public Filter regexpFilter(Object value, int flags, @Nullable QueryParseContext context) {
+    public Filter regexpFilter(Object value, int flags, int maxDeterminizedStates, @Nullable QueryParseContext context) {
         if (fieldType.indexOptions() != IndexOptions.NONE || context == null) {
-            return super.regexpFilter(value, flags, context);
+            return super.regexpFilter(value, flags, maxDeterminizedStates, context);
         }
         Collection<String> queryTypes = context.queryTypes();
         if (queryTypes.size() == 1) {
-            return new RegexpFilter(new Term(UidFieldMapper.NAME, Uid.createUidAsBytes(Iterables.getFirst(queryTypes, null), BytesRefs.toBytesRef(value))), flags);
+            return new RegexpFilter(new Term(UidFieldMapper.NAME, Uid.createUidAsBytes(Iterables.getFirst(queryTypes, null), BytesRefs.toBytesRef(value))),
+                                    flags, maxDeterminizedStates);
         }
         XBooleanFilter filter = new XBooleanFilter();
         for (String queryType : queryTypes) {
-            filter.add(new RegexpFilter(new Term(UidFieldMapper.NAME, Uid.createUidAsBytes(queryType, BytesRefs.toBytesRef(value))), flags), BooleanClause.Occur.SHOULD);
+            filter.add(new RegexpFilter(new Term(UidFieldMapper.NAME, Uid.createUidAsBytes(queryType, BytesRefs.toBytesRef(value))),
+                                        flags, maxDeterminizedStates), BooleanClause.Occur.SHOULD);
         }
         return filter;
     }

+ 15 - 0
src/main/java/org/elasticsearch/index/query/QueryStringQueryBuilder.java

@@ -20,6 +20,7 @@
 package org.elasticsearch.index.query;
 
 import com.carrotsearch.hppc.ObjectFloatOpenHashMap;
+
 import org.elasticsearch.common.unit.Fuzziness;
 import org.elasticsearch.common.xcontent.XContentBuilder;
 
@@ -95,6 +96,9 @@ public class QueryStringQueryBuilder extends BaseQueryBuilder implements Boostab
 
     private String timeZone;
 
+    /** To limit effort spent determinizing regexp queries. */
+    private Integer maxDeterminizedStates;
+
     public QueryStringQueryBuilder(String queryString) {
         this.queryString = queryString;
     }
@@ -200,6 +204,14 @@ public class QueryStringQueryBuilder extends BaseQueryBuilder implements Boostab
         return this;
     }
 
+    /**
+     * Protects against too-difficult regular expression queries.
+     */
+    public QueryStringQueryBuilder maxDeterminizedStates(int maxDeterminizedStates) {
+        this.maxDeterminizedStates = maxDeterminizedStates;
+        return this;
+    }
+
     /**
      * Should leading wildcards be allowed or not. Defaults to <tt>true</tt>.
      */
@@ -364,6 +376,9 @@ public class QueryStringQueryBuilder extends BaseQueryBuilder implements Boostab
         if (autoGeneratePhraseQueries != null) {
             builder.field("auto_generate_phrase_queries", autoGeneratePhraseQueries);
         }
+        if (maxDeterminizedStates != null) {
+            builder.field("max_determinized_states", maxDeterminizedStates);
+        }
         if (allowLeadingWildcard != null) {
             builder.field("allow_leading_wildcard", allowLeadingWildcard);
         }

+ 3 - 0
src/main/java/org/elasticsearch/index/query/QueryStringQueryParser.java

@@ -21,6 +21,7 @@ package org.elasticsearch.index.query;
 
 import com.carrotsearch.hppc.ObjectFloatOpenHashMap;
 import com.google.common.collect.Lists;
+
 import org.apache.lucene.queryparser.classic.MapperQueryParser;
 import org.apache.lucene.queryparser.classic.QueryParserSettings;
 import org.apache.lucene.search.BooleanQuery;
@@ -157,6 +158,8 @@ public class QueryStringQueryParser implements QueryParser {
                     qpSettings.allowLeadingWildcard(parser.booleanValue());
                 } else if ("auto_generate_phrase_queries".equals(currentFieldName) || "autoGeneratePhraseQueries".equals(currentFieldName)) {
                     qpSettings.autoGeneratePhraseQueries(parser.booleanValue());
+                } else if ("max_determinized_states".equals(currentFieldName) || "maxDeterminizedStates".equals(currentFieldName)) {
+                    qpSettings.maxDeterminizedStates(parser.intValue());
                 } else if ("lowercase_expanded_terms".equals(currentFieldName) || "lowercaseExpandedTerms".equals(currentFieldName)) {
                     qpSettings.lowercaseExpandedTerms(parser.booleanValue());
                 } else if ("enable_position_increments".equals(currentFieldName) || "enablePositionIncrements".equals(currentFieldName)) {

+ 18 - 3
src/main/java/org/elasticsearch/index/query/RegexpFilterBuilder.java

@@ -19,6 +19,7 @@
 
 package org.elasticsearch.index.query;
 
+import org.apache.lucene.util.automaton.Operations;
 import org.elasticsearch.common.xcontent.XContentBuilder;
 
 import java.io.IOException;
@@ -34,6 +35,8 @@ public class RegexpFilterBuilder extends BaseFilterBuilder {
     private final String name;
     private final String regexp;
     private int flags = -1;
+    private int maxDeterminizedStates = Operations.DEFAULT_MAX_DETERMINIZED_STATES;
+    private boolean maxDetermizedStatesSet;
 
     private Boolean cache;
     private String cacheKey;
@@ -75,6 +78,15 @@ public class RegexpFilterBuilder extends BaseFilterBuilder {
         return this;
     }
 
+    /**
+     * Sets the regexp maxDeterminizedStates.
+     */
+    public RegexpFilterBuilder maxDeterminizedStates(int value) {
+        this.maxDeterminizedStates = value;
+        this.maxDetermizedStatesSet = true;
+        return this;
+    }
+
     /**
      * Should the filter be cached or not. Defaults to <tt>false</tt>.
      */
@@ -96,8 +108,11 @@ public class RegexpFilterBuilder extends BaseFilterBuilder {
         } else {
             builder.startObject(name)
                     .field("value", regexp)
-                    .field("flags_value", flags)
-                    .endObject();
+                    .field("flags_value", flags);
+            if (maxDetermizedStatesSet) {
+                builder.field("max_determinized_states", maxDeterminizedStates);
+            }
+            builder.endObject();
         }
 
         if (filterName != null) {
@@ -111,4 +126,4 @@ public class RegexpFilterBuilder extends BaseFilterBuilder {
         }
         builder.endObject();
     }
-}
+}

+ 8 - 4
src/main/java/org/elasticsearch/index/query/RegexpFilterParser.java

@@ -21,6 +21,7 @@ package org.elasticsearch.index.query;
 
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.Filter;
+import org.apache.lucene.util.automaton.Operations;
 import org.elasticsearch.common.inject.Inject;
 import org.elasticsearch.common.lucene.BytesRefs;
 import org.elasticsearch.common.lucene.search.RegexpFilter;
@@ -62,6 +63,7 @@ public class RegexpFilterParser implements FilterParser {
 
         String filterName = null;
         String currentFieldName = null;
+        int maxDeterminizedStates = Operations.DEFAULT_MAX_DETERMINIZED_STATES;
         XContentParser.Token token;
         while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
             if (token == XContentParser.Token.FIELD_NAME) {
@@ -77,6 +79,8 @@ public class RegexpFilterParser implements FilterParser {
                         } else if ("flags".equals(currentFieldName)) {
                             String flags = parser.textOrNull();
                             flagsValue = RegexpFlag.resolveValue(flags);
+                        } else if ("max_determinized_states".equals(currentFieldName)) {
+                            maxDeterminizedStates = parser.intValue();
                         } else if ("flags_value".equals(currentFieldName)) {
                             flagsValue = parser.intValue();
                         } else {
@@ -114,16 +118,16 @@ public class RegexpFilterParser implements FilterParser {
             if (smartNameFieldMappers.explicitTypeInNameWithDocMapper()) {
                 String[] previousTypes = QueryParseContext.setTypesWithPrevious(new String[]{smartNameFieldMappers.docMapper().type()});
                 try {
-                    filter = smartNameFieldMappers.mapper().regexpFilter(value, flagsValue, parseContext);
+                    filter = smartNameFieldMappers.mapper().regexpFilter(value, flagsValue, maxDeterminizedStates, parseContext);
                 } finally {
                     QueryParseContext.setTypes(previousTypes);
                 }
             } else {
-                filter = smartNameFieldMappers.mapper().regexpFilter(value, flagsValue, parseContext);
+                filter = smartNameFieldMappers.mapper().regexpFilter(value, flagsValue, maxDeterminizedStates, parseContext);
             }
         }
         if (filter == null) {
-            filter = new RegexpFilter(new Term(fieldName, BytesRefs.toBytesRef(value)), flagsValue);
+            filter = new RegexpFilter(new Term(fieldName, BytesRefs.toBytesRef(value)), flagsValue, maxDeterminizedStates);
         }
 
         if (cache) {
@@ -136,4 +140,4 @@ public class RegexpFilterParser implements FilterParser {
         }
         return filter;
     }
-}
+}

+ 16 - 1
src/main/java/org/elasticsearch/index/query/RegexpQueryBuilder.java

@@ -19,6 +19,7 @@
 
 package org.elasticsearch.index.query;
 
+import org.apache.lucene.util.automaton.Operations;
 import org.elasticsearch.common.xcontent.XContentBuilder;
 
 import java.io.IOException;
@@ -37,6 +38,8 @@ public class RegexpQueryBuilder extends BaseQueryBuilder implements BoostableQue
     private float boost = -1;
     private String rewrite;
     private String queryName;
+    private int maxDeterminizedStates = Operations.DEFAULT_MAX_DETERMINIZED_STATES;
+    private boolean maxDeterminizedStatesSet;
 
     /**
      * Constructs a new term query.
@@ -71,6 +74,15 @@ public class RegexpQueryBuilder extends BaseQueryBuilder implements BoostableQue
         return this;
     }
 
+    /**
+     * Sets the limit on determinized automaton states for this regexp; written as {@code max_determinized_states} only when set.
+     */
+    public RegexpQueryBuilder maxDeterminizedStates(int value) {
+        this.maxDeterminizedStates = value;
+        this.maxDeterminizedStatesSet = true;
+        return this;
+    }
+
     public RegexpQueryBuilder rewrite(String rewrite) {
         this.rewrite = rewrite;
         return this;
@@ -95,6 +107,9 @@ public class RegexpQueryBuilder extends BaseQueryBuilder implements BoostableQue
             if (flags != -1) {
                 builder.field("flags_value", flags);
             }
+            if (maxDeterminizedStatesSet) {
+                builder.field("max_determinized_states", maxDeterminizedStates);
+            }
             if (boost != -1) {
                 builder.field("boost", boost);
             }
@@ -108,4 +123,4 @@ public class RegexpQueryBuilder extends BaseQueryBuilder implements BoostableQue
         }
         builder.endObject();
     }
-}
+}

+ 8 - 4
src/main/java/org/elasticsearch/index/query/RegexpQueryParser.java

@@ -23,6 +23,7 @@ import org.apache.lucene.index.Term;
 import org.apache.lucene.search.MultiTermQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.RegexpQuery;
+import org.apache.lucene.util.automaton.Operations;
 import org.apache.lucene.util.automaton.RegExp;
 import org.elasticsearch.common.inject.Inject;
 import org.elasticsearch.common.lucene.BytesRefs;
@@ -64,6 +65,7 @@ public class RegexpQueryParser implements QueryParser {
         Object value = null;
         float boost = 1.0f;
         int flagsValue = -1;
+        int maxDeterminizedStates = Operations.DEFAULT_MAX_DETERMINIZED_STATES;
         String queryName = null;
         token = parser.nextToken();
         if (token == XContentParser.Token.START_OBJECT) {
@@ -86,6 +88,8 @@ public class RegexpQueryParser implements QueryParser {
                         if (flagsValue < 0) {
                             flagsValue = RegExp.ALL;
                         }
+                    } else if ("max_determinized_states".equals(currentFieldName) || "maxDeterminizedStates".equals(currentFieldName)) {
+                        maxDeterminizedStates = parser.intValue();
                     } else if ("_name".equals(currentFieldName)) {
                         queryName = parser.text();
                     }
@@ -111,16 +115,16 @@ public class RegexpQueryParser implements QueryParser {
             if (smartNameFieldMappers.explicitTypeInNameWithDocMapper()) {
                 String[] previousTypes = QueryParseContext.setTypesWithPrevious(new String[]{smartNameFieldMappers.docMapper().type()});
                 try {
-                    query = smartNameFieldMappers.mapper().regexpQuery(value, flagsValue, method, parseContext);
+                    query = smartNameFieldMappers.mapper().regexpQuery(value, flagsValue, maxDeterminizedStates, method, parseContext);
                 } finally {
                     QueryParseContext.setTypes(previousTypes);
                 }
             } else {
-                query = smartNameFieldMappers.mapper().regexpQuery(value, flagsValue, method, parseContext);
+                query = smartNameFieldMappers.mapper().regexpQuery(value, flagsValue, maxDeterminizedStates, method, parseContext);
             }
         }
         if (query == null) {
-            RegexpQuery regexpQuery = new RegexpQuery(new Term(fieldName, BytesRefs.toBytesRef(value)), flagsValue);
+            RegexpQuery regexpQuery = new RegexpQuery(new Term(fieldName, BytesRefs.toBytesRef(value)), flagsValue, maxDeterminizedStates);
             if (method != null) {
                 regexpQuery.setRewriteMethod(method);
             }
@@ -135,4 +139,4 @@ public class RegexpQueryParser implements QueryParser {
     }
 
 
-}
+}

+ 1 - 2
src/main/java/org/elasticsearch/index/translog/fs/RafReference.java

@@ -20,7 +20,6 @@
 package org.elasticsearch.index.translog.fs;
 
 import org.apache.lucene.util.IOUtils;
-import org.elasticsearch.common.io.FileSystemUtils;
 
 import java.io.File;
 import java.io.FileNotFoundException;
@@ -78,7 +77,7 @@ public class RafReference {
                 // ignore
             } finally {
                 if (delete) {
-                    FileSystemUtils.deleteFilesIgnoringExceptions(file.toPath());
+                    IOUtils.deleteFilesIgnoringExceptions(file.toPath());
                 }
             }
 

+ 5 - 1
src/main/java/org/elasticsearch/search/suggest/context/ContextMapping.java

@@ -22,6 +22,7 @@ package org.elasticsearch.search.suggest.context;
 import com.google.common.collect.Iterables;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
+
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.search.suggest.analyzing.XAnalyzingSuggester;
 import org.apache.lucene.util.automaton.Automata;
@@ -259,7 +260,10 @@ public abstract class ContextMapping implements ToXContent {
             for (ContextQuery query : queries) {
                 a = Operations.concatenate(Arrays.asList(query.toAutomaton(), gap, a));
             }
-            return Operations.determinize(a);
+
+            // TODO: should we limit this?  Do any of our ContextQuery impls really create exponential regexps?  GeoQuery looks safe (union
+            // of strings).
+            return Operations.determinize(a, Integer.MAX_VALUE);
         }
 
         /**

+ 1 - 2
src/test/java/org/elasticsearch/benchmark/fs/FsAppendBenchmark.java

@@ -20,7 +20,6 @@ package org.elasticsearch.benchmark.fs;
 
 import org.apache.lucene.util.IOUtils;
 import org.elasticsearch.common.StopWatch;
-import org.elasticsearch.common.io.FileSystemUtils;
 import org.elasticsearch.common.unit.ByteSizeValue;
 
 import java.io.File;
@@ -36,7 +35,7 @@ import java.util.Random;
 public class FsAppendBenchmark {
 
     public static void main(String[] args) throws Exception {
-        FileSystemUtils.deleteFilesIgnoringExceptions(Paths.get("work/test.log"));
+        IOUtils.deleteFilesIgnoringExceptions(Paths.get("work/test.log"));
         RandomAccessFile raf = new RandomAccessFile("work/test.log", "rw");
         raf.setLength(0);
 

+ 48 - 1
src/test/java/org/elasticsearch/index/query/SimpleIndexQueryParserTests.java

@@ -19,9 +19,9 @@
 
 package org.elasticsearch.index.query;
 
-
 import com.google.common.collect.Lists;
 import com.google.common.collect.Sets;
+
 import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
 import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.MultiFields;
@@ -66,6 +66,7 @@ import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
 import org.apache.lucene.util.CharsRefBuilder;
 import org.apache.lucene.util.NumericUtils;
+import org.apache.lucene.util.automaton.TooComplexToDeterminizeException;
 import org.elasticsearch.ElasticsearchException;
 import org.elasticsearch.ElasticsearchIllegalArgumentException;
 import org.elasticsearch.action.termvector.MultiTermVectorsRequest;
@@ -344,6 +345,29 @@ public class SimpleIndexQueryParserTests extends ElasticsearchSingleNodeTest {
         }
     }
 
+    @Test
+    public void testQueryStringRegexp() throws Exception {
+        IndexQueryParserService queryParser = queryParser();
+        String query = copyToStringFromClasspath("/org/elasticsearch/index/query/query-regexp-max-determinized-states.json");
+        Query parsedQuery = queryParser.parse(query).query();
+        assertThat(parsedQuery, instanceOf(RegexpQuery.class));
+        RegexpQuery regexpQuery = (RegexpQuery) parsedQuery;
+        assertTrue(regexpQuery.toString().contains("/foo*bar/"));
+    }
+
+    @Test
+    public void testQueryStringRegexpTooManyDeterminizedStates() throws Exception {
+        IndexQueryParserService queryParser = queryParser();
+        String query = copyToStringFromClasspath("/org/elasticsearch/index/query/query-regexp-too-many-determinized-states.json");
+        try {
+            queryParser.parse(query).query();
+            fail("did not hit exception");
+        } catch (QueryParsingException qpe) {
+            // expected: parsing must fail, and the underlying cause must be the determinization limit being exceeded
+            assertTrue(qpe.getCause() instanceof TooComplexToDeterminizeException);
+        }
+    }
+
     @Test
     public void testMatchAllBuilder() throws Exception {
         IndexQueryParserService queryParser = queryParser();
@@ -674,6 +698,16 @@ public class SimpleIndexQueryParserTests extends ElasticsearchSingleNodeTest {
         assertThat(regexpQuery.getField(), equalTo("name.first"));
     }
 
+    @Test
+    public void testRegexpQueryWithMaxDeterminizedStates() throws IOException {
+        IndexQueryParserService queryParser = queryParser();
+        String query = copyToStringFromClasspath("/org/elasticsearch/index/query/regexp-max-determinized-states.json");
+        Query parsedQuery = queryParser.parse(query).query();
+        assertThat(parsedQuery, instanceOf(RegexpQuery.class));
+        RegexpQuery regexpQuery = (RegexpQuery) parsedQuery;
+        assertThat(regexpQuery.getField(), equalTo("name.first"));
+    }
+
     @Test
     public void testRegexpFilteredQuery() throws IOException {
         IndexQueryParserService queryParser = queryParser();
@@ -687,6 +721,19 @@ public class SimpleIndexQueryParserTests extends ElasticsearchSingleNodeTest {
         assertThat(regexpFilter.regexp(), equalTo("s.*y"));
     }
 
+    @Test
+    public void testRegexpFilteredQueryWithMaxDeterminizedStates() throws IOException {
+        IndexQueryParserService queryParser = queryParser();
+        String query = copyToStringFromClasspath("/org/elasticsearch/index/query/regexp-filter-max-determinized-states.json");
+        Query parsedQuery = queryParser.parse(query).query();
+        assertThat(parsedQuery, instanceOf(FilteredQuery.class));
+        Filter filter = ((FilteredQuery) parsedQuery).getFilter();
+        assertThat(filter, instanceOf(RegexpFilter.class));
+        RegexpFilter regexpFilter = (RegexpFilter) filter;
+        assertThat(regexpFilter.field(), equalTo("name.first"));
+        assertThat(regexpFilter.regexp(), equalTo("s.*y"));
+    }
+
     @Test
     public void testNamedRegexpFilteredQuery() throws IOException {
         IndexQueryParserService queryParser = queryParser();

+ 7 - 0
src/test/java/org/elasticsearch/index/query/query-regexp-max-determinized-states.json

@@ -0,0 +1,7 @@
+{
+    query_string: {
+        default_field: "content",
+        query:"/foo*bar/",
+	max_determinized_states: 5000
+    }
+}

+ 6 - 0
src/test/java/org/elasticsearch/index/query/query-regexp-too-many-determinized-states.json

@@ -0,0 +1,6 @@
+{
+    query_string: {
+        default_field: "content",
+        query: "/[ac]*a[ac]{50,200}/"
+    }
+}

+ 17 - 0
src/test/java/org/elasticsearch/index/query/regexp-filter-max-determinized-states.json

@@ -0,0 +1,17 @@
+{
+    "filtered": {
+        "query": {
+            "term": {
+                "name.first": "shay"
+            }
+        },
+        "filter": {
+            "regexp": {
+                "name.first": {
+		    "value": "s.*y",
+		    "max_determinized_states": 6000
+		}
+            }
+        }
+    }
+}

+ 6 - 0
src/test/java/org/elasticsearch/index/query/regexp-max-determinized-states.json

@@ -0,0 +1,6 @@
+{
+    "regexp": {
+        "name.first": "s.*y",
+	"max_determinized_states": 5000
+    }
+}

+ 2 - 3
src/test/java/org/elasticsearch/snapshots/SharedClusterSnapshotRestoreTests.java

@@ -46,7 +46,6 @@ import org.elasticsearch.cluster.metadata.MappingMetaData;
 import org.elasticsearch.cluster.metadata.SnapshotMetaData;
 import org.elasticsearch.cluster.routing.allocation.decider.FilterAllocationDecider;
 import org.elasticsearch.common.collect.ImmutableOpenMap;
-import org.elasticsearch.common.io.FileSystemUtils;
 import org.elasticsearch.common.settings.ImmutableSettings;
 import org.elasticsearch.common.unit.TimeValue;
 import org.elasticsearch.index.store.support.AbstractIndexStore;
@@ -735,8 +734,8 @@ public class SharedClusterSnapshotRestoreTests extends AbstractSnapshotTests {
         File testIndex1 = new File(indices, "test-idx-1");
         File testIndex2 = new File(indices, "test-idx-2");
         File testIndex2Shard0 = new File(testIndex2, "0");
-        FileSystemUtils.deleteFilesIgnoringExceptions(new File(testIndex1, "snapshot-test-snap-1").toPath());
-        FileSystemUtils.deleteFilesIgnoringExceptions(new File(testIndex2Shard0, "snapshot-test-snap-1").toPath());
+        IOUtils.deleteFilesIgnoringExceptions(new File(testIndex1, "snapshot-test-snap-1").toPath());
+        IOUtils.deleteFilesIgnoringExceptions(new File(testIndex2Shard0, "snapshot-test-snap-1").toPath());
 
         logger.info("--> delete snapshot");
         client.admin().cluster().prepareDeleteSnapshot("test-repo", "test-snap-1").get();