浏览代码

Fail `span_multi` queries that exceeds boolean max clause limit (#30913)

By default span_multi query will limit term expansions = boolean max clause.
This will limit high heap usage in case of high cardinality term
expansions. This applies only if top_terms_N is not used in inner multi
query.
Nirmal Chidambaram 7 年之前
父节点
当前提交
75a676c70b

+ 6 - 7
docs/reference/query-dsl/span-multi-term-query.asciidoc

@@ -37,10 +37,9 @@ GET /_search
 --------------------------------------------------
 // CONSOLE
 
-WARNING: By default `span_multi queries are rewritten to a `span_or` query
-containing **all** the expanded terms. This can be expensive if the number of expanded
-terms is large. To avoid an unbounded expansion you can set the
-<<query-dsl-multi-term-rewrite,rewrite method>> of the multi term query to `top_terms_*`
-rewrite. Or, if you use `span_multi` on `prefix` query only, you can
-activate the <<index-prefix-config,`index_prefixes`>> field option of the `text` field instead. This will
-rewrite any prefix query on the field to a a single term query that matches the indexed prefix.
+WARNING: `span_multi` queries will hit too many clauses failure if the number of terms that match the query exceeds the
+boolean query limit (defaults to 1024).To avoid an unbounded expansion you can set the <<query-dsl-multi-term-rewrite,
+rewrite method>> of the multi term query to `top_terms_*` rewrite. Or, if you use `span_multi` on `prefix` query only,
+you can activate the <<index-prefix-config,`index_prefixes`>> field option of the `text` field instead. This will
+rewrite any prefix query on the field to a a single term query that matches the indexed prefix.
+

+ 61 - 6
server/src/main/java/org/elasticsearch/index/query/SpanMultiTermQueryBuilder.java

@@ -19,6 +19,9 @@
 package org.elasticsearch.index.query;
 
 import org.apache.lucene.index.Term;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.TermContext;
+import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.BoostQuery;
 import org.apache.lucene.search.ConstantScoreQuery;
 import org.apache.lucene.search.MultiTermQuery;
@@ -26,11 +29,15 @@ import org.apache.lucene.search.PrefixQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
+import org.apache.lucene.search.ScoringRewrite;
+import org.apache.lucene.search.TopTermsRewrite;
 import org.apache.lucene.search.spans.SpanBoostQuery;
 import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
+import org.apache.lucene.search.spans.SpanOrQuery;
 import org.apache.lucene.search.spans.SpanQuery;
 import org.apache.lucene.search.spans.SpanTermQuery;
 import org.elasticsearch.Version;
+import org.elasticsearch.ElasticsearchException;
 import org.elasticsearch.common.ParseField;
 import org.elasticsearch.common.ParsingException;
 import org.elasticsearch.common.io.stream.StreamInput;
@@ -42,6 +49,8 @@ import org.elasticsearch.index.mapper.TextFieldMapper;
 import org.elasticsearch.index.query.support.QueryParsers;
 
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
 import java.util.Objects;
 
 /**
@@ -49,12 +58,10 @@ import java.util.Objects;
  * as a {@link SpanQueryBuilder} so it can be nested.
  */
 public class SpanMultiTermQueryBuilder extends AbstractQueryBuilder<SpanMultiTermQueryBuilder>
-        implements SpanQueryBuilder {
+    implements SpanQueryBuilder {
 
     public static final String NAME = "span_multi";
-
     private static final ParseField MATCH_FIELD = new ParseField("match");
-
     private final MultiTermQueryBuilder multiTermQueryBuilder;
 
     public SpanMultiTermQueryBuilder(MultiTermQueryBuilder multiTermQueryBuilder) {
@@ -83,7 +90,7 @@ public class SpanMultiTermQueryBuilder extends AbstractQueryBuilder<SpanMultiTer
 
     @Override
     protected void doXContent(XContentBuilder builder, Params params)
-            throws IOException {
+        throws IOException {
         builder.startObject(NAME);
         builder.field(MATCH_FIELD.getPreferredName());
         multiTermQueryBuilder.toXContent(builder, params);
@@ -105,7 +112,7 @@ public class SpanMultiTermQueryBuilder extends AbstractQueryBuilder<SpanMultiTer
                     QueryBuilder query = parseInnerQueryBuilder(parser);
                     if (query instanceof MultiTermQueryBuilder == false) {
                         throw new ParsingException(parser.getTokenLocation(),
-                                "[span_multi] [" + MATCH_FIELD.getPreferredName() + "] must be of type multi term query");
+                            "[span_multi] [" + MATCH_FIELD.getPreferredName() + "] must be of type multi term query");
                     }
                     subQuery = (MultiTermQueryBuilder) query;
                 } else {
@@ -124,12 +131,55 @@ public class SpanMultiTermQueryBuilder extends AbstractQueryBuilder<SpanMultiTer
 
         if (subQuery == null) {
             throw new ParsingException(parser.getTokenLocation(),
-                    "[span_multi] must have [" + MATCH_FIELD.getPreferredName() + "] multi term query clause");
+                "[span_multi] must have [" + MATCH_FIELD.getPreferredName() + "] multi term query clause");
         }
 
         return new SpanMultiTermQueryBuilder(subQuery).queryName(queryName).boost(boost);
     }
 
+    public static class TopTermSpanBooleanQueryRewriteWithMaxClause extends SpanMultiTermQueryWrapper.SpanRewriteMethod {
+
+        private MultiTermQuery multiTermQuery;
+        private final long maxExpansions;
+
+        TopTermSpanBooleanQueryRewriteWithMaxClause(long max) {
+            maxExpansions = max;
+        }
+
+        @Override
+        public SpanQuery rewrite(IndexReader reader, MultiTermQuery query) throws IOException {
+            multiTermQuery = query;
+            return (SpanQuery) this.delegate.rewrite(reader, multiTermQuery);
+        }
+
+        final ScoringRewrite<List<SpanQuery>> delegate = new ScoringRewrite<List<SpanQuery>>() {
+
+            @Override
+            protected List<SpanQuery> getTopLevelBuilder() {
+                return new ArrayList();
+            }
+
+            @Override
+            protected Query build(List<SpanQuery> builder) {
+                return new SpanOrQuery((SpanQuery[]) builder.toArray(new SpanQuery[builder.size()]));
+            }
+
+            @Override
+            protected void checkMaxClauseCount(int count) {
+                if (count > maxExpansions) {
+                    throw new ElasticsearchException("[" + multiTermQuery.toString() + " ] " +
+                        "exceeds maxClauseCount [ Boolean maxClauseCount is set to " + BooleanQuery.getMaxClauseCount() + "]");
+                }
+            }
+
+            @Override
+            protected void addClause(List<SpanQuery> topLevel, Term term, int docCount, float boost, TermContext states) {
+                SpanTermQuery q = new SpanTermQuery(term, states);
+                topLevel.add(q);
+            }
+        };
+    }
+
     @Override
     protected Query doToQuery(QueryShardContext context) throws IOException {
         Query subQuery = multiTermQueryBuilder.toQuery(context);
@@ -190,10 +240,15 @@ public class SpanMultiTermQueryBuilder extends AbstractQueryBuilder<SpanMultiTer
                     + MultiTermQuery.class.getName() + " but was " + subQuery.getClass().getName());
             }
             spanQuery = new SpanMultiTermQueryWrapper<>((MultiTermQuery) subQuery);
+            if (((MultiTermQuery) subQuery).getRewriteMethod() instanceof TopTermsRewrite == false) {
+                ((SpanMultiTermQueryWrapper<MultiTermQuery>) spanQuery).setRewriteMethod(new
+                    TopTermSpanBooleanQueryRewriteWithMaxClause(BooleanQuery.getMaxClauseCount()));
+            }
         }
         if (boost != AbstractQueryBuilder.DEFAULT_BOOST) {
             return new SpanBoostQuery(spanQuery, boost);
         }
+
         return spanQuery;
     }
 

+ 34 - 1
server/src/test/java/org/elasticsearch/index/query/SpanMultiTermQueryBuilderTests.java

@@ -34,7 +34,6 @@ import org.elasticsearch.Version;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.compress.CompressedXContent;
 import org.elasticsearch.common.io.stream.StreamOutput;
-import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.index.mapper.MapperService;
 import org.elasticsearch.search.internal.SearchContext;
@@ -238,4 +237,38 @@ public class SpanMultiTermQueryBuilderTests extends AbstractQueryTestCase<SpanMu
         assertEquals(json, "ki", ((PrefixQueryBuilder) parsed.innerQuery()).value());
         assertEquals(json, 1.08, parsed.innerQuery().boost(), 0.0001);
     }
+
+    public void testDefaultMaxRewriteBuilder() throws Exception {
+        Query query = QueryBuilders.spanMultiTermQueryBuilder(QueryBuilders.prefixQuery("foo", "b")).
+            toQuery(createShardContext());
+
+        if (query instanceof SpanBoostQuery) {
+            query = ((SpanBoostQuery)query).getQuery();
+        }
+
+        assertTrue(query instanceof SpanMultiTermQueryWrapper);
+        if (query instanceof SpanMultiTermQueryWrapper) {
+            MultiTermQuery.RewriteMethod rewriteMethod = ((SpanMultiTermQueryWrapper)query).getRewriteMethod();
+            assertTrue(rewriteMethod instanceof SpanMultiTermQueryBuilder.TopTermSpanBooleanQueryRewriteWithMaxClause);
+        }
+
+    }
+
+    public void testTopNMultiTermsRewriteInsideSpan() throws Exception {
+
+        Query query = QueryBuilders.spanMultiTermQueryBuilder(QueryBuilders.prefixQuery("foo", "b").rewrite
+            ("top_terms_boost_2000")).
+            toQuery(createShardContext());
+
+        if (query instanceof SpanBoostQuery) {
+            query = ((SpanBoostQuery)query).getQuery();
+        }
+
+        assertTrue(query instanceof SpanMultiTermQueryWrapper);
+        if (query instanceof SpanMultiTermQueryWrapper) {
+            MultiTermQuery.RewriteMethod rewriteMethod = ((SpanMultiTermQueryWrapper)query).getRewriteMethod();
+            assertFalse(rewriteMethod instanceof SpanMultiTermQueryBuilder.TopTermSpanBooleanQueryRewriteWithMaxClause);
+        }
+
+    }
 }

+ 33 - 0
server/src/test/java/org/elasticsearch/search/query/SearchQueryIT.java

@@ -19,7 +19,9 @@
 
 package org.elasticsearch.search.query;
 
+import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.util.English;
+import org.elasticsearch.ElasticsearchException;
 import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
 import org.elasticsearch.action.index.IndexRequestBuilder;
 import org.elasticsearch.action.search.SearchPhaseExecutionException;
@@ -33,8 +35,12 @@ import org.elasticsearch.index.query.BoolQueryBuilder;
 import org.elasticsearch.index.query.MatchQueryBuilder;
 import org.elasticsearch.index.query.MultiMatchQueryBuilder;
 import org.elasticsearch.index.query.Operator;
+import org.elasticsearch.index.query.QueryBuilder;
 import org.elasticsearch.index.query.QueryBuilders;
 import org.elasticsearch.index.query.RangeQueryBuilder;
+import org.elasticsearch.index.query.SpanMultiTermQueryBuilder;
+import org.elasticsearch.index.query.SpanNearQueryBuilder;
+import org.elasticsearch.index.query.SpanTermQueryBuilder;
 import org.elasticsearch.index.query.TermQueryBuilder;
 import org.elasticsearch.index.query.WrapperQueryBuilder;
 import org.elasticsearch.index.query.functionscore.ScoreFunctionBuilders;
@@ -52,6 +58,7 @@ import org.joda.time.DateTimeZone;
 import org.joda.time.format.ISODateTimeFormat;
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.Random;
@@ -1819,4 +1826,30 @@ public class SearchQueryIT extends ESIntegTestCase {
         assertHitCount(searchResponse, 1);
     }
 
+    public void testTermExpansionExceptionOnSpanFailure() throws ExecutionException, InterruptedException {
+        Settings.Builder builder = Settings.builder();
+        builder.put(SETTING_NUMBER_OF_SHARDS, 1).build();
+
+        createIndex("test", builder.build());
+        ArrayList<IndexRequestBuilder> reqs = new ArrayList<>();
+        int origBoolMaxClauseCount = BooleanQuery.getMaxClauseCount();
+        try {
+            BooleanQuery.setMaxClauseCount(2);
+            for (int i = 0; i < BooleanQuery.getMaxClauseCount() + 1; i++) {
+                reqs.add(client().prepareIndex("test", "_doc", Integer.toString(i)).setSource("body", "foo" +
+                    Integer.toString(i) + " bar baz"));
+            }
+            indexRandom(true, false, reqs);
+
+            QueryBuilder queryBuilder = new SpanNearQueryBuilder(new SpanMultiTermQueryBuilder(QueryBuilders.wildcardQuery
+                ("body", "f*")), 0).addClause(new SpanTermQueryBuilder("body", "bar"));
+
+            expectThrows(ElasticsearchException.class, () ->
+                client().prepareSearch().setIndices("test").setQuery(queryBuilder).get());
+        } finally {
+            BooleanQuery.setMaxClauseCount(origBoolMaxClauseCount);
+        }
+
+    }
+
 }