7 年之前 · 0f878eff19
--- a/server/src/main/java/org/elasticsearch/index/search/MatchQuery.java
+++ b/server/src/main/java/org/elasticsearch/index/search/MatchQuery.java
@@ -44,6 +44,7 @@ import org.apache.lucene.search.spans.SpanQuery;
 
				 import org.apache.lucene.search.spans.SpanTermQuery;
			
 
				 import org.apache.lucene.util.BytesRef;
			
 
				 import org.apache.lucene.util.QueryBuilder;
			
 
				+import org.apache.lucene.util.graph.GraphTokenStreamFiniteStrings;
			
 
				 import org.elasticsearch.ElasticsearchException;
			
 
				 import org.elasticsearch.common.io.stream.StreamInput;
			
 
				 import org.elasticsearch.common.io.stream.StreamOutput;
			
@@ -57,11 +58,14 @@ import org.elasticsearch.index.query.QueryShardContext;
 
				 import org.elasticsearch.index.query.support.QueryParsers;
			
 
				 
			
 
				 import java.io.IOException;
			
 
				+import java.util.ArrayList;
			
 
				+import java.util.Iterator;
			
 
				+import java.util.List;
			
 
				 
			
 
				 import static org.elasticsearch.common.lucene.search.Queries.newLenientFieldQuery;
			
 
				 import static org.elasticsearch.common.lucene.search.Queries.newUnmappedFieldQuery;
			
 
				 
			
 
				-public class    MatchQuery {
			
 
				+public class MatchQuery {
			
 
				 
			
 
				     public enum Type implements Writeable {
			
 
				         /**
			
@@ -503,6 +507,82 @@ public class    MatchQuery {
 
				             }
			
 
				             return query;
			
 
				         }
			
 
				+
			
 
				+        /**
			
 
				+         * Overrides {@link QueryBuilder#analyzeGraphPhrase(TokenStream, String, int)} to add
			
 
				+         * a limit (see {@link BooleanQuery#getMaxClauseCount()}) to the number of {@link SpanQuery}
			
 
				+         * that this method can create.
			
 
				+         *
			
 
				+         * TODO Remove when https://issues.apache.org/jira/browse/LUCENE-8479 is fixed.
			
 
				+         */
			
 
				+        @Override
			
 
				+        protected SpanQuery analyzeGraphPhrase(TokenStream source, String field, int phraseSlop) throws IOException {
			
 
				+            source.reset();
			
 
				+            GraphTokenStreamFiniteStrings graph = new GraphTokenStreamFiniteStrings(source);
			
 
				+            List<SpanQuery> clauses = new ArrayList<>();
			
 
				+            int[] articulationPoints = graph.articulationPoints();
			
 
				+            int lastState = 0;
			
 
				+            int maxBooleanClause = BooleanQuery.getMaxClauseCount();
			
 
				+            for (int i = 0; i <= articulationPoints.length; i++) {
			
 
				+                int start = lastState;
			
 
				+                int end = -1;
			
 
				+                if (i < articulationPoints.length) {
			
 
				+                    end = articulationPoints[i];
			
 
				+                }
			
 
				+                lastState = end;
			
 
				+                final SpanQuery queryPos;
			
 
				+                if (graph.hasSidePath(start)) {
			
 
				+                    List<SpanQuery> queries = new ArrayList<>();
			
 
				+                    Iterator<TokenStream> it = graph.getFiniteStrings(start, end);
			
 
				+                    while (it.hasNext()) {
			
 
				+                        TokenStream ts = it.next();
			
 
				+                        SpanQuery q = createSpanQuery(ts, field);
			
 
				+                        if (q != null) {
			
 
				+                            if (queries.size() >= maxBooleanClause) {
			
 
				+                                throw new BooleanQuery.TooManyClauses();
			
 
				+                            }
			
 
				+                            queries.add(q);
			
 
				+                        }
			
 
				+                    }
			
 
				+                    if (queries.size() > 0) {
			
 
				+                        queryPos = new SpanOrQuery(queries.toArray(new SpanQuery[0]));
			
 
				+                    } else {
			
 
				+                        queryPos = null;
			
 
				+                    }
			
 
				+                } else {
			
 
				+                    Term[] terms = graph.getTerms(field, start);
			
 
				+                    assert terms.length > 0;
			
 
				+                    if (terms.length >= maxBooleanClause) {
			
 
				+                        throw new BooleanQuery.TooManyClauses();
			
 
				+                    }
			
 
				+                    if (terms.length == 1) {
			
 
				+                        queryPos = new SpanTermQuery(terms[0]);
			
 
				+                    } else {
			
 
				+                        SpanTermQuery[] orClauses = new SpanTermQuery[terms.length];
			
 
				+                        for (int idx = 0; idx < terms.length; idx++) {
			
 
				+                            orClauses[idx] = new SpanTermQuery(terms[idx]);
			
 
				+                        }
			
 
				+
			
 
				+                        queryPos = new SpanOrQuery(orClauses);
			
 
				+                    }
			
 
				+                }
			
 
				+
			
 
				+                if (queryPos != null) {
			
 
				+                    if (clauses.size() >= maxBooleanClause) {
			
 
				+                        throw new BooleanQuery.TooManyClauses();
			
 
				+                    }
			
 
				+                    clauses.add(queryPos);
			
 
				+                }
			
 
				+            }
			
 
				+
			
 
				+            if (clauses.isEmpty()) {
			
 
				+                return null;
			
 
				+            } else if (clauses.size() == 1) {
			
 
				+                return clauses.get(0);
			
 
				+            } else {
			
 
				+                return new SpanNearQuery(clauses.toArray(new SpanQuery[0]), phraseSlop, true);
			
 
				+            }
			
 
				+        }
			
 
				     }
			
 
				 
			
 
				     /**
			
--- a/server/src/test/java/org/elasticsearch/index/query/MatchQueryBuilderTests.java
+++ b/server/src/test/java/org/elasticsearch/index/query/MatchQueryBuilderTests.java
@@ -19,6 +19,11 @@
 
				 
			
 
				 package org.elasticsearch.index.query;
			
 
				 
			
 
				+import org.apache.lucene.analysis.Analyzer;
			
 
				+import org.apache.lucene.analysis.CannedBinaryTokenStream;
			
 
				+import org.apache.lucene.analysis.MockTokenizer;
			
 
				+import org.apache.lucene.analysis.TokenStream;
			
 
				+import org.apache.lucene.analysis.Tokenizer;
			
 
				 import org.apache.lucene.index.Term;
			
 
				 import org.apache.lucene.queries.ExtendedCommonTermsQuery;
			
 
				 import org.apache.lucene.search.BooleanClause;
			
@@ -30,6 +35,7 @@ import org.apache.lucene.search.MatchNoDocsQuery;
 
				 import org.apache.lucene.search.PointRangeQuery;
			
 
				 import org.apache.lucene.search.Query;
			
 
				 import org.apache.lucene.search.TermQuery;
			
 
				+import org.apache.lucene.util.BytesRef;
			
 
				 import org.elasticsearch.action.admin.indices.mapping.put.PutMappingRequest;
			
 
				 import org.elasticsearch.common.ParsingException;
			
 
				 import org.elasticsearch.common.Strings;
			
@@ -46,6 +52,8 @@ import org.elasticsearch.test.AbstractQueryTestCase;
 
				 import org.hamcrest.Matcher;
			
 
				 
			
 
				 import java.io.IOException;
			
 
				+import java.io.Reader;
			
 
				+import java.util.ArrayList;
			
 
				 import java.util.HashMap;
			
 
				 import java.util.List;
			
 
				 import java.util.Locale;
			
@@ -392,4 +400,72 @@ public class MatchQueryBuilderTests extends AbstractQueryTestCase<MatchQueryBuil
 
				         assertThat(query.toString(),
			
 
				             containsString("field:[string_no_pos] was indexed without position data; cannot run PhraseQuery"));
			
 
				     }
			
 
				+
			
 
				+    public void testMaxBooleanClause() {
			
 
				+        MatchQuery query = new MatchQuery(createShardContext());
			
 
				+        query.setAnalyzer(new MockGraphAnalyzer(createGiantGraph(40)));
			
 
				+        expectThrows(BooleanQuery.TooManyClauses.class, () -> query.parse(Type.PHRASE, STRING_FIELD_NAME, ""));
			
 
				+        query.setAnalyzer(new MockGraphAnalyzer(createGiantGraphMultiTerms()));
			
 
				+        expectThrows(BooleanQuery.TooManyClauses.class, () -> query.parse(Type.PHRASE, STRING_FIELD_NAME, ""));
			
 
				+    }
			
 
				+
			
 
				+    private static class MockGraphAnalyzer extends Analyzer {
			
 
				+        final CannedBinaryTokenStream.BinaryToken[] tokens;
			
 
				+
			
 
				+        private MockGraphAnalyzer(CannedBinaryTokenStream.BinaryToken[] tokens ) {
			
 
				+            this.tokens = tokens;
			
 
				+        }
			
 
				+        @Override
			
 
				+        protected TokenStreamComponents createComponents(String fieldName) {
			
 
				+            Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
			
 
				+            return new TokenStreamComponents(tokenizer) {
			
 
				+                @Override
			
 
				+                public TokenStream getTokenStream() {
			
 
				+                    return new CannedBinaryTokenStream(tokens);
			
 
				+                }
			
 
				+
			
 
				+                @Override
			
 
				+                protected void setReader(final Reader reader) {
			
 
				+                }
			
 
				+            };
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    /**
			
 
				+     * Creates a graph token stream with 2 side paths at each position.
			
 
				+     **/
			
 
				+    private static CannedBinaryTokenStream.BinaryToken[] createGiantGraph(int numPos) {
			
 
				+        List<CannedBinaryTokenStream.BinaryToken> tokens = new ArrayList<>();
			
 
				+        BytesRef term1 = new BytesRef("foo");
			
 
				+        BytesRef term2 = new BytesRef("bar");
			
 
				+        for (int i = 0; i < numPos;) {
			
 
				+            if (i % 2 == 0) {
			
 
				+                tokens.add(new CannedBinaryTokenStream.BinaryToken(term2, 1, 1));
			
 
				+                tokens.add(new CannedBinaryTokenStream.BinaryToken(term1, 0, 2));
			
 
				+                i += 2;
			
 
				+            } else {
			
 
				+                tokens.add(new CannedBinaryTokenStream.BinaryToken(term2, 1, 1));
			
 
				+                i++;
			
 
				+            }
			
 
				+        }
			
 
				+        return tokens.toArray(new CannedBinaryTokenStream.BinaryToken[0]);
			
 
				+    }
			
 
				+
			
 
				+    /**
			
 
				+     * Creates a graph token stream with {@link BooleanQuery#getMaxClauseCount()}
			
 
				+     * expansions at the last position.
			
 
				+     **/
			
 
				+    private static CannedBinaryTokenStream.BinaryToken[] createGiantGraphMultiTerms() {
			
 
				+        List<CannedBinaryTokenStream.BinaryToken> tokens = new ArrayList<>();
			
 
				+        BytesRef term1 = new BytesRef("foo");
			
 
				+        BytesRef term2 = new BytesRef("bar");
			
 
				+        tokens.add(new CannedBinaryTokenStream.BinaryToken(term2, 1, 1));
			
 
				+        tokens.add(new CannedBinaryTokenStream.BinaryToken(term1, 0, 2));
			
 
				+        tokens.add(new CannedBinaryTokenStream.BinaryToken(term2, 1, 1));
			
 
				+        tokens.add(new CannedBinaryTokenStream.BinaryToken(term2, 1, 1));
			
 
				+        for (int i = 0; i < BooleanQuery.getMaxClauseCount(); i++) {
			
 
				+            tokens.add(new CannedBinaryTokenStream.BinaryToken(term1, 0, 1));
			
 
				+        }
			
 
				+        return tokens.toArray(new CannedBinaryTokenStream.BinaryToken[0]);
			
 
				+    }
			
 
				 }