Преглед изворни кода

Give Lucene more opportunities to enable the filter-by-filter optimization. (#85322)

Filter-by-filter is only enabled when finding the documents that fall
into all buckets doesn't require evaluating the main query multiple
times, so that we can be reasonably confident that it runs faster than
running the aggregations naively. This logic relies on Query rewriting,
since many queries that look non-trivial at first sight actually
rewrite to a `MatchAllDocsQuery`, e.g. range queries that match the
entire shard or filters on `constant_keyword` fields.

This change moves more of this logic to Lucene by wrapping queries with
a `ConstantScoreQuery` prior to rewriting. With the current Lucene
version, this has the benefit of unwrapping `BoostQuery` so I could
remove this bit from Elasticsearch. Furthermore, the next version of
Lucene has more rewrite rules coming for queries that are wrapped into a
`ConstantScoreQuery` (apache/lucene#672), so more queries might become
candidates for filter-by-filter execution in the future.
Adrien Grand пре 3 година
родитељ
комит
50306fa625

+ 5 - 0
docs/changelog/85322.yaml

@@ -0,0 +1,5 @@
+pr: 85322
+summary: Give Lucene more opportunities to enable the filter-by-filter optimization
+area: Aggregations
+type: enhancement
+issues: []

+ 15 - 15
server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/QueryToFilterAdapter.java

@@ -13,7 +13,6 @@ import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.sandbox.search.IndexSortSortedNumericDocValuesRangeQuery;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.BoostQuery;
 import org.apache.lucene.search.BulkScorer;
 import org.apache.lucene.search.ConstantScoreQuery;
 import org.apache.lucene.search.DocValuesFieldExistsQuery;
@@ -49,7 +48,9 @@ public class QueryToFilterAdapter<Q extends Query> {
      * Note: This method rewrites the query against the {@link IndexSearcher}
      */
     public static QueryToFilterAdapter<?> build(IndexSearcher searcher, String key, Query query) throws IOException {
-        query = searcher.rewrite(query);
+        // Wrapping with a ConstantScoreQuery enables a few more rewrite
+        // rules as of Lucene 9.2
+        query = searcher.rewrite(new ConstantScoreQuery(query));
         if (query instanceof ConstantScoreQuery) {
             /*
              * Unwrap constant score because it gets in the way of us
@@ -155,18 +156,21 @@ public class QueryToFilterAdapter<Q extends Query> {
      */
     QueryToFilterAdapter<?> union(Query extraQuery) throws IOException {
         /*
+         * Wrapping with a ConstantScoreQuery enables a few more rewrite
+         * rules as of Lucene 9.2.
          * It'd be *wonderful* if Lucene could do fancy optimizations
-         * when merging queries but it doesn't at the moment. Admittedly,
-         * we have a much more limited problem. We don't care about score
-         * here at all. We know which queries its worth spending time to
-         * optimize because we know which aggs rewrite into this one.
+         * when merging queries like combining ranges but it doesn't at
+         * the moment. Admittedly, we have a much more limited problem.
+         * We don't care about score here at all. We know which queries
+         * it's worth spending time to optimize because we know which aggs
+         * rewrite into this one.
          */
-        extraQuery = searcher().rewrite(extraQuery);
-        if (extraQuery instanceof MatchAllDocsQuery) {
+        extraQuery = searcher().rewrite(new ConstantScoreQuery(extraQuery));
+        Query unwrappedExtraQuery = unwrap(extraQuery);
+        if (unwrappedExtraQuery instanceof MatchAllDocsQuery) {
             return this;
         }
         Query unwrappedQuery = unwrap(query);
-        Query unwrappedExtraQuery = unwrap(extraQuery);
         if (unwrappedQuery instanceof PointRangeQuery && unwrappedExtraQuery instanceof PointRangeQuery) {
             Query merged = MergedPointRangeQuery.merge((PointRangeQuery) unwrappedQuery, (PointRangeQuery) unwrappedExtraQuery);
             if (merged != null) {
@@ -175,8 +179,8 @@ public class QueryToFilterAdapter<Q extends Query> {
             }
         }
         BooleanQuery.Builder builder = new BooleanQuery.Builder();
-        builder.add(query, BooleanClause.Occur.MUST);
-        builder.add(extraQuery, BooleanClause.Occur.MUST);
+        builder.add(query, BooleanClause.Occur.FILTER);
+        builder.add(extraQuery, BooleanClause.Occur.FILTER);
         return new QueryToFilterAdapter<>(searcher(), key(), builder.build()) {
             public boolean isInefficientUnion() {
                 return true;
@@ -198,10 +202,6 @@ public class QueryToFilterAdapter<Q extends Query> {
                 query = ((IndexOrDocValuesQuery) query).getIndexQuery();
                 continue;
             }
-            if (query instanceof BoostQuery) {
-                query = ((BoostQuery) query).getQuery();
-                continue;
-            }
             return query;
         }
     }