Browse Source

Don't apply the rewrite-as-range optimization if field is multivalued (#84535)

This fixes a bug when trying to optimize date histograms into date range aggregations. For multivalued fields with a date query, the optimization would only capture the values of the field that matched the query, rather than all values of the document as a non-rewritten date histogram would do. This PR disables the query-level optimization when there are multivalued fields in the index, as there's no good way to work around it. Index-level range rewrite optimizations are unaffected.

Co-authored-by: Elastic Machine <elasticmachine@users.noreply.github.com>
Mark Tozzi 3 years ago
parent
commit
947f88b1e9

+ 6 - 0
docs/changelog/84535.yaml

@@ -0,0 +1,6 @@
+pr: 84535
+summary: Don't apply the rewrite-as-range optimization if field is multivalued
+area: Aggregations
+type: bug
+issues:
+ - 82903

+ 50 - 1
rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.aggregation/360_date_histogram.yml

@@ -15,6 +15,11 @@ setup:
             properties:
               range:
                 type: date_range
+              date:
+                type: date
+              date_not_indexed:
+                type: date
+                index: false
 
   - do:
       bulk:
@@ -33,6 +38,8 @@ setup:
           - '{"range": {"gte": "2016-04-01"}}'
           - '{"index": {}}'
           - '{"range": {"lt": "2016-02-01"}}'
+          - '{"index": {}}'
+          - '{ "date":["2021-05-01","2021-04-01","2021-04-22"], "date_not_indexed":["2021-05-01","2021-04-01","2021-03-22"]}'
 
 ---
 "date_histogram on range with hard bounds":
@@ -53,7 +60,7 @@ setup:
                   "min": "2015-06-01"
                   "max": "2016-06-01"
 
-  - match: { hits.total.value: 6 }
+  - match: { hits.total.value: 7 }
   - length: { aggregations.histo.buckets: 13 }
   - match: { aggregations.histo.buckets.0.key_as_string: "2015-06-01T00:00:00.000Z" }
   - match: { aggregations.histo.buckets.0.doc_count: 1 }
@@ -61,3 +68,45 @@ setup:
   - match: { aggregations.histo.buckets.8.doc_count: 1 }
   - match: { aggregations.histo.buckets.12.key_as_string: "2016-06-01T00:00:00.000Z" }
   - match: { aggregations.histo.buckets.12.doc_count: 1 }
+---
+"Multi-value date histogram":
+  - skip:
+      version: " - 8.1.99"
+      reason:  Bug fixed in 8.2.0
+
+  - do:
+      search:
+        body:
+          query:
+            match:
+              date: "2021-04-01"
+          aggs:
+            datehisto:
+              date_histogram:
+                field: "date"
+                calendar_interval: "1M"
+
+  - match: { hits.total.value: 1 }
+  - length: { aggregations.datehisto.buckets: 2 }
+
+---
+"Multi-value date histogram docvalues only":
+  - skip:
+      version: " - 8.1.99"
+      reason:  Bug fixed in 8.2.0
+
+  - do:
+      search:
+        body:
+          profile: true
+          query:
+            match:
+              date_not_indexed: "2021-04-01"
+          aggs:
+            datehisto:
+              date_histogram:
+                field: "date_not_indexed"
+                calendar_interval: "1M"
+
+  - match: { hits.total.value: 1 }
+  - length: { aggregations.datehisto.buckets: 3 }

+ 26 - 2
server/src/main/java/org/elasticsearch/search/aggregations/support/CoreValuesSourceType.java

@@ -8,7 +8,11 @@
 
 package org.elasticsearch.search.aggregations.support;
 
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.PointValues;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.PointRangeQuery;
@@ -277,6 +281,7 @@ public enum CoreValuesSourceType implements ValuesSourceType {
             if (fieldContext.fieldType() instanceof DateFieldType == false) {
                 return new ValuesSource.Numeric.FieldData((IndexNumericFieldData) fieldContext.indexFieldData());
             }
+
             return new ValuesSource.Numeric.FieldData((IndexNumericFieldData) fieldContext.indexFieldData()) {
                 /**
                  * Proper dates get a real implementation of
@@ -296,6 +301,7 @@ public enum CoreValuesSourceType implements ValuesSourceType {
 
                     // Check the search index for bounds
                     if (fieldContext.fieldType().isIndexed()) {
+                        log.trace("Attempting to apply index bound date rounding");
                         /*
                          * We can't look up the min and max date without both the
                          * search index (isSearchable) and the resolution which
@@ -310,8 +316,24 @@ public enum CoreValuesSourceType implements ValuesSourceType {
                         }
                     }
 
-                    // Check the query for bounds
-                    if (context.query() != null) {
+                    boolean isMultiValue = false;
+                    for (LeafReaderContext leaf : context.searcher().getLeafContexts()) {
+                        if (fieldContext.fieldType().isIndexed()) {
+                            PointValues pointValues = leaf.reader().getPointValues(fieldContext.field());
+                            if (pointValues != null && pointValues.size() != pointValues.getDocCount()) {
+                                isMultiValue = true;
+                            }
+                        } else if (fieldContext.fieldType().hasDocValues()) {
+                            if (DocValues.unwrapSingleton(leaf.reader().getSortedNumericDocValues(fieldContext.field())) == null) {
+                                isMultiValue = true;
+                            }
+                        }
+                    }
+
+                    // Check the query for bounds. If the field is multivalued, we can't apply query bounds, because a document that
+                    // matches the query might also have values outside the query, which would not be included in any range.
+                    if (context.query() != null && false == isMultiValue) {
+                        log.trace("Attempting to apply query bound rounding");
                         context.query().visit(new QueryVisitor() {
                             @Override
                             public QueryVisitor getSubVisitor(BooleanClause.Occur occur, Query parent) {
@@ -403,6 +425,8 @@ public enum CoreValuesSourceType implements ValuesSourceType {
         }
     };
 
+    public static final Logger log = LogManager.getLogger(CoreValuesSourceType.class);
+
     public static ValuesSourceType fromString(String name) {
         return valueOf(name.trim().toUpperCase(Locale.ROOT));
     }