فهرست منبع

Allow doc-values only search on date types (#82602)

Similar to #82409, but for date fields.

Allows searching on date field types (date, date_nanos) when those fields are not indexed (index: false) but have doc
values enabled.

This enables searches on archive data, which has access to doc values but not to index structures. When combined with
searchable snapshots, it allows downloading only the data for a given (doc value) field to quickly filter down to a
select set of documents.

Relates #81210 and #52728
Yannick Welsch 3 سال پیش
والد
کامیت
928c09a373

+ 2 - 1
docs/reference/mapping/params/doc-values.asciidoc

@@ -17,7 +17,8 @@ makes this data access pattern possible. They store the same values as the
 sorting and aggregations. Doc values are supported on almost all field types,
 with the __notable exception of `text` and `annotated_text` fields__.
 
-<<number,Numeric types>>, such as `long` and `double`, can also be queried
+<<number,Numeric types>>, such as `long` and `double`, and <<date,Date types>>
+can also be queried
 when they are not <<mapping-index,indexed>> but only have doc values enabled.
 Query performance on doc values is much slower than on index structures, but
 offers an interesting tradeoff between disk usage and query performance for

+ 3 - 1
docs/reference/mapping/types/date.asciidoc

@@ -137,7 +137,9 @@ The following parameters are accepted by `date` fields:
 
 <<mapping-index,`index`>>::
 
-    Should the field be searchable? Accepts `true` (default) and `false`.
+    Should the field be quickly searchable? Accepts `true` (default) and
+    `false`. Date fields that only have <<doc-values,`doc_values`>>
+    enabled can also be queried, albeit slower.
 
 <<null-value,`null_value`>>::
 

+ 2 - 1
docs/reference/query-dsl.asciidoc

@@ -33,7 +33,8 @@ the stability of the cluster. Those queries can be categorised as follows:
 
 * Queries that need to do linear scans to identify matches:
 ** <<query-dsl-script-query,`script` queries>>
-** queries on <<number,numeric fields>> that are not indexed but have <<doc-values,doc values>> enabled
+** queries on <<number,numeric>> and <<date,date>> fields that are not indexed
+   but have <<doc-values,doc values>> enabled
 
 * Queries that have a high up-front cost:
 ** <<query-dsl-fuzzy-query,`fuzzy` queries>> (except on

+ 15 - 0
rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/field_caps/10_basic.yml

@@ -83,6 +83,9 @@ setup:
                     type:     long
                   date:
                     type:     date
+                  non_indexed_date:
+                    type:     date
+                    index:    false
                   geo:
                     type:     keyword
                   object:
@@ -210,6 +213,18 @@ setup:
 
   - match: {fields.object\.nested1.long.searchable:                       true}
 
+---
+"Field caps for date field with only doc values":
+  - skip:
+      version: " - 8.0.99"
+      reason: "doc values search was added in 8.1.0"
+  - do:
+      field_caps:
+        index: 'test1,test2,test3'
+        fields: non_indexed_date
+
+  - match: {fields.non_indexed_date.date.searchable:                       true}
+
 ---
 "Get object and nested field caps":
 

+ 62 - 0
rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/140_pre_filter_search_shards.yml

@@ -10,6 +10,10 @@ setup:
                 created_at:
                    type: date
                    format: "yyyy-MM-dd"
+                created_at_not_indexed:
+                   type: date
+                   index: false
+                   format: "yyyy-MM-dd"
   - do:
       indices.create:
           index: index_2
@@ -21,6 +25,10 @@ setup:
                 created_at:
                    type: date_nanos
                    format: "yyyy-MM-dd"
+                created_at_not_indexed:
+                   type: date
+                   index: false
+                   format: "yyyy-MM-dd"
   - do:
       indices.create:
           index: index_3
@@ -32,6 +40,10 @@ setup:
                 created_at:
                    type: date
                    format: "yyyy-MM-dd"
+                created_at_not_indexed:
+                   type: date
+                   index: false
+                   format: "yyyy-MM-dd"
 
 
 ---
@@ -222,3 +234,53 @@ setup:
   - length: { hits.hits: 1 }
   - match: {hits.hits.0._id: "3" }
   - length: { aggregations.idx_terms.buckets: 3 }
+
+---
+"prefilter on non-indexed date fields":
+  - skip:
+      version: "- 8.0.99"
+      reason: "doc values search was added in 8.1.0"
+
+  - do:
+      index:
+        index: index_1
+        id: 1
+        body: { "created_at_not_indexed": "2016-01-01"}
+  - do:
+      index:
+        index: index_2
+        id: 2
+        body: { "created_at_not_indexed": "2017-01-01" }
+
+  - do:
+      index:
+        index: index_3
+        id: 3
+        body: { "created_at_not_indexed": "2018-01-01" }
+  - do:
+      indices.refresh: {}
+
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        body: { "size" : 0, "query" : { "range" : { "created_at_not_indexed" : { "gte" : "2016-02-01", "lt": "2018-02-01"} } } }
+
+  - match: { _shards.total: 3 }
+  - match: { _shards.successful: 3 }
+  - match: { _shards.skipped: 0 }
+  - match: { _shards.failed: 0 }
+  - match: { hits.total: 2 }
+
+  # this is a case where we would normally skip due to rewrite but we can't because we only have doc values
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        pre_filter_shard_size: 1
+        body: { "size" : 0, "query" : { "range" : { "created_at_not_indexed" : { "gte" : "2016-02-01", "lt": "2018-02-01"} } } }
+
+  - match: { _shards.total: 3 }
+  - match: { _shards.successful: 3 }
+  - match: { _shards.skipped : 0 }
+  - match: { _shards.failed: 0 }
+  - match: { hits.total: 2 }

+ 24 - 0
rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/390_doc_values_search.yml

@@ -32,6 +32,10 @@ setup:
               short:
                 type: short
                 index: false
+              date:
+                type: date
+                format: yyyy/MM/dd
+                index: false
 
   - do:
       index:
@@ -45,6 +49,7 @@ setup:
           integer: 1
           long: 1
           short: 1
+          date: "2017/01/01"
 
   - do:
       index:
@@ -58,6 +63,7 @@ setup:
           integer: 2
           long: 2
           short: 2
+          date: "2017/01/02"
 
   - do:
       indices.refresh: {}
@@ -196,3 +202,21 @@ setup:
         index: test
         body: { query: { range: { short: { gte: 0 } } } }
   - length:   { hits.hits: 2  }
+
+---
+"Test match query on date field where only doc values are enabled":
+
+  - do:
+      search:
+        index: test
+        body: { query: { match: { date: { query: "2017/01/01" } } } }
+  - length:   { hits.hits: 1  }
+
+---
+"Test range query on date field where only doc values are enabled":
+
+  - do:
+      search:
+        index: test
+        body: { query: { range: { date: { gte: "2017/01/01" } } } }
+  - length:   { hits.hits: 2  }

+ 27 - 10
server/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java

@@ -367,7 +367,7 @@ public final class DateFieldMapper extends FieldMapper {
 
         public DateFieldType(
             String name,
-            boolean isSearchable,
+            boolean isIndexed,
             boolean isStored,
             boolean hasDocValues,
             DateFormatter dateTimeFormatter,
@@ -376,7 +376,7 @@ public final class DateFieldMapper extends FieldMapper {
             FieldValues<Long> scriptValues,
             Map<String, String> meta
         ) {
-            super(name, isSearchable, isStored, hasDocValues, TextSearchInfo.SIMPLE_MATCH_WITHOUT_TERMS, meta);
+            super(name, isIndexed, isStored, hasDocValues, TextSearchInfo.SIMPLE_MATCH_WITHOUT_TERMS, meta);
             this.dateTimeFormatter = dateTimeFormatter;
             this.dateMathParser = dateTimeFormatter.toDateMathParser();
             this.resolution = resolution;
@@ -388,6 +388,10 @@ public final class DateFieldMapper extends FieldMapper {
             this(name, true, false, true, DEFAULT_DATE_TIME_FORMATTER, Resolution.MILLISECONDS, null, null, Collections.emptyMap());
         }
 
+        public DateFieldType(String name, boolean isIndexed) {
+            this(name, isIndexed, false, true, DEFAULT_DATE_TIME_FORMATTER, Resolution.MILLISECONDS, null, null, Collections.emptyMap());
+        }
+
         public DateFieldType(String name, DateFormatter dateFormatter) {
             this(name, true, false, true, dateFormatter, Resolution.MILLISECONDS, null, null, Collections.emptyMap());
         }
@@ -464,6 +468,11 @@ public final class DateFieldMapper extends FieldMapper {
             return formatter.format(dateTime);
         }
 
+        @Override
+        public boolean isSearchable() {
+            return isIndexed() || hasDocValues();
+        }
+
         @Override
         public Query termQuery(Object value, @Nullable SearchExecutionContext context) {
             return rangeQuery(value, value, true, true, ShapeRelation.INTERSECTS, null, null, context);
@@ -480,7 +489,7 @@ public final class DateFieldMapper extends FieldMapper {
             @Nullable DateMathParser forcedDateParser,
             SearchExecutionContext context
         ) {
-            failIfNotIndexed();
+            failIfNotIndexedNorDocValuesFallback(context);
             if (relation == ShapeRelation.DISJOINT) {
                 throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] does not support DISJOINT ranges");
             }
@@ -496,14 +505,18 @@ public final class DateFieldMapper extends FieldMapper {
                 parser = forcedDateParser;
             }
             return dateRangeQuery(lowerTerm, upperTerm, includeLower, includeUpper, timeZone, parser, context, resolution, (l, u) -> {
-                Query query = LongPoint.newRangeQuery(name(), l, u);
-                if (hasDocValues()) {
-                    Query dvQuery = SortedNumericDocValuesField.newSlowRangeQuery(name(), l, u);
-                    query = new IndexOrDocValuesQuery(query, dvQuery);
-
-                    if (context.indexSortedOnField(name())) {
-                        query = new IndexSortSortedNumericDocValuesRangeQuery(name(), l, u, query);
+                Query query;
+                if (isIndexed()) {
+                    query = LongPoint.newRangeQuery(name(), l, u);
+                    if (hasDocValues()) {
+                        Query dvQuery = SortedNumericDocValuesField.newSlowRangeQuery(name(), l, u);
+                        query = new IndexOrDocValuesQuery(query, dvQuery);
                     }
+                } else {
+                    query = SortedNumericDocValuesField.newSlowRangeQuery(name(), l, u);
+                }
+                if (hasDocValues() && context.indexSortedOnField(name())) {
+                    query = new IndexSortSortedNumericDocValuesRangeQuery(name(), l, u, query);
                 }
                 return query;
             });
@@ -593,6 +606,10 @@ public final class DateFieldMapper extends FieldMapper {
             DateMathParser dateParser,
             QueryRewriteContext context
         ) throws IOException {
+            if (isIndexed() == false && hasDocValues()) {
+                // we don't have a quick way to run this check on doc values, so fall back to default assuming we are within bounds
+                return Relation.INTERSECTS;
+            }
             byte[] minPackedValue = PointValues.getMinPackedValue(reader, name());
             if (minPackedValue == null) {
                 // no points, so nothing matches

+ 3 - 0
server/src/main/java/org/elasticsearch/index/shard/IndexShard.java

@@ -2025,6 +2025,9 @@ public class IndexShard extends AbstractIndexShardComponent implements IndicesCl
         if (mappedFieldType instanceof DateFieldMapper.DateFieldType == false) {
             return ShardLongFieldRange.UNKNOWN; // field missing or not a date
         }
+        if (mappedFieldType.isIndexed() == false) {
+            return ShardLongFieldRange.UNKNOWN; // range information missing
+        }
 
         final ShardLongFieldRange rawTimestampFieldRange;
         try {

+ 31 - 5
server/src/test/java/org/elasticsearch/index/mapper/DateFieldTypeTests.java

@@ -63,8 +63,19 @@ public class DateFieldTypeTests extends FieldTypeTestCase {
         );
     }
 
+    public void testIsFieldWithinRangeOnlyDocValues() throws IOException {
+        QueryRewriteContext context = new QueryRewriteContext(parserConfig(), writableRegistry(), null, () -> nowInMillis);
+        IndexReader reader = new MultiReader();
+        DateFieldType ft = new DateFieldType("my_date", false);
+        // in case of only doc-values, we can't establish disjointness
+        assertEquals(
+            Relation.INTERSECTS,
+            ft.isFieldWithinQuery(reader, "2015-10-12", "2016-04-03", randomBoolean(), randomBoolean(), null, null, context)
+        );
+    }
+
     public void testIsFieldWithinQueryDateMillis() throws IOException {
-        DateFieldType ft = new DateFieldType("my_date", Resolution.MILLISECONDS);
+        DateFieldType ft = new DateFieldType("my_date");
         isFieldWithinRangeTestCase(ft);
     }
 
@@ -192,11 +203,15 @@ public class DateFieldTypeTests extends FieldTypeTestCase {
         );
         assertEquals(expected, ft.termQuery(date, context));
 
+        ft = new DateFieldType("field", false);
+        expected = SortedNumericDocValuesField.newSlowRangeQuery("field", instant, instant + 999);
+        assertEquals(expected, ft.termQuery(date, context));
+
         MappedFieldType unsearchable = new DateFieldType(
             "field",
             false,
             false,
-            true,
+            false,
             DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER,
             Resolution.MILLISECONDS,
             null,
@@ -204,7 +219,7 @@ public class DateFieldTypeTests extends FieldTypeTestCase {
             Collections.emptyMap()
         );
         IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> unsearchable.termQuery(date, context));
-        assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage());
+        assertEquals("Cannot search on field [field] since it is not indexed nor has doc values.", e.getMessage());
     }
 
     public void testRangeQuery() throws IOException {
@@ -245,6 +260,10 @@ public class DateFieldTypeTests extends FieldTypeTestCase {
         );
         assertEquals(expected, ft.rangeQuery(date1, date2, true, true, null, null, null, context).rewrite(new MultiReader()));
 
+        MappedFieldType ft2 = new DateFieldType("field", false);
+        Query expected2 = SortedNumericDocValuesField.newSlowRangeQuery("field", instant1, instant2);
+        assertEquals(expected2, ft2.rangeQuery(date1, date2, true, true, null, null, null, context).rewrite(new MultiReader()));
+
         instant1 = nowInMillis;
         instant2 = instant1 + 100;
         expected = new DateRangeIncludingNowQuery(
@@ -255,11 +274,14 @@ public class DateFieldTypeTests extends FieldTypeTestCase {
         );
         assertEquals(expected, ft.rangeQuery("now", instant2, true, true, null, null, null, context));
 
+        expected2 = new DateRangeIncludingNowQuery(SortedNumericDocValuesField.newSlowRangeQuery("field", instant1, instant2));
+        assertEquals(expected2, ft2.rangeQuery("now", instant2, true, true, null, null, null, context));
+
         MappedFieldType unsearchable = new DateFieldType(
             "field",
             false,
             false,
-            true,
+            false,
             DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER,
             Resolution.MILLISECONDS,
             null,
@@ -270,7 +292,7 @@ public class DateFieldTypeTests extends FieldTypeTestCase {
             IllegalArgumentException.class,
             () -> unsearchable.rangeQuery(date1, date2, true, true, null, null, null, context)
         );
-        assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage());
+        assertEquals("Cannot search on field [field] since it is not indexed nor has doc values.", e.getMessage());
     }
 
     public void testRangeQueryWithIndexSort() {
@@ -321,6 +343,10 @@ public class DateFieldTypeTests extends FieldTypeTestCase {
             new IndexOrDocValuesQuery(pointQuery, dvQuery)
         );
         assertEquals(expected, ft.rangeQuery(date1, date2, true, true, null, null, null, context));
+
+        ft = new DateFieldType("field", false);
+        expected = new IndexSortSortedNumericDocValuesRangeQuery("field", instant1, instant2, dvQuery);
+        assertEquals(expected, ft.rangeQuery(date1, date2, true, true, null, null, null, context));
     }
 
     public void testDateNanoDocValues() throws IOException {

+ 16 - 9
x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/SearchableSnapshotsIntegTests.java

@@ -700,6 +700,7 @@ public class SearchableSnapshotsIntegTests extends BaseSearchableSnapshotsIntegT
         final String indexName = randomAlphaOfLength(10).toLowerCase(Locale.ROOT);
         final int numShards = between(1, 3);
 
+        boolean indexed = randomBoolean();
         final String dateType = randomFrom("date", "date_nanos");
         assertAcked(
             client().admin()
@@ -711,6 +712,7 @@ public class SearchableSnapshotsIntegTests extends BaseSearchableSnapshotsIntegT
                         .startObject("properties")
                         .startObject(DataStream.TimestampField.FIXED_TIMESTAMP_FIELD)
                         .field("type", dateType)
+                        .field("index", indexed)
                         .field("format", "strict_date_optional_time_nanos")
                         .endObject()
                         .endObject()
@@ -768,16 +770,21 @@ public class SearchableSnapshotsIntegTests extends BaseSearchableSnapshotsIntegT
             .getTimestampRange();
 
         assertTrue(timestampRange.isComplete());
-        assertThat(timestampRange, not(sameInstance(IndexLongFieldRange.UNKNOWN)));
-        if (docCount == 0) {
-            assertThat(timestampRange, sameInstance(IndexLongFieldRange.EMPTY));
+
+        if (indexed) {
+            assertThat(timestampRange, not(sameInstance(IndexLongFieldRange.UNKNOWN)));
+            if (docCount == 0) {
+                assertThat(timestampRange, sameInstance(IndexLongFieldRange.EMPTY));
+            } else {
+                assertThat(timestampRange, not(sameInstance(IndexLongFieldRange.EMPTY)));
+                DateFieldMapper.Resolution resolution = dateType.equals("date")
+                    ? DateFieldMapper.Resolution.MILLISECONDS
+                    : DateFieldMapper.Resolution.NANOSECONDS;
+                assertThat(timestampRange.getMin(), greaterThanOrEqualTo(resolution.convert(Instant.parse("2020-11-26T00:00:00Z"))));
+                assertThat(timestampRange.getMin(), lessThanOrEqualTo(resolution.convert(Instant.parse("2020-11-27T00:00:00Z"))));
+            }
         } else {
-            assertThat(timestampRange, not(sameInstance(IndexLongFieldRange.EMPTY)));
-            DateFieldMapper.Resolution resolution = dateType.equals("date")
-                ? DateFieldMapper.Resolution.MILLISECONDS
-                : DateFieldMapper.Resolution.NANOSECONDS;
-            assertThat(timestampRange.getMin(), greaterThanOrEqualTo(resolution.convert(Instant.parse("2020-11-26T00:00:00Z"))));
-            assertThat(timestampRange.getMin(), lessThanOrEqualTo(resolution.convert(Instant.parse("2020-11-27T00:00:00Z"))));
+            assertThat(timestampRange, sameInstance(IndexLongFieldRange.UNKNOWN));
         }
     }