Browse Source

Fix range query on date fields for number inputs (#63692)

Currently, if you write a date range query with numeric 'to' or 'from' bounds,
they can be interpreted as years if no format is provided. We use
"strict_date_optional_time||epoch_millis" in this case that can interpret inputs
like 1000 as the year 1000 for example. 
This PR change this to always interpret and parse numbers with the "epoch_millis"
parser if no other formatter was provided.

Closes #63680
Christoph Büscher 4 years ago
parent
commit
c327794ae8

+ 18 - 0
docs/reference/migration/migrate_8_0/search.asciidoc

@@ -157,3 +157,21 @@ Change any use of `-1` as `from` parameter in request body or url parameters by
 setting it to `0` or omitting it entirely. Requests containing negative values will
 return an error.
 ====
+
+.Range queries on date fields treat numeric values alwas as milliseconds-since-epoch.
+[%collapsible]
+====
+*Details* +
+Range queries on date fields used to misinterpret small numbers (e.g. four digits like 1000)
+as a year when no additional format was set, but would interpret other numeric values as
+milliseconds since epoch. We now treat all numeric values in absence of a specific `format` 
+parameter as milliseconds since epoch. If you want to query for years instead, with a missing
+`format` you now need to quote the input value (e.g. "1984").
+
+*Impact* +
+If you query date fields without a specified `format`, check if the values in your queries are
+actually meant to be milliseconds-since-epoch and use a numeric value in this case. If not, use
+a string value which gets parsed by either the date format set on the field in the mappings or
+by `strict_date_optional_time` by default.
+
+====

+ 8 - 0
docs/reference/query-dsl/range-query.asciidoc

@@ -183,6 +183,14 @@ to `2099-12-01T23:59:59.999_999_999Z`. This date uses the provided year (`2099`)
 and month (`12`) but uses the default day (`01`), hour (`23`), minute (`59`),
 second (`59`), and nanosecond (`999_999_999`).
 
+[[numeric-date]]
+====== Numeric date range value
+
+When no date format is specified and the range query is targeting a date field, numeric
+values are interpreted representing milliseconds-since-the-epoch. If you want the value
+to represent a year, e.g. 2020, you need to pass it as a String value (e.g. "2020") that
+will be parsed according to the default format or the set format.
+
 [[range-query-date-math-rounding]]
 ====== Date math and rounding
 {es} rounds <<date-math,date math>> values in parameters as follows:

+ 2 - 1
docs/reference/release-notes/8.0.0-alpha1.asciidoc

@@ -29,5 +29,6 @@ by using appropriate thresholds. If for instance we want to simulate `index.inde
 all we need to do is to set `index.indexing.slowlog.threshold.index.debug` and
 `index.indexing.slowlog.threshold.index.trace` to `-1` {es-pull}57591[#57591]
 
-
+Search::
+* Consistent treatment of numeric values for range query on date fields without `format` {es-pull}[#63692]
 

+ 2 - 2
qa/multi-cluster-search/src/test/resources/rest-api-spec/test/multi_cluster/30_field_caps.yml

@@ -106,7 +106,7 @@
       field_caps:
         index: 'field_caps_index_4,my_remote_cluster:field_*'
         fields: [number]
-        body: { index_filter: { range: { created_at: { lt: 2018 } } } }
+        body: { index_filter: { range: { created_at: { lt: "2018" } } } }
 
   - match: {indices:                                    ["field_caps_index_4","my_remote_cluster:field_caps_index_1"]}
   - length: {fields.number:                             1}
@@ -117,7 +117,7 @@
       field_caps:
         index: 'field_caps_index_4,my_remote_cluster:field_*'
         fields: [number]
-        body: { index_filter: { range: { created_at: { gt: 2019 } } } }
+        body: { index_filter: { range: { created_at: { gt: "2019" } } } }
 
   - match: {indices:                                    ["my_remote_cluster:field_caps_index_3"]}
   - length: {fields.number:                             1}

+ 3 - 3
rest-api-spec/src/main/resources/rest-api-spec/test/field_caps/30_filter.yml

@@ -81,7 +81,7 @@ setup:
       field_caps:
         index: test-*
         fields: "*"
-        body: { index_filter: { range: { timestamp: { gte: 2010 }}}}
+        body: { index_filter: { range: { timestamp: { gte: "2010" }}}}
 
   - match: {indices:                                      ["test-1", "test-2", "test-3"]}
   - length: {fields.field1:                               3}
@@ -90,7 +90,7 @@ setup:
       field_caps:
         index: test-*
         fields: "*"
-        body: { index_filter: { range: { timestamp: { gte: 2019 } } } }
+        body: { index_filter: { range: { timestamp: { gte: "2019" } } } }
 
   - match: {indices:                                      ["test-2", "test-3"]}
   - length: {fields.field1:                               2}
@@ -106,7 +106,7 @@ setup:
       field_caps:
         index: test-*
         fields: "*"
-        body: { index_filter: { range: { timestamp: { lt: 2019 } } } }
+        body: { index_filter: { range: { timestamp: { lt: "2019" } } } }
 
   - match: {indices:                                      ["test-1"]}
   - length: {fields.field1:                               1}

+ 20 - 0
server/src/internalClusterTest/java/org/elasticsearch/search/simple/SimpleSearchIT.java

@@ -54,6 +54,8 @@ import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitC
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures;
 import static org.hamcrest.Matchers.containsString;
 import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.is;
+import static org.hamcrest.Matchers.oneOf;
 
 public class SimpleSearchIT extends ESIntegTestCase {
 
@@ -198,6 +200,7 @@ public class SimpleSearchIT extends ESIntegTestCase {
         createIndex("test");
         client().prepareIndex("test").setId("1").setSource("field", "2010-01-05T02:00").get();
         client().prepareIndex("test").setId("2").setSource("field", "2010-01-06T02:00").get();
+        client().prepareIndex("test").setId("3").setSource("field", "1967-01-01T00:00").get();
         ensureGreen();
         refresh();
         SearchResponse searchResponse = client().prepareSearch("test").setQuery(QueryBuilders.rangeQuery("field").gte("2010-01-03||+2d")
@@ -223,6 +226,23 @@ public class SimpleSearchIT extends ESIntegTestCase {
         searchResponse = client().prepareSearch("test").setQuery(
                 QueryBuilders.queryStringQuery("field:[2010-01-03||+2d TO 2010-01-04||+2d/d]")).get();
         assertHitCount(searchResponse, 2L);
+
+        // a string value of "1000" should be parsed as the year 1000 and return all three docs
+        searchResponse = client().prepareSearch("test")
+            .setQuery(QueryBuilders.rangeQuery("field").gt("1000"))
+            .get();
+        assertNoFailures(searchResponse);
+        assertHitCount(searchResponse, 3L);
+
+        // a numeric value of 1000 should be parsed as 1000 millis since epoch and return only docs after 1970
+        searchResponse = client().prepareSearch("test")
+            .setQuery(QueryBuilders.rangeQuery("field").gt(1000))
+            .get();
+        assertNoFailures(searchResponse);
+        assertHitCount(searchResponse, 2L);
+        String[] expectedIds = new String[] {"1", "2"};
+        assertThat(searchResponse.getHits().getHits()[0].getId(), is(oneOf(expectedIds)));
+        assertThat(searchResponse.getHits().getHits()[1].getId(), is(oneOf(expectedIds)));
     }
 
     public void testRangeQueryKeyword() throws Exception {

+ 18 - 4
server/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java

@@ -73,6 +73,7 @@ public final class DateFieldMapper extends FieldMapper {
     public static final String CONTENT_TYPE = "date";
     public static final String DATE_NANOS_CONTENT_TYPE = "date_nanos";
     public static final DateFormatter DEFAULT_DATE_TIME_FORMATTER = DateFormatter.forPattern("strict_date_optional_time||epoch_millis");
+    private static final DateMathParser EPOCH_MILLIS_PARSER = DateFormatter.forPattern("epoch_millis").toDateMathParser();
 
     public enum Resolution {
         MILLISECONDS(CONTENT_TYPE, NumericType.DATE) {
@@ -384,9 +385,17 @@ public final class DateFieldMapper extends FieldMapper {
                 throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() +
                         "] does not support DISJOINT ranges");
             }
-            DateMathParser parser = forcedDateParser == null
-                    ? dateMathParser
-                    : forcedDateParser;
+            DateMathParser parser;
+            if (forcedDateParser == null) {
+                if (lowerTerm instanceof Number || upperTerm instanceof Number) {
+                    // force epoch_millis
+                    parser = EPOCH_MILLIS_PARSER;
+                } else {
+                    parser = dateMathParser;
+                }
+            } else {
+                parser = forcedDateParser;
+            }
             return dateRangeQuery(lowerTerm, upperTerm, includeLower, includeUpper, timeZone, parser, context, resolution, (l, u) -> {
                 Query query = LongPoint.newRangeQuery(name(), l, u);
                 if (hasDocValues()) {
@@ -479,7 +488,12 @@ public final class DateFieldMapper extends FieldMapper {
                                            Object from, Object to, boolean includeLower, boolean includeUpper,
                                            ZoneId timeZone, DateMathParser dateParser, QueryRewriteContext context) throws IOException {
             if (dateParser == null) {
-                dateParser = this.dateMathParser;
+                if (from instanceof Number || to instanceof Number) {
+                    // force epoch_millis
+                    dateParser = EPOCH_MILLIS_PARSER;
+                } else {
+                    dateParser = this.dateMathParser;
+                }
             }
 
             long fromInclusive = Long.MIN_VALUE;

+ 8 - 20
x-pack/plugin/runtime-fields/src/main/java/org/elasticsearch/xpack/runtimefields/mapper/DateScriptFieldType.java

@@ -8,6 +8,7 @@ package org.elasticsearch.xpack.runtimefields.mapper;
 
 import com.carrotsearch.hppc.LongHashSet;
 import com.carrotsearch.hppc.LongSet;
+
 import org.apache.lucene.search.Query;
 import org.elasticsearch.common.CheckedBiConsumer;
 import org.elasticsearch.common.Nullable;
@@ -91,10 +92,12 @@ public class DateScriptFieldType extends AbstractScriptFieldType<DateFieldScript
     });
 
     private final DateFormatter dateTimeFormatter;
+    private final DateMathParser dateMathParser;
 
     private DateScriptFieldType(String name, DateFieldScript.Factory scriptFactory, DateFormatter dateTimeFormatter, Builder builder) {
         super(name, (n, params, ctx) -> scriptFactory.newFactory(n, params, ctx, dateTimeFormatter), builder);
         this.dateTimeFormatter = dateTimeFormatter;
+        this.dateMathParser = dateTimeFormatter.toDateMathParser();
     }
 
     DateScriptFieldType(
@@ -107,6 +110,7 @@ public class DateScriptFieldType extends AbstractScriptFieldType<DateFieldScript
     ) {
         super(name, (n, params, ctx) -> scriptFactory.newFactory(n, params, ctx, dateTimeFormatter), script, meta, toXContent);
         this.dateTimeFormatter = dateTimeFormatter;
+        this.dateMathParser = dateTimeFormatter.toDateMathParser();
     }
 
     @Override
@@ -148,7 +152,7 @@ public class DateScriptFieldType extends AbstractScriptFieldType<DateFieldScript
                 origin,
                 true,
                 null,
-                dateTimeFormatter.toDateMathParser(),
+                this.dateMathParser,
                 now,
                 DateFieldMapper.Resolution.MILLISECONDS
             );
@@ -179,7 +183,7 @@ public class DateScriptFieldType extends AbstractScriptFieldType<DateFieldScript
         @Nullable DateMathParser parser,
         QueryShardContext context
     ) {
-        parser = parser == null ? dateTimeFormatter.toDateMathParser() : parser;
+        parser = parser == null ? this.dateMathParser : parser;
         checkAllowExpensiveQueries(context);
         return DateFieldType.dateRangeQuery(
             lowerTerm,
@@ -197,14 +201,7 @@ public class DateScriptFieldType extends AbstractScriptFieldType<DateFieldScript
     @Override
     public Query termQuery(Object value, QueryShardContext context) {
         return DateFieldType.handleNow(context, now -> {
-            long l = DateFieldType.parseToLong(
-                value,
-                false,
-                null,
-                dateTimeFormatter.toDateMathParser(),
-                now,
-                DateFieldMapper.Resolution.MILLISECONDS
-            );
+            long l = DateFieldType.parseToLong(value, false, null, this.dateMathParser, now, DateFieldMapper.Resolution.MILLISECONDS);
             checkAllowExpensiveQueries(context);
             return new LongScriptFieldTermQuery(script, leafFactory(context)::newInstance, name(), l);
         });
@@ -218,16 +215,7 @@ public class DateScriptFieldType extends AbstractScriptFieldType<DateFieldScript
         return DateFieldType.handleNow(context, now -> {
             LongSet terms = new LongHashSet(values.size());
             for (Object value : values) {
-                terms.add(
-                    DateFieldType.parseToLong(
-                        value,
-                        false,
-                        null,
-                        dateTimeFormatter.toDateMathParser(),
-                        now,
-                        DateFieldMapper.Resolution.MILLISECONDS
-                    )
-                );
+                terms.add(DateFieldType.parseToLong(value, false, null, this.dateMathParser, now, DateFieldMapper.Resolution.MILLISECONDS));
             }
             checkAllowExpensiveQueries(context);
             return new LongScriptFieldTermsQuery(script, leafFactory(context)::newInstance, name(), terms);