Browse Source

Specify parse index when error occurs on multiple datetime parses (#108607)

Simon Cooper 1 year ago
parent
commit
cb7f2b6833

+ 5 - 0
docs/changelog/108607.yaml

@@ -0,0 +1,5 @@
+pr: 108607
+summary: Specify parse index when error occurs on multiple datetime parses
+area: Infra/Core
+type: bug
+issues: []

+ 1 - 2
server/src/main/java/org/elasticsearch/common/time/DateTimeParser.java

@@ -12,7 +12,6 @@ import java.time.ZoneId;
 import java.time.format.DateTimeParseException;
 import java.time.temporal.TemporalAccessor;
 import java.util.Locale;
-import java.util.Optional;
 
 /**
  * An object that can parse strings into datetime objects
@@ -40,5 +39,5 @@ interface DateTimeParser {
      * <p>
      * The pattern must fully match, using the whole string. It must not throw exceptions if parsing fails.
      */
-    Optional<TemporalAccessor> tryParse(CharSequence str);
+    ParseResult tryParse(CharSequence str);
 }

+ 2 - 3
server/src/main/java/org/elasticsearch/common/time/Iso8601DateTimeParser.java

@@ -14,7 +14,6 @@ import java.time.temporal.ChronoField;
 import java.time.temporal.TemporalAccessor;
 import java.util.Locale;
 import java.util.Map;
-import java.util.Optional;
 import java.util.Set;
 
 class Iso8601DateTimeParser implements DateTimeParser {
@@ -72,7 +71,7 @@ class Iso8601DateTimeParser implements DateTimeParser {
     }
 
     @Override
-    public Optional<TemporalAccessor> tryParse(CharSequence str) {
-        return Optional.ofNullable(parser.tryParse(str, timezone).result());
+    public ParseResult tryParse(CharSequence str) {
+        return parser.tryParse(str, timezone);
     }
 }

+ 44 - 59
server/src/main/java/org/elasticsearch/common/time/Iso8601Parser.java

@@ -37,21 +37,6 @@ import java.util.Set;
  */
 class Iso8601Parser {
 
-    /**
-     * The result of the parse. If successful, {@code result} will be non-null.
-     * If parse failed, {@code errorIndex} specifies the index into the parsed string
-     * that the first invalid data was encountered.
-     */
-    record Result(@Nullable DateTime result, int errorIndex) {
-        Result(DateTime result) {
-            this(result, -1);
-        }
-
-        static Result error(int errorIndex) {
-            return new Result(null, errorIndex);
-        }
-    }
-
     private static final Set<ChronoField> VALID_MANDATORY_FIELDS = EnumSet.of(
         ChronoField.YEAR,
         ChronoField.MONTH_OF_YEAR,
@@ -127,24 +112,24 @@ class Iso8601Parser {
     }
 
     /**
-     * Attempts to parse {@code str} as an ISO-8601 datetime, returning a {@link Result} indicating if the parse
+     * Attempts to parse {@code str} as an ISO-8601 datetime, returning a {@link ParseResult} indicating if the parse
      * was successful or not, and what fields were present.
      * @param str             The string to parse
      * @param defaultTimezone The default timezone to return, if no timezone is present in the string
-     * @return                The {@link Result} of the parse.
+     * @return                The {@link ParseResult} of the parse.
      */
-    Result tryParse(CharSequence str, @Nullable ZoneId defaultTimezone) {
+    ParseResult tryParse(CharSequence str, @Nullable ZoneId defaultTimezone) {
         if (str.charAt(0) == '-') {
             // the year is negative. This is most unusual.
             // Instead of always adding offsets and dynamically calculating position in the main parser code below,
             // just in case it starts with a -, just parse the substring, then adjust the output appropriately
-            Result result = parse(new CharSubSequence(str, 1, str.length()), defaultTimezone);
+            ParseResult result = parse(new CharSubSequence(str, 1, str.length()), defaultTimezone);
 
             if (result.errorIndex() >= 0) {
-                return Result.error(result.errorIndex() + 1);
+                return ParseResult.error(result.errorIndex() + 1);
             } else {
-                DateTime dt = result.result();
-                return new Result(
+                DateTime dt = (DateTime) result.result();
+                return new ParseResult(
                     new DateTime(
                         -dt.years(),
                         dt.months(),
@@ -178,15 +163,15 @@ class Iso8601Parser {
      * at any point after a time field.
      * It also does not use exceptions, instead returning {@code null} where a value cannot be parsed.
      */
-    private Result parse(CharSequence str, @Nullable ZoneId defaultTimezone) {
+    private ParseResult parse(CharSequence str, @Nullable ZoneId defaultTimezone) {
         int len = str.length();
 
         // YEARS
         Integer years = parseInt(str, 0, 4);
-        if (years == null) return Result.error(0);
+        if (years == null) return ParseResult.error(0);
         if (len == 4) {
             return isOptional(ChronoField.MONTH_OF_YEAR)
-                ? new Result(
+                ? new ParseResult(
                     withZoneOffset(
                         years,
                         defaults.get(ChronoField.MONTH_OF_YEAR),
@@ -198,17 +183,17 @@ class Iso8601Parser {
                         defaultTimezone
                     )
                 )
-                : Result.error(4);
+                : ParseResult.error(4);
         }
 
-        if (str.charAt(4) != '-') return Result.error(4);
+        if (str.charAt(4) != '-') return ParseResult.error(4);
 
         // MONTHS
         Integer months = parseInt(str, 5, 7);
-        if (months == null || months > 12) return Result.error(5);
+        if (months == null || months > 12) return ParseResult.error(5);
         if (len == 7) {
             return isOptional(ChronoField.DAY_OF_MONTH)
-                ? new Result(
+                ? new ParseResult(
                     withZoneOffset(
                         years,
                         months,
@@ -220,17 +205,17 @@ class Iso8601Parser {
                         defaultTimezone
                     )
                 )
-                : Result.error(7);
+                : ParseResult.error(7);
         }
 
-        if (str.charAt(7) != '-') return Result.error(7);
+        if (str.charAt(7) != '-') return ParseResult.error(7);
 
         // DAYS
         Integer days = parseInt(str, 8, 10);
-        if (days == null || days > 31) return Result.error(8);
+        if (days == null || days > 31) return ParseResult.error(8);
         if (len == 10) {
             return optionalTime || isOptional(ChronoField.HOUR_OF_DAY)
-                ? new Result(
+                ? new ParseResult(
                     withZoneOffset(
                         years,
                         months,
@@ -242,13 +227,13 @@ class Iso8601Parser {
                         defaultTimezone
                     )
                 )
-                : Result.error(10);
+                : ParseResult.error(10);
         }
 
-        if (str.charAt(10) != 'T') return Result.error(10);
+        if (str.charAt(10) != 'T') return ParseResult.error(10);
         if (len == 11) {
             return isOptional(ChronoField.HOUR_OF_DAY)
-                ? new Result(
+                ? new ParseResult(
                     withZoneOffset(
                         years,
                         months,
@@ -260,15 +245,15 @@ class Iso8601Parser {
                         defaultTimezone
                     )
                 )
-                : Result.error(11);
+                : ParseResult.error(11);
         }
 
         // HOURS + timezone
         Integer hours = parseInt(str, 11, 13);
-        if (hours == null || hours > 23) return Result.error(11);
+        if (hours == null || hours > 23) return ParseResult.error(11);
         if (len == 13) {
             return isOptional(ChronoField.MINUTE_OF_HOUR)
-                ? new Result(
+                ? new ParseResult(
                     withZoneOffset(
                         years,
                         months,
@@ -280,12 +265,12 @@ class Iso8601Parser {
                         defaultTimezone
                     )
                 )
-                : Result.error(13);
+                : ParseResult.error(13);
         }
         if (isZoneId(str, 13)) {
             ZoneId timezone = parseZoneId(str, 13);
             return timezone != null && isOptional(ChronoField.MINUTE_OF_HOUR)
-                ? new Result(
+                ? new ParseResult(
                     withZoneOffset(
                         years,
                         months,
@@ -297,17 +282,17 @@ class Iso8601Parser {
                         timezone
                     )
                 )
-                : Result.error(13);
+                : ParseResult.error(13);
         }
 
-        if (str.charAt(13) != ':') return Result.error(13);
+        if (str.charAt(13) != ':') return ParseResult.error(13);
 
         // MINUTES + timezone
         Integer minutes = parseInt(str, 14, 16);
-        if (minutes == null || minutes > 59) return Result.error(14);
+        if (minutes == null || minutes > 59) return ParseResult.error(14);
         if (len == 16) {
             return isOptional(ChronoField.SECOND_OF_MINUTE)
-                ? new Result(
+                ? new ParseResult(
                     withZoneOffset(
                         years,
                         months,
@@ -319,12 +304,12 @@ class Iso8601Parser {
                         defaultTimezone
                     )
                 )
-                : Result.error(16);
+                : ParseResult.error(16);
         }
         if (isZoneId(str, 16)) {
             ZoneId timezone = parseZoneId(str, 16);
             return timezone != null && isOptional(ChronoField.SECOND_OF_MINUTE)
-                ? new Result(
+                ? new ParseResult(
                     withZoneOffset(
                         years,
                         months,
@@ -336,30 +321,30 @@ class Iso8601Parser {
                         timezone
                     )
                 )
-                : Result.error(16);
+                : ParseResult.error(16);
         }
 
-        if (str.charAt(16) != ':') return Result.error(16);
+        if (str.charAt(16) != ':') return ParseResult.error(16);
 
         // SECONDS + timezone
         Integer seconds = parseInt(str, 17, 19);
-        if (seconds == null || seconds > 59) return Result.error(17);
+        if (seconds == null || seconds > 59) return ParseResult.error(17);
         if (len == 19) {
-            return new Result(
+            return new ParseResult(
                 withZoneOffset(years, months, days, hours, minutes, seconds, defaultZero(ChronoField.NANO_OF_SECOND), defaultTimezone)
             );
         }
         if (isZoneId(str, 19)) {
             ZoneId timezone = parseZoneId(str, 19);
             return timezone != null
-                ? new Result(
+                ? new ParseResult(
                     withZoneOffset(years, months, days, hours, minutes, seconds, defaultZero(ChronoField.NANO_OF_SECOND), timezone)
                 )
-                : Result.error(19);
+                : ParseResult.error(19);
         }
 
         char decSeparator = str.charAt(19);
-        if (decSeparator != '.' && decSeparator != ',') return Result.error(19);
+        if (decSeparator != '.' && decSeparator != ',') return ParseResult.error(19);
 
         // NANOS + timezone
         // nanos are always optional
@@ -373,23 +358,23 @@ class Iso8601Parser {
             nanos = nanos * 10 + (c - ZERO);
         }
 
-        if (pos == 20) return Result.error(20);   // didn't find a number at all
+        if (pos == 20) return ParseResult.error(20);   // didn't find a number at all
 
         // multiply it by the correct multiplicand to get the nanos
         nanos *= NANO_MULTIPLICANDS[29 - pos];
 
         if (len == pos) {
-            return new Result(withZoneOffset(years, months, days, hours, minutes, seconds, nanos, defaultTimezone));
+            return new ParseResult(withZoneOffset(years, months, days, hours, minutes, seconds, nanos, defaultTimezone));
         }
         if (isZoneId(str, pos)) {
             ZoneId timezone = parseZoneId(str, pos);
             return timezone != null
-                ? new Result(withZoneOffset(years, months, days, hours, minutes, seconds, nanos, timezone))
-                : Result.error(pos);
+                ? new ParseResult(withZoneOffset(years, months, days, hours, minutes, seconds, nanos, timezone))
+                : ParseResult.error(pos);
         }
 
         // still chars left at the end - string is not valid
-        return Result.error(pos);
+        return ParseResult.error(pos);
     }
 
     private static boolean isZoneId(CharSequence str, int pos) {

+ 7 - 5
server/src/main/java/org/elasticsearch/common/time/JavaDateFormatter.java

@@ -206,13 +206,15 @@ class JavaDateFormatter implements DateFormatter {
      */
     private static TemporalAccessor doParse(String input, DateTimeParser[] parsers) {
         if (parsers.length > 1) {
-            for (DateTimeParser formatter : parsers) {
-                var result = formatter.tryParse(input);
-                if (result.isPresent()) {
-                    return result.get();
+            int earliestError = Integer.MAX_VALUE;
+            for (DateTimeParser parser : parsers) {
+                ParseResult result = parser.tryParse(input);
+                if (result.result() != null) {
+                    return result.result();
                 }
+                earliestError = Math.min(earliestError, result.errorIndex());
             }
-            throw new DateTimeParseException("Failed to parse with all enclosed parsers", input, 0);
+            throw new DateTimeParseException("Failed to parse with all enclosed parsers", input, earliestError);
         }
         return parsers[0].parse(input);
     }

+ 3 - 4
server/src/main/java/org/elasticsearch/common/time/JavaTimeDateTimeParser.java

@@ -14,7 +14,6 @@ import java.time.format.DateTimeFormatter;
 import java.time.format.DateTimeFormatterBuilder;
 import java.time.temporal.TemporalAccessor;
 import java.util.Locale;
-import java.util.Optional;
 import java.util.function.Consumer;
 import java.util.function.UnaryOperator;
 
@@ -65,9 +64,9 @@ class JavaTimeDateTimeParser implements DateTimeParser {
     }
 
     @Override
-    public Optional<TemporalAccessor> tryParse(CharSequence str) {
+    public ParseResult tryParse(CharSequence str) {
         ParsePosition pos = new ParsePosition(0);
-        return Optional.ofNullable((TemporalAccessor) formatter.toFormat().parseObject(str.toString(), pos))
-            .filter(ta -> pos.getIndex() == str.length());
+        var result = (TemporalAccessor) formatter.toFormat().parseObject(str.toString(), pos);
+        return pos.getIndex() == str.length() ? new ParseResult(result) : ParseResult.error(Math.max(pos.getErrorIndex(), pos.getIndex()));
     }
 }

+ 28 - 0
server/src/main/java/org/elasticsearch/common/time/ParseResult.java

@@ -0,0 +1,28 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.common.time;
+
+import org.elasticsearch.core.Nullable;
+
+import java.time.temporal.TemporalAccessor;
+
+/**
+ * The result of the parse. If successful, {@code result} will be non-null.
+ * If parse failed, {@code errorIndex} specifies the index into the parsed string
+ * that the first invalid data was encountered.
+ */
+record ParseResult(@Nullable TemporalAccessor result, int errorIndex) {
+    ParseResult(TemporalAccessor result) {
+        this(result, -1);
+    }
+
+    static ParseResult error(int errorIndex) {
+        return new ParseResult(null, errorIndex);
+    }
+}

+ 8 - 1
server/src/test/java/org/elasticsearch/common/time/DateFormattersTests.java

@@ -20,6 +20,7 @@ import java.time.ZoneId;
 import java.time.ZoneOffset;
 import java.time.ZonedDateTime;
 import java.time.format.DateTimeFormatter;
+import java.time.format.DateTimeParseException;
 import java.time.temporal.ChronoField;
 import java.time.temporal.TemporalAccessor;
 import java.util.List;
@@ -38,11 +39,12 @@ import static org.hamcrest.Matchers.sameInstance;
 
 public class DateFormattersTests extends ESTestCase {
 
-    private void assertParseException(String input, String format) {
+    private IllegalArgumentException assertParseException(String input, String format) {
         DateFormatter javaTimeFormatter = DateFormatter.forPattern(format);
         IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> javaTimeFormatter.parse(input));
         assertThat(e.getMessage(), containsString(input));
         assertThat(e.getMessage(), containsString(format));
+        return e;
     }
 
     private void assertParses(String input, String format) {
@@ -1136,6 +1138,11 @@ public class DateFormattersTests extends ESTestCase {
         assertParseException("2014-06-06T12:01:02.123", "yyyy-MM-dd'T'HH:mm:ss||yyyy-MM-dd'T'HH:mm:ss.SS");
     }
 
+    public void testExceptionErrorIndex() {
+        Exception e = assertParseException("2024-01-01j", "iso8601||strict_date_optional_time");
+        assertThat(((DateTimeParseException) e.getCause()).getErrorIndex(), equalTo(10));
+    }
+
     public void testStrictParsing() {
         assertParses("2018W313", "strict_basic_week_date");
         assertParseException("18W313", "strict_basic_week_date");

+ 5 - 5
server/src/test/java/org/elasticsearch/common/time/Iso8601ParserTests.java

@@ -45,12 +45,12 @@ public class Iso8601ParserTests extends ESTestCase {
         return new Iso8601Parser(Set.of(), true, Map.of());
     }
 
-    private static Matcher<Iso8601Parser.Result> hasResult(DateTime dateTime) {
-        return transformedMatch(Iso8601Parser.Result::result, equalTo(dateTime));
+    private static Matcher<ParseResult> hasResult(DateTime dateTime) {
+        return transformedMatch(ParseResult::result, equalTo(dateTime));
     }
 
-    private static Matcher<Iso8601Parser.Result> hasError(int parseError) {
-        return transformedMatch(Iso8601Parser.Result::errorIndex, equalTo(parseError));
+    private static Matcher<ParseResult> hasError(int parseError) {
+        return transformedMatch(ParseResult::errorIndex, equalTo(parseError));
     }
 
     public void testStrangeParses() {
@@ -188,7 +188,7 @@ public class Iso8601ParserTests extends ESTestCase {
         assertThat(defaultParser().tryParse("2023-01-01T12:00:00.0000000005", null), hasError(29));
     }
 
-    private static Matcher<Iso8601Parser.Result> hasTimezone(ZoneId offset) {
+    private static Matcher<ParseResult> hasTimezone(ZoneId offset) {
         return transformedMatch(r -> r.result().query(TemporalQueries.zone()), equalTo(offset));
     }