Explorar el Código

Fail composite aggregation if after key is unparsable (#74252)

If the after key cannot parse back to the same value we generated it from, when the client sends it back we will land on a wrong page.  If this page is earlier, it is likely we will (eventually) generate the same wrong after key, resulting in an infinite loop as the client repeatedly ask to retrieve the same page or pages of data.  This change fixes that by failing the composite aggregation (with an exception) if the after key is unparsable with the given format.  We provide as much information about what failed as possible.
Mark Tozzi hace 4 años
padre
commit
cd2732d40b

+ 55 - 0
rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.aggregation/230_composite.yml

@@ -490,6 +490,61 @@ setup:
   - match: { aggregations.test.buckets.0.doc_count: 1 }
 
 ---
+"Composite aggregation with invalid format":
+  - skip:
+      version: " - 7.99.99"
+      reason:  After key parse checking added in 7.14 (backport pending)
+
+  - do:
+      catch: /created output it couldn't parse/
+      search:
+        rest_total_hits_as_int: true
+        index: test
+        body:
+          aggregations:
+            test:
+              composite:
+                sources: [
+                  {
+                    "date": {
+                      "date_histogram": {
+                        "field": "date",
+                        "calendar_interval": "1d",
+                        # mixes week based and month based
+                        "format": "YYYY-MM-dd"
+                      }
+                    }
+                  }
+                ]
+
+---
+"Composite aggregation with lossy format":
+  - skip:
+      version: " - 7.99.99"
+      reason:  After key parse checking added in 7.14 (backport pending)
+
+  - do:
+      catch: /created output it couldn't parse/
+      search:
+        rest_total_hits_as_int: true
+        index: test
+        body:
+          aggregations:
+            test:
+              composite:
+                sources: [
+                  {
+                    "date": {
+                      "date_histogram": {
+                        "field": "date",
+                        "calendar_interval": "1d",
+                        # format is lower resolution than buckets, after key will lose data
+                        "format": "yyyy-MM"
+                      }
+                    }
+                  }
+                ]
+---
 "Composite aggregation with date_histogram offset":
   - skip:
       version: " - 7.5.99"

+ 8 - 2
server/src/main/java/org/elasticsearch/search/DocValueFormat.java

@@ -237,10 +237,11 @@ public interface DocValueFormat extends NamedWriteable {
         }
 
         public DateTime(StreamInput in) throws IOException {
-            this.formatter = DateFormatter.forPattern(in.readString());
-            this.parser = formatter.toDateMathParser();
+            String formatterPattern = in.readString();
             String zoneId = in.readString();
             this.timeZone = ZoneId.of(zoneId);
+            this.formatter = DateFormatter.forPattern(formatterPattern).withZone(this.timeZone);
+            this.parser = formatter.toDateMathParser();
             this.resolution = DateFieldMapper.Resolution.ofOrdinal(in.readVInt());
             if (in.getVersion().onOrAfter(Version.V_7_7_0) && in.getVersion().before(Version.V_8_0_0)) {
                 /* when deserialising from 7.7+ nodes expect a flag indicating if a pattern is of joda style
@@ -374,6 +375,11 @@ public interface DocValueFormat extends NamedWriteable {
         public String format(double value) {
             return format((long) value);
         }
+
+        @Override
+        public long parseLong(String value, boolean roundUp, LongSupplier now) {
+            return GeoTileUtils.longEncode(value);
+        }
     };
 
     DocValueFormat BOOLEAN = BooleanDocValueFormat.INSTANCE;

+ 6 - 3
server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/GeoTileValuesSource.java

@@ -10,11 +10,10 @@ package org.elasticsearch.search.aggregations.bucket.composite;
 
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.SortedNumericDocValues;
-import org.elasticsearch.core.CheckedFunction;
 import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.core.CheckedFunction;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.search.DocValueFormat;
-import org.elasticsearch.search.aggregations.bucket.geogrid.GeoTileUtils;
 
 import java.io.IOException;
 import java.util.function.LongUnaryOperator;
@@ -44,7 +43,11 @@ class GeoTileValuesSource extends LongValuesSource {
         } else if (value instanceof Number) {
             afterValue = ((Number) value).longValue();
         } else {
-            afterValue = GeoTileUtils.longEncode(value.toString());
+            afterValue = format.parseLong(
+                value.toString(),
+                false,
+                () -> { throw new IllegalArgumentException("now() is not supported in [after] key"); }
+            );
         }
     }
 }

+ 31 - 7
server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/InternalComposite.java

@@ -390,34 +390,58 @@ public class InternalComposite
      * Format <code>obj</code> using the provided {@link DocValueFormat}.
      * If the format is equals to {@link DocValueFormat#RAW}, the object is returned as is
      * for numbers and a string for {@link BytesRef}s.
+     *
+     * This method will then attempt to parse the formatted value using the specified format,
+     * and throw an IllegalArgumentException if parsing fails.  This in turn prevents us from
+     * returning an after_key which we can't subsequently parse into the original value.
      */
     static Object formatObject(Object obj, DocValueFormat format) {
         if (obj == null) {
             return null;
         }
+        Object formatted = obj;
+        Object parsed;
         if (obj.getClass() == BytesRef.class) {
             BytesRef value = (BytesRef) obj;
             if (format == DocValueFormat.RAW) {
-                return value.utf8ToString();
+                formatted = value.utf8ToString();
             } else {
-                return format.format(value);
+                formatted = format.format(value);
+            }
+            parsed = format.parseBytesRef(formatted.toString());
+            if (parsed.equals(obj) == false) {
+                throw new IllegalArgumentException("Format [" + format + "] created output it couldn't parse for value [" + obj +"] "
+                    + "of type [" + obj.getClass() + "]. parsed value: [" + parsed + "(" + parsed.getClass() + ")]");
             }
         } else if (obj.getClass() == Long.class) {
             long value = (long) obj;
             if (format == DocValueFormat.RAW) {
-                return value;
+                formatted = value;
             } else {
-                return format.format(value);
+                formatted = format.format(value);
+            }
+            parsed = format.parseLong(formatted.toString(), false, () -> {
+                throw new UnsupportedOperationException("Using now() is not supported in after keys");
+            });
+            if (parsed.equals(((Number) obj).longValue()) == false) {
+                throw new IllegalArgumentException("Format [" + format + "] created output it couldn't parse for value [" + obj +"] "
+                    + "of type [" + obj.getClass() + "]. parsed value: [" + parsed + "(" + parsed.getClass() + ")]");
             }
         } else if (obj.getClass() == Double.class) {
             double value = (double) obj;
             if (format == DocValueFormat.RAW) {
-                return value;
+                formatted = value;
             } else {
-                return format.format(value);
+                formatted = format.format(value);
+            }
+            parsed = format.parseDouble(formatted.toString(), false,
+                () -> {throw new UnsupportedOperationException("Using now() is not supported in after keys");});
+            if (parsed.equals(((Number) obj).doubleValue()) == false) {
+                throw new IllegalArgumentException("Format [" + format + "] created output it couldn't parse for value [" + obj +"] "
+                    + "of type [" + obj.getClass() + "]. parsed value: [" + parsed + "(" + parsed.getClass() + ")]");
             }
         }
-        return obj;
+        return formatted;
     }
 
     static class ArrayMap extends AbstractMap<String, Object> implements Comparable<ArrayMap> {

+ 32 - 0
server/src/test/java/org/elasticsearch/search/aggregations/bucket/composite/InternalCompositeTests.java

@@ -377,6 +377,38 @@ public class InternalCompositeTests extends InternalMultiBucketAggregationTestCa
             containsString("java.lang.String cannot be cast to"));
     }
 
+    public void testFormatObjectChecked() {
+        DocValueFormat weekYearMonth = new DocValueFormat.DateTime(
+            // YYYY is week-based year.  The parser will ignore MM (month-of-year) and dd (day-of-month)
+            DateFormatter.forPattern("YYYY-MM-dd"),
+            ZoneOffset.UTC,
+            DateFieldMapper.Resolution.MILLISECONDS
+        );
+        long epoch = 1622060077L; // May 25th 2021, evening
+        expectThrows(IllegalArgumentException.class, () -> InternalComposite.formatObject(epoch, weekYearMonth));
+    }
+
+    public void testFormatDateEpochTimezone() {
+        DocValueFormat epochSecond = new DocValueFormat.DateTime(
+            // YYYY is week-based year.  The parser will ignore MM (month-of-year) and dd (day-of-month)
+            DateFormatter.forPattern("epoch_second"),
+            ZoneOffset.ofHours(-2),
+            DateFieldMapper.Resolution.MILLISECONDS
+        );
+        DocValueFormat epochMillis = new DocValueFormat.DateTime(
+            // YYYY is week-based year.  The parser will ignore MM (month-of-year) and dd (day-of-month)
+            DateFormatter.forPattern("epoch_millis"),
+            ZoneOffset.ofHours(-2),
+            DateFieldMapper.Resolution.MILLISECONDS
+        );
+        long epoch = 1622060077L; // May 25th 2021, evening
+        Object actual = InternalComposite.formatObject(epoch, epochSecond);
+        assertEquals("1622060.077", actual.toString());
+
+        actual = InternalComposite.formatObject(epoch, epochMillis);
+        assertEquals("1622060077", actual.toString());
+    }
+
     private InternalComposite.ArrayMap createMap(List<String> fields, Comparable[] values) {
         List<DocValueFormat> formats = IntStream.range(0, fields.size())
             .mapToObj(i -> DocValueFormat.RAW).collect(Collectors.toList());