1
0
Эх сурвалжийг харах

Preserve `date_histogram` format when aggregating on unmapped fields (#35254)

Currently when aggregating on an unmapped date field (e.g. using a
date_histogram) we don't preserve the aggregations `format` setting but instead
use the default format. This can lead to loosing the aggregations `format` when
aggregating over several indices where some of them contain unmapped date fields
and are encountered first in the reduce phase.

Related to #31760
Christoph Büscher 7 жил өмнө
parent
commit
14b811446f

+ 8 - 4
server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSourceConfig.java

@@ -21,6 +21,7 @@ package org.elasticsearch.search.aggregations.support;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.geo.GeoPoint;
+import org.elasticsearch.common.joda.Joda;
 import org.elasticsearch.index.fielddata.IndexFieldData;
 import org.elasticsearch.index.fielddata.IndexGeoPointFieldData;
 import org.elasticsearch.index.fielddata.IndexNumericFieldData;
@@ -53,7 +54,7 @@ public class ValuesSourceConfig<VS extends ValuesSource> {
         if (field == null) {
             if (script == null) {
                 ValuesSourceConfig<VS> config = new ValuesSourceConfig<>(ValuesSourceType.ANY);
-                config.format(resolveFormat(null, valueType));
+                config.format(resolveFormat(null, valueType, timeZone));
                 return config;
             }
             ValuesSourceType valuesSourceType = valueType != null ? valueType.getValuesSourceType() : ValuesSourceType.ANY;
@@ -67,7 +68,7 @@ public class ValuesSourceConfig<VS extends ValuesSource> {
             ValuesSourceConfig<VS> config = new ValuesSourceConfig<>(valuesSourceType);
             config.missing(missing);
             config.timezone(timeZone);
-            config.format(resolveFormat(format, valueType));
+            config.format(resolveFormat(format, valueType, timeZone));
             config.script(createScript(script, context));
             config.scriptValueType(valueType);
             return config;
@@ -79,7 +80,7 @@ public class ValuesSourceConfig<VS extends ValuesSource> {
             ValuesSourceConfig<VS> config = new ValuesSourceConfig<>(valuesSourceType);
             config.missing(missing);
             config.timezone(timeZone);
-            config.format(resolveFormat(format, valueType));
+            config.format(resolveFormat(format, valueType, timeZone));
             config.unmapped(true);
             if (valueType != null) {
                 // todo do we really need this for unmapped?
@@ -120,7 +121,7 @@ public class ValuesSourceConfig<VS extends ValuesSource> {
         }
     }
 
-    private static DocValueFormat resolveFormat(@Nullable String format, @Nullable ValueType valueType) {
+    private static DocValueFormat resolveFormat(@Nullable String format, @Nullable ValueType valueType, @Nullable DateTimeZone tz) {
         if (valueType == null) {
             return DocValueFormat.RAW; // we can't figure it out
         }
@@ -128,6 +129,9 @@ public class ValuesSourceConfig<VS extends ValuesSource> {
         if (valueFormat instanceof DocValueFormat.Decimal && format != null) {
             valueFormat = new DocValueFormat.Decimal(format);
         }
+        if (valueFormat instanceof DocValueFormat.DateTime && format != null) {
+            valueFormat = new DocValueFormat.DateTime(Joda.forPattern(format), tz != null ? tz : DateTimeZone.UTC);
+        }
         return valueFormat;
     }
 

+ 25 - 0
server/src/test/java/org/elasticsearch/search/aggregations/bucket/DateHistogramIT.java

@@ -1341,6 +1341,31 @@ public class DateHistogramIT extends ESIntegTestCase {
         }
     }
 
+    /**
+     * https://github.com/elastic/elasticsearch/issues/31760 shows an edge case where an unmapped "date" field in two indices
+     * that are queried simultaneously can lead to the "format" parameter in the aggregation not being preserved correctly.
+     *
+     * The error happens when the bucket from the "unmapped" index is received first in the reduce phase, however the case can
+     * be recreated when aggregating about a single index with an unmapped date field and also getting "empty" buckets.
+     */
+    public void testFormatIndexUnmapped() throws InterruptedException, ExecutionException {
+        String indexDateUnmapped = "test31760";
+        indexRandom(true, client().prepareIndex(indexDateUnmapped, "_doc").setSource("foo", "bar"));
+        ensureSearchable(indexDateUnmapped);
+
+        SearchResponse response = client().prepareSearch(indexDateUnmapped)
+                .addAggregation(
+                        dateHistogram("histo").field("dateField").dateHistogramInterval(DateHistogramInterval.MONTH).format("YYYY-MM")
+                                .minDocCount(0).extendedBounds(new ExtendedBounds("2018-01", "2018-01")))
+                .execute().actionGet();
+        assertSearchResponse(response);
+        Histogram histo = response.getAggregations().get("histo");
+        assertThat(histo.getBuckets().size(), equalTo(1));
+        assertThat(histo.getBuckets().get(0).getKeyAsString(), equalTo("2018-01"));
+        assertThat(histo.getBuckets().get(0).getDocCount(), equalTo(0L));
+        internalCluster().wipeIndices(indexDateUnmapped);
+    }
+
     /**
      * https://github.com/elastic/elasticsearch/issues/31392 demonstrates an edge case where a date field mapping with
      * "format" = "epoch_millis" can lead for the date histogram aggregation to throw an error if a non-UTC time zone