1
0
Эх сурвалжийг харах

Adjust InternalTimeSeries reduce logic (#92240)

The `InternalTimeSeries#reduce(...)` method tries to be clever and not
perform a reduce when no duplicate tsids are encountered, since there is
then only one bucket per tsid.

However, some multi-bucket aggregations always require that a reduce
operation happens (all aggregations that change
`mustReduceOnSingleInternalAgg()` to return true). Also, in tests we make
assertions that implicitly require a reduce operation to happen, even if
no reduce is really required.

This change makes sure that the reduce operation for the sub-aggregations of
the time_series aggregation always happens.
Martijn van Groningen 2 жил өмнө
parent
commit
93b04a5105

+ 1 - 0
modules/aggregations/src/main/java/org/elasticsearch/aggregations/bucket/timeseries/InternalTimeSeries.java

@@ -240,6 +240,7 @@ public class InternalTimeSeries extends InternalMultiBucketAggregation<InternalT
             InternalBucket reducedBucket;
             if (bucketsWithSameKey.size() == 1) {
                 reducedBucket = bucketsWithSameKey.get(0);
+                reducedBucket.aggregations = InternalAggregations.reduce(List.of(reducedBucket.aggregations), reduceContext);
             } else {
                 reducedBucket = reduceBucket(bucketsWithSameKey, reduceContext);
             }

+ 45 - 0
modules/aggregations/src/test/java/org/elasticsearch/aggregations/bucket/timeseries/TimeSeriesAggregatorTests.java

@@ -27,6 +27,7 @@ import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.mapper.NumberFieldMapper;
 import org.elasticsearch.index.mapper.TimeSeriesIdFieldMapper;
 import org.elasticsearch.index.mapper.TimeSeriesIdFieldMapper.TimeSeriesIdBuilder;
+import org.elasticsearch.search.aggregations.InternalAggregations;
 import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramAggregationBuilder;
 import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramInterval;
 import org.elasticsearch.search.aggregations.bucket.histogram.InternalDateHistogram;
@@ -39,6 +40,7 @@ import java.util.List;
 import java.util.function.Consumer;
 
 import static org.elasticsearch.search.aggregations.AggregationBuilders.sum;
+import static org.hamcrest.Matchers.contains;
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.hasSize;
 
@@ -135,6 +137,49 @@ public class TimeSeriesAggregatorTests extends AggregationTestCase {
         );
     }
 
+    public void testMultiBucketAggregationAsSubAggregation() throws IOException {
+        long startTime = DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2023-01-01T00:00:00Z");
+        CheckedConsumer<RandomIndexWriter, IOException> buildIndex = iw -> {
+            writeTS(iw, startTime + 1, new Object[] { "dim1", "aaa", "dim2", "xxx" }, new Object[] {});
+            writeTS(iw, startTime + 2, new Object[] { "dim1", "aaa", "dim2", "yyy" }, new Object[] {});
+            writeTS(iw, startTime + 3, new Object[] { "dim1", "bbb", "dim2", "zzz" }, new Object[] {});
+            writeTS(iw, startTime + 4, new Object[] { "dim1", "bbb", "dim2", "zzz" }, new Object[] {});
+            writeTS(iw, startTime + 5, new Object[] { "dim1", "aaa", "dim2", "xxx" }, new Object[] {});
+            writeTS(iw, startTime + 6, new Object[] { "dim1", "aaa", "dim2", "yyy" }, new Object[] {});
+            writeTS(iw, startTime + 7, new Object[] { "dim1", "bbb", "dim2", "zzz" }, new Object[] {});
+            writeTS(iw, startTime + 8, new Object[] { "dim1", "bbb", "dim2", "zzz" }, new Object[] {});
+        };
+        Consumer<InternalTimeSeries> verifier = ts -> {
+            assertThat(ts.getBuckets(), hasSize(3));
+
+            assertThat(ts.getBucketByKey("{dim1=aaa, dim2=xxx}").docCount, equalTo(2L));
+            InternalDateHistogram byTimeStampBucket = ts.getBucketByKey("{dim1=aaa, dim2=xxx}").getAggregations().get("by_timestamp");
+            assertThat(
+                byTimeStampBucket.getBuckets(),
+                contains(new InternalDateHistogram.Bucket(startTime, 2, false, null, InternalAggregations.EMPTY))
+            );
+            assertThat(ts.getBucketByKey("{dim1=aaa, dim2=yyy}").docCount, equalTo(2L));
+            byTimeStampBucket = ts.getBucketByKey("{dim1=aaa, dim2=yyy}").getAggregations().get("by_timestamp");
+            assertThat(
+                byTimeStampBucket.getBuckets(),
+                contains(new InternalDateHistogram.Bucket(startTime, 2, false, null, InternalAggregations.EMPTY))
+            );
+            assertThat(ts.getBucketByKey("{dim1=bbb, dim2=zzz}").docCount, equalTo(4L));
+            byTimeStampBucket = ts.getBucketByKey("{dim1=bbb, dim2=zzz}").getAggregations().get("by_timestamp");
+            assertThat(
+                byTimeStampBucket.getBuckets(),
+                contains(new InternalDateHistogram.Bucket(startTime, 4, false, null, InternalAggregations.EMPTY))
+            );
+        };
+
+        DateHistogramAggregationBuilder dateBuilder = new DateHistogramAggregationBuilder("by_timestamp");
+        dateBuilder.field("@timestamp");
+        dateBuilder.fixedInterval(DateHistogramInterval.seconds(1));
+        TimeSeriesAggregationBuilder tsBuilder = new TimeSeriesAggregationBuilder("by_tsid");
+        tsBuilder.subAggregation(dateBuilder);
+        timeSeriesTestCase(tsBuilder, new MatchAllDocsQuery(), buildIndex, verifier);
+    }
+
     private void timeSeriesTestCase(
         TimeSeriesAggregationBuilder builder,
         Query query,