5 сар өмнө · 7eb393b921
--- a/docs/changelog/127225.yaml
+++ b/docs/changelog/127225.yaml
@@ -0,0 +1,6 @@
 
				+pr: 127225
			
 
				+summary: Fix count optimization with pushable union types
			
 
				+area: ES|QL
			
 
				+type: bug
			
 
				+issues:
			
 
				+ - 127200
			
--- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec
+++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec
@@ -393,6 +393,73 @@ count:long | @timestamp:date
 
				          0 | 2023-10-23T13:50:00.000Z
			
 
				 ;
			
 
				 
			
 
				+multiIndexIpStatsNonPushableCount
			
 
				+// Could be pushed to Lucene if we knew whether ip fields are single valued or not.
			
 
				+// May still be pushed down on multi-node environments if a node has only the index where client_ip is keyword.
			
 
				+required_capability: union_types
			
 
				+required_capability: fix_count_pushdown_for_union_types
			
 
				+
			
 
				+FROM sample_data, sample_data_str
			
 
				+| STATS count=count(client_ip::ip)
			
 
				+;
			
 
				+
			
 
				+count:long
			
 
				+14
			
 
				+;
			
 
				+
			
 
				+multiIndexIpStatsNonPushableCountEval
			
 
				+// Could be pushed to Lucene if we knew whether ip fields are single valued or not.
			
 
				+// May still be pushed down on multi-node environments if a node has only the index where client_ip is keyword.
			
 
				+required_capability: union_types
			
 
				+required_capability: fix_count_pushdown_for_union_types
			
 
				+
			
 
				+FROM sample_data, sample_data_str
			
 
				+| EVAL client_ip = client_ip::ip
			
 
				+| STATS count=count(client_ip)
			
 
				+;
			
 
				+
			
 
				+count:long
			
 
				+14
			
 
				+;
			
 
				+
			
 
				+multiIndexIpStatsNonPushableCountWithFilter
			
 
				+// Currently not pushable: has 2 aggs and we don't consider multi-typed fields aggregatable.
			
 
				+required_capability: union_types
			
 
				+required_capability: fix_count_pushdown_for_union_types
			
 
				+
			
 
				+FROM sample_data, sample_data_ts_long
			
 
				+| STATS count_matching=count(@timestamp::long) WHERE @timestamp::long >= 1698069301543,
			
 
				+        total_count=count(@timestamp::long)
			
 
				+;
			
 
				+
			
 
				+count_matching:long | total_count:long
			
 
				+2                   | 14
			
 
				+;
			
 
				+
			
 
				+multiIndexIpStatsPushableCount
			
 
				+required_capability: union_types
			
 
				+required_capability: fix_count_pushdown_for_union_types
			
 
				+
			
 
				+FROM sample_data, sample_data_ts_long
			
 
				+| STATS count=count(@timestamp::long)
			
 
				+;
			
 
				+
			
 
				+count:long
			
 
				+14
			
 
				+;
			
 
				+
			
 
				+multiIndexIpStatsPushableCountEval
			
 
				+required_capability: union_types
			
 
				+required_capability: fix_count_pushdown_for_union_types
			
 
				+
			
 
				+FROM sample_data, sample_data_ts_long
			
 
				+| EVAL @timestamp = @timestamp::long
			
 
				+| STATS count=count(@timestamp)
			
 
				+;
			
 
				+
			
 
				+count:long
			
 
				+14
			
 
				+;
			
 
				 
			
 
				 multiIndexIpStringStatsInline2
			
 
				 required_capability: union_types
			
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java
@@ -247,6 +247,12 @@ public class EsqlCapabilities {
 
				          */
			
 
				         UNION_TYPES_AGG_CAST,
			
 
				 
			
 
				+        /**
			
 
				+         * When pushing down {@code STATS count(field::type)} for a union type field, we wrongly used a synthetic attribute name in the
			
 
				+         * query instead of the actual field name. This led to 0 counts instead of the correct result.
			
 
				+         */
			
 
				+        FIX_COUNT_PUSHDOWN_FOR_UNION_TYPES,
			
 
				+
			
 
				         /**
			
 
				          * Fix to GROK validation in case of multiple fields with same name and different types
			
 
				          * https://github.com/elastic/elasticsearch/issues/110533
			
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java
@@ -64,12 +64,12 @@ public class ReplaceMissingFieldWithNull extends ParameterizedRule<LogicalPlan,
 
				 
			
 
				     private LogicalPlan missingToNull(LogicalPlan plan, Predicate<FieldAttribute> shouldBeRetained) {
			
 
				         if (plan instanceof EsRelation relation) {
			
 
				-            // Remove missing fields from the EsRelation because this is not where we will obtain them from; replace them by an Eval right
			
 
				-            // after, instead. This allows us to safely re-use the attribute ids of the corresponding FieldAttributes.
			
 
				+            // For any missing field, place an Eval right after the EsRelation to assign null values to that attribute (using the same name
			
 
				+            // id!), thus avoiding that InsertFieldExtrations inserts a field extraction later.
			
 
				             // This means that an EsRelation[field1, field2, field3] where field1 and field 3 are missing will be replaced by
			
 
				             // Project[field1, field2, field3] <- keeps the ordering intact
			
 
				             // \_Eval[field1 = null, field3 = null]
			
 
				-            // \_EsRelation[field2]
			
 
				+            // \_EsRelation[field1, field2, field3]
			
 
				             List<Attribute> relationOutput = relation.output();
			
 
				             Map<DataType, Alias> nullLiterals = Maps.newLinkedHashMapWithExpectedSize(DataType.types().size());
			
 
				             List<NamedExpression> newProjections = new ArrayList<>(relationOutput.size());
			
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushStatsToSource.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushStatsToSource.java
@@ -94,15 +94,19 @@ public class PushStatsToSource extends PhysicalOptimizerRules.ParameterizedOptim
 
				                             // check if regular field
			
 
				                             else {
			
 
				                                 if (target instanceof FieldAttribute fa) {
			
 
				-                                    var fName = fa.name();
			
 
				+                                    var fName = fa.fieldName();
			
 
				                                     if (context.searchStats().isSingleValue(fName)) {
			
 
				-                                        fieldName = fa.name();
			
 
				+                                        fieldName = fName;
			
 
				                                         query = QueryBuilders.existsQuery(fieldName);
			
 
				                                     }
			
 
				                                 }
			
 
				                             }
			
 
				                             if (fieldName != null) {
			
 
				                                 if (count.hasFilter()) {
			
 
				+                                    // Note: currently, it seems like we never actually perform stats pushdown if we reach here.
			
 
				+                                    // That's because stats pushdown only works for 1 agg function (without BY); but in that case, filters
			
 
				+                                    // are extracted into a separate filter node upstream from the aggregation (and hopefully pushed into
			
 
				+                                    // the EsQueryExec separately).
			
 
				                                     if (canPushToSource(count.filter()) == false) {
			
 
				                                         return null; // can't push down
			
 
				                                     }