Browse Source

Add more values to estimated sizes for ES|QL field types (#108412)

* Add more values to estimated sizes for ES|QL field types

* Increase shape estimate to 200 and add version estimate
Craig Taverner 1 year ago
parent
commit
9977af92aa

+ 7 - 7
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/EstimatesRowSize.java

@@ -13,7 +13,6 @@ import org.elasticsearch.xpack.esql.EsqlIllegalArgumentException;
 import org.elasticsearch.xpack.esql.planner.PlannerUtils;
 import org.elasticsearch.xpack.ql.expression.Expression;
 import org.elasticsearch.xpack.ql.type.DataType;
-import org.elasticsearch.xpack.ql.type.DataTypes;
 
 import java.util.List;
 
@@ -106,12 +105,13 @@ public interface EstimatesRowSize {
         ElementType elementType = PlannerUtils.toElementType(dataType);
         return switch (elementType) {
             case BOOLEAN -> 1;
-            case BYTES_REF -> {
-                if (dataType == DataTypes.IP) {
-                    yield 16;
-                }
-                yield 50; // wild estimate for the size of a string.
-            }
+            case BYTES_REF -> switch (dataType.typeName()) {
+                case "ip" -> 16;      // IP addresses, both IPv4 and IPv6, are encoded using 16 bytes.
+                case "version" -> 15; // 8.15.2-SNAPSHOT is 15 bytes, most are shorter, some can be longer
+                case "geo_point", "cartesian_point" -> 21;  // WKB for points is typically 21 bytes.
+                case "geo_shape", "cartesian_shape" -> 200; // wild estimate, based on some test data (airport_city_boundaries)
+                default -> 50; // wild estimate for the size of a string.
+            };
             case DOC -> throw new EsqlIllegalArgumentException("can't load a [doc] with field extraction");
             case DOUBLE -> Double.BYTES;
             case INT -> Integer.BYTES;