Bläddra i källkod

Introduce ordinal bytesref block (#106852)

This PR introduces ordinal-based blocks for BytesRef. These blocks 
consist of a pair: an IntBlock for ordinals and a BytesRefVector for the
dictionary. Compared to the regular BytesRefBlock, these blocks are
slower due to indirect access and consume more memory because of the
additional ordinals block. However, they offer significant speed
improvements and reduced memory usage when byte values are frequently
repeated.

There are several use cases where these blocks can be beneficial.
Nhat Nguyen 1 år sedan
förälder
incheckning
f930544dcc
17 ändrade filer med 553 tillägg och 20 borttagningar
  1. 6 0
      docs/changelog/106852.yaml
  2. 1 0
      server/src/main/java/org/elasticsearch/TransportVersions.java
  3. 1 1
      x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanBlock.java
  4. 7 3
      x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BytesRefBlock.java
  5. 6 1
      x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BytesRefVector.java
  6. 1 1
      x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/DoubleBlock.java
  7. 1 1
      x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/IntBlock.java
  8. 1 1
      x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/LongBlock.java
  9. 1 0
      x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/Block.java
  10. 203 0
      x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/OrdinalBytesRefBlock.java
  11. 129 0
      x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/OrdinalBytesRefVector.java
  12. 58 3
      x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/SingletonOrdinalsBuilder.java
  13. 1 0
      x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/Vector.java
  14. 15 4
      x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-Block.java.st
  15. 10 3
      x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-Vector.java.st
  16. 102 0
      x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BlockSerializationTests.java
  17. 10 2
      x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/SingletonOrdinalsBuilderTests.java

+ 6 - 0
docs/changelog/106852.yaml

@@ -0,0 +1,6 @@
+pr: 106852
+summary: Introduce ordinal bytesref block
+area: ES|QL
+type: enhancement
+issues:
+ - 106387

+ 1 - 0
server/src/main/java/org/elasticsearch/TransportVersions.java

@@ -161,6 +161,7 @@ public class TransportVersions {
     public static final TransportVersion ESQL_REDUCER_NODE_FRAGMENT = def(8_620_00_0);
     public static final TransportVersion FAILURE_STORE_ROLLOVER = def(8_621_00_0);
     public static final TransportVersion CCR_STATS_API_TIMEOUT_PARAM = def(8_622_00_0);
+    public static final TransportVersion ESQL_ORDINAL_BLOCK = def(8_623_00_0);
 
     /*
      * STOP! READ THIS FIRST! No, really,

+ 1 - 1
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanBlock.java

@@ -52,7 +52,7 @@ public sealed interface BooleanBlock extends Block permits BooleanArrayBlock, Bo
         return readFrom((BlockStreamInput) in);
     }
 
-    private static BooleanBlock readFrom(BlockStreamInput in) throws IOException {
+    static BooleanBlock readFrom(BlockStreamInput in) throws IOException {
         final byte serializationType = in.readByte();
         return switch (serializationType) {
             case SERIALIZE_BLOCK_VALUES -> BooleanBlock.readValues(in);

+ 7 - 3
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BytesRefBlock.java

@@ -20,8 +20,8 @@ import java.io.IOException;
  * Block that stores BytesRef values.
  * This class is generated. Do not edit it.
  */
-public sealed interface BytesRefBlock extends Block permits BytesRefArrayBlock, BytesRefVectorBlock, ConstantNullBlock {
-
+public sealed interface BytesRefBlock extends Block permits BytesRefArrayBlock, BytesRefVectorBlock, ConstantNullBlock,
+    OrdinalBytesRefBlock {
     BytesRef NULL_VALUE = new BytesRef();
 
     /**
@@ -56,12 +56,13 @@ public sealed interface BytesRefBlock extends Block permits BytesRefArrayBlock,
         return readFrom((BlockStreamInput) in);
     }
 
-    private static BytesRefBlock readFrom(BlockStreamInput in) throws IOException {
+    static BytesRefBlock readFrom(BlockStreamInput in) throws IOException {
         final byte serializationType = in.readByte();
         return switch (serializationType) {
             case SERIALIZE_BLOCK_VALUES -> BytesRefBlock.readValues(in);
             case SERIALIZE_BLOCK_VECTOR -> BytesRefVector.readFrom(in.blockFactory(), in).asBlock();
             case SERIALIZE_BLOCK_ARRAY -> BytesRefArrayBlock.readArrayBlock(in.blockFactory(), in);
+            case SERIALIZE_BLOCK_ORDINAL -> OrdinalBytesRefBlock.readOrdinalBlock(in.blockFactory(), in);
             default -> {
                 assert false : "invalid block serialization type " + serializationType;
                 throw new IllegalStateException("invalid serialization type " + serializationType);
@@ -98,6 +99,9 @@ public sealed interface BytesRefBlock extends Block permits BytesRefArrayBlock,
         } else if (version.onOrAfter(TransportVersions.ESQL_SERIALIZE_ARRAY_BLOCK) && this instanceof BytesRefArrayBlock b) {
             out.writeByte(SERIALIZE_BLOCK_ARRAY);
             b.writeArrayBlock(out);
+        } else if (version.onOrAfter(TransportVersions.ESQL_ORDINAL_BLOCK) && this instanceof OrdinalBytesRefBlock b && b.isDense()) {
+            out.writeByte(SERIALIZE_BLOCK_ORDINAL);
+            b.writeOrdinalBlock(out);
         } else {
             out.writeByte(SERIALIZE_BLOCK_VALUES);
             BytesRefBlock.writeValues(this, out);

+ 6 - 1
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BytesRefVector.java

@@ -18,7 +18,8 @@ import java.io.IOException;
  * Vector that stores BytesRef values.
  * This class is generated. Do not edit it.
  */
-public sealed interface BytesRefVector extends Vector permits ConstantBytesRefVector, BytesRefArrayVector, ConstantNullVector {
+public sealed interface BytesRefVector extends Vector permits ConstantBytesRefVector, BytesRefArrayVector, ConstantNullVector,
+    OrdinalBytesRefVector {
     BytesRef getBytesRef(int position, BytesRef dest);
 
     @Override
@@ -80,6 +81,7 @@ public sealed interface BytesRefVector extends Vector permits ConstantBytesRefVe
             case SERIALIZE_VECTOR_VALUES -> readValues(positions, in, blockFactory);
             case SERIALIZE_VECTOR_CONSTANT -> blockFactory.newConstantBytesRefVector(in.readBytesRef(), positions);
             case SERIALIZE_VECTOR_ARRAY -> BytesRefArrayVector.readArrayVector(positions, in, blockFactory);
+            case SERIALIZE_VECTOR_ORDINAL -> OrdinalBytesRefVector.readOrdinalVector(blockFactory, in);
             default -> {
                 assert false : "invalid vector serialization type [" + serializationType + "]";
                 throw new IllegalStateException("invalid vector serialization type [" + serializationType + "]");
@@ -98,6 +100,9 @@ public sealed interface BytesRefVector extends Vector permits ConstantBytesRefVe
         } else if (version.onOrAfter(TransportVersions.ESQL_SERIALIZE_ARRAY_VECTOR) && this instanceof BytesRefArrayVector v) {
             out.writeByte(SERIALIZE_VECTOR_ARRAY);
             v.writeArrayVector(positions, out);
+        } else if (version.onOrAfter(TransportVersions.ESQL_ORDINAL_BLOCK) && this instanceof OrdinalBytesRefVector v && v.isDense()) {
+            out.writeByte(SERIALIZE_VECTOR_ORDINAL);
+            v.writeOrdinalVector(out);
         } else {
             out.writeByte(SERIALIZE_VECTOR_VALUES);
             writeValues(this, positions, out);

+ 1 - 1
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/DoubleBlock.java

@@ -52,7 +52,7 @@ public sealed interface DoubleBlock extends Block permits DoubleArrayBlock, Doub
         return readFrom((BlockStreamInput) in);
     }
 
-    private static DoubleBlock readFrom(BlockStreamInput in) throws IOException {
+    static DoubleBlock readFrom(BlockStreamInput in) throws IOException {
         final byte serializationType = in.readByte();
         return switch (serializationType) {
             case SERIALIZE_BLOCK_VALUES -> DoubleBlock.readValues(in);

+ 1 - 1
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/IntBlock.java

@@ -52,7 +52,7 @@ public sealed interface IntBlock extends Block permits IntArrayBlock, IntVectorB
         return readFrom((BlockStreamInput) in);
     }
 
-    private static IntBlock readFrom(BlockStreamInput in) throws IOException {
+    static IntBlock readFrom(BlockStreamInput in) throws IOException {
         final byte serializationType = in.readByte();
         return switch (serializationType) {
             case SERIALIZE_BLOCK_VALUES -> IntBlock.readValues(in);

+ 1 - 1
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/LongBlock.java

@@ -52,7 +52,7 @@ public sealed interface LongBlock extends Block permits LongArrayBlock, LongVect
         return readFrom((BlockStreamInput) in);
     }
 
-    private static LongBlock readFrom(BlockStreamInput in) throws IOException {
+    static LongBlock readFrom(BlockStreamInput in) throws IOException {
         final byte serializationType = in.readByte();
         return switch (serializationType) {
             case SERIALIZE_BLOCK_VALUES -> LongBlock.readValues(in);

+ 1 - 0
x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/Block.java

@@ -247,4 +247,5 @@ public interface Block extends Accountable, BlockLoader.Block, NamedWriteable, R
     byte SERIALIZE_BLOCK_VECTOR = 1;
     byte SERIALIZE_BLOCK_ARRAY = 2;
     byte SERIALIZE_BLOCK_BIG_ARRAY = 3;
+    byte SERIALIZE_BLOCK_ORDINAL = 3;
 }

+ 203 - 0
x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/OrdinalBytesRefBlock.java

@@ -0,0 +1,203 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.data;
+
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.core.Releasables;
+
+import java.io.IOException;
+
+/**
+ * A {@link BytesRefBlock} consists of a pair: an {@link IntBlock} for ordinals and a {@link BytesRefVector} for the dictionary.
+ * Compared to the regular {@link BytesRefBlock}, this block is slower due to indirect access and consumes more memory because of
+ * the additional ordinals block. However, it offers significant speed improvements and reduced memory usage when byte values are
+ * frequently repeated.
+ */
+public final class OrdinalBytesRefBlock extends AbstractNonThreadSafeRefCounted implements BytesRefBlock {
+    private final IntBlock ordinals;
+    private final BytesRefVector bytes;
+
+    public OrdinalBytesRefBlock(IntBlock ordinals, BytesRefVector bytes) {
+        this.ordinals = ordinals;
+        this.bytes = bytes;
+    }
+
+    static OrdinalBytesRefBlock readOrdinalBlock(BlockFactory blockFactory, BlockStreamInput in) throws IOException {
+        BytesRefVector bytes = null;
+        OrdinalBytesRefBlock result = null;
+        IntBlock ordinals = IntBlock.readFrom(in);
+        try {
+            bytes = BytesRefVector.readFrom(blockFactory, in);
+            result = new OrdinalBytesRefBlock(ordinals, bytes);
+        } finally {
+            if (result == null) {
+                Releasables.close(ordinals, bytes);
+            }
+        }
+        return result;
+    }
+
+    void writeOrdinalBlock(StreamOutput out) throws IOException {
+        ordinals.writeTo(out);
+        bytes.writeTo(out);
+    }
+
+    /**
+     * Returns true if this ordinal block is dense enough to enable optimizations using its ordinals
+     */
+    public boolean isDense() {
+        return ordinals.getTotalValueCount() * 2 / 3 >= bytes.getPositionCount();
+    }
+
+    @Override
+    public BytesRef getBytesRef(int valueIndex, BytesRef dest) {
+        return bytes.getBytesRef(ordinals.getInt(valueIndex), dest);
+    }
+
+    @Override
+    public BytesRefVector asVector() {
+        IntVector vector = ordinals.asVector();
+        if (vector != null) {
+            return new OrdinalBytesRefVector(vector, bytes);
+        } else {
+            return null;
+        }
+    }
+
+    @Override
+    public BytesRefBlock filter(int... positions) {
+        if (positions.length * ordinals.getTotalValueCount() >= bytes.getPositionCount() * ordinals.getPositionCount()) {
+            OrdinalBytesRefBlock result = null;
+            IntBlock filteredOrdinals = ordinals.filter(positions);
+            try {
+                result = new OrdinalBytesRefBlock(filteredOrdinals, bytes);
+                bytes.incRef();
+            } finally {
+                if (result == null) {
+                    filteredOrdinals.close();
+                }
+            }
+            return result;
+        } else {
+            // TODO: merge this with BytesRefArrayBlock#filter
+            BytesRef scratch = new BytesRef();
+            try (BytesRefBlock.Builder builder = blockFactory().newBytesRefBlockBuilder(positions.length)) {
+                for (int pos : positions) {
+                    if (isNull(pos)) {
+                        builder.appendNull();
+                        continue;
+                    }
+                    int valueCount = getValueCount(pos);
+                    int first = getFirstValueIndex(pos);
+                    if (valueCount == 1) {
+                        builder.appendBytesRef(getBytesRef(getFirstValueIndex(pos), scratch));
+                    } else {
+                        builder.beginPositionEntry();
+                        for (int c = 0; c < valueCount; c++) {
+                            builder.appendBytesRef(getBytesRef(first + c, scratch));
+                        }
+                        builder.endPositionEntry();
+                    }
+                }
+                return builder.mvOrdering(mvOrdering()).build();
+            }
+        }
+    }
+
+    @Override
+    protected void closeInternal() {
+        Releasables.close(ordinals, bytes);
+    }
+
+    @Override
+    public int getTotalValueCount() {
+        return ordinals.getTotalValueCount();
+    }
+
+    @Override
+    public int getPositionCount() {
+        return ordinals.getPositionCount();
+    }
+
+    @Override
+    public int getFirstValueIndex(int position) {
+        return ordinals.getFirstValueIndex(position);
+    }
+
+    @Override
+    public int getValueCount(int position) {
+        return ordinals.getValueCount(position);
+    }
+
+    @Override
+    public ElementType elementType() {
+        return bytes.elementType();
+    }
+
+    @Override
+    public BlockFactory blockFactory() {
+        return ordinals.blockFactory();
+    }
+
+    @Override
+    public void allowPassingToDifferentDriver() {
+        ordinals.allowPassingToDifferentDriver();
+        bytes.allowPassingToDifferentDriver();
+    }
+
+    @Override
+    public boolean isNull(int position) {
+        return ordinals.isNull(position);
+    }
+
+    @Override
+    public int nullValuesCount() {
+        return ordinals.nullValuesCount();
+    }
+
+    @Override
+    public boolean mayHaveNulls() {
+        return ordinals.mayHaveNulls();
+    }
+
+    @Override
+    public boolean areAllValuesNull() {
+        return ordinals.areAllValuesNull();
+    }
+
+    @Override
+    public boolean mayHaveMultivaluedFields() {
+        return ordinals.mayHaveMultivaluedFields();
+    }
+
+    @Override
+    public MvOrdering mvOrdering() {
+        return ordinals.mvOrdering();
+    }
+
+    @Override
+    public OrdinalBytesRefBlock expand() {
+        OrdinalBytesRefBlock result = null;
+        IntBlock expandedOrdinals = ordinals.expand();
+        try {
+            result = new OrdinalBytesRefBlock(expandedOrdinals, bytes);
+            bytes.incRef();
+        } finally {
+            if (result == null) {
+                expandedOrdinals.close();
+            }
+        }
+        return result;
+    }
+
+    @Override
+    public long ramBytesUsed() {
+        return ordinals.ramBytesUsed() + bytes.ramBytesUsed();
+    }
+}

+ 129 - 0
x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/OrdinalBytesRefVector.java

@@ -0,0 +1,129 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.data;
+
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.io.stream.StreamInput;
+import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.core.Releasables;
+
+import java.io.IOException;
+
+/**
+ * A {@link BytesRefVector} consists of a pair: an {@link IntVector} for ordinals and a {@link BytesRefVector} for the dictionary.
+ * Compared to the regular {@link BytesRefVector}, this vector is slower due to indirect access and consumes more memory because of
+ * the additional ordinals vector. However, it offers significant speed improvements and reduced memory usage when byte values are
+ * frequently repeated.
+ */
+public final class OrdinalBytesRefVector extends AbstractNonThreadSafeRefCounted implements BytesRefVector {
+    private final IntVector ordinals;
+    private final BytesRefVector bytes;
+
+    public OrdinalBytesRefVector(IntVector ordinals, BytesRefVector bytes) {
+        this.ordinals = ordinals;
+        this.bytes = bytes;
+    }
+
+    static OrdinalBytesRefVector readOrdinalVector(BlockFactory blockFactory, StreamInput in) throws IOException {
+        IntVector ordinals = IntVector.readFrom(blockFactory, in);
+        BytesRefVector bytes = null;
+        OrdinalBytesRefVector result = null;
+        try {
+            bytes = BytesRefVector.readFrom(blockFactory, in);
+            result = new OrdinalBytesRefVector(ordinals, bytes);
+        } finally {
+            if (result == null) {
+                Releasables.close(ordinals, bytes);
+            }
+        }
+        return result;
+    }
+
+    void writeOrdinalVector(StreamOutput out) throws IOException {
+        ordinals.writeTo(out);
+        bytes.writeTo(out);
+    }
+
+    /**
+     * Returns true if this ordinal vector is dense enough to enable optimizations using its ordinals
+     */
+    public boolean isDense() {
+        return ordinals.getPositionCount() * 2 / 3 >= bytes.getPositionCount();
+    }
+
+    @Override
+    public int getPositionCount() {
+        return ordinals.getPositionCount();
+    }
+
+    @Override
+    public BlockFactory blockFactory() {
+        return ordinals.blockFactory();
+    }
+
+    @Override
+    public void allowPassingToDifferentDriver() {
+        ordinals.allowPassingToDifferentDriver();
+        bytes.allowPassingToDifferentDriver();
+    }
+
+    @Override
+    public BytesRef getBytesRef(int position, BytesRef dest) {
+        return bytes.getBytesRef(ordinals.getInt(position), dest);
+    }
+
+    @Override
+    public BytesRefBlock asBlock() {
+        return new BytesRefVectorBlock(this);
+    }
+
+    @Override
+    public BytesRefVector filter(int... positions) {
+        if (positions.length >= ordinals.getPositionCount()) {
+            OrdinalBytesRefVector result = null;
+            IntVector filteredOrdinals = ordinals.filter(positions);
+            try {
+                result = new OrdinalBytesRefVector(filteredOrdinals, bytes);
+                bytes.incRef();
+            } finally {
+                if (result == null) {
+                    filteredOrdinals.close();
+                }
+            }
+            return result;
+        } else {
+            final BytesRef scratch = new BytesRef();
+            try (BytesRefVector.Builder builder = blockFactory().newBytesRefVectorBuilder(positions.length)) {
+                for (int p : positions) {
+                    builder.appendBytesRef(getBytesRef(p, scratch));
+                }
+                return builder.build();
+            }
+        }
+    }
+
+    @Override
+    public ElementType elementType() {
+        return bytes.elementType();
+    }
+
+    @Override
+    public boolean isConstant() {
+        return bytes.isConstant() || ordinals.isConstant();
+    }
+
+    @Override
+    public long ramBytesUsed() {
+        return ordinals.ramBytesUsed() + bytes.ramBytesUsed();
+    }
+
+    @Override
+    protected void closeInternal() {
+        Releasables.close(ordinals, bytes);
+    }
+}

+ 58 - 3
x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/SingletonOrdinalsBuilder.java

@@ -12,6 +12,7 @@ import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.elasticsearch.compute.operator.BreakingBytesRefBuilder;
 import org.elasticsearch.core.Releasable;
+import org.elasticsearch.core.Releasables;
 import org.elasticsearch.index.mapper.BlockLoader;
 
 import java.io.IOException;
@@ -21,7 +22,7 @@ import java.util.Arrays;
 public class SingletonOrdinalsBuilder implements BlockLoader.SingletonOrdinalsBuilder, Releasable, Block.Builder {
     private final BlockFactory blockFactory;
     private final SortedDocValues docValues;
-    private int[] ords;
+    private final int[] ords;
     private int count;
 
     public SingletonOrdinalsBuilder(BlockFactory blockFactory, SortedDocValues docValues, int count) {
@@ -53,8 +54,53 @@ public class SingletonOrdinalsBuilder implements BlockLoader.SingletonOrdinalsBu
         throw new UnsupportedOperationException("should only have one value per doc");
     }
 
-    @Override
-    public BytesRefBlock build() {
+    BytesRefBlock buildOrdinal() {
+        int valueCount = docValues.getValueCount();
+        long breakerSize = ordsSize(valueCount);
+        blockFactory.adjustBreaker(breakerSize);
+        BytesRefVector bytesVector = null;
+        IntBlock ordinalBlock = null;
+        try {
+            int[] newOrds = new int[valueCount];
+            Arrays.fill(newOrds, -1);
+            for (int ord : ords) {
+                if (ord != -1) {
+                    newOrds[ord] = 0;
+                }
+            }
+            // resolve the used ordinals and remap them to a dense range
+            int nextOrd = -1;
+            try (BytesRefVector.Builder bytesBuilder = blockFactory.newBytesRefVectorBuilder(Math.min(valueCount, ords.length))) {
+                for (int i = 0; i < newOrds.length; i++) {
+                    if (newOrds[i] != -1) {
+                        newOrds[i] = ++nextOrd;
+                        bytesBuilder.appendBytesRef(docValues.lookupOrd(i));
+                    }
+                }
+                bytesVector = bytesBuilder.build();
+            } catch (IOException e) {
+                throw new UncheckedIOException("error resolving ordinals", e);
+            }
+            try (IntBlock.Builder ordinalsBuilder = blockFactory.newIntBlockBuilder(ords.length)) {
+                for (int ord : ords) {
+                    if (ord == -1) {
+                        ordinalsBuilder.appendNull();
+                    } else {
+                        ordinalsBuilder.appendInt(newOrds[ord]);
+                    }
+                }
+                ordinalBlock = ordinalsBuilder.build();
+            }
+            final OrdinalBytesRefBlock result = new OrdinalBytesRefBlock(ordinalBlock, bytesVector);
+            bytesVector = null;
+            ordinalBlock = null;
+            return result;
+        } finally {
+            Releasables.close(() -> blockFactory.adjustBreaker(-breakerSize), ordinalBlock, bytesVector);
+        }
+    }
+
+    BytesRefBlock buildRegularBlock() {
         try {
             long breakerSize = ordsSize(ords.length);
             // Increment breaker for sorted ords.
@@ -105,6 +151,15 @@ public class SingletonOrdinalsBuilder implements BlockLoader.SingletonOrdinalsBu
         }
     }
 
+    @Override
+    public BytesRefBlock build() {
+        if (ords.length >= 2 * docValues.getValueCount() && ords.length >= 32) {
+            return buildOrdinal();
+        } else {
+            return buildRegularBlock();
+        }
+    }
+
     @Override
     public void close() {
         blockFactory.adjustBreaker(-ordsSize(ords.length));

+ 1 - 0
x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/Vector.java

@@ -80,4 +80,5 @@ public interface Vector extends Accountable, RefCounted, Releasable {
     byte SERIALIZE_VECTOR_CONSTANT = 1;
     byte SERIALIZE_VECTOR_ARRAY = 2;
     byte SERIALIZE_VECTOR_BIG_ARRAY = 3;
+    byte SERIALIZE_VECTOR_ORDINAL = 4;
 }

+ 15 - 4
x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-Block.java.st

@@ -22,7 +22,12 @@ import java.io.IOException;
  * Block that stores $type$ values.
  * This class is generated. Do not edit it.
  */
-public sealed interface $Type$Block extends Block permits $Type$ArrayBlock, $Type$VectorBlock, ConstantNullBlock$if(BytesRef)$$else$, $Type$BigArrayBlock$endif$ {
+$if(BytesRef)$
+public sealed interface BytesRefBlock extends Block permits BytesRefArrayBlock, BytesRefVectorBlock, ConstantNullBlock,
+    OrdinalBytesRefBlock {
+$else$
+public sealed interface $Type$Block extends Block permits $Type$ArrayBlock, $Type$VectorBlock, ConstantNullBlock, $Type$BigArrayBlock {
+$endif$
 
 $if(BytesRef)$
     BytesRef NULL_VALUE = new BytesRef();
@@ -67,13 +72,15 @@ $endif$
         return readFrom((BlockStreamInput) in);
     }
 
-    private static $Type$Block readFrom(BlockStreamInput in) throws IOException {
+    static $Type$Block readFrom(BlockStreamInput in) throws IOException {
         final byte serializationType = in.readByte();
         return switch (serializationType) {
             case SERIALIZE_BLOCK_VALUES -> $Type$Block.readValues(in);
             case SERIALIZE_BLOCK_VECTOR -> $Type$Vector.readFrom(in.blockFactory(), in).asBlock();
             case SERIALIZE_BLOCK_ARRAY -> $Type$ArrayBlock.readArrayBlock(in.blockFactory(), in);
-$if(BytesRef)$$else$
+$if(BytesRef)$
+            case SERIALIZE_BLOCK_ORDINAL -> OrdinalBytesRefBlock.readOrdinalBlock(in.blockFactory(), in);
+$else$
             case SERIALIZE_BLOCK_BIG_ARRAY -> $Type$BigArrayBlock.readArrayBlock(in.blockFactory(), in);
 $endif$
             default -> {
@@ -112,7 +119,11 @@ $endif$
         } else if (version.onOrAfter(TransportVersions.ESQL_SERIALIZE_ARRAY_BLOCK) && this instanceof $Type$ArrayBlock b) {
             out.writeByte(SERIALIZE_BLOCK_ARRAY);
             b.writeArrayBlock(out);
-$if(BytesRef)$$else$
+$if(BytesRef)$
+        } else if (version.onOrAfter(TransportVersions.ESQL_ORDINAL_BLOCK) && this instanceof OrdinalBytesRefBlock b && b.isDense()) {
+            out.writeByte(SERIALIZE_BLOCK_ORDINAL);
+            b.writeOrdinalBlock(out);
+$else$
         } else if (version.onOrAfter(TransportVersions.ESQL_SERIALIZE_BIG_ARRAY) && this instanceof $Type$BigArrayBlock b) {
             out.writeByte(SERIALIZE_BLOCK_BIG_ARRAY);
             b.writeArrayBlock(out);

+ 10 - 3
x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-Vector.java.st

@@ -21,7 +21,8 @@ import java.io.IOException;
  * This class is generated. Do not edit it.
  */
 $if(BytesRef)$
-public sealed interface $Type$Vector extends Vector permits Constant$Type$Vector, $Type$ArrayVector, ConstantNullVector {
+public sealed interface BytesRefVector extends Vector permits ConstantBytesRefVector, BytesRefArrayVector, ConstantNullVector,
+    OrdinalBytesRefVector {
 $elseif(boolean)$
 public sealed interface $Type$Vector extends Vector permits Constant$Type$Vector, $Type$ArrayVector, $Type$BigArrayVector,
     ConstantNullVector {
@@ -114,7 +115,9 @@ $endif$
             case SERIALIZE_VECTOR_VALUES -> readValues(positions, in, blockFactory);
             case SERIALIZE_VECTOR_CONSTANT -> blockFactory.newConstant$Type$Vector(in.read$Type$(), positions);
             case SERIALIZE_VECTOR_ARRAY -> $Type$ArrayVector.readArrayVector(positions, in, blockFactory);
-$if(BytesRef)$$else$
+$if(BytesRef)$
+            case SERIALIZE_VECTOR_ORDINAL -> OrdinalBytesRefVector.readOrdinalVector(blockFactory, in);
+$else$
             case SERIALIZE_VECTOR_BIG_ARRAY -> $Type$BigArrayVector.readArrayVector(positions, in, blockFactory);
 $endif$
             default -> {
@@ -139,7 +142,11 @@ $endif$
         } else if (version.onOrAfter(TransportVersions.ESQL_SERIALIZE_ARRAY_VECTOR) && this instanceof $Type$ArrayVector v) {
             out.writeByte(SERIALIZE_VECTOR_ARRAY);
             v.writeArrayVector(positions, out);
-$if(BytesRef)$$else$
+$if(BytesRef)$
+        } else if (version.onOrAfter(TransportVersions.ESQL_ORDINAL_BLOCK) && this instanceof OrdinalBytesRefVector v && v.isDense()) {
+            out.writeByte(SERIALIZE_VECTOR_ORDINAL);
+            v.writeOrdinalVector(out);
+$else$
         } else if (version.onOrAfter(TransportVersions.ESQL_SERIALIZE_BIG_VECTOR) && this instanceof $Type$BigArrayVector v) {
             out.writeByte(SERIALIZE_VECTOR_BIG_ARRAY);
             v.writeArrayVector(positions, out);

+ 102 - 0
x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BlockSerializationTests.java

@@ -12,6 +12,7 @@ import org.elasticsearch.common.breaker.CircuitBreaker;
 import org.elasticsearch.common.breaker.CircuitBreakingException;
 import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.common.util.BytesRefHash;
 import org.elasticsearch.common.util.MockBigArrays;
 import org.elasticsearch.common.util.PageCacheRecycler;
 import org.elasticsearch.compute.aggregation.SumLongAggregatorFunction;
@@ -234,6 +235,107 @@ public class BlockSerializationTests extends SerializationTestCase {
         }
     }
 
+    public void testOrdinalVector() throws Exception {
+        int numValues = randomIntBetween(1, 1000);
+        BlockFactory blockFactory = driverContext().blockFactory();
+        BytesRef scratch = new BytesRef();
+        try (
+            BytesRefVector.Builder regular = blockFactory.newBytesRefVectorBuilder(between(1, numValues * 3));
+            BytesRefHash hash = new BytesRefHash(1, blockFactory.bigArrays());
+            IntVector.Builder ordinals = blockFactory.newIntVectorBuilder(between(1, numValues * 3));
+            BytesRefVector.Builder dictionary = blockFactory.newBytesRefVectorBuilder(between(1, numValues * 3));
+        ) {
+            BytesRef v = new BytesRef("value-" + randomIntBetween(1, 20));
+            int ord = Math.toIntExact(hash.add(v));
+            ord = ord < 0 ? -1 - ord : ord;
+            ordinals.appendInt(ord);
+            regular.appendBytesRef(v);
+            for (long l = 0; l < hash.size(); l++) {
+                dictionary.appendBytesRef(hash.get(l, scratch));
+            }
+            try (BytesRefVector v1 = regular.build(); BytesRefVector v2 = new OrdinalBytesRefVector(ordinals.build(), dictionary.build())) {
+                BytesRefVector.equals(v1, v2);
+                for (BytesRefVector vector : List.of(v1, v2)) {
+                    try (BytesRefBlock deserBlock = serializeDeserializeBlock(vector.asBlock())) {
+                        EqualsHashCodeTestUtils.checkEqualsAndHashCode(deserBlock, unused -> deserBlock);
+                    }
+                }
+                for (int p = 0; p < v1.getPositionCount(); p++) {
+                    try (BytesRefVector f1 = v1.filter(p); BytesRefVector f2 = v2.filter(p)) {
+                        BytesRefVector.equals(f1, f2);
+                        for (BytesRefVector vector : List.of(f1, f2)) {
+                            try (BytesRefBlock deserBlock = serializeDeserializeBlock(vector.asBlock())) {
+                                EqualsHashCodeTestUtils.checkEqualsAndHashCode(deserBlock, unused -> deserBlock);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    public void testOrdinalBlock() throws Exception {
+        int numValues = randomIntBetween(1, 1000);
+        BlockFactory blockFactory = driverContext().blockFactory();
+        BytesRef scratch = new BytesRef();
+        try (
+            BytesRefBlock.Builder regular = blockFactory.newBytesRefBlockBuilder(between(1, numValues * 3));
+            BytesRefHash hash = new BytesRefHash(1, blockFactory.bigArrays());
+            IntBlock.Builder ordinals = blockFactory.newIntBlockBuilder(between(1, numValues * 3));
+            BytesRefVector.Builder dictionary = blockFactory.newBytesRefVectorBuilder(between(1, numValues * 3));
+        ) {
+            int valueCount = randomIntBetween(0, 3);
+            if (valueCount == 0) {
+                regular.appendNull();
+                ordinals.appendNull();
+            }
+            if (valueCount > 1) {
+                regular.beginPositionEntry();
+                ordinals.beginPositionEntry();
+            }
+            for (int v = 0; v < valueCount; v++) {
+                BytesRef bytes = new BytesRef("value-" + randomIntBetween(1, 20));
+                int ord = Math.toIntExact(hash.add(bytes));
+                ord = ord < 0 ? -1 - ord : ord;
+                ordinals.appendInt(ord);
+                regular.appendBytesRef(bytes);
+            }
+            if (valueCount > 1) {
+                regular.endPositionEntry();
+                ordinals.endPositionEntry();
+            }
+            for (long l = 0; l < hash.size(); l++) {
+                dictionary.appendBytesRef(hash.get(l, scratch));
+            }
+            try (BytesRefBlock b1 = regular.build(); BytesRefBlock b2 = new OrdinalBytesRefBlock(ordinals.build(), dictionary.build())) {
+                BytesRefBlock.equals(b1, b2);
+                for (BytesRefBlock block : List.of(b1, b2)) {
+                    try (BytesRefBlock deserBlock = serializeDeserializeBlock(block)) {
+                        EqualsHashCodeTestUtils.checkEqualsAndHashCode(deserBlock, unused -> deserBlock);
+                    }
+                }
+                for (int p = 0; p < b1.getPositionCount(); p++) {
+                    try (BytesRefBlock f1 = b1.filter(p); BytesRefBlock f2 = b2.filter(p)) {
+                        BytesRefBlock.equals(f1, f2);
+                        for (BytesRefBlock block : List.of(f1, f2)) {
+                            try (BytesRefBlock deserBlock = serializeDeserializeBlock(block)) {
+                                EqualsHashCodeTestUtils.checkEqualsAndHashCode(deserBlock, unused -> deserBlock);
+                            }
+                        }
+                    }
+                }
+                try (BytesRefBlock e1 = b1.expand(); BytesRefBlock e2 = b2.expand()) {
+                    BytesRefBlock.equals(e1, e2);
+                    for (BytesRefBlock block : List.of(e1, e2)) {
+                        try (BytesRefBlock deserBlock = serializeDeserializeBlock(block)) {
+                            EqualsHashCodeTestUtils.checkEqualsAndHashCode(deserBlock, unused -> deserBlock);
+                        }
+                    }
+                }
+            }
+        }
+    }
+
     static BytesRef randomBytesRef() {
         return new BytesRef(randomAlphaOfLengthBetween(0, 10));
     }

+ 10 - 2
x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/SingletonOrdinalsBuilderTests.java

@@ -74,7 +74,7 @@ public class SingletonOrdinalsBuilderTests extends ESTestCase {
                                 builder.appendOrd(docValues.ordValue());
                             }
                         }
-                        try (BytesRefBlock build = builder.build()) {
+                        try (BytesRefBlock build = buildOrdinalsBuilder(builder)) {
                             for (int i = 0; i < build.getPositionCount(); i++) {
                                 counts.merge(build.getBytesRef(i, new BytesRef()).utf8ToString(), 1, (lhs, rhs) -> lhs + rhs);
                             }
@@ -141,7 +141,7 @@ public class SingletonOrdinalsBuilderTests extends ESTestCase {
                                 builder.appendNull();
                             }
                         }
-                        try (BytesRefBlock built = builder.build()) {
+                        try (BytesRefBlock built = buildOrdinalsBuilder(builder)) {
                             for (int p = 0; p < built.getPositionCount(); p++) {
                                 assertThat(built.isNull(p), equalTo(true));
                             }
@@ -153,6 +153,14 @@ public class SingletonOrdinalsBuilderTests extends ESTestCase {
         }
     }
 
+    static BytesRefBlock buildOrdinalsBuilder(SingletonOrdinalsBuilder builder) {
+        if (randomBoolean()) {
+            return builder.buildRegularBlock();
+        } else {
+            return builder.buildOrdinal();
+        }
+    }
+
     @After
     public void allBreakersEmpty() throws Exception {
         // first check that all big arrays are released, which can affect breakers