|
@@ -24,7 +24,6 @@ import org.elasticsearch.core.Releasables;
|
|
|
import org.elasticsearch.search.sort.BucketedSort;
|
|
|
import org.elasticsearch.search.sort.SortOrder;
|
|
|
|
|
|
-import java.util.Arrays;
|
|
|
import java.util.stream.IntStream;
|
|
|
import java.util.stream.LongStream;
|
|
|
|
|
@@ -122,7 +121,7 @@ public class BytesRefBucketedSort implements Releasable {
|
|
|
if (common.inHeapMode(bucket)) {
|
|
|
if (betterThan(value, values.get(rootIndex).bytesRefView())) {
|
|
|
clearedBytesAt(rootIndex).append(value);
|
|
|
- downHeap(rootIndex, 0);
|
|
|
+ downHeap(rootIndex, 0, common.bucketSize);
|
|
|
}
|
|
|
checkInvariant(bucket);
|
|
|
return;
|
|
@@ -138,7 +137,7 @@ public class BytesRefBucketedSort implements Releasable {
|
|
|
clearedBytesAt(index).append(value);
|
|
|
if (next == 0) {
|
|
|
common.enableHeapMode(bucket);
|
|
|
- heapify(rootIndex);
|
|
|
+ heapify(rootIndex, common.bucketSize);
|
|
|
} else {
|
|
|
ByteUtils.writeIntLE(next - 1, values.get(rootIndex).bytes(), 0);
|
|
|
}
|
|
@@ -182,9 +181,6 @@ public class BytesRefBucketedSort implements Releasable {
|
|
|
return blockFactory.newConstantNullBlock(selected.getPositionCount());
|
|
|
}
|
|
|
|
|
|
- // Used to sort the values in the bucket.
|
|
|
- BytesRef[] bucketValues = new BytesRef[common.bucketSize];
|
|
|
-
|
|
|
try (var builder = blockFactory.newBytesRefBlockBuilder(selected.getPositionCount())) {
|
|
|
for (int s = 0; s < selected.getPositionCount(); s++) {
|
|
|
int bucket = selected.getInt(s);
|
|
@@ -212,25 +208,18 @@ public class BytesRefBucketedSort implements Releasable {
|
|
|
continue;
|
|
|
}
|
|
|
|
|
|
- for (int i = 0; i < size; i++) {
|
|
|
- try (BreakingBytesRefBuilder bytes = values.get(start + i)) {
|
|
|
- bucketValues[i] = bytes.bytesRefView();
|
|
|
- }
|
|
|
- values.set(start + i, null);
|
|
|
+ // If we are in the gathering mode, we need to heapify before sorting.
|
|
|
+ if (common.inHeapMode(bucket) == false) {
|
|
|
+ heapify(start, (int) size);
|
|
|
}
|
|
|
-
|
|
|
- // TODO: Make use of heap structures to faster iterate in order instead of copying and sorting
|
|
|
- Arrays.sort(bucketValues, 0, (int) size);
|
|
|
+ heapSort(start, (int) size);
|
|
|
|
|
|
builder.beginPositionEntry();
|
|
|
- if (common.order == SortOrder.ASC) {
|
|
|
- for (int i = 0; i < size; i++) {
|
|
|
- builder.appendBytesRef(bucketValues[i]);
|
|
|
- }
|
|
|
- } else {
|
|
|
- for (int i = (int) size - 1; i >= 0; i--) {
|
|
|
- builder.appendBytesRef(bucketValues[i]);
|
|
|
+ for (int i = 0; i < size; i++) {
|
|
|
+ try (BreakingBytesRefBuilder bytes = values.get(start + i)) {
|
|
|
+ builder.appendBytesRef(bytes.bytesRefView());
|
|
|
}
|
|
|
+ values.set(start + i, null);
|
|
|
}
|
|
|
builder.endPositionEntry();
|
|
|
}
|
|
@@ -339,10 +328,28 @@ public class BytesRefBucketedSort implements Releasable {
|
|
|
* </ul>
|
|
|
* @param rootIndex the index the start of the bucket
|
|
|
*/
|
|
|
- private void heapify(long rootIndex) {
|
|
|
- int maxParent = common.bucketSize / 2 - 1;
|
|
|
+ private void heapify(long rootIndex, int heapSize) {
|
|
|
+ int maxParent = heapSize / 2 - 1;
|
|
|
for (int parent = maxParent; parent >= 0; parent--) {
|
|
|
- downHeap(rootIndex, parent);
|
|
|
+ downHeap(rootIndex, parent, heapSize);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Sorts all the values in the heap using heap sort algorithm.
|
|
|
+ * This runs in {@code O(n log n)} time.
|
|
|
+ * @param rootIndex index of the start of the bucket
|
|
|
+ * @param heapSize Number of values that belong to the heap.
|
|
|
+ * Can be less than bucketSize.
|
|
|
+ * In such a case, the remaining values in range
|
|
|
+ * (rootIndex + heapSize, rootIndex + bucketSize)
|
|
|
+ * are *not* considered part of the heap.
|
|
|
+ */
|
|
|
+ private void heapSort(long rootIndex, int heapSize) {
|
|
|
+ while (heapSize > 0) {
|
|
|
+ swap(rootIndex, rootIndex + heapSize - 1);
|
|
|
+ heapSize--;
|
|
|
+ downHeap(rootIndex, 0, heapSize);
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -352,24 +359,27 @@ public class BytesRefBucketedSort implements Releasable {
|
|
|
* @param rootIndex index of the start of the bucket
|
|
|
* @param parent Index within the bucket of the parent to check.
|
|
|
* For example, 0 is the "root".
|
|
|
+ * @param heapSize Number of values that belong to the heap.
|
|
|
+ * Can be less than bucketSize.
|
|
|
+ * In such a case, the remaining values in range
|
|
|
+ * (rootIndex + heapSize, rootIndex + bucketSize)
|
|
|
+ * are *not* considered part of the heap.
|
|
|
*/
|
|
|
- private void downHeap(long rootIndex, int parent) {
|
|
|
+ private void downHeap(long rootIndex, int parent, int heapSize) {
|
|
|
while (true) {
|
|
|
long parentIndex = rootIndex + parent;
|
|
|
int worst = parent;
|
|
|
long worstIndex = parentIndex;
|
|
|
int leftChild = parent * 2 + 1;
|
|
|
long leftIndex = rootIndex + leftChild;
|
|
|
- if (leftChild < common.bucketSize) {
|
|
|
+ if (leftChild < heapSize) {
|
|
|
if (betterThan(values.get(worstIndex).bytesRefView(), values.get(leftIndex).bytesRefView())) {
|
|
|
worst = leftChild;
|
|
|
worstIndex = leftIndex;
|
|
|
}
|
|
|
int rightChild = leftChild + 1;
|
|
|
long rightIndex = rootIndex + rightChild;
|
|
|
- if (rightChild < common.bucketSize
|
|
|
- && betterThan(values.get(worstIndex).bytesRefView(), values.get(rightIndex).bytesRefView())) {
|
|
|
-
|
|
|
+ if (rightChild < heapSize && betterThan(values.get(worstIndex).bytesRefView(), values.get(rightIndex).bytesRefView())) {
|
|
|
worst = rightChild;
|
|
|
worstIndex = rightIndex;
|
|
|
}
|