3 månader sedan · cf5d40fa1f
--- a/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsWriter.java
+++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsWriter.java
@@ -420,6 +420,7 @@ public class DefaultIVFVectorsWriter extends IVFVectorsWriter {
 
				         private final OptimizedScalarQuantizer quantizer;
			
 
				         private final byte[] quantizedVector;
			
 
				         private final int[] quantizedVectorScratch;
			
 
				+        private final float[] floatVectorScratch;
			
 
				         private OptimizedScalarQuantizer.QuantizationResult corrections;
			
 
				         private float[] currentCentroid;
			
 
				         private IntToIntFunction ordTransformer = null;
			
@@ -430,6 +431,7 @@ public class DefaultIVFVectorsWriter extends IVFVectorsWriter {
 
				             this.vectorValues = vectorValues;
			
 
				             this.quantizer = quantizer;
			
 
				             this.quantizedVector = new byte[BQVectorUtils.discretize(dimension, 64) / 8];
			
 
				+            this.floatVectorScratch = new float[dimension];
			
 
				             this.quantizedVectorScratch = new int[dimension];
			
 
				             this.corrections = null;
			
 
				         }
			
@@ -454,7 +456,10 @@ public class DefaultIVFVectorsWriter extends IVFVectorsWriter {
 
				             currOrd++;
			
 
				             int ord = ordTransformer.apply(currOrd);
			
 
				             float[] vector = vectorValues.vectorValue(ord);
			
 
				-            corrections = quantizer.scalarQuantize(vector, quantizedVectorScratch, (byte) 1, currentCentroid);
			
 
				+            // It's possible that the vectors are on-heap and we cannot mutate them as we may quantize twice
			
 
				+            // due to overspill, so we copy the vector to a scratch array
			
 
				+            System.arraycopy(vector, 0, floatVectorScratch, 0, vector.length);
			
 
				+            corrections = quantizer.scalarQuantize(floatVectorScratch, quantizedVectorScratch, (byte) 1, currentCentroid);
			
 
				             BQVectorUtils.packAsBinary(quantizedVectorScratch, quantizedVector);
			
 
				             return quantizedVector;
			
 
				         }
			
--- a/server/src/main/java/org/elasticsearch/index/codec/vectors/cluster/HierarchicalKMeans.java
+++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/cluster/HierarchicalKMeans.java
@@ -57,10 +57,20 @@ public class HierarchicalKMeans {
 
				             return new KMeansIntermediate();
			
 
				         }
			
 
				 
			
 
				-        // if we have a small number of vectors pick one and output that as the centroid
			
 
				+        // if we have a small number of vectors calculate the centroid directly
			
 
				         if (vectors.size() <= targetSize) {
			
 
				             float[] centroid = new float[dimension];
			
 
				-            System.arraycopy(vectors.vectorValue(0), 0, centroid, 0, dimension);
			
 
				+            // sum the vectors
			
 
				+            for (int i = 0; i < vectors.size(); i++) {
			
 
				+                float[] vector = vectors.vectorValue(i);
			
 
				+                for (int j = 0; j < dimension; j++) {
			
 
				+                    centroid[j] += vector[j];
			
 
				+                }
			
 
				+            }
			
 
				+            // average the vectors
			
 
				+            for (int j = 0; j < dimension; j++) {
			
 
				+                centroid[j] /= vectors.size();
			
 
				+            }
			
 
				             return new KMeansIntermediate(new float[][] { centroid }, new int[vectors.size()]);
			
 
				         }