11 months ago · 94498b4b41
--- a/docs/changelog/115655.yaml
+++ b/docs/changelog/115655.yaml
@@ -0,0 +1,5 @@
 
				+pr: 115655
			
 
				+summary: Better sizing `BytesRef` for Strings in Queries
			
 
				+area: Search
			
 
				+type: enhancement
			
 
				+issues: []
			
--- a/server/src/main/java/org/elasticsearch/common/lucene/BytesRefs.java
+++ b/server/src/main/java/org/elasticsearch/common/lucene/BytesRefs.java
@@ -11,6 +11,7 @@ package org.elasticsearch.common.lucene;
 
				 
			
 
				 import org.apache.lucene.index.IndexWriter;
			
 
				 import org.apache.lucene.util.BytesRef;
			
 
				+import org.apache.lucene.util.UnicodeUtil;
			
 
				 
			
 
				 public class BytesRefs {
			
 
				 
			
@@ -56,6 +57,25 @@ public class BytesRefs {
 
				         return input;
			
 
				     }
			
 
				 
			
 
				+    /**
			
 
				+     * Converts a given string to a {@link BytesRef} object with an exactly sized byte array.
			
 
				+     * <p>
			
 
				+     * This alternative to the standard {@link BytesRef} constructor allocates the
			
 
				+     * exact byte array size needed for the string. This is done by parsing the UTF-16 string two
			
 
				+     * times: the first to compute the array length and the second to copy the string value inside
			
 
				+     * the array.
			
 
				+     * </p>
			
 
				+     *
			
 
				+     * @param s the input string to convert
			
 
				+     * @return a BytesRef object representing the input string
			
 
				+     */
			
 
				+    public static BytesRef toExactSizedBytesRef(String s) {
			
 
				+        int l = s.length();
			
 
				+        byte[] b = new byte[UnicodeUtil.calcUTF16toUTF8Length(s, 0, l)];
			
 
				+        UnicodeUtil.UTF16toUTF8(s, 0, l, b);
			
 
				+        return new BytesRef(b, 0, b.length);
			
 
				+    }
			
 
				+
			
 
				     /**
			
 
				      * Produces a UTF-string prefix of the input BytesRef.  If the prefix cutoff would produce
			
 
				      * ill-formed UTF, it falls back to the hexadecimal representation.
			
@@ -70,5 +90,4 @@ public class BytesRefs {
 
				             return prefix.toString();
			
 
				         }
			
 
				     }
			
 
				-
			
 
				 }
			
--- a/server/src/main/java/org/elasticsearch/index/query/AbstractQueryBuilder.java
+++ b/server/src/main/java/org/elasticsearch/index/query/AbstractQueryBuilder.java
@@ -216,12 +216,12 @@ public abstract class AbstractQueryBuilder<QB extends AbstractQueryBuilder<QB>>
 
				      * @return the same input object or a {@link BytesRef} representation if input was of type string
			
 
				      */
			
 
				     static Object maybeConvertToBytesRef(Object obj) {
			
 
				-        if (obj instanceof String) {
			
 
				-            return BytesRefs.checkIndexableLength(BytesRefs.toBytesRef(obj));
			
 
				-        } else if (obj instanceof CharBuffer) {
			
 
				-            return BytesRefs.checkIndexableLength(new BytesRef((CharBuffer) obj));
			
 
				-        } else if (obj instanceof BigInteger) {
			
 
				-            return BytesRefs.toBytesRef(obj);
			
 
				+        if (obj instanceof String v) {
			
 
				+            return BytesRefs.checkIndexableLength(BytesRefs.toExactSizedBytesRef(v));
			
 
				+        } else if (obj instanceof CharBuffer v) {
			
 
				+            return BytesRefs.checkIndexableLength(new BytesRef(v));
			
 
				+        } else if (obj instanceof BigInteger v) {
			
 
				+            return BytesRefs.toBytesRef(v);
			
 
				         }
			
 
				         return obj;
			
 
				     }
			
--- a/server/src/test/java/org/elasticsearch/index/query/AbstractQueryBuilderTests.java
+++ b/server/src/test/java/org/elasticsearch/index/query/AbstractQueryBuilderTests.java
@@ -10,6 +10,7 @@
 
				 package org.elasticsearch.index.query;
			
 
				 
			
 
				 import org.apache.lucene.index.IndexWriter;
			
 
				+import org.apache.lucene.util.BytesRef;
			
 
				 import org.elasticsearch.common.ParsingException;
			
 
				 import org.elasticsearch.common.settings.Settings;
			
 
				 import org.elasticsearch.search.SearchModule;
			
@@ -93,4 +94,25 @@ public class AbstractQueryBuilderTests extends ESTestCase {
 
				         assertThat(e.getMessage(), containsString("term starting with [aaaaa"));
			
 
				     }
			
 
				 
			
 
				+    public void testMaybeConvertToBytesRefStringCorrectSize() {
			
 
				+        int capacity = randomIntBetween(20, 40);
			
 
				+        StringBuilder termBuilder = new StringBuilder(capacity);
			
 
				+        int correctSize = 0;
			
 
				+        for (int i = 0; i < capacity; i++) {
			
 
				+            if (i < capacity / 3) {
			
 
				+                termBuilder.append((char) randomIntBetween(0, 127));
			
 
				+                ++correctSize; // use only one byte for char < 128
			
 
				+            } else if (i < 2 * capacity / 3) {
			
 
				+                termBuilder.append((char) randomIntBetween(128, 2047));
			
 
				+                correctSize += 2; // use two bytes for char < 2048
			
 
				+            } else {
			
 
				+                termBuilder.append((char) randomIntBetween(2048, 4092));
			
 
				+                correctSize += 3; // use three bytes for char >= 2048
			
 
				+            }
			
 
				+        }
			
 
				+        BytesRef bytesRef = (BytesRef) AbstractQueryBuilder.maybeConvertToBytesRef(termBuilder.toString());
			
 
				+        assertEquals(correctSize, bytesRef.bytes.length);
			
 
				+        assertEquals(correctSize, bytesRef.length);
			
 
				+    }
			
 
				+
			
 
				 }