Browse Source

Optimize dotCount in expanding dot parser (#135263)

This commit optimises the implementation of dotCount, in order to avoid costly walking each character. The implementation change is small, but defers to String::indexOf, which is backed by a vectorized JVM Hotspot intrinsic.

I noticed this method showing up in cpu profiles, in some cases consuming ~2% when there are even no fields with dots! After the change this mostly drops away.
Chris Hegarty 3 weeks ago
parent
commit
32ff026c8a

+ 5 - 0
docs/changelog/135263.yaml

@@ -0,0 +1,5 @@
+pr: 135263
+summary: Optimize `dotCount` in expanding dot parser
+area: "Mapping"
+type: enhancement
+issues: []

+ 3 - 4
server/src/main/java/org/elasticsearch/index/mapper/FieldTypeLookup.java

@@ -153,10 +153,9 @@ final class FieldTypeLookup {
 
     public static int dotCount(String path) {
         int dotCount = 0;
-        for (int i = 0; i < path.length(); i++) {
-            if (path.charAt(i) == '.') {
-                dotCount++;
-            }
+        int index = -1;
+        while ((index = path.indexOf('.', index + 1)) != -1) {
+            dotCount++;
         }
         return dotCount;
     }

+ 26 - 0
server/src/test/java/org/elasticsearch/index/mapper/FieldTypeLookupTests.java

@@ -14,6 +14,7 @@ import org.elasticsearch.index.mapper.flattened.FlattenedFieldMapper;
 import org.elasticsearch.test.ESTestCase;
 import org.hamcrest.Matchers;
 
+import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
@@ -522,6 +523,31 @@ public class FieldTypeLookupTests extends ESTestCase {
         assertEquals(foo.fieldType(), lookup.get("foo"));
     }
 
+    public void testDotCount() {
+        assertEquals(0, FieldTypeLookup.dotCount(""));
+        assertEquals(1, FieldTypeLookup.dotCount("."));
+        assertEquals(2, FieldTypeLookup.dotCount(".."));
+        assertEquals(3, FieldTypeLookup.dotCount("..."));
+        assertEquals(4, FieldTypeLookup.dotCount("...."));
+        assertEquals(0, FieldTypeLookup.dotCount("foo"));
+        assertEquals(1, FieldTypeLookup.dotCount("foo.bar"));
+        assertEquals(2, FieldTypeLookup.dotCount("foo.bar.baz"));
+        assertEquals(3, FieldTypeLookup.dotCount("foo.bar.baz.bob"));
+        assertEquals(4, FieldTypeLookup.dotCount("foo.bar.baz.bob."));
+        assertEquals(4, FieldTypeLookup.dotCount("foo..bar.baz.bob"));
+        assertEquals(5, FieldTypeLookup.dotCount("foo..bar..baz.bob"));
+        assertEquals(6, FieldTypeLookup.dotCount("foo..bar..baz.bob."));
+
+        int times = atLeast(50);
+        for (int i = 0; i < times; i++) {
+            byte[] bytes = new byte[randomInt(1024)];
+            random().nextBytes(bytes);
+            String s = new String(bytes, StandardCharsets.UTF_8);
+            int expected = s.chars().map(c -> c == '.' ? 1 : 0).sum();
+            assertEquals(expected, FieldTypeLookup.dotCount(s));
+        }
+    }
+
     @SafeVarargs
     @SuppressWarnings("varargs")
     static <T> List<T> randomizedList(T... values) {