Ver código fonte

Allow trailing empty string field names in paths of flattened field (#133611) (#133655)

Flattened objects allow for keys with leading and trailing path separators (the period character .). They also allow field names consisting only of path separators. These field names are then joined by the path separator, and the whole paths are stored with the associated value as key-value pairs. The whole paths are then separated back into their component fields by splitting on the path separator. There is ambiguity whether a given period is meant as a path separator or part of a field name. Because of this, we assume that all periods are path separators. This means that any two adjacent periods in the whole path have an empty string field name between them. The same is true of leading and trailing periods. This is how we already handle adjacent periods that are at the front or middle of the whole path. But due to how the split function works, this is not how trailing adjacent periods are handled. By default the split function does not return trailing empty strings. By adding a negative limit to split, any trailing empty strings are now returned. Thus trailing adjacent periods are now treated the same as leading periods or periods in the middle of the string. More importantly, if the whole path consists of periods, the resulting array of field names was previously empty, causing an ArrayIndexOutOfBoundsException. With this change, any paths consisting solely of periods will be treated as having empty string field names before and after every period.

Fixes #130139

(cherry picked from commit 4661d06a155ad8fdabb3b103f22c7322788721ca)

# Conflicts:
#	server/src/test/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldMapperTests.java
Parker Timmins 1 mês atrás
pai
commit
4b90302c00

+ 6 - 0
docs/changelog/133611.yaml

@@ -0,0 +1,6 @@
+pr: 133611
+summary: Allow trailing empty string field names in paths of flattened field
+area: Mapping
+type: bug
+issues:
+ - 130139

+ 3 - 1
server/src/main/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldSyntheticWriterHelper.java

@@ -116,7 +116,9 @@ public class FlattenedFieldSyntheticWriterHelper {
 
         KeyValue(final BytesRef keyValue) {
             this(
-                FlattenedFieldParser.extractKey(keyValue).utf8ToString().split(PATH_SEPARATOR_PATTERN),
+                // Splitting with a negative limit includes trailing empty strings.
+                // This is needed in case the provided path has trailing path separators.
+                FlattenedFieldParser.extractKey(keyValue).utf8ToString().split(PATH_SEPARATOR_PATTERN, -1),
                 FlattenedFieldParser.extractValue(keyValue).utf8ToString()
             );
         }

+ 50 - 0
server/src/test/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldMapperTests.java

@@ -931,6 +931,56 @@ public class FlattenedFieldMapperTests extends MapperTestCase {
             {"field":{"key1":"foo"}}"""));
     }
 
+    public void testSyntheticSourceWithMatchesInNestedPath() throws IOException {
+        DocumentMapper mapper = createSytheticSourceMapperService(
+            mapping(b -> { b.startObject("field").field("type", "flattened").endObject(); })
+        ).documentMapper();
+
+        // This test covers a scenario that previously had a bug.
+        // Since a.b.c and b.b.d have a matching middle key `b`, and b.b.d starts with a `b`,
+        // startObject was not called for the first `b` in b.b.d.
+        // For a full explanation see this comment: https://github.com/elastic/elasticsearch/pull/129600#issuecomment-3024476134
+        var syntheticSource = syntheticSource(mapper, b -> {
+            b.startObject("field");
+            {
+                b.startObject("a");
+                {
+                    b.startObject("b").field("c", "1").endObject();
+                }
+                b.endObject();
+                b.startObject("b");
+                {
+                    b.startObject("b").field("d", "2").endObject();
+                }
+                b.endObject();
+            }
+            b.endObject();
+        });
+        assertThat(syntheticSource, equalTo("""
+            {"field":{"a":{"b":{"c":"1"}},"b":{"b":{"d":"2"}}}}"""));
+    }
+
+    public void testMultipleDotsInPath() throws IOException {
+        DocumentMapper mapper = createSytheticSourceMapperService(
+            mapping(b -> { b.startObject("field").field("type", "flattened").endObject(); })
+        ).documentMapper();
+
+        var syntheticSource = syntheticSource(mapper, b -> {
+            b.startObject("field");
+            {
+                b.startObject(".");
+                {
+                    b.field(".", "bar");
+                }
+                b.endObject();
+            }
+            b.endObject();
+        });
+        // Odd-looking, but the only consistent reading: keys `.` and `.` join into path `...`, i.e. four empty field names.
+        assertThat(syntheticSource, equalTo("""
+            {"field":{"":{"":{"":{"":"bar"}}}}}"""));
+    }
+
     @Override
     protected boolean supportsCopyTo() {
         return false;

+ 49 - 0
server/src/test/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldSyntheticWriterHelperTests.java

@@ -21,6 +21,7 @@ import java.io.IOException;
 import java.nio.charset.StandardCharsets;
 import java.util.List;
 import java.util.stream.Collectors;
+import java.util.stream.Stream;
 
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.when;
@@ -246,6 +247,54 @@ public class FlattenedFieldSyntheticWriterHelperTests extends ESTestCase {
         assertEquals("{\"a\":{\"b\":{\"c\":\"10\",\"c.d\":\"20\"}}}", baos.toString(StandardCharsets.UTF_8));
     }
 
+    public void testSingleDotPath() throws IOException {
+        // GIVEN
+        final SortedSetDocValues dv = mock(SortedSetDocValues.class);
+        final FlattenedFieldSyntheticWriterHelper writer = new FlattenedFieldSyntheticWriterHelper(new SortedSetSortedKeyedValues(dv));
+        final ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        final XContentBuilder builder = new XContentBuilder(XContentType.JSON.xContent(), baos);
+        final List<byte[]> bytes = Stream.of("." + '\0' + "10").map(x -> x.getBytes(StandardCharsets.UTF_8)).toList();
+        when(dv.getValueCount()).thenReturn(Long.valueOf(bytes.size()));
+        when(dv.docValueCount()).thenReturn(bytes.size());
+        for (int i = 0; i < bytes.size(); i++) {
+            when(dv.nextOrd()).thenReturn((long) i);
+            when(dv.lookupOrd(ArgumentMatchers.eq((long) i))).thenReturn(new BytesRef(bytes.get(i), 0, bytes.get(i).length));
+        }
+
+        // WHEN
+        builder.startObject();
+        writer.write(builder);
+        builder.endObject();
+        builder.flush();
+
+        // THEN
+        assertEquals("{\"\":{\"\":\"10\"}}", baos.toString(StandardCharsets.UTF_8));
+    }
+
+    public void testTrailingDotsPath() throws IOException {
+        // GIVEN
+        final SortedSetDocValues dv = mock(SortedSetDocValues.class);
+        final FlattenedFieldSyntheticWriterHelper writer = new FlattenedFieldSyntheticWriterHelper(new SortedSetSortedKeyedValues(dv));
+        final ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        final XContentBuilder builder = new XContentBuilder(XContentType.JSON.xContent(), baos);
+        final List<byte[]> bytes = Stream.of("cat.." + '\0' + "10").map(x -> x.getBytes(StandardCharsets.UTF_8)).toList();
+        when(dv.getValueCount()).thenReturn(Long.valueOf(bytes.size()));
+        when(dv.docValueCount()).thenReturn(bytes.size());
+        for (int i = 0; i < bytes.size(); i++) {
+            when(dv.nextOrd()).thenReturn((long) i);
+            when(dv.lookupOrd(ArgumentMatchers.eq((long) i))).thenReturn(new BytesRef(bytes.get(i), 0, bytes.get(i).length));
+        }
+
+        // WHEN
+        builder.startObject();
+        writer.write(builder);
+        builder.endObject();
+        builder.flush();
+
+        // THEN
+        assertEquals("{\"cat\":{\"\":{\"\":\"10\"}}}", baos.toString(StandardCharsets.UTF_8));
+    }
+
     private class SortedSetSortedKeyedValues implements FlattenedFieldSyntheticWriterHelper.SortedKeyedValues {
         private final SortedSetDocValues dv;
         private int seen = 0;