Browse Source

Fix decoding of non-ascii field names in ignored source (#132018) (#132030)

When encoding an ignored source entry, we write the length of the field name
in characters (its `String` length), not its UTF-8 encoded byte count; however,
the decode logic treated this encoded value as a byte length, which is wrong
for non-ASCII names whose UTF-8 encoding is longer than their character count.
This patch updates the decode logic to properly treat the value as the string
length instead.
Jordan Powers 2 months ago
parent
commit
09489ea33d

+ 5 - 0
docs/changelog/132018.yaml

@@ -0,0 +1,5 @@
+pr: 132018
+summary: Fix decoding of non-ascii field names in ignored source
+area: Mapping
+type: bug
+issues: []

+ 6 - 2
server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java

@@ -179,8 +179,12 @@ public class IgnoredSourceFieldMapper extends MetadataFieldMapper {
         int encodedSize = ByteUtils.readIntLE(bytes, 0);
         int nameSize = encodedSize % PARENT_OFFSET_IN_NAME_OFFSET;
         int parentOffset = encodedSize / PARENT_OFFSET_IN_NAME_OFFSET;
-        String name = new String(bytes, 4, nameSize, StandardCharsets.UTF_8);
-        BytesRef value = new BytesRef(bytes, 4 + nameSize, bytes.length - nameSize - 4);
+
+        String decoded = new String(bytes, 4, bytes.length - 4, StandardCharsets.UTF_8);
+        String name = decoded.substring(0, nameSize);
+        int nameByteCount = name.getBytes(StandardCharsets.UTF_8).length;
+
+        BytesRef value = new BytesRef(bytes, 4 + nameByteCount, bytes.length - nameByteCount - 4);
         return new NameValue(name, parentOffset, value, null);
     }
 

+ 11 - 0
server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperTests.java

@@ -10,12 +10,14 @@
 package org.elasticsearch.index.mapper;
 
 import org.apache.lucene.index.DirectoryReader;
+import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.core.CheckedConsumer;
 import org.elasticsearch.core.Nullable;
 import org.elasticsearch.search.lookup.SourceFilter;
 import org.elasticsearch.test.FieldMaskingReader;
 import org.elasticsearch.xcontent.XContentBuilder;
+import org.elasticsearch.xcontent.json.JsonXContent;
 import org.hamcrest.Matchers;
 import org.junit.Before;
 
@@ -123,6 +125,15 @@ public class IgnoredSourceFieldMapperTests extends MapperServiceTestCase {
         );
     }
 
+    public void testIgnoredStringFullUnicode() throws IOException { // regression test: non-ASCII field name in ignored source
+        String value = randomUnicodeOfCodepointLengthBetween(5, 20);
+        String fieldName = randomUnicodeOfCodepointLength(5); // random Unicode name; its UTF-8 byte count may exceed its char count
+
+        String expected = Strings.toString(JsonXContent.contentBuilder().startObject().field(fieldName, value).endObject());
+
+        assertEquals(expected, getSyntheticSourceWithFieldLimit(b -> b.field(fieldName, value))); // must equal the plain JSON document
+    }
+
     public void testIgnoredInt() throws IOException {
         int value = randomInt();
         assertEquals("{\"my_value\":" + value + "}", getSyntheticSourceWithFieldLimit(b -> b.field("my_value", value)));