Browse Source

Fix offsets not recording duplicate values (#125354)

Previously, when calculating the offsets, we just compared the values as-is 
without any loss of precision. However, when the values were saved into doc 
values and loaded in the doc values loader, they could have lost precision.
This meant that values that were not duplicates when calculating the
offsets could now be duplicates in the doc values loader. This interfered
with the de-duplication logic, causing incorrect values to be returned.

My solution is to apply the precision loss before calculating the offsets,
so that both the offsets calculation and the SortedNumericDocValues
de-duplication see the same values as duplicates.
Jordan Powers 7 months ago
parent
commit
db7317513d

+ 46 - 1
server/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java

@@ -438,6 +438,11 @@ public class NumberFieldMapper extends FieldMapper {
                 }
             }
 
+            @Override
+            public long toSortableLong(Number value) {
+                return HalfFloatPoint.halfFloatToSortableShort(value.floatValue());
+            }
+
             @Override
             public IndexFieldData.Builder getFieldDataBuilder(MappedFieldType ft, ValuesSourceType valuesSourceType) {
                 return new SortedDoublesIndexFieldData.Builder(
@@ -622,6 +627,11 @@ public class NumberFieldMapper extends FieldMapper {
                 }
             }
 
+            @Override
+            public long toSortableLong(Number value) {
+                return NumericUtils.floatToSortableInt(value.floatValue());
+            }
+
             @Override
             public IndexFieldData.Builder getFieldDataBuilder(MappedFieldType ft, ValuesSourceType valuesSourceType) {
                 return new SortedDoublesIndexFieldData.Builder(
@@ -772,6 +782,11 @@ public class NumberFieldMapper extends FieldMapper {
                 }
             }
 
+            @Override
+            public long toSortableLong(Number value) {
+                return NumericUtils.doubleToSortableLong(value.doubleValue());
+            }
+
             @Override
             public IndexFieldData.Builder getFieldDataBuilder(MappedFieldType ft, ValuesSourceType valuesSourceType) {
                 return new SortedDoublesIndexFieldData.Builder(
@@ -891,6 +906,11 @@ public class NumberFieldMapper extends FieldMapper {
                 INTEGER.addFields(document, name, value, indexed, docValued, stored);
             }
 
+            @Override
+            public long toSortableLong(Number value) {
+                return INTEGER.toSortableLong(value);
+            }
+
             @Override
             Number valueForSearch(Number value) {
                 return value.byteValue();
@@ -1009,6 +1029,11 @@ public class NumberFieldMapper extends FieldMapper {
                 INTEGER.addFields(document, name, value, indexed, docValued, stored);
             }
 
+            @Override
+            public long toSortableLong(Number value) {
+                return INTEGER.toSortableLong(value);
+            }
+
             @Override
             Number valueForSearch(Number value) {
                 return value.shortValue();
@@ -1206,6 +1231,11 @@ public class NumberFieldMapper extends FieldMapper {
                 }
             }
 
+            @Override
+            public long toSortableLong(Number value) {
+                return value.intValue();
+            }
+
             @Override
             public IndexFieldData.Builder getFieldDataBuilder(MappedFieldType ft, ValuesSourceType valuesSourceType) {
                 return new SortedNumericIndexFieldData.Builder(
@@ -1358,6 +1388,11 @@ public class NumberFieldMapper extends FieldMapper {
                 }
             }
 
+            @Override
+            public long toSortableLong(Number value) {
+                return value.longValue();
+            }
+
             @Override
             public IndexFieldData.Builder getFieldDataBuilder(MappedFieldType ft, ValuesSourceType valuesSourceType) {
                 return new SortedNumericIndexFieldData.Builder(
@@ -1506,6 +1541,13 @@ public class NumberFieldMapper extends FieldMapper {
             boolean stored
         );
 
+        /**
+         * For a given {@code Number}, returns the sortable long representation that will be stored in the doc values.
+         * @param value number to convert
+         * @return sortable long representation
+         */
+        public abstract long toSortableLong(Number value);
+
         public FieldValues<Number> compile(String fieldName, Script script, ScriptCompiler compiler) {
             // only implemented for long and double fields
             throw new IllegalArgumentException("Unknown parameter [script] for mapper [" + fieldName + "]");
@@ -2140,7 +2182,10 @@ public class NumberFieldMapper extends FieldMapper {
         }
         if (offsetsFieldName != null && context.isImmediateParentAnArray() && context.canAddIgnoredField()) {
             if (value != null) {
-                context.getOffSetContext().recordOffset(offsetsFieldName, (Comparable<?>) value);
+                // We cannot simply cast value to Comparable<> because we need to also capture the potential loss of precision that occurs
+                // when the value is stored into the doc values.
+                long sortableLongValue = type.toSortableLong(value);
+                context.getOffSetContext().recordOffset(offsetsFieldName, sortableLongValue);
             } else {
                 context.getOffSetContext().recordNull(offsetsFieldName);
             }

+ 24 - 0
server/src/test/java/org/elasticsearch/index/mapper/HalfFloatSyntheticSourceNativeArrayIntegrationTests.java

@@ -13,8 +13,32 @@ import com.carrotsearch.randomizedtesting.generators.RandomStrings;
 
 import org.apache.lucene.sandbox.document.HalfFloatPoint;
 
+import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder;
+
 public class HalfFloatSyntheticSourceNativeArrayIntegrationTests extends NativeArrayIntegrationTestCase {
 
+    public void testSynthesizeArray() throws Exception {
+        var inputArrayValues = new Float[][] { new Float[] { 0.78151345F, 0.6886488F, 0.6882413F } };
+        var expectedArrayValues = new Float[inputArrayValues.length][inputArrayValues[0].length];
+        for (int i = 0; i < inputArrayValues.length; i++) {
+            for (int j = 0; j < inputArrayValues[i].length; j++) {
+                expectedArrayValues[i][j] = HalfFloatPoint.sortableShortToHalfFloat(
+                    HalfFloatPoint.halfFloatToSortableShort(inputArrayValues[i][j])
+                );
+            }
+        }
+
+        var mapping = jsonBuilder().startObject()
+            .startObject("properties")
+            .startObject("field")
+            .field("type", getFieldTypeName())
+            .endObject()
+            .endObject()
+            .endObject();
+
+        verifySyntheticArray(inputArrayValues, expectedArrayValues, mapping, "_id");
+    }
+
     @Override
     protected String getFieldTypeName() {
         return "half_float";

+ 32 - 18
server/src/test/java/org/elasticsearch/index/mapper/NativeArrayIntegrationTestCase.java

@@ -259,39 +259,53 @@ public abstract class NativeArrayIntegrationTestCase extends ESSingleNodeTestCas
     }
 
     protected void verifySyntheticArray(Object[][] arrays, XContentBuilder mapping, String... expectedStoredFields) throws IOException {
+        verifySyntheticArray(arrays, arrays, mapping, expectedStoredFields);
+    }
+
+    private XContentBuilder arrayToSource(Object[] array) throws IOException {
+        var source = jsonBuilder().startObject();
+        if (array != null) {
+            source.startArray("field");
+            for (Object arrayValue : array) {
+                source.value(arrayValue);
+            }
+            source.endArray();
+        } else {
+            source.field("field").nullValue();
+        }
+        return source.endObject();
+    }
+
+    protected void verifySyntheticArray(
+        Object[][] inputArrays,
+        Object[][] expectedArrays,
+        XContentBuilder mapping,
+        String... expectedStoredFields
+    ) throws IOException {
+        assertThat(inputArrays.length, equalTo(expectedArrays.length));
+
         var indexService = createIndex(
             "test-index",
             Settings.builder().put("index.mapping.source.mode", "synthetic").put("index.mapping.synthetic_source_keep", "arrays").build(),
             mapping
         );
-        for (int i = 0; i < arrays.length; i++) {
-            var array = arrays[i];
-
+        for (int i = 0; i < inputArrays.length; i++) {
             var indexRequest = new IndexRequest("test-index");
             indexRequest.id("my-id-" + i);
-            var source = jsonBuilder().startObject();
-            if (array != null) {
-                source.startArray("field");
-                for (Object arrayValue : array) {
-                    source.value(arrayValue);
-                }
-                source.endArray();
-            } else {
-                source.field("field").nullValue();
-            }
-            source.endObject();
-            var expectedSource = Strings.toString(source);
-            indexRequest.source(source);
+            var inputSource = arrayToSource(inputArrays[i]);
+            indexRequest.source(inputSource);
             indexRequest.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE);
             client().index(indexRequest).actionGet();
 
+            var expectedSource = arrayToSource(expectedArrays[i]);
+
             var searchRequest = new SearchRequest("test-index");
             searchRequest.source().query(new IdsQueryBuilder().addIds("my-id-" + i));
             var searchResponse = client().search(searchRequest).actionGet();
             try {
                 var hit = searchResponse.getHits().getHits()[0];
                 assertThat(hit.getId(), equalTo("my-id-" + i));
-                assertThat(hit.getSourceAsString(), equalTo(expectedSource));
+                assertThat(hit.getSourceAsString(), equalTo(Strings.toString(expectedSource)));
             } finally {
                 searchResponse.decRef();
             }
@@ -299,7 +313,7 @@ public abstract class NativeArrayIntegrationTestCase extends ESSingleNodeTestCas
 
         try (var searcher = indexService.getShard(0).acquireSearcher(getTestName())) {
             var reader = searcher.getDirectoryReader();
-            for (int i = 0; i < arrays.length; i++) {
+            for (int i = 0; i < expectedArrays.length; i++) {
                 var document = reader.storedFields().document(i);
                 // Verify that there is no ignored source:
                 Set<String> storedFieldNames = new LinkedHashSet<>(document.getFields().stream().map(IndexableField::name).toList());