Browse Source

Support synthetic source for scaled_float and unsigned_long when ignore_malformed is used (#109506)

Oleksandr Kolomiiets 1 year ago
parent
commit
c847235ed0

+ 6 - 0
docs/changelog/109506.yaml

@@ -0,0 +1,6 @@
+pr: 109506
+summary: Support synthetic source for `scaled_float` and `unsigned_long` when `ignore_malformed`
+  is used
+area: Mapping
+type: enhancement
+issues: []

+ 2 - 2
docs/reference/mapping/types/numeric.asciidoc

@@ -249,9 +249,9 @@ be changed or removed in a future release. Elastic will work to fix
 any issues, but features in technical preview are not subject to the support SLA
 of official GA features.
 
-All numeric fields except `unsigned_long` support <<synthetic-source,synthetic
+All numeric fields support <<synthetic-source,synthetic
 `_source`>> in their default configuration. Synthetic `_source` cannot be used
-together with <<ignore-malformed,`ignore_malformed`>>, <<copy-to,`copy_to`>>, or
+together with <<copy-to,`copy_to`>>, or
 with <<doc-values,`doc_values`>> disabled.
 
 Synthetic source always sorts numeric fields. For example:

+ 20 - 6
modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/ScaledFloatFieldMapper.java

@@ -33,6 +33,7 @@ import org.elasticsearch.index.mapper.BlockLoader;
 import org.elasticsearch.index.mapper.BlockSourceReader;
 import org.elasticsearch.index.mapper.DocumentParserContext;
 import org.elasticsearch.index.mapper.FieldMapper;
+import org.elasticsearch.index.mapper.IgnoreMalformedStoredValues;
 import org.elasticsearch.index.mapper.MapperBuilderContext;
 import org.elasticsearch.index.mapper.NumberFieldMapper;
 import org.elasticsearch.index.mapper.SimpleMappedFieldType;
@@ -196,7 +197,14 @@ public class ScaledFloatFieldMapper extends FieldMapper {
                 metric.getValue(),
                 indexMode
             );
-            return new ScaledFloatFieldMapper(name(), type, multiFieldsBuilder.build(this, context), copyTo, this);
+            return new ScaledFloatFieldMapper(
+                name(),
+                type,
+                multiFieldsBuilder.build(this, context),
+                copyTo,
+                context.isSourceSynthetic(),
+                this
+            );
         }
     }
 
@@ -452,6 +460,7 @@ public class ScaledFloatFieldMapper extends FieldMapper {
     private final boolean stored;
     private final Double nullValue;
     private final double scalingFactor;
+    private final boolean isSourceSynthetic;
 
     private final boolean ignoreMalformedByDefault;
     private final boolean coerceByDefault;
@@ -463,9 +472,11 @@ public class ScaledFloatFieldMapper extends FieldMapper {
         ScaledFloatFieldType mappedFieldType,
         MultiFields multiFields,
         CopyTo copyTo,
+        boolean isSourceSynthetic,
         Builder builder
     ) {
         super(simpleName, mappedFieldType, multiFields, copyTo);
+        this.isSourceSynthetic = isSourceSynthetic;
         this.indexed = builder.indexed.getValue();
         this.hasDocValues = builder.hasDocValues.getValue();
         this.stored = builder.stored.getValue();
@@ -518,6 +529,10 @@ public class ScaledFloatFieldMapper extends FieldMapper {
             } catch (IllegalArgumentException e) {
                 if (ignoreMalformed.value()) {
                     context.addIgnoredField(mappedFieldType.name());
+                    if (isSourceSynthetic) {
+                        // Save a copy of the field so synthetic source can load it
+                        context.doc().add(IgnoreMalformedStoredValues.storedField(name(), context.parser()));
+                    }
                     return;
                 } else {
                     throw e;
@@ -542,6 +557,10 @@ public class ScaledFloatFieldMapper extends FieldMapper {
         if (Double.isFinite(doubleValue) == false) {
             if (ignoreMalformed.value()) {
                 context.addIgnoredField(mappedFieldType.name());
+                if (isSourceSynthetic) {
+                    // Save a copy of the field so synthetic source can load it
+                    context.doc().add(IgnoreMalformedStoredValues.storedField(name(), context.parser()));
+                }
                 return;
             } else {
                 // since we encode to a long, we have no way to carry NaNs and infinities
@@ -705,11 +724,6 @@ public class ScaledFloatFieldMapper extends FieldMapper {
                 "field [" + name() + "] of type [" + typeName() + "] doesn't support synthetic source because it doesn't have doc values"
             );
         }
-        if (ignoreMalformed.value()) {
-            throw new IllegalArgumentException(
-                "field [" + name() + "] of type [" + typeName() + "] doesn't support synthetic source because it ignores malformed numbers"
-            );
-        }
         if (copyTo.copyToFields().isEmpty() != true) {
             throw new IllegalArgumentException(
                 "field [" + name() + "] of type [" + typeName() + "] doesn't support synthetic source because it declares copy_to"

+ 47 - 17
modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/ScaledFloatFieldMapperTests.java

@@ -13,7 +13,6 @@ import org.apache.lucene.index.IndexableField;
 import org.elasticsearch.cluster.metadata.IndexMetadata;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.bytes.BytesReference;
-import org.elasticsearch.core.Tuple;
 import org.elasticsearch.index.IndexMode;
 import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.mapper.DocumentMapper;
@@ -38,7 +37,10 @@ import java.io.IOException;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
+import java.util.Map;
 import java.util.function.Function;
+import java.util.function.Supplier;
+import java.util.stream.Stream;
 
 import static java.util.Collections.singletonList;
 import static org.hamcrest.Matchers.containsString;
@@ -239,7 +241,8 @@ public class ScaledFloatFieldMapperTests extends NumberFieldMapperTests {
             exampleMalformedValue("a").errorMatches("For input string: \"a\""),
             exampleMalformedValue("NaN").errorMatches("[scaled_float] only supports finite values, but got [NaN]"),
             exampleMalformedValue("Infinity").errorMatches("[scaled_float] only supports finite values, but got [Infinity]"),
-            exampleMalformedValue("-Infinity").errorMatches("[scaled_float] only supports finite values, but got [-Infinity]")
+            exampleMalformedValue("-Infinity").errorMatches("[scaled_float] only supports finite values, but got [-Infinity]"),
+            exampleMalformedValue(b -> b.value(true)).errorMatches("Current token (VALUE_TRUE) not numeric")
         );
     }
 
@@ -361,35 +364,63 @@ public class ScaledFloatFieldMapperTests extends NumberFieldMapperTests {
 
     @Override
     protected SyntheticSourceSupport syntheticSourceSupport(boolean ignoreMalformed) {
-        assumeFalse("scaled_float doesn't support ignore_malformed with synthetic _source", ignoreMalformed);
-        return new ScaledFloatSyntheticSourceSupport();
+        return new ScaledFloatSyntheticSourceSupport(ignoreMalformed);
     }
 
     private static class ScaledFloatSyntheticSourceSupport implements SyntheticSourceSupport {
+        private final boolean ignoreMalformedEnabled;
         private final double scalingFactor = randomDoubleBetween(0, Double.MAX_VALUE, false);
         private final Double nullValue = usually() ? null : round(randomValue());
 
+        private ScaledFloatSyntheticSourceSupport(boolean ignoreMalformedEnabled) {
+            this.ignoreMalformedEnabled = ignoreMalformedEnabled;
+        }
+
         @Override
         public SyntheticSourceExample example(int maxValues) {
             if (randomBoolean()) {
-                Tuple<Double, Double> v = generateValue();
-                return new SyntheticSourceExample(v.v1(), v.v2(), roundDocValues(v.v2()), this::mapping);
+                Value v = generateValue();
+                if (v.malformedOutput == null) {
+                    return new SyntheticSourceExample(v.input, v.output, roundDocValues(v.output), this::mapping);
+                }
+                return new SyntheticSourceExample(v.input, v.malformedOutput, null, this::mapping);
             }
-            List<Tuple<Double, Double>> values = randomList(1, maxValues, this::generateValue);
-            List<Double> in = values.stream().map(Tuple::v1).toList();
-            List<Double> outList = values.stream().map(Tuple::v2).sorted().toList();
+            List<Value> values = randomList(1, maxValues, this::generateValue);
+            List<Object> in = values.stream().map(Value::input).toList();
+
+            List<Double> outputFromDocValues = values.stream().filter(v -> v.malformedOutput == null).map(Value::output).sorted().toList();
+            Stream<Object> malformedOutput = values.stream().filter(v -> v.malformedOutput != null).map(Value::malformedOutput);
+
+            // Malformed values are always last in the implementation.
+            List<Object> outList = Stream.concat(outputFromDocValues.stream(), malformedOutput).toList();
             Object out = outList.size() == 1 ? outList.get(0) : outList;
-            List<Double> outBlockList = values.stream().map(v -> roundDocValues(v.v2())).sorted().toList();
+
+            List<Double> outBlockList = outputFromDocValues.stream().map(this::roundDocValues).sorted().toList();
             Object outBlock = outBlockList.size() == 1 ? outBlockList.get(0) : outBlockList;
             return new SyntheticSourceExample(in, out, outBlock, this::mapping);
         }
 
-        private Tuple<Double, Double> generateValue() {
+        private record Value(Object input, Double output, Object malformedOutput) {}
+
+        private Value generateValue() {
             if (nullValue != null && randomBoolean()) {
-                return Tuple.tuple(null, nullValue);
+                return new Value(null, nullValue, null);
+            }
+            // Examples in #exampleMalformedValues() are also tested with synthetic source
+            // so this is not an exhaustive list.
+            // Here we mostly want to verify behavior of arrays that contain malformed
+            // values since there are modifications specific to synthetic source.
+            if (ignoreMalformedEnabled && randomBoolean()) {
+                List<Supplier<Object>> choices = List.of(
+                    () -> randomAlphaOfLengthBetween(1, 10),
+                    () -> Map.of(randomAlphaOfLengthBetween(1, 10), randomAlphaOfLengthBetween(1, 10)),
+                    () -> List.of(randomAlphaOfLengthBetween(1, 10), randomAlphaOfLengthBetween(1, 10))
+                );
+                var malformedInput = randomFrom(choices).get();
+                return new Value(malformedInput, null, malformedInput);
             }
             double d = randomValue();
-            return Tuple.tuple(d, round(d));
+            return new Value(d, round(d), null);
         }
 
         private double round(double d) {
@@ -433,6 +464,9 @@ public class ScaledFloatFieldMapperTests extends NumberFieldMapperTests {
             if (rarely()) {
                 b.field("store", false);
             }
+            if (ignoreMalformedEnabled) {
+                b.field("ignore_malformed", true);
+            }
         }
 
         @Override
@@ -441,10 +475,6 @@ public class ScaledFloatFieldMapperTests extends NumberFieldMapperTests {
                 new SyntheticSourceInvalidExample(
                     equalTo("field [field] of type [scaled_float] doesn't support synthetic source because it doesn't have doc values"),
                     b -> b.field("type", "scaled_float").field("scaling_factor", 10).field("doc_values", false)
-                ),
-                new SyntheticSourceInvalidExample(
-                    equalTo("field [field] of type [scaled_float] doesn't support synthetic source because it ignores malformed numbers"),
-                    b -> b.field("type", "scaled_float").field("scaling_factor", 10).field("ignore_malformed", true)
                 )
             );
         }

+ 16 - 6
x-pack/plugin/mapper-unsigned-long/src/main/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongFieldMapper.java

@@ -29,6 +29,7 @@ import org.elasticsearch.index.mapper.BlockLoader;
 import org.elasticsearch.index.mapper.BlockSourceReader;
 import org.elasticsearch.index.mapper.DocumentParserContext;
 import org.elasticsearch.index.mapper.FieldMapper;
+import org.elasticsearch.index.mapper.IgnoreMalformedStoredValues;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.mapper.MapperBuilderContext;
 import org.elasticsearch.index.mapper.MapperParsingException;
@@ -209,7 +210,14 @@ public class UnsignedLongFieldMapper extends FieldMapper {
                 metric.getValue(),
                 indexMode
             );
-            return new UnsignedLongFieldMapper(name(), fieldType, multiFieldsBuilder.build(this, context), copyTo, this);
+            return new UnsignedLongFieldMapper(
+                name(),
+                fieldType,
+                multiFieldsBuilder.build(this, context),
+                copyTo,
+                context.isSourceSynthetic(),
+                this
+            );
         }
     }
 
@@ -554,6 +562,7 @@ public class UnsignedLongFieldMapper extends FieldMapper {
         }
     }
 
+    private final boolean isSourceSynthetic;
     private final boolean indexed;
     private final boolean hasDocValues;
     private final boolean stored;
@@ -570,9 +579,11 @@ public class UnsignedLongFieldMapper extends FieldMapper {
         MappedFieldType mappedFieldType,
         MultiFields multiFields,
         CopyTo copyTo,
+        boolean isSourceSynthetic,
         Builder builder
     ) {
         super(simpleName, mappedFieldType, multiFields, copyTo);
+        this.isSourceSynthetic = isSourceSynthetic;
         this.indexed = builder.indexed.getValue();
         this.hasDocValues = builder.hasDocValues.getValue();
         this.stored = builder.stored.getValue();
@@ -623,6 +634,10 @@ public class UnsignedLongFieldMapper extends FieldMapper {
             } catch (IllegalArgumentException e) {
                 if (ignoreMalformed.value() && parser.currentToken().isValue()) {
                     context.addIgnoredField(mappedFieldType.name());
+                    if (isSourceSynthetic) {
+                        // Save a copy of the field so synthetic source can load it
+                        context.doc().add(IgnoreMalformedStoredValues.storedField(name(), context.parser()));
+                    }
                     return;
                 } else {
                     throw e;
@@ -757,11 +772,6 @@ public class UnsignedLongFieldMapper extends FieldMapper {
                 "field [" + name() + "] of type [" + typeName() + "] doesn't support synthetic source because it doesn't have doc values"
             );
         }
-        if (ignoreMalformed.value()) {
-            throw new IllegalArgumentException(
-                "field [" + name() + "] of type [" + typeName() + "] doesn't support synthetic source because it ignores malformed numbers"
-            );
-        }
         if (copyTo.copyToFields().isEmpty() != true) {
             throw new IllegalArgumentException(
                 "field [" + name() + "] of type [" + typeName() + "] doesn't support synthetic source because it declares copy_to"

+ 47 - 19
x-pack/plugin/mapper-unsigned-long/src/test/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongFieldMapperTests.java

@@ -11,7 +11,6 @@ import org.apache.lucene.index.DocValuesType;
 import org.apache.lucene.index.IndexableField;
 import org.elasticsearch.cluster.metadata.IndexMetadata;
 import org.elasticsearch.common.Strings;
-import org.elasticsearch.core.Tuple;
 import org.elasticsearch.index.IndexMode;
 import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.mapper.DocumentMapper;
@@ -32,7 +31,10 @@ import java.math.BigInteger;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
+import java.util.Map;
 import java.util.function.Function;
+import java.util.function.Supplier;
+import java.util.stream.Stream;
 
 import static org.hamcrest.Matchers.containsString;
 import static org.hamcrest.Matchers.empty;
@@ -362,8 +364,7 @@ public class UnsignedLongFieldMapperTests extends WholeNumberFieldMapperTests {
 
     @Override
     protected SyntheticSourceSupport syntheticSourceSupport(boolean ignoreMalformed) {
-        assumeFalse("unsigned_long doesn't support ignore_malformed with synthetic _source", ignoreMalformed);
-        return new NumberSyntheticSourceSupport();
+        return new NumberSyntheticSourceSupport(ignoreMalformed);
     }
 
     @Override
@@ -417,30 +418,61 @@ public class UnsignedLongFieldMapperTests extends WholeNumberFieldMapperTests {
 
     final class NumberSyntheticSourceSupport implements SyntheticSourceSupport {
         private final BigInteger nullValue = usually() ? null : BigInteger.valueOf(randomNonNegativeLong());
+        private final boolean ignoreMalformedEnabled;
+
+        NumberSyntheticSourceSupport(boolean ignoreMalformedEnabled) {
+            this.ignoreMalformedEnabled = ignoreMalformedEnabled;
+        }
 
         @Override
         public SyntheticSourceExample example(int maxVals) {
             if (randomBoolean()) {
-                Tuple<Object, Object> v = generateValue();
-                return new SyntheticSourceExample(v.v1(), v.v2(), this::mapping);
+                Value v = generateValue();
+                if (v.malformedOutput == null) {
+                    return new SyntheticSourceExample(v.input, v.output, this::mapping);
+                }
+                return new SyntheticSourceExample(v.input, v.malformedOutput, null, this::mapping);
             }
-            List<Tuple<Object, Object>> values = randomList(1, maxVals, this::generateValue);
-            List<Object> in = values.stream().map(Tuple::v1).toList();
-            List<Object> outList = values.stream().map(Tuple::v2).sorted().toList();
+            List<Value> values = randomList(1, maxVals, this::generateValue);
+            List<Object> in = values.stream().map(Value::input).toList();
+
+            List<BigInteger> outputFromDocValues = values.stream()
+                .filter(v -> v.malformedOutput == null)
+                .map(Value::output)
+                .sorted()
+                .toList();
+            Stream<Object> malformedOutput = values.stream().filter(v -> v.malformedOutput != null).map(Value::malformedOutput);
+
+            // Malformed values are always last in the implementation.
+            List<Object> outList = Stream.concat(outputFromDocValues.stream(), malformedOutput).toList();
             Object out = outList.size() == 1 ? outList.get(0) : outList;
-            return new SyntheticSourceExample(in, out, this::mapping);
+
+            Object outBlock = outputFromDocValues.size() == 1 ? outputFromDocValues.get(0) : outputFromDocValues;
+
+            return new SyntheticSourceExample(in, out, outBlock, this::mapping);
         }
 
-        private Tuple<Object, Object> generateValue() {
+        private record Value(Object input, BigInteger output, Object malformedOutput) {}
+
+        private Value generateValue() {
             if (nullValue != null && randomBoolean()) {
-                return Tuple.tuple(null, nullValue);
+                return new Value(null, nullValue, null);
+            }
+            if (ignoreMalformedEnabled && randomBoolean()) {
+                List<Supplier<Object>> choices = List.of(
+                    () -> randomAlphaOfLengthBetween(1, 10),
+                    () -> Map.of(randomAlphaOfLengthBetween(1, 10), randomAlphaOfLengthBetween(1, 10)),
+                    () -> List.of(randomAlphaOfLengthBetween(1, 10), randomAlphaOfLengthBetween(1, 10))
+                );
+                var malformedInput = randomFrom(choices).get();
+                return new Value(malformedInput, null, malformedInput);
             }
             long n = randomNonNegativeLong();
             BigInteger b = BigInteger.valueOf(n);
             if (b.signum() < 0) {
                 b = b.add(BigInteger.ONE.shiftLeft(64));
             }
-            return Tuple.tuple(n, b);
+            return new Value(n, b, null);
         }
 
         private void mapping(XContentBuilder b) throws IOException {
@@ -454,6 +486,9 @@ public class UnsignedLongFieldMapperTests extends WholeNumberFieldMapperTests {
             if (rarely()) {
                 b.field("store", false);
             }
+            if (ignoreMalformedEnabled) {
+                b.field("ignore_malformed", "true");
+            }
         }
 
         @Override
@@ -465,13 +500,6 @@ public class UnsignedLongFieldMapperTests extends WholeNumberFieldMapperTests {
                         minimalMapping(b);
                         b.field("doc_values", false);
                     }
-                ),
-                new SyntheticSourceInvalidExample(
-                    matchesPattern("field \\[field] of type \\[.+] doesn't support synthetic source because it ignores malformed numbers"),
-                    b -> {
-                        minimalMapping(b);
-                        b.field("ignore_malformed", true);
-                    }
                 )
             );
         }