浏览代码

[ML] Allow retrieving `boolean` fields from _source in DFA jobs (#85672)

This commit fixes a bug where `boolean` mapped fields may only
be retrieved from doc values in Data Frame Analytics jobs.

While this problem may occur for `boolean` fields with disabled
doc values, a more common way of being affected by this bug is
running an analysis against data with a very high number of fields.
If there are more fields than the `index.max_docvalue_fields_search`
setting, we fall back to retrieving the fields from _source. Then
`boolean` mapped fields would fail with the error message "cannot
apply boolean mapping to field [<field>]".
Dimitris Athanasiou 3 年之前
父节点
当前提交
76eeeaff9e

+ 5 - 0
docs/changelog/85672.yaml

@@ -0,0 +1,5 @@
+pr: 85672
+summary: Allow retrieving `boolean` fields from `_source` in DFA jobs
+area: Machine Learning
+type: bug
+issues: []

+ 40 - 8
x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/ExtractedFields.java

@@ -8,12 +8,13 @@ package org.elasticsearch.xpack.ml.extractor;
 
 import org.elasticsearch.action.fieldcaps.FieldCapabilities;
 import org.elasticsearch.action.fieldcaps.FieldCapabilitiesResponse;
-import org.elasticsearch.common.document.DocumentField;
+import org.elasticsearch.core.Booleans;
 import org.elasticsearch.index.mapper.BooleanFieldMapper;
 import org.elasticsearch.search.SearchHit;
 import org.elasticsearch.xpack.core.ml.utils.MlStrings;
 
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
@@ -235,39 +236,70 @@ public class ExtractedFields {
     /**
      * Makes boolean fields behave as a field of different type.
      */
-    private static final class BooleanMapper<T> extends DocValueField {
+    private static final class BooleanMapper<T> extends AbstractField {
 
         private static final Set<String> TYPES = Collections.singleton(BooleanFieldMapper.CONTENT_TYPE);
 
+        private final ExtractedField field;
         private final T trueValue;
         private final T falseValue;
 
         BooleanMapper(ExtractedField field, T trueValue, T falseValue) {
             super(field.getName(), TYPES);
-            if (field.getMethod() != Method.DOC_VALUE || field.getTypes().contains(BooleanFieldMapper.CONTENT_TYPE) == false) {
+            this.field = field;
+            if (field.getTypes().contains(BooleanFieldMapper.CONTENT_TYPE) == false) {
                 throw new IllegalArgumentException("cannot apply boolean mapping to field [" + field.getName() + "]");
             }
             this.trueValue = trueValue;
             this.falseValue = falseValue;
         }
 
+        @Override
+        public Method getMethod() {
+            return field.getMethod();
+        }
+
         @Override
         public Object[] value(SearchHit hit) {
-            DocumentField keyValue = hit.field(getName());
-            if (keyValue != null) {
-                return keyValue.getValues().stream().map(v -> Boolean.TRUE.equals(v) ? trueValue : falseValue).toArray();
+            Object[] value = field.value(hit);
+            if (value != null) {
+                return Arrays.stream(value).map(v -> {
+                    boolean asBoolean;
+                    if (v instanceof Boolean vBoolean) {
+                        asBoolean = vBoolean;
+                    } else {
+                        asBoolean = Booleans.parseBoolean(v.toString());
+                    }
+                    return asBoolean ? trueValue : falseValue;
+                }).toArray();
             }
             return new Object[0];
         }
 
         @Override
         public boolean supportsFromSource() {
-            return false;
+            return field.supportsFromSource();
         }
 
         @Override
         public ExtractedField newFromSource() {
-            throw new UnsupportedOperationException();
+            return field.newFromSource();
+        }
+
+        @Override
+        public boolean isMultiField() {
+            return field.isMultiField();
         }
+
+        @Override
+        public String getParentField() {
+            return field.getParentField();
+        }
+
+        @Override
+        public String getDocValueFormat() {
+            return field.getDocValueFormat();
+        }
+
     }
 }

+ 30 - 3
x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/ExtractedFieldsTests.java

@@ -119,7 +119,7 @@ public class ExtractedFieldsTests extends ESTestCase {
         assertThat(airportField.getParentField(), equalTo("airport"));
     }
 
-    public void testApplyBooleanMapping() {
+    public void testApplyBooleanMapping_GivenDocValueField() {
         DocValueField aBool = new DocValueField("a_bool", Collections.singleton("boolean"));
 
         ExtractedField mapped = ExtractedFields.applyBooleanMapping(aBool);
@@ -132,8 +132,35 @@ public class ExtractedFieldsTests extends ESTestCase {
 
         assertThat(mapped.getName(), equalTo(aBool.getName()));
         assertThat(mapped.getMethod(), equalTo(aBool.getMethod()));
-        assertThat(mapped.supportsFromSource(), is(false));
-        expectThrows(UnsupportedOperationException.class, mapped::newFromSource);
+        assertThat(mapped.supportsFromSource(), is(aBool.supportsFromSource()));
+    }
+
+    public void testApplyBooleanMapping_GivenSourceField() {
+        SourceField aBool = new SourceField("a_bool", Collections.singleton("boolean"));
+
+        ExtractedField mapped = ExtractedFields.applyBooleanMapping(aBool);
+
+        SearchHit hitTrue = new SearchHitBuilder(42).setSource("{\"a_bool\": true}").build();
+        SearchHit hitFalse = new SearchHitBuilder(42).setSource("{\"a_bool\": false}").build();
+        SearchHit hitTrueArray = new SearchHitBuilder(42).setSource("{\"a_bool\": [\"true\", true]}").build();
+        SearchHit hitFalseArray = new SearchHitBuilder(42).setSource("{\"a_bool\": [\"false\", false]}").build();
+
+        assertThat(mapped.value(hitTrue), equalTo(new Integer[] { 1 }));
+        assertThat(mapped.value(hitFalse), equalTo(new Integer[] { 0 }));
+        assertThat(mapped.value(hitTrueArray), equalTo(new Integer[] { 1, 1 }));
+        assertThat(mapped.value(hitFalseArray), equalTo(new Integer[] { 0, 0 }));
+
+        assertThat(mapped.getName(), equalTo(aBool.getName()));
+        assertThat(mapped.getMethod(), equalTo(aBool.getMethod()));
+        assertThat(mapped.supportsFromSource(), is(aBool.supportsFromSource()));
+    }
+
+    public void testApplyBooleanMapping_GivenNonBooleanField() {
+        SourceField aBool = new SourceField("not_a_bool", Collections.singleton("integer"));
+
+        IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> ExtractedFields.applyBooleanMapping(aBool));
+
+        assertThat(e.getMessage(), equalTo("cannot apply boolean mapping to field [not_a_bool]"));
     }
 
     public void testBuildGivenFieldWithoutMappings() {