Browse Source

Fix match_only_text keyword multi-field bug (#131383) (#131467)

In #131314 we fixed match_only_text fields with ignore_above keyword
multi-fields in the case that the keyword multi-field is stored. However,
the issue is still present if the keyword field is not stored, but instead
has doc values.

This patch fixes that case.
Jordan Powers 3 months ago
parent
commit
44f9b883bf

+ 57 - 9
modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java

@@ -67,7 +67,6 @@ import org.elasticsearch.xcontent.XContentBuilder;
 import java.io.IOException;
 import java.io.UncheckedIOException;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
@@ -253,11 +252,18 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
             if (searchExecutionContext.isSourceSynthetic() && withinMultiField) {
                 String parentField = searchExecutionContext.parentPath(name());
                 var parent = searchExecutionContext.lookup().fieldType(parentField);
-                if (parent.isStored()) {
-                    if (parent instanceof KeywordFieldMapper.KeywordFieldType keywordParent
-                        && keywordParent.ignoreAbove() != Integer.MAX_VALUE) {
+
+                if (parent instanceof KeywordFieldMapper.KeywordFieldType keywordParent
+                    && keywordParent.ignoreAbove() != Integer.MAX_VALUE) {
+                    if (parent.isStored()) {
                         return storedFieldFetcher(parentField, keywordParent.originalName());
+                    } else if (parent.hasDocValues()) {
+                        var ifd = searchExecutionContext.getForField(parent, MappedFieldType.FielddataOperation.SEARCH);
+                        return combineFieldFetchers(docValuesFieldFetcher(ifd), storedFieldFetcher(keywordParent.originalName()));
                     }
+                }
+
+                if (parent.isStored()) {
                     return storedFieldFetcher(parentField);
                 } else if (parent.hasDocValues()) {
                     var ifd = searchExecutionContext.getForField(parent, MappedFieldType.FielddataOperation.SEARCH);
@@ -268,14 +274,21 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
             } else if (searchExecutionContext.isSourceSynthetic() && hasCompatibleMultiFields) {
                 var mapper = (MatchOnlyTextFieldMapper) searchExecutionContext.getMappingLookup().getMapper(name());
                 var kwd = TextFieldMapper.SyntheticSourceHelper.getKeywordFieldMapperForSyntheticSource(mapper);
+
                 if (kwd != null) {
                     var fieldType = kwd.fieldType();
-                    if (fieldType.isStored()) {
-                        if (fieldType.ignoreAbove() != Integer.MAX_VALUE) {
+
+                    if (fieldType.ignoreAbove() != Integer.MAX_VALUE) {
+                        if (fieldType.isStored()) {
                             return storedFieldFetcher(fieldType.name(), fieldType.originalName());
-                        } else {
-                            return storedFieldFetcher(fieldType.name());
+                        } else if (fieldType.hasDocValues()) {
+                            var ifd = searchExecutionContext.getForField(fieldType, MappedFieldType.FielddataOperation.SEARCH);
+                            return combineFieldFetchers(docValuesFieldFetcher(ifd), storedFieldFetcher(fieldType.originalName()));
                         }
+                    }
+
+                    if (fieldType.isStored()) {
+                        return storedFieldFetcher(fieldType.name());
                     } else if (fieldType.hasDocValues()) {
                         var ifd = searchExecutionContext.getForField(fieldType, MappedFieldType.FielddataOperation.SEARCH);
                         return docValuesFieldFetcher(ifd);
@@ -332,7 +345,42 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
                     if (names.length == 1) {
                         return storedFields.get(names[0]);
                     }
-                    return Arrays.stream(names).map(storedFields::get).filter(Objects::nonNull).flatMap(List::stream).toList();
+
+                    List<Object> values = new ArrayList<>();
+                    for (var name : names) {
+                        var currValues = storedFields.get(name);
+                        if (currValues != null) {
+                            values.addAll(currValues);
+                        }
+                    }
+
+                    return values;
+                };
+            };
+        }
+
+        private static IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> combineFieldFetchers(
+            IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> primaryFetcher,
+            IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> secondaryFetcher
+        ) {
+            return context -> {
+                var primaryGetter = primaryFetcher.apply(context);
+                var secondaryGetter = secondaryFetcher.apply(context);
+                return docId -> {
+                    List<Object> values = new ArrayList<>();
+                    var primary = primaryGetter.apply(docId);
+                    if (primary != null) {
+                        values.addAll(primary);
+                    }
+
+                    var secondary = secondaryGetter.apply(docId);
+                    if (secondary != null) {
+                        values.addAll(secondary);
+                    }
+
+                    assert primary != null || secondary != null;
+
+                    return values;
                 };
             };
         }

+ 88 - 0
modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/match_only_text/10_basic.yml

@@ -435,6 +435,50 @@ synthetic_source match_only_text as multi-field:
   - match:
       hits.hits.0._source.foo: "Apache Lucene powers Elasticsearch"
 
+---
+synthetic_source match_only_text as multi-field with ignored keyword as parent:
+  - requires:
+      cluster_features: [ "mapper.source.mode_from_index_setting" ]
+      reason: "Source mode configured through index setting"
+
+  - do:
+      indices.create:
+        index: synthetic_source_test
+        body:
+          settings:
+            index:
+              mapping.source.mode: synthetic
+          mappings:
+            properties:
+              foo:
+                type: keyword
+                store: false
+                doc_values: true
+                ignore_above: 10
+                fields:
+                  text:
+                    type: match_only_text
+
+  - do:
+      index:
+        index: synthetic_source_test
+        id: "1"
+        refresh: true
+        body:
+          foo: [ "Apache Lucene powers Elasticsearch", "Apache" ]
+
+  - do:
+      search:
+        index: synthetic_source_test
+        body:
+          query:
+            match_phrase:
+              foo.text: apache lucene
+
+  - match: { "hits.total.value": 1 }
+  - match:
+      hits.hits.0._source.foo: [ "Apache", "Apache Lucene powers Elasticsearch" ]
+
 ---
 synthetic_source match_only_text as multi-field with stored keyword as parent:
   - requires:
@@ -562,6 +606,50 @@ synthetic_source match_only_text with multi-field:
   - match:
       hits.hits.0._source.foo: "Apache Lucene powers Elasticsearch"
 
+---
+synthetic_source match_only_text with ignored multi-field:
+  - requires:
+      cluster_features: [ "mapper.source.mode_from_index_setting" ]
+      reason: "Source mode configured through index setting"
+
+  - do:
+      indices.create:
+        index: synthetic_source_test
+        body:
+          settings:
+            index:
+              mapping.source.mode: synthetic
+          mappings:
+            properties:
+              foo:
+                type: match_only_text
+                fields:
+                  raw:
+                    type: keyword
+                    store: false
+                    doc_values: true
+                    ignore_above: 10
+
+  - do:
+      index:
+        index: synthetic_source_test
+        id: "1"
+        refresh: true
+        body:
+          foo: "Apache Lucene powers Elasticsearch"
+
+  - do:
+      search:
+        index: synthetic_source_test
+        body:
+          query:
+            match_phrase:
+              foo: apache lucene
+
+  - match: { "hits.total.value": 1 }
+  - match:
+      hits.hits.0._source.foo: "Apache Lucene powers Elasticsearch"
+
 ---
 synthetic_source match_only_text with stored multi-field:
   - requires: