Quellcode durchsuchen

[9.1] Fixed match only text block loader not working when a keyword multi field is present (#134582) (#135025)

* Fixed match only text block loader not working when a keyword multi field is present (#134582)

* Fixed match only text block loader not working when a keyword multi field is present

* Update docs/changelog/134582.yaml

* Preemptively mute this test

* [CI] Auto commit changes from spotless

* Addressed feedback

* Update modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldTypeTests.java

Co-authored-by: Jordan Powers <jordanpowers1227@gmail.com>

* Preemptively mute this test

* Fixed copyright

* Gate tests on feature presence

* [CI] Auto commit changes from spotless

* Revert muted-tests to main

---------

Co-authored-by: elasticsearchmachine <infra-root+elasticsearchmachine@elastic.co>
Co-authored-by: Jordan Powers <jordanpowers1227@gmail.com>
(cherry picked from commit 86227fb2523fbef1e9ba7e1ab9db03aba6750e98)

# Conflicts:
#	server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java

* Removed non existent line in 9.1
Dmitry Kubikov vor 2 Wochen
Ursprung
Commit
91493b0b99

+ 6 - 0
docs/changelog/134582.yaml

@@ -0,0 +1,6 @@
+pr: 134582
+summary: Fixed match only text block loader not working when a keyword multi field
+  is present
+area: Mapping
+type: bug
+issues: []

+ 44 - 32
modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java

@@ -133,27 +133,28 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
             return new Parameter<?>[] { meta };
         }
 
-        private MatchOnlyTextFieldType buildFieldType(MapperBuilderContext context) {
+        private MatchOnlyTextFieldType buildFieldType(MapperBuilderContext context, MultiFields multiFields) {
             NamedAnalyzer searchAnalyzer = analyzers.getSearchAnalyzer();
             NamedAnalyzer searchQuoteAnalyzer = analyzers.getSearchQuoteAnalyzer();
             NamedAnalyzer indexAnalyzer = analyzers.getIndexAnalyzer();
             TextSearchInfo tsi = new TextSearchInfo(Defaults.FIELD_TYPE, null, searchAnalyzer, searchQuoteAnalyzer);
-            MatchOnlyTextFieldType ft = new MatchOnlyTextFieldType(
+            return new MatchOnlyTextFieldType(
                 context.buildFullName(leafName()),
                 tsi,
                 indexAnalyzer,
                 context.isSourceSynthetic(),
                 meta.getValue(),
                 withinMultiField,
-                multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField(),
-                storedFieldInBinaryFormat
+                storedFieldInBinaryFormat,
+                // match only text fields are not stored by definition
+                TextFieldMapper.SyntheticSourceHelper.syntheticSourceDelegate(false, multiFields)
             );
-            return ft;
         }
 
         @Override
         public MatchOnlyTextFieldMapper build(MapperBuilderContext context) {
-            MatchOnlyTextFieldType tft = buildFieldType(context);
+            BuilderParams builderParams = builderParams(this, context);
+            MatchOnlyTextFieldType tft = buildFieldType(context, builderParams.multiFields());
             final boolean storeSource;
             if (multiFieldsNotStoredByDefaultIndexVersionCheck(indexCreatedVersion)) {
                 storeSource = context.isSourceSynthetic()
@@ -164,6 +165,7 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
             }
             return new MatchOnlyTextFieldMapper(leafName(), Defaults.FIELD_TYPE, tft, builderParams(this, context), storeSource, this);
         }
+
     }
 
     private static boolean isSyntheticSourceStoredFieldInBinaryFormat(IndexVersion indexCreatedVersion) {
@@ -191,7 +193,6 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
         private final String originalName;
 
         private final boolean withinMultiField;
-        private final boolean hasCompatibleMultiFields;
         private final boolean storedFieldInBinaryFormat;
 
         public MatchOnlyTextFieldType(
@@ -201,15 +202,14 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
             boolean isSyntheticSource,
             Map<String, String> meta,
             boolean withinMultiField,
-            boolean hasCompatibleMultiFields,
-            boolean storedFieldInBinaryFormat
+            boolean storedFieldInBinaryFormat,
+            KeywordFieldMapper.KeywordFieldType syntheticSourceDelegate
         ) {
             super(name, true, false, false, tsi, meta);
             this.indexAnalyzer = Objects.requireNonNull(indexAnalyzer);
-            this.textFieldType = new TextFieldType(name, isSyntheticSource);
+            this.textFieldType = new TextFieldType(name, isSyntheticSource, syntheticSourceDelegate);
             this.originalName = isSyntheticSource ? name + "._original" : null;
             this.withinMultiField = withinMultiField;
-            this.hasCompatibleMultiFields = hasCompatibleMultiFields;
             this.storedFieldInBinaryFormat = storedFieldInBinaryFormat;
         }
 
@@ -222,7 +222,7 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
                 Collections.emptyMap(),
                 false,
                 false,
-                false
+                null
             );
         }
 
@@ -270,26 +270,23 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
                 } else {
                     assert false : "parent field should either be stored or have doc values";
                 }
-            } else if (searchExecutionContext.isSourceSynthetic() && hasCompatibleMultiFields) {
-                var mapper = (MatchOnlyTextFieldMapper) searchExecutionContext.getMappingLookup().getMapper(name());
-                var kwd = TextFieldMapper.SyntheticSourceHelper.getKeywordFieldMapperForSyntheticSource(mapper);
+            } else if (searchExecutionContext.isSourceSynthetic() && textFieldType.syntheticSourceDelegate() != null) {
+                var kwd = textFieldType.syntheticSourceDelegate();
 
                 if (kwd != null) {
-                    var fieldType = kwd.fieldType();
-
-                    if (fieldType.ignoreAbove().isSet()) {
-                        if (fieldType.isStored()) {
-                            return storedFieldFetcher(fieldType.name(), fieldType.originalName());
-                        } else if (fieldType.hasDocValues()) {
-                            var ifd = searchExecutionContext.getForField(fieldType, MappedFieldType.FielddataOperation.SEARCH);
-                            return combineFieldFetchers(docValuesFieldFetcher(ifd), storedFieldFetcher(fieldType.originalName()));
+                    if (kwd.ignoreAbove().isSet()) {
+                        if (kwd.isStored()) {
+                            return storedFieldFetcher(kwd.name(), kwd.originalName());
+                        } else if (kwd.hasDocValues()) {
+                            var ifd = searchExecutionContext.getForField(kwd, MappedFieldType.FielddataOperation.SEARCH);
+                            return combineFieldFetchers(docValuesFieldFetcher(ifd), storedFieldFetcher(kwd.originalName()));
                         }
                     }
 
-                    if (fieldType.isStored()) {
-                        return storedFieldFetcher(fieldType.name());
-                    } else if (fieldType.hasDocValues()) {
-                        var ifd = searchExecutionContext.getForField(fieldType, MappedFieldType.FielddataOperation.SEARCH);
+                    if (kwd.isStored()) {
+                        return storedFieldFetcher(kwd.name());
+                    } else if (kwd.hasDocValues()) {
+                        var ifd = searchExecutionContext.getForField(kwd, MappedFieldType.FielddataOperation.SEARCH);
                         return docValuesFieldFetcher(ifd);
                     } else {
                         assert false : "multi field should either be stored or have doc values";
@@ -512,7 +509,7 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
             return toQuery(query, queryShardContext);
         }
 
-        private static class BytesFromMixedStringsBytesRefBlockLoader extends BlockStoredFieldsReader.StoredFieldsBlockLoader {
+        static class BytesFromMixedStringsBytesRefBlockLoader extends BlockStoredFieldsReader.StoredFieldsBlockLoader {
             BytesFromMixedStringsBytesRefBlockLoader(String field) {
                 super(field);
             }
@@ -543,12 +540,27 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
         @Override
         public BlockLoader blockLoader(BlockLoaderContext blContext) {
             if (textFieldType.isSyntheticSource()) {
-                if (storedFieldInBinaryFormat) {
-                    return new BlockStoredFieldsReader.BytesFromBytesRefsBlockLoader(storedFieldNameForSyntheticSource());
-                } else {
-                    return new BytesFromMixedStringsBytesRefBlockLoader(storedFieldNameForSyntheticSource());
+                // if there is no synthetic source delegate, then this match only text field would've created StoredFields for us to use
+                if (textFieldType.syntheticSourceDelegate() == null) {
+                    if (storedFieldInBinaryFormat) {
+                        return new BlockStoredFieldsReader.BytesFromBytesRefsBlockLoader(storedFieldNameForSyntheticSource());
+                    } else {
+                        return new BytesFromMixedStringsBytesRefBlockLoader(storedFieldNameForSyntheticSource());
+                    }
+                }
+
+                // otherwise, delegate block loading to the synthetic source delegate if possible
+                if (textFieldType.canUseSyntheticSourceDelegateForLoading()) {
+                    return new BlockLoader.Delegating(textFieldType.syntheticSourceDelegate().blockLoader(blContext)) {
+                        @Override
+                        protected String delegatingTo() {
+                            return textFieldType.syntheticSourceDelegate().name();
+                        }
+                    };
                 }
             }
+
+            // fall back to _source (synthetic or not)
             SourceValueFetcher fetcher = SourceValueFetcher.toString(blContext.sourcePaths(name()));
             // MatchOnlyText never has norms, so we have to use the field names field
             BlockSourceReader.LeafIteratorLookup lookup = BlockSourceReader.lookupFromFieldNames(blContext.fieldNames(), name());

+ 164 - 0
modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldTypeTests.java

@@ -9,6 +9,7 @@
 package org.elasticsearch.index.mapper.extras;
 
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.document.FieldType;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.queries.intervals.Intervals;
 import org.apache.lucene.queries.intervals.IntervalsSource;
@@ -27,20 +28,38 @@ import org.apache.lucene.tests.analysis.CannedTokenStream;
 import org.apache.lucene.tests.analysis.Token;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.ElasticsearchException;
+import org.elasticsearch.cluster.metadata.IndexMetadata;
 import org.elasticsearch.common.lucene.BytesRefs;
+import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.common.lucene.search.AutomatonQueries;
 import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
+import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.unit.Fuzziness;
+import org.elasticsearch.index.IndexMode;
+import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.IndexVersion;
+import org.elasticsearch.index.analysis.NamedAnalyzer;
+import org.elasticsearch.index.mapper.BlockLoader;
+import org.elasticsearch.index.mapper.FieldNamesFieldMapper;
 import org.elasticsearch.index.mapper.FieldTypeTestCase;
+import org.elasticsearch.index.mapper.KeywordFieldMapper;
 import org.elasticsearch.index.mapper.MappedFieldType;
+import org.elasticsearch.index.mapper.MappingParserContext;
+import org.elasticsearch.index.mapper.TextFieldMapper;
+import org.elasticsearch.index.mapper.TextSearchInfo;
 import org.elasticsearch.index.mapper.extras.MatchOnlyTextFieldMapper.MatchOnlyTextFieldType;
+import org.elasticsearch.script.ScriptCompiler;
 import org.hamcrest.Matchers;
 
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collections;
 import java.util.List;
 
+import static org.mockito.Mockito.doReturn;
+import static org.mockito.Mockito.mock;
+
 public class MatchOnlyTextFieldTypeTests extends FieldTypeTestCase {
 
     public void testTermQuery() {
@@ -205,4 +224,149 @@ public class MatchOnlyTextFieldTypeTests extends FieldTypeTestCase {
             ((SourceIntervalsSource) rangeIntervals).getIntervalsSource()
         );
     }
+
+    public void test_block_loader_uses_stored_fields_for_loading_when_synthetic_source_delegate_is_absent() {
+        // given
+        MatchOnlyTextFieldMapper.MatchOnlyTextFieldType ft = new MatchOnlyTextFieldMapper.MatchOnlyTextFieldType(
+            "parent",
+            new TextSearchInfo(TextFieldMapper.Defaults.FIELD_TYPE, null, Lucene.STANDARD_ANALYZER, Lucene.STANDARD_ANALYZER),
+            mock(NamedAnalyzer.class),
+            true,
+            Collections.emptyMap(),
+            false,
+            false,
+            null
+        );
+
+        // when
+        BlockLoader blockLoader = ft.blockLoader(mock(MappedFieldType.BlockLoaderContext.class));
+
+        // then
+        // verify that we load from the stored field, since there is no synthetic source delegate to use
+        assertThat(blockLoader, Matchers.instanceOf(MatchOnlyTextFieldType.BytesFromMixedStringsBytesRefBlockLoader.class));
+    }
+
+    public void test_block_loader_uses_synthetic_source_delegate_when_ignore_above_is_not_set() {
+        // given
+        KeywordFieldMapper.KeywordFieldType syntheticSourceDelegate = new KeywordFieldMapper.KeywordFieldType(
+            "child",
+            true,
+            true,
+            Collections.emptyMap()
+        );
+
+        MatchOnlyTextFieldMapper.MatchOnlyTextFieldType ft = new MatchOnlyTextFieldMapper.MatchOnlyTextFieldType(
+            "parent",
+            new TextSearchInfo(TextFieldMapper.Defaults.FIELD_TYPE, null, Lucene.STANDARD_ANALYZER, Lucene.STANDARD_ANALYZER),
+            mock(NamedAnalyzer.class),
+            true,
+            Collections.emptyMap(),
+            false,
+            false,
+            syntheticSourceDelegate
+        );
+
+        // when
+        BlockLoader blockLoader = ft.blockLoader(mock(MappedFieldType.BlockLoaderContext.class));
+
+        // then
+        // verify that we delegate block loading to the synthetic source delegate
+        assertThat(blockLoader, Matchers.instanceOf(BlockLoader.Delegating.class));
+    }
+
+    public void test_block_loader_does_not_use_synthetic_source_delegate_when_ignore_above_is_set() {
+        // given
+        Settings settings = Settings.builder()
+            .put(IndexMetadata.SETTING_VERSION_CREATED, IndexVersion.current())
+            .put(IndexSettings.MODE.getKey(), IndexMode.STANDARD)
+            .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
+            .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1)
+            .build();
+        IndexSettings indexSettings = new IndexSettings(IndexMetadata.builder("index").settings(settings).build(), settings);
+        MappingParserContext mappingParserContext = mock(MappingParserContext.class);
+        doReturn(settings).when(mappingParserContext).getSettings();
+        doReturn(indexSettings).when(mappingParserContext).getIndexSettings();
+        doReturn(mock(ScriptCompiler.class)).when(mappingParserContext).scriptCompiler();
+
+        KeywordFieldMapper.Builder builder = new KeywordFieldMapper.Builder("child", mappingParserContext);
+        builder.ignoreAbove(123);
+
+        KeywordFieldMapper.KeywordFieldType syntheticSourceDelegate = new KeywordFieldMapper.KeywordFieldType(
+            "child",
+            mock(FieldType.class),
+            mock(NamedAnalyzer.class),
+            mock(NamedAnalyzer.class),
+            mock(NamedAnalyzer.class),
+            builder,
+            true
+        );
+
+        MatchOnlyTextFieldMapper.MatchOnlyTextFieldType ft = new MatchOnlyTextFieldMapper.MatchOnlyTextFieldType(
+            "parent",
+            new TextSearchInfo(TextFieldMapper.Defaults.FIELD_TYPE, null, Lucene.STANDARD_ANALYZER, Lucene.STANDARD_ANALYZER),
+            mock(NamedAnalyzer.class),
+            true,
+            Collections.emptyMap(),
+            false,
+            false,
+            syntheticSourceDelegate
+        );
+
+        // when
+        MappedFieldType.BlockLoaderContext blContext = mock(MappedFieldType.BlockLoaderContext.class);
+        doReturn(FieldNamesFieldMapper.FieldNamesFieldType.get(false)).when(blContext).fieldNames();
+        BlockLoader blockLoader = ft.blockLoader(blContext);
+
+        // then
+        // verify that we don't delegate anything
+        assertThat(blockLoader, Matchers.not(Matchers.instanceOf(BlockLoader.Delegating.class)));
+    }
+
+    public void test_block_loader_does_not_use_synthetic_source_delegate_when_ignore_above_is_set_at_index_level() {
+        // given
+        Settings settings = Settings.builder()
+            .put(IndexMetadata.SETTING_VERSION_CREATED, IndexVersion.current())
+            .put(IndexSettings.MODE.getKey(), IndexMode.STANDARD)
+            .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
+            .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1)
+            .put(IndexSettings.IGNORE_ABOVE_SETTING.getKey(), 123)
+            .build();
+        IndexSettings indexSettings = new IndexSettings(IndexMetadata.builder("index").settings(settings).build(), settings);
+        MappingParserContext mappingParserContext = mock(MappingParserContext.class);
+        doReturn(settings).when(mappingParserContext).getSettings();
+        doReturn(indexSettings).when(mappingParserContext).getIndexSettings();
+        doReturn(mock(ScriptCompiler.class)).when(mappingParserContext).scriptCompiler();
+
+        KeywordFieldMapper.Builder builder = new KeywordFieldMapper.Builder("child", mappingParserContext);
+
+        KeywordFieldMapper.KeywordFieldType syntheticSourceDelegate = new KeywordFieldMapper.KeywordFieldType(
+            "child",
+            mock(FieldType.class),
+            mock(NamedAnalyzer.class),
+            mock(NamedAnalyzer.class),
+            mock(NamedAnalyzer.class),
+            builder,
+            true
+        );
+
+        MatchOnlyTextFieldMapper.MatchOnlyTextFieldType ft = new MatchOnlyTextFieldMapper.MatchOnlyTextFieldType(
+            "parent",
+            new TextSearchInfo(TextFieldMapper.Defaults.FIELD_TYPE, null, Lucene.STANDARD_ANALYZER, Lucene.STANDARD_ANALYZER),
+            mock(NamedAnalyzer.class),
+            true,
+            Collections.emptyMap(),
+            false,
+            false,
+            syntheticSourceDelegate
+        );
+
+        // when
+        MappedFieldType.BlockLoaderContext blContext = mock(MappedFieldType.BlockLoaderContext.class);
+        doReturn(FieldNamesFieldMapper.FieldNamesFieldType.get(false)).when(blContext).fieldNames();
+        BlockLoader blockLoader = ft.blockLoader(blContext);
+
+        // then
+        // verify that we don't delegate anything
+        assertThat(blockLoader, Matchers.not(Matchers.instanceOf(BlockLoader.Delegating.class)));
+    }
 }

+ 1 - 1
plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java

@@ -135,7 +135,7 @@ public class AnnotatedTextFieldMapper extends FieldMapper {
                 store.getValue(),
                 tsi,
                 context.isSourceSynthetic(),
-                TextFieldMapper.SyntheticSourceHelper.syntheticSourceDelegate(fieldType, multiFields),
+                TextFieldMapper.SyntheticSourceHelper.syntheticSourceDelegate(fieldType.stored(), multiFields),
                 meta.getValue()
             );
         }

+ 4 - 1
server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java

@@ -38,6 +38,8 @@ public class MapperFeatures implements FeatureSpecification {
     public static final NodeFeature SORT_FIELDS_CHECK_FOR_NESTED_OBJECT_FIX = new NodeFeature("mapper.nested.sorting_fields_check_fix");
     public static final NodeFeature DYNAMIC_HANDLING_IN_COPY_TO = new NodeFeature("mapper.copy_to.dynamic_handling");
     public static final NodeFeature DOC_VALUES_SKIPPER = new NodeFeature("mapper.doc_values_skipper");
+    public static final NodeFeature MATCH_ONLY_TEXT_BLOCK_LOADER_FIX = new NodeFeature("mapper.match_only_text_block_loader_fix");
+
     static final NodeFeature UKNOWN_FIELD_MAPPING_UPDATE_ERROR_MESSAGE = new NodeFeature(
         "mapper.unknown_field_mapping_update_error_message"
     );
@@ -80,7 +82,8 @@ public class MapperFeatures implements FeatureSpecification {
             SEARCH_LOAD_PER_SHARD,
             SPARSE_VECTOR_INDEX_OPTIONS_FEATURE,
             PATTERNED_TEXT,
-            MULTI_FIELD_UNICODE_OPTIMISATION_FIX
+            MULTI_FIELD_UNICODE_OPTIMISATION_FIX,
+            MATCH_ONLY_TEXT_BLOCK_LOADER_FIX
         );
     }
 }

+ 18 - 3
server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java

@@ -410,7 +410,7 @@ public final class TextFieldMapper extends FieldMapper {
                     store.getValue(),
                     tsi,
                     context.isSourceSynthetic(),
-                    SyntheticSourceHelper.syntheticSourceDelegate(fieldType, multiFields),
+                    SyntheticSourceHelper.syntheticSourceDelegate(fieldType.stored(), multiFields),
                     meta.getValue(),
                     eagerGlobalOrdinals.getValue(),
                     indexPhrases.getValue()
@@ -744,6 +744,20 @@ public final class TextFieldMapper extends FieldMapper {
             );
         }
 
+        public TextFieldType(String name, boolean isSyntheticSource, KeywordFieldMapper.KeywordFieldType syntheticSourceDelegate) {
+            this(
+                name,
+                true,
+                false,
+                new TextSearchInfo(Defaults.FIELD_TYPE, null, Lucene.STANDARD_ANALYZER, Lucene.STANDARD_ANALYZER),
+                isSyntheticSource,
+                syntheticSourceDelegate,
+                Collections.emptyMap(),
+                false,
+                false
+            );
+        }
+
         public boolean fielddata() {
             return fielddata;
         }
@@ -1598,8 +1612,9 @@ public final class TextFieldMapper extends FieldMapper {
     }
 
     public static class SyntheticSourceHelper {
-        public static KeywordFieldMapper.KeywordFieldType syntheticSourceDelegate(FieldType fieldType, MultiFields multiFields) {
-            if (fieldType.stored()) {
+        public static KeywordFieldMapper.KeywordFieldType syntheticSourceDelegate(boolean isParentFieldStored, MultiFields multiFields) {
+            // if the parent field is stored, there is no need to delegate anything as we can get source directly from the stored field
+            if (isParentFieldStored) {
                 return null;
             }
             var kwd = getKeywordFieldMapperForSyntheticSource(multiFields);

+ 0 - 3
server/src/test/java/org/elasticsearch/index/mapper/TextFieldTypeTests.java

@@ -323,7 +323,6 @@ public class TextFieldTypeTests extends FieldTypeTestCase {
         );
 
         // when
-        ft.blockLoader(mock(MappedFieldType.BlockLoaderContext.class));
         BlockLoader blockLoader = ft.blockLoader(mock(MappedFieldType.BlockLoaderContext.class));
 
         // then
@@ -372,7 +371,6 @@ public class TextFieldTypeTests extends FieldTypeTestCase {
         );
 
         // when
-        ft.blockLoader(mock(MappedFieldType.BlockLoaderContext.class));
         BlockLoader blockLoader = ft.blockLoader(mock(MappedFieldType.BlockLoaderContext.class));
 
         // then
@@ -420,7 +418,6 @@ public class TextFieldTypeTests extends FieldTypeTestCase {
         );
 
         // when
-        ft.blockLoader(mock(MappedFieldType.BlockLoaderContext.class));
         BlockLoader blockLoader = ft.blockLoader(mock(MappedFieldType.BlockLoaderContext.class));
 
         // then

+ 116 - 66
x-pack/plugin/logsdb/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/upgrades/MatchOnlyTextRollingUpgradeIT.java

@@ -18,6 +18,7 @@ import org.elasticsearch.common.network.NetworkAddress;
 import org.elasticsearch.common.time.DateFormatter;
 import org.elasticsearch.common.time.FormatNames;
 import org.elasticsearch.common.xcontent.XContentHelper;
+import org.elasticsearch.index.mapper.MapperFeatures;
 import org.elasticsearch.test.rest.ObjectPath;
 import org.elasticsearch.xcontent.XContentType;
 
@@ -30,6 +31,7 @@ import java.util.Map;
 
 import static org.elasticsearch.upgrades.StandardToLogsDbIndexModeRollingUpgradeIT.enableLogsdbByDefault;
 import static org.elasticsearch.upgrades.StandardToLogsDbIndexModeRollingUpgradeIT.getWriteBackingIndex;
+import static org.elasticsearch.upgrades.TextRollingUpgradeIT.randomAlphasDelimitedBySpace;
 import static org.hamcrest.Matchers.containsString;
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.greaterThanOrEqualTo;
@@ -37,8 +39,15 @@ import static org.hamcrest.Matchers.notNullValue;
 
 public class MatchOnlyTextRollingUpgradeIT extends AbstractRollingUpgradeWithSecurityTestCase {
 
+    private static final String DATA_STREAM = "logs-bwc-test";
+
+    private static final int IGNORE_ABOVE_MAX = 256;
+    private static final int NUM_REQUESTS = 4;
+    private static final int NUM_DOCS_PER_REQUEST = 1024;
+
     static String BULK_ITEM_TEMPLATE =
         """
+            { "create": {} }
             {"@timestamp": "$now", "host.name": "$host", "method": "$method", "ip": "$ip", "message": "$message", "length": $length, "factor": $factor}
             """;
 
@@ -53,7 +62,13 @@ public class MatchOnlyTextRollingUpgradeIT extends AbstractRollingUpgradeWithSec
                   "type": "keyword"
                 },
                 "message": {
-                  "type": "match_only_text"
+                  "type": "match_only_text",
+                  "fields": {
+                    "keyword": {
+                      "ignore_above": $IGNORE_ABOVE,
+                      "type": "keyword"
+                    }
+                  }
                 },
                 "ip": {
                   "type": "ip"
@@ -68,55 +83,82 @@ public class MatchOnlyTextRollingUpgradeIT extends AbstractRollingUpgradeWithSec
             }
         }""";
 
+    // when sorted, this message will appear at the top and hence can be used to validate query results
+    private static String smallestMessage;
+
     public MatchOnlyTextRollingUpgradeIT(@Name("upgradedNodes") int upgradedNodes) {
         super(upgradedNodes);
     }
 
     public void testIndexing() throws Exception {
-        String dataStreamName = "logs-bwc-test";
+        assumeTrue(
+            "Match only text block loader fix is not present in this cluster",
+            oldClusterHasFeature(MapperFeatures.MATCH_ONLY_TEXT_BLOCK_LOADER_FIX)
+        );
+
         if (isOldCluster()) {
+            // given - enable logsdb and create a template
             startTrial();
             enableLogsdbByDefault();
-            createTemplate(dataStreamName, getClass().getSimpleName().toLowerCase(Locale.ROOT), TEMPLATE);
+            String templateId = getClass().getSimpleName().toLowerCase(Locale.ROOT);
+            createTemplate(DATA_STREAM, templateId, prepareTemplate());
 
-            Instant startTime = Instant.now().minusSeconds(60 * 60);
-            bulkIndex(dataStreamName, 4, 1024, startTime);
+            // when - index some documents
+            bulkIndex(NUM_REQUESTS, NUM_DOCS_PER_REQUEST);
 
-            String firstBackingIndex = getWriteBackingIndex(client(), dataStreamName, 0);
+            // then - verify that logsdb and synthetic source are both enabled
+            String firstBackingIndex = getWriteBackingIndex(client(), DATA_STREAM, 0);
             var settings = (Map<?, ?>) getIndexSettingsWithDefaults(firstBackingIndex).get(firstBackingIndex);
             assertThat(((Map<?, ?>) settings.get("settings")).get("index.mode"), equalTo("logsdb"));
             assertThat(((Map<?, ?>) settings.get("defaults")).get("index.mapping.source.mode"), equalTo("SYNTHETIC"));
 
-            ensureGreen(dataStreamName);
-            search(dataStreamName);
-            query(dataStreamName);
+            // when/then - run some queries and verify results
+            ensureGreen(DATA_STREAM);
+            search(DATA_STREAM);
+            query(DATA_STREAM);
+
         } else if (isMixedCluster()) {
-            Instant startTime = Instant.now().minusSeconds(60 * 30);
-            bulkIndex(dataStreamName, 4, 1024, startTime);
+            // when
+            bulkIndex(NUM_REQUESTS, NUM_DOCS_PER_REQUEST);
+
+            // when/then
+            ensureGreen(DATA_STREAM);
+            search(DATA_STREAM);
+            query(DATA_STREAM);
 
-            ensureGreen(dataStreamName);
-            search(dataStreamName);
-            query(dataStreamName);
         } else if (isUpgradedCluster()) {
-            ensureGreen(dataStreamName);
-            Instant startTime = Instant.now();
-            bulkIndex(dataStreamName, 4, 1024, startTime);
-            search(dataStreamName);
-            query(dataStreamName);
+            // when/then
+            ensureGreen(DATA_STREAM);
+            bulkIndex(NUM_REQUESTS, NUM_DOCS_PER_REQUEST);
+            search(DATA_STREAM);
+            query(DATA_STREAM);
 
-            var forceMergeRequest = new Request("POST", "/" + dataStreamName + "/_forcemerge");
+            // when/then continued - force merge all shard segments into one
+            var forceMergeRequest = new Request("POST", "/" + DATA_STREAM + "/_forcemerge");
             forceMergeRequest.addParameter("max_num_segments", "1");
             assertOK(client().performRequest(forceMergeRequest));
 
-            ensureGreen(dataStreamName);
-            search(dataStreamName);
-            query(dataStreamName);
+            // then continued
+            ensureGreen(DATA_STREAM);
+            search(DATA_STREAM);
+            query(DATA_STREAM);
+        }
+    }
+
+    private String prepareTemplate() {
+        boolean shouldSetIgnoreAbove = randomBoolean();
+        if (shouldSetIgnoreAbove) {
+            return TEMPLATE.replace("$IGNORE_ABOVE", String.valueOf(randomInt(IGNORE_ABOVE_MAX)));
         }
+
+        // removes the entire line that defines ignore_above
+        return TEMPLATE.replaceAll("(?m)^\\s*\"ignore_above\":\\s*\\$IGNORE_ABOVE\\s*,?\\s*\\n?", "");
     }
 
     static void createTemplate(String dataStreamName, String id, String template) throws IOException {
         final String INDEX_TEMPLATE = """
             {
+                "priority": 500,
                 "index_patterns": ["$DATASTREAM"],
                 "template": $TEMPLATE,
                 "data_stream": {
@@ -127,46 +169,59 @@ public class MatchOnlyTextRollingUpgradeIT extends AbstractRollingUpgradeWithSec
         assertOK(client().performRequest(putIndexTemplateRequest));
     }
 
-    static String bulkIndex(String dataStreamName, int numRequest, int numDocs, Instant startTime) throws Exception {
+    private void bulkIndex(int numRequest, int numDocs) throws Exception {
         String firstIndex = null;
+        Instant startTime = Instant.now().minusSeconds(60 * 60);
+
         for (int i = 0; i < numRequest; i++) {
-            var bulkRequest = new Request("POST", "/" + dataStreamName + "/_bulk");
-            StringBuilder requestBody = new StringBuilder();
-            for (int j = 0; j < numDocs; j++) {
-                String hostName = "host" + j % 50; // Not realistic, but makes asserting search / query response easier.
-                String methodName = "method" + j % 5;
-                String ip = NetworkAddress.format(randomIp(true));
-                String param = "chicken" + randomInt(5);
-                String message = "the quick brown fox jumps over the " + param;
-                long length = randomLong();
-                double factor = randomDouble();
-
-                requestBody.append("{\"create\": {}}");
-                requestBody.append('\n');
-                requestBody.append(
-                    BULK_ITEM_TEMPLATE.replace("$now", formatInstant(startTime))
-                        .replace("$host", hostName)
-                        .replace("$method", methodName)
-                        .replace("$ip", ip)
-                        .replace("$message", message)
-                        .replace("$length", Long.toString(length))
-                        .replace("$factor", Double.toString(factor))
-                );
-                requestBody.append('\n');
-
-                startTime = startTime.plusMillis(1);
-            }
-            bulkRequest.setJsonEntity(requestBody.toString());
+            var bulkRequest = new Request("POST", "/" + DATA_STREAM + "/_bulk");
+            bulkRequest.setJsonEntity(bulkIndexRequestBody(numDocs, startTime));
             bulkRequest.addParameter("refresh", "true");
+
             var response = client().performRequest(bulkRequest);
-            assertOK(response);
             var responseBody = entityAsMap(response);
+
+            assertOK(response);
             assertThat("errors in response:\n " + responseBody, responseBody.get("errors"), equalTo(false));
             if (firstIndex == null) {
                 firstIndex = (String) ((Map<?, ?>) ((Map<?, ?>) ((List<?>) responseBody.get("items")).get(0)).get("create")).get("_index");
             }
         }
-        return firstIndex;
+    }
+
+    private String bulkIndexRequestBody(int numDocs, Instant startTime) {
+        StringBuilder requestBody = new StringBuilder();
+
+        for (int j = 0; j < numDocs; j++) {
+            String hostName = "host" + j % 50; // Not realistic, but makes asserting search / query response easier.
+            String methodName = "method" + j % 5;
+            String ip = NetworkAddress.format(randomIp(true));
+            String message = randomAlphasDelimitedBySpace(10, 1, 15);
+            recordSmallestMessage(message);
+            long length = randomLong();
+            double factor = randomDouble();
+
+            requestBody.append(
+                BULK_ITEM_TEMPLATE.replace("$now", formatInstant(startTime))
+                    .replace("$host", hostName)
+                    .replace("$method", methodName)
+                    .replace("$ip", ip)
+                    .replace("$message", message)
+                    .replace("$length", Long.toString(length))
+                    .replace("$factor", Double.toString(factor))
+            );
+            requestBody.append('\n');
+
+            startTime = startTime.plusMillis(1);
+        }
+
+        return requestBody.toString();
+    }
+
+    private void recordSmallestMessage(final String message) {
+        if (smallestMessage == null || message.compareTo(smallestMessage) < 0) {
+            smallestMessage = message;
+        }
     }
 
     void search(String dataStreamName) throws Exception {
@@ -174,24 +229,19 @@ public class MatchOnlyTextRollingUpgradeIT extends AbstractRollingUpgradeWithSec
         searchRequest.addParameter("pretty", "true");
         searchRequest.setJsonEntity("""
             {
-                "size": 500,
-                "query": {
-                    "match_phrase": {
-                        "message": "chicken"
-                    }
-                }
+                "size": 500
             }
-            """.replace("chicken", "chicken" + randomInt(5)));
+            """);
         var response = client().performRequest(searchRequest);
         assertOK(response);
         var responseBody = entityAsMap(response);
         logger.info("{}", responseBody);
 
         Integer totalCount = ObjectPath.evaluate(responseBody, "hits.total.value");
-        assertThat(totalCount, greaterThanOrEqualTo(512));
+        assertThat(totalCount, greaterThanOrEqualTo(NUM_REQUESTS * NUM_DOCS_PER_REQUEST));
     }
 
-    void query(String dataStreamName) throws Exception {
+    private void query(String dataStreamName) throws Exception {
         var queryRequest = new Request("POST", "/_query");
         queryRequest.addParameter("pretty", "true");
         queryRequest.setJsonEntity("""
@@ -205,18 +255,18 @@ public class MatchOnlyTextRollingUpgradeIT extends AbstractRollingUpgradeWithSec
         logger.info("{}", responseBody);
 
         String column1 = ObjectPath.evaluate(responseBody, "columns.0.name");
-        String column2 = ObjectPath.evaluate(responseBody, "columns.1.name");
-        String column3 = ObjectPath.evaluate(responseBody, "columns.2.name");
         assertThat(column1, equalTo("max(length)"));
+        String column2 = ObjectPath.evaluate(responseBody, "columns.1.name");
         assertThat(column2, equalTo("max(factor)"));
+        String column3 = ObjectPath.evaluate(responseBody, "columns.2.name");
         assertThat(column3, equalTo("message"));
 
-        String key = ObjectPath.evaluate(responseBody, "values.0.2");
-        assertThat(key, equalTo("the quick brown fox jumps over the chicken0"));
         Long maxRx = ObjectPath.evaluate(responseBody, "values.0.0");
         assertThat(maxRx, notNullValue());
         Double maxTx = ObjectPath.evaluate(responseBody, "values.0.1");
         assertThat(maxTx, notNullValue());
+        String key = ObjectPath.evaluate(responseBody, "values.0.2");
+        assertThat(key, equalTo(smallestMessage));
     }
 
     protected static void startTrial() throws IOException {