Browse Source

Don't normalize fields of type text when the index mode is LogsDB or TSDB (#131317)

* Don't normalize fields of type text when the index mode is LOGSDB or TSDB.

In the context of logsdb and tsdb, not many fields are configured to be of type text,
and given the trade offs that logsdb and tsdb provide that is geared towards storage
reduction, it makes sense not to store a normalized version of text fields.

Closes #129183

* Update docs/changelog/131317.yaml

* Update 131317.yaml

* Update docs/changelog/131317.yaml

* Update 131317.yaml

* Update 131317.yaml

* Update docs/changelog/131317.yaml

* Update server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java

Co-authored-by: Martijn van Groningen <martijn.v.groningen@gmail.com>

* Update docs/changelog/131317.yaml

Co-authored-by: Martijn van Groningen <martijn.v.groningen@gmail.com>

* Update docs/changelog/131317.yaml

Co-authored-by: Martijn van Groningen <martijn.v.groningen@gmail.com>

---------

Co-authored-by: Martijn van Groningen <martijn.v.groningen@gmail.com>
Dmitry Kubikov 1 month ago
parent
commit
328fed87e2

+ 16 - 0
docs/changelog/131317.yaml

@@ -0,0 +1,16 @@
+pr: 131317
+summary: Don't enable norms for fields of type text when the index mode is LogsDB or TSDB
+area: Mapping
+type: breaking
+issues: []
+breaking:
+  title: Don't enable norms for fields of type text when the index mode is LogsDB or TSDB
+  area: Mapping
+  details: "This changes the default behavior for norms on `text` fields in logsdb\
+    \ and tsdb indices. Prior to this change, norms were enabled by default, with\
+    \ the option to disable them via manual configurations. After this change, norms\
+    \ will be disabled by default. Note, because we dont support enabling norms from\
+    \ a disabled state, users will not be able to enable norms on `text` fields in\
+    \ logsdb and tsdb indices."
+  impact: Text fields will no longer be normalized by default in LogsDB and TSDB indicies.
+  notable: false

+ 1 - 1
modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/SearchAsYouTypeFieldMapper.java

@@ -138,7 +138,7 @@ public class SearchAsYouTypeFieldMapper extends FieldMapper {
         final Parameter<SimilarityProvider> similarity = TextParams.similarity(m -> builder(m).similarity.get());
 
         final Parameter<String> indexOptions = TextParams.textIndexOptions(m -> builder(m).indexOptions.get());
-        final Parameter<Boolean> norms = TextParams.norms(true, m -> builder(m).norms.get());
+        final Parameter<Boolean> norms = Parameter.normsParam(m -> builder(m).norms.get(), true);
         final Parameter<String> termVectors = TextParams.termVectors(m -> builder(m).termVectors.get());
 
         private final Parameter<Map<String, String>> meta = Parameter.metaParam();

+ 1 - 1
plugins/analysis-icu/src/main/java/org/elasticsearch/plugin/analysis/icu/ICUCollationKeywordFieldMapper.java

@@ -226,7 +226,7 @@ public class ICUCollationKeywordFieldMapper extends FieldMapper {
         final Parameter<Boolean> stored = Parameter.storeParam(m -> toType(m).fieldType.stored(), false);
 
         final Parameter<String> indexOptions = TextParams.keywordIndexOptions(m -> toType(m).indexOptions);
-        final Parameter<Boolean> hasNorms = TextParams.norms(false, m -> toType(m).fieldType.omitNorms() == false);
+        final Parameter<Boolean> hasNorms = Parameter.normsParam(m -> toType(m).fieldType.omitNorms() == false, false);
 
         final Parameter<Map<String, String>> meta = Parameter.metaParam();
 

+ 1 - 1
plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java

@@ -82,7 +82,7 @@ public class AnnotatedTextFieldMapper extends FieldMapper {
 
         final Parameter<SimilarityProvider> similarity = TextParams.similarity(m -> builder(m).similarity.getValue());
         final Parameter<String> indexOptions = TextParams.textIndexOptions(m -> builder(m).indexOptions.getValue());
-        final Parameter<Boolean> norms = TextParams.norms(true, m -> builder(m).norms.getValue());
+        final Parameter<Boolean> norms = Parameter.normsParam(m -> builder(m).norms.getValue(), true);
         final Parameter<String> termVectors = TextParams.termVectors(m -> builder(m).termVectors.getValue());
 
         private final Parameter<Map<String, String>> meta = Parameter.metaParam();

+ 12 - 0
server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java

@@ -1325,6 +1325,18 @@ public abstract class FieldMapper extends Mapper {
             return Parameter.boolParam("doc_values", false, initializer, defaultValue);
         }
 
+        public static Parameter<Boolean> normsParam(Function<FieldMapper, Boolean> initializer, boolean defaultValue) {
+            // norms can be updated from 'true' to 'false' but not vice-versa
+            return Parameter.boolParam("norms", true, initializer, defaultValue)
+                .setMergeValidator((prev, curr, c) -> prev == curr || (prev && curr == false));
+        }
+
+        public static Parameter<Boolean> normsParam(Function<FieldMapper, Boolean> initializer, Supplier<Boolean> defaultValueSupplier) {
+            // norms can be updated from 'true' to 'false' but not vice-versa
+            return Parameter.boolParam("norms", true, initializer, defaultValueSupplier)
+                .setMergeValidator((prev, curr, c) -> prev == curr || (prev && curr == false));
+        }
+
         /**
          * Defines a script parameter
          * @param initializer   retrieves the equivalent parameter from an existing FieldMapper for use in merges

+ 1 - 1
server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java

@@ -183,7 +183,7 @@ public final class KeywordFieldMapper extends FieldMapper {
         private final IndexSortConfig indexSortConfig;
         private final IndexMode indexMode;
         private final Parameter<String> indexOptions = TextParams.keywordIndexOptions(m -> toType(m).indexOptions);
-        private final Parameter<Boolean> hasNorms = TextParams.norms(false, m -> toType(m).fieldType.omitNorms() == false);
+        private final Parameter<Boolean> hasNorms = Parameter.normsParam(m -> toType(m).fieldType.omitNorms() == false, false);
         private final Parameter<SimilarityProvider> similarity = TextParams.similarity(
             m -> toType(m).fieldType().getTextSearchInfo().similarity()
         );

+ 23 - 6
server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java

@@ -55,6 +55,7 @@ import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
 import org.elasticsearch.common.unit.Fuzziness;
 import org.elasticsearch.common.xcontent.support.XContentMapValues;
 import org.elasticsearch.core.Nullable;
+import org.elasticsearch.index.IndexMode;
 import org.elasticsearch.index.IndexVersion;
 import org.elasticsearch.index.IndexVersions;
 import org.elasticsearch.index.analysis.AnalyzerScope;
@@ -239,15 +240,17 @@ public final class TextFieldMapper extends FieldMapper {
 
         private final IndexVersion indexCreatedVersion;
         private final Parameter<Boolean> store;
+        private final Parameter<Boolean> norms;
 
         private final boolean isSyntheticSourceEnabled;
 
+        private final IndexMode indexMode;
+
         private final Parameter<Boolean> index = Parameter.indexParam(m -> ((TextFieldMapper) m).index, true);
 
         final Parameter<SimilarityProvider> similarity = TextParams.similarity(m -> ((TextFieldMapper) m).similarity);
 
         final Parameter<String> indexOptions = TextParams.textIndexOptions(m -> ((TextFieldMapper) m).indexOptions);
-        final Parameter<Boolean> norms = TextParams.norms(true, m -> ((TextFieldMapper) m).norms);
         final Parameter<String> termVectors = TextParams.termVectors(m -> ((TextFieldMapper) m).termVectors);
 
         final Parameter<Boolean> fieldData = Parameter.boolParam("fielddata", true, m -> ((TextFieldMapper) m).fieldData, false);
@@ -290,18 +293,30 @@ public final class TextFieldMapper extends FieldMapper {
         private final boolean withinMultiField;
 
         public Builder(String name, IndexAnalyzers indexAnalyzers, boolean isSyntheticSourceEnabled) {
-            this(name, IndexVersion.current(), indexAnalyzers, isSyntheticSourceEnabled, false);
+            this(name, IndexVersion.current(), null, indexAnalyzers, isSyntheticSourceEnabled, false);
         }
 
         public Builder(
             String name,
             IndexVersion indexCreatedVersion,
+            IndexMode indexMode,
             IndexAnalyzers indexAnalyzers,
             boolean isSyntheticSourceEnabled,
             boolean withinMultiField
         ) {
             super(name);
 
+            this.indexCreatedVersion = indexCreatedVersion;
+            this.indexMode = indexMode;
+            this.isSyntheticSourceEnabled = isSyntheticSourceEnabled;
+            this.withinMultiField = withinMultiField;
+
+            // don't enable norms by default if the index is LOGSDB or TSDB based
+            this.norms = Parameter.normsParam(
+                m -> ((TextFieldMapper) m).norms,
+                () -> indexMode != IndexMode.LOGSDB && indexMode != IndexMode.TIME_SERIES
+            );
+
             // If synthetic source is used we need to either store this field
             // to recreate the source or use keyword multi-fields for that.
             // So if there are no suitable multi-fields we will default to
@@ -309,7 +324,6 @@ public final class TextFieldMapper extends FieldMapper {
             //
             // If 'store' parameter was explicitly provided we'll reject the request.
             // Note that if current builder is a multi field, then we don't need to store, given that responsibility lies with parent field
-            this.withinMultiField = withinMultiField;
             this.store = Parameter.storeParam(m -> ((TextFieldMapper) m).store, () -> {
                 if (multiFieldsNotStoredByDefaultIndexVersionCheck(indexCreatedVersion)) {
                     return isSyntheticSourceEnabled
@@ -319,14 +333,12 @@ public final class TextFieldMapper extends FieldMapper {
                     return isSyntheticSourceEnabled && multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() == false;
                 }
             });
-            this.indexCreatedVersion = indexCreatedVersion;
             this.analyzers = new TextParams.Analyzers(
                 indexAnalyzers,
                 m -> ((TextFieldMapper) m).indexAnalyzer,
                 m -> (((TextFieldMapper) m).positionIncrementGap),
                 indexCreatedVersion
             );
-            this.isSyntheticSourceEnabled = isSyntheticSourceEnabled;
         }
 
         public static boolean multiFieldsNotStoredByDefaultIndexVersionCheck(IndexVersion indexCreatedVersion) {
@@ -508,6 +520,7 @@ public final class TextFieldMapper extends FieldMapper {
         (n, c) -> new Builder(
             n,
             c.indexVersionCreated(),
+            c.getIndexSettings().getMode(),
             c.getIndexAnalyzers(),
             SourceFieldMapper.isSynthetic(c.getIndexSettings()),
             c.isWithinMultiField()
@@ -1323,6 +1336,7 @@ public final class TextFieldMapper extends FieldMapper {
     }
 
     private final IndexVersion indexCreatedVersion;
+    private final IndexMode indexMode;
     private final boolean index;
     private final boolean store;
     private final String indexOptions;
@@ -1361,6 +1375,7 @@ public final class TextFieldMapper extends FieldMapper {
         this.prefixFieldInfo = prefixFieldInfo;
         this.phraseFieldInfo = phraseFieldInfo;
         this.indexCreatedVersion = builder.indexCreatedVersion;
+        this.indexMode = builder.indexMode;
         this.indexAnalyzer = builder.analyzers.getIndexAnalyzer();
         this.indexAnalyzers = builder.analyzers.indexAnalyzers;
         this.positionIncrementGap = builder.analyzers.positionIncrementGap.getValue();
@@ -1398,7 +1413,9 @@ public final class TextFieldMapper extends FieldMapper {
 
     @Override
     public FieldMapper.Builder getMergeBuilder() {
-        return new Builder(leafName(), indexCreatedVersion, indexAnalyzers, isSyntheticSourceEnabled, isWithinMultiField).init(this);
+        return new Builder(leafName(), indexCreatedVersion, indexMode, indexAnalyzers, isSyntheticSourceEnabled, isWithinMultiField).init(
+            this
+        );
     }
 
     @Override

+ 0 - 5
server/src/main/java/org/elasticsearch/index/mapper/TextParams.java

@@ -123,11 +123,6 @@ public final class TextParams {
         }
     }
 
-    public static Parameter<Boolean> norms(boolean defaultValue, Function<FieldMapper, Boolean> initializer) {
-        // norms can be updated from 'true' to 'false' but not vv
-        return Parameter.boolParam("norms", true, initializer, defaultValue).setMergeValidator((o, n, c) -> o == n || (o && n == false));
-    }
-
     public static Parameter<SimilarityProvider> similarity(Function<FieldMapper, SimilarityProvider> init) {
         return new Parameter<>(
             "similarity",

+ 142 - 0
server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java

@@ -48,6 +48,7 @@ import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.cluster.metadata.IndexMetadata;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
+import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.index.IndexMode;
 import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.IndexVersion;
@@ -79,6 +80,8 @@ import org.elasticsearch.xcontent.XContentFactory;
 import org.junit.AssumptionViolatedException;
 
 import java.io.IOException;
+import java.time.Instant;
+import java.time.temporal.ChronoUnit;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
@@ -1432,4 +1435,143 @@ public class TextFieldMapperTests extends MapperTestCase {
             assertFalse(dv.advanceExact(3));
         });
     }
+
+    public void testNormalizeByDefault() throws IOException {
+        // given
+        Settings.Builder indexSettingsBuilder = getIndexSettingsBuilder();
+        indexSettingsBuilder.put(IndexSettings.MODE.getKey(), IndexMode.STANDARD.getName());
+        Settings indexSettings = indexSettingsBuilder.build();
+
+        XContentBuilder mapping = mapping(b -> {
+            b.startObject("potato");
+            b.field("type", "text");
+            b.endObject();
+        });
+
+        var source = source(b -> b.field("potato", "a potato flew around my room"));
+
+        // when
+        DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper();
+        ParsedDocument doc = mapper.parse(source);
+
+        List<IndexableField> fields = doc.rootDoc().getFields("potato");
+        IndexableFieldType fieldType = fields.get(0).fieldType();
+
+        // then
+        assertThat(fieldType.omitNorms(), is(false));
+    }
+
+    public void testNormalizeWhenIndexModeIsNotGiven() throws IOException {
+        // given
+        Settings.Builder indexSettingsBuilder = getIndexSettingsBuilder();
+        Settings indexSettings = indexSettingsBuilder.build();
+
+        XContentBuilder mapping = mapping(b -> {
+            b.startObject("potato");
+            b.field("type", "text");
+            b.endObject();
+        });
+
+        var source = source(b -> b.field("potato", "a potato flew around my room"));
+
+        // when
+        DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper();
+        ParsedDocument doc = mapper.parse(source);
+
+        List<IndexableField> fields = doc.rootDoc().getFields("potato");
+        IndexableFieldType fieldType = fields.get(0).fieldType();
+
+        // then
+        assertThat(fieldType.omitNorms(), is(false));
+    }
+
+    public void testNormalizeWhenIndexModeIsNull() throws IOException {
+        // given
+        Settings.Builder indexSettingsBuilder = getIndexSettingsBuilder();
+        indexSettingsBuilder.put(IndexSettings.MODE.getKey(), (String) null);
+        Settings indexSettings = indexSettingsBuilder.build();
+
+        XContentBuilder mapping = mapping(b -> {
+            b.startObject("potato");
+            b.field("type", "text");
+            b.endObject();
+        });
+
+        var source = source(b -> b.field("potato", "a potato flew around my room"));
+
+        // when
+        DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper();
+        ParsedDocument doc = mapper.parse(source);
+
+        List<IndexableField> fields = doc.rootDoc().getFields("potato");
+        IndexableFieldType fieldType = fields.get(0).fieldType();
+
+        // then
+        assertThat(fieldType.omitNorms(), is(false));
+    }
+
+    public void testDontNormalizeWhenIndexModeIsLogsDB() throws IOException {
+        // given
+        Settings.Builder indexSettingsBuilder = getIndexSettingsBuilder();
+        indexSettingsBuilder.put(IndexSettings.MODE.getKey(), IndexMode.LOGSDB.getName());
+        Settings indexSettings = indexSettingsBuilder.build();
+
+        XContentBuilder mapping = mapping(b -> {
+            b.startObject("potato");
+            b.field("type", "text");
+            b.endObject();
+        });
+
+        var source = source(b -> {
+            b.field("@timestamp", Instant.now());
+            b.field("potato", "a potato flew around my room");
+        });
+
+        // when
+        DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper();
+        ParsedDocument doc = mapper.parse(source);
+
+        List<IndexableField> fields = doc.rootDoc().getFields("potato");
+        IndexableFieldType fieldType = fields.get(0).fieldType();
+
+        // then
+        assertThat(fieldType.omitNorms(), is(true));
+    }
+
+    public void testDontNormalizeWhenIndexModeIsTSDB() throws IOException {
+        // given
+        Instant currentTime = Instant.now();
+        Settings.Builder indexSettingsBuilder = getIndexSettingsBuilder();
+        indexSettingsBuilder.put(IndexSettings.MODE.getKey(), IndexMode.TIME_SERIES.getName())
+            .put(IndexSettings.TIME_SERIES_START_TIME.getKey(), currentTime.minus(1, ChronoUnit.HOURS).toEpochMilli())
+            .put(IndexSettings.TIME_SERIES_END_TIME.getKey(), currentTime.plus(1, ChronoUnit.HOURS).toEpochMilli())
+            .put(IndexMetadata.INDEX_ROUTING_PATH.getKey(), "dimension");
+        Settings indexSettings = indexSettingsBuilder.build();
+
+        XContentBuilder mapping = mapping(b -> {
+            b.startObject("potato");
+            b.field("type", "text");
+            b.endObject();
+
+            b.startObject("@timestamp");
+            b.field("type", "date");
+            b.endObject();
+        });
+
+        var source = source(TimeSeriesRoutingHashFieldMapper.DUMMY_ENCODED_VALUE, b -> {
+            b.field("@timestamp", Instant.now());
+            b.field("potato", "a potato flew around my room");
+        }, null);
+
+        // when
+        DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper();
+        ParsedDocument doc = mapper.parse(source);
+
+        List<IndexableField> fields = doc.rootDoc().getFields("potato");
+        IndexableFieldType fieldType = fields.get(0).fieldType();
+
+        // then
+        assertThat(fieldType.omitNorms(), is(true));
+    }
+
 }