Procházet zdrojové kódy

Make field limit more predictable (#102885)

Today, we're counting all mappers, including mappers for subfields that
aren't explicitly added to the mapping towards the field limit.

This means that some field types, such as `search_as_you_type` or
`percolator` count as more than one field even though that's not
apparent to users as they're just defining them as a single field in the
mapping.

This change makes it so that each field mapper only counts as one. We're
still counting multi-fields.

This makes it easier to understand for users why the field limit is hit.

~In addition to that, it also simplifies
https://github.com/elastic/elasticsearch/pull/96235 as it makes the
implementation of `Mapper.Builder#getTotalFieldsCount` much easier and
easier to align with `Mapper#getTotalFieldsCount`. This reduces the risk
of over- or under-estimating the field count of a `Mapper.Builder` in
`DocumentParserContext#addDynamicMapper`, which in turn reduces the risk
of data loss due to the issue described here:
https://github.com/elastic/elasticsearch/pull/96235#discussion_r1402495749.~

*Edit: due to https://github.com/elastic/elasticsearch/pull/103865, we
don't need an implementation of `getTotalFieldsCount` or `mapperSize` in
`Mapper.Builder`. Still, this PR more closely aligns
`Mapper#getTotalFieldsCount` with `MappingLookup#getTotalFieldsCount`,
which  `DocumentParserContext#addDynamicMapper` uses to determine
whether the field limit is hit*

A potential risk of this is that we're now effectively allowing more
fields in the mapping. It may be surprising to users that more fields
can be added to a mapping. Although, I'd not expect negative
consequences from that. Generally, I'd  expect users to be happy about
any change that reduces the risk of data loss.

We could also think about whether to apply the new counting logic only
to new indices (depending on the `IndexVersion`). However, that would
add more complexity and I'm not convinced about the value. We'd then
need to maintain two different ways of counting fields and also require
passing in the `IndexVersion` to `MappingLookup` which previously didn't
require the `IndexVersion`.

This PR is meant as a conversation starter. It would also simplify
https://github.com/elastic/elasticsearch/pull/96235 but I don't think
this blocks that PR in any way.

I'm curious about the opinion of @javanna and @jpountz on this.
Felix Barnsteiner před 1 rokem
rodič
revize
ff0f83f59d

+ 5 - 0
docs/changelog/102885.yaml

@@ -0,0 +1,5 @@
+pr: 102885
+summary: Make field limit more predictable
+area: Mapping
+type: enhancement
+issues: []

+ 6 - 0
modules/legacy-geo/src/test/java/org/elasticsearch/legacygeo/mapper/LegacyGeoShapeFieldMapperTests.java

@@ -76,6 +76,12 @@ public class LegacyGeoShapeFieldMapperTests extends MapperTestCase {
         return false;
     }
 
+    @Override
+    public void testTotalFieldsCount() throws IOException {
+        super.testTotalFieldsCount();
+        assertWarnings("Parameter [strategy] is deprecated and will be removed in a future version");
+    }
+
     @Override
     protected void registerParameters(ParameterChecker checker) throws IOException {
 

+ 1 - 1
server/src/main/java/org/elasticsearch/index/IndexService.java

@@ -330,7 +330,7 @@ public class IndexService extends AbstractIndexComponent implements IndicesClust
         if (mapperService == null) {
             return null;
         }
-        long totalCount = mapperService().mappingLookup().getTotalFieldsCount();
+        long totalCount = mapperService().mappingLookup().getTotalMapperCount();
         long totalEstimatedOverhead = totalCount * 1024L; // 1KiB estimated per mapping
         NodeMappingStats indexNodeMappingStats = new NodeMappingStats(totalCount, totalEstimatedOverhead);
         return indexNodeMappingStats;

+ 1 - 1
server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java

@@ -335,7 +335,7 @@ public abstract class DocumentParserContext {
         if (mappingLookup.getMapper(mapper.name()) == null
             && mappingLookup.objectMappers().containsKey(mapper.name()) == false
             && dynamicMappers.containsKey(mapper.name()) == false) {
-            int mapperSize = mapper.mapperSize();
+            int mapperSize = mapper.getTotalFieldsCount();
             int additionalFieldsToAdd = getNewFieldsSize() + mapperSize;
             if (indexSettings().isIgnoreDynamicFieldsBeyondLimit()) {
                 if (mappingLookup.exceedsLimit(indexSettings().getMappingTotalFieldsLimit(), additionalFieldsToAdd)) {

+ 5 - 0
server/src/main/java/org/elasticsearch/index/mapper/FieldAliasMapper.java

@@ -113,6 +113,11 @@ public final class FieldAliasMapper extends Mapper {
         }
     }
 
+    @Override
+    public int getTotalFieldsCount() {
+        return 1;
+    }
+
     public static class TypeParser implements Mapper.TypeParser {
         @Override
         public Mapper.Builder parse(String name, Map<String, Object> node, MappingParserContext parserContext)

+ 7 - 1
server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java

@@ -51,6 +51,7 @@ import java.util.function.BiFunction;
 import java.util.function.Consumer;
 import java.util.function.Function;
 import java.util.function.Supplier;
+import java.util.stream.Stream;
 
 import static org.elasticsearch.core.Strings.format;
 
@@ -428,6 +429,11 @@ public abstract class FieldMapper extends Mapper {
 
     protected abstract String contentType();
 
+    @Override
+    public int getTotalFieldsCount() {
+        return 1 + Stream.of(multiFields.mappers).mapToInt(FieldMapper::getTotalFieldsCount).sum();
+    }
+
     public Map<String, NamedAnalyzer> indexAnalyzers() {
         return Map.of();
     }
@@ -455,7 +461,7 @@ public abstract class FieldMapper extends Mapper {
 
             private void update(FieldMapper toMerge, MapperMergeContext context) {
                 if (mapperBuilders.containsKey(toMerge.simpleName()) == false) {
-                    if (context.decrementFieldBudgetIfPossible(toMerge.mapperSize())) {
+                    if (context.decrementFieldBudgetIfPossible(toMerge.getTotalFieldsCount())) {
                         add(toMerge);
                     }
                 } else {

+ 3 - 11
server/src/main/java/org/elasticsearch/index/mapper/Mapper.java

@@ -137,16 +137,8 @@ public abstract class Mapper implements ToXContentFragment, Iterable<Mapper> {
     }
 
     /**
-     * Returns the size this mapper counts against the {@linkplain MapperService#INDEX_MAPPING_TOTAL_FIELDS_LIMIT_SETTING field limit}.
-     * <p>
-     * Needs to be in sync with {@link MappingLookup#getTotalFieldsCount()}.
+     * The total number of fields as defined in the mapping.
+     * Defines how this mapper counts towards {@link MapperService#INDEX_MAPPING_TOTAL_FIELDS_LIMIT_SETTING}.
      */
-    public int mapperSize() {
-        int size = 1;
-        for (Mapper mapper : this) {
-            size += mapper.mapperSize();
-        }
-        return size;
-    }
-
+    public abstract int getTotalFieldsCount();
 }

+ 11 - 1
server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java

@@ -55,6 +55,7 @@ public final class MappingLookup {
     private final List<FieldMapper> indexTimeScriptMappers;
     private final Mapping mapping;
     private final Set<String> completionFields;
+    private final int totalFieldsCount;
 
     /**
      * Creates a new {@link MappingLookup} instance by parsing the provided mapping and extracting its field definitions.
@@ -127,6 +128,7 @@ public final class MappingLookup {
         Collection<ObjectMapper> objectMappers,
         Collection<FieldAliasMapper> aliasMappers
     ) {
+        this.totalFieldsCount = mapping.getRoot().getTotalFieldsCount();
         this.mapping = mapping;
         Map<String, Mapper> fieldMappers = new HashMap<>();
         Map<String, ObjectMapper> objects = new HashMap<>();
@@ -223,6 +225,14 @@ public final class MappingLookup {
      * Returns the total number of fields defined in the mappings, including field mappers, object mappers as well as runtime fields.
      */
     public long getTotalFieldsCount() {
+        return totalFieldsCount;
+    }
+
+    /**
+     * Returns the total number of mappers defined in the mappings, including field mappers and their sub-fields
+     * (which are not explicitly defined in the mappings), multi-fields, object mappers, runtime fields and metadata field mappers.
+     */
+    public long getTotalMapperCount() {
         return fieldMappers.size() + objectMappers.size() + runtimeFieldMappersCount;
     }
 
@@ -286,7 +296,7 @@ public final class MappingLookup {
     }
 
     long remainingFieldsUntilLimit(long mappingTotalFieldsLimit) {
-        return mappingTotalFieldsLimit - getTotalFieldsCount() + mapping.getSortedMetadataMappers().length;
+        return mappingTotalFieldsLimit - totalFieldsCount;
     }
 
     private void checkDimensionFieldLimit(long limit) {

+ 7 - 2
server/src/main/java/org/elasticsearch/index/mapper/ObjectMapper.java

@@ -182,6 +182,11 @@ public class ObjectMapper extends Mapper {
         }
     }
 
+    @Override
+    public int getTotalFieldsCount() {
+        return 1 + mappers.values().stream().mapToInt(Mapper::getTotalFieldsCount).sum();
+    }
+
     public static class TypeParser implements Mapper.TypeParser {
         @Override
         public boolean supportsVersion(IndexVersion indexCreatedVersion) {
@@ -547,7 +552,7 @@ public class ObjectMapper extends Mapper {
                 Mapper mergeIntoMapper = mergedMappers.get(mergeWithMapper.simpleName());
                 Mapper merged = null;
                 if (mergeIntoMapper == null) {
-                    if (objectMergeContext.decrementFieldBudgetIfPossible(mergeWithMapper.mapperSize())) {
+                    if (objectMergeContext.decrementFieldBudgetIfPossible(mergeWithMapper.getTotalFieldsCount())) {
                         merged = mergeWithMapper;
                     } else if (mergeWithMapper instanceof ObjectMapper om) {
                         merged = truncateObjectMapper(reason, objectMergeContext, om);
@@ -581,7 +586,7 @@ public class ObjectMapper extends Mapper {
             // there's not enough capacity for the whole object mapper,
             // so we're just trying to add the shallow object, without it's sub-fields
             ObjectMapper shallowObjectMapper = objectMapper.withoutMappers();
-            if (context.decrementFieldBudgetIfPossible(shallowObjectMapper.mapperSize())) {
+            if (context.decrementFieldBudgetIfPossible(shallowObjectMapper.getTotalFieldsCount())) {
                 // now trying to add the sub-fields one by one via a merge, until we hit the limit
                 return shallowObjectMapper.merge(objectMapper, reason, context);
             }

+ 2 - 6
server/src/main/java/org/elasticsearch/index/mapper/RootObjectMapper.java

@@ -576,11 +576,7 @@ public class RootObjectMapper extends ObjectMapper {
     }
 
     @Override
-    public int mapperSize() {
-        int size = runtimeFields().size();
-        for (Mapper mapper : this) {
-            size += mapper.mapperSize();
-        }
-        return size;
+    public int getTotalFieldsCount() {
+        return mappers.values().stream().mapToInt(Mapper::getTotalFieldsCount).sum() + runtimeFields.size();
     }
 }

+ 1 - 1
server/src/test/java/org/elasticsearch/index/mapper/FieldAliasMapperTests.java

@@ -34,7 +34,7 @@ public class FieldAliasMapperTests extends MapperServiceTestCase {
         );
         DocumentMapper mapper = createDocumentMapper(mapping);
         assertEquals(mapping, mapper.mappingSource().toString());
-        assertEquals(2, mapper.mapping().getRoot().mapperSize());
+        assertEquals(2, mapper.mapping().getRoot().getTotalFieldsCount());
     }
 
     public void testParsingWithMissingPath() {

+ 20 - 20
server/src/test/java/org/elasticsearch/index/mapper/ObjectMapperMergeTests.java

@@ -213,10 +213,10 @@ public final class ObjectMapperMergeTests extends ESTestCase {
         final ObjectMapper mergedAdd1 = rootObjectMapper.merge(mergeWith, MapperMergeContext.root(false, false, 1));
 
         // THEN "baz" new field is added to merged mapping
-        assertEquals(3, rootObjectMapper.mapperSize());
-        assertEquals(4, mergeWith.mapperSize());
-        assertEquals(3, mergedAdd0.mapperSize());
-        assertEquals(4, mergedAdd1.mapperSize());
+        assertEquals(3, rootObjectMapper.getTotalFieldsCount());
+        assertEquals(4, mergeWith.getTotalFieldsCount());
+        assertEquals(3, mergedAdd0.getTotalFieldsCount());
+        assertEquals(4, mergedAdd1.getTotalFieldsCount());
     }
 
     public void testMergeWithLimitTruncatedObjectField() {
@@ -231,11 +231,11 @@ public final class ObjectMapperMergeTests extends ESTestCase {
         ObjectMapper mergedAdd1 = root.merge(mergeWith, MapperMergeContext.root(false, false, 1));
         ObjectMapper mergedAdd2 = root.merge(mergeWith, MapperMergeContext.root(false, false, 2));
         ObjectMapper mergedAdd3 = root.merge(mergeWith, MapperMergeContext.root(false, false, 3));
-        assertEquals(0, root.mapperSize());
-        assertEquals(0, mergedAdd0.mapperSize());
-        assertEquals(1, mergedAdd1.mapperSize());
-        assertEquals(2, mergedAdd2.mapperSize());
-        assertEquals(3, mergedAdd3.mapperSize());
+        assertEquals(0, root.getTotalFieldsCount());
+        assertEquals(0, mergedAdd0.getTotalFieldsCount());
+        assertEquals(1, mergedAdd1.getTotalFieldsCount());
+        assertEquals(2, mergedAdd2.getTotalFieldsCount());
+        assertEquals(3, mergedAdd3.getTotalFieldsCount());
 
         ObjectMapper parent1 = (ObjectMapper) mergedAdd1.getMapper("parent");
         assertNull(parent1.getMapper("child1"));
@@ -262,9 +262,9 @@ public final class ObjectMapperMergeTests extends ESTestCase {
 
         ObjectMapper mergedAdd0 = root.merge(mergeWith, MapperMergeContext.root(false, false, 0));
         ObjectMapper mergedAdd1 = root.merge(mergeWith, MapperMergeContext.root(false, false, 1));
-        assertEquals(2, root.mapperSize());
-        assertEquals(2, mergedAdd0.mapperSize());
-        assertEquals(3, mergedAdd1.mapperSize());
+        assertEquals(2, root.getTotalFieldsCount());
+        assertEquals(2, mergedAdd0.getTotalFieldsCount());
+        assertEquals(3, mergedAdd1.getTotalFieldsCount());
 
         ObjectMapper parent0 = (ObjectMapper) mergedAdd0.getMapper("parent");
         assertNotNull(parent0.getMapper("child1"));
@@ -285,13 +285,13 @@ public final class ObjectMapperMergeTests extends ESTestCase {
             createTextKeywordMultiField("text", "keyword2")
         ).build(MapperBuilderContext.root(false, false));
 
-        assertEquals(2, mergeInto.mapperSize());
-        assertEquals(2, mergeWith.mapperSize());
+        assertEquals(2, mergeInto.getTotalFieldsCount());
+        assertEquals(2, mergeWith.getTotalFieldsCount());
 
         ObjectMapper mergedAdd0 = mergeInto.merge(mergeWith, MapperMergeContext.root(false, false, 0));
         ObjectMapper mergedAdd1 = mergeInto.merge(mergeWith, MapperMergeContext.root(false, false, 1));
-        assertEquals(2, mergedAdd0.mapperSize());
-        assertEquals(3, mergedAdd1.mapperSize());
+        assertEquals(2, mergedAdd0.getTotalFieldsCount());
+        assertEquals(3, mergedAdd1.getTotalFieldsCount());
     }
 
     public void testMergeWithLimitRuntimeField() {
@@ -302,13 +302,13 @@ public final class ObjectMapperMergeTests extends ESTestCase {
             new TestRuntimeField("existing_runtime_field", "keyword")
         ).addRuntimeField(new TestRuntimeField("new_runtime_field", "keyword")).build(MapperBuilderContext.root(false, false));
 
-        assertEquals(3, mergeInto.mapperSize());
-        assertEquals(2, mergeWith.mapperSize());
+        assertEquals(3, mergeInto.getTotalFieldsCount());
+        assertEquals(2, mergeWith.getTotalFieldsCount());
 
         ObjectMapper mergedAdd0 = mergeInto.merge(mergeWith, MapperMergeContext.root(false, false, 0));
         ObjectMapper mergedAdd1 = mergeInto.merge(mergeWith, MapperMergeContext.root(false, false, 1));
-        assertEquals(3, mergedAdd0.mapperSize());
-        assertEquals(4, mergedAdd1.mapperSize());
+        assertEquals(3, mergedAdd0.getTotalFieldsCount());
+        assertEquals(4, mergedAdd1.getTotalFieldsCount());
     }
 
     private static RootObjectMapper createRootSubobjectFalseLeafWithDots() {

+ 8 - 3
server/src/test/java/org/elasticsearch/index/mapper/ObjectMapperTests.java

@@ -530,15 +530,20 @@ public class ObjectMapperTests extends MapperServiceTestCase {
         assertThat(mapper.mapping().getRoot().syntheticFieldLoader().docValuesLoader(null, null), nullValue());
     }
 
-    public void testNestedObjectWithMultiFieldsMapperSize() throws IOException {
+    public void testNestedObjectWithMultiFieldsgetTotalFieldsCount() {
         ObjectMapper.Builder mapperBuilder = new ObjectMapper.Builder("parent_size_1", Explicit.IMPLICIT_TRUE).add(
             new ObjectMapper.Builder("child_size_2", Explicit.IMPLICIT_TRUE).add(
                 new TextFieldMapper.Builder("grand_child_size_3", createDefaultIndexAnalyzers()).addMultiField(
                     new KeywordFieldMapper.Builder("multi_field_size_4", IndexVersion.current())
-                ).addMultiField(new KeywordFieldMapper.Builder("multi_field_size_5", IndexVersion.current()))
+                )
+                    .addMultiField(
+                        new TextFieldMapper.Builder("grand_child_size_5", createDefaultIndexAnalyzers()).addMultiField(
+                            new KeywordFieldMapper.Builder("multi_field_of_multi_field_size_6", IndexVersion.current())
+                        )
+                    )
             )
         );
-        assertThat(mapperBuilder.build(MapperBuilderContext.root(false, false)).mapperSize(), equalTo(5));
+        assertThat(mapperBuilder.build(MapperBuilderContext.root(false, false)).getTotalFieldsCount(), equalTo(6));
     }
 
     public void testWithoutMappers() throws IOException {

+ 1 - 1
server/src/test/java/org/elasticsearch/index/mapper/RootObjectMapperTests.java

@@ -166,7 +166,7 @@ public class RootObjectMapperTests extends MapperServiceTestCase {
         }));
         MapperService mapperService = createMapperService(mapping);
         assertEquals(mapping, mapperService.documentMapper().mappingSource().toString());
-        assertEquals(3, mapperService.documentMapper().mapping().getRoot().mapperSize());
+        assertEquals(3, mapperService.documentMapper().mapping().getRoot().getTotalFieldsCount());
     }
 
     public void testRuntimeSectionRejectedUpdate() throws IOException {

+ 5 - 0
test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java

@@ -427,6 +427,11 @@ public abstract class MapperTestCase extends MapperServiceTestCase {
         assertParseMaximalWarnings();
     }
 
+    public void testTotalFieldsCount() throws IOException {
+        MapperService mapperService = createMapperService(fieldMapping(this::minimalMapping));
+        assertEquals(1, mapperService.documentMapper().mapping().getRoot().getTotalFieldsCount());
+    }
+
     protected final void assertParseMinimalWarnings() {
         String[] warnings = getParseMinimalWarnings();
         if (warnings.length > 0) {