Explorar o código

Add Lucene segment-level fields stats (#111123)

This change returns the total number of fields at the segment level, 
allowing for a more accurate estimate of the memory used by Lucene. The
new estimate is expected to be closer to the actual memory usage than
the current estimate using the index-level field count, due to the
non-trivial overhead incurred by each Lucene segment. Two new fields are
introduced: total_segment_fields, which is the total number of fields at
the segment level, and average_fields_per_segment. The overhead per
field in segments with fewer fields is larger than in segments with many
fields.
Nhat Nguyen hai 1 ano
pai
achega
f275dff609

+ 5 - 0
docs/changelog/111123.yaml

@@ -0,0 +1,5 @@
+pr: 111123
+summary: Add Lucene segment-level fields stats
+area: Mapping
+type: enhancement
+issues: []

+ 8 - 0
docs/reference/cluster/nodes-stats.asciidoc

@@ -808,6 +808,14 @@ This is not shown for the `shards` level, since mappings may be shared across th
 `total_estimated_overhead_in_bytes`::
 (integer) Estimated heap overhead, in bytes, of mappings on this node, which allows for 1kiB of heap for every mapped field.
 
+`total_segments`::
+(integer) Estimated number of Lucene segments on this node
+
+`total_segment_fields`::
+(integer) Estimated number of fields at the segment level on this node
+
+`average_fields_per_segment`::
+(integer) Estimated average number of fields per segment on this node
 =======
 
 `dense_vector`::

+ 88 - 0
rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/nodes.stats/11_indices_metrics.yml

@@ -509,6 +509,94 @@
   - match: { nodes.$node_id.indices.indices.index1.mappings.total_estimated_overhead_in_bytes: 28672 }
 
 ---
+"Lucene segment level fields stats":
+
+  - requires:
+      cluster_features: ["mapper.segment_level_fields_stats"]
+      reason: "segment level fields stats"
+
+  - do:
+      indices.create:
+        index: index1
+        body:
+          settings:
+            number_of_shards: 1
+            number_of_replicas: 0
+            routing.rebalance.enable: none
+          mappings:
+            runtime:
+              a_source_field:
+                type: keyword
+            properties:
+              "@timestamp":
+                type: date
+              authors:
+                properties:
+                  age:
+                    type: long
+                  company:
+                    type: text
+                    fields:
+                      keyword:
+                        type: keyword
+                        ignore_above: 256
+                  name:
+                    properties:
+                      first_name:
+                        type: keyword
+                      full_name:
+                        type: text
+                      last_name:
+                        type: keyword
+              link:
+                type: alias
+                path: url
+              title:
+                type: text
+              url:
+                type: keyword
+  - do:
+      search_shards:
+        index:  index1
+  - set:
+      shards.0.0.node: node_id
+
+  - do:
+      nodes.stats: { metric: _all, level: "indices", human: true }
+
+  - do:
+      index:
+        index:  index1
+        body:   { "title": "foo", "@timestamp": "2023-10-15T14:12:12" }
+  - do:
+      indices.flush:
+        index: index1
+  - do:
+      nodes.stats: { metric: _all, level: "indices", human: true }
+
+  - match: { nodes.$node_id.indices.mappings.total_count: 28 }
+  - match: { nodes.$node_id.indices.mappings.total_estimated_overhead_in_bytes: 28672 }
+  - match: { nodes.$node_id.indices.mappings.total_segments: 1 }
+  - match: { nodes.$node_id.indices.mappings.total_segment_fields: 28 }
+  - match: { nodes.$node_id.indices.mappings.average_fields_per_segment: 28 }
+
+  - do:
+      index:
+        index:  index1
+        body:   { "title": "bar", "@timestamp": "2023-11-15T14:12:12" }
+  - do:
+      indices.flush:
+        index: index1
+  - do:
+      nodes.stats: { metric: _all, level: "indices", human: true }
+
+  - match: { nodes.$node_id.indices.mappings.total_count: 28 }
+  - match: { nodes.$node_id.indices.mappings.total_estimated_overhead_in_bytes: 28672 }
+  - match: { nodes.$node_id.indices.mappings.total_segments: 2 }
+  - match: { nodes.$node_id.indices.mappings.total_segment_fields: 56 }
+  - match: { nodes.$node_id.indices.mappings.average_fields_per_segment: 28 }
+---
+
 "indices mappings does not exist in shards level":
 
   - requires:

+ 1 - 0
server/src/main/java/org/elasticsearch/TransportVersions.java

@@ -218,6 +218,7 @@ public class TransportVersions {
     public static final TransportVersion ENTERPRISE_GEOIP_DOWNLOADER = def(8_708_00_0);
     public static final TransportVersion NODES_STATS_ENUM_SET = def(8_709_00_0);
     public static final TransportVersion MASTER_NODE_METRICS = def(8_710_00_0);
+    public static final TransportVersion SEGMENT_LEVEL_FIELDS_STATS = def(8_711_00_0);
 
     /*
      * STOP! READ THIS FIRST! No, really,

+ 12 - 4
server/src/main/java/org/elasticsearch/index/IndexService.java

@@ -332,10 +332,18 @@ public class IndexService extends AbstractIndexComponent implements IndicesClust
         if (mapperService == null) {
             return null;
         }
-        long totalCount = mapperService().mappingLookup().getTotalMapperCount();
-        long totalEstimatedOverhead = totalCount * 1024L; // 1KiB estimated per mapping
-        NodeMappingStats indexNodeMappingStats = new NodeMappingStats(totalCount, totalEstimatedOverhead);
-        return indexNodeMappingStats;
+        long numFields = mapperService().mappingLookup().getTotalMapperCount();
+        long totalEstimatedOverhead = numFields * 1024L; // 1KiB estimated per mapping
+        // Assume all segments have the same mapping; otherwise, we need to acquire searchers to count the actual fields.
+        int numLeaves = 0;
+        for (IndexShard shard : shards.values()) {
+            try {
+                numLeaves += shard.commitStats().getNumLeaves();
+            } catch (AlreadyClosedException ignored) {
+
+            }
+        }
+        return new NodeMappingStats(numFields, totalEstimatedOverhead, numLeaves, numLeaves * numFields);
     }
 
     public Set<Integer> shardIds() {

+ 17 - 2
server/src/main/java/org/elasticsearch/index/engine/CommitStats.java

@@ -8,6 +8,7 @@
 package org.elasticsearch.index.engine;
 
 import org.apache.lucene.index.SegmentInfos;
+import org.elasticsearch.TransportVersions;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.io.stream.Writeable;
@@ -27,6 +28,7 @@ public final class CommitStats implements Writeable, ToXContentFragment {
     private final long generation;
     private final String id; // lucene commit id in base 64;
     private final int numDocs;
+    private final int numLeaves;
 
     public CommitStats(SegmentInfos segmentInfos) {
         // clone the map to protect against concurrent changes
@@ -35,6 +37,7 @@ public final class CommitStats implements Writeable, ToXContentFragment {
         generation = segmentInfos.getLastGeneration();
         id = Base64.getEncoder().encodeToString(segmentInfos.getId());
         numDocs = Lucene.getNumDocs(segmentInfos);
+        numLeaves = segmentInfos.size();
     }
 
     CommitStats(StreamInput in) throws IOException {
@@ -42,6 +45,7 @@ public final class CommitStats implements Writeable, ToXContentFragment {
         generation = in.readLong();
         id = in.readOptionalString();
         numDocs = in.readInt();
+        numLeaves = in.getTransportVersion().onOrAfter(TransportVersions.SEGMENT_LEVEL_FIELDS_STATS) ? in.readVInt() : 0;
     }
 
     @Override
@@ -49,12 +53,16 @@ public final class CommitStats implements Writeable, ToXContentFragment {
         if (this == o) return true;
         if (o == null || getClass() != o.getClass()) return false;
         CommitStats that = (CommitStats) o;
-        return userData.equals(that.userData) && generation == that.generation && Objects.equals(id, that.id) && numDocs == that.numDocs;
+        return userData.equals(that.userData)
+            && generation == that.generation
+            && Objects.equals(id, that.id)
+            && numDocs == that.numDocs
+            && numLeaves == that.numLeaves;
     }
 
     @Override
     public int hashCode() {
-        return Objects.hash(userData, generation, id, numDocs);
+        return Objects.hash(userData, generation, id, numDocs, numLeaves);
     }
 
     public static CommitStats readOptionalCommitStatsFrom(StreamInput in) throws IOException {
@@ -81,12 +89,19 @@ public final class CommitStats implements Writeable, ToXContentFragment {
         return numDocs;
     }
 
+    public int getNumLeaves() {
+        return numLeaves;
+    }
+
     @Override
     public void writeTo(StreamOutput out) throws IOException {
         out.writeMap(userData, StreamOutput::writeString);
         out.writeLong(generation);
         out.writeOptionalString(id);
         out.writeInt(numDocs);
+        if (out.getTransportVersion().onOrAfter(TransportVersions.SEGMENT_LEVEL_FIELDS_STATS)) {
+            out.writeVInt(numLeaves);
+        }
     }
 
     static final class Fields {

+ 2 - 1
server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java

@@ -29,7 +29,8 @@ public class MapperFeatures implements FeatureSpecification {
             DenseVectorFieldMapper.BIT_VECTORS,
             DocumentMapper.INDEX_SORTING_ON_NESTED,
             KeywordFieldMapper.KEYWORD_DIMENSION_IGNORE_ABOVE,
-            IndexModeFieldMapper.QUERYING_INDEX_MODE
+            IndexModeFieldMapper.QUERYING_INDEX_MODE,
+            NodeMappingStats.SEGMENT_LEVEL_FIELDS_STATS
         );
     }
 }

+ 38 - 3
server/src/main/java/org/elasticsearch/index/mapper/NodeMappingStats.java

@@ -8,11 +8,13 @@
 
 package org.elasticsearch.index.mapper;
 
+import org.elasticsearch.TransportVersions;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.io.stream.Writeable;
 import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.core.Nullable;
+import org.elasticsearch.features.NodeFeature;
 import org.elasticsearch.xcontent.ToXContentFragment;
 import org.elasticsearch.xcontent.XContentBuilder;
 
@@ -25,15 +27,22 @@ import java.util.Objects;
  */
 public class NodeMappingStats implements Writeable, ToXContentFragment {
 
+    public static final NodeFeature SEGMENT_LEVEL_FIELDS_STATS = new NodeFeature("mapper.segment_level_fields_stats");
+
     private static final class Fields {
         static final String MAPPINGS = "mappings";
         static final String TOTAL_COUNT = "total_count";
         static final String TOTAL_ESTIMATED_OVERHEAD = "total_estimated_overhead";
         static final String TOTAL_ESTIMATED_OVERHEAD_IN_BYTES = "total_estimated_overhead_in_bytes";
+        static final String TOTAL_SEGMENTS = "total_segments";
+        static final String TOTAL_SEGMENT_FIELDS = "total_segment_fields";
+        static final String AVERAGE_FIELDS_PER_SEGMENT = "average_fields_per_segment";
     }
 
     private long totalCount;
     private long totalEstimatedOverhead;
+    private long totalSegments;
+    private long totalSegmentFields;
 
     public NodeMappingStats() {
 
@@ -42,17 +51,25 @@ public class NodeMappingStats implements Writeable, ToXContentFragment {
     public NodeMappingStats(StreamInput in) throws IOException {
         totalCount = in.readVLong();
         totalEstimatedOverhead = in.readVLong();
+        if (in.getTransportVersion().onOrAfter(TransportVersions.SEGMENT_LEVEL_FIELDS_STATS)) {
+            totalSegments = in.readVLong();
+            totalSegmentFields = in.readVLong();
+        }
     }
 
-    public NodeMappingStats(long totalCount, long totalEstimatedOverhead) {
+    public NodeMappingStats(long totalCount, long totalEstimatedOverhead, long totalSegments, long totalSegmentFields) {
         this.totalCount = totalCount;
         this.totalEstimatedOverhead = totalEstimatedOverhead;
+        this.totalSegments = totalSegments;
+        this.totalSegmentFields = totalSegmentFields;
     }
 
     public void add(@Nullable NodeMappingStats other) {
         if (other == null) return;
         this.totalCount += other.totalCount;
         this.totalEstimatedOverhead += other.totalEstimatedOverhead;
+        this.totalSegments += other.totalSegments;
+        this.totalSegmentFields += other.totalSegmentFields;
     }
 
     public long getTotalCount() {
@@ -63,10 +80,22 @@ public class NodeMappingStats implements Writeable, ToXContentFragment {
         return ByteSizeValue.ofBytes(totalEstimatedOverhead);
     }
 
+    public long getTotalSegments() {
+        return totalSegments;
+    }
+
+    public long getTotalSegmentFields() {
+        return totalSegmentFields;
+    }
+
     @Override
     public void writeTo(StreamOutput out) throws IOException {
         out.writeVLong(totalCount);
         out.writeVLong(totalEstimatedOverhead);
+        if (out.getTransportVersion().onOrAfter(TransportVersions.SEGMENT_LEVEL_FIELDS_STATS)) {
+            out.writeVLong(totalSegments);
+            out.writeVLong(totalSegmentFields);
+        }
     }
 
     @Override
@@ -74,6 +103,9 @@ public class NodeMappingStats implements Writeable, ToXContentFragment {
         builder.startObject(Fields.MAPPINGS);
         builder.field(Fields.TOTAL_COUNT, getTotalCount());
         builder.humanReadableField(Fields.TOTAL_ESTIMATED_OVERHEAD_IN_BYTES, Fields.TOTAL_ESTIMATED_OVERHEAD, getTotalEstimatedOverhead());
+        builder.field(Fields.TOTAL_SEGMENTS, totalSegments);
+        builder.field(Fields.TOTAL_SEGMENT_FIELDS, totalSegmentFields);
+        builder.field(Fields.AVERAGE_FIELDS_PER_SEGMENT, totalSegments == 0 ? 0 : totalSegmentFields / totalSegments);
         builder.endObject();
         return builder;
     }
@@ -83,11 +115,14 @@ public class NodeMappingStats implements Writeable, ToXContentFragment {
         if (this == o) return true;
         if (o == null || getClass() != o.getClass()) return false;
         NodeMappingStats that = (NodeMappingStats) o;
-        return totalCount == that.totalCount && totalEstimatedOverhead == that.totalEstimatedOverhead;
+        return totalCount == that.totalCount
+            && totalEstimatedOverhead == that.totalEstimatedOverhead
+            && totalSegments == that.totalSegments
+            && totalSegmentFields == that.totalSegmentFields;
     }
 
     @Override
     public int hashCode() {
-        return Objects.hash(totalCount, totalEstimatedOverhead);
+        return Objects.hash(totalCount, totalEstimatedOverhead, totalSegments, totalSegmentFields);
     }
 }

+ 6 - 1
server/src/test/java/org/elasticsearch/action/admin/cluster/node/stats/NodeStatsTests.java

@@ -561,7 +561,12 @@ public class NodeStatsTests extends ESTestCase {
 
     private static CommonStats createIndexLevelCommonStats() {
         CommonStats stats = new CommonStats(new CommonStatsFlags().clear().set(CommonStatsFlags.Flag.Mappings, true));
-        stats.nodeMappings = new NodeMappingStats(randomNonNegativeLong(), randomNonNegativeLong());
+        stats.nodeMappings = new NodeMappingStats(
+            randomNonNegativeLong(),
+            randomNonNegativeLong(),
+            randomNonNegativeLong(),
+            randomNonNegativeLong()
+        );
         return stats;
     }
 

+ 43 - 33
server/src/test/java/org/elasticsearch/index/mapper/NodeMappingStatsTests.java

@@ -7,52 +7,62 @@
  */
 package org.elasticsearch.index.mapper;
 
-import org.elasticsearch.common.io.stream.BytesStreamOutput;
-import org.elasticsearch.common.io.stream.StreamInput;
+import org.elasticsearch.common.io.stream.Writeable;
+import org.elasticsearch.test.AbstractWireSerializingTestCase;
 import org.elasticsearch.test.ESTestCase;
 
 import java.io.IOException;
 
-public class NodeMappingStatsTests extends ESTestCase {
-
-    public void testSerialize() throws IOException {
-        NodeMappingStats stats = randomNodeMappingStats();
-        BytesStreamOutput out = new BytesStreamOutput();
-        stats.writeTo(out);
-        StreamInput input = out.bytes().streamInput();
-        NodeMappingStats read = new NodeMappingStats(input);
-        assertEquals(-1, input.read());
-        assertEquals(stats.getTotalCount(), read.getTotalCount());
-        assertEquals(stats.getTotalEstimatedOverhead(), read.getTotalEstimatedOverhead());
-    }
+public class NodeMappingStatsTests extends AbstractWireSerializingTestCase<NodeMappingStats> {
 
-    public void testEqualityAndHashCode() {
-        NodeMappingStats stats = randomNodeMappingStats();
-        assertEquals(stats, stats);
-        assertEquals(stats.hashCode(), stats.hashCode());
+    @Override
+    protected Writeable.Reader<NodeMappingStats> instanceReader() {
+        return NodeMappingStats::new;
+    }
 
-        NodeMappingStats stats1 = new NodeMappingStats(1L, 2L);
-        NodeMappingStats stats2 = new NodeMappingStats(3L, 5L);
-        NodeMappingStats stats3 = new NodeMappingStats(3L, 5L);
+    @Override
+    protected NodeMappingStats createTestInstance() {
+        return new NodeMappingStats(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong());
+    }
 
-        assertNotEquals(stats1, stats2);
-        assertNotEquals(stats1, stats3);
-        assertEquals(stats2, stats3);
+    @Override
+    protected NodeMappingStats mutateInstance(NodeMappingStats in) throws IOException {
+        return switch (between(0, 3)) {
+            case 0 -> new NodeMappingStats(
+                randomValueOtherThan(in.getTotalCount(), ESTestCase::randomNonNegativeLong),
+                in.getTotalEstimatedOverhead().getBytes(),
+                in.getTotalSegments(),
+                in.getTotalSegmentFields()
+            );
+            case 1 -> new NodeMappingStats(
+                in.getTotalCount(),
+                randomValueOtherThan(in.getTotalCount(), ESTestCase::randomNonNegativeLong),
+                in.getTotalSegments(),
+                in.getTotalSegmentFields()
+            );
+            case 2 -> new NodeMappingStats(
+                in.getTotalCount(),
+                in.getTotalEstimatedOverhead().getBytes(),
+                randomValueOtherThan(in.getTotalSegments(), ESTestCase::randomNonNegativeLong),
+                in.getTotalSegmentFields()
+            );
+            case 3 -> new NodeMappingStats(
+                in.getTotalCount(),
+                in.getTotalEstimatedOverhead().getBytes(),
+                in.getTotalSegments(),
+                randomValueOtherThan(in.getTotalSegmentFields(), ESTestCase::randomNonNegativeLong)
+            );
+            default -> throw new AssertionError("invalid option");
+        };
     }
 
     public void testAdd() {
-        NodeMappingStats stats1 = new NodeMappingStats(1L, 2L);
-        NodeMappingStats stats2 = new NodeMappingStats(2L, 3L);
-        NodeMappingStats stats3 = new NodeMappingStats(3L, 5L);
+        NodeMappingStats stats1 = new NodeMappingStats(1L, 2L, 4L, 6L);
+        NodeMappingStats stats2 = new NodeMappingStats(2L, 3L, 10L, 20L);
+        NodeMappingStats stats3 = new NodeMappingStats(3L, 5L, 14L, 26L);
 
         stats1.add(stats2);
         assertEquals(stats1, stats3);
         assertEquals(stats1.hashCode(), stats3.hashCode());
     }
-
-    private static NodeMappingStats randomNodeMappingStats() {
-        long totalCount = randomIntBetween(1, 100);
-        long estimatedOverhead = totalCount * 1024;
-        return new NodeMappingStats(totalCount, estimatedOverhead);
-    }
 }