Browse Source

Add more dense_vector details for cluster stats field stats (#113607) (#113884)

This adds some more counts for dense_vector field mapping stats. This
allows for seeing the number of mappings with a given element type,
similarity, or index type.

Co-authored-by: Elastic Machine <elasticmachine@users.noreply.github.com>
Benjamin Trent 1 year ago
parent
commit
f057ff72c4

+ 5 - 0
docs/changelog/113607.yaml

@@ -0,0 +1,5 @@
+pr: 113607
+summary: Add more `dense_vector` details for cluster stats field stats
+area: Search
+type: enhancement
+issues: []

+ 9 - 0
docs/reference/cluster/stats.asciidoc

@@ -432,6 +432,15 @@ To get information on segment files, use the <<cluster-nodes-stats,node stats AP
 `indexed_vector_dim_max`::
 (integer) For dense_vector field types, the maximum dimension of all indexed vector types in selected nodes.
 
+`vector_index_type_count`::
+(object) For dense_vector field types, the number of indexed vector types by index type in selected nodes.
+
+`vector_similarity_type_count`::
+(object) For dense_vector field types, the number of vector types by similarity type in selected nodes.
+
+`vector_element_type_count`::
+(object) For dense_vector field types, the number of vector types by element type in selected nodes.
+
 `script_count`::
 (integer) Number of fields that declare a script.
 

+ 32 - 2
rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/70_dense_vector_telemetry.yml

@@ -1,4 +1,7 @@
 setup:
+  - requires:
+      cluster_features: [ "gte_v8.4.0" ]
+      reason: "Cluster mappings stats for indexed dense vector was added in 8.4"
   - skip:
       features: headers
 
@@ -15,11 +18,16 @@ setup:
                 dims: 768
                 index: true
                 similarity: l2_norm
+                element_type: byte
+                index_options:
+                  type: hnsw
               vector2:
                 type: dense_vector
                 dims: 1024
                 index: true
                 similarity: dot_product
+                index_options:
+                  type: int8_hnsw
               vector3:
                 type: dense_vector
                 dims: 100
@@ -41,9 +49,24 @@ setup:
 
 ---
 "Field mapping stats":
+  - do: { cluster.stats: { } }
+  - length: { indices.mappings.field_types: 1 }
+  - match: { indices.mappings.field_types.0.name: dense_vector }
+  - match: { indices.mappings.field_types.0.count: 4 }
+  - match: { indices.mappings.field_types.0.index_count: 2 }
+  - match: { indices.mappings.field_types.0.indexed_vector_count: 3 }
+  - match: { indices.mappings.field_types.0.indexed_vector_dim_min: 768 }
+  - match: { indices.mappings.field_types.0.indexed_vector_dim_max: 1024 }
+---
+"Field mapping stats with field details":
   - requires:
-      cluster_features: ["gte_v8.4.0"]
-      reason: "Cluster mappings stats for indexed dense vector was added in 8.4"
+      test_runner_features: [ capabilities ]
+      capabilities:
+        - method: GET
+          path: /_cluster/stats
+          capabilities:
+            - "verbose-dense-vector-mapping-stats"
+      reason: "Capability required to run test"
   - do: { cluster.stats: { } }
   - length: { indices.mappings.field_types: 1 }
   - match: { indices.mappings.field_types.0.name: dense_vector }
@@ -52,3 +75,10 @@ setup:
   - match: { indices.mappings.field_types.0.indexed_vector_count: 3 }
   - match: { indices.mappings.field_types.0.indexed_vector_dim_min: 768 }
   - match: { indices.mappings.field_types.0.indexed_vector_dim_max: 1024 }
+  - match: { indices.mappings.field_types.0.vector_index_type_count.hnsw: 1 }
+  - match: { indices.mappings.field_types.0.vector_index_type_count.int8_hnsw: 2 }
+  - match: { indices.mappings.field_types.0.vector_index_type_count.not_indexed: 1 }
+  - match: { indices.mappings.field_types.0.vector_similarity_type_count.l2_norm: 2 }
+  - match: { indices.mappings.field_types.0.vector_similarity_type_count.dot_product: 1 }
+  - match: { indices.mappings.field_types.0.vector_element_type_count.float: 3 }
+  - match: { indices.mappings.field_types.0.vector_element_type_count.byte: 1 }

+ 69 - 15
server/src/main/java/org/elasticsearch/action/admin/cluster/stats/DenseVectorFieldStats.java

@@ -9,11 +9,12 @@
 
 package org.elasticsearch.action.admin.cluster.stats;
 
-import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.xcontent.XContentBuilder;
 
 import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
 import java.util.Objects;
 
 /**
@@ -21,7 +22,10 @@ import java.util.Objects;
  */
 public final class DenseVectorFieldStats extends FieldStats {
     static final int UNSET = -1;
-
+    static final String NOT_INDEXED = "not_indexed";
+    Map<String, Integer> vectorIndexTypeCount; // count of mappings by index type
+    Map<String, Integer> vectorSimilarityTypeCount; // count of mappings by similarity
+    Map<String, Integer> vectorElementTypeCount; // count of mappings by element type
     int indexedVectorCount; // number of times vectors with index:true are used in mappings of this cluster
     int indexedVectorDimMin; // minimum dimension of indexed vectors in this cluster
     int indexedVectorDimMax; // maximum dimension of indexed vectors in this cluster
@@ -31,21 +35,14 @@ public final class DenseVectorFieldStats extends FieldStats {
         indexedVectorCount = 0;
         indexedVectorDimMin = UNSET;
         indexedVectorDimMax = UNSET;
-    }
-
-    DenseVectorFieldStats(StreamInput in) throws IOException {
-        super(in);
-        indexedVectorCount = in.readVInt();
-        indexedVectorDimMin = in.readVInt();
-        indexedVectorDimMax = in.readVInt();
+        vectorIndexTypeCount = new HashMap<>();
+        vectorSimilarityTypeCount = new HashMap<>();
+        vectorElementTypeCount = new HashMap<>();
     }
 
     @Override
     public void writeTo(StreamOutput out) throws IOException {
-        super.writeTo(out);
-        out.writeVInt(indexedVectorCount);
-        out.writeVInt(indexedVectorDimMin);
-        out.writeVInt(indexedVectorDimMax);
+        assert false : "writeTo should not be called on DenseVectorFieldStats";
     }
 
     @Override
@@ -53,6 +50,21 @@ public final class DenseVectorFieldStats extends FieldStats {
         builder.field("indexed_vector_count", indexedVectorCount);
         builder.field("indexed_vector_dim_min", indexedVectorDimMin);
         builder.field("indexed_vector_dim_max", indexedVectorDimMax);
+        if (vectorIndexTypeCount.isEmpty() == false) {
+            builder.startObject("vector_index_type_count");
+            builder.mapContents(vectorIndexTypeCount);
+            builder.endObject();
+        }
+        if (vectorSimilarityTypeCount.isEmpty() == false) {
+            builder.startObject("vector_similarity_type_count");
+            builder.mapContents(vectorSimilarityTypeCount);
+            builder.endObject();
+        }
+        if (vectorElementTypeCount.isEmpty() == false) {
+            builder.startObject("vector_element_type_count");
+            builder.mapContents(vectorElementTypeCount);
+            builder.endObject();
+        }
     }
 
     @Override
@@ -69,11 +81,53 @@ public final class DenseVectorFieldStats extends FieldStats {
         DenseVectorFieldStats that = (DenseVectorFieldStats) o;
         return indexedVectorCount == that.indexedVectorCount
             && indexedVectorDimMin == that.indexedVectorDimMin
-            && indexedVectorDimMax == that.indexedVectorDimMax;
+            && indexedVectorDimMax == that.indexedVectorDimMax
+            && Objects.equals(vectorIndexTypeCount, that.vectorIndexTypeCount)
+            && Objects.equals(vectorSimilarityTypeCount, that.vectorSimilarityTypeCount)
+            && Objects.equals(vectorElementTypeCount, that.vectorElementTypeCount);
     }
 
     @Override
     public int hashCode() {
-        return Objects.hash(super.hashCode(), indexedVectorCount, indexedVectorDimMin, indexedVectorDimMax);
+        return Objects.hash(
+            super.hashCode(),
+            indexedVectorCount,
+            indexedVectorDimMin,
+            indexedVectorDimMax,
+            vectorIndexTypeCount,
+            vectorSimilarityTypeCount,
+            vectorElementTypeCount
+        );
+    }
+
+    @Override
+    public String toString() {
+        return "DenseVectorFieldStats{"
+            + "vectorIndexTypeCount="
+            + vectorIndexTypeCount
+            + ", vectorSimilarityTypeCount="
+            + vectorSimilarityTypeCount
+            + ", vectorElementTypeCount="
+            + vectorElementTypeCount
+            + ", indexedVectorCount="
+            + indexedVectorCount
+            + ", indexedVectorDimMin="
+            + indexedVectorDimMin
+            + ", indexedVectorDimMax="
+            + indexedVectorDimMax
+            + ", scriptCount="
+            + scriptCount
+            + ", scriptLangs="
+            + scriptLangs
+            + ", fieldScriptStats="
+            + fieldScriptStats
+            + ", name='"
+            + name
+            + '\''
+            + ", count="
+            + count
+            + ", indexCount="
+            + indexCount
+            + '}';
     }
 }

+ 25 - 2
server/src/main/java/org/elasticsearch/action/admin/cluster/stats/MappingStats.java

@@ -86,9 +86,30 @@ public final class MappingStats implements ToXContentFragment, Writeable {
                     FieldStats stats;
                     if (type.equals("dense_vector")) {
                         stats = fieldTypes.computeIfAbsent(type, DenseVectorFieldStats::new);
-                        boolean indexed = fieldMapping.containsKey("index") ? (boolean) fieldMapping.get("index") : false;
+                        DenseVectorFieldStats vStats = (DenseVectorFieldStats) stats;
+                        if (fieldMapping.containsKey("similarity")) {
+                            Object similarity = fieldMapping.get("similarity");
+                            vStats.vectorSimilarityTypeCount.compute(similarity.toString(), (t, c) -> c == null ? count : c + count);
+                        }
+                        String elementTypeStr = "float";
+                        if (fieldMapping.containsKey("element_type")) {
+                            Object elementType = fieldMapping.get("element_type");
+                            elementTypeStr = elementType.toString();
+                        }
+                        vStats.vectorElementTypeCount.compute(elementTypeStr, (t, c) -> c == null ? count : c + count);
+                        boolean indexed = fieldMapping.containsKey("index") && (boolean) fieldMapping.get("index");
                         if (indexed) {
-                            DenseVectorFieldStats vStats = (DenseVectorFieldStats) stats;
+                            Object indexOptions = fieldMapping.get("index_options");
+                            // NOTE: while the default for `float` is now `int8_hnsw`, that default is explicitly added to the mapping,
+                            // so if the value is truly missing and the field is indexed, we fall back to `hnsw`.
+                            String indexTypeStr = "hnsw";
+                            if (indexOptions instanceof Map<?, ?> indexOptionsMap) {
+                                Object indexType = indexOptionsMap.get("type");
+                                if (indexType != null) {
+                                    indexTypeStr = indexType.toString();
+                                }
+                            }
+                            vStats.vectorIndexTypeCount.compute(indexTypeStr, (t, c) -> c == null ? count : c + count);
                             vStats.indexedVectorCount += count;
                             Object obj = fieldMapping.get("dims");
                             if (obj != null) {
@@ -100,6 +121,8 @@ public final class MappingStats implements ToXContentFragment, Writeable {
                                     vStats.indexedVectorDimMax = dims;
                                 }
                             }
+                        } else {
+                            vStats.vectorIndexTypeCount.compute(DenseVectorFieldStats.NOT_INDEXED, (t, c) -> c == null ? 1 : c + 1);
                         }
                     } else {
                         stats = fieldTypes.computeIfAbsent(type, FieldStats::new);

+ 4 - 1
server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestClusterStatsAction.java

@@ -31,7 +31,10 @@ import static org.elasticsearch.rest.RestUtils.getTimeout;
 @ServerlessScope(Scope.INTERNAL)
 public class RestClusterStatsAction extends BaseRestHandler {
 
-    private static final Set<String> SUPPORTED_CAPABILITIES = Set.of("human-readable-total-docs-size");
+    private static final Set<String> SUPPORTED_CAPABILITIES = Set.of(
+        "human-readable-total-docs-size",
+        "verbose-dense-vector-mapping-stats"
+    );
     private static final Set<String> SUPPORTED_CAPABILITIES_CCS_STATS = Set.copyOf(Sets.union(SUPPORTED_CAPABILITIES, Set.of("ccs-stats")));
     public static final FeatureFlag CCS_TELEMETRY_FEATURE_FLAG = new FeatureFlag("ccs_telemetry");
     private static final Set<String> SUPPORTED_QUERY_PARAMETERS = Set.of("include_remotes", "nodeId", REST_TIMEOUT_PARAM);

+ 25 - 2
server/src/test/java/org/elasticsearch/action/admin/cluster/stats/MappingStatsTests.java

@@ -114,7 +114,16 @@ public class MappingStatsTests extends AbstractWireSerializingTestCase<MappingSt
                     "index_count" : 2,
                     "indexed_vector_count" : 2,
                     "indexed_vector_dim_min" : 100,
-                    "indexed_vector_dim_max" : 100
+                    "indexed_vector_dim_max" : 100,
+                    "vector_index_type_count" : {
+                      "hnsw" : 2
+                    },
+                    "vector_similarity_type_count" : {
+                      "dot_product" : 2
+                    },
+                    "vector_element_type_count" : {
+                      "float" : 2
+                    }
                   },
                   {
                     "name" : "keyword",
@@ -234,7 +243,16 @@ public class MappingStatsTests extends AbstractWireSerializingTestCase<MappingSt
                     "index_count" : 3,
                     "indexed_vector_count" : 3,
                     "indexed_vector_dim_min" : 100,
-                    "indexed_vector_dim_max" : 100
+                    "indexed_vector_dim_max" : 100,
+                    "vector_index_type_count" : {
+                      "hnsw" : 3
+                    },
+                    "vector_similarity_type_count" : {
+                      "dot_product" : 3
+                    },
+                    "vector_element_type_count" : {
+                      "float" : 3
+                    }
                   },
                   {
                     "name" : "keyword",
@@ -460,6 +478,11 @@ public class MappingStatsTests extends AbstractWireSerializingTestCase<MappingSt
         expectedStats.indexedVectorCount = 2 * indicesCount;
         expectedStats.indexedVectorDimMin = 768;
         expectedStats.indexedVectorDimMax = 1024;
+        expectedStats.vectorIndexTypeCount.put("hnsw", 2 * indicesCount);
+        expectedStats.vectorIndexTypeCount.put("not_indexed", 2);
+        expectedStats.vectorSimilarityTypeCount.put("dot_product", 3);
+        expectedStats.vectorSimilarityTypeCount.put("cosine", 3);
+        expectedStats.vectorElementTypeCount.put("float", 4 * indicesCount);
         assertEquals(Collections.singletonList(expectedStats), mappingStats.getFieldTypeStats());
     }