浏览代码

Add field usage support for vectors (#80608)

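Add field usage tracking support for the new kNN vector functionality (`dense_vector` fields); usage is reported under a new `knn_vectors` counter in the field usage stats.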
Yannick Welsch 3 年之前
父节点
当前提交
064936e790

+ 4 - 2
docs/reference/indices/field-usage-stats.asciidoc

@@ -128,7 +128,8 @@ The API returns the following response:
                         "doc_values" : 1,
                         "points" : 0,
                         "norms" : 1,
-                        "term_vectors" : 0
+                        "term_vectors" : 0,
+                        "knn_vectors" : 0
                     },
                     "fields": {
                         "_id": {
@@ -146,7 +147,8 @@ The API returns the following response:
                             "doc_values" : 0,
                             "points" : 0,
                             "norms" : 0,
-                            "term_vectors" : 0
+                            "term_vectors" : 0,
+                            "knn_vectors" : 0
                         },
                         "_source": {...},
                         "context": {...},

+ 26 - 3
server/src/main/java/org/elasticsearch/index/search/stats/FieldUsageStats.java

@@ -8,6 +8,7 @@
 
 package org.elasticsearch.index.search.stats;
 
+import org.elasticsearch.Version;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
@@ -39,6 +40,7 @@ public class FieldUsageStats implements ToXContentObject, Writeable {
     public static final String TERM_VECTORS = "term_vectors"; // possibly refine this one
     public static final String POINTS = "points";
     public static final String PROXIMITY = "proximity";
+    public static final String KNN_VECTORS = "knn_vectors";
 
     private final Map<String, PerFieldUsageStats> stats;
 
@@ -122,11 +124,12 @@ public class FieldUsageStats implements ToXContentObject, Writeable {
         PAYLOADS,
         TERM_VECTORS, // possibly refine this one
         POINTS,
+        KNN_VECTORS,
     }
 
     public static class PerFieldUsageStats implements ToXContentFragment, Writeable {
 
-        static final PerFieldUsageStats EMPTY = new PerFieldUsageStats(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+        static final PerFieldUsageStats EMPTY = new PerFieldUsageStats(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
 
         private final long any;
         private final long proximity;
@@ -141,6 +144,7 @@ public class FieldUsageStats implements ToXContentObject, Writeable {
         private final long payloads;
         private final long termVectors;
         private final long points;
+        private final long knnVectors;
 
         public PerFieldUsageStats(
             long any,
@@ -155,7 +159,8 @@ public class FieldUsageStats implements ToXContentObject, Writeable {
             long norms,
             long payloads,
             long termVectors,
-            long points
+            long points,
+            long knnVectors
         ) {
             this.any = any;
             this.proximity = proximity;
@@ -170,6 +175,7 @@ public class FieldUsageStats implements ToXContentObject, Writeable {
             this.payloads = payloads;
             this.termVectors = termVectors;
             this.points = points;
+            this.knnVectors = knnVectors;
         }
 
         private PerFieldUsageStats add(PerFieldUsageStats other) {
@@ -186,7 +192,8 @@ public class FieldUsageStats implements ToXContentObject, Writeable {
                 norms + other.norms,
                 payloads + other.payloads,
                 termVectors + other.termVectors,
-                points + other.points
+                points + other.points,
+                knnVectors + other.knnVectors
             );
         }
 
@@ -204,6 +211,11 @@ public class FieldUsageStats implements ToXContentObject, Writeable {
             payloads = in.readVLong();
             termVectors = in.readVLong();
             points = in.readVLong();
+            if (in.getVersion().onOrAfter(Version.V_8_1_0)) {
+                knnVectors = in.readVLong();
+            } else {
+                knnVectors = 0;
+            }
         }
 
         @Override
@@ -221,6 +233,9 @@ public class FieldUsageStats implements ToXContentObject, Writeable {
             out.writeVLong(payloads);
             out.writeVLong(termVectors);
             out.writeVLong(points);
+            if (out.getVersion().onOrAfter(Version.V_8_1_0)) {
+                out.writeVLong(knnVectors);
+            }
         }
 
         @Override
@@ -240,6 +255,7 @@ public class FieldUsageStats implements ToXContentObject, Writeable {
             builder.field(POINTS, points);
             builder.field(NORMS, norms);
             builder.field(TERM_VECTORS, termVectors);
+            builder.field(KNN_VECTORS, knnVectors);
             return builder;
         }
 
@@ -278,6 +294,9 @@ public class FieldUsageStats implements ToXContentObject, Writeable {
             if (points > 0L) {
                 set.add(UsageContext.POINTS);
             }
+            if (knnVectors > 0L) {
+                set.add(UsageContext.KNN_VECTORS);
+            }
             return set;
         }
 
@@ -325,6 +344,10 @@ public class FieldUsageStats implements ToXContentObject, Writeable {
             return points;
         }
 
+        public long getKnnVectors() {
+            return knnVectors;
+        }
+
         public long getProximity() {
             return proximity;
         }

+ 13 - 1
server/src/main/java/org/elasticsearch/index/search/stats/ShardFieldUsageTracker.java

@@ -61,7 +61,8 @@ public class ShardFieldUsageTracker {
                     ifs.norms.longValue(),
                     ifs.payloads.longValue(),
                     ifs.termVectors.longValue(),
-                    ifs.points.longValue()
+                    ifs.points.longValue(),
+                    ifs.knnVectors.longValue()
                 );
                 stats.put(entry.getKey(), pf);
             }
@@ -83,6 +84,7 @@ public class ShardFieldUsageTracker {
         final LongAdder payloads = new LongAdder();
         final LongAdder termVectors = new LongAdder();
         final LongAdder points = new LongAdder();
+        final LongAdder knnVectors = new LongAdder();
     }
 
     static class PerField {
@@ -98,6 +100,7 @@ public class ShardFieldUsageTracker {
         volatile boolean payloads;
         volatile boolean termVectors;
         volatile boolean points;
+        volatile boolean knnVectors;
     }
 
     public class FieldUsageStatsTrackingSession implements FieldUsageNotifier, Releasable {
@@ -159,6 +162,10 @@ public class ShardFieldUsageTracker {
                     any = true;
                     fieldStats.termVectors.increment();
                 }
+                if (pf.knnVectors) {
+                    any = true;
+                    fieldStats.knnVectors.increment();
+                }
                 if (any) {
                     fieldStats.any.increment();
                 }
@@ -227,5 +234,10 @@ public class ShardFieldUsageTracker {
         public void onTermVectorsUsed(String field) {
             getOrAdd(field).termVectors = true;
         }
+
+        @Override
+        public void onKnnVectorsUsed(String field) {
+            getOrAdd(field).knnVectors = true;
+        }
     }
 }

+ 23 - 0
server/src/main/java/org/elasticsearch/search/internal/FieldUsageTrackingDirectoryReader.java

@@ -25,7 +25,10 @@ import org.apache.lucene.index.SortedSetDocValues;
 import org.apache.lucene.index.StoredFieldVisitor;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.VectorValues;
+import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.search.suggest.document.CompletionTerms;
+import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.automaton.CompiledAutomaton;
 import org.elasticsearch.common.lucene.index.SequentialStoredFieldsLeafReader;
@@ -83,6 +86,8 @@ public class FieldUsageTrackingDirectoryReader extends FilterDirectoryReader {
         void onPointsUsed(String field);
 
         void onTermVectorsUsed(String field);
+
+        void onKnnVectorsUsed(String field);
     }
 
     public static final class FieldUsageTrackingLeafReader extends SequentialStoredFieldsLeafReader {
@@ -181,6 +186,24 @@ public class FieldUsageTrackingDirectoryReader extends FilterDirectoryReader {
             return numericDocValues;
         }
 
+        @Override
+        public VectorValues getVectorValues(String field) throws IOException {
+            VectorValues vectorValues = super.getVectorValues(field);
+            if (vectorValues != null) {
+                notifier.onKnnVectorsUsed(field);
+            }
+            return vectorValues;
+        }
+
+        @Override
+        public TopDocs searchNearestVectors(String field, float[] target, int k, Bits acceptDocs) throws IOException {
+            TopDocs topDocs = super.searchNearestVectors(field, target, k, acceptDocs);
+            if (topDocs != null) {
+                notifier.onKnnVectorsUsed(field);
+            }
+            return topDocs;
+        }
+
         @Override
         public String toString() {
             final StringBuilder sb = new StringBuilder("FieldUsageTrackingLeafReader(reader=");

+ 81 - 0
x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/vectors/50_dense_vector_field_usage.yml

@@ -0,0 +1,81 @@
+setup:
+  - skip:
+      features: headers
+
+  - do:
+      indices.create:
+        index: futest
+        body:
+          settings:
+            routing.rebalance.enable: none
+            index.number_of_shards: 1
+            index.number_of_replicas: 0
+          mappings:
+            properties:
+              name:
+                type: keyword
+              vector:
+                type: dense_vector
+                dims: 5
+                index: true
+                similarity: l2_norm
+
+  - do:
+      index:
+        index: futest
+        body:
+          name: cow.jpg
+          vector: [ 230.0, 300.33, -34.8988, 15.555, -200.0 ]
+
+  - do:
+      index:
+        index: futest
+        id: 2
+        body:
+          name: moose.jpg
+          vector: [ -0.5, 100.0, -13, 14.8, -156.0 ]
+
+  - do:
+      index:
+        index: futest
+        id: 3
+        body:
+          name: rabbit.jpg
+          vector: [ 0.5, 111.3, -13.0, 14.8, -156.0 ]
+
+  - do:
+      indices.refresh: { }
+
+---
+"Field usage":
+  - do:
+      knn_search:
+        index: futest
+        body:
+          fields: [ "name" ]
+          knn:
+            field: vector
+            query_vector: [-0.5, 90.0, -10, 14.8, -156.0]
+            k: 2
+            num_candidates: 3
+
+  - match: {hits.hits.0._id: "2"}
+  - match: {hits.hits.0.fields.name.0: "moose.jpg"}
+
+  - match: {hits.hits.1._id: "3"}
+  - match: {hits.hits.1.fields.name.0: "rabbit.jpg"}
+
+  - do:
+      indices.field_usage_stats: { index: futest }
+
+  - is_true: futest
+  - length: { futest.shards: 1 }
+
+  - gt: { futest.shards.0.stats.all_fields.any: 0 }
+  - gt: { futest.shards.0.stats.all_fields.knn_vectors: 0 }
+
+  - gt: { futest.shards.0.stats.fields.vector.any: 0 }
+  - gt: { futest.shards.0.stats.fields.vector.knn_vectors: 0 }
+
+  - gt: { futest.shards.0.stats.fields._id.stored_fields: 0 }
+  - match: { futest.shards.0.stats.fields._id.knn_vectors: 0 }