Browse Source

Synthetic _source: support dense_vector (#89840)

This adds support for synthetic _source to `dense_vector` fields.

![image](https://user-images.githubusercontent.com/215970/188734496-0f0772c7-4c7a-46b6-b978-0c220e73474d.png)
Nik Everett 3 years ago
parent
commit
c4a77d572d

+ 5 - 0
docs/changelog/89840.yaml

@@ -0,0 +1,5 @@
+pr: 89840
+summary: "Synthetic _source: support `dense_vector`"
+area: Vector Search
+type: feature
+issues: []

+ 1 - 0
docs/reference/mapping/fields/synthetic-source.asciidoc

@@ -31,6 +31,7 @@ types:
 ** <<aggregate-metric-double-synthetic-source, `aggregate_metric_double`>>
 ** <<boolean-synthetic-source,`boolean`>>
 ** <<numeric-synthetic-source,`byte`>>
+** <<dense-vector-synthetic-source,`dense_vector`>>
 ** <<numeric-synthetic-source,`double`>>
 ** <<numeric-synthetic-source,`float`>>
 ** <<geo-point-synthetic-source,`geo_point`>>

+ 4 - 0
docs/reference/mapping/types/dense-vector.asciidoc

@@ -178,3 +178,7 @@ Defaults to `16`.
 The number of candidates to track while assembling the list of nearest
 neighbors for each new node. Defaults to `100`.
 ====
+
+[[dense-vector-synthetic-source]]
+==== Synthetic source preview:[]
+`dense_vector` fields support <<synthetic-source,synthetic `_source`>>.

+ 87 - 0
rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/get/100_synthetic_source.yml

@@ -454,3 +454,90 @@ stored keyword with ignore_above:
           - short
           - jumped over the lazy dog # fields saved by ignore_above are returned after doc values fields
   - is_false: fields
+
+---
+indexed dense vectors: # synthetic _source round-trip for a dense_vector mapped with index: true
+  - skip:
+      version: " - 8.4.99"
+      reason: introduced in 8.5.0
+
+  - do:
+      indices.create:
+        index: test
+        body:
+          mappings:
+            _source:
+              mode: synthetic # enables synthetic _source for the test index
+            properties:
+              name:
+                type: keyword
+              vector:
+                type: dense_vector
+                dims: 5
+                index: true
+                similarity: l2_norm
+
+  - do:
+      index:
+        index: test
+        id: 1
+        body:
+          name: cow.jpg
+          vector: [ 230.0, 300.33, -34.8988, 15.555, -200.0 ]
+
+  - do:
+      get:
+        index: test
+        id:    1
+  - match: {_index: "test"}
+  - match: {_id: "1"}
+  - match: {_version: 1}
+  - match: {found: true}
+  - match: # the fetched _source must equal the document exactly as it was indexed above
+      _source:
+        name: cow.jpg
+        vector: [ 230.0, 300.33, -34.8988, 15.555, -200.0 ]
+  - is_false: fields
+
+---
+non-indexed dense vectors: # synthetic _source round-trip for a dense_vector mapped with index: false
+  - skip:
+      version: " - 8.4.99"
+      reason: introduced in 8.5.0
+
+  - do:
+      indices.create:
+        index: test
+        body:
+          mappings:
+            _source:
+              mode: synthetic # enables synthetic _source for the test index
+            properties:
+              name:
+                type: keyword
+              vector:
+                type: dense_vector
+                dims: 5
+                index: false
+
+  - do:
+      index:
+        index: test
+        id: 1
+        body:
+          name: cow.jpg
+          vector: [ 230.0, 300.33, -34.8988, 15.555, -200.0 ]
+
+  - do:
+      get:
+        index: test
+        id:    1
+  - match: {_index: "test"}
+  - match: {_id: "1"}
+  - match: {_version: 1}
+  - match: {found: true}
+  - match: # the fetched _source must equal the document exactly as it was indexed above
+      _source:
+        name: cow.jpg
+        vector: [ 230.0, 300.33, -34.8988, 15.555, -200.0 ]
+  - is_false: fields

+ 98 - 0
server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java

@@ -13,7 +13,10 @@ import org.apache.lucene.codecs.lucene94.Lucene94HnswVectorsFormat;
 import org.apache.lucene.document.BinaryDocValuesField;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.KnnVectorField;
+import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.VectorSimilarityFunction;
+import org.apache.lucene.index.VectorValues;
 import org.apache.lucene.search.FieldExistsQuery;
 import org.apache.lucene.search.KnnVectorQuery;
 import org.apache.lucene.search.Query;
@@ -31,6 +34,7 @@ import org.elasticsearch.index.mapper.MapperParsingException;
 import org.elasticsearch.index.mapper.MappingLookup;
 import org.elasticsearch.index.mapper.MappingParser;
 import org.elasticsearch.index.mapper.SimpleMappedFieldType;
+import org.elasticsearch.index.mapper.SourceLoader;
 import org.elasticsearch.index.mapper.TextSearchInfo;
 import org.elasticsearch.index.mapper.ValueFetcher;
 import org.elasticsearch.index.query.SearchExecutionContext;
@@ -45,6 +49,7 @@ import java.nio.ByteBuffer;
 import java.time.ZoneId;
 import java.util.Map;
 import java.util.Objects;
+import java.util.stream.Stream;
 
 import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken;
 
@@ -525,4 +530,97 @@ public class DenseVectorFieldMapper extends FieldMapper {
             return new Lucene94HnswVectorsFormat(hnswIndexOptions.m, hnswIndexOptions.efConstruction);
         }
     }
+
+    @Override
+    public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() { // picks the loader used to rebuild this field in synthetic _source
+        if (copyTo.copyToFields().isEmpty() != true) { // any copy_to target makes the field ineligible
+            throw new IllegalArgumentException(
+                "field [" + name() + "] of type [" + typeName() + "] doesn't support synthetic source because it declares copy_to"
+            );
+        }
+        if (indexed) { // indexed vectors are read back through LeafReader#getVectorValues
+            return new IndexedSyntheticFieldLoader();
+        }
+        return new DocValuesSyntheticFieldLoader(); // otherwise the vector is read back from binary doc values
+    }
+
+    private class IndexedSyntheticFieldLoader implements SourceLoader.SyntheticFieldLoader { // rebuilds the field from the leaf's vector values
+        private VectorValues values; // iterator for the most recent leaf; assigned in docValuesLoader
+        private boolean hasValue; // whether the last document advanced to has a vector
+
+        @Override
+        public Stream<Map.Entry<String, StoredFieldLoader>> storedFieldLoaders() {
+            return Stream.of(); // this loader reads no stored fields
+        }
+
+        @Override
+        public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf) throws IOException { // docIdsInLeaf is not used here
+            values = leafReader.getVectorValues(name());
+            if (values == null) {
+                return null; // the leaf has no vector values for this field, so there is nothing to load
+            }
+            return docId -> {
+                hasValue = docId == values.advance(docId); // true only when the iterator lands exactly on docId
+                return hasValue;
+            };
+        }
+
+        @Override
+        public boolean hasValue() {
+            return hasValue;
+        }
+
+        @Override
+        public void write(XContentBuilder b) throws IOException {
+            if (false == hasValue) {
+                return; // emit nothing for a document without a vector
+            }
+            b.startArray(simpleName()); // array keyed by the field's simple name, one entry per float
+            for (float v : values.vectorValue()) {
+                b.value(v);
+            }
+            b.endArray();
+        }
+    }
+
+    private class DocValuesSyntheticFieldLoader implements SourceLoader.SyntheticFieldLoader { // rebuilds the field from binary doc values
+        private BinaryDocValues values; // iterator for the most recent leaf; assigned in docValuesLoader
+        private boolean hasValue; // whether the last document advanced to has a value
+
+        @Override
+        public Stream<Map.Entry<String, StoredFieldLoader>> storedFieldLoaders() {
+            return Stream.of(); // this loader reads no stored fields
+        }
+
+        @Override
+        public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf) throws IOException { // docIdsInLeaf is not used here
+            values = leafReader.getBinaryDocValues(name());
+            if (values == null) {
+                return null; // the leaf has no binary doc values for this field, so there is nothing to load
+            }
+            return docId -> {
+                hasValue = docId == values.advance(docId); // true only when the iterator lands exactly on docId
+                return hasValue;
+            };
+        }
+
+        @Override
+        public boolean hasValue() {
+            return hasValue;
+        }
+
+        @Override
+        public void write(XContentBuilder b) throws IOException {
+            if (false == hasValue) {
+                return; // emit nothing for a document without a vector
+            }
+            b.startArray(simpleName()); // array keyed by the field's simple name
+            BytesRef ref = values.binaryValue();
+            ByteBuffer byteBuffer = ByteBuffer.wrap(ref.bytes, ref.offset, ref.length); // view over just this value's slice of the backing array
+            for (int dim = 0; dim < dims; dim++) { // reads exactly dims floats from the start; any bytes after them are not read
+                b.value(byteBuffer.getFloat());
+            }
+            b.endArray();
+        }
+    }
 }

+ 42 - 3
server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java

@@ -32,6 +32,7 @@ import org.elasticsearch.index.mapper.MapperTestCase;
 import org.elasticsearch.index.mapper.ParsedDocument;
 import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.DenseVectorFieldType;
 import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.VectorSimilarity;
+import org.elasticsearch.test.ESTestCase;
 import org.elasticsearch.xcontent.XContentBuilder;
 import org.junit.AssumptionViolatedException;
 
@@ -465,12 +466,50 @@ public class DenseVectorFieldMapperTests extends MapperTestCase {
     }
 
     @Override
-    protected SyntheticSourceSupport syntheticSourceSupport() {
+    protected IngestScriptSupport ingestScriptSupport() { // ingest script tests remain unsupported for this field
         throw new AssumptionViolatedException("not supported");
     }
 
     @Override
-    protected IngestScriptSupport ingestScriptSupport() {
-        throw new AssumptionViolatedException("not supported");
+    protected SyntheticSourceSupport syntheticSourceSupport() { // now supplies a real fixture instead of throwing "not supported"
+        return new DenseVectorSyntheticSourceSupport();
+    }
+
+    @Override
+    protected boolean supportsEmptyInputArray() { // false: tests gated on "[] as input" are skipped for this field
+        return false;
+    }
+
+    private static class DenseVectorSyntheticSourceSupport implements SyntheticSourceSupport { // randomized mapping and example values for synthetic source tests
+        private final int dims = between(5, 1000);
+        private final boolean indexed = randomBoolean();
+        private final boolean indexOptionsSet = indexed && randomBoolean(); // index_options can only be set when indexed is true
+
+        @Override
+        public SyntheticSourceExample example(int maxValues) throws IOException { // maxValues is not used; the vector length is fixed by dims
+            List<Float> value = randomList(dims, dims, ESTestCase::randomFloat); // presumably (min, max) sizes, i.e. exactly dims floats
+            return new SyntheticSourceExample(value, value, this::mapping); // the same list is passed for both value arguments
+        }
+
+        private void mapping(XContentBuilder b) throws IOException { // writes the field mapping matching the random choices above
+            b.field("type", "dense_vector");
+            b.field("dims", dims);
+            if (indexed) {
+                b.field("index", true);
+                b.field("similarity", "l2_norm");
+                if (indexOptionsSet) {
+                    b.startObject("index_options");
+                    b.field("type", "hnsw");
+                    b.field("m", 5);
+                    b.field("ef_construction", 50);
+                    b.endObject();
+                }
+            }
+        }
+
+        @Override
+        public List<SyntheticSourceInvalidExample> invalidExample() throws IOException {
+            return List.of(); // no invalid examples are provided
+        }
     }
 }

+ 14 - 2
test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java

@@ -914,6 +914,19 @@ public abstract class MapperTestCase extends MapperServiceTestCase {
 
     public final void testSyntheticEmptyListNoDocValuesLoader() throws IOException { // runs assertNoDocValueLoader on a document whose "field" is []
         assumeTrue("Field does not support [] as input", supportsEmptyInputArray());
+        assertNoDocValueLoader(b -> b.startArray("field").endArray());
+    }
+
+    public final void testEmptyDocumentNoDocValueLoader() throws IOException { // runs assertNoDocValueLoader on a document with no fields
+        assumeFalse("Field will add values even if no fields are supplied", addsValueWhenNotSupplied());
+        assertNoDocValueLoader(b -> {}); // empty builder callback: the source document has no fields
+    }
+
+    protected boolean addsValueWhenNotSupplied() { // override to return true to skip testEmptyDocumentNoDocValueLoader
+        return false; // default: the field adds no value when the document supplies none
+    }
+
+    private void assertNoDocValueLoader(CheckedConsumer<XContentBuilder, IOException> doc) throws IOException {
         SyntheticSourceExample syntheticSourceExample = syntheticSourceSupport().example(5);
         DocumentMapper mapper = createDocumentMapper(syntheticSourceMapping(b -> {
             b.startObject("field");
@@ -922,8 +935,7 @@ public abstract class MapperTestCase extends MapperServiceTestCase {
         }));
         try (Directory directory = newDirectory()) {
             RandomIndexWriter iw = new RandomIndexWriter(random(), directory);
-            LuceneDocument doc = mapper.parse(source(b -> b.startArray("field").endArray())).rootDoc();
-            iw.addDocument(doc);
+            iw.addDocument(mapper.parse(source(doc)).rootDoc());
             iw.close();
             try (DirectoryReader reader = DirectoryReader.open(directory)) {
                 LeafReader leafReader = getOnlyLeafReader(reader);

+ 5 - 0
x-pack/plugin/mapper-constant-keyword/src/internalClusterTest/java/org/elasticsearch/xpack/constantkeyword/mapper/ConstantKeywordFieldMapperTests.java

@@ -243,4 +243,9 @@ public class ConstantKeywordFieldMapperTests extends MapperTestCase {
     protected boolean supportsEmptyInputArray() {
         return false;
     }
+
+    @Override
+    protected boolean addsValueWhenNotSupplied() { // tells the base test that this field adds a value even when none is supplied
+        return true;
+    }
 }