Browse Source

Distance measures for dense and sparse vectors (#37947)

* Distance measures for dense and sparse vectors

Introduce the painless functions
cosineSimilarity and dotProduct as distance
measures for dense and sparse vector fields.

```js
{
  "query": {
    "script_score": {
      "query": {
        "match_all": {}
      },
      "script": {
        "source": "cosineSimilarity(params.queryVector, doc['my_dense_vector'])",
        "params": {
          "queryVector": [4, 3.4, -1.2]
        }
      }
    }
  }
}
```

```js
{
  "query": {
    "script_score": {
      "query": {
        "match_all": {}
      },
      "script": {
        "source": "cosineSimilaritySparse(params.queryVector, doc['my_sparse_vector'])",
        "params": {
          "queryVector": {"2": -0.5, "10" : 111.3, "50": -13.0, "113": 14.8, "4545": -156.0}
        }
      }
    }
  }
}
```

Closes #31615
Mayya Sharipova 6 years ago
parent
commit
3260fd1fc8
22 changed files with 1339 additions and 74 deletions
  1. 1 1
      docs/reference/mapping/types/dense-vector.asciidoc
  2. 1 1
      docs/reference/mapping/types/sparse-vector.asciidoc
  3. 107 0
      docs/reference/query-dsl/script-score-query.asciidoc
  4. 9 0
      modules/mapper-extras/build.gradle
  5. 2 2
      modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/DenseVectorFieldMapper.java
  6. 2 2
      modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/SparseVectorFieldMapper.java
  7. 58 13
      modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/VectorEncoderDecoder.java
  8. 42 0
      modules/mapper-extras/src/main/java/org/elasticsearch/index/query/DocValuesWhitelistExtension.java
  9. 218 0
      modules/mapper-extras/src/main/java/org/elasticsearch/index/query/ScoreScriptUtils.java
  10. 80 0
      modules/mapper-extras/src/main/java/org/elasticsearch/index/query/VectorDVAtomicFieldData.java
  11. 74 0
      modules/mapper-extras/src/main/java/org/elasticsearch/index/query/VectorDVIndexFieldData.java
  12. 78 0
      modules/mapper-extras/src/main/java/org/elasticsearch/index/query/VectorScriptDocValues.java
  13. 1 0
      modules/mapper-extras/src/main/resources/META-INF/services/org.elasticsearch.painless.spi.PainlessExtension
  14. 32 0
      modules/mapper-extras/src/main/resources/org/elasticsearch/index/query/docvalues_whitelist.txt
  15. 1 1
      modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/VectorEncoderDecoderTests.java
  16. 82 0
      modules/mapper-extras/src/test/java/org/elasticsearch/index/query/ScoreScriptUtilsTests.java
  17. 100 0
      modules/mapper-extras/src/test/resources/rest-api-spec/test/dense-vector/10_basic.yml
  18. 0 27
      modules/mapper-extras/src/test/resources/rest-api-spec/test/dense-vector/10_indexing.yml
  19. 150 0
      modules/mapper-extras/src/test/resources/rest-api-spec/test/dense-vector/20_special_cases.yml
  20. 100 0
      modules/mapper-extras/src/test/resources/rest-api-spec/test/sparse-vector/10_basic.yml
  21. 0 27
      modules/mapper-extras/src/test/resources/rest-api-spec/test/sparse-vector/10_indexing.yml
  22. 201 0
      modules/mapper-extras/src/test/resources/rest-api-spec/test/sparse-vector/20_special_cases.yml

+ 1 - 1
docs/reference/mapping/types/dense-vector.asciidoc

@@ -9,7 +9,7 @@ not exceed 500. The number of dimensions can be
 different across documents. A `dense_vector` field is
 a single-valued field.
 
-These vectors can be used for document scoring.
+These vectors can be used for <<vector-functions,document scoring>>.
 For example, a document score can represent a distance between
 a given query vector and the indexed document vector.
 

+ 1 - 1
docs/reference/mapping/types/sparse-vector.asciidoc

@@ -9,7 +9,7 @@ not exceed 500. The number of dimensions can be
 different across documents. A `sparse_vector` field is
 a single-valued field.
 
-These vectors can be used for document scoring.
+These vectors can be used for <<vector-functions,document scoring>>.
 For example, a document score can represent a distance between
 a given query vector and the indexed document vector.
 

+ 107 - 0
docs/reference/query-dsl/script-score-query.asciidoc

@@ -74,6 +74,113 @@ to be the most efficient by using the internal mechanisms.
 --------------------------------------------------
 // NOTCONSOLE
 
+[[vector-functions]]
+===== Functions for vector fields
+These functions are used
+for <<dense-vector,`dense_vector`>> and
+<<sparse-vector,`sparse_vector`>> fields.
+
+For dense_vector fields, `cosineSimilarity` calculates the
+cosine similarity between a given query vector and document vectors.
+
+[source,js]
+--------------------------------------------------
+{
+  "query": {
+    "script_score": {
+      "query": {
+        "match_all": {}
+      },
+      "script": {
+        "source": "cosineSimilarity(params.queryVector, doc['my_dense_vector'])",
+        "params": {
+          "queryVector": [4, 3.4, -0.2]  <1>
+        }
+      }
+    }
+  }
+}
+--------------------------------------------------
+// NOTCONSOLE
+<1> To take advantage of the script optimizations, provide a query vector as a script parameter.
+
+Similarly, for sparse_vector fields, `cosineSimilaritySparse` calculates cosine similarity
+between a given query vector and document vectors.
+
+[source,js]
+--------------------------------------------------
+{
+  "query": {
+    "script_score": {
+      "query": {
+        "match_all": {}
+      },
+      "script": {
+        "source": "cosineSimilaritySparse(params.queryVector, doc['my_sparse_vector'])",
+        "params": {
+          "queryVector": {"2": 0.5, "10" : 111.3, "50": -1.3, "113": 14.8, "4545": 156.0}
+        }
+      }
+    }
+  }
+}
+--------------------------------------------------
+// NOTCONSOLE
+
+For dense_vector fields, `dotProduct` calculates the
+dot product between a given query vector and document vectors.
+
+[source,js]
+--------------------------------------------------
+{
+  "query": {
+    "script_score": {
+      "query": {
+        "match_all": {}
+      },
+      "script": {
+        "source": "dotProduct(params.queryVector, doc['my_dense_vector'])",
+        "params": {
+          "queryVector": [4, 3.4, -0.2]
+        }
+      }
+    }
+  }
+}
+--------------------------------------------------
+// NOTCONSOLE
+
+Similarly, for sparse_vector fields, `dotProductSparse` calculates dot product
+between a given query vector and document vectors.
+
+[source,js]
+--------------------------------------------------
+{
+  "query": {
+    "script_score": {
+      "query": {
+        "match_all": {}
+      },
+      "script": {
+        "source": "dotProductSparse(params.queryVector, doc['my_sparse_vector'])",
+        "params": {
+          "queryVector": {"2": 0.5, "10" : 111.3, "50": -1.3, "113": 14.8, "4545": 156.0}
+        }
+      }
+    }
+  }
+}
+--------------------------------------------------
+// NOTCONSOLE
+
+NOTE: If a document doesn't have a value for a vector field on which
+a vector function is executed, 0 is returned as a result
+for this document.
+
+NOTE: If a document's dense vector field has a number of dimensions
+different from that of the query vector, 0 is used for the missing dimensions
+in the calculations of vector functions.
+
 
 [[random-functions]]
 ===== Random functions

+ 9 - 0
modules/mapper-extras/build.gradle

@@ -20,4 +20,13 @@
 esplugin {
     description 'Adds advanced field mappers'
     classname 'org.elasticsearch.index.mapper.MapperExtrasPlugin'
+    extendedPlugins = ['lang-painless']
 }
+
+dependencies {
+    compileOnly project(':modules:lang-painless')
+}
+
+integTestCluster {
+    module project(':modules:lang-painless')
+}

+ 2 - 2
modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/DenseVectorFieldMapper.java

@@ -30,6 +30,7 @@ import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.xcontent.XContentParser.Token;
 import org.elasticsearch.index.fielddata.IndexFieldData;
 import org.elasticsearch.index.query.QueryShardContext;
+import org.elasticsearch.index.query.VectorDVIndexFieldData;
 import org.elasticsearch.search.DocValueFormat;
 
 import java.io.IOException;
@@ -119,8 +120,7 @@ public class DenseVectorFieldMapper extends FieldMapper implements ArrayValueMap
 
         @Override
         public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName) {
-            throw new UnsupportedOperationException(
-                "Field [" + name() + "] of type [" + typeName() + "] doesn't support sorting, scripting or aggregating");
+            return new VectorDVIndexFieldData.Builder(true);
         }
 
         @Override

+ 2 - 2
modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/SparseVectorFieldMapper.java

@@ -30,6 +30,7 @@ import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.xcontent.XContentParser.Token;
 import org.elasticsearch.index.fielddata.IndexFieldData;
 import org.elasticsearch.index.query.QueryShardContext;
+import org.elasticsearch.index.query.VectorDVIndexFieldData;
 import org.elasticsearch.search.DocValueFormat;
 
 import java.io.IOException;
@@ -119,8 +120,7 @@ public class SparseVectorFieldMapper extends FieldMapper {
 
         @Override
         public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName) {
-            throw new UnsupportedOperationException(
-                "Field [" + name() + "] of type [" + typeName() + "] doesn't support sorting, scripting or aggregating");
+            return new VectorDVIndexFieldData.Builder(false);
         }
 
         @Override

+ 58 - 13
modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/VectorEncoderDecoder.java

@@ -23,7 +23,7 @@ import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.InPlaceMergeSorter;
 
 // static utility functions for encoding and decoding dense_vector and sparse_vector fields
-final class VectorEncoderDecoder {
+public final class VectorEncoderDecoder {
     static final byte INT_BYTES = 4;
     static final byte SHORT_BYTES = 2;
 
@@ -34,10 +34,11 @@ final class VectorEncoderDecoder {
      * BytesRef: int[] floats encoded as integers values, 2 bytes for each dimension
      * @param values - values of the sparse array
      * @param dims - dims of the sparse array
-     * @param dimCount - number of the dimension
+     * @param dimCount - number of the dimensions, necessary as values and dims are dynamically created arrays,
+     *          and may be over-allocated
      * @return BytesRef
      */
-    static BytesRef encodeSparseVector(int[] dims, float[] values, int dimCount) {
+    public static BytesRef encodeSparseVector(int[] dims, float[] values, int dimCount) {
         // 1. Sort dims and values
         sortSparseDimsValues(dims, values, dimCount);
         byte[] buf = new byte[dimCount * (INT_BYTES + SHORT_BYTES)];
@@ -66,9 +67,12 @@ final class VectorEncoderDecoder {
 
     /**
      * Decodes the first part of BytesRef into sparse vector dimensions
-     * @param vectorBR - vector decoded in BytesRef
+     * @param vectorBR - sparse vector encoded in BytesRef
      */
-    static int[] decodeSparseVectorDims(BytesRef vectorBR) {
+    public static int[] decodeSparseVectorDims(BytesRef vectorBR) {
+        if (vectorBR == null) {
+            throw new IllegalArgumentException("A document doesn't have a value for a vector field!");
+        }
         int dimCount = vectorBR.length / (INT_BYTES + SHORT_BYTES);
         int[] dims = new int[dimCount];
         int offset = vectorBR.offset;
@@ -81,9 +85,12 @@ final class VectorEncoderDecoder {
 
     /**
      * Decodes the second part of the BytesRef into sparse vector values
-     * @param vectorBR - vector decoded in BytesRef
+     * @param vectorBR - sparse vector encoded in BytesRef
      */
-    static float[] decodeSparseVector(BytesRef vectorBR) {
+    public static float[] decodeSparseVector(BytesRef vectorBR) {
+        if (vectorBR == null) {
+            throw new IllegalArgumentException("A document doesn't have a value for a vector field!");
+        }
         int dimCount = vectorBR.length / (INT_BYTES + SHORT_BYTES);
         int offset =  vectorBR.offset + SHORT_BYTES * dimCount; //calculate the offset from where values are encoded
         float[] vector = new float[dimCount];
@@ -100,10 +107,14 @@ final class VectorEncoderDecoder {
 
 
     /**
-    Sort dimensions in the ascending order and
-    sort values in the same order as their corresponding dimensions
-    **/
-    static void sortSparseDimsValues(int[] dims, float[] values, int n) {
+     * Sorts dimensions in the ascending order and
+     * sorts values in the same order as their corresponding dimensions
+     *
+     * @param dims - dimensions of the sparse query vector
+     * @param values - values for the sparse query vector
+     * @param n - number of dimensions
+     */
+    public static void sortSparseDimsValues(int[] dims, float[] values, int n) {
         new InPlaceMergeSorter() {
             @Override
             public int compare(int i, int j) {
@@ -123,8 +134,42 @@ final class VectorEncoderDecoder {
         }.sort(0, n);
     }
 
-    // Decodes a BytesRef into an array of floats
-    static float[] decodeDenseVector(BytesRef vectorBR) {
+    /**
+     * Sorts dimensions in the ascending order and
+     * sorts double values in the same order as their corresponding dimensions
+     *
+     * @param dims - dimensions of the sparse query vector
+     * @param values - double values for the sparse query vector
+     * @param n - number of dimensions
+     */
+    public static void sortSparseDimsDoubleValues(int[] dims, double[] values, int n) {
+        new InPlaceMergeSorter() {
+            @Override
+            public int compare(int i, int j) {
+                return Integer.compare(dims[i], dims[j]);
+            }
+
+            @Override
+            public void swap(int i, int j) {
+                int tempDim = dims[i];
+                dims[i] = dims[j];
+                dims[j] = tempDim;
+
+                double tempValue = values[j];
+                values[j] = values[i];
+                values[i] = tempValue;
+            }
+        }.sort(0, n);
+    }
+
+    /**
+     * Decodes a BytesRef into an array of floats
+     * @param vectorBR - dense vector encoded in BytesRef
+     */
+    public static float[] decodeDenseVector(BytesRef vectorBR) {
+        if (vectorBR == null) {
+            throw new IllegalArgumentException("A document doesn't have a value for a vector field!");
+        }
         int dimCount = vectorBR.length / INT_BYTES;
         float[] vector = new float[dimCount];
         int offset = vectorBR.offset;

+ 42 - 0
modules/mapper-extras/src/main/java/org/elasticsearch/index/query/DocValuesWhitelistExtension.java

@@ -0,0 +1,42 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.query;
+
+
+import org.elasticsearch.painless.spi.PainlessExtension;
+import org.elasticsearch.painless.spi.Whitelist;
+import org.elasticsearch.painless.spi.WhitelistLoader;
+import org.elasticsearch.script.ScoreScript;
+import org.elasticsearch.script.ScriptContext;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+
+public class DocValuesWhitelistExtension implements PainlessExtension {
+
+    private static final Whitelist WHITELIST =
+        WhitelistLoader.loadFromResourceFiles(DocValuesWhitelistExtension.class, "docvalues_whitelist.txt");
+
+    @Override
+    public Map<ScriptContext<?>, List<Whitelist>> getContextWhitelists() {
+        return Collections.singletonMap(ScoreScript.CONTEXT, Collections.singletonList(WHITELIST));
+    }
+}

+ 218 - 0
modules/mapper-extras/src/main/java/org/elasticsearch/index/query/ScoreScriptUtils.java

@@ -0,0 +1,218 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.query;
+
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.index.mapper.VectorEncoderDecoder;
+
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+import static org.elasticsearch.index.mapper.VectorEncoderDecoder.sortSparseDimsDoubleValues;
+
+public class ScoreScriptUtils {
+
+    //**************FUNCTIONS FOR DENSE VECTORS
+
+    /**
+     * Calculate a dot product between a query's dense vector and documents' dense vectors
+     *
+     * @param queryVector the query vector parsed as {@code List<Number>} from json
+     * @param dvs VectorScriptDocValues representing encoded documents' vectors
+     */
+    public static double dotProduct(List<Number> queryVector, VectorScriptDocValues.DenseVectorScriptDocValues dvs){
+        BytesRef value = dvs.getEncodedValue();
+        if (value == null) return 0;
+        float[] docVector = VectorEncoderDecoder.decodeDenseVector(value);
+        return intDotProduct(queryVector, docVector);
+    }
+
+    /**
+     * Calculate cosine similarity between a query's dense vector and documents' dense vectors
+     *
+     * CosineSimilarity is implemented as a class to use
+     * painless script caching to calculate queryVectorMagnitude
+     * only once per script execution for all documents.
+     * A user will call `cosineSimilarity(params.queryVector, doc['my_vector'])`
+     */
+    public static final class CosineSimilarity {
+        final double queryVectorMagnitude;
+        final List<Number> queryVector;
+
+        // calculate queryVectorMagnitude once per query execution
+        public CosineSimilarity(List<Number> queryVector) {
+            this.queryVector = queryVector;
+            double doubleValue;
+            double dotProduct = 0;
+            for (Number value : queryVector) {
+                doubleValue = value.doubleValue();
+                dotProduct += doubleValue * doubleValue;
+            }
+            this.queryVectorMagnitude = Math.sqrt(dotProduct);
+        }
+
+        public double cosineSimilarity(VectorScriptDocValues.DenseVectorScriptDocValues dvs) {
+            BytesRef value = dvs.getEncodedValue();
+            if (value == null) return 0;
+            float[] docVector = VectorEncoderDecoder.decodeDenseVector(value);
+
+            // calculate docVector magnitude
+            double dotProduct = 0f;
+            for (int dim = 0; dim < docVector.length; dim++) {
+                dotProduct += (double) docVector[dim] * docVector[dim];
+            }
+            final double docVectorMagnitude = Math.sqrt(dotProduct);
+
+            double docQueryDotProduct = intDotProduct(queryVector, docVector);
+            return docQueryDotProduct / (docVectorMagnitude * queryVectorMagnitude);
+        }
+    }
+
+    private static double intDotProduct(List<Number> v1, float[] v2){
+        int dims = Math.min(v1.size(), v2.length);
+        double v1v2DotProduct = 0;
+        int dim = 0;
+        Iterator<Number> v1Iter = v1.iterator();
+        while(dim < dims) {
+            v1v2DotProduct += v1Iter.next().doubleValue() * v2[dim];
+            dim++;
+        }
+        return v1v2DotProduct;
+    }
+
+
+    //**************FUNCTIONS FOR SPARSE VECTORS
+
+    /**
+     * Calculate a dot product between a query's sparse vector and documents' sparse vectors
+     *
+     * DotProductSparse is implemented as a class to use
+     * painless script caching to prepare queryVector
+     * only once per script execution for all documents.
+     * A user will call `dotProductSparse(params.queryVector, doc['my_vector'])`
+     */
+    public static final class DotProductSparse {
+        final double[] queryValues;
+        final int[] queryDims;
+
+        // prepare queryVector once per script execution
+        // queryVector represents a map of dimensions to values
+        public DotProductSparse(Map<String, Number> queryVector) {
+            //break vector into two arrays dims and values
+            int n = queryVector.size();
+            queryDims = new int[n];
+            queryValues = new double[n];
+            int i = 0;
+            for (Map.Entry<String, Number> dimValue : queryVector.entrySet()) {
+                try {
+                    queryDims[i] = Integer.parseInt(dimValue.getKey());
+                } catch (final NumberFormatException e) {
+                    throw new IllegalArgumentException("Failed to parse a query vector dimension, it must be an integer!", e);
+                }
+                queryValues[i] = dimValue.getValue().doubleValue();
+                i++;
+            }
+            // Sort dimensions in the ascending order and sort values in the same order as their corresponding dimensions
+            sortSparseDimsDoubleValues(queryDims, queryValues, n);
+        }
+
+        public double dotProductSparse(VectorScriptDocValues.SparseVectorScriptDocValues dvs) {
+            BytesRef value = dvs.getEncodedValue();
+            if (value == null) return 0;
+            int[] docDims = VectorEncoderDecoder.decodeSparseVectorDims(value);
+            float[] docValues = VectorEncoderDecoder.decodeSparseVector(value);
+            return intDotProductSparse(queryValues, queryDims, docValues, docDims);
+        }
+    }
+
+    /**
+     * Calculate cosine similarity between a query's sparse vector and documents' sparse vectors
+     *
+     * CosineSimilaritySparse is implemented as a class to use
+     * painless script caching to prepare queryVector and calculate queryVectorMagnitude
+     * only once per script execution for all documents.
+     * A user will call `cosineSimilaritySparse(params.queryVector, doc['my_vector'])`
+     */
+    public static final class CosineSimilaritySparse {
+        final double[] queryValues;
+        final int[] queryDims;
+        final double queryVectorMagnitude;
+
+        // prepare queryVector once per script execution
+        public CosineSimilaritySparse(Map<String, Number> queryVector) {
+            //break vector into two arrays dims and values
+            int n = queryVector.size();
+            queryValues = new double[n];
+            queryDims = new int[n];
+            double dotProduct = 0;
+            int i = 0;
+            for (Map.Entry<String, Number> dimValue : queryVector.entrySet()) {
+                try {
+                    queryDims[i] = Integer.parseInt(dimValue.getKey());
+                } catch (final NumberFormatException e) {
+                    throw new IllegalArgumentException("Failed to parse a query vector dimension, it must be an integer!", e);
+                }
+                queryValues[i] = dimValue.getValue().doubleValue();
+                dotProduct +=  queryValues[i] *  queryValues[i];
+                i++;
+            }
+            this.queryVectorMagnitude = Math.sqrt(dotProduct);
+            // Sort dimensions in the ascending order and sort values in the same order as their corresponding dimensions
+            sortSparseDimsDoubleValues(queryDims, queryValues, n);
+        }
+
+        public double cosineSimilaritySparse(VectorScriptDocValues.SparseVectorScriptDocValues dvs) {
+            BytesRef value = dvs.getEncodedValue();
+            if (value == null) return 0;
+            int[] docDims = VectorEncoderDecoder.decodeSparseVectorDims(value);
+            float[] docValues = VectorEncoderDecoder.decodeSparseVector(value);
+
+            // calculate docVector magnitude
+            double dotProduct = 0;
+            for (float docValue : docValues) {
+                dotProduct += (double) docValue * docValue;
+            }
+            final double docVectorMagnitude = Math.sqrt(dotProduct);
+
+            double docQueryDotProduct = intDotProductSparse(queryValues, queryDims, docValues, docDims);
+            return docQueryDotProduct / (docVectorMagnitude * queryVectorMagnitude);
+        }
+    }
+
+    private static double intDotProductSparse(double[] v1Values, int[] v1Dims, float[] v2Values, int[] v2Dims) {
+        double v1v2DotProduct = 0;
+        int v1Index = 0;
+        int v2Index = 0;
+        // find common dimensions among vectors v1 and v2 and calculate dotProduct based on common dimensions
+        while (v1Index < v1Values.length && v2Index < v2Values.length) {
+            if (v1Dims[v1Index] == v2Dims[v2Index]) {
+                v1v2DotProduct += v1Values[v1Index] * v2Values[v2Index];
+                v1Index++;
+                v2Index++;
+            } else if (v1Dims[v1Index] > v2Dims[v2Index]) {
+                v2Index++;
+            } else {
+                v1Index++;
+            }
+        }
+        return v1v2DotProduct;
+    }
+}

+ 80 - 0
modules/mapper-extras/src/main/java/org/elasticsearch/index/query/VectorDVAtomicFieldData.java

@@ -0,0 +1,80 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.query;
+
+import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.util.Accountable;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.index.fielddata.AtomicFieldData;
+import org.elasticsearch.index.fielddata.ScriptDocValues;
+import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Collections;
+
+final class VectorDVAtomicFieldData implements AtomicFieldData {
+
+    private final LeafReader reader;
+    private final String field;
+    private final boolean isDense;
+
+    VectorDVAtomicFieldData(LeafReader reader, String field, boolean isDense) {
+        this.reader = reader;
+        this.field = field;
+        this.isDense = isDense;
+    }
+
+    @Override
+    public long ramBytesUsed() {
+        return 0; // not exposed by Lucene
+    }
+
+    @Override
+    public Collection<Accountable> getChildResources() {
+        return Collections.emptyList();
+    }
+
+    @Override
+    public SortedBinaryDocValues getBytesValues() {
+        throw new UnsupportedOperationException("String representation of doc values for vector fields is not supported");
+    }
+
+    @Override
+    public ScriptDocValues<BytesRef> getScriptValues() {
+        try {
+            final BinaryDocValues values = DocValues.getBinary(reader, field);
+            if (isDense) {
+                return new VectorScriptDocValues.DenseVectorScriptDocValues(values);
+            } else {
+                return new VectorScriptDocValues.SparseVectorScriptDocValues(values);
+            }
+        } catch (IOException e) {
+            throw new IllegalStateException("Cannot load doc values for vector field!", e);
+        }
+    }
+
+    @Override
+    public void close() {
+        // no-op
+    }
+}

+ 74 - 0
modules/mapper-extras/src/main/java/org/elasticsearch/index/query/VectorDVIndexFieldData.java

@@ -0,0 +1,74 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.query;
+
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.SortField;
+import org.elasticsearch.common.Nullable;
+import org.elasticsearch.index.Index;
+import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.fielddata.IndexFieldData;
+import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested;
+import org.elasticsearch.index.fielddata.IndexFieldDataCache;
+import org.elasticsearch.index.fielddata.plain.DocValuesIndexFieldData;
+import org.elasticsearch.index.mapper.MappedFieldType;
+import org.elasticsearch.index.mapper.MapperService;
+import org.elasticsearch.indices.breaker.CircuitBreakerService;
+import org.elasticsearch.search.MultiValueMode;
+
+
+public class VectorDVIndexFieldData extends DocValuesIndexFieldData implements IndexFieldData<VectorDVAtomicFieldData> {
+    private final boolean isDense;
+
+    public VectorDVIndexFieldData(Index index, String fieldName, boolean isDense) {
+        super(index, fieldName);
+        this.isDense = isDense;
+    }
+
+    @Override
+    public SortField sortField(@Nullable Object missingValue, MultiValueMode sortMode, Nested nested, boolean reverse) {
+        throw new IllegalArgumentException("can't sort on the vector field");
+    }
+
+    @Override
+    public VectorDVAtomicFieldData load(LeafReaderContext context) {
+        return new VectorDVAtomicFieldData(context.reader(), fieldName, isDense);
+    }
+
+    @Override
+    public VectorDVAtomicFieldData loadDirect(LeafReaderContext context) throws Exception {
+        return load(context);
+    }
+
+    public static class Builder implements IndexFieldData.Builder {
+        private final boolean isDense;
+        public Builder(boolean isDense) {
+            this.isDense = isDense;
+        }
+
+        @Override
+        public IndexFieldData<?> build(IndexSettings indexSettings, MappedFieldType fieldType, IndexFieldDataCache cache,
+                                       CircuitBreakerService breakerService, MapperService mapperService) {
+            final String fieldName = fieldType.name();
+            return new VectorDVIndexFieldData(indexSettings.getIndex(), fieldName, isDense);
+        }
+
+    }
+}

+ 78 - 0
modules/mapper-extras/src/main/java/org/elasticsearch/index/query/VectorScriptDocValues.java

@@ -0,0 +1,78 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.query;
+
+import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.index.fielddata.ScriptDocValues;
+
+import java.io.IOException;
+
+/**
+ * Script doc values shared by dense and sparse vector fields.
+ * <p>
+ * The encoded bytes of the current document are reachable only through the package-private
+ * {@link #getEncodedValue()}; the list-style accessors {@link #get(int)} and {@link #size()}
+ * always throw, so scripts cannot read the raw value directly.
+ */
+public abstract class VectorScriptDocValues extends ScriptDocValues<BytesRef> {
+
+    // single source for the message thrown by both unsupported accessors
+    private static final String DIRECT_ACCESS_MESSAGE = "vector fields may only be used via vector functions in scripts";
+
+    private final BinaryDocValues in;
+    // encoded vector of the current document, or null when the document has no value
+    private BytesRef value;
+
+    VectorScriptDocValues(BinaryDocValues in) {
+        this.in = in;
+    }
+
+    /**
+     * Positions the underlying doc values on {@code docId} and caches its binary value,
+     * or {@code null} if the document has no value for the field.
+     */
+    @Override
+    public void setNextDocId(int docId) throws IOException {
+        value = in.advanceExact(docId) ? in.binaryValue() : null;
+    }
+
+    // kept package-private: meant to be read by {@link ScoreScriptUtils} only
+    BytesRef getEncodedValue() {
+        return value;
+    }
+
+    /**
+     * Not supported for vector fields.
+     *
+     * @throws UnsupportedOperationException always
+     */
+    @Override
+    public BytesRef get(int index) {
+        throw new UnsupportedOperationException(DIRECT_ACCESS_MESSAGE);
+    }
+
+    /**
+     * Not supported for vector fields.
+     *
+     * @throws UnsupportedOperationException always
+     */
+    @Override
+    public int size() {
+        throw new UnsupportedOperationException(DIRECT_ACCESS_MESSAGE);
+    }
+
+    // deliberately non-final so that Mockito can subclass it in tests
+    public static class DenseVectorScriptDocValues extends VectorScriptDocValues {
+        public DenseVectorScriptDocValues(BinaryDocValues in) {
+            super(in);
+        }
+    }
+
+    // deliberately non-final so that Mockito can subclass it in tests
+    public static class SparseVectorScriptDocValues extends VectorScriptDocValues {
+        public SparseVectorScriptDocValues(BinaryDocValues in) {
+            super(in);
+        }
+    }
+
+}

+ 1 - 0
modules/mapper-extras/src/main/resources/META-INF/services/org.elasticsearch.painless.spi.PainlessExtension

@@ -0,0 +1 @@
+org.elasticsearch.index.query.DocValuesWhitelistExtension

+ 32 - 0
modules/mapper-extras/src/main/resources/org/elasticsearch/index/query/docvalues_whitelist.txt

@@ -0,0 +1,32 @@
+#
+# Licensed to Elasticsearch under one or more contributor
+# license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright
+# ownership. Elasticsearch licenses this file to you under
+# the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+# Doc-values types for vector fields. Their bodies are empty, so no methods or fields
+# are exposed to scripts; the types are only referenced as parameter types by the
+# static imports below.
+class org.elasticsearch.index.query.VectorScriptDocValues {
+}
+class org.elasticsearch.index.query.VectorScriptDocValues$DenseVectorScriptDocValues {
+}
+class org.elasticsearch.index.query.VectorScriptDocValues$SparseVectorScriptDocValues {
+}
+
+# Vector functions callable from scripts. The dense functions take a List query vector and
+# dense doc values; the *Sparse functions take a Map query vector and sparse doc values.
+# dotProduct is imported from ScoreScriptUtils itself (from_class); the other three are
+# bound to nested classes of ScoreScriptUtils (bound_to).
+static_import {
+    double cosineSimilarity(List, VectorScriptDocValues.DenseVectorScriptDocValues) bound_to org.elasticsearch.index.query.ScoreScriptUtils$CosineSimilarity
+    double dotProduct(List, VectorScriptDocValues.DenseVectorScriptDocValues) from_class org.elasticsearch.index.query.ScoreScriptUtils
+    double dotProductSparse(Map, VectorScriptDocValues.SparseVectorScriptDocValues) bound_to org.elasticsearch.index.query.ScoreScriptUtils$DotProductSparse
+    double cosineSimilaritySparse(Map, VectorScriptDocValues.SparseVectorScriptDocValues) bound_to org.elasticsearch.index.query.ScoreScriptUtils$CosineSimilaritySparse
+}

+ 1 - 1
modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/VectorEncoderDecoderTests.java

@@ -83,7 +83,7 @@ public class VectorEncoderDecoderTests extends ESTestCase {
     }
 
     // imitates the code in DenseVectorFieldMapper::parse
-    private BytesRef mockEncodeDenseVector(float[] dims) {
+    public static BytesRef mockEncodeDenseVector(float[] dims) {
         final short INT_BYTES = VectorEncoderDecoder.INT_BYTES;
         byte[] buf = new byte[INT_BYTES * dims.length];
         int offset = 0;

+ 82 - 0
modules/mapper-extras/src/test/java/org/elasticsearch/index/query/ScoreScriptUtilsTests.java

@@ -0,0 +1,82 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.query;
+
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.index.mapper.VectorEncoderDecoder;
+import org.elasticsearch.test.ESTestCase;
+import org.elasticsearch.index.query.ScoreScriptUtils.CosineSimilarity;
+import org.elasticsearch.index.query.ScoreScriptUtils.DotProductSparse;
+import org.elasticsearch.index.query.ScoreScriptUtils.CosineSimilaritySparse;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import static org.elasticsearch.index.mapper.VectorEncoderDecoderTests.mockEncodeDenseVector;
+import static org.elasticsearch.index.query.ScoreScriptUtils.dotProduct;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+
+/**
+ * Unit tests for the vector functions of {@link ScoreScriptUtils}. The doc values are mocked so that
+ * {@code getEncodedValue()} returns a pre-encoded document vector.
+ * <p>
+ * Both tests use the same document values (230.0, 300.33, -34.8988, 15.555, -200.0) and query values
+ * (0.5, 111.3, -13.0, 14.8, -156.0), for which the dot product is ~65425.627 and the cosine
+ * similarity is ~0.79054.
+ */
+public class ScoreScriptUtilsTests extends ESTestCase {
+    public void testDenseVectorFunctions() {
+        float[] docVector = {230.0f, 300.33f, -34.8988f, 15.555f, -200.0f};
+        BytesRef encodedDocVector = mockEncodeDenseVector(docVector);
+        VectorScriptDocValues.DenseVectorScriptDocValues dvs = mock(VectorScriptDocValues.DenseVectorScriptDocValues.class);
+        when(dvs.getEncodedValue()).thenReturn(encodedDocVector);
+        List<Number> queryVector = Arrays.asList(0.5, 111.3, -13.0, 14.8, -156.0);
+
+        // test dotProduct
+        double result = dotProduct(queryVector, dvs);
+        assertEquals("dotProduct result is not equal to the expected value!", 65425.62, result, 0.1);
+
+        // test cosineSimilarity; the result lies in [-1, 1], so the tolerance has to be far below 0.1 to be meaningful
+        CosineSimilarity cosineSimilarity = new CosineSimilarity(queryVector);
+        double result2 = cosineSimilarity.cosineSimilarity(dvs);
+        assertEquals("cosineSimilarity result is not equal to the expected value!", 0.7905, result2, 0.001);
+    }
+
+    public void testSparseVectorFunctions() {
+        int[] docVectorDims = {2, 10, 50, 113, 4545};
+        float[] docVectorValues = {230.0f, 300.33f, -34.8988f, 15.555f, -200.0f};
+        BytesRef encodedDocVector = VectorEncoderDecoder.encodeSparseVector(docVectorDims, docVectorValues, docVectorDims.length);
+        VectorScriptDocValues.SparseVectorScriptDocValues dvs = mock(VectorScriptDocValues.SparseVectorScriptDocValues.class);
+        when(dvs.getEncodedValue()).thenReturn(encodedDocVector);
+        // plain HashMap instead of double-brace initialization, which would create an anonymous subclass
+        Map<String, Number> queryVector = new HashMap<>();
+        queryVector.put("2", 0.5);
+        queryVector.put("10", 111.3);
+        queryVector.put("50", -13.0);
+        queryVector.put("113", 14.8);
+        queryVector.put("4545", -156.0);
+
+        // test dotProduct
+        DotProductSparse dotProductSparse = new DotProductSparse(queryVector);
+        double result = dotProductSparse.dotProductSparse(dvs);
+        assertEquals("dotProductSparse result is not equal to the expected value!", 65425.62, result, 0.1);
+
+        // test cosineSimilarity; same tight tolerance as in the dense test
+        CosineSimilaritySparse cosineSimilaritySparse = new CosineSimilaritySparse(queryVector);
+        double result2 = cosineSimilaritySparse.cosineSimilaritySparse(dvs);
+        assertEquals("cosineSimilaritySparse result is not equal to the expected value!", 0.7905, result2, 0.001);
+    }
+}

+ 100 - 0
modules/mapper-extras/src/test/resources/rest-api-spec/test/dense-vector/10_basic.yml

@@ -0,0 +1,100 @@
+setup:
+  - skip:
+      features: headers
+      version: " - 7.0.99"
+      reason: "dense_vector functions were introduced in 7.1.0"
+
+  - do:
+      indices.create:
+        include_type_name: false
+        index: test-index
+        body:
+          settings:
+            number_of_replicas: 0
+          mappings:
+            properties:
+              my_dense_vector:
+                 type: dense_vector
+  - do:
+      index:
+        index: test-index
+        id: 1
+        body:
+          my_dense_vector: [230.0, 300.33, -34.8988, 15.555, -200.0]
+
+  - do:
+      index:
+        index: test-index
+        id: 2
+        body:
+          my_dense_vector: [-0.5, 100.0, -13, 14.8, -156.0]
+
+  - do:
+      index:
+        index: test-index
+        id: 3
+        body:
+          my_dense_vector: [0.5, 111.3, -13.0, 14.8, -156.0]
+
+  - do:
+      indices.refresh: {}
+
+---
+"Dot Product":
+  # Scores each of the three documents indexed in setup by the dot product of its
+  # vector with query_vector and checks the hits in descending score order.
+  - do:
+      headers:
+        Content-Type: application/json
+      search:
+        rest_total_hits_as_int: true
+        body:
+          query:
+            script_score:
+              query: {match_all: {} }
+              script:
+                source: "dotProduct(params.query_vector, doc['my_dense_vector'])"
+                params:
+                  query_vector: [0.5, 111.3, -13.0, 14.8, -156.0]
+
+  - match: {hits.total: 3}
+
+  # doc 1: dot product ~= 65425.627
+  - match: {hits.hits.0._id: "1"}
+  - gte: {hits.hits.0._score: 65425.62}
+  - lte: {hits.hits.0._score: 65425.63}
+
+  # doc 3 has the same values as query_vector: dot product ~= 37111.98
+  - match: {hits.hits.1._id: "3"}
+  - gte: {hits.hits.1._score: 37111.98}
+  - lte: {hits.hits.1._score: 37111.99}
+
+  # doc 2: dot product ~= 35853.79; the gte/lte pair brackets the returned score
+  - match: {hits.hits.2._id: "2"}
+  - gte: {hits.hits.2._score: 35853.78}
+  - lte: {hits.hits.2._score: 35853.79}
+
+---
+"Cosine Similarity":
+  # Scores each of the three documents indexed in setup by the cosine similarity of its
+  # vector with query_vector and checks the hits in descending score order.
+  - do:
+      headers:
+        Content-Type: application/json
+      search:
+        rest_total_hits_as_int: true
+        body:
+          query:
+            script_score:
+              query: {match_all: {} }
+              script:
+                source: "cosineSimilarity(params.query_vector, doc['my_dense_vector'])"
+                params:
+                  query_vector: [0.5, 111.3, -13.0, 14.8, -156.0]
+
+  - match: {hits.total: 3}
+
+  # doc 3 has the same values as query_vector: cosine ~= 1.0
+  - match: {hits.hits.0._id: "3"}
+  - gte: {hits.hits.0._score: 0.999}
+  - lte: {hits.hits.0._score: 1.001}
+
+  # doc 2 differs from the query in its first two components: cosine ~= 0.9988
+  - match: {hits.hits.1._id: "2"}
+  - gte: {hits.hits.1._score: 0.998}
+  - lte: {hits.hits.1._score: 1.0}
+
+  # doc 1: cosine ~= 0.7905
+  - match: {hits.hits.2._id: "1"}
+  - gte: {hits.hits.2._score: 0.78}
+  - lte: {hits.hits.2._score: 0.791}

+ 0 - 27
modules/mapper-extras/src/test/resources/rest-api-spec/test/dense-vector/10_indexing.yml

@@ -1,27 +0,0 @@
-setup:
-  - skip:
-      version: " - 6.99.99"
-      reason: "dense_vector field was introduced in 7.0.0"
-
-  - do:
-      indices.create:
-        index: test-index
-        body:
-          settings:
-            number_of_replicas: 0
-          mappings:
-            properties:
-              my_dense_vector:
-                 type: dense_vector
-
-
----
-"Indexing":
-  - do:
-      index:
-        index: test-index
-        id: 1
-        body:
-          my_dense_vector: [1.5, -10, 3455, 345452.4545]
-
-  - match: { result: created }

+ 150 - 0
modules/mapper-extras/src/test/resources/rest-api-spec/test/dense-vector/20_special_cases.yml

@@ -0,0 +1,150 @@
+setup:
+  - skip:
+      features: headers
+      version: " - 7.0.99"
+      reason: "dense_vector functions were introduced in 7.1.0"
+
+  - do:
+      indices.create:
+        include_type_name: false
+        index: test-index
+        body:
+          settings:
+            number_of_replicas: 0
+          mappings:
+            properties:
+              my_dense_vector:
+                 type: dense_vector
+
+
+---
+"Vectors of different dimensions and data types":
+# document vectors of different dimensions
+  - do:
+      index:
+        index: test-index
+        id: 1
+        body:
+          my_dense_vector: [10]
+
+  - do:
+      index:
+        index: test-index
+        id: 2
+        body:
+          my_dense_vector: [10, 10.5]
+
+  - do:
+      index:
+        index: test-index
+        id: 3
+        body:
+          my_dense_vector: [10, 10.5, 100.5]
+
+  - do:
+      indices.refresh: {}
+
+# query vector of type integer
+  - do:
+      headers:
+        Content-Type: application/json
+      search:
+        rest_total_hits_as_int: true
+        body:
+          query:
+            script_score:
+              query: {match_all: {} }
+              script:
+                source: "cosineSimilarity(params.query_vector, doc['my_dense_vector'])"
+                params:
+                  query_vector: [10]
+
+  - match: {hits.total: 3}
+  - match: {hits.hits.0._id: "1"}
+  - match: {hits.hits.1._id: "2"}
+  - match: {hits.hits.2._id: "3"}
+
+# query vector of type double
+  - do:
+      headers:
+        Content-Type: application/json
+      search:
+        rest_total_hits_as_int: true
+        body:
+          query:
+            script_score:
+              query: {match_all: {} }
+              script:
+                source: "cosineSimilarity(params.query_vector, doc['my_dense_vector'])"
+                params:
+                  query_vector: [10.0]
+
+  - match: {hits.total: 3}
+  - match: {hits.hits.0._id: "1"}
+  - match: {hits.hits.1._id: "2"}
+  - match: {hits.hits.2._id: "3"}
+
+---
+"Distance functions for documents missing vector field should return 0":
+- do:
+    index:
+      index: test-index
+      id: 1
+      body:
+        my_dense_vector: [10]
+
+- do:
+    index:
+      index: test-index
+      id: 2
+      body:
+        some_other_field: "random_value"
+
+- do:
+    indices.refresh: {}
+
+- do:
+    headers:
+      Content-Type: application/json
+    search:
+      rest_total_hits_as_int: true
+      body:
+        query:
+          script_score:
+            query: {match_all: {} }
+            script:
+              source: "cosineSimilarity(params.query_vector, doc['my_dense_vector'])"
+              params:
+                query_vector: [10.0]
+
+- match: {hits.total: 2}
+- match: {hits.hits.0._id: "1"}
+- match: {hits.hits.1._id: "2"}
+- match: {hits.hits.1._score: 0.0}
+
+---
+"Dense vectors should error with sparse vector functions":
+- do:
+    index:
+      index: test-index
+      id: 1
+      body:
+        my_dense_vector: [10, 2, 0.15]
+
+- do:
+    indices.refresh: {}
+
+- do:
+    catch: bad_request
+    headers:
+      Content-Type: application/json
+    search:
+      body:
+        query:
+          script_score:
+            query: {match_all: {} }
+            script:
+              source: "dotProductSparse(params.query_vector, doc['my_dense_vector'])"
+              params:
+                query_vector: {"2": 0.5, "10" : 111.3}
+- match: { error.root_cause.0.type: "script_exception" }

+ 100 - 0
modules/mapper-extras/src/test/resources/rest-api-spec/test/sparse-vector/10_basic.yml

@@ -0,0 +1,100 @@
+setup:
+  - skip:
+      features: headers
+      version: " - 7.0.99"
+      reason: "sparse_vector functions were introduced in 7.1.0"
+
+  - do:
+      indices.create:
+        include_type_name: false
+        index: test-index
+        body:
+          settings:
+            number_of_replicas: 0
+          mappings:
+            properties:
+              my_sparse_vector:
+                 type: sparse_vector
+  - do:
+      index:
+        index: test-index
+        id: 1
+        body:
+          my_sparse_vector: {"2": 230.0, "10" : 300.33, "50": -34.8988, "113": 15.555, "4545": -200.0}
+
+  - do:
+      index:
+        index: test-index
+        id: 2
+        body:
+          my_sparse_vector: {"2": -0.5, "10" : 100.0, "50": -13, "113": 14.8, "4545": -156.0}
+
+  - do:
+      index:
+        index: test-index
+        id: 3
+        body:
+          my_sparse_vector: {"2": 0.5, "10" : 111.3, "50": -13.0, "113": 14.8, "4545": -156.0}
+
+  - do:
+      indices.refresh: {}
+
+---
+"Dot Product":
+# Scores each of the three documents indexed in setup by the sparse dot product of its
+# vector with query_vector and checks the hits in descending score order.
+- do:
+    headers:
+      Content-Type: application/json
+    search:
+      rest_total_hits_as_int: true
+      body:
+        query:
+          script_score:
+            query: {match_all: {} }
+            script:
+              source: "dotProductSparse(params.query_vector, doc['my_sparse_vector'])"
+              params:
+                query_vector: {"2": 0.5, "10" : 111.3, "50": -13.0, "113": 14.8, "4545": -156.0}
+
+- match: {hits.total: 3}
+
+# doc 1: dot product ~= 65425.627
+- match: {hits.hits.0._id: "1"}
+- gte: {hits.hits.0._score: 65425.62}
+- lte: {hits.hits.0._score: 65425.63}
+
+# doc 3 has the same dimensions and values as query_vector: dot product ~= 37111.98
+- match: {hits.hits.1._id: "3"}
+- gte: {hits.hits.1._score: 37111.98}
+- lte: {hits.hits.1._score: 37111.99}
+
+# doc 2: dot product ~= 35853.79; the gte/lte pair brackets the returned score
+- match: {hits.hits.2._id: "2"}
+- gte: {hits.hits.2._score: 35853.78}
+- lte: {hits.hits.2._score: 35853.79}
+
+---
+"Cosine Similarity":
+# Scores each of the three documents indexed in setup by the sparse cosine similarity of its
+# vector with query_vector and checks the hits in descending score order.
+# NOTE: dimension "2" of query_vector is -0.5 here (it is 0.5 in the "Dot Product" test), so
+# no document is exactly equal to the query; the expected ranking is unchanged.
+- do:
+    headers:
+      Content-Type: application/json
+    search:
+      rest_total_hits_as_int: true
+      body:
+        query:
+          script_score:
+            query: {match_all: {} }
+            script:
+              source: "cosineSimilaritySparse(params.query_vector, doc['my_sparse_vector'])"
+              params:
+                query_vector: {"2": -0.5, "10" : 111.3, "50": -13.0, "113": 14.8, "4545": -156.0}
+
+- match: {hits.total: 3}
+
+# doc 3 differs from the query only in the sign of dimension "2": cosine ~= 0.99999
+- match: {hits.hits.0._id: "3"}
+- gte: {hits.hits.0._score: 0.999}
+- lte: {hits.hits.0._score: 1.001}
+
+# doc 2 differs from the query only in dimension "10": cosine ~= 0.9988
+- match: {hits.hits.1._id: "2"}
+- gte: {hits.hits.1._score: 0.998}
+- lte: {hits.hits.1._score: 1.0}
+
+# doc 1: cosine ~= 0.788
+- match: {hits.hits.2._id: "1"}
+- gte: {hits.hits.2._score: 0.78}
+- lte: {hits.hits.2._score: 0.791}

+ 0 - 27
modules/mapper-extras/src/test/resources/rest-api-spec/test/sparse-vector/10_indexing.yml

@@ -1,27 +0,0 @@
-setup:
-  - skip:
-      version: " - 6.99.99"
-      reason: "sparse_vector field was introduced in 7.0.0"
-
-  - do:
-      indices.create:
-        index: test-index
-        body:
-          settings:
-            number_of_replicas: 0
-          mappings:
-            properties:
-              my_sparse_vector:
-                 type: sparse_vector
-
-
----
-"Indexing":
-  - do:
-      index:
-        index: test-index
-        id: 1
-        body:
-          my_sparse_vector: { "50" : 1.8, "2" : -0.4, "10" : 1000.3, "4545" : -0.00004}
-
-  - match: { result: created }

+ 201 - 0
modules/mapper-extras/src/test/resources/rest-api-spec/test/sparse-vector/20_special_cases.yml

@@ -0,0 +1,201 @@
+setup:
+  - skip:
+      features: headers
+      version: " - 7.0.99"
+      reason: "sparse_vector functions were introduced in 7.1.0"
+
+  - do:
+      indices.create:
+        include_type_name: false
+        index: test-index
+        body:
+          settings:
+            number_of_replicas: 0
+          mappings:
+            properties:
+              my_sparse_vector:
+                 type: sparse_vector
+
+
+---
+"Vectors of different dimensions and data types":
+# document vectors of different dimensions
+  - do:
+      index:
+        index: test-index
+        id: 1
+        body:
+          my_sparse_vector: {"1": 10}
+
+  - do:
+      index:
+        index: test-index
+        id: 2
+        body:
+          my_sparse_vector: {"1": 10, "10" : 10.5}
+
+  - do:
+      index:
+        index: test-index
+        id: 3
+        body:
+          my_sparse_vector: {"1": 10, "10" : 10.5, "100": 100.5}
+
+  - do:
+      indices.refresh: {}
+
+# query vector of type integer
+  - do:
+      headers:
+        Content-Type: application/json
+      search:
+        rest_total_hits_as_int: true
+        body:
+          query:
+            script_score:
+              query: {match_all: {} }
+              script:
+                source: "cosineSimilaritySparse(params.query_vector, doc['my_sparse_vector'])"
+                params:
+                  query_vector: {"1": 10}
+
+  - match: {hits.total: 3}
+  - match: {hits.hits.0._id: "1"}
+  - match: {hits.hits.1._id: "2"}
+  - match: {hits.hits.2._id: "3"}
+
+# query vector of type double
+  - do:
+      headers:
+        Content-Type: application/json
+      search:
+        rest_total_hits_as_int: true
+        body:
+          query:
+            script_score:
+              query: {match_all: {} }
+              script:
+                source: "cosineSimilaritySparse(params.query_vector, doc['my_sparse_vector'])"
+                params:
+                  query_vector: {"1": 10.0}
+
+  - match: {hits.total: 3}
+  - match: {hits.hits.0._id: "1"}
+  - match: {hits.hits.1._id: "2"}
+  - match: {hits.hits.2._id: "3"}
+
+---
+"Distance functions for documents missing vector field should return 0":
+- do:
+    index:
+      index: test-index
+      id: 1
+      body:
+        my_sparse_vector: {"1": 10}
+
+- do:
+    index:
+      index: test-index
+      id: 2
+      body:
+        some_other_field: "random_value"
+
+- do:
+    indices.refresh: {}
+
+- do:
+    headers:
+      Content-Type: application/json
+    search:
+      rest_total_hits_as_int: true
+      body:
+        query:
+          script_score:
+            query: {match_all: {} }
+            script:
+              source: "cosineSimilaritySparse(params.query_vector, doc['my_sparse_vector'])"
+              params:
+                query_vector: {"1": 10.0}
+
+- match: {hits.total: 2}
+- match: {hits.hits.0._id: "1"}
+- match: {hits.hits.1._id: "2"}
+- match: {hits.hits.1._score: 0.0}
+
+
+---
+"Dimensions can be sorted differently":
+# All the documents' and query's vectors are the same, and should return cosineSimilarity equal to 1.
+# Only the order in which the dimensions are listed differs between the documents and the query,
+# so every document must use exactly the same set of dimension keys: 2, 11, 12, 30, 100.
+- do:
+    index:
+      index: test-index
+      id: 1
+      body:
+        my_sparse_vector: {"2": 230.0, "11" : 300.33, "12": -34.8988, "30": 15.555, "100": -200.0}
+
+- do:
+    index:
+      index: test-index
+      id: 2
+      body:
+        my_sparse_vector: {"100": -200.0, "12": -34.8988, "11" : 300.33, "30": 15.555, "2": 230.0}
+
+- do:
+    index:
+      index: test-index
+      id: 3
+      body:
+        my_sparse_vector: {"100": -200.0, "30": 15.555, "12": -34.8988, "11" : 300.33, "2": 230.0}
+
+- do:
+    indices.refresh: {}
+
+- do:
+    headers:
+      Content-Type: application/json
+    search:
+      rest_total_hits_as_int: true
+      body:
+        query:
+          script_score:
+            query: {match_all: {} }
+            script:
+              source: "cosineSimilaritySparse(params.query_vector, doc['my_sparse_vector'])"
+              params:
+                query_vector: {"100": -200.0, "11" : 300.33, "12": -34.8988, "2": 230.0, "30": 15.555}
+
+- match: {hits.total: 3}
+
+# all three scores are expected to be ~1, so the hit order is not asserted
+- gte: {hits.hits.0._score: 0.99}
+- lte: {hits.hits.0._score: 1.001}
+- gte: {hits.hits.1._score: 0.99}
+- lte: {hits.hits.1._score: 1.001}
+- gte: {hits.hits.2._score: 0.99}
+- lte: {hits.hits.2._score: 1.001}
+
+---
+"Sparse vectors should error with dense vector functions":
+- do:
+    index:
+      index: test-index
+      id: 1
+      body:
+        my_sparse_vector: {"100": -200.0, "30": 15.555}
+
+- do:
+    indices.refresh: {}
+
+- do:
+    catch: bad_request
+    headers:
+      Content-Type: application/json
+    search:
+      body:
+        query:
+          script_score:
+            query: {match_all: {} }
+            script:
+              source: "dotProduct(params.query_vector, doc['my_sparse_vector'])"
+              params:
+                query_vector: [0.5, 111]
+- match: { error.root_cause.0.type: "script_exception" }