|  | @@ -5,16 +5,14 @@
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  experimental[]
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -These functions are used for
 | 
	
		
			
				|  |  | -for <<dense-vector,`dense_vector`>>  and
 | 
	
		
			
				|  |  | -<<sparse-vector,`sparse_vector`>> fields.
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  |  NOTE: During vector functions' calculation, all matched documents are
 | 
	
		
			
				|  |  | -linearly scanned. Thus, expect the query time grow linearly 
 | 
	
		
			
				|  |  | +linearly scanned. Thus, expect the query time grow linearly
 | 
	
		
			
				|  |  |  with the number of matched documents. For this reason, we recommend
 | 
	
		
			
				|  |  |  to limit the number of matched documents with a `query` parameter.
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -Let's create an index with the following mapping and index a couple
 | 
	
		
			
				|  |  | +====== `dense_vector` functions
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +Let's create an index with a `dense_vector` mapping and index a couple
 | 
	
		
			
				|  |  |  of documents into it.
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  [source,console]
 | 
	
	
		
			
				|  | @@ -27,9 +25,6 @@ PUT my_index
 | 
	
		
			
				|  |  |          "type": "dense_vector",
 | 
	
		
			
				|  |  |          "dims": 3
 | 
	
		
			
				|  |  |        },
 | 
	
		
			
				|  |  | -      "my_sparse_vector" : {
 | 
	
		
			
				|  |  | -        "type" : "sparse_vector"
 | 
	
		
			
				|  |  | -      },
 | 
	
		
			
				|  |  |        "status" : {
 | 
	
		
			
				|  |  |          "type" : "keyword"
 | 
	
		
			
				|  |  |        }
 | 
	
	
		
			
				|  | @@ -40,21 +35,21 @@ PUT my_index
 | 
	
		
			
				|  |  |  PUT my_index/_doc/1
 | 
	
		
			
				|  |  |  {
 | 
	
		
			
				|  |  |    "my_dense_vector": [0.5, 10, 6],
 | 
	
		
			
				|  |  | -  "my_sparse_vector": {"2": 1.5, "15" : 2, "50": -1.1, "4545": 1.1},
 | 
	
		
			
				|  |  |    "status" : "published"
 | 
	
		
			
				|  |  |  }
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  PUT my_index/_doc/2
 | 
	
		
			
				|  |  |  {
 | 
	
		
			
				|  |  |    "my_dense_vector": [-0.5, 10, 10],
 | 
	
		
			
				|  |  | -  "my_sparse_vector": {"2": 2.5, "10" : 1.3, "55": -2.3, "113": 1.6},
 | 
	
		
			
				|  |  |    "status" : "published"
 | 
	
		
			
				|  |  |  }
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | +POST my_index/_refresh
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |  --------------------------------------------------
 | 
	
		
			
				|  |  |  // TESTSETUP
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -For dense_vector fields, `cosineSimilarity` calculates the measure of
 | 
	
		
			
				|  |  | +The `cosineSimilarity` function calculates the measure of
 | 
	
		
			
				|  |  |  cosine similarity between a given query vector and document vectors.
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  [source,console]
 | 
	
	
		
			
				|  | @@ -90,8 +85,8 @@ GET my_index/_search
 | 
	
		
			
				|  |  |  NOTE: If a document's dense vector field has a number of dimensions
 | 
	
		
			
				|  |  |  different from the query's vector, an error will be thrown.
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -Similarly, for sparse_vector fields, `cosineSimilaritySparse` calculates cosine similarity
 | 
	
		
			
				|  |  | -between a given query vector and document vectors.
 | 
	
		
			
				|  |  | +The `dotProduct` function calculates the measure of
 | 
	
		
			
				|  |  | +dot product between a given query vector and document vectors.
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  [source,console]
 | 
	
		
			
				|  |  |  --------------------------------------------------
 | 
	
	
		
			
				|  | @@ -109,9 +104,12 @@ GET my_index/_search
 | 
	
		
			
				|  |  |          }
 | 
	
		
			
				|  |  |        },
 | 
	
		
			
				|  |  |        "script": {
 | 
	
		
			
				|  |  | -        "source": "cosineSimilaritySparse(params.query_vector, doc['my_sparse_vector']) + 1.0",
 | 
	
		
			
				|  |  | +        "source": """
 | 
	
		
			
				|  |  | +          double value = dotProduct(params.query_vector, doc['my_dense_vector']);
 | 
	
		
			
				|  |  | +          return sigmoid(1, Math.E, -value); <1>
 | 
	
		
			
				|  |  | +        """,
 | 
	
		
			
				|  |  |          "params": {
 | 
	
		
			
				|  |  | -          "query_vector": {"2": 0.5, "10" : 111.3, "50": -1.3, "113": 14.8, "4545": 156.0}
 | 
	
		
			
				|  |  | +          "query_vector": [4, 3.4, -0.2]
 | 
	
		
			
				|  |  |          }
 | 
	
		
			
				|  |  |        }
 | 
	
		
			
				|  |  |      }
 | 
	
	
		
			
				|  | @@ -119,8 +117,11 @@ GET my_index/_search
 | 
	
		
			
				|  |  |  }
 | 
	
		
			
				|  |  |  --------------------------------------------------
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -For dense_vector fields, `dotProduct` calculates the measure of
 | 
	
		
			
				|  |  | -dot product between a given query vector and document vectors.
 | 
	
		
			
				|  |  | +<1> Using the standard sigmoid function prevents scores from being negative.
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +The `l1norm` function calculates L^1^ distance
 | 
	
		
			
				|  |  | +(Manhattan distance) between a given query vector and
 | 
	
		
			
				|  |  | +document vectors.
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  [source,console]
 | 
	
		
			
				|  |  |  --------------------------------------------------
 | 
	
	
		
			
				|  | @@ -138,12 +139,9 @@ GET my_index/_search
 | 
	
		
			
				|  |  |          }
 | 
	
		
			
				|  |  |        },
 | 
	
		
			
				|  |  |        "script": {
 | 
	
		
			
				|  |  | -        "source": """
 | 
	
		
			
				|  |  | -          double value = dotProduct(params.query_vector, doc['my_dense_vector']);
 | 
	
		
			
				|  |  | -          return sigmoid(1, Math.E, -value); <1>
 | 
	
		
			
				|  |  | -        """,
 | 
	
		
			
				|  |  | +        "source": "1 / (1 + l1norm(params.queryVector, doc['my_dense_vector']))", <1>
 | 
	
		
			
				|  |  |          "params": {
 | 
	
		
			
				|  |  | -          "query_vector": [4, 3.4, -0.2]
 | 
	
		
			
				|  |  | +          "queryVector": [4, 3.4, -0.2]
 | 
	
		
			
				|  |  |          }
 | 
	
		
			
				|  |  |        }
 | 
	
		
			
				|  |  |      }
 | 
	
	
		
			
				|  | @@ -151,10 +149,18 @@ GET my_index/_search
 | 
	
		
			
				|  |  |  }
 | 
	
		
			
				|  |  |  --------------------------------------------------
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -<1> Using the standard sigmoid function prevents scores from being negative.
 | 
	
		
			
				|  |  | +<1> Unlike `cosineSimilarity` that represent similarity, `l1norm` and
 | 
	
		
			
				|  |  | +`l2norm` shown below represent distances or differences. This means, that
 | 
	
		
			
				|  |  | +the more similar the vectors are, the lower the scores will be that are
 | 
	
		
			
				|  |  | +produced by the `l1norm` and `l2norm` functions.
 | 
	
		
			
				|  |  | +Thus, as we need more similar vectors to score higher,
 | 
	
		
			
				|  |  | +we reversed the output from `l1norm` and `l2norm`. Also, to avoid
 | 
	
		
			
				|  |  | +division by 0 when a document vector matches the query exactly,
 | 
	
		
			
				|  |  | +we added `1` in the denominator.
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -Similarly, for sparse_vector fields, `dotProductSparse` calculates dot product
 | 
	
		
			
				|  |  | -between a given query vector and document vectors.
 | 
	
		
			
				|  |  | +The `l2norm` function calculates L^2^ distance
 | 
	
		
			
				|  |  | +(Euclidean distance) between a given query vector and
 | 
	
		
			
				|  |  | +document vectors.
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  [source,console]
 | 
	
		
			
				|  |  |  --------------------------------------------------
 | 
	
	
		
			
				|  | @@ -172,12 +178,9 @@ GET my_index/_search
 | 
	
		
			
				|  |  |          }
 | 
	
		
			
				|  |  |        },
 | 
	
		
			
				|  |  |        "script": {
 | 
	
		
			
				|  |  | -        "source": """
 | 
	
		
			
				|  |  | -          double value = dotProductSparse(params.query_vector, doc['my_sparse_vector']);
 | 
	
		
			
				|  |  | -          return sigmoid(1, Math.E, -value);
 | 
	
		
			
				|  |  | -        """,
 | 
	
		
			
				|  |  | -         "params": {
 | 
	
		
			
				|  |  | -          "query_vector": {"2": 0.5, "10" : 111.3, "50": -1.3, "113": 14.8, "4545": 156.0}
 | 
	
		
			
				|  |  | +        "source": "1 / (1 + l2norm(params.queryVector, doc['my_dense_vector']))",
 | 
	
		
			
				|  |  | +        "params": {
 | 
	
		
			
				|  |  | +          "queryVector": [4, 3.4, -0.2]
 | 
	
		
			
				|  |  |          }
 | 
	
		
			
				|  |  |        }
 | 
	
		
			
				|  |  |      }
 | 
	
	
		
			
				|  | @@ -185,13 +188,67 @@ GET my_index/_search
 | 
	
		
			
				|  |  |  }
 | 
	
		
			
				|  |  |  --------------------------------------------------
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -For dense_vector fields, `l1norm` calculates L^1^ distance
 | 
	
		
			
				|  |  | -(Manhattan distance) between a given query vector and
 | 
	
		
			
				|  |  | -document vectors.
 | 
	
		
			
				|  |  | +NOTE: If a document doesn't have a value for a vector field on which
 | 
	
		
			
				|  |  | +a vector function is executed, an error will be thrown.
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +You can check if a document has a value for the field `my_vector` by
 | 
	
		
			
				|  |  | +`doc['my_vector'].size() == 0`. Your overall script can look like this:
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +[source,js]
 | 
	
		
			
				|  |  | +--------------------------------------------------
 | 
	
		
			
				|  |  | +"source": "doc['my_vector'].size() == 0 ? 0 : cosineSimilarity(params.queryVector, doc['my_vector'])"
 | 
	
		
			
				|  |  | +--------------------------------------------------
 | 
	
		
			
				|  |  | +// NOTCONSOLE
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +====== `sparse_vector` functions
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +deprecated[7.6, The `sparse_vector` type is deprecated and will be removed in 8.0.]
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +Let's create an index with a `sparse_vector` mapping and index a couple
 | 
	
		
			
				|  |  | +of documents into it.
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  [source,console]
 | 
	
		
			
				|  |  |  --------------------------------------------------
 | 
	
		
			
				|  |  | -GET my_index/_search
 | 
	
		
			
				|  |  | +PUT my_sparse_index
 | 
	
		
			
				|  |  | +{
 | 
	
		
			
				|  |  | +  "mappings": {
 | 
	
		
			
				|  |  | +    "properties": {
 | 
	
		
			
				|  |  | +      "my_sparse_vector": {
 | 
	
		
			
				|  |  | +        "type": "sparse_vector"
 | 
	
		
			
				|  |  | +      },
 | 
	
		
			
				|  |  | +      "status" : {
 | 
	
		
			
				|  |  | +        "type" : "keyword"
 | 
	
		
			
				|  |  | +      }
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +  }
 | 
	
		
			
				|  |  | +}
 | 
	
		
			
				|  |  | +--------------------------------------------------
 | 
	
		
			
				|  |  | +// TEST[warning:The [sparse_vector] field type is deprecated and will be removed in 8.0.]
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +[source,console]
 | 
	
		
			
				|  |  | +--------------------------------------------------
 | 
	
		
			
				|  |  | +PUT my_sparse_index/_doc/1
 | 
	
		
			
				|  |  | +{
 | 
	
		
			
				|  |  | +  "my_sparse_vector": {"2": 1.5, "15" : 2, "50": -1.1, "4545": 1.1},
 | 
	
		
			
				|  |  | +  "status" : "published"
 | 
	
		
			
				|  |  | +}
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +PUT my_sparse_index/_doc/2
 | 
	
		
			
				|  |  | +{
 | 
	
		
			
				|  |  | +  "my_sparse_vector": {"2": 2.5, "10" : 1.3, "55": -2.3, "113": 1.6},
 | 
	
		
			
				|  |  | +  "status" : "published"
 | 
	
		
			
				|  |  | +}
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +POST my_sparse_index/_refresh
 | 
	
		
			
				|  |  | +--------------------------------------------------
 | 
	
		
			
				|  |  | +// TEST[continued]
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +The `cosineSimilaritySparse` function calculates cosine similarity
 | 
	
		
			
				|  |  | +between a given query vector and document vectors.
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +[source,console]
 | 
	
		
			
				|  |  | +--------------------------------------------------
 | 
	
		
			
				|  |  | +GET my_sparse_index/_search
 | 
	
		
			
				|  |  |  {
 | 
	
		
			
				|  |  |    "query": {
 | 
	
		
			
				|  |  |      "script_score": {
 | 
	
	
		
			
				|  | @@ -205,31 +262,24 @@ GET my_index/_search
 | 
	
		
			
				|  |  |          }
 | 
	
		
			
				|  |  |        },
 | 
	
		
			
				|  |  |        "script": {
 | 
	
		
			
				|  |  | -        "source": "1 / (1 + l1norm(params.queryVector, doc['my_dense_vector']))", <1>
 | 
	
		
			
				|  |  | +        "source": "cosineSimilaritySparse(params.query_vector, doc['my_sparse_vector']) + 1.0",
 | 
	
		
			
				|  |  |          "params": {
 | 
	
		
			
				|  |  | -          "queryVector": [4, 3.4, -0.2]
 | 
	
		
			
				|  |  | +          "query_vector": {"2": 0.5, "10" : 111.3, "50": -1.3, "113": 14.8, "4545": 156.0}
 | 
	
		
			
				|  |  |          }
 | 
	
		
			
				|  |  |        }
 | 
	
		
			
				|  |  |      }
 | 
	
		
			
				|  |  |    }
 | 
	
		
			
				|  |  |  }
 | 
	
		
			
				|  |  |  --------------------------------------------------
 | 
	
		
			
				|  |  | +// TEST[continued]
 | 
	
		
			
				|  |  | +// TEST[warning:The [sparse_vector] field type is deprecated and will be removed in 8.0.]
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -<1> Unlike `cosineSimilarity` that represent similarity, `l1norm` and
 | 
	
		
			
				|  |  | -`l2norm` shown below represent distances or differences. This means, that
 | 
	
		
			
				|  |  | -the more similar the vectors are, the lower the scores will be that are
 | 
	
		
			
				|  |  | -produced by the `l1norm` and `l2norm` functions.
 | 
	
		
			
				|  |  | -Thus, as we need more similar vectors to score higher,
 | 
	
		
			
				|  |  | -we reversed the output from `l1norm` and `l2norm`. Also, to avoid
 | 
	
		
			
				|  |  | -division by 0 when a document vector matches the query exactly,
 | 
	
		
			
				|  |  | -we added `1` in the denominator.
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -For sparse_vector fields, `l1normSparse` calculates L^1^ distance
 | 
	
		
			
				|  |  | +The `dotProductSparse` function calculates dot product
 | 
	
		
			
				|  |  |  between a given query vector and document vectors.
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  [source,console]
 | 
	
		
			
				|  |  |  --------------------------------------------------
 | 
	
		
			
				|  |  | -GET my_index/_search
 | 
	
		
			
				|  |  | +GET my_sparse_index/_search
 | 
	
		
			
				|  |  |  {
 | 
	
		
			
				|  |  |    "query": {
 | 
	
		
			
				|  |  |      "script_score": {
 | 
	
	
		
			
				|  | @@ -243,23 +293,27 @@ GET my_index/_search
 | 
	
		
			
				|  |  |          }
 | 
	
		
			
				|  |  |        },
 | 
	
		
			
				|  |  |        "script": {
 | 
	
		
			
				|  |  | -        "source": "1 / (1 + l1normSparse(params.queryVector, doc['my_sparse_vector']))",
 | 
	
		
			
				|  |  | -        "params": {
 | 
	
		
			
				|  |  | -          "queryVector": {"2": 0.5, "10" : 111.3, "50": -1.3, "113": 14.8, "4545": 156.0}
 | 
	
		
			
				|  |  | +        "source": """
 | 
	
		
			
				|  |  | +          double value = dotProductSparse(params.query_vector, doc['my_sparse_vector']);
 | 
	
		
			
				|  |  | +          return sigmoid(1, Math.E, -value);
 | 
	
		
			
				|  |  | +        """,
 | 
	
		
			
				|  |  | +         "params": {
 | 
	
		
			
				|  |  | +          "query_vector": {"2": 0.5, "10" : 111.3, "50": -1.3, "113": 14.8, "4545": 156.0}
 | 
	
		
			
				|  |  |          }
 | 
	
		
			
				|  |  |        }
 | 
	
		
			
				|  |  |      }
 | 
	
		
			
				|  |  |    }
 | 
	
		
			
				|  |  |  }
 | 
	
		
			
				|  |  |  --------------------------------------------------
 | 
	
		
			
				|  |  | +// TEST[continued]
 | 
	
		
			
				|  |  | +// TEST[warning:The [sparse_vector] field type is deprecated and will be removed in 8.0.]
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -For dense_vector fields, `l2norm` calculates L^2^ distance
 | 
	
		
			
				|  |  | -(Euclidean distance) between a given query vector and
 | 
	
		
			
				|  |  | -document vectors.
 | 
	
		
			
				|  |  | +The `l1normSparse` function calculates L^1^ distance
 | 
	
		
			
				|  |  | +between a given query vector and document vectors.
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  [source,console]
 | 
	
		
			
				|  |  |  --------------------------------------------------
 | 
	
		
			
				|  |  | -GET my_index/_search
 | 
	
		
			
				|  |  | +GET my_sparse_index/_search
 | 
	
		
			
				|  |  |  {
 | 
	
		
			
				|  |  |    "query": {
 | 
	
		
			
				|  |  |      "script_score": {
 | 
	
	
		
			
				|  | @@ -273,22 +327,24 @@ GET my_index/_search
 | 
	
		
			
				|  |  |          }
 | 
	
		
			
				|  |  |        },
 | 
	
		
			
				|  |  |        "script": {
 | 
	
		
			
				|  |  | -        "source": "1 / (1 + l2norm(params.queryVector, doc['my_dense_vector']))",
 | 
	
		
			
				|  |  | +        "source": "1 / (1 + l1normSparse(params.queryVector, doc['my_sparse_vector']))",
 | 
	
		
			
				|  |  |          "params": {
 | 
	
		
			
				|  |  | -          "queryVector": [4, 3.4, -0.2]
 | 
	
		
			
				|  |  | +          "queryVector": {"2": 0.5, "10" : 111.3, "50": -1.3, "113": 14.8, "4545": 156.0}
 | 
	
		
			
				|  |  |          }
 | 
	
		
			
				|  |  |        }
 | 
	
		
			
				|  |  |      }
 | 
	
		
			
				|  |  |    }
 | 
	
		
			
				|  |  |  }
 | 
	
		
			
				|  |  |  --------------------------------------------------
 | 
	
		
			
				|  |  | +// TEST[continued]
 | 
	
		
			
				|  |  | +// TEST[warning:The [sparse_vector] field type is deprecated and will be removed in 8.0.]
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -Similarly, for sparse_vector fields, `l2normSparse` calculates L^2^ distance
 | 
	
		
			
				|  |  | +The `l2normSparse` function calculates L^2^ distance
 | 
	
		
			
				|  |  |  between a given query vector and document vectors.
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  [source,console]
 | 
	
		
			
				|  |  |  --------------------------------------------------
 | 
	
		
			
				|  |  | -GET my_index/_search
 | 
	
		
			
				|  |  | +GET my_sparse_index/_search
 | 
	
		
			
				|  |  |  {
 | 
	
		
			
				|  |  |    "query": {
 | 
	
		
			
				|  |  |      "script_score": {
 | 
	
	
		
			
				|  | @@ -311,15 +367,5 @@ GET my_index/_search
 | 
	
		
			
				|  |  |    }
 | 
	
		
			
				|  |  |  }
 | 
	
		
			
				|  |  |  --------------------------------------------------
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -NOTE: If a document doesn't have a value for a vector field on which
 | 
	
		
			
				|  |  | -a vector function is executed, an error will be thrown.
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -You can check if a document has a value for the field `my_vector` by
 | 
	
		
			
				|  |  | -`doc['my_vector'].size() == 0`. Your overall script can look like this:
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -[source,js]
 | 
	
		
			
				|  |  | ---------------------------------------------------
 | 
	
		
			
				|  |  | -"source": "doc['my_vector'].size() == 0 ? 0 : cosineSimilarity(params.queryVector, doc['my_vector'])"
 | 
	
		
			
				|  |  | ---------------------------------------------------
 | 
	
		
			
				|  |  | -// NOTCONSOLE
 | 
	
		
			
				|  |  | +// TEST[continued]
 | 
	
		
			
				|  |  | +// TEST[warning:The [sparse_vector] field type is deprecated and will be removed in 8.0.]
 |