Browse Source

Add more test coverage for nested searches over flat vector indices (#130263)

PR https://github.com/elastic/elasticsearch/pull/130251 made me realize
we were missing some important coverage.

This adds nested vector query (and top-level kNN) tests for flat indices
to our YAML REST tests.
Benjamin Trent 3 months ago
parent
commit
50c2cd3e47

+ 150 - 0
rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_bbq_flat.yml

@@ -350,3 +350,153 @@ setup:
         index: bbq_flat
 
   - match: { bbq_flat.mappings.properties.vector.index_options.rescore_vector.oversample: 3.0 }
+---
+"Test nested queries":  # nested knn query and top-level knn search against a bbq_flat index
+  - do:  # create a single-shard index whose nested.vector field uses bbq_flat
+      indices.create:
+        index: bbq_flat_nested
+        body:
+          settings:
+            index:
+              number_of_shards: 1
+          mappings:
+            properties:
+              name:
+                type: keyword
+              nested:
+                type: nested
+                properties:
+                  paragraph_id:
+                    type: keyword
+                  vector:
+                    type: dense_vector
+                    dims: 64
+                    index: true
+                    similarity: max_inner_product
+                    index_options:
+                      type: bbq_flat
+
+  - do:  # doc 1: two nested paragraphs
+      index:
+        index: bbq_flat_nested
+        id: "1"
+        body:
+          nested:
+            - paragraph_id: "1"
+              vector: [ 0.077,  0.32 , -0.205,  0.63 ,  0.032,  0.201,  0.167, -0.313,
+                        0.176,  0.531, -0.375,  0.334, -0.046,  0.078, -0.349,  0.272,
+                        0.307, -0.083,  0.504,  0.255, -0.404,  0.289, -0.226, -0.132,
+                        -0.216,  0.49 ,  0.039,  0.507, -0.307,  0.107,  0.09 , -0.265,
+                        -0.285,  0.336, -0.272,  0.369, -0.282,  0.086, -0.132,  0.475,
+                        -0.224,  0.203,  0.439,  0.064,  0.246, -0.396,  0.297,  0.242,
+                        -0.028,  0.321, -0.022, -0.009, -0.001 , 0.031, -0.533,   0.45,
+                        -0.028,  0.321, -0.022, -0.009, -0.001 , 0.031, -0.533,   0.45 ]
+            - paragraph_id: "2"
+              vector: [ 0.7,  0.2 , 0.205,  0.63 ,  0.032,  0.201,  0.167, 0.313,
+                        0.176,  0.1, 0.375,  0.334, 0.046,  0.078, 0.349,  0.272,
+                        0.307, 0.083,  0.504,  0.255, 0.404,  0.289, 0.226, 0.132,
+                        0.216,  0.49 ,  0.039,  0.507, -0.307,  0.107,  0.09 , 0.265,
+                        0.285,  0.336, 0.272,  0.369, -0.282,  0.086, 0.132,  0.475,
+                        0.224,  0.203,  0.439,  0.064,  0.246, 0.396,  0.297,  0.242,
+                        0.224,  0.203,  0.439,  0.064,  0.246, 0.396,  0.297,  0.242,
+                        0.028,  0.321, 0.022, 0.009, 0.001 ,   0.031, -0.533,   0.45]
+  - do:  # doc 2: three nested paragraphs
+      index:
+        index: bbq_flat_nested
+        id: "2"
+        body:
+          nested:
+            - paragraph_id: 0  # NOTE(review): unquoted here, quoted strings in doc 1; the field is mapped as keyword
+              vector: [ 0.196,  0.514,  0.039,  0.555, -0.042,  0.242,  0.463, -0.348,
+                        -0.08 ,  0.442, -0.067, -0.05 , -0.001,  0.298, -0.377,  0.048,
+                        0.307,  0.159,  0.278,  0.119, -0.057,  0.333, -0.289, -0.438,
+                        -0.014,  0.361, -0.169,  0.292, -0.229,  0.123,  0.031, -0.138,
+                        -0.139,  0.315, -0.216,  0.322, -0.445, -0.059,  0.071,  0.429,
+                        -0.602, -0.142,  0.11 ,  0.192,  0.259, -0.241,  0.181, -0.166,
+                        0.082,  0.107, -0.05 ,  0.155,  0.011,  0.161, -0.486,  0.569,
+                        -0.489,  0.901,  0.208,  0.011, -0.209, -0.153, -0.27, -0.013 ]
+            - paragraph_id: 2
+              vector: [ 0.196,  0.514,  0.039,  0.555, 0.042,  0.242,  0.463, -0.348,
+                        -0.08 ,  0.442, -0.067, -0.05 , -0.001,  0.298, -0.377,  0.048,
+                        0.307,  0.159,  0.278,  0.119, -0.057,  0.333, -0.289, 0.438,
+                        -0.014,  0.361, -0.169,  0.292, -0.229,  0.123,  0.031, 0.138,
+                        -0.139,  0.315, -0.216,  0.322, -0.445, -0.059,  0.071,  0.429,
+                        -0.602, 0.142,  0.11 ,  0.192,  0.259, -0.241,  0.181, 0.166,
+                        0.082,  0.107, -0.05 ,  0.155,  0.011,  0.161, -0.486,  0.569,
+                        -0.489,  0.901,  0.208,  0.011, -0.209, -0.153, -0.27, 0.013 ]
+            - paragraph_id: 3
+              vector: [ 0.196,  0.514,  0.039,  0.555, 0.042,  0.242,  0.463, -0.348,
+                        0.08 ,  0.442, -0.067, -0.05 , 0.001,  0.298, -0.377,  0.048,
+                        0.307,  0.159,  0.278,  0.119, 0.057,  0.333, -0.289, -0.438,
+                        -0.014,  0.361, -0.169,  0.292, 0.229,  0.123,  0.031, -0.138,
+                        -0.139,  0.315, -0.216,  0.322, 0.445, -0.059,  0.071,  0.429,
+                        -0.602, -0.142,  0.11 ,  0.192,  0.259, -0.241,  0.181, -0.166,
+                        0.082,  0.107, -0.05 ,  0.155,  0.011,  0.161, -0.486,  0.569,
+                        -0.489,  0.901,  0.208,  0.011, 0.209, -0.153, -0.27, -0.013 ]
+
+  - do:  # doc 3: a single nested paragraph
+      index:
+        index: bbq_flat_nested
+        id: "3"
+        body:
+          nested:
+            - paragraph_id: 0
+              vector: [ 0.139,  0.178, -0.117,  0.399,  0.014, -0.139,  0.347, -0.33 ,
+                        0.139,  0.34 , -0.052, -0.052, -0.249,  0.327, -0.288,  0.049,
+                        0.464,  0.338,  0.516,  0.247, -0.104,  0.259, -0.209, -0.246,
+                        -0.11 ,  0.323,  0.091,  0.442, -0.254,  0.195, -0.109, -0.058,
+                        -0.279,  0.402, -0.107,  0.308, -0.273,  0.019,  0.082,  0.399,
+                        -0.658, -0.03 ,  0.276,  0.041,  0.187, -0.331,  0.165,  0.017,
+                        0.171, -0.203, -0.198,  0.115, -0.007,  0.337, -0.444,  0.615,
+                        -0.657,  1.285,  0.2  , -0.062,  0.038,  0.089, -0.068, -0.058 ]
+
+  - do:  # flush the index before force-merging
+      indices.flush:
+        index: bbq_flat_nested
+
+  - do:  # force-merge down to one segment before searching
+      indices.forcemerge:
+        index: bbq_flat_nested
+        max_num_segments: 1
+
+  - do:  # knn query wrapped in a nested query on path "nested"
+      search:
+        index: bbq_flat_nested
+        body:
+          query:
+            nested:
+              path: nested
+              query:
+                knn:
+                  field: nested.vector
+                  query_vector: [0.128,  0.067, -0.08 ,  0.395, -0.11 , -0.259,  0.473, -0.393,
+                                 0.292,  0.571, -0.491,  0.444, -0.288,  0.198, -0.343,  0.015,
+                                 0.232,  0.088,  0.228,  0.151, -0.136,  0.236, -0.273, -0.259,
+                                 -0.217,  0.359, -0.207,  0.352, -0.142,  0.192, -0.061, -0.17 ,
+                                 -0.343,  0.189, -0.221,  0.32 , -0.301, -0.1  ,  0.005,  0.232,
+                                 -0.344,  0.136,  0.252,  0.157, -0.13 , -0.244,  0.193, -0.034,
+                                 -0.12 , -0.193, -0.102,  0.252, -0.185, -0.167, -0.575,  0.582,
+                                 -0.426,  0.983,  0.212,  0.204,  0.03 , -0.276, -0.425, -0.158]
+                  num_candidates: 3
+                  k: 2
+
+  - match: {hits.hits.0._id: "3"}  # only the top hit is asserted: doc 3
+
+  - do:  # same query vector, sent through the top-level knn section of the search body
+      search:
+        index: bbq_flat_nested
+        body:
+          knn:
+            field: nested.vector
+            query_vector: [0.128,  0.067, -0.08 ,  0.395, -0.11 , -0.259,  0.473, -0.393,
+                           0.292,  0.571, -0.491,  0.444, -0.288,  0.198, -0.343,  0.015,
+                           0.232,  0.088,  0.228,  0.151, -0.136,  0.236, -0.273, -0.259,
+                           -0.217,  0.359, -0.207,  0.352, -0.142,  0.192, -0.061, -0.17 ,
+                           -0.343,  0.189, -0.221,  0.32 , -0.301, -0.1  ,  0.005,  0.232,
+                           -0.344,  0.136,  0.252,  0.157, -0.13 , -0.244,  0.193, -0.034,
+                           -0.12 , -0.193, -0.102,  0.252, -0.185, -0.167, -0.575,  0.582,
+                           -0.426,  0.983,  0.212,  0.204,  0.03 , -0.276, -0.425, -0.158]
+            num_candidates: 3
+            k: 2
+
+  - match: {hits.hits.0._id: "3"}  # same expected top hit as the nested-query form

+ 100 - 0
rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_flat.yml

@@ -327,3 +327,103 @@ setup:
                 index_options:
                   type: flat
                   m: 42
+---
+"Nested flat search":  # nested knn query and top-level knn search against a flat index
+  - do:  # create a single-shard index; nested.vector is a 5-dim l2_norm dense_vector of type flat
+      indices.create:
+        index: flat_nested
+        body:
+          settings:
+            index:
+              number_of_shards: 1
+          mappings:
+            properties:
+              name:
+                type: keyword
+              nested:
+                type: nested
+                properties:
+                  paragraph_id:
+                    type: keyword
+                  vector:
+                    type: dense_vector
+                    dims: 5
+                    index: true
+                    similarity: l2_norm
+                    index_options:
+                      type: flat
+  - do:  # doc 1 (cow.jpg): two nested vectors
+      index:
+        index: flat_nested
+        id: "1"
+        body:
+          name: cow.jpg
+          nested:
+            - paragraph_id: 0
+              vector: [230.0, 300.33, -34.8988, 15.555, -200.0]
+            - paragraph_id: 1
+              vector: [240.0, 300, -3, 1, -20]
+
+  - do:  # doc 2 (moose.jpg): three nested vectors
+      index:
+        index: flat_nested
+        id: "2"
+        body:
+          name: moose.jpg
+          nested:
+            - paragraph_id: 0
+              vector: [-0.5, 100.0, -13, 14.8, -156.0]
+            - paragraph_id: 2
+              vector: [0, 100.0, 0, 14.8, -156.0]
+            - paragraph_id: 3
+              vector: [0, 1.0, 0, 1.8, -15.0]
+
+  - do:  # doc 3 (rabbit.jpg): one nested vector
+      index:
+        index: flat_nested
+        id: "3"
+        body:
+          name: rabbit.jpg
+          nested:
+            - paragraph_id: 0
+              vector: [0.5, 111.3, -13.0, 14.8, -156.0]
+
+  - do:  # refresh (no index given) before searching
+      indices.refresh: {}
+  - do:  # knn query wrapped in a nested query on path "nested"
+      search:
+        index: flat_nested
+        body:
+          fields: [ "name" ]
+          query:
+            nested:
+              path: nested
+              query:
+                knn:
+                  field: nested.vector
+                  query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ]
+                  num_candidates: 3
+                  k: 2
+
+  - match: { hits.hits.0._id: "2" }  # expected order: doc 2 first, then doc 3
+  - match: { hits.hits.0.fields.name.0: "moose.jpg" }
+
+  - match: { hits.hits.1._id: "3" }
+  - match: { hits.hits.1.fields.name.0: "rabbit.jpg" }
+
+  - do:  # same query vector, sent through the top-level knn section of the search body
+      search:
+        index: flat_nested
+        body:
+          fields: [ "name" ]
+          knn:
+            field: nested.vector
+            query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ]
+            num_candidates: 3
+            k: 2
+
+  - match: { hits.hits.0._id: "2" }  # same expected order as the nested-query form
+  - match: { hits.hits.0.fields.name.0: "moose.jpg" }
+
+  - match: { hits.hits.1._id: "3" }
+  - match: { hits.hits.1.fields.name.0: "rabbit.jpg" }

+ 101 - 0
rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_int4_flat.yml

@@ -506,3 +506,104 @@ setup:
         index: int4_flat
 
   - not_exists: int4_flat.mappings.properties.vector.index_options.rescore_vector
+---
+"Nested flat search":  # nested knn query and top-level knn search against an int4_flat index
+  - do:  # create a single-shard index; nested.vector is a 4-dim l2_norm dense_vector of type int4_flat
+      indices.create:
+        index: int4_flat_nested
+        body:
+          settings:
+            index:
+              number_of_shards: 1
+          mappings:
+            properties:
+              name:
+                type: keyword
+              nested:
+                type: nested
+                properties:
+                  paragraph_id:
+                    type: keyword
+                  vector:
+                    type: dense_vector
+                    dims: 4
+                    index: true
+                    similarity: l2_norm
+                    index_options:
+                      type: int4_flat
+  - do:  # doc 1 (cow.jpg): two nested vectors
+      index:
+        index: int4_flat_nested
+        id: "1"
+        body:
+          name: cow.jpg
+          nested:
+            - paragraph_id: 0
+              vector: [230.0, 300.33, -34.8988, 15.555 ]
+            - paragraph_id: 1
+              vector: [240.0, 300, -3, 1 ]
+
+  - do:  # doc 2 (moose.jpg): three nested vectors
+      index:
+        index: int4_flat_nested
+        id: "2"
+        body:
+          name: moose.jpg
+          nested:
+            - paragraph_id: 0
+              vector: [-0.5, 100.0, -13, 14.8]
+            - paragraph_id: 2
+              vector: [0, 100.0, 0, 14.8]
+            - paragraph_id: 3
+              vector: [0, 1.0, 0, 1.8]
+
+  - do:  # doc 3 (rabbit.jpg): one nested vector
+      index:
+        index: int4_flat_nested
+        id: "3"
+        body:
+          name: rabbit.jpg
+          nested:
+            - paragraph_id: 0
+              vector: [0.5, 111.3, -13.0, 14.8]
+
+  - do:  # refresh (no index given) before searching
+      indices.refresh: {}
+  - do:  # knn query wrapped in a nested query on path "nested"
+      search:
+        index: int4_flat_nested
+        body:
+          fields: [ "name" ]
+          query:
+            nested:
+              path: nested
+              query:
+                knn:
+                  field: nested.vector
+                  query_vector: [ -0.5, 90.0, -10, 14.8]
+                  num_candidates: 3
+                  k: 2
+
+  - match: { hits.hits.0._id: "2" }  # expected order: doc 2 first, then doc 3
+  - match: { hits.hits.0.fields.name.0: "moose.jpg" }
+
+  - match: { hits.hits.1._id: "3" }
+  - match: { hits.hits.1.fields.name.0: "rabbit.jpg" }
+
+  - do:  # same query vector, sent through the top-level knn section of the search body
+      search:
+        index: int4_flat_nested
+        body:
+          fields: [ "name" ]
+          knn:
+            field: nested.vector
+            query_vector: [ -0.5, 90.0, -10, 14.8]
+            num_candidates: 3
+            k: 2
+
+  - match: { hits.hits.0._id: "2" }  # same expected order as the nested-query form
+  - match: { hits.hits.0.fields.name.0: "moose.jpg" }
+
+  - match: { hits.hits.1._id: "3" }
+  - match: { hits.hits.1.fields.name.0: "rabbit.jpg" }
+

+ 101 - 0
rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_int8_flat.yml

@@ -447,3 +447,104 @@ setup:
         index: int8_flat
 
   - not_exists: int8_flat.mappings.properties.vector.index_options.rescore_vector
+---
+"Nested flat search":  # nested knn query and top-level knn search against an int8_flat index
+  - do:  # create a single-shard index; nested.vector is a 5-dim l2_norm dense_vector of type int8_flat
+      indices.create:
+        index: int8_flat_nested
+        body:
+          settings:
+            index:
+              number_of_shards: 1
+          mappings:
+            properties:
+              name:
+                type: keyword
+              nested:
+                type: nested
+                properties:
+                  paragraph_id:
+                    type: keyword
+                  vector:
+                    type: dense_vector
+                    dims: 5
+                    index: true
+                    similarity: l2_norm
+                    index_options:
+                      type: int8_flat
+  - do:  # doc 1 (cow.jpg): two nested vectors
+      index:
+        index: int8_flat_nested
+        id: "1"
+        body:
+          name: cow.jpg
+          nested:
+            - paragraph_id: 0
+              vector: [230.0, 300.33, -34.8988, 15.555, -200.0]
+            - paragraph_id: 1
+              vector: [240.0, 300, -3, 1, -20]
+
+  - do:  # doc 2 (moose.jpg): three nested vectors
+      index:
+        index: int8_flat_nested
+        id: "2"
+        body:
+          name: moose.jpg
+          nested:
+            - paragraph_id: 0
+              vector: [-0.5, 100.0, -13, 14.8, -156.0]
+            - paragraph_id: 2
+              vector: [0, 100.0, 0, 14.8, -156.0]
+            - paragraph_id: 3
+              vector: [0, 1.0, 0, 1.8, -15.0]
+
+  - do:  # doc 3 (rabbit.jpg): one nested vector
+      index:
+        index: int8_flat_nested
+        id: "3"
+        body:
+          name: rabbit.jpg
+          nested:
+            - paragraph_id: 0
+              vector: [0.5, 111.3, -13.0, 14.8, -156.0]
+
+  - do:  # refresh (no index given) before searching
+      indices.refresh: {}
+  - do:  # knn query wrapped in a nested query on path "nested"
+      search:
+        index: int8_flat_nested
+        body:
+          fields: [ "name" ]
+          query:
+            nested:
+              path: nested
+              query:
+                knn:
+                  field: nested.vector
+                  query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ]
+                  num_candidates: 3
+                  k: 2
+
+  - match: { hits.hits.0._id: "2" }  # expected order: doc 2 first, then doc 3
+  - match: { hits.hits.0.fields.name.0: "moose.jpg" }
+
+  - match: { hits.hits.1._id: "3" }
+  - match: { hits.hits.1.fields.name.0: "rabbit.jpg" }
+
+  - do:  # same query vector, sent through the top-level knn section of the search body
+      search:
+        index: int8_flat_nested
+        body:
+          fields: [ "name" ]
+          knn:
+            field: nested.vector
+            query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ]
+            num_candidates: 3
+            k: 2
+
+  - match: { hits.hits.0._id: "2" }  # same expected order as the nested-query form
+  - match: { hits.hits.0.fields.name.0: "moose.jpg" }
+
+  - match: { hits.hits.1._id: "3" }
+  - match: { hits.hits.1.fields.name.0: "rabbit.jpg" }
+

+ 98 - 0
rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/45_knn_search_bit_flat.yml

@@ -273,3 +273,101 @@ setup:
   - match: { hits.hits.0._score: $knn_score0 }
   - match: { hits.hits.1._score: $knn_score1 }
   - match: { hits.hits.2._score: $knn_score2 }
+---
+"Nested flat search":  # nested knn query and top-level knn search over bit vectors in a flat index
+  - do:  # create a single-shard index; nested.vector has element_type bit and index type flat
+      indices.create:
+        index: flat_nested
+        body:
+          settings:
+            index:
+              number_of_shards: 1
+          mappings:
+            properties:
+              name:
+                type: keyword
+              nested:
+                type: nested
+                properties:
+                  paragraph_id:
+                    type: keyword
+                  vector:
+                    type: dense_vector
+                    element_type: bit
+                    dims: 40  # NOTE(review): vectors below list 5 values, presumably 8 bits each — confirm
+                    index: true
+                    similarity: l2_norm
+                    index_options:
+                      type: flat
+  - do:  # doc 1 (cow.jpg): two nested vectors
+      index:
+        index: flat_nested
+        id: "1"
+        body:
+          name: cow.jpg
+          nested:
+            - paragraph_id: 0
+              vector: [-26, 44, -34, 15, 56]
+            - paragraph_id: 1
+              vector: [-26, 44, -3, 1, -20]
+
+  - do:  # doc 2 (moose.jpg): three nested vectors
+      index:
+        index: flat_nested
+        id: "2"
+        body:
+          name: moose.jpg
+          nested:
+            - paragraph_id: 0
+              vector: [0, 100, -13, 14, 100]
+            - paragraph_id: 2
+              vector: [0, 100, 0, 14, 100]
+            - paragraph_id: 3
+              vector: [0, 1, 0, 1, -15]
+
+  - do:  # doc 3 (rabbit.jpg): one nested vector
+      index:
+        index: flat_nested
+        id: "3"
+        body:
+          name: rabbit.jpg
+          nested:
+            - paragraph_id: 0
+              vector: [0, 111, 13, 14, -6]
+
+  - do:  # refresh (no index given) before searching
+      indices.refresh: {}
+  - do:  # knn query wrapped in a nested query on path "nested"
+      search:
+        index: flat_nested
+        body:
+          fields: [ "name" ]
+          query:
+            nested:
+              path: nested
+              query:
+                knn:
+                  field: nested.vector
+                  query_vector: [ 0, 90, -10, 14, 100 ]
+                  num_candidates: 3
+                  k: 2
+
+  - match: { hits.hits.0._id: "2" }  # expected order: doc 2 first, then doc 3
+  - match: { hits.hits.0.fields.name.0: "moose.jpg" }
+
+  - match: { hits.hits.1._id: "3" }
+  - match: { hits.hits.1.fields.name.0: "rabbit.jpg" }
+
+  - do:  # top-level knn section with a different query vector than the nested query above
+      search:
+        index: flat_nested
+        body:
+          fields: [ "name" ]
+          knn:
+            field: nested.vector
+            query_vector: [ 0, 90, 10, 14, -4 ]
+            num_candidates: 3
+            k: 2
+
+  - match: { hits.hits.0._id: "3" }  # expected order here: doc 3 first, then doc 2; names are not asserted
+  - match: { hits.hits.1._id: "2" }

+ 94 - 0
rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/45_knn_search_byte.yml

@@ -310,4 +310,98 @@ setup:
   - match: { hits.hits.0._score: $knn_score0 }
   - match: { hits.hits.1._score: $knn_score1 }
   - match: { hits.hits.2._score: $knn_score2 }
+---
+"Nested flat search":  # nested knn query and top-level knn search over byte vectors in a flat index
+  - do:  # create a single-shard index; nested.vector is a 5-dim byte dense_vector of type flat
+      indices.create:
+        index: flat_nested
+        body:
+          settings:
+            index:
+              number_of_shards: 1
+          mappings:
+            properties:
+              name:
+                type: keyword
+              nested:
+                type: nested
+                properties:
+                  paragraph_id:
+                    type: keyword
+                  vector:
+                    type: dense_vector
+                    element_type: byte
+                    dims: 5
+                    index: true
+                    similarity: l2_norm
+                    index_options:
+                      type: flat
+  - do:  # doc 1 (cow.jpg): two nested vectors
+      index:
+        index: flat_nested
+        id: "1"
+        body:
+          name: cow.jpg
+          nested:
+            - paragraph_id: 0
+              vector: [-26, 44, -34, 15, 56]
+            - paragraph_id: 1
+              vector: [-26, 44, -3, 1, -20]
+
+  - do:  # doc 2 (moose.jpg): three nested vectors
+      index:
+        index: flat_nested
+        id: "2"
+        body:
+          name: moose.jpg
+          nested:
+            - paragraph_id: 0
+              vector: [0, 100, -13, 14, 100]
+            - paragraph_id: 2
+              vector: [0, 100, 0, 14, 100]
+            - paragraph_id: 3
+              vector: [0, 1, 0, 1, -15]
+
+  - do:  # doc 3 (rabbit.jpg): one nested vector
+      index:
+        index: flat_nested
+        id: "3"
+        body:
+          name: rabbit.jpg
+          nested:
+            - paragraph_id: 0
+              vector: [0, 111, 13, 14, -6]
+
+  - do:  # refresh (no index given) before searching
+      indices.refresh: {}
+  - do:  # knn query wrapped in a nested query on path "nested"
+      search:
+        index: flat_nested
+        body:
+          fields: [ "name" ]
+          query:
+            nested:
+              path: nested
+              query:
+                knn:
+                  field: nested.vector
+                  query_vector: [ 0, 90, -10, 14, 100 ]
+                  num_candidates: 3
+                  k: 2
+
+  - match: { hits.hits.0._id: "2" }  # expected order: doc 2 first, then doc 1; names are not asserted
+  - match: { hits.hits.1._id: "1" }
+
+  - do:  # top-level knn section with a different query vector than the nested query above
+      search:
+        index: flat_nested
+        body:
+          fields: [ "name" ]
+          knn:
+            field: nested.vector
+            query_vector: [ 0, 90, 10, 14, -4 ]
+            num_candidates: 3
+            k: 2
 
+  - match: { hits.hits.0._id: "3" }  # expected order here: doc 3 first, then doc 1
+  - match: { hits.hits.1._id: "1" }