Browse Source

Update Semantic Query To Handle Zero Size Responses (#116277) (#116978)

Mike Pellegrini 11 months ago
parent
commit
9c64fabda7

+ 6 - 0
docs/changelog/116277.yaml

@@ -0,0 +1,6 @@
+pr: 116277
+summary: Update Semantic Query To Handle Zero Size Responses
+area: Vector Search
+type: bug
+issues:
+  - 116083

+ 2 - 1
x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java

@@ -36,7 +36,8 @@ public class InferenceFeatures implements FeatureSpecification {
         return Set.of(
             SemanticTextFieldMapper.SEMANTIC_TEXT_IN_OBJECT_FIELD_FIX,
             SemanticTextFieldMapper.SEMANTIC_TEXT_SINGLE_FIELD_UPDATE_FIX,
-            SemanticTextFieldMapper.SEMANTIC_TEXT_DELETE_FIX
+            SemanticTextFieldMapper.SEMANTIC_TEXT_DELETE_FIX,
+            SemanticTextFieldMapper.SEMANTIC_TEXT_ZERO_SIZE_FIX
         );
     }
 }

+ 10 - 2
x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java

@@ -69,6 +69,7 @@ import java.util.Optional;
 import java.util.Set;
 import java.util.function.Function;
 
+import static org.elasticsearch.search.SearchService.DEFAULT_SIZE;
 import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.CHUNKED_EMBEDDINGS_FIELD;
 import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.CHUNKED_TEXT_FIELD;
 import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.CHUNKS_FIELD;
@@ -91,6 +92,7 @@ public class SemanticTextFieldMapper extends FieldMapper implements InferenceFie
     public static final NodeFeature SEMANTIC_TEXT_IN_OBJECT_FIELD_FIX = new NodeFeature("semantic_text.in_object_field_fix");
     public static final NodeFeature SEMANTIC_TEXT_SINGLE_FIELD_UPDATE_FIX = new NodeFeature("semantic_text.single_field_update_fix");
     public static final NodeFeature SEMANTIC_TEXT_DELETE_FIX = new NodeFeature("semantic_text.delete_fix");
+    public static final NodeFeature SEMANTIC_TEXT_ZERO_SIZE_FIX = new NodeFeature("semantic_text.zero_size_fix");
 
     public static final String CONTENT_TYPE = "semantic_text";
     public static final String DEFAULT_ELSER_2_INFERENCE_ID = DEFAULT_ELSER_ID;
@@ -507,7 +509,7 @@ public class SemanticTextFieldMapper extends FieldMapper implements InferenceFie
             return fieldInfos.fieldInfo(getEmbeddingsFieldName(name())) != null;
         }
 
-        public QueryBuilder semanticQuery(InferenceResults inferenceResults, float boost, String queryName) {
+        public QueryBuilder semanticQuery(InferenceResults inferenceResults, Integer requestSize, float boost, String queryName) {
             String nestedFieldPath = getChunksFieldName(name());
             String inferenceResultsFieldName = getEmbeddingsFieldName(name());
             QueryBuilder childQueryBuilder;
@@ -551,7 +553,13 @@ public class SemanticTextFieldMapper extends FieldMapper implements InferenceFie
                             );
                         }
 
-                        yield new KnnVectorQueryBuilder(inferenceResultsFieldName, inference, null, null, null);
+                        Integer k = requestSize;
+                        if (k != null) {
+                            // Ensure that k is at least the default size so that aggregations work when size is set to 0 in the request
+                            k = Math.max(k, DEFAULT_SIZE);
+                        }
+
+                        yield new KnnVectorQueryBuilder(inferenceResultsFieldName, inference, k, null, null);
                     }
                     default -> throw new IllegalStateException(
                         "Field ["

+ 1 - 1
x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java

@@ -166,7 +166,7 @@ public class SemanticQueryBuilder extends AbstractQueryBuilder<SemanticQueryBuil
                 );
             }
 
-            return semanticTextFieldType.semanticQuery(inferenceResults, boost(), queryName());
+            return semanticTextFieldType.semanticQuery(inferenceResults, searchExecutionContext.requestSize(), boost(), queryName());
         } else {
             throw new IllegalArgumentException(
                 "Field [" + fieldName + "] of type [" + fieldType.typeName() + "] does not support " + NAME + " queries"

+ 114 - 0
x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/40_semantic_text_query.yml

@@ -878,3 +878,117 @@ setup:
 
   - match: { hits.total.value: 1 }
   - match: { hits.hits.0._id: "doc_1" }
+
+---
+"Query using a sparse embedding model with size set to zero":
+  - requires:
+      cluster_features: "semantic_text.zero_size_fix"
+      reason: zero size fix added in 8.16.1 & 8.15.5
+
+  - do:
+      indices.create:
+        index: test-sparse-index-with-agg-id
+        body:
+          mappings:
+            properties:
+              inference_field:
+                type: semantic_text
+                inference_id: sparse-inference-id
+              non_inference_field:
+                type: text
+              agg_id:
+                type: keyword
+
+  - do:
+      index:
+        index: test-sparse-index-with-agg-id
+        id: doc_1
+        body:
+          inference_field: "inference test"
+          agg_id: "doc_1"
+
+  - do:
+      index:
+        index: test-sparse-index-with-agg-id
+        id: doc_2
+        body:
+          non_inference_field: "non-inference test"
+          agg_id: "doc_2"
+        refresh: true
+
+  - do:
+      search:
+        index: test-sparse-index-with-agg-id
+        body:
+          size: 0
+          query:
+            semantic:
+              field: "inference_field"
+              query: "inference test"
+          aggs:
+            agg_ids:
+              terms:
+                field: agg_id
+
+  - match: { hits.total.value: 1 }
+  - length: { hits.hits: 0 }
+  - length: { aggregations.agg_ids.buckets: 1 }
+  - match: { aggregations.agg_ids.buckets.0.key: "doc_1" }
+  - match: { aggregations.agg_ids.buckets.0.doc_count: 1 }
+
+---
+"Query using a dense embedding model with size set to zero":
+  - requires:
+      cluster_features: "semantic_text.zero_size_fix"
+      reason: zero size fix added in 8.16.1 & 8.15.5
+
+  - do:
+      indices.create:
+        index: test-dense-index-with-agg-id
+        body:
+          mappings:
+            properties:
+              inference_field:
+                type: semantic_text
+                inference_id: dense-inference-id
+              non_inference_field:
+                type: text
+              agg_id:
+                type: keyword
+
+  - do:
+      index:
+        index: test-dense-index-with-agg-id
+        id: doc_1
+        body:
+          inference_field: "inference test"
+          agg_id: "doc_1"
+
+  - do:
+      index:
+        index: test-dense-index-with-agg-id
+        id: doc_2
+        body:
+          non_inference_field: "non-inference test"
+          agg_id: "doc_2"
+        refresh: true
+
+  - do:
+      search:
+        index: test-dense-index-with-agg-id
+        body:
+          size: 0
+          query:
+            semantic:
+              field: "inference_field"
+              query: "inference test"
+          aggs:
+            agg_ids:
+              terms:
+                field: agg_id
+
+  - match: { hits.total.value: 1 }
+  - length: { hits.hits: 0 }
+  - length: { aggregations.agg_ids.buckets: 1 }
+  - match: { aggregations.agg_ids.buckets.0.key: "doc_1" }
+  - match: { aggregations.agg_ids.buckets.0.doc_count: 1 }