Browse Source

fix: use the correct field name when reading data from multi fields (#84752)

When using a multi-field we need to extract data from the document
using the correct field name. That is the name of the top field.
Here we delegate extraction of the correct name to a method in the
SearchContext that is wrapped by the AggregationContext.

Issue: #82918
Salvatore Campagna 3 years ago
parent
commit
db6c58ed45

+ 5 - 0
benchmarks/src/main/java/org/elasticsearch/benchmark/search/aggregations/AggConstructionContentionBenchmark.java

@@ -351,6 +351,11 @@ public class AggConstructionContentionBenchmark {
             return false;
         }
 
+        @Override
+        public Set<String> sourcePath(String fullName) {
+            return Set.of(fullName);
+        }
+
         @Override
         public void close() {
             List<Releasable> releaseMe = new ArrayList<>(this.releaseMe);

+ 5 - 0
docs/changelog/84752.yaml

@@ -0,0 +1,5 @@
+pr: 84752
+summary: "Fix: use the correct field name when reading data from multi fields"
+area: Aggregations
+type: "bug"
+issues: []

+ 2 - 2
docs/reference/aggregations/bucket/significanttext-aggregation.asciidoc

@@ -30,8 +30,8 @@ that is significant and probably very relevant to their search. 5/10,000,000 vs
 
 ==== Basic use
 
-In the typical use case, the _foreground_ set of interest is a selection of the top-matching search results for a query 
-and the _background_set used for statistical comparisons is the index or indices from which the results were gathered.
+In the typical use case, the _foreground_ set of interest is a selection of the top-matching search results for a query
+and the _background_ set used for statistical comparisons is the index or indices from which the results were gathered.
 
 Example:
 

+ 210 - 0
rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.aggregation/470_significant_texts.yml

@@ -0,0 +1,210 @@
+setup:
+  - do:
+      indices.create:
+        index: test
+        body:
+          settings:
+            number_of_replicas: 0
+          mappings:
+            properties:
+              full_text:
+                type: text
+                analyzer: standard
+                fields:
+                  eng:
+                    type: text
+                    store: true
+                    analyzer: stop
+
+  - do:
+      bulk:
+        index: test
+        refresh: true
+        body:
+          - { "index": {} }
+          - { "full_text": "the apple a banana a pear the melon" }
+          - { "index": {} }
+          - { "full_text": "an apple a banana the pear" }
+          - { "index": { } }
+          - { "full_text": "the apple the banana the melon" }
+          - { "index": { } }
+          - { "full_text": "an apple a pear a melon" }
+          - { "index": { } }
+          - { "full_text": "an apple a melon" }
+          - { "index": { } }
+          - { "full_text": "elma muz armut kavun" }
+          - { "index": { } }
+          - { "full_text": "elma muz armut" }
+          - { "index": { } }
+          - { "full_text": "elma armut kavun" }
+
+
+---
+"significant_texts all terms":
+  - skip:
+      version: " - 8.1.99"
+      reason: bug fixed in 8.2.0
+  - do:
+      search:
+        body:
+          query:
+            match_all: {}
+          size: 0
+          aggs:
+            significant_texts:
+              sampler:
+                shard_size: 7
+              aggs:
+                keywords:
+                  significant_text:
+                    field: full_text.eng
+
+  - match: { hits.total.value: 8 }
+  - match: { hits.total.relation: "eq" }
+  - length: { aggregations.significant_texts.keywords.buckets: 4 }
+  - match: { aggregations.significant_texts.keywords.doc_count: 7 }
+  - match: { aggregations.significant_texts.keywords.bg_count: 8 }
+  - match: { aggregations.significant_texts.keywords.buckets.0.key: "apple" }
+  - match: { aggregations.significant_texts.keywords.buckets.0.doc_count: 5 }
+  - match: { aggregations.significant_texts.keywords.buckets.0.bg_count: 5 }
+  - match: { aggregations.significant_texts.keywords.buckets.1.key: "melon" }
+  - match: { aggregations.significant_texts.keywords.buckets.1.doc_count: 4 }
+  - match: { aggregations.significant_texts.keywords.buckets.1.bg_count: 4 }
+  - match: { aggregations.significant_texts.keywords.buckets.2.key: "pear" }
+  - match: { aggregations.significant_texts.keywords.buckets.2.doc_count: 3 }
+  - match: { aggregations.significant_texts.keywords.buckets.2.bg_count: 3 }
+  - match: { aggregations.significant_texts.keywords.buckets.3.key: "banana" }
+  - match: { aggregations.significant_texts.keywords.buckets.3.doc_count: 3 }
+  - match: { aggregations.significant_texts.keywords.buckets.3.bg_count: 3 }
+
+---
+"significant_texts limited size":
+  - skip:
+      version: " - 8.1.99"
+      reason: bug fixed in 8.2.0
+  - do:
+      search:
+        body:
+          query:
+            match_all: {}
+          size: 0
+          aggs:
+            significant_texts:
+              sampler:
+                shard_size: 7
+              aggs:
+                keywords:
+                  significant_text:
+                    field: full_text.eng
+                    size: 2
+
+  - match: { hits.total.value: 8 }
+  - match: { hits.total.relation: "eq" }
+  - length: { aggregations.significant_texts.keywords.buckets: 2 }
+  - match: { aggregations.significant_texts.keywords.doc_count: 7 }
+  - match: { aggregations.significant_texts.keywords.bg_count: 8 }
+  - match: { aggregations.significant_texts.keywords.buckets.0.key: "apple" }
+  - match: { aggregations.significant_texts.keywords.buckets.0.doc_count: 5 }
+  - match: { aggregations.significant_texts.keywords.buckets.0.bg_count: 5 }
+  - match: { aggregations.significant_texts.keywords.buckets.1.key: "melon" }
+  - match: { aggregations.significant_texts.keywords.buckets.1.doc_count: 4 }
+  - match: { aggregations.significant_texts.keywords.buckets.1.bg_count: 4 }
+
+---
+"significant_texts with min_doc_count":
+  - skip:
+      version: " - 8.1.99"
+      reason: bug fixed in 8.2.0
+  - do:
+      search:
+        body:
+          query:
+            match_all: {}
+          size: 0
+          aggs:
+            significant_texts:
+              sampler:
+                shard_size: 7
+              aggs:
+                keywords:
+                  significant_text:
+                    field: full_text.eng
+                    min_doc_count: 5
+
+  - match: { hits.total.value: 8 }
+  - match: { hits.total.relation: "eq" }
+  - length: { aggregations.significant_texts.keywords.buckets: 1 }
+  - match: { aggregations.significant_texts.keywords.doc_count: 7 }
+  - match: { aggregations.significant_texts.keywords.bg_count: 8 }
+  - match: { aggregations.significant_texts.keywords.buckets.0.key: "apple" }
+  - match: { aggregations.significant_texts.keywords.buckets.0.doc_count: 5 }
+  - match: { aggregations.significant_texts.keywords.buckets.0.bg_count: 5 }
+
+---
+"significant_texts with exclude":
+  - skip:
+      version: " - 8.1.99"
+      reason: bug fixed in 8.2.0
+  - do:
+      search:
+        body:
+          query:
+            match_all: {}
+          size: 0
+          aggs:
+            significant_texts:
+              sampler:
+                shard_size: 7
+              aggs:
+                keywords:
+                  significant_text:
+                    field: full_text.eng
+                    exclude: "app.*"
+
+  - match: { hits.total.value: 8 }
+  - match: { hits.total.relation: "eq" }
+  - length: { aggregations.significant_texts.keywords.buckets: 3 }
+  - match: { aggregations.significant_texts.keywords.doc_count: 7 }
+  - match: { aggregations.significant_texts.keywords.bg_count: 8 }
+  - match: { aggregations.significant_texts.keywords.buckets.0.key: "melon" }
+  - match: { aggregations.significant_texts.keywords.buckets.0.doc_count: 4 }
+  - match: { aggregations.significant_texts.keywords.buckets.0.bg_count: 4 }
+  - match: { aggregations.significant_texts.keywords.buckets.1.key: "pear" }
+  - match: { aggregations.significant_texts.keywords.buckets.1.doc_count: 3 }
+  - match: { aggregations.significant_texts.keywords.buckets.1.bg_count: 3 }
+  - match: { aggregations.significant_texts.keywords.buckets.2.key: "banana" }
+  - match: { aggregations.significant_texts.keywords.buckets.2.doc_count: 3 }
+  - match: { aggregations.significant_texts.keywords.buckets.2.bg_count: 3 }
+
+---
+"significant_texts with include":
+  - skip:
+      version: " - 8.1.99"
+      reason: bug fixed in 8.2.0
+  - do:
+      search:
+        body:
+          query:
+            match_all: {}
+          size: 0
+          aggs:
+            significant_texts:
+              sampler:
+                shard_size: 7
+              aggs:
+                keywords:
+                  significant_text:
+                    field: full_text.eng
+                    include: ["melon", "banana"]
+
+  - match: { hits.total.value: 8 }
+  - match: { hits.total.relation: "eq" }
+  - length: { aggregations.significant_texts.keywords.buckets: 2 }
+  - match: { aggregations.significant_texts.keywords.doc_count: 7 }
+  - match: { aggregations.significant_texts.keywords.bg_count: 8 }
+  - match: { aggregations.significant_texts.keywords.buckets.0.key: "melon" }
+  - match: { aggregations.significant_texts.keywords.buckets.0.doc_count: 4 }
+  - match: { aggregations.significant_texts.keywords.buckets.0.bg_count: 4 }
+  - match: { aggregations.significant_texts.keywords.buckets.1.key: "banana" }
+  - match: { aggregations.significant_texts.keywords.buckets.1.doc_count: 3 }
+  - match: { aggregations.significant_texts.keywords.buckets.1.bg_count: 3 }

+ 6 - 2
server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTextAggregatorFactory.java

@@ -47,6 +47,7 @@ import org.elasticsearch.search.lookup.SourceLookup;
 import org.elasticsearch.search.profile.Timer;
 
 import java.io.IOException;
+import java.util.Arrays;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
@@ -218,13 +219,16 @@ public class SignificantTextAggregatorFactory extends AggregatorFactory {
      */
     private CollectorSource createCollectorSource() {
         Analyzer analyzer = context.getIndexAnalyzer(f -> { throw new IllegalArgumentException("No analyzer configured for field " + f); });
+        String[] fieldNames = Arrays.stream(this.sourceFieldNames)
+            .flatMap(sourceFieldName -> context.sourcePath(sourceFieldName).stream())
+            .toArray(String[]::new);
         if (context.profiling()) {
             return new ProfilingSignificantTextCollectorSource(
                 context.lookup().source(),
                 context.bigArrays(),
                 fieldType,
                 analyzer,
-                sourceFieldNames,
+                fieldNames,
                 filterDuplicateText
             );
         }
@@ -233,7 +237,7 @@ public class SignificantTextAggregatorFactory extends AggregatorFactory {
             context.bigArrays(),
             fieldType,
             analyzer,
-            sourceFieldNames,
+            fieldNames,
             filterDuplicateText
         );
     }

+ 7 - 0
server/src/main/java/org/elasticsearch/search/aggregations/support/AggregationContext.java

@@ -292,6 +292,8 @@ public abstract class AggregationContext implements Releasable {
      */
     public abstract boolean isInSortOrderExecutionRequired();
 
+    public abstract Set<String> sourcePath(String fullName);
+
     /**
      * Implementation of {@linkplain AggregationContext} for production usage
      * that wraps our ubiquitous {@link SearchExecutionContext} and anything else
@@ -549,6 +551,11 @@ public abstract class AggregationContext implements Releasable {
             return inSortOrderExecutionRequired;
         }
 
+        @Override
+        public Set<String> sourcePath(String fullName) {
+            return context.sourcePath(fullName);
+        }
+
         @Override
         public void close() {
             /*

+ 5 - 0
test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java

@@ -530,6 +530,11 @@ public abstract class MapperServiceTestCase extends ESTestCase {
                 return false;
             }
 
+            @Override
+            public Set<String> sourcePath(String fullName) {
+                return Set.of(fullName);
+            }
+
             @Override
             public void close() {
                 throw new UnsupportedOperationException();