Browse Source

Fix bug where fvh fragments could be loaded from wrong doc (#65641)

This PR fixes a regression where fvh fragments could be loaded from the wrong
document _source.

Some `FragmentsBuilder` implementations contain a `SourceLookup` to load from
_source. The lookup should be positioned to load from the current hit document.
However, since `FragmentsBuilder` are cached and shared across hits, the lookup
is never updated to load from the new documents. This means we accidentally
load _source from a different document.

The regression was introduced in #60179, which started storing `SourceLookup`
on `FragmentsBuilder`.

Fixes #65533.
Julie Tibshirani 4 years ago
parent
commit
ddf1f4cdb8

+ 95 - 26
rest-api-spec/src/main/resources/rest-api-spec/test/search.highlight/20_fvh.yml

@@ -1,23 +1,42 @@
 setup:
   - do:
       indices.create:
-          index: test
-          body:
-            mappings:
-              "properties":
-                "title":
-                   "type": "text"
-                   "term_vector": "with_positions_offsets"
-                "description":
-                   "type": "text"
-                   "term_vector": "with_positions_offsets"
+        index: test
+        body:
+          mappings:
+            "properties":
+              "id":
+                "type": "integer"
+              "title":
+                "type": "text"
+                "term_vector": "with_positions_offsets"
+              "description":
+                "type": "text"
+                "term_vector": "with_positions_offsets"
+              "nested":
+                "type": "nested"
+                "properties":
+                  "title":
+                    "type": "text"
+                    "term_vector": "with_positions_offsets"
+  - do:
+      index:
+        index: test
+        body:
+          id: 1
+          "title" : "The quick brown fox is brown"
+          "description" : "The quick pink panther is pink"
+
   - do:
       index:
         index: test
-        id:    1
         body:
-            "title" : "The quick brown fox is brown"
-            "description" : "The quick pink panther is pink"
+          id: 2
+          "title" : "The quick blue fox is blue"
+          "nested":
+            - "title": "purple octopus"
+            - "title": "purple fish"
+
   - do:
       indices.refresh: {}
 
@@ -27,19 +46,69 @@ setup:
       search:
         rest_total_hits_as_int: true
         body:
-         highlight:
-          type: fvh
-          fields:
-            description:
-              type: fvh
-              highlight_query:
-                prefix:
-                  description: br
-            title:
-              type: fvh
-              highlight_query:
-                prefix:
-                  title: br
+          highlight:
+            type: fvh
+            fields:
+              description:
+                type: fvh
+                highlight_query:
+                  prefix:
+                    description: br
+              title:
+                type: fvh
+                highlight_query:
+                  prefix:
+                    title: br
 
   - match: {hits.hits.0.highlight.title.0: "The quick <em>brown</em> fox is <em>brown</em>"}
   - is_false: hits.hits.0.highlight.description
+
+---
+"Highlight multiple documents":
+  - skip:
+      version: " - 7.99.99"
+      reason: Bug fix not yet backported
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        body:
+          query:
+            match:
+              title: fox
+          sort: ["id"]
+          highlight:
+            type: fvh
+            fields:
+              title:
+                type: fvh
+
+  - match: {hits.hits.0.highlight.title.0: "The quick brown <em>fox</em> is brown"}
+  - is_false: hits.hits.0.highlight.description
+  - match: {hits.hits.1.highlight.title.0: "The quick blue <em>fox</em> is blue"}
+  - is_false: hits.hits.1.highlight.description
+
+---
+"Highlight multiple nested documents":
+  - skip:
+      version: " - 7.99.99"
+      reason: Bug fix not yet backported
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        body:
+          query:
+            nested:
+              path: nested
+              query:
+                match:
+                  nested.title: purple
+              inner_hits:
+                name: nested_hits
+                highlight:
+                  type: fvh
+                  fields:
+                    nested.title:
+                      type: fvh
+
+  - match: {hits.hits.0.inner_hits.nested_hits.hits.hits.0.highlight.nested\\.title.0: "<em>purple</em> octopus"}
+  - match: {hits.hits.0.inner_hits.nested_hits.hits.hits.1.highlight.nested\\.title.0: "<em>purple</em> fish"}

+ 43 - 35
server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/FastVectorHighlighter.java

@@ -41,6 +41,7 @@ import org.elasticsearch.index.mapper.TextSearchInfo;
 import org.elasticsearch.search.fetch.FetchSubPhase;
 import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext.Field;
 import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext.FieldOptions;
+import org.elasticsearch.search.lookup.SourceLookup;
 
 import java.io.IOException;
 import java.text.BreakIterator;
@@ -48,6 +49,7 @@ import java.util.Collections;
 import java.util.HashMap;
 import java.util.Locale;
 import java.util.Map;
+import java.util.function.Function;
 
 public class FastVectorHighlighter implements Highlighter {
     private static final BoundaryScanner DEFAULT_SIMPLE_BOUNDARY_SCANNER = new SimpleBoundaryScanner();
@@ -89,42 +91,16 @@ public class FastVectorHighlighter implements Highlighter {
         FieldHighlightEntry entry = cache.fields.get(fieldType);
         if (entry == null) {
             FragListBuilder fragListBuilder;
-            BaseFragmentsBuilder fragmentsBuilder;
-
-            final BoundaryScanner boundaryScanner = getBoundaryScanner(field);
             if (field.fieldOptions().numberOfFragments() == 0) {
                 fragListBuilder = new SingleFragListBuilder();
-
-                if (!forceSource && fieldType.isStored()) {
-                    fragmentsBuilder = new SimpleFragmentsBuilder(fieldType, fixBrokenAnalysis, field.fieldOptions().preTags(),
-                        field.fieldOptions().postTags(), boundaryScanner);
-                } else {
-                    fragmentsBuilder = new SourceSimpleFragmentsBuilder(fieldType, fixBrokenAnalysis, hitContext.sourceLookup(),
-                        field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner);
-                }
             } else {
                 fragListBuilder = field.fieldOptions().fragmentOffset() == -1 ?
                     new SimpleFragListBuilder() : new SimpleFragListBuilder(field.fieldOptions().fragmentOffset());
-                if (field.fieldOptions().scoreOrdered()) {
-                    if (!forceSource && fieldType.isStored()) {
-                        fragmentsBuilder = new ScoreOrderFragmentsBuilder(field.fieldOptions().preTags(),
-                            field.fieldOptions().postTags(), boundaryScanner);
-                    } else {
-                        fragmentsBuilder = new SourceScoreOrderFragmentsBuilder(fieldType, fixBrokenAnalysis, hitContext.sourceLookup(),
-                            field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner);
-                    }
-                } else {
-                    if (!forceSource && fieldType.isStored()) {
-                        fragmentsBuilder = new SimpleFragmentsBuilder(fieldType, fixBrokenAnalysis, field.fieldOptions().preTags(),
-                            field.fieldOptions().postTags(), boundaryScanner);
-                    } else {
-                        fragmentsBuilder =
-                            new SourceSimpleFragmentsBuilder(fieldType, fixBrokenAnalysis, hitContext.sourceLookup(),
-                                field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner);
-                    }
-                }
             }
-            fragmentsBuilder.setDiscreteMultiValueHighlighting(termVectorMultiValue);
+
+            Function<SourceLookup, FragmentsBuilder> fragmentsBuilderSupplier = fragmentsBuilderSupplier(
+                field, fieldType, forceSource, fixBrokenAnalysis);
+
             entry = new FieldHighlightEntry();
             if (field.fieldOptions().requireFieldMatch()) {
                 /*
@@ -142,7 +118,7 @@ public class FastVectorHighlighter implements Highlighter {
                     hitContext.topLevelReader(), true, field.fieldOptions().requireFieldMatch());
             }
             entry.fragListBuilder = fragListBuilder;
-            entry.fragmentsBuilder = fragmentsBuilder;
+            entry.fragmentsBuilderSupplier = fragmentsBuilderSupplier;
             if (cache.fvh == null) {
                 // parameters to FVH are not requires since:
                 // first two booleans are not relevant since they are set on the CustomFieldQuery
@@ -161,6 +137,7 @@ public class FastVectorHighlighter implements Highlighter {
         cache.fvh.setPhraseLimit(field.fieldOptions().phraseLimit());
 
         String[] fragments;
+        FragmentsBuilder fragmentsBuilder = entry.fragmentsBuilderSupplier.apply(hitContext.sourceLookup());
 
         // a HACK to make highlighter do highlighting, even though its using the single frag list builder
         int numberOfFragments = field.fieldOptions().numberOfFragments() == 0 ?
@@ -172,12 +149,12 @@ public class FastVectorHighlighter implements Highlighter {
         if (field.fieldOptions().matchedFields() != null && !field.fieldOptions().matchedFields().isEmpty()) {
             fragments = cache.fvh.getBestFragments(fieldQuery, hitContext.reader(), hitContext.docId(),
                 fieldType.name(), field.fieldOptions().matchedFields(), fragmentCharSize,
-                numberOfFragments, entry.fragListBuilder, entry.fragmentsBuilder, field.fieldOptions().preTags(),
+                numberOfFragments, entry.fragListBuilder, fragmentsBuilder, field.fieldOptions().preTags(),
                 field.fieldOptions().postTags(), encoder);
         } else {
             fragments = cache.fvh.getBestFragments(fieldQuery, hitContext.reader(), hitContext.docId(),
                 fieldType.name(), fragmentCharSize, numberOfFragments, entry.fragListBuilder,
-                entry.fragmentsBuilder, field.fieldOptions().preTags(), field.fieldOptions().postTags(), encoder);
+                fragmentsBuilder, field.fieldOptions().preTags(), field.fieldOptions().postTags(), encoder);
         }
 
         if (CollectionUtils.isEmpty(fragments) == false) {
@@ -190,7 +167,7 @@ public class FastVectorHighlighter implements Highlighter {
             // the normal fragmentsBuilder
             FieldFragList fieldFragList = new SimpleFieldFragList(-1 /*ignored*/);
             fieldFragList.add(0, noMatchSize, Collections.emptyList());
-            fragments = entry.fragmentsBuilder.createFragments(hitContext.reader(), hitContext.docId(),
+            fragments = fragmentsBuilder.createFragments(hitContext.reader(), hitContext.docId(),
                 fieldType.name(), fieldFragList, 1, field.fieldOptions().preTags(),
                 field.fieldOptions().postTags(), encoder);
             if (CollectionUtils.isEmpty(fragments) == false) {
@@ -201,6 +178,37 @@ public class FastVectorHighlighter implements Highlighter {
         return null;
     }
 
+    private Function<SourceLookup, FragmentsBuilder> fragmentsBuilderSupplier(SearchHighlightContext.Field field,
+                                                                              MappedFieldType fieldType,
+                                                                              boolean forceSource,
+                                                                              boolean fixBrokenAnalysis) {
+        BoundaryScanner boundaryScanner = getBoundaryScanner(field);
+        FieldOptions options = field.fieldOptions();
+        Function<SourceLookup, BaseFragmentsBuilder> supplier;
+        if (!forceSource && fieldType.isStored()) {
+            if (options.numberOfFragments() != 0 && options.scoreOrdered()) {
+                supplier = ignored -> new ScoreOrderFragmentsBuilder(options.preTags(), options.postTags(), boundaryScanner);
+            } else {
+                supplier = ignored -> new SimpleFragmentsBuilder(fieldType, fixBrokenAnalysis,
+                    options.preTags(), options.postTags(), boundaryScanner);
+            }
+        } else {
+            if (options.numberOfFragments() != 0 && options.scoreOrdered()) {
+                supplier = lookup -> new SourceScoreOrderFragmentsBuilder(fieldType, fixBrokenAnalysis, lookup,
+                        options.preTags(), options.postTags(), boundaryScanner);
+            } else {
+                supplier = lookup -> new SourceSimpleFragmentsBuilder(fieldType, fixBrokenAnalysis, lookup,
+                        options.preTags(), options.postTags(), boundaryScanner);
+            }
+        }
+
+        return lookup -> {
+            BaseFragmentsBuilder builder = supplier.apply(lookup);
+            builder.setDiscreteMultiValueHighlighting(termVectorMultiValue);
+            return builder;
+        };
+    }
+
     @Override
     public boolean canHighlight(MappedFieldType ft) {
         return ft.getTextSearchInfo().termVectors() == TextSearchInfo.TermVector.OFFSETS;
@@ -238,7 +246,7 @@ public class FastVectorHighlighter implements Highlighter {
 
     private static class FieldHighlightEntry {
         public FragListBuilder fragListBuilder;
-        public FragmentsBuilder fragmentsBuilder;
+        public Function<SourceLookup, FragmentsBuilder> fragmentsBuilderSupplier;
         public FieldQuery noFieldMatchFieldQuery;
         public FieldQuery fieldMatchFieldQuery;
     }