Jelajahi Sumber

Unified highlighter should respect no_match_size with number_of_fragments set to 0 (#41069)

The unified highlighter returns the first sentence of the text when number_of_fragments
is set to 0 (full highlighting). This is a legacy of the removed postings highlighter
that was based on sentence break only. This commit changes this behavior in order
to respect the provided no_match_size value when number_of_fragments is set to 0.
This means that the behavior will be consistent for any value of the number_of_fragments option.

Closes #41066
Jim Ferenczi 6 tahun lalu
induk
melakukan
8dd5dee7eb

+ 0 - 37
server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/UnifiedHighlighter.java

@@ -124,8 +124,6 @@ public class UnifiedHighlighter implements Highlighter {
                 "Failed to highlight field [" + highlighterContext.fieldName + "]", e);
         }
 
-        snippets = filterSnippets(snippets, field.fieldOptions().numberOfFragments());
-
         if (field.fieldOptions().scoreOrdered()) {
             //let's sort the snippets by score if needed
             CollectionUtil.introSort(snippets, (o1, o2) -> Double.compare(o2.getScore(), o1.getScore()));
@@ -185,41 +183,6 @@ public class UnifiedHighlighter implements Highlighter {
         }
     }
 
-    protected static List<Snippet> filterSnippets(List<Snippet> snippets, int numberOfFragments) {
-
-        //We need to filter the snippets as due to no_match_size we could have
-        //either highlighted snippets or non highlighted ones and we don't want to mix those up
-        List<Snippet> filteredSnippets = new ArrayList<>(snippets.size());
-        for (Snippet snippet : snippets) {
-            if (snippet.isHighlighted()) {
-                filteredSnippets.add(snippet);
-            }
-        }
-
-        //if there's at least one highlighted snippet, we return all the highlighted ones
-        //otherwise we return the first non highlighted one if available
-        if (filteredSnippets.size() == 0) {
-            if (snippets.size() > 0) {
-                Snippet snippet = snippets.get(0);
-                //if we tried highlighting the whole content using whole break iterator (as number_of_fragments was 0)
-                //we need to return the first sentence of the content rather than the whole content
-                if (numberOfFragments == 0) {
-                    BreakIterator bi = BreakIterator.getSentenceInstance(Locale.ROOT);
-                    String text = snippet.getText();
-                    bi.setText(text);
-                    int next = bi.next();
-                    if (next != BreakIterator.DONE) {
-                        String newText = text.substring(0, next).trim();
-                        snippet = new Snippet(newText, snippet.getScore(), snippet.isHighlighted());
-                    }
-                }
-                filteredSnippets.add(snippet);
-            }
-        }
-
-        return filteredSnippets;
-    }
-
     protected static String convertFieldValue(MappedFieldType type, Object value) {
         if (value instanceof BytesRef) {
             return type.valueForDisplay(value).toString();

+ 5 - 3
server/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java

@@ -1715,9 +1715,11 @@ public class HighlighterSearchIT extends ESIntegTestCase {
         assertHighlight(response, 0, "text", 0, 1, equalTo("I am pretty long so some"));
 
         // We can also ask for a fragment longer than the input string and get the whole string
-        field.highlighterType("plain").noMatchSize(text.length() * 2);
-        response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get();
-        assertHighlight(response, 0, "text", 0, 1, equalTo(text));
+        for (String type : new String[] { "plain", "unified" }) {
+            field.highlighterType(type).noMatchSize(text.length() * 2).numOfFragments(0);
+            response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get();
+            assertHighlight(response, 0, "text", 0, 1, equalTo(text));
+        }
 
         field.highlighterType("fvh");
         response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get();