
Add query param to limit highlighting to specified length (#67325)

Add a `max_analyzed_offset` query parameter that lets users limit
highlighting of text fields to an offset less than or equal to
`index.highlight.max_analyzed_offset`, avoiding an exception when the
length of a text field exceeds that limit. Highlighting still takes
place, but stops at the length defined by the new parameter.

Closes: #52155
Marios Trivyzas · 4 years ago
commit f9af60bf69
14 changed files with 564 additions and 214 deletions
  1. docs/reference/index-modules.asciidoc (+1 -0)
  2. docs/reference/search/search-your-data/highlighting.asciidoc (+13 -2)
  3. plugins/mapper-annotated-text/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedTextHighlighter.java (+2 -2)
  4. plugins/mapper-annotated-text/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedTextHighlighterTests.java (+143 -73)
  5. rest-api-spec/src/main/resources/rest-api-spec/test/search.highlight/30_max_analyzed_offset.yml (+62 -2)
  6. server/src/main/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighter.java (+21 -18)
  7. server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AbstractHighlighterBuilder.java (+37 -1)
  8. server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightBuilder.java (+6 -0)
  9. server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/LimitTokenOffsetAnalyzer.java (+58 -0)
  10. server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java (+20 -8)
  11. server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SearchHighlightContext.java (+14 -0)
  12. server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/UnifiedHighlighter.java (+9 -3)
  13. server/src/test/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java (+103 -47)
  14. server/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightBuilderTests.java (+75 -58)
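Before the per-file diffs, a minimal sketch of how the new parameter is meant to be used from the Java API (the `body` field name and the `1000` cap are illustrative; `SearchSourceBuilder`, `QueryBuilders`, and `HighlightBuilder` are existing Elasticsearch classes, and `maxAnalyzedOffset` is the setter added by this commit):

    import org.elasticsearch.index.query.QueryBuilders;
    import org.elasticsearch.search.builder.SearchSourceBuilder;
    import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;

    // Cap highlighting analysis at 1000 characters for this request instead of
    // failing when a field is longer than index.highlight.max_analyzed_offset.
    SearchSourceBuilder source = new SearchSourceBuilder()
        .query(QueryBuilders.matchQuery("body", "fox"))
        .highlighter(new HighlightBuilder()
            .field("body")
            .maxAnalyzedOffset(1000));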

+ 1 - 0
docs/reference/index-modules.asciidoc

@@ -223,6 +223,7 @@ specific index module:
     The maximum number of tokens that can be produced using _analyze API.
     Defaults to `10000`.
 
+[[index-max-analyzed-offset]]
  `index.highlight.max_analyzed_offset`::
 
      The maximum number of characters that will be analyzed for a highlight request.

+ 13 - 2
docs/reference/search/search-your-data/highlighting.asciidoc

@@ -117,7 +117,7 @@ needs highlighting. The `plain` highlighter always uses plain highlighting.
 Plain highlighting for large texts may require substantial amount of time and memory.
 To protect against this, the maximum number of text characters that will be analyzed has been
 limited to 1000000. This default limit can be changed
-for a particular index with the index setting `index.highlight.max_analyzed_offset`.
+for a particular index with the index setting <<index-max-analyzed-offset,`index.highlight.max_analyzed_offset`>>.
 
 [discrete]
 [[highlighting-settings]]
@@ -242,6 +242,17 @@ require_field_match:: By default, only fields that contains a query match are
 highlighted. Set `require_field_match` to `false` to highlight all fields.
 Defaults to `true`.
 
+[[max-analyzed-offset]]
+max_analyzed_offset:: By default, the maximum number of characters
+analyzed for a highlight request is bounded by the value defined in the
+<<index-max-analyzed-offset, `index.highlight.max_analyzed_offset`>> setting,
+and an error is returned when the number of characters exceeds this limit. If
+this query setting is set to a positive value, the highlighting stops at this
+maximum, and the rest of the text is not processed (and therefore not
+highlighted) and no error is returned. The <<max-analyzed-offset,
+`max_analyzed_offset`>> query setting does *not* override the
+<<index-max-analyzed-offset, `index.highlight.max_analyzed_offset`>> index
+setting, which prevails when it is set to a lower value than the query setting.
+
 tags_schema:: Set to `styled` to use the built-in tag schema. The `styled`
 schema defines the following `pre_tags` and defines `post_tags` as
 `</em>`.
@@ -1119,4 +1130,4 @@ using the passages's `matchStarts` and `matchEnds` information:
     I'll be the <em>only</em> <em>fox</em> in the world for you.
 
 This kind of formatted strings are the final result of the highlighter returned
-to the user.
+to the user.
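To make the precedence described in the new docs concrete, here is a small illustrative sketch of the effective rule (the names are mine, not code from this change; in the unified highlighter the check additionally applies only when the offset source is ANALYSIS):

    // indexLimit: index.highlight.max_analyzed_offset
    // queryLimit: the max_analyzed_offset query setting, or null when unset
    static boolean highlightingErrors(int fieldLength, int indexLimit, Integer queryLimit) {
        // An error is raised only when the field is too long and no query cap at or
        // below the index limit is in effect; otherwise analysis is simply truncated.
        return fieldLength > indexLimit && (queryLimit == null || queryLimit > indexLimit);
    }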

+ 2 - 2
plugins/mapper-annotated-text/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedTextHighlighter.java

@@ -50,8 +50,8 @@ public class AnnotatedTextHighlighter extends UnifiedHighlighter {
     }
 
     @Override
-    protected Analyzer wrapAnalyzer(Analyzer analyzer) {
-        return new AnnotatedHighlighterAnalyzer(super.wrapAnalyzer(analyzer));
+    protected Analyzer wrapAnalyzer(Analyzer analyzer, Integer maxAnalyzedOffset) {
+        return new AnnotatedHighlighterAnalyzer(super.wrapAnalyzer(analyzer, maxAnalyzedOffset));
     }
 
     @Override

+ 143 - 73
plugins/mapper-annotated-text/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedTextHighlighterTests.java

@@ -8,6 +8,14 @@
 
 package org.elasticsearch.search.fetch.subphase.highlight;
 
+import static org.apache.lucene.search.uhighlight.CustomUnifiedHighlighter.MULTIVAL_SEP_CHAR;
+import static org.hamcrest.CoreMatchers.equalTo;
+
+import java.net.URLEncoder;
+import java.text.BreakIterator;
+import java.util.ArrayList;
+import java.util.Locale;
+
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.document.Document;
@@ -31,6 +39,7 @@ import org.apache.lucene.search.uhighlight.CustomSeparatorBreakIterator;
 import org.apache.lucene.search.uhighlight.CustomUnifiedHighlighter;
 import org.apache.lucene.search.uhighlight.Snippet;
 import org.apache.lucene.search.uhighlight.SplittingBreakIterator;
+import org.apache.lucene.search.uhighlight.UnifiedHighlighter;
 import org.apache.lucene.store.Directory;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedHighlighterAnalyzer;
@@ -38,88 +47,88 @@ import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.Ann
 import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotationAnalyzerWrapper;
 import org.elasticsearch.test.ESTestCase;
 
-import java.net.URLEncoder;
-import java.text.BreakIterator;
-import java.util.ArrayList;
-import java.util.Locale;
+public class AnnotatedTextHighlighterTests extends ESTestCase {
 
-import static org.apache.lucene.search.uhighlight.CustomUnifiedHighlighter.MULTIVAL_SEP_CHAR;
-import static org.hamcrest.CoreMatchers.equalTo;
+    private void assertHighlightOneDoc(String fieldName, String[] markedUpInputs,
+                                       Query query, Locale locale, BreakIterator breakIterator,
+                                       int noMatchSize, String[] expectedPassages) throws Exception {
 
-public class AnnotatedTextHighlighterTests extends ESTestCase {
+        assertHighlightOneDoc(fieldName, markedUpInputs, query, locale, breakIterator, noMatchSize, expectedPassages,
+                Integer.MAX_VALUE, null);
+    }
 
     private void assertHighlightOneDoc(String fieldName, String []markedUpInputs,
             Query query, Locale locale, BreakIterator breakIterator,
-            int noMatchSize, String[] expectedPassages) throws Exception {
-
-
-        // Annotated fields wrap the usual analyzer with one that injects extra tokens
-        Analyzer wrapperAnalyzer = new AnnotationAnalyzerWrapper(new StandardAnalyzer());
-        Directory dir = newDirectory();
-        IndexWriterConfig iwc = newIndexWriterConfig(wrapperAnalyzer);
-        iwc.setMergePolicy(newTieredMergePolicy(random()));
-        RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
-        FieldType ft = new FieldType(TextField.TYPE_STORED);
-        if (randomBoolean()) {
-            ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
-        } else {
-            ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
-        }
-        ft.freeze();
-        Document doc = new Document();
-        for (String input : markedUpInputs) {
-            Field field = new Field(fieldName, "", ft);
-            field.setStringValue(input);
-            doc.add(field);
-        }
-        iw.addDocument(doc);
-        DirectoryReader reader = iw.getReader();
-        IndexSearcher searcher = newSearcher(reader);
-        iw.close();
-
-        AnnotatedText[] annotations = new AnnotatedText[markedUpInputs.length];
-        for (int i = 0; i < markedUpInputs.length; i++) {
-            annotations[i] = AnnotatedText.parse(markedUpInputs[i]);
-        }
-        AnnotatedHighlighterAnalyzer hiliteAnalyzer = new AnnotatedHighlighterAnalyzer(wrapperAnalyzer);
-        hiliteAnalyzer.setAnnotations(annotations);
-        AnnotatedPassageFormatter passageFormatter = new AnnotatedPassageFormatter(new DefaultEncoder());
-        passageFormatter.setAnnotations(annotations);
-
-        ArrayList<Object> plainTextForHighlighter = new ArrayList<>(annotations.length);
-        for (int i = 0; i < annotations.length; i++) {
-            plainTextForHighlighter.add(annotations[i].textMinusMarkup);
-        }
+            int noMatchSize, String[] expectedPassages,
+            int maxAnalyzedOffset, Integer queryMaxAnalyzedOffset) throws Exception {
 
-        TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), 1, Sort.INDEXORDER);
-        assertThat(topDocs.totalHits.value, equalTo(1L));
-        String rawValue = Strings.collectionToDelimitedString(plainTextForHighlighter, String.valueOf(MULTIVAL_SEP_CHAR));
-        CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(
-            searcher,
-            hiliteAnalyzer,
-            null,
-            passageFormatter,
-            locale,
-            breakIterator,
-            "index",
-            "text",
-            query,
-            noMatchSize,
-            expectedPassages.length,
-            name -> "text".equals(name),
-            Integer.MAX_VALUE
-        );
-        highlighter.setFieldMatcher((name) -> "text".equals(name));
-        final Snippet[] snippets = highlighter.highlightField(getOnlyLeafReader(reader), topDocs.scoreDocs[0].doc, () -> rawValue);
-        assertEquals(expectedPassages.length, snippets.length);
-        for (int i = 0; i < snippets.length; i++) {
-            assertEquals(expectedPassages[i], snippets[i].getText());
+        try (Directory dir = newDirectory()) {
+            // Annotated fields wrap the usual analyzer with one that injects extra tokens
+            Analyzer wrapperAnalyzer = new AnnotationAnalyzerWrapper(new StandardAnalyzer());
+            IndexWriterConfig iwc = newIndexWriterConfig(wrapperAnalyzer);
+            iwc.setMergePolicy(newTieredMergePolicy(random()));
+            RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+            FieldType ft = new FieldType(TextField.TYPE_STORED);
+            if (randomBoolean()) {
+                ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+            } else {
+                ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
+            }
+            ft.freeze();
+            Document doc = new Document();
+            for (String input : markedUpInputs) {
+                Field field = new Field(fieldName, "", ft);
+                field.setStringValue(input);
+                doc.add(field);
+            }
+            iw.addDocument(doc);
+            try (DirectoryReader reader = iw.getReader()) {
+                IndexSearcher searcher = newSearcher(reader);
+                iw.close();
+
+                AnnotatedText[] annotations = new AnnotatedText[markedUpInputs.length];
+                for (int i = 0; i < markedUpInputs.length; i++) {
+                    annotations[i] = AnnotatedText.parse(markedUpInputs[i]);
+                }
+                AnnotatedHighlighterAnalyzer hiliteAnalyzer = new AnnotatedHighlighterAnalyzer(wrapperAnalyzer);
+                hiliteAnalyzer.setAnnotations(annotations);
+                AnnotatedPassageFormatter passageFormatter = new AnnotatedPassageFormatter(new DefaultEncoder());
+                passageFormatter.setAnnotations(annotations);
+
+                ArrayList<Object> plainTextForHighlighter = new ArrayList<>(annotations.length);
+                for (int i = 0; i < annotations.length; i++) {
+                    plainTextForHighlighter.add(annotations[i].textMinusMarkup);
+                }
+
+                TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), 1, Sort.INDEXORDER);
+                assertThat(topDocs.totalHits.value, equalTo(1L));
+                String rawValue = Strings.collectionToDelimitedString(plainTextForHighlighter, String.valueOf(MULTIVAL_SEP_CHAR));
+                CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(
+                        searcher,
+                        hiliteAnalyzer,
+                        UnifiedHighlighter.OffsetSource.ANALYSIS,
+                        passageFormatter,
+                        locale,
+                        breakIterator,
+                        "index",
+                        "text",
+                        query,
+                        noMatchSize,
+                        expectedPassages.length,
+                        name -> "text".equals(name),
+                        maxAnalyzedOffset,
+                        queryMaxAnalyzedOffset
+                );
+                highlighter.setFieldMatcher((name) -> "text".equals(name));
+                final Snippet[] snippets = highlighter.highlightField(getOnlyLeafReader(reader), topDocs.scoreDocs[0].doc, () -> rawValue);
+                assertEquals(expectedPassages.length, snippets.length);
+                for (int i = 0; i < snippets.length; i++) {
+                    assertEquals(expectedPassages[i], snippets[i].getText());
+                }
+            }
         }
-        reader.close();
-        dir.close();
     }
 
-
     public void testAnnotatedTextStructuredMatch() throws Exception {
         // Check that a structured token eg a URL can be highlighted in a query
         // on marked-up
@@ -191,4 +200,65 @@ public class AnnotatedTextHighlighterTests extends ESTestCase {
         assertHighlightOneDoc("text", markedUpInputs, query, Locale.ROOT, breakIterator, 0, expectedPassages);
     }
 
+    public void testExceedMaxAnalyzedOffset() throws Exception {
+        TermQuery query = new TermQuery(new Term("text", "exceeds"));
+        BreakIterator breakIterator = new CustomSeparatorBreakIterator(MULTIVAL_SEP_CHAR);
+        assertHighlightOneDoc("text", new String[] { "[Short Text](Short+Text)" }, query, Locale.ROOT, breakIterator, 0, new String[] {},
+                10, null);
+
+        IllegalArgumentException e = expectThrows(
+            IllegalArgumentException.class,
+            () -> assertHighlightOneDoc(
+                "text",
+                new String[] { "[Long Text exceeds](Long+Text+exceeds) MAX analyzed offset)" },
+                query,
+                Locale.ROOT,
+                breakIterator,
+                0,
+                new String[] {},
+                20,
+                null
+            )
+        );
+        assertEquals(
+            "The length [38] of field [text] in doc[0]/index[index] exceeds the [index.highlight.max_analyzed_offset] limit [20]. "
+                + "To avoid this error, set the query parameter [max_analyzed_offset] to a value less than index setting [20] and this "
+                + "will tolerate long field values by truncating them.",
+            e.getMessage()
+        );
+
+        final Integer queryMaxOffset = randomIntBetween(21, 1000);
+        e = expectThrows(
+                IllegalArgumentException.class,
+                () -> assertHighlightOneDoc(
+                        "text",
+                        new String[] { "[Long Text exceeds](Long+Text+exceeds) MAX analyzed offset)" },
+                        query,
+                        Locale.ROOT,
+                        breakIterator,
+                        0,
+                        new String[] {},
+                        20,
+                        queryMaxOffset
+                )
+        );
+        assertEquals(
+            "The length [38] of field [text] in doc[0]/index[index] exceeds the [index.highlight.max_analyzed_offset] limit [20]. "
+                + "To avoid this error, set the query parameter [max_analyzed_offset] to a value less than index setting [20] and this "
+                + "will tolerate long field values by truncating them.",
+            e.getMessage()
+        );
+
+        assertHighlightOneDoc(
+            "text",
+            new String[] { "[Long Text Exceeds](Long+Text+Exceeds) MAX analyzed offset [Long Text Exceeds](Long+Text+Exceeds)" },
+            query,
+            Locale.ROOT,
+            breakIterator,
+            0,
+            new String[] { "Long Text [Exceeds](_hit_term=exceeds) MAX analyzed offset [Long Text Exceeds](Long+Text+Exceeds)" },
+            20,
+            15
+        );
+    }
 }

+ 62 - 2
rest-api-spec/src/main/resources/rest-api-spec/test/search.highlight/30_max_analyzed_offset.yml

@@ -6,7 +6,7 @@ setup:
           body:
               settings:
                   number_of_shards: 1
-                  index.highlight.max_analyzed_offset: 10
+                  index.highlight.max_analyzed_offset: 30
               mappings:
                   properties:
                       field1:
@@ -37,6 +37,20 @@ setup:
           body: {"query" : {"match" : {"field1" : "fox"}}, "highlight" : {"type" : "unified", "fields" : {"field1" : {}}}}
   - match: { error.root_cause.0.type: "illegal_argument_exception" }
 
+---
+"Unified highlighter on a field WITHOUT OFFSETS exceeding index.highlight.max_analyzed_offset with max_analyzed_offset=20 should SUCCEED":
+
+  - skip:
+      version: " - 7.99.99"
+      reason: max_analyzed_offset query param added in 8.0
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        index: test1
+        body: {"query" : {"match" : {"field1" : "fox"}}, "highlight" : {"type" : "unified", "fields" : {"field1" : {}}, "max_analyzed_offset": "20"}}
+  - match: {hits.hits.0.highlight.field1.0: "The quick brown <em>fox</em> went to the forest and saw another fox."}
+
 
 ---
 "Plain highlighter on a field WITHOUT OFFSETS exceeding index.highlight.max_analyzed_offset should FAIL":
@@ -46,9 +60,23 @@ setup:
       search:
           rest_total_hits_as_int: true
           index: test1
-          body: {"query" : {"match" : {"field1" : "fox"}}, "highlight" : {"type" : "unified", "fields" : {"field1" : {}}}}
+          body: {"query" : {"match" : {"field1" : "fox"}}, "highlight" : {"type" : "plain", "fields" : {"field1" : {}}}}
   - match: { error.root_cause.0.type: "illegal_argument_exception" }
 
+---
+"Plain highlighter on a field WITHOUT OFFSETS exceeding index.highlight.max_analyzed_offset with max_analyzed_offset=20 should SUCCEED":
+
+  - skip:
+      version: " - 7.99.99"
+      reason: max_analyzed_offset query param added in 8.0
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        index: test1
+        body: {"query" : {"match" : {"field1" : "fox"}}, "highlight" : {"type" : "plain", "fields" : {"field1" : {}}, "max_analyzed_offset": 20}}
+  - match: {hits.hits.0.highlight.field1.0: "The quick brown <em>fox</em> went to the forest and saw another fox."}
+
 
 ---
 "Unified highlighter on a field WITH OFFSETS exceeding index.highlight.max_analyzed_offset should SUCCEED":
@@ -71,3 +99,35 @@ setup:
           index: test1
           body: {"query" : {"match" : {"field2" : "fox"}}, "highlight" : {"type" : "plain", "fields" : {"field2" : {}}}}
   - match: { error.root_cause.0.type: "illegal_argument_exception" }
+
+---
+"Plain highlighter on a field WITH OFFSETS exceeding index.highlight.max_analyzed_offset with max_analyzed_offset=20 should SUCCEED":
+
+  - skip:
+      version: " - 7.99.99"
+      reason: max_analyzed_offset query param added in 8.0
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        index: test1
+        body: {"query" : {"match" : {"field2" : "fox"}}, "highlight" : {"type" : "plain", "fields" : {"field2" : {}}, "max_analyzed_offset": 20}}
+  - match: {hits.hits.0.highlight.field2.0: "The quick brown <em>fox</em> went to the forest and saw another fox."}
+
+---
+"Plain highlighter with max_analyzed_offset < 0 should FAIL":
+
+  - skip:
+      version: " - 7.99.99"
+      reason: max_analyzed_offset query param added in 8.0
+
+  - do:
+      catch: bad_request
+      search:
+        rest_total_hits_as_int: true
+        index: test1
+        body: {"query" : {"match" : {"field2" : "fox"}}, "highlight" : {"type" : "plain", "fields" : {"field2" : {}}, "max_analyzed_offset": -10}}
+  - match: { status: 400 }
+  - match: { error.root_cause.0.type: "x_content_parse_exception" }
+  - match: { error.caused_by.type: "illegal_argument_exception" }
+  - match: { error.caused_by.reason: "[max_analyzed_offset] must be a positive integer" }

+ 21 - 18
server/src/main/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighter.java

@@ -24,6 +24,7 @@ import org.elasticsearch.common.CheckedSupplier;
 import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.search.fetch.subphase.highlight.LimitTokenOffsetAnalyzer;
 
 import java.io.IOException;
 import java.text.BreakIterator;
@@ -33,6 +34,8 @@ import java.util.Locale;
 import java.util.Set;
 import java.util.function.Predicate;
 
+import static org.elasticsearch.search.fetch.subphase.highlight.AbstractHighlighterBuilder.MAX_ANALYZED_OFFSET_FIELD;
+
 /**
  * Subclass of the {@link UnifiedHighlighter} that works for a single field in a single document.
  * Uses a custom {@link PassageFormatter}. Accepts field content as a constructor
@@ -54,6 +57,7 @@ public class CustomUnifiedHighlighter extends UnifiedHighlighter {
     private final int noMatchSize;
     private final FieldHighlighter fieldHighlighter;
     private final int maxAnalyzedOffset;
+    private final Integer queryMaxAnalyzedOffset;
 
     /**
      * Creates a new instance of {@link CustomUnifiedHighlighter}
@@ -85,8 +89,9 @@ public class CustomUnifiedHighlighter extends UnifiedHighlighter {
                                     int noMatchSize,
                                     int maxPassages,
                                     Predicate<String> fieldMatcher,
-                                    int maxAnalyzedOffset) throws IOException {
-        super(searcher, analyzer);
+                                    int maxAnalyzedOffset,
+                                    Integer queryMaxAnalyzedOffset) throws IOException {
+        super(searcher, wrapAnalyzer(analyzer, queryMaxAnalyzedOffset));
         this.offsetSource = offsetSource;
         this.breakIterator = breakIterator;
         this.breakIteratorLocale = breakIteratorLocale == null ? Locale.ROOT : breakIteratorLocale;
@@ -96,9 +101,17 @@ public class CustomUnifiedHighlighter extends UnifiedHighlighter {
         this.noMatchSize = noMatchSize;
         this.setFieldMatcher(fieldMatcher);
         this.maxAnalyzedOffset = maxAnalyzedOffset;
+        this.queryMaxAnalyzedOffset = queryMaxAnalyzedOffset;
         fieldHighlighter = getFieldHighlighter(field, query, extractTerms(query), maxPassages);
     }
 
+    protected static Analyzer wrapAnalyzer(Analyzer analyzer, Integer maxAnalyzedOffset) {
+        if (maxAnalyzedOffset != null) {
+            analyzer = new LimitTokenOffsetAnalyzer(analyzer, maxAnalyzedOffset);
+        }
+        return analyzer;
+    }
+
     /**
      * Highlights the field value.
      */
@@ -112,22 +125,13 @@ public class CustomUnifiedHighlighter extends UnifiedHighlighter {
             return null;
         }
         int fieldValueLength = fieldValue.length();
-        if ((offsetSource == OffsetSource.ANALYSIS) && (fieldValueLength > maxAnalyzedOffset)) {
+        if (((queryMaxAnalyzedOffset == null || queryMaxAnalyzedOffset > maxAnalyzedOffset) &&
+                (offsetSource == OffsetSource.ANALYSIS) && (fieldValueLength > maxAnalyzedOffset))) {
             throw new IllegalArgumentException(
-                "The length of ["
-                    + field
-                    + "] field of ["
-                    + docId
-                    + "] doc of ["
-                    + index
-                    + "] index "
-                    + "has exceeded ["
-                    + maxAnalyzedOffset
-                    + "] - maximum allowed to be analyzed for highlighting. "
-                    + "This maximum can be set by changing the ["
-                    + IndexSettings.MAX_ANALYZED_OFFSET_SETTING.getKey()
-                    + "] index level setting. "
-                    + "For large texts, indexing with offsets or term vectors is recommended!"
+                "The length [" + fieldValueLength + "] of field [" + field +"] in doc[" + docId + "]/index[" + index +"] exceeds the ["
+                    + IndexSettings.MAX_ANALYZED_OFFSET_SETTING.getKey() + "] limit [" + maxAnalyzedOffset + "]. To avoid this error, set "
+                    + "the query parameter [" + MAX_ANALYZED_OFFSET_FIELD.toString() + "] to a value less than index setting ["
+                    + maxAnalyzedOffset + "] and this will tolerate long field values by truncating them."
             );
         }
         Snippet[] result = (Snippet[]) fieldHighlighter.highlightFieldForDoc(reader, docId, fieldValue);
@@ -223,5 +227,4 @@ public class CustomUnifiedHighlighter extends UnifiedHighlighter {
         }
         return offsetSource;
     }
-
 }

+ 37 - 1
server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AbstractHighlighterBuilder.java

@@ -10,6 +10,7 @@ package org.elasticsearch.search.fetch.subphase.highlight;
 
 import org.apache.lucene.search.highlight.SimpleFragmenter;
 import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
+import org.elasticsearch.Version;
 import org.elasticsearch.common.ParseField;
 import org.elasticsearch.common.ParsingException;
 import org.elasticsearch.common.Strings;
@@ -63,6 +64,7 @@ public abstract class AbstractHighlighterBuilder<HB extends AbstractHighlighterB
     public static final ParseField OPTIONS_FIELD = new ParseField("options");
     public static final ParseField HIGHLIGHT_QUERY_FIELD = new ParseField("highlight_query");
     public static final ParseField MATCHED_FIELDS_FIELD = new ParseField("matched_fields");
+    public static final ParseField MAX_ANALYZED_OFFSET_FIELD = new ParseField("max_analyzed_offset");
 
     protected String[] preTags;
 
@@ -100,6 +102,8 @@ public abstract class AbstractHighlighterBuilder<HB extends AbstractHighlighterB
 
     protected Boolean requireFieldMatch;
 
+    protected Integer maxAnalyzedOffset;
+
     public AbstractHighlighterBuilder() {
     }
 
@@ -122,6 +126,7 @@ public abstract class AbstractHighlighterBuilder<HB extends AbstractHighlighterB
         phraseLimit = template.phraseLimit;
         options = template.options;
         requireFieldMatch = template.requireFieldMatch;
+        this.maxAnalyzedOffset = template.maxAnalyzedOffset;
     }
 
     /**
@@ -154,6 +159,9 @@ public abstract class AbstractHighlighterBuilder<HB extends AbstractHighlighterB
             options(in.readMap());
         }
         requireFieldMatch(in.readOptionalBoolean());
+        if (in.getVersion().onOrAfter(Version.V_8_0_0)) {
+            maxAnalyzedOffset(in.readOptionalInt());
+        }
     }
 
     /**
@@ -195,6 +203,9 @@ public abstract class AbstractHighlighterBuilder<HB extends AbstractHighlighterB
             out.writeMap(options);
         }
         out.writeOptionalBoolean(requireFieldMatch);
+        if (out.getVersion().onOrAfter(Version.V_8_0_0)) {
+            out.writeOptionalInt(maxAnalyzedOffset);
+        }
         doWriteTo(out);
     }
 
@@ -530,6 +541,26 @@ public abstract class AbstractHighlighterBuilder<HB extends AbstractHighlighterB
         return this.forceSource;
     }
 
+    /**
+     * Set to a non-negative value which represents the max offset used to analyze
+     * the field thus avoiding exceptions if the field exceeds this limit.
+     */
+    @SuppressWarnings("unchecked")
+    public HB maxAnalyzedOffset(Integer maxAnalyzedOffset) {
+        if (maxAnalyzedOffset != null && maxAnalyzedOffset <= 0) {
+            throw new IllegalArgumentException("[" + MAX_ANALYZED_OFFSET_FIELD.toString() + "] must be a positive integer");
+        }
+        this.maxAnalyzedOffset = maxAnalyzedOffset;
+        return (HB) this;
+    }
+
+    /**
+     * @return the value set by {@link #maxAnalyzedOffset(Integer)}
+     */
+    public Integer maxAnalyzedOffset() {
+        return this.maxAnalyzedOffset;
+    }
+
     @Override
     public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
         builder.startObject();
@@ -595,6 +626,9 @@ public abstract class AbstractHighlighterBuilder<HB extends AbstractHighlighterB
         if (phraseLimit != null) {
             builder.field(PHRASE_LIMIT_FIELD.getPreferredName(), phraseLimit);
         }
+        if (maxAnalyzedOffset != null) {
+            builder.field(MAX_ANALYZED_OFFSET_FIELD.getPreferredName(), maxAnalyzedOffset);
+        }
     }
 
     static <HB extends AbstractHighlighterBuilder<HB>> BiFunction<XContentParser, HB, HB> setupParser(
@@ -615,6 +649,7 @@ public abstract class AbstractHighlighterBuilder<HB extends AbstractHighlighterB
         parser.declareInt(HB::noMatchSize, NO_MATCH_SIZE_FIELD);
         parser.declareBoolean(HB::forceSource, FORCE_SOURCE_FIELD);
         parser.declareInt(HB::phraseLimit, PHRASE_LIMIT_FIELD);
+        parser.declareInt(HB::maxAnalyzedOffset, MAX_ANALYZED_OFFSET_FIELD);
         parser.declareObject(HB::options, (XContentParser p, Void c) -> {
             try {
                 return p.map();
@@ -648,7 +683,7 @@ public abstract class AbstractHighlighterBuilder<HB extends AbstractHighlighterB
         return Objects.hash(getClass(), Arrays.hashCode(preTags), Arrays.hashCode(postTags), fragmentSize,
                 numOfFragments, highlighterType, fragmenter, highlightQuery, order, highlightFilter,
                 forceSource, boundaryScannerType, boundaryMaxScan, Arrays.hashCode(boundaryChars), boundaryScannerLocale,
-                noMatchSize, phraseLimit, options, requireFieldMatch, doHashCode());
+                noMatchSize, phraseLimit, options, requireFieldMatch, maxAnalyzedOffset, doHashCode());
     }
 
     /**
@@ -684,6 +719,7 @@ public abstract class AbstractHighlighterBuilder<HB extends AbstractHighlighterB
                Objects.equals(phraseLimit, other.phraseLimit) &&
                Objects.equals(options, other.options) &&
                Objects.equals(requireFieldMatch, other.requireFieldMatch) &&
+               Objects.equals(maxAnalyzedOffset, other.maxAnalyzedOffset) &&
                doEquals(other);
     }
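As a usage note for the validation above, a sketch against `HighlightBuilder`, which inherits this setter (the field name is illustrative):

    HighlightBuilder hb = new HighlightBuilder().field("body");
    hb.maxAnalyzedOffset(100);  // accepted: a positive per-request cap
    hb.maxAnalyzedOffset(null); // accepted: clears the cap, deferring to the index setting
    hb.maxAnalyzedOffset(0);    // throws IllegalArgumentException:
                                // "[max_analyzed_offset] must be a positive integer"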
 

+ 6 - 0
server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightBuilder.java

@@ -49,6 +49,8 @@ public class HighlightBuilder extends AbstractHighlighterBuilder<HighlightBuilde
     public static final boolean DEFAULT_FORCE_SOURCE = false;
     /** default for whether a field should be highlighted only if a query matches that field */
     public static final boolean DEFAULT_REQUIRE_FIELD_MATCH = true;
+    /** default for whether to stop highlighting at the defined max_analyzed_offset to avoid exceptions for longer texts */
+    public static final Integer DEFAULT_MAX_ANALYZED_OFFSET = null;
     /** default for whether {@code fvh} should provide highlighting on filter clauses */
     public static final boolean DEFAULT_HIGHLIGHT_FILTER = false;
     /** default for highlight fragments being ordered by score */
@@ -84,6 +86,7 @@ public class HighlightBuilder extends AbstractHighlighterBuilder<HighlightBuilde
     static final FieldOptions defaultOptions = new SearchHighlightContext.FieldOptions.Builder()
             .preTags(DEFAULT_PRE_TAGS).postTags(DEFAULT_POST_TAGS).scoreOrdered(DEFAULT_SCORE_ORDERED)
             .highlightFilter(DEFAULT_HIGHLIGHT_FILTER).requireFieldMatch(DEFAULT_REQUIRE_FIELD_MATCH)
+            .maxAnalyzedOffset(DEFAULT_MAX_ANALYZED_OFFSET)
             .forceSource(DEFAULT_FORCE_SOURCE).fragmentCharSize(DEFAULT_FRAGMENT_CHAR_SIZE)
             .numberOfFragments(DEFAULT_NUMBER_OF_FRAGMENTS).encoder(DEFAULT_ENCODER)
             .boundaryMaxScan(SimpleBoundaryScanner.DEFAULT_MAX_SCAN).boundaryChars(SimpleBoundaryScanner.DEFAULT_BOUNDARY_CHARS)
@@ -321,6 +324,9 @@ public class HighlightBuilder extends AbstractHighlighterBuilder<HighlightBuilde
         if (highlighterBuilder.requireFieldMatch != null) {
             targetOptionsBuilder.requireFieldMatch(highlighterBuilder.requireFieldMatch);
         }
+        if (highlighterBuilder.maxAnalyzedOffset != null) {
+            targetOptionsBuilder.maxAnalyzedOffset(highlighterBuilder.maxAnalyzedOffset);
+        }
         if (highlighterBuilder.boundaryScannerType != null) {
             targetOptionsBuilder.boundaryScannerType(highlighterBuilder.boundaryScannerType);
         }

+ 58 - 0
server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/LimitTokenOffsetAnalyzer.java

@@ -0,0 +1,58 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+package org.elasticsearch.search.fetch.subphase.highlight;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.AnalyzerWrapper;
+import org.apache.lucene.analysis.miscellaneous.LimitTokenOffsetFilter;
+
+/**
+ * This analyzer truncates highlighting at the configured limit: the first token whose
+ * start offset exceeds the limit does not pass the filter and ends the stream.
+ * @see LimitTokenOffsetFilter
+ */
+public final class LimitTokenOffsetAnalyzer extends AnalyzerWrapper {
+
+    private final Analyzer delegate;
+    private final int maxOffset;
+
+    /**
+     * Build an analyzer that ends the token stream at the first token whose start offset
+     * exceeds the configured limit. See {@link LimitTokenOffsetFilter} for details.
+     *
+     * @param delegate the analyzer to wrap
+     * @param maxOffset the maximum start offset (in characters) of tokens to produce
+     */
+    public LimitTokenOffsetAnalyzer(Analyzer delegate, int maxOffset) {
+        super(delegate.getReuseStrategy());
+        this.delegate = delegate;
+        this.maxOffset = maxOffset;
+    }
+
+    @Override
+    protected Analyzer getWrappedAnalyzer(String fieldName) {
+        return delegate;
+    }
+
+    @Override
+    protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
+        return new TokenStreamComponents(
+                components.getSource(),
+                new LimitTokenOffsetFilter(components.getTokenStream(), maxOffset, false)
+        );
+    }
+
+    @Override
+    public String toString() {
+        return "LimitTokenOffsetAnalyzer("
+                + delegate.toString()
+                + ", maxOffset="
+                + maxOffset
+                + ")";
+    }
+}
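To illustrate the truncation, a standalone sketch of the wrapper in action (the input string and limit are made up; with Lucene's StandardAnalyzer the token start offsets here are 0, 6, 12, and 16):

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    Analyzer limited = new LimitTokenOffsetAnalyzer(new StandardAnalyzer(), 12);
    try (TokenStream ts = limited.tokenStream("field", "quick brown fox jumps")) {
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            System.out.println(term); // prints: quick, brown, fox
        }
        ts.end(); // "jumps" starts at offset 16 > 12, so it ends the stream
    }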

+ 20 - 8
server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java

@@ -36,6 +36,7 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
+import static org.elasticsearch.search.fetch.subphase.highlight.AbstractHighlighterBuilder.MAX_ANALYZED_OFFSET_FIELD;
 import static org.elasticsearch.search.fetch.subphase.highlight.UnifiedHighlighter.convertFieldValue;
 
 public class PlainHighlighter implements Highlighter {
@@ -89,8 +90,12 @@ public class PlainHighlighter implements Highlighter {
         int numberOfFragments = field.fieldOptions().numberOfFragments() == 0 ? 1 : field.fieldOptions().numberOfFragments();
         ArrayList<TextFragment> fragsList = new ArrayList<>();
         List<Object> textsToHighlight;
-        Analyzer analyzer = context.getSearchExecutionContext().getIndexAnalyzer(f -> Lucene.KEYWORD_ANALYZER);
         final int maxAnalyzedOffset = context.getSearchExecutionContext().getIndexSettings().getHighlightMaxAnalyzedOffset();
+        Integer queryMaxAnalyzedOffset = fieldContext.field.fieldOptions().maxAnalyzedOffset();
+        Analyzer analyzer = wrapAnalyzer(
+            context.getSearchExecutionContext().getIndexAnalyzer(f -> Lucene.KEYWORD_ANALYZER),
+            queryMaxAnalyzedOffset
+        );
 
         textsToHighlight
             = HighlightUtils.loadFieldValues(fieldType, context.getSearchExecutionContext(), hitContext, fieldContext.forceSource);
@@ -98,14 +103,14 @@ public class PlainHighlighter implements Highlighter {
         for (Object textToHighlight : textsToHighlight) {
             String text = convertFieldValue(fieldType, textToHighlight);
             int textLength = text.length();
-            if (textLength > maxAnalyzedOffset) {
+            if ((queryMaxAnalyzedOffset == null || queryMaxAnalyzedOffset > maxAnalyzedOffset) && (textLength > maxAnalyzedOffset)) {
                 throw new IllegalArgumentException(
-                    "The length of [" + fieldContext.fieldName + "] field of [" + hitContext.hit().getId() +
-                        "] doc of [" + context.getIndexName() + "] index " +
-                        "has exceeded [" + maxAnalyzedOffset + "] - maximum allowed to be analyzed for highlighting. " +
-                        "This maximum can be set by changing the [" + IndexSettings.MAX_ANALYZED_OFFSET_SETTING.getKey() +
-                        "] index level setting. " + "For large texts, indexing with offsets or term vectors, and highlighting " +
-                        "with unified or fvh highlighter is recommended!");
+                    "The length [" + textLength + "] of field [" + field +"] in doc[" + hitContext.hit().getId() + "]/index["
+                        + context.getIndexName() +"] exceeds the [" + IndexSettings.MAX_ANALYZED_OFFSET_SETTING.getKey() + "] "
+                        + "limit [" + maxAnalyzedOffset + "]. To avoid this error, set the query parameter ["
+                        + MAX_ANALYZED_OFFSET_FIELD.toString() + "] to a value less than index setting [" + maxAnalyzedOffset + "] and "
+                        + "this will tolerate long field values by truncating them."
+                );
             }
 
             try (TokenStream tokenStream = analyzer.tokenStream(fieldType.name(), text)) {
@@ -195,4 +200,11 @@ public class PlainHighlighter implements Highlighter {
             return end;
         }
     }
+
+    private Analyzer wrapAnalyzer(Analyzer analyzer, Integer maxAnalyzedOffset) {
+        if (maxAnalyzedOffset != null) {
+            return new LimitTokenOffsetAnalyzer(analyzer, maxAnalyzedOffset);
+        }
+        return analyzer;
+    }
 }

+ 14 - 0
server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SearchHighlightContext.java

@@ -91,6 +91,8 @@ public class SearchHighlightContext {
 
         private Boolean requireFieldMatch;
 
+        private Integer maxAnalyzedOffset;
+
         private String highlighterType;
 
         private Boolean forceSource;
@@ -151,6 +153,10 @@ public class SearchHighlightContext {
             return requireFieldMatch;
         }
 
+        public Integer maxAnalyzedOffset() {
+            return maxAnalyzedOffset;
+        }
+
         public String highlighterType() {
             return highlighterType;
         }
@@ -244,6 +250,11 @@ public class SearchHighlightContext {
                 return this;
             }
 
+            Builder maxAnalyzedOffset(Integer maxAnalyzedOffset) {
+                fieldOptions.maxAnalyzedOffset = maxAnalyzedOffset;
+                return this;
+            }
+
             Builder highlighterType(String type) {
                 fieldOptions.highlighterType = type;
                 return this;
@@ -333,6 +344,9 @@ public class SearchHighlightContext {
                 if (fieldOptions.requireFieldMatch == null) {
                     fieldOptions.requireFieldMatch = globalOptions.requireFieldMatch;
                 }
+                if (fieldOptions.maxAnalyzedOffset == null) {
+                    fieldOptions.maxAnalyzedOffset = globalOptions.maxAnalyzedOffset;
+                }
                 if (fieldOptions.boundaryScannerType == null) {
                     fieldOptions.boundaryScannerType = globalOptions.boundaryScannerType;
                 }

+ 9 - 3
server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/UnifiedHighlighter.java

@@ -103,7 +103,9 @@ public class UnifiedHighlighter implements Highlighter {
             : HighlightUtils.Encoders.DEFAULT;
         int maxAnalyzedOffset = fieldContext.context.getSearchExecutionContext().getIndexSettings().getHighlightMaxAnalyzedOffset();
         int numberOfFragments = fieldContext.field.fieldOptions().numberOfFragments();
-        Analyzer analyzer = wrapAnalyzer(fieldContext.context.getSearchExecutionContext().getIndexAnalyzer(f -> Lucene.KEYWORD_ANALYZER));
+        Integer queryMaxAnalyzedOffset = fieldContext.field.fieldOptions().maxAnalyzedOffset();
+        Analyzer analyzer = wrapAnalyzer(fieldContext.context.getSearchExecutionContext().getIndexAnalyzer(f -> Lucene.KEYWORD_ANALYZER),
+                queryMaxAnalyzedOffset);
         PassageFormatter passageFormatter = getPassageFormatter(fieldContext.hitContext, fieldContext.field, encoder);
         IndexSearcher searcher = fieldContext.context.searcher();
         OffsetSource offsetSource = getOffsetSource(fieldContext.fieldType);
@@ -138,7 +140,8 @@ public class UnifiedHighlighter implements Highlighter {
             fieldContext.field.fieldOptions().noMatchSize(),
             higlighterNumberOfFragments,
             fieldMatcher(fieldContext),
-            maxAnalyzedOffset
+            maxAnalyzedOffset,
+            fieldContext.field.fieldOptions().maxAnalyzedOffset()
         );
     }
 
@@ -147,7 +150,10 @@ public class UnifiedHighlighter implements Highlighter {
             field.fieldOptions().postTags()[0], encoder);
     }
 
-    protected Analyzer wrapAnalyzer(Analyzer analyzer) {
+    protected Analyzer wrapAnalyzer(Analyzer analyzer, Integer maxAnalyzedOffset) {
+        if (maxAnalyzedOffset != null) {
+            analyzer = new LimitTokenOffsetAnalyzer(analyzer, maxAnalyzedOffset);
+        }
         return analyzer;
     }
 

+ 103 - 47
server/src/test/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java

@@ -8,6 +8,12 @@
 
 package org.apache.lucene.search.uhighlight;
 
+import static org.apache.lucene.search.uhighlight.CustomUnifiedHighlighter.MULTIVAL_SEP_CHAR;
+import static org.hamcrest.CoreMatchers.equalTo;
+
+import java.text.BreakIterator;
+import java.util.Locale;
+
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.custom.CustomAnalyzer;
 import org.apache.lucene.analysis.ngram.EdgeNGramTokenizerFactory;
@@ -37,58 +43,63 @@ import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
 import org.elasticsearch.test.ESTestCase;
 
-import java.text.BreakIterator;
-import java.util.Locale;
-
-import static org.apache.lucene.search.uhighlight.CustomUnifiedHighlighter.MULTIVAL_SEP_CHAR;
-import static org.hamcrest.CoreMatchers.equalTo;
-
 public class CustomUnifiedHighlighterTests extends ESTestCase {
+
     private void assertHighlightOneDoc(String fieldName, String[] inputs, Analyzer analyzer, Query query,
                                        Locale locale, BreakIterator breakIterator,
                                        int noMatchSize, String[] expectedPassages) throws Exception {
-        Directory dir = newDirectory();
-        IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
-        iwc.setMergePolicy(newTieredMergePolicy(random()));
-        RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
-        FieldType ft = new FieldType(TextField.TYPE_STORED);
-        ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
-        ft.freeze();
-        Document doc = new Document();
-        for (String input : inputs) {
-            Field field = new Field(fieldName, "", ft);
-            field.setStringValue(input);
-            doc.add(field);
-        }
-        iw.addDocument(doc);
-        DirectoryReader reader = iw.getReader();
-        IndexSearcher searcher = newSearcher(reader);
-        iw.close();
-        TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), 1, Sort.INDEXORDER);
-        assertThat(topDocs.totalHits.value, equalTo(1L));
-        String rawValue = Strings.arrayToDelimitedString(inputs, String.valueOf(MULTIVAL_SEP_CHAR));
-        CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(
-            searcher,
-            analyzer,
-            null,
-            new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder()),
-            locale,
-            breakIterator,
-            "index",
-            "text",
-            query,
-            noMatchSize,
-            expectedPassages.length,
-            name -> "text".equals(name),
-            Integer.MAX_VALUE
-        );
-        final Snippet[] snippets = highlighter.highlightField(getOnlyLeafReader(reader), topDocs.scoreDocs[0].doc, () -> rawValue);
-        assertEquals(snippets.length, expectedPassages.length);
-        for (int i = 0; i < snippets.length; i++) {
-            assertEquals(snippets[i].getText(), expectedPassages[i]);
+
+        assertHighlightOneDoc(fieldName, inputs, analyzer, query, locale, breakIterator, noMatchSize, expectedPassages,
+                Integer.MAX_VALUE, null);
+    }
+
+    private void assertHighlightOneDoc(String fieldName, String[] inputs, Analyzer analyzer, Query query,
+                                       Locale locale, BreakIterator breakIterator,
+                                       int noMatchSize, String[] expectedPassages,
+                                       int maxAnalyzedOffset, Integer queryMaxAnalyzedOffset) throws Exception {
+        try (Directory dir = newDirectory()){
+            IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
+            iwc.setMergePolicy(newTieredMergePolicy(random()));
+            RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+            FieldType ft = new FieldType(TextField.TYPE_STORED);
+            ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+            ft.freeze();
+            Document doc = new Document();
+            for (String input : inputs) {
+                Field field = new Field(fieldName, "", ft);
+                field.setStringValue(input);
+                doc.add(field);
+            }
+            iw.addDocument(doc);
+            try (DirectoryReader reader = iw.getReader()) {
+                IndexSearcher searcher = newSearcher(reader);
+                iw.close();
+                TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), 1, Sort.INDEXORDER);
+                assertThat(topDocs.totalHits.value, equalTo(1L));
+                String rawValue = Strings.arrayToDelimitedString(inputs, String.valueOf(MULTIVAL_SEP_CHAR));
+                CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(
+                        searcher,
+                        analyzer,
+                        UnifiedHighlighter.OffsetSource.ANALYSIS,
+                        new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder()),
+                        locale,
+                        breakIterator,
+                        "index",
+                        "text",
+                        query,
+                        noMatchSize,
+                        expectedPassages.length,
+                        name -> "text".equals(name),
+                        maxAnalyzedOffset,
+                        queryMaxAnalyzedOffset
+                );
+                final Snippet[] snippets = highlighter.highlightField(getOnlyLeafReader(reader), topDocs.scoreDocs[0].doc, () -> rawValue);
+                assertEquals(snippets.length, expectedPassages.length);
+                for (int i = 0; i < snippets.length; i++) {
+                    assertEquals(snippets[i].getText(), expectedPassages[i]);
+                }
+            }
         }
-        reader.close();
-        dir.close();
     }
 
     public void testSimple() throws Exception {
@@ -255,4 +266,49 @@ public class CustomUnifiedHighlighterTests extends ESTestCase {
             analyzer, query, Locale.ROOT, BreakIterator.getSentenceInstance(Locale.ROOT), 0, outputs);
     }
 
+    public void testExceedMaxAnalyzedOffset() throws Exception {
+        TermQuery query = new TermQuery(new Term("text", "max"));
+        Analyzer analyzer = CustomAnalyzer.builder()
+                .withTokenizer(EdgeNGramTokenizerFactory.class, "minGramSize", "1", "maxGramSize", "10")
+                .build();
+
+        assertHighlightOneDoc("text", new String[] {"short text"},
+                analyzer, query, Locale.ROOT, BreakIterator.getSentenceInstance(Locale.ROOT), 0, new String[] {}, 10, null);
+
+        IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> {
+            assertHighlightOneDoc("text", new String[] {"exceeds max analyzed offset"},
+                    analyzer, query, Locale.ROOT, BreakIterator.getSentenceInstance(Locale.ROOT), 0, new String[] {}, 10, null);
+        });
+        assertEquals(
+            "The length [27] of field [text] in doc[0]/index[index] exceeds the [index.highlight.max_analyzed_offset] limit [10]. "
+                + "To avoid this error, set the query parameter [max_analyzed_offset] to a value less than index setting [10] and this "
+                + "will tolerate long field values by truncating them.",
+            e.getMessage()
+        );
+
+        final Integer queryMaxAnalyzedOffset = randomIntBetween(11, 1000);
+        e = expectThrows(IllegalArgumentException.class, () -> {
+            assertHighlightOneDoc(
+                "text",
+                new String[] { "exceeds max analyzed offset" },
+                analyzer,
+                query,
+                Locale.ROOT,
+                BreakIterator.getSentenceInstance(Locale.ROOT),
+                0,
+                new String[] {},
+                10,
+                queryMaxAnalyzedOffset
+            );
+        });
+        assertEquals(
+            "The length [27] of field [text] in doc[0]/index[index] exceeds the [index.highlight.max_analyzed_offset] limit [10]. "
+                + "To avoid this error, set the query parameter [max_analyzed_offset] to a value less than index setting [10] and this "
+                + "will tolerate long field values by truncating them.",
+            e.getMessage()
+        );
+
+        assertHighlightOneDoc("text", new String[] {"exceeds max analyzed offset"},
+                analyzer, query, Locale.ROOT, BreakIterator.getSentenceInstance(Locale.ROOT), 1, new String[] {"exceeds"}, 10, 10);
+    }
 }

+ 75 - 58
server/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightBuilderTests.java

@@ -58,6 +58,7 @@ import java.util.function.Function;
 
 import static java.util.Collections.emptyList;
 import static java.util.Collections.emptyMap;
+import static org.elasticsearch.search.fetch.subphase.highlight.AbstractHighlighterBuilder.MAX_ANALYZED_OFFSET_FIELD;
 import static org.elasticsearch.test.EqualsHashCodeTestUtils.checkEqualsAndHashCode;
 import static org.hamcrest.Matchers.containsString;
 import static org.hamcrest.Matchers.equalTo;
@@ -306,6 +307,7 @@ public class HighlightBuilderTests extends ESTestCase {
                 checkSame.accept(AbstractHighlighterBuilder::postTags, FieldOptions::postTags);
                 checkSame.accept(AbstractHighlighterBuilder::options, FieldOptions::options);
                 checkSame.accept(AbstractHighlighterBuilder::order, op -> op.scoreOrdered() ? Order.SCORE : Order.NONE);
+                checkSame.accept(AbstractHighlighterBuilder::maxAnalyzedOffset, FieldOptions::maxAnalyzedOffset);
                 assertEquals(fieldBuilder.fragmentOffset, fieldOptions.fragmentOffset());
                 if (fieldBuilder.matchedFields != null) {
                     String[] copy = Arrays.copyOf(fieldBuilder.matchedFields, fieldBuilder.matchedFields.length);
@@ -451,6 +453,13 @@ public class HighlightBuilderTests extends ESTestCase {
         assertEquals("pre_tags are set but post_tags are not set", e.getCause().getCause().getMessage());
     }
 
+    public void testInvalidMaxAnalyzedOffset() throws IOException {
+        XContentParseException e = expectParseThrows(XContentParseException.class,
+                "{ \"max_analyzed_offset\" : " + randomIntBetween(-100, 0) + "}");
+        assertThat(e.getMessage(), containsString("[highlight] failed to parse field [" + MAX_ANALYZED_OFFSET_FIELD.toString() +"]"));
+        assertThat(e.getCause().getMessage(), containsString("[max_analyzed_offset] must be a positive integer"));
+    }
+
     /**
      * test ordinals of {@link Order}, since serialization depends on it
      */
@@ -582,6 +591,9 @@ public class HighlightBuilderTests extends ESTestCase {
         if (randomBoolean()) {
             highlightBuilder.phraseLimit(randomIntBetween(0, 10));
         }
+        if (randomBoolean()) {
+            highlightBuilder.maxAnalyzedOffset(randomIntBetween(1, 100));
+        }
         if (randomBoolean()) {
             int items = randomIntBetween(0, 5);
             Map<String, Object> options = new HashMap<>(items);
@@ -608,64 +620,69 @@ public class HighlightBuilderTests extends ESTestCase {
 
     @SuppressWarnings({ "unchecked", "rawtypes" })
     private static void mutateCommonOptions(AbstractHighlighterBuilder highlightBuilder) {
-        switch (randomIntBetween(1, 16)) {
-        case 1:
-            highlightBuilder.preTags(randomStringArray(4, 6));
-            break;
-        case 2:
-            highlightBuilder.postTags(randomStringArray(4, 6));
-            break;
-        case 3:
-            highlightBuilder.fragmentSize(randomIntBetween(101, 200));
-            break;
-        case 4:
-            highlightBuilder.numOfFragments(randomIntBetween(11, 20));
-            break;
-        case 5:
-            highlightBuilder.highlighterType(randomAlphaOfLengthBetween(11, 20));
-            break;
-        case 6:
-            highlightBuilder.fragmenter(randomAlphaOfLengthBetween(11, 20));
-            break;
-        case 7:
-            highlightBuilder.highlightQuery(new TermQueryBuilder(randomAlphaOfLengthBetween(11, 20), randomAlphaOfLengthBetween(11, 20)));
-            break;
-        case 8:
-            if (highlightBuilder.order() == Order.NONE) {
-                highlightBuilder.order(Order.SCORE);
-            } else {
-                highlightBuilder.order(Order.NONE);
-            }
-            break;
-        case 9:
-            highlightBuilder.highlightFilter(toggleOrSet(highlightBuilder.highlightFilter()));
-            break;
-        case 10:
-            highlightBuilder.forceSource(toggleOrSet(highlightBuilder.forceSource()));
-            break;
-        case 11:
-            highlightBuilder.boundaryMaxScan(randomIntBetween(11, 20));
-            break;
-        case 12:
-            highlightBuilder.boundaryChars(randomAlphaOfLengthBetween(11, 20).toCharArray());
-            break;
-        case 13:
-            highlightBuilder.noMatchSize(randomIntBetween(11, 20));
-            break;
-        case 14:
-            highlightBuilder.phraseLimit(randomIntBetween(11, 20));
-            break;
-        case 15:
-            int items = 6;
-            Map<String, Object> options = new HashMap<>(items);
-            for (int i = 0; i < items; i++) {
-                options.put(randomAlphaOfLengthBetween(1, 10), randomAlphaOfLengthBetween(1, 10));
-            }
-            highlightBuilder.options(options);
-            break;
-        case 16:
-            highlightBuilder.requireFieldMatch(toggleOrSet(highlightBuilder.requireFieldMatch()));
-            break;
+        switch (randomIntBetween(1, 17)) {
+            case 1:
+                highlightBuilder.preTags(randomStringArray(4, 6));
+                break;
+            case 2:
+                highlightBuilder.postTags(randomStringArray(4, 6));
+                break;
+            case 3:
+                highlightBuilder.fragmentSize(randomIntBetween(101, 200));
+                break;
+            case 4:
+                highlightBuilder.numOfFragments(randomIntBetween(11, 20));
+                break;
+            case 5:
+                highlightBuilder.highlighterType(randomAlphaOfLengthBetween(11, 20));
+                break;
+            case 6:
+                highlightBuilder.fragmenter(randomAlphaOfLengthBetween(11, 20));
+                break;
+            case 7:
+                highlightBuilder.highlightQuery(
+                    new TermQueryBuilder(randomAlphaOfLengthBetween(11, 20), randomAlphaOfLengthBetween(11, 20))
+                );
+                break;
+            case 8:
+                if (highlightBuilder.order() == Order.NONE) {
+                    highlightBuilder.order(Order.SCORE);
+                } else {
+                    highlightBuilder.order(Order.NONE);
+                }
+                break;
+            case 9:
+                highlightBuilder.highlightFilter(toggleOrSet(highlightBuilder.highlightFilter()));
+                break;
+            case 10:
+                highlightBuilder.forceSource(toggleOrSet(highlightBuilder.forceSource()));
+                break;
+            case 11:
+                highlightBuilder.boundaryMaxScan(randomIntBetween(11, 20));
+                break;
+            case 12:
+                highlightBuilder.boundaryChars(randomAlphaOfLengthBetween(11, 20).toCharArray());
+                break;
+            case 13:
+                highlightBuilder.noMatchSize(randomIntBetween(11, 20));
+                break;
+            case 14:
+                highlightBuilder.phraseLimit(randomIntBetween(11, 20));
+                break;
+            case 15:
+                int items = 6;
+                Map<String, Object> options = new HashMap<>(items);
+                for (int i = 0; i < items; i++) {
+                    options.put(randomAlphaOfLengthBetween(1, 10), randomAlphaOfLengthBetween(1, 10));
+                }
+                highlightBuilder.options(options);
+                break;
+            case 16:
+                highlightBuilder.requireFieldMatch(toggleOrSet(highlightBuilder.requireFieldMatch()));
+                break;
+            case 17:
+                highlightBuilder.maxAnalyzedOffset(randomIntBetween(1, 100));
+                break;
         }
     }