@@ -32,11 +32,11 @@ import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CollectionUtil;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.text.Text;
+import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.mapper.DocumentMapper;
 import org.elasticsearch.index.mapper.FieldMapper;
 import org.elasticsearch.index.mapper.KeywordFieldMapper;
 import org.elasticsearch.index.mapper.MappedFieldType;
-import org.elasticsearch.index.mapper.MapperService;
 import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
 import org.elasticsearch.search.fetch.FetchSubPhase;
 import org.elasticsearch.search.internal.SearchContext;
@@ -44,10 +44,8 @@ import org.elasticsearch.search.internal.SearchContext;
 import java.io.IOException;
 import java.text.BreakIterator;
 import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.List;
 import java.util.Locale;
-import java.util.Map;
 import java.util.stream.Collectors;
 
 import static org.apache.lucene.search.uhighlight.CustomUnifiedHighlighter.MULTIVAL_SEP_CHAR;
@@ -83,21 +81,28 @@ public class UnifiedHighlighter implements Highlighter {
         final CustomUnifiedHighlighter highlighter;
         final String fieldValue = mergeFieldValues(fieldValues, MULTIVAL_SEP_CHAR);
         final OffsetSource offsetSource = getOffsetSource(fieldMapper.fieldType());
+        if ((offsetSource == OffsetSource.ANALYSIS) && (fieldValue.length() > maxAnalyzedOffset)) {
+            throw new IllegalArgumentException(
+                "The length of [" + highlighterContext.fieldName + "] field of [" + hitContext.hit().getId() +
+                    "] doc of [" + context.indexShard().shardId().getIndexName() + "] index " + "has exceeded [" +
+                    maxAnalyzedOffset + "] - maximum allowed to be analyzed for highlighting. " +
+                    "This maximum can be set by changing the [" + IndexSettings.MAX_ANALYZED_OFFSET_SETTING.getKey() +
+                    "] index level setting. " + "For large texts, indexing with offsets or term vectors is recommended!");
+        }
         if (field.fieldOptions().numberOfFragments() == 0) {
             // we use a control char to separate values, which is the only char that the custom break iterator
             // breaks the text on, so we don't lose the distinction between the different values of a field and we
             // get back a snippet per value
             CustomSeparatorBreakIterator breakIterator = new CustomSeparatorBreakIterator(MULTIVAL_SEP_CHAR);
             highlighter = new CustomUnifiedHighlighter(searcher, analyzer, offsetSource, passageFormatter,
-                field.fieldOptions().boundaryScannerLocale(), breakIterator, fieldValue, field.fieldOptions().noMatchSize(),
-                maxAnalyzedOffset);
+                field.fieldOptions().boundaryScannerLocale(), breakIterator, fieldValue, field.fieldOptions().noMatchSize());
             numberOfFragments = fieldValues.size(); // we are highlighting the whole content, one snippet per value
         } else {
             //using paragraph separator we make sure that each field value holds a discrete passage for highlighting
             BreakIterator bi = getBreakIterator(field);
             highlighter = new CustomUnifiedHighlighter(searcher, analyzer, offsetSource, passageFormatter,
                 field.fieldOptions().boundaryScannerLocale(), bi,
-                fieldValue, field.fieldOptions().noMatchSize(), maxAnalyzedOffset);
+                fieldValue, field.fieldOptions().noMatchSize());
             numberOfFragments = field.fieldOptions().numberOfFragments();
         }
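
For context, here is a minimal standalone sketch of the guard this patch introduces: the length check now runs up front in UnifiedHighlighter, instead of passing maxAnalyzedOffset down into CustomUnifiedHighlighter. The class, method, and parameter names below are hypothetical stand-ins, and the setting key is assumed to be the value of IndexSettings.MAX_ANALYZED_OFFSET_SETTING.getKey().

// Standalone sketch (not part of the patch); names here are illustrative only.
public class MaxAnalyzedOffsetCheckSketch {

    // Assumed value of IndexSettings.MAX_ANALYZED_OFFSET_SETTING.getKey().
    private static final String SETTING_KEY = "index.highlight.max_analyzed_offset";

    // Mirrors the added guard: only text analyzed at highlight time is bounded;
    // fields indexed with offsets or term vectors never reach this check.
    static void ensureWithinMaxAnalyzedOffset(String fieldValue, int maxAnalyzedOffset,
                                              String fieldName, String docId, String indexName) {
        if (fieldValue.length() > maxAnalyzedOffset) {
            throw new IllegalArgumentException(
                "The length of [" + fieldName + "] field of [" + docId + "] doc of [" + indexName +
                    "] index has exceeded [" + maxAnalyzedOffset +
                    "] - maximum allowed to be analyzed for highlighting. " +
                    "This maximum can be set by changing the [" + SETTING_KEY + "] index level setting. " +
                    "For large texts, indexing with offsets or term vectors is recommended!");
        }
    }

    public static void main(String[] args) {
        // Passes: the value is well under the limit, so highlighting would proceed.
        ensureWithinMaxAnalyzedOffset("a short field value", 1_000_000, "body", "1", "test-index");
    }
}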