Browse Source

Bug fix for AnnotatedTextHighlighter - port of 39525 (#39747)

Bug fix for AnnotatedTextHighlighter - port of 39525
Relates to #39395
markharwood 6 years ago
parent
commit
a1fcb8a7e6

+ 7 - 38
plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java

@@ -54,6 +54,7 @@ import org.elasticsearch.index.mapper.StringFieldType;
 import org.elasticsearch.index.mapper.TextFieldMapper;
 import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText.AnnotationToken;
 import org.elasticsearch.index.query.QueryShardContext;
+import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;
 
 import java.io.IOException;
 import java.io.Reader;
@@ -315,45 +316,12 @@ public class AnnotatedTextFieldMapper extends FieldMapper {
     // When asked to tokenize plain-text versions by the highlighter it tokenizes the
     // original markup form in order to inject annotations.
     public static final class AnnotatedHighlighterAnalyzer extends AnalyzerWrapper {
-        private Analyzer delegate;
-        private AnnotatedText[] annotations;
-        public AnnotatedHighlighterAnalyzer(Analyzer delegate){
+        private final Analyzer delegate;
+        private final HitContext hitContext;
+        public AnnotatedHighlighterAnalyzer(Analyzer delegate, HitContext hitContext){
             super(delegate.getReuseStrategy());
             this.delegate = delegate;
-        }
-
-        public void init(String[] markedUpFieldValues) {
-            this.annotations = new AnnotatedText[markedUpFieldValues.length];
-            for (int i = 0; i < markedUpFieldValues.length; i++) {
-                annotations[i] = AnnotatedText.parse(markedUpFieldValues[i]);
-            }
-        }
-
-        public String []  getPlainTextValuesForHighlighter(){
-            String [] result = new String[annotations.length];
-            for (int i = 0; i < annotations.length; i++) {
-                result[i] = annotations[i].textMinusMarkup;
-            }
-            return result;
-        }
-
-        public AnnotationToken[] getIntersectingAnnotations(int start, int end) {
-            List<AnnotationToken> intersectingAnnotations = new ArrayList<>();
-            int fieldValueOffset =0;
-            for (AnnotatedText fieldValueAnnotations : this.annotations) {
-                //This is called from a highlighter where all of the field values are concatenated
-                // so each annotation offset will need to be adjusted so that it takes into account
-                // the previous values AND the MULTIVAL delimiter
-                for (AnnotationToken token : fieldValueAnnotations.annotations) {
-                    if(token.intersects(start - fieldValueOffset , end - fieldValueOffset)) {
-                        intersectingAnnotations.add(new AnnotationToken(token.offset + fieldValueOffset,
-                                token.endOffset + fieldValueOffset, token.value));
-                    }
-                }
-                //add 1 for the fieldvalue separator character
-                fieldValueOffset +=fieldValueAnnotations.textMinusMarkup.length() +1;
-            }
-            return intersectingAnnotations.toArray(new AnnotationToken[intersectingAnnotations.size()]);
+            this.hitContext = hitContext;
         }
 
         @Override
@@ -364,10 +332,11 @@ public class AnnotatedTextFieldMapper extends FieldMapper {
         @Override
         protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
             AnnotationsInjector injector = new AnnotationsInjector(components.getTokenStream());
+            AnnotatedText[] annotations = (AnnotatedText[]) hitContext.cache().get(AnnotatedText.class.getName());
             AtomicInteger readerNum = new AtomicInteger(0);
             return new TokenStreamComponents(r -> {
                 String plainText = readToString(r);
-                AnnotatedText at = this.annotations[readerNum.getAndIncrement()];
+                AnnotatedText at = annotations[readerNum.getAndIncrement()];
                 assert at.textMinusMarkup.equals(plainText);
                 injector.setAnnotations(at);
                 components.getSource().accept(new StringReader(at.textMinusMarkup));

+ 26 - 5
plugins/mapper-annotated-text/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedPassageFormatter.java

@@ -23,7 +23,7 @@ import org.apache.lucene.search.highlight.Encoder;
 import org.apache.lucene.search.uhighlight.Passage;
 import org.apache.lucene.search.uhighlight.PassageFormatter;
 import org.apache.lucene.search.uhighlight.Snippet;
-import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedHighlighterAnalyzer;
+import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText;
 import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText.AnnotationToken;
 
 import java.io.UnsupportedEncodingException;
@@ -42,11 +42,11 @@ public class AnnotatedPassageFormatter extends PassageFormatter {
 
     public static final String SEARCH_HIT_TYPE = "_hit_term";
     private final Encoder encoder;
-    private AnnotatedHighlighterAnalyzer annotatedHighlighterAnalyzer;
+    AnnotatedText[] annotations;
 
-    public AnnotatedPassageFormatter(AnnotatedHighlighterAnalyzer annotatedHighlighterAnalyzer, Encoder encoder) {
-        this.annotatedHighlighterAnalyzer = annotatedHighlighterAnalyzer;
+    public AnnotatedPassageFormatter(AnnotatedText[] annotations, Encoder encoder) {
         this.encoder = encoder;
+        this.annotations = annotations;
     }
 
     static class MarkupPassage {
@@ -158,7 +158,7 @@ public class AnnotatedPassageFormatter extends PassageFormatter {
         int pos;
         int j = 0;
         for (Passage passage : passages) {
-            AnnotationToken [] annotations = annotatedHighlighterAnalyzer.getIntersectingAnnotations(passage.getStartOffset(), 
+            AnnotationToken [] annotations = getIntersectingAnnotations(passage.getStartOffset(), 
                     passage.getEndOffset());            
             MarkupPassage mergedMarkup = mergeAnnotations(annotations, passage);
             
@@ -194,6 +194,27 @@ public class AnnotatedPassageFormatter extends PassageFormatter {
         }                    
         return snippets;
     }
+    
+    public AnnotationToken[] getIntersectingAnnotations(int start, int end) {
+        List<AnnotationToken> intersectingAnnotations = new ArrayList<>();
+        int fieldValueOffset =0;
+        for (AnnotatedText fieldValueAnnotations : this.annotations) {
+            //This is called from a highlighter where all of the field values are concatenated
+            // so each annotation offset will need to be adjusted so that it takes into account 
+            // the previous values AND the MULTIVAL delimiter
+            for (int i = 0; i < fieldValueAnnotations.numAnnotations(); i++) {
+                AnnotationToken token = fieldValueAnnotations.getAnnotation(i);
+                if (token.intersects(start - fieldValueOffset, end - fieldValueOffset)) {
+                    intersectingAnnotations
+                            .add(new AnnotationToken(token.offset + fieldValueOffset, token.endOffset + 
+                                    fieldValueOffset, token.value));
+                }
+            }
+            //add 1 for the fieldvalue separator character
+            fieldValueOffset +=fieldValueAnnotations.textMinusMarkup.length() +1;
+        }
+        return intersectingAnnotations.toArray(new AnnotationToken[intersectingAnnotations.size()]);
+    }     
 
     private void append(StringBuilder dest, String content, int start, int end) {
         dest.append(encoder.encodeText(content.substring(start, end)));

+ 21 - 11
plugins/mapper-annotated-text/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedTextHighlighter.java

@@ -25,24 +25,22 @@ import org.apache.lucene.search.uhighlight.PassageFormatter;
 import org.elasticsearch.index.mapper.DocumentMapper;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedHighlighterAnalyzer;
+import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText;
 import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;
 import org.elasticsearch.search.fetch.subphase.highlight.SearchContextHighlight.Field;
 import org.elasticsearch.search.internal.SearchContext;
 
 import java.io.IOException;
-import java.util.Arrays;
+import java.util.ArrayList;
 import java.util.List;
 
 public class AnnotatedTextHighlighter extends UnifiedHighlighter {
     
     public static final String NAME = "annotated";
-
-    AnnotatedHighlighterAnalyzer annotatedHighlighterAnalyzer = null;    
     
     @Override
-    protected Analyzer getAnalyzer(DocumentMapper docMapper, MappedFieldType type) {
-        annotatedHighlighterAnalyzer = new AnnotatedHighlighterAnalyzer(super.getAnalyzer(docMapper, type));
-        return annotatedHighlighterAnalyzer;
+    protected Analyzer getAnalyzer(DocumentMapper docMapper, MappedFieldType type, HitContext hitContext) {
+        return new AnnotatedHighlighterAnalyzer(super.getAnalyzer(docMapper, type, hitContext), hitContext);
     }
 
     // Convert the marked-up values held on-disk to plain-text versions for highlighting
@@ -51,14 +49,26 @@ public class AnnotatedTextHighlighter extends UnifiedHighlighter {
             throws IOException {
         List<Object> fieldValues = super.loadFieldValues(fieldType, field, context, hitContext);
         String[] fieldValuesAsString = fieldValues.toArray(new String[fieldValues.size()]);
-        annotatedHighlighterAnalyzer.init(fieldValuesAsString);
-        return Arrays.asList((Object[]) annotatedHighlighterAnalyzer.getPlainTextValuesForHighlighter());
+        
+        AnnotatedText[] annotations = new AnnotatedText[fieldValuesAsString.length];
+        for (int i = 0; i < fieldValuesAsString.length; i++) {
+            annotations[i] = AnnotatedText.parse(fieldValuesAsString[i]);
+        }
+        // Store the annotations in the hitContext
+        hitContext.cache().put(AnnotatedText.class.getName(), annotations);
+        
+        ArrayList<Object> result = new ArrayList<>(annotations.length);
+        for (int i = 0; i < annotations.length; i++) {
+            result.add(annotations[i].textMinusMarkup);
+        }
+        return result;
     }
 
     @Override
-    protected PassageFormatter getPassageFormatter(SearchContextHighlight.Field field, Encoder encoder) {
-        return new AnnotatedPassageFormatter(annotatedHighlighterAnalyzer, encoder);
-
+    protected PassageFormatter getPassageFormatter(HitContext hitContext,SearchContextHighlight.Field field, Encoder encoder) {
+        // Retrieve the annotations from the hitContext
+        AnnotatedText[] annotations = (AnnotatedText[]) hitContext.cache().get(AnnotatedText.class.getName());
+        return new AnnotatedPassageFormatter(annotations, encoder);
     }
 
 }

+ 19 - 6
plugins/mapper-annotated-text/src/test/java/org/elasticsearch/search/highlight/AnnotatedTextHighlighterTests.java

@@ -40,18 +40,20 @@ import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.search.highlight.DefaultEncoder;
 import org.apache.lucene.search.uhighlight.CustomSeparatorBreakIterator;
 import org.apache.lucene.search.uhighlight.CustomUnifiedHighlighter;
-import org.apache.lucene.search.uhighlight.PassageFormatter;
 import org.apache.lucene.search.uhighlight.Snippet;
 import org.apache.lucene.search.uhighlight.SplittingBreakIterator;
 import org.apache.lucene.store.Directory;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedHighlighterAnalyzer;
+import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText;
 import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotationAnalyzerWrapper;
+import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;
 import org.elasticsearch.search.fetch.subphase.highlight.AnnotatedPassageFormatter;
 import org.elasticsearch.test.ESTestCase;
 
 import java.net.URLEncoder;
 import java.text.BreakIterator;
+import java.util.ArrayList;
 import java.util.Locale;
 
 import static org.apache.lucene.search.uhighlight.CustomUnifiedHighlighter.MULTIVAL_SEP_CHAR;
@@ -63,13 +65,24 @@ public class AnnotatedTextHighlighterTests extends ESTestCase {
             Query query, Locale locale, BreakIterator breakIterator,
             int noMatchSize, String[] expectedPassages) throws Exception {
         
+        
         // Annotated fields wrap the usual analyzer with one that injects extra tokens
         Analyzer wrapperAnalyzer = new AnnotationAnalyzerWrapper(new StandardAnalyzer());
-        AnnotatedHighlighterAnalyzer hiliteAnalyzer = new AnnotatedHighlighterAnalyzer(wrapperAnalyzer);
-        hiliteAnalyzer.init(markedUpInputs);
-        PassageFormatter passageFormatter = new AnnotatedPassageFormatter(hiliteAnalyzer,new DefaultEncoder());
-        String []plainTextForHighlighter = hiliteAnalyzer.getPlainTextValuesForHighlighter();
+        HitContext mockHitContext = new HitContext();
+        AnnotatedHighlighterAnalyzer hiliteAnalyzer = new AnnotatedHighlighterAnalyzer(wrapperAnalyzer, mockHitContext);
+        
+        AnnotatedText[] annotations = new AnnotatedText[markedUpInputs.length];
+        for (int i = 0; i < markedUpInputs.length; i++) {
+            annotations[i] = AnnotatedText.parse(markedUpInputs[i]);
+        }
+        mockHitContext.cache().put(AnnotatedText.class.getName(), annotations);
 
+        AnnotatedPassageFormatter passageFormatter = new AnnotatedPassageFormatter(annotations,new DefaultEncoder());
+        
+        ArrayList<Object> plainTextForHighlighter = new ArrayList<>(annotations.length);
+        for (int i = 0; i < annotations.length; i++) {
+            plainTextForHighlighter.add(annotations[i].textMinusMarkup);
+        }        
         
         Directory dir = newDirectory();
         IndexWriterConfig iwc = newIndexWriterConfig(wrapperAnalyzer);
@@ -94,7 +107,7 @@ public class AnnotatedTextHighlighterTests extends ESTestCase {
         iw.close();
         TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), 1, Sort.INDEXORDER);
         assertThat(topDocs.totalHits.value, equalTo(1L));
-        String rawValue = Strings.arrayToDelimitedString(plainTextForHighlighter, String.valueOf(MULTIVAL_SEP_CHAR));
+        String rawValue = Strings.collectionToDelimitedString(plainTextForHighlighter, String.valueOf(MULTIVAL_SEP_CHAR));
         
         CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(searcher, hiliteAnalyzer, null,
                 passageFormatter, locale,

+ 74 - 0
plugins/mapper-annotated-text/src/test/resources/rest-api-spec/test/mapper_annotatedtext/10_basic.yml

@@ -42,3 +42,77 @@
         body: { "query" : {"term" : { "text" : "quick" } }, "highlight" : { "type" : "annotated", "require_field_match": false, "fields" : { "text" : {} } } }
 
   - match: {hits.hits.0.highlight.text.0: "The [quick](_hit_term=quick) brown fox is brown."}
+
+---
+"issue 39395 thread safety issue -requires multiple calls to reveal":
+  - skip:
+      version: " - 6.4.99"
+      reason: Annotated text type introduced in 6.5.0
+
+  - do:
+      indices.create:
+        index: annotated
+        body:
+          settings:
+            number_of_shards: "5"
+            number_of_replicas: "0"
+          mappings:
+            properties:
+              my_field:
+                type: annotated_text
+
+  - do:
+      index:
+        index: annotated
+        id: 1
+        body: 
+            "my_field" : "[A](~MARK0&~MARK0) [B](~MARK1)"
+  - do:
+      index:
+        index: annotated
+        id: 2
+        body: 
+            "my_field" : "[A](~MARK0) [C](~MARK2)"
+        refresh: true
+  - do:
+      search:
+        request_cache: false
+        body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated",  "fields" : { "my_field" : {} } } }
+  - match: {_shards.failed: 0}
+
+  - do:
+      search:
+        request_cache: false
+        body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated",  "fields" : { "my_field" : {} } } }
+  - match: {_shards.failed: 0}
+
+  - do:
+      search:
+        request_cache: false
+        body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated",  "fields" : { "my_field" : {} } } }
+  - match: {_shards.failed: 0}
+
+  - do:
+      search:
+        request_cache: false
+        body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated",  "fields" : { "my_field" : {} } } }
+  - match: {_shards.failed: 0}
+
+  - do:
+      search:
+        request_cache: false
+        body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated",  "fields" : { "my_field" : {} } } }
+  - match: {_shards.failed: 0}
+
+  - do:
+      search:
+        request_cache: false
+        body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated",  "fields" : { "my_field" : {} } } }
+  - match: {_shards.failed: 0}
+
+  - do:
+      search:
+        request_cache: false
+        body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated",  "fields" : { "my_field" : {} } } }
+  - match: {_shards.failed: 0}
+

+ 0 - 1
server/src/main/java/org/elasticsearch/search/fetch/FetchSubPhase.java

@@ -74,7 +74,6 @@ public interface FetchSubPhase {
             }
             return cache;
         }
-
     }
 
     /**

+ 6 - 4
server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/UnifiedHighlighter.java

@@ -38,6 +38,7 @@ import org.elasticsearch.index.mapper.DocumentMapper;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
 import org.elasticsearch.search.fetch.FetchSubPhase;
+import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;
 import org.elasticsearch.search.internal.SearchContext;
 
 import java.io.IOException;
@@ -68,12 +69,13 @@ public class UnifiedHighlighter implements Highlighter {
         int numberOfFragments;
         try {
 
-            final Analyzer analyzer = getAnalyzer(context.mapperService().documentMapper(hitContext.hit().getType()), fieldType);
+            final Analyzer analyzer = getAnalyzer(context.mapperService().documentMapper(hitContext.hit().getType()), fieldType,
+                    hitContext);
             List<Object> fieldValues = loadFieldValues(fieldType, field, context, hitContext);
             if (fieldValues.size() == 0) {
                 return null;
             }
-            final PassageFormatter passageFormatter = getPassageFormatter(field, encoder);
+            final PassageFormatter passageFormatter = getPassageFormatter(hitContext, field, encoder);
             final IndexSearcher searcher = new IndexSearcher(hitContext.reader());
             final CustomUnifiedHighlighter highlighter;
             final String fieldValue = mergeFieldValues(fieldValues, MULTIVAL_SEP_CHAR);
@@ -140,14 +142,14 @@ public class UnifiedHighlighter implements Highlighter {
         return null;
     }
 
-    protected PassageFormatter getPassageFormatter(SearchContextHighlight.Field field, Encoder encoder) {
+    protected PassageFormatter getPassageFormatter(HitContext hitContext, SearchContextHighlight.Field field, Encoder encoder) {
         CustomPassageFormatter passageFormatter = new CustomPassageFormatter(field.fieldOptions().preTags()[0],
             field.fieldOptions().postTags()[0], encoder);
         return passageFormatter;
     }
 
     
-    protected Analyzer getAnalyzer(DocumentMapper docMapper, MappedFieldType type) {
+    protected Analyzer getAnalyzer(DocumentMapper docMapper, MappedFieldType type, HitContext hitContext) {
         return HighlightUtils.getAnalyzer(docMapper, type);
     }