Procházet zdrojové kódy

Added support for highlighting multi term queries using the postings highlighter

Closes #4042
Luca Cavanna před 12 roky
rodič
revize
5474cffe8f

+ 42 - 8
src/main/java/org/elasticsearch/search/highlight/PostingsHighlighter.java

@@ -21,10 +21,11 @@ package org.elasticsearch.search.highlight;
 import com.google.common.collect.Maps;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.MultiReader;
 import org.apache.lucene.index.Term;
-import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MultiTermQuery;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoringRewrite;
+import org.apache.lucene.search.TopTermsRewrite;
 import org.apache.lucene.search.highlight.Encoder;
 import org.apache.lucene.search.postingshighlight.CustomPassageFormatter;
 import org.apache.lucene.search.postingshighlight.CustomPostingsHighlighter;
@@ -67,9 +68,10 @@ public class PostingsHighlighter implements Highlighter {
         FetchSubPhase.HitContext hitContext = highlighterContext.hitContext;
 
         if (!hitContext.cache().containsKey(CACHE_KEY)) {
+            //get the original (non-rewritten) query and rewrite it, unless the already rewritten query can be reused
             Query query;
             try {
-                query = rewrite(context.query());
+                query = rewrite(context, hitContext.topLevelReader());
             } catch (IOException e) {
                 throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e);
             }
@@ -107,7 +109,7 @@ public class PostingsHighlighter implements Highlighter {
             //we highlight every value separately calling the highlight method multiple times, only if we need to have back a snippet per value (whole value)
             int values = mergeValues ? 1 : textsToHighlight.size();
             for (int i = 0; i < values; i++) {
-                Snippet[] fieldSnippets = highlighter.highlightDoc(highlighterContext.fieldName, mapperHighlighterEntry.filteredQueryTerms, new IndexSearcher(hitContext.reader()), hitContext.docId(), numberOfFragments);
+                Snippet[] fieldSnippets = highlighter.highlightDoc(highlighterContext.fieldName, mapperHighlighterEntry.filteredQueryTerms, context.searcher(), hitContext.docId(), numberOfFragments);
                 if (fieldSnippets != null) {
                     for (Snippet fieldSnippet : fieldSnippets) {
                         if (Strings.hasText(fieldSnippet.getText())) {
@@ -144,17 +146,49 @@ public class PostingsHighlighter implements Highlighter {
         return null;
     }
 
-    private static final IndexReader EMPTY_INDEXREADER = new MultiReader();
+    private static Query rewrite(SearchContext searchContext, IndexReader reader) throws IOException {
+        //rewriting is expensive: if the query was already rewritten we try not to rewrite it again
+        boolean mustRewrite = !searchContext.queryRewritten();
+
+        Query original = searchContext.parsedQuery().query();
+
+        MultiTermQuery originalMultiTermQuery = null;
+        MultiTermQuery.RewriteMethod originalRewriteMethod = null;
+        if (original instanceof MultiTermQuery) {
+            originalMultiTermQuery = (MultiTermQuery) original;
+            if (!allowsForTermExtraction(originalMultiTermQuery.getRewriteMethod())) {
+                originalRewriteMethod = originalMultiTermQuery.getRewriteMethod();
+                originalMultiTermQuery.setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(50));
+                //we need to rewrite anyway if it is a multi-term query whose rewrite method doesn't allow for term extraction
+                mustRewrite = true;
+            }
+        }
+
+        if (!mustRewrite) {
+            //return the rewritten query
+            return searchContext.query();
+        }
 
-    private static Query rewrite(Query original) throws IOException {
         Query query = original;
-        for (Query rewrittenQuery = query.rewrite(EMPTY_INDEXREADER); rewrittenQuery != query;
-             rewrittenQuery = query.rewrite(EMPTY_INDEXREADER)) {
+        for (Query rewrittenQuery = query.rewrite(reader); rewrittenQuery != query;
+             rewrittenQuery = query.rewrite(reader)) {
             query = rewrittenQuery;
         }
+
+        if (originalMultiTermQuery != null) {
+            if (originalRewriteMethod != null) {
+                //set back the original rewrite method after the rewrite is done
+                originalMultiTermQuery.setRewriteMethod(originalRewriteMethod);
+            }
+        }
+
         return query;
     }
 
+    private static boolean allowsForTermExtraction(MultiTermQuery.RewriteMethod rewriteMethod) {
+        return rewriteMethod instanceof TopTermsRewrite || rewriteMethod instanceof ScoringRewrite;
+    }
+
     private static SortedSet<Term> extractTerms(Query query) {
         SortedSet<Term> queryTerms = new TreeSet<Term>();
         query.extractTerms(queryTerms);

+ 138 - 8
src/test/java/org/elasticsearch/search/highlight/HighlighterSearchTests.java

@@ -2019,7 +2019,7 @@ public class HighlighterSearchTests extends AbstractIntegrationTest {
         ensureGreen();
 
         client().prepareIndex("test", "type1")
-                .setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy dog").setRefresh(true).get();
+                .setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy quick dog").setRefresh(true).get();
 
         logger.info("--> highlighting and searching on field1");
         SearchSourceBuilder source = searchSource()
@@ -2049,28 +2049,28 @@ public class HighlighterSearchTests extends AbstractIntegrationTest {
         searchResponse = client().search(searchRequest("test").source(source)).actionGet();
         assertHitCount(searchResponse, 1l);
 
-        assertThat(searchResponse.getHits().getAt(0).highlightFields().get("field2").fragments()[0].string(), equalTo("The <xxx>quick</xxx> brown fox jumps over the lazy dog"));
+        assertThat(searchResponse.getHits().getAt(0).highlightFields().get("field2").fragments()[0].string(), equalTo("The <xxx>quick</xxx> brown fox jumps over the lazy <xxx>quick</xxx> dog"));
 
         logger.info("--> searching on _all, highlighting on field2");
         source = searchSource()
-                .query(prefixQuery("_all", "qui"))
+                .query(matchPhraseQuery("_all", "quick brown"))
                 .highlight(highlight().field("field2").preTags("<xxx>").postTags("</xxx>"));
 
         searchResponse = client().search(searchRequest("test").source(source)).actionGet();
         assertHitCount(searchResponse, 1l);
-        //no snippets produced for prefix query, not supported by postings highlighter
-        assertThat(searchResponse.getHits().getAt(0).highlightFields().size(), equalTo(0));
+        //a phrase query results in highlighting every occurrence of each of its terms, regardless of their positions
+        assertThat(searchResponse.getHits().getAt(0).highlightFields().get("field2").fragments()[0].string(), equalTo("The <xxx>quick</xxx> <xxx>brown</xxx> fox jumps over the lazy <xxx>quick</xxx> dog"));
 
-        //lets fall back to the standard highlighter then, what people would do with unsupported queries
+        //let's fall back to the standard highlighter then, which is what people would do to highlight only the actual query matches
         logger.info("--> searching on _all, highlighting on field2, falling back to the plain highlighter");
         source = searchSource()
-                .query(prefixQuery("_all", "qui"))
+                .query(matchPhraseQuery("_all", "quick brown"))
                 .highlight(highlight().field("field2").preTags("<xxx>").postTags("</xxx>").highlighterType("highlighter"));
 
         searchResponse = client().search(searchRequest("test").source(source)).actionGet();
         assertHitCount(searchResponse, 1l);
 
-        assertThat(searchResponse.getHits().getAt(0).highlightFields().get("field2").fragments()[0].string(), equalTo("The <xxx>quick</xxx> brown fox jumps over the lazy dog"));
+        assertThat(searchResponse.getHits().getAt(0).highlightFields().get("field2").fragments()[0].string(), equalTo("The <xxx>quick</xxx> <xxx>brown</xxx> fox jumps over the lazy quick dog"));
     }
 
     @Test
@@ -2486,6 +2486,136 @@ public class HighlighterSearchTests extends AbstractIntegrationTest {
                 .endObject().endObject();
     }
 
+    private static final String[] REWRITE_METHODS = new String[]{"constant_score_auto", "scoring_boolean", "constant_score_boolean",
+            "constant_score_filter", "top_terms_boost_50", "top_terms_50"};
+
+    @Test
+    public void testPostingsHighlighterPrefixQuery() throws Exception {
+        assertAcked(client().admin().indices().prepareCreate("test").addMapping("type1", type1PostingsffsetsMapping()));
+        ensureGreen();
+
+        client().prepareIndex("test", "type1").setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy dog! Second sentence.").get();
+        refresh();
+        logger.info("--> highlighting and searching on field2");
+
+        for (String rewriteMethod : REWRITE_METHODS) {
+            SearchSourceBuilder source = searchSource().query(prefixQuery("field2", "qui").rewrite(rewriteMethod))
+                    .highlight(highlight().field("field2"));
+            SearchResponse searchResponse = client().search(searchRequest("test").source(source)
+                    .searchType(randomBoolean() ? SearchType.DFS_QUERY_THEN_FETCH : SearchType.QUERY_THEN_FETCH)).get();
+            assertHitCount(searchResponse, 1l);
+
+            assertThat(searchResponse.getHits().getAt(0).highlightFields().get("field2").fragments()[0].string(),
+                    equalTo("The <em>quick</em> brown fox jumps over the lazy dog!"));
+        }
+    }
+
+    @Test
+    public void testPostingsHighlighterFuzzyQuery() throws Exception {
+        assertAcked(client().admin().indices().prepareCreate("test").addMapping("type1", type1PostingsffsetsMapping()));
+        ensureGreen();
+
+        client().prepareIndex("test", "type1").setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy dog! Second sentence.").get();
+        refresh();
+        logger.info("--> highlighting and searching on field2");
+        SearchSourceBuilder source = searchSource().query(fuzzyQuery("field2", "quck"))
+                .highlight(highlight().field("field2"));
+        SearchResponse searchResponse = client().search(searchRequest("test").source(source)
+                .searchType(randomBoolean() ? SearchType.DFS_QUERY_THEN_FETCH : SearchType.QUERY_THEN_FETCH)).get();
+        assertHitCount(searchResponse, 1l);
+
+        assertThat(searchResponse.getHits().getAt(0).highlightFields().get("field2").fragments()[0].string(),
+                equalTo("The <em>quick</em> brown fox jumps over the lazy dog!"));
+    }
+
+    @Test
+    public void testPostingsHighlighterRegexpQuery() throws Exception {
+        assertAcked(client().admin().indices().prepareCreate("test").addMapping("type1", type1PostingsffsetsMapping()));
+        ensureGreen();
+
+        client().prepareIndex("test", "type1").setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy dog! Second sentence.").get();
+        refresh();
+        logger.info("--> highlighting and searching on field2");
+        for (String rewriteMethod : REWRITE_METHODS) {
+            SearchSourceBuilder source = searchSource().query(regexpQuery("field2", "qu[a-l]+k").rewrite(rewriteMethod))
+                    .highlight(highlight().field("field2"));
+            SearchResponse searchResponse = client().search(searchRequest("test").source(source)
+                    .searchType(randomBoolean() ? SearchType.DFS_QUERY_THEN_FETCH : SearchType.QUERY_THEN_FETCH)).get();
+            assertHitCount(searchResponse, 1l);
+
+            assertThat(searchResponse.getHits().getAt(0).highlightFields().get("field2").fragments()[0].string(),
+                    equalTo("The <em>quick</em> brown fox jumps over the lazy dog!"));
+        }
+    }
+
+    @Test
+    public void testPostingsHighlighterWildcardQuery() throws Exception {
+        assertAcked(client().admin().indices().prepareCreate("test").addMapping("type1", type1PostingsffsetsMapping()));
+        ensureGreen();
+
+        client().prepareIndex("test", "type1").setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy dog! Second sentence.").get();
+        refresh();
+        logger.info("--> highlighting and searching on field2");
+        for (String rewriteMethod : REWRITE_METHODS) {
+            SearchSourceBuilder source = searchSource().query(wildcardQuery("field2", "qui*").rewrite(rewriteMethod))
+                    .highlight(highlight().field("field2"));
+            SearchResponse searchResponse = client().search(searchRequest("test").source(source)
+                    .searchType(randomBoolean() ? SearchType.DFS_QUERY_THEN_FETCH : SearchType.QUERY_THEN_FETCH)).get();
+            assertHitCount(searchResponse, 1l);
+
+            assertThat(searchResponse.getHits().getAt(0).highlightFields().get("field2").fragments()[0].string(),
+                    equalTo("The <em>quick</em> brown fox jumps over the lazy dog!"));
+
+            source = searchSource().query(wildcardQuery("field2", "qu*k").rewrite(rewriteMethod))
+                    .highlight(highlight().field("field2"));
+            searchResponse = client().search(searchRequest("test").source(source)
+                    .searchType(randomBoolean() ? SearchType.DFS_QUERY_THEN_FETCH : SearchType.QUERY_THEN_FETCH)).get();
+            assertHitCount(searchResponse, 1l);
+
+            assertThat(searchResponse.getHits().getAt(0).highlightFields().get("field2").fragments()[0].string(),
+                    equalTo("The <em>quick</em> brown fox jumps over the lazy dog!"));
+
+        }
+    }
+
+    @Test
+    public void testPostingsHighlighterTermRangeQuery() throws Exception {
+        assertAcked(client().admin().indices().prepareCreate("test").addMapping("type1", type1PostingsffsetsMapping()));
+        ensureGreen();
+
+        client().prepareIndex("test", "type1").setSource("field1", "this is a test", "field2", "aaab").get();
+        refresh();
+        logger.info("--> highlighting and searching on field2");
+        SearchSourceBuilder source = searchSource().query(rangeQuery("field2").gte("aaaa").lt("zzzz"))
+                .highlight(highlight().field("field2"));
+        SearchResponse searchResponse = client().search(searchRequest("test").source(source)
+                .searchType(randomBoolean() ? SearchType.DFS_QUERY_THEN_FETCH : SearchType.QUERY_THEN_FETCH)).get();
+        assertHitCount(searchResponse, 1l);
+
+        assertThat(searchResponse.getHits().getAt(0).highlightFields().get("field2").fragments()[0].string(),
+                equalTo("<em>aaab</em>"));
+    }
+
+    @Test
+    public void testPostingsHighlighterQueryString() throws Exception {
+        assertAcked(client().admin().indices().prepareCreate("test").addMapping("type1", type1PostingsffsetsMapping()));
+        ensureGreen();
+
+        client().prepareIndex("test", "type1").setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy dog! Second sentence.").get();
+        refresh();
+        logger.info("--> highlighting and searching on field2");
+        for (String rewriteMethod : REWRITE_METHODS) {
+            SearchSourceBuilder source = searchSource().query(queryString("qui*").defaultField("field2").rewrite(rewriteMethod))
+                    .highlight(highlight().field("field2"));
+            SearchResponse searchResponse = client().search(searchRequest("test").source(source)
+                    .searchType(randomBoolean() ? SearchType.DFS_QUERY_THEN_FETCH : SearchType.QUERY_THEN_FETCH)).get();
+            assertHitCount(searchResponse, 1l);
+
+            assertThat(searchResponse.getHits().getAt(0).highlightFields().get("field2").fragments()[0].string(),
+                    equalTo("The <em>quick</em> brown fox jumps over the lazy dog!"));
+        }
+    }
+
     @Test
     @Slow
     public void testPostingsHighlighterManyDocs() throws Exception {