Răsfoiți Sursa

PhraseSuggester: Collate option should allow returning phrases with no matching docs

A new option `prune` has been added to allow users to control phrase suggestion pruning when `collate`
is set. If the new option is set, the phrase suggestion option will contain a boolean `collate_match`
indicating whether the respective result had hits in collation.

Closes #6927
Areek Zillur 11 ani în urmă
părinte
comite
f39d4e1f89

+ 11 - 2
docs/reference/search/suggesters/phrase-suggest.asciidoc

@@ -169,9 +169,14 @@ can contain misspellings (See parameter descriptions below).
     automatically made available as the `{{suggestion}}`  variable, which
     should be used in your query/filter.  You can still specify  your own
     template `params` -- the `suggestion` value will be added to the
-    variables you specify. You can also specify a `preference` to control
+    variables you specify. You can specify a `preference` to control
     on which shards the query is executed (see <<search-request-preference>>).
-    The default value is `_only_local`.
+    The default value is `_only_local`. Additionally, you can specify
+    a `prune` option to control whether all phrase suggestions are
+    returned. When set to `true`, each suggestion has an additional
+    option `collate_match`, which is `true` if matching documents
+    for the phrase were found and `false` otherwise. The default value for
+    `prune` is `false`.
 
 [source,js]
 --------------------------------------------------
@@ -195,6 +200,7 @@ curl -XPOST 'localhost:9200/_search' -d {
            },
            "params": {"field_name" : "title"}, <3>
            "preference": "_primary", <4>
+           "prune": true <5>
          }
        }
      }
@@ -207,6 +213,9 @@ curl -XPOST 'localhost:9200/_search' -d {
 <3> An additional `field_name` variable has been specified in
     `params` and is used by the `match` query.
 <4> The default `preference` has been changed to `_primary`.
+<5> All suggestions will be returned with an extra `collate_match`
+    option indicating whether the generated phrase matched any
+    document.
 
 ==== Smoothing Models
 

+ 28 - 1
src/main/java/org/elasticsearch/search/suggest/Suggest.java

@@ -21,6 +21,7 @@ package org.elasticsearch.search.suggest;
 import org.apache.lucene.util.CollectionUtil;
 import org.elasticsearch.ElasticsearchException;
 import org.elasticsearch.ElasticsearchIllegalStateException;
+import org.elasticsearch.Version;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.io.stream.Streamable;
@@ -521,17 +522,24 @@ public class Suggest implements Iterable<Suggest.Suggestion<? extends Entry<? ex
                     static final XContentBuilderString TEXT = new XContentBuilderString("text");
                     static final XContentBuilderString HIGHLIGHTED = new XContentBuilderString("highlighted");
                     static final XContentBuilderString SCORE = new XContentBuilderString("score");
+                    static final XContentBuilderString COLLATE_MATCH = new XContentBuilderString("collate_match");
 
                 }
 
                 private Text text;
                 private Text highlighted;
                 private float score;
+                private Boolean collateMatch;
 
-                public Option(Text text, Text highlighted, float score) {
+                public Option(Text text, Text highlighted, float score, Boolean collateMatch) {
                     this.text = text;
                     this.highlighted = highlighted;
                     this.score = score;
+                    this.collateMatch = collateMatch;
+                }
+
+                public Option(Text text, Text highlighted, float score) {
+                    this(text, highlighted, score, null);
                 }
 
                 public Option(Text text, float score) {
@@ -562,6 +570,14 @@ public class Suggest implements Iterable<Suggest.Suggestion<? extends Entry<? ex
                 public float getScore() {
                     return score;
                 }
+
+                /**
+                 * @return true if collation has found a match for the entry.
+                 * if collate was not set, the value defaults to <code>true</code>
+                 */
+                public boolean collateMatch() {
+                    return (collateMatch != null) ? collateMatch : true;
+                }
                 
                 protected void setScore(float score) {
                     this.score = score;
@@ -572,6 +588,10 @@ public class Suggest implements Iterable<Suggest.Suggestion<? extends Entry<? ex
                     text = in.readText();
                     score = in.readFloat();
                     highlighted = in.readOptionalText();
+
+                    if (in.getVersion().onOrAfter(Version.V_1_4_0)) {
+                        collateMatch = in.readOptionalBoolean();
+                    }
                 }
 
                 @Override
@@ -579,6 +599,10 @@ public class Suggest implements Iterable<Suggest.Suggestion<? extends Entry<? ex
                     out.writeText(text);
                     out.writeFloat(score);
                     out.writeOptionalText(highlighted);
+
+                    if (out.getVersion().onOrAfter(Version.V_1_4_0)) {
+                        out.writeOptionalBoolean(collateMatch);
+                    }
                 }
 
                 @Override
@@ -595,6 +619,9 @@ public class Suggest implements Iterable<Suggest.Suggestion<? extends Entry<? ex
                         builder.field(Fields.HIGHLIGHTED, highlighted);
                     }
                     builder.field(Fields.SCORE, score);
+                    if (collateMatch != null) {
+                        builder.field(Fields.COLLATE_MATCH, collateMatch.booleanValue());
+                    }
                     return builder;
                 }
                 

+ 6 - 0
src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestParser.java

@@ -158,6 +158,12 @@ public final class PhraseSuggestParser implements SuggestContextParser {
                             suggestion.setPreference(parser.text());
                         } else if ("params".equals(fieldName)) {
                             suggestion.setCollateScriptParams(parser.map());
+                        } else if ("prune".equals(fieldName)) {
+                            if (parser.isBooleanValue()) {
+                                suggestion.setCollatePrune(parser.booleanValue());
+                            } else {
+                                throw new ElasticsearchIllegalArgumentException("suggester[phrase][collate] prune must be either 'true' or 'false'");
+                            }
                         } else {
                             throw new ElasticsearchIllegalArgumentException(
                                     "suggester[phrase][collate] doesn't support field [" + fieldName + "]");

+ 9 - 2
src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggester.java

@@ -106,9 +106,12 @@ public final class PhraseSuggester extends Suggester<PhraseSuggestionContext> {
             BytesRef byteSpare = new BytesRef();
 
             MultiSearchResponse multiSearchResponse = collate(suggestion, checkerResult, byteSpare, spare);
+            final boolean collateEnabled = multiSearchResponse != null;
+            final boolean collatePrune = suggestion.collatePrune();
 
             for (int i = 0; i < checkerResult.corrections.length; i++) {
-                if (!hasMatchingDocs(multiSearchResponse, i)) {
+                boolean collateMatch = hasMatchingDocs(multiSearchResponse, i);
+                if (!collateMatch && !collatePrune) {
                     continue;
                 }
                 Correction correction = checkerResult.corrections[i];
@@ -119,7 +122,11 @@ public final class PhraseSuggester extends Suggester<PhraseSuggestionContext> {
                     UnicodeUtil.UTF8toUTF16(correction.join(SEPARATOR, byteSpare, suggestion.getPreTag(), suggestion.getPostTag()), spare);
                     highlighted = new StringText(spare.toString());
                 }
-                resultEntry.addOption(new Suggestion.Entry.Option(phrase, highlighted, (float) (correction.score)));
+                if (collateEnabled && collatePrune) {
+                    resultEntry.addOption(new Suggestion.Entry.Option(phrase, highlighted, (float) (correction.score), collateMatch));
+                } else {
+                    resultEntry.addOption(new Suggestion.Entry.Option(phrase, highlighted, (float) (correction.score)));
+                }
             }
         } else {
             response.addTerm(buildResultEntry(suggestion, spare, Double.MIN_VALUE));

+ 12 - 0
src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestionBuilder.java

@@ -46,6 +46,7 @@ public final class PhraseSuggestionBuilder extends SuggestionBuilder<PhraseSugge
     private String collateFilter;
     private String collatePreference;
     private Map<String, Object> collateParams;
+    private Boolean collatePrune;
 
     public PhraseSuggestionBuilder(String name) {
         super(name, "phrase");
@@ -202,6 +203,14 @@ public final class PhraseSuggestionBuilder extends SuggestionBuilder<PhraseSugge
         return this;
     }
 
+    /**
+     * Sets whether to prune suggestions after collation
+     */
+    public PhraseSuggestionBuilder collatePrune(boolean collatePrune) {
+        this.collatePrune = collatePrune;
+        return this;
+    }
+
     @Override
     public XContentBuilder innerToXContent(XContentBuilder builder, Params params) throws IOException {
         if (realWordErrorLikelihood != null) {
@@ -260,6 +269,9 @@ public final class PhraseSuggestionBuilder extends SuggestionBuilder<PhraseSugge
             if (collateParams != null) {
                 builder.field("params", collateParams);
             }
+            if (collatePrune != null) {
+                builder.field("prune", collatePrune.booleanValue());
+            }
             builder.endObject();
         }
         return builder;

+ 9 - 0
src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestionContext.java

@@ -52,6 +52,7 @@ class PhraseSuggestionContext extends SuggestionContext {
     private WordScorer.WordScorerFactory scorer;
 
     private boolean requireUnigram = true;
+    private boolean prune = false;
 
     public PhraseSuggestionContext(Suggester<? extends PhraseSuggestionContext> suggester) {
         super(suggester);
@@ -221,4 +222,12 @@ class PhraseSuggestionContext extends SuggestionContext {
         this.collateScriptParams = collateScriptParams;
     }
 
+    void setCollatePrune(boolean prune) {
+        this.prune = prune;
+    }
+
+    boolean collatePrune() {
+        return prune;
+    }
+
 }

+ 8 - 1
src/test/java/org/elasticsearch/search/suggest/SuggestSearchTests.java

@@ -1096,7 +1096,7 @@ public class SuggestSearchTests extends ElasticsearchIntegrationTest {
     }
 
     @Test
-    public void suggestPhrasesInIndex() throws InterruptedException, ExecutionException, IOException {
+    public void testPhraseSuggesterCollate() throws InterruptedException, ExecutionException, IOException {
         CreateIndexRequestBuilder builder = prepareCreate("test").setSettings(settingsBuilder()
                 .put(indexSettings())
                 .put(SETTING_NUMBER_OF_SHARDS, 1) // A single shard will help to keep the tests repeatable.
@@ -1253,6 +1253,13 @@ public class SuggestSearchTests extends ElasticsearchIntegrationTest {
         } catch (ElasticsearchException e) {
             // expected
         }
+
+        // collate request with prune set to true
+        PhraseSuggestionBuilder phraseSuggestWithParamsAndReturn = suggest.collateFilter(null).collateQuery(collateWithParams).collateParams(params).collatePrune(true);
+        searchSuggest = searchSuggest("united states house of representatives elections in washington 2006", phraseSuggestWithParamsAndReturn);
+        assertSuggestionSize(searchSuggest, 0, 10, "title");
+        assertSuggestionPhraseCollateMatchExists(searchSuggest, "title", 2);
+
     }
 
     protected Suggest searchSuggest(SuggestionBuilder<?>... suggestion) {

+ 16 - 0
src/test/java/org/elasticsearch/test/hamcrest/ElasticsearchAssertions.java

@@ -314,6 +314,22 @@ public class ElasticsearchAssertions {
         assertVersionSerializable(searchSuggest);
     }
 
+    public static void assertSuggestionPhraseCollateMatchExists(Suggest searchSuggest, String key, int numberOfPhraseExists) {
+        int counter = 0;
+        assertThat(searchSuggest, notNullValue());
+        String msg = "Suggest result: " + searchSuggest.toString();
+        assertThat(msg, searchSuggest.size(), greaterThanOrEqualTo(1));
+        assertThat(msg, searchSuggest.getSuggestion(key).getName(), equalTo(key));
+
+        for (Suggest.Suggestion.Entry.Option option : searchSuggest.getSuggestion(key).getEntries().get(0).getOptions()) {
+            if (option.collateMatch()) {
+                counter++;
+            }
+        }
+
+        assertThat(counter, equalTo(numberOfPhraseExists));
+    }
+
     public static void assertSuggestion(Suggest searchSuggest, int entry, int ord, String key, String text) {
         assertThat(searchSuggest, notNullValue());
         String msg = "Suggest result: " + searchSuggest.toString();