Browse Source

Phrase Suggester: Add collate option to PhraseSuggester

The newly added `collate` option lets the user provide a template query/filter that is executed for every generated phrase suggestion, to ensure that the suggestion matches at least one document for the filter/query.
The user can also specify a routing `preference` for the collate query/filter and additional `params` to inject into the collate template.

Closes #3482
Areek Zillur 11 years ago
parent
commit
76343899ea

+ 47 - 0
docs/reference/search/suggesters/phrase-suggest.asciidoc

@@ -161,6 +161,53 @@ can contain misspellings (See parameter descriptions below).
     in a row are changed the entire phrase of changed tokens 
     is wrapped rather than each token.
 
+`collate`::
+    Checks each suggestion against the specified `query` or `filter` to
+    prune suggestions for which no matching docs exist in the index. Exactly
+    one of `query` or `filter` must be specified, and it is run as a
+    <<query-dsl-template-query,`template` query>>. The current suggestion is
+    automatically made available as the `{{suggestion}}` variable, which
+    should be used in your query/filter. You can still specify your own
+    template `params` -- the `suggestion` value will be added to the
+    variables you specify. You can also specify a `preference` to control
+    on which shards the query is executed (see <<search-request-preference>>).
+    The default `preference` is `_only_local`.
+
+[source,js]
+--------------------------------------------------
+curl -XPOST 'localhost:9200/_search' -d {
+   "suggest" : {
+     "text" : "Xor the Got-Jewel",
+     "simple_phrase" : {
+       "phrase" : {
+         "field" :  "bigram",
+         "size" :   1,
+         "direct_generator" : [ {
+           "field" :            "body",
+           "suggest_mode" :     "always",
+           "min_word_length" :  1
+         } ],
+         "collate": {
+           "query": { <1>
+             "match": {
+                 "{{field_name}}" : "{{suggestion}}" <2>
+             }
+           },
+           "params": {"field_name" : "title"}, <3>
+           "preference": "_primary" <4>
+         }
+       }
+     }
+   }
+ }
+--------------------------------------------------
+<1> This query will be run once for every suggestion.
+<2> The `{{suggestion}}` variable will be replaced by the text
+    of each suggestion.
+<3> An additional `field_name` variable has been specified in
+    `params` and is used by the `match` query.
+<4> The default `preference` has been changed to `_primary`.
+
 ==== Smoothing Models
 
 The `phrase` suggester supports multiple smoothing models to balance

+ 23 - 22
src/main/java/org/elasticsearch/cluster/routing/operation/plain/PlainOperationRouting.java

@@ -167,14 +167,16 @@ public class PlainOperationRouting extends AbstractComponent implements Operatio
             }
         }
         if (preference.charAt(0) == '_') {
-            if (preference.startsWith("_shards:")) {
+            Preference preferenceType = Preference.parse(preference);
+            if (preferenceType == Preference.SHARDS) {
                 // starts with _shards, so execute on specific ones
                 int index = preference.indexOf(';');
+
                 String shards;
                 if (index == -1) {
-                    shards = preference.substring("_shards:".length());
+                    shards = preference.substring(Preference.SHARDS.type().length() + 1);
                 } else {
-                    shards = preference.substring("_shards:".length(), index);
+                    shards = preference.substring(Preference.SHARDS.type().length() + 1, index);
                 }
                 String[] ids = Strings.splitStringByCommaToArray(shards);
                 boolean found = false;
@@ -200,25 +202,24 @@ public class PlainOperationRouting extends AbstractComponent implements Operatio
                     preference = preference.substring(index + 1);
                 }
             }
-            if (preference.startsWith("_prefer_node:")) {
-                return indexShard.preferNodeActiveInitializingShardsIt(preference.substring("_prefer_node:".length()));
-            }
-            if ("_local".equals(preference)) {
-                return indexShard.preferNodeActiveInitializingShardsIt(localNodeId);
-            }
-            if ("_primary".equals(preference)) {
-                return indexShard.primaryActiveInitializingShardIt();
-            }
-            if ("_primary_first".equals(preference) || "_primaryFirst".equals(preference)) {
-                return indexShard.primaryFirstActiveInitializingShardsIt();
-            }
-            if ("_only_local".equals(preference) || "_onlyLocal".equals(preference)) {
-                return indexShard.onlyNodeActiveInitializingShardsIt(localNodeId);
-            }
-            if (preference.startsWith("_only_node:")) {
-                String nodeId = preference.substring("_only_node:".length());
-                ensureNodeIdExists(nodes, nodeId);
-                return indexShard.onlyNodeActiveInitializingShardsIt(nodeId);
+            preferenceType = Preference.parse(preference);
+            switch (preferenceType) {
+                case PREFER_NODE:
+                    return indexShard.preferNodeActiveInitializingShardsIt(preference.substring(Preference.PREFER_NODE.type().length() + 1));
+                case LOCAL:
+                    return indexShard.preferNodeActiveInitializingShardsIt(localNodeId);
+                case PRIMARY:
+                    return indexShard.primaryActiveInitializingShardIt();
+                case PRIMARY_FIRST:
+                    return indexShard.primaryFirstActiveInitializingShardsIt();
+                case ONLY_LOCAL:
+                    return indexShard.onlyNodeActiveInitializingShardsIt(localNodeId);
+                case ONLY_NODE:
+                    String nodeId = preference.substring(Preference.ONLY_NODE.type().length() + 1);
+                    ensureNodeIdExists(nodes, nodeId);
+                    return indexShard.onlyNodeActiveInitializingShardsIt(nodeId);
+                default:
+                    throw new ElasticsearchIllegalArgumentException("unknown preference [" + preferenceType + "]");
             }
         }
         // if not, then use it as the index

+ 109 - 0
src/main/java/org/elasticsearch/cluster/routing/operation/plain/Preference.java

@@ -0,0 +1,109 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.cluster.routing.operation.plain;
+
+import org.elasticsearch.ElasticsearchIllegalArgumentException;
+import org.elasticsearch.common.collect.Tuple;
+
+/**
+ * Routing preference types, each associated with the textual name used to
+ * select it in a preference string (for example {@code _primary} or
+ * {@code _only_node:<id>}).
+ */
+public enum Preference {
+
+    /** Route to specific shards. */
+    SHARDS("_shards"),
+
+    /** Route to the preferred node, if possible. */
+    PREFER_NODE("_prefer_node"),
+
+    /** Route to the local node, if possible. */
+    LOCAL("_local"),
+
+    /** Route to primary shards. */
+    PRIMARY("_primary"),
+
+    /** Route to primary shards first. */
+    PRIMARY_FIRST("_primary_first"),
+
+    /** Route to the local shard only. */
+    ONLY_LOCAL("_only_local"),
+
+    /** Route to a specific node only. */
+    ONLY_NODE("_only_node");
+
+    private final String type;
+
+    Preference(String type) {
+        this.type = type;
+    }
+
+    /**
+     * @return the canonical textual name of this preference type
+     */
+    public String type() {
+        return type;
+    }
+
+    /**
+     * Resolves the preference type of a preference string.
+     * <p>
+     * Only the part before the first {@code ':'} (or the whole string when
+     * there is no colon) is considered. Besides the canonical names, the
+     * camelCase spellings {@code _primaryFirst} and {@code _onlyLocal} are
+     * accepted.
+     *
+     * @param preference the preference string, must not be {@code null}
+     * @return the matching preference type
+     * @throws ElasticsearchIllegalArgumentException if the name matches no type
+     */
+    public static Preference parse(String preference) {
+        final int separator = preference.indexOf(':');
+        final String name = separator < 0 ? preference : preference.substring(0, separator);
+
+        // camelCase aliases of the canonical snake_case names
+        if ("_primaryFirst".equals(name)) {
+            return PRIMARY_FIRST;
+        }
+        if ("_onlyLocal".equals(name)) {
+            return ONLY_LOCAL;
+        }
+        for (Preference candidate : values()) {
+            if (candidate.type.equals(name)) {
+                return candidate;
+            }
+        }
+        throw new ElasticsearchIllegalArgumentException("no Preference for [" + name + "]");
+    }
+}
+
+
+

+ 49 - 0
src/main/java/org/elasticsearch/index/query/BytesFilterBuilder.java

@@ -0,0 +1,49 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.query;
+
+import org.elasticsearch.common.bytes.BytesReference;
+import org.elasticsearch.common.xcontent.*;
+
+import java.io.IOException;
+
+/**
+ * FilterBuilder that constructs filters from {@link org.elasticsearch.common.bytes.BytesReference}
+ * source
+ * <p>
+ * The source is re-parsed every time the builder is rendered; the content found
+ * inside the outermost object of the source is copied into the target builder.
+ */
+public class BytesFilterBuilder extends BaseFilterBuilder {
+
+    // raw filter source as handed to the constructor
+    private final BytesReference source;
+
+    /**
+     * @param source the filter source; stored as-is and parsed when the filter is rendered
+     */
+    public BytesFilterBuilder(BytesReference source) {
+        this.source = source;
+
+    }
+
+    /**
+     * Parses the source and copies the structure at the parser's position after
+     * two {@code nextToken()} calls into {@code builder}.
+     */
+    @Override
+    protected void doXContent(XContentBuilder builder, Params params) throws IOException {
+        try (XContentParser parser = XContentFactory.xContent(source).createParser(source)) {
+            // unwrap the first layer of json dictionary
+            // NOTE(review): presumably the first call lands on the opening START_OBJECT and the
+            // second on the first field name, so the field and its value are copied -- confirm
+            parser.nextToken();
+            parser.nextToken();
+            builder.copyCurrentStructure(parser);
+        }
+    }
+}

+ 10 - 0
src/main/java/org/elasticsearch/index/query/FilterBuilders.java

@@ -20,6 +20,7 @@
 package org.elasticsearch.index.query;
 
 import org.elasticsearch.common.Nullable;
+import org.elasticsearch.common.bytes.BytesReference;
 import org.elasticsearch.common.geo.GeoPoint;
 import org.elasticsearch.common.geo.ShapeRelation;
 import org.elasticsearch.common.geo.builders.ShapeBuilder;
@@ -557,6 +558,15 @@ public abstract class FilterBuilders {
         return new WrapperFilterBuilder(data, offset, length);
     }
 
+    /**
+     * Constructs a bytes filter to generate a filter from a {@link BytesReference} source
+     *
+     * @param source The filter source
+     * @return a new {@link BytesFilterBuilder} wrapping {@code source}
+     */
+    public static BytesFilterBuilder bytesFilter(BytesReference source) {
+        return new BytesFilterBuilder(source);
+    }
+
     private FilterBuilders() {
 
     }

+ 39 - 0
src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestParser.java

@@ -23,10 +23,12 @@ import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.ElasticsearchIllegalArgumentException;
+import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.common.xcontent.XContentParser;
 import org.elasticsearch.common.xcontent.XContentParser.Token;
 import org.elasticsearch.index.analysis.ShingleTokenFilterFactory;
 import org.elasticsearch.index.mapper.MapperService;
+import org.elasticsearch.script.CompiledScript;
 import org.elasticsearch.search.suggest.SuggestContextParser;
 import org.elasticsearch.search.suggest.SuggestUtils;
 import org.elasticsearch.search.suggest.SuggestionSearchContext;
@@ -124,6 +126,43 @@ public final class PhraseSuggestParser implements SuggestContextParser {
                             }
                         }
                     }
+                } else if ("collate".equals(fieldName)) {
+                    while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
+                        if (token == XContentParser.Token.FIELD_NAME) {
+                            fieldName = parser.currentName();
+                        } else if ("query".equals(fieldName) || "filter".equals(fieldName)) {
+                            String templateNameOrTemplateContent;
+                            if (token == XContentParser.Token.START_OBJECT && !parser.hasTextCharacters()) {
+                                XContentBuilder builder = XContentBuilder.builder(parser.contentType().xContent());
+                                builder.copyCurrentStructure(parser);
+                                templateNameOrTemplateContent = builder.string();
+                            } else {
+                                templateNameOrTemplateContent = parser.text();
+                            }
+                            if (templateNameOrTemplateContent == null) {
+                                throw new ElasticsearchIllegalArgumentException("suggester[phrase][collate] no query/filter found in collate object");
+                            }
+                            if (suggestion.getCollateFilterScript() != null) {
+                                throw new ElasticsearchIllegalArgumentException("suggester[phrase][collate] filter already set, doesn't support additional [" + fieldName + "]");
+                            }
+                            if (suggestion.getCollateQueryScript() != null) {
+                                throw new ElasticsearchIllegalArgumentException("suggester[phrase][collate] query already set, doesn't support additional [" + fieldName + "]");
+                            }
+                            CompiledScript compiledScript = suggester.scriptService().compile("mustache", templateNameOrTemplateContent);
+                            if ("query".equals(fieldName)) {
+                                suggestion.setCollateQueryScript(compiledScript);
+                            } else {
+                                suggestion.setCollateFilterScript(compiledScript);
+                            }
+                        } else if ("preference".equals(fieldName)) {
+                            suggestion.setPreference(parser.text());
+                        } else if ("params".equals(fieldName)) {
+                            suggestion.setCollateScriptParams(parser.map());
+                        } else {
+                            throw new ElasticsearchIllegalArgumentException(
+                                    "suggester[phrase][collate] doesn't support field [" + fieldName + "]");
+                        }
+                    }
                 } else {
                     throw new ElasticsearchIllegalArgumentException("suggester[phrase]  doesn't support array field [" + fieldName + "]");
                 }

+ 92 - 2
src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggester.java

@@ -27,8 +27,18 @@ import org.apache.lucene.search.spell.DirectSpellChecker;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.UnicodeUtil;
+import org.elasticsearch.ElasticsearchException;
+import org.elasticsearch.action.search.*;
+import org.elasticsearch.client.Client;
+import org.elasticsearch.common.bytes.BytesReference;
+import org.elasticsearch.common.inject.Inject;
 import org.elasticsearch.common.text.StringText;
 import org.elasticsearch.common.text.Text;
+import org.elasticsearch.index.query.FilterBuilders;
+import org.elasticsearch.index.query.QueryBuilders;
+import org.elasticsearch.script.CompiledScript;
+import org.elasticsearch.script.ExecutableScript;
+import org.elasticsearch.script.ScriptService;
 import org.elasticsearch.search.suggest.Suggest.Suggestion;
 import org.elasticsearch.search.suggest.Suggest.Suggestion.Entry;
 import org.elasticsearch.search.suggest.Suggest.Suggestion.Entry.Option;
@@ -38,10 +48,20 @@ import org.elasticsearch.search.suggest.phrase.NoisyChannelSpellChecker.Result;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Map;
 
 public final class PhraseSuggester extends Suggester<PhraseSuggestionContext> {
     private final BytesRef SEPARATOR = new BytesRef(" ");
-    
+    private static final String SUGGESTION_TEMPLATE_VAR_NAME = "suggestion";
+    private final Client client;
+    private final ScriptService scriptService;
+
+    /**
+     * Creates the suggester with the collaborators needed for collation.
+     *
+     * @param client        used to build and execute the collate search requests
+     * @param scriptService used to turn the compiled collate template into an executable script
+     */
+    @Inject
+    public PhraseSuggester(Client client, ScriptService scriptService) {
+        this.client = client;
+        this.scriptService = scriptService;
+    }
+
     /*
      * More Ideas:
      *   - add ability to find whitespace problems -> we can build a poor mans decompounder with our index based on a automaton?
@@ -84,7 +104,14 @@ public final class PhraseSuggester extends Suggester<PhraseSuggestionContext> {
             response.addTerm(resultEntry);
 
             BytesRef byteSpare = new BytesRef();
-            for (Correction correction : checkerResult.corrections) {
+
+            MultiSearchResponse multiSearchResponse = collate(suggestion, checkerResult, byteSpare, spare);
+
+            for (int i = 0; i < checkerResult.corrections.length; i++) {
+                if (!hasMatchingDocs(multiSearchResponse, i)) {
+                    continue;
+                }
+                Correction correction = checkerResult.corrections[i];
                 UnicodeUtil.UTF8toUTF16(correction.join(SEPARATOR, byteSpare, null, null), spare);
                 Text phrase = new StringText(spare.toString());
                 Text highlighted = null;
@@ -104,6 +131,69 @@ public final class PhraseSuggester extends Suggester<PhraseSuggestionContext> {
         UnicodeUtil.UTF8toUTF16(suggestion.getText(), spare);
         return new PhraseSuggestion.Entry(new StringText(spare.toString()), 0, spare.length, cutoffScore);
     }
+
+    private MultiSearchResponse collate(PhraseSuggestionContext suggestion, Result checkerResult, BytesRef byteSpare, CharsRef spare) throws IOException {
+        CompiledScript collateQueryScript = suggestion.getCollateQueryScript();
+        CompiledScript collateFilterScript = suggestion.getCollateFilterScript();
+        MultiSearchResponse multiSearchResponse = null;
+        if (collateQueryScript != null) {
+            multiSearchResponse = fetchMatchingDocCountResponses(checkerResult.corrections, collateQueryScript, false, suggestion, byteSpare, spare);
+        } else if (collateFilterScript != null) {
+            multiSearchResponse = fetchMatchingDocCountResponses(checkerResult.corrections, collateFilterScript, true, suggestion, byteSpare, spare);
+        }
+        return multiSearchResponse;
+    }
+
+    private MultiSearchResponse fetchMatchingDocCountResponses(Correction[] corrections, CompiledScript collateScript,
+                                                               boolean isFilter, PhraseSuggestionContext suggestions,
+                                                               BytesRef byteSpare, CharsRef spare) throws IOException {
+        Map<String, Object> vars = suggestions.getCollateScriptParams();
+        MultiSearchResponse multiSearchResponse = null;
+        MultiSearchRequestBuilder multiSearchRequestBuilder = client.prepareMultiSearch();
+        boolean requestAdded = false;
+        SearchRequestBuilder req;
+        for (Correction correction : corrections) {
+            UnicodeUtil.UTF8toUTF16(correction.join(SEPARATOR, byteSpare, null, null), spare);
+            vars.put(SUGGESTION_TEMPLATE_VAR_NAME, spare.toString());
+            ExecutableScript executable = scriptService.executable(collateScript, vars);
+            BytesReference querySource = (BytesReference) executable.run();
+            requestAdded = true;
+            if (isFilter) {
+                req = client.prepareSearch()
+                        .setPreference(suggestions.getPreference())
+                        .setQuery(QueryBuilders.constantScoreQuery(FilterBuilders.bytesFilter(querySource)))
+                        .setSearchType(SearchType.COUNT);
+            } else {
+                req = client.prepareSearch()
+                        .setPreference(suggestions.getPreference())
+                        .setQuery(querySource)
+                        .setSearchType(SearchType.COUNT);
+            }
+            multiSearchRequestBuilder.add(req);
+        }
+        if (requestAdded) {
+            multiSearchResponse = multiSearchRequestBuilder.get();
+        }
+
+        return multiSearchResponse;
+    }
+
+    private static boolean hasMatchingDocs(MultiSearchResponse multiSearchResponse, int index) {
+        if (multiSearchResponse == null) {
+            return true;
+        }
+        MultiSearchResponse.Item item = multiSearchResponse.getResponses()[index];
+        if (!item.isFailure()) {
+            SearchResponse resp = item.getResponse();
+            return resp.getHits().totalHits() > 0;
+        } else {
+            throw new ElasticsearchException("Collate request failed: " + item.getFailureMessage());
+        }
+    }
+
+    /**
+     * @return the script service this suggester was constructed with
+     */
+    ScriptService scriptService() {
+        return scriptService;
+    }
     
     @Override
     public String[] names() {

+ 53 - 1
src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestionBuilder.java

@@ -42,6 +42,10 @@ public final class PhraseSuggestionBuilder extends SuggestionBuilder<PhraseSugge
     private Integer tokenLimit;
     private String preTag;
     private String postTag;
+    private String collateQuery;
+    private String collateFilter;
+    private String collatePreference;
+    private Map<String, Object> collateParams;
 
     public PhraseSuggestionBuilder(String name) {
         super(name, "phrase");
@@ -166,6 +170,38 @@ public final class PhraseSuggestionBuilder extends SuggestionBuilder<PhraseSugge
         return this;
     }
 
+    /**
+     * Sets a query used for filtering out suggested phrases (collation).
+     * <p>
+     * The value is written as the {@code query} field of the {@code collate}
+     * object when this builder is rendered.
+     *
+     * @param collateQuery the collate query template, or {@code null} to unset it
+     * @return this builder
+     */
+    public PhraseSuggestionBuilder collateQuery(String collateQuery) {
+        this.collateQuery = collateQuery;
+        return this;
+    }
+
+    /**
+     * Sets a filter used for filtering out suggested phrases (collation).
+     * <p>
+     * The value is written as the {@code filter} field of the {@code collate}
+     * object when this builder is rendered.
+     *
+     * @param collateFilter the collate filter template, or {@code null} to unset it
+     * @return this builder
+     */
+    public PhraseSuggestionBuilder collateFilter(String collateFilter) {
+        this.collateFilter = collateFilter;
+        return this;
+    }
+
+    /**
+     * Sets routing preferences for executing filter query (collation).
+     * <p>
+     * The value is only rendered when a collate query or collate filter is set.
+     *
+     * @param collatePreference the preference string for the collate requests
+     * @return this builder
+     */
+    public PhraseSuggestionBuilder collatePreference(String collatePreference) {
+        this.collatePreference = collatePreference;
+        return this;
+    }
+
+    /**
+     * Sets additional params for collate script
+     * <p>
+     * The map is only rendered when a collate query or collate filter is set.
+     *
+     * @param collateParams extra variables made available to the collate template
+     * @return this builder
+     */
+    public PhraseSuggestionBuilder collateParams(Map<String, Object> collateParams) {
+        this.collateParams = collateParams;
+        return this;
+    }
+
     @Override
     public XContentBuilder innerToXContent(XContentBuilder builder, Params params) throws IOException {
         if (realWordErrorLikelihood != null) {
@@ -210,6 +246,22 @@ public final class PhraseSuggestionBuilder extends SuggestionBuilder<PhraseSugge
             builder.field("post_tag", postTag);
             builder.endObject();
         }
+        if (collateQuery != null || collateFilter != null) {
+            builder.startObject("collate");
+            if (collateQuery != null) {
+                builder.field("query", collateQuery);
+            }
+            if (collateFilter != null) {
+                builder.field("filter", collateFilter);
+            }
+            if (collatePreference != null) {
+                builder.field("preference", collatePreference);
+            }
+            if (collateParams != null) {
+                builder.field("params", collateParams);
+            }
+            builder.endObject();
+        }
         return builder;
     }
 
@@ -610,4 +662,4 @@ public final class PhraseSuggestionBuilder extends SuggestionBuilder<PhraseSugge
 
     }
 
-}
+}

+ 41 - 0
src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestionContext.java

@@ -19,11 +19,15 @@
 package org.elasticsearch.search.suggest.phrase;
 
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.ElasticsearchIllegalArgumentException;
+import org.elasticsearch.cluster.routing.operation.plain.Preference;
+import org.elasticsearch.script.CompiledScript;
 import org.elasticsearch.search.suggest.DirectSpellcheckerSettings;
 import org.elasticsearch.search.suggest.Suggester;
 import org.elasticsearch.search.suggest.SuggestionSearchContext.SuggestionContext;
@@ -40,6 +44,10 @@ class PhraseSuggestionContext extends SuggestionContext {
     private int tokenLimit = NoisyChannelSpellChecker.DEFAULT_TOKEN_LIMIT;
     private BytesRef preTag;
     private BytesRef postTag;
+    private CompiledScript collateQueryScript;
+    private CompiledScript collateFilterScript;
+    private String preference = Preference.ONLY_LOCAL.type();
+    private Map<String, Object> collateScriptParams = new HashMap<>(1);
 
     private WordScorer.WordScorerFactory scorer;
 
@@ -180,4 +188,37 @@ class PhraseSuggestionContext extends SuggestionContext {
     public BytesRef getPostTag() {
         return postTag;
     }
+
+    /** @return the compiled collate query template, or {@code null} if none was set */
+    CompiledScript getCollateQueryScript() {
+        return collateQueryScript;
+    }
+
+    /** Sets the compiled collate query template. */
+    void setCollateQueryScript(CompiledScript collateQueryScript) {
+        this.collateQueryScript = collateQueryScript;
+    }
+
+    /** @return the compiled collate filter template, or {@code null} if none was set */
+    CompiledScript getCollateFilterScript() {
+        return collateFilterScript;
+    }
+
+    /** Sets the compiled collate filter template. */
+    void setCollateFilterScript(CompiledScript collateFilterScript) {
+        this.collateFilterScript = collateFilterScript;
+    }
+
+    /** @return the preference string for collate requests; initialized to {@code Preference.ONLY_LOCAL.type()} */
+    String getPreference() {
+        return preference;
+    }
+
+    /** Replaces the preference string used for collate requests. */
+    void setPreference(String preference) {
+        this.preference = preference;
+    }
+
+    /**
+     * @return the collate template params; the stored map itself is returned, not a copy,
+     *         so changes made by the caller are visible through this context
+     */
+    Map<String, Object> getCollateScriptParams() {
+        return collateScriptParams;
+    }
+
+    /** Replaces the collate template params (initially an empty map). */
+    void setCollateScriptParams(Map<String, Object> collateScriptParams) {
+        this.collateScriptParams = collateScriptParams;
+    }
+
 }

+ 159 - 1
src/test/java/org/elasticsearch/search/suggest/SuggestSearchTests.java

@@ -1094,7 +1094,165 @@ public class SuggestSearchTests extends ElasticsearchIntegrationTest {
         assertSuggestion(searchSuggest, 0, 0, "title", "united states house of representatives elections in washington 2006");
         // assertThat(total, lessThan(1000L)); // Takes many seconds without fix - just for debugging
     }
-    
+
+    /**
+     * Exercises the phrase suggester's collate option end to end.
+     * <p>
+     * Indexes six titles into a single-shard index analyzed with a shingle
+     * filter, then runs the same phrase suggestion without collation, with a
+     * collate query, with a collate filter, with malformed query/filter
+     * templates, with and without additional template params, and finally with
+     * both query and filter set (which must fail).
+     * <p>
+     * Note that all {@code PhraseSuggestionBuilder} variables below refer to the
+     * same {@code suggest} instance: the collate setters return the builder they
+     * were called on, so each step reconfigures that one builder and the steps
+     * depend on their order.
+     */
+    @Test
+    public void suggestPhrasesInIndex() throws InterruptedException, ExecutionException, IOException {
+        CreateIndexRequestBuilder builder = prepareCreate("test").setSettings(settingsBuilder()
+                .put(indexSettings())
+                .put(SETTING_NUMBER_OF_SHARDS, 1) // A single shard will help to keep the tests repeatable.
+                .put("index.analysis.analyzer.text.tokenizer", "standard")
+                .putArray("index.analysis.analyzer.text.filter", "lowercase", "my_shingle")
+                .put("index.analysis.filter.my_shingle.type", "shingle")
+                .put("index.analysis.filter.my_shingle.output_unigrams", true)
+                .put("index.analysis.filter.my_shingle.min_shingle_size", 2)
+                .put("index.analysis.filter.my_shingle.max_shingle_size", 3));
+
+        XContentBuilder mapping = XContentFactory.jsonBuilder()
+                .startObject()
+                .startObject("type1")
+                .startObject("properties")
+                .startObject("title")
+                .field("type", "string")
+                .field("analyzer", "text")
+                .endObject()
+                .endObject()
+                .endObject()
+                .endObject();
+        assertAcked(builder.addMapping("type1", mapping));
+        ensureGreen();
+
+        ImmutableList.Builder<String> titles = ImmutableList.<String>builder();
+
+        titles.add("United States House of Representatives Elections in Washington 2006");
+        titles.add("United States House of Representatives Elections in Washington 2005");
+        titles.add("State");
+        titles.add("Houses of Parliament");
+        titles.add("Representative Government");
+        titles.add("Election");
+
+        List<IndexRequestBuilder> builders = new ArrayList<>();
+        for (String title: titles.build()) {
+            builders.add(client().prepareIndex("test", "type1").setSource("title", title));
+        }
+        indexRandom(true, builders);
+
+        // suggest without filtering
+        PhraseSuggestionBuilder suggest = phraseSuggestion("title")
+                .field("title")
+                .addCandidateGenerator(PhraseSuggestionBuilder.candidateGenerator("title")
+                        .suggestMode("always")
+                        .maxTermFreq(.99f)
+                        .size(10)
+                        .maxInspections(200)
+                )
+                .confidence(0f)
+                .maxErrors(2f)
+                .shardSize(30000)
+                .size(10);
+        Suggest searchSuggest = searchSuggest("united states house of representatives elections in washington 2006", suggest);
+        assertSuggestionSize(searchSuggest, 0, 10, "title");
+
+        // suggest with filtering
+        String filterString = XContentFactory.jsonBuilder()
+                    .startObject()
+                        .startObject("match_phrase")
+                            .field("title", "{{suggestion}}")
+                        .endObject()
+                    .endObject()
+                .string();
+        PhraseSuggestionBuilder filteredQuerySuggest = suggest.collateQuery(filterString);
+        searchSuggest = searchSuggest("united states house of representatives elections in washington 2006", filteredQuerySuggest);
+        assertSuggestionSize(searchSuggest, 0, 2, "title");
+
+        // filtered suggest with no result (boundary case)
+        searchSuggest = searchSuggest("Elections of Representatives Parliament", filteredQuerySuggest);
+        assertSuggestionSize(searchSuggest, 0, 0, "title");
+
+        // filtered suggest with bad query
+        String incorrectFilterString = XContentFactory.jsonBuilder()
+                .startObject()
+                    .startObject("test")
+                        .field("title", "{{suggestion}}")
+                    .endObject()
+                .endObject()
+                .string();
+        PhraseSuggestionBuilder incorrectFilteredSuggest = suggest.collateQuery(incorrectFilterString);
+        try {
+            searchSuggest("united states house of representatives elections in washington 2006", incorrectFilteredSuggest);
+            fail("Post query error has been swallowed");
+        } catch(ElasticsearchException e) {
+            // expected
+        }
+
+        // suggest with filter collation
+        String filterStringAsFilter = XContentFactory.jsonBuilder()
+                .startObject()
+                .startObject("query")
+                .startObject("match_phrase")
+                .field("title", "{{suggestion}}")
+                .endObject()
+                .endObject()
+                .endObject()
+                .string();
+
+        // the collate query is cleared first so that only the filter is sent
+        PhraseSuggestionBuilder filteredFilterSuggest = suggest.collateQuery(null).collateFilter(filterStringAsFilter);
+        searchSuggest = searchSuggest("united states house of representatives elections in washington 2006", filteredFilterSuggest);
+        assertSuggestionSize(searchSuggest, 0, 2, "title");
+
+        // filtered suggest with bad filter
+        String filterStr = XContentFactory.jsonBuilder()
+                .startObject()
+                .startObject("pprefix")
+                        .field("title", "{{suggestion}}")
+                .endObject()
+                .endObject()
+                .string();
+
+        PhraseSuggestionBuilder in = suggest.collateQuery(null).collateFilter(filterStr);
+        try {
+            searchSuggest("united states house of representatives elections in washington 2006", in);
+            fail("Post filter error has been swallowed");
+        } catch(ElasticsearchException e) {
+            //expected
+        }
+
+        // collate script failure due to no additional params
+        String collateWithParams = XContentFactory.jsonBuilder()
+                .startObject()
+                .startObject("{{query_type}}")
+                    .field("{{query_field}}", "{{suggestion}}")
+                .endObject()
+                .endObject()
+                .string();
+
+
+        PhraseSuggestionBuilder phraseSuggestWithNoParams = suggest.collateFilter(null).collateQuery(collateWithParams);
+        try {
+            searchSuggest("united states house of representatives elections in washington 2006", phraseSuggestWithNoParams);
+            fail("Malformed query (lack of additional params) should fail");
+        } catch (ElasticsearchException e) {
+            // expected
+        }
+
+        // collate script with additional params
+        Map<String, Object> params = new HashMap<>();
+        params.put("query_type", "match_phrase");
+        params.put("query_field", "title");
+
+        PhraseSuggestionBuilder phraseSuggestWithParams = suggest.collateFilter(null).collateQuery(collateWithParams).collateParams(params);
+        searchSuggest = searchSuggest("united states house of representatives elections in washington 2006", phraseSuggestWithParams);
+        assertSuggestionSize(searchSuggest, 0, 2, "title");
+
+        //collate request defining both query/filter should fail
+        PhraseSuggestionBuilder phraseSuggestWithFilterAndQuery = suggest.collateFilter(filterStringAsFilter).collateQuery(filterString);
+        try {
+            searchSuggest("united states house of representatives elections in washington 2006", phraseSuggestWithFilterAndQuery);
+            fail("expected parse failure, as both filter and query are set in collate");
+        } catch (ElasticsearchException e) {
+            // expected
+        }
+    }
+
     protected Suggest searchSuggest(SuggestionBuilder<?>... suggestion) {
         return searchSuggest(null, suggestion);
     }