Browse Source

Expose duplicate removal in the completion suggester (#26496)

This change exposes the duplicate removal option added in Lucene for the completion suggester
with a new option called `skip_duplicates` (defaults to false).
This commit also adapts the custom suggest collector to handle deduplication when multiple contexts match the input.

Closes #23364
Jim Ferenczi 8 years ago
parent
commit
d68d8c9cef

+ 54 - 80
core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggester.java

@@ -18,17 +18,16 @@
  */
 package org.elasticsearch.search.suggest.completion;
 
+import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.search.BulkScorer;
 import org.apache.lucene.search.CollectionTerminatedException;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Weight;
-import org.apache.lucene.search.suggest.Lookup;
 import org.apache.lucene.search.suggest.document.CompletionQuery;
 import org.apache.lucene.search.suggest.document.TopSuggestDocs;
 import org.apache.lucene.search.suggest.document.TopSuggestDocsCollector;
 import org.apache.lucene.util.CharsRefBuilder;
-import org.apache.lucene.util.PriorityQueue;
 import org.elasticsearch.common.text.Text;
 import org.elasticsearch.index.mapper.CompletionFieldMapper;
 import org.elasticsearch.search.suggest.Suggest;
@@ -53,12 +52,14 @@ public class CompletionSuggester extends Suggester<CompletionSuggestionContext>
             final CompletionSuggestionContext suggestionContext, final IndexSearcher searcher, CharsRefBuilder spare) throws IOException {
         if (suggestionContext.getFieldType() != null) {
             final CompletionFieldMapper.CompletionFieldType fieldType = suggestionContext.getFieldType();
-            CompletionSuggestion completionSuggestion = new CompletionSuggestion(name, suggestionContext.getSize());
+            CompletionSuggestion completionSuggestion =
+                new CompletionSuggestion(name, suggestionContext.getSize(), suggestionContext.isSkipDuplicates());
             spare.copyUTF8Bytes(suggestionContext.getText());
             CompletionSuggestion.Entry completionSuggestEntry = new CompletionSuggestion.Entry(
                 new Text(spare.toString()), 0, spare.length());
             completionSuggestion.addTerm(completionSuggestEntry);
-            TopSuggestDocsCollector collector = new TopDocumentsCollector(suggestionContext.getSize());
+            TopSuggestDocsCollector collector =
+                new TopDocumentsCollector(suggestionContext.getSize(), suggestionContext.isSkipDuplicates());
             suggest(searcher, suggestionContext.toQuery(), collector);
             int numResult = 0;
             for (TopSuggestDocs.SuggestScoreDoc suggestScoreDoc : collector.get().scoreLookupDocs()) {
@@ -97,8 +98,21 @@ public class CompletionSuggester extends Suggester<CompletionSuggestionContext>
         }
     }
 
-    // TODO: this should be refactored and moved to lucene
-    // see https://issues.apache.org/jira/browse/LUCENE-6880
+    /**
+     * TODO: this should be refactored and moved to lucene see https://issues.apache.org/jira/browse/LUCENE-6880
+     *
+     * Custom collector that returns top documents from the completion suggester.
+     * When suggestions are augmented with context values, this collector groups suggestions coming from the same document
+     * but matching different contexts together. Each document is counted as 1 entry and the provided size is the expected number
+     * of documents that should be returned (not the number of suggestions).
+     * This collector is also able to filter duplicate suggestions coming from different documents.
+     * When different contexts match the same suggestion form, only the best one (sorted by weight) is kept.
+     * In order to keep this feature fast, the de-duplication of suggestions with different contexts is done
+     * only on the top N*num_contexts (where N is the number of documents to return) suggestions per segment.
+     * This means that skip_duplicates will visit at most N*num_contexts suggestions per segment to find unique suggestions
+     * that match the input. If more than N*num_contexts suggestions are duplicated with different contexts this collector
+     * will not be able to return more than one suggestion even when N is greater than 1.
+     **/
     private static final class TopDocumentsCollector extends TopSuggestDocsCollector {
 
         /**
@@ -150,93 +164,53 @@ public class CompletionSuggester extends Suggester<CompletionSuggestionContext>
             }
         }
 
-        private static final class SuggestDocPriorityQueue extends PriorityQueue<SuggestDoc> {
+        private final Map<Integer, SuggestDoc> docsMap;
 
-            SuggestDocPriorityQueue(int maxSize) {
-                super(maxSize);
-            }
-
-            @Override
-            protected boolean lessThan(SuggestDoc a, SuggestDoc b) {
-                if (a.score == b.score) {
-                    int cmp = Lookup.CHARSEQUENCE_COMPARATOR.compare(a.key, b.key);
-                    if (cmp == 0) {
-                        // prefer smaller doc id, in case of a tie
-                        return a.doc > b.doc;
-                    } else {
-                        return cmp > 0;
-                    }
-                }
-                return a.score < b.score;
-            }
-
-            public SuggestDoc[] getResults() {
-                int size = size();
-                SuggestDoc[] res = new SuggestDoc[size];
-                for (int i = size - 1; i >= 0; i--) {
-                    res[i] = pop();
-                }
-                return res;
-            }
-        }
-
-        private final int num;
-        private final SuggestDocPriorityQueue pq;
-        private final Map<Integer, SuggestDoc> scoreDocMap;
-
-        // TODO: expose dup removal
-
-        TopDocumentsCollector(int num) {
-            super(1, false); // TODO hack, we don't use the underlying pq, so we allocate a size of 1
-            this.num = num;
-            this.scoreDocMap = new LinkedHashMap<>(num);
-            this.pq = new SuggestDocPriorityQueue(num);
-        }
-
-        @Override
-        public int getCountToCollect() {
-            // This is only needed because we initialize
-            // the base class with 1 instead of the actual num
-            return num;
-        }
-
-
-        @Override
-        protected void doSetNextReader(LeafReaderContext context) throws IOException {
-            super.doSetNextReader(context);
-            updateResults();
-        }
-
-        private void updateResults() {
-            for (SuggestDoc suggestDoc : scoreDocMap.values()) {
-                if (pq.insertWithOverflow(suggestDoc) == suggestDoc) {
-                    break;
-                }
-            }
-            scoreDocMap.clear();
+        TopDocumentsCollector(int num, boolean skipDuplicates) {
+            super(Math.max(1, num), skipDuplicates);
+            this.docsMap = new LinkedHashMap<>(num);
         }
 
         @Override
         public void collect(int docID, CharSequence key, CharSequence context, float score) throws IOException {
-            if (scoreDocMap.containsKey(docID)) {
-                SuggestDoc suggestDoc = scoreDocMap.get(docID);
-                suggestDoc.add(key, context, score);
-            } else if (scoreDocMap.size() <= num) {
-                scoreDocMap.put(docID, new SuggestDoc(docBase + docID, key, context, score));
+            int globalDoc = docID + docBase;
+            if (docsMap.containsKey(globalDoc)) {
+                docsMap.get(globalDoc).add(key, context, score);
             } else {
-                throw new CollectionTerminatedException();
+                docsMap.put(globalDoc, new SuggestDoc(globalDoc, key, context, score));
+                super.collect(docID, key, context, score);
             }
         }
 
         @Override
         public TopSuggestDocs get() throws IOException {
-            updateResults(); // to empty the last set of collected suggest docs
-            TopSuggestDocs.SuggestScoreDoc[] suggestScoreDocs = pq.getResults();
-            if (suggestScoreDocs.length > 0) {
-                return new TopSuggestDocs(suggestScoreDocs.length, suggestScoreDocs, suggestScoreDocs[0].score);
-            } else {
+            TopSuggestDocs entries = super.get();
+            if (entries.scoreDocs.length == 0) {
                 return TopSuggestDocs.EMPTY;
             }
+            // The parent class returns suggestions, not documents, and deduplicates only the surface form (without contexts).
+            // The following code groups suggestions matching different contexts by document id and deduplicates the surface form
+            // + contexts if needed (skip_duplicates).
+            int size = entries.scoreDocs.length;
+            final List<TopSuggestDocs.SuggestScoreDoc> suggestDocs = new ArrayList(size);
+            final CharArraySet seenSurfaceForms = doSkipDuplicates() ? new CharArraySet(size, false) : null;
+            for (TopSuggestDocs.SuggestScoreDoc suggestEntry : entries.scoreLookupDocs()) {
+                final SuggestDoc suggestDoc;
+                if (docsMap != null) {
+                    suggestDoc = docsMap.get(suggestEntry.doc);
+                } else {
+                    suggestDoc = new SuggestDoc(suggestEntry.doc, suggestEntry.key, suggestEntry.context, suggestEntry.score);
+                }
+                if (doSkipDuplicates()) {
+                    if (seenSurfaceForms.contains(suggestDoc.key)) {
+                        continue;
+                    }
+                    seenSurfaceForms.add(suggestDoc.key);
+                }
+                suggestDocs.add(suggestDoc);
+            }
+            return new TopSuggestDocs((int) entries.totalHits,
+                suggestDocs.toArray(new TopSuggestDocs.SuggestScoreDoc[0]), entries.getMaxScore());
         }
     }
 }

+ 42 - 3
core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestion.java

@@ -18,8 +18,10 @@
  */
 package org.elasticsearch.search.suggest.completion;
 
+import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.suggest.Lookup;
+import org.elasticsearch.Version;
 import org.elasticsearch.common.ParseField;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
@@ -68,11 +70,38 @@ public final class CompletionSuggestion extends Suggest.Suggestion<CompletionSug
 
     public static final int TYPE = 4;
 
+    private boolean skipDuplicates;
+
     public CompletionSuggestion() {
     }
 
-    public CompletionSuggestion(String name, int size) {
+    /**
+     * Creates a completion suggestion.
+     * @param name The name for the suggestions
+     * @param size The number of suggestions to return
+     * @param skipDuplicates Whether duplicate suggestions should be filtered out
+     */
+    public CompletionSuggestion(String name, int size, boolean skipDuplicates) {
         super(name, size);
+        this.skipDuplicates = skipDuplicates;
+    }
+
+    @Override
+    public void readFrom(StreamInput in) throws IOException {
+        super.readFrom(in);
+        // TODO should be backported to 6.1.0
+        if (in.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
+            skipDuplicates = in.readBoolean();
+        }
+    }
+
+    @Override
+    public void writeTo(StreamOutput out) throws IOException {
+        super.writeTo(out);
+        // TODO should be backported to 6.1.0
+        if (out.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
+            out.writeBoolean(skipDuplicates);
+        }
     }
 
     /**
@@ -95,7 +124,7 @@ public final class CompletionSuggestion extends Suggest.Suggestion<CompletionSug
     }
 
     public static CompletionSuggestion fromXContent(XContentParser parser, String name) throws IOException {
-        CompletionSuggestion suggestion = new CompletionSuggestion(name, -1);
+        CompletionSuggestion suggestion = new CompletionSuggestion(name, -1, false);
         parseEntries(parser, suggestion, CompletionSuggestion.Entry::fromXContent);
         return suggestion;
     }
@@ -146,9 +175,19 @@ public final class CompletionSuggestion extends Suggest.Suggestion<CompletionSug
                 // the global top <code>size</code> entries are collected from the shard results
                 // using a priority queue
                 OptionPriorityQueue priorityQueue = new OptionPriorityQueue(leader.getSize(), COMPARATOR);
+                // Filter out duplicate suggestions (based on the surface form) if skip_duplicates is enabled
+                final CharArraySet seenSurfaceForms = leader.skipDuplicates ? new CharArraySet(leader.getSize(), false) : null;
                 for (Suggest.Suggestion<Entry> suggestion : toReduce) {
                     assert suggestion.getName().equals(name) : "name should be identical across all suggestions";
                     for (Entry.Option option : ((CompletionSuggestion) suggestion).getOptions()) {
+                        if (leader.skipDuplicates) {
+                            assert ((CompletionSuggestion) suggestion).skipDuplicates;
+                            String text = option.getText().string();
+                            if (seenSurfaceForms.contains(text)) {
+                                continue;
+                            }
+                            seenSurfaceForms.add(text);
+                        }
                         if (option == priorityQueue.insertWithOverflow(option)) {
                             // if the current option has overflown from pq,
                             // we can assume all of the successive options
@@ -157,7 +196,7 @@ public final class CompletionSuggestion extends Suggest.Suggestion<CompletionSug
                         }
                     }
                 }
-                final CompletionSuggestion suggestion = new CompletionSuggestion(leader.getName(), leader.getSize());
+                final CompletionSuggestion suggestion = new CompletionSuggestion(leader.getName(), leader.getSize(), leader.skipDuplicates);
                 final Entry entry = new Entry(leaderEntry.getText(), leaderEntry.getOffset(), leaderEntry.getLength());
                 Collections.addAll(entry.getOptions(), priorityQueue.get());
                 suggestion.addTerm(entry);

+ 35 - 2
core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionBuilder.java

@@ -19,6 +19,7 @@
 package org.elasticsearch.search.suggest.completion;
 
 import org.elasticsearch.ElasticsearchParseException;
+import org.elasticsearch.Version;
 import org.elasticsearch.common.ParseField;
 import org.elasticsearch.common.bytes.BytesReference;
 import org.elasticsearch.common.io.stream.StreamInput;
@@ -57,6 +58,7 @@ public class CompletionSuggestionBuilder extends SuggestionBuilder<CompletionSug
     private static final XContentType CONTEXT_BYTES_XCONTENT_TYPE = XContentType.JSON;
     static final String SUGGESTION_NAME = "completion";
     static final ParseField CONTEXTS_FIELD = new ParseField("contexts", "context");
+    static final ParseField SKIP_DUPLICATES_FIELD = new ParseField("skip_duplicates");
 
     /**
      * {
@@ -94,11 +96,13 @@ public class CompletionSuggestionBuilder extends SuggestionBuilder<CompletionSug
             v.contextBytes = builder.bytes();
             p.skipChildren();
         }, CONTEXTS_FIELD, ObjectParser.ValueType.OBJECT); // context is deprecated
+        PARSER.declareBoolean(CompletionSuggestionBuilder::skipDuplicates, SKIP_DUPLICATES_FIELD);
     }
 
     protected FuzzyOptions fuzzyOptions;
     protected RegexOptions regexOptions;
     protected BytesReference contextBytes = null;
+    protected boolean skipDuplicates = false;
 
     public CompletionSuggestionBuilder(String field) {
         super(field);
@@ -113,6 +117,7 @@ public class CompletionSuggestionBuilder extends SuggestionBuilder<CompletionSug
         fuzzyOptions = in.fuzzyOptions;
         regexOptions = in.regexOptions;
         contextBytes = in.contextBytes;
+        skipDuplicates = in.skipDuplicates;
     }
 
     /**
@@ -123,6 +128,10 @@ public class CompletionSuggestionBuilder extends SuggestionBuilder<CompletionSug
         fuzzyOptions = in.readOptionalWriteable(FuzzyOptions::new);
         regexOptions = in.readOptionalWriteable(RegexOptions::new);
         contextBytes = in.readOptionalBytesReference();
+        // TODO should be backported to 6.1.0
+        if (in.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
+            skipDuplicates = in.readBoolean();
+        }
     }
 
     @Override
@@ -130,6 +139,10 @@ public class CompletionSuggestionBuilder extends SuggestionBuilder<CompletionSug
         out.writeOptionalWriteable(fuzzyOptions);
         out.writeOptionalWriteable(regexOptions);
         out.writeOptionalBytesReference(contextBytes);
+        // TODO should be backported to 6.1.0
+        if (out.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
+            out.writeBoolean(skipDuplicates);
+        }
     }
 
     /**
@@ -210,6 +223,21 @@ public class CompletionSuggestionBuilder extends SuggestionBuilder<CompletionSug
         return this;
     }
 
+    /**
+     * Returns whether duplicate suggestions should be filtered out.
+     */
+    public boolean skipDuplicates() {
+        return skipDuplicates;
+    }
+
+    /**
+     * Sets whether duplicate suggestions should be filtered out. Defaults to <tt>false</tt>.
+     */
+    public CompletionSuggestionBuilder skipDuplicates(boolean skipDuplicates) {
+        this.skipDuplicates = skipDuplicates;
+        return this;
+    }
+
     private static class InnerBuilder extends CompletionSuggestionBuilder {
         private String field;
 
@@ -231,6 +259,9 @@ public class CompletionSuggestionBuilder extends SuggestionBuilder<CompletionSug
         if (regexOptions != null) {
             regexOptions.toXContent(builder, params);
         }
+        if (skipDuplicates) {
+            builder.field(SKIP_DUPLICATES_FIELD.getPreferredName(), skipDuplicates);
+        }
         if (contextBytes != null) {
             builder.rawField(CONTEXTS_FIELD.getPreferredName(), contextBytes);
         }
@@ -255,6 +286,7 @@ public class CompletionSuggestionBuilder extends SuggestionBuilder<CompletionSug
         // copy over common settings to each suggestion builder
         final MapperService mapperService = context.getMapperService();
         populateCommonFields(mapperService, suggestionContext);
+        suggestionContext.setSkipDuplicates(skipDuplicates);
         suggestionContext.setFuzzyOptions(fuzzyOptions);
         suggestionContext.setRegexOptions(regexOptions);
         MappedFieldType mappedFieldType = mapperService.fullName(suggestionContext.getField());
@@ -302,13 +334,14 @@ public class CompletionSuggestionBuilder extends SuggestionBuilder<CompletionSug
 
     @Override
     protected boolean doEquals(CompletionSuggestionBuilder other) {
-        return Objects.equals(fuzzyOptions, other.fuzzyOptions) &&
+        return skipDuplicates == other.skipDuplicates &&
+            Objects.equals(fuzzyOptions, other.fuzzyOptions) &&
             Objects.equals(regexOptions, other.regexOptions) &&
             Objects.equals(contextBytes, other.contextBytes);
     }
 
     @Override
     protected int doHashCode() {
-        return Objects.hash(fuzzyOptions, regexOptions, contextBytes);
+        return Objects.hash(fuzzyOptions, regexOptions, contextBytes, skipDuplicates);
     }
 }

+ 9 - 0
core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionContext.java

@@ -40,6 +40,7 @@ public class CompletionSuggestionContext extends SuggestionSearchContext.Suggest
     private CompletionFieldMapper.CompletionFieldType fieldType;
     private FuzzyOptions fuzzyOptions;
     private RegexOptions regexOptions;
+    private boolean skipDuplicates;
     private Map<String, List<ContextMapping.InternalQueryContext>> queryContexts = Collections.emptyMap();
 
     CompletionFieldMapper.CompletionFieldType getFieldType() {
@@ -62,6 +63,10 @@ public class CompletionSuggestionContext extends SuggestionSearchContext.Suggest
         this.queryContexts = queryContexts;
     }
 
+    void setSkipDuplicates(boolean skipDuplicates) {
+        this.skipDuplicates = skipDuplicates;
+    }
+
     public FuzzyOptions getFuzzyOptions() {
         return fuzzyOptions;
     }
@@ -74,6 +79,10 @@ public class CompletionSuggestionContext extends SuggestionSearchContext.Suggest
         return queryContexts;
     }
 
+    public boolean isSkipDuplicates() {
+        return skipDuplicates;
+    }
+
     CompletionQuery toQuery() {
         CompletionFieldMapper.CompletionFieldType fieldType = getFieldType();
         final CompletionQuery query;

+ 3 - 3
core/src/test/java/org/elasticsearch/action/search/SearchPhaseControllerTests.java

@@ -72,7 +72,7 @@ public class SearchPhaseControllerTests extends ESTestCase {
     public void testSort() throws Exception {
         List<CompletionSuggestion> suggestions = new ArrayList<>();
         for (int i = 0; i < randomIntBetween(1, 5); i++) {
-            suggestions.add(new CompletionSuggestion(randomAlphaOfLength(randomIntBetween(1, 5)), randomIntBetween(1, 20)));
+            suggestions.add(new CompletionSuggestion(randomAlphaOfLength(randomIntBetween(1, 5)), randomIntBetween(1, 20), false));
         }
         int nShards = randomIntBetween(1, 20);
         int queryResultSize = randomBoolean() ? 0 : randomIntBetween(1, nShards * 2);
@@ -139,7 +139,7 @@ public class SearchPhaseControllerTests extends ESTestCase {
         for (int i = 0; i < randomIntBetween(1, 5); i++) {
             int size = randomIntBetween(1, 20);
             maxSuggestSize += size;
-            suggestions.add(new CompletionSuggestion(randomAlphaOfLength(randomIntBetween(1, 5)), size));
+            suggestions.add(new CompletionSuggestion(randomAlphaOfLength(randomIntBetween(1, 5)), size, false));
         }
         int nShards = randomIntBetween(1, 20);
         int queryResultSize = randomBoolean() ? 0 : randomIntBetween(1, nShards * 2);
@@ -202,7 +202,7 @@ public class SearchPhaseControllerTests extends ESTestCase {
             List<CompletionSuggestion> shardSuggestion = new ArrayList<>();
             for (CompletionSuggestion completionSuggestion : suggestions) {
                 CompletionSuggestion suggestion = new CompletionSuggestion(
-                    completionSuggestion.getName(), completionSuggestion.getSize());
+                    completionSuggestion.getName(), completionSuggestion.getSize(), false);
                 final CompletionSuggestion.Entry completionEntry = new CompletionSuggestion.Entry(new Text(""), 0, 5);
                 suggestion.addTerm(completionEntry);
                 int optionSize = randomIntBetween(1, suggestion.getSize());

+ 54 - 0
core/src/test/java/org/elasticsearch/search/suggest/CompletionSuggestSearchIT.java

@@ -858,6 +858,38 @@ public class CompletionSuggestSearchIT extends ESIntegTestCase {
         }
     }
 
+    public void testSkipDuplicates() throws Exception {
+        final CompletionMappingBuilder mapping = new CompletionMappingBuilder();
+        createIndexAndMapping(mapping);
+        int numDocs = randomIntBetween(10, 100);
+        int numUnique = randomIntBetween(1, numDocs);
+        List<IndexRequestBuilder> indexRequestBuilders = new ArrayList<>();
+        for (int i = 1; i <= numDocs; i++) {
+            int id = i % numUnique;
+            indexRequestBuilders.add(client().prepareIndex(INDEX, TYPE, "" + i)
+                .setSource(jsonBuilder()
+                    .startObject()
+                        .startObject(FIELD)
+                            .field("input", "suggestion" + id)
+                            .field("weight", id)
+                        .endObject()
+                    .endObject()
+                ));
+        }
+        String[] expected = new String[numUnique];
+        int sugg = numUnique - 1;
+        for (int i = 0; i < numUnique; i++) {
+            expected[i] = "suggestion" + sugg--;
+        }
+        indexRandom(true, indexRequestBuilders);
+        CompletionSuggestionBuilder completionSuggestionBuilder =
+            SuggestBuilders.completionSuggestion(FIELD).prefix("sugg").skipDuplicates(true).size(numUnique);
+
+        SearchResponse searchResponse = client().prepareSearch(INDEX)
+            .suggest(new SuggestBuilder().addSuggestion("suggestions", completionSuggestionBuilder)).execute().actionGet();
+        assertSuggestions(searchResponse, true, "suggestions", expected);
+    }
+
     public void assertSuggestions(String suggestionName, SuggestionBuilder suggestBuilder, String... suggestions) {
         SearchResponse searchResponse = client().prepareSearch(INDEX).suggest(new SuggestBuilder().addSuggestion(suggestionName, suggestBuilder)).execute().actionGet();
         assertSuggestions(searchResponse, suggestionName, suggestions);
@@ -1108,6 +1140,28 @@ public class CompletionSuggestSearchIT extends ESIntegTestCase {
         }
     }
 
+    public void testMultiDocSuggestions() throws Exception {
+        final CompletionMappingBuilder mapping = new CompletionMappingBuilder();
+        createIndexAndMapping(mapping);
+        int numDocs = 10;
+        List<IndexRequestBuilder> indexRequestBuilders = new ArrayList<>();
+        for (int i = 1; i <= numDocs; i++) {
+            indexRequestBuilders.add(client().prepareIndex(INDEX, TYPE, "" + i)
+                .setSource(jsonBuilder()
+                    .startObject()
+                    .startObject(FIELD)
+                    .array("input", "suggestion" + i, "suggestions" + i, "suggester" + i)
+                    .field("weight", i)
+                    .endObject()
+                    .endObject()
+                ));
+        }
+        indexRandom(true, indexRequestBuilders);
+        CompletionSuggestionBuilder prefix = SuggestBuilders.completionSuggestion(FIELD).prefix("sugg");
+        assertSuggestions("foo", prefix, "suggester10", "suggester9", "suggester8", "suggester7", "suggester6");
+    }
+
+
     public static boolean isReservedChar(char c) {
         switch (c) {
             case '\u001F':

+ 44 - 0
core/src/test/java/org/elasticsearch/search/suggest/ContextCompletionSuggestSearchIT.java

@@ -639,6 +639,50 @@ public class ContextCompletionSuggestSearchIT extends ESIntegTestCase {
         assertEquals("Hotel Amsterdam in Berlin", searchResponse.getSuggest().getSuggestion(suggestionName).iterator().next().getOptions().iterator().next().getText().string());
     }
 
+    public void testSkipDuplicatesWithContexts() throws Exception {
+        LinkedHashMap<String, ContextMapping> map = new LinkedHashMap<>();
+        map.put("type", ContextBuilder.category("type").field("type").build());
+        map.put("cat", ContextBuilder.category("cat").field("cat").build());
+        final CompletionMappingBuilder mapping = new CompletionMappingBuilder().context(map);
+        createIndexAndMapping(mapping);
+        int numDocs = randomIntBetween(10, 100);
+        int numUnique = randomIntBetween(1, numDocs);
+        List<IndexRequestBuilder> indexRequestBuilders = new ArrayList<>();
+        for (int i = 0; i < numDocs; i++) {
+            int id = i % numUnique;
+            XContentBuilder source = jsonBuilder()
+                .startObject()
+                    .startObject(FIELD)
+                        .field("input", "suggestion" + id)
+                        .field("weight", id)
+                    .endObject()
+                    .field("cat", "cat" + id % 2)
+                    .field("type", "type" + id)
+                .endObject();
+            indexRequestBuilders.add(client().prepareIndex(INDEX, TYPE, "" + i)
+                .setSource(source));
+        }
+        String[] expected = new String[numUnique];
+        for (int i = 0; i < numUnique; i++) {
+            expected[i] = "suggestion" + (numUnique-1-i);
+        }
+        indexRandom(true, indexRequestBuilders);
+        CompletionSuggestionBuilder completionSuggestionBuilder =
+            SuggestBuilders.completionSuggestion(FIELD).prefix("sugg").skipDuplicates(true).size(numUnique);
+
+        assertSuggestions("suggestions", completionSuggestionBuilder, expected);
+
+        Map<String, List<? extends ToXContent>> contextMap = new HashMap<>();
+        contextMap.put("cat", Arrays.asList(CategoryQueryContext.builder().setCategory("cat0").build()));
+        completionSuggestionBuilder =
+            SuggestBuilders.completionSuggestion(FIELD).prefix("sugg").contexts(contextMap).skipDuplicates(true).size(numUnique);
+
+        String[] expectedModulo = Arrays.stream(expected)
+            .filter((s) -> Integer.parseInt(s.substring("suggestion".length())) % 2 == 0)
+            .toArray(String[]::new);
+        assertSuggestions("suggestions", completionSuggestionBuilder, expectedModulo);
+    }
+
     public void assertSuggestions(String suggestionName, SuggestionBuilder suggestBuilder, String... suggestions) {
         SearchResponse searchResponse = client().prepareSearch(INDEX).suggest(
             new SuggestBuilder().addSuggestion(suggestionName, suggestBuilder)

+ 2 - 2
core/src/test/java/org/elasticsearch/search/suggest/SuggestTests.java

@@ -139,7 +139,7 @@ public class SuggestTests extends ESTestCase {
 
     public void testFilter() throws Exception {
         List<Suggest.Suggestion<? extends Suggest.Suggestion.Entry<? extends Suggest.Suggestion.Entry.Option>>> suggestions;
-        CompletionSuggestion completionSuggestion = new CompletionSuggestion(randomAlphaOfLength(10), 2);
+        CompletionSuggestion completionSuggestion = new CompletionSuggestion(randomAlphaOfLength(10), 2, false);
         PhraseSuggestion phraseSuggestion = new PhraseSuggestion(randomAlphaOfLength(10), 2);
         TermSuggestion termSuggestion = new TermSuggestion(randomAlphaOfLength(10), 2, SortBy.SCORE);
         suggestions = Arrays.asList(completionSuggestion, phraseSuggestion, termSuggestion);
@@ -160,7 +160,7 @@ public class SuggestTests extends ESTestCase {
         suggestions = new ArrayList<>();
         int n = randomIntBetween(2, 5);
         for (int i = 0; i < n; i++) {
-            suggestions.add(new CompletionSuggestion(randomAlphaOfLength(10), randomIntBetween(3, 5)));
+            suggestions.add(new CompletionSuggestion(randomAlphaOfLength(10), randomIntBetween(3, 5), false));
         }
         Collections.shuffle(suggestions, random());
         Suggest suggest = new Suggest(suggestions);

+ 3 - 3
core/src/test/java/org/elasticsearch/search/suggest/SuggestionTests.java

@@ -79,7 +79,7 @@ public class SuggestionTests extends ESTestCase {
             suggestion = new PhraseSuggestion(name, size);
             entrySupplier = () -> SuggestionEntryTests.createTestItem(PhraseSuggestion.Entry.class);
         } else if (type == CompletionSuggestion.class) {
-            suggestion = new CompletionSuggestion(name, size);
+            suggestion = new CompletionSuggestion(name, size, randomBoolean());
             entrySupplier = () -> SuggestionEntryTests.createTestItem(CompletionSuggestion.Entry.class);
         } else {
             throw new UnsupportedOperationException("type not supported [" + type + "]");
@@ -249,7 +249,7 @@ public class SuggestionTests extends ESTestCase {
             CompletionSuggestion.Entry.Option option = new CompletionSuggestion.Entry.Option(1, new Text("someText"), 1.3f, contexts);
             CompletionSuggestion.Entry entry = new CompletionSuggestion.Entry(new Text("entryText"), 42, 313);
             entry.addOption(option);
-            CompletionSuggestion suggestion = new CompletionSuggestion("suggestionName", 5);
+            CompletionSuggestion suggestion = new CompletionSuggestion("suggestionName", 5, randomBoolean());
             suggestion.addTerm(entry);
             BytesReference xContent = toXContent(suggestion, XContentType.JSON, params, randomBoolean());
             assertEquals(
@@ -265,4 +265,4 @@ public class SuggestionTests extends ESTestCase {
                     + "}]}", xContent.utf8ToString());
         }
     }
-}
+}

+ 6 - 1
core/src/test/java/org/elasticsearch/search/suggest/completion/CompletionSuggesterBuilderTests.java

@@ -114,6 +114,7 @@ public class CompletionSuggesterBuilderTests extends AbstractSuggestionBuilderTe
             contextMap.put(geoQueryContextName, contexts);
         }
         testBuilder.contexts(contextMap);
+        testBuilder.skipDuplicates(randomBoolean());
         return testBuilder;
     }
 
@@ -128,7 +129,7 @@ public class CompletionSuggesterBuilderTests extends AbstractSuggestionBuilderTe
 
     @Override
     protected void mutateSpecificParameters(CompletionSuggestionBuilder builder) throws IOException {
-        switch (randomIntBetween(0, 4)) {
+        switch (randomIntBetween(0, 5)) {
             case 0:
                 int nCatContext = randomIntBetween(1, 5);
                 List<CategoryQueryContext> contexts = new ArrayList<>(nCatContext);
@@ -154,6 +155,9 @@ public class CompletionSuggesterBuilderTests extends AbstractSuggestionBuilderTe
             case 4:
                 builder.regex(randomAlphaOfLength(10), RegexOptionsTests.randomRegexOptions());
                 break;
+            case 5:
+                builder.skipDuplicates(!builder.skipDuplicates);
+                break;
             default:
                 throw new IllegalStateException("should not through");
         }
@@ -182,5 +186,6 @@ public class CompletionSuggesterBuilderTests extends AbstractSuggestionBuilderTe
             assertEquals(parsedContextBytes.get(contextName), queryContexts.get(contextName));
         }
         assertEquals(builder.regexOptions, completionSuggestionCtx.getRegexOptions());
+        assertEquals(builder.skipDuplicates, completionSuggestionCtx.isSkipDuplicates());
     }
 }

+ 2 - 1
core/src/test/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionTests.java

@@ -24,6 +24,7 @@ import org.elasticsearch.search.suggest.Suggest;
 import org.elasticsearch.test.ESTestCase;
 
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
 
@@ -38,7 +39,7 @@ public class CompletionSuggestionTests extends ESTestCase {
         String name = randomAlphaOfLength(10);
         int size = randomIntBetween(3, 5);
         for (int i = 0; i < nShards; i++) {
-            CompletionSuggestion suggestion = new CompletionSuggestion(name, size);
+            CompletionSuggestion suggestion = new CompletionSuggestion(name, size, false);
             suggestion.addTerm(new CompletionSuggestion.Entry(new Text(""), 0, 0));
             shardSuggestions.add(suggestion);
         }

+ 28 - 0
docs/reference/search/suggesters/completion-suggest.asciidoc

@@ -277,6 +277,7 @@ The basic completion suggester query supports the following parameters:
 
 `field`:: The name of the field on which to run the query (required).
 `size`::  The number of suggestions to return (defaults to `5`).
+`skip_duplicates`:: Whether duplicate suggestions should be filtered out (defaults to `false`).
 
 NOTE: The completion suggester considers all documents in the index.
 See <<suggester-context>> for an explanation of how to query a subset of
@@ -291,6 +292,33 @@ index completions into a single shard index. In case of high heap usage due to
 shard size, it is still recommended to break index into multiple shards instead
 of optimizing for completion performance.
 
+[[skip_duplicates]]
+==== Skip duplicate suggestions
+
+By default, queries can return duplicate suggestions when several documents share the same suggestion text.
+Setting `skip_duplicates` to `true` changes this behavior.
+When enabled, documents whose suggestion duplicates one already in the result are filtered out.
+
+[source,js]
+--------------------------------------------------
+POST music/_search?pretty
+{
+    "suggest": {
+        "song-suggest" : {
+            "prefix" : "nor",
+            "completion" : {
+                "field" : "suggest",
+                "skip_duplicates": true
+            }
+        }
+    }
+}
+--------------------------------------------------
+// CONSOLE
+
+WARNING: When set to `true`, this option can slow down search because more suggestions
+need to be visited to find the top N.
+
 [[fuzzy]]
 ==== Fuzzy queries
 

+ 39 - 0
rest-api-spec/src/main/resources/rest-api-spec/test/suggest/20_completion.yml

@@ -291,3 +291,42 @@ setup:
   - match:  { suggest.result.0.options.1._type: "test" }
   - match:  { suggest.result.0.options.1._source.title: "title_bar" }
   - match:  { suggest.result.0.options.1._source.count: 4 }
+
+---
+"Skip duplicates should work":
+  - skip:
+      version: " - 6.99.99"
+      reason: skip_duplicates was added in 7.0 (TODO should be backported to 6.1)
+
+  - do:
+      index:
+        index: test
+        type:  test
+        id:    1
+        body:
+          suggest_1: "bar"
+
+  - do:
+      index:
+        index: test
+        type:  test
+        id:    2
+        body:
+          suggest_1: "bar"
+
+  - do:
+      indices.refresh: {}
+
+  - do:
+      search:
+        body:
+          suggest:
+            result:
+              text: "b"
+              completion:
+                field: suggest_1
+                skip_duplicates: true
+
+  - length: { suggest.result: 1  }
+  - length: { suggest.result.0.options: 1  }
+  - match:  { suggest.result.0.options.0.text: "bar" }

+ 73 - 1
rest-api-spec/src/main/resources/rest-api-spec/test/suggest/30_context.yml

@@ -276,4 +276,76 @@ setup:
 
   - length: { suggest.result: 1  }
   - length: { suggest.result.0.options: 1  }
-  - match:  { suggest.result.0.options.0.text: "Marriot in Berlin" }
+  - match:  { suggest.result.0.options.0.text: "Marriot in Berlin"  }
+
+---
+"Skip duplicates with contexts should work":
+  - skip:
+      version: " - 6.99.99"
+      reason: skip_duplicates was added in 7.0 (TODO should be backported to 6.1)
+
+  - do:
+      index:
+        index: test
+        type:  test
+        id:    1
+        body:
+          suggest_context:
+            input: "foo"
+            contexts:
+              color: "red"
+
+  - do:
+      index:
+        index: test
+        type:  test
+        id:    1
+        body:
+          suggest_context:
+            input: "foo"
+            contexts:
+              color: "red"
+
+  - do:
+      index:
+        index: test
+        type:  test
+        id:    2
+        body:
+          suggest_context:
+            input: "foo"
+            contexts:
+              color: "blue"
+
+  - do:
+      indices.refresh: {}
+
+  - do:
+      search:
+        body:
+          suggest:
+            result:
+              text: "foo"
+              completion:
+                field: suggest_context
+                skip_duplicates: true
+                contexts:
+                  color: "red"
+
+  - length: { suggest.result: 1  }
+  - length: { suggest.result.0.options: 1  }
+  - match:  { suggest.result.0.options.0.text: "foo" }
+
+  - do:
+       search:
+        body:
+          suggest:
+            result:
+              text: "foo"
+              completion:
+                skip_duplicates: true
+                field: suggest_context
+
+  - length: { suggest.result: 1  }
+  - length: { suggest.result.0.options: 1  }
+  - match:  { suggest.result.0.options.0.text: "foo" }