瀏覽代碼

MLT Query: Support for ignore docs

Adds an `ignore_like` parameter to the MLT Query, which simply tells the
algorithm to skip all the terms from the given documents. This could be useful
in order to better guide nearest neighbor search by telling the algorithm to
never explore the space spanned by the given `ignore_like` docs. In essence we
are interested in the characteristics of a given item, but not in those
provided by `ignore_like`, thereby forcing the algorithm to go deeper in its
selection of terms. Note that this is different from simply performing a must
not boolean query on the unliked items. The syntax is exactly the same as the
`like` parameter.

Closes #8674
Alex Ksikes 11 年之前
父節點
當前提交
256712640f

+ 5 - 0
docs/reference/query-dsl/queries/mlt-query.asciidoc

@@ -129,6 +129,11 @@ The text is fetched from `fields` unless specified otherwise in each `doc`.
 The text is analyzed by the default analyzer at the field, unless specified by the
 `per_field_analyzer` parameter of the <<docs-termvectors-per-field-analyzer,Term Vectors API>>.
 
+|`ignore_like`|coming[2.0] The `ignore_like` parameter is used to skip terms
+found in the given documents when selecting terms for the `like` documents.
+In other words, we could ask for
+documents `like: "Apple"`, but `ignore_like: "cake crumble tree"`. Follows the
+same syntax as `like`.
+
 |`include` |When using `like` with document requests, specifies whether the documents should be
 included from the search. Defaults to `false`.
 

+ 56 - 0
rest-api-spec/test/mlt/30_ignore.yaml

@@ -0,0 +1,56 @@
+---
+"Basic mlt query with ignore like":
+  - do:
+      indices.create:
+        index: test_1
+        body:
+          settings:
+            number_of_replicas: 0
+  - do:
+      index:
+          index:  test_1
+          type:   test
+          id:     1
+          body:   { foo: bar baz selected }
+
+  - do:
+      index:
+          index:  test_1
+          type:   test
+          id:     2
+          body:   { foo: bar }
+
+  - do:
+      index:
+          index:  test_1
+          type:   test
+          id:     3
+          body:   { foo: bar baz }
+
+  - do:
+      indices.refresh: {}
+
+  - do:
+      cluster.health:
+           wait_for_status: green
+
+  - do:
+      search:
+          index:   test_1
+          type:    test
+          body:
+            query:
+              more_like_this:
+                like:
+                    _index: test_1
+                    _type: test
+                    _id: 1
+                ignore_like:
+                    _index: test_1
+                    _type: test
+                    _id: 3
+                include: true
+                min_doc_freq: 0
+                min_term_freq: 0
+
+  - match: { hits.total: 1 }

+ 4 - 0
src/main/java/org/elasticsearch/action/termvectors/MultiTermVectorsRequest.java

@@ -161,4 +161,8 @@ public class MultiTermVectorsRequest extends ActionRequest<MultiTermVectorsReque
             this.ids.add(id.replaceAll("\\s", ""));
         }
     }
+
+    public int size() {
+        return requests.size();
+    }
 }

+ 54 - 2
src/main/java/org/elasticsearch/common/lucene/search/MoreLikeThisQuery.java

@@ -20,20 +20,23 @@
 package org.elasticsearch.common.lucene.search;
 
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.index.Fields;
-import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.index.*;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.similarities.DefaultSimilarity;
 import org.apache.lucene.search.similarities.Similarity;
 import org.apache.lucene.search.similarities.TFIDFSimilarity;
+import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.io.FastStringReader;
 
 import java.io.IOException;
 import java.io.Reader;
 import java.util.Arrays;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
 
@@ -48,6 +51,8 @@ public class MoreLikeThisQuery extends Query {
 
     private String[] likeText;
     private Fields[] likeFields;
+    private String[] ignoreText;
+    private Fields[] ignoreFields;
     private String[] moreLikeFields;
     private Analyzer analyzer;
     private String minimumShouldMatch = DEFAULT_MINIMUM_SHOULD_MATCH;
@@ -150,6 +155,10 @@ public class MoreLikeThisQuery extends Query {
         mlt.setBoost(boostTerms);
         mlt.setBoostFactor(boostTermsFactor);
 
+        if (this.ignoreText != null || this.ignoreFields != null) {
+            handleSkipTerms(mlt, this.ignoreText, this.ignoreFields);
+        }
+
         BooleanQuery bq = new BooleanQuery();
         if (this.likeFields != null) {
             Query mltQuery = mlt.like(this.likeFields);
@@ -171,6 +180,41 @@ public class MoreLikeThisQuery extends Query {
         return bq;
     }
 
+    private void handleSkipTerms(XMoreLikeThis mlt, String[] ignoreText, Fields[] ignoreFields) throws IOException {
+        Set<Term> skipTerms = new HashSet<>();
+        // handle like text
+        if (ignoreText != null) {
+            for (String text : ignoreText) {
+                // only use the first field to be consistent
+                String fieldName = moreLikeFields[0];
+                try (TokenStream ts = analyzer.tokenStream(fieldName, text)) {
+                    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
+                    ts.reset();
+                    while (ts.incrementToken()) {
+                        skipTerms.add(new Term(fieldName, termAtt.toString()));
+                    }
+                    ts.end();
+                }
+            }
+        }
+        // handle like fields
+        if (ignoreFields != null) {
+            for (Fields fields : ignoreFields) {
+                for (String fieldName : fields) {
+                    Terms terms = fields.terms(fieldName);
+                    final TermsEnum termsEnum = terms.iterator(null);
+                    BytesRef text;
+                    while ((text = termsEnum.next()) != null) {
+                        skipTerms.add(new Term(fieldName, text.utf8ToString()));
+                    }
+                }
+            }
+        }
+        if (!skipTerms.isEmpty()) {
+            mlt.setSkipTerms(skipTerms);
+        }
+    }
+
     @Override
     public String toString(String field) {
         return "like:" + Arrays.toString(likeText);
@@ -204,6 +248,14 @@ public class MoreLikeThisQuery extends Query {
         setLikeText(likeText.toArray(Strings.EMPTY_ARRAY));
     }
 
+    public void setIgnoreText(Fields... ignoreFields) {
+        this.ignoreFields = ignoreFields;
+    }
+
+    public void setIgnoreText(List<String> ignoreText) {
+        this.ignoreText = ignoreText.toArray(Strings.EMPTY_ARRAY);
+    }
+
     public String[] getMoreLikeFields() {
         return moreLikeFields;
     }

+ 40 - 2
src/main/java/org/elasticsearch/common/lucene/search/XMoreLikeThis.java

@@ -48,6 +48,7 @@ import org.apache.lucene.search.similarities.TFIDFSimilarity;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CharsRefBuilder;
 import org.apache.lucene.util.PriorityQueue;
+import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.io.FastStringReader;
 
 import java.io.IOException;
@@ -267,6 +268,12 @@ public final class XMoreLikeThis {
      */
     private boolean boost = DEFAULT_BOOST;
 
+    /**
+     * Current set of skip terms.
+     */
+    private Set<Term> skipTerms = null;
+
+
     /**
      * Field name we'll analyze.
      */
@@ -326,6 +333,13 @@ public final class XMoreLikeThis {
         this.boostFactor = boostFactor;
     }
 
+    /**
+     * Sets a list of terms to never select from
+     */
+    public void setSkipTerms(Set<Term> skipTerms) {
+        this.skipTerms = skipTerms;
+    }
+
     /**
      * Constructor requiring an IndexReader.
      */
@@ -646,7 +660,7 @@ public final class XMoreLikeThis {
             for (Fields fields : likeFields) {
                 Terms vector = fields.terms(fieldName);
                 if (vector != null) {
-                    addTermFrequencies(termFreqMap, vector);
+                    addTermFrequencies(termFreqMap, vector, fieldName);
                 }
             }
             addToQuery(createQueue(termFreqMap, fieldName), bq);
@@ -803,7 +817,7 @@ public final class XMoreLikeThis {
                     }
                 }
             } else {
-                addTermFrequencies(termFreqMap, vector);
+                addTermFrequencies(termFreqMap, vector, fieldName);
             }
         }
 
@@ -817,6 +831,17 @@ public final class XMoreLikeThis {
      * @param vector List of terms and their frequencies for a doc/field
      */
     private void addTermFrequencies(Map<String, Int> termFreqMap, Terms vector) throws IOException {
+        addTermFrequencies(termFreqMap, vector, null);
+    }
+
+    /**
+     * Adds terms and frequencies found in vector into the Map termFreqMap
+     *
+     * @param termFreqMap a Map of terms and their frequencies
+     * @param vector List of terms and their frequencies for a doc/field
+     * @param fieldName Optional field name of the terms for skip terms
+     */
+    private void addTermFrequencies(Map<String, Int> termFreqMap, Terms vector, @Nullable String fieldName) throws IOException {
         final TermsEnum termsEnum = vector.iterator(null);
         final CharsRefBuilder spare = new CharsRefBuilder();
         BytesRef text;
@@ -826,6 +851,9 @@ public final class XMoreLikeThis {
             if (isNoiseWord(term)) {
                 continue;
             }
+            if (isSkipTerm(fieldName, term)) {
+                continue;
+            }
 
             DocsEnum docs = termsEnum.docs(null, null);
             final int freq = docs.freq();
@@ -869,6 +897,9 @@ public final class XMoreLikeThis {
                 if (isNoiseWord(word)) {
                     continue;
                 }
+                if (isSkipTerm(fieldName, word)) {
+                    continue;
+                }
 
                 // increment frequency
                 Int cnt = termFreqMap.get(word);
@@ -900,6 +931,13 @@ public final class XMoreLikeThis {
         return stopWords != null && stopWords.contains(term);
     }
 
+    /**
+     * determines if the passed term is to be skipped all together
+     */
+    private boolean isSkipTerm(@Nullable String field, String value) {
+        return field != null && skipTerms != null && skipTerms.contains(new Term(field, value));
+    }
+
 
     /**
      * Find words for a more-like-this query former.

+ 42 - 8
src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryBuilder.java

@@ -19,10 +19,7 @@
 
 package org.elasticsearch.index.query;
 
-import com.google.common.collect.Lists;
-import org.elasticsearch.ElasticsearchException;
 import org.elasticsearch.ElasticsearchIllegalArgumentException;
-import org.elasticsearch.ElasticsearchParseException;
 import org.elasticsearch.action.get.MultiGetRequest;
 import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.bytes.BytesReference;
@@ -47,6 +44,8 @@ public class MoreLikeThisQueryBuilder extends BaseQueryBuilder implements Boosta
      * A single get item. Pure delegate to multi get.
      */
     public static final class Item extends MultiGetRequest.Item implements ToXContent {
+        public static final Item[] EMPTY_ARRAY = new Item[0];
+
         private BytesReference doc;
         private String likeText;
 
@@ -132,6 +131,7 @@ public class MoreLikeThisQueryBuilder extends BaseQueryBuilder implements Boosta
 
     private final String[] fields;
     private List<Item> docs = new ArrayList<>();
+    private List<Item> ignoreDocs = new ArrayList<>();
     private Boolean include = null;
     private String minimumShouldMatch = null;
     private int minTermFreq = -1;
@@ -163,11 +163,21 @@ public class MoreLikeThisQueryBuilder extends BaseQueryBuilder implements Boosta
         this.fields = fields;
     }
 
+    /**
+     * Sets the documents to use in order to find documents that are "like" this.
+     *
+     * @param docs the documents to use when generating the 'More Like This' query.
+     */
     public MoreLikeThisQueryBuilder like(Item... docs) {
         this.docs = Arrays.asList(docs);
         return this;
     }
 
+    /**
+     * Sets the text to use in order to find documents that are "like" this.
+     *
+     * @param likeText the text to use when generating the 'More Like This' query.
+     */
     public MoreLikeThisQueryBuilder like(String... likeText) {
         this.docs = new ArrayList<>();
         for (String text : likeText) {
@@ -176,11 +186,36 @@ public class MoreLikeThisQueryBuilder extends BaseQueryBuilder implements Boosta
         return this;
     }
 
+    /**
+     * Sets the documents from which the terms should not be selected from.
+     */
+    public MoreLikeThisQueryBuilder ignoreLike(Item... docs) {
+        this.ignoreDocs = Arrays.asList(docs);
+        return this;
+    }
+
+    /**
+     * Sets the text from which the terms should not be selected from.
+     */
+    public MoreLikeThisQueryBuilder ignoreLike(String... ignoreText) {
+        this.ignoreDocs = new ArrayList<>();
+        for (String text : ignoreText) {
+            this.ignoreDocs.add(new Item(text));
+        }
+        return this;
+    }
+
+    /**
+     * Adds a document to use in order to find documents that are "like" this.
+     */
     public MoreLikeThisQueryBuilder addItem(Item item) {
         this.docs.add(item);
         return this;
     }
 
+    /**
+     * Adds some text to use in order to find documents that are "like" this.
+     */
     public MoreLikeThisQueryBuilder addLikeText(String likeText) {
         this.docs.add(new Item(likeText));
         return this;
@@ -349,11 +384,10 @@ public class MoreLikeThisQueryBuilder extends BaseQueryBuilder implements Boosta
         if (this.docs.isEmpty()) {
             throw new ElasticsearchIllegalArgumentException("more_like_this requires '" + likeFieldName + "' to be provided");
         } else {
-            if (docs.size() == 1) {
-                builder.field(likeFieldName, docs);
-            } else {
-                builder.array(likeFieldName, docs);
-            }
+            builder.field(likeFieldName, docs);
+        }
+        if (!ignoreDocs.isEmpty()) {
+            builder.field(MoreLikeThisQueryParser.Fields.IGNORE_LIKE.getPreferredName(), ignoreDocs);
         }
         if (minimumShouldMatch != null) {
             builder.field(MoreLikeThisQueryParser.Fields.MINIMUM_SHOULD_MATCH.getPreferredName(), minimumShouldMatch);

+ 46 - 12
src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryParser.java

@@ -30,6 +30,7 @@ import org.apache.lucene.search.Query;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.ElasticsearchIllegalArgumentException;
 import org.elasticsearch.action.termvectors.MultiTermVectorsRequest;
+import org.elasticsearch.action.termvectors.MultiTermVectorsResponse;
 import org.elasticsearch.action.termvectors.TermVectorsRequest;
 import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.ParseField;
@@ -73,6 +74,7 @@ public class MoreLikeThisQueryParser implements QueryParser {
         public static final ParseField DOCUMENT_IDS = new ParseField("ids").withAllDeprecated("like");
         public static final ParseField DOCUMENTS = new ParseField("docs").withAllDeprecated("like");
         public static final ParseField LIKE = new ParseField("like");
+        public static final ParseField IGNORE_LIKE = new ParseField("ignore_like");
         public static final ParseField INCLUDE = new ParseField("include");
     }
 
@@ -106,7 +108,10 @@ public class MoreLikeThisQueryParser implements QueryParser {
         String currentFieldName = null;
 
         List<String> likeTexts = new ArrayList<>();
-        MultiTermVectorsRequest items = new MultiTermVectorsRequest();
+        MultiTermVectorsRequest likeItems = new MultiTermVectorsRequest();
+
+        List<String> ignoreTexts = new ArrayList<>();
+        MultiTermVectorsRequest ignoreItems = new MultiTermVectorsRequest();
 
         while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
             if (token == XContentParser.Token.FIELD_NAME) {
@@ -115,7 +120,9 @@ public class MoreLikeThisQueryParser implements QueryParser {
                 if (Fields.LIKE_TEXT.match(currentFieldName, parseContext.parseFlags())) {
                     likeTexts.add(parser.text());
                 } else if (Fields.LIKE.match(currentFieldName, parseContext.parseFlags())) {
-                    parseLikeField(parser, likeTexts, items);
+                    parseLikeField(parser, likeTexts, likeItems);
+                } else if (Fields.IGNORE_LIKE.match(currentFieldName, parseContext.parseFlags())) {
+                    parseLikeField(parser, ignoreTexts, ignoreItems);
                 } else if (Fields.MIN_TERM_FREQ.match(currentFieldName, parseContext.parseFlags())) {
                     mltQuery.setMinTermFrequency(parser.intValue());
                 } else if (Fields.MAX_QUERY_TERMS.match(currentFieldName, parseContext.parseFlags())) {
@@ -168,33 +175,40 @@ public class MoreLikeThisQueryParser implements QueryParser {
                         if (!token.isValue()) {
                             throw new ElasticsearchIllegalArgumentException("ids array element should only contain ids");
                         }
-                        items.add(newTermVectorsRequest().id(parser.text()));
+                        likeItems.add(newTermVectorsRequest().id(parser.text()));
                     }
                 } else if (Fields.DOCUMENTS.match(currentFieldName, parseContext.parseFlags())) {
                     while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
                         if (token != XContentParser.Token.START_OBJECT) {
                             throw new ElasticsearchIllegalArgumentException("docs array element should include an object");
                         }
-                        items.add(parseDocument(parser));
+                        likeItems.add(parseDocument(parser));
                     }
                 } else if (Fields.LIKE.match(currentFieldName, parseContext.parseFlags())) {
                     while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
-                        parseLikeField(parser, likeTexts, items);
+                        parseLikeField(parser, likeTexts, likeItems);
+                    }
+                } else if (Fields.IGNORE_LIKE.match(currentFieldName, parseContext.parseFlags())) {
+                    while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
+                        parseLikeField(parser, ignoreTexts, ignoreItems);
                     }
                 } else {
                     throw new QueryParsingException(parseContext.index(), "[mlt] query does not support [" + currentFieldName + "]");
                 }
             } else if (token == XContentParser.Token.START_OBJECT) {
                 if (Fields.LIKE.match(currentFieldName, parseContext.parseFlags())) {
-                    parseLikeField(parser, likeTexts, items);
+                    parseLikeField(parser, likeTexts, likeItems);
+                }
+                else if (Fields.IGNORE_LIKE.match(currentFieldName, parseContext.parseFlags())) {
+                    parseLikeField(parser, ignoreTexts, ignoreItems);
                 } else {
                     throw new QueryParsingException(parseContext.index(), "[mlt] query does not support [" + currentFieldName + "]");
                 }
             }
         }
 
-        if (likeTexts.isEmpty() && items.isEmpty()) {
-            throw new QueryParsingException(parseContext.index(), "more_like_this requires at least 'like_text' or 'ids/docs' to be specified");
+        if (likeTexts.isEmpty() && likeItems.isEmpty()) {
+            throw new QueryParsingException(parseContext.index(), "more_like_this requires 'like' to be specified");
         }
         if (moreLikeFields != null && moreLikeFields.isEmpty()) {
             throw new QueryParsingException(parseContext.index(), "more_like_this requires 'fields' to be non-empty");
@@ -227,10 +241,18 @@ public class MoreLikeThisQueryParser implements QueryParser {
         if (!likeTexts.isEmpty()) {
             mltQuery.setLikeText(likeTexts);
         }
+        if (!ignoreTexts.isEmpty()) {
+            mltQuery.setIgnoreText(ignoreTexts);
+        }
 
         // handle items
-        if (!items.isEmpty()) {
+        if (!likeItems.isEmpty()) {
             // set default index, type and fields if not specified
+            MultiTermVectorsRequest items = likeItems;
+            for (TermVectorsRequest item : ignoreItems) {
+                items.add(item);
+            }
+
             for (TermVectorsRequest item : items) {
                 if (item.index() == null) {
                     item.index(parseContext.index().name());
@@ -254,12 +276,24 @@ public class MoreLikeThisQueryParser implements QueryParser {
             }
             // fetching the items with multi-termvectors API
             BooleanQuery boolQuery = new BooleanQuery();
-            org.apache.lucene.index.Fields[] likeFields = fetchService.fetch(items);
-            mltQuery.setLikeText(likeFields);
+            MultiTermVectorsResponse responses = fetchService.fetchResponse(items);
+
+            // getting the Fields for liked items
+            mltQuery.setLikeText(MoreLikeThisFetchService.getFields(responses, likeItems));
+
+            // getting the Fields for ignored items
+            if (!ignoreItems.isEmpty()) {
+                org.apache.lucene.index.Fields[] ignoreFields = MoreLikeThisFetchService.getFields(responses, ignoreItems);
+                if (ignoreFields.length > 0) {
+                    mltQuery.setIgnoreText(ignoreFields);
+                }
+            }
+
             boolQuery.add(mltQuery, BooleanClause.Occur.SHOULD);
+
             // exclude the items from the search
             if (!include) {
-                handleExclude(boolQuery, items);
+                handleExclude(boolQuery, likeItems);
             }
             return boolQuery;
         }

+ 26 - 6
src/main/java/org/elasticsearch/index/search/morelikethis/MoreLikeThisFetchService.java

@@ -20,18 +20,18 @@
 package org.elasticsearch.index.search.morelikethis;
 
 import org.apache.lucene.index.Fields;
-import org.elasticsearch.action.termvectors.MultiTermVectorsItemResponse;
-import org.elasticsearch.action.termvectors.MultiTermVectorsRequest;
-import org.elasticsearch.action.termvectors.MultiTermVectorsResponse;
-import org.elasticsearch.action.termvectors.TermVectorsResponse;
+import org.elasticsearch.action.termvectors.*;
 import org.elasticsearch.client.Client;
 import org.elasticsearch.common.component.AbstractComponent;
 import org.elasticsearch.common.inject.Inject;
 import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.index.query.MoreLikeThisQueryBuilder.Item;
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Set;
 
 /**
  *
@@ -46,10 +46,26 @@ public class MoreLikeThisFetchService extends AbstractComponent {
         this.client = client;
     }
 
-    public Fields[] fetch(MultiTermVectorsRequest request) throws IOException {
+    public Fields[] fetch(MultiTermVectorsRequest requests) throws IOException {
+        return getFields(fetchResponse(requests), requests);
+    }
+
+    public MultiTermVectorsResponse fetchResponse(MultiTermVectorsRequest requests) throws IOException {
+        return client.multiTermVectors(requests).actionGet();
+    }
+
+    public static Fields[] getFields(MultiTermVectorsResponse responses, MultiTermVectorsRequest requests) throws IOException {
         List<Fields> likeFields = new ArrayList<>();
-        MultiTermVectorsResponse responses = client.multiTermVectors(request).actionGet();
+
+        Set<Item> items = new HashSet<>();
+        for (TermVectorsRequest request : requests) {
+            items.add(new Item(request.index(), request.type(), request.id()));
+        }
+
         for (MultiTermVectorsItemResponse response : responses) {
+            if (!hasResponseFromRequest(response, items)) {
+                continue;
+            }
             if (response.isFailed()) {
                 continue;
             }
@@ -61,4 +77,8 @@ public class MoreLikeThisFetchService extends AbstractComponent {
         }
         return likeFields.toArray(Fields.EMPTY_ARRAY);
     }
+
+    private static boolean hasResponseFromRequest(MultiTermVectorsItemResponse response, Set<Item> items) {
+        return items.contains(new Item(response.getIndex(), response.getType(), response.getId()));
+    }
 }

+ 21 - 97
src/test/java/org/elasticsearch/index/query/SimpleIndexQueryParserTests.java

@@ -21,46 +21,13 @@ package org.elasticsearch.index.query;
 
 import com.google.common.collect.Lists;
 import com.google.common.collect.Sets;
-
 import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
-import org.apache.lucene.index.Fields;
-import org.apache.lucene.index.MultiFields;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.Terms;
-import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.*;
 import org.apache.lucene.index.memory.MemoryIndex;
-import org.apache.lucene.queries.BoostingQuery;
-import org.apache.lucene.queries.ExtendedCommonTermsQuery;
-import org.apache.lucene.queries.FilterClause;
-import org.apache.lucene.queries.TermFilter;
-import org.apache.lucene.queries.TermsFilter;
+import org.apache.lucene.queries.*;
 import org.apache.lucene.sandbox.queries.FuzzyLikeThisQuery;
-import org.apache.lucene.search.BooleanClause;
-import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.ConstantScoreQuery;
-import org.apache.lucene.search.DisjunctionMaxQuery;
-import org.apache.lucene.search.Filter;
-import org.apache.lucene.search.FilteredQuery;
-import org.apache.lucene.search.FuzzyQuery;
-import org.apache.lucene.search.MatchAllDocsQuery;
-import org.apache.lucene.search.MultiTermQuery;
-import org.apache.lucene.search.NumericRangeFilter;
-import org.apache.lucene.search.NumericRangeQuery;
-import org.apache.lucene.search.PrefixFilter;
-import org.apache.lucene.search.PrefixQuery;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.QueryWrapperFilter;
-import org.apache.lucene.search.RegexpQuery;
-import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.TermRangeQuery;
-import org.apache.lucene.search.WildcardQuery;
-import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
-import org.apache.lucene.search.spans.SpanFirstQuery;
-import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
-import org.apache.lucene.search.spans.SpanNearQuery;
-import org.apache.lucene.search.spans.SpanNotQuery;
-import org.apache.lucene.search.spans.SpanOrQuery;
-import org.apache.lucene.search.spans.SpanTermQuery;
+import org.apache.lucene.search.*;
+import org.apache.lucene.search.spans.*;
 import org.apache.lucene.spatial.prefix.IntersectsPrefixTreeFilter;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
@@ -69,20 +36,11 @@ import org.apache.lucene.util.NumericUtils;
 import org.apache.lucene.util.automaton.TooComplexToDeterminizeException;
 import org.elasticsearch.ElasticsearchException;
 import org.elasticsearch.ElasticsearchIllegalArgumentException;
-import org.elasticsearch.action.termvectors.MultiTermVectorsRequest;
-import org.elasticsearch.action.termvectors.TermVectorsRequest;
+import org.elasticsearch.action.termvectors.*;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.bytes.BytesArray;
 import org.elasticsearch.common.compress.CompressedString;
-import org.elasticsearch.common.lucene.search.AndFilter;
-import org.elasticsearch.common.lucene.search.LimitFilter;
-import org.elasticsearch.common.lucene.search.MatchAllDocsFilter;
-import org.elasticsearch.common.lucene.search.MoreLikeThisQuery;
-import org.elasticsearch.common.lucene.search.NotFilter;
-import org.elasticsearch.common.lucene.search.OrFilter;
-import org.elasticsearch.common.lucene.search.Queries;
-import org.elasticsearch.common.lucene.search.RegexpFilter;
-import org.elasticsearch.common.lucene.search.XBooleanFilter;
+import org.elasticsearch.common.lucene.search.*;
 import org.elasticsearch.common.lucene.search.function.BoostScoreFunction;
 import org.elasticsearch.common.lucene.search.function.FunctionScoreQuery;
 import org.elasticsearch.common.lucene.search.function.WeightFactorFunction;
@@ -112,59 +70,19 @@ import org.junit.Test;
 
 import java.io.IOException;
 import java.lang.reflect.Field;
-import java.util.ArrayList;
+import java.util.EnumSet;
 import java.util.Iterator;
 import java.util.List;
 
 import static org.elasticsearch.common.io.Streams.copyToBytesFromClasspath;
 import static org.elasticsearch.common.io.Streams.copyToStringFromClasspath;
 import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
-import static org.elasticsearch.index.query.FilterBuilders.andFilter;
-import static org.elasticsearch.index.query.FilterBuilders.boolFilter;
-import static org.elasticsearch.index.query.FilterBuilders.notFilter;
-import static org.elasticsearch.index.query.FilterBuilders.orFilter;
-import static org.elasticsearch.index.query.FilterBuilders.prefixFilter;
-import static org.elasticsearch.index.query.FilterBuilders.queryFilter;
-import static org.elasticsearch.index.query.FilterBuilders.rangeFilter;
-import static org.elasticsearch.index.query.FilterBuilders.termFilter;
-import static org.elasticsearch.index.query.FilterBuilders.termsFilter;
-import static org.elasticsearch.index.query.QueryBuilders.boolQuery;
-import static org.elasticsearch.index.query.QueryBuilders.boostingQuery;
-import static org.elasticsearch.index.query.QueryBuilders.constantScoreQuery;
-import static org.elasticsearch.index.query.QueryBuilders.disMaxQuery;
-import static org.elasticsearch.index.query.QueryBuilders.filteredQuery;
-import static org.elasticsearch.index.query.QueryBuilders.functionScoreQuery;
-import static org.elasticsearch.index.query.QueryBuilders.fuzzyLikeThisFieldQuery;
-import static org.elasticsearch.index.query.QueryBuilders.fuzzyLikeThisQuery;
-import static org.elasticsearch.index.query.QueryBuilders.fuzzyQuery;
-import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
-import static org.elasticsearch.index.query.QueryBuilders.moreLikeThisQuery;
-import static org.elasticsearch.index.query.QueryBuilders.prefixQuery;
-import static org.elasticsearch.index.query.QueryBuilders.queryString;
-import static org.elasticsearch.index.query.QueryBuilders.rangeQuery;
-import static org.elasticsearch.index.query.QueryBuilders.regexpQuery;
-import static org.elasticsearch.index.query.QueryBuilders.spanFirstQuery;
-import static org.elasticsearch.index.query.QueryBuilders.spanNearQuery;
-import static org.elasticsearch.index.query.QueryBuilders.spanNotQuery;
-import static org.elasticsearch.index.query.QueryBuilders.spanOrQuery;
-import static org.elasticsearch.index.query.QueryBuilders.spanTermQuery;
-import static org.elasticsearch.index.query.QueryBuilders.termQuery;
-import static org.elasticsearch.index.query.QueryBuilders.termsQuery;
-import static org.elasticsearch.index.query.QueryBuilders.wildcardQuery;
-import static org.elasticsearch.index.query.RegexpFlag.COMPLEMENT;
-import static org.elasticsearch.index.query.RegexpFlag.EMPTY;
-import static org.elasticsearch.index.query.RegexpFlag.INTERSECTION;
+import static org.elasticsearch.index.query.FilterBuilders.*;
+import static org.elasticsearch.index.query.QueryBuilders.*;
+import static org.elasticsearch.index.query.RegexpFlag.*;
 import static org.elasticsearch.index.query.functionscore.ScoreFunctionBuilders.factorFunction;
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertBooleanSubQuery;
-import static org.hamcrest.Matchers.closeTo;
-import static org.hamcrest.Matchers.containsString;
-import static org.hamcrest.Matchers.equalTo;
-import static org.hamcrest.Matchers.instanceOf;
-import static org.hamcrest.Matchers.is;
-import static org.hamcrest.Matchers.not;
-import static org.hamcrest.Matchers.notNullValue;
-import static org.hamcrest.Matchers.nullValue;
-import static org.hamcrest.Matchers.sameInstance;
+import static org.hamcrest.Matchers.*;
 
 /**
  *
@@ -1845,12 +1763,18 @@ public class SimpleIndexQueryParserTests extends ElasticsearchSingleNodeTest {
         }
 
         @Override
-        public Fields[] fetch(MultiTermVectorsRequest items) throws IOException {
-            List<Fields> likeTexts = new ArrayList<>();
+        public MultiTermVectorsResponse fetchResponse(MultiTermVectorsRequest items) throws IOException {
+            MultiTermVectorsItemResponse[] responses = new MultiTermVectorsItemResponse[items.size()];
+            int i = 0;
             for (TermVectorsRequest item : items) {
-                likeTexts.add(generateFields(item.selectedFields().toArray(Strings.EMPTY_ARRAY), item.id()));
+                TermVectorsResponse response = new TermVectorsResponse(item.index(), item.type(), item.id());
+                response.setExists(true);
+                Fields generatedFields = generateFields(item.selectedFields().toArray(Strings.EMPTY_ARRAY), item.id());
+                EnumSet<TermVectorsRequest.Flag> flags = EnumSet.of(TermVectorsRequest.Flag.Positions, TermVectorsRequest.Flag.Offsets);
+                response.setFields(generatedFields, item.selectedFields(), flags, generatedFields);
+                responses[i++] = new MultiTermVectorsItemResponse(response, null);
             }
-            return likeTexts.toArray(Fields.EMPTY_ARRAY);
+            return new MultiTermVectorsResponse(responses);
         }
     }
 

+ 2 - 2
src/test/java/org/elasticsearch/index/query/mlt-items.json

@@ -1,8 +1,8 @@
 {
-    more_like_this:{
+    "more_like_this" : {
         "fields" : ["name.first", "name.last"],
         "like_text": "Apache Lucene",
-        "docs" : [
+        "like" : [
         {
             "_index" : "test",
             "_type" : "person",

+ 51 - 0
src/test/java/org/elasticsearch/mlt/MoreLikeThisActionTests.java

@@ -39,6 +39,7 @@ import org.elasticsearch.search.builder.SearchSourceBuilder;
 import org.elasticsearch.test.ElasticsearchIntegrationTest;
 import org.junit.Test;
 
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Comparator;
 import java.util.List;
@@ -665,4 +666,54 @@ public class MoreLikeThisActionTests extends ElasticsearchIntegrationTest {
         assertSearchResponse(response);
         assertHitCount(response, 1);
     }
+
+    @Test
+    public void testMoreLikeThisIgnoreLike() throws ExecutionException, InterruptedException, IOException {
+        createIndex("test");
+        ensureGreen();
+
+        int numFields = randomIntBetween(5, 35);
+
+        logger.info("Create a document that has all the fields.");
+        XContentBuilder doc = jsonBuilder().startObject();
+        for (int i = 0; i < numFields; i++) {
+            doc.field("field"+i, i+"");
+        }
+        doc.endObject();
+
+        logger.info("Indexing each field value of this document as a single document.");
+        List<IndexRequestBuilder> builders = new ArrayList<>();
+        for (int i = 0; i < numFields; i++) {
+            builders.add(client().prepareIndex("test", "type1", i+"").setSource("field"+i, i+""));
+        }
+        indexRandom(true, builders);
+
+        logger.info("First check the document matches all indexed docs.");
+        MoreLikeThisQueryBuilder mltQuery = moreLikeThisQuery("field0")
+                .like((Item) new Item().doc(doc).index("test").type("type1"))
+                .minTermFreq(0)
+                .minDocFreq(0)
+                .maxQueryTerms(100)
+                .minimumShouldMatch("0%");
+        SearchResponse response = client().prepareSearch("test").setTypes("type1")
+                .setQuery(mltQuery).get();
+        assertSearchResponse(response);
+        assertHitCount(response, numFields);
+
+        logger.info("Now check like this doc, but ignore one doc in the index, then two and so on...");
+        List<Item> docs = new ArrayList<>();
+        for (int i = 0; i < numFields; i++) {
+            docs.add(new Item("test", "type1", i+""));
+            mltQuery = moreLikeThisQuery()
+                    .like((Item) new Item().doc(doc).index("test").type("type1"))
+                    .ignoreLike(docs.toArray(Item.EMPTY_ARRAY))
+                    .minTermFreq(0)
+                    .minDocFreq(0)
+                    .maxQueryTerms(100)
+                    .minimumShouldMatch("0%");
+            response = client().prepareSearch("test").setTypes("type1").setQuery(mltQuery).get();
+            assertSearchResponse(response);
+            assertHitCount(response, numFields - (i + 1));
+        }
+    }
 }