浏览代码

Added new terms_set query

This query returns documents that match with at least one or more
of the provided terms. The number of terms that must match varies
per document and is either controlled by a minimum should match
field or computed per document in a minimum should match script.

Closes #26915
Martijn van Groningen 8 年之前
父节点
当前提交
d805c41b28

+ 1 - 1
core/src/main/java/org/elasticsearch/index/query/TermsQueryBuilder.java

@@ -391,7 +391,7 @@ public class TermsQueryBuilder extends AbstractQueryBuilder<TermsQueryBuilder> {
                 .queryName(queryName);
     }
 
-    private static List<Object> parseValues(XContentParser parser) throws IOException {
+    static List<Object> parseValues(XContentParser parser) throws IOException {
         List<Object> values = new ArrayList<>();
         while (parser.nextToken() != XContentParser.Token.END_ARRAY) {
             Object value = parser.objectBytes();

+ 369 - 0
core/src/main/java/org/elasticsearch/index/query/TermsSetQueryBuilder.java

@@ -0,0 +1,369 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.index.query;
+
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.SortedNumericDocValues;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.CoveringQuery;
+import org.apache.lucene.search.DoubleValues;
+import org.apache.lucene.search.LongValues;
+import org.apache.lucene.search.LongValuesSource;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+import org.elasticsearch.common.ParseField;
+import org.elasticsearch.common.ParsingException;
+import org.elasticsearch.common.io.stream.StreamInput;
+import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.common.lucene.BytesRefs;
+import org.elasticsearch.common.lucene.search.Queries;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.index.fielddata.IndexNumericFieldData;
+import org.elasticsearch.index.mapper.MappedFieldType;
+import org.elasticsearch.script.Script;
+import org.elasticsearch.script.SearchScript;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+
+public final class TermsSetQueryBuilder extends AbstractQueryBuilder<TermsSetQueryBuilder> {
+
+    public static final String NAME = "terms_set";
+
+    static final ParseField TERMS_FIELD = new ParseField("terms");
+    static final ParseField MINIMUM_SHOULD_MATCH_FIELD = new ParseField("minimum_should_match_field");
+    static final ParseField MINIMUM_SHOULD_MATCH_SCRIPT = new ParseField("minimum_should_match_script");
+
+    private final String fieldName;
+    private final List<?> values;
+
+    private String minimumShouldMatchField;
+    private Script minimumShouldMatchScript;
+
+    /**
+     * Creates a {@code terms_set} query builder for the given field and candidate terms.
+     *
+     * @param fieldName the field the terms are matched against; must not be {@code null}
+     * @param values    the candidate terms; must not be {@code null}. The list is passed through
+     *                  {@code TermsQueryBuilder.convert} before being stored.
+     * @throws NullPointerException if either argument is {@code null}
+     */
+    public TermsSetQueryBuilder(String fieldName, List<?> values) {
+        this.fieldName = Objects.requireNonNull(fieldName);
+        this.values = TermsQueryBuilder.convert(Objects.requireNonNull(values));
+    }
+
+    /**
+     * Deserializing constructor. After the superclass has read its own state, the fields are read in the
+     * order: field name, values, optional minimum should match field, optional minimum should match script.
+     * This order has to stay in sync with {@link #doWriteTo(StreamOutput)}.
+     */
+    public TermsSetQueryBuilder(StreamInput in) throws IOException {
+        super(in);
+        this.fieldName = in.readString();
+        this.values = (List<?>) in.readGenericValue();
+        this.minimumShouldMatchField = in.readOptionalString();
+        this.minimumShouldMatchScript = in.readOptionalWriteable(Script::new);
+    }
+
+    /**
+     * Serializes the query-specific state in the order: field name, values, optional minimum should match
+     * field, optional minimum should match script. The stream constructor reads the fields back in exactly
+     * this order.
+     */
+    @Override
+    protected void doWriteTo(StreamOutput out) throws IOException {
+        out.writeString(fieldName);
+        out.writeGenericValue(values);
+        out.writeOptionalString(minimumShouldMatchField);
+        out.writeOptionalWriteable(minimumShouldMatchScript);
+    }
+
+    /**
+     * @return the terms held by this builder, in their stored form (the internal list itself, not a copy)
+     */
+    public List<?> getValues() {
+        return values;
+    }
+
+    /**
+     * @return the name of the field that holds the per-document number of required matching terms,
+     *         or {@code null} if none has been set
+     */
+    public String getMinimumShouldMatchField() {
+        return minimumShouldMatchField;
+    }
+
+    /**
+     * Sets the field that holds the per-document number of terms that must match.
+     * A field and a script are mutually exclusive.
+     *
+     * @param minimumShouldMatchField the field name
+     * @return this builder
+     * @throws IllegalArgumentException if a minimum should match script has already been set
+     *         (the check is made regardless of the value passed in)
+     */
+    public TermsSetQueryBuilder setMinimumShouldMatchField(String minimumShouldMatchField) {
+        if (minimumShouldMatchScript != null) {
+            throw new IllegalArgumentException("A script has already been specified. Cannot specify both a field and script");
+        }
+        this.minimumShouldMatchField = minimumShouldMatchField;
+        return this;
+    }
+
+    /**
+     * @return the script that computes the per-document number of required matching terms,
+     *         or {@code null} if none has been set
+     */
+    public Script getMinimumShouldMatchScript() {
+        return minimumShouldMatchScript;
+    }
+
+    /**
+     * Sets the script that computes the per-document number of terms that must match.
+     * A field and a script are mutually exclusive.
+     *
+     * @param minimumShouldMatchScript the script
+     * @return this builder
+     * @throws IllegalArgumentException if a minimum should match field has already been set
+     *         (the check is made regardless of the value passed in)
+     */
+    public TermsSetQueryBuilder setMinimumShouldMatchScript(Script minimumShouldMatchScript) {
+        if (minimumShouldMatchField != null) {
+            throw new IllegalArgumentException("A field has already been specified. Cannot specify both a field and script");
+        }
+        this.minimumShouldMatchScript = minimumShouldMatchScript;
+        return this;
+    }
+
+    /**
+     * Compares the query-specific state of this builder with that of {@code other}: field name, values,
+     * minimum should match field and minimum should match script. Each field of this instance must be
+     * compared with the corresponding field of {@code other}; comparing a field with itself would make
+     * every pair of builders equal.
+     *
+     * @param other the builder to compare against
+     * @return {@code true} if all four fields are equal
+     */
+    @Override
+    protected boolean doEquals(TermsSetQueryBuilder other) {
+        return Objects.equals(fieldName, other.fieldName) && Objects.equals(values, other.values) &&
+                Objects.equals(minimumShouldMatchField, other.minimumShouldMatchField) &&
+                Objects.equals(minimumShouldMatchScript, other.minimumShouldMatchScript);
+    }
+
+    /**
+     * Hashes the same four fields that {@code doEquals} is meant to compare: field name, values,
+     * minimum should match field and minimum should match script.
+     */
+    @Override
+    protected int doHashCode() {
+        return Objects.hash(fieldName, values, minimumShouldMatchField, minimumShouldMatchScript);
+    }
+
+    /**
+     * @return the name of this query, {@value #NAME}
+     */
+    @Override
+    public String getWriteableName() {
+        return NAME;
+    }
+
+    /**
+     * Renders this query as
+     * <pre>
+     * "terms_set": { "&lt;field&gt;": { "terms": [...], "minimum_should_match_field" | "minimum_should_match_script": ..., boost, _name } }
+     * </pre>
+     * Only the minimum should match variants that are set are emitted.
+     */
+    @Override
+    protected void doXContent(XContentBuilder builder, Params params) throws IOException {
+        // Open the outer "terms_set" object and the nested per-field object.
+        builder.startObject(NAME).startObject(fieldName);
+
+        builder.field(TERMS_FIELD.getPreferredName(), TermsQueryBuilder.convertBack(values));
+        if (minimumShouldMatchField != null) {
+            builder.field(MINIMUM_SHOULD_MATCH_FIELD.getPreferredName(), minimumShouldMatchField);
+        }
+        if (minimumShouldMatchScript != null) {
+            builder.field(MINIMUM_SHOULD_MATCH_SCRIPT.getPreferredName(), minimumShouldMatchScript);
+        }
+        printBoostAndQueryName(builder);
+
+        // Close the per-field object, then the "terms_set" object.
+        builder.endObject().endObject();
+    }
+
+    /**
+     * Parses a {@code terms_set} query of the form
+     * <pre>
+     * { "&lt;field&gt;": { "terms": [...], "minimum_should_match_field": "...", "minimum_should_match_script": {...},
+     *                "boost": ..., "_name": "..." } }
+     * </pre>
+     * The first token read from the parser must be the field name, followed by the start of the per-field object.
+     *
+     * @param parser the parser to read the query from
+     * @return the parsed query builder
+     * @throws ParsingException if an unexpected token or an unsupported field is encountered
+     * @throws IllegalArgumentException if both a minimum should match field and script are present
+     *         (raised by the setters when the builder is populated)
+     */
+    public static TermsSetQueryBuilder fromXContent(XContentParser parser) throws IOException {
+        // The first token must be the name of the field the terms apply to.
+        XContentParser.Token token = parser.nextToken();
+        if (token != XContentParser.Token.FIELD_NAME) {
+            throw new ParsingException(parser.getTokenLocation(), "[" + NAME + "] unknown token [" + token + "]");
+        }
+        String currentFieldName = parser.currentName();
+        String fieldName = currentFieldName;
+
+        // The field name must be followed by an object holding the query options.
+        token = parser.nextToken();
+        if (token != XContentParser.Token.START_OBJECT) {
+            throw new ParsingException(parser.getTokenLocation(), "[" + NAME + "] unknown token [" + token + "]");
+        }
+
+        List<Object> values = new ArrayList<>();
+        String minimumShouldMatchField = null;
+        Script minimumShouldMatchScript = null;
+        String queryName = null;
+        float boost = AbstractQueryBuilder.DEFAULT_BOOST;
+
+        // Walk the options object; currentFieldName tracks the most recently seen key so that the
+        // following value token can be dispatched on it.
+        while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
+            if (token == XContentParser.Token.FIELD_NAME) {
+                currentFieldName = parser.currentName();
+            } else if (token == XContentParser.Token.START_ARRAY) {
+                // Only "terms" may be an array.
+                if (TERMS_FIELD.match(currentFieldName)) {
+                    values = TermsQueryBuilder.parseValues(parser);
+                } else {
+                    throw new ParsingException(parser.getTokenLocation(), "[" + NAME + "] query does not support ["
+                            + currentFieldName + "]");
+                }
+            } else if (token == XContentParser.Token.START_OBJECT) {
+                // Only the script may be given as an object.
+                if (MINIMUM_SHOULD_MATCH_SCRIPT.match(currentFieldName)) {
+                    minimumShouldMatchScript = Script.parse(parser);
+                } else {
+                    throw new ParsingException(parser.getTokenLocation(), "[" + NAME + "] query does not support ["
+                            + currentFieldName + "]");
+                }
+            } else if (token.isValue()) {
+                if (MINIMUM_SHOULD_MATCH_FIELD.match(currentFieldName)) {
+                    minimumShouldMatchField = parser.text();
+                } else if (AbstractQueryBuilder.BOOST_FIELD.match(currentFieldName)) {
+                    boost = parser.floatValue();
+                } else if (AbstractQueryBuilder.NAME_FIELD.match(currentFieldName)) {
+                    queryName = parser.text();
+                } else {
+                    throw new ParsingException(parser.getTokenLocation(), "[" + NAME + "] query does not support ["
+                            + currentFieldName + "]");
+                }
+            } else {
+                throw new ParsingException(parser.getTokenLocation(), "[" + NAME + "] unknown token [" + token +
+                        "] after [" + currentFieldName + "]");
+            }
+        }
+
+        // The loop consumed the end of the per-field object; the next token must close the enclosing object.
+        token = parser.nextToken();
+        if (token != XContentParser.Token.END_OBJECT) {
+            throw new ParsingException(parser.getTokenLocation(), "[" + NAME + "] unknown token [" + token + "]");
+        }
+
+        TermsSetQueryBuilder queryBuilder = new TermsSetQueryBuilder(fieldName, values)
+                .queryName(queryName).boost(boost);
+        // The setters enforce that a field and a script are not both specified.
+        if (minimumShouldMatchField != null) {
+            queryBuilder.setMinimumShouldMatchField(minimumShouldMatchField);
+        }
+        if (minimumShouldMatchScript != null) {
+            queryBuilder.setMinimumShouldMatchScript(minimumShouldMatchScript);
+        }
+        return queryBuilder;
+    }
+
+    @Override
+    protected Query doToQuery(QueryShardContext context) throws IOException {
+        if (values.isEmpty()) {
+            return Queries.newMatchNoDocsQuery("No terms supplied for \"" + getName() + "\" query.");
+        }
+        // Fail before we attempt to create the term queries:
+        if (values.size() > BooleanQuery.getMaxClauseCount()) {
+            throw new BooleanQuery.TooManyClauses();
+        }
+
+        final MappedFieldType fieldType = context.fieldMapper(fieldName);
+        final List<Query> queries = new ArrayList<>(values.size());
+        for (Object value : values) {
+            if (fieldType != null) {
+                queries.add(fieldType.termQuery(value, context));
+            } else {
+                queries.add(new TermQuery(new Term(fieldName, BytesRefs.toBytesRef(value))));
+            }
+        }
+        final LongValuesSource longValuesSource;
+        if (minimumShouldMatchField != null) {
+            MappedFieldType msmFieldType = context.fieldMapper(minimumShouldMatchField);
+            if (msmFieldType == null) {
+                throw new QueryShardException(context, "failed to find minimum_should_match field [" + minimumShouldMatchField + "]");
+            }
+
+            IndexNumericFieldData fieldData = context.getForField(msmFieldType);
+            longValuesSource = new FieldValuesSource(fieldData);
+        } else if (minimumShouldMatchScript != null) {
+            SearchScript.Factory factory = context.getScriptService().compile(minimumShouldMatchScript, SearchScript.CONTEXT);
+            Map<String, Object> params = new HashMap<>();
+            params.putAll(minimumShouldMatchScript.getParams());
+            params.put("num_terms", queries.size());
+            SearchScript.LeafFactory leafFactory = factory.newFactory(params, context.lookup());
+            longValuesSource = new ScriptLongValueSource(minimumShouldMatchScript, leafFactory);
+        } else {
+            throw new IllegalStateException("No minimum should match has been specified");
+        }
+        return new CoveringQuery(queries, longValuesSource);
+    }
+
+    /**
+     * A {@link LongValuesSource} whose per-document value is produced by running the minimum should match
+     * script. Instances use identity based equals/hashCode, so two sources are never considered equal.
+     */
+    static final class ScriptLongValueSource extends LongValuesSource {
+
+        // Kept only for toString(); execution goes through leafFactory.
+        private final Script script;
+        private final SearchScript.LeafFactory leafFactory;
+
+        ScriptLongValueSource(Script script, SearchScript.LeafFactory leafFactory) {
+            this.script = script;
+            this.leafFactory = leafFactory;
+        }
+
+        /**
+         * Creates a new script instance for the given leaf. The {@code scores} argument is not used.
+         */
+        @Override
+        public LongValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException {
+            SearchScript searchScript = leafFactory.newInstance(ctx);
+            return new LongValues() {
+                @Override
+                public long longValue() throws IOException {
+                    // Runs the script for the document set by the last advanceExact() call.
+                    return searchScript.runAsLong();
+                }
+
+                @Override
+                public boolean advanceExact(int doc) throws IOException {
+                    // Position the script on the document and report a value only if the script yields a non-null result.
+                    searchScript.setDocument(doc);
+                    return searchScript.run() != null;
+                }
+            };
+        }
+
+        @Override
+        public boolean needsScores() {
+            return false;
+        }
+
+        @Override
+        public int hashCode() {
+            // Identity based on purpose: a CoveringQuery using this script value source must not be cacheable
+            return System.identityHashCode(this);
+        }
+
+        @Override
+        public boolean equals(Object obj) {
+            // Identity based, consistent with hashCode()
+            return this == obj;
+        }
+
+        @Override
+        public String toString() {
+            return "script(" + script.toString() + ")";
+        }
+
+    }
+
+    // A fork of LongValuesSource.FieldValuesSource. The difference is in getValues(), which always reads
+    // sorted numeric doc values, because that is what NumberFieldMapper uses.
+    static class FieldValuesSource extends LongValuesSource {
+
+        private final IndexNumericFieldData field;
+
+        FieldValuesSource(IndexNumericFieldData field) {
+            this.field = field;
+        }
+
+        /**
+         * Returns the per-document long value of the field for the given leaf. The {@code scores}
+         * argument is not used.
+         */
+        @Override
+        public LongValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException {
+            final SortedNumericDocValues docValues = field.load(ctx).getLongValues();
+            return new LongValues() {
+
+                // Value of the document the source was last positioned on; -1 until a document with a value is seen.
+                long current = -1;
+
+                @Override
+                public boolean advanceExact(int doc) throws IOException {
+                    if (docValues.advanceExact(doc) == false) {
+                        return false;
+                    }
+                    // The field is expected to hold exactly one value per document.
+                    assert docValues.docValueCount() == 1;
+                    current = docValues.nextValue();
+                    return true;
+                }
+
+                @Override
+                public long longValue() throws IOException {
+                    return current;
+                }
+            };
+        }
+
+        @Override
+        public boolean needsScores() {
+            return false;
+        }
+
+        @Override
+        public boolean equals(Object o) {
+            if (this == o) {
+                return true;
+            }
+            if (o == null || getClass() != o.getClass()) {
+                return false;
+            }
+            final FieldValuesSource other = (FieldValuesSource) o;
+            return Objects.equals(field, other.field);
+        }
+
+        @Override
+        public int hashCode() {
+            return Objects.hash(field);
+        }
+
+        @Override
+        public String toString() {
+            return "long(" + field + ")";
+        }
+    }
+
+}

+ 2 - 0
core/src/main/java/org/elasticsearch/search/SearchModule.java

@@ -69,6 +69,7 @@ import org.elasticsearch.index.query.SpanTermQueryBuilder;
 import org.elasticsearch.index.query.SpanWithinQueryBuilder;
 import org.elasticsearch.index.query.TermQueryBuilder;
 import org.elasticsearch.index.query.TermsQueryBuilder;
+import org.elasticsearch.index.query.TermsSetQueryBuilder;
 import org.elasticsearch.index.query.TypeQueryBuilder;
 import org.elasticsearch.index.query.WildcardQueryBuilder;
 import org.elasticsearch.index.query.WrapperQueryBuilder;
@@ -748,6 +749,7 @@ public class SearchModule {
         registerQuery(new QuerySpec<>(GeoPolygonQueryBuilder.NAME, GeoPolygonQueryBuilder::new, GeoPolygonQueryBuilder::fromXContent));
         registerQuery(new QuerySpec<>(ExistsQueryBuilder.NAME, ExistsQueryBuilder::new, ExistsQueryBuilder::fromXContent));
         registerQuery(new QuerySpec<>(MatchNoneQueryBuilder.NAME, MatchNoneQueryBuilder::new, MatchNoneQueryBuilder::fromXContent));
+        registerQuery(new QuerySpec<>(TermsSetQueryBuilder.NAME, TermsSetQueryBuilder::new, TermsSetQueryBuilder::fromXContent));
 
         if (ShapesAvailability.JTS_AVAILABLE && ShapesAvailability.SPATIAL4J_AVAILABLE) {
             registerQuery(new QuerySpec<>(GeoShapeQueryBuilder.NAME, GeoShapeQueryBuilder::new, GeoShapeQueryBuilder::fromXContent));

+ 248 - 0
core/src/test/java/org/elasticsearch/index/query/TermsSetQueryBuilderTests.java

@@ -0,0 +1,248 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.index.query;
+
+import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.SortedNumericDocValuesField;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.NoMergePolicy;
+import org.apache.lucene.search.CoveringQuery;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchNoDocsQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.store.Directory;
+import org.elasticsearch.action.admin.indices.mapping.put.PutMappingRequest;
+import org.elasticsearch.common.compress.CompressedXContent;
+import org.elasticsearch.index.fielddata.ScriptDocValues;
+import org.elasticsearch.index.mapper.MapperService;
+import org.elasticsearch.plugins.Plugin;
+import org.elasticsearch.script.MockScriptEngine;
+import org.elasticsearch.script.MockScriptPlugin;
+import org.elasticsearch.script.Script;
+import org.elasticsearch.script.ScriptType;
+import org.elasticsearch.search.internal.SearchContext;
+import org.elasticsearch.test.AbstractQueryTestCase;
+import org.elasticsearch.test.rest.yaml.ObjectPath;
+
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.function.Function;
+
+import static org.hamcrest.Matchers.containsString;
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.instanceOf;
+
+public class TermsSetQueryBuilderTests extends AbstractQueryTestCase<TermsSetQueryBuilder> {
+
+    /**
+     * Registers {@link CustomScriptPlugin}, which provides the mock script used as minimum should match script.
+     */
+    @Override
+    protected Collection<Class<? extends Plugin>> getPlugins() {
+        return Collections.singleton(CustomScriptPlugin.class);
+    }
+
+    /**
+     * Adds a {@code long} field named {@code m_s_m} to the {@code doc} type mapping; the tests use it as the
+     * minimum should match field.
+     */
+    @Override
+    protected void initializeAdditionalMappings(MapperService mapperService) throws IOException {
+        String docType = "doc";
+        mapperService.merge(docType, new CompressedXContent(PutMappingRequest.buildFromSimplifiedDef(docType,
+                "m_s_m", "type=long"
+        ).string()), MapperService.MergeReason.MAPPING_UPDATE, false);
+    }
+
+    /**
+     * Creates a random query builder with between 0 and 10 terms and, chosen at random, either the
+     * {@code m_s_m} minimum should match field or the mock minimum should match script.
+     */
+    @Override
+    protected TermsSetQueryBuilder doCreateTestQueryBuilder() {
+        // Pick any mapped field except the geo fields; it is only used to generate random term values.
+        String fieldName;
+        do {
+            fieldName = randomFrom(MAPPED_FIELD_NAMES);
+        } while (fieldName.equals(GEO_POINT_FIELD_NAME) || fieldName.equals(GEO_SHAPE_FIELD_NAME));
+        int numValues = randomIntBetween(0, 10);
+        List<Object> randomTerms = new ArrayList<>(numValues);
+        for (int i = 0; i < numValues; i++) {
+            randomTerms.add(getRandomValueForFieldName(fieldName));
+        }
+        // NOTE(review): the query itself always targets STRING_FIELD_NAME, not the randomly picked field above;
+        // presumably intentional - confirm.
+        TermsSetQueryBuilder queryBuilder = new TermsSetQueryBuilder(STRING_FIELD_NAME, randomTerms);
+        if (randomBoolean()) {
+            queryBuilder.setMinimumShouldMatchField("m_s_m");
+        } else {
+            queryBuilder.setMinimumShouldMatchScript(
+                    new Script(ScriptType.INLINE, MockScriptEngine.NAME, "_script", Collections.emptyMap()));
+        }
+        return queryBuilder;
+    }
+
+    @Override
+    protected void doAssertLuceneQuery(TermsSetQueryBuilder queryBuilder, Query query, SearchContext context) throws IOException {
+        if (queryBuilder.getValues().isEmpty()) {
+            assertThat(query, instanceOf(MatchNoDocsQuery.class));
+            MatchNoDocsQuery matchNoDocsQuery = (MatchNoDocsQuery) query;
+            assertThat(matchNoDocsQuery.toString(), containsString("No terms supplied for \"terms_set\" query."));
+        } else {
+            assertThat(query, instanceOf(CoveringQuery.class));
+        }
+    }
+
+    /**
+     * The query is expected to be cacheable when the minimum should match value comes from a field, or when
+     * a script is set but no terms were supplied (in which case a match-no-docs query is produced).
+     */
+    @Override
+    protected boolean isCachable(TermsSetQueryBuilder queryBuilder) {
+        return queryBuilder.getMinimumShouldMatchField() != null ||
+                (queryBuilder.getMinimumShouldMatchScript() != null && queryBuilder.getValues().isEmpty());
+    }
+
+    /**
+     * Always {@code false}: the script based variant produces queries that are not cacheable.
+     * NOTE(review): rationale inferred from the identity based equals/hashCode of the script value source - confirm.
+     */
+    @Override
+    protected boolean builderGeneratesCacheableQueries() {
+        return false;
+    }
+
+    /**
+     * Verifies that a minimum should match field and script are mutually exclusive, in both orders.
+     */
+    public void testBothFieldAndScriptSpecified() {
+        TermsSetQueryBuilder queryBuilder = new TermsSetQueryBuilder("_field", Collections.emptyList());
+        // Script first: setting a field afterwards must fail.
+        queryBuilder.setMinimumShouldMatchScript(new Script(""));
+        expectThrows(IllegalArgumentException.class, () -> queryBuilder.setMinimumShouldMatchField("_field"));
+
+        // Clear the script, then field first: setting a script afterwards must fail.
+        queryBuilder.setMinimumShouldMatchScript(null);
+        queryBuilder.setMinimumShouldMatchField("_field");
+        expectThrows(IllegalArgumentException.class, () -> queryBuilder.setMinimumShouldMatchScript(new Script("")));
+    }
+
+    /**
+     * Indexes six documents with a per-document required number of matches in {@code m_s_m} and checks that
+     * searching for the terms {@code c} and {@code d} only returns the documents that contain at least
+     * that many of the two terms.
+     */
+    public void testDoToQuery() throws Exception {
+        // Parallel arrays: entry i describes the document that ends up with doc id i.
+        final String[] messages = {"a b", "a b c", "a b c", "a b c d", "a b c d", "a b c d"};
+        final long[] requiredMatches = {1, 1, 2, 1, 2, 3};
+        // Only these documents contain at least m_s_m of the terms "c" and "d".
+        final int[] expectedDocs = {1, 3, 4};
+
+        try (Directory directory = newDirectory()) {
+            IndexWriterConfig config = new IndexWriterConfig(new WhitespaceAnalyzer());
+            config.setMergePolicy(NoMergePolicy.INSTANCE);
+            try (IndexWriter iw = new IndexWriter(directory, config)) {
+                for (int i = 0; i < messages.length; i++) {
+                    Document document = new Document();
+                    document.add(new TextField("message", messages[i], Field.Store.NO));
+                    document.add(new SortedNumericDocValuesField("m_s_m", requiredMatches[i]));
+                    iw.addDocument(document);
+                }
+            }
+
+            try (IndexReader ir = DirectoryReader.open(directory)) {
+                QueryShardContext context = createShardContext();
+                Query query = new TermsSetQueryBuilder("message", Arrays.asList("c", "d"))
+                        .setMinimumShouldMatchField("m_s_m").doToQuery(context);
+                IndexSearcher searcher = new IndexSearcher(ir);
+                TopDocs topDocs = searcher.search(query, 10, new Sort(SortField.FIELD_DOC));
+                assertThat(topDocs.totalHits, equalTo(3L));
+                for (int i = 0; i < expectedDocs.length; i++) {
+                    assertThat(topDocs.scoreDocs[i].doc, equalTo(expectedDocs[i]));
+                }
+            }
+        }
+    }
+
+    /**
+     * Same idea as {@code testDoToQuery}, but the required number of matches is computed by the mock script,
+     * which interprets {@code m_s_m} as a percentage of the four supplied terms.
+     */
+    public void testDoToQuery_msmScriptField() throws Exception {
+        // Parallel arrays: entry i describes the document that ends up with doc id i.
+        final String[] messages = {"a b x y", "a b x y", "a b c x", "a b c x", "a b c d"};
+        final long[] percentages = {50, 75, 75, 100, 100};
+        // Only these documents contain at least the script-computed number of the terms a, b, c and d.
+        final int[] expectedDocs = {0, 2, 4};
+
+        try (Directory directory = newDirectory()) {
+            IndexWriterConfig config = new IndexWriterConfig(new WhitespaceAnalyzer());
+            config.setMergePolicy(NoMergePolicy.INSTANCE);
+            try (IndexWriter iw = new IndexWriter(directory, config)) {
+                for (int i = 0; i < messages.length; i++) {
+                    Document document = new Document();
+                    document.add(new TextField("message", messages[i], Field.Store.NO));
+                    document.add(new SortedNumericDocValuesField("m_s_m", percentages[i]));
+                    iw.addDocument(document);
+                }
+            }
+
+            try (IndexReader ir = DirectoryReader.open(directory)) {
+                QueryShardContext context = createShardContext();
+                Script script = new Script(ScriptType.INLINE, MockScriptEngine.NAME, "_script", Collections.emptyMap());
+                Query query = new TermsSetQueryBuilder("message", Arrays.asList("a", "b", "c", "d"))
+                        .setMinimumShouldMatchScript(script).doToQuery(context);
+                IndexSearcher searcher = new IndexSearcher(ir);
+                TopDocs topDocs = searcher.search(query, 10, new Sort(SortField.FIELD_DOC));
+                assertThat(topDocs.totalHits, equalTo(3L));
+                for (int i = 0; i < expectedDocs.length; i++) {
+                    assertThat(topDocs.scoreDocs[i].doc, equalTo(expectedDocs[i]));
+                }
+            }
+        }
+    }
+
+    /**
+     * Provides the mock script {@code _script} that is used as minimum should match script in these tests.
+     */
+    public static class CustomScriptPlugin extends MockScriptPlugin {
+
+        @Override
+        protected Map<String, Function<Map<String, Object>, Object>> pluginScripts() {
+            return Collections.singletonMap("_script", args -> {
+                try {
+                    // Treat the document's m_s_m value as a percentage of the number of supplied terms.
+                    int clauseCount = ObjectPath.evaluate(args, "params.num_terms");
+                    long msm = ((ScriptDocValues.Longs) ObjectPath.evaluate(args, "doc.m_s_m")).getValue();
+                    return clauseCount * (msm / 100d);
+                } catch (IOException e) {
+                    throw new UncheckedIOException(e);
+                }
+            });
+        }
+    }
+
+}
+

+ 1 - 0
core/src/test/java/org/elasticsearch/search/SearchModuleTests.java

@@ -323,6 +323,7 @@ public class SearchModuleTests extends ModuleTestCase {
             "span_within",
             "term",
             "terms",
+            "terms_set",
             "type",
             "wildcard",
             "wrapper"

+ 8 - 0
docs/reference/query-dsl/term-level-queries.asciidoc

@@ -21,6 +21,12 @@ The queries in this group are:
     Find documents which contain any of the exact terms specified in the field
     specified.
 
+<<query-dsl-terms-set-query,`terms_set` query>>::
+
+    Find documents which match with one or more of the specified terms. The
+    number of terms that must match depends on the specified minimum should
+    match field or script.
+
 <<query-dsl-range-query,`range` query>>::
 
     Find documents where the field specified contains values (dates, numbers,
@@ -66,6 +72,8 @@ include::term-query.asciidoc[]
 
 include::terms-query.asciidoc[]
 
+include::terms-set-query.asciidoc[]
+
 include::range-query.asciidoc[]
 
 include::exists-query.asciidoc[]

+ 122 - 0
docs/reference/query-dsl/terms-set-query.asciidoc

@@ -0,0 +1,122 @@
+[[query-dsl-terms-set-query]]
+=== Terms Set Query
+
+experimental[The terms_set query is a new query and its syntax may change in the future]
+
+Returns any documents that match one or more of the
+provided terms. The terms are not analyzed and thus must match exactly.
+The number of terms that must match varies per document and is either
+controlled by a minimum should match field or computed per document in
+a minimum should match script.
+
+The field that controls the number of required terms that must match must
+be a number field:
+
+[source,js]
+--------------------------------------------------
+PUT /my-index
+{
+    "mappings": {
+        "doc": {
+            "properties": {
+                "required_matches": {
+                    "type": "long"
+                }
+            }
+        }
+    }
+}
+
+PUT /my-index/doc/1?refresh
+{
+    "codes": ["ghi", "jkl"],
+    "required_matches": 2
+}
+
+PUT /my-index/doc/2?refresh
+{
+    "codes": ["def", "ghi"],
+    "required_matches": 2
+}
+--------------------------------------------------
+// CONSOLE
+// TESTSETUP
+
+An example that uses the minimum should match field:
+
+[source,js]
+--------------------------------------------------
+GET /my-index/_search
+{
+    "query": {
+        "terms_set": {
+            "codes" : {
+                "terms" : ["abc", "def", "ghi"],
+                "minimum_should_match_field": "required_matches"
+            }
+        }
+    }
+}
+--------------------------------------------------
+// CONSOLE
+
+Response:
+
+[source,js]
+--------------------------------------------------
+{
+  "took": 13,
+  "timed_out": false,
+  "_shards": {
+    "total": 5,
+    "successful": 5,
+    "skipped" : 0,
+    "failed": 0
+  },
+  "hits": {
+    "total": 1,
+    "max_score": 0.5753642,
+    "hits": [
+      {
+        "_index": "my-index",
+        "_type": "doc",
+        "_id": "2",
+        "_score": 0.5753642,
+        "_source": {
+          "codes": ["def", "ghi"],
+          "required_matches": 2
+        }
+      }
+    ]
+  }
+}
+--------------------------------------------------
+// TESTRESPONSE[s/"took": 13,/"took": "$body.took",/]
+
+Scripts can also be used to control how many terms are required to match
+in a more dynamic way. For example, a creation date or a popularity field
+can be used as the basis for the number of required terms to match.
+
+Also the `params.num_terms` parameter is available in the script to indicate the
+number of terms that have been specified.
+
+An example that always limits the number of required terms to match to never
+become larger than the number of terms specified:
+
+[source,js]
+--------------------------------------------------
+GET /my-index/_search
+{
+    "query": {
+        "terms_set": {
+            "codes" : {
+                "terms" : ["abc", "def", "ghi"],
+                "minimum_should_match_script": {
+                   "source": "Math.min(params.num_terms, doc['required_matches'].value)"
+                }
+            }
+        }
+    }
+}
+--------------------------------------------------
+// CONSOLE