Browse Source

Add a field data based TermsFilter

Add FieldDataTermsFilter that compares terms out of
the fielddata cache. When filtering on a large
set of terms this filter can be considerably faster
than using a standard lucene terms filter.

Add the "fielddata" execution mode to the
terms filter parser to enable the use of
the new FieldDataTermsFilter.

Add supporting tests and documentation.

Closes #4209
Matt Weber 12 years ago
parent
commit
a841a422f6

+ 9 - 0
docs/reference/query-dsl/filters/terms-filter.asciidoc

@@ -35,6 +35,15 @@ The `execution` option now has the following options :
     building a bit set matching it, and filtering. The total filter is
     cached.
 
+`fielddata`::
+    Generates a terms filter that uses the fielddata cache to
+    compare terms.  This execution mode is great to use when filtering
+    on a field that is already loaded into the fielddata cache from 
+    faceting, sorting, or index warmers.  When filtering on
+    a large number of terms, this execution can be considerably faster
+    than the other modes.  The total filter is not cached unless
+    explicitly configured to do so.
+
 `bool`:: 
     Generates a term filter (which is cached) for each term, and
     wraps those in a bool filter. The bool filter itself is not cached as it

+ 3 - 0
src/main/java/org/elasticsearch/index/mapper/FieldMapper.java

@@ -30,6 +30,7 @@ import org.elasticsearch.common.Nullable;
 import org.elasticsearch.index.codec.docvaluesformat.DocValuesFormatProvider;
 import org.elasticsearch.index.codec.postingsformat.PostingsFormatProvider;
 import org.elasticsearch.index.fielddata.FieldDataType;
+import org.elasticsearch.index.fielddata.IndexFieldDataService;
 import org.elasticsearch.index.query.QueryParseContext;
 import org.elasticsearch.index.similarity.SimilarityProvider;
 
@@ -174,6 +175,8 @@ public interface FieldMapper<T> extends Mapper {
 
     Filter termsFilter(List values, @Nullable QueryParseContext context);
 
+    Filter termsFilter(IndexFieldDataService fieldData, List values, @Nullable QueryParseContext context);
+
     Query rangeQuery(Object lowerTerm, Object upperTerm, boolean includeLower, boolean includeUpper, @Nullable QueryParseContext context);
 
     Filter rangeFilter(Object lowerTerm, Object upperTerm, boolean includeLower, boolean includeUpper, @Nullable QueryParseContext context);

+ 18 - 0
src/main/java/org/elasticsearch/index/mapper/core/AbstractFieldMapper.java

@@ -19,6 +19,7 @@
 
 package org.elasticsearch.index.mapper.core;
 
+import com.carrotsearch.hppc.ObjectOpenHashSet;
 import com.google.common.base.Objects;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.document.Field;
@@ -44,8 +45,10 @@ import org.elasticsearch.index.codec.postingsformat.PostingFormats;
 import org.elasticsearch.index.codec.postingsformat.PostingsFormatProvider;
 import org.elasticsearch.index.codec.postingsformat.PostingsFormatService;
 import org.elasticsearch.index.fielddata.FieldDataType;
+import org.elasticsearch.index.fielddata.IndexFieldDataService;
 import org.elasticsearch.index.mapper.*;
 import org.elasticsearch.index.query.QueryParseContext;
+import org.elasticsearch.index.search.FieldDataTermsFilter;
 import org.elasticsearch.index.similarity.SimilarityLookupService;
 import org.elasticsearch.index.similarity.SimilarityProvider;
 
@@ -407,6 +410,21 @@ public abstract class AbstractFieldMapper<T> implements FieldMapper<T> {
         return new TermsFilter(names.indexName(), bytesRefs);
     }
 
+    /**
+     * A terms filter based on the field data cache
+     */
+    @Override
+    public Filter termsFilter(IndexFieldDataService fieldDataService, List values, @Nullable QueryParseContext context) {
+        // create with initial size large enough to avoid rehashing
+        ObjectOpenHashSet<BytesRef> terms =
+                new ObjectOpenHashSet<BytesRef>((int) (values.size() * (1 + ObjectOpenHashSet.DEFAULT_LOAD_FACTOR)));
+        for (int i = 0, len = values.size(); i < len; i++) {
+            terms.add(indexedValueForSearch(values.get(i)));
+        }
+
+        return FieldDataTermsFilter.newBytes(fieldDataService.getForField(this), terms);
+    }
+
     @Override
     public Query rangeQuery(Object lowerTerm, Object upperTerm, boolean includeLower, boolean includeUpper, @Nullable QueryParseContext context) {
         return new TermRangeQuery(names.indexName(),

+ 9 - 19
src/main/java/org/elasticsearch/index/mapper/core/DoubleFieldMapper.java

@@ -160,22 +160,12 @@ public class DoubleFieldMapper extends NumberFieldMapper<Double> {
 
     @Override
     public BytesRef indexedValueForSearch(Object value) {
-        long longValue = NumericUtils.doubleToSortableLong(parseValue(value));
+        long longValue = NumericUtils.doubleToSortableLong(parseDoubleValue(value));
         BytesRef bytesRef = new BytesRef();
         NumericUtils.longToPrefixCoded(longValue, 0, bytesRef);   // 0 because of exact match
         return bytesRef;
     }
 
-    private double parseValue(Object value) {
-        if (value instanceof Number) {
-            return ((Number) value).doubleValue();
-        }
-        if (value instanceof BytesRef) {
-            return Double.parseDouble(((BytesRef) value).utf8ToString());
-        }
-        return Double.parseDouble(value.toString());
-    }
-
     @Override
     public Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions, boolean transpositions) {
         double iValue = Double.parseDouble(value);
@@ -188,7 +178,7 @@ public class DoubleFieldMapper extends NumberFieldMapper<Double> {
 
     @Override
     public Query termQuery(Object value, @Nullable QueryParseContext context) {
-        double dValue = parseValue(value);
+        double dValue = parseDoubleValue(value);
         return NumericRangeQuery.newDoubleRange(names.indexName(), precisionStep,
                 dValue, dValue, true, true);
     }
@@ -196,14 +186,14 @@ public class DoubleFieldMapper extends NumberFieldMapper<Double> {
     @Override
     public Query rangeQuery(Object lowerTerm, Object upperTerm, boolean includeLower, boolean includeUpper, @Nullable QueryParseContext context) {
         return NumericRangeQuery.newDoubleRange(names.indexName(), precisionStep,
-                lowerTerm == null ? null : parseValue(lowerTerm),
-                upperTerm == null ? null : parseValue(upperTerm),
+                lowerTerm == null ? null : parseDoubleValue(lowerTerm),
+                upperTerm == null ? null : parseDoubleValue(upperTerm),
                 includeLower, includeUpper);
     }
 
     @Override
     public Filter termFilter(Object value, @Nullable QueryParseContext context) {
-        double dValue = parseValue(value);
+        double dValue = parseDoubleValue(value);
         return NumericRangeFilter.newDoubleRange(names.indexName(), precisionStep,
                 dValue, dValue, true, true);
     }
@@ -211,8 +201,8 @@ public class DoubleFieldMapper extends NumberFieldMapper<Double> {
     @Override
     public Filter rangeFilter(Object lowerTerm, Object upperTerm, boolean includeLower, boolean includeUpper, @Nullable QueryParseContext context) {
         return NumericRangeFilter.newDoubleRange(names.indexName(), precisionStep,
-                lowerTerm == null ? null : parseValue(lowerTerm),
-                upperTerm == null ? null : parseValue(upperTerm),
+                lowerTerm == null ? null : parseDoubleValue(lowerTerm),
+                upperTerm == null ? null : parseDoubleValue(upperTerm),
                 includeLower, includeUpper);
     }
 
@@ -223,8 +213,8 @@ public class DoubleFieldMapper extends NumberFieldMapper<Double> {
     @Override
     public Filter rangeFilter(IndexFieldDataService fieldData, Object lowerTerm, Object upperTerm, boolean includeLower, boolean includeUpper, @Nullable QueryParseContext context) {
         return NumericRangeFieldDataFilter.newDoubleRange((IndexNumericFieldData) fieldData.getForField(this),
-                lowerTerm == null ? null : parseValue(lowerTerm),
-                upperTerm == null ? null : parseValue(upperTerm),
+                lowerTerm == null ? null : parseDoubleValue(lowerTerm),
+                upperTerm == null ? null : parseDoubleValue(upperTerm),
                 includeLower, includeUpper);
     }
 

+ 9 - 19
src/main/java/org/elasticsearch/index/mapper/core/LongFieldMapper.java

@@ -160,20 +160,10 @@ public class LongFieldMapper extends NumberFieldMapper<Long> {
     @Override
     public BytesRef indexedValueForSearch(Object value) {
         BytesRef bytesRef = new BytesRef();
-        NumericUtils.longToPrefixCoded(parseValue(value), 0, bytesRef);  // 0 because of exact match
+        NumericUtils.longToPrefixCoded(parseLongValue(value), 0, bytesRef);  // 0 because of exact match
         return bytesRef;
     }
 
-    private long parseValue(Object value) {
-        if (value instanceof Number) {
-            return ((Number) value).longValue();
-        }
-        if (value instanceof BytesRef) {
-            return Long.parseLong(((BytesRef) value).utf8ToString());
-        }
-        return Long.parseLong(value.toString());
-    }
-
     @Override
     public Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions, boolean transpositions) {
         long iValue = Long.parseLong(value);
@@ -191,14 +181,14 @@ public class LongFieldMapper extends NumberFieldMapper<Long> {
 
     @Override
     public Query termQuery(Object value, @Nullable QueryParseContext context) {
-        long iValue = parseValue(value);
+        long iValue = parseLongValue(value);
         return NumericRangeQuery.newLongRange(names.indexName(), precisionStep,
                 iValue, iValue, true, true);
     }
 
     @Override
     public Filter termFilter(Object value, @Nullable QueryParseContext context) {
-        long iValue = parseValue(value);
+        long iValue = parseLongValue(value);
         return NumericRangeFilter.newLongRange(names.indexName(), precisionStep,
                 iValue, iValue, true, true);
     }
@@ -206,24 +196,24 @@ public class LongFieldMapper extends NumberFieldMapper<Long> {
     @Override
     public Query rangeQuery(Object lowerTerm, Object upperTerm, boolean includeLower, boolean includeUpper, @Nullable QueryParseContext context) {
         return NumericRangeQuery.newLongRange(names.indexName(), precisionStep,
-                lowerTerm == null ? null : parseValue(lowerTerm),
-                upperTerm == null ? null : parseValue(upperTerm),
+                lowerTerm == null ? null : parseLongValue(lowerTerm),
+                upperTerm == null ? null : parseLongValue(upperTerm),
                 includeLower, includeUpper);
     }
 
     @Override
     public Filter rangeFilter(Object lowerTerm, Object upperTerm, boolean includeLower, boolean includeUpper, @Nullable QueryParseContext context) {
         return NumericRangeFilter.newLongRange(names.indexName(), precisionStep,
-                lowerTerm == null ? null : parseValue(lowerTerm),
-                upperTerm == null ? null : parseValue(upperTerm),
+                lowerTerm == null ? null : parseLongValue(lowerTerm),
+                upperTerm == null ? null : parseLongValue(upperTerm),
                 includeLower, includeUpper);
     }
 
     @Override
     public Filter rangeFilter(IndexFieldDataService fieldData, Object lowerTerm, Object upperTerm, boolean includeLower, boolean includeUpper, @Nullable QueryParseContext context) {
         return NumericRangeFieldDataFilter.newLongRange((IndexNumericFieldData) fieldData.getForField(this),
-                lowerTerm == null ? null : parseValue(lowerTerm),
-                upperTerm == null ? null : parseValue(upperTerm),
+                lowerTerm == null ? null : parseLongValue(lowerTerm),
+                upperTerm == null ? null : parseLongValue(upperTerm),
                 includeLower, includeUpper);
     }
 

+ 61 - 0
src/main/java/org/elasticsearch/index/mapper/core/NumberFieldMapper.java

@@ -19,6 +19,8 @@
 
 package org.elasticsearch.index.mapper.core;
 
+import com.carrotsearch.hppc.DoubleOpenHashSet;
+import com.carrotsearch.hppc.LongOpenHashSet;
 import org.apache.lucene.analysis.NumericTokenStream;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
@@ -36,9 +38,11 @@ import org.elasticsearch.index.analysis.NamedAnalyzer;
 import org.elasticsearch.index.codec.docvaluesformat.DocValuesFormatProvider;
 import org.elasticsearch.index.codec.postingsformat.PostingsFormatProvider;
 import org.elasticsearch.index.fielddata.IndexFieldDataService;
+import org.elasticsearch.index.fielddata.IndexNumericFieldData;
 import org.elasticsearch.index.mapper.*;
 import org.elasticsearch.index.mapper.internal.AllFieldMapper;
 import org.elasticsearch.index.query.QueryParseContext;
+import org.elasticsearch.index.search.FieldDataTermsFilter;
 import org.elasticsearch.index.similarity.SimilarityProvider;
 
 import java.io.IOException;
@@ -256,6 +260,63 @@ public abstract class NumberFieldMapper<T extends Number> extends AbstractFieldM
      */
     public abstract Filter rangeFilter(IndexFieldDataService fieldData, Object lowerTerm, Object upperTerm, boolean includeLower, boolean includeUpper, @Nullable QueryParseContext context);
 
+    /**
+     * A terms filter based on the field data cache for numeric fields.
+     */
+    @Override
+    public Filter termsFilter(IndexFieldDataService fieldDataService, List values, @Nullable QueryParseContext context) {
+        IndexNumericFieldData fieldData = fieldDataService.getForField(this);
+        if (fieldData.getNumericType().isFloatingPoint()) {
+            // create with initial size large enough to avoid rehashing
+            DoubleOpenHashSet terms =
+                    new DoubleOpenHashSet((int) (values.size() * (1 + DoubleOpenHashSet.DEFAULT_LOAD_FACTOR)));
+            for (int i = 0, len = values.size(); i < len; i++) {
+                terms.add(parseDoubleValue(values.get(i)));
+            }
+
+            return FieldDataTermsFilter.newDoubles(fieldData, terms);
+        } else {
+            // create with initial size large enough to avoid rehashing
+            LongOpenHashSet terms =
+                    new LongOpenHashSet((int) (values.size() * (1 + LongOpenHashSet.DEFAULT_LOAD_FACTOR)));
+            for (int i = 0, len = values.size(); i < len; i++) {
+                terms.add(parseLongValue(values.get(i)));
+            }
+
+            return FieldDataTermsFilter.newLongs(fieldData, terms);
+        }
+    }
+
+    /**
+     * Converts an object value into a double
+     */
+    public double parseDoubleValue(Object value) {
+        if (value instanceof Number) {
+            return ((Number) value).doubleValue();
+        }
+
+        if (value instanceof BytesRef) {
+            return Double.parseDouble(((BytesRef) value).utf8ToString());
+        }
+
+        return Double.parseDouble(value.toString());
+    }
+
+    /**
+     * Converts an object value into a long
+     */
+    public long parseLongValue(Object value) {
+        if (value instanceof Number) {
+            return ((Number) value).longValue();
+        }
+
+        if (value instanceof BytesRef) {
+            return Long.parseLong(((BytesRef) value).utf8ToString());
+        }
+
+        return Long.parseLong(value.toString());
+    }
+
     /**
      * Override the default behavior (to return the string, and return the actual Number instance).
      *

+ 12 - 2
src/main/java/org/elasticsearch/index/query/TermsFilterParser.java

@@ -50,7 +50,6 @@ import static org.elasticsearch.index.query.support.QueryParsers.wrapSmartNameFi
 public class TermsFilterParser implements FilterParser {
 
     public static final String NAME = "terms";
-
     private IndicesTermsFilterCache termsFilterCache;
 
     @Inject
@@ -208,6 +207,17 @@ public class TermsFilterParser implements FilterParser {
                 if (cache == null || cache) {
                     filter = parseContext.cacheFilter(filter, cacheKey);
                 }
+            } else if ("fielddata".equals(execution)) {
+                // if there are no mappings, then nothing has been indexed yet against this shard, so we can return
+                // no match (but not cached!), since the FieldDataTermsFilter relies on a mapping...
+                if (fieldMapper == null) {
+                    return Queries.MATCH_NO_FILTER;
+                }
+
+                filter = fieldMapper.termsFilter(parseContext.fieldData(), terms, parseContext);
+                if (cache != null && cache) {
+                    filter = parseContext.cacheFilter(filter, cacheKey);
+                }
             } else if ("bool".equals(execution)) {
                 XBooleanFilter boolFiler = new XBooleanFilter();
                 if (fieldMapper != null) {
@@ -305,7 +315,7 @@ public class TermsFilterParser implements FilterParser {
                     filter = parseContext.cacheFilter(filter, cacheKey);
                 }
             } else {
-                throw new QueryParsingException(parseContext.index(), "bool filter execution value [" + execution + "] not supported");
+                throw new QueryParsingException(parseContext.index(), "terms filter execution value [" + execution + "] not supported");
             }
 
             filter = wrapSmartNameFilter(filter, smartNameFieldMappers, parseContext);

+ 267 - 0
src/main/java/org/elasticsearch/index/search/FieldDataTermsFilter.java

@@ -0,0 +1,267 @@
+/*
+ * Licensed to Elastic Search and Shay Banon under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Elastic Search licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.search;
+
+import com.carrotsearch.hppc.DoubleOpenHashSet;
+import com.carrotsearch.hppc.LongOpenHashSet;
+import com.carrotsearch.hppc.ObjectOpenHashSet;
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.Filter;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.lucene.docset.MatchDocIdSet;
+import org.elasticsearch.index.fielddata.*;
+
+import java.io.IOException;
+
+/**
+ * Similar to a {@link org.apache.lucene.queries.TermsFilter} but pulls terms from the fielddata.
+ */
+public abstract class FieldDataTermsFilter extends Filter {
+
+    final IndexFieldData fieldData;
+
+    protected FieldDataTermsFilter(IndexFieldData fieldData) {
+        this.fieldData = fieldData;
+    }
+
+    /**
+     * Get a {@link FieldDataTermsFilter} that filters on non-numeric terms found in a hppc {@link ObjectOpenHashSet} of
+     * {@link BytesRef}.
+     *
+     * @param fieldData The fielddata for the field.
+     * @param terms     An {@link ObjectOpenHashSet} of terms.
+     * @return the filter.
+     */
+    public static FieldDataTermsFilter newBytes(IndexFieldData fieldData, ObjectOpenHashSet<BytesRef> terms) {
+        return new BytesFieldDataFilter(fieldData, terms);
+    }
+
+    /**
+     * Get a {@link FieldDataTermsFilter} that filters on non-floating point numeric terms found in a hppc
+     * {@link LongOpenHashSet}.
+     *
+     * @param fieldData The fielddata for the field.
+     * @param terms     A {@link LongOpenHashSet} of terms.
+     * @return the filter.
+     */
+    public static FieldDataTermsFilter newLongs(IndexNumericFieldData fieldData, LongOpenHashSet terms) {
+        return new LongsFieldDataFilter(fieldData, terms);
+    }
+
+    /**
+     * Get a {@link FieldDataTermsFilter} that filters on floating point numeric terms found in a hppc
+     * {@link DoubleOpenHashSet}.
+     *
+     * @param fieldData The fielddata for the field.
+     * @param terms     A {@link DoubleOpenHashSet} of terms.
+     * @return the filter.
+     */
+    public static FieldDataTermsFilter newDoubles(IndexNumericFieldData fieldData, DoubleOpenHashSet terms) {
+        return new DoublesFieldDataFilter(fieldData, terms);
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+        if (this == obj) return true;
+        if (obj == null || !(obj instanceof FieldDataTermsFilter)) return false;
+
+        FieldDataTermsFilter that = (FieldDataTermsFilter) obj;
+        if (!fieldData.getFieldNames().indexName().equals(that.fieldData.getFieldNames().indexName())) return false;
+        if (this.hashCode() != obj.hashCode()) return false;
+        return true;
+    }
+
+    @Override
+    public abstract int hashCode();
+
+    @Override
+    public abstract String toString();
+
+    /**
+     * Filters on non-numeric fields.
+     */
+    protected static class BytesFieldDataFilter extends FieldDataTermsFilter {
+
+        final ObjectOpenHashSet<BytesRef> terms;
+
+        protected BytesFieldDataFilter(IndexFieldData fieldData, ObjectOpenHashSet<BytesRef> terms) {
+            super(fieldData);
+            this.terms = terms;
+        }
+
+        @Override
+        public int hashCode() {
+            int hashcode = fieldData.getFieldNames().indexName().hashCode();
+            hashcode += terms != null ? terms.hashCode() : 0;
+            return hashcode;
+        }
+
+        @Override
+        public String toString() {
+            final StringBuilder sb = new StringBuilder("BytesFieldDataFilter:");
+            return sb
+                    .append(fieldData.getFieldNames().indexName())
+                    .append(":")
+                    .append(terms != null ? terms.toString() : "")
+                    .toString();
+        }
+
+        @Override
+        public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
+            // make sure there are terms to filter on
+            if (terms == null || terms.isEmpty()) return null;
+
+            final BytesValues values = fieldData.load(context).getBytesValues(false); // load fielddata
+            return new MatchDocIdSet(context.reader().maxDoc(), acceptDocs) {
+                @Override
+                protected boolean matchDoc(int doc) {
+                    final int numVals = values.setDocument(doc);
+                    for (int i = 0; i < numVals; i++) {
+                        if (terms.contains(values.nextValue())) {
+                            return true;
+                        }
+                    }
+
+                    return false;
+                }
+            };
+        }
+    }
+
+    /**
+     * Filters on non-floating point numeric fields.
+     */
+    protected static class LongsFieldDataFilter extends FieldDataTermsFilter {
+
+        final LongOpenHashSet terms;
+
+        protected LongsFieldDataFilter(IndexNumericFieldData fieldData, LongOpenHashSet terms) {
+            super(fieldData);
+            this.terms = terms;
+        }
+
+        @Override
+        public int hashCode() {
+            int hashcode = fieldData.getFieldNames().indexName().hashCode();
+            hashcode += terms != null ? terms.hashCode() : 0;
+            return hashcode;
+        }
+
+        @Override
+        public String toString() {
+            final StringBuilder sb = new StringBuilder("LongsFieldDataFilter:");
+            return sb
+                    .append(fieldData.getFieldNames().indexName())
+                    .append(":")
+                    .append(terms != null ? terms.toString() : "")
+                    .toString();
+        }
+
+        @Override
+        public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
+            // make sure there are terms to filter on
+            if (terms == null || terms.isEmpty()) return null;
+
+            IndexNumericFieldData numericFieldData = (IndexNumericFieldData) fieldData;
+            if (!numericFieldData.getNumericType().isFloatingPoint()) {
+                final LongValues values = numericFieldData.load(context).getLongValues(); // load fielddata
+                return new MatchDocIdSet(context.reader().maxDoc(), acceptDocs) {
+                    @Override
+                    protected boolean matchDoc(int doc) {
+                        final int numVals = values.setDocument(doc);
+                        for (int i = 0; i < numVals; i++) {
+                            if (terms.contains(values.nextValue())) {
+                                return true;
+                            }
+                        }
+
+                        return false;
+                    }
+                };
+            }
+
+            // only get here if wrong fielddata type in which case
+            // no docs will match so we just return null.
+            return null;
+        }
+    }
+
+    /**
+     * Filters on floating point numeric fields.
+     */
+    protected static class DoublesFieldDataFilter extends FieldDataTermsFilter {
+
+        final DoubleOpenHashSet terms;
+
+        protected DoublesFieldDataFilter(IndexNumericFieldData fieldData, DoubleOpenHashSet terms) {
+            super(fieldData);
+            this.terms = terms;
+        }
+
+        @Override
+        public int hashCode() {
+            int hashcode = fieldData.getFieldNames().indexName().hashCode();
+            hashcode += terms != null ? terms.hashCode() : 0;
+            return hashcode;
+        }
+
+        @Override
+        public String toString() {
+            final StringBuilder sb = new StringBuilder("DoublesFieldDataFilter");
+            return sb
+                    .append(fieldData.getFieldNames().indexName())
+                    .append(":")
+                    .append(terms != null ? terms.toString() : "")
+                    .toString();
+        }
+
+        @Override
+        public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
+            // make sure there are terms to filter on
+            if (terms == null || terms.isEmpty()) return null;
+
+            // verify we have a floating point numeric fielddata
+            IndexNumericFieldData indexNumericFieldData = (IndexNumericFieldData) fieldData;
+            if (indexNumericFieldData.getNumericType().isFloatingPoint()) {
+                final DoubleValues values = indexNumericFieldData.load(context).getDoubleValues(); // load fielddata
+                return new MatchDocIdSet(context.reader().maxDoc(), acceptDocs) {
+                    @Override
+                    protected boolean matchDoc(int doc) {
+                        final int numVals = values.setDocument(doc);
+
+                        for (int i = 0; i < numVals; i++) {
+                            if (terms.contains(values.nextValue())) {
+                                return true;
+                            }
+                        }
+
+                        return false;
+                    }
+                };
+            }
+
+            // only get here if wrong fielddata type in which case
+            // no docs will match so we just return null.
+            return null;
+        }
+    }
+}

+ 252 - 0
src/test/java/org/elasticsearch/index/search/FieldDataTermsFilterTests.java

@@ -0,0 +1,252 @@
+/*
+ * Licensed to ElasticSearch and Shay Banon under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. ElasticSearch licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.search;
+
+import com.carrotsearch.hppc.DoubleOpenHashSet;
+import com.carrotsearch.hppc.LongOpenHashSet;
+import com.carrotsearch.hppc.ObjectOpenHashSet;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.document.*;
+import org.apache.lucene.index.*;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.FixedBitSet;
+import org.elasticsearch.common.lucene.Lucene;
+import org.elasticsearch.index.Index;
+import org.elasticsearch.index.fielddata.IndexFieldData;
+import org.elasticsearch.index.fielddata.IndexFieldDataService;
+import org.elasticsearch.index.fielddata.IndexNumericFieldData;
+import org.elasticsearch.index.mapper.ContentPath;
+import org.elasticsearch.index.mapper.FieldMapper;
+import org.elasticsearch.index.mapper.Mapper;
+import org.elasticsearch.index.mapper.core.DoubleFieldMapper;
+import org.elasticsearch.index.mapper.core.LongFieldMapper;
+import org.elasticsearch.index.mapper.core.NumberFieldMapper;
+import org.elasticsearch.index.mapper.core.StringFieldMapper;
+import org.elasticsearch.search.internal.SearchContext;
+import org.elasticsearch.test.ElasticsearchTestCase;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import static org.hamcrest.CoreMatchers.equalTo;
+
+/**
+ */
+// Unit tests for FieldDataTermsFilter covering bytes, longs and doubles terms,
+// plus the empty-terms and wrong-fielddata-type corner cases.
+public class FieldDataTermsFilterTests extends ElasticsearchTestCase {
+
+    // Per-test fixtures: fielddata service, a single-segment reader over 10
+    // docs, and one field mapper per field type under test.
+    protected IndexFieldDataService ifdService;
+    protected IndexWriter writer;
+    protected AtomicReader reader;
+    protected StringFieldMapper strMapper;
+    protected LongFieldMapper lngMapper;
+    protected DoubleFieldMapper dblMapper;
+
+    @Before
+    public void setup() throws Exception {
+        super.setUp();
+
+        // setup field mappers
+        strMapper = new StringFieldMapper.Builder("str_value")
+                .build(new Mapper.BuilderContext(null, new ContentPath(1)));
+
+        lngMapper = new LongFieldMapper.Builder("lng_value")
+                .build(new Mapper.BuilderContext(null, new ContentPath(1)));
+
+        dblMapper = new DoubleFieldMapper.Builder("dbl_value")
+                .build(new Mapper.BuilderContext(null, new ContentPath(1)));
+
+        // create index and fielddata service
+        ifdService = new IndexFieldDataService(new Index("test"));
+        writer = new IndexWriter(new RAMDirectory(),
+                new IndexWriterConfig(Lucene.VERSION, new StandardAnalyzer(Lucene.VERSION)));
+
+        // index 10 docs; doc i carries "str<i>", long i and double i so doc
+        // ids line up with the term values asserted in the tests below
+        int numDocs = 10;
+        for (int i = 0; i < numDocs; i++) {
+            Document d = new Document();
+            d.add(new StringField(strMapper.names().indexName(), "str" + i, Field.Store.NO));
+            d.add(new LongField(lngMapper.names().indexName(), i, Field.Store.NO));
+            d.add(new DoubleField(dblMapper.names().indexName(), Double.valueOf(i), Field.Store.NO));
+            writer.addDocument(d);
+        }
+
+        // wrap as one atomic reader so reader.getContext() covers all docs
+        reader = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(writer, true));
+    }
+
+    @After
+    public void tearDown() throws Exception {
+        // NOTE(review): super.tearDown() runs before the local cleanup below;
+        // confirm the base class does not depend on resources being closed first.
+        super.tearDown();
+        reader.close();
+        writer.close();
+        ifdService.clear();
+        SearchContext.removeCurrent();
+    }
+
+    // Loads (and caches) fielddata for a non-numeric field mapper.
+    protected <IFD extends IndexFieldData> IFD getFieldData(FieldMapper fieldMapper) {
+        return ifdService.getForField(fieldMapper);
+    }
+
+    // Overload for numeric mappers so callers get IndexNumericFieldData back.
+    protected <IFD extends IndexNumericFieldData> IFD getFieldData(NumberFieldMapper fieldMapper) {
+        return ifdService.getForField(fieldMapper);
+    }
+
+    // BytesRef terms: matches on the string field, and must match nothing
+    // when pointed at numeric fields.
+    @Test
+    public void testBytes() throws Exception {
+        List<Integer> docs = Arrays.asList(1, 5, 7);
+
+        ObjectOpenHashSet<BytesRef> hTerms = new ObjectOpenHashSet<BytesRef>();
+        List<BytesRef> cTerms = new ArrayList<BytesRef>(docs.size());
+        for (int i = 0; i < docs.size(); i++) {
+            BytesRef term = new BytesRef("str" + docs.get(i));
+            hTerms.add(term);
+            cTerms.add(term);
+        }
+
+        FieldDataTermsFilter hFilter = FieldDataTermsFilter.newBytes(getFieldData(strMapper), hTerms);
+
+        int size = reader.maxDoc();
+        FixedBitSet result = new FixedBitSet(size);
+
+        result.clear(0, size);
+        assertThat(result.cardinality(), equalTo(0));
+        result.or(hFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator());
+        assertThat(result.cardinality(), equalTo(docs.size()));
+        for (int i = 0; i < reader.maxDoc(); i++) {
+            assertThat(result.get(i), equalTo(docs.contains(i)));
+        }
+
+        // filter from mapper
+        result.clear(0, size);
+        assertThat(result.cardinality(), equalTo(0));
+        result.or(strMapper.termsFilter(ifdService, cTerms, null)
+                .getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator());
+        assertThat(result.cardinality(), equalTo(docs.size()));
+        for (int i = 0; i < reader.maxDoc(); i++) {
+            assertThat(result.get(i), equalTo(docs.contains(i)));
+        }
+
+        result.clear(0, size);
+        assertThat(result.cardinality(), equalTo(0));
+
+        // filter on a numeric field using BytesRef terms
+        // should not match any docs
+        hFilter = FieldDataTermsFilter.newBytes(getFieldData(lngMapper), hTerms);
+        result.or(hFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator());
+        assertThat(result.cardinality(), equalTo(0));
+
+        // filter on a numeric field using BytesRef terms
+        // should not match any docs
+        hFilter = FieldDataTermsFilter.newBytes(getFieldData(dblMapper), hTerms);
+        result.or(hFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator());
+        assertThat(result.cardinality(), equalTo(0));
+    }
+
+    // Long terms: matches on the long field; a doubles field yields a null
+    // doc id set (wrong numeric type).
+    @Test
+    public void testLongs() throws Exception {
+        List<Integer> docs = Arrays.asList(1, 5, 7);
+
+        LongOpenHashSet hTerms = new LongOpenHashSet();
+        List<Long> cTerms = new ArrayList<Long>(docs.size());
+        for (int i = 0; i < docs.size(); i++) {
+            long term = docs.get(i).longValue();
+            hTerms.add(term);
+            cTerms.add(term);
+        }
+
+        FieldDataTermsFilter hFilter = FieldDataTermsFilter.newLongs(getFieldData(lngMapper), hTerms);
+
+        int size = reader.maxDoc();
+        FixedBitSet result = new FixedBitSet(size);
+
+        result.clear(0, size);
+        assertThat(result.cardinality(), equalTo(0));
+        result.or(hFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator());
+        assertThat(result.cardinality(), equalTo(docs.size()));
+        for (int i = 0; i < reader.maxDoc(); i++) {
+            assertThat(result.get(i), equalTo(docs.contains(i)));
+        }
+
+        // filter from mapper
+        result.clear(0, size);
+        assertThat(result.cardinality(), equalTo(0));
+        result.or(lngMapper.termsFilter(ifdService, cTerms, null)
+                .getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator());
+        assertThat(result.cardinality(), equalTo(docs.size()));
+        for (int i = 0; i < reader.maxDoc(); i++) {
+            assertThat(result.get(i), equalTo(docs.contains(i)));
+        }
+
+        hFilter = FieldDataTermsFilter.newLongs(getFieldData(dblMapper), hTerms);
+        assertNull(hFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()));
+    }
+
+    // Double terms: matches on the double field; a longs field yields a null
+    // doc id set (wrong numeric type).
+    @Test
+    public void testDoubles() throws Exception {
+        List<Integer> docs = Arrays.asList(1, 5, 7);
+
+        DoubleOpenHashSet hTerms = new DoubleOpenHashSet();
+        List<Double> cTerms = new ArrayList<Double>(docs.size());
+        for (int i = 0; i < docs.size(); i++) {
+            double term = Double.valueOf(docs.get(i));
+            hTerms.add(term);
+            cTerms.add(term);
+        }
+
+        FieldDataTermsFilter hFilter = FieldDataTermsFilter.newDoubles(getFieldData(dblMapper), hTerms);
+
+        int size = reader.maxDoc();
+        FixedBitSet result = new FixedBitSet(size);
+
+        result.clear(0, size);
+        assertThat(result.cardinality(), equalTo(0));
+        result.or(hFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator());
+        assertThat(result.cardinality(), equalTo(docs.size()));
+        for (int i = 0; i < reader.maxDoc(); i++) {
+            assertThat(result.get(i), equalTo(docs.contains(i)));
+        }
+
+        // filter from mapper
+        result.clear(0, size);
+        assertThat(result.cardinality(), equalTo(0));
+        result.or(dblMapper.termsFilter(ifdService, cTerms, null)
+                .getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator());
+        assertThat(result.cardinality(), equalTo(docs.size()));
+        for (int i = 0; i < reader.maxDoc(); i++) {
+            assertThat(result.get(i), equalTo(docs.contains(i)));
+        }
+
+        hFilter = FieldDataTermsFilter.newDoubles(getFieldData(lngMapper), hTerms);
+        assertNull(hFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()));
+    }
+
+    // Empty term sets must short-circuit to a null doc id set for all types.
+    @Test
+    public void testNoTerms() throws Exception {
+        FieldDataTermsFilter hFilterBytes = FieldDataTermsFilter.newBytes(getFieldData(strMapper), new ObjectOpenHashSet<BytesRef>());
+        FieldDataTermsFilter hFilterLongs = FieldDataTermsFilter.newLongs(getFieldData(lngMapper), new LongOpenHashSet());
+        FieldDataTermsFilter hFilterDoubles = FieldDataTermsFilter.newDoubles(getFieldData(dblMapper), new DoubleOpenHashSet());
+        assertNull(hFilterBytes.getDocIdSet(reader.getContext(), reader.getLiveDocs()));
+        assertNull(hFilterLongs.getDocIdSet(reader.getContext(), reader.getLiveDocs()));
+        assertNull(hFilterDoubles.getDocIdSet(reader.getContext(), reader.getLiveDocs()));
+    }
+}

+ 98 - 1
src/test/java/org/elasticsearch/search/query/SimpleQueryTests.java

@@ -195,7 +195,7 @@ public class SimpleQueryTests extends ElasticsearchIntegrationTest {
         }
     }
 
-    @Test // see #3521 
+    @Test // see #3521
     public void testAllDocsQueryString() throws InterruptedException, ExecutionException {
         client().admin().indices().prepareCreate("test")
                 .setSettings(ImmutableSettings.settingsBuilder().put("index.number_of_replicas", 0)).execute().actionGet();
@@ -990,6 +990,103 @@ public class SimpleQueryTests extends ElasticsearchIntegrationTest {
         assertThat(searchResponse.getHits().getTotalHits(), equalTo(0l));
     }
 
+    // Integration test for the terms filter "fielddata" execution mode over
+    // string, long and double fields: full matches, mixed numeric input types,
+    // partial matches, and valid-type queries with no matching terms.
+    @Test
+    public void testFieldDataTermsFilter() throws Exception {
+        assertAcked(prepareCreate("test").addMapping("type",
+                jsonBuilder().startObject().startObject("type").startObject("properties")
+                        .startObject("str").field("type", "string").endObject()
+                        .startObject("lng").field("type", "long").endObject()
+                        .startObject("dbl").field("type", "double").endObject()
+                        .endObject().endObject().endObject()));
+        ensureGreen();
+        // doc id == str == lng == dbl, so ids double as expected term values
+        client().prepareIndex("test", "type", "1").setSource("str", "1", "lng", 1l, "dbl", 1.0d).execute().actionGet();
+        client().prepareIndex("test", "type", "2").setSource("str", "2", "lng", 2l, "dbl", 2.0d).execute().actionGet();
+        client().prepareIndex("test", "type", "3").setSource("str", "3", "lng", 3l, "dbl", 3.0d).execute().actionGet();
+        client().prepareIndex("test", "type", "4").setSource("str", "4", "lng", 4l, "dbl", 4.0d).execute().actionGet();
+        refresh();
+
+        SearchResponse searchResponse = client().prepareSearch("test")
+                .setQuery(filteredQuery(matchAllQuery(), termsFilter("str", "1", "4").execution("fielddata")))
+                .execute().actionGet();
+        assertNoFailures(searchResponse);
+        assertThat(searchResponse.getHits().getTotalHits(), equalTo(2l));
+        assertThat(searchResponse.getHits().getAt(0).getId(), anyOf(equalTo("1"), equalTo("4")));
+        assertThat(searchResponse.getHits().getAt(1).getId(), anyOf(equalTo("1"), equalTo("4")));
+
+        searchResponse = client().prepareSearch("test")
+                .setQuery(filteredQuery(matchAllQuery(), termsFilter("lng", new long[] {2, 3}).execution("fielddata")))
+                .execute().actionGet();
+        assertNoFailures(searchResponse);
+        assertThat(searchResponse.getHits().getTotalHits(), equalTo(2l));
+        assertThat(searchResponse.getHits().getAt(0).getId(), anyOf(equalTo("2"), equalTo("3")));
+        assertThat(searchResponse.getHits().getAt(1).getId(), anyOf(equalTo("2"), equalTo("3")));
+
+        searchResponse = client().prepareSearch("test")
+                .setQuery(filteredQuery(matchAllQuery(), termsFilter("dbl", new double[] {2, 3}).execution("fielddata")))
+                .execute().actionGet();
+        assertNoFailures(searchResponse);
+        assertThat(searchResponse.getHits().getTotalHits(), equalTo(2l));
+        assertThat(searchResponse.getHits().getAt(0).getId(), anyOf(equalTo("2"), equalTo("3")));
+        assertThat(searchResponse.getHits().getAt(1).getId(), anyOf(equalTo("2"), equalTo("3")));
+
+        // int terms against a long field should still match
+        searchResponse = client().prepareSearch("test")
+                .setQuery(filteredQuery(matchAllQuery(), termsFilter("lng", new int[] {1, 3}).execution("fielddata")))
+                .execute().actionGet();
+        assertNoFailures(searchResponse);
+        assertThat(searchResponse.getHits().getTotalHits(), equalTo(2l));
+        assertThat(searchResponse.getHits().getAt(0).getId(), anyOf(equalTo("1"), equalTo("3")));
+        assertThat(searchResponse.getHits().getAt(1).getId(), anyOf(equalTo("1"), equalTo("3")));
+
+        // float terms against a double field should still match
+        searchResponse = client().prepareSearch("test")
+                .setQuery(filteredQuery(matchAllQuery(), termsFilter("dbl", new float[] {2, 4}).execution("fielddata")))
+                .execute().actionGet();
+        assertNoFailures(searchResponse);
+        assertThat(searchResponse.getHits().getTotalHits(), equalTo(2l));
+        assertThat(searchResponse.getHits().getAt(0).getId(), anyOf(equalTo("2"), equalTo("4")));
+        assertThat(searchResponse.getHits().getAt(1).getId(), anyOf(equalTo("2"), equalTo("4")));
+
+        // test partial matching
+        searchResponse = client().prepareSearch("test")
+                .setQuery(filteredQuery(matchAllQuery(), termsFilter("str", "2", "5").execution("fielddata")))
+                .execute().actionGet();
+        assertNoFailures(searchResponse);
+        assertThat(searchResponse.getHits().getTotalHits(), equalTo(1l));
+        assertThat(searchResponse.getHits().getAt(0).getId(), equalTo("2"));
+
+        searchResponse = client().prepareSearch("test")
+                .setQuery(filteredQuery(matchAllQuery(), termsFilter("dbl", new double[] {2, 5}).execution("fielddata")))
+                .execute().actionGet();
+        assertNoFailures(searchResponse);
+        assertThat(searchResponse.getHits().getTotalHits(), equalTo(1l));
+        assertThat(searchResponse.getHits().getAt(0).getId(), equalTo("2"));
+
+        searchResponse = client().prepareSearch("test")
+                .setQuery(filteredQuery(matchAllQuery(), termsFilter("lng", new long[] {2, 5}).execution("fielddata")))
+                .execute().actionGet();
+        assertNoFailures(searchResponse);
+        assertThat(searchResponse.getHits().getTotalHits(), equalTo(1l));
+        assertThat(searchResponse.getHits().getAt(0).getId(), equalTo("2"));
+
+        // test valid type, but no matching terms
+        searchResponse = client().prepareSearch("test")
+                .setQuery(filteredQuery(matchAllQuery(), termsFilter("str", "5", "6").execution("fielddata")))
+                .execute().actionGet();
+        assertNoFailures(searchResponse);
+        assertThat(searchResponse.getHits().getTotalHits(), equalTo(0l));
+
+        searchResponse = client().prepareSearch("test")
+                .setQuery(filteredQuery(matchAllQuery(), termsFilter("dbl", new double[] {5, 6}).execution("fielddata")))
+                .execute().actionGet();
+        assertNoFailures(searchResponse);
+        assertThat(searchResponse.getHits().getTotalHits(), equalTo(0l));
+
+        searchResponse = client().prepareSearch("test")
+                .setQuery(filteredQuery(matchAllQuery(), termsFilter("lng", new long[] {5, 6}).execution("fielddata")))
+                .execute().actionGet();
+        assertNoFailures(searchResponse);
+        assertThat(searchResponse.getHits().getTotalHits(), equalTo(0l));
+    }
+
     @Test
     public void testTermsLookupFilter() throws Exception {
         assertAcked(prepareCreate("lookup").addMapping("type",