Browse Source

Allow doc-values only search on boolean fields (#82925)

Allows searching on boolean fields that are not indexed (index: false) but have doc values enabled.

This enables searches on archive data, which has access to doc values but not index structures. When combined with
searchable snapshots, it allows downloading only data for a given (doc value) field to quickly filter down to a select set
of documents.

Relates #81210 and #52728
Yannick Welsch 3 years ago
parent
commit
0592c4cd7e

+ 2 - 1
docs/reference/mapping/params/doc-values.asciidoc

@@ -17,7 +17,8 @@ makes this data access pattern possible. They store the same values as the
 sorting and aggregations. Doc values are supported on almost all field types,
 with the __notable exception of `text` and `annotated_text` fields__.
 
-<<number,Numeric types>>, <<date,date types>>, and the <<keyword, keyword type>>
+<<number,Numeric types>>, <<date,date types>>, the <<boolean,boolean type>>
+and the <<keyword,keyword type>>
 can also be queried using term or range-based queries
 when they are not <<mapping-index,indexed>> but only have doc values enabled.
 Query performance on doc values is much slower than on index structures, but

+ 4 - 1
docs/reference/mapping/types/boolean.asciidoc

@@ -174,7 +174,10 @@ The following parameters are accepted by `boolean` fields:
 
 <<mapping-index,`index`>>::
 
-    Should the field be searchable? Accepts `true` (default) and `false`.
+    Should the field be quickly searchable? Accepts `true` (default) and
+    `false`. Fields that only have <<doc-values,`doc_values`>>
+    enabled can still be queried using term or range-based queries,
+    albeit slower.
 
 <<null-value,`null_value`>>::
 

+ 1 - 1
docs/reference/query-dsl.asciidoc

@@ -33,7 +33,7 @@ the stability of the cluster. Those queries can be categorised as follows:
 
 * Queries that need to do linear scans to identify matches:
 ** <<query-dsl-script-query,`script` queries>>
-** queries on <<number,numeric>>, <<date,date>>, or <<keyword,keyword>> fields that are not indexed
+** queries on <<number,numeric>>, <<date,date>>, <<boolean,boolean>>, or <<keyword,keyword>> fields that are not indexed
    but have <<doc-values,doc values>> enabled
 
 * Queries that have a high up-front cost:

+ 15 - 0
rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/field_caps/10_basic.yml

@@ -89,6 +89,9 @@ setup:
                   non_indexed_keyword:
                     type:     keyword
                     index:    false
+                  non_indexed_boolean:
+                    type:     boolean
+                    index:    false
                   geo:
                     type:     keyword
                   object:
@@ -240,6 +243,18 @@ setup:
 
   - match: {fields.non_indexed_keyword.keyword.searchable:                 true}
 
+---
+"Field caps for boolean field with only doc values":
+  - skip:
+      version: " - 8.0.99"
+      reason: "doc values search was added in 8.1.0"
+  - do:
+      field_caps:
+        index: 'test1,test2,test3'
+        fields: non_indexed_boolean
+
+  - match: {fields.non_indexed_boolean.boolean.searchable:                 true}
+
 ---
 "Get object and nested field caps":
 

+ 32 - 0
rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/390_doc_values_search.yml

@@ -39,6 +39,9 @@ setup:
               keyword:
                 type: keyword
                 index: false
+              boolean:
+                type: boolean
+                index: false
 
   - do:
       index:
@@ -54,6 +57,7 @@ setup:
           short: 1
           date: "2017/01/01"
           keyword: "key1"
+          boolean: "false"
 
   - do:
       index:
@@ -69,6 +73,7 @@ setup:
           short: 2
           date: "2017/01/02"
           keyword: "key2"
+          boolean: "true"
 
   - do:
       indices.refresh: {}
@@ -252,3 +257,30 @@ setup:
         index: test
         body: { query: { range: { keyword: { gte: "key1" } } } }
   - length:   { hits.hits: 2  }
+
+---
+"Test match query on boolean field where only doc values are enabled":
+
+  - do:
+      search:
+        index: test
+        body: { query: { match: { boolean: { query: "false" } } } }
+  - length:   { hits.hits: 1  }
+
+---
+"Test terms query on boolean field where only doc values are enabled":
+
+  - do:
+      search:
+        index: test
+        body: { query: { terms: { boolean: [ "false", "true" ] } } }
+  - length:   { hits.hits: 2  }
+
+---
+"Test range query on boolean field where only doc values are enabled":
+
+  - do:
+      search:
+        index: test
+        body: { query: { range: { boolean: { gte: "false" } } } }
+  - length:   { hits.hits: 2  }

+ 79 - 11
server/src/main/java/org/elasticsearch/index/mapper/BooleanFieldMapper.java

@@ -14,6 +14,10 @@ import org.apache.lucene.document.SortedNumericDocValuesField;
 import org.apache.lucene.document.StoredField;
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.MatchNoDocsQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermRangeQuery;
 import org.apache.lucene.util.BytesRef;
@@ -37,6 +41,7 @@ import org.elasticsearch.xcontent.XContentParser;
 
 import java.io.IOException;
 import java.time.ZoneId;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
@@ -155,11 +160,15 @@ public class BooleanFieldMapper extends FieldMapper {
         }
 
         public BooleanFieldType(String name) {
-            this(name, true, false, true, false, null, Collections.emptyMap());
+            this(name, true); // defaults to an indexed field
         }
 
-        public BooleanFieldType(String name, boolean searchable) {
-            this(name, searchable, false, true, false, null, Collections.emptyMap());
+        public BooleanFieldType(String name, boolean isIndexed) {
+            this(name, isIndexed, true); // doc values are enabled by default
+        }
+
+        public BooleanFieldType(String name, boolean isIndexed, boolean hasDocValues) {
+            this(name, isIndexed, false, hasDocValues, false, null, Collections.emptyMap()); // third argument stays false, as in the overloads this replaces
         }
 
         @Override
@@ -167,6 +176,11 @@ public class BooleanFieldMapper extends FieldMapper {
             return CONTENT_TYPE;
         }
 
+        @Override
+        public boolean isSearchable() {
+            return isIndexed() || hasDocValues(); // true when the field is indexed or has doc values
+        }
+
         @Override
         public ValueFetcher valueFetcher(SearchExecutionContext context, String format) {
             if (format != null) {
@@ -209,6 +223,15 @@ public class BooleanFieldMapper extends FieldMapper {
             };
         }
 
+        private long docValueForSearch(Object value) { // converts a search value to the long compared against doc values
+            BytesRef ref = indexedValueForSearch(value); // reuse the conversion used for indexed terms
+            if (Values.TRUE.equals(ref)) {
+                return 1; // Values.TRUE maps to 1
+            } else {
+                return 0; // every other result maps to 0
+            }
+        }
+
         @Override
         public Boolean valueForDisplay(Object value) {
             if (value == null) {
@@ -234,6 +257,30 @@ public class BooleanFieldMapper extends FieldMapper {
             return DocValueFormat.BOOLEAN;
         }
 
+        @Override
+        public Query termQuery(Object value, SearchExecutionContext context) {
+            failIfNotIndexedNorDocValuesFallback(context); // guard before choosing between the index and doc values
+            if (isIndexed()) {
+                return super.termQuery(value, context); // indexed: delegate to the inherited term query
+            } else {
+                return SortedNumericDocValuesField.newSlowExactQuery(name(), docValueForSearch(value)); // not indexed: exact match on the 0/1 doc value
+            }
+        }
+
+        @Override
+        public Query termsQuery(Collection<?> values, SearchExecutionContext context) {
+            failIfNotIndexedNorDocValuesFallback(context); // guard before choosing between the index and doc values
+            if (isIndexed()) {
+                return super.termsQuery(values, context); // indexed: delegate to the inherited terms query
+            } else {
+                BooleanQuery.Builder builder = new BooleanQuery.Builder(); // not indexed: OR together one term query per value
+                for (Object value : values) {
+                    builder.add(termQuery(value, context), BooleanClause.Occur.SHOULD);
+                }
+                return new ConstantScoreQuery(builder.build()); // wrap the disjunction in a constant-score query
+            }
+        }
+
         @Override
         public Query rangeQuery(
             Object lowerTerm,
@@ -242,14 +289,35 @@ public class BooleanFieldMapper extends FieldMapper {
             boolean includeUpper,
             SearchExecutionContext context
         ) {
-            failIfNotIndexed();
-            return new TermRangeQuery(
-                name(),
-                lowerTerm == null ? null : indexedValueForSearch(lowerTerm),
-                upperTerm == null ? null : indexedValueForSearch(upperTerm),
-                includeLower,
-                includeUpper
-            );
+            failIfNotIndexedNorDocValuesFallback(context); // guard before choosing between the index and doc values
+            if (isIndexed()) {
+                return new TermRangeQuery(
+                    name(),
+                    lowerTerm == null ? null : indexedValueForSearch(lowerTerm),
+                    upperTerm == null ? null : indexedValueForSearch(upperTerm),
+                    includeLower,
+                    includeUpper
+                );
+            } else {
+                long l = 0; // inclusive lower bound; 0 (false) when no lower term is given
+                long u = 1; // inclusive upper bound; 1 (true) when no upper term is given
+                if (lowerTerm != null) {
+                    l = docValueForSearch(lowerTerm);
+                    if (includeLower == false) {
+                        l = Math.max(1, l + 1); // exclusive lower bound: step past the given value
+                    }
+                }
+                if (upperTerm != null) {
+                    u = docValueForSearch(upperTerm);
+                    if (includeUpper == false) {
+                        u = Math.min(0, u - 1); // exclusive upper bound: step below the given value (adjusts u, not l)
+                    }
+                }
+                if (l > u) { // empty interval, e.g. "gt true" or "lt false"
+                    return new MatchNoDocsQuery();
+                }
+                return SortedNumericDocValuesField.newSlowRangeQuery(name(), l, u);
+            }
         }
     }
 

+ 44 - 4
server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java

@@ -20,6 +20,7 @@ import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.MultiTerms;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.sandbox.search.DocValuesTermsQuery;
 import org.apache.lucene.search.MultiTermQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.util.BytesRef;
@@ -50,6 +51,7 @@ import org.elasticsearch.xcontent.XContentParser;
 
 import java.io.IOException;
 import java.io.UncheckedIOException;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
 import java.util.Locale;
@@ -339,13 +341,51 @@ public final class KeywordFieldMapper extends FieldMapper {
         }
 
         @Override
-        protected boolean allowDocValueBasedQueries() {
-            return true;
+        public boolean isSearchable() {
+            return isIndexed() || hasDocValues(); // true when the field is indexed or has doc values
         }
 
         @Override
-        public boolean isSearchable() {
-            return isIndexed() || hasDocValues();
+        public Query termQuery(Object value, SearchExecutionContext context) {
+            failIfNotIndexedNorDocValuesFallback(context); // guard before choosing between the index and doc values
+            if (isIndexed()) {
+                return super.termQuery(value, context); // indexed: delegate to the inherited term query
+            } else {
+                return SortedSetDocValuesField.newSlowExactQuery(name(), indexedValueForSearch(value)); // not indexed: exact match on doc values
+            }
+        }
+
+        @Override
+        public Query termsQuery(Collection<?> values, SearchExecutionContext context) {
+            failIfNotIndexedNorDocValuesFallback(context); // guard before choosing between the index and doc values
+            if (isIndexed()) {
+                return super.termsQuery(values, context); // indexed: delegate to the inherited terms query
+            } else {
+                BytesRef[] bytesRefs = values.stream().map(this::indexedValueForSearch).toArray(BytesRef[]::new); // convert each value the same way as for indexed terms
+                return new DocValuesTermsQuery(name(), bytesRefs); // not indexed: match the converted terms against doc values
+            }
+        }
+
+        @Override
+        public Query rangeQuery(
+            Object lowerTerm,
+            Object upperTerm,
+            boolean includeLower,
+            boolean includeUpper,
+            SearchExecutionContext context
+        ) {
+            failIfNotIndexedNorDocValuesFallback(context); // guard before choosing between the index and doc values
+            if (isIndexed()) {
+                return super.rangeQuery(lowerTerm, upperTerm, includeLower, includeUpper, context); // indexed: delegate to the inherited range query
+            } else {
+                return SortedSetDocValuesField.newSlowRangeQuery( // not indexed: range over doc values
+                    name(),
+                    lowerTerm == null ? null : indexedValueForSearch(lowerTerm), // a null term is passed through as null
+                    upperTerm == null ? null : indexedValueForSearch(upperTerm),
+                    includeLower,
+                    includeUpper
+                );
+            }
         }
 
         @Override

+ 8 - 23
server/src/main/java/org/elasticsearch/index/mapper/StringFieldType.java

@@ -9,7 +9,6 @@
 package org.elasticsearch.index.mapper;
 
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.document.SortedSetDocValuesField;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.AutomatonQuery;
 import org.apache.lucene.search.FuzzyQuery;
@@ -211,27 +210,13 @@ public abstract class StringFieldType extends TermBasedFieldType {
                     + "' is set to false."
             );
         }
-        if (allowDocValueBasedQueries()) {
-            failIfNotIndexedNorDocValuesFallback(context);
-        } else {
-            failIfNotIndexed();
-        }
-        if (isIndexed()) {
-            return new TermRangeQuery(
-                name(),
-                lowerTerm == null ? null : indexedValueForSearch(lowerTerm),
-                upperTerm == null ? null : indexedValueForSearch(upperTerm),
-                includeLower,
-                includeUpper
-            );
-        } else {
-            return SortedSetDocValuesField.newSlowRangeQuery(
-                name(),
-                lowerTerm == null ? null : indexedValueForSearch(lowerTerm),
-                upperTerm == null ? null : indexedValueForSearch(upperTerm),
-                includeLower,
-                includeUpper
-            );
-        }
+        failIfNotIndexed();
+        return new TermRangeQuery(
+            name(),
+            lowerTerm == null ? null : indexedValueForSearch(lowerTerm),
+            upperTerm == null ? null : indexedValueForSearch(upperTerm),
+            includeLower,
+            includeUpper
+        );
     }
 }

+ 4 - 26
server/src/main/java/org/elasticsearch/index/mapper/TermBasedFieldType.java

@@ -8,9 +8,7 @@
 
 package org.elasticsearch.index.mapper;
 
-import org.apache.lucene.document.SortedSetDocValuesField;
 import org.apache.lucene.index.Term;
-import org.apache.lucene.sandbox.search.DocValuesTermsQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermInSetQuery;
 import org.apache.lucene.search.TermQuery;
@@ -37,10 +35,6 @@ public abstract class TermBasedFieldType extends SimpleMappedFieldType {
         super(name, isIndexed, isStored, hasDocValues, textSearchInfo, meta);
     }
 
-    protected boolean allowDocValueBasedQueries() {
-        return false;
-    }
-
     /** Returns the indexed value used to construct search "values".
      *  This method is used for the default implementations of most
      *  query factory methods such as {@link #termQuery}. */
@@ -61,31 +55,15 @@ public abstract class TermBasedFieldType extends SimpleMappedFieldType {
 
     @Override
     public Query termQuery(Object value, SearchExecutionContext context) {
-        if (allowDocValueBasedQueries()) {
-            failIfNotIndexedNorDocValuesFallback(context);
-        } else {
-            failIfNotIndexed();
-        }
-        if (isIndexed()) {
-            return new TermQuery(new Term(name(), indexedValueForSearch(value)));
-        } else {
-            return SortedSetDocValuesField.newSlowExactQuery(name(), indexedValueForSearch(value));
-        }
+        failIfNotIndexed(); // this base implementation only builds index-backed queries
+        return new TermQuery(new Term(name(), indexedValueForSearch(value))); // term query on the converted value
     }
 
     @Override
     public Query termsQuery(Collection<?> values, SearchExecutionContext context) {
-        if (allowDocValueBasedQueries()) {
-            failIfNotIndexedNorDocValuesFallback(context);
-        } else {
-            failIfNotIndexed();
-        }
+        failIfNotIndexed(); // this base implementation only builds index-backed queries
         BytesRef[] bytesRefs = values.stream().map(this::indexedValueForSearch).toArray(BytesRef[]::new);
-        if (isIndexed()) {
-            return new TermInSetQuery(name(), bytesRefs);
-        } else {
-            return new DocValuesTermsQuery(name(), bytesRefs);
-        }
+        return new TermInSetQuery(name(), bytesRefs); // set query over the converted values
     }
 
 }

+ 29 - 5
server/src/test/java/org/elasticsearch/index/mapper/BooleanFieldTypeTests.java

@@ -7,8 +7,11 @@
  */
 package org.elasticsearch.index.mapper;
 
+import org.apache.lucene.document.SortedNumericDocValuesField;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TermRangeQuery;
 
 import java.io.IOException;
 import java.util.Collections;
@@ -33,12 +36,33 @@ public class BooleanFieldTypeTests extends FieldTypeTestCase {
 
     public void testTermQuery() {
         MappedFieldType ft = new BooleanFieldMapper.BooleanFieldType("field");
-        assertEquals(new TermQuery(new Term("field", "T")), ft.termQuery("true", null));
-        assertEquals(new TermQuery(new Term("field", "F")), ft.termQuery("false", null));
+        assertEquals(new TermQuery(new Term("field", "T")), ft.termQuery("true", MOCK_CONTEXT)); // indexed field: term query on "T"
+        assertEquals(new TermQuery(new Term("field", "F")), ft.termQuery("false", MOCK_CONTEXT)); // indexed field: term query on "F"
 
-        MappedFieldType unsearchable = new BooleanFieldMapper.BooleanFieldType("field", false);
-        IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> unsearchable.termQuery("true", null));
-        assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage());
+        MappedFieldType ft2 = new BooleanFieldMapper.BooleanFieldType("field", false); // not indexed; the two-argument constructor enables doc values
+        assertEquals(SortedNumericDocValuesField.newSlowExactQuery("field", 1), ft2.termQuery("true", MOCK_CONTEXT)); // "true" is matched as doc value 1
+        assertEquals(SortedNumericDocValuesField.newSlowExactQuery("field", 0), ft2.termQuery("false", MOCK_CONTEXT)); // "false" is matched as doc value 0
+
+        MappedFieldType unsearchable = new BooleanFieldMapper.BooleanFieldType("field", false, false); // neither indexed nor doc values
+        IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> unsearchable.termQuery("true", MOCK_CONTEXT));
+        assertEquals("Cannot search on field [field] since it is not indexed nor has doc values.", e.getMessage());
+    }
+
+    public void testRangeQuery() {
+        MappedFieldType ft = new BooleanFieldMapper.BooleanFieldType("field"); // indexed field
+        Query expected = new TermRangeQuery("field", BooleanFieldMapper.Values.FALSE, BooleanFieldMapper.Values.TRUE, true, true);
+        assertEquals(expected, ft.rangeQuery("false", "true", true, true, null, null, null, MOCK_CONTEXT));
+
+        ft = new BooleanFieldMapper.BooleanFieldType("field", false); // not indexed; the two-argument constructor enables doc values
+        expected = SortedNumericDocValuesField.newSlowRangeQuery("field", 0, 1); // inclusive false..true becomes the doc value range 0..1
+        assertEquals(expected, ft.rangeQuery("false", "true", true, true, null, null, null, MOCK_CONTEXT));
+
+        MappedFieldType unsearchable = new BooleanFieldMapper.BooleanFieldType("field", false, false); // neither indexed nor doc values
+        IllegalArgumentException e = expectThrows(
+            IllegalArgumentException.class,
+            () -> unsearchable.rangeQuery("false", "true", true, true, null, null, null, MOCK_CONTEXT)
+        );
+        assertEquals("Cannot search on field [field] since it is not indexed nor has doc values.", e.getMessage());
     }
 
     public void testFetchSourceValue() throws IOException {