Browse Source

_terms_enum for doc value only keyword fields (#83482)

Provides support for the _terms_enum API on doc value only keyword fields

Closes #83451
Yannick Welsch 3 năm trước cách đây
mục cha
commit
6d7c1b2a26

+ 6 - 0
docs/changelog/83482.yaml

@@ -0,0 +1,6 @@
+pr: 83482
+summary: Terms enum support for doc value only keyword fields
+area: Mapping
+type: enhancement
+issues:
+ - 83451

+ 112 - 1
server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java

@@ -18,6 +18,8 @@ import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.MultiTerms;
+import org.apache.lucene.index.ReaderSlice;
+import org.apache.lucene.index.SortedSetDocValues;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.sandbox.search.DocValuesTermsQuery;
@@ -59,6 +61,7 @@ import org.elasticsearch.xcontent.XContentParser;
 
 import java.io.IOException;
 import java.io.UncheckedIOException;
+import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
@@ -463,7 +466,12 @@ public final class KeywordFieldMapper extends FieldMapper {
             throws IOException {
             IndexReader reader = queryShardContext.searcher().getTopReaderContext().reader();
 
-            Terms terms = MultiTerms.getTerms(reader, name());
+            Terms terms = null;
+            if (isIndexed()) {
+                terms = MultiTerms.getTerms(reader, name());
+            } else if (hasDocValues()) {
+                terms = SortedSetDocValuesTerms.getTerms(reader, name());
+            }
             if (terms == null) {
                 // Field does not exist on this shard.
                 return null;
@@ -506,6 +514,109 @@ public final class KeywordFieldMapper extends FieldMapper {
             }
         }
 
+        /**
+         * A minimal {@link Terms} view over {@link SortedSetDocValues}. Only {@link #iterator} and
+         * {@link #intersect(CompiledAutomaton, BytesRef)} are usable; the other overrides are unsupported.
+         * Per-segment instances are merged through {@link MultiTerms} instead of using
+         * {@link org.apache.lucene.index.MultiDocValues#getSortedSetValues(IndexReader, String)}
+         * because {@link org.apache.lucene.index.MultiDocValues} builds global ordinals up-front whereas
+         * {@link MultiTerms}, which exposes the terms enum via {@link org.apache.lucene.index.MultiTermsEnum},
+         * merges terms on the fly.
+         */
+        static class SortedSetDocValuesTerms extends Terms {
+
+            public static Terms getTerms(IndexReader r, String field) throws IOException {
+                final List<LeafReaderContext> leaves = r.leaves();
+                if (leaves.size() == 1) { // single segment: wrap it directly, nothing to merge
+                    SortedSetDocValues sortedSetDocValues = leaves.get(0).reader().getSortedSetDocValues(field);
+                    if (sortedSetDocValues == null) {
+                        return null; // the only segment has no sorted-set doc values for the field
+                    } else {
+                        return new SortedSetDocValuesTerms(sortedSetDocValues);
+                    }
+                }
+
+                final List<Terms> termsPerLeaf = new ArrayList<>(leaves.size());
+                final List<ReaderSlice> slicePerLeaf = new ArrayList<>(leaves.size());
+
+                for (int leafIdx = 0; leafIdx < leaves.size(); leafIdx++) {
+                    LeafReaderContext ctx = leaves.get(leafIdx);
+                    SortedSetDocValues sortedSetDocValues = ctx.reader().getSortedSetDocValues(field);
+                    if (sortedSetDocValues != null) { // NOTE(review): slice below uses r.maxDoc(), not the leaf's maxDoc; confirm
+                        termsPerLeaf.add(new SortedSetDocValuesTerms(sortedSetDocValues));
+                        slicePerLeaf.add(new ReaderSlice(ctx.docBase, r.maxDoc(), leafIdx));
+                    }
+                }
+
+                if (termsPerLeaf.isEmpty()) { // no segment has sorted-set doc values for the field
+                    return null;
+                } else {
+                    return new MultiTerms(termsPerLeaf.toArray(EMPTY_ARRAY), slicePerLeaf.toArray(ReaderSlice.EMPTY_ARRAY));
+                }
+            }
+
+            private final SortedSetDocValues values;
+
+            SortedSetDocValuesTerms(SortedSetDocValues values) {
+                this.values = values;
+            }
+
+            @Override
+            public TermsEnum iterator() throws IOException {
+                return values.termsEnum();
+            }
+
+            @Override
+            public TermsEnum intersect(CompiledAutomaton compiled, final BytesRef startTerm) throws IOException {
+                if (startTerm == null) { // no start term: delegate to the doc-values intersect
+                    return values.intersect(compiled);
+                } else {
+                    return super.intersect(compiled, startTerm); // otherwise use the inherited Terms implementation
+                }
+            }
+
+            @Override
+            public long size() throws IOException {
+                throw new UnsupportedOperationException();
+            }
+
+            @Override
+            public long getSumTotalTermFreq() throws IOException {
+                throw new UnsupportedOperationException();
+            }
+
+            @Override
+            public long getSumDocFreq() throws IOException {
+                throw new UnsupportedOperationException();
+            }
+
+            @Override
+            public int getDocCount() throws IOException {
+                throw new UnsupportedOperationException();
+            }
+
+            @Override
+            public boolean hasFreqs() {
+                throw new UnsupportedOperationException();
+            }
+
+            @Override
+            public boolean hasOffsets() {
+                throw new UnsupportedOperationException();
+            }
+
+            @Override
+            public boolean hasPositions() {
+                throw new UnsupportedOperationException();
+            }
+
+            @Override
+            public boolean hasPayloads() {
+                throw new UnsupportedOperationException();
+            }
+
+        }
+
         @Override
         public String typeName() {
             return CONTENT_TYPE;

+ 23 - 5
x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/termsenum/action/NodeTermsEnumResponse.java

@@ -6,12 +6,15 @@
  */
 package org.elasticsearch.xpack.core.termsenum.action;
 
+import org.elasticsearch.Version;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.common.io.stream.Writeable;
 import org.elasticsearch.transport.TransportResponse;
 
 import java.io.IOException;
 import java.util.List;
+import java.util.stream.Collectors;
 
 /**
  * Internal response of a terms enum request executed directly against a specific shard.
@@ -23,18 +26,26 @@ class NodeTermsEnumResponse extends TransportResponse {
     private String error;
     private boolean complete;
 
-    private List<TermCount> terms;
+    private List<String> terms;
     private String nodeId;
 
     NodeTermsEnumResponse(StreamInput in) throws IOException {
         super(in);
-        terms = in.readList(TermCount::new);
+        if (in.getVersion().before(Version.V_8_2_0)) { // older wire format: each term is followed by a docCount
+            terms = in.readList(r -> {
+                String term = r.readString();
+                r.readLong(); // obsolete docCount field; consumed from the reader's own stream argument and dropped
+                return term;
+            });
+        } else {
+            terms = in.readStringList(); // current wire format: plain list of terms
+        }
         error = in.readOptionalString();
         complete = in.readBoolean();
         nodeId = in.readString();
     }
 
-    NodeTermsEnumResponse(String nodeId, List<TermCount> terms, String error, boolean complete) {
+    NodeTermsEnumResponse(String nodeId, List<String> terms, String error, boolean complete) {
         this.nodeId = nodeId;
         this.terms = terms;
         this.error = error;
@@ -43,13 +54,20 @@ class NodeTermsEnumResponse extends TransportResponse {
 
     @Override
     public void writeTo(StreamOutput out) throws IOException {
-        out.writeCollection(terms);
+        if (out.getVersion().before(Version.V_8_2_0)) { // older wire format: each term is followed by a docCount
+            out.writeCollection(terms.stream().map(term -> (Writeable) out1 -> {
+                out1.writeString(term);
+                out1.writeLong(1); // obsolete docCount field; a constant placeholder is written
+            }).collect(Collectors.toList()));
+        } else {
+            out.writeStringCollection(terms); // current wire format: plain collection of terms
+        }
         out.writeOptionalString(error);
         out.writeBoolean(complete);
         out.writeString(nodeId);
     }
 
-    public List<TermCount> terms() {
+    public List<String> terms() {
         return this.terms;
     }
 

+ 8 - 12
x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/termsenum/action/SimpleTermCountEnum.java

@@ -16,7 +16,6 @@ import org.elasticsearch.index.mapper.MappedFieldType;
 
 import java.io.IOException;
 import java.util.Arrays;
-import java.util.Comparator;
 
 /**
  * A utility class for fields that need to support autocomplete via
@@ -25,16 +24,16 @@ import java.util.Comparator;
  */
 public class SimpleTermCountEnum extends TermsEnum {
     int index = -1;
-    TermCount[] sortedTerms;
-    TermCount current = null;
+    String[] sortedTerms;
+    String current = null;
 
-    public SimpleTermCountEnum(TermCount[] terms) {
+    public SimpleTermCountEnum(String[] terms) {
         sortedTerms = Arrays.copyOf(terms, terms.length);
-        Arrays.sort(sortedTerms, Comparator.comparing(TermCount::getTerm));
+        Arrays.sort(sortedTerms); // String (UTF-16) order; NOTE(review): may differ from BytesRef order for non-BMP chars, confirm
     }
 
-    public SimpleTermCountEnum(TermCount termCount) {
-        sortedTerms = new TermCount[1];
+    public SimpleTermCountEnum(String termCount) {
+        sortedTerms = new String[1];
         sortedTerms[0] = termCount;
     }
 
@@ -43,7 +42,7 @@ public class SimpleTermCountEnum extends TermsEnum {
         if (current == null) {
             return null;
         }
-        return new BytesRef(current.getTerm());
+        return new BytesRef(current);
     }
 
     @Override
@@ -59,10 +58,7 @@ public class SimpleTermCountEnum extends TermsEnum {
 
     @Override
     public int docFreq() throws IOException {
-        if (current == null) {
-            return 0;
-        }
-        return (int) current.getDocCount();
+        throw new UnsupportedOperationException();
     }
 
     // =============== All other TermsEnum methods not supported =================

+ 0 - 99
x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/termsenum/action/TermCount.java

@@ -1,99 +0,0 @@
-/*
- * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
- * or more contributor license agreements. Licensed under the Elastic License
- * 2.0; you may not use this file except in compliance with the Elastic License
- * 2.0.
- */
-package org.elasticsearch.xpack.core.termsenum.action;
-
-import org.elasticsearch.common.io.stream.StreamInput;
-import org.elasticsearch.common.io.stream.StreamOutput;
-import org.elasticsearch.common.io.stream.Writeable;
-import org.elasticsearch.xcontent.ConstructingObjectParser;
-import org.elasticsearch.xcontent.ParseField;
-import org.elasticsearch.xcontent.ToXContentFragment;
-import org.elasticsearch.xcontent.XContentBuilder;
-import org.elasticsearch.xcontent.XContentParser;
-
-import java.io.IOException;
-import java.util.Objects;
-
-import static org.elasticsearch.xcontent.ConstructingObjectParser.constructorArg;
-
-public class TermCount implements Writeable, ToXContentFragment {
-
-    public static final String TERM_FIELD = "term";
-    public static final String DOC_COUNT_FIELD = "doc_count";
-
-    static final ConstructingObjectParser<TermCount, Void> PARSER = new ConstructingObjectParser<>(
-        "term_count",
-        true,
-        a -> { return new TermCount((String) a[0], (long) a[1]); }
-    );
-    static {
-        PARSER.declareString(constructorArg(), new ParseField(TERM_FIELD));
-        PARSER.declareLong(constructorArg(), new ParseField(DOC_COUNT_FIELD));
-    }
-
-    private final String term;
-
-    private long docCount;
-
-    public TermCount(StreamInput in) throws IOException {
-        term = in.readString();
-        docCount = in.readLong();
-    }
-
-    public TermCount(String term, long count) {
-        this.term = term;
-        this.docCount = count;
-    }
-
-    public String getTerm() {
-        return this.term;
-    }
-
-    public long getDocCount() {
-        return this.docCount;
-    }
-
-    @Override
-    public void writeTo(StreamOutput out) throws IOException {
-        out.writeString(term);
-        out.writeLong(docCount);
-    }
-
-    @Override
-    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
-        builder.field(TERM_FIELD, getTerm());
-        builder.field(DOC_COUNT_FIELD, getDocCount());
-        return builder;
-    }
-
-    public static TermCount fromXContent(XContentParser parser) {
-        return PARSER.apply(parser, null);
-    }
-
-    @Override
-    public boolean equals(Object o) {
-        if (this == o) return true;
-        if (o == null || getClass() != o.getClass()) return false;
-        TermCount other = (TermCount) o;
-        return Objects.equals(getTerm(), other.getTerm()) && Objects.equals(getDocCount(), other.getDocCount());
-    }
-
-    @Override
-    public int hashCode() {
-        return Objects.hash(getTerm(), getDocCount());
-    }
-
-    void addToDocCount(long extra) {
-        docCount += extra;
-    }
-
-    @Override
-    public String toString() {
-        return term + ":" + docCount;
-    }
-
-}

+ 25 - 36
x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/termsenum/action/TransportTermsEnumAction.java

@@ -80,7 +80,6 @@ import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicReferenceArray;
-import java.util.stream.Collectors;
 
 import static org.elasticsearch.action.search.TransportSearchHelper.checkCCSVersionCompatibility;
 import static org.elasticsearch.xpack.core.security.SecurityField.DOCUMENT_LEVEL_SECURITY_FEATURE;
@@ -220,7 +219,7 @@ public class TransportTermsEnumAction extends HandledTransportAction<TermsEnumRe
         int successfulShards = 0;
         int failedShards = 0;
         List<DefaultShardOperationFailedException> shardFailures = null;
-        List<List<TermCount>> termsList = new ArrayList<>();
+        List<List<String>> termsList = new ArrayList<>();
         for (int i = 0; i < atomicResponses.length(); i++) {
             Object atomicResponse = atomicResponses.get(i);
             if (atomicResponse == null) {
@@ -264,51 +263,46 @@ public class TransportTermsEnumAction extends HandledTransportAction<TermsEnumRe
                         new DefaultShardOperationFailedException(rc.clusterAlias + ":" + exc.index(), exc.shardId(), exc.getCause())
                     );
                 }
-                List<TermCount> terms = rc.resp.getTerms().stream().map(a -> new TermCount(a, 1)).collect(Collectors.toList());
-                termsList.add(terms);
+                termsList.add(rc.resp.getTerms());
             } else {
                 throw new AssertionError("Unknown atomic response type: " + atomicResponse.getClass().getName());
             }
         }
 
-        List<String> ans = termsList.size() == 1
-            ? termsList.get(0).stream().map(TermCount::getTerm).collect(Collectors.toList())
-            : mergeResponses(termsList, request.size());
+        List<String> ans = termsList.size() == 1 ? termsList.get(0) : mergeResponses(termsList, request.size());
         return new TermsEnumResponse(ans, (failedShards + successfulShards), successfulShards, failedShards, shardFailures, complete);
     }
 
-    private List<String> mergeResponses(List<List<TermCount>> termsList, int size) {
-        final PriorityQueue<TermCountIterator> pq = new PriorityQueue<>(termsList.size()) {
+    private List<String> mergeResponses(List<List<String>> termsList, int size) {
+        final PriorityQueue<TermIterator> pq = new PriorityQueue<>(termsList.size()) {
             @Override
-            protected boolean lessThan(TermCountIterator a, TermCountIterator b) {
+            protected boolean lessThan(TermIterator a, TermIterator b) {
                 return a.compareTo(b) < 0;
             }
         };
 
-        for (List<TermCount> terms : termsList) {
-            Iterator<TermCount> it = terms.iterator();
+        for (List<String> terms : termsList) {
+            Iterator<String> it = terms.iterator();
             if (it.hasNext()) {
-                pq.add(new TermCountIterator(it));
+                pq.add(new TermIterator(it));
             }
         }
 
-        TermCount lastTerm = null;
+        String lastTerm = null;
         final List<String> ans = new ArrayList<>();
         while (pq.size() != 0) {
-            TermCountIterator it = pq.top();
+            TermIterator it = pq.top();
             String term = it.term();
-            long docCount = it.docCount();
-            if (lastTerm != null && lastTerm.getTerm().compareTo(term) != 0) {
-                ans.add(lastTerm.getTerm());
+            if (lastTerm != null && lastTerm.compareTo(term) != 0) {
+                ans.add(lastTerm);
                 if (ans.size() == size) {
                     break;
                 }
                 lastTerm = null;
             }
             if (lastTerm == null) {
-                lastTerm = new TermCount(term, 0);
+                lastTerm = term;
             }
-            lastTerm.addToDocCount(docCount);
             if (it.hasNext()) {
                 String itTerm = it.term();
                 it.next();
@@ -319,13 +313,13 @@ public class TransportTermsEnumAction extends HandledTransportAction<TermsEnumRe
             }
         }
         if (lastTerm != null && ans.size() < size) {
-            ans.add(lastTerm.getTerm());
+            ans.add(lastTerm);
         }
         return ans;
     }
 
     protected NodeTermsEnumResponse dataNodeOperation(NodeTermsEnumRequest request, Task task) throws IOException {
-        List<TermCount> termsList = new ArrayList<>();
+        List<String> termsList = new ArrayList<>();
         String error = null;
 
         long timeout_millis = request.timeout();
@@ -389,9 +383,8 @@ public class TransportTermsEnumAction extends HandledTransportAction<TermsEnumRe
                     }
                     termCount = 0;
                 }
-                long df = te.docFreq();
                 BytesRef bytes = te.term();
-                termsList.add(new TermCount(bytes.utf8ToString(), df));
+                termsList.add(bytes.utf8ToString());
                 if (termsList.size() >= shard_size) {
                     break;
                 }
@@ -730,21 +723,17 @@ public class TransportTermsEnumAction extends HandledTransportAction<TermsEnumRe
         }
     }
 
-    private static class TermCountIterator implements Iterator<TermCount>, Comparable<TermCountIterator> {
-        private final Iterator<TermCount> iterator;
-        private TermCount current;
+    private static class TermIterator implements Iterator<String>, Comparable<TermIterator> {
+        private final Iterator<String> iterator;
+        private String current;
 
-        private TermCountIterator(Iterator<TermCount> iterator) {
+        private TermIterator(Iterator<String> iterator) {
             this.iterator = iterator;
             this.current = iterator.next();
         }
 
         public String term() {
-            return current.getTerm();
-        }
-
-        public long docCount() {
-            return current.getDocCount();
+            return current;
         }
 
         @Override
@@ -753,13 +742,13 @@ public class TransportTermsEnumAction extends HandledTransportAction<TermsEnumRe
         }
 
         @Override
-        public TermCount next() {
+        public String next() {
             return current = iterator.next();
         }
 
         @Override
-        public int compareTo(TermCountIterator o) {
-            return current.getTerm().compareTo(o.term());
+        public int compareTo(TermIterator o) {
+            return current.compareTo(o.term());
         }
     }
 }

+ 12 - 22
x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/termsenum/MultiShardTermsEnumTests.java

@@ -28,20 +28,18 @@ import org.elasticsearch.core.internal.io.IOUtils;
 import org.elasticsearch.test.ESTestCase;
 import org.elasticsearch.xpack.core.termsenum.action.MultiShardTermsEnum;
 import org.elasticsearch.xpack.core.termsenum.action.SimpleTermCountEnum;
-import org.elasticsearch.xpack.core.termsenum.action.TermCount;
 
 import java.io.Closeable;
 import java.util.ArrayList;
-import java.util.HashMap;
+import java.util.HashSet;
 import java.util.Locale;
-import java.util.Map;
-import java.util.Map.Entry;
+import java.util.Set;
 
 public class MultiShardTermsEnumTests extends ESTestCase {
 
     public void testRandomIndexFusion() throws Exception {
         String fieldName = "foo";
-        Map<String, Integer> globalTermCounts = new HashMap<>();
+        Set<String> globalTermCounts = new HashSet<>();
 
         int numShards = randomIntBetween(2, 15);
 
@@ -59,12 +57,7 @@ public class MultiShardTermsEnumTests extends ESTestCase {
                     String term = randomAlphaOfLengthBetween(1, 3).toLowerCase(Locale.ROOT);
                     document.add(new StringField(fieldName, term, Field.Store.YES));
                     writer.addDocument(document);
-                    int count = 0;
-                    if (globalTermCounts.containsKey(term)) {
-                        count = globalTermCounts.get(term);
-                    }
-                    count++;
-                    globalTermCounts.put(term, count);
+                    globalTermCounts.add(term);
 
                 }
                 DirectoryReader reader = DirectoryReader.open(writer);
@@ -89,32 +82,29 @@ public class MultiShardTermsEnumTests extends ESTestCase {
                     if (randomBoolean()) {
                         // Simulate fields like constant-keyword which use a SimpleTermCountEnum to present results
                         // rather than the raw TermsEnum from Lucene.
-                        ArrayList<TermCount> termCounts = new ArrayList<>();
+                        ArrayList<String> termCounts = new ArrayList<>();
                         while (te.next() != null) {
-                            termCounts.add(new TermCount(te.term().utf8ToString(), te.docFreq()));
+                            termCounts.add(te.term().utf8ToString());
                         }
-                        SimpleTermCountEnum simpleEnum = new SimpleTermCountEnum(termCounts.toArray(new TermCount[0]));
+                        SimpleTermCountEnum simpleEnum = new SimpleTermCountEnum(termCounts.toArray(new String[0]));
                         termsEnums.add(simpleEnum);
                     } else {
                         termsEnums.add(te);
                     }
                 }
                 MultiShardTermsEnum mte = new MultiShardTermsEnum(termsEnums.toArray(new TermsEnum[0]));
-                HashMap<String, Integer> expecteds = new HashMap<>();
+                Set<String> expecteds = new HashSet<>();
 
-                for (Entry<String, Integer> termCount : globalTermCounts.entrySet()) {
-                    if (termCount.getKey().startsWith(searchPrefix)) {
-                        expecteds.put(termCount.getKey(), termCount.getValue());
+                for (String term : globalTermCounts) {
+                    if (term.startsWith(searchPrefix)) {
+                        expecteds.add(term);
                     }
                 }
 
                 while (mte.next() != null) {
                     String teString = mte.term().utf8ToString();
-                    long actual = mte.docFreq();
-                    assertTrue(expecteds.containsKey(teString));
-                    long expected = expecteds.get(teString);
+                    assertTrue(expecteds.contains(teString));
                     expecteds.remove(teString);
-                    assertEquals(mte.term().utf8ToString() + " string count wrong", expected, actual);
                 }
                 assertEquals("Expected results not found", 0, expecteds.size());
 

+ 0 - 42
x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/termsenum/TermCountTests.java

@@ -1,42 +0,0 @@
-/*
- * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
- * or more contributor license agreements. Licensed under the Elastic License
- * 2.0; you may not use this file except in compliance with the Elastic License
- * 2.0.
- */
-package org.elasticsearch.xpack.core.termsenum;
-
-import org.elasticsearch.common.io.stream.Writeable;
-import org.elasticsearch.test.AbstractSerializingTestCase;
-import org.elasticsearch.xcontent.XContentParser;
-import org.elasticsearch.xpack.core.termsenum.action.TermCount;
-
-import java.io.IOException;
-
-public class TermCountTests extends AbstractSerializingTestCase<TermCount> {
-
-    static TermCount createRandomQueryExplanation(boolean isValid) {
-        int docCount = randomInt(100);
-        String term = randomAlphaOfLength(randomIntBetween(10, 100));
-        return new TermCount(term, docCount);
-    }
-
-    static TermCount createRandomQueryExplanation() {
-        return createRandomQueryExplanation(randomBoolean());
-    }
-
-    @Override
-    protected TermCount doParseInstance(XContentParser parser) throws IOException {
-        return TermCount.fromXContent(parser);
-    }
-
-    @Override
-    protected TermCount createTestInstance() {
-        return createRandomQueryExplanation();
-    }
-
-    @Override
-    protected Writeable.Reader<TermCount> instanceReader() {
-        return TermCount::new;
-    }
-}

+ 2 - 5
x-pack/plugin/mapper-constant-keyword/src/main/java/org/elasticsearch/xpack/constantkeyword/mapper/ConstantKeywordFieldMapper.java

@@ -42,7 +42,6 @@ import org.elasticsearch.xcontent.XContentBuilder;
 import org.elasticsearch.xcontent.XContentParser;
 import org.elasticsearch.xpack.constantkeyword.ConstantKeywordDocValuesField;
 import org.elasticsearch.xpack.core.termsenum.action.SimpleTermCountEnum;
-import org.elasticsearch.xpack.core.termsenum.action.TermCount;
 
 import java.io.IOException;
 import java.time.ZoneId;
@@ -151,8 +150,7 @@ public class ConstantKeywordFieldMapper extends FieldMapper {
         }
 
         @Override
-        public TermsEnum getTerms(boolean caseInsensitive, String string, SearchExecutionContext queryShardContext, String searchAfter)
-            throws IOException {
+        public TermsEnum getTerms(boolean caseInsensitive, String string, SearchExecutionContext queryShardContext, String searchAfter) {
             boolean matches = caseInsensitive
                 ? value.toLowerCase(Locale.ROOT).startsWith(string.toLowerCase(Locale.ROOT))
                 : value.startsWith(string);
@@ -165,8 +163,7 @@ public class ConstantKeywordFieldMapper extends FieldMapper {
                     return null;
                 }
             }
-            int docCount = queryShardContext.searcher().getIndexReader().maxDoc();
-            return new SimpleTermCountEnum(new TermCount(value, docCount));
+            return new SimpleTermCountEnum(value);
         }
 
         @Override

+ 67 - 2
x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/terms_enum/10_basic.yml

@@ -126,6 +126,9 @@ setup:
             properties:
               foo:
                 type : keyword
+              foo_non_indexed:
+                type: keyword
+                index: false
               timestamp:
                 type : date
   - do:
@@ -176,12 +179,15 @@ setup:
                 value: const
               foo:
                 type : keyword
+              foo_non_indexed:
+                type: keyword
+                index: false
 
   - do:
       index:
           index:  test_k
           id:     1
-          body:   { foo: "bar_k", "timestamp":"2021-01-01T01:01:01.000Z" }
+          body:   { foo: "bar_k", foo_non_indexed: "bar_k", "timestamp":"2021-01-01T01:01:01.000Z" }
 
   - do:
       index:
@@ -198,7 +204,7 @@ setup:
       index:
           index:  test_security
           id:     4
-          body:   { foo: "bar_dls"}
+          body:   { foo: "bar_dls", foo_non_indexed: "bar_dls"}
 
   - do: #superuser
       headers: { Authorization: "Basic dGVzdF9hZG1pbjp4LXBhY2stdGVzdC1wYXNzd29yZA==" } # admin
@@ -278,6 +284,18 @@ teardown:
         body:  {"field": "foo.bar", "string":"b"}
   - length: {terms: 1}
 
+---
+"Test basic term enumeration on doc value only field":
+  - skip:
+      version: " - 8.1.99"
+      reason: "terms enum support for doc value only keyword fields was added in 8.2.0"
+
+  - do:
+      terms_enum:
+        index:  test_*
+        body:  {"field": "foo_non_indexed", "string":"b"}
+  - length: {terms: 2}
+
 ---
 "Test case insensitivity":
   - do:
@@ -310,6 +328,24 @@ teardown:
         body:  {"field": "foo.Bar", "string":"B", "case_insensitive": true}
   - length: {terms: 0}
 
+---
+"Test case insensitivity on doc value only field":
+  - skip:
+      version: " - 8.1.99"
+      reason: "terms enum support for doc value only keyword fields was added in 8.2.0"
+
+  - do:
+      terms_enum:
+        index:  test_k
+        body:  {"field": "foo_non_indexed", "string":"B"}
+  - length: {terms: 0}
+
+  - do:
+      terms_enum:
+        index:  test_k
+        body:  {"field": "foo_non_indexed", "string":"B", "case_insensitive": true}
+  - length: {terms: 1}
+
 ---
 "Test null search string allowed":
   - skip:
@@ -348,6 +384,35 @@ teardown:
         body:  {"field": "foo", "string":"", "search_after":"baa"}
   - length: {terms: 1}
 
+---
+"Test search after on doc value only keyword field":
+  - skip:
+      version: " - 8.1.99"
+      reason: "terms enum support for doc value only keyword fields was added in 8.2.0"
+
+  - do:
+      terms_enum:
+        index:  test_k
+        body:  {"field": "foo_non_indexed", "string":"b", "search_after":"baz"}
+  - length: {terms: 0}
+
+  - do:
+      terms_enum:
+        index:  test_k
+        body:  {"field": "foo_non_indexed", "string":"b", "search_after":"bar_k"}
+  - length: {terms: 0}
+
+  - do:
+      terms_enum:
+        index:  test_k
+        body:  {"field": "foo_non_indexed", "string":"b", "search_after":"baa"}
+  - length: {terms: 1}
+
+  - do:
+      terms_enum:
+        index:  test_k
+        body:  {"field": "foo_non_indexed", "string":"", "search_after":"baa"}
+  - length: {terms: 1}
 
 ---
 "Test search after flattened field":