Browse Source

Add "search_after" support to new termsEnum api (#72933)

Adds an optional parameter to the _terms_enum request designed to allow paging.
The last term from a previous result can be passed as the search_after parameter to a subsequent request, meaning only terms after the given term (but still matching the provided string prefix) are returned
Relates to #72910
markharwood 4 years ago
parent
commit
ebb113a7e3

+ 7 - 0
docs/reference/search/terms-enum.asciidoc

@@ -98,3 +98,10 @@ Defaults to false.
 (Optional,  <<query-dsl,query object>> Allows to filter an index shard if the provided
 query rewrites to `match_none`.
 
+[[terms-enum-search_after-param]]
+`string`::
+(Optional, string)
+The string after which terms in the index should be returned. Allows for a form of
+pagination if the last result from one request is passed as the search_after
+parameter for a subsequent request.
+

+ 39 - 6
server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java

@@ -13,18 +13,20 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.SortedSetDocValuesField;
+import org.apache.lucene.index.FilteredTermsEnum;
 import org.apache.lucene.index.IndexOptions;
-import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.search.MultiTermQuery;
-import org.apache.lucene.search.Query;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.MultiTerms;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.MultiTermQuery;
+import org.apache.lucene.search.Query;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.automaton.Automata;
 import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.CompiledAutomaton;
+import org.apache.lucene.util.automaton.CompiledAutomaton.AUTOMATON_TYPE;
 import org.apache.lucene.util.automaton.MinimizationOperations;
 import org.apache.lucene.util.automaton.Operations;
 import org.elasticsearch.common.lucene.Lucene;
@@ -259,7 +261,8 @@ public final class KeywordFieldMapper extends FieldMapper {
         }
 
         @Override
-        public TermsEnum getTerms(boolean caseInsensitive, String string, SearchExecutionContext queryShardContext) throws IOException {
+        public TermsEnum getTerms(boolean caseInsensitive, String string, SearchExecutionContext queryShardContext, String searchAfter)
+            throws IOException {
             IndexReader reader = queryShardContext.searcher().getTopReaderContext().reader();
 
             Terms terms = MultiTerms.getTerms(reader, name());
@@ -274,8 +277,38 @@ public final class KeywordFieldMapper extends FieldMapper {
             a = MinimizationOperations.minimize(a, Integer.MAX_VALUE);
 
             CompiledAutomaton automaton = new CompiledAutomaton(a);
-            return automaton.getTermsEnum(terms);            
-        }
+            
+            BytesRef searchBytes = searchAfter == null? null: new BytesRef(searchAfter);
+            
+            if (automaton.type == AUTOMATON_TYPE.ALL) {
+                TermsEnum result = terms.iterator();
+                if (searchAfter != null) {
+                    result = new SearchAfterTermsEnum(result, searchBytes);
+                }
+                return result;
+            }
+            return terms.intersect(automaton, searchBytes);
+        }
+        
+        // Initialises with a seek to a given term but excludes that term
+        // from any results. The problem it addresses is that termsEnum.seekCeil()
+        // would work but either leaves us positioned on the seek term (if it exists) or the 
+        // term after (if the seek term doesn't exist). That complicates any subsequent 
+        // iteration logic so this class simplifies the pagination use case. 
+        final class SearchAfterTermsEnum extends FilteredTermsEnum {
+            private final BytesRef afterRef;
+
+            SearchAfterTermsEnum(TermsEnum tenum, BytesRef termText) {
+                super(tenum);
+                afterRef = termText;
+                setInitialSeekTerm(termText);
+            }
+
+            @Override
+            protected AcceptStatus accept(BytesRef term) {
+                return term.equals(afterRef) ? AcceptStatus.NO : AcceptStatus.YES;
+            }
+        }          
 
         @Override
         public String typeName() {

+ 3 - 1
server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java

@@ -440,10 +440,12 @@ public abstract class MappedFieldType {
      * @param caseInsensitive if matches should be case insensitive
      * @param string the partially complete word the user has typed (can be empty)
      * @param queryShardContext the shard context
+     * @param searchAfter - usually null. If supplied the TermsEnum result must be positioned after the provided term (used for pagination)
      * @return null or an enumeration of matching terms and their doc frequencies
      * @throws IOException Errors accessing data
      */
-    public TermsEnum getTerms(boolean caseInsensitive, String string, SearchExecutionContext queryShardContext) throws IOException {
+    public TermsEnum getTerms(boolean caseInsensitive, String string, SearchExecutionContext queryShardContext, String searchAfter)
+        throws IOException {
         return null;
     }
 }

+ 9 - 3
server/src/main/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldMapper.java

@@ -256,7 +256,8 @@ public final class FlattenedFieldMapper extends FieldMapper {
         }
         
         @Override
-        public TermsEnum getTerms(boolean caseInsensitive, String string, SearchExecutionContext queryShardContext) throws IOException {
+        public TermsEnum getTerms(boolean caseInsensitive, String string, SearchExecutionContext queryShardContext, String searchAfter)
+            throws IOException {
             IndexReader reader = queryShardContext.searcher().getTopReaderContext().reader();
             Terms terms = MultiTerms.getTerms(reader, name());
             if (terms == null) {
@@ -274,8 +275,13 @@ public final class FlattenedFieldMapper extends FieldMapper {
             a = MinimizationOperations.minimize(a, Integer.MAX_VALUE);
 
             CompiledAutomaton automaton = new CompiledAutomaton(a);
-            // Wrap result in a class that strips field names from discovered terms
-            return new TranslatingTermsEnum(automaton.getTermsEnum(terms));            
+            if (searchAfter != null) {
+                BytesRef searchAfterWithFieldName = new BytesRef(key + FlattenedFieldParser.SEPARATOR + searchAfter);
+                TermsEnum seekedEnum = terms.intersect(automaton, searchAfterWithFieldName);
+                return new TranslatingTermsEnum(seekedEnum);
+            } else { 
+                return new TranslatingTermsEnum(automaton.getTermsEnum(terms));
+            }
         }        
 
         @Override

+ 8 - 2
x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/termsenum/action/NodeTermsEnumRequest.java

@@ -27,6 +27,7 @@ public class NodeTermsEnumRequest extends TransportRequest implements IndicesReq
 
     private String field;
     private String string;
+    private String searchAfter;
     private long taskStartedTimeMillis;
     private long nodeStartedTimeMillis;
     private boolean caseInsensitive;
@@ -41,6 +42,7 @@ public class NodeTermsEnumRequest extends TransportRequest implements IndicesReq
         super(in);
         field = in.readString();
         string = in.readString();
+        searchAfter = in.readOptionalString();
         caseInsensitive = in.readBoolean();
         size = in.readVInt();
         timeout = in.readVLong();
@@ -57,6 +59,7 @@ public class NodeTermsEnumRequest extends TransportRequest implements IndicesReq
     public NodeTermsEnumRequest(final String nodeId, final Set<ShardId> shardIds, TermsEnumRequest request) {
         this.field = request.field();
         this.string = request.string();
+        this.searchAfter = request.searchAfter();
         this.caseInsensitive = request.caseInsensitive();
         this.size = request.size();
         this.timeout = request.timeout().getMillis();
@@ -64,8 +67,6 @@ public class NodeTermsEnumRequest extends TransportRequest implements IndicesReq
         this.indexFilter = request.indexFilter();
         this.nodeId = nodeId;
         this.shardIds = shardIds;        
-        
-        // TODO serialize shard ids
     }
 
     public String field() {
@@ -76,6 +77,10 @@ public class NodeTermsEnumRequest extends TransportRequest implements IndicesReq
         return string;
     }
 
+    public String searchAfter() {
+        return searchAfter;
+    }
+
     public long taskStartedTimeMillis() {
         return this.taskStartedTimeMillis;
     }
@@ -119,6 +124,7 @@ public class NodeTermsEnumRequest extends TransportRequest implements IndicesReq
         super.writeTo(out);
         out.writeString(field);
         out.writeString(string);
+        out.writeOptionalString(searchAfter);
         out.writeBoolean(caseInsensitive);
         out.writeVInt(size);
         // Adjust the amount of permitted time the shard has remaining to gather terms. 

+ 1 - 1
x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/termsenum/action/SimpleTermCountEnum.java

@@ -20,7 +20,7 @@ import java.util.Comparator;
 
 /**
  * A utility class for fields that need to support autocomplete via
- * {@link MappedFieldType#getTerms(boolean, String, org.elasticsearch.index.query.SearchExecutionContext)}
+ * {@link MappedFieldType#getTerms(boolean, String, org.elasticsearch.index.query.SearchExecutionContext, String)}
  * but can't return a raw Lucene TermsEnum.
  */
 public class SimpleTermCountEnum extends TermsEnum {

+ 1 - 0
x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/termsenum/action/TermsEnumAction.java

@@ -39,6 +39,7 @@ public class TermsEnumAction extends ActionType<TermsEnumResponse> {
     static {
         PARSER.declareString(TermsEnumRequest::field, new ParseField("field"));
         PARSER.declareString(TermsEnumRequest::string, new ParseField("string"));
+        PARSER.declareString(TermsEnumRequest::searchAfter, new ParseField("search_after"));
         PARSER.declareInt(TermsEnumRequest::size, new ParseField("size"));
         PARSER.declareBoolean(TermsEnumRequest::caseInsensitive, new ParseField("case_insensitive"));
         PARSER.declareField(TermsEnumRequest::timeout,

+ 22 - 1
x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/termsenum/action/TermsEnumRequest.java

@@ -31,6 +31,7 @@ public class TermsEnumRequest extends BroadcastRequest<TermsEnumRequest> impleme
 
     private String field;
     private String string;
+    private String searchAfter;
     private int size = DEFAULT_SIZE;
     private boolean caseInsensitive;
     long taskStartTimeMillis;
@@ -44,6 +45,7 @@ public class TermsEnumRequest extends BroadcastRequest<TermsEnumRequest> impleme
         super(in);
         field = in.readString();
         string = in.readString();
+        searchAfter = in.readOptionalString();
         caseInsensitive = in.readBoolean();
         size = in.readVInt();
         indexFilter = in.readOptionalNamedWriteable(QueryBuilder.class);
@@ -103,6 +105,20 @@ public class TermsEnumRequest extends BroadcastRequest<TermsEnumRequest> impleme
     public String string() {
         return string;
     }
+    
+    /**
+     * The string after which to find matching field values (enables pagination of previous request)
+     */
+    public String searchAfter() {
+        return searchAfter;
+    }
+
+    /**
+     * The string after which to find matching field values (enables pagination of previous request)
+     */
+    public void searchAfter(String searchAfter) {
+        this.searchAfter = searchAfter;
+    }
 
     /**
      *  The number of terms to return
@@ -148,6 +164,7 @@ public class TermsEnumRequest extends BroadcastRequest<TermsEnumRequest> impleme
         super.writeTo(out);
         out.writeString(field);
         out.writeString(string);
+        out.writeOptionalString(searchAfter);
         out.writeBoolean(caseInsensitive);
         out.writeVInt(size);
         out.writeOptionalNamedWriteable(indexFilter);
@@ -157,7 +174,8 @@ public class TermsEnumRequest extends BroadcastRequest<TermsEnumRequest> impleme
     public String toString() {
         return "[" + Arrays.toString(indices) + "] field[" + field + "], string[" + string + "] "  + " size=" + size + " timeout="
             + timeout().getMillis() + " case_insensitive="
-            + caseInsensitive + " indexFilter = "+ indexFilter;
+            + caseInsensitive + " indexFilter = "+ indexFilter +
+            " searchAfter[" + searchAfter + "]" ;
     }
 
     @Override
@@ -165,6 +183,9 @@ public class TermsEnumRequest extends BroadcastRequest<TermsEnumRequest> impleme
         builder.startObject();
         builder.field("field", field);
         builder.field("string", string);
+        if (searchAfter != null) {
+            builder.field("search_after", searchAfter);            
+        }
         builder.field("size", size);
         builder.field("timeout", timeout().getMillis());
         builder.field("case_insensitive", caseInsensitive);

+ 13 - 3
x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/termsenum/action/TransportTermsEnumAction.java

@@ -298,13 +298,23 @@ public class TransportTermsEnumAction extends HandledTransportAction<TermsEnumRe
                 );
                 final MappedFieldType mappedFieldType = indexShard.mapperService().fieldType(request.field());
                 if (mappedFieldType != null) {
-                    TermsEnum terms = mappedFieldType.getTerms(request.caseInsensitive(), request.string(), queryShardContext);
+                    TermsEnum terms = mappedFieldType.getTerms(
+                        request.caseInsensitive(),
+                        request.string(),
+                        queryShardContext,
+                        request.searchAfter()                        
+                    );
                     if (terms != null) {
                         shardTermsEnums.add(terms);
                     }
                 }
             }
+            if (shardTermsEnums.size() == 0) {
+                // No term enums available
+                return new NodeTermsEnumResponse(request.nodeId(), termsList, error, true);
+            }
             MultiShardTermsEnum te = new MultiShardTermsEnum(shardTermsEnums.toArray(new TermsEnum[0]));
+            
 
             int shard_size = request.size();
             // All the above prep might take a while - do a timer check now before we continue further.
@@ -330,7 +340,7 @@ public class TransportTermsEnumAction extends HandledTransportAction<TermsEnumRe
                 if (termsList.size() >= shard_size) {
                     break;
                 }
-            }
+            };
 
         } catch (Exception e) {
             error = ExceptionsHelper.stackTrace(e);
@@ -339,7 +349,7 @@ public class TransportTermsEnumAction extends HandledTransportAction<TermsEnumRe
         }
         return new NodeTermsEnumResponse(request.nodeId(), termsList, error, true);
     }
-
+    
     // TODO remove this so we can shift code to server module - write a separate Interceptor class to 
     // rewrite requests according to security rules 
     private boolean canAccess(

+ 1 - 0
x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/termsenum/action/RestTermsEnumActionTests.java

@@ -105,6 +105,7 @@ public class RestTermsEnumActionTests extends ESTestCase {
         final String content = "{"
             + "\"field\":\"a\", "
             + "\"string\":\"foo\", "
+            + "\"search_after\":\"football\", "
             + "\"index_filter\":{\"bool\":{\"must\":{\"term\":{\"user\":\"kimchy\"}}}}}";
 
         final RestRequest request = createRestRequest(content);

+ 8 - 1
x-pack/plugin/mapper-constant-keyword/src/main/java/org/elasticsearch/xpack/constantkeyword/mapper/ConstantKeywordFieldMapper.java

@@ -148,13 +148,20 @@ public class ConstantKeywordFieldMapper extends FieldMapper {
         
 
         @Override
-        public TermsEnum getTerms(boolean caseInsensitive, String string, SearchExecutionContext queryShardContext) throws IOException {
+        public TermsEnum getTerms(boolean caseInsensitive, String string, SearchExecutionContext queryShardContext, String searchAfter)
+            throws IOException {
             boolean matches = caseInsensitive ? 
                 value.toLowerCase(Locale.ROOT).startsWith(string.toLowerCase(Locale.ROOT)) : 
                 value.startsWith(string);
             if (matches == false) {
                 return null;
             }
+            if (searchAfter != null) {
+                if (searchAfter.compareTo(value) >= 0) {
+                    // The constant value is before the searchAfter value so must be ignored
+                    return null;
+                }
+            }
             int docCount = queryShardContext.searcher().getIndexReader().maxDoc();
             return new SimpleTermCountEnum(new TermCount(value, docCount));
         }

+ 80 - 0
x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/terms_enum/10_basic.yml

@@ -1,6 +1,8 @@
 ---
 setup:
   - skip:
+      version: " - 7.9.99"
+      reason:  TODO temporary version skip while we wait for backport to 7.x of search_after properties
       features: headers
   - do:
       cluster.health:
@@ -276,6 +278,84 @@ teardown:
         body:  {"field": "foo.Bar", "string":"B", "case_insensitive": true}
   - length: {terms: 0}
 
+---
+"Test search after keyword field":
+  - do:
+      termsenum:
+        index:  test_k
+        body:  {"field": "foo", "string":"b", "search_after":"baz"}
+  - length: {terms: 0}
+
+  - do:
+      termsenum:
+        index:  test_k
+        body:  {"field": "foo", "string":"b", "search_after":"bar_k"}
+  - length: {terms: 0}
+
+  - do:
+      termsenum:
+        index:  test_k
+        body:  {"field": "foo", "string":"b", "search_after":"baa"}
+  - length: {terms: 1}
+
+  - do:
+      termsenum:
+        index:  test_k
+        body:  {"field": "foo", "string":"", "search_after":"baa"}
+  - length: {terms: 1}
+
+
+---
+"Test search after flattened field":
+  - do:
+      termsenum:
+        index:  test_f
+        body:  {"field": "foo.bar", "string":"b", "search_after":"baz"}
+  - length: {terms: 0}
+
+  - do:
+      termsenum:
+        index:  test_f
+        body:  {"field": "foo.bar", "string":"b", "search_after":"bar_f"}
+  - length: {terms: 0}
+
+  - do:
+      termsenum:
+        index:  test_f
+        body:  {"field": "foo.bar", "string":"b", "search_after":"baa"}
+  - length: {terms: 1}
+
+  - do:
+      termsenum:
+        index:  test_f
+        body:  {"field": "foo.bar", "string":"", "search_after":"baa"}
+  - length: {terms: 1}
+
+---
+"Test search after constant keyword field":
+  - do:
+      termsenum:
+        index:  test_ck
+        body:  {"field": "foo", "string":"b", "search_after":"baz"}
+  - length: {terms: 0}
+
+  - do:
+      termsenum:
+        index:  test_ck
+        body:  {"field": "foo", "string":"b", "search_after":"bar_ck"}
+  - length: {terms: 0}
+
+  - do:
+      termsenum:
+        index:  test_ck
+        body:  {"field": "foo", "string":"b", "search_after":"baa"}
+  - length: {terms: 1}
+
+  - do:
+      termsenum:
+        index:  test_ck
+        body:  {"field": "foo", "string":"", "search_after":"baa"}
+  - length: {terms: 1}
 
 ---
 "Test index filtering":