Răsfoiți Sursa

Limit _terms_enum prefix size (#97488)

Currently the prefixes passed to the _terms_enum endpoint are not limited in size.
Since they run against a keyword field and build automata, this can lead to high memory
consumption and the danger of running OOM. This change checks the size of the prefix
early in the rest request and throws a validation error in case it exceeds
IndexWriter.MAX_TERM_LENGTH, which is the same limit we apply to the length of
keyword field values anyway, so this comes at no loss in functionality.

Closes #96572
Christoph Büscher 2 ani în urmă
părinte
comite
192597d795

+ 6 - 0
docs/changelog/97488.yaml

@@ -0,0 +1,6 @@
+pr: 97488
+summary: Limit `_terms_enum` prefix size
+area: Search
+type: enhancement
+issues:
+ - 96572

+ 3 - 0
docs/reference/search/terms-enum.asciidoc

@@ -81,6 +81,9 @@ Which field to match
 The string to match at the start of indexed terms. If not provided, all terms in the field
 are considered.
 
+NOTE: The prefix string cannot be larger than the largest possible keyword value, which is
+Lucene's term byte-length limit of `32766`.
+
 [[terms-enum-size-param]]
 `size`::
 (Optional, integer)

+ 14 - 0
x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/termsenum/action/TermsEnumRequest.java

@@ -6,6 +6,7 @@
  */
 package org.elasticsearch.xpack.core.termsenum.action;
 
+import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.action.ActionRequestValidationException;
 import org.elasticsearch.action.ValidateActions;
 import org.elasticsearch.action.search.SearchRequest;
@@ -24,6 +25,8 @@ import java.io.IOException;
 import java.util.Arrays;
 import java.util.Objects;
 
+import static org.apache.lucene.index.IndexWriter.MAX_TERM_LENGTH;
+
 /**
  * A request to gather terms for a given field matching a string prefix
  */
@@ -110,6 +113,17 @@ public class TermsEnumRequest extends BroadcastRequest<TermsEnumRequest> impleme
     @Override
     public ActionRequestValidationException validate() {
         ActionRequestValidationException validationException = super.validate();
+        if (string != null) {
+            // length calculation using BytesRef length like in KeywordFieldMapper to check against MAX_TERM_LENGTH
+            if (new BytesRef(string).length > MAX_TERM_LENGTH) {
+                validationException = ValidateActions.addValidationError(
+                    "prefix string larger than "
+                        + MAX_TERM_LENGTH
+                        + " characters, which is the maximum allowed term length for keyword fields.",
+                    validationException
+                );
+            }
+        }
         if (field == null) {
             validationException = ValidateActions.addValidationError("field cannot be null", validationException);
         }

+ 37 - 0
x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/termsenum/TermsEnumRequestTests.java

@@ -7,11 +7,14 @@
 
 package org.elasticsearch.xpack.core.termsenum;
 
+import org.apache.lucene.index.IndexWriter;
+import org.elasticsearch.action.ActionRequestValidationException;
 import org.elasticsearch.action.support.IndicesOptions;
 import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
 import org.elasticsearch.common.io.stream.Writeable;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.util.ArrayUtils;
+import org.elasticsearch.core.TimeValue;
 import org.elasticsearch.index.query.QueryBuilders;
 import org.elasticsearch.indices.IndicesModule;
 import org.elasticsearch.search.SearchModule;
@@ -113,4 +116,38 @@ public class TermsEnumRequestTests extends AbstractXContentSerializingTestCase<T
         mutator.accept(mutatedInstance);
         return mutatedInstance;
     }
+
+    public void testValidation() {
+        TermsEnumRequest request = new TermsEnumRequest(); // freshly constructed, no field set
+        ActionRequestValidationException validationException = request.validate();
+        assertEquals(1, validationException.validationErrors().size());
+        assertEquals("field cannot be null", validationException.validationErrors().get(0));
+        // with a field set, the request is expected to validate without errors
+        request.field("field");
+        validationException = request.validate();
+        assertNull(validationException);
+        // a null timeout is expected to be reported as the only validation error
+        request.timeout(null);
+        validationException = request.validate();
+        assertEquals(1, validationException.validationErrors().size());
+        assertEquals("Timeout cannot be null", validationException.validationErrors().get(0));
+        // a 61 second timeout is expected to be rejected as exceeding one minute
+        request.timeout(TimeValue.timeValueSeconds(61));
+        validationException = request.validate();
+        assertEquals(1, validationException.validationErrors().size());
+        assertEquals("Timeout cannot be > 1 minute", validationException.validationErrors().get(0));
+        // valid timeout plus a prefix of at most MAX_TERM_LENGTH letters: expected to pass
+        request.timeout(TimeValue.timeValueSeconds(10));
+        request.string(randomAlphaOfLengthBetween(1, IndexWriter.MAX_TERM_LENGTH));
+        validationException = request.validate();
+        assertNull(validationException);
+        // a prefix longer than MAX_TERM_LENGTH is expected to fail with the prefix-size error
+        request.string(randomAlphaOfLengthBetween(IndexWriter.MAX_TERM_LENGTH + 1, IndexWriter.MAX_TERM_LENGTH + 100));
+        validationException = request.validate();
+        assertEquals(1, validationException.validationErrors().size());
+        assertEquals(
+            "prefix string larger than 32766 characters, which is the maximum allowed term length for keyword fields.",
+            validationException.validationErrors().get(0)
+        );
+    }
 }