Browse Source

ESQL: Limit size of query (#117898) (#118024)

Queries bigger than a MB tend to take a lot of memory. In the worst case
it's an astounding amount of memory.
Nik Everett 10 months ago
parent
commit
570a8cb9f7

+ 5 - 0
docs/changelog/117898.yaml

@@ -0,0 +1,5 @@
+pr: 117898
+summary: Limit size of query
+area: ES|QL
+type: bug
+issues: []

+ 21 - 0
test/external-modules/esql-heap-attack/src/javaRestTest/java/org/elasticsearch/xpack/esql/heap_attack/HeapAttackIT.java

@@ -194,6 +194,13 @@ public class HeapAttackIT extends ESRestTestCase {
         );
     }
 
+    private void assertParseFailure(ThrowingRunnable r) throws IOException { // runs r and asserts the server rejected it as a parse error
+        ResponseException e = expectThrows(ResponseException.class, r); // r must fail with a ResponseException
+        Map<?, ?> map = responseAsMap(e.getResponse());
+        logger.info("expected parse failure {}", map);
+        assertMap(map, matchesMap().entry("status", 400).entry("error", matchesMap().extraOk().entry("type", "parsing_exception"))); // expects status 400 and error.type "parsing_exception"; extraOk() presumably tolerates other error fields
+    }
+
     private Response sortByManyLongs(int count) throws IOException {
         logger.info("sorting by {} longs", count);
         return query(makeSortByManyLongs(count).toString(), null);
@@ -318,6 +325,13 @@ public class HeapAttackIT extends ESRestTestCase {
         assertManyStrings(resp, strings);
     }
 
+    /**
+     * Fails to parse a huge huge query. Calls {@code manyConcat} on a
+     * five-column {@code ROW} with a count of 50000 and expects the request
+     * to fail with a 400 {@code parsing_exception}, as checked by
+     * {@code assertParseFailure}.
+     */
+    public void testHugeHugeManyConcatFromRow() throws IOException {
+        assertParseFailure(() -> manyConcat("ROW a=9999, b=9999, c=9999, d=9999, e=9999", 50000));
+    }
+
     /**
      * Tests that generate many moderately long strings.
      */
@@ -378,6 +392,13 @@ public class HeapAttackIT extends ESRestTestCase {
         assertManyStrings(resp, strings);
     }
 
+    /**
+     * Fails to parse a huge huge query. Calls {@code manyRepeat} on a
+     * single-column {@code ROW} with a count of 100000 and expects the
+     * request to fail with a 400 {@code parsing_exception}, as checked by
+     * {@code assertParseFailure}.
+     */
+    public void testHugeHugeManyRepeatFromRow() throws IOException {
+        assertParseFailure(() -> manyRepeat("ROW a = 99", 100000));
+    }
+
     /**
      * Tests that generate many moderately long strings.
      */

+ 16 - 1
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/EsqlParser.java

@@ -33,6 +33,15 @@ public class EsqlParser {
 
     private static final Logger log = LogManager.getLogger(EsqlParser.class);
 
+    /**
+     * Maximum number of characters in an ESQL query. Antlr may parse the entire
+     * query into tokens to make the choices, buffering the world. There's a lot we
+     * can do in the grammar to prevent that, but let's be paranoid and assume we'll
+     * fail at preventing antlr from slurping in the world. Instead, let's make sure
+     * that the world just isn't that big.
+     * <p>
+     * The parser compares {@code query.length()} against this value with a
+     * strict {@code >}, so a query of exactly this many characters is still
+     * accepted. Longer queries are rejected with a {@code ParsingException}
+     * before the lexer is created.
+     */
+    public static final int MAX_LENGTH = 1_000_000;
+
     private EsqlConfig config = new EsqlConfig();
 
     public EsqlConfig config() {
@@ -60,8 +69,14 @@ public class EsqlParser {
         Function<EsqlBaseParser, ParserRuleContext> parseFunction,
         BiFunction<AstBuilder, ParserRuleContext, T> result
     ) {
+        if (query.length() > MAX_LENGTH) {
+            throw new org.elasticsearch.xpack.esql.core.ParsingException(
+                "ESQL statement is too large [{} characters > {}]",
+                query.length(),
+                MAX_LENGTH
+            );
+        }
         try {
-            // new CaseChangingCharStream()
             EsqlBaseLexer lexer = new EsqlBaseLexer(CharStreams.fromString(query));
 
             lexer.removeErrorListeners();

+ 8 - 0
x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/ParsingTests.java

@@ -103,6 +103,14 @@ public class ParsingTests extends ESTestCase {
         logger.info("Wrote to file: {}", file);
     }
 
+    public void testTooBigQuery() { // builds a query just past EsqlParser.MAX_LENGTH and checks the "too large" error text
+        StringBuilder query = new StringBuilder("FROM foo | EVAL a = a"); // 21-character prefix
+        while (query.length() < EsqlParser.MAX_LENGTH) { // stops at the first length >= MAX_LENGTH
+            query.append(", a = CONCAT(a, a)"); // each appended clause is 18 characters
+        }
+        assertEquals("-1:0: ESQL statement is too large [1000011 characters > 1000000]", error(query.toString())); // 21 + 18 * 55555 = 1000011
+    }
+
     private String functionName(EsqlFunctionRegistry registry, Expression functionCall) {
         for (FunctionDefinition def : registry.listFunctions()) {
             if (functionCall.getClass().equals(def.clazz())) {