فهرست منبع

[8.19] ES|QL: Check if cluster aliases and index patterns are valid before executing query (#122497) (#130021)

* ES|QL: Check if cluster aliases and index patterns are valid before executing query (#122497)

ES|QL index patterns validation: Ensure that the patterns in the query are syntactically and semantically valid

---------

Co-authored-by: elasticsearchmachine <infra-root+elasticsearchmachine@elastic.co>
Co-authored-by: Andrei Stefan <astefan@users.noreply.github.com>
Co-authored-by: Alexander Spies <alexander.spies@elastic.co>
(cherry picked from commit c94c021d0e2695f571a1fc841c46ee43f2d5a3ae)

# Conflicts:
#	x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java

* Refactor dropping asterisk from invalid chars

* Assert error message and drop invalid chars due to randomisation issue

* Fix missing sort in `Strings#INVALID_FILENAME_CHARS` and assert invalid
chars in error string
Pawan Kartik 3 ماه پیش
والد
کامیت
e867dcdb61

+ 5 - 0
docs/changelog/122497.yaml

@@ -0,0 +1,5 @@
+pr: 122497
+summary: Check if index patterns conform to valid format before validation
+area: CCS
+type: enhancement
+issues: []

+ 1 - 0
server/src/main/java/org/elasticsearch/common/Strings.java

@@ -282,6 +282,7 @@ public class Strings {
     static final Set<Character> INVALID_CHARS = Set.of('\\', '/', '*', '?', '"', '<', '>', '|', ' ', ',');
 
     public static final String INVALID_FILENAME_CHARS = INVALID_CHARS.stream()
+        .sorted()
         .map(c -> "'" + c + "'")
         .collect(Collectors.joining(",", "[", "]"));
 

+ 192 - 69
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/IdentifierBuilder.java

@@ -20,17 +20,22 @@ import org.elasticsearch.xpack.esql.parser.EsqlBaseParser.IdentifierContext;
 import org.elasticsearch.xpack.esql.parser.EsqlBaseParser.IndexStringContext;
 
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
 
 import static org.elasticsearch.cluster.metadata.IndexNameExpressionResolver.SelectorResolver.SELECTOR_SEPARATOR;
 import static org.elasticsearch.transport.RemoteClusterAware.REMOTE_CLUSTER_INDEX_SEPARATOR;
-import static org.elasticsearch.transport.RemoteClusterAware.isRemoteIndexName;
+import static org.elasticsearch.transport.RemoteClusterAware.splitIndexName;
 import static org.elasticsearch.xpack.esql.core.util.StringUtils.EXCLUSION;
 import static org.elasticsearch.xpack.esql.core.util.StringUtils.WILDCARD;
 import static org.elasticsearch.xpack.esql.parser.ParserUtils.source;
 
 abstract class IdentifierBuilder extends AbstractBuilder {
 
+    private static final String BLANK_INDEX_ERROR_MESSAGE = "Blank index specified in index pattern";
+
+    private static final String INVALID_ESQL_CHARS = Strings.INVALID_FILENAME_CHARS.replace("'*',", "");
+
     @Override
     public String visitIdentifier(IdentifierContext ctx) {
         return ctx == null ? null : unquoteIdentifier(ctx.QUOTED_IDENTIFIER(), ctx.UNQUOTED_IDENTIFIER());
@@ -88,39 +93,21 @@ abstract class IdentifierBuilder extends AbstractBuilder {
             String indexPattern = c.unquotedIndexString() != null ? c.unquotedIndexString().getText() : visitIndexString(c.indexString());
             String clusterString = visitClusterString(c.clusterString());
             String selectorString = visitSelectorString(c.selectorString());
-            // skip validating index on remote cluster, because the behavior of remote cluster is not consistent with local cluster
-            // For example, invalid#index is an invalid index name, however FROM *:invalid#index does not return an error
-            if (clusterString == null) {
-                hasSeenStar.set(indexPattern.contains(WILDCARD) || hasSeenStar.get());
-                validateIndexPattern(indexPattern, c, hasSeenStar.get());
-                // Other instances of Elasticsearch may have differing selectors so only validate selector string if remote cluster
-                // string is unset
-                if (selectorString != null) {
-                    try {
-                        // Ensures that the selector provided is one of the valid kinds
-                        IndexNameExpressionResolver.SelectorResolver.validateIndexSelectorString(indexPattern, selectorString);
-                    } catch (InvalidIndexNameException e) {
-                        throw new ParsingException(e, source(c), e.getMessage());
-                    }
-                }
-            } else {
-                validateClusterString(clusterString, c);
-                // Do not allow selectors on remote cluster expressions until they are supported
-                if (selectorString != null) {
-                    throwOnMixingSelectorWithCluster(reassembleIndexName(clusterString, indexPattern, selectorString), c);
-                }
-            }
+
+            hasSeenStar.set(hasSeenStar.get() || indexPattern.contains(WILDCARD));
+            validate(clusterString, indexPattern, selectorString, c, hasSeenStar.get());
             patterns.add(reassembleIndexName(clusterString, indexPattern, selectorString));
         });
         return Strings.collectionToDelimitedString(patterns, ",");
     }
 
+    private static void throwInvalidIndexNameException(String indexPattern, String message, EsqlBaseParser.IndexPatternContext ctx) {
+        var ie = new InvalidIndexNameException(indexPattern, message);
+        throw new ParsingException(ie, source(ctx), ie.getMessage());
+    }
+
     private static void throwOnMixingSelectorWithCluster(String indexPattern, EsqlBaseParser.IndexPatternContext c) {
-        InvalidIndexNameException ie = new InvalidIndexNameException(
-            indexPattern,
-            "Selectors are not yet supported on remote cluster patterns"
-        );
-        throw new ParsingException(ie, source(c), ie.getMessage());
+        throwInvalidIndexNameException(indexPattern, "Selectors are not yet supported on remote cluster patterns", c);
     }
 
     private static String reassembleIndexName(String clusterString, String indexPattern, String selectorString) {
@@ -144,59 +131,195 @@ abstract class IdentifierBuilder extends AbstractBuilder {
         }
     }
 
-    private static void validateIndexPattern(String indexPattern, EsqlBaseParser.IndexPatternContext ctx, boolean hasSeenStar) {
-        // multiple index names can be in the same double quote, e.g. indexPattern = "idx1, *, -idx2"
-        String[] indices = indexPattern.split(",");
-        boolean hasExclusion = false;
-        for (String index : indices) {
-            // Strip spaces off first because validation checks are not written to handle them
-            index = index.strip();
-            if (isRemoteIndexName(index)) { // skip the validation if there is remote cluster
-                // Ensure that there are no selectors as they are not yet supported
-                if (index.contains(SELECTOR_SEPARATOR)) {
-                    throwOnMixingSelectorWithCluster(index, ctx);
-                }
-                continue;
+    /**
+     * Takes the parsed constituent strings and validates them.
+     * @param clusterString Name of the remote cluster. Can be null.
+     * @param indexPattern Name of the index or pattern; can also have multiple patterns in case of quoting,
+     *                     e.g. {@code FROM """index*,-index1"""}.
+     * @param selectorString Selector string, i.e. "::data" or "::failures". Can be null.
+     * @param ctx Index Pattern Context for generating error messages with offsets.
+     * @param hasSeenStar If we've seen an asterisk so far.
+     */
+    private static void validate(
+        String clusterString,
+        String indexPattern,
+        String selectorString,
+        EsqlBaseParser.IndexPatternContext ctx,
+        boolean hasSeenStar
+    ) {
+        /*
+         * At this point, only 3 formats are possible:
+         * "index_pattern(s)",
+         * remote:index_pattern, and,
+         * index_pattern::selector_string.
+         *
+         * The grammar prohibits remote:"index_pattern(s)" or "index_pattern(s)"::selector_string as they're
+         * partially quoted. So if either of cluster string or selector string are present, there's no need
+         * to split the pattern by comma since comma requires partial quoting.
+         */
+
+        String[] patterns;
+        if (clusterString == null && selectorString == null) {
+            // Pattern could be quoted or is singular like "index_name".
+            patterns = indexPattern.split(",", -1);
+        } else {
+            // Either of cluster string or selector string is present. Pattern is unquoted.
+            patterns = new String[] { indexPattern };
+        }
+
+        patterns = Arrays.stream(patterns).map(String::strip).toArray(String[]::new);
+        if (Arrays.stream(patterns).anyMatch(String::isBlank)) {
+            throwInvalidIndexNameException(indexPattern, BLANK_INDEX_ERROR_MESSAGE, ctx);
+        }
+
+        // Edge case: happens when all the index names in a pattern are empty like "FROM ",,,,,"".
+        if (patterns.length == 0) {
+            throwInvalidIndexNameException(indexPattern, BLANK_INDEX_ERROR_MESSAGE, ctx);
+        } else if (patterns.length == 1) {
+            // Pattern is either an unquoted string or a quoted string with a single index (no comma sep).
+            validateSingleIndexPattern(clusterString, patterns[0], selectorString, ctx, hasSeenStar);
+        } else {
+            /*
+             * Presence of multiple patterns requires a comma and comma requires quoting. If quoting is present,
+             * cluster string and selector string cannot be present; they need to be attached within the quoting.
+             * So we attempt to extract them later.
+             */
+            for (String pattern : patterns) {
+                validateSingleIndexPattern(null, pattern, null, ctx, hasSeenStar);
             }
+        }
+    }
+
+    /**
+     * Validates the constituent strings. Will extract the cluster string and/or selector string from the index
+     * name if clubbed together inside a quoted string.
+     *
+     * @param clusterString Name of the remote cluster. Can be null.
+     * @param indexName Name of the index.
+     * @param selectorString Selector string, i.e. "::data" or "::failures". Can be null.
+     * @param ctx Index Pattern Context for generating error messages with offsets.
+     * @param hasSeenStar If we've seen an asterisk so far.
+     */
+    private static void validateSingleIndexPattern(
+        String clusterString,
+        String indexName,
+        String selectorString,
+        EsqlBaseParser.IndexPatternContext ctx,
+        boolean hasSeenStar
+    ) {
+        indexName = indexName.strip();
+
+        /*
+         * Precedence:
+         * 1. Cannot mix cluster and selector strings.
+         * 2. Cluster string must be valid.
+         * 3. Index name must be valid.
+         * 4. Selector string must be valid.
+         *
+         * Since cluster string and/or selector string can be clubbed with the index name, we must try to
+         * manually extract them before we attempt to do #2, #3, and #4.
+         */
+
+        // It is possible to specify a pattern like "remote_cluster:index_name". Try to extract such details from the index string.
+        if (clusterString == null && selectorString == null) {
             try {
-                Tuple<String, String> splitPattern = IndexNameExpressionResolver.splitSelectorExpression(index);
-                if (splitPattern.v2() != null) {
-                    index = splitPattern.v1();
-                }
-            } catch (InvalidIndexNameException e) {
-                // throws exception if the selector expression is invalid. Selector resolution does not complain about exclusions
+                var split = splitIndexName(indexName);
+                clusterString = split[0];
+                indexName = split[1];
+            } catch (IllegalArgumentException e) {
                 throw new ParsingException(e, source(ctx), e.getMessage());
             }
-            hasSeenStar = index.contains(WILDCARD) || hasSeenStar;
-            index = index.replace(WILDCARD, "").strip();
-            if (index.isBlank()) {
-                continue;
-            }
-            hasExclusion = index.startsWith(EXCLUSION);
-            index = removeExclusion(index);
-            String tempName;
-            try {
-                // remove the exclusion outside of <>, from index names with DateMath expression,
-                // e.g. -<-logstash-{now/d}> becomes <-logstash-{now/d}> before calling resolveDateMathExpression
-                tempName = IndexNameExpressionResolver.resolveDateMathExpression(index);
-            } catch (ElasticsearchParseException e) {
-                // throws exception if the DateMath expression is invalid, resolveDateMathExpression does not complain about exclusions
-                throw new ParsingException(e, source(ctx), e.getMessage());
+        }
+
+        // At the moment, selector strings for remote indices is not allowed.
+        if (clusterString != null && selectorString != null) {
+            throwOnMixingSelectorWithCluster(reassembleIndexName(clusterString, indexName, selectorString), ctx);
+        }
+
+        // Validation in the right precedence.
+        if (clusterString != null) {
+            clusterString = clusterString.strip();
+            validateClusterString(clusterString, ctx);
+        }
+
+        /*
+         * It is possible for selector string to be attached to the index: "index_name::selector_string".
+         * Try to extract the selector string.
+         */
+        try {
+            Tuple<String, String> splitPattern = IndexNameExpressionResolver.splitSelectorExpression(indexName);
+            if (splitPattern.v2() != null) {
+                // Cluster string too was clubbed with the index name like selector string.
+                if (clusterString != null) {
+                    throwOnMixingSelectorWithCluster(reassembleIndexName(clusterString, splitPattern.v1(), splitPattern.v2()), ctx);
+                } else {
+                    // We've seen a selectorString. Use it.
+                    selectorString = splitPattern.v2();
+                }
             }
-            hasExclusion = tempName.startsWith(EXCLUSION) || hasExclusion;
-            index = tempName.equals(index) ? index : removeExclusion(tempName);
+
+            indexName = splitPattern.v1();
+        } catch (InvalidIndexNameException e) {
+            throw new ParsingException(e, source(ctx), e.getMessage());
+        }
+
+        resolveAndValidateIndex(indexName, ctx, hasSeenStar);
+        if (selectorString != null) {
+            selectorString = selectorString.strip();
             try {
-                MetadataCreateIndexService.validateIndexOrAliasName(index, InvalidIndexNameException::new);
+                // Ensures that the selector provided is one of the valid kinds.
+                IndexNameExpressionResolver.SelectorResolver.validateIndexSelectorString(indexName, selectorString);
             } catch (InvalidIndexNameException e) {
-                // ignore invalid index name if it has exclusions and there is an index with wildcard before it
-                if (hasSeenStar && hasExclusion) {
-                    continue;
-                }
                 throw new ParsingException(e, source(ctx), e.getMessage());
             }
         }
     }
 
+    private static void resolveAndValidateIndex(String index, EsqlBaseParser.IndexPatternContext ctx, boolean hasSeenStar) {
+        // If index name is blank without any replacements, it was likely blank right from the beginning and is invalid.
+        if (index.isBlank()) {
+            throwInvalidIndexNameException(index, BLANK_INDEX_ERROR_MESSAGE, ctx);
+        }
+
+        hasSeenStar = hasSeenStar || index.contains(WILDCARD);
+        index = index.replace(WILDCARD, "").strip();
+        if (index.isBlank()) {
+            return;
+        }
+        var hasExclusion = index.startsWith(EXCLUSION);
+        index = removeExclusion(index);
+        String tempName;
+        try {
+            // remove the exclusion outside of <>, from index names with DateMath expression,
+            // e.g. -<-logstash-{now/d}> becomes <-logstash-{now/d}> before calling resolveDateMathExpression
+            tempName = IndexNameExpressionResolver.resolveDateMathExpression(index);
+        } catch (ElasticsearchParseException e) {
+            // throws exception if the DateMath expression is invalid, resolveDateMathExpression does not complain about exclusions
+            throw new ParsingException(e, source(ctx), e.getMessage());
+        }
+        hasExclusion = tempName.startsWith(EXCLUSION) || hasExclusion;
+        index = tempName.equals(index) ? index : removeExclusion(tempName);
+        try {
+            MetadataCreateIndexService.validateIndexOrAliasName(index, InvalidIndexNameException::new);
+        } catch (InvalidIndexNameException e) {
+            // ignore invalid index name if it has exclusions and there is an index with wildcard before it
+            if (hasSeenStar && hasExclusion) {
+                return;
+            }
+
+            /*
+             * We only modify this particular message because it mentions '*' as an invalid char.
+             * However, we do allow asterisk in the index patterns: wildcarded patterns. Let's not
+             * mislead the user by mentioning this char in the error message.
+             */
+            if (e.getMessage().contains("must not contain the following characters")) {
+                throwInvalidIndexNameException(index, "must not contain the following characters " + INVALID_ESQL_CHARS, ctx);
+            }
+
+            throw new ParsingException(e, source(ctx), e.getMessage());
+        }
+    }
+
     private static String removeExclusion(String indexPattern) {
         return indexPattern.charAt(0) == EXCLUSION.charAt(0) ? indexPattern.substring(1) : indexPattern;
     }

+ 206 - 8
x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java

@@ -484,10 +484,12 @@ public class StatementParserTests extends AbstractStatementParserTests {
                      "foo, test-*, abc, xyz", test123
                 """);
             assertStringAsIndexPattern("foo,test,xyz", command + " foo,   test,xyz");
+            assertStringAsIndexPattern("<logstash-{now/M{yyyy.MM}}>", command + " <logstash-{now/M{yyyy.MM}}>");
             assertStringAsIndexPattern(
                 "<logstash-{now/M{yyyy.MM}}>,<logstash-{now/d{yyyy.MM.dd|+12:00}}>",
                 command + " <logstash-{now/M{yyyy.MM}}>, \"<logstash-{now/d{yyyy.MM.dd|+12:00}}>\""
             );
+            assertStringAsIndexPattern("<logstash-{now/d{yyyy.MM.dd|+12:00}}>", command + " \"<logstash-{now/d{yyyy.MM.dd|+12:00}}>\"");
             assertStringAsIndexPattern(
                 "-<logstash-{now/M{yyyy.MM}}>,-<-logstash-{now/M{yyyy.MM}}>,"
                     + "-<logstash-{now/d{yyyy.MM.dd|+12:00}}>,-<-logstash-{now/d{yyyy.MM.dd|+12:00}}>",
@@ -509,18 +511,28 @@ public class StatementParserTests extends AbstractStatementParserTests {
                     ? "mismatched input '\"index|pattern\"' expecting UNQUOTED_SOURCE"
                     : "missing UNQUOTED_SOURCE at '\"index|pattern\"'"
             );
-            assertStringAsIndexPattern("*:index|pattern", command + " \"*:index|pattern\"");
+            // Entire index pattern is quoted. So it's not a parse error but a semantic error where the index name
+            // is invalid.
+            expectError(command + " \"*:index|pattern\"", "Invalid index name [index|pattern], must not contain the following characters");
             clusterAndIndexAsIndexPattern(command, "cluster:index");
             clusterAndIndexAsIndexPattern(command, "cluster:.index");
             clusterAndIndexAsIndexPattern(command, "cluster*:index*");
-            clusterAndIndexAsIndexPattern(command, "cluster*:<logstash-{now/D}>*");// this is not a valid pattern, * should be inside <>
-            clusterAndIndexAsIndexPattern(command, "cluster*:<logstash-{now/D}*>");
+            clusterAndIndexAsIndexPattern(command, "cluster*:<logstash-{now/d}>*");
             clusterAndIndexAsIndexPattern(command, "cluster*:*");
             clusterAndIndexAsIndexPattern(command, "*:index*");
             clusterAndIndexAsIndexPattern(command, "*:*");
+            expectError(
+                command + " \"cluster:index|pattern\"",
+                "Invalid index name [index|pattern], must not contain the following characters"
+            );
+            expectError(
+                command + " *:\"index|pattern\"",
+                command.startsWith("FROM") ? "expecting UNQUOTED_SOURCE" : "missing UNQUOTED_SOURCE"
+            );
             if (EsqlCapabilities.Cap.INDEX_COMPONENT_SELECTORS.isEnabled()) {
                 assertStringAsIndexPattern("foo::data", command + " foo::data");
                 assertStringAsIndexPattern("foo::failures", command + " foo::failures");
+                expectErrorWithLineNumber(command + " *,\"-foo\"::data", "*,-foo::data", lineNumber, "mismatched input '::'");
                 expectErrorWithLineNumber(
                     command + " cluster:\"foo::data\"",
                     " cluster:\"foo::data\"",
@@ -669,9 +681,8 @@ public class StatementParserTests extends AbstractStatementParserTests {
                         : "missing UNQUOTED_SOURCE at '\"foo\"'"
                 );
 
-                // TODO: Edge case that will be invalidated in follow up (https://github.com/elastic/elasticsearch/issues/122651)
-                // expectDoubleColonErrorWithLineNumber(command, "\"cluster:foo\"::data", parseLineNumber + 13);
-                // expectDoubleColonErrorWithLineNumber(command, "\"cluster:foo\"::failures", parseLineNumber + 13);
+                expectDoubleColonErrorWithLineNumber(command, "\"cluster:foo\"::data", parseLineNumber + 13);
+                expectDoubleColonErrorWithLineNumber(command, "\"cluster:foo\"::failures", parseLineNumber + 13);
 
                 expectErrorWithLineNumber(
                     command,
@@ -776,16 +787,33 @@ public class StatementParserTests extends AbstractStatementParserTests {
             clustersAndIndices(command, "index*", "-index#pattern");
             clustersAndIndices(command, "*", "-<--logstash-{now/M{yyyy.MM}}>");
             clustersAndIndices(command, "index*", "-<--logstash#-{now/M{yyyy.MM}}>");
+            expectInvalidIndexNameErrorWithLineNumber(command, "*, index#pattern", lineNumber, "index#pattern", "must not contain '#'");
+            expectInvalidIndexNameErrorWithLineNumber(
+                command,
+                "index*, index#pattern",
+                indexStarLineNumber,
+                "index#pattern",
+                "must not contain '#'"
+            );
+            expectDateMathErrorWithLineNumber(command, "cluster*:<logstash-{now/D}*>", commands.get(command), dateMathError);
             expectDateMathErrorWithLineNumber(command, "*, \"-<-logstash-{now/D}>\"", lineNumber, dateMathError);
             expectDateMathErrorWithLineNumber(command, "*, -<-logstash-{now/D}>", lineNumber, dateMathError);
             expectDateMathErrorWithLineNumber(command, "\"*, -<-logstash-{now/D}>\"", commands.get(command), dateMathError);
             expectDateMathErrorWithLineNumber(command, "\"*, -<-logst:ash-{now/D}>\"", commands.get(command), dateMathError);
             if (EsqlCapabilities.Cap.INDEX_COMPONENT_SELECTORS.isEnabled()) {
-                clustersAndIndices(command, "*", "-index#pattern::data");
-                clustersAndIndices(command, "*", "-index#pattern::data");
+                clustersAndIndices(command, "*", "-index::data");
+                clustersAndIndices(command, "*", "-index::failures");
+                clustersAndIndices(command, "*", "-index*pattern::data");
+                clustersAndIndices(command, "*", "-index*pattern::failures");
+
+                // This is by existing design: refer to the comment in IdentifierBuilder#resolveAndValidateIndex() in the last
+                // catch clause. If there's an index with a wildcard before an invalid index, we don't error out.
                 clustersAndIndices(command, "index*", "-index#pattern::data");
                 clustersAndIndices(command, "*", "-<--logstash-{now/M{yyyy.MM}}>::data");
                 clustersAndIndices(command, "index*", "-<--logstash#-{now/M{yyyy.MM}}>::data");
+
+                expectError(command + "index1,<logstash-{now+-/d}>", "unit [-] not supported for date math [+-/d]");
+
                 // Throw on invalid date math
                 var partialQuotingBeginOffset = (command.startsWith("FROM") ? 6 : 9) + 25;
                 expectDateMathErrorWithLineNumber(
@@ -3200,6 +3228,128 @@ public class StatementParserTests extends AbstractStatementParserTests {
         assertThat(joinType.coreJoin().joinName(), equalTo("LEFT OUTER"));
     }
 
+    public void testInvalidFromPatterns() {
+        var sourceCommands = new String[] { "FROM", "METRICS" };
+        var indexIsBlank = "Blank index specified in index pattern";
+        var remoteIsEmpty = "remote part is empty";
+        var invalidDoubleColonUsage = "invalid usage of :: separator";
+
+        expectError(randomFrom(sourceCommands) + " \"\"", indexIsBlank);
+        expectError(randomFrom(sourceCommands) + " \" \"", indexIsBlank);
+        expectError(randomFrom(sourceCommands) + " \",,,\"", indexIsBlank);
+        expectError(randomFrom(sourceCommands) + " \",,, \"", indexIsBlank);
+        expectError(randomFrom(sourceCommands) + " \", , ,,\"", indexIsBlank);
+        expectError(randomFrom(sourceCommands) + " \",,,\",*", indexIsBlank);
+        expectError(randomFrom(sourceCommands) + " \"*,\"", indexIsBlank);
+        expectError(randomFrom(sourceCommands) + " \"*,,,\"", indexIsBlank);
+        expectError(randomFrom(sourceCommands) + " \"index1,,,,\"", indexIsBlank);
+        expectError(randomFrom(sourceCommands) + " \"index1,index2,,\"", indexIsBlank);
+        expectError(randomFrom(sourceCommands) + " \"index1,<-+^,index2\",*", "must not contain the following characters");
+        expectError(randomFrom(sourceCommands) + " \"\",*", indexIsBlank);
+        expectError(randomFrom(sourceCommands) + " \"*: ,*,\"", indexIsBlank);
+        expectError(randomFrom(sourceCommands) + " \"*: ,*,\",validIndexName", indexIsBlank);
+        expectError(randomFrom(sourceCommands) + " \"\", \" \", \"  \",validIndexName", indexIsBlank);
+        expectError(randomFrom(sourceCommands) + " \"index1\", \"index2\", \"  ,index3,index4\"", indexIsBlank);
+        expectError(randomFrom(sourceCommands) + " \"index1,index2,,index3\"", indexIsBlank);
+        expectError(randomFrom(sourceCommands) + " \"index1,index2,  ,index3\"", indexIsBlank);
+        expectError(randomFrom(sourceCommands) + " \"*, \"", indexIsBlank);
+        expectError(randomFrom(sourceCommands) + " \"*\", \"\"", indexIsBlank);
+        expectError(randomFrom(sourceCommands) + " \"*\", \" \"", indexIsBlank);
+        expectError(randomFrom(sourceCommands) + " \"*\", \":index1\"", remoteIsEmpty);
+        expectError(randomFrom(sourceCommands) + " \"index1,*,:index2\"", remoteIsEmpty);
+        expectError(randomFrom(sourceCommands) + " \"*\", \"::data\"", remoteIsEmpty);
+        expectError(randomFrom(sourceCommands) + " \"*\", \"::failures\"", remoteIsEmpty);
+        expectError(randomFrom(sourceCommands) + " \"*,index1::\"", invalidDoubleColonUsage);
+        expectError(randomFrom(sourceCommands) + " \"*\", index1, index2, \"index3:: \"", invalidDoubleColonUsage);
+        expectError(randomFrom(sourceCommands) + " \"*,index1::*\"", invalidDoubleColonUsage);
+    }
+
+    public void testInvalidPatternsWithIntermittentQuotes() {
+        // There are 3 ways of crafting invalid index patterns that conforms to the grammar defined through ANTLR.
+        // 1. Not quoting the pattern,
+        // 2. Quoting individual patterns ("index1", "index2", ...), and,
+        // 3. Clubbing all the patterns into a single quoted string ("index1,index2,...).
+        //
+        // Note that in these tests, we unquote a pattern and then quote it immediately.
+        // This is because when randomly generating an index pattern, it may look like: "foo"::data.
+        // To convert it into a quoted string like "foo::data", we need to unquote and then re-quote it.
+
+        // Prohibited char in a quoted cross cluster index pattern should result in an error.
+        {
+            var randomIndex = randomIndexPattern();
+            // Select an invalid char to sneak in.
+            // Note: some chars like '|' and '"' are excluded to generate a proper invalid name.
+            Character[] invalidChars = { ' ', '/', '<', '>', '?' };
+            var randomInvalidChar = randomFrom(invalidChars);
+
+            // Construct the new invalid index pattern.
+            var invalidIndexName = "foo" + randomInvalidChar + "bar";
+            var remoteIndexWithInvalidChar = quote(randomIdentifier() + ":" + invalidIndexName);
+            var query = "FROM " + randomIndex + "," + remoteIndexWithInvalidChar;
+            expectError(
+                query,
+                "Invalid index name ["
+                    + invalidIndexName
+                    + "], must not contain the following characters [' ','\"',',','/','<','>','?','\\','|']"
+            );
+        }
+
+        // Colon outside a quoted string should result in an ANTLR error: a comma is expected.
+        {
+            var randomIndex = randomIndexPattern();
+
+            // In the form of: "*|cluster alias:random string".
+            var malformedClusterAlias = quote((randomBoolean() ? "*" : randomIdentifier()) + ":" + randomIdentifier());
+
+            // We do not generate a cross cluster pattern or else we'd be getting a different error (which is tested in
+            // the next test).
+            var remoteIndex = quote(unquoteIndexPattern(randomIndexPattern(without(CROSS_CLUSTER))));
+            // Format: FROM <some index>, "<cluster alias: random string>":<remote index>
+            var query = "FROM " + randomIndex + "," + malformedClusterAlias + ":" + remoteIndex;
+            expectError(query, " mismatched input ':'");
+        }
+
+        // If an explicit cluster string is present, then we expect an unquoted string next.
+        {
+            var randomIndex = randomIndexPattern();
+            var remoteClusterAlias = randomBoolean() ? "*" : randomIdentifier();
+            // In the form of: random string:random string.
+            var malformedRemoteIndex = quote(unquoteIndexPattern(randomIndexPattern(CROSS_CLUSTER)));
+            // Format: FROM <some index>, <cluster alias>:"random string:random string"
+            var query = "FROM " + randomIndex + "," + remoteClusterAlias + ":" + malformedRemoteIndex;
+            // Since "random string:random string" is partially quoted, expect a ANTLR's parse error.
+            expectError(query, "expecting UNQUOTED_SOURCE");
+        }
+
+        if (EsqlCapabilities.Cap.INDEX_COMPONENT_SELECTORS.isEnabled()) {
+            // If a stream in on a remote and the pattern is entirely quoted, we should be able to validate it.
+            // Note: invalid selector syntax is covered in a different test.
+            {
+                var fromPattern = randomIndexPattern();
+                var malformedIndexSelectorPattern = quote(
+                    (randomIdentifier()) + ":" + unquoteIndexPattern(randomIndexPattern(INDEX_SELECTOR, without(CROSS_CLUSTER)))
+                );
+                // Format: FROM <some index>, "<cluster alias>:<some index>::<data|failures>"
+                var query = "FROM " + fromPattern + "," + malformedIndexSelectorPattern;
+                expectError(query, "Selectors are not yet supported on remote cluster patterns");
+            }
+
+            // If a stream in on a remote and the cluster alias and index pattern are separately quoted, we should
+            // still be able to validate it.
+            // Note: invalid selector syntax is covered in a different test.
+            {
+                var fromPattern = randomIndexPattern();
+                var malformedIndexSelectorPattern = quote(randomIdentifier())
+                    + ":"
+                    + quote(unquoteIndexPattern(randomIndexPattern(INDEX_SELECTOR, without(CROSS_CLUSTER))));
+                // Format: FROM <some index>, "<cluster alias>":"<some index>::<data|failures>"
+                var query = "FROM " + fromPattern + "," + malformedIndexSelectorPattern;
+                // Everything after "<cluster alias>" is extraneous input and hence ANTLR's error.
+                expectError(query, "mismatched input ':'");
+            }
+        }
+    }
+
     public void testInvalidJoinPatterns() {
         assumeTrue("LOOKUP JOIN requires corresponding capability", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
 
@@ -3232,6 +3382,18 @@ public class StatementParserTests extends AbstractStatementParserTests {
 
         // If one or more patterns participating in LOOKUP JOINs are partially quoted, we expect the partial quoting
         // error messages to take precedence over any LOOKUP JOIN error messages.
+
+        {
+            // Generate a syntactically invalid (partial quoted) pattern.
+            var fromPatterns = quote(randomIdentifier()) + ":" + unquoteIndexPattern(randomIndexPattern(without(CROSS_CLUSTER)));
+            var joinPattern = randomIndexPattern();
+            expectError(
+                "FROM " + fromPatterns + " | LOOKUP JOIN " + joinPattern + " ON " + randomIdentifier(),
+                // Since the from pattern is partially quoted, we get an error at the end of the partially quoted string.
+                " mismatched input ':'"
+            );
+        }
+
         {
             // Generate a syntactically invalid (partial quoted) pattern.
             var fromPatterns = randomIdentifier() + ":" + quote(randomIndexPatterns(without(CROSS_CLUSTER)));
@@ -3244,6 +3406,17 @@ public class StatementParserTests extends AbstractStatementParserTests {
             );
         }
 
+        {
+            var fromPatterns = randomIndexPattern();
+            // Generate a syntactically invalid (partial quoted) pattern.
+            var joinPattern = quote(randomIdentifier()) + ":" + unquoteIndexPattern(randomIndexPattern(without(CROSS_CLUSTER)));
+            expectError(
+                "FROM " + fromPatterns + " | LOOKUP JOIN " + joinPattern + " ON " + randomIdentifier(),
+                // Since the join pattern is partially quoted, we get an error at the end of the partially quoted string.
+                "mismatched input ':'"
+            );
+        }
+
         {
             var fromPatterns = randomIndexPattern();
             // Generate a syntactically invalid (partial quoted) pattern.
@@ -3311,6 +3484,31 @@ public class StatementParserTests extends AbstractStatementParserTests {
                         + "], index pattern selectors are not supported in LOOKUP JOIN"
                 );
             }
+
+            {
+                // Although we don't support selector strings for remote indices, it's alright.
+                // The parser error message takes precedence.
+                var fromPatterns = randomIndexPatterns();
+                var joinPattern = quote(randomIdentifier()) + "::" + randomFrom("data", "failures");
+                // After the end of the partially quoted string, i.e. the index name, parser now expects "ON..." and not a selector string.
+                expectError(
+                    "FROM " + fromPatterns + " | LOOKUP JOIN " + joinPattern + " ON " + randomIdentifier(),
+                    "mismatched input ':' expecting 'on'"
+                );
+            }
+
+            {
+                // Although we don't support selector strings for remote indices, it's alright.
+                // The parser error message takes precedence.
+                var fromPatterns = randomIndexPatterns();
+                var joinPattern = randomIdentifier() + "::" + quote(randomFrom("data", "failures"));
+                // After the index name and "::", parser expects an unquoted string, i.e. the selector string should not be
+                // partially quoted.
+                expectError(
+                    "FROM " + fromPatterns + " | LOOKUP JOIN " + joinPattern + " ON " + randomIdentifier(),
+                    " mismatched input ':' expecting UNQUOTED_SOURCE"
+                );
+            }
         }
     }