Bläddra i källkod

[8.19] ESQL: Avoid unintended attribute removal (#127563) (#128231)

* ESQL: Avoid unintended attribute removal (#127563)

---------

Co-authored-by: Andrei Stefan <astefan@users.noreply.github.com>

* Checkstyle

* Checkstyle again

* Slightly change the test because 8.19 has fewer indices in the
index pattern used (9.x also has host_inventory index).

---------

Co-authored-by: kanoshiou <73424326+kanoshiou@users.noreply.github.com>
Andrei Stefan 5 månader sedan
förälder
incheckning
60304563a4

+ 6 - 0
docs/changelog/127563.yaml

@@ -0,0 +1,6 @@
+pr: 127563
+summary: "ESQL: Avoid unintended attribute removal"
+area: ES|QL
+type: bug
+issues:
+  - 127468

+ 40 - 0
x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec

@@ -331,3 +331,43 @@ ROW a="b c d x"| DISSECT a "%{b} %{} %{d} %{}";
 a:keyword  | b:keyword | d:keyword
 b c d x    | b         | d
 ;
+
+avoidAttributesRemoval
+// https://github.com/elastic/elasticsearch/issues/127468
+required_capability: keep_regex_extract_attributes
+required_capability: join_lookup_v12
+from message_types 
+| eval type = 1 
+| lookup join message_types_lookup on message 
+| drop message 
+| dissect type "%{b}" 
+| stats x = max(b) 
+| keep x
+;
+
+x:keyword
+Success
+;
+
+avoidAttributesRemoval2
+// https://github.com/elastic/elasticsearch/issues/127468
+required_capability: keep_regex_extract_attributes
+required_capability: join_lookup_v12
+FROM sample_data, employees
+| EVAL client_ip = client_ip::keyword
+| RENAME languages AS language_code
+| LOOKUP JOIN clientips_lookup ON client_ip
+| EVAL type = 1::keyword
+| EVAL type = 2
+| LOOKUP JOIN message_types_lookup ON message
+| LOOKUP JOIN languages_lookup ON language_code
+| DISSECT type "%{type_as_text}"
+| KEEP message
+| WHERE message IS NOT NULL
+| SORT message DESC
+| LIMIT 1
+;
+
+message:keyword
+Disconnected
+;

+ 32 - 0
x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec

@@ -297,3 +297,35 @@ row text = "123 abc", int = 5 | sort int asc | grok text "%{NUMBER:text:int} %{W
 text:integer | int:integer | description:keyword
 123          | 5           | abc
 ;
+
+avoidAttributesRemoval
+// https://github.com/elastic/elasticsearch/issues/127468
+required_capability: union_types
+required_capability: join_lookup_v12
+required_capability: keep_regex_extract_attributes
+from multivalue_points,h*,messa* 
+| eval  `card` = true, PbehoQUqKSF = "VLGjhcgNkQiEVyCLo", DsxMWtGL = true, qSxTIvUorMim = true, `location` = 8593178066470220111, type = -446161601, FSkGQkgmS = false 
+| eval  PbehoQUqKSF = 753987034, HLNMQfQj = true, `within` = true, `id` = "JDKKkYwhhh", lk = null, aecuvjTkgZza = 510616700, aDAMpuVtNX = null, qCopgNZPt = "AjhJUtZefqKdJYH", BxHHlFoA = "isBrmhKLc"
+| rename message as message 
+| lookup join message_types_lookup on message 
+| sort PbehoQUqKSF DESC, ip1 DESC NULLS LAST 
+| limit 5845 
+| drop `subset`, ip*, `card`, `within`, description, `aecuvjTkgZza`, `ip0`, height_range, DsxMWtGL, `aDAMpuVtNX`, PbehoQUqKSF, `intersects`, aDAMpuVtNX, *ight_range, HLNMQfQj, `FSkGQkgmS`, BxHHlFoA, card 
+| grok type "%{WORD:GknCxQFo}" 
+| eval  `location` = null, ZjWUUvGusyyz = null, HeeKIpzgh = false, `id` = 4325287503714500302, host = false, `lk` = null, HvTQdOqFajpH = false, fKNlsYoT = true, `location` = -1158449473, `qCopgNZPt` = 1219986202615280617 
+| drop HeeKIpzg*, `ZjWUUvGusyyz`, `message`, `type`, `lk` 
+| grok GknCxQFo "%{WORD:location} %{WORD:HvTQdOqFajpH}" 
+| drop HvTQdOqFajpH, `location`, centroid 
+| mv_expand GknCxQFo 
+| limit 410 
+| limit 3815 
+| rename `id` AS `GknCxQFo` 
+| grok host_group "%{WORD:oGQQZHxQHj} %{WORD:qCopgNZPt} %{WORD:vHKOmmocPcTO}" 
+| stats  BkQXJRMeAM = min(GknCxQFo) 
+| keep `BkQXJRMeAM`
+;
+
+BkQXJRMeAM:long
+4325287503714500302
+;
+

+ 8 - 1
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

@@ -860,7 +860,14 @@ public class EsqlCapabilities {
          * During resolution (pre-analysis) we have to consider that joins can override regex extracted values
          * see <a href="https://github.com/elastic/elasticsearch/issues/127467"> ES|QL: pruning of JOINs leads to missing fields #127467</a>
          */
-        FIX_JOIN_MASKING_REGEX_EXTRACT;
+        FIX_JOIN_MASKING_REGEX_EXTRACT,
+
+        /**
+         * Avid GROK and DISSECT attributes being removed when resolving fields.
+         * see <a href="https://github.com/elastic/elasticsearch/issues/127468"> ES|QL: Grok only supports KEYWORD or TEXT values,
+         * found expression [type] type [INTEGER] #127468 </a>
+         */
+        KEEP_REGEX_EXTRACT_ATTRIBUTES;
 
         private final boolean enabled;
 

+ 3 - 2
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java

@@ -620,7 +620,7 @@ public class EsqlSession {
             //
             // and ips_policy enriches the results with the same name ip field),
             // these aliases should be kept in the list of fields.
-            if (canRemoveAliases[0] && couldOverrideAliases(p)) {
+            if (canRemoveAliases[0] && p.anyMatch(EsqlSession::couldOverrideAliases)) {
                 canRemoveAliases[0] = false;
             }
             if (canRemoveAliases[0]) {
@@ -687,7 +687,8 @@ public class EsqlSession {
             || p instanceof Project
             || p instanceof RegexExtract
             || p instanceof Rename
-            || p instanceof TopN) == false;
+            || p instanceof TopN
+            || p instanceof UnresolvedRelation) == false;
     }
 
     private static boolean matchByName(Attribute attr, String other, boolean skipIfPattern) {

+ 71 - 2
x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java

@@ -604,7 +604,20 @@ public class IndexResolverFieldNamesTests extends ESTestCase {
                 | eval language = concat(x, "-", lang)
                 | keep emp_no, x, lang, language
                 | sort emp_no desc | limit 3""",
-            Set.of("languages", "languages.*", "emp_no", "emp_no.*", "language_name", "language_name.*", "x", "x.*", "lang", "lang.*")
+            Set.of(
+                "emp_no",
+                "x",
+                "lang",
+                "language",
+                "language_name",
+                "languages",
+                "x.*",
+                "language_name.*",
+                "languages.*",
+                "emp_no.*",
+                "lang.*",
+                "language.*"
+            )
         );
     }
 
@@ -1355,7 +1368,7 @@ public class IndexResolverFieldNamesTests extends ESTestCase {
             | grok type "%{WORD:b}"
             | stats x = max(b)
             | keep x""", Set.of());
-        assertThat(fieldNames, equalTo(Set.of("message", "x", "x.*", "message.*")));
+        assertThat(fieldNames, equalTo(Set.of("x", "b", "type", "message", "x.*", "message.*", "type.*", "b.*")));
     }
 
     public void testAvoidGrokAttributesRemoval2() {
@@ -1388,6 +1401,62 @@ public class IndexResolverFieldNamesTests extends ESTestCase {
 
     }
 
+    /**
+     * @see <a href="https://github.com/elastic/elasticsearch/issues/127468">ES|QL: Grok only supports KEYWORD or TEXT values,
+     * found expression [type] type [INTEGER]</a>
+     */
+    public void testAvoidGrokAttributesRemoval4() {
+        assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
+        Set<String> fieldNames = fieldNames("""
+            from message_types
+            | eval type = 1
+            | lookup join message_types_lookup on message
+            | drop  message
+            | grok type "%{WORD:b}"
+            | stats x = max(b)
+            | keep x""", Set.of());
+        assertThat(fieldNames, equalTo(Set.of("x", "b", "type", "message", "x.*", "message.*", "type.*", "b.*")));
+    }
+
+    /**
+     * @see <a href="https://github.com/elastic/elasticsearch/issues/127468">ES|QL: Grok only supports KEYWORD or TEXT values,
+     * found expression [type] type [INTEGER]</a>
+     */
+    public void testAvoidGrokAttributesRemoval5() {
+        assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
+        Set<String> fieldNames = fieldNames("""
+            FROM sample_data, employees
+            | EVAL client_ip = client_ip::keyword
+            | RENAME languages AS language_code
+            | LOOKUP JOIN clientips_lookup ON client_ip
+            | EVAL type = 1::keyword
+            | EVAL type = 2
+            | LOOKUP JOIN message_types_lookup ON message
+            | LOOKUP JOIN languages_lookup ON language_code
+            | DISSECT type "%{type_as_text}"
+            | KEEP message
+            | WHERE message IS NOT NULL
+            | SORT message DESC
+            | LIMIT 1""", Set.of());
+        assertThat(
+            fieldNames,
+            equalTo(
+                Set.of(
+                    "message",
+                    "type",
+                    "languages",
+                    "client_ip",
+                    "language_code",
+                    "language_code.*",
+                    "client_ip.*",
+                    "message.*",
+                    "type.*",
+                    "languages.*"
+                )
+            )
+        );
+    }
+
     public void testEnrichOnDefaultField() {
         Set<String> fieldNames = fieldNames("""
             from employees