Browse Source

[8.x] ESQL: use field_caps native nested fields filtering (#117201) (#117375)

* ESQL: use field_caps native nested fields filtering (#117201)

* Just filter the nested fields natively with field_caps support

---------

Co-authored-by: Elastic Machine <elasticmachine@users.noreply.github.com>
Co-authored-by: Craig Taverner <craig@amanzi.com>

* Add import

---------

Co-authored-by: Elastic Machine <elasticmachine@users.noreply.github.com>
Co-authored-by: Craig Taverner <craig@amanzi.com>
Andrei Stefan 10 months ago
parent
commit
ea056863a4

+ 6 - 0
docs/changelog/117201.yaml

@@ -0,0 +1,6 @@
+pr: 117201
+summary: "Use `field_caps` native nested fields filtering"
+area: ES|QL
+type: bug
+issues:
+ - 117054

+ 319 - 0
x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/FieldExtractorTestCase.java

@@ -14,6 +14,7 @@ import org.elasticsearch.client.Response;
 import org.elasticsearch.client.ResponseException;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.network.NetworkAddress;
+import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.core.CheckedConsumer;
 import org.elasticsearch.geo.GeometryTestUtils;
 import org.elasticsearch.index.mapper.BlockLoader;
@@ -27,6 +28,7 @@ import org.elasticsearch.test.rest.ESRestTestCase;
 import org.elasticsearch.xcontent.XContentBuilder;
 import org.elasticsearch.xcontent.XContentType;
 import org.elasticsearch.xcontent.json.JsonXContent;
+import org.elasticsearch.xpack.esql.action.EsqlCapabilities;
 import org.hamcrest.Matcher;
 import org.junit.Before;
 
@@ -1106,6 +1108,323 @@ public abstract class FieldExtractorTestCase extends ESRestTestCase {
         );
     }
 
+    /**
+     * Test for https://github.com/elastic/elasticsearch/issues/117054 fix
+     */
+    public void testOneNestedSubField_AndSameNameSupportedField() throws IOException {
+        assumeIndexResolverNestedFieldsNameClashFixed();
+        ESRestTestCase.createIndex("test", Settings.EMPTY, """
+            "properties": {
+              "Responses": {
+                "properties": {
+                  "process": {
+                    "type": "nested",
+                    "properties": {
+                      "pid": {
+                        "type": "long"
+                      }
+                    }
+                  }
+                }
+              },
+              "process": {
+                "properties": {
+                  "parent": {
+                    "properties": {
+                      "command_line": {
+                        "type": "wildcard",
+                        "fields": {
+                          "text": {
+                            "type": "text"
+                          }
+                        }
+                      }
+                    }
+                  }
+                }
+              }
+            }
+            """);
+
+        Map<String, Object> result = runEsql("FROM test");
+        assertMap(
+            result,
+            matchesMapWithOptionalTook(result.get("took")).entry(
+                "columns",
+                List.of(columnInfo("process.parent.command_line", "keyword"), columnInfo("process.parent.command_line.text", "text"))
+            ).entry("values", Collections.EMPTY_LIST)
+        );
+
+        index("test", """
+            {"Responses.process.pid": 123,"process.parent.command_line":"run.bat"}""");
+
+        result = runEsql("FROM test");
+        assertMap(
+            result,
+            matchesMapWithOptionalTook(result.get("took")).entry(
+                "columns",
+                List.of(columnInfo("process.parent.command_line", "keyword"), columnInfo("process.parent.command_line.text", "text"))
+            ).entry("values", List.of(matchesList().item("run.bat").item("run.bat")))
+        );
+
+        result = runEsql("""
+            FROM test | where process.parent.command_line == "run.bat"
+            """);
+        assertMap(
+            result,
+            matchesMapWithOptionalTook(result.get("took")).entry(
+                "columns",
+                List.of(columnInfo("process.parent.command_line", "keyword"), columnInfo("process.parent.command_line.text", "text"))
+            ).entry("values", List.of(matchesList().item("run.bat").item("run.bat")))
+        );
+
+        ResponseException e = expectThrows(ResponseException.class, () -> runEsql("FROM test | SORT Responses.process.pid"));
+        String err = EntityUtils.toString(e.getResponse().getEntity());
+        assertThat(err, containsString("line 1:18: Unknown column [Responses.process.pid]"));
+
+        e = expectThrows(ResponseException.class, () -> runEsql("""
+            FROM test
+            | SORT Responses.process.pid
+            | WHERE Responses.process IS NULL
+            """));
+        err = EntityUtils.toString(e.getResponse().getEntity());
+        assertThat(err, containsString("line 2:8: Unknown column [Responses.process.pid]"));
+    }
+
+    public void testOneNestedSubField_AndSameNameSupportedField_TwoIndices() throws IOException {
+        assumeIndexResolverNestedFieldsNameClashFixed();
+        ESRestTestCase.createIndex("test1", Settings.EMPTY, """
+                  "properties": {
+                    "Responses": {
+                      "properties": {
+                        "process": {
+                          "type": "nested",
+                          "properties": {
+                            "pid": {
+                              "type": "long"
+                            }
+                          }
+                        }
+                      }
+                    }
+                  }
+            """);
+        ESRestTestCase.createIndex("test2", Settings.EMPTY, """
+                  "properties": {
+                    "process": {
+                      "properties": {
+                        "parent": {
+                          "properties": {
+                            "command_line": {
+                              "type": "wildcard",
+                              "fields": {
+                                "text": {
+                                  "type": "text"
+                                }
+                              }
+                            }
+                          }
+                        }
+                      }
+                    }
+                  }
+            """);
+        index("test1", """
+            {"Responses.process.pid": 123}""");
+        index("test2", """
+            {"process.parent.command_line":"run.bat"}""");
+
+        Map<String, Object> result = runEsql("FROM test* | SORT process.parent.command_line ASC NULLS FIRST");
+        assertMap(
+            result,
+            matchesMapWithOptionalTook(result.get("took")).entry(
+                "columns",
+                List.of(columnInfo("process.parent.command_line", "keyword"), columnInfo("process.parent.command_line.text", "text"))
+            ).entry("values", List.of(matchesList().item(null).item(null), matchesList().item("run.bat").item("run.bat")))
+        );
+
+        result = runEsql("""
+            FROM test* | where process.parent.command_line == "run.bat"
+            """);
+        assertMap(
+            result,
+            matchesMapWithOptionalTook(result.get("took")).entry(
+                "columns",
+                List.of(columnInfo("process.parent.command_line", "keyword"), columnInfo("process.parent.command_line.text", "text"))
+            ).entry("values", List.of(matchesList().item("run.bat").item("run.bat")))
+        );
+
+        ResponseException e = expectThrows(ResponseException.class, () -> runEsql("FROM test* | SORT Responses.process.pid"));
+        String err = EntityUtils.toString(e.getResponse().getEntity());
+        assertThat(err, containsString("line 1:19: Unknown column [Responses.process.pid]"));
+
+        e = expectThrows(ResponseException.class, () -> runEsql("""
+            FROM test*
+            | SORT Responses.process.pid
+            | WHERE Responses.process IS NULL
+            """));
+        err = EntityUtils.toString(e.getResponse().getEntity());
+        assertThat(err, containsString("line 2:8: Unknown column [Responses.process.pid]"));
+    }
+
+    public void testOneNestedField_AndSameNameSupportedField_TwoIndices() throws IOException {
+        assumeIndexResolverNestedFieldsNameClashFixed();
+        ESRestTestCase.createIndex("test1", Settings.EMPTY, """
+            "properties": {
+              "Responses": {
+                "properties": {
+                  "process": {
+                    "type": "nested",
+                    "properties": {
+                      "pid": {
+                        "type": "long"
+                      }
+                    }
+                  }
+                }
+              },
+              "process": {
+                "properties": {
+                  "parent": {
+                    "properties": {
+                      "command_line": {
+                        "type": "wildcard",
+                        "fields": {
+                          "text": {
+                            "type": "text"
+                          }
+                        }
+                      }
+                    }
+                  }
+                }
+              }
+            }
+            """);
+        ESRestTestCase.createIndex("test2", Settings.EMPTY, """
+            "properties": {
+              "Responses": {
+                "properties": {
+                  "process": {
+                    "type": "integer",
+                    "fields": {
+                      "pid": {
+                        "type": "long"
+                      }
+                    }
+                  }
+                }
+              },
+              "process": {
+                "properties": {
+                  "parent": {
+                    "properties": {
+                      "command_line": {
+                        "type": "wildcard",
+                        "fields": {
+                          "text": {
+                            "type": "text"
+                          }
+                        }
+                      }
+                    }
+                  }
+                }
+              }
+            }
+            """);
+        index("test1", """
+            {"Responses.process.pid": 111,"process.parent.command_line":"run1.bat"}""");
+        index("test2", """
+            {"Responses.process": 222,"process.parent.command_line":"run2.bat"}""");
+
+        Map<String, Object> result = runEsql("FROM test* | SORT process.parent.command_line");
+        assertMap(
+            result,
+            matchesMapWithOptionalTook(result.get("took")).entry(
+                "columns",
+                List.of(
+                    columnInfo("Responses.process", "integer"),
+                    columnInfo("Responses.process.pid", "long"),
+                    columnInfo("process.parent.command_line", "keyword"),
+                    columnInfo("process.parent.command_line.text", "text")
+                )
+            )
+                .entry(
+                    "values",
+                    List.of(
+                        matchesList().item(null).item(null).item("run1.bat").item("run1.bat"),
+                        matchesList().item(222).item(222).item("run2.bat").item("run2.bat")
+                    )
+                )
+        );
+
+        result = runEsql("""
+            FROM test* | where Responses.process.pid == 111
+            """);
+        assertMap(
+            result,
+            matchesMapWithOptionalTook(result.get("took")).entry(
+                "columns",
+                List.of(
+                    columnInfo("Responses.process", "integer"),
+                    columnInfo("Responses.process.pid", "long"),
+                    columnInfo("process.parent.command_line", "keyword"),
+                    columnInfo("process.parent.command_line.text", "text")
+                )
+            ).entry("values", List.of())
+        );
+
+        result = runEsql("FROM test* | SORT process.parent.command_line");
+        assertMap(
+            result,
+            matchesMapWithOptionalTook(result.get("took")).entry(
+                "columns",
+                List.of(
+                    columnInfo("Responses.process", "integer"),
+                    columnInfo("Responses.process.pid", "long"),
+                    columnInfo("process.parent.command_line", "keyword"),
+                    columnInfo("process.parent.command_line.text", "text")
+                )
+            )
+                .entry(
+                    "values",
+                    List.of(
+                        matchesList().item(null).item(null).item("run1.bat").item("run1.bat"),
+                        matchesList().item(222).item(222).item("run2.bat").item("run2.bat")
+                    )
+                )
+        );
+
+        result = runEsql("""
+            FROM test*
+            | SORT process.parent.command_line
+            | WHERE Responses.process IS NULL
+            """);
+        assertMap(
+            result,
+            matchesMapWithOptionalTook(result.get("took")).entry(
+                "columns",
+                List.of(
+                    columnInfo("Responses.process", "integer"),
+                    columnInfo("Responses.process.pid", "long"),
+                    columnInfo("process.parent.command_line", "keyword"),
+                    columnInfo("process.parent.command_line.text", "text")
+                )
+            ).entry("values", List.of(matchesList().item(null).item(null).item("run1.bat").item("run1.bat")))
+        );
+    }
+
+    private void assumeIndexResolverNestedFieldsNameClashFixed() throws IOException {
+        // especially for BWC tests but also for regular tests
+        var capsName = EsqlCapabilities.Cap.FIX_NESTED_FIELDS_NAME_CLASH_IN_INDEXRESOLVER.name().toLowerCase(Locale.ROOT);
+        boolean requiredClusterCapability = clusterHasCapability("POST", "/_query", List.of(), List.of(capsName)).orElse(false);
+        assumeTrue(
+            "This test makes sense for versions that have the fix for https://github.com/elastic/elasticsearch/issues/117054",
+            requiredClusterCapability
+        );
+    }
+
     private CheckedConsumer<XContentBuilder, IOException> empNoInObject(String empNoType) {
         return index -> {
             index.startObject("properties");

+ 6 - 1
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

@@ -504,7 +504,12 @@ public class EsqlCapabilities {
         /**
          * LOOKUP JOIN
          */
-        JOIN_LOOKUP(Build.current().isSnapshot());
+        JOIN_LOOKUP(Build.current().isSnapshot()),
+
+        /**
+         * Fix for https://github.com/elastic/elasticsearch/issues/117054
+         */
+        FIX_NESTED_FIELDS_NAME_CLASH_IN_INDEXRESOLVER;
 
         private final boolean enabled;
 

+ 4 - 21
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java

@@ -98,9 +98,8 @@ public class IndexResolver {
         // TODO flattened is simpler - could we get away with that?
         String[] names = fieldsCaps.keySet().toArray(new String[0]);
         Arrays.sort(names);
-        Set<String> forbiddenFields = new HashSet<>();
         Map<String, EsField> rootFields = new HashMap<>();
-        name: for (String name : names) {
+        for (String name : names) {
             Map<String, EsField> fields = rootFields;
             String fullName = name;
             boolean isAlias = false;
@@ -111,9 +110,6 @@ public class IndexResolver {
                     break;
                 }
                 String parent = name.substring(0, nextDot);
-                if (forbiddenFields.contains(parent)) {
-                    continue name;
-                }
                 EsField obj = fields.get(parent);
                 if (obj == null) {
                     obj = new EsField(parent, OBJECT, new HashMap<>(), false, true);
@@ -125,16 +121,10 @@ public class IndexResolver {
                 fields = obj.getProperties();
                 name = name.substring(nextDot + 1);
             }
-
-            List<IndexFieldCapabilities> caps = fieldsCaps.get(fullName);
-            if (allNested(caps)) {
-                forbiddenFields.add(name);
-                continue;
-            }
             // TODO we're careful to make isAlias match IndexResolver - but do we use it?
 
             EsField field = firstUnsupportedParent == null
-                ? createField(fieldCapsResponse, name, fullName, caps, isAlias)
+                ? createField(fieldCapsResponse, name, fullName, fieldsCaps.get(fullName), isAlias)
                 : new UnsupportedEsField(
                     fullName,
                     firstUnsupportedParent.getOriginalType(),
@@ -164,15 +154,6 @@ public class IndexResolver {
         return IndexResolution.valid(new EsIndex(indexPattern, rootFields, concreteIndices), concreteIndices.keySet(), unavailableRemotes);
     }
 
-    private boolean allNested(List<IndexFieldCapabilities> caps) {
-        for (IndexFieldCapabilities cap : caps) {
-            if (false == cap.type().equalsIgnoreCase("nested")) {
-                return false;
-            }
-        }
-        return true;
-    }
-
     private static Map<String, List<IndexFieldCapabilities>> collectFieldCaps(FieldCapabilitiesResponse fieldCapsResponse) {
         Set<String> seenHashes = new HashSet<>();
         Map<String, List<IndexFieldCapabilities>> fieldsCaps = new HashMap<>();
@@ -278,6 +259,8 @@ public class IndexResolver {
         // lenient because we throw our own errors looking at the response e.g. if something was not resolved
         // also because this way security doesn't throw authorization exceptions but rather honors ignore_unavailable
         req.indicesOptions(FIELD_CAPS_INDICES_OPTIONS);
+        // we ignore the nested data type fields starting with https://github.com/elastic/elasticsearch/pull/111495
+        req.filters("-nested");
         req.setMergeResults(false);
         return req;
     }