瀏覽代碼

Reapply "ESQL: Expose "_ignored" metadata field" (#108864) (#108871)

Expose "_ignored" metadata field in ESQL queries.

This is the same code that was merged in
https://github.com/elastic/elasticsearch/pull/108770 and then reverted
in https://github.com/elastic/elasticsearch/pull/108864

It was reverted because of a test failure:
https://gradle-enterprise.elastic.co/s/dpi2eib2x2fj2
Iván Cea Fontenla 1 年之前
父節點
當前提交
212fc05808

+ 5 - 0
docs/changelog/108871.yaml

@@ -0,0 +1,5 @@
+pr: 108871
+summary: "Reapply \"ESQL: Expose \"_ignored\" metadata field\""
+area: ES|QL
+type: feature
+issues: []

+ 3 - 0
docs/reference/esql/metadata-fields.asciidoc

@@ -17,6 +17,9 @@ supported ones are:
   * `_version`: the source document's version. The field is of the type
   <<number,long>>.
 
+  * <<mapping-ignored-field,`_ignored`>>: the names of the source document's ignored fields. The field is of the type
+  <<keyword,keyword>>.
+
 To enable the access to these fields, the <<esql-from,`FROM`>> source command needs
 to be provided with a dedicated directive:
 

+ 10 - 0
server/src/main/java/org/elasticsearch/index/mapper/IgnoredFieldMapper.java

@@ -63,6 +63,11 @@ public final class IgnoredFieldMapper extends MetadataFieldMapper {
             return CONTENT_TYPE;
         }
 
+        @Override
+        public BlockLoader blockLoader(BlockLoaderContext blContext) {
+            return new BlockStoredFieldsReader.BytesFromStringsBlockLoader(NAME);
+        }
+
         @Override
         public ValueFetcher valueFetcher(SearchExecutionContext context, String format) {
             return new StoredValueFetcher(context.lookup(), NAME);
@@ -89,6 +94,11 @@ public final class IgnoredFieldMapper extends MetadataFieldMapper {
             return CONTENT_TYPE;
         }
 
+        @Override
+        public BlockLoader blockLoader(BlockLoaderContext blContext) {
+            return new BlockDocValuesReader.BytesRefsFromOrdsBlockLoader(NAME);
+        }
+
         @Override
         public ValueFetcher valueFetcher(SearchExecutionContext context, String format) {
             return new DocValueFetcher(docValueFormat(format, null), context.getForField(this, FielddataOperation.SEARCH));

+ 3 - 0
x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/MetadataAttribute.java

@@ -9,6 +9,7 @@ package org.elasticsearch.xpack.esql.core.expression;
 
 import org.elasticsearch.core.Tuple;
 import org.elasticsearch.index.mapper.IdFieldMapper;
+import org.elasticsearch.index.mapper.IgnoredFieldMapper;
 import org.elasticsearch.index.mapper.SourceFieldMapper;
 import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
 import org.elasticsearch.xpack.esql.core.tree.Source;
@@ -28,6 +29,8 @@ public class MetadataAttribute extends TypedAttribute {
         tuple(DataTypes.KEYWORD, true),
         IdFieldMapper.NAME,
         tuple(DataTypes.KEYWORD, false), // actually searchable, but fielddata access on the _id field is disallowed by default
+        IgnoredFieldMapper.NAME,
+        tuple(DataTypes.KEYWORD, true),
         SourceFieldMapper.NAME,
         tuple(DataTypes.SOURCE, false)
     );

+ 1 - 1
x-pack/plugin/esql/qa/server/mixed-cluster/build.gradle

@@ -11,7 +11,7 @@ apply plugin: 'elasticsearch.bwc-test'
 
 restResources {
   restApi {
-    include '_common', 'bulk', 'get', 'indices', 'esql', 'xpack', 'enrich', 'cluster'
+    include '_common', 'bulk', 'get', 'indices', 'esql', 'xpack', 'enrich', 'cluster', 'capabilities'
   }
   restTests {
     includeXpack 'esql'

+ 1 - 1
x-pack/plugin/esql/qa/server/multi-node/build.gradle

@@ -18,7 +18,7 @@ tasks.named('javaRestTest') {
 
 restResources {
   restApi {
-    include '_common', 'bulk', 'get', 'indices', 'esql', 'xpack', 'enrich', 'cluster'
+    include '_common', 'bulk', 'get', 'indices', 'esql', 'xpack', 'enrich', 'cluster', 'capabilities'
   }
   restTests {
     includeXpack 'esql'

+ 1 - 1
x-pack/plugin/esql/qa/server/single-node/build.gradle

@@ -15,7 +15,7 @@ dependencies {
 
 restResources {
   restApi {
-    include '_common', 'bulk', 'get', 'indices', 'esql', 'xpack', 'enrich', 'cluster'
+    include '_common', 'bulk', 'get', 'indices', 'esql', 'xpack', 'enrich', 'cluster', 'capabilities'
   }
   restTests {
     includeXpack 'esql'

+ 2 - 1
x-pack/plugin/esql/qa/testFixtures/src/main/resources/README.md

@@ -170,12 +170,13 @@ Then you can skip it by adding a `required_capability` to your test like so:
 ```csv-spec
 mvSlice
 required_capability: mv_sort
+required_capability: mv_slice
 
 row a = [true, false, false, true]
 | eval a1 = mv_slice(a, 1), a2 = mv_slice(a, 2, 3);
 ```
 
-That skips nodes that don't have the `mv_sort` capability.
+That skips nodes that don't have both the `mv_sort` and `mv_slice` capabilities.
 
 NOTE: It is also possible to do this by creating a `NodeFeature` in `EsqlFeatures` for your change.
 In that case the feature should be prefixed with `esql.`, but this prefix should

+ 6 - 1
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

@@ -31,10 +31,15 @@ public class EsqlCapabilities {
      */
     private static final String ST_CENTROID_AGG_OPTIMIZED = "st_centroid_agg_optimized";
 
+    /**
+     * Support for requesting the "_ignored" metadata field.
+     */
+    private static final String METADATA_IGNORED_FIELD = "metadata_field_ignored";
+
     public static final Set<String> CAPABILITIES = capabilities();
 
     private static Set<String> capabilities() {
-        List<String> caps = new ArrayList<>(List.of(FN_CBRT, ST_CENTROID_AGG_OPTIMIZED));
+        List<String> caps = new ArrayList<>(List.of(FN_CBRT, ST_CENTROID_AGG_OPTIMIZED, METADATA_IGNORED_FIELD));
 
         /*
          * Add all of our cluster features without the leading "esql."

+ 2 - 2
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java

@@ -105,8 +105,8 @@ public class EsPhysicalOperationProviders extends AbstractPhysicalOperationProvi
             MappedFieldType.FieldExtractPreference fieldExtractPreference = PlannerUtils.extractPreference(docValuesAttrs.contains(attr));
             ElementType elementType = PlannerUtils.toElementType(dataType, fieldExtractPreference);
             String fieldName = attr.name();
-            boolean isSupported = EsqlDataTypes.isUnsupported(dataType);
-            IntFunction<BlockLoader> loader = s -> shardContexts.get(s).blockLoader(fieldName, isSupported, fieldExtractPreference);
+            boolean isUnsupported = EsqlDataTypes.isUnsupported(dataType);
+            IntFunction<BlockLoader> loader = s -> shardContexts.get(s).blockLoader(fieldName, isUnsupported, fieldExtractPreference);
             fields.add(new ValuesSourceReaderOperator.FieldInfo(fieldName, elementType, loader));
         }
         return source.with(new ValuesSourceReaderOperator.Factory(fields, readers, docChannel), layout.build());

+ 3 - 0
x-pack/plugin/ql/src/main/java/org/elasticsearch/xpack/ql/expression/MetadataAttribute.java

@@ -9,6 +9,7 @@ package org.elasticsearch.xpack.ql.expression;
 
 import org.elasticsearch.core.Tuple;
 import org.elasticsearch.index.mapper.IdFieldMapper;
+import org.elasticsearch.index.mapper.IgnoredFieldMapper;
 import org.elasticsearch.index.mapper.SourceFieldMapper;
 import org.elasticsearch.xpack.ql.tree.NodeInfo;
 import org.elasticsearch.xpack.ql.tree.Source;
@@ -28,6 +29,8 @@ public class MetadataAttribute extends TypedAttribute {
         tuple(DataTypes.KEYWORD, true),
         IdFieldMapper.NAME,
         tuple(DataTypes.KEYWORD, false), // actually searchable, but fielddata access on the _id field is disallowed by default
+        IgnoredFieldMapper.NAME,
+        tuple(DataTypes.KEYWORD, true),
         SourceFieldMapper.NAME,
         tuple(DataTypes.SOURCE, false)
     );

+ 148 - 0
x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/140_metadata.yml

@@ -0,0 +1,148 @@
+setup:
+  - requires:
+      test_runner_features: [capabilities]
+      capabilities:
+        - method: POST
+          path: /_query
+          parameters: [method, path, parameters, capabilities]
+          capabilities: [metadata_fields, metadata_field_ignored]
+      reason: "Ignored metadata field capability required"
+
+  - do:
+      indices.create:
+        index: test
+        body:
+          mappings:
+            properties:
+              integer:
+                type: integer
+                ignore_malformed: true
+              keyword:
+                type: keyword
+                ignore_above: 3
+              case:
+                type: keyword
+
+  - do:
+      bulk:
+        index: test
+        refresh: true
+        body:
+          - { index: { } }
+          - { case: "ok", integer: 10, keyword: "ok" }
+          - { index: { } }
+          - { case: "integer_ignored", integer: "not-an-integer", keyword: "ok" }
+          - { index: { } }
+          - { case: "all_ignored", integer: "not-an-integer", keyword: "long-keyword" }
+
+---
+"All fields correct":
+  - do:
+      esql.query:
+        body:
+          query: 'from test metadata _ignored | where case == "ok" | limit 2 | keep integer, keyword, _ignored'
+          version: 2024.04.01
+
+  - length: { columns: 3 }
+  - match: { columns.0.name: integer }
+  - match: { columns.0.type: integer }
+  - match: { columns.1.name: keyword }
+  - match: { columns.1.type: keyword }
+  - match: { columns.2.name: _ignored }
+  - match: { columns.2.type: keyword }
+
+  - length: { values: 1 }
+  - match: { values.0.0: 10 }
+  - match: { values.0.1: "ok" }
+  - match: { values.0.2: null }
+
+---
+"One ignored field":
+  - do:
+      esql.query:
+        body:
+          query: 'from test metadata _ignored | where case == "integer_ignored" | limit 2 | keep integer, keyword, _ignored'
+          version: 2024.04.01
+
+  - length: { columns: 3 }
+  - match: { columns.0.name: integer }
+  - match: { columns.0.type: integer }
+  - match: { columns.1.name: keyword }
+  - match: { columns.1.type: keyword }
+  - match: { columns.2.name: _ignored }
+  - match: { columns.2.type: keyword }
+
+  - length: { values: 1 }
+  - match: { values.0.0: null }
+  - match: { values.0.1: "ok" }
+  - match: { values.0.2: "integer" }
+
+---
+"All fields ignored":
+  - do:
+      esql.query:
+        body:
+          query: 'from test metadata _ignored | where case == "all_ignored" | limit 2 | keep integer, keyword, _ignored'
+          version: 2024.04.01
+
+  - length: { columns: 3 }
+  - match: { columns.0.name: integer }
+  - match: { columns.0.type: integer }
+  - match: { columns.1.name: keyword }
+  - match: { columns.1.type: keyword }
+  - match: { columns.2.name: _ignored }
+  - match: { columns.2.type: keyword }
+
+  - length: { values: 1 }
+  - match: { values.0.0: null }
+  - match: { values.0.1: null }
+  - match: { values.0.2: ["integer", "keyword"] }
+
+---
+"Filter by ignored":
+  - do:
+      esql.query:
+        body:
+          query: 'from test metadata _ignored | where _ignored == "keyword" | limit 3 | stats count(*)'
+          version: 2024.04.01
+
+  - length: { columns: 1 }
+  - length: { values: 1 }
+  - match: { columns.0.name: "count(*)"}
+  - match: { columns.0.type: long }
+  - match: { values.0.0: 1 }
+
+---
+"Group by ignored field":
+  - do:
+      esql.query:
+        body:
+          query: 'from test metadata _ignored | limit 3 | stats count = count(*) by _ignored'
+          version: 2024.04.01
+
+  - length: { columns: 2 }
+  - length: { values: 3 }
+  - match: {columns.0.name: "count"}
+  - match: {columns.0.type: "long"}
+  - match: {columns.1.name: "_ignored"}
+  - match: {columns.1.type: "keyword"}
+  - match: {values.0.0: 1}
+  - match: {values.0.1: null}
+  - match: {values.1.0: 2}
+  - match: {values.1.1: "integer"}
+  - match: {values.2.0: 1}
+  - match: {values.2.1: "keyword"}
+
+---
+"Aggregate ignored field":
+  - do:
+      esql.query:
+        body:
+          query: 'from test metadata _ignored | limit 3 | stats count_distinct(_ignored)'
+          version: 2024.04.01
+
+  - length: { columns: 1 }
+  - length: { values: 1 }
+  - match: {columns.0.name: "count_distinct(_ignored)"}
+  - match: {columns.0.type: "long"}
+  - match: {values.0.0: 2}