소스 검색

Counted keyword: inherit source keep mode from index settings (#120678) (#120871)

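This patch adds a property to CountedKeywordFieldMapper (and its Builder) to
track the index-level synthetic_source_keep setting
(index.mapping.synthetic_source_keep). This property is then used to properly
implement synthetic source support in the counted_keyword field type: the
field-level synthetic_source_keep value takes precedence, the index setting is
used when the field mapping does not specify one, and the mapper falls back to
the ignore_source mechanism whenever the effective value is anything other
than none.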
Jordan Powers 8 달 전
부모
커밋
250c32bc54

+ 3 - 3
test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java

@@ -1093,12 +1093,12 @@ public abstract class MapperTestCase extends MapperServiceTestCase {
             this(b -> b.value(inputValue), b -> b.value(result), b -> b.value(blockLoaderResults), mapping);
         }
 
-        private void buildInput(XContentBuilder b) throws IOException {
+        public void buildInput(XContentBuilder b) throws IOException {
             b.field("field");
             inputValue.accept(b);
         }
 
-        private void buildInputArray(XContentBuilder b, int elementCount) throws IOException {
+        public void buildInputArray(XContentBuilder b, int elementCount) throws IOException {
             b.startArray("field");
             for (int i = 0; i < elementCount; i++) {
                 inputValue.accept(b);
@@ -1385,7 +1385,7 @@ public abstract class MapperTestCase extends MapperServiceTestCase {
         assertThat(syntheticSource(mapper, b -> b.startArray("field").endArray()), equalTo(expected));
     }
 
-    private boolean shouldUseIgnoreMalformed() {
+    protected boolean shouldUseIgnoreMalformed() {
         // 5% of test runs use ignore_malformed
         return supportsIgnoreMalformed() && randomDouble() <= 0.05;
     }

+ 16 - 9
x-pack/plugin/mapper-counted-keyword/src/main/java/org/elasticsearch/xpack/countedkeyword/CountedKeywordFieldMapper.java

@@ -76,8 +76,7 @@ import static org.elasticsearch.common.lucene.Lucene.KEYWORD_ANALYZER;
  * 2 for each key (one per document), a <code>counted_terms</code> aggregation on a <code>counted_keyword</code> field will consider
  * the actual count and report a count of 3 for each key.</p>
  *
- * <p>Synthetic source is supported, but uses the fallback "ignore source" infrastructure unless the <code>source_keep_mode</code> is
- *  explicitly set to <code>none</code> in the field mapping parameters.</p>
+ * <p>Synthetic source is fully supported.</p>
  */
 public class CountedKeywordFieldMapper extends FieldMapper {
     public static final String CONTENT_TYPE = "counted_keyword";
@@ -277,9 +276,11 @@ public class CountedKeywordFieldMapper extends FieldMapper {
     public static class Builder extends FieldMapper.Builder {
         private final Parameter<Boolean> indexed = Parameter.indexParam(m -> toType(m).mappedFieldType.isIndexed(), true);
         private final Parameter<Map<String, String>> meta = Parameter.metaParam();
+        private final SourceKeepMode indexSourceKeepMode;
 
-        protected Builder(String name) {
+        protected Builder(String name, SourceKeepMode indexSourceKeepMode) {
             super(name);
+            this.indexSourceKeepMode = indexSourceKeepMode;
         }
 
         @Override
@@ -309,7 +310,8 @@ public class CountedKeywordFieldMapper extends FieldMapper {
                     countFieldMapper.fieldType()
                 ),
                 builderParams(this, context),
-                countFieldMapper
+                countFieldMapper,
+                indexSourceKeepMode
             );
         }
     }
@@ -389,21 +391,26 @@ public class CountedKeywordFieldMapper extends FieldMapper {
         }
     }
 
-    public static TypeParser PARSER = new TypeParser((n, c) -> new CountedKeywordFieldMapper.Builder(n));
+    public static TypeParser PARSER = new TypeParser(
+        (n, c) -> new CountedKeywordFieldMapper.Builder(n, c.getIndexSettings().sourceKeepMode())
+    );
 
     private final FieldType fieldType;
     private final BinaryFieldMapper countFieldMapper;
+    private final SourceKeepMode indexSourceKeepMode;
 
     protected CountedKeywordFieldMapper(
         String simpleName,
         FieldType fieldType,
         MappedFieldType mappedFieldType,
         BuilderParams builderParams,
-        BinaryFieldMapper countFieldMapper
+        BinaryFieldMapper countFieldMapper,
+        SourceKeepMode indexSourceKeepMode
     ) {
         super(simpleName, mappedFieldType, builderParams);
         this.fieldType = fieldType;
         this.countFieldMapper = countFieldMapper;
+        this.indexSourceKeepMode = indexSourceKeepMode;
     }
 
     @Override
@@ -485,7 +492,7 @@ public class CountedKeywordFieldMapper extends FieldMapper {
 
     @Override
     public FieldMapper.Builder getMergeBuilder() {
-        return new Builder(leafName()).init(this);
+        return new Builder(leafName(), indexSourceKeepMode).init(this);
     }
 
     @Override
@@ -495,8 +502,8 @@ public class CountedKeywordFieldMapper extends FieldMapper {
 
     @Override
     protected SyntheticSourceSupport syntheticSourceSupport() {
-        var keepMode = sourceKeepMode();
-        if (keepMode.isPresent() == false || keepMode.get() != SourceKeepMode.NONE) {
+        var keepMode = sourceKeepMode().orElse(indexSourceKeepMode);
+        if (keepMode != SourceKeepMode.NONE) {
             return super.syntheticSourceSupport();
         }
 

+ 38 - 17
x-pack/plugin/mapper-counted-keyword/src/test/java/org/elasticsearch/xpack/countedkeyword/CountedKeywordFieldMapperTests.java

@@ -10,6 +10,8 @@ package org.elasticsearch.xpack.countedkeyword;
 import org.apache.lucene.index.DocValuesType;
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.IndexableField;
+import org.elasticsearch.common.Strings;
+import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.core.CheckedConsumer;
 import org.elasticsearch.core.Tuple;
 import org.elasticsearch.index.mapper.DocumentMapper;
@@ -20,12 +22,15 @@ import org.elasticsearch.plugins.Plugin;
 import org.elasticsearch.search.lookup.SourceFilter;
 import org.elasticsearch.test.ESTestCase;
 import org.elasticsearch.xcontent.XContentBuilder;
+import org.elasticsearch.xcontent.XContentFactory;
 import org.junit.AssumptionViolatedException;
 
 import java.io.IOException;
 import java.util.Collection;
 import java.util.Collections;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Set;
 import java.util.stream.Stream;
 
 import static org.hamcrest.Matchers.equalTo;
@@ -75,7 +80,6 @@ public class CountedKeywordFieldMapperTests extends MapperTestCase {
         DocumentMapper mapper = createSytheticSourceMapperService(mapping(b -> {
             b.startObject("field");
             minimalMapping(b);
-            b.field("synthetic_source_keep", "none");
             b.endObject();
         })).documentMapper();
 
@@ -94,7 +98,6 @@ public class CountedKeywordFieldMapperTests extends MapperTestCase {
         DocumentMapper mapper = createSytheticSourceMapperService(mapping(b -> {
             b.startObject("field");
             minimalMapping(b);
-            b.field("synthetic_source_keep", "none");
             b.endObject();
         })).documentMapper();
 
@@ -114,19 +117,32 @@ public class CountedKeywordFieldMapperTests extends MapperTestCase {
         assertThat(syntheticSource(mapper, new SourceFilter(null, new String[] { "field" }), buildInput), equalTo("{}"));
     }
 
-    @Override
-    public void testSyntheticSourceKeepAll() throws IOException {
-        // For now, native synthetic source is only supported when "synthetic_source_keep" mapping attribute is "none"
-    }
+    public void testSyntheticSourceIndexLevelKeepArrays() throws IOException {
+        SyntheticSourceExample example = syntheticSourceSupportForKeepTests(shouldUseIgnoreMalformed()).example(1);
+        XContentBuilder mappings = mapping(b -> {
+            b.startObject("field");
+            example.mapping().accept(b);
+            b.endObject();
+        });
 
-    @Override
-    public void testSyntheticSourceKeepArrays() throws IOException {
-        // For now, native synthetic source is only supported when "synthetic_source_keep" mapping attribute is "none"
-    }
+        var settings = Settings.builder()
+            .put("index.mapping.source.mode", "synthetic")
+            .put("index.mapping.synthetic_source_keep", "arrays")
+            .build();
+        DocumentMapper mapperAll = createMapperService(getVersion(), settings, () -> true, mappings).documentMapper();
 
-    @Override
-    public void testSyntheticSourceKeepNone() throws IOException {
-        // For now, native synthetic source is only supported when "synthetic_source_keep" mapping attribute is "none"
+        int elementCount = randomIntBetween(2, 5);
+        CheckedConsumer<XContentBuilder, IOException> buildInput = (XContentBuilder builder) -> {
+            example.buildInputArray(builder, elementCount);
+        };
+
+        var builder = XContentFactory.jsonBuilder();
+        builder.startObject();
+        buildInput.accept(builder);
+        builder.endObject();
+        String expected = Strings.toString(builder);
+        String actual = syntheticSource(mapperAll, buildInput);
+        assertThat(actual, equalTo(expected));
     }
 
     @Override
@@ -151,16 +167,21 @@ public class CountedKeywordFieldMapperTests extends MapperTestCase {
                 return new SyntheticSourceExample(in, out, this::mapping);
             }
 
+            private final Set<String> previousValues = new HashSet<>();
+
             private Tuple<String, String> generateValue() {
-                String v = ESTestCase.randomAlphaOfLength(5);
+                String v;
+                if (previousValues.size() > 0 && randomBoolean()) {
+                    v = randomFrom(previousValues);
+                } else {
+                    v = ESTestCase.randomAlphaOfLength(5);
+                    previousValues.add(v);
+                }
                 return Tuple.tuple(v, v);
             }
 
             private void mapping(XContentBuilder b) throws IOException {
                 minimalMapping(b);
-                // For now, synthetic source is only supported when "synthetic_source_keep" is "none".
-                // Once we implement true synthetic source support, we should remove this.
-                b.field("synthetic_source_keep", "none");
             }
 
             @Override

+ 4 - 1
x-pack/plugin/mapper-counted-keyword/src/test/java/org/elasticsearch/xpack/countedkeyword/CountedTermsAggregatorTests.java

@@ -12,6 +12,7 @@ import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.bytes.BytesArray;
 import org.elasticsearch.index.mapper.FieldMapper;
 import org.elasticsearch.index.mapper.MappedFieldType;
+import org.elasticsearch.index.mapper.Mapper;
 import org.elasticsearch.index.mapper.MapperBuilderContext;
 import org.elasticsearch.index.mapper.MappingLookup;
 import org.elasticsearch.index.mapper.SourceToParse;
@@ -40,7 +41,9 @@ public class CountedTermsAggregatorTests extends AggregatorTestCase {
     }
 
     public void testAggregatesCountedKeywords() throws Exception {
-        FieldMapper mapper = new CountedKeywordFieldMapper.Builder("stacktraces").build(MapperBuilderContext.root(false, false));
+        FieldMapper mapper = new CountedKeywordFieldMapper.Builder("stacktraces", Mapper.SourceKeepMode.NONE).build(
+            MapperBuilderContext.root(false, false)
+        );
         MappedFieldType fieldType = mapper.fieldType();
 
         CountedTermsAggregationBuilder aggregationBuilder = new CountedTermsAggregationBuilder("st").field("stacktraces");

+ 492 - 39
x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/counted_keyword/30_synthetic_source.yml

@@ -1,4 +1,4 @@
-setup:
+"Source values are mutated as expected":
   - requires:
       cluster_features: ["mapper.counted_keyword.synthetic_source_native_support"]
       reason: "Feature implemented"
@@ -14,7 +14,6 @@ setup:
             properties:
               events:
                 type: counted_keyword
-                synthetic_source_keep: none
 
 
   - do:
@@ -53,71 +52,525 @@ setup:
         id: "6"
         body: { "events": [null, null]}
 
+  - do:
+      index:
+        index: test-events
+        id: "7"
+        body: { "events": [["a", "b"], "a", ["c"], [["b"], "c"]]}
+
   - do:
       indices.refresh: { }
 
----
-"Source values are mutated as expected":
- - do:
-    search:
-      index: test-events
-      body:
-        query:
-          ids:
-            values: [1]
- - match:
-     hits.hits.0._source:
-       events: ["a", "a", "b", "c"]
+  - do:
+      search:
+        index: test-events
+        body:
+          query:
+            ids:
+              values: [1]
+  - match:
+      hits.hits.0._source:
+        events: ["a", "a", "b", "c"]
 
- - do:
-     search:
-       index: test-events
-       body:
+  - do:
+      search:
+        index: test-events
+        body:
          query:
            ids:
              values: [2]
- - match:
+  - match:
      hits.hits.0._source:
        events: ["a", "b", "b", "b", "c"]
 
- - do:
-     search:
-       index: test-events
-       body:
+  - do:
+      search:
+        index: test-events
+        body:
          query:
            ids:
              values: [3]
- - match:
+  - match:
      hits.hits.0._source:
        events: ["a", "b", "c", "c"]
 
- - do:
-     search:
-       index: test-events
-       body:
+  - do:
+      search:
+        index: test-events
+        body:
          query:
            ids:
              values: [4]
- - match:
+  - match:
      hits.hits.0._source:
        events: "a"
 
- - do:
-     search:
-       index: test-events
-       body:
+  - do:
+      search:
+        index: test-events
+        body:
          query:
            ids:
              values: [5]
- - match:
+  - match:
      hits.hits.0._source: {}
 
- - do:
-     search:
-       index: test-events
-       body:
+  - do:
+      search:
+        index: test-events
+        body:
          query:
            ids:
              values: [6]
- - match:
+  - match:
      hits.hits.0._source: {}
+
+  - do:
+      search:
+        index: test-events
+        body:
+          query:
+            ids:
+              values: [7]
+  - match:
+      hits.hits.0._source:
+        events: ["a", "a", "b", "b", "c", "c"]
+
+---
+
+"synthetic_source_keep value is respected":
+  - requires:
+      cluster_features: ["mapper.counted_keyword.synthetic_source_native_support"]
+      reason: "Feature implemented"
+
+  - do:
+      indices.create:
+        index: test-events
+        body:
+          settings:
+            index:
+              mapping.source.mode: synthetic
+          mappings:
+            properties:
+              events:
+                type: counted_keyword
+                synthetic_source_keep: all
+
+  - do:
+      index:
+        index: test-events
+        id: "1"
+        body: { "events": [ "a", "b", "a", "c" ] }
+
+  - do:
+      index:
+        index: test-events
+        id: "2"
+        body: { "events": [ "b", "b", "c", "a", "b" ] }
+
+  - do:
+      index:
+        index: test-events
+        id: "3"
+        body: { "events": [ "c", "a", null, "b", null, "c" ] }
+
+  - do:
+      index:
+        index: test-events
+        id: "4"
+        body: { "events": [ "a" ] }
+
+  - do:
+      index:
+        index: test-events
+        id: "5"
+        body: { "events": [ ] }
+
+  - do:
+      index:
+        index: test-events
+        id: "6"
+        body: { "events": [ null, null ] }
+
+  - do:
+      index:
+        index: test-events
+        id: "7"
+        body: { "events": [["a", "b"], "a", ["c"], [["b"], "c"]]}
+
+  - do:
+      indices.refresh: { }
+
+  - do:
+      search:
+        index: test-events
+        body:
+          query:
+            ids:
+              values: [ 1 ]
+  - match:
+      hits.hits.0._source:
+        events: [ "a", "b", "a", "c" ]
+
+  - do:
+      search:
+        index: test-events
+        body:
+          query:
+            ids:
+              values: [ 2 ]
+  - match:
+      hits.hits.0._source:
+        events: [ "b", "b", "c", "a", "b" ]
+
+  - do:
+      search:
+        index: test-events
+        body:
+          query:
+            ids:
+              values: [ 3 ]
+  - match:
+      hits.hits.0._source:
+        events: [ "c", "a", null, "b", null, "c" ]
+
+  - do:
+      search:
+        index: test-events
+        body:
+          query:
+            ids:
+              values: [ 4 ]
+  - match:
+      hits.hits.0._source:
+        events: [ "a" ]
+
+  - do:
+      search:
+        index: test-events
+        body:
+          query:
+            ids:
+              values: [ 5 ]
+  - match:
+      hits.hits.0._source:
+        events: [ ]
+
+  - do:
+      search:
+        index: test-events
+        body:
+          query:
+            ids:
+              values: [ 6 ]
+  - match:
+      hits.hits.0._source:
+        events: [ null, null ]
+
+  - do:
+      search:
+        index: test-events
+        body:
+          query:
+            ids:
+              values: [ 7 ]
+  - match:
+      hits.hits.0._source:
+        events: [["a", "b"], "a", ["c"], [["b"], "c"]]
+
+---
+
+"synthetic_source_keep value is not inherited":
+  - requires:
+      cluster_features: ["mapper.counted_keyword.synthetic_source_native_support"]
+      reason: "Feature implemented"
+
+  - do:
+      indices.create:
+        index: test-events
+        body:
+          settings:
+            index:
+              mapping.source.mode: synthetic
+          mappings:
+            properties:
+              event-object:
+                type: object
+                synthetic_source_keep: arrays
+                properties:
+                  event-object-2:
+                    type: object
+                    properties:
+                      events:
+                        type: counted_keyword
+  - do:
+      index:
+        index: test-events
+        id: "1"
+        body: { "event-object": { "event-object-2": { "events": [ "a", "b", "a", "c" ] } } }
+
+  - do:
+      index:
+        index: test-events
+        id: "2"
+        body: { "event-object": { "event-object-2": { "events": [ "b", "b", "c", "a", "b" ] } } }
+
+  - do:
+      index:
+        index: test-events
+        id: "3"
+        body: { "event-object": { "event-object-2": { "events": [ "c", "a", null, "b", null, "c" ] } } }
+
+  - do:
+      index:
+        index: test-events
+        id: "4"
+        body: { "event-object": { "event-object-2": { "events": [ "a" ] } } }
+
+  - do:
+      index:
+        index: test-events
+        id: "5"
+        body: { "event-object": { "event-object-2": { "events": [ ] } } }
+
+  - do:
+      index:
+        index: test-events
+        id: "6"
+        body: { "event-object": { "event-object-2": { "events": [ null, null ] } } }
+
+  - do:
+      index:
+        index: test-events
+        id: "7"
+        body: { "event-object": { "event-object-2": { "events": [["a", "b"], "a", ["c"], [["b"], "c"]] } } }
+
+  - do:
+      indices.refresh: { }
+
+  - do:
+      search:
+        index: test-events
+        body:
+          query:
+            ids:
+              values: [ 1 ]
+  - match:
+      hits.hits.0._source:
+        event-object:
+          event-object-2:
+            events: [ "a", "a", "b", "c" ]
+
+  - do:
+      search:
+        index: test-events
+        body:
+          query:
+            ids:
+              values: [ 2 ]
+  - match:
+      hits.hits.0._source:
+        event-object:
+          event-object-2:
+            events: [ "a", "b", "b", "b", "c" ]
+
+  - do:
+      search:
+        index: test-events
+        body:
+          query:
+            ids:
+              values: [ 3 ]
+  - match:
+      hits.hits.0._source:
+        event-object:
+          event-object-2:
+            events: [ "a", "b", "c", "c" ]
+
+  - do:
+      search:
+        index: test-events
+        body:
+          query:
+            ids:
+              values: [ 4 ]
+  - match:
+      hits.hits.0._source:
+        event-object:
+          event-object-2:
+            events: "a"
+
+  - do:
+      search:
+        index: test-events
+        body:
+          query:
+            ids:
+              values: [ 5 ]
+  - match:
+      hits.hits.0._source: {}
+
+  - do:
+      search:
+        index: test-events
+        body:
+          query:
+            ids:
+              values: [ 6 ]
+  - match:
+      hits.hits.0._source: {}
+
+  - do:
+      search:
+        index: test-events
+        body:
+          query:
+            ids:
+              values: [ 7 ]
+  - match:
+      hits.hits.0._source:
+        event-object:
+          event-object-2:
+            events: [ "a", "a", "b", "b", "c", "c" ]
+
+---
+
+"Index-level synthetic_source_keep value is respected":
+  - requires:
+      cluster_features: ["mapper.counted_keyword.synthetic_source_native_support"]
+      reason: "Feature implemented"
+
+  - do:
+      indices.create:
+        index: test-events
+        body:
+          settings:
+            index:
+              mapping.source.mode: synthetic
+              mapping.synthetic_source_keep: arrays
+          mappings:
+            properties:
+              events:
+                type: counted_keyword
+
+  - do:
+      index:
+        index: test-events
+        id: "1"
+        body: { "events": [ "a", "b", "a", "c" ] }
+
+  - do:
+      index:
+        index: test-events
+        id: "2"
+        body: { "events": [ "b", "b", "c", "a", "b" ] }
+
+  - do:
+      index:
+        index: test-events
+        id: "3"
+        body: { "events": [ "c", "a", null, "b", null, "c" ] }
+
+  - do:
+      index:
+        index: test-events
+        id: "4"
+        body: { "events": [ "a" ] }
+
+  - do:
+      index:
+        index: test-events
+        id: "5"
+        body: { "events": [ ] }
+
+  - do:
+      index:
+        index: test-events
+        id: "6"
+        body: { "events": [ null, null ] }
+
+  - do:
+      index:
+        index: test-events
+        id: "7"
+        body: { "events": [ [ "a", "b" ], "a", [ "c" ], [ [ "b" ], "c" ] ] }
+
+  - do:
+      indices.refresh: { }
+
+  - do:
+      search:
+        index: test-events
+        body:
+          query:
+            ids:
+              values: [ 1 ]
+  - match:
+      hits.hits.0._source:
+        events: [ "a", "b", "a", "c" ]
+
+  - do:
+      search:
+        index: test-events
+        body:
+          query:
+            ids:
+              values: [ 2 ]
+  - match:
+      hits.hits.0._source:
+        events: [ "b", "b", "c", "a", "b" ]
+
+  - do:
+      search:
+        index: test-events
+        body:
+          query:
+            ids:
+              values: [ 3 ]
+  - match:
+      hits.hits.0._source:
+        events: [ "c", "a", null, "b", null, "c" ]
+
+  - do:
+      search:
+        index: test-events
+        body:
+          query:
+            ids:
+              values: [ 4 ]
+  - match:
+      hits.hits.0._source:
+        events: [ "a" ]
+
+  - do:
+      search:
+        index: test-events
+        body:
+          query:
+            ids:
+              values: [ 5 ]
+  - match:
+      hits.hits.0._source:
+        events: [ ]
+
+  - do:
+      search:
+        index: test-events
+        body:
+          query:
+            ids:
+              values: [ 6 ]
+  - match:
+      hits.hits.0._source:
+        events: [ null, null ]
+
+  - do:
+      search:
+        index: test-events
+        body:
+          query:
+            ids:
+              values: [ 7 ]
+  - match:
+      hits.hits.0._source:
+        events: [["a", "b"], "a", ["c"], [["b"], "c"]]