Browse Source

Support index sorting with nested fields (#110251)

This PR piggy-backs on recent changes in Lucene 9.11.1
(https://github.com/apache/lucene/pull/12829,
https://github.com/apache/lucene/pull/13341/), setting the parent doc
when nested fields are present. This allows moving nested documents
along with parent ones during sorting.

With this change, sorting is now allowed on fields outside nested
objects. Sorting on fields within nested objects is still not supported
(throws an exception).

Fixes #107349
Kostas Krikellas 1 year ago
parent
commit
6ae652f90e

+ 13 - 0
docs/changelog/110251.yaml

@@ -0,0 +1,13 @@
+pr: 110251
+summary: Support index sorting with nested fields
+area: Logs
+type: enhancement
+issues:
+ - 107349
+highlight:
+  title: Index sorting on indexes with nested fields
+  body: |-
+    Index sorting is now supported for indexes with mappings containing nested objects.
+    The index sort spec (as specified by `index.sort.field`) can't contain any nested
+    fields, still.
+  notable: false

+ 2 - 3
docs/reference/index-modules/index-sorting.asciidoc

@@ -6,9 +6,8 @@ inside each Shard will be sorted. By default Lucene does not apply any sort.
 The `index.sort.*` settings define which fields should be used to sort the documents inside each Segment.
 
 [WARNING]
-nested fields are not compatible with index sorting because they rely on the assumption
-that nested documents are stored in contiguous doc ids, which can be broken by index sorting.
-An error will be thrown if index sorting is activated on an index that contains nested fields.
+It is allowed to apply index sorting to mappings with nested objects, so long as the
+`index.sort.*` setting contains no nested fields.
 
 For instance the following example shows how to define a sort on a single field:
 

+ 99 - 0
rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.sort/20_nested.yml

@@ -0,0 +1,99 @@
+---
+sort doc with nested object:
+  - requires:
+      cluster_features: ["mapper.index_sorting_on_nested"]
+      reason: uses index sorting on nested fields
+  - do:
+      indices.create:
+        index: test
+        body:
+          settings:
+            index.sort.field: name
+          mappings:
+            properties:
+              name:
+                type: keyword
+              nested_field:
+                type: nested
+              nested_array:
+                type: nested
+              other:
+                type: object
+
+  - do:
+      bulk:
+        index: test
+        refresh: true
+        body:
+          - '{ "create": { } }'
+          - '{ "name": "aaaa", "nested_field": {"a": 1, "b": 2}, "nested_array": [{ "a": 10, "b": 20 }, { "a": 100, "b": 200 }], "other": { "value": "A" } }'
+          - '{ "create": { } }'
+          - '{ "name": "cccc", "nested_field": {"a": 3, "b": 4}, "nested_array": [{ "a": 30, "b": 40 }, { "a": 300, "b": 400 }], "other": { "value": "C"} }'
+          - '{ "create": { } }'
+          - '{ "nested_field": {"a": 7, "b": 8}, "nested_array": [{ "a": 70, "b": 80 }, { "a": 700, "b": 800 }], "other": { "value": "D"} }'
+          - '{ "create": { } }'
+          - '{ "name": "bbbb", "nested_field": {"a": 5, "b": 6}, "nested_array": [{ "a": 50, "b": 60 }, { "a": 500, "b": 600 }], "other": { "value": "B"} }'
+
+  - do:
+      search:
+        index: test
+
+  - match: { hits.total.value: 4 }
+  - match: { hits.hits.0._source.name: aaaa }
+  - match: { hits.hits.0._source.nested_field.a: 1 }
+  - match: { hits.hits.0._source.nested_field.b: 2 }
+  - match: { hits.hits.0._source.nested_array.0.a: 10 }
+  - match: { hits.hits.0._source.nested_array.0.b: 20 }
+  - match: { hits.hits.0._source.nested_array.1.a: 100 }
+  - match: { hits.hits.0._source.nested_array.1.b: 200 }
+  - match: { hits.hits.0._source.other.value: A }
+  - match: { hits.hits.1._source.name: bbbb }
+  - match: { hits.hits.1._source.nested_field.a: 5 }
+  - match: { hits.hits.1._source.nested_field.b: 6 }
+  - match: { hits.hits.1._source.nested_array.0.a: 50 }
+  - match: { hits.hits.1._source.nested_array.0.b: 60 }
+  - match: { hits.hits.1._source.nested_array.1.a: 500 }
+  - match: { hits.hits.1._source.nested_array.1.b: 600 }
+  - match: { hits.hits.1._source.other.value: B }
+  - match: { hits.hits.2._source.name: cccc }
+  - match: { hits.hits.2._source.nested_field.a: 3 }
+  - match: { hits.hits.2._source.nested_field.b: 4 }
+  - match: { hits.hits.2._source.nested_array.0.a: 30 }
+  - match: { hits.hits.2._source.nested_array.0.b: 40 }
+  - match: { hits.hits.2._source.nested_array.1.a: 300 }
+  - match: { hits.hits.2._source.nested_array.1.b: 400 }
+  - match: { hits.hits.2._source.other.value: C }
+  - is_false: hits.hits.3._source.name
+  - match: { hits.hits.3._source.nested_field.a: 7 }
+  - match: { hits.hits.3._source.nested_field.b: 8 }
+  - match: { hits.hits.3._source.nested_array.0.a: 70 }
+  - match: { hits.hits.3._source.nested_array.0.b: 80 }
+  - match: { hits.hits.3._source.nested_array.1.a: 700 }
+  - match: { hits.hits.3._source.nested_array.1.b: 800 }
+  - match: { hits.hits.3._source.other.value: D }
+
+
+---
+sort doc on nested field:
+  - requires:
+      cluster_features: [ "mapper.index_sorting_on_nested" ]
+      reason: uses index sorting on nested fields
+  - do:
+      catch: /cannot apply index sort to field \[nested_field\.foo\] under nested object \[nested_field\]/
+      indices.create:
+        index: test
+        body:
+          settings:
+            index.sort.field: nested_field.foo
+            index.sort.mode: min
+          mappings:
+            properties:
+              name:
+                type: keyword
+              nested_field:
+                type: nested
+                properties:
+                  foo:
+                    type: keyword
+                  bar:
+                    type: keyword

+ 4 - 6
rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/logsdb/10_settings.yml

@@ -392,6 +392,7 @@ override sort mode settings:
 ---
 override sort field using nested field type in sorting:
   - requires:
+      cluster_features: ["mapper.index_sorting_on_nested"]
       test_runner_features: [ capabilities ]
       capabilities:
         - method: PUT
@@ -433,11 +434,12 @@ override sort field using nested field type in sorting:
 
   - match: { error.root_cause.0.type: "illegal_argument_exception" }
   - match: { error.type: "illegal_argument_exception" }
-  - match: { error.reason: "cannot have nested fields when index sort is activated" }
+  - match: { error.reason: "cannot apply index sort to field [nested] under nested object [nested]" }
 
 ---
 override sort field using nested field type:
   - requires:
+      cluster_features: ["mapper.index_sorting_on_nested"]
       test_runner_features: [ capabilities ]
       capabilities:
         - method: PUT
@@ -446,7 +448,6 @@ override sort field using nested field type:
       reason: "Support for 'logs' index mode capability required"
 
   - do:
-      catch: bad_request
       indices.create:
         index: test-nested
         body:
@@ -474,10 +475,7 @@ override sort field using nested field type:
                 properties:
                   keywords:
                     type: keyword
-
-  - match: { error.root_cause.0.type: "illegal_argument_exception" }
-  - match: { error.type: "illegal_argument_exception" }
-  - match: { error.reason: "cannot have nested fields when index sort is activated" }
+  - is_false: error
 
 ---
 routing path not allowed in logs mode:

+ 2 - 1
server/src/main/java/org/elasticsearch/index/IndexService.java

@@ -525,7 +525,8 @@ public class IndexService extends AbstractIndexComponent implements IndicesClust
                 this.indexSettings,
                 directory,
                 lock,
-                new StoreCloseListener(shardId, () -> eventListener.onStoreClosed(shardId))
+                new StoreCloseListener(shardId, () -> eventListener.onStoreClosed(shardId)),
+                this.indexSettings.getIndexSortConfig().hasIndexSort()
             );
             eventListener.onStoreCreated(shardId);
             indexShard = new IndexShard(

+ 1 - 0
server/src/main/java/org/elasticsearch/index/IndexVersions.java

@@ -111,6 +111,7 @@ public class IndexVersions {
     public static final IndexVersion UNIQUE_TOKEN_FILTER_POS_FIX = def(8_509_00_0, Version.LUCENE_9_11_0);
     public static final IndexVersion ADD_SECURITY_MIGRATION = def(8_510_00_0, Version.LUCENE_9_11_0);
     public static final IndexVersion UPGRADE_TO_LUCENE_9_11_1 = def(8_511_00_0, Version.LUCENE_9_11_1);
+    public static final IndexVersion INDEX_SORTING_ON_NESTED = def(8_512_00_0, Version.LUCENE_9_11_1);
     /*
      * STOP! READ THIS FIRST! No, really,
      *        ____ _____ ___  ____  _        ____  _____    _    ____    _____ _   _ ___ ____    _____ ___ ____  ____ _____ _

+ 1 - 0
server/src/main/java/org/elasticsearch/index/engine/Engine.java

@@ -128,6 +128,7 @@ public abstract class Engine implements Closeable {
     public static final String CAN_MATCH_SEARCH_SOURCE = "can_match";
     protected static final String DOC_STATS_SOURCE = "doc_stats";
     public static final long UNKNOWN_PRIMARY_TERM = -1L;
+    public static final String ROOT_DOC_FIELD_NAME = "__root_doc_for_nested";
 
     protected final ShardId shardId;
     protected final Logger logger;

+ 5 - 0
server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java

@@ -72,6 +72,7 @@ import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexMode;
 import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.IndexVersion;
+import org.elasticsearch.index.IndexVersions;
 import org.elasticsearch.index.VersionType;
 import org.elasticsearch.index.cache.query.TrivialQueryCachingPolicy;
 import org.elasticsearch.index.mapper.DocumentParser;
@@ -2728,6 +2729,10 @@ public class InternalEngine extends Engine {
         }
         if (config().getIndexSort() != null) {
             iwc.setIndexSort(config().getIndexSort());
+            if (config().getIndexSettings().getIndexVersionCreated().onOrAfter(IndexVersions.INDEX_SORTING_ON_NESTED)) {
+                // Needed to support index sorting in the presence of nested objects.
+                iwc.setParentField(ROOT_DOC_FIELD_NAME);
+            }
         }
         // Provide a custom leaf sorter, so that index readers opened from this writer
         // will have its leaves sorted according the given leaf sorter.

+ 18 - 1
server/src/main/java/org/elasticsearch/index/mapper/DocumentMapper.java

@@ -9,7 +9,9 @@
 package org.elasticsearch.index.mapper;
 
 import org.elasticsearch.common.compress.CompressedXContent;
+import org.elasticsearch.features.NodeFeature;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.IndexSortConfig;
 import org.elasticsearch.index.IndexVersion;
 import org.elasticsearch.index.IndexVersions;
 
@@ -21,6 +23,9 @@ public class DocumentMapper {
     private final MappingLookup mappingLookup;
     private final DocumentParser documentParser;
     private final MapperMetrics mapperMetrics;
+    private final IndexVersion indexVersion;
+
+    static final NodeFeature INDEX_SORTING_ON_NESTED = new NodeFeature("mapper.index_sorting_on_nested");
 
     /**
      * Create a new {@link DocumentMapper} that holds empty mappings.
@@ -54,6 +59,7 @@ public class DocumentMapper {
         this.mappingLookup = MappingLookup.fromMapping(mapping);
         this.mappingSource = source;
         this.mapperMetrics = mapperMetrics;
+        this.indexVersion = version;
 
         assert mapping.toCompressedXContent().equals(source) || isSyntheticSourceMalformed(source, version)
             : "provided source [" + source + "] differs from mapping [" + mapping.toCompressedXContent() + "]";
@@ -134,7 +140,18 @@ public class DocumentMapper {
         }
 
         if (settings.getIndexSortConfig().hasIndexSort() && mappers().nestedLookup() != NestedLookup.EMPTY) {
-            throw new IllegalArgumentException("cannot have nested fields when index sort is activated");
+            if (indexVersion.before(IndexVersions.INDEX_SORTING_ON_NESTED)) {
+                throw new IllegalArgumentException("cannot have nested fields when index sort is activated");
+            }
+            for (String field : settings.getValue(IndexSortConfig.INDEX_SORT_FIELD_SETTING)) {
+                for (NestedObjectMapper nestedObjectMapper : mappers().nestedLookup().getNestedMappers().values()) {
+                    if (field.startsWith(nestedObjectMapper.fullPath())) {
+                        throw new IllegalArgumentException(
+                            "cannot apply index sort to field [" + field + "] under nested object [" + nestedObjectMapper.fullPath() + "]"
+                        );
+                    }
+                }
+            }
         }
         List<String> routingPaths = settings.getIndexMetadata().getRoutingPaths();
         for (String path : routingPaths) {

+ 2 - 1
server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java

@@ -26,7 +26,8 @@ public class MapperFeatures implements FeatureSpecification {
             RangeFieldMapper.NULL_VALUES_OFF_BY_ONE_FIX,
             SourceFieldMapper.SYNTHETIC_SOURCE_FALLBACK,
             DenseVectorFieldMapper.INT4_QUANTIZATION,
-            DenseVectorFieldMapper.BIT_VECTORS
+            DenseVectorFieldMapper.BIT_VECTORS,
+            DocumentMapper.INDEX_SORTING_ON_NESTED
         );
     }
 }

+ 5 - 0
server/src/main/java/org/elasticsearch/index/shard/StoreRecovery.java

@@ -36,6 +36,7 @@ import org.elasticsearch.core.Releasables;
 import org.elasticsearch.core.TimeValue;
 import org.elasticsearch.index.Index;
 import org.elasticsearch.index.IndexVersion;
+import org.elasticsearch.index.IndexVersions;
 import org.elasticsearch.index.engine.Engine;
 import org.elasticsearch.index.mapper.MapperService;
 import org.elasticsearch.index.seqno.SequenceNumbers;
@@ -197,6 +198,10 @@ public final class StoreRecovery {
             .setIndexCreatedVersionMajor(luceneIndexCreatedVersionMajor);
         if (indexSort != null) {
             iwc.setIndexSort(indexSort);
+            if (indexMetadata != null && indexMetadata.getCreationVersion().onOrAfter(IndexVersions.INDEX_SORTING_ON_NESTED)) {
+                // Needed to support index sorting in the presence of nested objects.
+                iwc.setParentField(Engine.ROOT_DOC_FIELD_NAME);
+            }
         }
 
         try (IndexWriter writer = new IndexWriter(new StatsDirectoryWrapper(hardLinkOrCopyTarget, indexRecoveryStats), iwc)) {

+ 20 - 6
server/src/main/java/org/elasticsearch/index/store/Store.java

@@ -155,12 +155,20 @@ public class Store extends AbstractIndexShardComponent implements Closeable, Ref
     private final OnClose onClose;
 
     private final AbstractRefCounted refCounter = AbstractRefCounted.of(this::closeInternal); // close us once we are done
+    private boolean hasIndexSort;
 
     public Store(ShardId shardId, IndexSettings indexSettings, Directory directory, ShardLock shardLock) {
-        this(shardId, indexSettings, directory, shardLock, OnClose.EMPTY);
+        this(shardId, indexSettings, directory, shardLock, OnClose.EMPTY, false);
     }
 
-    public Store(ShardId shardId, IndexSettings indexSettings, Directory directory, ShardLock shardLock, OnClose onClose) {
+    public Store(
+        ShardId shardId,
+        IndexSettings indexSettings,
+        Directory directory,
+        ShardLock shardLock,
+        OnClose onClose,
+        boolean hasIndexSort
+    ) {
         super(shardId, indexSettings);
         this.directory = new StoreDirectory(
             byteSizeDirectory(directory, indexSettings, logger),
@@ -168,6 +176,7 @@ public class Store extends AbstractIndexShardComponent implements Closeable, Ref
         );
         this.shardLock = shardLock;
         this.onClose = onClose;
+        this.hasIndexSort = hasIndexSort;
 
         assert onClose != null;
         assert shardLock != null;
@@ -1541,20 +1550,25 @@ public class Store extends AbstractIndexShardComponent implements Closeable, Ref
         return userData;
     }
 
-    private static IndexWriter newTemporaryAppendingIndexWriter(final Directory dir, final IndexCommit commit) throws IOException {
+    private IndexWriter newTemporaryAppendingIndexWriter(final Directory dir, final IndexCommit commit) throws IOException {
         IndexWriterConfig iwc = newTemporaryIndexWriterConfig().setIndexCommit(commit).setOpenMode(IndexWriterConfig.OpenMode.APPEND);
         return new IndexWriter(dir, iwc);
     }
 
-    private static IndexWriter newTemporaryEmptyIndexWriter(final Directory dir, final Version luceneVersion) throws IOException {
+    private IndexWriter newTemporaryEmptyIndexWriter(final Directory dir, final Version luceneVersion) throws IOException {
         IndexWriterConfig iwc = newTemporaryIndexWriterConfig().setOpenMode(IndexWriterConfig.OpenMode.CREATE)
             .setIndexCreatedVersionMajor(luceneVersion.major);
         return new IndexWriter(dir, iwc);
     }
 
-    private static IndexWriterConfig newTemporaryIndexWriterConfig() {
+    private IndexWriterConfig newTemporaryIndexWriterConfig() {
         // this config is only used for temporary IndexWriter instances, used to initialize the index or update the commit data,
         // so we don't want any merges to happen
-        return indexWriterConfigWithNoMerging(null).setSoftDeletesField(Lucene.SOFT_DELETES_FIELD).setCommitOnClose(false);
+        var iwc = indexWriterConfigWithNoMerging(null).setSoftDeletesField(Lucene.SOFT_DELETES_FIELD).setCommitOnClose(false);
+        if (hasIndexSort && indexSettings.getIndexVersionCreated().onOrAfter(IndexVersions.INDEX_SORTING_ON_NESTED)) {
+            // Needed to support index sorting in the presence of nested objects.
+            iwc.setParentField(Engine.ROOT_DOC_FIELD_NAME);
+        }
+        return iwc;
     }
 }

+ 30 - 5
server/src/test/java/org/elasticsearch/index/mapper/MapperServiceTests.java

@@ -125,27 +125,52 @@ public class MapperServiceTests extends MapperServiceTestCase {
     }
 
     public void testIndexSortWithNestedFields() throws IOException {
-        Settings settings = Settings.builder().put("index.sort.field", "foo").build();
+        IndexVersion oldVersion = IndexVersionUtils.getPreviousVersion(IndexVersions.INDEX_SORTING_ON_NESTED);
         IllegalArgumentException invalidNestedException = expectThrows(
             IllegalArgumentException.class,
-            () -> createMapperService(settings, mapping(b -> {
+            () -> createMapperService(oldVersion, settings(oldVersion).put("index.sort.field", "foo").build(), () -> true, mapping(b -> {
                 b.startObject("nested_field").field("type", "nested").endObject();
                 b.startObject("foo").field("type", "keyword").endObject();
             }))
         );
 
-        assertThat(invalidNestedException.getMessage(), containsString("cannot have nested fields when index sort is activated"));
+        Settings settings = settings(IndexVersions.INDEX_SORTING_ON_NESTED).put("index.sort.field", "foo").build();
+        DocumentMapper mapper = createMapperService(settings, mapping(b -> {
+            b.startObject("nested_field").field("type", "nested").endObject();
+            b.startObject("foo").field("type", "keyword").endObject();
+        })).documentMapper();
+
+        List<LuceneDocument> docs = mapper.parse(source(b -> {
+            b.field("name", "foo");
+            b.startObject("nested_field").field("foo", "bar").endObject();
+        })).docs();
+        assertEquals(2, docs.size());
+        assertEquals(docs.get(1), docs.get(0).getParent());
 
         MapperService mapperService = createMapperService(
             settings,
             mapping(b -> b.startObject("foo").field("type", "keyword").endObject())
         );
-        invalidNestedException = expectThrows(IllegalArgumentException.class, () -> merge(mapperService, mapping(b -> {
+        merge(mapperService, mapping(b -> {
             b.startObject("nested_field");
             b.field("type", "nested");
             b.endObject();
+        }));
+
+        Settings settings2 = Settings.builder().put("index.sort.field", "foo.bar").build();
+        invalidNestedException = expectThrows(IllegalArgumentException.class, () -> createMapperService(settings2, mapping(b -> {
+            b.startObject("foo");
+            {
+                b.field("type", "nested");
+                b.startObject("properties");
+                {
+                    b.startObject("bar").field("type", "keyword").endObject();
+                }
+                b.endObject();
+            }
+            b.endObject();
         })));
-        assertThat(invalidNestedException.getMessage(), containsString("cannot have nested fields when index sort is activated"));
+        assertEquals("cannot apply index sort to field [foo.bar] under nested object [foo]", invalidNestedException.getMessage());
     }
 
     public void testFieldAliasWithMismatchedNestedScope() throws Throwable {

+ 1 - 1
server/src/test/java/org/elasticsearch/index/store/StoreTests.java

@@ -744,7 +744,7 @@ public class StoreTests extends ESTestCase {
             assertEquals(shardId, theLock.getShardId());
             assertEquals(lock, theLock);
             count.incrementAndGet();
-        });
+        }, false);
         assertEquals(count.get(), 0);
 
         final int iters = randomIntBetween(1, 10);

+ 1 - 1
x-pack/plugin/core/src/main/java/org/elasticsearch/snapshots/sourceonly/SourceOnlySnapshotRepository.java

@@ -173,7 +173,7 @@ public final class SourceOnlySnapshotRepository extends FilterRepository {
                 protected void closeInternal() {
                     // do nothing;
                 }
-            }, Store.OnClose.EMPTY);
+            }, Store.OnClose.EMPTY, mapperService.getIndexSettings().getIndexSortConfig().hasIndexSort());
             Supplier<Query> querySupplier = mapperService.hasNested()
                 ? () -> Queries.newNestedFilter(mapperService.getIndexSettings().getIndexVersionCreated())
                 : null;