소스 검색

fix synthetic _source for sparse _doc_count field (#91769)

If the `_doc_count` field is sparse, we were using Lucene incorrectly to
read its values. This fixes how we interact with the iterator to load
the values.

Closes #91731
Nik Everett 2 년 전
부모
커밋
dcfe6a3253

+ 6 - 0
docs/changelog/91769.yaml

@@ -0,0 +1,6 @@
+pr: 91769
+summary: Fix synthetic `_source` for sparse `_doc_count` field
+area: TSDB
+type: bug
+issues:
+ - 91731

+ 93 - 1
rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/get/100_synthetic_source.yml

@@ -600,6 +600,7 @@ _doc_count:
             _source:
               mode: synthetic
 
+  # with _doc_count
   - do:
       index:
         index:   test
@@ -608,7 +609,6 @@ _doc_count:
         body:
           _doc_count: 3
           foo: bar
-
   - do:
       get:
         index: test
@@ -623,6 +623,98 @@ _doc_count:
         foo: bar
   - is_false: fields
 
+  # without _doc_count
+  - do:
+      index:
+        index:   test
+        id:      2
+        refresh: true
+        body:
+          foo: baz
+  - do:
+      get:
+        index: test
+        id:    2
+  - match: {_index: "test"}
+  - match: {_id: "2"}
+  - match: {_version: 1}
+  - match: {found: true}
+  - match:
+      _source:
+        foo: baz
+  - is_false: fields
+
+  # without immediately refreshing with _doc_count
+  - do:
+      index:
+        index:   test
+        id:      3
+        body:
+          _doc_count: 3
+          foo: qux
+  - do:
+      get:
+        index: test
+        id:    3
+  - match: {_index: "test"}
+  - match: {_id: "3"}
+  - match: {_version: 1}
+  - match: {found: true}
+  - match:
+      _source:
+        _doc_count: 3
+        foo: qux
+  - is_false: fields
+
+  # without immediately refreshing without _doc_count
+  - do:
+      index:
+        index:   test
+        id:      4
+        body:
+          foo: quux
+  - do:
+      get:
+        index: test
+        id:    4
+  - match: {_index: "test"}
+  - match: {_id: "4"}
+  - match: {_version: 1}
+  - match: {found: true}
+  - match:
+      _source:
+        foo: quux
+  - is_false: fields
+
+  # refresh all at once
+  - do:
+      indices.refresh: {}
+  - do:
+      get:
+        index: test
+        id:    3
+  - match: {_index: "test"}
+  - match: {_id: "3"}
+  - match: {_version: 1}
+  - match: {found: true}
+  - match:
+      _source:
+        _doc_count: 3
+        foo: qux
+  - is_false: fields
+  - do:
+      get:
+        index: test
+        id:    4
+  - match: {_index: "test"}
+  - match: {_id: "4"}
+  - match: {_version: 1}
+  - match: {found: true}
+  - match:
+      _source:
+        foo: quux
+  - is_false: fields
+
 ---
 ip with ignore_malformed:
   - skip:

+ 60 - 0
rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/400_synthetic_source.yml

@@ -436,3 +436,63 @@ _source filtering:
   - match:
       hits.hits.0._source:
         kwd: foo
+
+---
+_doc_count:
+  - skip:
+      version: " - 8.6.99"
+      reason: bug caused by many not having _doc_count fixed in 8.7.0
+
+  - do:
+      indices.create:
+        index: test
+        body:
+          settings:
+            number_of_replicas: 0
+          mappings:
+            _source:
+              mode: synthetic
+
+  - do:
+      index:
+        index:   test
+        id:      2
+        body:
+          foo: baz
+  - do:
+      index:
+        index:   test
+        id:      3
+        body:
+          foo: baz
+  - do:
+      index:
+        index:   test
+        id:      4
+        body:
+          foo: baz
+  - do:
+      index:
+        index:   test
+        id:      1
+        body:
+          _doc_count: 3
+          foo: bar
+  - do:
+      indices.refresh: {}
+
+  - do:
+      search:
+        index: test
+        body:
+          sort: foo.keyword
+  - is_false: hits.hits.0.fields
+  - is_false: hits.hits.1.fields
+  - match:
+      hits.hits.0._source:
+        _doc_count: 3
+        foo: bar
+  - match:
+      hits.hits.1._source:
+        foo: baz
+

+ 9 - 1
server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java

@@ -155,7 +155,15 @@ public class DocCountFieldMapper extends MetadataFieldMapper {
                 hasValue = false;
                 return null;
             }
-            return docId -> hasValue = docId == postings.advance(docId);
+            return docId -> {
+                if (docId < postings.docID()) {
+                    return hasValue = false;
+                }
+                if (docId == postings.docID()) {
+                    return hasValue = true;
+                }
+                return hasValue = docId == postings.advance(docId);
+            };
         }
 
         @Override

+ 31 - 0
server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java

@@ -110,4 +110,35 @@ public class DocCountFieldMapperTests extends MetadataMapperTestCase {
             }
         });
     }
+
+    public void testSyntheticSourceManyDoNotHave() throws IOException {
+        MapperService mapper = createMapperService(syntheticSourceMapping(b -> b.startObject("doc").field("type", "integer").endObject()));
+        List<Integer> counts = randomList(2, 10000, () -> randomBoolean() ? null : between(1, Integer.MAX_VALUE));
+        withLuceneIndex(mapper, iw -> {
+            int d = 0;
+            for (Integer c : counts) {
+                int doc = d++;
+                iw.addDocument(mapper.documentMapper().parse(source(b -> {
+                    b.field("doc", doc);
+                    if (c != null) {
+                        b.field(CONTENT_TYPE, c);
+                    }
+                })).rootDoc());
+            }
+        }, reader -> {
+            SourceLoader loader = mapper.mappingLookup().newSourceLoader();
+            assertTrue(loader.requiredStoredFields().isEmpty());
+            for (LeafReaderContext leaf : reader.leaves()) {
+                int[] docIds = IntStream.range(0, leaf.reader().maxDoc()).toArray();
+                SourceLoader.Leaf sourceLoaderLeaf = loader.leaf(leaf.reader(), docIds);
+                LeafStoredFieldLoader storedFieldLoader = StoredFieldLoader.empty().getLoader(leaf, docIds);
+                for (int docId : docIds) {
+                    String source = sourceLoaderLeaf.source(storedFieldLoader, docId).internalSourceRef().utf8ToString();
+                    int doc = (int) JsonXContent.jsonXContent.createParser(XContentParserConfiguration.EMPTY, source).map().get("doc");
+                    String docCountPart = counts.get(doc) == null ? "" : "\"_doc_count\":" + counts.get(doc) + ",";
+                    assertThat("doc " + docId, source, equalTo("{" + docCountPart + "\"doc\":" + doc + "}"));
+                }
+            }
+        });
+    }
 }