Browse Source

Deduplicate fetching doc-values fields (#89094)

If a docvalues field matches multiple field patterns, then ES will 
return the value of that doc-values field multiple times. Like fetching
fields from source, we should deduplicate the matching doc-values
fields.
Nhat Nguyen 3 years ago
parent
commit
e3c33e2acd

+ 5 - 0
docs/changelog/89094.yaml

@@ -0,0 +1,5 @@
+pr: 89094
+summary: Deduplicate fetching doc-values fields
+area: Search
+type: bug
+issues: []

+ 1 - 0
rest-api-spec/build.gradle

@@ -80,6 +80,7 @@ tasks.named("yamlRestTestV7CompatTransform").configure { task ->
   task.skipTest("search.aggregation/20_terms/numeric profiler", "The profiler results aren't backwards compatible.")
   task.skipTest("migration/10_get_feature_upgrade_status/Get feature upgrade status", "Awaits backport")
   task.skipTest("search/330_fetch_fields/Test disable source", "Error no longer thrown")
+  task.skipTest("search/240_date_nanos/doc value fields are working as expected across date and date_nanos fields", "Fetching docvalues field multiple times is no longer allowed")
 
   task.replaceValueInMatch("_type", "_doc")
   task.addAllowedWarningRegex("\\[types removal\\].*")

+ 34 - 2
rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/240_date_nanos.yml

@@ -91,8 +91,40 @@ setup:
           docvalue_fields:
             - field: date
               format: strict_date_optional_time
+          sort:
+            - date: desc
+
+  - match: { hits.total: 2 }
+  - length: { hits.hits: 2 }
+  - match: { hits.hits.0._id: "date_ns_1" }
+  - match: { hits.hits.1._id: "date_ms_1" }
+  - match: { hits.hits.0.fields.date: [ "2018-10-29T12:12:12.123Z"] }
+  - match: { hits.hits.1.fields.date: [ "2018-10-29T12:12:12.987Z"] }
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        index: date*
+        body:
+          docvalue_fields:
             - field: date
               format: epoch_millis
+          sort:
+            - date: desc
+
+  - match: { hits.total: 2 }
+  - length: { hits.hits: 2 }
+  - match: { hits.hits.0._id: "date_ns_1" }
+  - match: { hits.hits.1._id: "date_ms_1" }
+  - match: { hits.hits.0.fields.date: [ "1540815132123.456789" ] }
+  - match: { hits.hits.1.fields.date: [ "1540815132987" ] }
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        index: date*
+        body:
+          docvalue_fields:
             - field: date
               format: uuuu-MM-dd'T'HH:mm:ss.SSSSSSSSSX
           sort:
@@ -102,8 +134,8 @@ setup:
   - length: { hits.hits: 2 }
   - match: { hits.hits.0._id: "date_ns_1" }
   - match: { hits.hits.1._id: "date_ms_1" }
-  - match: { hits.hits.0.fields.date: [ "2018-10-29T12:12:12.123Z", "1540815132123.456789", "2018-10-29T12:12:12.123456789Z" ] }
-  - match: { hits.hits.1.fields.date: [ "2018-10-29T12:12:12.987Z", "1540815132987", "2018-10-29T12:12:12.987000000Z" ] }
+  - match: { hits.hits.0.fields.date: [ "2018-10-29T12:12:12.123456789Z" ] }
+  - match: { hits.hits.1.fields.date: [ "2018-10-29T12:12:12.987000000Z" ] }
 
 ---
 "date histogram aggregation with date and date_nanos mapping":

+ 31 - 28
server/src/internalClusterTest/java/org/elasticsearch/search/fields/SearchFieldsIT.java

@@ -866,6 +866,9 @@ public class SearchFieldsIT extends ESIntegTestCase {
             .addDocValueField("boolean_field")
             .addDocValueField("binary_field")
             .addDocValueField("ip_field");
+        if (randomBoolean()) {
+            builder.addDocValueField("*_field");
+        }
         SearchResponse searchResponse = builder.get();
 
         assertThat(searchResponse.getHits().getTotalHits().value, equalTo(1L));
@@ -891,21 +894,21 @@ public class SearchFieldsIT extends ESIntegTestCase {
             )
         );
 
-        assertThat(searchResponse.getHits().getAt(0).getFields().get("byte_field").getValue().toString(), equalTo("1"));
-        assertThat(searchResponse.getHits().getAt(0).getFields().get("short_field").getValue().toString(), equalTo("2"));
-        assertThat(searchResponse.getHits().getAt(0).getFields().get("integer_field").getValue(), equalTo((Object) 3L));
-        assertThat(searchResponse.getHits().getAt(0).getFields().get("long_field").getValue(), equalTo((Object) 4L));
-        assertThat(searchResponse.getHits().getAt(0).getFields().get("float_field").getValue(), equalTo((Object) 5.0));
-        assertThat(searchResponse.getHits().getAt(0).getFields().get("double_field").getValue(), equalTo((Object) 6.0d));
+        assertThat(searchResponse.getHits().getAt(0).getFields().get("byte_field").getValues(), equalTo(List.of(1L)));
+        assertThat(searchResponse.getHits().getAt(0).getFields().get("short_field").getValues(), equalTo(List.of(2L)));
+        assertThat(searchResponse.getHits().getAt(0).getFields().get("integer_field").getValues(), equalTo(List.of(3L)));
+        assertThat(searchResponse.getHits().getAt(0).getFields().get("long_field").getValues(), equalTo(List.of(4L)));
+        assertThat(searchResponse.getHits().getAt(0).getFields().get("float_field").getValues(), equalTo(List.of(5.0)));
+        assertThat(searchResponse.getHits().getAt(0).getFields().get("double_field").getValues(), equalTo(List.of(6.0d)));
         assertThat(
             searchResponse.getHits().getAt(0).getFields().get("date_field").getValue(),
             equalTo(DateFormatter.forPattern("date_optional_time").format(date))
         );
-        assertThat(searchResponse.getHits().getAt(0).getFields().get("boolean_field").getValue(), equalTo((Object) true));
-        assertThat(searchResponse.getHits().getAt(0).getFields().get("text_field").getValue(), equalTo("foo"));
-        assertThat(searchResponse.getHits().getAt(0).getFields().get("keyword_field").getValue(), equalTo("foo"));
-        assertThat(searchResponse.getHits().getAt(0).getFields().get("binary_field").getValue(), equalTo("KmQ="));
-        assertThat(searchResponse.getHits().getAt(0).getFields().get("ip_field").getValue(), equalTo("::1"));
+        assertThat(searchResponse.getHits().getAt(0).getFields().get("boolean_field").getValues(), equalTo(List.of(true)));
+        assertThat(searchResponse.getHits().getAt(0).getFields().get("text_field").getValues(), equalTo(List.of("foo")));
+        assertThat(searchResponse.getHits().getAt(0).getFields().get("keyword_field").getValues(), equalTo(List.of("foo")));
+        assertThat(searchResponse.getHits().getAt(0).getFields().get("binary_field").getValues(), equalTo(List.of("KmQ=")));
+        assertThat(searchResponse.getHits().getAt(0).getFields().get("ip_field").getValues(), equalTo(List.of("::1")));
 
         builder = client().prepareSearch().setQuery(matchAllQuery()).addDocValueField("*field");
         searchResponse = builder.get();
@@ -933,21 +936,21 @@ public class SearchFieldsIT extends ESIntegTestCase {
             )
         );
 
-        assertThat(searchResponse.getHits().getAt(0).getFields().get("byte_field").getValue().toString(), equalTo("1"));
-        assertThat(searchResponse.getHits().getAt(0).getFields().get("short_field").getValue().toString(), equalTo("2"));
-        assertThat(searchResponse.getHits().getAt(0).getFields().get("integer_field").getValue(), equalTo((Object) 3L));
-        assertThat(searchResponse.getHits().getAt(0).getFields().get("long_field").getValue(), equalTo((Object) 4L));
-        assertThat(searchResponse.getHits().getAt(0).getFields().get("float_field").getValue(), equalTo((Object) 5.0));
-        assertThat(searchResponse.getHits().getAt(0).getFields().get("double_field").getValue(), equalTo((Object) 6.0d));
+        assertThat(searchResponse.getHits().getAt(0).getFields().get("byte_field").getValues(), equalTo(List.of(1L)));
+        assertThat(searchResponse.getHits().getAt(0).getFields().get("short_field").getValues(), equalTo(List.of(2L)));
+        assertThat(searchResponse.getHits().getAt(0).getFields().get("integer_field").getValues(), equalTo(List.of(3L)));
+        assertThat(searchResponse.getHits().getAt(0).getFields().get("long_field").getValues(), equalTo(List.of(4L)));
+        assertThat(searchResponse.getHits().getAt(0).getFields().get("float_field").getValues(), equalTo(List.of(5.0)));
+        assertThat(searchResponse.getHits().getAt(0).getFields().get("double_field").getValues(), equalTo(List.of(6.0d)));
         assertThat(
             searchResponse.getHits().getAt(0).getFields().get("date_field").getValue(),
             equalTo(DateFormatter.forPattern("date_optional_time").format(date))
         );
-        assertThat(searchResponse.getHits().getAt(0).getFields().get("boolean_field").getValue(), equalTo((Object) true));
-        assertThat(searchResponse.getHits().getAt(0).getFields().get("text_field").getValue(), equalTo("foo"));
-        assertThat(searchResponse.getHits().getAt(0).getFields().get("keyword_field").getValue(), equalTo("foo"));
-        assertThat(searchResponse.getHits().getAt(0).getFields().get("binary_field").getValue(), equalTo("KmQ="));
-        assertThat(searchResponse.getHits().getAt(0).getFields().get("ip_field").getValue(), equalTo("::1"));
+        assertThat(searchResponse.getHits().getAt(0).getFields().get("boolean_field").getValues(), equalTo(List.of(true)));
+        assertThat(searchResponse.getHits().getAt(0).getFields().get("text_field").getValues(), equalTo(List.of("foo")));
+        assertThat(searchResponse.getHits().getAt(0).getFields().get("keyword_field").getValues(), equalTo(List.of("foo")));
+        assertThat(searchResponse.getHits().getAt(0).getFields().get("binary_field").getValues(), equalTo(List.of("KmQ=")));
+        assertThat(searchResponse.getHits().getAt(0).getFields().get("ip_field").getValues(), equalTo(List.of("::1")));
 
         builder = client().prepareSearch()
             .setQuery(matchAllQuery())
@@ -968,12 +971,12 @@ public class SearchFieldsIT extends ESIntegTestCase {
             equalTo(newHashSet("byte_field", "short_field", "integer_field", "long_field", "float_field", "double_field", "date_field"))
         );
 
-        assertThat(searchResponse.getHits().getAt(0).getFields().get("byte_field").getValue(), equalTo("1.0"));
-        assertThat(searchResponse.getHits().getAt(0).getFields().get("short_field").getValue(), equalTo("2.0"));
-        assertThat(searchResponse.getHits().getAt(0).getFields().get("integer_field").getValue(), equalTo("3.0"));
-        assertThat(searchResponse.getHits().getAt(0).getFields().get("long_field").getValue(), equalTo("4.0"));
-        assertThat(searchResponse.getHits().getAt(0).getFields().get("float_field").getValue(), equalTo("5.0"));
-        assertThat(searchResponse.getHits().getAt(0).getFields().get("double_field").getValue(), equalTo("6.0"));
+        assertThat(searchResponse.getHits().getAt(0).getFields().get("byte_field").getValues(), equalTo(List.of("1.0")));
+        assertThat(searchResponse.getHits().getAt(0).getFields().get("short_field").getValues(), equalTo(List.of("2.0")));
+        assertThat(searchResponse.getHits().getAt(0).getFields().get("integer_field").getValues(), equalTo(List.of("3.0")));
+        assertThat(searchResponse.getHits().getAt(0).getFields().get("long_field").getValues(), equalTo(List.of("4.0")));
+        assertThat(searchResponse.getHits().getAt(0).getFields().get("float_field").getValues(), equalTo(List.of("5.0")));
+        assertThat(searchResponse.getHits().getAt(0).getFields().get("double_field").getValues(), equalTo(List.of("6.0")));
         assertThat(
             searchResponse.getHits().getAt(0).getFields().get("date_field").getValue(),
             equalTo(DateFormatter.forPattern("epoch_millis").format(date))

+ 9 - 5
server/src/main/java/org/elasticsearch/search/fetch/subphase/FetchDocValuesContext.java

@@ -10,9 +10,10 @@ package org.elasticsearch.search.fetch.subphase;
 import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.query.SearchExecutionContext;
 
-import java.util.ArrayList;
 import java.util.Collection;
+import java.util.LinkedHashMap;
 import java.util.List;
+import java.util.Map;
 
 /**
  * All the required context to pull a field from the doc values.
@@ -23,20 +24,23 @@ import java.util.List;
  */
 public class FetchDocValuesContext {
 
-    private final List<FieldAndFormat> fields = new ArrayList<>();
+    private final Collection<FieldAndFormat> fields;
 
     /**
      * Create a new FetchDocValuesContext using the provided input list.
      * Field patterns containing wildcards are resolved and unmapped fields are filtered out.
      */
     public FetchDocValuesContext(SearchExecutionContext searchExecutionContext, List<FieldAndFormat> fieldPatterns) {
+        // Use Linked HashMap to reserve the fetching order
+        final Map<String, FieldAndFormat> fieldToFormats = new LinkedHashMap<>();
         for (FieldAndFormat field : fieldPatterns) {
             Collection<String> fieldNames = searchExecutionContext.getMatchingFieldNames(field.field);
             for (String fieldName : fieldNames) {
-                fields.add(new FieldAndFormat(fieldName, field.format, field.includeUnmapped));
+                // the last matching field wins
+                fieldToFormats.put(fieldName, new FieldAndFormat(fieldName, field.format, field.includeUnmapped));
             }
         }
-
+        this.fields = fieldToFormats.values();
         int maxAllowedDocvalueFields = searchExecutionContext.getIndexSettings().getMaxDocvalueFields();
         if (fields.size() > maxAllowedDocvalueFields) {
             throw new IllegalArgumentException(
@@ -54,7 +58,7 @@ public class FetchDocValuesContext {
     /**
      * Returns the required docvalue fields.
      */
-    public List<FieldAndFormat> fields() {
+    public Collection<FieldAndFormat> fields() {
         return this.fields;
     }
 }

+ 43 - 0
server/src/test/java/org/elasticsearch/search/SearchServiceTests.java

@@ -79,6 +79,7 @@ import org.elasticsearch.search.builder.PointInTimeBuilder;
 import org.elasticsearch.search.builder.SearchSourceBuilder;
 import org.elasticsearch.search.fetch.FetchSearchResult;
 import org.elasticsearch.search.fetch.ShardFetchRequest;
+import org.elasticsearch.search.fetch.subphase.FieldAndFormat;
 import org.elasticsearch.search.internal.AliasFilter;
 import org.elasticsearch.search.internal.ReaderContext;
 import org.elasticsearch.search.internal.SearchContext;
@@ -126,6 +127,7 @@ import static org.hamcrest.CoreMatchers.instanceOf;
 import static org.hamcrest.CoreMatchers.is;
 import static org.hamcrest.CoreMatchers.notNullValue;
 import static org.hamcrest.CoreMatchers.startsWith;
+import static org.hamcrest.Matchers.containsInAnyOrder;
 import static org.hamcrest.Matchers.not;
 import static org.mockito.Mockito.mock;
 
@@ -592,6 +594,47 @@ public class SearchServiceTests extends ESSingleNodeTestCase {
         }
     }
 
+    public void testDeduplicateDocValuesFields() throws Exception {
+        createIndex("index", Settings.EMPTY, "_doc", "field1", "type=date", "field2", "type=date");
+        client().prepareIndex("index")
+            .setId("1")
+            .setSource("field1", "2022-08-03", "field2", "2022-08-04")
+            .setRefreshPolicy(IMMEDIATE)
+            .get();
+        SearchService service = getInstanceFromNode(SearchService.class);
+        IndicesService indicesService = getInstanceFromNode(IndicesService.class);
+        IndexService indexService = indicesService.indexServiceSafe(resolveIndex("index"));
+        IndexShard indexShard = indexService.getShard(0);
+
+        try (ReaderContext reader = createReaderContext(indexService, indexShard)) {
+            SearchRequest searchRequest = new SearchRequest().allowPartialSearchResults(true);
+            SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
+            searchRequest.source(searchSourceBuilder);
+            searchSourceBuilder.docValueField("f*");
+            if (randomBoolean()) {
+                searchSourceBuilder.docValueField("field*");
+            }
+            if (randomBoolean()) {
+                searchSourceBuilder.docValueField("*2");
+            }
+            ShardSearchRequest request = new ShardSearchRequest(
+                OriginalIndices.NONE,
+                searchRequest,
+                indexShard.shardId(),
+                0,
+                1,
+                new AliasFilter(null, Strings.EMPTY_ARRAY),
+                1.0f,
+                -1,
+                null
+            );
+            try (SearchContext context = service.createContext(reader, request, mock(SearchShardTask.class), randomBoolean())) {
+                Collection<FieldAndFormat> fields = context.docValuesContext().fields();
+                assertThat(fields, containsInAnyOrder(new FieldAndFormat("field1", null), new FieldAndFormat("field2", null)));
+            }
+        }
+    }
+
     /**
      * test that getting more than the allowed number of script_fields throws an exception
      */