瀏覽代碼

Small adjustments to metadata fields fetching (#107071)

While looking at #106325, which moves fetching of metadata fields out of the StoredFieldsPhase, I noticed some small adjustments that we can make to FieldsVisitor, CustomFieldsVisitor and StoredFieldsPhase.

These are not functional changes, the only goal is to make things simpler and clearer, hopefully.

- add test coverage for situation where _routing is provided with docs, hence returned by default
make a stronger connection between CustomFieldsVisitor and FieldsVisitor around fields that are treated differently (_ignored, _routing, _id and _source)
- explicitly exclude _id from StoredFieldsPhase like we already do for _source as it's retrieved separately
- move the _source exclusion in StoredFieldsPhase to after calling getMatchingFieldNames, so that patterns that match _source still exclude it
Luca Cavanna 1 年之前
父節點
當前提交
ddb1b7463f

+ 40 - 0
server/src/internalClusterTest/java/org/elasticsearch/search/source/MetadataFetchingIT.java

@@ -8,6 +8,7 @@
 package org.elasticsearch.search.source;
 
 import org.apache.lucene.search.join.ScoreMode;
+import org.elasticsearch.action.get.GetResponse;
 import org.elasticsearch.common.ValidationException;
 import org.elasticsearch.index.query.InnerHitBuilder;
 import org.elasticsearch.index.query.NestedQueryBuilder;
@@ -81,6 +82,11 @@ public class MetadataFetchingIT extends ESIntegTestCase {
         prepareIndex("test").setId("1").setSource("field", "value").setRouting("toto").get();
         refresh();
 
+        assertResponse(prepareSearch("test"), response -> {
+            assertThat(response.getHits().getAt(0).getId(), notNullValue());
+            assertThat(response.getHits().getAt(0).field("_routing"), notNullValue());
+            assertThat(response.getHits().getAt(0).getSourceAsString(), notNullValue());
+        });
         assertResponse(prepareSearch("test").storedFields("_none_").setFetchSource(false), response -> {
             assertThat(response.getHits().getAt(0).getId(), nullValue());
             assertThat(response.getHits().getAt(0).field("_routing"), nullValue());
@@ -90,6 +96,40 @@ public class MetadataFetchingIT extends ESIntegTestCase {
             assertThat(response.getHits().getAt(0).getId(), nullValue());
             assertThat(response.getHits().getAt(0).getSourceAsString(), nullValue());
         });
+
+        GetResponse getResponse = client().prepareGet("test", "1").setRouting("toto").get();
+        assertTrue(getResponse.isExists());
+        assertEquals("toto", getResponse.getFields().get("_routing").getValue());
+    }
+
+    public void testWithIgnored() {
+        assertAcked(prepareCreate("test").setMapping("ip", "type=ip,ignore_malformed=true"));
+        ensureGreen();
+
+        prepareIndex("test").setId("1").setSource("ip", "value").get();
+        refresh();
+
+        assertResponse(prepareSearch("test"), response -> {
+            assertThat(response.getHits().getAt(0).getId(), notNullValue());
+            assertThat(response.getHits().getAt(0).field("_ignored").getValue(), equalTo("ip"));
+            assertThat(response.getHits().getAt(0).getSourceAsString(), notNullValue());
+        });
+        assertResponse(prepareSearch("test").storedFields("_none_"), response -> {
+            assertThat(response.getHits().getAt(0).getId(), nullValue());
+            assertThat(response.getHits().getAt(0).field("_ignored"), nullValue());
+            assertThat(response.getHits().getAt(0).getSourceAsString(), nullValue());
+        });
+
+        {
+            GetResponse getResponse = client().prepareGet("test", "1").get();
+            assertTrue(getResponse.isExists());
+            assertThat(getResponse.getField("_ignored"), nullValue());
+        }
+        {
+            GetResponse getResponse = client().prepareGet("test", "1").setStoredFields("_ignored").get();
+            assertTrue(getResponse.isExists());
+            assertEquals("ip", getResponse.getField("_ignored").getValue());
+        }
     }
 
     public void testInvalid() {

+ 9 - 8
server/src/main/java/org/elasticsearch/index/fieldvisitor/CustomFieldsVisitor.java

@@ -8,26 +8,27 @@
 package org.elasticsearch.index.fieldvisitor;
 
 import org.apache.lucene.index.FieldInfo;
+import org.elasticsearch.index.mapper.IgnoredFieldMapper;
 
 import java.util.HashSet;
-import java.util.List;
 import java.util.Set;
 
 /**
- * A field visitor that allows to load a selection of the stored fields by exact name
- * {@code _id} and {@code _routing} fields are always loaded.
+ * A field visitor that allows to load a selection of the stored fields by exact name.
+ * {@code _id}, {@code _routing}, and {@code _ignored} fields are always loaded.
+ * {@code _source} is always loaded unless disabled explicitly.
  */
 public class CustomFieldsVisitor extends FieldsVisitor {
-
     private final Set<String> fields;
 
     public CustomFieldsVisitor(Set<String> fields, boolean loadSource) {
         super(loadSource);
         this.fields = new HashSet<>(fields);
-        // metadata fields are already handled by FieldsVisitor, so removing
-        // them here means that if the only fields requested are metadata
-        // fields then we can shortcut loading
-        List.of("_id", "_routing", "_source").forEach(this.fields::remove);
+        // metadata fields that are always retrieved are already handled by FieldsVisitor, so removing
+        // them here means that if the only fields requested are those metadata fields then we can shortcut loading
+        FieldsVisitor.BASE_REQUIRED_FIELDS.forEach(this.fields::remove);
+        this.fields.remove(this.sourceFieldName);
+        this.fields.remove(IgnoredFieldMapper.NAME);
     }
 
     @Override

+ 9 - 17
server/src/main/java/org/elasticsearch/index/fieldvisitor/FieldsVisitor.java

@@ -34,10 +34,10 @@ import static java.util.Collections.emptyMap;
  * Base {@link StoredFieldVisitor} that retrieves all non-redundant metadata.
  */
 public class FieldsVisitor extends FieldNamesProvidingStoredFieldsVisitor {
-    private static final Set<String> BASE_REQUIRED_FIELDS = Set.of(IdFieldMapper.NAME, RoutingFieldMapper.NAME);
+    static final Set<String> BASE_REQUIRED_FIELDS = Set.of(IdFieldMapper.NAME, RoutingFieldMapper.NAME);
 
     private final boolean loadSource;
-    private final String sourceFieldName;
+    final String sourceFieldName;
     private final Set<String> requiredFields;
     protected BytesReference source;
     protected String id;
@@ -63,6 +63,7 @@ public class FieldsVisitor extends FieldNamesProvidingStoredFieldsVisitor {
         // Always load _ignored to be explicit about ignored fields
         // This works because _ignored is added as the first metadata mapper,
         // so its stored fields always appear first in the list.
+        // Note that _ignored is also multi-valued, which is why it can't be removed from the set like other fields
         if (IgnoredFieldMapper.NAME.equals(fieldInfo.name)) {
             return Status.YES;
         }
@@ -72,8 +73,7 @@ public class FieldsVisitor extends FieldNamesProvidingStoredFieldsVisitor {
                 return Status.YES;
             }
         }
-        // All these fields are single-valued so we can stop when the set is
-        // empty
+        // All these fields are single-valued so we can stop when the set is empty
         return requiredFields.isEmpty() ? Status.STOP : Status.NO;
     }
 
@@ -100,7 +100,7 @@ public class FieldsVisitor extends FieldNamesProvidingStoredFieldsVisitor {
         binaryField(fieldInfo, new BytesRef(value));
     }
 
-    public void binaryField(FieldInfo fieldInfo, BytesRef value) {
+    private void binaryField(FieldInfo fieldInfo, BytesRef value) {
         if (sourceFieldName.equals(fieldInfo.name)) {
             source = new BytesArray(value);
         } else if (IdFieldMapper.NAME.equals(fieldInfo.name)) {
@@ -147,12 +147,6 @@ public class FieldsVisitor extends FieldNamesProvidingStoredFieldsVisitor {
         addValue(fieldInfo.name, value);
     }
 
-    public void objectField(FieldInfo fieldInfo, Object object) {
-        assert IdFieldMapper.NAME.equals(fieldInfo.name) == false : "_id field must go through binaryField";
-        assert sourceFieldName.equals(fieldInfo.name) == false : "source field must go through binaryField";
-        addValue(fieldInfo.name, object);
-    }
-
     public BytesReference source() {
         return source;
     }
@@ -178,7 +172,9 @@ public class FieldsVisitor extends FieldNamesProvidingStoredFieldsVisitor {
     }
 
     public void reset() {
-        if (fieldsValues != null) fieldsValues.clear();
+        if (fieldsValues != null) {
+            fieldsValues.clear();
+        }
         source = null;
         id = null;
 
@@ -193,11 +189,7 @@ public class FieldsVisitor extends FieldNamesProvidingStoredFieldsVisitor {
             fieldsValues = new HashMap<>();
         }
 
-        List<Object> values = fieldsValues.get(name);
-        if (values == null) {
-            values = new ArrayList<>(2);
-            fieldsValues.put(name, values);
-        }
+        List<Object> values = fieldsValues.computeIfAbsent(name, k -> new ArrayList<>(2));
         values.add(value);
     }
 }

+ 13 - 9
server/src/main/java/org/elasticsearch/search/fetch/subphase/StoredFieldsPhase.java

@@ -10,6 +10,7 @@ package org.elasticsearch.search.fetch.subphase;
 
 import org.apache.lucene.index.LeafReaderContext;
 import org.elasticsearch.common.document.DocumentField;
+import org.elasticsearch.index.mapper.IdFieldMapper;
 import org.elasticsearch.index.mapper.IgnoredFieldMapper;
 import org.elasticsearch.index.mapper.LegacyTypeFieldMapper;
 import org.elasticsearch.index.mapper.MappedFieldType;
@@ -73,16 +74,19 @@ public class StoredFieldsPhase implements FetchSubPhase {
         if (storedFieldsContext.fieldNames() != null) {
             SearchExecutionContext sec = fetchContext.getSearchExecutionContext();
             for (String field : storedFieldsContext.fieldNames()) {
-                if (SourceFieldMapper.NAME.equals(field) == false) {
-                    Collection<String> fieldNames = sec.getMatchingFieldNames(field);
-                    for (String fieldName : fieldNames) {
-                        MappedFieldType ft = sec.getFieldType(fieldName);
-                        if (ft.isStored() == false) {
-                            continue;
-                        }
-                        storedFields.add(new StoredField(fieldName, ft, sec.isMetadataField(ft.name())));
-                        fieldsToLoad.add(ft.name());
+                Collection<String> fieldNames = sec.getMatchingFieldNames(field);
+                for (String fieldName : fieldNames) {
+                    // _id and _source are always retrieved anyway, no need to do it explicitly. See FieldsVisitor.
+                    // They are not returned as part of HitContext#loadedFields hence they are not added to documents by this sub-phase
+                    if (IdFieldMapper.NAME.equals(field) || SourceFieldMapper.NAME.equals(field)) {
+                        continue;
+                    }
+                    MappedFieldType ft = sec.getFieldType(fieldName);
+                    if (ft.isStored() == false) {
+                        continue;
                     }
+                    storedFields.add(new StoredField(fieldName, ft, sec.isMetadataField(ft.name())));
+                    fieldsToLoad.add(ft.name());
                 }
             }
         }