Browse Source

Optimize indexing for disabled keyword field (#95640)

Small optimization for indexing disabled keyword field.
If a keyword field is disabled (no indexing, no docvalues, no store)
return earlier without unnecessary processing (e.g. creating BytesRef).

This is useful if keyword field is used just as Key/Value store, where
we don't want ES to do any processing of Value except storing it
in source.
Mayya Sharipova 2 years ago
parent
commit
8ddb03937e

+ 27 - 27
server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java

@@ -866,6 +866,10 @@ public final class KeywordFieldMapper extends FieldMapper {
         if (value == null) {
             return;
         }
+        // if field is disabled, skip indexing
+        if ((fieldType.indexOptions() == IndexOptions.NONE) && (fieldType.stored() == false) && (fieldType().hasDocValues() == false)) {
+            return;
+        }
 
         if (value.length() > fieldType().ignoreAbove()) {
             context.addIgnoredField(name());
@@ -885,35 +889,31 @@ public final class KeywordFieldMapper extends FieldMapper {
             context.getDimensions().addString(fieldType().name(), binaryValue);
         }
 
-        if (fieldType.indexOptions() != IndexOptions.NONE || fieldType.stored() || fieldType().hasDocValues()) {
-            // If the UTF8 encoding of the field value is bigger than the max length 32766, Lucene fill fail the indexing request and, to
-            // roll back the changes, will mark the (possibly partially indexed) document as deleted. This results in deletes, even in an
-            // append-only workload, which in turn leads to slower merges, as these will potentially have to fall back to MergeStrategy.DOC
-            // instead of MergeStrategy.BULK. To avoid this, we do a preflight check here before indexing the document into Lucene.
-            if (binaryValue.length > MAX_TERM_LENGTH) {
-                byte[] prefix = new byte[30];
-                System.arraycopy(binaryValue.bytes, binaryValue.offset, prefix, 0, 30);
-                String msg = "Document contains at least one immense term in field=\""
-                    + fieldType().name()
-                    + "\" (whose "
-                    + "UTF8 encoding is longer than the max length "
-                    + MAX_TERM_LENGTH
-                    + "), all of which were "
-                    + "skipped. Please correct the analyzer to not produce such terms. The prefix of the first immense "
-                    + "term is: '"
-                    + Arrays.toString(prefix)
-                    + "...'";
-                throw new IllegalArgumentException(msg);
-            }
+        // If the UTF8 encoding of the field value is bigger than the max length 32766, Lucene fill fail the indexing request and, to
+        // roll back the changes, will mark the (possibly partially indexed) document as deleted. This results in deletes, even in an
+        // append-only workload, which in turn leads to slower merges, as these will potentially have to fall back to MergeStrategy.DOC
+        // instead of MergeStrategy.BULK. To avoid this, we do a preflight check here before indexing the document into Lucene.
+        if (binaryValue.length > MAX_TERM_LENGTH) {
+            byte[] prefix = new byte[30];
+            System.arraycopy(binaryValue.bytes, binaryValue.offset, prefix, 0, 30);
+            String msg = "Document contains at least one immense term in field=\""
+                + fieldType().name()
+                + "\" (whose "
+                + "UTF8 encoding is longer than the max length "
+                + MAX_TERM_LENGTH
+                + "), all of which were "
+                + "skipped. Please correct the analyzer to not produce such terms. The prefix of the first immense "
+                + "term is: '"
+                + Arrays.toString(prefix)
+                + "...'";
+            throw new IllegalArgumentException(msg);
+        }
 
-            if (fieldType.indexOptions() != IndexOptions.NONE || fieldType().hasDocValues() || fieldType.stored()) {
-                Field field = new KeywordField(fieldType().name(), binaryValue, fieldType);
-                context.doc().add(field);
+        Field field = new KeywordField(fieldType().name(), binaryValue, fieldType);
+        context.doc().add(field);
 
-                if (fieldType().hasDocValues() == false && fieldType.omitNorms()) {
-                    context.addToFieldNames(fieldType().name());
-                }
-            }
+        if (fieldType().hasDocValues() == false && fieldType.omitNorms()) {
+            context.addToFieldNames(fieldType().name());
         }
     }
 

+ 1 - 1
server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java

@@ -627,7 +627,7 @@ public class KeywordFieldMapperTests extends MapperTestCase {
     }
 
     /**
-     * Test that we don't error on exceeding field size if field is neither indexed nor has doc values
+     * Test that we don't error on exceeding field size if field is neither indexed nor has doc values nor stored
      */
     public void testKeywordFieldUtf8LongerThan32766SourceOnly() throws Exception {
         DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> {