Browse Source

Index sequence numbers via a single Lucene field. (#94504)

Following a similar approach yielded indexing speedups in Lucene benchmarks.
Furthermore, it arguably makes things simpler.
Adrien Grand 2 years ago
parent
commit
e093dda177

+ 5 - 0
docs/changelog/94504.yaml

@@ -0,0 +1,5 @@
+pr: 94504
+summary: Index sequence numbers via a single Lucene field
+area: Mapping
+type: enhancement
+issues: []

+ 3 - 3
modules/percolator/src/test/java/org/elasticsearch/percolator/PercolatorFieldMapperTests.java

@@ -670,7 +670,7 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
                     XContentType.JSON
                 )
             );
-        assertThat(doc.rootDoc().getFields().size(), equalTo(16)); // also includes all other meta fields
+        assertThat(doc.rootDoc().getFields().size(), equalTo(15)); // also includes all other meta fields
         BytesRef queryBuilderAsBytes = doc.rootDoc().getField("query_field1.query_builder_field").binaryValue();
         assertQueryBuilder(queryBuilderAsBytes, queryBuilder);
 
@@ -711,7 +711,7 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
                     XContentType.JSON
                 )
             );
-        assertThat(doc.rootDoc().getFields().size(), equalTo(11)); // also includes all other meta fields
+        assertThat(doc.rootDoc().getFields().size(), equalTo(10)); // also includes all other meta fields
         IndexableField queryBuilderField = doc.rootDoc().getField("object_field.query_field.query_builder_field");
         assertTrue(queryBuilderField.fieldType().omitNorms());
         IndexableField extractionResultField = doc.rootDoc().getField("object_field.query_field.extraction_result");
@@ -735,7 +735,7 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
                     XContentType.JSON
                 )
             );
-        assertThat(doc.rootDoc().getFields().size(), equalTo(11)); // also includes all other meta fields
+        assertThat(doc.rootDoc().getFields().size(), equalTo(10)); // also includes all other meta fields
         queryBuilderAsBytes = doc.rootDoc().getField("object_field.query_field.query_builder_field").binaryValue();
         assertQueryBuilder(queryBuilderAsBytes, queryBuilder);
 

+ 46 - 11
server/src/main/java/org/elasticsearch/index/mapper/SeqNoFieldMapper.java

@@ -9,11 +9,14 @@
 package org.elasticsearch.index.mapper;
 
 import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.LongPoint;
 import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.index.DocValuesType;
 import org.apache.lucene.search.MatchNoDocsQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.NumericUtils;
 import org.elasticsearch.core.Nullable;
 import org.elasticsearch.index.fielddata.FieldDataContext;
 import org.elasticsearch.index.fielddata.IndexFieldData;
@@ -44,6 +47,46 @@ import java.util.Objects;
  */
 public class SeqNoFieldMapper extends MetadataFieldMapper {
 
+    // Like Lucene's LongField but single-valued (NUMERIC doc values instead of SORTED_NUMERIC doc values)
+    private static class SingleValueLongField extends Field {
+
+        private static final FieldType FIELD_TYPE = new FieldType();
+        static {
+            FIELD_TYPE.setDimensions(1, Long.BYTES);
+            FIELD_TYPE.setDocValuesType(DocValuesType.NUMERIC);
+        }
+
+        private final BytesRef pointValue;
+
+        SingleValueLongField(String field, long value) {
+            super(field, FIELD_TYPE);
+            fieldsData = value;
+            pointValue = new BytesRef(new byte[Long.BYTES]);
+            assert pointValue.offset == 0;
+            assert pointValue.length == Long.BYTES;
+            NumericUtils.longToSortableBytes((Long) fieldsData, pointValue.bytes, 0);
+        }
+
+        @Override
+        public BytesRef binaryValue() {
+            return pointValue;
+        }
+
+        @Override
+        public void setLongValue(long value) {
+            super.setLongValue(value);
+            assert pointValue.offset == 0;
+            assert pointValue.length == Long.BYTES;
+            NumericUtils.longToSortableBytes((Long) fieldsData, pointValue.bytes, 0);
+        }
+
+        @Override
+        public String toString() {
+            return getClass().getSimpleName() + " <" + name + ':' + fieldsData + '>';
+        }
+
+    }
+
     /**
      * A sequence ID, which is made up of a sequence number (both the searchable
      * and doc_value version of the field) and the primary term.
@@ -51,23 +94,19 @@ public class SeqNoFieldMapper extends MetadataFieldMapper {
     public static class SequenceIDFields {
 
         private final Field seqNo;
-        private final Field seqNoDocValue;
         private final Field primaryTerm;
         private final Field tombstoneField;
 
-        private SequenceIDFields(Field seqNo, Field seqNoDocValue, Field primaryTerm, Field tombstoneField) {
+        private SequenceIDFields(Field seqNo, Field primaryTerm, Field tombstoneField) {
             Objects.requireNonNull(seqNo, "sequence number field cannot be null");
-            Objects.requireNonNull(seqNoDocValue, "sequence number dv field cannot be null");
             Objects.requireNonNull(primaryTerm, "primary term field cannot be null");
             this.seqNo = seqNo;
-            this.seqNoDocValue = seqNoDocValue;
             this.primaryTerm = primaryTerm;
             this.tombstoneField = tombstoneField;
         }
 
         public void addFields(LuceneDocument document) {
             document.add(seqNo);
-            document.add(seqNoDocValue);
             document.add(primaryTerm);
             if (tombstoneField != null) {
                 document.add(tombstoneField);
@@ -80,7 +119,6 @@ public class SeqNoFieldMapper extends MetadataFieldMapper {
          */
         public void set(long seqNo, long primaryTerm) {
             this.seqNo.setLongValue(seqNo);
-            this.seqNoDocValue.setLongValue(seqNo);
             this.primaryTerm.setLongValue(primaryTerm);
         }
 
@@ -90,8 +128,7 @@ public class SeqNoFieldMapper extends MetadataFieldMapper {
          */
         public static SequenceIDFields emptySeqID() {
             return new SequenceIDFields(
-                new LongPoint(NAME, SequenceNumbers.UNASSIGNED_SEQ_NO),
-                new NumericDocValuesField(NAME, SequenceNumbers.UNASSIGNED_SEQ_NO),
+                new SingleValueLongField(NAME, SequenceNumbers.UNASSIGNED_SEQ_NO),
                 new NumericDocValuesField(PRIMARY_TERM_NAME, 0),
                 null
             );
@@ -99,8 +136,7 @@ public class SeqNoFieldMapper extends MetadataFieldMapper {
 
         public static SequenceIDFields tombstone() {
             return new SequenceIDFields(
-                new LongPoint(NAME, SequenceNumbers.UNASSIGNED_SEQ_NO),
-                new NumericDocValuesField(NAME, SequenceNumbers.UNASSIGNED_SEQ_NO),
+                new SingleValueLongField(NAME, SequenceNumbers.UNASSIGNED_SEQ_NO),
                 new NumericDocValuesField(PRIMARY_TERM_NAME, 0),
                 new NumericDocValuesField(TOMBSTONE_NAME, 1)
             );
@@ -230,7 +266,6 @@ public class SeqNoFieldMapper extends MetadataFieldMapper {
         assert seqID != null;
         for (LuceneDocument doc : context.nonRootDocuments()) {
             doc.add(seqID.seqNo);
-            doc.add(seqID.seqNoDocValue);
         }
     }
 

+ 0 - 2
server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java

@@ -4054,7 +4054,6 @@ public class IndexShardTests extends IndexShardTestCase {
                 IdFieldMapper.NAME,
                 VersionFieldMapper.NAME,
                 SeqNoFieldMapper.NAME,
-                SeqNoFieldMapper.NAME,
                 SeqNoFieldMapper.PRIMARY_TERM_NAME,
                 SeqNoFieldMapper.TOMBSTONE_NAME
             )
@@ -4073,7 +4072,6 @@ public class IndexShardTests extends IndexShardTestCase {
                 SourceFieldMapper.NAME,
                 SeqNoFieldMapper.TOMBSTONE_NAME,
                 SeqNoFieldMapper.NAME,
-                SeqNoFieldMapper.NAME,
                 SeqNoFieldMapper.PRIMARY_TERM_NAME
             )
         );