Explorar o código

Enable bloom filter for _id field in tsdb indices. (#92115)

Martijn van Groningen hai 2 anos
pai
achega
fda8ae2e41

+ 5 - 0
docs/changelog/92115.yaml

@@ -0,0 +1,5 @@
+pr: 92115
+summary: Enable bloom filter for `_id` field in tsdb indices
+area: TSDB
+type: enhancement
+issues: []

+ 15 - 5
server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java

@@ -16,6 +16,7 @@ import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat;
 import org.apache.lucene.codecs.lucene94.Lucene94Codec;
 import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.index.IndexMode;
 import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.codec.bloomfilter.ES85BloomFilterPostingsFormat;
 import org.elasticsearch.index.mapper.IdFieldMapper;
@@ -32,8 +33,8 @@ import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper;
  * configured for a specific field the default postings or vector format is used.
  */
 public class PerFieldMapperCodec extends Lucene94Codec {
-    private final MapperService mapperService;
 
+    private final MapperService mapperService;
     private final DocValuesFormat docValuesFormat = new Lucene90DocValuesFormat();
     private final ES85BloomFilterPostingsFormat bloomFilterPostingsFormat;
 
@@ -64,10 +65,19 @@ public class PerFieldMapperCodec extends Lucene94Codec {
         return super.getPostingsFormatForField(field);
     }
 
-    private boolean useBloomFilter(String field) {
-        return IdFieldMapper.NAME.equals(field)
-            && mapperService.mappingLookup().isDataStreamTimestampFieldEnabled() == false
-            && IndexSettings.BLOOM_FILTER_ID_FIELD_ENABLED_SETTING.get(mapperService.getIndexSettings().getSettings());
+    boolean useBloomFilter(String field) {
+        IndexSettings indexSettings = mapperService.getIndexSettings();
+        if (mapperService.mappingLookup().isDataStreamTimestampFieldEnabled()) {
+            // For time series indices the _id isn't randomly generated; it is
+            // derived from the dimension fields and the timestamp field. During indexing
+            // the version/seq_no/term therefore needs to be looked up, and having a bloom filter
+            // can speed this up significantly.
+            return indexSettings.getMode() == IndexMode.TIME_SERIES
+                && IdFieldMapper.NAME.equals(field)
+                && IndexSettings.BLOOM_FILTER_ID_FIELD_ENABLED_SETTING.get(indexSettings.getSettings());
+        } else { // timestamp field not enabled: only the field name and the setting decide
+            return IdFieldMapper.NAME.equals(field) && IndexSettings.BLOOM_FILTER_ID_FIELD_ENABLED_SETTING.get(indexSettings.getSettings());
+        }
     }
 
     @Override

+ 80 - 0
server/src/test/java/org/elasticsearch/index/codec/PerFieldMapperCodecTests.java

@@ -0,0 +1,80 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.index.codec;
+
+import org.apache.lucene.codecs.lucene94.Lucene94Codec;
+import org.elasticsearch.cluster.metadata.IndexMetadata;
+import org.elasticsearch.common.compress.CompressedXContent;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.MapperTestUtils;
+import org.elasticsearch.index.mapper.MapperService;
+import org.elasticsearch.test.ESTestCase;
+
+import java.io.IOException;
+
+import static org.hamcrest.Matchers.is;
+
+public class PerFieldMapperCodecTests extends ESTestCase { // exercises PerFieldMapperCodec#useBloomFilter for several index configurations
+
+    public void testUseBloomFilter() throws IOException {
+        PerFieldMapperCodec perFieldMapperCodec = createCodec(false, randomBoolean(), false); // no timestamp mapping, mode picked at random
+        assertThat(perFieldMapperCodec.useBloomFilter("_id"), is(true));
+        assertThat(perFieldMapperCodec.useBloomFilter("another_field"), is(false));
+    }
+
+    public void testUseBloomFilterWithTimestampFieldEnabled() throws IOException {
+        PerFieldMapperCodec perFieldMapperCodec = createCodec(true, true, false); // timestamp mapping plus time_series mode
+        assertThat(perFieldMapperCodec.useBloomFilter("_id"), is(true));
+        assertThat(perFieldMapperCodec.useBloomFilter("another_field"), is(false));
+    }
+
+    public void testUseBloomFilterWithTimestampFieldEnabled_noTimeSeriesMode() throws IOException {
+        PerFieldMapperCodec perFieldMapperCodec = createCodec(true, false, false); // timestamp mapping, index mode setting not supplied
+        assertThat(perFieldMapperCodec.useBloomFilter("_id"), is(false));
+    }
+
+    public void testUseBloomFilterWithTimestampFieldEnabled_disableBloomFilter() throws IOException {
+        PerFieldMapperCodec perFieldMapperCodec = createCodec(true, true, true); // time_series mode, bloom filter setting set to false
+        assertThat(perFieldMapperCodec.useBloomFilter("_id"), is(false));
+        assertWarnings( // the test expects this deprecation warning once the setting has been supplied
+            "[index.bloom_filter_for_id_field.enabled] setting was deprecated in Elasticsearch and will be removed in a future release."
+        );
+    }
+
+    private PerFieldMapperCodec createCodec(boolean timestampField, boolean timeSeries, boolean disableBloomFilter) throws IOException {
+        Settings.Builder settings = Settings.builder();
+        if (timeSeries) { // time_series mode is configured together with a routing path setting
+            settings.put(IndexSettings.MODE.getKey(), "time_series");
+            settings.put(IndexMetadata.INDEX_ROUTING_PATH.getKey(), "field");
+        }
+        if (disableBloomFilter) { // explicitly store false for the bloom filter setting
+            settings.put(IndexSettings.BLOOM_FILTER_ID_FIELD_ENABLED_SETTING.getKey(), false);
+        }
+        MapperService mapperService = MapperTestUtils.newMapperService(xContentRegistry(), createTempDir(), settings.build(), "test");
+        if (timestampField) { // merge a mapping that enables _data_stream_timestamp and declares @timestamp as a date
+            String mapping = """
+                {
+                    "_data_stream_timestamp": {
+                        "enabled": true
+                    },
+                    "properties": {
+                        "@timestamp": {
+                            "type": "date"
+                        }
+                    }
+                }
+                """;
+            mapperService.merge("type", new CompressedXContent(mapping), MapperService.MergeReason.MAPPING_UPDATE);
+        }
+        return new PerFieldMapperCodec(Lucene94Codec.Mode.BEST_SPEED, mapperService, BigArrays.NON_RECYCLING_INSTANCE);
+    }
+
+}