Browse Source

Set a fixed compound file threshold of 1GB. (#92659)

Lucene's `TieredMergePolicy`'s default consists of creating compound files for
segments that exceed 10% of the total index size at the time of writing these
segments. This sort of default makes sense in the context of a single Lucene
index, but much less for a system like Elasticsearch where a node may host
heterogeneous shards: it doesn't make sense to decide to use a coumpound file
for a 1GB segment on one shard because it has 30GB of data, and not on another
shard because it has 5GB of data.

This change does the following:
 - `index.compound_format` now accepts byte size values, in addition to a
   `boolean` or ratio in [0,1].
 - The default value of `index.compound_format` changes from `0.1` to `1gb`.
   Said otherwise, segments will be stored in compound files if they don't
   exceed 1GB.

In practice, this means that smaller shards will use compound files much more
and contribute less to the total number of open files or map regions, while the
bigger shards may use compound files a bit less.

Relates #92618
Adrien Grand 2 years ago
parent
commit
378cefe91a

+ 5 - 0
docs/changelog/92659.yaml

@@ -0,0 +1,5 @@
+pr: 92659
+summary: Set a fixed compound file threshold of 1GB
+area: Engine
+type: enhancement
+issues: []

+ 4 - 1
server/src/main/java/org/elasticsearch/index/IndexSettings.java

@@ -774,7 +774,10 @@ public final class IndexSettings {
         mappingDimensionFieldsLimit = scopedSettings.get(INDEX_MAPPING_DIMENSION_FIELDS_LIMIT_SETTING);
         indexRouting = IndexRouting.fromIndexMetadata(indexMetadata);
 
-        scopedSettings.addSettingsUpdateConsumer(MergePolicyConfig.INDEX_COMPOUND_FORMAT_SETTING, mergePolicyConfig::setNoCFSRatio);
+        scopedSettings.addSettingsUpdateConsumer(
+            MergePolicyConfig.INDEX_COMPOUND_FORMAT_SETTING,
+            mergePolicyConfig::setCompoundFormatThreshold
+        );
         scopedSettings.addSettingsUpdateConsumer(
             MergePolicyConfig.INDEX_MERGE_POLICY_DELETES_PCT_ALLOWED_SETTING,
             mergePolicyConfig::setDeletesPctAllowed

+ 74 - 17
server/src/main/java/org/elasticsearch/index/MergePolicyConfig.java

@@ -111,10 +111,11 @@ public final class MergePolicyConfig {
     public static final ByteSizeValue DEFAULT_MAX_MERGED_SEGMENT = new ByteSizeValue(5, ByteSizeUnit.GB);
     public static final double DEFAULT_SEGMENTS_PER_TIER = 10.0d;
     public static final double DEFAULT_DELETES_PCT_ALLOWED = 33.0d;
-    public static final Setting<Double> INDEX_COMPOUND_FORMAT_SETTING = new Setting<>(
-        "index.compound_format",
-        Double.toString(TieredMergePolicy.DEFAULT_NO_CFS_RATIO),
-        MergePolicyConfig::parseNoCFSRatio,
+    private static final String INDEX_COMPOUND_FORMAT_SETTING_KEY = "index.compound_format";
+    public static final Setting<CompoundFileThreshold> INDEX_COMPOUND_FORMAT_SETTING = new Setting<>(
+        INDEX_COMPOUND_FORMAT_SETTING_KEY,
+        "1gb",
+        MergePolicyConfig::parseCompoundFormat,
         Property.Dynamic,
         Property.IndexScope
     );
@@ -189,7 +190,7 @@ public final class MergePolicyConfig {
             );
         }
         maxMergeAtOnce = adjustMaxMergeAtOnceIfNeeded(maxMergeAtOnce, segmentsPerTier);
-        mergePolicy.setNoCFSRatio(indexSettings.getValue(INDEX_COMPOUND_FORMAT_SETTING));
+        indexSettings.getValue(INDEX_COMPOUND_FORMAT_SETTING).configure(mergePolicy);
         mergePolicy.setForceMergeDeletesPctAllowed(forceMergeDeletesPctAllowed);
         mergePolicy.setFloorSegmentMB(floorSegment.getMbFrac());
         mergePolicy.setMaxMergeAtOnce(maxMergeAtOnce);
@@ -229,8 +230,8 @@ public final class MergePolicyConfig {
         mergePolicy.setForceMergeDeletesPctAllowed(value);
     }
 
-    void setNoCFSRatio(Double noCFSRatio) {
-        mergePolicy.setNoCFSRatio(noCFSRatio);
+    void setCompoundFormatThreshold(CompoundFileThreshold compoundFileThreshold) {
+        compoundFileThreshold.configure(mergePolicy);
     }
 
     void setDeletesPctAllowed(Double deletesPctAllowed) {
@@ -261,25 +262,81 @@ public final class MergePolicyConfig {
         return mergesEnabled ? mergePolicy : NoMergePolicy.INSTANCE;
     }
 
-    private static double parseNoCFSRatio(String noCFSRatio) {
+    private static CompoundFileThreshold parseCompoundFormat(String noCFSRatio) {
         noCFSRatio = noCFSRatio.trim();
         if (noCFSRatio.equalsIgnoreCase("true")) {
-            return 1.0d;
+            return new CompoundFileThreshold(1.0d);
         } else if (noCFSRatio.equalsIgnoreCase("false")) {
-            return 0.0;
+            return new CompoundFileThreshold(0.0d);
         } else {
             try {
-                double value = Double.parseDouble(noCFSRatio);
-                if (value < 0.0 || value > 1.0) {
-                    throw new IllegalArgumentException("NoCFSRatio must be in the interval [0..1] but was: [" + value + "]");
+                try {
+                    return new CompoundFileThreshold(Double.parseDouble(noCFSRatio));
+                } catch (NumberFormatException ex) {
+                    throw new IllegalArgumentException(
+                        "index.compound_format must be a boolean, a non-negative byte size or a ratio in the interval [0..1] but was: ["
+                            + noCFSRatio
+                            + "]",
+                        ex
+                    );
                 }
-                return value;
-            } catch (NumberFormatException ex) {
+            } catch (IllegalArgumentException e) {
+                try {
+                    return new CompoundFileThreshold(ByteSizeValue.parseBytesSizeValue(noCFSRatio, INDEX_COMPOUND_FORMAT_SETTING_KEY));
+                } catch (RuntimeException e2) {
+                    e.addSuppressed(e2);
+                }
+                throw e;
+            }
+        }
+    }
+
+    public static class CompoundFileThreshold {
+        private Double noCFSRatio;
+        private ByteSizeValue noCFSSize;
+
+        private CompoundFileThreshold(double noCFSRatio) {
+            if (noCFSRatio < 0.0 || noCFSRatio > 1.0) {
                 throw new IllegalArgumentException(
-                    "Expected a boolean or a value in the interval [0..1] but was: " + "[" + noCFSRatio + "]",
-                    ex
+                    "index.compound_format must be a boolean, a non-negative byte size or a ratio in the interval [0..1] but was: ["
+                        + noCFSRatio
+                        + "]"
                 );
             }
+            this.noCFSRatio = noCFSRatio;
+            this.noCFSSize = null;
+        }
+
+        private CompoundFileThreshold(ByteSizeValue noCFSSize) {
+            if (noCFSSize.getBytes() < 0) {
+                throw new IllegalArgumentException(
+                    "index.compound_format must be a boolean, a non-negative byte size or a ratio in the interval [0..1] but was: ["
+                        + noCFSSize
+                        + "]"
+                );
+            }
+            this.noCFSRatio = null;
+            this.noCFSSize = noCFSSize;
+        }
+
+        void configure(MergePolicy mergePolicy) {
+            if (noCFSRatio != null) {
+                assert noCFSSize == null;
+                mergePolicy.setNoCFSRatio(noCFSRatio);
+                mergePolicy.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY);
+            } else {
+                mergePolicy.setNoCFSRatio(1.0);
+                mergePolicy.setMaxCFSSegmentSizeMB(noCFSSize.getMbFrac());
+            }
+        }
+
+        @Override
+        public String toString() {
+            if (noCFSRatio != null) {
+                return "max CFS ratio: " + noCFSRatio;
+            } else {
+                return "max CFS size: " + noCFSSize;
+            }
         }
     }
 }

+ 80 - 13
server/src/test/java/org/elasticsearch/index/MergePolicySettingsTests.java → server/src/test/java/org/elasticsearch/index/MergePolicyConfigTests.java

@@ -7,8 +7,17 @@
  */
 package org.elasticsearch.index;
 
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.MergePolicy;
 import org.apache.lucene.index.NoMergePolicy;
+import org.apache.lucene.index.SegmentCommitInfo;
+import org.apache.lucene.index.SegmentReader;
 import org.apache.lucene.index.TieredMergePolicy;
+import org.apache.lucene.store.Directory;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.unit.ByteSizeUnit;
 import org.elasticsearch.common.unit.ByteSizeValue;
@@ -21,21 +30,31 @@ import static org.elasticsearch.index.IndexSettingsTests.newIndexMeta;
 import static org.hamcrest.Matchers.containsString;
 import static org.hamcrest.Matchers.equalTo;
 
-public class MergePolicySettingsTests extends ESTestCase {
+public class MergePolicyConfigTests extends ESTestCase {
     protected final ShardId shardId = new ShardId("index", "_na_", 1);
 
     public void testCompoundFileSettings() throws IOException {
-        assertThat(new MergePolicyConfig(logger, indexSettings(Settings.EMPTY)).getMergePolicy().getNoCFSRatio(), equalTo(0.1));
-        assertThat(new MergePolicyConfig(logger, indexSettings(build(true))).getMergePolicy().getNoCFSRatio(), equalTo(1.0));
-        assertThat(new MergePolicyConfig(logger, indexSettings(build(0.5))).getMergePolicy().getNoCFSRatio(), equalTo(0.5));
-        assertThat(new MergePolicyConfig(logger, indexSettings(build(1.0))).getMergePolicy().getNoCFSRatio(), equalTo(1.0));
-        assertThat(new MergePolicyConfig(logger, indexSettings(build("true"))).getMergePolicy().getNoCFSRatio(), equalTo(1.0));
-        assertThat(new MergePolicyConfig(logger, indexSettings(build("True"))).getMergePolicy().getNoCFSRatio(), equalTo(1.0));
-        assertThat(new MergePolicyConfig(logger, indexSettings(build("False"))).getMergePolicy().getNoCFSRatio(), equalTo(0.0));
-        assertThat(new MergePolicyConfig(logger, indexSettings(build("false"))).getMergePolicy().getNoCFSRatio(), equalTo(0.0));
-        assertThat(new MergePolicyConfig(logger, indexSettings(build(false))).getMergePolicy().getNoCFSRatio(), equalTo(0.0));
-        assertThat(new MergePolicyConfig(logger, indexSettings(build(0))).getMergePolicy().getNoCFSRatio(), equalTo(0.0));
-        assertThat(new MergePolicyConfig(logger, indexSettings(build(0.0))).getMergePolicy().getNoCFSRatio(), equalTo(0.0));
+        assertCompoundThreshold(Settings.EMPTY, 1.0, ByteSizeValue.ofGb(1));
+        assertCompoundThreshold(build(true), 1.0, ByteSizeValue.ofBytes(Long.MAX_VALUE));
+        assertCompoundThreshold(build(0.5), 0.5, ByteSizeValue.ofBytes(Long.MAX_VALUE));
+        assertCompoundThreshold(build(1.0), 1.0, ByteSizeValue.ofBytes(Long.MAX_VALUE));
+        assertCompoundThreshold(build("true"), 1.0, ByteSizeValue.ofBytes(Long.MAX_VALUE));
+        assertCompoundThreshold(build("True"), 1.0, ByteSizeValue.ofBytes(Long.MAX_VALUE));
+        assertCompoundThreshold(build("False"), 0.0, ByteSizeValue.ofBytes(Long.MAX_VALUE));
+        assertCompoundThreshold(build("false"), 0.0, ByteSizeValue.ofBytes(Long.MAX_VALUE));
+        assertCompoundThreshold(build(false), 0.0, ByteSizeValue.ofBytes(Long.MAX_VALUE));
+        assertCompoundThreshold(build(0), 0.0, ByteSizeValue.ofBytes(Long.MAX_VALUE));
+        assertCompoundThreshold(build(0.0), 0.0, ByteSizeValue.ofBytes(Long.MAX_VALUE));
+        assertCompoundThreshold(build(0.0), 0.0, ByteSizeValue.ofBytes(Long.MAX_VALUE));
+        assertCompoundThreshold(build("100MB"), 1.0, ByteSizeValue.ofMb(100));
+        assertCompoundThreshold(build("0MB"), 1.0, ByteSizeValue.ofBytes(0));
+        assertCompoundThreshold(build("0B"), 1.0, ByteSizeValue.ofBytes(0));
+    }
+
+    private void assertCompoundThreshold(Settings settings, double noCFSRatio, ByteSizeValue maxCFSSize) {
+        MergePolicy mp = new MergePolicyConfig(logger, indexSettings(settings)).getMergePolicy();
+        assertThat(mp.getNoCFSRatio(), equalTo(noCFSRatio));
+        assertThat(mp.getMaxCFSSegmentSizeMB(), equalTo(maxCFSSize.getMbFrac()));
     }
 
     private static IndexSettings indexSettings(Settings settings) {
@@ -52,17 +71,26 @@ public class MergePolicySettingsTests extends ESTestCase {
 
     public void testUpdateSettings() throws IOException {
         IndexSettings indexSettings = indexSettings(Settings.EMPTY);
-        assertThat(indexSettings.getMergePolicy().getNoCFSRatio(), equalTo(0.1));
+        assertThat(indexSettings.getMergePolicy().getNoCFSRatio(), equalTo(1.0));
+        assertThat(indexSettings.getMergePolicy().getMaxCFSSegmentSizeMB(), equalTo(1024d));
         indexSettings = indexSettings(build(0.9));
         assertThat((indexSettings.getMergePolicy()).getNoCFSRatio(), equalTo(0.9));
+        assertThat(indexSettings.getMergePolicy().getMaxCFSSegmentSizeMB(), equalTo(ByteSizeValue.ofBytes(Long.MAX_VALUE).getMbFrac()));
         indexSettings.updateIndexMetadata(newIndexMeta("index", build(0.1)));
         assertThat((indexSettings.getMergePolicy()).getNoCFSRatio(), equalTo(0.1));
+        assertThat(indexSettings.getMergePolicy().getMaxCFSSegmentSizeMB(), equalTo(ByteSizeValue.ofBytes(Long.MAX_VALUE).getMbFrac()));
         indexSettings.updateIndexMetadata(newIndexMeta("index", build(0.0)));
         assertThat((indexSettings.getMergePolicy()).getNoCFSRatio(), equalTo(0.0));
+        assertThat(indexSettings.getMergePolicy().getMaxCFSSegmentSizeMB(), equalTo(ByteSizeValue.ofBytes(Long.MAX_VALUE).getMbFrac()));
         indexSettings.updateIndexMetadata(newIndexMeta("index", build("true")));
         assertThat((indexSettings.getMergePolicy()).getNoCFSRatio(), equalTo(1.0));
+        assertThat(indexSettings.getMergePolicy().getMaxCFSSegmentSizeMB(), equalTo(ByteSizeValue.ofBytes(Long.MAX_VALUE).getMbFrac()));
         indexSettings.updateIndexMetadata(newIndexMeta("index", build("false")));
         assertThat((indexSettings.getMergePolicy()).getNoCFSRatio(), equalTo(0.0));
+        assertThat(indexSettings.getMergePolicy().getMaxCFSSegmentSizeMB(), equalTo(ByteSizeValue.ofBytes(Long.MAX_VALUE).getMbFrac()));
+        indexSettings.updateIndexMetadata(newIndexMeta("index", build("100mb")));
+        assertThat((indexSettings.getMergePolicy()).getNoCFSRatio(), equalTo(1.0));
+        assertThat(indexSettings.getMergePolicy().getMaxCFSSegmentSizeMB(), equalTo(100d));
     }
 
     public void testTieredMergePolicySettingsUpdate() throws IOException {
@@ -242,4 +270,43 @@ public class MergePolicySettingsTests extends ESTestCase {
         return Settings.builder().put(MergePolicyConfig.INDEX_COMPOUND_FORMAT_SETTING.getKey(), value).build();
     }
 
+    private Settings build(ByteSizeValue value) {
+        return Settings.builder().put(MergePolicyConfig.INDEX_COMPOUND_FORMAT_SETTING.getKey(), value).build();
+    }
+
+    public void testCompoundFileConfiguredByByteSize() throws IOException {
+        try (Directory dir = newDirectory()) {
+            // index.compound_format: 1gb, the merge will use a compound file
+            MergePolicy mp = new MergePolicyConfig(logger, indexSettings(Settings.EMPTY)).getMergePolicy();
+            try (IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null).setMergePolicy(mp))) {
+                w.addDocument(new Document());
+                w.flush();
+                w.addDocument(new Document());
+                w.forceMerge(1);
+            }
+            try (DirectoryReader reader = DirectoryReader.open(dir)) {
+                LeafReader leaf = getOnlyLeafReader(reader);
+                SegmentCommitInfo sci = ((SegmentReader) leaf).getSegmentInfo();
+                assertEquals(IndexWriter.SOURCE_MERGE, sci.info.getDiagnostics().get(IndexWriter.SOURCE));
+                assertTrue(sci.info.getUseCompoundFile());
+            }
+        }
+
+        // index.compound_format: 1b, the merge will not use a compound file
+        try (Directory dir = newDirectory()) {
+            MergePolicy mp = new MergePolicyConfig(logger, indexSettings(build("1b"))).getMergePolicy();
+            try (IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null).setMergePolicy(mp))) {
+                w.addDocument(new Document());
+                w.flush();
+                w.addDocument(new Document());
+                w.forceMerge(1);
+            }
+            try (DirectoryReader reader = DirectoryReader.open(dir)) {
+                LeafReader leaf = getOnlyLeafReader(reader);
+                SegmentCommitInfo sci = ((SegmentReader) leaf).getSegmentInfo();
+                assertEquals(IndexWriter.SOURCE_MERGE, sci.info.getDiagnostics().get(IndexWriter.SOURCE));
+                assertFalse(sci.info.getUseCompoundFile());
+            }
+        }
+    }
 }