
Add max_multipart_parts setting to S3 repository (#113989) (#114161)

Mikhail Berezovskiy 1 year ago
Parent commit 519da179a7

+ 5 - 0
docs/changelog/113989.yaml

@@ -0,0 +1,5 @@
+pr: 113989
+summary: Add `max_multipart_parts` setting to S3 repository
+area: Snapshot/Restore
+type: enhancement
+issues: []

+ 13 - 3
docs/reference/snapshot-restore/repository-s3.asciidoc

@@ -261,9 +261,11 @@ multiple deployments may share the same bucket.
 
 `chunk_size`::
 
-    (<<byte-units,byte value>>) Big files can be broken down into chunks during snapshotting if needed.
-    Specify the chunk size as a value and unit, for example:
-    `1TB`, `1GB`, `10MB`. Defaults to the maximum size of a blob in the S3 which is `5TB`.
+    (<<byte-units,byte value>>) The maximum size of object that {es} will write to the repository
+    when creating a snapshot. Files which are larger than `chunk_size` will be chunked into several
+    smaller objects. {es} may also split a file across multiple objects to satisfy other constraints
+    such as the `max_multipart_parts` limit. Defaults to `5TB` which is the
+    https://docs.aws.amazon.com/AmazonS3/latest/userguide/qfacts.html[maximum size of an object in AWS S3].
 
 `compress`::
 
@@ -292,6 +294,14 @@ include::repository-shared-settings.asciidoc[]
     size allowed by S3. Defaults to `100mb` or `5%` of JVM heap, whichever is
     smaller.
 
+`max_multipart_parts`::
+
+    (integer) The maximum number of parts that {es} will write during a multipart upload of a single
+    object. Files which are larger than `buffer_size × max_multipart_parts` will be chunked into
+    several smaller objects. {es} may also split a file across multiple objects to satisfy other
+    constraints such as the `chunk_size` limit. Defaults to `10000` which is the
+    https://docs.aws.amazon.com/AmazonS3/latest/userguide/qfacts.html[maximum number of parts in a multipart upload in AWS S3].
+
 `canned_acl`::
 
     The S3 repository supports all
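
The interaction described above amounts to taking the smaller of `chunk_size` and `buffer_size * max_multipart_parts` as the effective per-object limit. A minimal sketch of that arithmetic in plain Java (the class and variable names are illustrative only, not part of the commit):

public class EffectiveChunkSizeSketch {
    public static void main(String[] args) {
        long chunkSizeBytes = 5L * 1024 * 1024 * 1024 * 1024;  // chunk_size = 5TB (default)
        long bufferSizeBytes = 100L * 1024 * 1024;             // buffer_size = 100MB (default)
        int maxMultipartParts = 10_000;                        // max_multipart_parts (default)

        // Effective per-object limit: the smaller of the two constraints (~0.95TB here).
        long effectiveLimit = Math.min(chunkSizeBytes, bufferSizeBytes * maxMultipartParts);
        System.out.println("effective object size limit: " + effectiveLimit + " bytes");

        // A hypothetical 3TB file would therefore be split across 4 objects.
        long fileSizeBytes = 3L * 1024 * 1024 * 1024 * 1024;
        long objectCount = (fileSizeBytes + effectiveLimit - 1) / effectiveLimit;
        System.out.println("objects written for the file: " + objectCount);
    }
}

With the defaults shown, the part-count constraint (100MB × 10,000 ≈ 0.95TB) is stricter than the 5TB `chunk_size`, which is exactly the case exercised by `testPartsNumLimit` further down.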

+ 22 - 1
modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3Repository.java

@@ -141,6 +141,11 @@ class S3Repository extends MeteredBlobStoreRepository {
         MAX_FILE_SIZE_USING_MULTIPART
     );
 
+    /**
+     * Maximum number of parts for a multipart upload (see https://docs.aws.amazon.com/AmazonS3/latest/userguide/qfacts.html).
+     */
+    static final Setting<Integer> MAX_MULTIPART_PARTS = Setting.intSetting("max_multipart_parts", 10_000, 1, 10_000);
+
     /**
      * Sets the S3 storage class type for the backup files. Values may be standard, reduced_redundancy,
      * standard_ia, onezone_ia and intelligent_tiering. Defaults to standard.
@@ -254,7 +259,9 @@ class S3Repository extends MeteredBlobStoreRepository {
         }
 
         this.bufferSize = BUFFER_SIZE_SETTING.get(metadata.settings());
-        this.chunkSize = CHUNK_SIZE_SETTING.get(metadata.settings());
+        var maxChunkSize = CHUNK_SIZE_SETTING.get(metadata.settings());
+        var maxPartsNum = MAX_MULTIPART_PARTS.get(metadata.settings());
+        this.chunkSize = objectSizeLimit(maxChunkSize, bufferSize, maxPartsNum);
 
         // We make sure that chunkSize is bigger or equal than/to bufferSize
         if (this.chunkSize.getBytes() < bufferSize.getBytes()) {
@@ -303,6 +310,20 @@ class S3Repository extends MeteredBlobStoreRepository {
         return Map.of("base_path", BASE_PATH_SETTING.get(metadata.settings()), "bucket", BUCKET_SETTING.get(metadata.settings()));
     }
 
+    /**
+     * Calculates the S3 object size limit based on two constraints: the maximum object (chunk) size
+     * and the maximum number of parts for a multipart upload.
+     * See https://docs.aws.amazon.com/AmazonS3/latest/userguide/qfacts.html
+     *
+     * @param chunkSize the maximum S3 object size
+     * @param bufferSize the S3 multipart upload part size
+     * @param maxPartsNum the maximum number of parts in an S3 multipart upload
+     */
+    private static ByteSizeValue objectSizeLimit(ByteSizeValue chunkSize, ByteSizeValue bufferSize, int maxPartsNum) {
+        var bytes = Math.min(chunkSize.getBytes(), bufferSize.getBytes() * maxPartsNum);
+        return ByteSizeValue.ofBytes(bytes);
+    }
+
     /**
      * Holds a reference to delayed repository operation {@link Scheduler.Cancellable} so it can be cancelled should the repository be
      * closed concurrently.
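
For reference, a quick sketch of how the bounded `max_multipart_parts` setting declared above behaves when read from repository settings; the class name is illustrative and only the `Setting`/`Settings` APIs already visible in the diff are assumed:

import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Settings;

public class MaxMultipartPartsSettingSketch {
    public static void main(String[] args) {
        // Same declaration as in S3Repository: default 10_000, bounded to the range [1, 10_000].
        Setting<Integer> maxParts = Setting.intSetting("max_multipart_parts", 10_000, 1, 10_000);

        // An explicit value within the bounds is returned as-is.
        Settings custom = Settings.builder().put("max_multipart_parts", 500).build();
        System.out.println(maxParts.get(custom)); // 500

        // A missing value falls back to the default.
        System.out.println(maxParts.get(Settings.EMPTY)); // 10000

        // Values outside [1, 10_000] are rejected with an IllegalArgumentException.
        Settings tooLarge = Settings.builder().put("max_multipart_parts", 20_000).build();
        maxParts.get(tooLarge); // throws
    }
}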

+ 33 - 0
modules/repository-s3/src/test/java/org/elasticsearch/repositories/s3/S3RepositoryTests.java

@@ -175,4 +175,37 @@ public class S3RepositoryTests extends ESTestCase {
         }
     }
 
+    // ensures that chunkSize is limited by the chunk_size setting when buffer_size * max_multipart_parts is bigger
+    public void testChunkSizeLimit() {
+        var meta = new RepositoryMetadata(
+            "dummy-repo",
+            "mock",
+            Settings.builder()
+                .put(S3Repository.BUCKET_SETTING.getKey(), "bucket")
+                .put(S3Repository.CHUNK_SIZE_SETTING.getKey(), "1GB")
+                .put(S3Repository.BUFFER_SIZE_SETTING.getKey(), "100MB")
+                .put(S3Repository.MAX_MULTIPART_PARTS.getKey(), 10_000) // ~1TB
+                .build()
+        );
+        try (var repo = createS3Repo(meta)) {
+            assertEquals(ByteSizeValue.ofGb(1), repo.chunkSize());
+        }
+    }
+
+    // ensures that chunkSize is limited by buffer_size * max_multipart_parts when the chunk_size setting is bigger
+    public void testPartsNumLimit() {
+        var meta = new RepositoryMetadata(
+            "dummy-repo",
+            "mock",
+            Settings.builder()
+                .put(S3Repository.BUCKET_SETTING.getKey(), "bucket")
+                .put(S3Repository.CHUNK_SIZE_SETTING.getKey(), "5TB")
+                .put(S3Repository.BUFFER_SIZE_SETTING.getKey(), "100MB")
+                .put(S3Repository.MAX_MULTIPART_PARTS.getKey(), 10_000)
+                .build()
+        );
+        try (var repo = createS3Repo(meta)) {
+            assertEquals(ByteSizeValue.ofMb(1_000_000), repo.chunkSize());
+        }
+    }
 }