소스 검색

bulkImport add objectUrls parameter (#1525)

Signed-off-by: lentitude2tk <xushuang.hu@zilliz.com>
xushuang.hu 2 일 전
부모
커밋
741972a5b2

+ 5 - 2
examples/src/main/java/io/milvus/v1/BulkWriterExample.java

@@ -75,6 +75,7 @@ import java.io.File;
 import java.io.IOException;
 import java.net.URL;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
@@ -547,8 +548,9 @@ public class BulkWriterExample {
         String secretKey = StorageConsts.cloudStorage == CloudStorage.AZURE ? StorageConsts.AZURE_ACCOUNT_KEY : StorageConsts.STORAGE_SECRET_KEY;
 
         System.out.println("\n===================== call cloudImport ====================");
+        List<String> objectUrls = Lists.newArrayList(objectUrl);
         CloudImportRequest bulkImportRequest = CloudImportRequest.builder()
-                .objectUrl(objectUrl).accessKey(accessKey).secretKey(secretKey)
+                .objectUrls(Lists.newArrayList(Collections.singleton(objectUrls))).accessKey(accessKey).secretKey(secretKey)
                 .clusterId(CloudImportConsts.CLUSTER_ID).collectionName(collectionName).partitionName(partitionName)
                 .apiKey(CloudImportConsts.API_KEY)
                 .build();
@@ -710,8 +712,9 @@ public class BulkWriterExample {
 
     private static void exampleCloudImport() {
         System.out.println("\n===================== import files to cloud vectordb ====================");
+        List<String> objectUrls = Lists.newArrayList(CloudImportConsts.OBJECT_URL);
         CloudImportRequest request = CloudImportRequest.builder()
-                .objectUrl(CloudImportConsts.OBJECT_URL).accessKey(CloudImportConsts.OBJECT_ACCESS_KEY).secretKey(CloudImportConsts.OBJECT_SECRET_KEY)
+                .objectUrls(Lists.newArrayList(Collections.singleton(objectUrls))).accessKey(CloudImportConsts.OBJECT_ACCESS_KEY).secretKey(CloudImportConsts.OBJECT_SECRET_KEY)
                 .clusterId(CloudImportConsts.CLUSTER_ID).collectionName(CloudImportConsts.COLLECTION_NAME).partitionName(CloudImportConsts.PARTITION_NAME)
                 .apiKey(CloudImportConsts.API_KEY)
                 .build();

+ 2 - 1
examples/src/main/java/io/milvus/v2/bulkwriter/BulkWriterRemoteExample.java

@@ -688,8 +688,9 @@ public class BulkWriterRemoteExample {
 
     private static void exampleCloudImport() {
         System.out.println("\n===================== import files to cloud vectordb ====================");
+        List<String> objectUrls = Lists.newArrayList(CloudImportConsts.OBJECT_URL);
         CloudImportRequest request = CloudImportRequest.builder()
-                .objectUrl(CloudImportConsts.OBJECT_URL).accessKey(CloudImportConsts.OBJECT_ACCESS_KEY).secretKey(CloudImportConsts.OBJECT_SECRET_KEY)
+                .objectUrls(Lists.newArrayList(Collections.singleton(objectUrls))).accessKey(CloudImportConsts.OBJECT_ACCESS_KEY).secretKey(CloudImportConsts.OBJECT_SECRET_KEY)
                 .clusterId(CloudImportConsts.CLUSTER_ID).collectionName(CloudImportConsts.COLLECTION_NAME).partitionName(CloudImportConsts.PARTITION_NAME)
                 .apiKey(CloudImportConsts.API_KEY)
                 .build();

+ 62 - 4
sdk-bulkwriter/src/main/java/io/milvus/bulkwriter/request/import_/CloudImportRequest.java

@@ -24,18 +24,76 @@ import lombok.Data;
 import lombok.NoArgsConstructor;
 import lombok.experimental.SuperBuilder;
 
+import java.util.List;
+
 @Data
 @SuperBuilder
 @AllArgsConstructor
 @NoArgsConstructor
+/*
+  If you want to import data into a Zilliz cloud instance and your data is stored in a storage bucket,
+  you can use this method to import the data from the bucket.
+ */
 public class CloudImportRequest extends BaseImportRequest {
     private static final long serialVersionUID = 6487348610099924813L;
-    private String objectUrl;
-    private String accessKey;
-    private String secretKey;
-    private String token;
     private String clusterId;
+
+    /**
+     * For Free & Serverless deployments: specifying this parameter is not supported.
+     * For Dedicated deployments: this parameter can be specified; defaults to the "default" database.
+     */
     private String dbName;
     private String collectionName;
+
+    /**
+     * If the collection has partitionKey enabled:
+     *     - The partitionName parameter cannot be specified for import.
+     * If the collection does not have partitionKey enabled:
+     *     - You may specify partitionName for the import.
+     *     - Defaults to the "default" partition if not specified.
+     */
     private String partitionName;
+
+    /**
+     * Data import can be configured in multiple ways using `objectUrls`:
+     * <p>
+     * 1. Multi-path import (multiple folders or files):
+     *    "objectUrls": [
+     *        ["s3://bucket-name/parquet-folder-1/1.parquet"],
+     *        ["s3://bucket-name/parquet-folder-2/1.parquet"],
+     *        ["s3://bucket-name/parquet-folder-3/"]
+     *    ]
+     * <p>
+     * 2. Folder import:
+     *    "objectUrls": [
+     *        ["s3://bucket-name/parquet-folder/"]
+     *    ]
+     * <p>
+     * 3. Single file import:
+     *    "objectUrls": [
+     *        ["s3://bucket-name/parquet-folder/1.parquet"]
+     *    ]
+     */
+    private List<List<String>> objectUrls;
+
+    /**
+     * Use `objectUrls` instead for more flexible multi-path configuration.
+     * <p>
+     * Folder import:
+     *     "objectUrl": "s3://bucket-name/parquet-folder/"
+     * <p>
+     * File import:
+     *     "objectUrl": "s3://bucket-name/parquet-folder/1.parquet"
+     */
+    @Deprecated
+    private String objectUrl;
+
+    /** Specify `accessKey` and `secretKey`; for short-term credentials, also include `token`. */
+    private String accessKey;
+
+    /** Specify `accessKey` and `secretKey`; for short-term credentials, also include `token`. */
+    private String secretKey;
+
+    /** Specify `accessKey` and `secretKey`; for short-term credentials, also include `token`. */
+    private String token;
 }

+ 32 - 0
sdk-bulkwriter/src/main/java/io/milvus/bulkwriter/request/import_/MilvusImportRequest.java

@@ -30,10 +30,42 @@ import java.util.List;
 @SuperBuilder
 @AllArgsConstructor
 @NoArgsConstructor
+/*
+  If you want to import data into open-source Milvus,
+  you can use this method to import the data files stored in the bucket where Milvus resides.
+ */
 public class MilvusImportRequest extends BaseImportRequest {
     private static final long serialVersionUID = -1958858397962018740L;
+    /**
+     * This parameter can be specified; defaults to the "default" database.
+     */
     private String dbName;
+
     private String collectionName;
+
+    /**
+     * If the collection has partitionKey enabled:
+     *     - The partitionName parameter cannot be specified for import.
+     * If the collection does not have partitionKey enabled:
+     *     - You may specify partitionName for the import.
+     *     - Defaults to the "default" partition if not specified.
+     */
     private String partitionName;
+
+    /**
+     * Data import can be configured in multiple ways using `files`:
+     * <p>
+     * 1. Multi-path import (multiple files):
+     *    "files": [
+     *        ["parquet-folder-1/1.parquet"],
+     *        ["parquet-folder-2/1.parquet"],
+     *        ["parquet-folder-3/1.parquet"]
+     *    ]
+     * <p>
+     * 2. Single file import:
+     *    "files": [
+     *        ["parquet-folder/1.parquet"]
+     *    ]
+     */
     private List<List<String>> files;
 }

+ 41 - 3
sdk-bulkwriter/src/main/java/io/milvus/bulkwriter/request/import_/StageImportRequest.java

@@ -30,12 +30,50 @@ import java.util.List;
 @SuperBuilder
 @AllArgsConstructor
 @NoArgsConstructor
+/*
+  If you want to import data into a Zilliz cloud instance and your data is stored in a Zilliz stage,
+  you can use this method to import the data from the stage.
+ */
 public class StageImportRequest extends BaseImportRequest {
-    private String stageName;
-    private List<List<String>> dataPaths;
-
     private String clusterId;
+
+    /**
+     * For Free & Serverless deployments: specifying this parameter is not supported.
+     * For Dedicated deployments: this parameter can be specified; defaults to the "default" database.
+     */
     private String dbName;
     private String collectionName;
+
+    /**
+     * If the collection has partitionKey enabled:
+     *     - The partitionName parameter cannot be specified for import.
+     * If the collection does not have partitionKey enabled:
+     *     - You may specify partitionName for the import.
+     *     - Defaults to the "default" partition if not specified.
+     */
     private String partitionName;
+
+    private String stageName;
+
+    /**
+     * Data import can be configured in multiple ways using `dataPaths`:
+     * <p>
+     * 1. Multi-path import (multiple folders or files):
+     *    "dataPaths": [
+     *        ["parquet-folder-1/1.parquet"],
+     *        ["parquet-folder-2/1.parquet"],
+     *        ["parquet-folder-3/"]
+     *    ]
+     * <p>
+     * 2. Folder import:
+     *    "dataPaths": [
+     *        ["parquet-folder/"]
+     *    ]
+     * <p>
+     * 3. Single file import:
+     *    "dataPaths": [
+     *        ["parquet-folder/1.parquet"]
+     *    ]
+     */
+    private List<List<String>> dataPaths;
 }