@@ -24,6 +24,7 @@ import com.fasterxml.jackson.dataformat.csv.CsvSchema;
 import com.google.common.collect.Lists;
 import com.google.gson.Gson;
 import com.google.gson.JsonElement;
+import com.google.gson.JsonNull;
 import com.google.gson.JsonObject;
 import com.google.gson.reflect.TypeToken;
 import io.milvus.bulkwriter.BulkImport;
@@ -50,6 +51,7 @@ import io.milvus.v1.CommonUtils;
 import io.milvus.v2.client.ConnectConfig;
 import io.milvus.v2.client.MilvusClientV2;
 import io.milvus.v2.common.ConsistencyLevel;
+import io.milvus.v2.common.DataType;
 import io.milvus.v2.common.IndexParam;
 import io.milvus.v2.service.collection.request.*;
 import io.milvus.v2.service.index.request.CreateIndexReq;
@@ -75,9 +77,6 @@ public class BulkWriterExample {

     private static final Gson GSON_INSTANCE = new Gson();

-    private static final List<Integer> QUERY_IDS = Lists.newArrayList(100, 5000);
-
-
     /**
      * If you need to transfer the files generated by bulkWriter to the corresponding remote storage (AWS S3, GCP GCS, Azure Blob, Aliyun OSS, Tencent Cloud TOS),
      * you need to configure it accordingly; otherwise, you can ignore it.
@@ -145,25 +144,25 @@
     private static final String ALL_TYPES_COLLECTION_NAME = "java_sdk_bulkwriter_all_v2";
     private static final Integer DIM = 512;
     private static final Integer ARRAY_CAPACITY = 10;
-    private MilvusClientV2 milvusClient;
+    private static MilvusClientV2 milvusClient;

     public static void main(String[] args) throws Exception {
-
-        BulkWriterExample exampleBulkWriter = new BulkWriterExample();
-        exampleBulkWriter.createConnection();
+        createConnection();

         List<BulkFileType> fileTypes = Lists.newArrayList(
-                BulkFileType.PARQUET
+                BulkFileType.PARQUET,
+                BulkFileType.JSON,
+                BulkFileType.CSV
         );

-        exampleSimpleCollection(exampleBulkWriter, fileTypes);
-        exampleAllTypesCollectionRemote(exampleBulkWriter, fileTypes);
+        exampleSimpleCollection(fileTypes);
+        exampleAllTypesCollectionRemote(fileTypes);

         // To call the cloud import API, you need to apply for a cloud service from Zilliz Cloud (https://zilliz.com/cloud)
         // exampleCloudImport();
     }

-    private void createConnection() {
+    private static void createConnection() {
         System.out.println("\nCreate connection...");
         String url = String.format("http://%s:%s", HOST, PORT);
         milvusClient = new MilvusClientV2(ConnectConfig.builder()
@@ -174,9 +173,9 @@ public class BulkWriterExample {
         System.out.println("\nConnected");
     }

-    private static void exampleSimpleCollection(BulkWriterExample exampleBulkWriter, List<BulkFileType> fileTypes) throws Exception {
-        CreateCollectionReq.CollectionSchema collectionSchema = exampleBulkWriter.buildSimpleSchema();
-        exampleBulkWriter.createCollection(SIMPLE_COLLECTION_NAME, collectionSchema, false);
+    private static void exampleSimpleCollection(List<BulkFileType> fileTypes) throws Exception {
+        CreateCollectionReq.CollectionSchema collectionSchema = buildSimpleSchema();
+        createCollection(SIMPLE_COLLECTION_NAME, collectionSchema, false);

         for (BulkFileType fileType : fileTypes) {
             localWriter(collectionSchema, fileType);
@@ -190,23 +189,27 @@
         parallelAppend(collectionSchema);
     }

-    private static void exampleAllTypesCollectionRemote(BulkWriterExample exampleBulkWriter, List<BulkFileType> fileTypes) throws Exception {
+    private static void exampleAllTypesCollectionRemote(List<BulkFileType> fileTypes) throws Exception {
+        List<Map<String, Object>> originalData = genOriginalData(5);
+        List<JsonObject> rows = genImportData(originalData, true);
+
         // 4 types of vectors + all scalar types + dynamic field enabled, use the bulkInsert interface
         for (BulkFileType fileType : fileTypes) {
             CreateCollectionReq.CollectionSchema collectionSchema = buildAllTypesSchema();
-            List<List<String>> batchFiles = exampleBulkWriter.allTypesRemoteWriter(collectionSchema, fileType);
-            exampleBulkWriter.callBulkInsert(collectionSchema, batchFiles);
-            exampleBulkWriter.retrieveImportData();
+            List<List<String>> batchFiles = allTypesRemoteWriter(collectionSchema, fileType, rows);
+            createCollection(ALL_TYPES_COLLECTION_NAME, collectionSchema, true);
+            callBulkInsert(collectionSchema, batchFiles);
+            verifyImportData(collectionSchema, originalData);
         }

//        // 4 types of vectors + all scalar types + dynamic field enabled, use the cloud import API.
//        // You need to apply for a cloud service from Zilliz Cloud (https://zilliz.com/cloud)
//        for (BulkFileType fileType : fileTypes) {
-//            CollectionSchemaParam collectionSchema = buildAllTypesSchema();
-//            List<List<String>> batchFiles = exampleBulkWriter.allTypesRemoteWriter(collectionSchema, fileType);
-//            exampleBulkWriter.createCollection(ALL_TYPES_COLLECTION_NAME, collectionSchema, false);
-//            exampleBulkWriter.callCloudImport(batchFiles, ALL_TYPES_COLLECTION_NAME, StringUtils.EMPTY);
-//            exampleBulkWriter.retrieveImportData();
+//            CreateCollectionReq.CollectionSchema collectionSchema = buildAllTypesSchema();
+//            List<List<String>> batchFiles = allTypesRemoteWriter(collectionSchema, fileType, rows);
+//            createCollection(ALL_TYPES_COLLECTION_NAME, collectionSchema, true);
+//            callCloudImport(batchFiles, ALL_TYPES_COLLECTION_NAME, "");
+//            verifyImportData(collectionSchema, originalData);
//        }
     }
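
For orientation, the refactored all-types flow reduces to the sketch below. This is an illustrative outline only, not part of the patch; it assumes a reachable Milvus server and simply restates the helper methods this diff introduces:

    // Sketch: generate data once, then write/import/verify per file type.
    List<Map<String, Object>> originalData = genOriginalData(5);         // plain Java values, including nulls
    List<JsonObject> rows = genImportData(originalData, true);           // Gson rows for the bulk writer
    CreateCollectionReq.CollectionSchema schema = buildAllTypesSchema();
    List<List<String>> batchFiles = allTypesRemoteWriter(schema, BulkFileType.PARQUET, rows);
    createCollection(ALL_TYPES_COLLECTION_NAME, schema, true);           // drop and recreate
    callBulkInsert(schema, batchFiles);                                  // server-side bulk import
    verifyImportData(schema, originalData);                              // query back and compare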
@@ -352,52 +355,127 @@ public class BulkWriterExample {
         }
     }

-    private List<List<String>> allTypesRemoteWriter(CreateCollectionReq.CollectionSchema collectionSchema, BulkFileType fileType) throws Exception {
-        System.out.printf("\n===================== all field types (%s) ====================%n", fileType.name());
+    private static List<Map<String, Object>> genOriginalData(int count) {
+        List<Map<String, Object>> data = new ArrayList<>();
+        for (int i = 0; i < count; ++i) {
+            Map<String, Object> row = new HashMap<>();
+            // scalar field
+            row.put("id", (long)i);
+            row.put("bool", i % 5 == 0);
+            row.put("int8", i % 128);
+            row.put("int16", i % 1000);
+            row.put("int32", i % 100000);
+            row.put("float", (float)i / 3);
+            row.put("double", (double)i / 7);
+            row.put("varchar", "varchar_" + i);
+            row.put("json", String.format("{\"dummy\": %s, \"ok\": \"name_%s\"}", i, i));
+
-        try (RemoteBulkWriter remoteBulkWriter = buildRemoteBulkWriter(collectionSchema, fileType)) {
-            System.out.println("Append rows");
-            int batchCount = 10000;
-
-            for (int i = 0; i < batchCount; ++i) {
-                JsonObject rowObject = new JsonObject();
-
-                // scalar field
-                rowObject.addProperty("id", i);
-                rowObject.addProperty("bool", i % 5 == 0);
-                rowObject.addProperty("int8", i % 128);
-                rowObject.addProperty("int16", i % 1000);
-                rowObject.addProperty("int32", i % 100000);
-                rowObject.addProperty("float", i / 3);
-                rowObject.addProperty("double", i / 7);
-                rowObject.addProperty("varchar", "varchar_" + i);
-                rowObject.addProperty("json", String.format("{\"dummy\": %s, \"ok\": \"name_%s\"}", i, i));
-
-                // vector field
-                rowObject.add("float_vector", GSON_INSTANCE.toJsonTree(CommonUtils.generateFloatVector(DIM)));
-                rowObject.add("binary_vector", GSON_INSTANCE.toJsonTree(CommonUtils.generateBinaryVector(DIM).array()));
-                rowObject.add("float16_vector", GSON_INSTANCE.toJsonTree(CommonUtils.generateFloat16Vector(DIM, false).array()));
-                rowObject.add("sparse_vector", GSON_INSTANCE.toJsonTree(CommonUtils.generateSparseVector()));
-
-                // array field
-                rowObject.add("array_bool", GSON_INSTANCE.toJsonTree(GeneratorUtils.generatorBoolValue(10)));
-                rowObject.add("array_int8", GSON_INSTANCE.toJsonTree(GeneratorUtils.generatorInt8Value(10)));
-                rowObject.add("array_int16", GSON_INSTANCE.toJsonTree(GeneratorUtils.generatorInt16Value(10)));
-                rowObject.add("array_int32", GSON_INSTANCE.toJsonTree(GeneratorUtils.generatorInt32Value(10)));
-                rowObject.add("array_int64", GSON_INSTANCE.toJsonTree(GeneratorUtils.generatorLongValue(10)));
-                rowObject.add("array_varchar", GSON_INSTANCE.toJsonTree(GeneratorUtils.generatorVarcharValue(10, 10)));
-                rowObject.add("array_float", GSON_INSTANCE.toJsonTree(GeneratorUtils.generatorFloatValue(10)));
-                rowObject.add("array_double", GSON_INSTANCE.toJsonTree(GeneratorUtils.generatorDoubleValue(10)));
-
-                // dynamic fields
-                if (collectionSchema.isEnableDynamicField()) {
-                    rowObject.addProperty("dynamic", "dynamic_" + i);
-                }
+            // vector field
+            row.put("float_vector", CommonUtils.generateFloatVector(DIM));
+            row.put("binary_vector", CommonUtils.generateBinaryVector(DIM).array());
+            row.put("float16_vector", CommonUtils.generateFloat16Vector(DIM, false).array());
+            row.put("sparse_vector", CommonUtils.generateSparseVector());
+
-                if (QUERY_IDS.contains(i)) {
-                    System.out.println(rowObject);
-                }
+            // array field
+            row.put("array_bool", GeneratorUtils.generatorBoolValue(3));
+            row.put("array_int8", GeneratorUtils.generatorInt8Value(4));
+            row.put("array_int16", GeneratorUtils.generatorInt16Value(5));
+            row.put("array_int32", GeneratorUtils.generatorInt32Value(6));
+            row.put("array_int64", GeneratorUtils.generatorLongValue(7));
+            row.put("array_varchar", GeneratorUtils.generatorVarcharValue(8, 10));
+            row.put("array_float", GeneratorUtils.generatorFloatValue(9));
+            row.put("array_double", GeneratorUtils.generatorDoubleValue(10));
+
+            data.add(row);
+        }
+        // a special record with null/default values
+        {
+            Map<String, Object> row = new HashMap<>();
+            // scalar field
+            row.put("id", (long)data.size());
+            row.put("bool", null);
+            row.put("int8", null);
+            row.put("int16", 16);
+            row.put("int32", null);
+            row.put("float", null);
+            row.put("double", null);
+            row.put("varchar", null);
+            row.put("json", null);
+
+            // vector field
+            row.put("float_vector", CommonUtils.generateFloatVector(DIM));
+            row.put("binary_vector", CommonUtils.generateBinaryVector(DIM).array());
+            row.put("float16_vector", CommonUtils.generateFloat16Vector(DIM, false).array());
+            row.put("sparse_vector", CommonUtils.generateSparseVector());
+
+            // array field
+            row.put("array_bool", GeneratorUtils.generatorBoolValue(10));
+            row.put("array_int8", GeneratorUtils.generatorInt8Value(9));
+            row.put("array_int16", null);
+            row.put("array_int32", GeneratorUtils.generatorInt32Value(7));
+            row.put("array_int64", GeneratorUtils.generatorLongValue(6));
+            row.put("array_varchar", GeneratorUtils.generatorVarcharValue(5, 10));
+            row.put("array_float", GeneratorUtils.generatorFloatValue(4));
+            row.put("array_double", null);
+
+            data.add(row);
+        }
+        return data;
+    }
+
+    private static List<JsonObject> genImportData(List<Map<String, Object>> originalData, boolean isEnableDynamicField) {
+        List<JsonObject> data = new ArrayList<>();
+        for (Map<String, Object> row : originalData) {
+            JsonObject rowObject = new JsonObject();
+
+            // scalar field
+            rowObject.addProperty("id", (Number)row.get("id"));
+            if (row.get("bool") != null) { // a nullable value can be omitted
+                rowObject.addProperty("bool", (Boolean) row.get("bool"));
+            }
+            rowObject.addProperty("int8", row.get("int8") == null ? null : (Number) row.get("int8"));
+            rowObject.addProperty("int16", row.get("int16") == null ? null : (Number) row.get("int16"));
+            rowObject.addProperty("int32", row.get("int32") == null ? null : (Number) row.get("int32"));
+            rowObject.addProperty("float", row.get("float") == null ? null : (Number) row.get("float"));
+            if (row.get("double") != null) { // a nullable value can be omitted
+                rowObject.addProperty("double", (Number) row.get("double"));
+            }
+            rowObject.addProperty("varchar", row.get("varchar") == null ? null : (String) row.get("varchar"));
+            rowObject.addProperty("json", row.get("json") == null ? null : (String) row.get("json"));
+
+            // vector field
+            rowObject.add("float_vector", GSON_INSTANCE.toJsonTree(row.get("float_vector")));
+            rowObject.add("binary_vector", GSON_INSTANCE.toJsonTree(row.get("binary_vector")));
+            rowObject.add("float16_vector", GSON_INSTANCE.toJsonTree(row.get("float16_vector")));
+            rowObject.add("sparse_vector", GSON_INSTANCE.toJsonTree(row.get("sparse_vector")));
+
+            // array field
+            rowObject.add("array_bool", GSON_INSTANCE.toJsonTree(row.get("array_bool")));
+            rowObject.add("array_int8", GSON_INSTANCE.toJsonTree(row.get("array_int8")));
+            rowObject.add("array_int16", GSON_INSTANCE.toJsonTree(row.get("array_int16")));
+            rowObject.add("array_int32", GSON_INSTANCE.toJsonTree(row.get("array_int32")));
+            rowObject.add("array_int64", GSON_INSTANCE.toJsonTree(row.get("array_int64")));
+            rowObject.add("array_varchar", GSON_INSTANCE.toJsonTree(row.get("array_varchar")));
+            rowObject.add("array_float", GSON_INSTANCE.toJsonTree(row.get("array_float")));
+            rowObject.add("array_double", GSON_INSTANCE.toJsonTree(row.get("array_double")));
+
+            // dynamic fields
+            if (isEnableDynamicField) {
+                rowObject.addProperty("dynamic", "dynamic_" + row.get("id"));
+            }
+
+            data.add(rowObject);
+        }
+        return data;
+    }
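
The "nullable value can be omitted" branches above lean on how Gson handles nulls: a field set to a null boxed value is serialized as an explicit JSON null, while a field that is never added is simply absent from the output. A minimal, self-contained sketch (plain Gson, no Milvus required; the class name is hypothetical):

    import com.google.gson.JsonObject;

    public class NullableJsonDemo {
        public static void main(String[] args) {
            JsonObject row = new JsonObject();
            row.addProperty("id", 1L);
            row.addProperty("int8", (Number) null); // explicit null -> serialized as JSON null
            // "double" is never added -> the key is absent from the output
            System.out.println(row);                // {"id":1,"int8":null}
        }
    }

As the verification logic in this patch assumes, either form counts as a missing value for a nullable field during import: the field falls back to its default value if one is defined, otherwise it is stored as null.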
+    private static List<List<String>> allTypesRemoteWriter(CreateCollectionReq.CollectionSchema collectionSchema,
+                                                           BulkFileType fileType,
+                                                           List<JsonObject> data) throws Exception {
+        System.out.printf("\n===================== all field types (%s) ====================%n", fileType.name());
+
+        try (RemoteBulkWriter remoteBulkWriter = buildRemoteBulkWriter(collectionSchema, fileType)) {
+            for (JsonObject rowObject : data) {
                 remoteBulkWriter.appendRow(rowObject);
             }
             System.out.printf("%s rows appended%n", remoteBulkWriter.getTotalRowCount());
@@ -491,9 +569,7 @@
         }
     }

-    private void callBulkInsert(CreateCollectionReq.CollectionSchema collectionSchema, List<List<String>> batchFiles) throws InterruptedException {
-        createCollection(ALL_TYPES_COLLECTION_NAME, collectionSchema, true);
-
+    private static void callBulkInsert(CreateCollectionReq.CollectionSchema collectionSchema, List<List<String>> batchFiles) throws InterruptedException {
         String url = String.format("http://%s:%s", HOST, PORT);
         System.out.println("\n===================== import files to milvus ====================");
         MilvusImportRequest milvusImportRequest = MilvusImportRequest.builder()
@@ -538,7 +614,7 @@
         }
     }

-    private void callCloudImport(List<List<String>> batchFiles, String collectionName, String partitionName) throws InterruptedException {
+    private static void callCloudImport(List<List<String>> batchFiles, String collectionName, String partitionName) throws InterruptedException {
         String objectUrl = StorageConsts.cloudStorage == CloudStorage.AZURE
                 ? StorageConsts.cloudStorage.getAzureObjectUrl(StorageConsts.AZURE_ACCOUNT_NAME, StorageConsts.AZURE_CONTAINER_NAME, ImportUtils.getCommonPrefix(batchFiles))
                 : StorageConsts.cloudStorage.getS3ObjectUrl(StorageConsts.STORAGE_BUCKET, ImportUtils.getCommonPrefix(batchFiles), StorageConsts.STORAGE_REGION);
@@ -589,7 +665,7 @@
      * @param collectionSchema collection info
      * @param dropIfExist if the collection already exists, drop it first and then create it again
      */
-    private void createCollection(String collectionName, CreateCollectionReq.CollectionSchema collectionSchema, boolean dropIfExist) {
+    private static void createCollection(String collectionName, CreateCollectionReq.CollectionSchema collectionSchema, boolean dropIfExist) {
         System.out.println("\n===================== create collection ====================");
         checkMilvusClientIfExist();
@@ -612,9 +688,66 @@
         System.out.printf("Collection %s created%n", collectionName);
     }

-    private void retrieveImportData() {
+    private static void comparePrint(CreateCollectionReq.CollectionSchema collectionSchema,
+                                     Map<String, Object> expectedData, Map<String, Object> fetchedData,
+                                     String fieldName) {
+        CreateCollectionReq.FieldSchema field = collectionSchema.getField(fieldName);
+        Object expectedValue = expectedData.get(fieldName);
+        if (expectedValue == null) {
+            if (field.getDefaultValue() != null) {
+                expectedValue = field.getDefaultValue();
+                // for Int8/Int16 fields the default value is stored as a Short, while the returned value is an Integer
+                if (expectedValue instanceof Short) {
+                    expectedValue = ((Short)expectedValue).intValue();
+                }
+            }
+        }
+
+        Object fetchedValue = fetchedData.get(fieldName);
+        if (fetchedValue == null || fetchedValue instanceof JsonNull) {
+            if (!field.getIsNullable()) {
+                throw new RuntimeException("Field is not nullable but fetched data is null");
+            }
+            if (expectedValue != null) {
+                throw new RuntimeException("Expected value is not null but fetched data is null");
+            }
+            return; // both fetchedValue and expectedValue are null
+        }
+
+        boolean matched;
+        if (fetchedValue instanceof Float) {
+            matched = Math.abs((Float)fetchedValue - (Float)expectedValue) < 1e-4;
+        } else if (fetchedValue instanceof Double) {
+            matched = Math.abs((Double)fetchedValue - (Double)expectedValue) < 1e-8;
+        } else if (fetchedValue instanceof JsonElement) {
+            String ss = fetchedValue.toString();
+            matched = ss.equals(((String)expectedValue).replaceAll("\\s", "")); // compare ignoring whitespace
+        } else if (fetchedValue instanceof ByteBuffer) {
+            byte[] bb = ((ByteBuffer)fetchedValue).array();
+            matched = Arrays.equals(bb, (byte[])expectedValue);
+        } else if (fetchedValue instanceof List) {
+            matched = fetchedValue.equals(expectedValue);
+            // currently, for a null array field, the server returns an empty list
+            if (((List<?>) fetchedValue).isEmpty() && expectedValue == null) {
+                matched = true;
+            }
+        } else {
+            matched = fetchedValue.equals(expectedValue);
+        }
+
+        if (!matched) {
+            System.out.print("Fetched value:");
+            System.out.println(fetchedValue);
+            System.out.print("Expected value:");
+            System.out.println(expectedValue);
+            throw new RuntimeException("Fetched data does not match the expected data");
+        }
+    }
+
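
One subtlety the Short-to-Integer promotion above guards against: boxed numeric types never compare equal across types, so a Short default would always mismatch the Integer the server returns. A minimal illustration (plain Java, hypothetical values):

    public class BoxedCompareDemo {
        public static void main(String[] args) {
            Object defaultValue = (short) 88; // Int8/Int16 defaults are stored as Short
            Object fetched = 88;              // queried values come back as Integer
            System.out.println(defaultValue.equals(fetched));            // false: Short vs Integer
            System.out.println(((Short) defaultValue).intValue() == 88); // true after promotion
        }
    }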
+    private static void verifyImportData(CreateCollectionReq.CollectionSchema collectionSchema, List<Map<String, Object>> rows) {
         createIndex();

+        List<Long> QUERY_IDS = Lists.newArrayList(1L, (long)rows.get(rows.size() - 1).get("id"));
         System.out.printf("Load collection and query items %s%n", QUERY_IDS);
         loadCollection();

@@ -622,45 +755,38 @@
         System.out.println(expr);

         List<QueryResp.QueryResult> results = query(expr, Lists.newArrayList("*"));
-        System.out.println("Query results:");
+        System.out.println("Verify data...");
         for (QueryResp.QueryResult result : results) {
-            Map<String, Object> entity = result.getEntity();
-            JsonObject rowObject = new JsonObject();
-            // scalar field
-            rowObject.addProperty("id", (Long)entity.get("id"));
-            rowObject.addProperty("bool", (Boolean) entity.get("bool"));
-            rowObject.addProperty("int8", (Integer) entity.get("int8"));
-            rowObject.addProperty("int16", (Integer) entity.get("int16"));
-            rowObject.addProperty("int32", (Integer) entity.get("int32"));
-            rowObject.addProperty("float", (Float) entity.get("float"));
-            rowObject.addProperty("double", (Double) entity.get("double"));
-            rowObject.addProperty("varchar", (String) entity.get("varchar"));
-            rowObject.add("json", (JsonElement) entity.get("json"));
-
-            // vector field
-            rowObject.add("float_vector", GSON_INSTANCE.toJsonTree(entity.get("float_vector")));
-            rowObject.add("binary_vector", GSON_INSTANCE.toJsonTree(((ByteBuffer)entity.get("binary_vector")).array()));
-            rowObject.add("float16_vector", GSON_INSTANCE.toJsonTree(((ByteBuffer)entity.get("float16_vector")).array()));
-            rowObject.add("sparse_vector", GSON_INSTANCE.toJsonTree(entity.get("sparse_vector")));
-
-            // array field
-            rowObject.add("array_bool", GSON_INSTANCE.toJsonTree(entity.get("array_bool")));
-            rowObject.add("array_int8", GSON_INSTANCE.toJsonTree(entity.get("array_int8")));
-            rowObject.add("array_int16", GSON_INSTANCE.toJsonTree(entity.get("array_int16")));
-            rowObject.add("array_int32", GSON_INSTANCE.toJsonTree(entity.get("array_int32")));
-            rowObject.add("array_int64", GSON_INSTANCE.toJsonTree(entity.get("array_int64")));
-            rowObject.add("array_varchar", GSON_INSTANCE.toJsonTree(entity.get("array_varchar")));
-            rowObject.add("array_float", GSON_INSTANCE.toJsonTree(entity.get("array_float")));
-            rowObject.add("array_double", GSON_INSTANCE.toJsonTree(entity.get("array_double")));
-
-            // dynamic field
-            rowObject.addProperty("dynamic", (String) entity.get("dynamic"));
-
-            System.out.println(rowObject);
+            Map<String, Object> fetchedEntity = result.getEntity();
+            long id = (Long)fetchedEntity.get("id");
+            Map<String, Object> originalEntity = rows.get((int)id);
+            comparePrint(collectionSchema, originalEntity, fetchedEntity, "bool");
+            comparePrint(collectionSchema, originalEntity, fetchedEntity, "int8");
+            comparePrint(collectionSchema, originalEntity, fetchedEntity, "int16");
+            comparePrint(collectionSchema, originalEntity, fetchedEntity, "int32");
+            comparePrint(collectionSchema, originalEntity, fetchedEntity, "float");
+            comparePrint(collectionSchema, originalEntity, fetchedEntity, "double");
+            comparePrint(collectionSchema, originalEntity, fetchedEntity, "varchar");
+            comparePrint(collectionSchema, originalEntity, fetchedEntity, "json");
+
+            comparePrint(collectionSchema, originalEntity, fetchedEntity, "array_bool");
+            comparePrint(collectionSchema, originalEntity, fetchedEntity, "array_int8");
+            comparePrint(collectionSchema, originalEntity, fetchedEntity, "array_int16");
+            comparePrint(collectionSchema, originalEntity, fetchedEntity, "array_int32");
+            comparePrint(collectionSchema, originalEntity, fetchedEntity, "array_int64");
+            comparePrint(collectionSchema, originalEntity, fetchedEntity, "array_varchar");
+            comparePrint(collectionSchema, originalEntity, fetchedEntity, "array_float");
+            comparePrint(collectionSchema, originalEntity, fetchedEntity, "array_double");
+
+            comparePrint(collectionSchema, originalEntity, fetchedEntity, "float_vector");
+            comparePrint(collectionSchema, originalEntity, fetchedEntity, "binary_vector");
+            comparePrint(collectionSchema, originalEntity, fetchedEntity, "float16_vector");
+            comparePrint(collectionSchema, originalEntity, fetchedEntity, "sparse_vector");
         }
+        System.out.println("Result is correct!");
     }

-    private void createIndex() {
+    private static void createIndex() {
         System.out.println("Create index...");
         checkMilvusClientIfExist();
@@ -696,7 +822,7 @@
                 .build());
     }

-    private void loadCollection() {
+    private static void loadCollection() {
         System.out.println("Refresh load collection...");
         checkMilvusClientIfExist();
         // RefreshLoad is a new interface from v2.5.3,
@@ -708,7 +834,7 @@
         System.out.println("Collection row number: " + getCollectionRowCount());
     }

-    private List<QueryResp.QueryResult> query(String expr, List<String> outputFields) {
+    private static List<QueryResp.QueryResult> query(String expr, List<String> outputFields) {
         System.out.println("========== query() ==========");
         checkMilvusClientIfExist();
         QueryReq test = QueryReq.builder()
@@ -720,7 +846,7 @@
         return response.getQueryResults();
     }

-    private Long getCollectionRowCount() {
+    private static Long getCollectionRowCount() {
         System.out.println("========== getCollectionRowCount() ==========");
         checkMilvusClientIfExist();

@@ -758,28 +884,28 @@
         System.out.println(listImportJobsResult);
     }

-    private CreateCollectionReq.CollectionSchema buildSimpleSchema() {
+    private static CreateCollectionReq.CollectionSchema buildSimpleSchema() {
         CreateCollectionReq.CollectionSchema schemaV2 = CreateCollectionReq.CollectionSchema.builder()
                 .build();
         schemaV2.addField(AddFieldReq.builder()
                 .fieldName("id")
-                .dataType(io.milvus.v2.common.DataType.Int64)
+                .dataType(DataType.Int64)
                 .isPrimaryKey(Boolean.TRUE)
                 .autoID(true)
                 .build());
         schemaV2.addField(AddFieldReq.builder()
                 .fieldName("path")
-                .dataType(io.milvus.v2.common.DataType.VarChar)
+                .dataType(DataType.VarChar)
                 .maxLength(512)
                 .build());
         schemaV2.addField(AddFieldReq.builder()
                 .fieldName("label")
-                .dataType(io.milvus.v2.common.DataType.VarChar)
+                .dataType(DataType.VarChar)
                 .maxLength(512)
                 .build());
         schemaV2.addField(AddFieldReq.builder()
                 .fieldName("vector")
-                .dataType(io.milvus.v2.common.DataType.FloatVector)
+                .dataType(DataType.FloatVector)
                 .dimension(DIM)
                 .build());

@@ -793,120 +919,132 @@
         // scalar field
         schemaV2.addField(AddFieldReq.builder()
                 .fieldName("id")
-                .dataType(io.milvus.v2.common.DataType.Int64)
+                .dataType(DataType.Int64)
                 .isPrimaryKey(Boolean.TRUE)
                 .autoID(false)
                 .build());
         schemaV2.addField(AddFieldReq.builder()
                 .fieldName("bool")
-                .dataType(io.milvus.v2.common.DataType.Bool)
+                .dataType(DataType.Bool)
+                .isNullable(true)
                 .build());
         schemaV2.addField(AddFieldReq.builder()
                 .fieldName("int8")
-                .dataType(io.milvus.v2.common.DataType.Int8)
+                .dataType(DataType.Int8)
+                .defaultValue((short)88)
                 .build());
         schemaV2.addField(AddFieldReq.builder()
                 .fieldName("int16")
-                .dataType(io.milvus.v2.common.DataType.Int16)
+                .dataType(DataType.Int16)
                 .build());
         schemaV2.addField(AddFieldReq.builder()
                 .fieldName("int32")
-                .dataType(io.milvus.v2.common.DataType.Int32)
+                .dataType(DataType.Int32)
+                .isNullable(true)
+                .defaultValue(999999)
                 .build());
         schemaV2.addField(AddFieldReq.builder()
                 .fieldName("float")
-                .dataType(io.milvus.v2.common.DataType.Float)
+                .dataType(DataType.Float)
+                .isNullable(true)
+                .defaultValue((float)3.14159)
                 .build());
         schemaV2.addField(AddFieldReq.builder()
                 .fieldName("double")
-                .dataType(io.milvus.v2.common.DataType.Double)
+                .dataType(DataType.Double)
+                .isNullable(true)
                 .build());
         schemaV2.addField(AddFieldReq.builder()
                 .fieldName("varchar")
-                .dataType(io.milvus.v2.common.DataType.VarChar)
+                .dataType(DataType.VarChar)
                 .maxLength(512)
+                .isNullable(true)
+                .defaultValue("this is default value")
                 .build());
         schemaV2.addField(AddFieldReq.builder()
                 .fieldName("json")
-                .dataType(io.milvus.v2.common.DataType.JSON)
+                .dataType(DataType.JSON)
+                .isNullable(true)
                 .build());

         // vector fields
         schemaV2.addField(AddFieldReq.builder()
                 .fieldName("float_vector")
-                .dataType(io.milvus.v2.common.DataType.FloatVector)
+                .dataType(DataType.FloatVector)
                 .dimension(DIM)
                 .build());
         schemaV2.addField(AddFieldReq.builder()
                 .fieldName("binary_vector")
-                .dataType(io.milvus.v2.common.DataType.BinaryVector)
+                .dataType(DataType.BinaryVector)
                 .dimension(DIM)
                 .build());
         schemaV2.addField(AddFieldReq.builder()
                 .fieldName("float16_vector")
-                .dataType(io.milvus.v2.common.DataType.Float16Vector)
+                .dataType(DataType.Float16Vector)
                 .dimension(DIM)
                 .build());
         schemaV2.addField(AddFieldReq.builder()
                 .fieldName("sparse_vector")
-                .dataType(io.milvus.v2.common.DataType.SparseFloatVector)
+                .dataType(DataType.SparseFloatVector)
                 .build());

         // array fields
         schemaV2.addField(AddFieldReq.builder()
                 .fieldName("array_bool")
-                .dataType(io.milvus.v2.common.DataType.Array)
+                .dataType(DataType.Array)
                 .maxCapacity(ARRAY_CAPACITY)
-                .elementType(io.milvus.v2.common.DataType.Bool)
+                .elementType(DataType.Bool)
                 .build());
         schemaV2.addField(AddFieldReq.builder()
                 .fieldName("array_int8")
-                .dataType(io.milvus.v2.common.DataType.Array)
+                .dataType(DataType.Array)
                 .maxCapacity(ARRAY_CAPACITY)
-                .elementType(io.milvus.v2.common.DataType.Int8)
+                .elementType(DataType.Int8)
                 .build());
         schemaV2.addField(AddFieldReq.builder()
                 .fieldName("array_int16")
-                .dataType(io.milvus.v2.common.DataType.Array)
+                .dataType(DataType.Array)
                 .maxCapacity(ARRAY_CAPACITY)
-                .elementType(io.milvus.v2.common.DataType.Int16)
+                .elementType(DataType.Int16)
+                .isNullable(true)
                 .build());
         schemaV2.addField(AddFieldReq.builder()
                 .fieldName("array_int32")
-                .dataType(io.milvus.v2.common.DataType.Array)
+                .dataType(DataType.Array)
                 .maxCapacity(ARRAY_CAPACITY)
-                .elementType(io.milvus.v2.common.DataType.Int32)
+                .elementType(DataType.Int32)
                 .build());
         schemaV2.addField(AddFieldReq.builder()
                 .fieldName("array_int64")
-                .dataType(io.milvus.v2.common.DataType.Array)
+                .dataType(DataType.Array)
                 .maxCapacity(ARRAY_CAPACITY)
-                .elementType(io.milvus.v2.common.DataType.Int64)
+                .elementType(DataType.Int64)
                 .build());
         schemaV2.addField(AddFieldReq.builder()
                 .fieldName("array_varchar")
-                .dataType(io.milvus.v2.common.DataType.Array)
+                .dataType(DataType.Array)
                 .maxCapacity(ARRAY_CAPACITY)
-                .elementType(io.milvus.v2.common.DataType.VarChar)
+                .elementType(DataType.VarChar)
                 .maxLength(512)
                 .build());
         schemaV2.addField(AddFieldReq.builder()
                 .fieldName("array_float")
-                .dataType(io.milvus.v2.common.DataType.Array)
+                .dataType(DataType.Array)
                 .maxCapacity(ARRAY_CAPACITY)
-                .elementType(io.milvus.v2.common.DataType.Float)
+                .elementType(DataType.Float)
                 .build());
         schemaV2.addField(AddFieldReq.builder()
                 .fieldName("array_double")
-                .dataType(io.milvus.v2.common.DataType.Array)
+                .dataType(DataType.Array)
                 .maxCapacity(ARRAY_CAPACITY)
-                .elementType(io.milvus.v2.common.DataType.Double)
+                .elementType(DataType.Double)
+                .isNullable(true)
                 .build());

         return schemaV2;
     }

-    private void checkMilvusClientIfExist() {
+    private static void checkMilvusClientIfExist() {
         if (milvusClient == null) {
             String msg = "milvusClient is null. Please initialize it by calling createConnection() first before use.";
             throw new RuntimeException(msg);