Просмотр исходного кода

New metric type for struct vectors (#1642)

Signed-off-by: yhmo <yihua.mo@zilliz.com>
groot 3 месяца назад
Родитель
Коммит
a38d4cd3a8

+ 2 - 2
docker-compose.yml

@@ -3,7 +3,7 @@ version: '3.5'
 services:
   standalone:
     container_name: milvus-javasdk-standalone-1
-    image: milvusdb/milvus:master-20250929-ca1cc7c9-amd64
+    image: milvusdb/milvus:2.6-20251015-bb4446e5-amd64
     command: [ "milvus", "run", "standalone" ]
     environment:
       - COMMON_STORAGETYPE=local
@@ -24,7 +24,7 @@ services:
 
   standaloneslave:
     container_name: milvus-javasdk-standalone-2
-    image: milvusdb/milvus:master-20250929-ca1cc7c9-amd64
+    image: milvusdb/milvus:2.6-20251015-bb4446e5-amd64
     command: [ "milvus", "run", "standalone" ]
     environment:
       - COMMON_STORAGETYPE=local

+ 8 - 5
sdk-core/src/main/java/io/milvus/v2/common/IndexParam.java

@@ -53,8 +53,14 @@ public class IndexParam {
         // Only for sparse vector with BM25
         BM25,
 
-        // Only for struct vector
-        MAX_SIM,
+        // Only for float vector inside struct
+        MAX_SIM, // equal to MAX_SIM_COSINE
+        MAX_SIM_COSINE,
+        MAX_SIM_IP,
+        MAX_SIM_L2,
+        // Only for binary vector inside struct
+        MAX_SIM_JACCARD,
+        MAX_SIM_HAMMING,
         ;
     }
 
@@ -105,9 +111,6 @@ public class IndexParam {
         // From Milvus 2.5.4 onward, SPARSE_WAND is being deprecated. Instead, it is recommended to
         // use "inverted_index_algo": "DAAT_WAND" for equivalency while maintaining compatibility.
         SPARSE_WAND(301),
-
-        // Only for struct vector
-        EMB_LIST_HNSW(401),
         ;
 
         private final String name;

+ 18 - 11
sdk-core/src/main/java/io/milvus/v2/utils/DataUtils.java

@@ -371,10 +371,19 @@ public class DataUtils {
     @SuppressWarnings("unchecked")
     public static VectorArray genVectorArray(DataType dataType, List<?> objects) {
         VectorArray.Builder builder = VectorArray.newBuilder().setElementType(dataType);
-        if (dataType == DataType.FloatVector) {
-            // each object is List<List<Float>>
-            for (Object object : objects) {
-                if (object instanceof List) {
+        switch (dataType) {
+            case FloatVector:
+            case BinaryVector:
+            case Float16Vector:
+            case BFloat16Vector:
+            case Int8Vector: {
+                // for FloatVector, objects is List<List<List<Float>>>
+                // for others, objects is List<List<List<ByteBuffer>>>
+                for (Object object : objects) {
+                    if (!(object instanceof List)) {
+                        throw new MilvusClientException(ErrorCode.INVALID_PARAMS, "Input value is not List<> for type: " + dataType.name());
+                    }
+
                     List<?> listOfList = (List<?>) object;
                     if (listOfList.isEmpty()) {
                         // struct field value is empty, fill the VectorArray with zero-dim vectors?
@@ -394,16 +403,14 @@ public class DataUtils {
                         throw new MilvusClientException(ErrorCode.INVALID_PARAMS, msg);
                     }
                     builder.addData(vf);
-                } else {
-                    throw new MilvusClientException(ErrorCode.INVALID_PARAMS, "The type of FloatVector must be List<>");
                 }
+                return builder.build();
             }
-
-            return builder.build();
+            default:
+                // so far, struct field only supports FloatVector/BinaryVector/Float16Vector/BFloat16Vector/Int8Vector
+                String msg = String.format("Illegal vector dataType %s for struct field", dataType.name());
+                throw new MilvusClientException(ErrorCode.INVALID_PARAMS, msg);
         }
-        // so far, struct field only supports FloatVector
-        String msg = String.format("Illegal vector dataType %s for struct field", dataType.name());
-        throw new MilvusClientException(ErrorCode.INVALID_PARAMS, msg);
     }
 
     public DeleteRequest ConvertToGrpcDeleteRequest(DeleteReq request) {

+ 1 - 1
sdk-core/src/test/java/io/milvus/TestUtils.java

@@ -11,7 +11,7 @@ public class TestUtils {
     private int dimension = 256;
     private static final Random RANDOM = new Random();
 
-    public static final String MilvusDockerImageID = "milvusdb/milvus:master-20250929-ca1cc7c9-amd64";
+    public static final String MilvusDockerImageID = "milvusdb/milvus:2.6-20251015-bb4446e5-amd64";
 
     public TestUtils(int dimension) {
         this.dimension = dimension;

+ 28 - 11
sdk-core/src/test/java/io/milvus/v2/client/MilvusClientV2DockerTest.java

@@ -1028,6 +1028,7 @@ class MilvusClientV2DockerTest {
         String structField = "clips";
         String structScalarField = "desc";
         String structVectorField = "clip";
+        String structBinVectorField = "clip_bin";
         int structCapacity = 300;
         int varcharLength = 100;
         CreateCollectionReq.CollectionSchema collectionSchema = CreateCollectionReq.CollectionSchema.builder()
@@ -1065,6 +1066,12 @@ class MilvusClientV2DockerTest {
                         .dataType(DataType.FloatVector)
                         .dimension(DIMENSION)
                         .build())
+//                .addStructField(AddFieldReq.builder()
+//                        .fieldName(structBinVectorField)
+//                        .description("dummy")
+//                        .dataType(DataType.BinaryVector)
+//                        .dimension(DIMENSION)
+//                        .build())
                 .build());
 
         client.dropCollection(DropCollectionReq.builder()
@@ -1084,10 +1091,17 @@ class MilvusClientV2DockerTest {
                 .metricType(IndexParam.MetricType.COSINE)
                 .build());
         indexParams.add(IndexParam.builder()
-                .fieldName(structVectorField)
-                .indexType(IndexParam.IndexType.EMB_LIST_HNSW)
-                .metricType(IndexParam.MetricType.MAX_SIM)
-                .build());
+                .fieldName("clips[clip]")
+                .indexName("index1")
+                .indexType(IndexParam.IndexType.HNSW)
+                .metricType(IndexParam.MetricType.MAX_SIM_L2)
+                .build());
+//        indexParams.add(IndexParam.builder()
+//                .fieldName("clips[clip_bin]")
+//                .indexName("index2")
+//                .indexType(IndexParam.IndexType.AUTOINDEX)
+//                .metricType(IndexParam.MetricType.MAX_SIM_HAMMING)
+//                .build());
         client.createIndex(CreateIndexReq.builder()
                 .collectionName(randomCollectionName)
                 .indexParams(indexParams)
@@ -1125,11 +1139,12 @@ class MilvusClientV2DockerTest {
         DescribeIndexResp indexDesc = client.describeIndex(DescribeIndexReq.builder()
                 .collectionName(randomCollectionName)
                 .fieldName(structVectorField)
+                .indexName("index1")
                 .build());
         Assertions.assertEquals(1, indexDesc.getIndexDescriptions().size());
         DescribeIndexResp.IndexDesc desc = indexDesc.getIndexDescriptions().get(0);
-        Assertions.assertEquals(IndexParam.IndexType.EMB_LIST_HNSW, desc.getIndexType());
-        Assertions.assertEquals(IndexParam.MetricType.MAX_SIM, desc.getMetricType());
+        Assertions.assertEquals(IndexParam.IndexType.HNSW, desc.getIndexType());
+        Assertions.assertEquals(IndexParam.MetricType.MAX_SIM_L2, desc.getMetricType());
 
         // insert
         List<JsonObject> rows = new ArrayList<>();
@@ -1144,6 +1159,7 @@ class MilvusClientV2DockerTest {
                 JsonObject struct = new JsonObject();
                 struct.addProperty(structScalarField, "No." + k);
                 struct.add(structVectorField, JsonUtils.toJsonTree(utils.generateFloatVector()));
+//                struct.add(structBinVectorField, JsonUtils.toJsonTree(utils.generateBinaryVector(DIMENSION).array()));
                 structArr.add(struct);
             }
             row.add(structField, structArr);
@@ -1166,6 +1182,7 @@ class MilvusClientV2DockerTest {
             JsonObject struct = new JsonObject();
             struct.addProperty(structScalarField, "updated_No." + k);
             struct.add(structVectorField, JsonUtils.toJsonTree(utils.generateFloatVector()));
+//            struct.add(structBinVectorField, JsonUtils.toJsonTree(utils.generateBinaryVector(DIMENSION).array()));
             structArr.add(struct);
         }
         row.add(structField, structArr);
@@ -1190,14 +1207,14 @@ class MilvusClientV2DockerTest {
         Assertions.assertTrue(queryResults.get(1).getEntity().containsKey(structField));
 
         // search
-        List<Map<String, Object>> structs0 = (List<Map<String, Object>>)queryResults.get(0).getEntity().get(structField);
         EmbeddingList embList0 = new EmbeddingList();
+        EmbeddingList embList1 = new EmbeddingList();
+
+        List<Map<String, Object>> structs0 = (List<Map<String, Object>>)queryResults.get(0).getEntity().get(structField);
         for (Map<String, Object> struct : structs0) {
             embList0.add(new FloatVec((List<Float>)struct.get(structVectorField)));
         }
-
         List<Map<String, Object>> structs1 = (List<Map<String, Object>>)queryResults.get(1).getEntity().get(structField);
-        EmbeddingList embList1 = new EmbeddingList();
         for (Map<String, Object> struct : structs1) {
             embList1.add(new FloatVec((List<Float>)struct.get(structVectorField)));
         }
@@ -1205,10 +1222,10 @@ class MilvusClientV2DockerTest {
         int topK = 5;
         SearchResp searchResp = client.search(SearchReq.builder()
                 .collectionName(randomCollectionName)
-                .annsField(structVectorField)
+                .annsField("clips[clip]")
                 .data(Arrays.asList(embList0, embList1))
                 .limit(topK)
-                .outputFields(Collections.singletonList(structScalarField))
+                .outputFields(Collections.singletonList("clips[desc]"))
                 .build());
         List<List<SearchResp.SearchResult>> searchResults = searchResp.getSearchResults();
         Assertions.assertEquals(2, searchResults.size());