Explorar o código

Support runAnalyzer() interface (#1394)

Signed-off-by: yhmo <yihua.mo@zilliz.com>
groot hai 2 semanas
pai
achega
a6d15614a3

+ 1 - 1
docker-compose.yml

@@ -77,7 +77,7 @@ services:
 
   standaloneslave:
     container_name: milvus-javasdk-test-slave-standalone
-    image: milvusdb/milvus:v2.5.8
+    image: milvusdb/milvus:v2.5.11
     command: ["milvus", "run", "standalone"]
     environment:
       ETCD_ENDPOINTS: etcdslave:2379

+ 2 - 2
pom.xml

@@ -99,13 +99,13 @@
         <maven.deploy.plugin.version>3.1.3</maven.deploy.plugin.version>
         <junit.platform.version>1.1.0</junit.platform.version>
         <junit.jupiter.engine.version>5.10.1</junit.jupiter.engine.version>
-        <gson.version>2.10.1</gson.version>
+        <gson.version>2.13.1</gson.version>
         <kotlin.version>1.9.10</kotlin.version>
         <mockito.version>4.11.0</mockito.version>
         <testcontainers.version>1.19.8</testcontainers.version>
         <apache.commons.pool2.version>2.12.0</apache.commons.pool2.version>
         <guava.version>32.1.3-jre</guava.version>
-        <errorprone.version>2.21.1</errorprone.version>
+        <errorprone.version>2.38.0</errorprone.version>
 
         <!--for BulkWriter-->
         <plexus.version>3.0.24</plexus.version>

+ 18 - 7
sdk-core/src/main/java/io/milvus/v2/client/MilvusClientV2.java

@@ -382,7 +382,7 @@ public class MilvusClientV2 {
     /**
      * Get information of all replicas from a collection.
      *
-     * @param request {@link DescribeReplicasReq}
+     * @param request describe replicas request
      */
     public DescribeReplicasResp describeReplicas(DescribeReplicasReq request) {
         return rpcUtils.retry(()->collectionService.describeReplicas(this.getRpcStub(), request));
@@ -528,8 +528,8 @@ public class MilvusClientV2 {
      * Get queryIterator based on scalar field(s) filtered by boolean expression.
      * Note that the order of the returned entities cannot be guaranteed.
      *
-     * @param request {@link QueryIteratorReq}
-     * @return {status:result code,data: QueryIterator}
+     * @param request query iterator request
+     * @return QueryIterator
      */
     public QueryIterator queryIterator(QueryIteratorReq request) {
         return rpcUtils.retry(()->vectorService.queryIterator(this.getRpcStub(), request));
@@ -538,8 +538,8 @@ public class MilvusClientV2 {
     /**
      * Get searchIterator based on a vector field. Use expression to do filtering before search.
      *
-     * @param request {@link SearchIteratorReq}
-     * @return {status:result code, data: SearchIterator}
+     * @param request search iterator request
+     * @return SearchIterator
      */
     public SearchIterator searchIterator(SearchIteratorReq request) {
         return rpcUtils.retry(()->vectorService.searchIterator(this.getRpcStub(), request));
@@ -548,13 +548,24 @@ public class MilvusClientV2 {
     /**
      * Get searchIteratorV2 based on a vector field. Use expression to do filtering before search.
      *
-     * @param request {@link SearchIteratorReqV2}
-     * @return {status:result code, data: SearchIteratorV2}
+     * @param request search iterator request V2
+     * @return SearchIteratorV2
      */
     public SearchIteratorV2 searchIteratorV2(SearchIteratorReqV2 request) {
         // The RPC stub is obtained inside the lambda, i.e. each time the lambda itself runs.
         return rpcUtils.retry(
                 () -> vectorService.searchIteratorV2(this.getRpcStub(), request));
     }
 
+    /**
+     * Runs an analyzer over the texts carried by the request and returns the resulting tokens.
+     * Milvus server supports this interface from v2.5.11
+     *
+     * @param request run analyzer request
+     * @return RunAnalyzerResp holding the analysis results
+     */
+    public RunAnalyzerResp runAnalyzer(RunAnalyzerReq request) {
+        // The RPC stub is obtained inside the lambda, i.e. each time the lambda itself runs.
+        return rpcUtils.retry(
+                () -> vectorService.runAnalyzer(this.getRpcStub(), request));
+    }
+
     /////////////////////////////////////////////////////////////////////////////////////////////
     // Partition Operations
     /////////////////////////////////////////////////////////////////////////////////////////////

+ 49 - 0
sdk-core/src/main/java/io/milvus/v2/service/vector/VectorService.java

@@ -19,7 +19,9 @@
 
 package io.milvus.v2.service.vector;
 
+import com.google.protobuf.ByteString;
 import io.milvus.common.utils.GTsDict;
+import io.milvus.common.utils.JsonUtils;
 import io.milvus.exception.ParamException;
 import io.milvus.grpc.*;
 import io.milvus.orm.iterator.*;
@@ -303,4 +305,51 @@ public class VectorService extends BaseService {
                 .getResults(queryResp.getQueryResults())
                 .build();
     }
+
+    /**
+     * Sends a RunAnalyzer RPC and converts the protobuf response into a {@link RunAnalyzerResp}.
+     *
+     * @param blockingStub gRPC blocking stub used to issue the call
+     * @param request      texts to analyze, analyzer parameters and the withDetail/withHash flags
+     * @return one {@link RunAnalyzerResp.AnalyzerResult} per result returned by the server
+     * @throws MilvusClientException with {@code INVALID_PARAMS} if the texts list is null or empty
+     */
+    public RunAnalyzerResp runAnalyzer(MilvusServiceGrpc.MilvusServiceBlockingStub blockingStub, RunAnalyzerReq request) {
+        String title = "RunAnalyzer";
+        List<String> texts = request.getTexts();
+        if (texts == null || texts.isEmpty()) {
+            throw new MilvusClientException(ErrorCode.INVALID_PARAMS, "Texts list is empty.");
+        }
+
+        List<ByteString> byteStrings = new ArrayList<>(texts.size());
+        for (String text : texts) {
+            // Encode explicitly as UTF-8; String.getBytes() without a charset uses the platform default.
+            byteStrings.add(ByteString.copyFromUtf8(text));
+        }
+
+        // The flags are boxed Booleans on the request; a null flag is treated as false instead of
+        // failing with a NullPointerException during auto-unboxing.
+        RunAnalyzerRequest runRequest = RunAnalyzerRequest.newBuilder()
+                .addAllPlaceholder(byteStrings)
+                .setAnalyzerParams(JsonUtils.toJson(request.getAnalyzerParams()))
+                .setWithDetail(Boolean.TRUE.equals(request.getWithDetail()))
+                .setWithHash(Boolean.TRUE.equals(request.getWithHash()))
+                .build();
+        RunAnalyzerResponse response = blockingStub.runAnalyzer(runRequest);
+        rpcUtils.handleResponse(title, response.getStatus());
+
+        List<RunAnalyzerResp.AnalyzerResult> toResults = new ArrayList<>(response.getResultsCount());
+        for (AnalyzerResult item : response.getResultsList()) {
+            List<RunAnalyzerResp.AnalyzerToken> toTokens = new ArrayList<>(item.getTokensCount());
+            for (AnalyzerToken token : item.getTokensList()) {
+                toTokens.add(RunAnalyzerResp.AnalyzerToken.builder()
+                        .token(token.getToken())
+                        .startOffset(token.getStartOffset())
+                        .endOffset(token.getEndOffset())
+                        .position(token.getPosition())
+                        .positionLength(token.getPositionLength())
+                        // Keep only the low 32 bits so the value is non-negative as a long
+                        // (presumably the hash is an unsigned 32-bit value on the wire).
+                        .hash(token.getHash() & 0xFFFFFFFFL)
+                        .build());
+            }
+            toResults.add(RunAnalyzerResp.AnalyzerResult.builder()
+                    .tokens(toTokens)
+                    .build());
+        }
+
+        return RunAnalyzerResp.builder()
+                .results(toResults)
+                .build();
+    }
 }

+ 39 - 0
sdk-core/src/main/java/io/milvus/v2/service/vector/request/RunAnalyzerReq.java

@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package io.milvus.v2.service.vector.request;
+
+import lombok.Builder;
+import lombok.Data;
+import lombok.experimental.SuperBuilder;
+
+import java.util.*;
+
+/**
+ * Request parameters for the runAnalyzer() interface.
+ */
+@Data
+@SuperBuilder
+public class RunAnalyzerReq {
+    /** Texts to be analyzed; defaults to an empty list. */
+    @Builder.Default
+    private List<String> texts = new ArrayList<>();
+
+    /** Analyzer configuration as key/value pairs; defaults to an empty map. */
+    @Builder.Default
+    private Map<String, Object> analyzerParams = new HashMap<>();
+
+    /** Value for the request's withDetail flag; defaults to false. */
+    @Builder.Default
+    private Boolean withDetail = false;
+
+    /** Value for the request's withHash flag; defaults to false. */
+    @Builder.Default
+    private Boolean withHash = false;
+}

+ 52 - 0
sdk-core/src/main/java/io/milvus/v2/service/vector/response/RunAnalyzerResp.java

@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package io.milvus.v2.service.vector.response;
+
+import lombok.Builder;
+import lombok.Data;
+import lombok.experimental.SuperBuilder;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Response of the runAnalyzer() interface: a list of analysis results, each holding its tokens.
+ */
+@Data
+@SuperBuilder
+public class RunAnalyzerResp {
+    /** Analysis results; defaults to an empty list. */
+    @Builder.Default
+    private List<AnalyzerResult> results = new ArrayList<>();
+
+    /** Tokens produced for a single analysis result. */
+    @Data
+    @SuperBuilder
+    public static final class AnalyzerResult {
+        /** Tokens of this result; defaults to an empty list. */
+        @Builder.Default
+        private List<AnalyzerToken> tokens = new ArrayList<>();
+    }
+
+    /** A single token together with its offsets, position information and hash. */
+    @Data
+    @SuperBuilder
+    public static final class AnalyzerToken {
+        private String token;
+        private Long startOffset;
+        private Long endOffset;
+        private Long position;
+        private Long positionLength;
+        private Long hash;
+    }
+}

+ 1 - 1
sdk-core/src/test/java/io/milvus/client/MilvusClientDockerTest.java

@@ -75,7 +75,7 @@ class MilvusClientDockerTest {
     private static final TestUtils utils = new TestUtils(DIMENSION);
 
     @Container
-    private static final MilvusContainer milvus = new MilvusContainer("milvusdb/milvus:v2.5.8");
+    private static final MilvusContainer milvus = new MilvusContainer("milvusdb/milvus:v2.5.11");
 
     @BeforeAll
     public static void setUp() {

+ 73 - 1
sdk-core/src/test/java/io/milvus/v2/client/MilvusClientV2DockerTest.java

@@ -81,7 +81,7 @@ class MilvusClientV2DockerTest {
     private static final TestUtils utils = new TestUtils(DIMENSION);
 
     @Container
-    private static final MilvusContainer milvus = new MilvusContainer("milvusdb/milvus:v2.5.8");
+    private static final MilvusContainer milvus = new MilvusContainer("milvusdb/milvus:v2.5.11");
 
     @BeforeAll
     public static void setUp() {
@@ -2377,4 +2377,76 @@ class MilvusClientV2DockerTest {
         Assertions.assertFalse(replica.getLeaderAddress().isEmpty());
         Assertions.assertNotEquals(0L, replica.getLeaderID());
     }
+
+    /**
+     * Runs the analyzer on two texts with a standard tokenizer, a lowercase filter and a stop-word
+     * filter, then checks every returned token attribute against the expected values.
+     */
+    @Test
+    void testRunAnalyzer() {
+        List<String> texts = new ArrayList<>();
+        texts.add("Analyzers (tokenizers) for multi languages");
+        texts.add("2.5 to take advantage of enhancements and fixes!");
+
+        // Plain map instead of double-brace initialization, which creates an anonymous HashMap subclass.
+        Map<String, Object> stopFilter = new HashMap<>();
+        stopFilter.put("type", "stop");
+        stopFilter.put("stop_words", Arrays.asList("to", "of", "for", "the"));
+
+        Map<String, Object> analyzerParams = new HashMap<>();
+        analyzerParams.put("tokenizer", "standard");
+        analyzerParams.put("filter", Arrays.asList("lowercase", stopFilter));
+
+        RunAnalyzerResp resp = client.runAnalyzer(RunAnalyzerReq.builder()
+                .texts(texts)
+                .analyzerParams(analyzerParams)
+                .withDetail(true)
+                .withHash(true)
+                .build());
+
+        List<RunAnalyzerResp.AnalyzerResult> results = resp.getResults();
+        Assertions.assertEquals(texts.size(), results.size());
+
+        verifyRunAnalyzerTokens(results.get(0).getTokens(),
+                Arrays.asList("analyzers", "tokenizers", "multi", "languages"),
+                Arrays.asList(0L, 11L, 27L, 33L),
+                Arrays.asList(9L, 21L, 32L, 42L),
+                Arrays.asList(0L, 1L, 3L, 4L),
+                Arrays.asList(1L, 1L, 1L, 1L),
+                Arrays.asList(1356745679L, 4089107865L, 3314631429L, 2698072953L));
+
+        verifyRunAnalyzerTokens(results.get(1).getTokens(),
+                Arrays.asList("2", "5", "take", "advantage", "enhancements", "and", "fixes"),
+                Arrays.asList(0L, 2L, 7L, 12L, 25L, 38L, 42L),
+                Arrays.asList(1L, 3L, 11L, 21L, 37L, 41L, 47L),
+                Arrays.asList(0L, 1L, 3L, 4L, 6L, 7L, 8L),
+                Arrays.asList(1L, 1L, 1L, 1L, 1L, 1L, 1L),
+                Arrays.asList(450215437L, 2226203566L, 937258619L, 697180577L, 3403941281L, 133536621L, 488262645L));
+    }
+
+    /**
+     * Asserts that the returned tokens match the expected attribute lists element by element.
+     * All expected lists are indexed in parallel with {@code actual}.
+     */
+    private static void verifyRunAnalyzerTokens(List<RunAnalyzerResp.AnalyzerToken> actual,
+                                                List<String> tokens,
+                                                List<Long> startOffsets,
+                                                List<Long> endOffsets,
+                                                List<Long> positions,
+                                                List<Long> positionLengths,
+                                                List<Long> hashes) {
+        System.out.printf("%d tokens%n", actual.size());
+        Assertions.assertEquals(tokens.size(), actual.size());
+        for (int i = 0; i < actual.size(); i++) {
+            RunAnalyzerResp.AnalyzerToken token = actual.get(i);
+            System.out.println(token);
+            Assertions.assertEquals(tokens.get(i), token.getToken());
+            Assertions.assertEquals(startOffsets.get(i), token.getStartOffset());
+            Assertions.assertEquals(endOffsets.get(i), token.getEndOffset());
+            Assertions.assertEquals(positions.get(i), token.getPosition());
+            Assertions.assertEquals(positionLengths.get(i), token.getPositionLength());
+            Assertions.assertEquals(hashes.get(i), token.getHash());
+        }
+    }
 }