// TextMatchExample.java
  1. package io.milvus.v2;
  2. import com.google.gson.Gson;
  3. import com.google.gson.JsonObject;
  4. import io.milvus.v1.CommonUtils;
  5. import io.milvus.v2.client.ConnectConfig;
  6. import io.milvus.v2.client.MilvusClientV2;
  7. import io.milvus.v2.common.ConsistencyLevel;
  8. import io.milvus.v2.common.DataType;
  9. import io.milvus.v2.common.IndexParam;
  10. import io.milvus.v2.service.collection.request.AddFieldReq;
  11. import io.milvus.v2.service.collection.request.CreateCollectionReq;
  12. import io.milvus.v2.service.collection.request.DropCollectionReq;
  13. import io.milvus.v2.service.vector.request.InsertReq;
  14. import io.milvus.v2.service.vector.request.QueryReq;
  15. import io.milvus.v2.service.vector.request.SearchReq;
  16. import io.milvus.v2.service.vector.request.data.FloatVec;
  17. import io.milvus.v2.service.vector.response.QueryResp;
  18. import io.milvus.v2.service.vector.response.SearchResp;
  19. import java.util.*;
  20. public class TextMatchExample {
  21. private static final String COLLECTION_NAME = "java_sdk_example_text_match_v2";
  22. private static final String ID_FIELD = "id";
  23. private static final String VECTOR_FIELD = "vector";
  24. private static final Integer VECTOR_DIM = 128;
  25. private static void queryWithFilter(MilvusClientV2 client, String filter) {
  26. QueryResp queryRet = client.query(QueryReq.builder()
  27. .collectionName(COLLECTION_NAME)
  28. .filter(filter)
  29. .outputFields(Collections.singletonList("text"))
  30. .build());
  31. System.out.println("\nQuery with filter: " + filter);
  32. List<QueryResp.QueryResult> records = queryRet.getQueryResults();
  33. for (QueryResp.QueryResult record : records) {
  34. System.out.println(record.getEntity());
  35. }
  36. System.out.printf("%d items matched%n", records.size());
  37. System.out.println("=============================================================");
  38. }
  39. private static void searchWithFilter(MilvusClientV2 client, String filter) {
  40. SearchResp searchResp = client.search(SearchReq.builder()
  41. .collectionName(COLLECTION_NAME)
  42. .data(Collections.singletonList(new FloatVec(CommonUtils.generateFloatVector(VECTOR_DIM))))
  43. .filter(filter)
  44. .topK(10)
  45. .outputFields(Collections.singletonList("text"))
  46. .build());
  47. System.out.println("\nSearch by filter: " + filter);
  48. List<List<SearchResp.SearchResult>> searchResults = searchResp.getSearchResults();
  49. for (List<SearchResp.SearchResult> results : searchResults) {
  50. for (SearchResp.SearchResult result : results) {
  51. System.out.printf("ID: %d, Score: %f, %s\n", (long)result.getId(), result.getScore(), result.getEntity().toString());
  52. }
  53. }
  54. System.out.println("=============================================================");
  55. }
  56. public static void main(String[] args) {
  57. ConnectConfig config = ConnectConfig.builder()
  58. .uri("http://localhost:19530")
  59. .build();
  60. MilvusClientV2 client = new MilvusClientV2(config);
  61. // Drop collection if exists
  62. client.dropCollection(DropCollectionReq.builder()
  63. .collectionName(COLLECTION_NAME)
  64. .build());
  65. // Create collection
  66. CreateCollectionReq.CollectionSchema collectionSchema = CreateCollectionReq.CollectionSchema.builder()
  67. .build();
  68. collectionSchema.addField(AddFieldReq.builder()
  69. .fieldName(ID_FIELD)
  70. .dataType(DataType.Int64)
  71. .isPrimaryKey(true)
  72. .autoID(false)
  73. .build());
  74. collectionSchema.addField(AddFieldReq.builder()
  75. .fieldName(VECTOR_FIELD)
  76. .dataType(DataType.FloatVector)
  77. .dimension(VECTOR_DIM)
  78. .build());
  79. Map<String, Object> analyzerParams = new HashMap<>();
  80. analyzerParams.put("type", "english");
  81. collectionSchema.addField(AddFieldReq.builder()
  82. .fieldName("text")
  83. .dataType(DataType.VarChar)
  84. .maxLength(1000)
  85. .enableAnalyzer(true)
  86. .analyzerParams(analyzerParams)
  87. .enableMatch(true) // must enable this if you use TextMatch
  88. .build());
  89. List<IndexParam> indexes = new ArrayList<>();
  90. indexes.add(IndexParam.builder()
  91. .fieldName(VECTOR_FIELD)
  92. .indexType(IndexParam.IndexType.FLAT)
  93. .metricType(IndexParam.MetricType.L2)
  94. .build());
  95. CreateCollectionReq requestCreate = CreateCollectionReq.builder()
  96. .collectionName(COLLECTION_NAME)
  97. .collectionSchema(collectionSchema)
  98. .indexParams(indexes)
  99. .consistencyLevel(ConsistencyLevel.BOUNDED)
  100. .build();
  101. client.createCollection(requestCreate);
  102. System.out.println("Collection created");
  103. // Insert rows
  104. Gson gson = new Gson();
  105. List<JsonObject> rows = Arrays.asList(
  106. gson.fromJson("{\"id\": 0, \"text\": \"Milvus is an open-source vector database\"}", JsonObject.class),
  107. gson.fromJson("{\"id\": 1, \"text\": \"AI applications help people better life\"}", JsonObject.class),
  108. gson.fromJson("{\"id\": 2, \"text\": \"Will the electric car replace gas-powered car?\"}", JsonObject.class),
  109. gson.fromJson("{\"id\": 3, \"text\": \"LangChain is a composable framework to build with LLMs. Milvus is integrated into LangChain.\"}", JsonObject.class),
  110. gson.fromJson("{\"id\": 4, \"text\": \"RAG is the process of optimizing the output of a large language model\"}", JsonObject.class),
  111. gson.fromJson("{\"id\": 5, \"text\": \"Newton is one of the greatest scientist of human history\"}", JsonObject.class),
  112. gson.fromJson("{\"id\": 6, \"text\": \"Metric type L2 is Euclidean distance\"}", JsonObject.class),
  113. gson.fromJson("{\"id\": 7, \"text\": \"Embeddings represent real-world objects, like words, images, or videos, in a form that computers can process.\"}", JsonObject.class),
  114. gson.fromJson("{\"id\": 8, \"text\": \"The moon is 384,400 km distance away from earth\"}", JsonObject.class),
  115. gson.fromJson("{\"id\": 9, \"text\": \"Milvus supports L2 distance and IP similarity for float vector.\"}", JsonObject.class)
  116. );
  117. // TextMatch is keyword filtering, here we just fill the vector field by random vectors
  118. for (JsonObject obj : rows) {
  119. obj.add(VECTOR_FIELD, gson.toJsonTree(CommonUtils.generateFloatVector(VECTOR_DIM)));
  120. }
  121. client.insert(InsertReq.builder()
  122. .collectionName(COLLECTION_NAME)
  123. .data(rows)
  124. .build());
  125. // Get row count, set ConsistencyLevel.STRONG to sync the data to query node so that data is visible
  126. QueryResp countR = client.query(QueryReq.builder()
  127. .collectionName(COLLECTION_NAME)
  128. .filter("")
  129. .outputFields(Collections.singletonList("count(*)"))
  130. .consistencyLevel(ConsistencyLevel.STRONG)
  131. .build());
  132. System.out.printf("%d rows in collection\n", (long)countR.getQueryResults().get(0).getEntity().get("count(*)"));
  133. // Query by keyword filtering expression
  134. queryWithFilter(client, "TEXT_MATCH(text, \"distance\")");
  135. queryWithFilter(client, "TEXT_MATCH(text, \"Milvus\") or TEXT_MATCH(text, \"distance\")");
  136. queryWithFilter(client, "TEXT_MATCH(text, \"Euclidean\") and TEXT_MATCH(text, \"distance\")");
  137. // Search by keyword filtering expression
  138. searchWithFilter(client, "TEXT_MATCH(text, \"distance\")");
  139. searchWithFilter(client, "TEXT_MATCH(text, \"Euclidean distance\")");
  140. searchWithFilter(client, "TEXT_MATCH(text, \"vector database\")");
  141. client.close();
  142. }
  143. }