2
0

FullTextSearchExample.java 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139
  1. package io.milvus.v2;
  2. import com.google.gson.Gson;
  3. import com.google.gson.JsonObject;
  4. import io.milvus.common.clientenum.FunctionType;
  5. import io.milvus.v2.client.ConnectConfig;
  6. import io.milvus.v2.client.MilvusClientV2;
  7. import io.milvus.v2.common.ConsistencyLevel;
  8. import io.milvus.v2.common.DataType;
  9. import io.milvus.v2.common.IndexParam;
  10. import io.milvus.v2.service.collection.request.AddFieldReq;
  11. import io.milvus.v2.service.collection.request.CreateCollectionReq;
  12. import io.milvus.v2.service.collection.request.DropCollectionReq;
  13. import io.milvus.v2.service.collection.request.CreateCollectionReq.Function;
  14. import io.milvus.v2.service.vector.request.InsertReq;
  15. import io.milvus.v2.service.vector.request.QueryReq;
  16. import io.milvus.v2.service.vector.request.SearchReq;
  17. import io.milvus.v2.service.vector.request.data.EmbeddedText;
  18. import io.milvus.v2.service.vector.response.QueryResp;
  19. import io.milvus.v2.service.vector.response.SearchResp;
  20. import java.util.*;
  21. public class FullTextSearchExample {
  22. private static final String COLLECTION_NAME = "java_sdk_example_text_match_v2";
  23. private static final String ID_FIELD = "id";
  24. private static final String VECTOR_FIELD = "vector";
  25. private static void searchByText(MilvusClientV2 client, String text) {
  26. // The text is tokenized inside server and turned into a sparse embedding to compare with the vector field
  27. SearchResp searchResp = client.search(SearchReq.builder()
  28. .collectionName(COLLECTION_NAME)
  29. .data(Collections.singletonList(new EmbeddedText(text)))
  30. .topK(3)
  31. .outputFields(Collections.singletonList("text"))
  32. .build());
  33. System.out.println("\nSearch by text: " + text);
  34. List<List<SearchResp.SearchResult>> searchResults = searchResp.getSearchResults();
  35. for (List<SearchResp.SearchResult> results : searchResults) {
  36. for (SearchResp.SearchResult result : results) {
  37. System.out.printf("ID: %d, Score: %f, %s\n", (long)result.getId(), result.getScore(), result.getEntity().toString());
  38. }
  39. }
  40. System.out.println("=============================================================");
  41. }
  42. public static void main(String[] args) {
  43. ConnectConfig config = ConnectConfig.builder()
  44. .uri("http://localhost:19530")
  45. .build();
  46. MilvusClientV2 client = new MilvusClientV2(config);
  47. // Drop collection if exists
  48. client.dropCollection(DropCollectionReq.builder()
  49. .collectionName(COLLECTION_NAME)
  50. .build());
  51. // Create collection
  52. CreateCollectionReq.CollectionSchema schema = CreateCollectionReq.CollectionSchema.builder()
  53. .build();
  54. schema.addField(AddFieldReq.builder()
  55. .fieldName(ID_FIELD)
  56. .dataType(DataType.Int64)
  57. .isPrimaryKey(true)
  58. .autoID(false)
  59. .build());
  60. schema.addField(AddFieldReq.builder()
  61. .fieldName("text")
  62. .dataType(DataType.VarChar)
  63. .maxLength(65535)
  64. .enableAnalyzer(true) // must enable this if you use Function
  65. .build());
  66. schema.addField(AddFieldReq.builder()
  67. .fieldName(VECTOR_FIELD)
  68. .dataType(DataType.SparseFloatVector)
  69. .build());
  70. // With this function, milvus will convert the strings of "text" field to sparse vectors of "vector" field
  71. // by built-in tokenizer and analyzer
  72. // Read the link for more info: https://milvus.io/docs/full-text-search.md
  73. schema.addFunction(Function.builder()
  74. .functionType(FunctionType.BM25)
  75. .name("function_bm25")
  76. .inputFieldNames(Collections.singletonList("text"))
  77. .outputFieldNames(Collections.singletonList(VECTOR_FIELD))
  78. .build());
  79. List<IndexParam> indexes = new ArrayList<>();
  80. indexes.add(IndexParam.builder()
  81. .fieldName(VECTOR_FIELD)
  82. .indexType(IndexParam.IndexType.SPARSE_INVERTED_INDEX)
  83. .metricType(IndexParam.MetricType.BM25) // to use full text search, metric type must be "BM25"
  84. .build());
  85. CreateCollectionReq requestCreate = CreateCollectionReq.builder()
  86. .collectionName(COLLECTION_NAME)
  87. .collectionSchema(schema)
  88. .indexParams(indexes)
  89. .consistencyLevel(ConsistencyLevel.BOUNDED)
  90. .build();
  91. client.createCollection(requestCreate);
  92. System.out.println("Collection created");
  93. // Insert rows
  94. Gson gson = new Gson();
  95. List<JsonObject> rows = Arrays.asList(
  96. gson.fromJson("{\"id\": 0, \"text\": \"Milvus is an open-source vector database\"}", JsonObject.class),
  97. gson.fromJson("{\"id\": 1, \"text\": \"AI applications help people better life\"}", JsonObject.class),
  98. gson.fromJson("{\"id\": 2, \"text\": \"Will the electric car replace gas-powered car?\"}", JsonObject.class),
  99. gson.fromJson("{\"id\": 3, \"text\": \"LangChain is a composable framework to build with LLMs. Milvus is integrated into LangChain.\"}", JsonObject.class),
  100. gson.fromJson("{\"id\": 4, \"text\": \"RAG is the process of optimizing the output of a large language model\"}", JsonObject.class),
  101. gson.fromJson("{\"id\": 5, \"text\": \"Newton is one of the greatest scientist of human history\"}", JsonObject.class),
  102. gson.fromJson("{\"id\": 6, \"text\": \"Metric type L2 is Euclidean distance\"}", JsonObject.class),
  103. gson.fromJson("{\"id\": 7, \"text\": \"Embeddings represent real-world objects, like words, images, or videos, in a form that computers can process.\"}", JsonObject.class),
  104. gson.fromJson("{\"id\": 8, \"text\": \"The moon is 384,400 km distance away from earth\"}", JsonObject.class),
  105. gson.fromJson("{\"id\": 9, \"text\": \"Milvus supports L2 distance and IP similarity for float vector.\"}", JsonObject.class)
  106. );
  107. client.insert(InsertReq.builder()
  108. .collectionName(COLLECTION_NAME)
  109. .data(rows)
  110. .build());
  111. // Get row count, set ConsistencyLevel.STRONG to sync the data to query node so that data is visible
  112. QueryResp countR = client.query(QueryReq.builder()
  113. .collectionName(COLLECTION_NAME)
  114. .filter("")
  115. .outputFields(Collections.singletonList("count(*)"))
  116. .consistencyLevel(ConsistencyLevel.STRONG)
  117. .build());
  118. System.out.printf("%d rows in collection\n", (long)countR.getQueryResults().get(0).getEntity().get("count(*)"));
  119. // Query by filtering expression
  120. searchByText(client, "moon and earth distance");
  121. searchByText(client, "Milvus vector database");
  122. client.close();
  123. }
  124. }