/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
  19. import com.google.common.util.concurrent.Futures;
  20. import com.google.common.util.concurrent.ListenableFuture;
  21. import io.milvus.client.*;
  22. import java.util.ArrayList;
  23. import java.util.Arrays;
  24. import java.util.List;
  25. import java.util.Map;
  26. import java.util.Map.Entry;
  27. import java.util.SplittableRandom;
  28. import java.util.stream.Collectors;
  29. import java.util.stream.DoubleStream;
  30. import org.json.JSONObject;
  31. /**
  32. * This is a simple example demonstrating how to use Milvus Java SDK v0.9.2. For detailed API
  33. * documentation, please refer to
  34. * https://milvus-io.github.io/milvus-sdk-java/javadoc/io/milvus/client/package-summary.html You can
  35. * also find more information on https://milvus.io/docs/overview.md
  36. */
  37. public class MilvusBasicExample {
  38. // Helper function that generates random float vectors
  39. private static List<List<Float>> randomFloatVectors(int vectorCount, int dimension) {
  40. SplittableRandom splitCollectionRandom = new SplittableRandom();
  41. List<List<Float>> vectors = new ArrayList<>(vectorCount);
  42. for (int i = 0; i < vectorCount; ++i) {
  43. splitCollectionRandom = splitCollectionRandom.split();
  44. DoubleStream doubleStream = splitCollectionRandom.doubles(dimension);
  45. List<Float> vector =
  46. doubleStream.boxed().map(Double::floatValue).collect(Collectors.toList());
  47. vectors.add(vector);
  48. }
  49. return vectors;
  50. }
  51. public static void main(String[] args) {
  52. try {
  53. run();
  54. } catch (Exception e) {
  55. e.printStackTrace();
  56. }
  57. }
  58. public static void run() {
  59. /*
  60. * Setup:
  61. * First of all, you need a running Milvus server (v0.11.0). By default, Milvus runs on
  62. * localhost in port 19530. There are various configurations that you can set for
  63. * ConnectParam. Refer to JavaDoc for more information.
  64. *
  65. * You can use `withLogging()` for `client` to enable logging framework.
  66. */
  67. ConnectParam connectParam =
  68. new ConnectParam.Builder().withHost("127.0.0.1").withPort(19530).build();
  69. MilvusClient client = new MilvusGrpcClient(connectParam);
  70. /*
  71. * Basic create collection:
  72. * You now have a Milvus instance running and client connected to the server.
  73. * The first thing we will do is to create a collection `demo_films`. If we already had a
  74. * collection with the same name, we need to drop it before creating again.
  75. */
  76. final String collectionName = "demo_films";
  77. if (client.listCollections().contains(collectionName)) {
  78. client.dropCollection(collectionName);
  79. }
  80. /*
  81. * Basic create collection:
  82. * We will create a collection with three fields: film duration, release_year and an
  83. * embedding which is essentially a float vector.
  84. *
  85. * CollectionMapping will be used to create a collection. When adding vector fields, the
  86. * dimension must be specified. `auto_id` is set to false so we can provide custom ids.
  87. */
  88. final int dimension = 8;
  89. CollectionMapping collectionMapping =
  90. CollectionMapping.create(collectionName)
  91. .addField("duration", DataType.INT32)
  92. .addField("release_year", DataType.INT64)
  93. .addVectorField("embedding", DataType.VECTOR_FLOAT, dimension)
  94. .setParamsInJson("{\"segment_row_limit\": 4096, \"auto_id\": false}");
  95. client.createCollection(collectionMapping);
  96. // Check the existence of collection
  97. if (!client.hasCollection(collectionName)) {
  98. throw new AssertionError("Collection not found!");
  99. }
  100. /*
  101. * Basic create partition:
  102. * We can create partitions in a collection. Here we create a partition called "American"
  103. * since the films we insert will be American.
  104. */
  105. final String partitionTag = "American";
  106. client.createPartition(collectionName, partitionTag);
  107. // Check the existence of partition
  108. if (!client.hasPartition(collectionName, partitionTag)) {
  109. throw new AssertionError("Partition not found!");
  110. }
  111. // You can now get information about the collection and partition created.
  112. System.out.println("\n--------Get Collection Info--------");
  113. CollectionMapping collectionInfo = client.getCollectionInfo(collectionName);
  114. System.out.println(collectionInfo.toString());
  115. System.out.println("\n--------Get Partition List--------");
  116. List<String> partitions = client.listPartitions(collectionName);
  117. System.out.println(partitions);
  118. /*
  119. * Basic insert:
  120. * We will insert three films of The_Lord_of_the_Rings series with their id, duration,
  121. * release year and fake embeddings. When inserting entities into Milvus, values from
  122. * the same field should be grouped together to create InsertParam. We also wish to
  123. * insert them into the partition "American".
  124. *
  125. * The titles and relative film properties are listed below for your reference.
  126. */
  127. List<Long> ids = new ArrayList<>(Arrays.asList(1L, 2L, 3L));
  128. List<String> titles =
  129. Arrays.asList("The_Fellowship_of_the_Ring", "The_Two_Towers", "The_Return_of_the_King");
  130. List<Integer> durations = new ArrayList<>(Arrays.asList(208, 226, 252));
  131. List<Long> releaseYears = new ArrayList<>(Arrays.asList(2001L, 2002L, 2003L));
  132. List<List<Float>> embeddings = randomFloatVectors(3, dimension);
  133. InsertParam insertParam =
  134. InsertParam.create(collectionName)
  135. .addField("duration", DataType.INT32, durations)
  136. .addField("release_year", DataType.INT64, releaseYears)
  137. .addVectorField("embedding", DataType.VECTOR_FLOAT, embeddings)
  138. .setEntityIds(ids)
  139. .setPartitionTag(partitionTag);
  140. System.out.println("\n--------Insert Entities--------");
  141. List<Long> entityIds = client.insert(insertParam);
  142. System.out.println(entityIds);
  143. /*
  144. * Basic insert:
  145. * After inserting entities into the collection, we need to perform flush to make sure the
  146. * data is on disk. Then we are able to retrieve it.
  147. */
  148. long beforeEntityCount = client.countEntities(collectionName);
  149. client.flush(collectionName);
  150. long afterEntityCount = client.countEntities(collectionName);
  151. System.out.println("\n--------Flush Collection--------");
  152. System.out.printf("There are %d films in the collection before flush.\n", beforeEntityCount);
  153. System.out.printf("There are %d films in the collection after flush.\n", afterEntityCount);
  154. // We can get the detail of collection statistics.
  155. System.out.println("\n--------Collection Stats--------");
  156. JSONObject json = new JSONObject(client.getCollectionStats(collectionName));
  157. System.out.println(json.toString(4));
  158. /*
  159. * Basic search entities:
  160. * Now that we have 3 films inserted into our collection, it's time to obtain them.
  161. * We can get films by ids, and invalid/non-existent ids will be ignored.
  162. * In the case below, we will only films with ids 1 and 2.
  163. */
  164. List<Long> queryIds = new ArrayList<>(Arrays.asList(1L, 2L, 10L, 2333L));
  165. Map<Long, Map<String, Object>> entities = client.getEntityByID(collectionName, queryIds);
  166. System.out.println("\n--------Get Entity By ID--------");
  167. for (Entry<Long, Map<String, Object>> entry : entities.entrySet()) {
  168. Long id = entry.getKey();
  169. Map<String, Object> val = entry.getValue();
  170. System.out.printf(
  171. " > id: %d,\n > duration: %smin,\n > release_year: %s,\n > embedding: %s\n\n",
  172. id, val.get("duration"), val.get("release_year"), val.get("embedding").toString());
  173. }
  174. /*
  175. * Basic hybrid search:
  176. * Getting films by id is not enough, we are going to get films based on vector similarities.
  177. * Let's say we have a film with its `embedding` and we want to find `top3` films that are
  178. * most similar to it by L2 metric_type (Euclidean Distance).
  179. *
  180. * In addition to vector similarities, we also want to filter films such that:
  181. * - `released year` is 2002 or 2003,
  182. * - `duration` larger than 250 minutes.
  183. *
  184. * There will be only one film that satisfies our filters, namely "The_Return_of_the_King".
  185. *
  186. * Milvus provides Query DSL(Domain Specific Language) to support structured data filtering
  187. * in queries. This includes `term`, `range` and `vector` queries. For more information about
  188. * DSL statements, please refer to Milvus documentation for more details.
  189. */
  190. List<List<Float>> queryEmbedding = randomFloatVectors(1, dimension);
  191. final long topK = 3;
  192. String dsl =
  193. String.format(
  194. "{\"bool\": {"
  195. + "\"must\": [{"
  196. + " \"range\": {"
  197. + " \"duration\": {\"GT\": 250}" // "GT" for greater than
  198. + " }},{"
  199. + " \"term\": {"
  200. + " \"release_year\": %s" // "term" is a list
  201. + " }},{"
  202. + " \"vector\": {"
  203. + " \"embedding\": {"
  204. + " \"topk\": %d, \"metric_type\": \"L2\", \"type\": \"float\", \"query\": %s"
  205. + " }}}]}}",
  206. releaseYears.subList(1, 3).toString(), topK, queryEmbedding.toString());
  207. // Only specified fields in `setParamsInJson` will be returned from search request.
  208. // If not set, all fields will be returned.
  209. SearchParam searchParam =
  210. SearchParam.create(collectionName)
  211. .setDsl(dsl)
  212. .setParamsInJson("{\"fields\": [\"duration\", \"release_year\", \"embedding\"]}");
  213. System.out.println("\n--------Search Result--------");
  214. SearchResult searchResult = client.search(searchParam);
  215. System.out.println("- ids: " + searchResult.getResultIdsList().toString());
  216. System.out.println("- distances: " + searchResult.getResultDistancesList().toString());
  217. for (List<Map<String, Object>> singleQueryResult : searchResult.getFieldsMap()) {
  218. // We only have 1 film returned
  219. for (Map<String, Object> res : singleQueryResult) {
  220. System.out.println("- release_year: " + res.get("release_year"));
  221. System.out.println("- duration: " + res.get("duration"));
  222. System.out.println("- embedding: " + res.get("embedding"));
  223. }
  224. }
  225. /*
  226. * Basic hybrid search:
  227. * You can send search request asynchronously, which returns a ListenableFuture object.
  228. */
  229. ListenableFuture<SearchResult> searchResponseFuture = client.searchAsync(searchParam);
  230. Futures.getUnchecked(searchResponseFuture);
  231. /*
  232. * Basic delete:
  233. * Now let's see how to delete entities in Milvus.
  234. * You can simply delete entities by their ids. Here we delete the first two films.
  235. * After deleting, it is obvious that `getEntityByID` should return an empty map.
  236. */
  237. client.deleteEntityByID(collectionName, ids.subList(0, 2));
  238. client.flush(collectionName);
  239. entities = client.getEntityByID(collectionName, queryIds);
  240. if (!entities.isEmpty()) {
  241. throw new AssertionError("Unexpected entity count!");
  242. }
  243. System.out.println("\n--------Delete Entities--------");
  244. long entityCount = client.countEntities(collectionName);
  245. System.out.println(entityCount + " entity remains after delete.");
  246. /*
  247. * Other operations:
  248. * There are some other operations in Milvus, such as `compact` and `listIDInSegment`.
  249. *
  250. * Compacting the collection will erase deleted data from disk and rebuild index in background.
  251. * Data were only soft-deleted until you call compact.
  252. *
  253. * `listIDInSegment` will simply list all ids in a segment given the `segmentId`.
  254. */
  255. client.compact(CompactParam.create(collectionName).setThreshold(0.2));
  256. /*
  257. * Basic delete:
  258. * You can drop the partitions and finally the whole collection after use.
  259. */
  260. client.dropPartition(collectionName, partitionTag);
  261. if (client.listCollections().contains(collectionName)) {
  262. client.dropCollection(collectionName);
  263. }
  264. // Close connection
  265. client.close();
  266. }
  267. }