2
0

CommonFunction.java 78 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864
  1. package com.zilliz.milvustestv2.common;
  2. import com.google.gson.*;
  3. import com.google.common.collect.Lists;
  4. import com.zilliz.milvustestv2.params.FieldParam;
  5. import com.zilliz.milvustestv2.utils.GenerateUtil;
  6. import com.zilliz.milvustestv2.utils.JsonObjectUtil;
  7. import com.zilliz.milvustestv2.utils.MathUtil;
  8. import com.zilliz.milvustestv2.utils.PropertyFilesUtil;
  9. import io.milvus.bulkwriter.LocalBulkWriter;
  10. import io.milvus.bulkwriter.LocalBulkWriterParam;
  11. import io.milvus.bulkwriter.RemoteBulkWriter;
  12. import io.milvus.bulkwriter.RemoteBulkWriterParam;
  13. import io.milvus.bulkwriter.common.clientenum.BulkFileType;
  14. import io.milvus.bulkwriter.common.clientenum.CloudStorage;
  15. import io.milvus.bulkwriter.connect.S3ConnectParam;
  16. import io.milvus.bulkwriter.connect.StorageConnectParam;
  17. import io.milvus.common.utils.Float16Utils;
  18. import io.milvus.v2.common.ConsistencyLevel;
  19. import io.milvus.v2.common.DataType;
  20. import io.milvus.v2.common.IndexParam;
  21. import io.milvus.v2.service.collection.request.CreateCollectionReq;
  22. import io.milvus.v2.service.collection.request.DescribeCollectionReq;
  23. import io.milvus.v2.service.collection.request.LoadCollectionReq;
  24. import io.milvus.v2.service.collection.response.DescribeCollectionResp;
  25. import io.milvus.v2.service.index.request.CreateIndexReq;
  26. import io.milvus.v2.service.index.request.DropIndexReq;
  27. import io.milvus.v2.service.partition.request.CreatePartitionReq;
  28. import io.milvus.v2.service.vector.request.AnnSearchReq;
  29. import io.milvus.v2.service.vector.request.InsertReq;
  30. import io.milvus.v2.service.vector.request.SearchReq;
  31. import io.milvus.v2.service.vector.request.data.*;
  32. import io.milvus.v2.service.vector.response.InsertResp;
  33. import io.milvus.v2.service.vector.response.SearchResp;
  34. import io.minio.BucketExistsArgs;
  35. import io.minio.MakeBucketArgs;
  36. import io.minio.MinioClient;
  37. import io.minio.UploadObjectArgs;
  38. import io.minio.errors.*;
  39. import lombok.NonNull;
  40. import lombok.extern.slf4j.Slf4j;
  41. import javax.annotation.Nullable;
  42. import java.io.IOException;
  43. import java.nio.ByteBuffer;
  44. import java.security.InvalidKeyException;
  45. import java.security.NoSuchAlgorithmException;
  46. import java.util.*;
  47. import java.util.stream.Collectors;
  48. import static com.zilliz.milvustestv2.common.BaseTest.milvusClientV2;
  49. /**
  50. * @Author yongpeng.li
  51. * @Date 2024/2/1 15:55
  52. */
  53. @Slf4j
  54. public class CommonFunction {
  55. /**
  56. * 提供Collection Schema
  57. *
  58. * @param dim 维度
  59. * @param vectorType 向量类型
  60. * @return CollectionSchema
  61. */
  62. public static CreateCollectionReq.CollectionSchema providerCollectionSchema(int dim, DataType vectorType) {
  63. CreateCollectionReq.FieldSchema fieldInt64 = CreateCollectionReq.FieldSchema.builder()
  64. .autoID(false)
  65. .dataType(io.milvus.v2.common.DataType.Int64)
  66. .isPrimaryKey(true)
  67. .name(CommonData.fieldInt64)
  68. .build();
  69. CreateCollectionReq.FieldSchema fieldInt32 = CreateCollectionReq.FieldSchema.builder()
  70. .dataType(DataType.Int32)
  71. .name(CommonData.fieldInt32)
  72. .isPrimaryKey(false)
  73. .build();
  74. CreateCollectionReq.FieldSchema fieldInt16 = CreateCollectionReq.FieldSchema.builder()
  75. .dataType(DataType.Int16)
  76. .name(CommonData.fieldInt16)
  77. .isPrimaryKey(false)
  78. .build();
  79. CreateCollectionReq.FieldSchema fieldInt8 = CreateCollectionReq.FieldSchema.builder()
  80. .dataType(DataType.Int8)
  81. .name(CommonData.fieldInt8)
  82. .isPrimaryKey(false)
  83. .build();
  84. CreateCollectionReq.FieldSchema fieldDouble = CreateCollectionReq.FieldSchema.builder()
  85. .dataType(DataType.Double)
  86. .name(CommonData.fieldDouble)
  87. .isPrimaryKey(false)
  88. .build();
  89. CreateCollectionReq.FieldSchema fieldArray = CreateCollectionReq.FieldSchema.builder()
  90. .dataType(DataType.Array)
  91. .name(CommonData.fieldArray)
  92. .elementType(DataType.Int64)
  93. .maxCapacity(100)
  94. .isPrimaryKey(false)
  95. .build();
  96. CreateCollectionReq.FieldSchema fieldBool = CreateCollectionReq.FieldSchema.builder()
  97. .dataType(DataType.Bool)
  98. .name(CommonData.fieldBool)
  99. .isPrimaryKey(false)
  100. .build();
  101. CreateCollectionReq.FieldSchema fieldVarchar = CreateCollectionReq.FieldSchema.builder()
  102. .dataType(DataType.VarChar)
  103. .name(CommonData.fieldVarchar)
  104. .isPrimaryKey(false)
  105. .maxLength(100)
  106. .build();
  107. CreateCollectionReq.FieldSchema fieldFloat = CreateCollectionReq.FieldSchema.builder()
  108. .dataType(DataType.Float)
  109. .name(CommonData.fieldFloat)
  110. .isPrimaryKey(false)
  111. .build();
  112. CreateCollectionReq.FieldSchema fieldJson = CreateCollectionReq.FieldSchema.builder()
  113. .dataType(DataType.JSON)
  114. .name(CommonData.fieldJson)
  115. .isPrimaryKey(false)
  116. .build();
  117. CreateCollectionReq.FieldSchema fieldVector = CreateCollectionReq.FieldSchema.builder()
  118. .dataType(vectorType)
  119. .isPrimaryKey(false)
  120. .build();
  121. if (vectorType == DataType.FloatVector) {
  122. fieldVector.setDimension(dim);
  123. fieldVector.setName(CommonData.fieldFloatVector);
  124. }
  125. if (vectorType == DataType.BinaryVector) {
  126. fieldVector.setDimension(dim);
  127. fieldVector.setName(CommonData.fieldBinaryVector);
  128. }
  129. if (vectorType == DataType.Float16Vector) {
  130. fieldVector.setDimension(dim);
  131. fieldVector.setName(CommonData.fieldFloat16Vector);
  132. }
  133. if (vectorType == DataType.BFloat16Vector) {
  134. fieldVector.setDimension(dim);
  135. fieldVector.setName(CommonData.fieldBF16Vector);
  136. }
  137. if (vectorType == DataType.SparseFloatVector) {
  138. fieldVector.setName(CommonData.fieldSparseVector);
  139. }
  140. List<CreateCollectionReq.FieldSchema> fieldSchemaList = new ArrayList<>();
  141. fieldSchemaList.add(fieldInt64);
  142. fieldSchemaList.add(fieldInt32);
  143. fieldSchemaList.add(fieldInt16);
  144. fieldSchemaList.add(fieldInt8);
  145. fieldSchemaList.add(fieldFloat);
  146. fieldSchemaList.add(fieldDouble);
  147. fieldSchemaList.add(fieldArray);
  148. fieldSchemaList.add(fieldBool);
  149. fieldSchemaList.add(fieldJson);
  150. fieldSchemaList.add(fieldVarchar);
  151. fieldSchemaList.add(fieldVector);
  152. return CreateCollectionReq.CollectionSchema.builder()
  153. .fieldSchemaList(fieldSchemaList)
  154. .build();
  155. }
  156. /**
  157. * 创建DataType vectorType类型向量的collection
  158. *
  159. * @param dim 维度
  160. * @param collectionName collection name
  161. * @param vectorType 向量类型-sparse vector 不需要dim
  162. * @return collection name
  163. */
  164. public static String createNewCollection(int dim, String collectionName, DataType vectorType) {
  165. if (collectionName == null || collectionName.equals("")) {
  166. collectionName = "Collection_" + GenerateUtil.getRandomString(10);
  167. }
  168. CreateCollectionReq.FieldSchema fieldInt64 = CreateCollectionReq.FieldSchema.builder()
  169. .autoID(false)
  170. .dataType(io.milvus.v2.common.DataType.Int64)
  171. .isPrimaryKey(true)
  172. .name(CommonData.fieldInt64)
  173. .build();
  174. CreateCollectionReq.FieldSchema fieldInt32 = CreateCollectionReq.FieldSchema.builder()
  175. .dataType(DataType.Int32)
  176. .name(CommonData.fieldInt32)
  177. .isPrimaryKey(false)
  178. .build();
  179. CreateCollectionReq.FieldSchema fieldInt16 = CreateCollectionReq.FieldSchema.builder()
  180. .dataType(DataType.Int16)
  181. .name(CommonData.fieldInt16)
  182. .isPrimaryKey(false)
  183. .build();
  184. CreateCollectionReq.FieldSchema fieldInt8 = CreateCollectionReq.FieldSchema.builder()
  185. .dataType(DataType.Int8)
  186. .name(CommonData.fieldInt8)
  187. .isPrimaryKey(false)
  188. .build();
  189. CreateCollectionReq.FieldSchema fieldDouble = CreateCollectionReq.FieldSchema.builder()
  190. .dataType(DataType.Double)
  191. .name(CommonData.fieldDouble)
  192. .isPrimaryKey(false)
  193. .build();
  194. CreateCollectionReq.FieldSchema fieldArray = CreateCollectionReq.FieldSchema.builder()
  195. .dataType(DataType.Array)
  196. .name(CommonData.fieldArray)
  197. .elementType(DataType.Int64)
  198. .maxCapacity(100)
  199. .isPrimaryKey(false)
  200. .build();
  201. CreateCollectionReq.FieldSchema fieldBool = CreateCollectionReq.FieldSchema.builder()
  202. .dataType(DataType.Bool)
  203. .name(CommonData.fieldBool)
  204. .isPrimaryKey(false)
  205. .build();
  206. CreateCollectionReq.FieldSchema fieldVarchar = CreateCollectionReq.FieldSchema.builder()
  207. .dataType(DataType.VarChar)
  208. .name(CommonData.fieldVarchar)
  209. .isPrimaryKey(false)
  210. .maxLength(100)
  211. .build();
  212. CreateCollectionReq.FieldSchema fieldFloat = CreateCollectionReq.FieldSchema.builder()
  213. .dataType(DataType.Float)
  214. .name(CommonData.fieldFloat)
  215. .isPrimaryKey(false)
  216. .build();
  217. CreateCollectionReq.FieldSchema fieldJson = CreateCollectionReq.FieldSchema.builder()
  218. .dataType(DataType.JSON)
  219. .name(CommonData.fieldJson)
  220. .isPrimaryKey(false)
  221. .build();
  222. CreateCollectionReq.FieldSchema fieldVector = CreateCollectionReq.FieldSchema.builder()
  223. .dataType(vectorType)
  224. .isPrimaryKey(false)
  225. .build();
  226. if (vectorType == DataType.FloatVector) {
  227. fieldVector.setDimension(dim);
  228. fieldVector.setName(CommonData.fieldFloatVector);
  229. }
  230. if (vectorType == DataType.BinaryVector) {
  231. fieldVector.setDimension(dim);
  232. fieldVector.setName(CommonData.fieldBinaryVector);
  233. }
  234. if (vectorType == DataType.Float16Vector) {
  235. fieldVector.setDimension(dim);
  236. fieldVector.setName(CommonData.fieldFloat16Vector);
  237. }
  238. if (vectorType == DataType.BFloat16Vector) {
  239. fieldVector.setDimension(dim);
  240. fieldVector.setName(CommonData.fieldBF16Vector);
  241. }
  242. if (vectorType == DataType.SparseFloatVector) {
  243. fieldVector.setName(CommonData.fieldSparseVector);
  244. }
  245. List<CreateCollectionReq.FieldSchema> fieldSchemaList = new ArrayList<>();
  246. fieldSchemaList.add(fieldInt64);
  247. fieldSchemaList.add(fieldInt32);
  248. fieldSchemaList.add(fieldInt16);
  249. fieldSchemaList.add(fieldInt8);
  250. fieldSchemaList.add(fieldFloat);
  251. fieldSchemaList.add(fieldDouble);
  252. fieldSchemaList.add(fieldArray);
  253. fieldSchemaList.add(fieldBool);
  254. fieldSchemaList.add(fieldJson);
  255. fieldSchemaList.add(fieldVarchar);
  256. fieldSchemaList.add(fieldVector);
  257. CreateCollectionReq.CollectionSchema collectionSchema = CreateCollectionReq.CollectionSchema.builder()
  258. .fieldSchemaList(fieldSchemaList)
  259. .build();
  260. CreateCollectionReq createCollectionReq = CreateCollectionReq.builder()
  261. .collectionSchema(collectionSchema)
  262. .collectionName(collectionName)
  263. .enableDynamicField(false)
  264. .description("collection desc")
  265. .numShards(1)
  266. .build();
  267. milvusClientV2.createCollection(createCollectionReq);
  268. log.info("create collection:" + collectionName);
  269. return collectionName;
  270. }
  271. public static String createNewCollectionWithDynamic(int dim, String collectionName, DataType vectorType) {
  272. if (collectionName == null || collectionName.equals("")) {
  273. collectionName = "Collection_" + GenerateUtil.getRandomString(10);
  274. }
  275. CreateCollectionReq.FieldSchema fieldInt64 = CreateCollectionReq.FieldSchema.builder()
  276. .autoID(false)
  277. .dataType(io.milvus.v2.common.DataType.Int64)
  278. .isPrimaryKey(true)
  279. .name(CommonData.fieldInt64)
  280. .build();
  281. CreateCollectionReq.FieldSchema fieldInt32 = CreateCollectionReq.FieldSchema.builder()
  282. .dataType(DataType.Int32)
  283. .name(CommonData.fieldInt32)
  284. .isPrimaryKey(false)
  285. .build();
  286. CreateCollectionReq.FieldSchema fieldInt16 = CreateCollectionReq.FieldSchema.builder()
  287. .dataType(DataType.Int16)
  288. .name(CommonData.fieldInt16)
  289. .isPrimaryKey(false)
  290. .build();
  291. CreateCollectionReq.FieldSchema fieldInt8 = CreateCollectionReq.FieldSchema.builder()
  292. .dataType(DataType.Int8)
  293. .name(CommonData.fieldInt8)
  294. .isPrimaryKey(false)
  295. .build();
  296. CreateCollectionReq.FieldSchema fieldDouble = CreateCollectionReq.FieldSchema.builder()
  297. .dataType(DataType.Double)
  298. .name(CommonData.fieldDouble)
  299. .isPrimaryKey(false)
  300. .build();
  301. CreateCollectionReq.FieldSchema fieldArray = CreateCollectionReq.FieldSchema.builder()
  302. .dataType(DataType.Array)
  303. .name(CommonData.fieldArray)
  304. .elementType(DataType.Int64)
  305. .maxCapacity(100)
  306. .isPrimaryKey(false)
  307. .build();
  308. CreateCollectionReq.FieldSchema fieldBool = CreateCollectionReq.FieldSchema.builder()
  309. .dataType(DataType.Bool)
  310. .name(CommonData.fieldBool)
  311. .isPrimaryKey(false)
  312. .build();
  313. CreateCollectionReq.FieldSchema fieldVarchar = CreateCollectionReq.FieldSchema.builder()
  314. .dataType(DataType.VarChar)
  315. .name(CommonData.fieldVarchar)
  316. .isPrimaryKey(false)
  317. .maxLength(100)
  318. .build();
  319. CreateCollectionReq.FieldSchema fieldFloat = CreateCollectionReq.FieldSchema.builder()
  320. .dataType(DataType.Float)
  321. .name(CommonData.fieldFloat)
  322. .isPrimaryKey(false)
  323. .build();
  324. CreateCollectionReq.FieldSchema fieldJson = CreateCollectionReq.FieldSchema.builder()
  325. .dataType(DataType.JSON)
  326. .name(CommonData.fieldJson)
  327. .isPrimaryKey(false)
  328. .build();
  329. CreateCollectionReq.FieldSchema fieldVector = CreateCollectionReq.FieldSchema.builder()
  330. .dataType(vectorType)
  331. .isPrimaryKey(false)
  332. .build();
  333. if (vectorType == DataType.FloatVector) {
  334. fieldVector.setDimension(dim);
  335. fieldVector.setName(CommonData.fieldFloatVector);
  336. }
  337. if (vectorType == DataType.BinaryVector) {
  338. fieldVector.setDimension(dim);
  339. fieldVector.setName(CommonData.fieldBinaryVector);
  340. }
  341. if (vectorType == DataType.Float16Vector) {
  342. fieldVector.setDimension(dim);
  343. fieldVector.setName(CommonData.fieldFloat16Vector);
  344. }
  345. if (vectorType == DataType.BFloat16Vector) {
  346. fieldVector.setDimension(dim);
  347. fieldVector.setName(CommonData.fieldBF16Vector);
  348. }
  349. if (vectorType == DataType.SparseFloatVector) {
  350. fieldVector.setName(CommonData.fieldSparseVector);
  351. }
  352. List<CreateCollectionReq.FieldSchema> fieldSchemaList = new ArrayList<>();
  353. fieldSchemaList.add(fieldInt64);
  354. fieldSchemaList.add(fieldInt32);
  355. fieldSchemaList.add(fieldInt16);
  356. fieldSchemaList.add(fieldInt8);
  357. fieldSchemaList.add(fieldFloat);
  358. fieldSchemaList.add(fieldDouble);
  359. fieldSchemaList.add(fieldArray);
  360. fieldSchemaList.add(fieldBool);
  361. fieldSchemaList.add(fieldJson);
  362. fieldSchemaList.add(fieldVarchar);
  363. fieldSchemaList.add(fieldVector);
  364. CreateCollectionReq.CollectionSchema collectionSchema = CreateCollectionReq.CollectionSchema.builder()
  365. .fieldSchemaList(fieldSchemaList)
  366. .enableDynamicField(true)
  367. .build();
  368. CreateCollectionReq createCollectionReq = CreateCollectionReq.builder()
  369. .collectionSchema(collectionSchema)
  370. .collectionName(collectionName)
  371. .enableDynamicField(true)
  372. .description("collection desc")
  373. .numShards(1)
  374. .build();
  375. milvusClientV2.createCollection(createCollectionReq);
  376. log.info("create collection with dynamic field:" + collectionName);
  377. return collectionName;
  378. }
  379. /**
  380. * 创建包含nullable列的collection
  381. *
  382. * @param dim 维度
  383. * @param collectionName collection name
  384. * @param vectorType 向量类型-sparse vector 不需要dim
  385. * @return collection name
  386. */
  387. public static String createNewNullableCollection(int dim, String collectionName, DataType vectorType) {
  388. if (collectionName == null || collectionName.equals("")) {
  389. collectionName = "Collection_" + GenerateUtil.getRandomString(10);
  390. }
  391. CreateCollectionReq.FieldSchema fieldInt64 = CreateCollectionReq.FieldSchema.builder()
  392. .autoID(false)
  393. .dataType(io.milvus.v2.common.DataType.Int64)
  394. .isPrimaryKey(true)
  395. .name(CommonData.fieldInt64)
  396. .build();
  397. CreateCollectionReq.FieldSchema fieldInt32 = CreateCollectionReq.FieldSchema.builder()
  398. .dataType(DataType.Int32)
  399. .name(CommonData.fieldInt32)
  400. .isPrimaryKey(false)
  401. .isNullable(true)
  402. .build();
  403. CreateCollectionReq.FieldSchema fieldInt16 = CreateCollectionReq.FieldSchema.builder()
  404. .dataType(DataType.Int16)
  405. .name(CommonData.fieldInt16)
  406. .isPrimaryKey(false)
  407. .isNullable(true)
  408. .build();
  409. CreateCollectionReq.FieldSchema fieldInt8 = CreateCollectionReq.FieldSchema.builder()
  410. .dataType(DataType.Int8)
  411. .name(CommonData.fieldInt8)
  412. .isPrimaryKey(false)
  413. .isNullable(true)
  414. .build();
  415. CreateCollectionReq.FieldSchema fieldDouble = CreateCollectionReq.FieldSchema.builder()
  416. .dataType(DataType.Double)
  417. .name(CommonData.fieldDouble)
  418. .isPrimaryKey(false)
  419. .isNullable(true)
  420. .build();
  421. CreateCollectionReq.FieldSchema fieldArray = CreateCollectionReq.FieldSchema.builder()
  422. .dataType(DataType.Array)
  423. .name(CommonData.fieldArray)
  424. .elementType(DataType.Int64)
  425. .maxCapacity(1000)
  426. .isPrimaryKey(false)
  427. .isNullable(true)
  428. .build();
  429. CreateCollectionReq.FieldSchema fieldBool = CreateCollectionReq.FieldSchema.builder()
  430. .dataType(DataType.Bool)
  431. .name(CommonData.fieldBool)
  432. .isPrimaryKey(false)
  433. .isNullable(true)
  434. .build();
  435. CreateCollectionReq.FieldSchema fieldVarchar = CreateCollectionReq.FieldSchema.builder()
  436. .dataType(DataType.VarChar)
  437. .name(CommonData.fieldVarchar)
  438. .isPrimaryKey(false)
  439. .maxLength(1000)
  440. .isNullable(true)
  441. .build();
  442. CreateCollectionReq.FieldSchema fieldFloat = CreateCollectionReq.FieldSchema.builder()
  443. .dataType(DataType.Float)
  444. .name(CommonData.fieldFloat)
  445. .isPrimaryKey(false)
  446. .isNullable(true)
  447. .build();
  448. CreateCollectionReq.FieldSchema fieldJson = CreateCollectionReq.FieldSchema.builder()
  449. .dataType(DataType.JSON)
  450. .name(CommonData.fieldJson)
  451. .isPrimaryKey(false)
  452. .isNullable(true)
  453. .build();
  454. CreateCollectionReq.FieldSchema fieldVector = CreateCollectionReq.FieldSchema.builder()
  455. .dataType(vectorType)
  456. .isPrimaryKey(false)
  457. .build();
  458. if (vectorType == DataType.FloatVector) {
  459. fieldVector.setDimension(dim);
  460. fieldVector.setName(CommonData.fieldFloatVector);
  461. }
  462. if (vectorType == DataType.BinaryVector) {
  463. fieldVector.setDimension(dim);
  464. fieldVector.setName(CommonData.fieldBinaryVector);
  465. }
  466. if (vectorType == DataType.Float16Vector) {
  467. fieldVector.setDimension(dim);
  468. fieldVector.setName(CommonData.fieldFloat16Vector);
  469. }
  470. if (vectorType == DataType.BFloat16Vector) {
  471. fieldVector.setDimension(dim);
  472. fieldVector.setName(CommonData.fieldBF16Vector);
  473. }
  474. if (vectorType == DataType.SparseFloatVector) {
  475. fieldVector.setName(CommonData.fieldSparseVector);
  476. }
  477. List<CreateCollectionReq.FieldSchema> fieldSchemaList = new ArrayList<>();
  478. fieldSchemaList.add(fieldInt64);
  479. fieldSchemaList.add(fieldInt32);
  480. fieldSchemaList.add(fieldInt16);
  481. fieldSchemaList.add(fieldInt8);
  482. fieldSchemaList.add(fieldFloat);
  483. fieldSchemaList.add(fieldDouble);
  484. fieldSchemaList.add(fieldArray);
  485. fieldSchemaList.add(fieldBool);
  486. fieldSchemaList.add(fieldJson);
  487. fieldSchemaList.add(fieldVarchar);
  488. fieldSchemaList.add(fieldVector);
  489. CreateCollectionReq.CollectionSchema collectionSchema = CreateCollectionReq.CollectionSchema.builder()
  490. .fieldSchemaList(fieldSchemaList)
  491. .build();
  492. CreateCollectionReq createCollectionReq = CreateCollectionReq.builder()
  493. .collectionSchema(collectionSchema)
  494. .collectionName(collectionName)
  495. .enableDynamicField(false)
  496. .description("collection desc")
  497. .numShards(1)
  498. .build();
  499. milvusClientV2.createCollection(createCollectionReq);
  500. log.info("create collection:" + collectionName);
  501. return collectionName;
  502. }
  503. /**
  504. * 创建包含default value列的collection
  505. *
  506. * @param dim 维度
  507. * @param collectionName collection name
  508. * @param vectorType 向量类型-sparse vector 不需要dim
  509. * @return collection name
  510. */
  511. public static String createNewDefaultValueCollection(int dim, String collectionName, DataType vectorType) {
  512. if (collectionName == null || collectionName.equals("")) {
  513. collectionName = "Collection_" + GenerateUtil.getRandomString(10);
  514. }
  515. CreateCollectionReq.FieldSchema fieldInt64 = CreateCollectionReq.FieldSchema.builder()
  516. .autoID(false)
  517. .dataType(io.milvus.v2.common.DataType.Int64)
  518. .isPrimaryKey(true)
  519. .name(CommonData.fieldInt64)
  520. .build();
  521. CreateCollectionReq.FieldSchema fieldInt32 = CreateCollectionReq.FieldSchema.builder()
  522. .dataType(DataType.Int32)
  523. .name(CommonData.fieldInt32)
  524. .isPrimaryKey(false)
  525. .defaultValue(CommonData.defaultValueInt)
  526. .build();
  527. CreateCollectionReq.FieldSchema fieldInt16 = CreateCollectionReq.FieldSchema.builder()
  528. .dataType(DataType.Int16)
  529. .name(CommonData.fieldInt16)
  530. .isPrimaryKey(false)
  531. .defaultValue(CommonData.defaultValueShort)
  532. .build();
  533. CreateCollectionReq.FieldSchema fieldInt8 = CreateCollectionReq.FieldSchema.builder()
  534. .dataType(DataType.Int8)
  535. .name(CommonData.fieldInt8)
  536. .isPrimaryKey(false)
  537. .defaultValue(CommonData.defaultValueShort)
  538. .build();
  539. CreateCollectionReq.FieldSchema fieldDouble = CreateCollectionReq.FieldSchema.builder()
  540. .dataType(DataType.Double)
  541. .name(CommonData.fieldDouble)
  542. .isPrimaryKey(false)
  543. .defaultValue(CommonData.defaultValueDouble)
  544. .build();
  545. CreateCollectionReq.FieldSchema fieldArray = CreateCollectionReq.FieldSchema.builder()
  546. .dataType(DataType.Array)
  547. .name(CommonData.fieldArray)
  548. .elementType(DataType.Int64)
  549. .maxCapacity(1000)
  550. .isPrimaryKey(false)
  551. .build();
  552. CreateCollectionReq.FieldSchema fieldBool = CreateCollectionReq.FieldSchema.builder()
  553. .dataType(DataType.Bool)
  554. .name(CommonData.fieldBool)
  555. .isPrimaryKey(false)
  556. .defaultValue(CommonData.defaultValueBool)
  557. .build();
  558. CreateCollectionReq.FieldSchema fieldVarchar = CreateCollectionReq.FieldSchema.builder()
  559. .dataType(DataType.VarChar)
  560. .name(CommonData.fieldVarchar)
  561. .isPrimaryKey(false)
  562. .maxLength(1000)
  563. .defaultValue(CommonData.defaultValueString)
  564. .build();
  565. CreateCollectionReq.FieldSchema fieldFloat = CreateCollectionReq.FieldSchema.builder()
  566. .dataType(DataType.Float)
  567. .name(CommonData.fieldFloat)
  568. .isPrimaryKey(false)
  569. .defaultValue(CommonData.defaultValueFloat)
  570. .build();
  571. CreateCollectionReq.FieldSchema fieldJson = CreateCollectionReq.FieldSchema.builder()
  572. .dataType(DataType.JSON)
  573. .name(CommonData.fieldJson)
  574. .isPrimaryKey(false)
  575. .build();
  576. CreateCollectionReq.FieldSchema fieldVector = CreateCollectionReq.FieldSchema.builder()
  577. .dataType(vectorType)
  578. .isPrimaryKey(false)
  579. .build();
  580. if (vectorType == DataType.FloatVector) {
  581. fieldVector.setDimension(dim);
  582. fieldVector.setName(CommonData.fieldFloatVector);
  583. }
  584. if (vectorType == DataType.BinaryVector) {
  585. fieldVector.setDimension(dim);
  586. fieldVector.setName(CommonData.fieldBinaryVector);
  587. }
  588. if (vectorType == DataType.Float16Vector) {
  589. fieldVector.setDimension(dim);
  590. fieldVector.setName(CommonData.fieldFloat16Vector);
  591. }
  592. if (vectorType == DataType.BFloat16Vector) {
  593. fieldVector.setDimension(dim);
  594. fieldVector.setName(CommonData.fieldBF16Vector);
  595. }
  596. if (vectorType == DataType.SparseFloatVector) {
  597. fieldVector.setName(CommonData.fieldSparseVector);
  598. }
  599. List<CreateCollectionReq.FieldSchema> fieldSchemaList = new ArrayList<>();
  600. fieldSchemaList.add(fieldInt64);
  601. fieldSchemaList.add(fieldInt32);
  602. fieldSchemaList.add(fieldInt16);
  603. fieldSchemaList.add(fieldInt8);
  604. fieldSchemaList.add(fieldFloat);
  605. fieldSchemaList.add(fieldDouble);
  606. fieldSchemaList.add(fieldArray);
  607. fieldSchemaList.add(fieldBool);
  608. fieldSchemaList.add(fieldJson);
  609. fieldSchemaList.add(fieldVarchar);
  610. fieldSchemaList.add(fieldVector);
  611. CreateCollectionReq.CollectionSchema collectionSchema = CreateCollectionReq.CollectionSchema.builder()
  612. .fieldSchemaList(fieldSchemaList)
  613. .build();
  614. CreateCollectionReq createCollectionReq = CreateCollectionReq.builder()
  615. .collectionSchema(collectionSchema)
  616. .collectionName(collectionName)
  617. .enableDynamicField(false)
  618. .description("collection desc")
  619. .numShards(1)
  620. .build();
  621. milvusClientV2.createCollection(createCollectionReq);
  622. log.info("create collection:" + collectionName);
  623. return collectionName;
  624. }
  625. /**
  626. * 创建包含同时enable nullable和default value列的collection
  627. *
  628. * @param dim 维度
  629. * @param collectionName collection name
  630. * @param vectorType 向量类型-sparse vector 不需要dim
  631. * @return collection name
  632. */
  633. public static String createNewNullableDefaultValueCollection(int dim, String collectionName, DataType vectorType) {
  634. if (collectionName == null || collectionName.equals("")) {
  635. collectionName = "Collection_" + GenerateUtil.getRandomString(10);
  636. }
  637. CreateCollectionReq.FieldSchema fieldInt64 = CreateCollectionReq.FieldSchema.builder()
  638. .autoID(false)
  639. .dataType(io.milvus.v2.common.DataType.Int64)
  640. .isPrimaryKey(true)
  641. .name(CommonData.fieldInt64)
  642. .build();
  643. CreateCollectionReq.FieldSchema fieldInt32 = CreateCollectionReq.FieldSchema.builder()
  644. .dataType(DataType.Int32)
  645. .name(CommonData.fieldInt32)
  646. .isPrimaryKey(false)
  647. .isNullable(true)
  648. .defaultValue(CommonData.defaultValueInt)
  649. .build();
  650. CreateCollectionReq.FieldSchema fieldInt16 = CreateCollectionReq.FieldSchema.builder()
  651. .dataType(DataType.Int16)
  652. .name(CommonData.fieldInt16)
  653. .isPrimaryKey(false)
  654. .isNullable(true)
  655. .defaultValue(CommonData.defaultValueShort)
  656. .build();
  657. CreateCollectionReq.FieldSchema fieldInt8 = CreateCollectionReq.FieldSchema.builder()
  658. .dataType(DataType.Int8)
  659. .name(CommonData.fieldInt8)
  660. .isPrimaryKey(false)
  661. .isNullable(true)
  662. .defaultValue(CommonData.defaultValueShort)
  663. .build();
  664. CreateCollectionReq.FieldSchema fieldDouble = CreateCollectionReq.FieldSchema.builder()
  665. .dataType(DataType.Double)
  666. .name(CommonData.fieldDouble)
  667. .isPrimaryKey(false)
  668. .isNullable(true)
  669. .defaultValue(CommonData.defaultValueDouble)
  670. .build();
  671. CreateCollectionReq.FieldSchema fieldArray = CreateCollectionReq.FieldSchema.builder()
  672. .dataType(DataType.Array)
  673. .name(CommonData.fieldArray)
  674. .elementType(DataType.Int64)
  675. .maxCapacity(1000)
  676. .isNullable(true)
  677. .isPrimaryKey(false)
  678. .build();
  679. CreateCollectionReq.FieldSchema fieldBool = CreateCollectionReq.FieldSchema.builder()
  680. .dataType(DataType.Bool)
  681. .name(CommonData.fieldBool)
  682. .isPrimaryKey(false)
  683. .isNullable(true)
  684. .defaultValue(CommonData.defaultValueBool)
  685. .build();
  686. CreateCollectionReq.FieldSchema fieldVarchar = CreateCollectionReq.FieldSchema.builder()
  687. .dataType(DataType.VarChar)
  688. .name(CommonData.fieldVarchar)
  689. .isPrimaryKey(false)
  690. .maxLength(1000)
  691. .isNullable(true)
  692. .defaultValue(CommonData.defaultValueString)
  693. .build();
  694. CreateCollectionReq.FieldSchema fieldFloat = CreateCollectionReq.FieldSchema.builder()
  695. .dataType(DataType.Float)
  696. .name(CommonData.fieldFloat)
  697. .isPrimaryKey(false)
  698. .isNullable(true)
  699. .defaultValue(CommonData.defaultValueFloat)
  700. .build();
  701. CreateCollectionReq.FieldSchema fieldJson = CreateCollectionReq.FieldSchema.builder()
  702. .dataType(DataType.JSON)
  703. .name(CommonData.fieldJson)
  704. .isNullable(true)
  705. .isPrimaryKey(false)
  706. .build();
  707. CreateCollectionReq.FieldSchema fieldVector = CreateCollectionReq.FieldSchema.builder()
  708. .dataType(vectorType)
  709. .isPrimaryKey(false)
  710. .build();
  711. if (vectorType == DataType.FloatVector) {
  712. fieldVector.setDimension(dim);
  713. fieldVector.setName(CommonData.fieldFloatVector);
  714. }
  715. if (vectorType == DataType.BinaryVector) {
  716. fieldVector.setDimension(dim);
  717. fieldVector.setName(CommonData.fieldBinaryVector);
  718. }
  719. if (vectorType == DataType.Float16Vector) {
  720. fieldVector.setDimension(dim);
  721. fieldVector.setName(CommonData.fieldFloat16Vector);
  722. }
  723. if (vectorType == DataType.BFloat16Vector) {
  724. fieldVector.setDimension(dim);
  725. fieldVector.setName(CommonData.fieldBF16Vector);
  726. }
  727. if (vectorType == DataType.SparseFloatVector) {
  728. fieldVector.setName(CommonData.fieldSparseVector);
  729. }
  730. List<CreateCollectionReq.FieldSchema> fieldSchemaList = new ArrayList<>();
  731. fieldSchemaList.add(fieldInt64);
  732. fieldSchemaList.add(fieldInt32);
  733. fieldSchemaList.add(fieldInt16);
  734. fieldSchemaList.add(fieldInt8);
  735. fieldSchemaList.add(fieldFloat);
  736. fieldSchemaList.add(fieldDouble);
  737. fieldSchemaList.add(fieldArray);
  738. fieldSchemaList.add(fieldBool);
  739. fieldSchemaList.add(fieldJson);
  740. fieldSchemaList.add(fieldVarchar);
  741. fieldSchemaList.add(fieldVector);
  742. CreateCollectionReq.CollectionSchema collectionSchema = CreateCollectionReq.CollectionSchema.builder()
  743. .fieldSchemaList(fieldSchemaList)
  744. .build();
  745. CreateCollectionReq createCollectionReq = CreateCollectionReq.builder()
  746. .collectionSchema(collectionSchema)
  747. .collectionName(collectionName)
  748. .enableDynamicField(false)
  749. .description("collection desc")
  750. .numShards(1)
  751. .build();
  752. milvusClientV2.createCollection(createCollectionReq);
  753. log.info("create collection:" + collectionName);
  754. return collectionName;
  755. }
  756. /**
  757. * 为不同类型向量的collection提供导入的数据,目前只支持行式插入
  758. *
  759. * @param num 数据量
  760. * @param dim 维度
  761. * @return List<JsonObject>
  762. */
  763. public static List<JsonObject> generateDefaultData(long startId, long num, int dim, DataType vectorType) {
  764. List<JsonObject> jsonList = new ArrayList<>();
  765. Random ran = new Random();
  766. Gson gson = new Gson();
  767. for (long i = startId; i < (num + startId); i++) {
  768. JsonObject row = new JsonObject();
  769. row.addProperty(CommonData.fieldInt64, i);
  770. row.addProperty(CommonData.fieldInt32, (int) i % 32767);
  771. row.addProperty(CommonData.fieldInt16, (int) i % 32767);
  772. row.addProperty(CommonData.fieldInt8, (short) i % 127);
  773. row.addProperty(CommonData.fieldDouble, (double) i);
  774. row.add(CommonData.fieldArray, gson.toJsonTree(Arrays.asList(i, i + 1, i + 2)));
  775. row.addProperty(CommonData.fieldBool, i % 2 == 0);
  776. row.addProperty(CommonData.fieldVarchar, "Str" + i);
  777. row.addProperty(CommonData.fieldFloat, (float) i);
  778. // 判断vectorType
  779. if (vectorType == DataType.FloatVector) {
  780. List<Float> vector = new ArrayList<>();
  781. for (int k = 0; k < dim; ++k) {
  782. vector.add(ran.nextFloat());
  783. }
  784. row.add(CommonData.fieldFloatVector, gson.toJsonTree(vector));
  785. }
  786. if (vectorType == DataType.BinaryVector) {
  787. row.add(CommonData.fieldBinaryVector, gson.toJsonTree(generateBinaryVector(dim).array()));
  788. }
  789. if (vectorType == DataType.Float16Vector) {
  790. row.add(CommonData.fieldFloat16Vector, gson.toJsonTree(generateFloat16Vector(dim).array()));
  791. }
  792. if (vectorType == DataType.BFloat16Vector) {
  793. row.add(CommonData.fieldBF16Vector, gson.toJsonTree(generateBF16Vector(dim).array()));
  794. }
  795. if (vectorType == DataType.SparseFloatVector) {
  796. row.add(CommonData.fieldSparseVector, gson.toJsonTree(generateSparseVector(dim)));
  797. }
  798. JsonObject json = new JsonObject();
  799. json.addProperty(CommonData.fieldInt64, (int) i % 32767);
  800. json.addProperty(CommonData.fieldInt32, (int) i % 32767);
  801. json.addProperty(CommonData.fieldDouble, (double) i);
  802. json.add(CommonData.fieldArray, gson.toJsonTree(Arrays.asList(i, i + 1, i + 2)));
  803. json.addProperty(CommonData.fieldBool, i % 2 == 0);
  804. json.addProperty(CommonData.fieldVarchar, "Str" + i);
  805. json.addProperty(CommonData.fieldFloat, (float) i);
  806. row.add(CommonData.fieldJson, json);
  807. jsonList.add(row);
  808. }
  809. return jsonList;
  810. }
  811. public static List<JsonObject> generateDefaultDataWithDynamic(long startId, long num, int dim, DataType vectorType) {
  812. List<JsonObject> jsonList = new ArrayList<>();
  813. Random ran = new Random();
  814. Gson gson = new Gson();
  815. for (long i = startId; i < (num + startId); i++) {
  816. JsonObject row = new JsonObject();
  817. row.addProperty(CommonData.fieldInt64, i);
  818. row.addProperty(CommonData.fieldInt32, (int) i % 32767);
  819. row.addProperty(CommonData.fieldInt16, (int) i % 32767);
  820. row.addProperty(CommonData.fieldInt8, (short) i % 127);
  821. row.addProperty(CommonData.fieldDouble, (double) i);
  822. row.add(CommonData.fieldArray, gson.toJsonTree(Arrays.asList(i, i + 1, i + 2)));
  823. row.addProperty(CommonData.fieldBool, i % 2 == 0);
  824. row.addProperty(CommonData.fieldVarchar, "Str" + i);
  825. row.addProperty(CommonData.fieldFloat, (float) i);
  826. // 判断vectorType
  827. if (vectorType == DataType.FloatVector) {
  828. List<Float> vector = new ArrayList<>();
  829. for (int k = 0; k < dim; ++k) {
  830. vector.add(ran.nextFloat());
  831. }
  832. row.add(CommonData.fieldFloatVector, gson.toJsonTree(vector));
  833. }
  834. if (vectorType == DataType.BinaryVector) {
  835. row.add(CommonData.fieldBinaryVector, gson.toJsonTree(generateBinaryVector(dim).array()));
  836. }
  837. if (vectorType == DataType.Float16Vector) {
  838. row.add(CommonData.fieldFloat16Vector, gson.toJsonTree(generateFloat16Vector(dim).array()));
  839. }
  840. if (vectorType == DataType.BFloat16Vector) {
  841. row.add(CommonData.fieldBF16Vector, gson.toJsonTree(generateBF16Vector(dim).array()));
  842. }
  843. if (vectorType == DataType.SparseFloatVector) {
  844. row.add(CommonData.fieldSparseVector, gson.toJsonTree(generateSparseVector(dim)));
  845. }
  846. JsonObject json = new JsonObject();
  847. json.addProperty(CommonData.fieldInt64, (int) i % 32767);
  848. json.addProperty(CommonData.fieldInt32, (int) i % 32767);
  849. json.addProperty(CommonData.fieldDouble, (double) i);
  850. json.add(CommonData.fieldArray, gson.toJsonTree(Arrays.asList(i, i + 1, i + 2)));
  851. json.addProperty(CommonData.fieldBool, i % 2 == 0);
  852. json.addProperty(CommonData.fieldVarchar, "Str" + i);
  853. json.addProperty(CommonData.fieldFloat, (float) i);
  854. row.add(CommonData.fieldJson, json);
  855. // dynamic field
  856. JsonObject jsonDynamic = new JsonObject();
  857. json.addProperty(CommonData.fieldInt64, (int) i % 32767);
  858. json.addProperty(CommonData.fieldInt32, (int) i % 32767);
  859. json.addProperty(CommonData.fieldDouble, (double) i);
  860. json.add(CommonData.fieldArray, gson.toJsonTree(Arrays.asList(i, i + 1, i + 2)));
  861. json.addProperty(CommonData.fieldBool, i % 2 == 0);
  862. json.addProperty(CommonData.fieldVarchar, "Str" + i);
  863. json.addProperty(CommonData.fieldFloat, (float) i);
  864. row.add(CommonData.fieldDynamic, json);
  865. jsonList.add(row);
  866. }
  867. return jsonList;
  868. }
  869. /**
  870. * 为快速生成的collection提供导入数据
  871. *
  872. * @param num 数据量
  873. * @param dim 维度
  874. * @return List<JsonObject>
  875. */
  876. public static List<JsonObject> generateSimpleData(long num, int dim) {
  877. List<JsonObject> jsonList = new ArrayList<>();
  878. Random ran = new Random();
  879. Gson gson = new Gson();
  880. for (long i = 0; i < num; i++) {
  881. JsonObject row = new JsonObject();
  882. row.addProperty(CommonData.simplePk, i);
  883. List<Float> vector = new ArrayList<>();
  884. for (int k = 0; k < dim; ++k) {
  885. vector.add(ran.nextFloat());
  886. }
  887. row.add(CommonData.simpleVector, gson.toJsonTree(vector));
  888. jsonList.add(row);
  889. }
  890. return jsonList;
  891. }
  892. /**
  893. * 为collection提供导入含有NULL的数据,目前只支持行式插入
  894. *
  895. * @param num 数据量
  896. * @param dim 维度
  897. * @return List<JsonObject>
  898. */
  899. public static List<JsonObject> generateSimpleNullData(long startId, long num, int dim, DataType vectorType) {
  900. List<JsonObject> jsonList = new ArrayList<>();
  901. Random ran = new Random();
  902. Gson gson = new Gson();
  903. for (long i = startId; i < (num + startId); i++) {
  904. JsonObject row = new JsonObject();
  905. row.addProperty(CommonData.fieldInt64, i);
  906. if (i % 2 == 0) {
  907. row.addProperty(CommonData.fieldInt32, (int) i % 32767);
  908. row.addProperty(CommonData.fieldInt16, (int) i % 32767);
  909. row.addProperty(CommonData.fieldInt8, (short) i % 127);
  910. row.addProperty(CommonData.fieldBool, i % 3 == 0);
  911. row.addProperty(CommonData.fieldDouble, (double) i);
  912. row.addProperty(CommonData.fieldVarchar, "Str" + i);
  913. row.addProperty(CommonData.fieldFloat, (float) i);
  914. row.add(CommonData.fieldArray, gson.toJsonTree(Arrays.asList(i, i + 1, i + 2)));
  915. }
  916. // 判断vectorType
  917. if (vectorType == DataType.FloatVector) {
  918. List<Float> vector = new ArrayList<>();
  919. for (int k = 0; k < dim; ++k) {
  920. vector.add(ran.nextFloat());
  921. }
  922. row.add(CommonData.fieldFloatVector, gson.toJsonTree(vector));
  923. }
  924. if (vectorType == DataType.BinaryVector) {
  925. row.add(CommonData.fieldBinaryVector, gson.toJsonTree(generateBinaryVector(dim).array()));
  926. }
  927. if (vectorType == DataType.Float16Vector) {
  928. row.add(CommonData.fieldFloat16Vector, gson.toJsonTree(generateFloat16Vector(dim).array()));
  929. }
  930. if (vectorType == DataType.BFloat16Vector) {
  931. row.add(CommonData.fieldBF16Vector, gson.toJsonTree(generateBF16Vector(dim).array()));
  932. }
  933. if (vectorType == DataType.SparseFloatVector) {
  934. row.add(CommonData.fieldSparseVector, gson.toJsonTree(generateSparseVector(dim)));
  935. }
  936. JsonObject json = new JsonObject();
  937. if (i % 2 == 0) {
  938. json.addProperty(CommonData.fieldInt64, (int) i % 32767);
  939. json.addProperty(CommonData.fieldInt32, (int) i % 32767);
  940. json.addProperty(CommonData.fieldDouble, (double) i);
  941. json.add(CommonData.fieldArray, gson.toJsonTree(Arrays.asList(i, i + 1, i + 2)));
  942. json.addProperty(CommonData.fieldBool, i % 3 == 0);
  943. json.addProperty(CommonData.fieldVarchar, "Str" + i);
  944. json.addProperty(CommonData.fieldFloat, (float) i);
  945. }
  946. row.add(CommonData.fieldJson, json);
  947. jsonList.add(row);
  948. }
  949. return jsonList;
  950. }
  951. /**
  952. * 快速创建一个collection,只有主键和向量字段
  953. *
  954. * @param dim 维度
  955. * @param collectionName collection name
  956. * @return collectionName
  957. */
  958. public static String createSimpleCollection(int dim, String collectionName, boolean autoPK) {
  959. if (collectionName == null) {
  960. collectionName = "Collection_" + GenerateUtil.getRandomString(10);
  961. }
  962. milvusClientV2.createCollection(CreateCollectionReq.builder()
  963. .collectionName(collectionName)
  964. .autoID(autoPK)
  965. .dimension(dim)
  966. .enableDynamicField(false)
  967. .build());
  968. return collectionName;
  969. }
  970. /**
  971. * 创建索引时,提供额外的参数
  972. *
  973. * @param indexType 索引类型
  974. * @return Map类型参数
  975. */
  976. public static Map<String, Object> provideExtraParam(IndexParam.IndexType indexType) {
  977. Map<String, Object> map = new HashMap<>();
  978. switch (indexType) {
  979. case FLAT:
  980. case AUTOINDEX:
  981. break;
  982. case HNSW:
  983. map.put("M", 16);
  984. map.put("efConstruction", 64);
  985. break;
  986. default:
  987. map.put("nlist", 128);
  988. break;
  989. }
  990. return map;
  991. }
  992. /**
  993. * 创建向量索引
  994. *
  995. * @param collectionName collectionName
  996. * @param vectorName 向量名称
  997. * @param indexType indexType
  998. * @param metricType metricType
  999. */
  1000. public static void createVectorIndex(String collectionName, String vectorName, IndexParam.IndexType indexType, IndexParam.MetricType metricType) {
  1001. IndexParam indexParam = IndexParam.builder()
  1002. .fieldName(vectorName)
  1003. .indexType(indexType)
  1004. .extraParams(provideExtraParam(indexType))
  1005. .metricType(metricType)
  1006. .build();
  1007. milvusClientV2.createIndex(CreateIndexReq.builder()
  1008. .collectionName(collectionName)
  1009. .indexParams(Collections.singletonList(indexParam))
  1010. .build());
  1011. }
  1012. /**
  1013. * 创建标量索引
  1014. *
  1015. * @param collectionName collectionName
  1016. * @param scalarName 多个标量名称的集合
  1017. */
  1018. public static void createScalarIndex(String collectionName, List<String> scalarName) {
  1019. List<IndexParam> indexParams = new ArrayList<>();
  1020. scalarName.forEach(x -> {
  1021. IndexParam indexParam = IndexParam.builder().indexType(IndexParam.IndexType.TRIE).fieldName(x).build();
  1022. indexParams.add(indexParam);
  1023. });
  1024. milvusClientV2.createIndex(CreateIndexReq.builder()
  1025. .collectionName(collectionName)
  1026. .indexParams(indexParams)
  1027. .build());
  1028. }
  1029. public static void createPartition(String collectionName, String partitionName) {
  1030. milvusClientV2.createPartition(CreatePartitionReq.builder()
  1031. .collectionName(collectionName)
  1032. .partitionName(partitionName)
  1033. .build());
  1034. }
  1035. public static SearchResp defaultSearch(String collectionName) {
  1036. List<List<Float>> vectors = GenerateUtil.generateFloatVector(10, 3, CommonData.dim);
  1037. List<BaseVector> data = new ArrayList<>();
  1038. vectors.forEach((v) -> {
  1039. data.add(new FloatVec(v));
  1040. });
  1041. return milvusClientV2.search(SearchReq.builder()
  1042. .collectionName(collectionName)
  1043. .outputFields(Lists.newArrayList("*"))
  1044. .consistencyLevel(ConsistencyLevel.STRONG)
  1045. .annsField(CommonData.fieldFloatVector)
  1046. .data(data)
  1047. .topK(CommonData.topK)
  1048. .build());
  1049. }
  1050. /**
  1051. * 创建一条float32的向量
  1052. *
  1053. * @param dimension 维度
  1054. * @return List<Float>
  1055. */
  1056. public static List<Float> generateFloatVector(int dimension) {
  1057. Random ran = new Random();
  1058. List<Float> vector = new ArrayList<>();
  1059. for (int i = 0; i < dimension; ++i) {
  1060. vector.add(ran.nextFloat());
  1061. }
  1062. return vector;
  1063. }
  1064. /**
  1065. * 创建一条Sparse向量数据
  1066. *
  1067. * @param dim 维度,sparse不需要指定维度,所以方法里随机
  1068. * @return SortedMap<Long, Float>
  1069. */
  1070. public static SortedMap<Long, Float> generateSparseVector(int dim) {
  1071. Random ran = new Random();
  1072. SortedMap<Long, Float> sparse = new TreeMap<>();
  1073. int dimNum = ran.nextInt(dim) + 1;
  1074. for (int i = 0; i < dimNum; ++i) {
  1075. sparse.put((long) ran.nextInt(1000000), ran.nextFloat());
  1076. }
  1077. return sparse;
  1078. }
  1079. /**
  1080. * 创建多条Sparse向量数据
  1081. *
  1082. * @param dim 维度,sparse不需要指定维度,所以方法里随机
  1083. * @return List<SortedMap < Long, Float>>
  1084. */
  1085. public static List<SortedMap<Long, Float>> generateSparseVectors(int dim, long count) {
  1086. List<SortedMap<Long, Float>> list = new ArrayList<>();
  1087. for (int n = 0; n < count; ++n) {
  1088. list.add(generateSparseVector(dim));
  1089. }
  1090. return list;
  1091. }
  1092. /**
  1093. * 创建一条float16的向量
  1094. *
  1095. * @param dim 维度
  1096. * @return ByteBuffer
  1097. */
  1098. public static ByteBuffer generateFloat16Vector(int dim) {
  1099. List<Float> originalVector = generateFloatVector(dim);
  1100. return Float16Utils.f32VectorToFp16Buffer(originalVector);
  1101. }
  1102. /**
  1103. * 创建指定数量的float16的向量
  1104. *
  1105. * @param dim 维度
  1106. * @param count 指定条数
  1107. * @return List<ByteBuffer>
  1108. */
  1109. public static List<ByteBuffer> generateFloat16Vectors(int dim, long count) {
  1110. List<ByteBuffer> vectors = new ArrayList<>();
  1111. for (int n = 0; n < count; ++n) {
  1112. vectors.add(generateFloat16Vector(dim));
  1113. }
  1114. return vectors;
  1115. }
  1116. /**
  1117. * 创建一条BF16的向量
  1118. *
  1119. * @param dim
  1120. * @return ByteBuffer
  1121. */
  1122. public static ByteBuffer generateBF16Vector(int dim) {
  1123. List<Float> originalVector = generateFloatVector(dim);
  1124. return Float16Utils.f32VectorToBf16Buffer(originalVector);
  1125. }
  1126. /**
  1127. * 创建指定数量的BF16的向量
  1128. *
  1129. * @param dim
  1130. * @param count
  1131. * @return List<ByteBuffer>
  1132. */
  1133. public static List<ByteBuffer> generateBF16Vectors(int dim, long count) {
  1134. List<ByteBuffer> vectors = new ArrayList<>();
  1135. for (int n = 0; n < count; ++n) {
  1136. vectors.add(generateBF16Vector(dim));
  1137. }
  1138. return vectors;
  1139. }
  1140. /**
  1141. * 生成一条binary向量
  1142. *
  1143. * @param dim 维度
  1144. * @return ByteBuffer
  1145. */
  1146. public static ByteBuffer generateBinaryVector(int dim) {
  1147. Random ran = new Random();
  1148. int byteCount = dim / 8;
  1149. ByteBuffer vector = ByteBuffer.allocate(byteCount);
  1150. for (int i = 0; i < byteCount; ++i) {
  1151. vector.put((byte) ran.nextInt(Byte.MAX_VALUE));
  1152. }
  1153. return vector;
  1154. }
  1155. /**
  1156. * 生成指定数量的binary向量数据
  1157. *
  1158. * @param count binary向量的数据条数
  1159. * @param dim 维度
  1160. * @return List<ByteBuffer>
  1161. */
  1162. public static List<ByteBuffer> generateBinaryVectors(int dim, long count) {
  1163. List<ByteBuffer> vectors = new ArrayList<>();
  1164. for (int n = 0; n < count; ++n) {
  1165. vectors.add(generateBinaryVector(dim));
  1166. }
  1167. return vectors;
  1168. }
  1169. private static JsonArray toJsonArray(byte[] bytes) {
  1170. JsonArray jsonArray = new JsonArray();
  1171. for (byte b : bytes) {
  1172. jsonArray.add(b);
  1173. }
  1174. return jsonArray;
  1175. }
  1176. /**
  1177. * 创建索引
  1178. *
  1179. * @param collection collection name
  1180. * @param vectorType 向量类型
  1181. */
  1182. public static void createIndex(String collection, DataType vectorType) {
  1183. IndexParam indexParam = IndexParam.builder()
  1184. .fieldName(provideFieldVectorName(vectorType))
  1185. .indexType(IndexParam.IndexType.AUTOINDEX)
  1186. .extraParams(CommonFunction.provideExtraParam(IndexParam.IndexType.AUTOINDEX))
  1187. .metricType(provideMetricTypeByVectorType(vectorType))
  1188. .build();
  1189. milvusClientV2.createIndex(CreateIndexReq.builder()
  1190. .collectionName(collection)
  1191. .indexParams(Collections.singletonList(indexParam))
  1192. .build());
  1193. }
  1194. /**
  1195. * 更具向量类型提供MetricType
  1196. *
  1197. * @param vectorType 向量类型
  1198. * @return MetricType
  1199. */
  1200. public static IndexParam.MetricType provideMetricTypeByVectorType(DataType vectorType) {
  1201. switch (vectorType.getCode()) {
  1202. case 101:
  1203. case 102:
  1204. case 103:
  1205. return IndexParam.MetricType.L2;
  1206. case 100:
  1207. return IndexParam.MetricType.HAMMING;
  1208. case 104:
  1209. return IndexParam.MetricType.IP;
  1210. default:
  1211. return IndexParam.MetricType.INVALID;
  1212. }
  1213. }
  1214. /**
  1215. * 更具向量类型提供向量name
  1216. *
  1217. * @param vectorType 向量类型
  1218. * @return vector field name
  1219. */
  1220. public static String provideFieldVectorName(DataType vectorType) {
  1221. switch (vectorType.getCode()) {
  1222. case 101:
  1223. return CommonData.fieldFloatVector;
  1224. case 102:
  1225. return CommonData.fieldFloat16Vector;
  1226. case 103:
  1227. return CommonData.fieldBF16Vector;
  1228. case 100:
  1229. return CommonData.fieldBinaryVector;
  1230. case 104:
  1231. return CommonData.fieldSparseVector;
  1232. default:
  1233. return "";
  1234. }
  1235. }
  1236. /**
  1237. * collection建索引+insert+load
  1238. *
  1239. * @param collectionName collection name
  1240. * @param vectorType 向量类型
  1241. * @param ifLoad 是否load
  1242. */
  1243. public static void createIndexAndInsertAndLoad(String collectionName, DataType vectorType, @NonNull Boolean ifLoad, Long numberEntities) {
  1244. IndexParam indexParam = IndexParam.builder()
  1245. .fieldName(provideFieldVectorName(vectorType))
  1246. .indexType(providerIndexType(vectorType))
  1247. .extraParams(CommonFunction.provideExtraParam(providerIndexType(vectorType)))
  1248. .metricType(provideMetricTypeByVectorType(vectorType))
  1249. .build();
  1250. milvusClientV2.createIndex(CreateIndexReq.builder()
  1251. .collectionName(collectionName)
  1252. .indexParams(Collections.singletonList(indexParam))
  1253. .build());
  1254. if (ifLoad) {
  1255. milvusClientV2.loadCollection(LoadCollectionReq.builder().collectionName(collectionName).build());
  1256. }
  1257. insertIntoCollectionByBatch(collectionName, numberEntities, CommonData.dim, vectorType);
  1258. }
  1259. public static void insertIntoCollectionByBatch(String collectionName, long num, int dim, DataType vectorType) {
  1260. long insertRounds = (num / CommonData.batchSize) == 0 ? 1 : (num / CommonData.batchSize);
  1261. for (int i = 0; i < insertRounds; i++) {
  1262. System.out.println("insert batch:" + (i + 1));
  1263. List<JsonObject> jsonObjects = generateDefaultData(i * CommonData.batchSize, CommonData.batchSize, dim, vectorType);
  1264. InsertResp insert = milvusClientV2.insert(InsertReq.builder().collectionName(collectionName).data(jsonObjects).build());
  1265. }
  1266. }
  1267. /**
  1268. * 提供search时候的向量参数
  1269. *
  1270. * @param nq 向量个数
  1271. * @param dim 维度
  1272. * @param vectorType 向量类型
  1273. * @return List<BaseVector>
  1274. */
  1275. public static List<BaseVector> providerBaseVector(int nq, int dim, DataType vectorType) {
  1276. List<BaseVector> data = new ArrayList<>();
  1277. if (vectorType.equals(DataType.FloatVector)) {
  1278. List<List<Float>> lists = GenerateUtil.generateFloatVector(nq, 3, dim);
  1279. lists.forEach((v) -> {
  1280. data.add(new FloatVec(v));
  1281. });
  1282. }
  1283. if (vectorType.equals(DataType.BinaryVector)) {
  1284. List<ByteBuffer> byteBuffers = generateBinaryVectors(dim, nq);
  1285. byteBuffers.forEach(x -> {
  1286. data.add(new BinaryVec(x));
  1287. });
  1288. }
  1289. if (vectorType.equals(DataType.Float16Vector)) {
  1290. List<ByteBuffer> byteBuffers = generateFloat16Vectors(dim, nq);
  1291. byteBuffers.forEach(x -> {
  1292. data.add(new Float16Vec(x));
  1293. });
  1294. }
  1295. if (vectorType.equals(DataType.BFloat16Vector)) {
  1296. List<ByteBuffer> byteBuffers = generateBF16Vectors(dim, nq);
  1297. byteBuffers.forEach(x -> {
  1298. data.add(new BFloat16Vec(x));
  1299. });
  1300. }
  1301. if (vectorType.equals(DataType.SparseFloatVector)) {
  1302. List<SortedMap<Long, Float>> list = generateSparseVectors(dim, nq);
  1303. list.forEach(x -> {
  1304. data.add(new SparseFloatVec(x));
  1305. });
  1306. }
  1307. return data;
  1308. }
  1309. /**
  1310. * 根据向量类型决定IndexType
  1311. *
  1312. * @param vectorType DataType
  1313. * @return IndexParam.IndexType
  1314. */
  1315. public static IndexParam.IndexType providerIndexType(DataType vectorType) {
  1316. switch (vectorType.getCode()) {
  1317. case 101:
  1318. return IndexParam.IndexType.HNSW;
  1319. case 102:
  1320. return IndexParam.IndexType.HNSW;
  1321. case 103:
  1322. return IndexParam.IndexType.HNSW;
  1323. case 100:
  1324. return IndexParam.IndexType.BIN_IVF_FLAT;
  1325. case 104:
  1326. return IndexParam.IndexType.SPARSE_WAND;
  1327. default:
  1328. return IndexParam.IndexType.TRIE;
  1329. }
  1330. }
  1331. /**
  1332. * 创建通用的collection方法,支持多个filed,多个向量
  1333. *
  1334. * @param collectionName collection 可不传
  1335. * @param pkDataType 主键类型
  1336. * @param enableDynamic 是否开启动态列
  1337. * @param fieldParamList 其他字段
  1338. * @return collection name
  1339. */
  1340. public static String genCommonCollection(@Nullable String collectionName, DataType pkDataType, boolean enableDynamic, List<FieldParam> fieldParamList) {
  1341. if (collectionName == null || collectionName.equals("")) {
  1342. collectionName = "Collection_" + GenerateUtil.getRandomString(10);
  1343. }
  1344. List<CreateCollectionReq.FieldSchema> fieldSchemaList = parseDataType(fieldParamList);
  1345. CreateCollectionReq.FieldSchema fieldInt64 = CreateCollectionReq.FieldSchema.builder()
  1346. .autoID(false)
  1347. .dataType(pkDataType)
  1348. .isPrimaryKey(true)
  1349. .name(pkDataType + "_0")
  1350. .build();
  1351. fieldSchemaList.add(fieldInt64);
  1352. CreateCollectionReq.CollectionSchema collectionSchema = CreateCollectionReq.CollectionSchema.builder()
  1353. .fieldSchemaList(fieldSchemaList)
  1354. .build();
  1355. CreateCollectionReq createCollectionReq = CreateCollectionReq.builder()
  1356. .collectionSchema(collectionSchema)
  1357. .collectionName(collectionName)
  1358. .enableDynamicField(enableDynamic)
  1359. .description("collection desc")
  1360. .numShards(1)
  1361. .build();
  1362. milvusClientV2.createCollection(createCollectionReq);
  1363. return collectionName;
  1364. }
  1365. /**
  1366. * 遍历fieldParamList生成对应的schema
  1367. *
  1368. * @param fieldParamList field字段集合
  1369. * @return List<CreateCollectionReq.FieldSchema> 给创建collection提供
  1370. */
  1371. public static List<CreateCollectionReq.FieldSchema> parseDataType(List<FieldParam> fieldParamList) {
  1372. List<CreateCollectionReq.FieldSchema> fieldSchemaList = new ArrayList<>();
  1373. for (FieldParam fieldParam : fieldParamList) {
  1374. //按照_分组
  1375. DataType dataType = fieldParam.getDataType();
  1376. CreateCollectionReq.FieldSchema fieldSchema = CreateCollectionReq.FieldSchema.builder()
  1377. .dataType(dataType)
  1378. .name(fieldParam.getFieldName())
  1379. .isPrimaryKey(false)
  1380. .build();
  1381. if (dataType == DataType.FloatVector || dataType == DataType.BFloat16Vector || dataType == DataType.Float16Vector || dataType == DataType.BinaryVector) {
  1382. fieldSchema.setDimension(fieldParam.getDim());
  1383. }
  1384. if (dataType == DataType.String || dataType == DataType.VarChar) {
  1385. fieldSchema.setMaxLength(fieldParam.getMaxLength());
  1386. }
  1387. if (dataType == DataType.Array) {
  1388. fieldSchema.setMaxCapacity(fieldParam.getMaxCapacity());
  1389. fieldSchema.setElementType(fieldParam.getElementType());
  1390. }
  1391. fieldSchemaList.add(fieldSchema);
  1392. }
  1393. return fieldSchemaList;
  1394. }
  1395. /**
  1396. * 生成通用的数据
  1397. *
  1398. * @param collectionName 向量名称
  1399. * @param count 生成的数量
  1400. * @return List<JsonObject>
  1401. */
  1402. public static List<JsonObject> genCommonData(String collectionName, long count) {
  1403. DescribeCollectionResp describeCollectionResp = milvusClientV2.describeCollection(DescribeCollectionReq.builder().collectionName(collectionName).build());
  1404. CreateCollectionReq.CollectionSchema collectionSchema = describeCollectionResp.getCollectionSchema();
  1405. List<CreateCollectionReq.FieldSchema> fieldSchemaList = collectionSchema.getFieldSchemaList();
  1406. List<JsonObject> jsonList = new ArrayList<>();
  1407. for (int i = 0; i < count; i++) {
  1408. JsonObject row = new JsonObject();
  1409. for (CreateCollectionReq.FieldSchema fieldSchema : fieldSchemaList) {
  1410. String name = fieldSchema.getName();
  1411. DataType dataType = fieldSchema.getDataType();
  1412. Integer dimension = fieldSchema.getDimension();
  1413. Integer maxCapacity = fieldSchema.getMaxCapacity();
  1414. Integer maxLength = fieldSchema.getMaxLength();
  1415. JsonObject jsonObject;
  1416. if (dataType == DataType.FloatVector || dataType == DataType.BFloat16Vector || dataType == DataType.Float16Vector || dataType == DataType.BinaryVector) {
  1417. jsonObject = generalJsonObjectByDataType(name, dataType, dimension, i);
  1418. } else if (dataType == DataType.SparseFloatVector) {
  1419. jsonObject = generalJsonObjectByDataType(name, dataType, CommonData.dim, i);
  1420. } else if (dataType == DataType.VarChar || dataType == DataType.String) {
  1421. jsonObject = generalJsonObjectByDataType(name, dataType, maxLength, i);
  1422. } else if (dataType == DataType.Array) {
  1423. jsonObject = generalJsonObjectByDataType(name, dataType, maxCapacity, i);
  1424. } else {
  1425. jsonObject = generalJsonObjectByDataType(name, dataType, 0, i);
  1426. }
  1427. row = JsonObjectUtil.jsonMerge(row, jsonObject);
  1428. }
  1429. jsonList.add(row);
  1430. }
  1431. return jsonList;
  1432. }
  1433. /**
  1434. * 更具数据类型,创建JsonObject
  1435. *
  1436. * @param fieldName 字段名称
  1437. * @param dataType 类型
  1438. * @param dimOrLength 向量维度或者array容量或者varchar长度
  1439. * @param countIndex 索引i,避免多次创建时数据内容一样
  1440. * @return JsonObject
  1441. */
  1442. public static JsonObject generalJsonObjectByDataType(String fieldName, DataType dataType, int dimOrLength, long countIndex) {
  1443. JsonObject row = new JsonObject();
  1444. Gson gson = new Gson();
  1445. Random random = new Random();
  1446. if (dataType == DataType.Int64) {
  1447. row.addProperty(fieldName, countIndex);
  1448. }
  1449. if (dataType == DataType.Int32) {
  1450. row.addProperty(fieldName, (int) countIndex % 32767);
  1451. }
  1452. if (dataType == DataType.Int16) {
  1453. row.addProperty(fieldName, (int) countIndex % 32767);
  1454. }
  1455. if (dataType == DataType.Int8) {
  1456. row.addProperty(fieldName, (short) countIndex % 127);
  1457. }
  1458. if (dataType == DataType.Double) {
  1459. row.addProperty(fieldName, (double) countIndex * 0.1f);
  1460. }
  1461. if (dataType == DataType.Array) {
  1462. int i = random.nextInt(dimOrLength);
  1463. List<Long> arrays = new ArrayList<>();
  1464. for (int j = 0; j < i; j++) {
  1465. arrays.add(countIndex + j);
  1466. }
  1467. row.add(fieldName, gson.toJsonTree(arrays));
  1468. }
  1469. if (dataType == DataType.Bool) {
  1470. row.addProperty(fieldName, countIndex % 2 == 0);
  1471. }
  1472. if (dataType == DataType.VarChar) {
  1473. int i = random.nextInt(dimOrLength / 2);
  1474. String s = MathUtil.genRandomStringAndChinese(i);
  1475. row.addProperty(fieldName, s);
  1476. }
  1477. if (dataType == DataType.String) {
  1478. int i = random.nextInt(dimOrLength / 2);
  1479. String s = MathUtil.genRandomStringAndChinese(i);
  1480. row.addProperty(fieldName, s);
  1481. }
  1482. if (dataType == DataType.Float) {
  1483. row.addProperty(fieldName, (float) countIndex * 0.1f);
  1484. }
  1485. if (dataType == DataType.FloatVector) {
  1486. List<Float> vector = new ArrayList<>();
  1487. for (int k = 0; k < dimOrLength; ++k) {
  1488. vector.add(random.nextFloat());
  1489. }
  1490. row.add(fieldName, gson.toJsonTree(vector));
  1491. }
  1492. if (dataType == DataType.BinaryVector) {
  1493. row.add(fieldName, gson.toJsonTree(generateBinaryVector(dimOrLength).array()));
  1494. }
  1495. if (dataType == DataType.Float16Vector) {
  1496. row.add(fieldName, gson.toJsonTree(generateFloat16Vector(dimOrLength).array()));
  1497. }
  1498. if (dataType == DataType.BFloat16Vector) {
  1499. row.add(fieldName, gson.toJsonTree(generateBF16Vector(dimOrLength).array()));
  1500. }
  1501. if (dataType == DataType.SparseFloatVector) {
  1502. row.add(fieldName, gson.toJsonTree(generateSparseVector(dimOrLength)));
  1503. }
  1504. if (dataType == DataType.JSON) {
  1505. JsonObject json = new JsonObject();
  1506. json.addProperty(CommonData.fieldInt64, (int) countIndex % 32767);
  1507. json.addProperty(CommonData.fieldInt32, (int) countIndex % 32767);
  1508. json.addProperty(CommonData.fieldDouble, (double) countIndex);
  1509. json.add(CommonData.fieldArray, gson.toJsonTree(Arrays.asList(countIndex, countIndex + 1, countIndex + 2)));
  1510. json.addProperty(CommonData.fieldBool, countIndex % 2 == 0);
  1511. json.addProperty(CommonData.fieldVarchar, "Str" + countIndex);
  1512. json.addProperty(CommonData.fieldFloat, (float) countIndex);
  1513. row.add(fieldName, json);
  1514. }
  1515. return row;
  1516. }
  1517. /**
  1518. * 创建通用索引
  1519. *
  1520. * @param collection collection name
  1521. * @param fieldParamList field集合
  1522. */
  1523. public static void createCommonIndex(String collection, List<FieldParam> fieldParamList) {
  1524. List<IndexParam> indexParamList = new ArrayList<>();
  1525. for (FieldParam fieldParam : fieldParamList) {
  1526. //按照_分组
  1527. DataType dataType = fieldParam.getDataType();
  1528. String fieldName = fieldParam.getFieldName();
  1529. IndexParam indexParam = IndexParam.builder()
  1530. .fieldName(fieldName)
  1531. .indexType(providerIndexType(dataType))
  1532. .extraParams(CommonFunction.provideExtraParam(providerIndexType(dataType)))
  1533. .metricType(provideMetricTypeByVectorType(dataType))
  1534. .build();
  1535. indexParamList.add(indexParam);
  1536. }
  1537. milvusClientV2.createIndex(CreateIndexReq.builder()
  1538. .collectionName(collection)
  1539. .indexParams(indexParamList)
  1540. .build());
  1541. }
  1542. /**
  1543. * Create Scalar Indexes
  1544. *
  1545. * @param collection collection name
  1546. * @param fieldParamList scalar fields
  1547. */
  1548. public static void createScalarCommonIndex(String collection, List<FieldParam> fieldParamList) {
  1549. List<IndexParam> indexParamList = new ArrayList<>();
  1550. for (FieldParam fieldParam : fieldParamList) {
  1551. IndexParam.IndexType indexType = fieldParam.getIndextype();
  1552. String fieldName = fieldParam.getFieldName();
  1553. IndexParam indexParam = IndexParam.builder()
  1554. .fieldName(fieldName)
  1555. .indexType(indexType)
  1556. .indexName(fieldName)
  1557. .build();
  1558. indexParamList.add(indexParam);
  1559. }
  1560. milvusClientV2.createIndex(CreateIndexReq.builder()
  1561. .collectionName(collection)
  1562. .indexParams(indexParamList)
  1563. .build());
  1564. }
  1565. /**
  1566. * Drop Scalar Indexes
  1567. *
  1568. * @param collection collection name
  1569. * @param fieldParamList FieldParamList
  1570. */
  1571. public static void dropScalarCommonIndex(String collection, List<FieldParam> fieldParamList) {
  1572. List<String> fieldNames = fieldParamList.stream().map(FieldParam::getFieldName).collect(Collectors.toList());
  1573. fieldNames.forEach(x -> milvusClientV2.dropIndex(DropIndexReq.builder()
  1574. .collectionName(collection)
  1575. .fieldName(x)
  1576. .indexName(x)
  1577. .build()));
  1578. }
  1579. /**
  1580. * 为多向量查询提供AnnSearch
  1581. *
  1582. * @param fieldParam 字段参数
  1583. * @param nq 传入的向量数
  1584. * @param topK 查询数量
  1585. * @param expr 表达式
  1586. * @return AnnSearchReq
  1587. */
  1588. public static AnnSearchReq provideAnnSearch(FieldParam fieldParam, int nq, int topK, String expr) {
  1589. DataType dataType = fieldParam.getDataType();
  1590. int dim = fieldParam.getDim();
  1591. List<BaseVector> baseVectors = providerBaseVector(nq, dim, dataType);
  1592. return AnnSearchReq.builder().vectors(baseVectors)
  1593. .topK(topK)
  1594. .vectorFieldName(fieldParam.getFieldName())
  1595. .params(provideSearchParam(providerIndexType(dataType)))
  1596. .expr(expr).build();
  1597. }
  1598. /**
  1599. * 根据索引类型提供查询参数
  1600. *
  1601. * @param indexType index type
  1602. * @return String 查询参数
  1603. */
  1604. public static String provideSearchParam(IndexParam.IndexType indexType) {
  1605. String extraParam;
  1606. switch (indexType) {
  1607. case FLAT:
  1608. extraParam = "{}";
  1609. break;
  1610. case IVF_FLAT:
  1611. extraParam = "{\"nlist\":32,\"nprobe\":32}";
  1612. break;
  1613. case IVF_SQ8:
  1614. extraParam = "{\"nlist\":128}";
  1615. break;
  1616. case IVF_PQ:
  1617. extraParam = "{\"nlist\":128, \"m\":16, \"nbits\":8}";
  1618. break;
  1619. case HNSW:
  1620. extraParam = "{\"M\":16,\"efConstruction\":64}";
  1621. break;
  1622. case BIN_IVF_FLAT:
  1623. extraParam = "{\"nlist\": 128}";
  1624. break;
  1625. case SCANN:
  1626. extraParam = "{\"nlist\":1024,\"with_raw_data\":" + true + "}";
  1627. break;
  1628. case GPU_IVF_FLAT:
  1629. extraParam = "{\"nlist\": 64}";
  1630. break;
  1631. case GPU_IVF_PQ:
  1632. extraParam = "{\"nlist\": 64, \"m\": 16, \"nbits\": 8}";
  1633. break;
  1634. case SPARSE_INVERTED_INDEX:
  1635. case SPARSE_WAND:
  1636. extraParam = "{\"drop_ratio_search\":0.2}";
  1637. break;
  1638. default:
  1639. extraParam = "{\"nlist\":128}";
  1640. break;
  1641. }
  1642. return extraParam;
  1643. }
  1644. /**
  1645. * 提供bulk import时候的files
  1646. *
  1647. * @param collection collection
  1648. * @param bulkFileType 文件类型--枚举类bulkFileType
  1649. * @return
  1650. */
  1651. public static List<List<String>> providerBatchFiles(String collection, BulkFileType bulkFileType, long count) {
  1652. // 查询schema
  1653. DescribeCollectionResp describeCollectionResp = milvusClientV2.describeCollection(DescribeCollectionReq.builder().collectionName(collection).build());
  1654. CreateCollectionReq.CollectionSchema collectionSchema = describeCollectionResp.getCollectionSchema();
  1655. RemoteBulkWriter remoteBulkWriter = buildRemoteBulkWriter(collectionSchema, bulkFileType);
  1656. List<JsonObject> jsonObjects = CommonFunction.genCommonData(collection, count);
  1657. jsonObjects.forEach(x -> {
  1658. try {
  1659. remoteBulkWriter.appendRow(x);
  1660. } catch (IOException | InterruptedException e) {
  1661. log.error(e.getMessage());
  1662. }
  1663. });
  1664. System.out.printf("%s rows appends%n", remoteBulkWriter.getTotalRowCount());
  1665. System.out.printf("%s rows in buffer not flushed%n", remoteBulkWriter.getTotalRowCount());
  1666. try {
  1667. remoteBulkWriter.commit(false);
  1668. } catch (InterruptedException e) {
  1669. log.error(e.getMessage());
  1670. }
  1671. List<List<String>> batchFiles = remoteBulkWriter.getBatchFiles();
  1672. System.out.printf("Remote writer done! output remote files: %s%n", batchFiles);
  1673. return batchFiles;
  1674. }
  1675. /**
  1676. * 为开源提供 remote bulk writer
  1677. * RemoteBulkWriterParam = LocalBulkWriterParam + uploadObject + clearData
  1678. *
  1679. * @param collectionSchema
  1680. * @param bulkFileType
  1681. * @return
  1682. */
  1683. private static RemoteBulkWriter buildRemoteBulkWriter(CreateCollectionReq.CollectionSchema collectionSchema, BulkFileType bulkFileType) {
  1684. StorageConnectParam connectParam = S3ConnectParam.newBuilder()
  1685. .withEndpoint(System.getProperty("minio") == null ? PropertyFilesUtil.getRunValue("minio") : System.getProperty("minio"))
  1686. .withCloudName(CloudStorage.MINIO.getCloudName())
  1687. .withBucketName("milvus-bucket")
  1688. .withAccessKey("minioadmin")
  1689. .withSecretKey("minioadmin")
  1690. .withRegion("")
  1691. .build();
  1692. RemoteBulkWriterParam bulkWriterParam = RemoteBulkWriterParam.newBuilder()
  1693. .withCollectionSchema(collectionSchema)
  1694. .withRemotePath("bulk_data")
  1695. .withFileType(bulkFileType)
  1696. .withChunkSize(5 * 1024 * 1024 * 1024L)
  1697. .withConnectParam(connectParam)
  1698. .build();
  1699. RemoteBulkWriter remoteBulkWriter = null;
  1700. try {
  1701. remoteBulkWriter = new RemoteBulkWriter(bulkWriterParam);
  1702. } catch (IOException e) {
  1703. log.error(e.getMessage());
  1704. }
  1705. return remoteBulkWriter;
  1706. }
  1707. private static LocalBulkWriter buildLocalBulkWriter(CreateCollectionReq.CollectionSchema collectionSchema, BulkFileType bulkFileType) {
  1708. LocalBulkWriterParam bulkWriterParam = LocalBulkWriterParam.newBuilder()
  1709. .withCollectionSchema(collectionSchema)
  1710. .withLocalPath("/tmp/bulk_writer")
  1711. .withFileType(bulkFileType)
  1712. .withChunkSize(5 * 1024 * 1024 * 1024L)
  1713. .build();
  1714. LocalBulkWriter localBulkWriter;
  1715. try {
  1716. localBulkWriter = new LocalBulkWriter(bulkWriterParam);
  1717. } catch (IOException e) {
  1718. throw new RuntimeException(e);
  1719. }
  1720. return localBulkWriter;
  1721. }
  1722. public static List<List<String>> providerLocalBatchFiles(String collection, BulkFileType bulkFileType, long count) {
  1723. DescribeCollectionResp describeCollectionResp = milvusClientV2.describeCollection(DescribeCollectionReq.builder().collectionName(collection).build());
  1724. CreateCollectionReq.CollectionSchema collectionSchema = describeCollectionResp.getCollectionSchema();
  1725. LocalBulkWriter localBulkWriter = buildLocalBulkWriter(collectionSchema, bulkFileType);
  1726. List<JsonObject> jsonObjects = CommonFunction.genCommonData(collection, count);
  1727. for (JsonObject jsonObject : jsonObjects) {
  1728. try {
  1729. localBulkWriter.appendRow(jsonObject);
  1730. } catch (IOException | InterruptedException e) {
  1731. throw new RuntimeException(e);
  1732. }
  1733. }
  1734. System.out.printf("%s rows appends%n", localBulkWriter.getTotalRowCount());
  1735. System.out.printf("%s rows in buffer not flushed%n", localBulkWriter.getTotalRowCount());
  1736. try {
  1737. localBulkWriter.commit(false);
  1738. } catch (InterruptedException e) {
  1739. throw new RuntimeException(e);
  1740. }
  1741. List<List<String>> batchFiles = localBulkWriter.getBatchFiles();
  1742. System.out.printf("Local writer done! output remote files: %s%n", batchFiles);
  1743. return batchFiles;
  1744. }
  1745. // minio上传--copy from v1
  1746. public static void multiFilesUpload(String path, List<List<String>> batchFiles) {
  1747. MinioClient minioClient =
  1748. MinioClient.builder()
  1749. .endpoint(System.getProperty("minio") == null ? PropertyFilesUtil.getRunValue("minio") : System.getProperty("minio"))
  1750. .credentials("minioadmin", "minioadmin")
  1751. .build();
  1752. // Make 'jsonBucket' bucket if not exist.
  1753. boolean found = false;
  1754. try {
  1755. found = minioClient.bucketExists(BucketExistsArgs.builder().bucket("milvus-bucket").build());
  1756. if (!found) {
  1757. // Make a new bucket called 'jsonBucket'.
  1758. minioClient.makeBucket(MakeBucketArgs.builder().bucket("milvus-bucket").build());
  1759. } else {
  1760. System.out.println("Bucket 'milvus-bucket' already exists.");
  1761. }
  1762. List<String> fileNameList=new ArrayList<>();
  1763. for (List<String> batchFileList : batchFiles) {
  1764. fileNameList.addAll(batchFileList);
  1765. }
  1766. for (String fileName : fileNameList) {
  1767. minioClient.uploadObject(
  1768. UploadObjectArgs.builder()
  1769. .bucket("milvus-bucket")
  1770. .object( fileName)
  1771. .filename( fileName)
  1772. .build());
  1773. System.out.println(
  1774. "'"
  1775. + path
  1776. + fileName
  1777. + "' is successfully uploaded as "
  1778. + "object '"
  1779. + fileName
  1780. + "' to bucket 'milvus-bucket'.");
  1781. }
  1782. } catch (Exception e) {
  1783. System.out.println(e.getMessage());
  1784. }
  1785. }
  1786. }