Browse Source

feat: better import file (#926)

Signed-off-by: ryjiang <jiangruiyi@gmail.com>
ryjiang 2 weeks ago
parent
commit
e913e1e982

+ 1 - 1
client/src/consts/Insert.ts

@@ -3,4 +3,4 @@ export const INSERT_CSV_SAMPLE = `Date, Country, Units, Revenue,\n
 909,3898,[3898...], [84981...]\n
 ...`;
 
-export const INSERT_MAX_SIZE = 150;
+export const INSERT_MAX_SIZE = 256;

+ 6 - 5
client/src/i18n/cn/insert.ts

@@ -7,9 +7,10 @@ const insertTrans = {
   sample: '样本',
   noteTitle: '注意',
   notes: [
-    `确保数据中的列名与 Schema 中的字段标签名相同。`,
-    `数据大小应小于 150MB,行数应小于 100000,以便正确导入数据。`,
-    `"导入数据" 选项只会添加新记录。您不能使用此选项更新现有记录。`,
+    `支持 CSV 或者 JSON 文件。`,
+    `确保 CSV 列名或 JSON key值与 Schema 字段名一致`,
+    `文件大小不能超过 256MB。`,
+    `"导入数据" 只会插入新数据,不支持 upsert。`,
   ],
   overSizeWarning: '文件数据大小应小于 {{size}}MB',
   isContainFieldNames: '第一行包含字段名?',
@@ -22,8 +23,8 @@ const insertTrans = {
   previewTipAction: '*更改标题单元格选择器值以编辑字段名',
   requiredFieldName: '字段名*',
 
-  statusLoading: '您的数据正在导入中...可能需要几分钟',
-  statusLoadingTip: '请耐心等待,谢谢',
+  statusLoading: '您的数据正在导入中...可能需要几分钟.',
+  importingRecords: '正在导入 {{count}} 条数据...可能需要几分钟.',
   statusSuccess: '数据导入成功!',
   statusError: '数据导入失败!',
 

+ 7 - 6
client/src/i18n/en/insert.ts

@@ -7,10 +7,10 @@ const insertTrans = {
   sample: 'CSV Sample',
   noteTitle: 'Note',
   notes: [
-    `CSV or JSON file is supported`,
-    `Ensure data column names match field label names in Schema.`,
-    `Data should be <150MB and <100,000 rows for proper import.`,
-    `"Import File" only appends new records; it doesn't update existing ones.`,
+    `CSV or JSON file is supported.`,
+    `Ensure CSV column names or JSON key values match schema field names.`,
+    `File size should be <256MB.`,
+    `"Import File" only inserts new data; it doesn't support upsert.`,
   ],
   overSizeWarning: 'File data size should less than {{size}}MB',
   isContainFieldNames: 'First row contains field names?',
@@ -25,8 +25,9 @@ const insertTrans = {
   previewTipAction: '*Change header cell selector value to edit field name',
   requiredFieldName: 'Field Name*',
 
-  statusLoading: 'Your data is importing now...It may take few minutes',
-  statusLoadingTip: 'Please wait patiently, thank you',
+  statusLoading: 'Importing your data... This may take a few minutes.',
+  importingRecords:
+    'Importing {{count}} records... This may take a few minutes.',
   statusSuccess: 'Import File Successfully!',
   statusError: 'Import File Failed!',
 

+ 8 - 2
client/src/pages/dialogs/ImportSampleDialog.tsx

@@ -33,8 +33,14 @@ const sizeOptions = [
     label: '10k',
     value: '10000',
   },
+  {
+    label: '100k',
+    value: '100000',
+  },
 ];
 
+const biggestSize = Number(sizeOptions[sizeOptions.length - 1].value); // numeric value of the LAST sizeOptions entry; used as the Math.min cap for the size input (assumes options are listed in ascending order)
+
 const ImportSampleDialog: FC<{
   collection: CollectionObject;
   cb?: Function;
@@ -189,7 +195,7 @@ const ImportSampleDialog: FC<{
               value={size}
               onChange={(event: any, newValue: string | null) => {
                 if (newValue && /^\d+$/.test(newValue)) {
-                  const val = Math.min(Number(newValue), 10000).toString();
+                  const val = Math.min(Number(newValue), biggestSize).toString();
                   setSize(val);
                   setCsvFileName(
                     `${collection.collection_name}.sample.${val}.csv`
@@ -203,7 +209,7 @@ const ImportSampleDialog: FC<{
                 if (/^\d*$/.test(newInputValue)) {
                   let val = newInputValue;
                   if (val) {
-                    val = Math.min(Number(val), 10000).toString();
+                    val = Math.min(Number(val), biggestSize).toString();
                   }
                   setSize(val);
                   setCsvFileName(

+ 14 - 8
client/src/pages/dialogs/insert/Dialog.tsx

@@ -48,6 +48,7 @@ const InsertContainer: FC<InsertContentProps> = ({
     InsertStatusEnum.init
   );
   const [insertFailMsg, setInsertFailMsg] = useState<string>('');
+  const [importingCount, setImportingCount] = useState<number>(0);
 
   const [nextDisabled, setNextDisabled] = useState<boolean>(false);
 
@@ -308,6 +309,9 @@ const InsertContainer: FC<InsertContentProps> = ({
             fields!
           );
 
+    // Set the number of records being imported
+    setImportingCount(data.length);
+
     const param: InsertDataParam = {
       partition_name: partitionValue,
       fields_data: data,
@@ -325,13 +329,9 @@ const InsertContainer: FC<InsertContentProps> = ({
         setInsertStatus(InsertStatusEnum.success);
       }
     } catch (err: any) {
-      const {
-        response: {
-          data: { message },
-        },
-      } = err;
-      setInsertFailMsg(message);
-      setInsertStatus(InsertStatusEnum.error);
+      // back to import step
+      setInsertStatus(InsertStatusEnum.init);
+      setActiveStep(InsertStepperEnum.import);
     }
   };
 
@@ -409,7 +409,13 @@ const InsertContainer: FC<InsertContentProps> = ({
         );
       // default represents InsertStepperEnum.status
       default:
-        return <InsertStatus status={insertStatus} failMsg={insertFailMsg} />;
+        return (
+          <InsertStatus
+            status={insertStatus}
+            failMsg={insertFailMsg}
+            importingCount={importingCount}
+          />
+        );
     }
   };
 

+ 32 - 23
client/src/pages/dialogs/insert/Status.tsx

@@ -5,30 +5,41 @@ import type { InsertStatusProps } from './Types';
 import successPath from '@/assets/imgs/insert/success.png';
 import failPath from '@/assets/imgs/insert/fail.png';
 import { InsertStatusEnum } from './consts';
-
 import Box from '@mui/material/Box';
 
-const InsertStatus: FC<InsertStatusProps> = ({ status, failMsg }) => {
+const InsertStatus: FC<InsertStatusProps> = ({ status, failMsg, importingCount }) => {
   const { t: insertTrans } = useTranslation('insert');
 
-  const textSx = (theme: Theme) => ({ marginTop: theme.spacing(3) });
-  const loadingTipSx = (theme: Theme) => ({ marginBottom: theme.spacing(6) });
-  const loadingSvgSx = (theme: Theme) => ({
-    color: theme.palette.primary.main,
+  const iconSx = {
+    width: '64px',
+    height: '64px',
+    marginBottom: 2,
+  };
+
+  const textSx = (theme: Theme) => ({
+    marginTop: theme.spacing(2),
+    textAlign: 'center',
+  });
+
+  const errorTextSx = (theme: Theme) => ({
+    ...textSx(theme),
+    color: theme.palette.error.main,
   });
+
   const wrapperSx = (theme: Theme) => ({
-    width: '75vw',
-    height: status === InsertStatusEnum.loading ? '288px' : '200px',
+    width: '40vw',
+    minHeight: '200px',
     display: 'flex',
     flexDirection: 'column',
     alignItems: 'center',
     justifyContent: 'center',
+    padding: theme.spacing(4),
   });
 
   const InsertSuccess = () => (
     <>
-      <img src={successPath} alt="insert success" />
-      <Typography variant="h4" sx={textSx}>
+      <img src={successPath} alt="insert success" style={iconSx} />
+      <Typography variant="h5" sx={textSx}>
         {insertTrans('statusSuccess')}
       </Typography>
     </>
@@ -36,25 +47,24 @@ const InsertStatus: FC<InsertStatusProps> = ({ status, failMsg }) => {
 
   const InsertLoading = () => (
     <>
-      <CircularProgress size={64} thickness={5} sx={loadingSvgSx} />
-      <Typography variant="h4" sx={textSx}>
-        {insertTrans('statusLoading')}
-      </Typography>
-      <Typography
-        variant="h5"
-        sx={theme => ({ ...textSx(theme), ...loadingTipSx(theme) })}
-      >
-        {insertTrans('statusLoadingTip')}
+      <CircularProgress size={64} thickness={4} sx={{ marginBottom: 2 }} />
+      <Typography variant="h5" sx={textSx}>
+        {insertTrans('importingRecords', { count: importingCount })}
       </Typography>
     </>
   );
+
   const InsertError = () => (
     <>
-      <img src={failPath} alt="insert error" />
-      <Typography variant="h4" sx={textSx}>
+      <img src={failPath} alt="insert error" style={iconSx} />
+      <Typography variant="h5" sx={errorTextSx}>
         {insertTrans('statusError')}
       </Typography>
-      {failMsg && <Typography sx={textSx}>{failMsg}</Typography>}
+      {failMsg && (
+        <Typography variant="body1" sx={errorTextSx}>
+          {failMsg}
+        </Typography>
+      )}
     </>
   );
 
@@ -64,7 +74,6 @@ const InsertStatus: FC<InsertStatusProps> = ({ status, failMsg }) => {
         return <InsertLoading />;
       case InsertStatusEnum.success:
         return <InsertSuccess />;
-      // status error or init as default
       default:
         return <InsertError />;
     }

+ 1 - 0
client/src/pages/dialogs/insert/Types.ts

@@ -61,4 +61,5 @@ export interface InsertPreviewProps {
 /**
  * Props for the InsertStatus view shown on the final (status) step of the
  * insert dialog.
  */
 export interface InsertStatusProps {
   status: InsertStatusEnum; // selects the view: loading, success, or (any other value) the error view
   failMsg: string; // rendered under the error title when non-empty
+  importingCount: number; // number of records being imported; interpolated as {{count}} in the loading message
 }

+ 1 - 1
server/src/app.ts

@@ -71,7 +71,7 @@ app.use(
   })
 );
 // limit json file size
-app.use(express.json({ limit: '150MB' }));
+app.use(express.json({ limit: '256MB' }));
 // TransformResInterceptor
 app.use(TransformResMiddleware);
 // LoggingInterceptor

+ 31 - 28
server/src/collections/collections.service.ts

@@ -30,6 +30,7 @@ import {
   LoadState,
   AlterCollectionFieldPropertiesReq,
   AlterIndexReq,
+  ErrorCode,
 } from '@zilliz/milvus2-sdk-node';
 import { Parser } from '@json2csv/plainjs';
 import {
@@ -284,8 +285,30 @@ export class CollectionsService {
 
   /**
    * Inserts rows through the Milvus client cached for `clientId`.
    *
    * Rows are taken from `data.fields_data`, falling back to `data.data`.
    * When there are no rows, or at most BATCH_SIZE (1000) of them, the request
    * is forwarded to `milvusClient.insert` unchanged. Otherwise the rows are
    * sent sequentially in slices of BATCH_SIZE.
    *
    * @param clientId key used to look up the client in `clientCache`
    * @param data insert request; all of its fields are copied into every batch request
    * @returns the direct insert response, or in batch mode the response of the FIRST batch only
    * @throws Error with `status.reason` when a batch response's `status.error_code`
    *   is not `ErrorCode.SUCCESS`; batches already sent are not rolled back here
    *
    * NOTE(review): the direct (non-batch) path returns the response without
    * checking `status.error_code`, unlike the batch path.
    * NOTE(review): when rows arrive via `data.data`, the spread below still
    * carries that full array into every batch request next to the sliced
    * `fields_data` - confirm the SDK gives `fields_data` precedence.
    */
   async insert(clientId: string, data: InsertReq) {
     const { milvusClient } = clientCache.get(clientId);
-    const res = await milvusClient.insert(data);
-    return res;
+    const BATCH_SIZE = 1000; // maximum number of rows per insert request
+    const fields_data = data.fields_data || data.data; // rows may be supplied under either key
+
+    // No rows, or few enough to fit in one batch: forward the request as-is
+    if (!fields_data || fields_data.length <= BATCH_SIZE) {
+      const res = await milvusClient.insert(data);
+      return res;
+    }
+
+    // Send the rows sequentially, BATCH_SIZE at a time
+    const results = [];
+    for (let i = 0; i < fields_data.length; i += BATCH_SIZE) {
+      const batchData = fields_data.slice(i, i + BATCH_SIZE);
+      // Copy every field of the original request, overriding fields_data with this batch's rows
+      const batchRequest = { ...data, fields_data: batchData } as InsertReq;
+      const result = await milvusClient.insert(batchRequest);
+
+      if (result.status.error_code !== ErrorCode.SUCCESS) { // stop at the first failed batch
+        throw new Error(result.status.reason);
+      }
+      results.push(result);
+    }
+
+    return results[0]; // only the first batch's response is surfaced to the caller
   }
 
   async upsert(clientId: string, data: InsertReq) {
@@ -613,7 +636,6 @@ export class CollectionsService {
       db_name,
     });
 
-    const BATCH_SIZE = 1000;
     const totalSize = parseInt(size, 10);
     const fields_data = genRows(
       collectionInfo.schema.fields,
@@ -630,31 +652,12 @@ export class CollectionsService {
       // If download is true, return the generated data directly
       return { sampleFile };
     } else {
-      // Handle insertion in batches if size > 1000
-      if (totalSize <= BATCH_SIZE) {
-        return await this.insert(clientId, {
-          collection_name,
-          fields_data,
-          db_name,
-        });
-      }
-
-      const results = [];
-      for (let i = 0; i < totalSize; i += BATCH_SIZE) {
-        const batchData = fields_data.slice(i, i + BATCH_SIZE);
-        const result = await this.insert(clientId, {
-          collection_name,
-          fields_data: batchData,
-          db_name,
-        });
-        results.push(result);
-      }
-
-      return {
-        status: 'success',
-        message: `Successfully inserted ${totalSize} records in ${Math.ceil(totalSize / BATCH_SIZE)} batches`,
-        results,
-      };
+      // Insert all data at once, batch handling is now in insert method
+      return await this.insert(clientId, {
+        collection_name,
+        fields_data,
+        db_name,
+      });
     }
   }