1
0

knowledge.py 24 KB


  1. from typing import List, Optional
  2. from pydantic import BaseModel
  3. from fastapi import APIRouter, Depends, HTTPException, status, Request, Query
  4. import logging
  5. from open_webui.models.knowledge import (
  6. Knowledges,
  7. KnowledgeForm,
  8. KnowledgeResponse,
  9. KnowledgeUserResponse,
  10. )
  11. from open_webui.models.files import Files, FileModel, FileMetadataResponse
  12. from open_webui.retrieval.vector.factory import VECTOR_DB_CLIENT
  13. from open_webui.routers.retrieval import (
  14. process_file,
  15. ProcessFileForm,
  16. process_files_batch,
  17. BatchProcessFilesForm,
  18. )
  19. from open_webui.storage.provider import Storage
  20. from open_webui.constants import ERROR_MESSAGES
  21. from open_webui.utils.auth import get_verified_user
  22. from open_webui.utils.access_control import has_access, has_permission
  23. from open_webui.env import SRC_LOG_LEVELS
  24. from open_webui.config import BYPASS_ADMIN_ACCESS_CONTROL
  25. from open_webui.models.models import Models, ModelForm
# Module-level logger; its level comes from the "MODELS" entry of SRC_LOG_LEVELS.
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["MODELS"])

# Router on which every endpoint in this module is registered.
router = APIRouter()
  29. ############################
  30. # getKnowledgeBases
  31. ############################
  32. @router.get("/", response_model=list[KnowledgeUserResponse])
  33. async def get_knowledge(user=Depends(get_verified_user)):
  34. knowledge_bases = []
  35. if user.role == "admin" and BYPASS_ADMIN_ACCESS_CONTROL:
  36. knowledge_bases = Knowledges.get_knowledge_bases()
  37. else:
  38. knowledge_bases = Knowledges.get_knowledge_bases_by_user_id(user.id, "read")
  39. # Get files for each knowledge base
  40. knowledge_with_files = []
  41. for knowledge_base in knowledge_bases:
  42. files = []
  43. if knowledge_base.data:
  44. files = Files.get_file_metadatas_by_ids(
  45. knowledge_base.data.get("file_ids", [])
  46. )
  47. # Check if all files exist
  48. if len(files) != len(knowledge_base.data.get("file_ids", [])):
  49. missing_files = list(
  50. set(knowledge_base.data.get("file_ids", []))
  51. - set([file.id for file in files])
  52. )
  53. if missing_files:
  54. data = knowledge_base.data or {}
  55. file_ids = data.get("file_ids", [])
  56. for missing_file in missing_files:
  57. file_ids.remove(missing_file)
  58. data["file_ids"] = file_ids
  59. Knowledges.update_knowledge_data_by_id(
  60. id=knowledge_base.id, data=data
  61. )
  62. files = Files.get_file_metadatas_by_ids(file_ids)
  63. knowledge_with_files.append(
  64. KnowledgeUserResponse(
  65. **knowledge_base.model_dump(),
  66. files=files,
  67. )
  68. )
  69. return knowledge_with_files
  70. @router.get("/list", response_model=list[KnowledgeUserResponse])
  71. async def get_knowledge_list(user=Depends(get_verified_user)):
  72. knowledge_bases = []
  73. if user.role == "admin" and BYPASS_ADMIN_ACCESS_CONTROL:
  74. knowledge_bases = Knowledges.get_knowledge_bases()
  75. else:
  76. knowledge_bases = Knowledges.get_knowledge_bases_by_user_id(user.id, "write")
  77. # Get files for each knowledge base
  78. knowledge_with_files = []
  79. for knowledge_base in knowledge_bases:
  80. files = []
  81. if knowledge_base.data:
  82. files = Files.get_file_metadatas_by_ids(
  83. knowledge_base.data.get("file_ids", [])
  84. )
  85. # Check if all files exist
  86. if len(files) != len(knowledge_base.data.get("file_ids", [])):
  87. missing_files = list(
  88. set(knowledge_base.data.get("file_ids", []))
  89. - set([file.id for file in files])
  90. )
  91. if missing_files:
  92. data = knowledge_base.data or {}
  93. file_ids = data.get("file_ids", [])
  94. for missing_file in missing_files:
  95. file_ids.remove(missing_file)
  96. data["file_ids"] = file_ids
  97. Knowledges.update_knowledge_data_by_id(
  98. id=knowledge_base.id, data=data
  99. )
  100. files = Files.get_file_metadatas_by_ids(file_ids)
  101. knowledge_with_files.append(
  102. KnowledgeUserResponse(
  103. **knowledge_base.model_dump(),
  104. files=files,
  105. )
  106. )
  107. return knowledge_with_files
  108. ############################
  109. # CreateNewKnowledge
  110. ############################
  111. @router.post("/create", response_model=Optional[KnowledgeResponse])
  112. async def create_new_knowledge(
  113. request: Request, form_data: KnowledgeForm, user=Depends(get_verified_user)
  114. ):
  115. if user.role != "admin" and not has_permission(
  116. user.id, "workspace.knowledge", request.app.state.config.USER_PERMISSIONS
  117. ):
  118. raise HTTPException(
  119. status_code=status.HTTP_401_UNAUTHORIZED,
  120. detail=ERROR_MESSAGES.UNAUTHORIZED,
  121. )
  122. # Check if user can share publicly
  123. if (
  124. user.role != "admin"
  125. and form_data.access_control == None
  126. and not has_permission(
  127. user.id,
  128. "sharing.public_knowledge",
  129. request.app.state.config.USER_PERMISSIONS,
  130. )
  131. ):
  132. form_data.access_control = {}
  133. knowledge = Knowledges.insert_new_knowledge(user.id, form_data)
  134. if knowledge:
  135. return knowledge
  136. else:
  137. raise HTTPException(
  138. status_code=status.HTTP_400_BAD_REQUEST,
  139. detail=ERROR_MESSAGES.FILE_EXISTS,
  140. )
  141. ############################
  142. # ReindexKnowledgeFiles
  143. ############################
  144. @router.post("/reindex", response_model=bool)
  145. async def reindex_knowledge_files(request: Request, user=Depends(get_verified_user)):
  146. if user.role != "admin":
  147. raise HTTPException(
  148. status_code=status.HTTP_401_UNAUTHORIZED,
  149. detail=ERROR_MESSAGES.UNAUTHORIZED,
  150. )
  151. knowledge_bases = Knowledges.get_knowledge_bases()
  152. log.info(f"Starting reindexing for {len(knowledge_bases)} knowledge bases")
  153. deleted_knowledge_bases = []
  154. for knowledge_base in knowledge_bases:
  155. # -- Robust error handling for missing or invalid data
  156. if not knowledge_base.data or not isinstance(knowledge_base.data, dict):
  157. log.warning(
  158. f"Knowledge base {knowledge_base.id} has no data or invalid data ({knowledge_base.data!r}). Deleting."
  159. )
  160. try:
  161. Knowledges.delete_knowledge_by_id(id=knowledge_base.id)
  162. deleted_knowledge_bases.append(knowledge_base.id)
  163. except Exception as e:
  164. log.error(
  165. f"Failed to delete invalid knowledge base {knowledge_base.id}: {e}"
  166. )
  167. continue
  168. try:
  169. file_ids = knowledge_base.data.get("file_ids", [])
  170. files = Files.get_files_by_ids(file_ids)
  171. try:
  172. if VECTOR_DB_CLIENT.has_collection(collection_name=knowledge_base.id):
  173. VECTOR_DB_CLIENT.delete_collection(
  174. collection_name=knowledge_base.id
  175. )
  176. except Exception as e:
  177. log.error(f"Error deleting collection {knowledge_base.id}: {str(e)}")
  178. continue # Skip, don't raise
  179. failed_files = []
  180. for file in files:
  181. try:
  182. process_file(
  183. request,
  184. ProcessFileForm(
  185. file_id=file.id, collection_name=knowledge_base.id
  186. ),
  187. user=user,
  188. )
  189. except Exception as e:
  190. log.error(
  191. f"Error processing file {file.filename} (ID: {file.id}): {str(e)}"
  192. )
  193. failed_files.append({"file_id": file.id, "error": str(e)})
  194. continue
  195. except Exception as e:
  196. log.error(f"Error processing knowledge base {knowledge_base.id}: {str(e)}")
  197. # Don't raise, just continue
  198. continue
  199. if failed_files:
  200. log.warning(
  201. f"Failed to process {len(failed_files)} files in knowledge base {knowledge_base.id}"
  202. )
  203. for failed in failed_files:
  204. log.warning(f"File ID: {failed['file_id']}, Error: {failed['error']}")
  205. log.info(
  206. f"Reindexing completed. Deleted {len(deleted_knowledge_bases)} invalid knowledge bases: {deleted_knowledge_bases}"
  207. )
  208. return True
  209. ############################
  210. # GetKnowledgeById
  211. ############################
  212. class KnowledgeFilesResponse(KnowledgeResponse):
  213. files: list[FileMetadataResponse]
  214. @router.get("/{id}", response_model=Optional[KnowledgeFilesResponse])
  215. async def get_knowledge_by_id(id: str, user=Depends(get_verified_user)):
  216. knowledge = Knowledges.get_knowledge_by_id(id=id)
  217. if knowledge:
  218. if (
  219. user.role == "admin"
  220. or knowledge.user_id == user.id
  221. or has_access(user.id, "read", knowledge.access_control)
  222. ):
  223. file_ids = knowledge.data.get("file_ids", []) if knowledge.data else []
  224. files = Files.get_file_metadatas_by_ids(file_ids)
  225. return KnowledgeFilesResponse(
  226. **knowledge.model_dump(),
  227. files=files,
  228. )
  229. else:
  230. raise HTTPException(
  231. status_code=status.HTTP_401_UNAUTHORIZED,
  232. detail=ERROR_MESSAGES.NOT_FOUND,
  233. )
  234. ############################
  235. # UpdateKnowledgeById
  236. ############################
  237. @router.post("/{id}/update", response_model=Optional[KnowledgeFilesResponse])
  238. async def update_knowledge_by_id(
  239. request: Request,
  240. id: str,
  241. form_data: KnowledgeForm,
  242. user=Depends(get_verified_user),
  243. ):
  244. knowledge = Knowledges.get_knowledge_by_id(id=id)
  245. if not knowledge:
  246. raise HTTPException(
  247. status_code=status.HTTP_400_BAD_REQUEST,
  248. detail=ERROR_MESSAGES.NOT_FOUND,
  249. )
  250. # Is the user the original creator, in a group with write access, or an admin
  251. if (
  252. knowledge.user_id != user.id
  253. and not has_access(user.id, "write", knowledge.access_control)
  254. and user.role != "admin"
  255. ):
  256. raise HTTPException(
  257. status_code=status.HTTP_400_BAD_REQUEST,
  258. detail=ERROR_MESSAGES.ACCESS_PROHIBITED,
  259. )
  260. # Check if user can share publicly
  261. if (
  262. user.role != "admin"
  263. and form_data.access_control == None
  264. and not has_permission(
  265. user.id,
  266. "sharing.public_knowledge",
  267. request.app.state.config.USER_PERMISSIONS,
  268. )
  269. ):
  270. form_data.access_control = {}
  271. knowledge = Knowledges.update_knowledge_by_id(id=id, form_data=form_data)
  272. if knowledge:
  273. file_ids = knowledge.data.get("file_ids", []) if knowledge.data else []
  274. files = Files.get_file_metadatas_by_ids(file_ids)
  275. return KnowledgeFilesResponse(
  276. **knowledge.model_dump(),
  277. files=files,
  278. )
  279. else:
  280. raise HTTPException(
  281. status_code=status.HTTP_400_BAD_REQUEST,
  282. detail=ERROR_MESSAGES.ID_TAKEN,
  283. )
  284. ############################
  285. # AddFileToKnowledge
  286. ############################
# Request body that identifies a single file by id; used by the file
# add/update/remove endpoints and, as a list, by the batch-add endpoint.
class KnowledgeFileIdForm(BaseModel):
    file_id: str  # id looked up with Files.get_file_by_id
  289. @router.post("/{id}/file/add", response_model=Optional[KnowledgeFilesResponse])
  290. def add_file_to_knowledge_by_id(
  291. request: Request,
  292. id: str,
  293. form_data: KnowledgeFileIdForm,
  294. user=Depends(get_verified_user),
  295. ):
  296. knowledge = Knowledges.get_knowledge_by_id(id=id)
  297. if not knowledge:
  298. raise HTTPException(
  299. status_code=status.HTTP_400_BAD_REQUEST,
  300. detail=ERROR_MESSAGES.NOT_FOUND,
  301. )
  302. if (
  303. knowledge.user_id != user.id
  304. and not has_access(user.id, "write", knowledge.access_control)
  305. and user.role != "admin"
  306. ):
  307. raise HTTPException(
  308. status_code=status.HTTP_400_BAD_REQUEST,
  309. detail=ERROR_MESSAGES.ACCESS_PROHIBITED,
  310. )
  311. file = Files.get_file_by_id(form_data.file_id)
  312. if not file:
  313. raise HTTPException(
  314. status_code=status.HTTP_400_BAD_REQUEST,
  315. detail=ERROR_MESSAGES.NOT_FOUND,
  316. )
  317. if not file.data:
  318. raise HTTPException(
  319. status_code=status.HTTP_400_BAD_REQUEST,
  320. detail=ERROR_MESSAGES.FILE_NOT_PROCESSED,
  321. )
  322. # Add content to the vector database
  323. try:
  324. process_file(
  325. request,
  326. ProcessFileForm(file_id=form_data.file_id, collection_name=id),
  327. user=user,
  328. )
  329. except Exception as e:
  330. log.debug(e)
  331. raise HTTPException(
  332. status_code=status.HTTP_400_BAD_REQUEST,
  333. detail=str(e),
  334. )
  335. if knowledge:
  336. data = knowledge.data or {}
  337. file_ids = data.get("file_ids", [])
  338. if form_data.file_id not in file_ids:
  339. file_ids.append(form_data.file_id)
  340. data["file_ids"] = file_ids
  341. knowledge = Knowledges.update_knowledge_data_by_id(id=id, data=data)
  342. if knowledge:
  343. files = Files.get_file_metadatas_by_ids(file_ids)
  344. return KnowledgeFilesResponse(
  345. **knowledge.model_dump(),
  346. files=files,
  347. )
  348. else:
  349. raise HTTPException(
  350. status_code=status.HTTP_400_BAD_REQUEST,
  351. detail=ERROR_MESSAGES.DEFAULT("knowledge"),
  352. )
  353. else:
  354. raise HTTPException(
  355. status_code=status.HTTP_400_BAD_REQUEST,
  356. detail=ERROR_MESSAGES.DEFAULT("file_id"),
  357. )
  358. else:
  359. raise HTTPException(
  360. status_code=status.HTTP_400_BAD_REQUEST,
  361. detail=ERROR_MESSAGES.NOT_FOUND,
  362. )
  363. @router.post("/{id}/file/update", response_model=Optional[KnowledgeFilesResponse])
  364. def update_file_from_knowledge_by_id(
  365. request: Request,
  366. id: str,
  367. form_data: KnowledgeFileIdForm,
  368. user=Depends(get_verified_user),
  369. ):
  370. knowledge = Knowledges.get_knowledge_by_id(id=id)
  371. if not knowledge:
  372. raise HTTPException(
  373. status_code=status.HTTP_400_BAD_REQUEST,
  374. detail=ERROR_MESSAGES.NOT_FOUND,
  375. )
  376. if (
  377. knowledge.user_id != user.id
  378. and not has_access(user.id, "write", knowledge.access_control)
  379. and user.role != "admin"
  380. ):
  381. raise HTTPException(
  382. status_code=status.HTTP_400_BAD_REQUEST,
  383. detail=ERROR_MESSAGES.ACCESS_PROHIBITED,
  384. )
  385. file = Files.get_file_by_id(form_data.file_id)
  386. if not file:
  387. raise HTTPException(
  388. status_code=status.HTTP_400_BAD_REQUEST,
  389. detail=ERROR_MESSAGES.NOT_FOUND,
  390. )
  391. # Remove content from the vector database
  392. VECTOR_DB_CLIENT.delete(
  393. collection_name=knowledge.id, filter={"file_id": form_data.file_id}
  394. )
  395. # Add content to the vector database
  396. try:
  397. process_file(
  398. request,
  399. ProcessFileForm(file_id=form_data.file_id, collection_name=id),
  400. user=user,
  401. )
  402. except Exception as e:
  403. raise HTTPException(
  404. status_code=status.HTTP_400_BAD_REQUEST,
  405. detail=str(e),
  406. )
  407. if knowledge:
  408. data = knowledge.data or {}
  409. file_ids = data.get("file_ids", [])
  410. files = Files.get_file_metadatas_by_ids(file_ids)
  411. return KnowledgeFilesResponse(
  412. **knowledge.model_dump(),
  413. files=files,
  414. )
  415. else:
  416. raise HTTPException(
  417. status_code=status.HTTP_400_BAD_REQUEST,
  418. detail=ERROR_MESSAGES.NOT_FOUND,
  419. )
  420. ############################
  421. # RemoveFileFromKnowledge
  422. ############################
  423. @router.post("/{id}/file/remove", response_model=Optional[KnowledgeFilesResponse])
  424. def remove_file_from_knowledge_by_id(
  425. id: str,
  426. form_data: KnowledgeFileIdForm,
  427. delete_file: bool = Query(True),
  428. user=Depends(get_verified_user),
  429. ):
  430. knowledge = Knowledges.get_knowledge_by_id(id=id)
  431. if not knowledge:
  432. raise HTTPException(
  433. status_code=status.HTTP_400_BAD_REQUEST,
  434. detail=ERROR_MESSAGES.NOT_FOUND,
  435. )
  436. if (
  437. knowledge.user_id != user.id
  438. and not has_access(user.id, "write", knowledge.access_control)
  439. and user.role != "admin"
  440. ):
  441. raise HTTPException(
  442. status_code=status.HTTP_400_BAD_REQUEST,
  443. detail=ERROR_MESSAGES.ACCESS_PROHIBITED,
  444. )
  445. file = Files.get_file_by_id(form_data.file_id)
  446. if not file:
  447. raise HTTPException(
  448. status_code=status.HTTP_400_BAD_REQUEST,
  449. detail=ERROR_MESSAGES.NOT_FOUND,
  450. )
  451. # Remove content from the vector database
  452. try:
  453. VECTOR_DB_CLIENT.delete(
  454. collection_name=knowledge.id, filter={"file_id": form_data.file_id}
  455. )
  456. except Exception as e:
  457. log.debug("This was most likely caused by bypassing embedding processing")
  458. log.debug(e)
  459. pass
  460. if delete_file:
  461. try:
  462. # Remove the file's collection from vector database
  463. file_collection = f"file-{form_data.file_id}"
  464. if VECTOR_DB_CLIENT.has_collection(collection_name=file_collection):
  465. VECTOR_DB_CLIENT.delete_collection(collection_name=file_collection)
  466. except Exception as e:
  467. log.debug("This was most likely caused by bypassing embedding processing")
  468. log.debug(e)
  469. pass
  470. # Delete file from database
  471. Files.delete_file_by_id(form_data.file_id)
  472. if knowledge:
  473. data = knowledge.data or {}
  474. file_ids = data.get("file_ids", [])
  475. if form_data.file_id in file_ids:
  476. file_ids.remove(form_data.file_id)
  477. data["file_ids"] = file_ids
  478. knowledge = Knowledges.update_knowledge_data_by_id(id=id, data=data)
  479. if knowledge:
  480. files = Files.get_file_metadatas_by_ids(file_ids)
  481. return KnowledgeFilesResponse(
  482. **knowledge.model_dump(),
  483. files=files,
  484. )
  485. else:
  486. raise HTTPException(
  487. status_code=status.HTTP_400_BAD_REQUEST,
  488. detail=ERROR_MESSAGES.DEFAULT("knowledge"),
  489. )
  490. else:
  491. raise HTTPException(
  492. status_code=status.HTTP_400_BAD_REQUEST,
  493. detail=ERROR_MESSAGES.DEFAULT("file_id"),
  494. )
  495. else:
  496. raise HTTPException(
  497. status_code=status.HTTP_400_BAD_REQUEST,
  498. detail=ERROR_MESSAGES.NOT_FOUND,
  499. )
  500. ############################
  501. # DeleteKnowledgeById
  502. ############################
  503. @router.delete("/{id}/delete", response_model=bool)
  504. async def delete_knowledge_by_id(id: str, user=Depends(get_verified_user)):
  505. knowledge = Knowledges.get_knowledge_by_id(id=id)
  506. if not knowledge:
  507. raise HTTPException(
  508. status_code=status.HTTP_400_BAD_REQUEST,
  509. detail=ERROR_MESSAGES.NOT_FOUND,
  510. )
  511. if (
  512. knowledge.user_id != user.id
  513. and not has_access(user.id, "write", knowledge.access_control)
  514. and user.role != "admin"
  515. ):
  516. raise HTTPException(
  517. status_code=status.HTTP_400_BAD_REQUEST,
  518. detail=ERROR_MESSAGES.ACCESS_PROHIBITED,
  519. )
  520. log.info(f"Deleting knowledge base: {id} (name: {knowledge.name})")
  521. # Get all models
  522. models = Models.get_all_models()
  523. log.info(f"Found {len(models)} models to check for knowledge base {id}")
  524. # Update models that reference this knowledge base
  525. for model in models:
  526. if model.meta and hasattr(model.meta, "knowledge"):
  527. knowledge_list = model.meta.knowledge or []
  528. # Filter out the deleted knowledge base
  529. updated_knowledge = [k for k in knowledge_list if k.get("id") != id]
  530. # If the knowledge list changed, update the model
  531. if len(updated_knowledge) != len(knowledge_list):
  532. log.info(f"Updating model {model.id} to remove knowledge base {id}")
  533. model.meta.knowledge = updated_knowledge
  534. # Create a ModelForm for the update
  535. model_form = ModelForm(
  536. id=model.id,
  537. name=model.name,
  538. base_model_id=model.base_model_id,
  539. meta=model.meta,
  540. params=model.params,
  541. access_control=model.access_control,
  542. is_active=model.is_active,
  543. )
  544. Models.update_model_by_id(model.id, model_form)
  545. # Clean up vector DB
  546. try:
  547. VECTOR_DB_CLIENT.delete_collection(collection_name=id)
  548. except Exception as e:
  549. log.debug(e)
  550. pass
  551. result = Knowledges.delete_knowledge_by_id(id=id)
  552. return result
  553. ############################
  554. # ResetKnowledgeById
  555. ############################
  556. @router.post("/{id}/reset", response_model=Optional[KnowledgeResponse])
  557. async def reset_knowledge_by_id(id: str, user=Depends(get_verified_user)):
  558. knowledge = Knowledges.get_knowledge_by_id(id=id)
  559. if not knowledge:
  560. raise HTTPException(
  561. status_code=status.HTTP_400_BAD_REQUEST,
  562. detail=ERROR_MESSAGES.NOT_FOUND,
  563. )
  564. if (
  565. knowledge.user_id != user.id
  566. and not has_access(user.id, "write", knowledge.access_control)
  567. and user.role != "admin"
  568. ):
  569. raise HTTPException(
  570. status_code=status.HTTP_400_BAD_REQUEST,
  571. detail=ERROR_MESSAGES.ACCESS_PROHIBITED,
  572. )
  573. try:
  574. VECTOR_DB_CLIENT.delete_collection(collection_name=id)
  575. except Exception as e:
  576. log.debug(e)
  577. pass
  578. knowledge = Knowledges.update_knowledge_data_by_id(id=id, data={"file_ids": []})
  579. return knowledge
  580. ############################
  581. # AddFilesToKnowledge
  582. ############################
  583. @router.post("/{id}/files/batch/add", response_model=Optional[KnowledgeFilesResponse])
  584. def add_files_to_knowledge_batch(
  585. request: Request,
  586. id: str,
  587. form_data: list[KnowledgeFileIdForm],
  588. user=Depends(get_verified_user),
  589. ):
  590. """
  591. Add multiple files to a knowledge base
  592. """
  593. knowledge = Knowledges.get_knowledge_by_id(id=id)
  594. if not knowledge:
  595. raise HTTPException(
  596. status_code=status.HTTP_400_BAD_REQUEST,
  597. detail=ERROR_MESSAGES.NOT_FOUND,
  598. )
  599. if (
  600. knowledge.user_id != user.id
  601. and not has_access(user.id, "write", knowledge.access_control)
  602. and user.role != "admin"
  603. ):
  604. raise HTTPException(
  605. status_code=status.HTTP_400_BAD_REQUEST,
  606. detail=ERROR_MESSAGES.ACCESS_PROHIBITED,
  607. )
  608. # Get files content
  609. log.info(f"files/batch/add - {len(form_data)} files")
  610. files: List[FileModel] = []
  611. for form in form_data:
  612. file = Files.get_file_by_id(form.file_id)
  613. if not file:
  614. raise HTTPException(
  615. status_code=status.HTTP_400_BAD_REQUEST,
  616. detail=f"File {form.file_id} not found",
  617. )
  618. files.append(file)
  619. # Process files
  620. try:
  621. result = process_files_batch(
  622. request=request,
  623. form_data=BatchProcessFilesForm(files=files, collection_name=id),
  624. user=user,
  625. )
  626. except Exception as e:
  627. log.error(
  628. f"add_files_to_knowledge_batch: Exception occurred: {e}", exc_info=True
  629. )
  630. raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e))
  631. # Add successful files to knowledge base
  632. data = knowledge.data or {}
  633. existing_file_ids = data.get("file_ids", [])
  634. # Only add files that were successfully processed
  635. successful_file_ids = [r.file_id for r in result.results if r.status == "completed"]
  636. for file_id in successful_file_ids:
  637. if file_id not in existing_file_ids:
  638. existing_file_ids.append(file_id)
  639. data["file_ids"] = existing_file_ids
  640. knowledge = Knowledges.update_knowledge_data_by_id(id=id, data=data)
  641. # If there were any errors, include them in the response
  642. if result.errors:
  643. error_details = [f"{err.file_id}: {err.error}" for err in result.errors]
  644. return KnowledgeFilesResponse(
  645. **knowledge.model_dump(),
  646. files=Files.get_file_metadatas_by_ids(existing_file_ids),
  647. warnings={
  648. "message": "Some files failed to process",
  649. "errors": error_details,
  650. },
  651. )
  652. return KnowledgeFilesResponse(
  653. **knowledge.model_dump(),
  654. files=Files.get_file_metadatas_by_ids(existing_file_ids),
  655. )